Diffstat (limited to 'fs/btrfs')
35 files changed, 2004 insertions, 1629 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c1e0b0caf9cc..ecb5832c0967 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -21,708 +22,313 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/freezer.h>
+#include <linux/workqueue.h>
 #include "async-thread.h"
+#include "ctree.h"
+
+#define WORK_DONE_BIT 0
+#define WORK_ORDER_DONE_BIT 1
+#define WORK_HIGH_PRIO_BIT 2
+
+#define NO_THRESHOLD (-1)
+#define DFT_THRESHOLD (32)
+
+struct __btrfs_workqueue {
+	struct workqueue_struct *normal_wq;
+	/* List head pointing to ordered work list */
+	struct list_head ordered_list;
+
+	/* Spinlock for ordered_list */
+	spinlock_t list_lock;
+
+	/* Thresholding related variants */
+	atomic_t pending;
+	int max_active;
+	int current_max;
+	int thresh;
+	unsigned int count;
+	spinlock_t thres_lock;
+};
 
-#define WORK_QUEUED_BIT 0
-#define WORK_DONE_BIT 1
-#define WORK_ORDER_DONE_BIT 2
-#define WORK_HIGH_PRIO_BIT 3
-
-/*
- * container for the kthread task pointer and the list of pending work
- * One of these is allocated per thread.
- */
-struct btrfs_worker_thread {
-	/* pool we belong to */
-	struct btrfs_workers *workers;
-
-	/* list of struct btrfs_work that are waiting for service */
-	struct list_head pending;
-	struct list_head prio_pending;
-
-	/* list of worker threads from struct btrfs_workers */
-	struct list_head worker_list;
-
-	/* kthread */
-	struct task_struct *task;
+struct btrfs_workqueue {
+	struct __btrfs_workqueue *normal;
+	struct __btrfs_workqueue *high;
+};
 
-	/* number of things on the pending list */
-	atomic_t num_pending;
+static inline struct __btrfs_workqueue
+*__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+			 int thresh)
+{
+	struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
 
-	/* reference counter for this struct */
-	atomic_t refs;
+	if (unlikely(!ret))
+		return NULL;
 
-	unsigned long sequence;
+	ret->max_active = max_active;
+	atomic_set(&ret->pending, 0);
+	if (thresh == 0)
+		thresh = DFT_THRESHOLD;
+	/* For low threshold, disabling threshold is a better choice */
+	if (thresh < DFT_THRESHOLD) {
+		ret->current_max = max_active;
+		ret->thresh = NO_THRESHOLD;
+	} else {
+		ret->current_max = 1;
+		ret->thresh = thresh;
+	}
 
-	/* protects the pending list. */
-	spinlock_t lock;
+	if (flags & WQ_HIGHPRI)
+		ret->normal_wq = alloc_workqueue("%s-%s-high", flags,
+						 ret->max_active,
+						 "btrfs", name);
+	else
+		ret->normal_wq = alloc_workqueue("%s-%s", flags,
+						 ret->max_active, "btrfs",
+						 name);
+	if (unlikely(!ret->normal_wq)) {
+		kfree(ret);
+		return NULL;
+	}
 
-	/* set to non-zero when this thread is already awake and kicking */
-	int working;
+	INIT_LIST_HEAD(&ret->ordered_list);
+	spin_lock_init(&ret->list_lock);
+	spin_lock_init(&ret->thres_lock);
+	trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
+	return ret;
+}
 
-	/* are we currently idle */
-	int idle;
-};
+static inline void
+__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
 
-static int __btrfs_start_workers(struct btrfs_workers *workers);
+struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+					      int flags,
+					      int max_active,
+					      int thresh)
+{
+	struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
 
-/*
- * btrfs_start_workers uses kthread_run, which can block waiting for memory
- * for a very long time. It will actually throttle on page writeback,
- * and so it may not make progress until after our btrfs worker threads
- * process all of the pending work structs in their queue
- *
- * This means we can't use btrfs_start_workers from inside a btrfs worker
- * thread that is used as part of cleaning dirty memory, which pretty much
- * involves all of the worker threads.
- *
- * Instead we have a helper queue who never has more than one thread
- * where we scheduler thread start operations. This worker_start struct
- * is used to contain the work and hold a pointer to the queue that needs
- * another worker.
- */
-struct worker_start {
-	struct btrfs_work work;
-	struct btrfs_workers *queue;
-};
+	if (unlikely(!ret))
+		return NULL;
 
-static void start_new_worker_func(struct btrfs_work *work)
-{
-	struct worker_start *start;
-	start = container_of(work, struct worker_start, work);
-	__btrfs_start_workers(start->queue);
-	kfree(start);
-}
+	ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI,
+					      max_active, thresh);
+	if (unlikely(!ret->normal)) {
+		kfree(ret);
+		return NULL;
+	}
 
-/*
- * helper function to move a thread onto the idle list after it
- * has finished some requests.
- */
-static void check_idle_worker(struct btrfs_worker_thread *worker)
-{
-	if (!worker->idle && atomic_read(&worker->num_pending) <
-	    worker->workers->idle_thresh / 2) {
-		unsigned long flags;
-		spin_lock_irqsave(&worker->workers->lock, flags);
-		worker->idle = 1;
-
-		/* the list may be empty if the worker is just starting */
-		if (!list_empty(&worker->worker_list) &&
-		    !worker->workers->stopping) {
-			list_move(&worker->worker_list,
-				  &worker->workers->idle_list);
+	if (flags & WQ_HIGHPRI) {
+		ret->high = __btrfs_alloc_workqueue(name, flags, max_active,
+						    thresh);
+		if (unlikely(!ret->high)) {
+			__btrfs_destroy_workqueue(ret->normal);
+			kfree(ret);
+			return NULL;
 		}
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
 	}
+	return ret;
 }
 
 /*
- * helper function to move a thread off the idle list after new
- * pending work is added.
+ * Hook for threshold which will be called in btrfs_queue_work.
+ * This hook WILL be called in IRQ handler context,
+ * so workqueue_set_max_active MUST NOT be called in this hook
  */
-static void check_busy_worker(struct btrfs_worker_thread *worker)
+static inline void thresh_queue_hook(struct __btrfs_workqueue *wq)
 {
-	if (worker->idle && atomic_read(&worker->num_pending) >=
-	    worker->workers->idle_thresh) {
-		unsigned long flags;
-		spin_lock_irqsave(&worker->workers->lock, flags);
-		worker->idle = 0;
-
-		if (!list_empty(&worker->worker_list) &&
-		    !worker->workers->stopping) {
-			list_move_tail(&worker->worker_list,
-				       &worker->workers->worker_list);
-		}
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
-	}
+	if (wq->thresh == NO_THRESHOLD)
+		return;
+	atomic_inc(&wq->pending);
 }
 
-static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
+/*
+ * Hook for threshold which will be called before executing the work,
+ * This hook is called in kthread content.
+ * So workqueue_set_max_active is called here.
+ */
+static inline void thresh_exec_hook(struct __btrfs_workqueue *wq)
 {
-	struct btrfs_workers *workers = worker->workers;
-	struct worker_start *start;
-	unsigned long flags;
+	int new_max_active;
+	long pending;
+	int need_change = 0;
 
-	rmb();
-	if (!workers->atomic_start_pending)
+	if (wq->thresh == NO_THRESHOLD)
 		return;
 
-	start = kzalloc(sizeof(*start), GFP_NOFS);
-	if (!start)
-		return;
-
-	start->work.func = start_new_worker_func;
-	start->queue = workers;
-
-	spin_lock_irqsave(&workers->lock, flags);
-	if (!workers->atomic_start_pending)
+	atomic_dec(&wq->pending);
+	spin_lock(&wq->thres_lock);
+	/*
+	 * Use wq->count to limit the calling frequency of
+	 * workqueue_set_max_active.
+	 */
+	wq->count++;
+	wq->count %= (wq->thresh / 4);
+	if (!wq->count)
 		goto out;
-
-	workers->atomic_start_pending = 0;
-	if (workers->num_workers + workers->num_workers_starting >=
-	    workers->max_workers)
-		goto out;
-
-	workers->num_workers_starting += 1;
-	spin_unlock_irqrestore(&workers->lock, flags);
-	btrfs_queue_worker(workers->atomic_worker_start, &start->work);
-	return;
+	new_max_active = wq->current_max;
 
+	/*
+	 * pending may be changed later, but it's OK since we really
+	 * don't need it so accurate to calculate new_max_active.
+	 */
+	pending = atomic_read(&wq->pending);
+	if (pending > wq->thresh)
+		new_max_active++;
+	if (pending < wq->thresh / 2)
+		new_max_active--;
+	new_max_active = clamp_val(new_max_active, 1, wq->max_active);
+	if (new_max_active != wq->current_max) {
+		need_change = 1;
+		wq->current_max = new_max_active;
+	}
 out:
-	kfree(start);
-	spin_unlock_irqrestore(&workers->lock, flags);
+	spin_unlock(&wq->thres_lock);
+
+	if (need_change) {
+		workqueue_set_max_active(wq->normal_wq, wq->current_max);
+	}
 }
 
-static noinline void run_ordered_completions(struct btrfs_workers *workers,
-					     struct btrfs_work *work)
+static void run_ordered_work(struct __btrfs_workqueue *wq)
 {
-	if (!workers->ordered)
-		return;
-
-	set_bit(WORK_DONE_BIT, &work->flags);
-
-	spin_lock(&workers->order_lock);
+	struct list_head *list = &wq->ordered_list;
+	struct btrfs_work *work;
+	spinlock_t *lock = &wq->list_lock;
+	unsigned long flags;
 
 	while (1) {
-		if (!list_empty(&workers->prio_order_list)) {
-			work = list_entry(workers->prio_order_list.next,
-					  struct btrfs_work, order_list);
-		} else if (!list_empty(&workers->order_list)) {
-			work = list_entry(workers->order_list.next,
-					  struct btrfs_work, order_list);
-		} else {
+		spin_lock_irqsave(lock, flags);
+		if (list_empty(list))
 			break;
-		}
+		work = list_entry(list->next, struct btrfs_work,
+				  ordered_list);
 		if (!test_bit(WORK_DONE_BIT, &work->flags))
 			break;
 
-		/* we are going to call the ordered done function, but
+		/*
+		 * we are going to call the ordered done function, but
 		 * we leave the work item on the list as a barrier so
 		 * that later work items that are done don't have their
 		 * functions called before this one returns
 		 */
 		if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
 			break;
-
-		spin_unlock(&workers->order_lock);
-
+		trace_btrfs_ordered_sched(work);
+		spin_unlock_irqrestore(lock, flags);
 		work->ordered_func(work);
 
 		/* now take the lock again and drop our item from the list */
-		spin_lock(&workers->order_lock);
-		list_del(&work->order_list);
-		spin_unlock(&workers->order_lock);
+		spin_lock_irqsave(lock, flags);
+		list_del(&work->ordered_list);
+		spin_unlock_irqrestore(lock, flags);
 
 		/*
 		 * we don't want to call the ordered free functions
 		 * with the lock held though
 		 */
 		work->ordered_free(work);
-		spin_lock(&workers->order_lock);
-	}
-
-	spin_unlock(&workers->order_lock);
-}
-
-static void put_worker(struct btrfs_worker_thread *worker)
-{
-	if (atomic_dec_and_test(&worker->refs))
-		kfree(worker);
-}
-
-static int try_worker_shutdown(struct btrfs_worker_thread *worker)
-{
-	int freeit = 0;
-
-	spin_lock_irq(&worker->lock);
-	spin_lock(&worker->workers->lock);
-	if (worker->workers->num_workers > 1 &&
-	    worker->idle &&
-	    !worker->working &&
-	    !list_empty(&worker->worker_list) &&
-	    list_empty(&worker->prio_pending) &&
-	    list_empty(&worker->pending) &&
-	    atomic_read(&worker->num_pending) == 0) {
-		freeit = 1;
-		list_del_init(&worker->worker_list);
-		worker->workers->num_workers--;
+		trace_btrfs_all_work_done(work);
 	}
-	spin_unlock(&worker->workers->lock);
-	spin_unlock_irq(&worker->lock);
-
-	if (freeit)
-		put_worker(worker);
-	return freeit;
+	spin_unlock_irqrestore(lock, flags);
 }
 
-static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
-					struct list_head *prio_head,
-					struct list_head *head)
-{
-	struct btrfs_work *work = NULL;
-	struct list_head *cur = NULL;
-
-	if (!list_empty(prio_head))
-		cur = prio_head->next;
-
-	smp_mb();
-	if (!list_empty(&worker->prio_pending))
-		goto refill;
-
-	if (!list_empty(head))
-		cur = head->next;
-
-	if (cur)
-		goto out;
-
-refill:
-	spin_lock_irq(&worker->lock);
-	list_splice_tail_init(&worker->prio_pending, prio_head);
-	list_splice_tail_init(&worker->pending, head);
-
-	if (!list_empty(prio_head))
-		cur = prio_head->next;
-	else if (!list_empty(head))
-		cur = head->next;
-	spin_unlock_irq(&worker->lock);
-
-	if (!cur)
-		goto out_fail;
-
-out:
-	work = list_entry(cur, struct btrfs_work, list);
-
-out_fail:
-	return work;
-}
-
-/*
- * main loop for servicing work items
- */
-static int worker_loop(void *arg)
+static void normal_work_helper(struct work_struct *arg)
 {
-	struct btrfs_worker_thread *worker = arg;
-	struct list_head head;
-	struct list_head prio_head;
 	struct btrfs_work *work;
+	struct __btrfs_workqueue *wq;
+	int need_order = 0;
 
-	INIT_LIST_HEAD(&head);
-	INIT_LIST_HEAD(&prio_head);
-
-	do {
-again:
-		while (1) {
-
-
-			work = get_next_work(worker, &prio_head, &head);
-			if (!work)
-				break;
-
-			list_del(&work->list);
-			clear_bit(WORK_QUEUED_BIT, &work->flags);
-
-			work->worker = worker;
-
-			work->func(work);
-
-			atomic_dec(&worker->num_pending);
-			/*
-			 * unless this is an ordered work queue,
-			 * 'work' was probably freed by func above.
-			 */
-			run_ordered_completions(worker->workers, work);
-
-			check_pending_worker_creates(worker);
-			cond_resched();
-		}
-
-		spin_lock_irq(&worker->lock);
-		check_idle_worker(worker);
-
-		if (freezing(current)) {
-			worker->working = 0;
-			spin_unlock_irq(&worker->lock);
-			try_to_freeze();
-		} else {
-			spin_unlock_irq(&worker->lock);
-			if (!kthread_should_stop()) {
-				cpu_relax();
-				/*
-				 * we've dropped the lock, did someone else
-				 * jump_in?
-				 */
-				smp_mb();
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending))
-					continue;
-
-				/*
-				 * this short schedule allows more work to
-				 * come in without the queue functions
-				 * needing to go through wake_up_process()
-				 *
-				 * worker->working is still 1, so nobody
-				 * is going to try and wake us up
-				 */
-				schedule_timeout(1);
-				smp_mb();
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending))
-					continue;
-
-				if (kthread_should_stop())
-					break;
-
-				/* still no more work?, sleep for real */
-				spin_lock_irq(&worker->lock);
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (!list_empty(&worker->pending) ||
-				    !list_empty(&worker->prio_pending)) {
-					spin_unlock_irq(&worker->lock);
-					set_current_state(TASK_RUNNING);
-					goto again;
-				}
-
-				/*
-				 * this makes sure we get a wakeup when someone
-				 * adds something new to the queue
-				 */
-				worker->working = 0;
-				spin_unlock_irq(&worker->lock);
-
-				if (!kthread_should_stop()) {
-					schedule_timeout(HZ * 120);
-					if (!worker->working &&
-					    try_worker_shutdown(worker)) {
-						return 0;
-					}
-				}
-			}
-			__set_current_state(TASK_RUNNING);
-		}
-	} while (!kthread_should_stop());
-	return 0;
-}
-
-/*
- * this will wait for all the worker threads to shutdown
- */
-void btrfs_stop_workers(struct btrfs_workers *workers)
-{
-	struct list_head *cur;
-	struct btrfs_worker_thread *worker;
-	int can_stop;
-
-	spin_lock_irq(&workers->lock);
-	workers->stopping = 1;
-	list_splice_init(&workers->idle_list, &workers->worker_list);
-	while (!list_empty(&workers->worker_list)) {
-		cur = workers->worker_list.next;
-		worker = list_entry(cur, struct btrfs_worker_thread,
-				    worker_list);
-
-		atomic_inc(&worker->refs);
-		workers->num_workers -= 1;
-		if (!list_empty(&worker->worker_list)) {
-			list_del_init(&worker->worker_list);
-			put_worker(worker);
-			can_stop = 1;
-		} else
-			can_stop = 0;
-		spin_unlock_irq(&workers->lock);
-		if (can_stop)
-			kthread_stop(worker->task);
-		spin_lock_irq(&workers->lock);
-		put_worker(worker);
+	work = container_of(arg, struct btrfs_work, normal_work);
+	/*
+	 * We should not touch things inside work in the following cases:
+	 * 1) after work->func() if it has no ordered_free
+	 *    Since the struct is freed in work->func().
+	 * 2) after setting WORK_DONE_BIT
+	 *    The work may be freed in other threads almost instantly.
+	 * So we save the needed things here.
+	 */
+	if (work->ordered_func)
+		need_order = 1;
+	wq = work->wq;
+
+	trace_btrfs_work_sched(work);
+	thresh_exec_hook(wq);
+	work->func(work);
+	if (need_order) {
+		set_bit(WORK_DONE_BIT, &work->flags);
+		run_ordered_work(wq);
 	}
-	spin_unlock_irq(&workers->lock);
+	if (!need_order)
+		trace_btrfs_all_work_done(work);
 }
 
-/*
- * simple init on struct btrfs_workers
- */
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
-			struct btrfs_workers *async_helper)
+void btrfs_init_work(struct btrfs_work *work,
+		     btrfs_func_t func,
+		     btrfs_func_t ordered_func,
+		     btrfs_func_t ordered_free)
 {
-	workers->num_workers = 0;
-	workers->num_workers_starting = 0;
-	INIT_LIST_HEAD(&workers->worker_list);
-	INIT_LIST_HEAD(&workers->idle_list);
-	INIT_LIST_HEAD(&workers->order_list);
-	INIT_LIST_HEAD(&workers->prio_order_list);
-	spin_lock_init(&workers->lock);
-	spin_lock_init(&workers->order_lock);
-	workers->max_workers = max;
-	workers->idle_thresh = 32;
-	workers->name = name;
-	workers->ordered = 0;
-	workers->atomic_start_pending = 0;
-	workers->atomic_worker_start = async_helper;
-	workers->stopping = 0;
+	work->func = func;
+	work->ordered_func = ordered_func;
+	work->ordered_free = ordered_free;
+	INIT_WORK(&work->normal_work, normal_work_helper);
+	INIT_LIST_HEAD(&work->ordered_list);
+	work->flags = 0;
 }
 
-/*
- * starts new worker threads. This does not enforce the max worker
- * count in case you need to temporarily go past it.
- */
-static int __btrfs_start_workers(struct btrfs_workers *workers)
+static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
+				      struct btrfs_work *work)
 {
-	struct btrfs_worker_thread *worker;
-	int ret = 0;
-
-	worker = kzalloc(sizeof(*worker), GFP_NOFS);
-	if (!worker) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	INIT_LIST_HEAD(&worker->pending);
-	INIT_LIST_HEAD(&worker->prio_pending);
-	INIT_LIST_HEAD(&worker->worker_list);
-	spin_lock_init(&worker->lock);
-
-	atomic_set(&worker->num_pending, 0);
-	atomic_set(&worker->refs, 1);
-	worker->workers = workers;
-	worker->task = kthread_create(worker_loop, worker,
-				      "btrfs-%s-%d", workers->name,
-				      workers->num_workers + 1);
-	if (IS_ERR(worker->task)) {
-		ret = PTR_ERR(worker->task);
-		goto fail;
-	}
+	unsigned long flags;
 
-	spin_lock_irq(&workers->lock);
-	if (workers->stopping) {
-		spin_unlock_irq(&workers->lock);
-		ret = -EINVAL;
-		goto fail_kthread;
+	work->wq = wq;
+	thresh_queue_hook(wq);
+	if (work->ordered_func) {
+		spin_lock_irqsave(&wq->list_lock, flags);
+		list_add_tail(&work->ordered_list, &wq->ordered_list);
+		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	list_add_tail(&worker->worker_list, &workers->idle_list);
-	worker->idle = 1;
-	workers->num_workers++;
-	workers->num_workers_starting--;
-	WARN_ON(workers->num_workers_starting < 0);
-	spin_unlock_irq(&workers->lock);
-
-	wake_up_process(worker->task);
-	return 0;
-
-fail_kthread:
-	kthread_stop(worker->task);
-fail:
-	kfree(worker);
-	spin_lock_irq(&workers->lock);
-	workers->num_workers_starting--;
-	spin_unlock_irq(&workers->lock);
-	return ret;
+	queue_work(wq->normal_wq, &work->normal_work);
+	trace_btrfs_work_queued(work);
 }
 
-int btrfs_start_workers(struct btrfs_workers *workers)
+void btrfs_queue_work(struct btrfs_workqueue *wq,
+		      struct btrfs_work *work)
 {
-	spin_lock_irq(&workers->lock);
-	workers->num_workers_starting++;
-	spin_unlock_irq(&workers->lock);
-	return __btrfs_start_workers(workers);
-}
-
-/*
- * run through the list and find a worker thread that doesn't have a lot
- * to do right now. This can return null if we aren't yet at the thread
- * count limit and all of the threads are busy.
- */
-static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
-{
-	struct btrfs_worker_thread *worker;
-	struct list_head *next;
-	int enforce_min;
-
-	enforce_min = (workers->num_workers + workers->num_workers_starting) <
-		workers->max_workers;
-
-	/*
-	 * if we find an idle thread, don't move it to the end of the
-	 * idle list. This improves the chance that the next submission
-	 * will reuse the same thread, and maybe catch it while it is still
-	 * working
-	 */
-	if (!list_empty(&workers->idle_list)) {
-		next = workers->idle_list.next;
-		worker = list_entry(next, struct btrfs_worker_thread,
-				    worker_list);
-		return worker;
-	}
-	if (enforce_min || list_empty(&workers->worker_list))
-		return NULL;
-
-	/*
-	 * if we pick a busy task, move the task to the end of the list.
-	 * hopefully this will keep things somewhat evenly balanced.
-	 * Do the move in batches based on the sequence number. This groups
-	 * requests submitted at roughly the same time onto the same worker.
-	 */
-	next = workers->worker_list.next;
-	worker = list_entry(next, struct btrfs_worker_thread, worker_list);
-	worker->sequence++;
+	struct __btrfs_workqueue *dest_wq;
 
-	if (worker->sequence % workers->idle_thresh == 0)
-		list_move_tail(next, &workers->worker_list);
-	return worker;
+	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
+		dest_wq = wq->high;
+	else
+		dest_wq = wq->normal;
+	__btrfs_queue_work(dest_wq, work);
 }
 
-/*
- * selects a worker thread to take the next job. This will either find
- * an idle worker, start a new worker up to the max count, or just return
- * one of the existing busy workers.
- */
-static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
+static inline void
+__btrfs_destroy_workqueue(struct __btrfs_workqueue *wq)
 {
-	struct btrfs_worker_thread *worker;
-	unsigned long flags;
-	struct list_head *fallback;
-	int ret;
-
-	spin_lock_irqsave(&workers->lock, flags);
-again:
-	worker = next_worker(workers);
-
-	if (!worker) {
-		if (workers->num_workers + workers->num_workers_starting >=
-		    workers->max_workers) {
-			goto fallback;
-		} else if (workers->atomic_worker_start) {
-			workers->atomic_start_pending = 1;
-			goto fallback;
-		} else {
-			workers->num_workers_starting++;
-			spin_unlock_irqrestore(&workers->lock, flags);
-			/* we're below the limit, start another worker */
-			ret = __btrfs_start_workers(workers);
-			spin_lock_irqsave(&workers->lock, flags);
-			if (ret)
-				goto fallback;
-			goto again;
-		}
-	}
-	goto found;
-
-fallback:
-	fallback = NULL;
-	/*
-	 * we have failed to find any workers, just
-	 * return the first one we can find.
-	 */
-	if (!list_empty(&workers->worker_list))
-		fallback = workers->worker_list.next;
-	if (!list_empty(&workers->idle_list))
-		fallback = workers->idle_list.next;
-	BUG_ON(!fallback);
-	worker = list_entry(fallback,
-			    struct btrfs_worker_thread, worker_list);
-found:
-	/*
-	 * this makes sure the worker doesn't exit before it is placed
-	 * onto a busy/idle list
-	 */
-	atomic_inc(&worker->num_pending);
-	spin_unlock_irqrestore(&workers->lock, flags);
-	return worker;
+	destroy_workqueue(wq->normal_wq);
+	trace_btrfs_workqueue_destroy(wq);
+	kfree(wq);
 }
 
-/*
- * btrfs_requeue_work just puts the work item back on the tail of the list
- * it was taken from. It is intended for use with long running work functions
- * that make some progress and want to give the cpu up for others.
- */
-void btrfs_requeue_work(struct btrfs_work *work)
+void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
 {
-	struct btrfs_worker_thread *worker = work->worker;
-	unsigned long flags;
-	int wake = 0;
-
-	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
+	if (!wq)
 		return;
-
-	spin_lock_irqsave(&worker->lock, flags);
-	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
-		list_add_tail(&work->list, &worker->prio_pending);
-	else
-		list_add_tail(&work->list, &worker->pending);
-	atomic_inc(&worker->num_pending);
-
-	/* by definition we're busy, take ourselves off the idle
-	 * list
-	 */
-	if (worker->idle) {
-		spin_lock(&worker->workers->lock);
-		worker->idle = 0;
-		list_move_tail(&worker->worker_list,
-			       &worker->workers->worker_list);
-		spin_unlock(&worker->workers->lock);
-	}
-	if (!worker->working) {
-		wake = 1;
-		worker->working = 1;
-	}
-
-	if (wake)
-		wake_up_process(worker->task);
-	spin_unlock_irqrestore(&worker->lock, flags);
+	if (wq->high)
+		__btrfs_destroy_workqueue(wq->high);
+	__btrfs_destroy_workqueue(wq->normal);
+	kfree(wq);
 }
 
-void btrfs_set_work_high_prio(struct btrfs_work *work)
+void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
 {
-	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+	wq->normal->max_active = max;
+	if (wq->high)
+		wq->high->max_active = max;
 }
 
-/*
- * places a struct btrfs_work into the pending queue of one of the kthreads
- */
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
+void btrfs_set_work_high_priority(struct btrfs_work *work)
 {
-	struct btrfs_worker_thread *worker;
-	unsigned long flags;
-	int wake = 0;
-
-	/* don't requeue something already on a list */
-	if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
-		return;
-
-	worker = find_worker(workers);
-	if (workers->ordered) {
-		/*
-		 * you're not allowed to do ordered queues from an
-		 * interrupt handler
-		 */
-		spin_lock(&workers->order_lock);
-		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
-			list_add_tail(&work->order_list,
-				      &workers->prio_order_list);
-		} else {
-			list_add_tail(&work->order_list, &workers->order_list);
-		}
-		spin_unlock(&workers->order_lock);
-	} else {
-		INIT_LIST_HEAD(&work->order_list);
-	}
-
-	spin_lock_irqsave(&worker->lock, flags);
-
-	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
-		list_add_tail(&work->list, &worker->prio_pending);
-	else
-		list_add_tail(&work->list, &worker->pending);
-	check_busy_worker(worker);
-
-	/*
-	 * avoid calling into wake_up_process if this thread has already
-	 * been kicked
-	 */
-	if (!worker->working)
-		wake = 1;
-	worker->working = 1;
-
-	if (wake)
-		wake_up_process(worker->task);
-	spin_unlock_irqrestore(&worker->lock, flags);
+	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
 }
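
The replacement API above is intentionally small: a consumer embeds a struct btrfs_work in its own context structure, fills in the callbacks with btrfs_init_work(), and submits with btrfs_queue_work(); ordering and thresholding happen behind normal_work_helper(). The sketch below is illustrative only and assumes the post-patch async-thread.h — the demo_job struct, demo_func(), and the "demo" queue name are hypothetical and not part of this commit.

	struct demo_job {
		struct btrfs_work work;	/* embedded, recovered via container_of() */
		u64 payload;
	};

	static void demo_func(struct btrfs_work *work)
	{
		struct demo_job *job = container_of(work, struct demo_job, work);

		/* ... process job->payload asynchronously ... */
		kfree(job);	/* no ordered_free, so func owns the struct */
	}

	static int demo_submit(void)
	{
		struct btrfs_workqueue *wq;
		struct demo_job *job;

		/* up to 8 concurrent workers; thresh == 0 picks DFT_THRESHOLD (32) */
		wq = btrfs_alloc_workqueue("demo", WQ_UNBOUND, 8, 0);
		if (!wq)
			return -ENOMEM;

		job = kzalloc(sizeof(*job), GFP_NOFS);
		if (!job) {
			btrfs_destroy_workqueue(wq);
			return -ENOMEM;
		}
		/* no completion ordering needed: ordered_func/ordered_free stay NULL */
		btrfs_init_work(&job->work, demo_func, NULL, NULL);
		btrfs_queue_work(wq, &job->work);

		btrfs_destroy_workqueue(wq);	/* destroy_workqueue() drains first */
		return 0;
	}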
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1f26792683ed..9c6b66d15fb0 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2007 Oracle. All rights reserved.
+ * Copyright (C) 2014 Fujitsu. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -19,103 +20,35 @@
 #ifndef __BTRFS_ASYNC_THREAD_
 #define __BTRFS_ASYNC_THREAD_
 
-struct btrfs_worker_thread;
+struct btrfs_workqueue;
+/* Internal use only */
+struct __btrfs_workqueue;
+struct btrfs_work;
+typedef void (*btrfs_func_t)(struct btrfs_work *arg);
 
-/*
- * This is similar to a workqueue, but it is meant to spread the operations
- * across all available cpus instead of just the CPU that was used to
- * queue the work. There is also some batching introduced to try and
- * cut down on context switches.
- *
- * By default threads are added on demand up to 2 * the number of cpus.
- * Changing struct btrfs_workers->max_workers is one way to prevent
- * demand creation of kthreads.
- *
- * the basic model of these worker threads is to embed a btrfs_work
- * structure in your own data struct, and use container_of in a
- * work function to get back to your data struct.
- */
 struct btrfs_work {
-	/*
-	 * func should be set to the function you want called
-	 * your work struct is passed as the only arg
-	 *
-	 * ordered_func must be set for work sent to an ordered work queue,
-	 * and it is called to complete a given work item in the same
-	 * order they were sent to the queue.
-	 */
-	void (*func)(struct btrfs_work *work);
-	void (*ordered_func)(struct btrfs_work *work);
-	void (*ordered_free)(struct btrfs_work *work);
-
-	/*
-	 * flags should be set to zero. It is used to make sure the
-	 * struct is only inserted once into the list.
-	 */
+	btrfs_func_t func;
+	btrfs_func_t ordered_func;
+	btrfs_func_t ordered_free;
+
+	/* Don't touch things below */
+	struct work_struct normal_work;
+	struct list_head ordered_list;
+	struct __btrfs_workqueue *wq;
 	unsigned long flags;
-
-	/* don't touch these */
-	struct btrfs_worker_thread *worker;
-	struct list_head list;
-	struct list_head order_list;
-};
-
-struct btrfs_workers {
-	/* current number of running workers */
-	int num_workers;
-
-	int num_workers_starting;
-
-	/* max number of workers allowed. changed by btrfs_start_workers */
-	int max_workers;
-
-	/* once a worker has this many requests or fewer, it is idle */
-	int idle_thresh;
-
-	/* force completions in the order they were queued */
-	int ordered;
-
-	/* more workers required, but in an interrupt handler */
-	int atomic_start_pending;
-
-	/*
-	 * are we allowed to sleep while starting workers or are we required
-	 * to start them at a later time? If we can't sleep, this indicates
-	 * which queue we need to use to schedule thread creation.
-	 */
-	struct btrfs_workers *atomic_worker_start;
-
-	/* list with all the work threads. The workers on the idle thread
-	 * may be actively servicing jobs, but they haven't yet hit the
-	 * idle thresh limit above.
-	 */
-	struct list_head worker_list;
-	struct list_head idle_list;
-
-	/*
-	 * when operating in ordered mode, this maintains the list
-	 * of work items waiting for completion
-	 */
-	struct list_head order_list;
-	struct list_head prio_order_list;
-
-	/* lock for finding the next worker thread to queue on */
-	spinlock_t lock;
-
-	/* lock for the ordered lists */
-	spinlock_t order_lock;
-
-	/* extra name for this worker, used for current->name */
-	char *name;
-
-	int stopping;
 };
 
-void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
-int btrfs_start_workers(struct btrfs_workers *workers);
-void btrfs_stop_workers(struct btrfs_workers *workers);
-void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
-			struct btrfs_workers *async_starter);
-void btrfs_requeue_work(struct btrfs_work *work);
-void btrfs_set_work_high_prio(struct btrfs_work *work);
+struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
+					      int flags,
+					      int max_active,
+					      int thresh);
+void btrfs_init_work(struct btrfs_work *work,
+		     btrfs_func_t func,
+		     btrfs_func_t ordered_func,
+		     btrfs_func_t ordered_free);
+void btrfs_queue_work(struct btrfs_workqueue *wq,
+		      struct btrfs_work *work);
+void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
+void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
+void btrfs_set_work_high_priority(struct btrfs_work *work);
 #endif
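
Note how the two hooks in async-thread.c cooperate around the thresh parameter declared here: btrfs_queue_work() bumps wq->pending via thresh_queue_hook(), and every wq->thresh / 4 executions thresh_exec_hook() compares pending against thresh, growing concurrency by one when pending > thresh, shrinking it when pending < thresh / 2, clamped to [1, max_active]. The following user-space model of that arithmetic is illustrative only; the kernel code additionally rate-limits via wq->count and applies the result through workqueue_set_max_active().

	#include <stdio.h>

	#define NO_THRESHOLD (-1)

	static int current_max = 1;	/* mirrors wq->current_max after allocation */

	static int adjust(int pending, int thresh, int max_active)
	{
		int new_max = current_max;

		if (thresh == NO_THRESHOLD)
			return current_max;	/* thresholding disabled */
		if (pending > thresh)
			new_max++;		/* backlog growing: one more worker */
		if (pending < thresh / 2)
			new_max--;		/* backlog draining: one less worker */
		if (new_max < 1)		/* same bounds as clamp_val(new_max, 1, max_active) */
			new_max = 1;
		if (new_max > max_active)
			new_max = max_active;
		current_max = new_max;
		return current_max;
	}

	int main(void)
	{
		/* thresh = 32, max_active = 8 */
		printf("%d\n", adjust(40, 32, 8));	/* 40 > 32 -> 2 */
		printf("%d\n", adjust(40, 32, 8));	/* 40 > 32 -> 3 */
		printf("%d\n", adjust(10, 32, 8));	/* 10 < 16 -> 2 */
		return 0;
	}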
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index aded3ef3d3d4..aad7201ad11b 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -220,7 +220,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
 
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			   struct ulist *parents, struct __prelim_ref *ref,
-			   int level, u64 time_seq, const u64 *extent_item_pos)
+			   int level, u64 time_seq, const u64 *extent_item_pos,
+			   u64 total_refs)
 {
 	int ret = 0;
 	int slot;
@@ -249,7 +250,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
 		ret = btrfs_next_old_leaf(root, path, time_seq);
 
-	while (!ret && count < ref->count) {
+	while (!ret && count < total_refs) {
 		eb = path->nodes[0];
 		slot = path->slots[0];
 
@@ -306,7 +307,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 				  struct btrfs_path *path, u64 time_seq,
 				  struct __prelim_ref *ref,
 				  struct ulist *parents,
-				  const u64 *extent_item_pos)
+				  const u64 *extent_item_pos, u64 total_refs)
 {
 	struct btrfs_root *root;
 	struct btrfs_key root_key;
@@ -361,7 +362,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	ret = add_all_parents(root, path, parents, ref, level, time_seq,
-			      extent_item_pos);
+			      extent_item_pos, total_refs);
 out:
 	path->lowest_level = 0;
 	btrfs_release_path(path);
@@ -374,7 +375,7 @@ out:
 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 				   struct btrfs_path *path, u64 time_seq,
 				   struct list_head *head,
-				   const u64 *extent_item_pos)
+				   const u64 *extent_item_pos, u64 total_refs)
 {
 	int err;
 	int ret = 0;
@@ -400,7 +401,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		if (ref->count == 0)
 			continue;
 		err = __resolve_indirect_ref(fs_info, path, time_seq, ref,
-					     parents, extent_item_pos);
+					     parents, extent_item_pos,
+					     total_refs);
 		/*
 		 * we can only tolerate ENOENT,otherwise,we should catch error
 		 * and return directly.
@@ -557,7 +559,7 @@ static void __merge_refs(struct list_head *head, int mode)
  * smaller or equal that seq to the list
  */
 static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
-			      struct list_head *prefs)
+			      struct list_head *prefs, u64 *total_refs)
 {
 	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
 	struct rb_node *n = &head->node.rb_node;
@@ -593,6 +595,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
 		default:
 			BUG_ON(1);
 		}
+		*total_refs += (node->ref_mod * sgn);
 		switch (node->type) {
 		case BTRFS_TREE_BLOCK_REF_KEY: {
 			struct btrfs_delayed_tree_ref *ref;
@@ -653,7 +656,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
  */
 static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 			     struct btrfs_path *path, u64 bytenr,
-			     int *info_level, struct list_head *prefs)
+			     int *info_level, struct list_head *prefs,
+			     u64 *total_refs)
 {
 	int ret = 0;
 	int slot;
@@ -677,6 +681,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
 
 	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
 	flags = btrfs_extent_flags(leaf, ei);
+	*total_refs += btrfs_extent_refs(leaf, ei);
 	btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
 	ptr = (unsigned long)(ei + 1);
@@ -859,6 +864,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	struct list_head prefs;
 	struct __prelim_ref *ref;
 	struct extent_inode_elem *eie = NULL;
+	u64 total_refs = 0;
 
 	INIT_LIST_HEAD(&prefs);
 	INIT_LIST_HEAD(&prefs_delayed);
@@ -873,8 +879,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	if (!trans)
+	if (!trans) {
 		path->search_commit_root = 1;
+		path->skip_locking = 1;
+	}
 
 	/*
 	 * grab both a lock on the path and a lock on the delayed ref head.
@@ -915,7 +923,7 @@ again:
 		}
 		spin_unlock(&delayed_refs->lock);
 		ret = __add_delayed_refs(head, time_seq,
-					 &prefs_delayed);
+					 &prefs_delayed, &total_refs);
 		mutex_unlock(&head->mutex);
 		if (ret)
 			goto out;
@@ -936,7 +944,8 @@ again:
 		    (key.type == BTRFS_EXTENT_ITEM_KEY ||
 		     key.type == BTRFS_METADATA_ITEM_KEY)) {
 			ret = __add_inline_refs(fs_info, path, bytenr,
-						&info_level, &prefs);
+						&info_level, &prefs,
+						&total_refs);
 			if (ret)
 				goto out;
 			ret = __add_keyed_refs(fs_info, path, bytenr,
@@ -956,7 +965,7 @@ again:
 	__merge_refs(&prefs, 1);
 
 	ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs,
-				      extent_item_pos);
+				      extent_item_pos, total_refs);
 	if (ret)
 		goto out;
 
@@ -965,7 +974,7 @@ again:
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
 		WARN_ON(ref->count < 0);
-		if (ref->count && ref->root_id && ref->parent == 0) {
+		if (roots && ref->count && ref->root_id && ref->parent == 0) {
 			/* no parent == root of tree */
 			ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
 			if (ret < 0)
@@ -1061,22 +1070,14 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 				u64 time_seq, struct ulist **leafs,
 				const u64 *extent_item_pos)
 {
-	struct ulist *tmp;
 	int ret;
 
-	tmp = ulist_alloc(GFP_NOFS);
-	if (!tmp)
-		return -ENOMEM;
 	*leafs = ulist_alloc(GFP_NOFS);
-	if (!*leafs) {
-		ulist_free(tmp);
+	if (!*leafs)
 		return -ENOMEM;
-	}
 
 	ret = find_parent_nodes(trans, fs_info, bytenr,
-				time_seq, *leafs, tmp, extent_item_pos);
-	ulist_free(tmp);
-
+				time_seq, *leafs, NULL, extent_item_pos);
 	if (ret < 0 && ret != -ENOENT) {
 		free_leaf_list(*leafs);
 		return ret;
@@ -1333,38 +1334,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 	if (ret < 0)
 		return ret;
 
-	while (1) {
-		u32 nritems;
-		if (path->slots[0] == 0) {
-			btrfs_set_path_blocking(path);
-			ret = btrfs_prev_leaf(fs_info->extent_root, path);
-			if (ret != 0) {
-				if (ret > 0) {
-					pr_debug("logical %llu is not within "
-						 "any extent\n", logical);
-					ret = -ENOENT;
-				}
-				return ret;
-			}
-		} else {
-			path->slots[0]--;
-		}
-		nritems = btrfs_header_nritems(path->nodes[0]);
-		if (nritems == 0) {
-			pr_debug("logical %llu is not within any extent\n",
-				 logical);
-			return -ENOENT;
-		}
-		if (path->slots[0] == nritems)
-			path->slots[0]--;
-
-		btrfs_item_key_to_cpu(path->nodes[0], found_key,
-				      path->slots[0]);
-		if (found_key->type == BTRFS_EXTENT_ITEM_KEY ||
-		    found_key->type == BTRFS_METADATA_ITEM_KEY)
-			break;
+	ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0);
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
+		return ret;
 	}
-
+	btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]);
 	if (found_key->type == BTRFS_METADATA_ITEM_KEY)
 		size = fs_info->extent_root->leafsize;
 	else if (found_key->type == BTRFS_EXTENT_ITEM_KEY)
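
One idea runs through the backref.c hunks: add_all_parents() used to stop after ref->count parents, which can undercount when several prelim refs point at the same extent, so find_parent_nodes() now sums every reference the extent claims — btrfs_extent_refs() from the inline extent item plus the signed ref_mod of each pending delayed ref — and passes that total down as the loop bound. A user-space model of the accumulation, illustrative only (the three inline refs and the +2/-1 delayed mods are made-up numbers):

	#include <stdio.h>

	struct delayed_ref {
		int ref_mod;
		int sgn;	/* +1 for an added ref, -1 for a dropped one */
	};

	int main(void)
	{
		/* what __add_inline_refs() reads from the extent item */
		unsigned long long total_refs = 3;

		/* what __add_delayed_refs() folds in from the delayed-ref tree */
		struct delayed_ref delayed[] = { { 2, +1 }, { 1, -1 } };
		int i;

		for (i = 0; i < 2; i++)
			total_refs += delayed[i].ref_mod * delayed[i].sgn;

		/* add_all_parents() now loops while (count < total_refs) */
		printf("total_refs = %llu\n", total_refs);	/* 4 */
		return 0;
	}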
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 8fed2125689e..c9a24444ec9a 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -109,14 +109,17 @@ struct btrfs_inode {
 	u64 last_trans;
 
 	/*
-	 * log transid when this inode was last modified
+	 * transid that last logged this inode
 	 */
-	u64 last_sub_trans;
+	u64 logged_trans;
 
 	/*
-	 * transid that last logged this inode
+	 * log transid when this inode was last modified
 	 */
-	u64 logged_trans;
+	int last_sub_trans;
+
+	/* a local copy of root's last_log_commit */
+	int last_log_commit;
 
 	/* total number of bytes pending delalloc, used by stat to calc the
 	 * real block usage of the file
@@ -155,9 +158,6 @@ struct btrfs_inode {
 	/* flags field from the on disk inode */
 	u32 flags;
 
-	/* a local copy of root's last_log_commit */
-	unsigned long last_log_commit;
-
 	/*
 	 * Counters to keep track of the number of extent item's we may use due
 	 * to delalloc and such. outstanding_extents is the number of extent
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b01fb6c527e3..d43c544d3b68 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, pg_index);
 		rcu_read_unlock();
-		if (page) {
+		if (page && !radix_tree_exceptional_entry(page)) {
 			misses++;
 			if (misses > 4)
 				break;
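
For context on the one-line compression.c fix: a page-cache radix tree slot can hold an exceptional entry (for example a workingset shadow entry left behind by reclaim) rather than a struct page pointer, so testing the lookup result against NULL alone can misclassify such a slot as a cached page and cut readahead short. A sketch of the probe pattern the fix follows; demo_page_really_cached() is a hypothetical helper, not part of this commit:

	static bool demo_page_really_cached(struct address_space *mapping,
					    pgoff_t pg_index)
	{
		struct page *page;

		rcu_read_lock();
		page = radix_tree_lookup(&mapping->page_tree, pg_index);
		rcu_read_unlock();

		/* a non-NULL slot may be a shadow entry, not a real page */
		return page && !radix_tree_exceptional_entry(page);
	}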
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cbd3a7d6fa68..88d1b1eedc9c 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -5376,6 +5376,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 	int advance_right;
 	u64 left_blockptr;
 	u64 right_blockptr;
+	u64 left_gen;
+	u64 right_gen;
 	u64 left_start_ctransid;
 	u64 right_start_ctransid;
 	u64 ctransid;
@@ -5640,7 +5642,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 			right_blockptr = btrfs_node_blockptr(
 					right_path->nodes[right_level],
 					right_path->slots[right_level]);
-			if (left_blockptr == right_blockptr) {
+			left_gen = btrfs_node_ptr_generation(
+					left_path->nodes[left_level],
+					left_path->slots[left_level]);
+			right_gen = btrfs_node_ptr_generation(
+					right_path->nodes[right_level],
+					right_path->slots[right_level]);
+			if (left_blockptr == right_blockptr &&
+			    left_gen == right_gen) {
 				/*
 				 * As we're on a shared block, don't
 				 * allow to go deeper.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c1a42ca519f..bc96c03dd259 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -351,6 +351,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 #define BTRFS_FS_STATE_ERROR		0
 #define BTRFS_FS_STATE_REMOUNTING	1
 #define BTRFS_FS_STATE_TRANS_ABORTED	2
+#define BTRFS_FS_STATE_DEV_REPLACING	3
 
 /* Super block flags */
 /* Errors detected */
@@ -1489,6 +1490,7 @@ struct btrfs_fs_info {
 	 */
 	struct list_head ordered_roots;
 
+	struct mutex delalloc_root_mutex;
 	spinlock_t delalloc_root_lock;
 	/* all fs/file tree roots that have delalloc inodes. */
 	struct list_head delalloc_roots;
@@ -1503,28 +1505,27 @@ struct btrfs_fs_info {
 	 * A third pool does submit_bio to avoid deadlocking with the other
 	 * two
 	 */
-	struct btrfs_workers generic_worker;
-	struct btrfs_workers workers;
-	struct btrfs_workers delalloc_workers;
-	struct btrfs_workers flush_workers;
-	struct btrfs_workers endio_workers;
-	struct btrfs_workers endio_meta_workers;
-	struct btrfs_workers endio_raid56_workers;
-	struct btrfs_workers rmw_workers;
-	struct btrfs_workers endio_meta_write_workers;
-	struct btrfs_workers endio_write_workers;
-	struct btrfs_workers endio_freespace_worker;
-	struct btrfs_workers submit_workers;
-	struct btrfs_workers caching_workers;
-	struct btrfs_workers readahead_workers;
+	struct btrfs_workqueue *workers;
+	struct btrfs_workqueue *delalloc_workers;
+	struct btrfs_workqueue *flush_workers;
+	struct btrfs_workqueue *endio_workers;
+	struct btrfs_workqueue *endio_meta_workers;
+	struct btrfs_workqueue *endio_raid56_workers;
+	struct btrfs_workqueue *rmw_workers;
+	struct btrfs_workqueue *endio_meta_write_workers;
+	struct btrfs_workqueue *endio_write_workers;
+	struct btrfs_workqueue *endio_freespace_worker;
+	struct btrfs_workqueue *submit_workers;
+	struct btrfs_workqueue *caching_workers;
+	struct btrfs_workqueue *readahead_workers;
 
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
 	 * the cow mechanism and make them safe to write. It happens
 	 * for the sys_munmap function call path
	 */
-	struct btrfs_workers fixup_workers;
-	struct btrfs_workers delayed_workers;
+	struct btrfs_workqueue *fixup_workers;
+	struct btrfs_workqueue *delayed_workers;
 	struct task_struct *transaction_kthread;
 	struct task_struct *cleaner_kthread;
 	int thread_pool_size;
@@ -1604,9 +1605,9 @@ struct btrfs_fs_info {
 	atomic_t scrub_cancel_req;
 	wait_queue_head_t scrub_pause_wait;
 	int scrub_workers_refcnt;
-	struct btrfs_workers scrub_workers;
-	struct btrfs_workers scrub_wr_completion_workers;
-	struct btrfs_workers scrub_nocow_workers;
+	struct btrfs_workqueue *scrub_workers;
+	struct btrfs_workqueue *scrub_wr_completion_workers;
+	struct btrfs_workqueue *scrub_nocow_workers;
1610 | 1611 | ||
1611 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1612 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
1612 | u32 check_integrity_print_mask; | 1613 | u32 check_integrity_print_mask; |
@@ -1647,7 +1648,7 @@ struct btrfs_fs_info { | |||
1647 | /* qgroup rescan items */ | 1648 | /* qgroup rescan items */ |
1648 | struct mutex qgroup_rescan_lock; /* protects the progress item */ | 1649 | struct mutex qgroup_rescan_lock; /* protects the progress item */ |
1649 | struct btrfs_key qgroup_rescan_progress; | 1650 | struct btrfs_key qgroup_rescan_progress; |
1650 | struct btrfs_workers qgroup_rescan_workers; | 1651 | struct btrfs_workqueue *qgroup_rescan_workers; |
1651 | struct completion qgroup_rescan_completion; | 1652 | struct completion qgroup_rescan_completion; |
1652 | struct btrfs_work qgroup_rescan_work; | 1653 | struct btrfs_work qgroup_rescan_work; |
1653 | 1654 | ||
@@ -1674,10 +1675,18 @@ struct btrfs_fs_info { | |||
1674 | 1675 | ||
1675 | atomic_t mutually_exclusive_operation_running; | 1676 | atomic_t mutually_exclusive_operation_running; |
1676 | 1677 | ||
1678 | struct percpu_counter bio_counter; | ||
1679 | wait_queue_head_t replace_wait; | ||
1680 | |||
1677 | struct semaphore uuid_tree_rescan_sem; | 1681 | struct semaphore uuid_tree_rescan_sem; |
1678 | unsigned int update_uuid_tree_gen:1; | 1682 | unsigned int update_uuid_tree_gen:1; |
1679 | }; | 1683 | }; |
1680 | 1684 | ||
1685 | struct btrfs_subvolume_writers { | ||
1686 | struct percpu_counter counter; | ||
1687 | wait_queue_head_t wait; | ||
1688 | }; | ||
1689 | |||
1681 | /* | 1690 | /* |
1682 | * in ram representation of the tree. extent_root is used for all allocations | 1691 | * in ram representation of the tree. extent_root is used for all allocations |
1683 | * and for the extent tree extent_root root. | 1692 | * and for the extent tree extent_root root. |
@@ -1714,11 +1723,15 @@ struct btrfs_root { | |||
1714 | struct mutex log_mutex; | 1723 | struct mutex log_mutex; |
1715 | wait_queue_head_t log_writer_wait; | 1724 | wait_queue_head_t log_writer_wait; |
1716 | wait_queue_head_t log_commit_wait[2]; | 1725 | wait_queue_head_t log_commit_wait[2]; |
1726 | struct list_head log_ctxs[2]; | ||
1717 | atomic_t log_writers; | 1727 | atomic_t log_writers; |
1718 | atomic_t log_commit[2]; | 1728 | atomic_t log_commit[2]; |
1719 | atomic_t log_batch; | 1729 | atomic_t log_batch; |
1720 | unsigned long log_transid; | 1730 | int log_transid; |
1721 | unsigned long last_log_commit; | 1731 | /* Updated no matter whether the commit succeeds or not */ |
1732 | int log_transid_committed; | ||
1733 | /* Only updated when the commit succeeds. */ | ||
1734 | int last_log_commit; | ||
1722 | pid_t log_start_pid; | 1735 | pid_t log_start_pid; |
1723 | bool log_multiple_pids; | 1736 | bool log_multiple_pids; |
1724 | 1737 | ||
@@ -1793,6 +1806,7 @@ struct btrfs_root { | |||
1793 | spinlock_t root_item_lock; | 1806 | spinlock_t root_item_lock; |
1794 | atomic_t refs; | 1807 | atomic_t refs; |
1795 | 1808 | ||
1809 | struct mutex delalloc_mutex; | ||
1796 | spinlock_t delalloc_lock; | 1810 | spinlock_t delalloc_lock; |
1797 | /* | 1811 | /* |
1798 | * all of the inodes that have delalloc bytes. It is possible for | 1812 | * all of the inodes that have delalloc bytes. It is possible for |
@@ -1802,6 +1816,8 @@ struct btrfs_root { | |||
1802 | struct list_head delalloc_inodes; | 1816 | struct list_head delalloc_inodes; |
1803 | struct list_head delalloc_root; | 1817 | struct list_head delalloc_root; |
1804 | u64 nr_delalloc_inodes; | 1818 | u64 nr_delalloc_inodes; |
1819 | |||
1820 | struct mutex ordered_extent_mutex; | ||
1805 | /* | 1821 | /* |
1806 | * this is used by the balancing code to wait for all the pending | 1822 | * this is used by the balancing code to wait for all the pending |
1807 | * ordered extents | 1823 | * ordered extents |
@@ -1822,6 +1838,8 @@ struct btrfs_root { | |||
1822 | * manipulation with the read-only status via SUBVOL_SETFLAGS | 1838 | * manipulation with the read-only status via SUBVOL_SETFLAGS |
1823 | */ | 1839 | */ |
1824 | int send_in_progress; | 1840 | int send_in_progress; |
1841 | struct btrfs_subvolume_writers *subv_writers; | ||
1842 | atomic_t will_be_snapshoted; | ||
1825 | }; | 1843 | }; |
1826 | 1844 | ||
1827 | struct btrfs_ioctl_defrag_range_args { | 1845 | struct btrfs_ioctl_defrag_range_args { |
@@ -3346,6 +3364,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | |||
3346 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3364 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
3347 | struct btrfs_fs_info *fs_info); | 3365 | struct btrfs_fs_info *fs_info); |
3348 | int __get_raid_index(u64 flags); | 3366 | int __get_raid_index(u64 flags); |
3367 | |||
3368 | int btrfs_start_nocow_write(struct btrfs_root *root); | ||
3369 | void btrfs_end_nocow_write(struct btrfs_root *root); | ||
3349 | /* ctree.c */ | 3370 | /* ctree.c */ |
3350 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3371 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
3351 | int level, int *slot); | 3372 | int level, int *slot); |
@@ -3723,7 +3744,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3723 | u32 min_type); | 3744 | u32 min_type); |
3724 | 3745 | ||
3725 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 3746 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
3726 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput); | 3747 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
3748 | int nr); | ||
3727 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 3749 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
3728 | struct extent_state **cached_state); | 3750 | struct extent_state **cached_state); |
3729 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 3751 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
@@ -4005,6 +4027,11 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, | |||
4005 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 4027 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
4006 | struct btrfs_scrub_progress *progress); | 4028 | struct btrfs_scrub_progress *progress); |
4007 | 4029 | ||
4030 | /* dev-replace.c */ | ||
4031 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); | ||
4032 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); | ||
4033 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); | ||
4034 | |||
4008 | /* reada.c */ | 4035 | /* reada.c */ |
4009 | struct reada_control { | 4036 | struct reada_control { |
4010 | struct btrfs_root *root; /* tree to prefetch */ | 4037 | struct btrfs_root *root; /* tree to prefetch */ |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 451b00c86f6c..33e561a84013 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -1392,11 +1392,11 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, | |||
1392 | return -ENOMEM; | 1392 | return -ENOMEM; |
1393 | 1393 | ||
1394 | async_work->delayed_root = delayed_root; | 1394 | async_work->delayed_root = delayed_root; |
1395 | async_work->work.func = btrfs_async_run_delayed_root; | 1395 | btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, |
1396 | async_work->work.flags = 0; | 1396 | NULL, NULL); |
1397 | async_work->nr = nr; | 1397 | async_work->nr = nr; |
1398 | 1398 | ||
1399 | btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work); | 1399 | btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); |
1400 | return 0; | 1400 | return 0; |
1401 | } | 1401 | } |
1402 | 1402 | ||
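The conversion above replaces open-coded assignments to work.func and work.flags with the btrfs_init_work()/btrfs_queue_work() pair introduced by this series. A minimal usage sketch under that assumption (the job structure and payload are hypothetical):

        struct demo_async_work {
                struct btrfs_work work;
                int payload;                    /* hypothetical per-job data */
        };

        static void demo_work_fn(struct btrfs_work *work)
        {
                struct demo_async_work *job;

                job = container_of(work, struct demo_async_work, work);
                /* ... asynchronous part runs here with job->payload ... */
                /* safe to free here: no ordered callbacks touch the work
                 * afterwards */
                kfree(job);
        }

        static int demo_queue_work(struct btrfs_workqueue *wq, int payload)
        {
                struct demo_async_work *job = kmalloc(sizeof(*job), GFP_NOFS);

                if (!job)
                        return -ENOMEM;
                job->payload = payload;
                /* no ordered_func/ordered_free needed for this job */
                btrfs_init_work(&job->work, demo_work_fn, NULL, NULL);
                btrfs_queue_work(wq, &job->work);
                return 0;
        }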
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index f3bff89eecf0..31299646024d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -199,44 +199,31 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, | |||
199 | */ | 199 | */ |
200 | static struct btrfs_delayed_ref_head * | 200 | static struct btrfs_delayed_ref_head * |
201 | find_ref_head(struct rb_root *root, u64 bytenr, | 201 | find_ref_head(struct rb_root *root, u64 bytenr, |
202 | struct btrfs_delayed_ref_head **last, int return_bigger) | 202 | int return_bigger) |
203 | { | 203 | { |
204 | struct rb_node *n; | 204 | struct rb_node *n; |
205 | struct btrfs_delayed_ref_head *entry; | 205 | struct btrfs_delayed_ref_head *entry; |
206 | int cmp = 0; | ||
207 | 206 | ||
208 | again: | ||
209 | n = root->rb_node; | 207 | n = root->rb_node; |
210 | entry = NULL; | 208 | entry = NULL; |
211 | while (n) { | 209 | while (n) { |
212 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); | 210 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); |
213 | if (last) | ||
214 | *last = entry; | ||
215 | 211 | ||
216 | if (bytenr < entry->node.bytenr) | 212 | if (bytenr < entry->node.bytenr) |
217 | cmp = -1; | ||
218 | else if (bytenr > entry->node.bytenr) | ||
219 | cmp = 1; | ||
220 | else | ||
221 | cmp = 0; | ||
222 | |||
223 | if (cmp < 0) | ||
224 | n = n->rb_left; | 213 | n = n->rb_left; |
225 | else if (cmp > 0) | 214 | else if (bytenr > entry->node.bytenr) |
226 | n = n->rb_right; | 215 | n = n->rb_right; |
227 | else | 216 | else |
228 | return entry; | 217 | return entry; |
229 | } | 218 | } |
230 | if (entry && return_bigger) { | 219 | if (entry && return_bigger) { |
231 | if (cmp > 0) { | 220 | if (bytenr > entry->node.bytenr) { |
232 | n = rb_next(&entry->href_node); | 221 | n = rb_next(&entry->href_node); |
233 | if (!n) | 222 | if (!n) |
234 | n = rb_first(root); | 223 | n = rb_first(root); |
235 | entry = rb_entry(n, struct btrfs_delayed_ref_head, | 224 | entry = rb_entry(n, struct btrfs_delayed_ref_head, |
236 | href_node); | 225 | href_node); |
237 | bytenr = entry->node.bytenr; | 226 | return entry; |
238 | return_bigger = 0; | ||
239 | goto again; | ||
240 | } | 227 | } |
241 | return entry; | 228 | return entry; |
242 | } | 229 | } |
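With at most one delayed-ref head per bytenr in the rbtree, the helper above no longer needs the cmp variable, the *last out-parameter, or the restart loop: when return_bigger is set and the last node visited is smaller than the target, stepping to the in-order successor (wrapping to rb_first()) already yields the answer. The same shape in a generic, self-contained form (item type hypothetical):

        #include <linux/rbtree.h>

        struct item {
                struct rb_node node;
                u64 key;
        };

        /* Find the item with @key, or (if none and @return_bigger) the
         * next bigger item, wrapping around to the smallest in the tree. */
        static struct item *find_item(struct rb_root *root, u64 key,
                                      int return_bigger)
        {
                struct rb_node *n = root->rb_node;
                struct item *entry = NULL;

                while (n) {
                        entry = rb_entry(n, struct item, node);
                        if (key < entry->key)
                                n = n->rb_left;
                        else if (key > entry->key)
                                n = n->rb_right;
                        else
                                return entry;
                }
                if (entry && return_bigger) {
                        if (key > entry->key) {
                                n = rb_next(&entry->node);
                                if (!n)                 /* wrap around */
                                        n = rb_first(root);
                                entry = rb_entry(n, struct item, node);
                        }
                        return entry;
                }
                return NULL;
        }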
@@ -415,12 +402,12 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans) | |||
415 | 402 | ||
416 | again: | 403 | again: |
417 | start = delayed_refs->run_delayed_start; | 404 | start = delayed_refs->run_delayed_start; |
418 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 405 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
419 | if (!head && !loop) { | 406 | if (!head && !loop) { |
420 | delayed_refs->run_delayed_start = 0; | 407 | delayed_refs->run_delayed_start = 0; |
421 | start = 0; | 408 | start = 0; |
422 | loop = true; | 409 | loop = true; |
423 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 410 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
424 | if (!head) | 411 | if (!head) |
425 | return NULL; | 412 | return NULL; |
426 | } else if (!head && loop) { | 413 | } else if (!head && loop) { |
@@ -508,6 +495,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
508 | ref = btrfs_delayed_node_to_head(update); | 495 | ref = btrfs_delayed_node_to_head(update); |
509 | BUG_ON(existing_ref->is_data != ref->is_data); | 496 | BUG_ON(existing_ref->is_data != ref->is_data); |
510 | 497 | ||
498 | spin_lock(&existing_ref->lock); | ||
511 | if (ref->must_insert_reserved) { | 499 | if (ref->must_insert_reserved) { |
512 | /* if the extent was freed and then | 500 | /* if the extent was freed and then |
513 | * reallocated before the delayed ref | 501 | * reallocated before the delayed ref |
@@ -549,7 +537,6 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
549 | * only need the lock for this case because we could be processing it | 537 | * only need the lock for this case because we could be processing it |
550 | * currently, for refs we just added we know we're a-ok. | 538 | * currently, for refs we just added we know we're a-ok. |
551 | */ | 539 | */ |
552 | spin_lock(&existing_ref->lock); | ||
553 | existing->ref_mod += update->ref_mod; | 540 | existing->ref_mod += update->ref_mod; |
554 | spin_unlock(&existing_ref->lock); | 541 | spin_unlock(&existing_ref->lock); |
555 | } | 542 | } |
@@ -898,7 +885,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
898 | struct btrfs_delayed_ref_root *delayed_refs; | 885 | struct btrfs_delayed_ref_root *delayed_refs; |
899 | 886 | ||
900 | delayed_refs = &trans->transaction->delayed_refs; | 887 | delayed_refs = &trans->transaction->delayed_refs; |
901 | return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0); | 888 | return find_ref_head(&delayed_refs->href_root, bytenr, 0); |
902 | } | 889 | } |
903 | 890 | ||
904 | void btrfs_delayed_ref_exit(void) | 891 | void btrfs_delayed_ref_exit(void) |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 564c92638b20..9f2290509aca 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -431,6 +431,35 @@ leave_no_lock: | |||
431 | return ret; | 431 | return ret; |
432 | } | 432 | } |
433 | 433 | ||
434 | /* | ||
435 | * Block until all in-flight bios are finished. | ||
436 | */ | ||
437 | static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) | ||
438 | { | ||
439 | s64 writers; | ||
440 | DEFINE_WAIT(wait); | ||
441 | |||
442 | set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
443 | do { | ||
444 | prepare_to_wait(&fs_info->replace_wait, &wait, | ||
445 | TASK_UNINTERRUPTIBLE); | ||
446 | writers = percpu_counter_sum(&fs_info->bio_counter); | ||
447 | if (writers) | ||
448 | schedule(); | ||
449 | finish_wait(&fs_info->replace_wait, &wait); | ||
450 | } while (writers); | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * We have removed the target device, so it is safe to allow new bio requests. | ||
455 | */ | ||
456 | static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) | ||
457 | { | ||
458 | clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
459 | if (waitqueue_active(&fs_info->replace_wait)) | ||
460 | wake_up(&fs_info->replace_wait); | ||
461 | } | ||
462 | |||
434 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | 463 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, |
435 | int scrub_ret) | 464 | int scrub_ret) |
436 | { | 465 | { |
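The prepare_to_wait()/schedule() loop in btrfs_rm_dev_replace_blocked() is the classic open-coded form of a sleeping wait. A behaviorally equivalent sketch using wait_event(), assuming the same fs_info fields as in this patch:

        static void rm_dev_replace_blocked_sketch(struct btrfs_fs_info *fs_info)
        {
                set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
                /* sleep until every in-flight bio has dropped the counter */
                wait_event(fs_info->replace_wait,
                           percpu_counter_sum(&fs_info->bio_counter) == 0);
        }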
@@ -458,17 +487,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
458 | src_device = dev_replace->srcdev; | 487 | src_device = dev_replace->srcdev; |
459 | btrfs_dev_replace_unlock(dev_replace); | 488 | btrfs_dev_replace_unlock(dev_replace); |
460 | 489 | ||
461 | /* replace old device with new one in mapping tree */ | ||
462 | if (!scrub_ret) | ||
463 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
464 | src_device, | ||
465 | tgt_device); | ||
466 | |||
467 | /* | 490 | /* |
468 | * flush all outstanding I/O and inode extent mappings before the | 491 | * flush all outstanding I/O and inode extent mappings before the |
469 | * copy operation is declared as being finished | 492 | * copy operation is declared as being finished |
470 | */ | 493 | */ |
471 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 494 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
472 | if (ret) { | 495 | if (ret) { |
473 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 496 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
474 | return ret; | 497 | return ret; |
@@ -484,6 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
484 | WARN_ON(ret); | 507 | WARN_ON(ret); |
485 | 508 | ||
486 | /* keep away write_all_supers() during the finishing procedure */ | 509 | /* keep away write_all_supers() during the finishing procedure */ |
510 | mutex_lock(&root->fs_info->chunk_mutex); | ||
487 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 511 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
488 | btrfs_dev_replace_lock(dev_replace); | 512 | btrfs_dev_replace_lock(dev_replace); |
489 | dev_replace->replace_state = | 513 | dev_replace->replace_state = |
@@ -494,7 +518,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
494 | dev_replace->time_stopped = get_seconds(); | 518 | dev_replace->time_stopped = get_seconds(); |
495 | dev_replace->item_needs_writeback = 1; | 519 | dev_replace->item_needs_writeback = 1; |
496 | 520 | ||
497 | if (scrub_ret) { | 521 | /* replace old device with new one in mapping tree */ |
522 | if (!scrub_ret) { | ||
523 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
524 | src_device, | ||
525 | tgt_device); | ||
526 | } else { | ||
498 | printk_in_rcu(KERN_ERR | 527 | printk_in_rcu(KERN_ERR |
499 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", | 528 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", |
500 | src_device->missing ? "<missing disk>" : | 529 | src_device->missing ? "<missing disk>" : |
@@ -503,6 +532,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
503 | rcu_str_deref(tgt_device->name), scrub_ret); | 532 | rcu_str_deref(tgt_device->name), scrub_ret); |
504 | btrfs_dev_replace_unlock(dev_replace); | 533 | btrfs_dev_replace_unlock(dev_replace); |
505 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 534 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
535 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
506 | if (tgt_device) | 536 | if (tgt_device) |
507 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 537 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
508 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 538 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
@@ -532,8 +562,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
532 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 562 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
533 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 563 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
534 | 564 | ||
565 | btrfs_rm_dev_replace_blocked(fs_info); | ||
566 | |||
535 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 567 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
536 | 568 | ||
569 | btrfs_rm_dev_replace_unblocked(fs_info); | ||
570 | |||
537 | /* | 571 | /* |
538 | * this is again a consistent state where no dev_replace procedure | 572 | * this is again a consistent state where no dev_replace procedure |
539 | * is running, the target device is part of the filesystem, the | 573 | * is running, the target device is part of the filesystem, the |
@@ -543,6 +577,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
543 | */ | 577 | */ |
544 | btrfs_dev_replace_unlock(dev_replace); | 578 | btrfs_dev_replace_unlock(dev_replace); |
545 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 579 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
580 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
546 | 581 | ||
547 | /* write back the superblocks */ | 582 | /* write back the superblocks */ |
548 | trans = btrfs_start_transaction(root, 0); | 583 | trans = btrfs_start_transaction(root, 0); |
@@ -862,3 +897,31 @@ void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | |||
862 | mutex_unlock(&dev_replace->lock_management_lock); | 897 | mutex_unlock(&dev_replace->lock_management_lock); |
863 | } | 898 | } |
864 | } | 899 | } |
900 | |||
901 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) | ||
902 | { | ||
903 | percpu_counter_inc(&fs_info->bio_counter); | ||
904 | } | ||
905 | |||
906 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | ||
907 | { | ||
908 | percpu_counter_dec(&fs_info->bio_counter); | ||
909 | |||
910 | if (waitqueue_active(&fs_info->replace_wait)) | ||
911 | wake_up(&fs_info->replace_wait); | ||
912 | } | ||
913 | |||
914 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) | ||
915 | { | ||
916 | DEFINE_WAIT(wait); | ||
917 | again: | ||
918 | percpu_counter_inc(&fs_info->bio_counter); | ||
919 | if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) { | ||
920 | btrfs_bio_counter_dec(fs_info); | ||
921 | wait_event(fs_info->replace_wait, | ||
922 | !test_bit(BTRFS_FS_STATE_DEV_REPLACING, | ||
923 | &fs_info->fs_state)); | ||
924 | goto again; | ||
925 | } | ||
926 | |||
927 | } | ||
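Taken together, the three helpers above behave like a percpu read/write semaphore: ordinary bio submitters take a shared reference with btrfs_bio_counter_inc_blocked(), sleeping while a replace holds the REPLACING bit, and device replace drains them via replace_wait. A sketch of the submitter side (the I/O body is hypothetical; in the real call paths the matching btrfs_bio_counter_dec() typically runs from bio completion rather than synchronously):

        static int guarded_io_sketch(struct btrfs_fs_info *fs_info)
        {
                int ret;

                btrfs_bio_counter_inc_blocked(fs_info); /* may sleep */
                ret = do_the_io();                      /* hypothetical work */
                btrfs_bio_counter_dec(fs_info);         /* wakes replace_wait */
                return ret;
        }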
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81ea55314b1f..bd0f752b797b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -678,32 +678,31 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
678 | 678 | ||
679 | fs_info = end_io_wq->info; | 679 | fs_info = end_io_wq->info; |
680 | end_io_wq->error = err; | 680 | end_io_wq->error = err; |
681 | end_io_wq->work.func = end_workqueue_fn; | 681 | btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); |
682 | end_io_wq->work.flags = 0; | ||
683 | 682 | ||
684 | if (bio->bi_rw & REQ_WRITE) { | 683 | if (bio->bi_rw & REQ_WRITE) { |
685 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) | 684 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) |
686 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 685 | btrfs_queue_work(fs_info->endio_meta_write_workers, |
687 | &end_io_wq->work); | 686 | &end_io_wq->work); |
688 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) | 687 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) |
689 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | 688 | btrfs_queue_work(fs_info->endio_freespace_worker, |
690 | &end_io_wq->work); | 689 | &end_io_wq->work); |
691 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 690 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
692 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 691 | btrfs_queue_work(fs_info->endio_raid56_workers, |
693 | &end_io_wq->work); | 692 | &end_io_wq->work); |
694 | else | 693 | else |
695 | btrfs_queue_worker(&fs_info->endio_write_workers, | 694 | btrfs_queue_work(fs_info->endio_write_workers, |
696 | &end_io_wq->work); | 695 | &end_io_wq->work); |
697 | } else { | 696 | } else { |
698 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 697 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
699 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 698 | btrfs_queue_work(fs_info->endio_raid56_workers, |
700 | &end_io_wq->work); | 699 | &end_io_wq->work); |
701 | else if (end_io_wq->metadata) | 700 | else if (end_io_wq->metadata) |
702 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 701 | btrfs_queue_work(fs_info->endio_meta_workers, |
703 | &end_io_wq->work); | 702 | &end_io_wq->work); |
704 | else | 703 | else |
705 | btrfs_queue_worker(&fs_info->endio_workers, | 704 | btrfs_queue_work(fs_info->endio_workers, |
706 | &end_io_wq->work); | 705 | &end_io_wq->work); |
707 | } | 706 | } |
708 | } | 707 | } |
709 | 708 | ||
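The routing logic above, condensed: the target workqueue depends only on the metadata class of the end_io work and on whether the bio was a write. The same decision as a pure helper, assuming the fields named in this patch:

        static struct btrfs_workqueue *
        pick_endio_wq(struct btrfs_fs_info *fs_info, int metadata, int is_write)
        {
                if (is_write) {
                        if (metadata == BTRFS_WQ_ENDIO_METADATA)
                                return fs_info->endio_meta_write_workers;
                        if (metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
                                return fs_info->endio_freespace_worker;
                        if (metadata == BTRFS_WQ_ENDIO_RAID56)
                                return fs_info->endio_raid56_workers;
                        return fs_info->endio_write_workers;
                }
                if (metadata == BTRFS_WQ_ENDIO_RAID56)
                        return fs_info->endio_raid56_workers;
                if (metadata)
                        return fs_info->endio_meta_workers;
                return fs_info->endio_workers;
        }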
@@ -738,7 +737,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
738 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | 737 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) |
739 | { | 738 | { |
740 | unsigned long limit = min_t(unsigned long, | 739 | unsigned long limit = min_t(unsigned long, |
741 | info->workers.max_workers, | 740 | info->thread_pool_size, |
742 | info->fs_devices->open_devices); | 741 | info->fs_devices->open_devices); |
743 | return 256 * limit; | 742 | return 256 * limit; |
744 | } | 743 | } |
@@ -811,11 +810,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
811 | async->submit_bio_start = submit_bio_start; | 810 | async->submit_bio_start = submit_bio_start; |
812 | async->submit_bio_done = submit_bio_done; | 811 | async->submit_bio_done = submit_bio_done; |
813 | 812 | ||
814 | async->work.func = run_one_async_start; | 813 | btrfs_init_work(&async->work, run_one_async_start, |
815 | async->work.ordered_func = run_one_async_done; | 814 | run_one_async_done, run_one_async_free); |
816 | async->work.ordered_free = run_one_async_free; | ||
817 | 815 | ||
818 | async->work.flags = 0; | ||
819 | async->bio_flags = bio_flags; | 816 | async->bio_flags = bio_flags; |
820 | async->bio_offset = bio_offset; | 817 | async->bio_offset = bio_offset; |
821 | 818 | ||
@@ -824,9 +821,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
824 | atomic_inc(&fs_info->nr_async_submits); | 821 | atomic_inc(&fs_info->nr_async_submits); |
825 | 822 | ||
826 | if (rw & REQ_SYNC) | 823 | if (rw & REQ_SYNC) |
827 | btrfs_set_work_high_prio(&async->work); | 824 | btrfs_set_work_high_priority(&async->work); |
828 | 825 | ||
829 | btrfs_queue_worker(&fs_info->workers, &async->work); | 826 | btrfs_queue_work(fs_info->workers, &async->work); |
830 | 827 | ||
831 | while (atomic_read(&fs_info->async_submit_draining) && | 828 | while (atomic_read(&fs_info->async_submit_draining) && |
832 | atomic_read(&fs_info->nr_async_submits)) { | 829 | atomic_read(&fs_info->nr_async_submits)) { |
@@ -1149,6 +1146,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1149 | } | 1146 | } |
1150 | } | 1147 | } |
1151 | 1148 | ||
1149 | static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) | ||
1150 | { | ||
1151 | struct btrfs_subvolume_writers *writers; | ||
1152 | int ret; | ||
1153 | |||
1154 | writers = kmalloc(sizeof(*writers), GFP_NOFS); | ||
1155 | if (!writers) | ||
1156 | return ERR_PTR(-ENOMEM); | ||
1157 | |||
1158 | ret = percpu_counter_init(&writers->counter, 0); | ||
1159 | if (ret < 0) { | ||
1160 | kfree(writers); | ||
1161 | return ERR_PTR(ret); | ||
1162 | } | ||
1163 | |||
1164 | init_waitqueue_head(&writers->wait); | ||
1165 | return writers; | ||
1166 | } | ||
1167 | |||
1168 | static void | ||
1169 | btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) | ||
1170 | { | ||
1171 | percpu_counter_destroy(&writers->counter); | ||
1172 | kfree(writers); | ||
1173 | } | ||
1174 | |||
1152 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1175 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, |
1153 | u32 stripesize, struct btrfs_root *root, | 1176 | u32 stripesize, struct btrfs_root *root, |
1154 | struct btrfs_fs_info *fs_info, | 1177 | struct btrfs_fs_info *fs_info, |
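btrfs_alloc_subvolume_writers() reports failure through ERR_PTR() rather than NULL, so a caller tests with IS_ERR(), mirroring what btrfs_init_fs_root() does later in this patch. A minimal, hypothetical caller:

        static int attach_subv_writers(struct btrfs_root *root)
        {
                struct btrfs_subvolume_writers *writers;

                writers = btrfs_alloc_subvolume_writers();
                if (IS_ERR(writers))
                        return PTR_ERR(writers); /* -ENOMEM or counter error */
                root->subv_writers = writers;
                return 0;
        }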
@@ -1194,16 +1217,22 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1194 | spin_lock_init(&root->log_extents_lock[1]); | 1217 | spin_lock_init(&root->log_extents_lock[1]); |
1195 | mutex_init(&root->objectid_mutex); | 1218 | mutex_init(&root->objectid_mutex); |
1196 | mutex_init(&root->log_mutex); | 1219 | mutex_init(&root->log_mutex); |
1220 | mutex_init(&root->ordered_extent_mutex); | ||
1221 | mutex_init(&root->delalloc_mutex); | ||
1197 | init_waitqueue_head(&root->log_writer_wait); | 1222 | init_waitqueue_head(&root->log_writer_wait); |
1198 | init_waitqueue_head(&root->log_commit_wait[0]); | 1223 | init_waitqueue_head(&root->log_commit_wait[0]); |
1199 | init_waitqueue_head(&root->log_commit_wait[1]); | 1224 | init_waitqueue_head(&root->log_commit_wait[1]); |
1225 | INIT_LIST_HEAD(&root->log_ctxs[0]); | ||
1226 | INIT_LIST_HEAD(&root->log_ctxs[1]); | ||
1200 | atomic_set(&root->log_commit[0], 0); | 1227 | atomic_set(&root->log_commit[0], 0); |
1201 | atomic_set(&root->log_commit[1], 0); | 1228 | atomic_set(&root->log_commit[1], 0); |
1202 | atomic_set(&root->log_writers, 0); | 1229 | atomic_set(&root->log_writers, 0); |
1203 | atomic_set(&root->log_batch, 0); | 1230 | atomic_set(&root->log_batch, 0); |
1204 | atomic_set(&root->orphan_inodes, 0); | 1231 | atomic_set(&root->orphan_inodes, 0); |
1205 | atomic_set(&root->refs, 1); | 1232 | atomic_set(&root->refs, 1); |
1233 | atomic_set(&root->will_be_snapshoted, 0); | ||
1206 | root->log_transid = 0; | 1234 | root->log_transid = 0; |
1235 | root->log_transid_committed = -1; | ||
1207 | root->last_log_commit = 0; | 1236 | root->last_log_commit = 0; |
1208 | if (fs_info) | 1237 | if (fs_info) |
1209 | extent_io_tree_init(&root->dirty_log_pages, | 1238 | extent_io_tree_init(&root->dirty_log_pages, |
@@ -1417,6 +1446,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1417 | WARN_ON(root->log_root); | 1446 | WARN_ON(root->log_root); |
1418 | root->log_root = log_root; | 1447 | root->log_root = log_root; |
1419 | root->log_transid = 0; | 1448 | root->log_transid = 0; |
1449 | root->log_transid_committed = -1; | ||
1420 | root->last_log_commit = 0; | 1450 | root->last_log_commit = 0; |
1421 | return 0; | 1451 | return 0; |
1422 | } | 1452 | } |
@@ -1498,6 +1528,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
1498 | int btrfs_init_fs_root(struct btrfs_root *root) | 1528 | int btrfs_init_fs_root(struct btrfs_root *root) |
1499 | { | 1529 | { |
1500 | int ret; | 1530 | int ret; |
1531 | struct btrfs_subvolume_writers *writers; | ||
1501 | 1532 | ||
1502 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1533 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); |
1503 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1534 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), |
@@ -1507,6 +1538,13 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
1507 | goto fail; | 1538 | goto fail; |
1508 | } | 1539 | } |
1509 | 1540 | ||
1541 | writers = btrfs_alloc_subvolume_writers(); | ||
1542 | if (IS_ERR(writers)) { | ||
1543 | ret = PTR_ERR(writers); | ||
1544 | goto fail; | ||
1545 | } | ||
1546 | root->subv_writers = writers; | ||
1547 | |||
1510 | btrfs_init_free_ino_ctl(root); | 1548 | btrfs_init_free_ino_ctl(root); |
1511 | mutex_init(&root->fs_commit_mutex); | 1549 | mutex_init(&root->fs_commit_mutex); |
1512 | spin_lock_init(&root->cache_lock); | 1550 | spin_lock_init(&root->cache_lock); |
@@ -1514,8 +1552,11 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
1514 | 1552 | ||
1515 | ret = get_anon_bdev(&root->anon_dev); | 1553 | ret = get_anon_bdev(&root->anon_dev); |
1516 | if (ret) | 1554 | if (ret) |
1517 | goto fail; | 1555 | goto free_writers; |
1518 | return 0; | 1556 | return 0; |
1557 | |||
1558 | free_writers: | ||
1559 | btrfs_free_subvolume_writers(root->subv_writers); | ||
1519 | fail: | 1560 | fail: |
1520 | kfree(root->free_ino_ctl); | 1561 | kfree(root->free_ino_ctl); |
1521 | kfree(root->free_ino_pinned); | 1562 | kfree(root->free_ino_pinned); |
@@ -1990,23 +2031,22 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, | |||
1990 | /* helper to cleanup workers */ | 2031 | /* helper to cleanup workers */ |
1991 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | 2032 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) |
1992 | { | 2033 | { |
1993 | btrfs_stop_workers(&fs_info->generic_worker); | 2034 | btrfs_destroy_workqueue(fs_info->fixup_workers); |
1994 | btrfs_stop_workers(&fs_info->fixup_workers); | 2035 | btrfs_destroy_workqueue(fs_info->delalloc_workers); |
1995 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2036 | btrfs_destroy_workqueue(fs_info->workers); |
1996 | btrfs_stop_workers(&fs_info->workers); | 2037 | btrfs_destroy_workqueue(fs_info->endio_workers); |
1997 | btrfs_stop_workers(&fs_info->endio_workers); | 2038 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); |
1998 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2039 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); |
1999 | btrfs_stop_workers(&fs_info->endio_raid56_workers); | 2040 | btrfs_destroy_workqueue(fs_info->rmw_workers); |
2000 | btrfs_stop_workers(&fs_info->rmw_workers); | 2041 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); |
2001 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2042 | btrfs_destroy_workqueue(fs_info->endio_write_workers); |
2002 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2043 | btrfs_destroy_workqueue(fs_info->endio_freespace_worker); |
2003 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2044 | btrfs_destroy_workqueue(fs_info->submit_workers); |
2004 | btrfs_stop_workers(&fs_info->submit_workers); | 2045 | btrfs_destroy_workqueue(fs_info->delayed_workers); |
2005 | btrfs_stop_workers(&fs_info->delayed_workers); | 2046 | btrfs_destroy_workqueue(fs_info->caching_workers); |
2006 | btrfs_stop_workers(&fs_info->caching_workers); | 2047 | btrfs_destroy_workqueue(fs_info->readahead_workers); |
2007 | btrfs_stop_workers(&fs_info->readahead_workers); | 2048 | btrfs_destroy_workqueue(fs_info->flush_workers); |
2008 | btrfs_stop_workers(&fs_info->flush_workers); | 2049 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); |
2009 | btrfs_stop_workers(&fs_info->qgroup_rescan_workers); | ||
2010 | } | 2050 | } |
2011 | 2051 | ||
2012 | static void free_root_extent_buffers(struct btrfs_root *root) | 2052 | static void free_root_extent_buffers(struct btrfs_root *root) |
@@ -2097,6 +2137,8 @@ int open_ctree(struct super_block *sb, | |||
2097 | int err = -EINVAL; | 2137 | int err = -EINVAL; |
2098 | int num_backups_tried = 0; | 2138 | int num_backups_tried = 0; |
2099 | int backup_index = 0; | 2139 | int backup_index = 0; |
2140 | int max_active; | ||
2141 | int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
2100 | bool create_uuid_tree; | 2142 | bool create_uuid_tree; |
2101 | bool check_uuid_tree; | 2143 | bool check_uuid_tree; |
2102 | 2144 | ||
@@ -2133,10 +2175,16 @@ int open_ctree(struct super_block *sb, | |||
2133 | goto fail_dirty_metadata_bytes; | 2175 | goto fail_dirty_metadata_bytes; |
2134 | } | 2176 | } |
2135 | 2177 | ||
2178 | ret = percpu_counter_init(&fs_info->bio_counter, 0); | ||
2179 | if (ret) { | ||
2180 | err = ret; | ||
2181 | goto fail_delalloc_bytes; | ||
2182 | } | ||
2183 | |||
2136 | fs_info->btree_inode = new_inode(sb); | 2184 | fs_info->btree_inode = new_inode(sb); |
2137 | if (!fs_info->btree_inode) { | 2185 | if (!fs_info->btree_inode) { |
2138 | err = -ENOMEM; | 2186 | err = -ENOMEM; |
2139 | goto fail_delalloc_bytes; | 2187 | goto fail_bio_counter; |
2140 | } | 2188 | } |
2141 | 2189 | ||
2142 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 2190 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
@@ -2159,6 +2207,7 @@ int open_ctree(struct super_block *sb, | |||
2159 | spin_lock_init(&fs_info->buffer_lock); | 2207 | spin_lock_init(&fs_info->buffer_lock); |
2160 | rwlock_init(&fs_info->tree_mod_log_lock); | 2208 | rwlock_init(&fs_info->tree_mod_log_lock); |
2161 | mutex_init(&fs_info->reloc_mutex); | 2209 | mutex_init(&fs_info->reloc_mutex); |
2210 | mutex_init(&fs_info->delalloc_root_mutex); | ||
2162 | seqlock_init(&fs_info->profiles_lock); | 2211 | seqlock_init(&fs_info->profiles_lock); |
2163 | 2212 | ||
2164 | init_completion(&fs_info->kobj_unregister); | 2213 | init_completion(&fs_info->kobj_unregister); |
@@ -2211,6 +2260,7 @@ int open_ctree(struct super_block *sb, | |||
2211 | atomic_set(&fs_info->scrub_pause_req, 0); | 2260 | atomic_set(&fs_info->scrub_pause_req, 0); |
2212 | atomic_set(&fs_info->scrubs_paused, 0); | 2261 | atomic_set(&fs_info->scrubs_paused, 0); |
2213 | atomic_set(&fs_info->scrub_cancel_req, 0); | 2262 | atomic_set(&fs_info->scrub_cancel_req, 0); |
2263 | init_waitqueue_head(&fs_info->replace_wait); | ||
2214 | init_waitqueue_head(&fs_info->scrub_pause_wait); | 2264 | init_waitqueue_head(&fs_info->scrub_pause_wait); |
2215 | fs_info->scrub_workers_refcnt = 0; | 2265 | fs_info->scrub_workers_refcnt = 0; |
2216 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2266 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
@@ -2458,104 +2508,68 @@ int open_ctree(struct super_block *sb, | |||
2458 | goto fail_alloc; | 2508 | goto fail_alloc; |
2459 | } | 2509 | } |
2460 | 2510 | ||
2461 | btrfs_init_workers(&fs_info->generic_worker, | 2511 | max_active = fs_info->thread_pool_size; |
2462 | "genwork", 1, NULL); | ||
2463 | |||
2464 | btrfs_init_workers(&fs_info->workers, "worker", | ||
2465 | fs_info->thread_pool_size, | ||
2466 | &fs_info->generic_worker); | ||
2467 | 2512 | ||
2468 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 2513 | fs_info->workers = |
2469 | fs_info->thread_pool_size, NULL); | 2514 | btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, |
2515 | max_active, 16); | ||
2470 | 2516 | ||
2471 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | 2517 | fs_info->delalloc_workers = |
2472 | fs_info->thread_pool_size, NULL); | 2518 | btrfs_alloc_workqueue("delalloc", flags, max_active, 2); |
2473 | 2519 | ||
2474 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2520 | fs_info->flush_workers = |
2475 | min_t(u64, fs_devices->num_devices, | 2521 | btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); |
2476 | fs_info->thread_pool_size), NULL); | ||
2477 | 2522 | ||
2478 | btrfs_init_workers(&fs_info->caching_workers, "cache", | 2523 | fs_info->caching_workers = |
2479 | fs_info->thread_pool_size, NULL); | 2524 | btrfs_alloc_workqueue("cache", flags, max_active, 0); |
2480 | 2525 | ||
2481 | /* a higher idle thresh on the submit workers makes it much more | 2526 | /* |
2527 | * a higher idle thresh on the submit workers makes it much more | ||
2482 | * likely that bios will be sent down in a sane order to the | 2528 | * likely that bios will be sent down in a sane order to the |
2483 | * devices | 2529 | * devices |
2484 | */ | 2530 | */ |
2485 | fs_info->submit_workers.idle_thresh = 64; | 2531 | fs_info->submit_workers = |
2486 | 2532 | btrfs_alloc_workqueue("submit", flags, | |
2487 | fs_info->workers.idle_thresh = 16; | 2533 | min_t(u64, fs_devices->num_devices, |
2488 | fs_info->workers.ordered = 1; | 2534 | max_active), 64); |
2489 | 2535 | ||
2490 | fs_info->delalloc_workers.idle_thresh = 2; | 2536 | fs_info->fixup_workers = |
2491 | fs_info->delalloc_workers.ordered = 1; | 2537 | btrfs_alloc_workqueue("fixup", flags, 1, 0); |
2492 | |||
2493 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, | ||
2494 | &fs_info->generic_worker); | ||
2495 | btrfs_init_workers(&fs_info->endio_workers, "endio", | ||
2496 | fs_info->thread_pool_size, | ||
2497 | &fs_info->generic_worker); | ||
2498 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | ||
2499 | fs_info->thread_pool_size, | ||
2500 | &fs_info->generic_worker); | ||
2501 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | ||
2502 | "endio-meta-write", fs_info->thread_pool_size, | ||
2503 | &fs_info->generic_worker); | ||
2504 | btrfs_init_workers(&fs_info->endio_raid56_workers, | ||
2505 | "endio-raid56", fs_info->thread_pool_size, | ||
2506 | &fs_info->generic_worker); | ||
2507 | btrfs_init_workers(&fs_info->rmw_workers, | ||
2508 | "rmw", fs_info->thread_pool_size, | ||
2509 | &fs_info->generic_worker); | ||
2510 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | ||
2511 | fs_info->thread_pool_size, | ||
2512 | &fs_info->generic_worker); | ||
2513 | btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", | ||
2514 | 1, &fs_info->generic_worker); | ||
2515 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | ||
2516 | fs_info->thread_pool_size, | ||
2517 | &fs_info->generic_worker); | ||
2518 | btrfs_init_workers(&fs_info->readahead_workers, "readahead", | ||
2519 | fs_info->thread_pool_size, | ||
2520 | &fs_info->generic_worker); | ||
2521 | btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, | ||
2522 | &fs_info->generic_worker); | ||
2523 | 2538 | ||
2524 | /* | 2539 | /* |
2525 | * endios are largely parallel and should have a very | 2540 | * endios are largely parallel and should have a very |
2526 | * low idle thresh | 2541 | * low idle thresh |
2527 | */ | 2542 | */ |
2528 | fs_info->endio_workers.idle_thresh = 4; | 2543 | fs_info->endio_workers = |
2529 | fs_info->endio_meta_workers.idle_thresh = 4; | 2544 | btrfs_alloc_workqueue("endio", flags, max_active, 4); |
2530 | fs_info->endio_raid56_workers.idle_thresh = 4; | 2545 | fs_info->endio_meta_workers = |
2531 | fs_info->rmw_workers.idle_thresh = 2; | 2546 | btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); |
2532 | 2547 | fs_info->endio_meta_write_workers = | |
2533 | fs_info->endio_write_workers.idle_thresh = 2; | 2548 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); |
2534 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2549 | fs_info->endio_raid56_workers = |
2535 | fs_info->readahead_workers.idle_thresh = 2; | 2550 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); |
2536 | 2551 | fs_info->rmw_workers = | |
2537 | /* | 2552 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); |
2538 | * btrfs_start_workers can really only fail because of ENOMEM so just | 2553 | fs_info->endio_write_workers = |
2539 | * return -ENOMEM if any of these fail. | 2554 | btrfs_alloc_workqueue("endio-write", flags, max_active, 2); |
2540 | */ | 2555 | fs_info->endio_freespace_worker = |
2541 | ret = btrfs_start_workers(&fs_info->workers); | 2556 | btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); |
2542 | ret |= btrfs_start_workers(&fs_info->generic_worker); | 2557 | fs_info->delayed_workers = |
2543 | ret |= btrfs_start_workers(&fs_info->submit_workers); | 2558 | btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); |
2544 | ret |= btrfs_start_workers(&fs_info->delalloc_workers); | 2559 | fs_info->readahead_workers = |
2545 | ret |= btrfs_start_workers(&fs_info->fixup_workers); | 2560 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); |
2546 | ret |= btrfs_start_workers(&fs_info->endio_workers); | 2561 | fs_info->qgroup_rescan_workers = |
2547 | ret |= btrfs_start_workers(&fs_info->endio_meta_workers); | 2562 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); |
2548 | ret |= btrfs_start_workers(&fs_info->rmw_workers); | 2563 | |
2549 | ret |= btrfs_start_workers(&fs_info->endio_raid56_workers); | 2564 | if (!(fs_info->workers && fs_info->delalloc_workers && |
2550 | ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); | 2565 | fs_info->submit_workers && fs_info->flush_workers && |
2551 | ret |= btrfs_start_workers(&fs_info->endio_write_workers); | 2566 | fs_info->endio_workers && fs_info->endio_meta_workers && |
2552 | ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); | 2567 | fs_info->endio_meta_write_workers && |
2553 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2568 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && |
2554 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2569 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
2555 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2570 | fs_info->caching_workers && fs_info->readahead_workers && |
2556 | ret |= btrfs_start_workers(&fs_info->flush_workers); | 2571 | fs_info->fixup_workers && fs_info->delayed_workers && |
2557 | ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); | 2572 | fs_info->qgroup_rescan_workers)) { |
2558 | if (ret) { | ||
2559 | err = -ENOMEM; | 2573 | err = -ENOMEM; |
2560 | goto fail_sb_buffer; | 2574 | goto fail_sb_buffer; |
2561 | } | 2575 | } |
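Every pool above now comes from btrfs_alloc_workqueue(name, flags, max_active, thresh), which returns NULL on failure; that is why the chain of btrfs_start_workers() calls collapses into one combined NULL check. A sketch of the allocation-plus-check pattern for a single hypothetical pool, with the flags used in this hunk:

        static struct btrfs_workqueue *alloc_demo_pool(int thread_pool_size)
        {
                int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
                struct btrfs_workqueue *wq;

                /* last argument is the idle/ordering threshold, chosen
                 * per-pool above (0 selects the implementation default) */
                wq = btrfs_alloc_workqueue("demo", flags, thread_pool_size, 2);
                if (!wq)
                        return NULL;    /* callers map this to -ENOMEM */
                return wq;
        }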
@@ -2963,6 +2977,8 @@ fail_iput: | |||
2963 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2977 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2964 | 2978 | ||
2965 | iput(fs_info->btree_inode); | 2979 | iput(fs_info->btree_inode); |
2980 | fail_bio_counter: | ||
2981 | percpu_counter_destroy(&fs_info->bio_counter); | ||
2966 | fail_delalloc_bytes: | 2982 | fail_delalloc_bytes: |
2967 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 2983 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
2968 | fail_dirty_metadata_bytes: | 2984 | fail_dirty_metadata_bytes: |
@@ -3244,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3244 | /* send down all the barriers */ | 3260 | /* send down all the barriers */ |
3245 | head = &info->fs_devices->devices; | 3261 | head = &info->fs_devices->devices; |
3246 | list_for_each_entry_rcu(dev, head, dev_list) { | 3262 | list_for_each_entry_rcu(dev, head, dev_list) { |
3263 | if (dev->missing) | ||
3264 | continue; | ||
3247 | if (!dev->bdev) { | 3265 | if (!dev->bdev) { |
3248 | errors_send++; | 3266 | errors_send++; |
3249 | continue; | 3267 | continue; |
@@ -3258,6 +3276,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3258 | 3276 | ||
3259 | /* wait for all the barriers */ | 3277 | /* wait for all the barriers */ |
3260 | list_for_each_entry_rcu(dev, head, dev_list) { | 3278 | list_for_each_entry_rcu(dev, head, dev_list) { |
3279 | if (dev->missing) | ||
3280 | continue; | ||
3261 | if (!dev->bdev) { | 3281 | if (!dev->bdev) { |
3262 | errors_wait++; | 3282 | errors_wait++; |
3263 | continue; | 3283 | continue; |
@@ -3477,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
3477 | root->orphan_block_rsv = NULL; | 3497 | root->orphan_block_rsv = NULL; |
3478 | if (root->anon_dev) | 3498 | if (root->anon_dev) |
3479 | free_anon_bdev(root->anon_dev); | 3499 | free_anon_bdev(root->anon_dev); |
3500 | if (root->subv_writers) | ||
3501 | btrfs_free_subvolume_writers(root->subv_writers); | ||
3480 | free_extent_buffer(root->node); | 3502 | free_extent_buffer(root->node); |
3481 | free_extent_buffer(root->commit_root); | 3503 | free_extent_buffer(root->commit_root); |
3482 | kfree(root->free_ino_ctl); | 3504 | kfree(root->free_ino_ctl); |
@@ -3610,6 +3632,7 @@ int close_ctree(struct btrfs_root *root) | |||
3610 | 3632 | ||
3611 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | 3633 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); |
3612 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 3634 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
3635 | percpu_counter_destroy(&fs_info->bio_counter); | ||
3613 | bdi_destroy(&fs_info->bdi); | 3636 | bdi_destroy(&fs_info->bdi); |
3614 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3637 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
3615 | 3638 | ||
@@ -3791,9 +3814,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | |||
3791 | list_move_tail(&root->ordered_root, | 3814 | list_move_tail(&root->ordered_root, |
3792 | &fs_info->ordered_roots); | 3815 | &fs_info->ordered_roots); |
3793 | 3816 | ||
3817 | spin_unlock(&fs_info->ordered_root_lock); | ||
3794 | btrfs_destroy_ordered_extents(root); | 3818 | btrfs_destroy_ordered_extents(root); |
3795 | 3819 | ||
3796 | cond_resched_lock(&fs_info->ordered_root_lock); | 3820 | cond_resched(); |
3821 | spin_lock(&fs_info->ordered_root_lock); | ||
3797 | } | 3822 | } |
3798 | spin_unlock(&fs_info->ordered_root_lock); | 3823 | spin_unlock(&fs_info->ordered_root_lock); |
3799 | } | 3824 | } |
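The reordering above is a standard lock-drop idiom: btrfs_destroy_ordered_extents() can block, so the spinlock is released around it and retaken after cond_resched(), with the list re-examined under the lock on each pass. The idiom in isolation (list handling and the per-item operation are hypothetical):

        #include <linux/list.h>
        #include <linux/spinlock.h>

        static void process_item(struct list_head *item)
        {
                /* hypothetical per-item work that may sleep */
        }

        static void drain_list(spinlock_t *lock, struct list_head *head)
        {
                spin_lock(lock);
                while (!list_empty(head)) {
                        struct list_head *item = head->next;

                        list_del_init(item);
                        spin_unlock(lock);      /* drop before sleeping work */

                        process_item(item);
                        cond_resched();

                        spin_lock(lock);        /* retake; list re-checked */
                }
                spin_unlock(lock);
        }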
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32312e09f0f5..c6b6a6e3e735 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -549,7 +549,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
549 | caching_ctl->block_group = cache; | 549 | caching_ctl->block_group = cache; |
550 | caching_ctl->progress = cache->key.objectid; | 550 | caching_ctl->progress = cache->key.objectid; |
551 | atomic_set(&caching_ctl->count, 1); | 551 | atomic_set(&caching_ctl->count, 1); |
552 | caching_ctl->work.func = caching_thread; | 552 | btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); |
553 | 553 | ||
554 | spin_lock(&cache->lock); | 554 | spin_lock(&cache->lock); |
555 | /* | 555 | /* |
@@ -640,7 +640,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
640 | 640 | ||
641 | btrfs_get_block_group(cache); | 641 | btrfs_get_block_group(cache); |
642 | 642 | ||
643 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); | 643 | btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); |
644 | 644 | ||
645 | return ret; | 645 | return ret; |
646 | } | 646 | } |
@@ -3971,7 +3971,7 @@ static int can_overcommit(struct btrfs_root *root, | |||
3971 | } | 3971 | } |
3972 | 3972 | ||
3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | 3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, |
3974 | unsigned long nr_pages) | 3974 | unsigned long nr_pages, int nr_items) |
3975 | { | 3975 | { |
3976 | struct super_block *sb = root->fs_info->sb; | 3976 | struct super_block *sb = root->fs_info->sb; |
3977 | 3977 | ||
@@ -3986,9 +3986,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
3986 | * the filesystem is readonly (all dirty pages are written to | 3986 | * the filesystem is readonly (all dirty pages are written to |
3987 | * the disk). | 3987 | * the disk). |
3988 | */ | 3988 | */ |
3989 | btrfs_start_delalloc_roots(root->fs_info, 0); | 3989 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); |
3990 | if (!current->journal_info) | 3990 | if (!current->journal_info) |
3991 | btrfs_wait_ordered_roots(root->fs_info, -1); | 3991 | btrfs_wait_ordered_roots(root->fs_info, nr_items); |
3992 | } | 3992 | } |
3993 | } | 3993 | } |
3994 | 3994 | ||
@@ -4045,7 +4045,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
4045 | while (delalloc_bytes && loops < 3) { | 4045 | while (delalloc_bytes && loops < 3) { |
4046 | max_reclaim = min(delalloc_bytes, to_reclaim); | 4046 | max_reclaim = min(delalloc_bytes, to_reclaim); |
4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages); | 4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages, items); |
4049 | /* | 4049 | /* |
4050 | * We need to wait for the async pages to actually start before | 4050 | * We need to wait for the async pages to actually start before |
4051 | * we do anything. | 4051 | * we do anything. |
@@ -4112,13 +4112,9 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4112 | goto commit; | 4112 | goto commit; |
4113 | 4113 | ||
4114 | /* See if there is enough pinned space to make this reservation */ | 4114 | /* See if there is enough pinned space to make this reservation */ |
4115 | spin_lock(&space_info->lock); | ||
4116 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4115 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4117 | bytes) >= 0) { | 4116 | bytes) >= 0) |
4118 | spin_unlock(&space_info->lock); | ||
4119 | goto commit; | 4117 | goto commit; |
4120 | } | ||
4121 | spin_unlock(&space_info->lock); | ||
4122 | 4118 | ||
4123 | /* | 4119 | /* |
4124 | * See if there is some space in the delayed insertion reservation for | 4120 | * See if there is some space in the delayed insertion reservation for |
@@ -4127,16 +4123,13 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4127 | if (space_info != delayed_rsv->space_info) | 4123 | if (space_info != delayed_rsv->space_info) |
4128 | return -ENOSPC; | 4124 | return -ENOSPC; |
4129 | 4125 | ||
4130 | spin_lock(&space_info->lock); | ||
4131 | spin_lock(&delayed_rsv->lock); | 4126 | spin_lock(&delayed_rsv->lock); |
4132 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4127 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4133 | bytes - delayed_rsv->size) >= 0) { | 4128 | bytes - delayed_rsv->size) >= 0) { |
4134 | spin_unlock(&delayed_rsv->lock); | 4129 | spin_unlock(&delayed_rsv->lock); |
4135 | spin_unlock(&space_info->lock); | ||
4136 | return -ENOSPC; | 4130 | return -ENOSPC; |
4137 | } | 4131 | } |
4138 | spin_unlock(&delayed_rsv->lock); | 4132 | spin_unlock(&delayed_rsv->lock); |
4139 | spin_unlock(&space_info->lock); | ||
4140 | 4133 | ||
4141 | commit: | 4134 | commit: |
4142 | trans = btrfs_join_transaction(root); | 4135 | trans = btrfs_join_transaction(root); |
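Dropping space_info->lock around the checks above is safe because percpu_counter_compare() is self-synchronizing: it uses the cheap per-cpu estimate when the margin is large and falls back to a precise, internally locked sum when the value is close to the comparison point. The read-only check, isolated:

        /* Sketch: true when enough pinned bytes exist to cover @bytes;
         * needs no caller-side locking for a momentary check like this. */
        static int enough_pinned_space(struct btrfs_space_info *space_info,
                                       u64 bytes)
        {
                return percpu_counter_compare(&space_info->total_bytes_pinned,
                                              bytes) >= 0;
        }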
@@ -4181,7 +4174,7 @@ static int flush_space(struct btrfs_root *root, | |||
4181 | break; | 4174 | break; |
4182 | case FLUSH_DELALLOC: | 4175 | case FLUSH_DELALLOC: |
4183 | case FLUSH_DELALLOC_WAIT: | 4176 | case FLUSH_DELALLOC_WAIT: |
4184 | shrink_delalloc(root, num_bytes, orig_bytes, | 4177 | shrink_delalloc(root, num_bytes * 2, orig_bytes, |
4185 | state == FLUSH_DELALLOC_WAIT); | 4178 | state == FLUSH_DELALLOC_WAIT); |
4186 | break; | 4179 | break; |
4187 | case ALLOC_CHUNK: | 4180 | case ALLOC_CHUNK: |
@@ -8938,3 +8931,38 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
8938 | range->len = trimmed; | 8931 | range->len = trimmed; |
8939 | return ret; | 8932 | return ret; |
8940 | } | 8933 | } |
8934 | |||
8935 | /* | ||
8936 | * btrfs_{start,end}_nocow_write() are similar to mnt_{want,drop}_write(): | ||
8937 | * they are used to prevent tasks from writing data into the page cache | ||
8938 | * via nocow before the subvolume is snapshotted, and to flush that | ||
8939 | * data to disk after the snapshot creation. | ||
8940 | */ | ||
8941 | void btrfs_end_nocow_write(struct btrfs_root *root) | ||
8942 | { | ||
8943 | percpu_counter_dec(&root->subv_writers->counter); | ||
8944 | /* | ||
8945 | * Make sure counter is updated before we wake up | ||
8946 | * waiters. | ||
8947 | */ | ||
8948 | smp_mb(); | ||
8949 | if (waitqueue_active(&root->subv_writers->wait)) | ||
8950 | wake_up(&root->subv_writers->wait); | ||
8951 | } | ||
8952 | |||
8953 | int btrfs_start_nocow_write(struct btrfs_root *root) | ||
8954 | { | ||
8955 | if (unlikely(atomic_read(&root->will_be_snapshoted))) | ||
8956 | return 0; | ||
8957 | |||
8958 | percpu_counter_inc(&root->subv_writers->counter); | ||
8959 | /* | ||
8960 | * Make sure counter is updated before we check for snapshot creation. | ||
8961 | */ | ||
8962 | smp_mb(); | ||
8963 | if (unlikely(atomic_read(&root->will_be_snapshoted))) { | ||
8964 | btrfs_end_nocow_write(root); | ||
8965 | return 0; | ||
8966 | } | ||
8967 | return 1; | ||
8968 | } | ||
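A minimal caller sketch of the new guard (the function below and its fallback
policy are illustrative assumptions, not part of this patch):

	static int example_nocow_write(struct btrfs_root *root)
	{
		/* Returns 0 when a snapshot is pending: fall back to COW. */
		if (!btrfs_start_nocow_write(root))
			return -ENOSPC;

		/* ... dirty the page cache via nocow here ... */

		btrfs_end_nocow_write(root);
		return 0;
	}

This is the same pairing check_can_nocow() adopts in the file.c hunks below,
which release the guard on every path that cannot proceed without COW.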
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 85bbd01f1271..ae69a00387e7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -229,12 +229,14 @@ void free_extent_state(struct extent_state *state) | |||
229 | } | 229 | } |
230 | } | 230 | } |
231 | 231 | ||
232 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | 232 | static struct rb_node *tree_insert(struct rb_root *root, |
233 | struct rb_node *search_start, | ||
234 | u64 offset, | ||
233 | struct rb_node *node, | 235 | struct rb_node *node, |
234 | struct rb_node ***p_in, | 236 | struct rb_node ***p_in, |
235 | struct rb_node **parent_in) | 237 | struct rb_node **parent_in) |
236 | { | 238 | { |
237 | struct rb_node **p = &root->rb_node; | 239 | struct rb_node **p; |
238 | struct rb_node *parent = NULL; | 240 | struct rb_node *parent = NULL; |
239 | struct tree_entry *entry; | 241 | struct tree_entry *entry; |
240 | 242 | ||
@@ -244,6 +246,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
244 | goto do_insert; | 246 | goto do_insert; |
245 | } | 247 | } |
246 | 248 | ||
249 | p = search_start ? &search_start : &root->rb_node; | ||
247 | while (*p) { | 250 | while (*p) { |
248 | parent = *p; | 251 | parent = *p; |
249 | entry = rb_entry(parent, struct tree_entry, rb_node); | 252 | entry = rb_entry(parent, struct tree_entry, rb_node); |
@@ -430,7 +433,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
430 | 433 | ||
431 | set_state_bits(tree, state, bits); | 434 | set_state_bits(tree, state, bits); |
432 | 435 | ||
433 | node = tree_insert(&tree->state, end, &state->rb_node, p, parent); | 436 | node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent); |
434 | if (node) { | 437 | if (node) { |
435 | struct extent_state *found; | 438 | struct extent_state *found; |
436 | found = rb_entry(node, struct extent_state, rb_node); | 439 | found = rb_entry(node, struct extent_state, rb_node); |
@@ -477,8 +480,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
477 | prealloc->state = orig->state; | 480 | prealloc->state = orig->state; |
478 | orig->start = split; | 481 | orig->start = split; |
479 | 482 | ||
480 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node, | 483 | node = tree_insert(&tree->state, &orig->rb_node, prealloc->end, |
481 | NULL, NULL); | 484 | &prealloc->rb_node, NULL, NULL); |
482 | if (node) { | 485 | if (node) { |
483 | free_extent_state(prealloc); | 486 | free_extent_state(prealloc); |
484 | return -EEXIST; | 487 | return -EEXIST; |
@@ -2757,7 +2760,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, | |||
2757 | 2760 | ||
2758 | if (em_cached && *em_cached) { | 2761 | if (em_cached && *em_cached) { |
2759 | em = *em_cached; | 2762 | em = *em_cached; |
2760 | if (em->in_tree && start >= em->start && | 2763 | if (extent_map_in_tree(em) && start >= em->start && |
2761 | start < extent_map_end(em)) { | 2764 | start < extent_map_end(em)) { |
2762 | atomic_inc(&em->refs); | 2765 | atomic_inc(&em->refs); |
2763 | return em; | 2766 | return em; |
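The new search_start parameter lets a caller that already knows roughly where
the new node belongs begin the insertion walk there instead of descending from
the root. A generic sketch of the idea, assuming the tree_entry layout used by
extent_io.c (the helper name is illustrative, not from this patch):

	static struct rb_node *hinted_insert(struct rb_root *root,
					     struct rb_node *hint, u64 offset,
					     struct rb_node *node)
	{
		/* Caller guarantees the key sorts within hint's subtree. */
		struct rb_node **p = hint ? &hint : &root->rb_node;
		struct rb_node *parent = NULL;
		struct tree_entry *entry;

		while (*p) {
			parent = *p;
			entry = rb_entry(parent, struct tree_entry, rb_node);
			if (offset < entry->start)
				p = &(*p)->rb_left;
			else if (offset > entry->end)
				p = &(*p)->rb_right;
			else
				return parent;	/* overlapping entry exists */
		}
		rb_link_node(node, parent, p);
		rb_insert_color(node, root);
		return NULL;
	}

split_state() exploits this: after the split, prealloc ends at split - 1 while
orig starts at split, so passing &orig->rb_node as the hint is always valid and
the walk stays local to the node being split.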
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 996ad56b57db..1874aee69c86 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -51,7 +51,7 @@ struct extent_map *alloc_extent_map(void) | |||
51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); | 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); |
52 | if (!em) | 52 | if (!em) |
53 | return NULL; | 53 | return NULL; |
54 | em->in_tree = 0; | 54 | RB_CLEAR_NODE(&em->rb_node); |
55 | em->flags = 0; | 55 | em->flags = 0; |
56 | em->compress_type = BTRFS_COMPRESS_NONE; | 56 | em->compress_type = BTRFS_COMPRESS_NONE; |
57 | em->generation = 0; | 57 | em->generation = 0; |
@@ -73,7 +73,7 @@ void free_extent_map(struct extent_map *em) | |||
73 | return; | 73 | return; |
74 | WARN_ON(atomic_read(&em->refs) == 0); | 74 | WARN_ON(atomic_read(&em->refs) == 0); |
75 | if (atomic_dec_and_test(&em->refs)) { | 75 | if (atomic_dec_and_test(&em->refs)) { |
76 | WARN_ON(em->in_tree); | 76 | WARN_ON(extent_map_in_tree(em)); |
77 | WARN_ON(!list_empty(&em->list)); | 77 | WARN_ON(!list_empty(&em->list)); |
78 | kmem_cache_free(extent_map_cache, em); | 78 | kmem_cache_free(extent_map_cache, em); |
79 | } | 79 | } |
@@ -99,8 +99,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
99 | parent = *p; | 99 | parent = *p; |
100 | entry = rb_entry(parent, struct extent_map, rb_node); | 100 | entry = rb_entry(parent, struct extent_map, rb_node); |
101 | 101 | ||
102 | WARN_ON(!entry->in_tree); | ||
103 | |||
104 | if (em->start < entry->start) | 102 | if (em->start < entry->start) |
105 | p = &(*p)->rb_left; | 103 | p = &(*p)->rb_left; |
106 | else if (em->start >= extent_map_end(entry)) | 104 | else if (em->start >= extent_map_end(entry)) |
@@ -128,7 +126,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
128 | if (end > entry->start && em->start < extent_map_end(entry)) | 126 | if (end > entry->start && em->start < extent_map_end(entry)) |
129 | return -EEXIST; | 127 | return -EEXIST; |
130 | 128 | ||
131 | em->in_tree = 1; | ||
132 | rb_link_node(&em->rb_node, orig_parent, p); | 129 | rb_link_node(&em->rb_node, orig_parent, p); |
133 | rb_insert_color(&em->rb_node, root); | 130 | rb_insert_color(&em->rb_node, root); |
134 | return 0; | 131 | return 0; |
@@ -153,8 +150,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
153 | prev = n; | 150 | prev = n; |
154 | prev_entry = entry; | 151 | prev_entry = entry; |
155 | 152 | ||
156 | WARN_ON(!entry->in_tree); | ||
157 | |||
158 | if (offset < entry->start) | 153 | if (offset < entry->start) |
159 | n = n->rb_left; | 154 | n = n->rb_left; |
160 | else if (offset >= extent_map_end(entry)) | 155 | else if (offset >= extent_map_end(entry)) |
@@ -240,12 +235,12 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
240 | em->len += merge->len; | 235 | em->len += merge->len; |
241 | em->block_len += merge->block_len; | 236 | em->block_len += merge->block_len; |
242 | em->block_start = merge->block_start; | 237 | em->block_start = merge->block_start; |
243 | merge->in_tree = 0; | ||
244 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; | 238 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; |
245 | em->mod_start = merge->mod_start; | 239 | em->mod_start = merge->mod_start; |
246 | em->generation = max(em->generation, merge->generation); | 240 | em->generation = max(em->generation, merge->generation); |
247 | 241 | ||
248 | rb_erase(&merge->rb_node, &tree->map); | 242 | rb_erase(&merge->rb_node, &tree->map); |
243 | RB_CLEAR_NODE(&merge->rb_node); | ||
249 | free_extent_map(merge); | 244 | free_extent_map(merge); |
250 | } | 245 | } |
251 | } | 246 | } |
@@ -257,7 +252,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
257 | em->len += merge->len; | 252 | em->len += merge->len; |
258 | em->block_len += merge->block_len; | 253 | em->block_len += merge->block_len; |
259 | rb_erase(&merge->rb_node, &tree->map); | 254 | rb_erase(&merge->rb_node, &tree->map); |
260 | merge->in_tree = 0; | 255 | RB_CLEAR_NODE(&merge->rb_node); |
261 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; | 256 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; |
262 | em->generation = max(em->generation, merge->generation); | 257 | em->generation = max(em->generation, merge->generation); |
263 | free_extent_map(merge); | 258 | free_extent_map(merge); |
@@ -319,7 +314,21 @@ out: | |||
319 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) | 314 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) |
320 | { | 315 | { |
321 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | 316 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
322 | if (em->in_tree) | 317 | if (extent_map_in_tree(em)) |
318 | try_merge_map(tree, em); | ||
319 | } | ||
320 | |||
321 | static inline void setup_extent_mapping(struct extent_map_tree *tree, | ||
322 | struct extent_map *em, | ||
323 | int modified) | ||
324 | { | ||
325 | atomic_inc(&em->refs); | ||
326 | em->mod_start = em->start; | ||
327 | em->mod_len = em->len; | ||
328 | |||
329 | if (modified) | ||
330 | list_move(&em->list, &tree->modified_extents); | ||
331 | else | ||
323 | try_merge_map(tree, em); | 332 | try_merge_map(tree, em); |
324 | } | 333 | } |
325 | 334 | ||
@@ -342,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
342 | if (ret) | 351 | if (ret) |
343 | goto out; | 352 | goto out; |
344 | 353 | ||
345 | atomic_inc(&em->refs); | 354 | setup_extent_mapping(tree, em, modified); |
346 | |||
347 | em->mod_start = em->start; | ||
348 | em->mod_len = em->len; | ||
349 | |||
350 | if (modified) | ||
351 | list_move(&em->list, &tree->modified_extents); | ||
352 | else | ||
353 | try_merge_map(tree, em); | ||
354 | out: | 355 | out: |
355 | return ret; | 356 | return ret; |
356 | } | 357 | } |
@@ -434,6 +435,21 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
434 | rb_erase(&em->rb_node, &tree->map); | 435 | rb_erase(&em->rb_node, &tree->map); |
435 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | 436 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) |
436 | list_del_init(&em->list); | 437 | list_del_init(&em->list); |
437 | em->in_tree = 0; | 438 | RB_CLEAR_NODE(&em->rb_node); |
438 | return ret; | 439 | return ret; |
439 | } | 440 | } |
441 | |||
442 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
443 | struct extent_map *cur, | ||
444 | struct extent_map *new, | ||
445 | int modified) | ||
446 | { | ||
447 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags)); | ||
448 | ASSERT(extent_map_in_tree(cur)); | ||
449 | if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags)) | ||
450 | list_del_init(&cur->list); | ||
451 | rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map); | ||
452 | RB_CLEAR_NODE(&cur->rb_node); | ||
453 | |||
454 | setup_extent_mapping(tree, new, modified); | ||
455 | } | ||
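Because replace_extent_mapping() swaps the new node into the old one's slot
with rb_replace_node(), the range never transiently disappears from the tree.
A usage sketch under the tree's write lock, mirroring the
btrfs_drop_extent_cache() change below:

	write_lock(&em_tree->lock);
	/* 'split' atomically takes over 'em's position in the rb-tree. */
	replace_extent_mapping(em_tree, em, split, modified);
	write_unlock(&em_tree->lock);
	free_extent_map(split);	/* drop the caller's local reference */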
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 93fba716d7f8..e7fd8a56a140 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -33,7 +33,6 @@ struct extent_map { | |||
33 | unsigned long flags; | 33 | unsigned long flags; |
34 | struct block_device *bdev; | 34 | struct block_device *bdev; |
35 | atomic_t refs; | 35 | atomic_t refs; |
36 | unsigned int in_tree; | ||
37 | unsigned int compress_type; | 36 | unsigned int compress_type; |
38 | struct list_head list; | 37 | struct list_head list; |
39 | }; | 38 | }; |
@@ -44,6 +43,11 @@ struct extent_map_tree { | |||
44 | rwlock_t lock; | 43 | rwlock_t lock; |
45 | }; | 44 | }; |
46 | 45 | ||
46 | static inline int extent_map_in_tree(const struct extent_map *em) | ||
47 | { | ||
48 | return !RB_EMPTY_NODE(&em->rb_node); | ||
49 | } | ||
50 | |||
47 | static inline u64 extent_map_end(struct extent_map *em) | 51 | static inline u64 extent_map_end(struct extent_map *em) |
48 | { | 52 | { |
49 | if (em->start + em->len < em->start) | 53 | if (em->start + em->len < em->start) |
@@ -64,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
64 | int add_extent_mapping(struct extent_map_tree *tree, | 68 | int add_extent_mapping(struct extent_map_tree *tree, |
65 | struct extent_map *em, int modified); | 69 | struct extent_map *em, int modified); |
66 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); | 70 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); |
71 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
72 | struct extent_map *cur, | ||
73 | struct extent_map *new, | ||
74 | int modified); | ||
67 | 75 | ||
68 | struct extent_map *alloc_extent_map(void); | 76 | struct extent_map *alloc_extent_map(void); |
69 | void free_extent_map(struct extent_map *em); | 77 | void free_extent_map(struct extent_map *em); |
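Tree membership is now derived from the rb_node's own linkage rather than a
separate flag, the standard <linux/rbtree.h> idiom sketched here:

	RB_CLEAR_NODE(&em->rb_node);	/* at allocation and after rb_erase() */

	if (extent_map_in_tree(em))	/* i.e. !RB_EMPTY_NODE(&em->rb_node) */
		remove_extent_mapping(tree, em);

This drops a field from struct extent_map and removes any chance of the flag
going stale relative to the tree itself.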
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0165b8672f09..e1ffb1e22898 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -591,7 +591,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); | 592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); |
593 | modified = !list_empty(&em->list); | 593 | modified = !list_empty(&em->list); |
594 | remove_extent_mapping(em_tree, em); | ||
595 | if (no_splits) | 594 | if (no_splits) |
596 | goto next; | 595 | goto next; |
597 | 596 | ||
@@ -622,8 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
622 | split->bdev = em->bdev; | 621 | split->bdev = em->bdev; |
623 | split->flags = flags; | 622 | split->flags = flags; |
624 | split->compress_type = em->compress_type; | 623 | split->compress_type = em->compress_type; |
625 | ret = add_extent_mapping(em_tree, split, modified); | 624 | replace_extent_mapping(em_tree, em, split, modified); |
626 | BUG_ON(ret); /* Logic error */ | ||
627 | free_extent_map(split); | 625 | free_extent_map(split); |
628 | split = split2; | 626 | split = split2; |
629 | split2 = NULL; | 627 | split2 = NULL; |
@@ -661,12 +659,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
661 | split->orig_block_len = 0; | 659 | split->orig_block_len = 0; |
662 | } | 660 | } |
663 | 661 | ||
664 | ret = add_extent_mapping(em_tree, split, modified); | 662 | if (extent_map_in_tree(em)) { |
665 | BUG_ON(ret); /* Logic error */ | 663 | replace_extent_mapping(em_tree, em, split, |
664 | modified); | ||
665 | } else { | ||
666 | ret = add_extent_mapping(em_tree, split, | ||
667 | modified); | ||
668 | ASSERT(ret == 0); /* Logic error */ | ||
669 | } | ||
666 | free_extent_map(split); | 670 | free_extent_map(split); |
667 | split = NULL; | 671 | split = NULL; |
668 | } | 672 | } |
669 | next: | 673 | next: |
674 | if (extent_map_in_tree(em)) | ||
675 | remove_extent_mapping(em_tree, em); | ||
670 | write_unlock(&em_tree->lock); | 676 | write_unlock(&em_tree->lock); |
671 | 677 | ||
672 | /* once for us */ | 678 | /* once for us */ |
@@ -720,7 +726,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
720 | if (drop_cache) | 726 | if (drop_cache) |
721 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 727 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
722 | 728 | ||
723 | if (start >= BTRFS_I(inode)->disk_i_size) | 729 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
724 | modify_tree = 0; | 730 | modify_tree = 0; |
725 | 731 | ||
726 | while (1) { | 732 | while (1) { |
@@ -798,7 +804,10 @@ next_slot: | |||
798 | */ | 804 | */ |
799 | if (start > key.offset && end < extent_end) { | 805 | if (start > key.offset && end < extent_end) { |
800 | BUG_ON(del_nr > 0); | 806 | BUG_ON(del_nr > 0); |
801 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 807 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
808 | ret = -EINVAL; | ||
809 | break; | ||
810 | } | ||
802 | 811 | ||
803 | memcpy(&new_key, &key, sizeof(new_key)); | 812 | memcpy(&new_key, &key, sizeof(new_key)); |
804 | new_key.offset = start; | 813 | new_key.offset = start; |
@@ -841,7 +850,10 @@ next_slot: | |||
841 | * | -------- extent -------- | | 850 | * | -------- extent -------- | |
842 | */ | 851 | */ |
843 | if (start <= key.offset && end < extent_end) { | 852 | if (start <= key.offset && end < extent_end) { |
844 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 853 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
854 | ret = -EINVAL; | ||
855 | break; | ||
856 | } | ||
845 | 857 | ||
846 | memcpy(&new_key, &key, sizeof(new_key)); | 858 | memcpy(&new_key, &key, sizeof(new_key)); |
847 | new_key.offset = end; | 859 | new_key.offset = end; |
@@ -864,7 +876,10 @@ next_slot: | |||
864 | */ | 876 | */ |
865 | if (start > key.offset && end >= extent_end) { | 877 | if (start > key.offset && end >= extent_end) { |
866 | BUG_ON(del_nr > 0); | 878 | BUG_ON(del_nr > 0); |
867 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 879 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
880 | ret = -EINVAL; | ||
881 | break; | ||
882 | } | ||
868 | 883 | ||
869 | btrfs_set_file_extent_num_bytes(leaf, fi, | 884 | btrfs_set_file_extent_num_bytes(leaf, fi, |
870 | start - key.offset); | 885 | start - key.offset); |
@@ -938,34 +953,42 @@ next_slot: | |||
938 | * Set path->slots[0] to first slot, so that after the delete | 953 | * Set path->slots[0] to first slot, so that after the delete |
939 | * if items are moved off from our leaf to its immediate left or | 954 | * if items are moved off from our leaf to its immediate left or |
940 | * right neighbor leaves, we end up with a correct and adjusted | 955 | * right neighbor leaves, we end up with a correct and adjusted |
941 | * path->slots[0] for our insertion. | 956 | * path->slots[0] for our insertion (if replace_extent != 0). |
942 | */ | 957 | */ |
943 | path->slots[0] = del_slot; | 958 | path->slots[0] = del_slot; |
944 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 959 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
945 | if (ret) | 960 | if (ret) |
946 | btrfs_abort_transaction(trans, root, ret); | 961 | btrfs_abort_transaction(trans, root, ret); |
962 | } | ||
947 | 963 | ||
948 | leaf = path->nodes[0]; | 964 | leaf = path->nodes[0]; |
949 | /* | 965 | /* |
950 | * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that | 966 | * If btrfs_del_items() was called, it might have deleted a leaf, in |
951 | * is, its contents got pushed to its neighbors), in which case | 967 | * which case it unlocked our path, so check that path->locks[0] holds |
952 | * it means path->locks[0] == 0 | 968 | * a write lock. |
953 | */ | 969 | */ |
954 | if (!ret && replace_extent && leafs_visited == 1 && | 970 | if (!ret && replace_extent && leafs_visited == 1 && |
955 | path->locks[0] && | 971 | (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || |
956 | btrfs_leaf_free_space(root, leaf) >= | 972 | path->locks[0] == BTRFS_WRITE_LOCK) && |
957 | sizeof(struct btrfs_item) + extent_item_size) { | 973 | btrfs_leaf_free_space(root, leaf) >= |
958 | 974 | sizeof(struct btrfs_item) + extent_item_size) { | |
959 | key.objectid = ino; | 975 | |
960 | key.type = BTRFS_EXTENT_DATA_KEY; | 976 | key.objectid = ino; |
961 | key.offset = start; | 977 | key.type = BTRFS_EXTENT_DATA_KEY; |
962 | setup_items_for_insert(root, path, &key, | 978 | key.offset = start; |
963 | &extent_item_size, | 979 | if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) { |
964 | extent_item_size, | 980 | struct btrfs_key slot_key; |
965 | sizeof(struct btrfs_item) + | 981 | |
966 | extent_item_size, 1); | 982 | btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]); |
967 | *key_inserted = 1; | 983 | if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) |
984 | path->slots[0]++; | ||
968 | } | 985 | } |
986 | setup_items_for_insert(root, path, &key, | ||
987 | &extent_item_size, | ||
988 | extent_item_size, | ||
989 | sizeof(struct btrfs_item) + | ||
990 | extent_item_size, 1); | ||
991 | *key_inserted = 1; | ||
969 | } | 992 | } |
970 | 993 | ||
971 | if (!replace_extent || !(*key_inserted)) | 994 | if (!replace_extent || !(*key_inserted)) |
@@ -1346,11 +1369,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
1346 | struct btrfs_ordered_extent *ordered; | 1369 | struct btrfs_ordered_extent *ordered; |
1347 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 1370 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
1348 | start_pos, last_pos, 0, cached_state); | 1371 | start_pos, last_pos, 0, cached_state); |
1349 | ordered = btrfs_lookup_first_ordered_extent(inode, last_pos); | 1372 | ordered = btrfs_lookup_ordered_range(inode, start_pos, |
1373 | last_pos - start_pos + 1); | ||
1350 | if (ordered && | 1374 | if (ordered && |
1351 | ordered->file_offset + ordered->len > start_pos && | 1375 | ordered->file_offset + ordered->len > start_pos && |
1352 | ordered->file_offset <= last_pos) { | 1376 | ordered->file_offset <= last_pos) { |
1353 | btrfs_put_ordered_extent(ordered); | ||
1354 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1377 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
1355 | start_pos, last_pos, | 1378 | start_pos, last_pos, |
1356 | cached_state, GFP_NOFS); | 1379 | cached_state, GFP_NOFS); |
@@ -1358,12 +1381,9 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
1358 | unlock_page(pages[i]); | 1381 | unlock_page(pages[i]); |
1359 | page_cache_release(pages[i]); | 1382 | page_cache_release(pages[i]); |
1360 | } | 1383 | } |
1361 | ret = btrfs_wait_ordered_range(inode, start_pos, | 1384 | btrfs_start_ordered_extent(inode, ordered, 1); |
1362 | last_pos - start_pos + 1); | 1385 | btrfs_put_ordered_extent(ordered); |
1363 | if (ret) | 1386 | return -EAGAIN; |
1364 | return ret; | ||
1365 | else | ||
1366 | return -EAGAIN; | ||
1367 | } | 1387 | } |
1368 | if (ordered) | 1388 | if (ordered) |
1369 | btrfs_put_ordered_extent(ordered); | 1389 | btrfs_put_ordered_extent(ordered); |
@@ -1396,8 +1416,12 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1396 | u64 num_bytes; | 1416 | u64 num_bytes; |
1397 | int ret; | 1417 | int ret; |
1398 | 1418 | ||
1419 | ret = btrfs_start_nocow_write(root); | ||
1420 | if (!ret) | ||
1421 | return -ENOSPC; | ||
1422 | |||
1399 | lockstart = round_down(pos, root->sectorsize); | 1423 | lockstart = round_down(pos, root->sectorsize); |
1400 | lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; | 1424 | lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; |
1401 | 1425 | ||
1402 | while (1) { | 1426 | while (1) { |
1403 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1427 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
@@ -1415,12 +1439,10 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1415 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1439 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
1416 | if (ret <= 0) { | 1440 | if (ret <= 0) { |
1417 | ret = 0; | 1441 | ret = 0; |
1442 | btrfs_end_nocow_write(root); | ||
1418 | } else { | 1443 | } else { |
1419 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 1444 | *write_bytes = min_t(size_t, *write_bytes, |
1420 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1445 | num_bytes - pos + lockstart); |
1421 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, | ||
1422 | NULL, GFP_NOFS); | ||
1423 | *write_bytes = min_t(size_t, *write_bytes, num_bytes); | ||
1424 | } | 1446 | } |
1425 | 1447 | ||
1426 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1448 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
@@ -1510,6 +1532,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1510 | if (!only_release_metadata) | 1532 | if (!only_release_metadata) |
1511 | btrfs_free_reserved_data_space(inode, | 1533 | btrfs_free_reserved_data_space(inode, |
1512 | reserve_bytes); | 1534 | reserve_bytes); |
1535 | else | ||
1536 | btrfs_end_nocow_write(root); | ||
1513 | break; | 1537 | break; |
1514 | } | 1538 | } |
1515 | 1539 | ||
@@ -1598,6 +1622,9 @@ again: | |||
1598 | } | 1622 | } |
1599 | 1623 | ||
1600 | release_bytes = 0; | 1624 | release_bytes = 0; |
1625 | if (only_release_metadata) | ||
1626 | btrfs_end_nocow_write(root); | ||
1627 | |||
1601 | if (only_release_metadata && copied > 0) { | 1628 | if (only_release_metadata && copied > 0) { |
1602 | u64 lockstart = round_down(pos, root->sectorsize); | 1629 | u64 lockstart = round_down(pos, root->sectorsize); |
1603 | u64 lockend = lockstart + | 1630 | u64 lockend = lockstart + |
@@ -1624,10 +1651,12 @@ again: | |||
1624 | kfree(pages); | 1651 | kfree(pages); |
1625 | 1652 | ||
1626 | if (release_bytes) { | 1653 | if (release_bytes) { |
1627 | if (only_release_metadata) | 1654 | if (only_release_metadata) { |
1655 | btrfs_end_nocow_write(root); | ||
1628 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1656 | btrfs_delalloc_release_metadata(inode, release_bytes); |
1629 | else | 1657 | } else { |
1630 | btrfs_delalloc_release_space(inode, release_bytes); | 1658 | btrfs_delalloc_release_space(inode, release_bytes); |
1659 | } | ||
1631 | } | 1660 | } |
1632 | 1661 | ||
1633 | return num_written ? num_written : ret; | 1662 | return num_written ? num_written : ret; |
@@ -1797,7 +1826,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1797 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1826 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
1798 | if (num_written > 0) { | 1827 | if (num_written > 0) { |
1799 | err = generic_write_sync(file, pos, num_written); | 1828 | err = generic_write_sync(file, pos, num_written); |
1800 | if (err < 0 && num_written > 0) | 1829 | if (err < 0) |
1801 | num_written = err; | 1830 | num_written = err; |
1802 | } | 1831 | } |
1803 | 1832 | ||
@@ -1856,8 +1885,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1856 | struct dentry *dentry = file->f_path.dentry; | 1885 | struct dentry *dentry = file->f_path.dentry; |
1857 | struct inode *inode = dentry->d_inode; | 1886 | struct inode *inode = dentry->d_inode; |
1858 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1887 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1859 | int ret = 0; | ||
1860 | struct btrfs_trans_handle *trans; | 1888 | struct btrfs_trans_handle *trans; |
1889 | struct btrfs_log_ctx ctx; | ||
1890 | int ret = 0; | ||
1861 | bool full_sync = 0; | 1891 | bool full_sync = 0; |
1862 | 1892 | ||
1863 | trace_btrfs_sync_file(file, datasync); | 1893 | trace_btrfs_sync_file(file, datasync); |
@@ -1951,7 +1981,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1951 | } | 1981 | } |
1952 | trans->sync = true; | 1982 | trans->sync = true; |
1953 | 1983 | ||
1954 | ret = btrfs_log_dentry_safe(trans, root, dentry); | 1984 | btrfs_init_log_ctx(&ctx); |
1985 | |||
1986 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); | ||
1955 | if (ret < 0) { | 1987 | if (ret < 0) { |
1956 | /* Fallthrough and commit/free transaction. */ | 1988 | /* Fallthrough and commit/free transaction. */ |
1957 | ret = 1; | 1989 | ret = 1; |
@@ -1971,7 +2003,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1971 | 2003 | ||
1972 | if (ret != BTRFS_NO_LOG_SYNC) { | 2004 | if (ret != BTRFS_NO_LOG_SYNC) { |
1973 | if (!ret) { | 2005 | if (!ret) { |
1974 | ret = btrfs_sync_log(trans, root); | 2006 | ret = btrfs_sync_log(trans, root, &ctx); |
1975 | if (!ret) { | 2007 | if (!ret) { |
1976 | ret = btrfs_end_transaction(trans, root); | 2008 | ret = btrfs_end_transaction(trans, root); |
1977 | goto out; | 2009 | goto out; |
@@ -2157,6 +2189,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2157 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2189 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == |
2158 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | 2190 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); |
2159 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2191 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
2192 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
2160 | 2193 | ||
2161 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2194 | ret = btrfs_wait_ordered_range(inode, offset, len); |
2162 | if (ret) | 2195 | if (ret) |
@@ -2172,14 +2205,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2172 | * entire page. | 2205 | * entire page. |
2173 | */ | 2206 | */ |
2174 | if (same_page && len < PAGE_CACHE_SIZE) { | 2207 | if (same_page && len < PAGE_CACHE_SIZE) { |
2175 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) | 2208 | if (offset < ino_size) |
2176 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2209 | ret = btrfs_truncate_page(inode, offset, len, 0); |
2177 | mutex_unlock(&inode->i_mutex); | 2210 | mutex_unlock(&inode->i_mutex); |
2178 | return ret; | 2211 | return ret; |
2179 | } | 2212 | } |
2180 | 2213 | ||
2181 | /* zero back part of the first page */ | 2214 | /* zero back part of the first page */ |
2182 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2215 | if (offset < ino_size) { |
2183 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2216 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
2184 | if (ret) { | 2217 | if (ret) { |
2185 | mutex_unlock(&inode->i_mutex); | 2218 | mutex_unlock(&inode->i_mutex); |
@@ -2188,7 +2221,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2188 | } | 2221 | } |
2189 | 2222 | ||
2190 | /* zero the front end of the last page */ | 2223 | /* zero the front end of the last page */ |
2191 | if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2224 | if (offset + len < ino_size) { |
2192 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2225 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); |
2193 | if (ret) { | 2226 | if (ret) { |
2194 | mutex_unlock(&inode->i_mutex); | 2227 | mutex_unlock(&inode->i_mutex); |
@@ -2277,10 +2310,13 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2277 | 2310 | ||
2278 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2311 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2279 | 2312 | ||
2280 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2313 | if (cur_offset < ino_size) { |
2281 | if (ret) { | 2314 | ret = fill_holes(trans, inode, path, cur_offset, |
2282 | err = ret; | 2315 | drop_end); |
2283 | break; | 2316 | if (ret) { |
2317 | err = ret; | ||
2318 | break; | ||
2319 | } | ||
2284 | } | 2320 | } |
2285 | 2321 | ||
2286 | cur_offset = drop_end; | 2322 | cur_offset = drop_end; |
@@ -2313,10 +2349,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2313 | } | 2349 | } |
2314 | 2350 | ||
2315 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2351 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2316 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2352 | if (cur_offset < ino_size) { |
2317 | if (ret) { | 2353 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
2318 | err = ret; | 2354 | if (ret) { |
2319 | goto out_trans; | 2355 | err = ret; |
2356 | goto out_trans; | ||
2357 | } | ||
2320 | } | 2358 | } |
2321 | 2359 | ||
2322 | out_trans: | 2360 | out_trans: |
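With the lock_and_cleanup_extent_if_need() hunk above, the helper no longer
waits inline on a conflicting ordered extent: it starts the extent and returns
-EAGAIN so the write loop re-prepares its pages. A paraphrased sketch of the
caller side in __btrfs_buffered_write() (abridged; the exact argument list is
paraphrased, not copied from this diff):

	ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
					      pos, &lockstart, &lockend,
					      &cached_state);
	if (ret == -EAGAIN)
		goto again;	/* ordered extent started; redo the pages */
	else if (ret < 0)
		break;		/* real error: leave the write loop */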
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3d44486290b..06e9a4152b14 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -864,7 +864,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
864 | 864 | ||
865 | if (btrfs_is_free_space_inode(inode)) { | 865 | if (btrfs_is_free_space_inode(inode)) { |
866 | WARN_ON_ONCE(1); | 866 | WARN_ON_ONCE(1); |
867 | return -EINVAL; | 867 | ret = -EINVAL; |
868 | goto out_unlock; | ||
868 | } | 869 | } |
869 | 870 | ||
870 | num_bytes = ALIGN(end - start + 1, blocksize); | 871 | num_bytes = ALIGN(end - start + 1, blocksize); |
@@ -1075,17 +1076,15 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
1075 | async_cow->end = cur_end; | 1076 | async_cow->end = cur_end; |
1076 | INIT_LIST_HEAD(&async_cow->extents); | 1077 | INIT_LIST_HEAD(&async_cow->extents); |
1077 | 1078 | ||
1078 | async_cow->work.func = async_cow_start; | 1079 | btrfs_init_work(&async_cow->work, async_cow_start, |
1079 | async_cow->work.ordered_func = async_cow_submit; | 1080 | async_cow_submit, async_cow_free); |
1080 | async_cow->work.ordered_free = async_cow_free; | ||
1081 | async_cow->work.flags = 0; | ||
1082 | 1081 | ||
1083 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | 1082 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> |
1084 | PAGE_CACHE_SHIFT; | 1083 | PAGE_CACHE_SHIFT; |
1085 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); | 1084 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); |
1086 | 1085 | ||
1087 | btrfs_queue_worker(&root->fs_info->delalloc_workers, | 1086 | btrfs_queue_work(root->fs_info->delalloc_workers, |
1088 | &async_cow->work); | 1087 | &async_cow->work); |
1089 | 1088 | ||
1090 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { | 1089 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { |
1091 | wait_event(root->fs_info->async_submit_wait, | 1090 | wait_event(root->fs_info->async_submit_wait, |
@@ -1843,9 +1842,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
1843 | 1842 | ||
1844 | SetPageChecked(page); | 1843 | SetPageChecked(page); |
1845 | page_cache_get(page); | 1844 | page_cache_get(page); |
1846 | fixup->work.func = btrfs_writepage_fixup_worker; | 1845 | btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); |
1847 | fixup->page = page; | 1846 | fixup->page = page; |
1848 | btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); | 1847 | btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); |
1849 | return -EBUSY; | 1848 | return -EBUSY; |
1850 | } | 1849 | } |
1851 | 1850 | ||
@@ -2239,6 +2238,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path, | |||
2239 | return PTR_ERR(root); | 2238 | return PTR_ERR(root); |
2240 | } | 2239 | } |
2241 | 2240 | ||
2241 | if (btrfs_root_readonly(root)) { | ||
2242 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
2243 | return 0; | ||
2244 | } | ||
2245 | |||
2242 | /* step 2: get inode */ | 2246 | /* step 2: get inode */ |
2243 | key.objectid = backref->inum; | 2247 | key.objectid = backref->inum; |
2244 | key.type = BTRFS_INODE_ITEM_KEY; | 2248 | key.type = BTRFS_INODE_ITEM_KEY; |
@@ -2759,7 +2763,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2759 | struct inode *inode = page->mapping->host; | 2763 | struct inode *inode = page->mapping->host; |
2760 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2764 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2761 | struct btrfs_ordered_extent *ordered_extent = NULL; | 2765 | struct btrfs_ordered_extent *ordered_extent = NULL; |
2762 | struct btrfs_workers *workers; | 2766 | struct btrfs_workqueue *workers; |
2763 | 2767 | ||
2764 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 2768 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
2765 | 2769 | ||
@@ -2768,14 +2772,13 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2768 | end - start + 1, uptodate)) | 2772 | end - start + 1, uptodate)) |
2769 | return 0; | 2773 | return 0; |
2770 | 2774 | ||
2771 | ordered_extent->work.func = finish_ordered_fn; | 2775 | btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); |
2772 | ordered_extent->work.flags = 0; | ||
2773 | 2776 | ||
2774 | if (btrfs_is_free_space_inode(inode)) | 2777 | if (btrfs_is_free_space_inode(inode)) |
2775 | workers = &root->fs_info->endio_freespace_worker; | 2778 | workers = root->fs_info->endio_freespace_worker; |
2776 | else | 2779 | else |
2777 | workers = &root->fs_info->endio_write_workers; | 2780 | workers = root->fs_info->endio_write_workers; |
2778 | btrfs_queue_worker(workers, &ordered_extent->work); | 2781 | btrfs_queue_work(workers, &ordered_extent->work); |
2779 | 2782 | ||
2780 | return 0; | 2783 | return 0; |
2781 | } | 2784 | } |
@@ -4593,7 +4596,7 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
4593 | struct rb_node *node; | 4596 | struct rb_node *node; |
4594 | 4597 | ||
4595 | ASSERT(inode->i_state & I_FREEING); | 4598 | ASSERT(inode->i_state & I_FREEING); |
4596 | truncate_inode_pages(&inode->i_data, 0); | 4599 | truncate_inode_pages_final(&inode->i_data); |
4597 | 4600 | ||
4598 | write_lock(&map_tree->lock); | 4601 | write_lock(&map_tree->lock); |
4599 | while (!RB_EMPTY_ROOT(&map_tree->map)) { | 4602 | while (!RB_EMPTY_ROOT(&map_tree->map)) { |
@@ -4924,7 +4927,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root) | |||
4924 | struct inode *inode; | 4927 | struct inode *inode; |
4925 | u64 objectid = 0; | 4928 | u64 objectid = 0; |
4926 | 4929 | ||
4927 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | 4930 | if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
4931 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
4928 | 4932 | ||
4929 | spin_lock(&root->inode_lock); | 4933 | spin_lock(&root->inode_lock); |
4930 | again: | 4934 | again: |
@@ -5799,6 +5803,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
5799 | } | 5803 | } |
5800 | out_unlock: | 5804 | out_unlock: |
5801 | btrfs_end_transaction(trans, root); | 5805 | btrfs_end_transaction(trans, root); |
5806 | btrfs_balance_delayed_items(root); | ||
5802 | btrfs_btree_balance_dirty(root); | 5807 | btrfs_btree_balance_dirty(root); |
5803 | if (drop_inode) { | 5808 | if (drop_inode) { |
5804 | inode_dec_link_count(inode); | 5809 | inode_dec_link_count(inode); |
@@ -5872,6 +5877,7 @@ out_unlock: | |||
5872 | inode_dec_link_count(inode); | 5877 | inode_dec_link_count(inode); |
5873 | iput(inode); | 5878 | iput(inode); |
5874 | } | 5879 | } |
5880 | btrfs_balance_delayed_items(root); | ||
5875 | btrfs_btree_balance_dirty(root); | 5881 | btrfs_btree_balance_dirty(root); |
5876 | return err; | 5882 | return err; |
5877 | } | 5883 | } |
@@ -5930,6 +5936,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
5930 | } | 5936 | } |
5931 | 5937 | ||
5932 | btrfs_end_transaction(trans, root); | 5938 | btrfs_end_transaction(trans, root); |
5939 | btrfs_balance_delayed_items(root); | ||
5933 | fail: | 5940 | fail: |
5934 | if (drop_inode) { | 5941 | if (drop_inode) { |
5935 | inode_dec_link_count(inode); | 5942 | inode_dec_link_count(inode); |
@@ -5996,6 +6003,7 @@ out_fail: | |||
5996 | btrfs_end_transaction(trans, root); | 6003 | btrfs_end_transaction(trans, root); |
5997 | if (drop_on_err) | 6004 | if (drop_on_err) |
5998 | iput(inode); | 6005 | iput(inode); |
6006 | btrfs_balance_delayed_items(root); | ||
5999 | btrfs_btree_balance_dirty(root); | 6007 | btrfs_btree_balance_dirty(root); |
6000 | return err; | 6008 | return err; |
6001 | } | 6009 | } |
@@ -6550,6 +6558,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
6550 | int ret; | 6558 | int ret; |
6551 | struct extent_buffer *leaf; | 6559 | struct extent_buffer *leaf; |
6552 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6560 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6561 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
6553 | struct btrfs_file_extent_item *fi; | 6562 | struct btrfs_file_extent_item *fi; |
6554 | struct btrfs_key key; | 6563 | struct btrfs_key key; |
6555 | u64 disk_bytenr; | 6564 | u64 disk_bytenr; |
@@ -6626,6 +6635,20 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
6626 | 6635 | ||
6627 | if (btrfs_extent_readonly(root, disk_bytenr)) | 6636 | if (btrfs_extent_readonly(root, disk_bytenr)) |
6628 | goto out; | 6637 | goto out; |
6638 | |||
6639 | num_bytes = min(offset + *len, extent_end) - offset; | ||
6640 | if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
6641 | u64 range_end; | ||
6642 | |||
6643 | range_end = round_up(offset + num_bytes, root->sectorsize) - 1; | ||
6644 | ret = test_range_bit(io_tree, offset, range_end, | ||
6645 | EXTENT_DELALLOC, 0, NULL); | ||
6646 | if (ret) { | ||
6647 | ret = -EAGAIN; | ||
6648 | goto out; | ||
6649 | } | ||
6650 | } | ||
6651 | |||
6629 | btrfs_release_path(path); | 6652 | btrfs_release_path(path); |
6630 | 6653 | ||
6631 | /* | 6654 | /* |
@@ -6654,7 +6677,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
6654 | */ | 6677 | */ |
6655 | disk_bytenr += backref_offset; | 6678 | disk_bytenr += backref_offset; |
6656 | disk_bytenr += offset - key.offset; | 6679 | disk_bytenr += offset - key.offset; |
6657 | num_bytes = min(offset + *len, extent_end) - offset; | ||
6658 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | 6680 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) |
6659 | goto out; | 6681 | goto out; |
6660 | /* | 6682 | /* |
@@ -7024,10 +7046,9 @@ again: | |||
7024 | if (!ret) | 7046 | if (!ret) |
7025 | goto out_test; | 7047 | goto out_test; |
7026 | 7048 | ||
7027 | ordered->work.func = finish_ordered_fn; | 7049 | btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); |
7028 | ordered->work.flags = 0; | 7050 | btrfs_queue_work(root->fs_info->endio_write_workers, |
7029 | btrfs_queue_worker(&root->fs_info->endio_write_workers, | 7051 | &ordered->work); |
7030 | &ordered->work); | ||
7031 | out_test: | 7052 | out_test: |
7032 | /* | 7053 | /* |
7033 | * our bio might span multiple ordered extents. If we haven't | 7054 | * our bio might span multiple ordered extents. If we haven't |
@@ -7404,15 +7425,15 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
7404 | smp_mb__after_atomic_inc(); | 7425 | smp_mb__after_atomic_inc(); |
7405 | 7426 | ||
7406 | /* | 7427 | /* |
7407 | * The generic stuff only does filemap_write_and_wait_range, which isn't | 7428 | * The generic stuff only does filemap_write_and_wait_range, which |
7408 | * enough if we've written compressed pages to this area, so we need to | 7429 | * isn't enough if we've written compressed pages to this area, so |
7409 | * call btrfs_wait_ordered_range to make absolutely sure that any | 7430 | * we need to flush the dirty pages again to make absolutely sure |
7410 | * outstanding dirty pages are on disk. | 7431 | * that any outstanding dirty pages are on disk. |
7411 | */ | 7432 | */ |
7412 | count = iov_length(iov, nr_segs); | 7433 | count = iov_length(iov, nr_segs); |
7413 | ret = btrfs_wait_ordered_range(inode, offset, count); | 7434 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
7414 | if (ret) | 7435 | &BTRFS_I(inode)->runtime_flags)) |
7415 | return ret; | 7436 | filemap_fdatawrite_range(inode->i_mapping, offset, count); |
7416 | 7437 | ||
7417 | if (rw & WRITE) { | 7438 | if (rw & WRITE) { |
7418 | /* | 7439 | /* |
@@ -8404,7 +8425,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | |||
8404 | work->inode = inode; | 8425 | work->inode = inode; |
8405 | work->wait = wait; | 8426 | work->wait = wait; |
8406 | work->delay_iput = delay_iput; | 8427 | work->delay_iput = delay_iput; |
8407 | work->work.func = btrfs_run_delalloc_work; | 8428 | btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); |
8408 | 8429 | ||
8409 | return work; | 8430 | return work; |
8410 | } | 8431 | } |
@@ -8419,7 +8440,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | |||
8419 | * some fairly slow code that needs optimization. This walks the list | 8440 | * some fairly slow code that needs optimization. This walks the list |
8420 | * of all the inodes with pending delalloc and forces them to disk. | 8441 | * of all the inodes with pending delalloc and forces them to disk. |
8421 | */ | 8442 | */ |
8422 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 8443 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, |
8444 | int nr) | ||
8423 | { | 8445 | { |
8424 | struct btrfs_inode *binode; | 8446 | struct btrfs_inode *binode; |
8425 | struct inode *inode; | 8447 | struct inode *inode; |
@@ -8431,6 +8453,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8431 | INIT_LIST_HEAD(&works); | 8453 | INIT_LIST_HEAD(&works); |
8432 | INIT_LIST_HEAD(&splice); | 8454 | INIT_LIST_HEAD(&splice); |
8433 | 8455 | ||
8456 | mutex_lock(&root->delalloc_mutex); | ||
8434 | spin_lock(&root->delalloc_lock); | 8457 | spin_lock(&root->delalloc_lock); |
8435 | list_splice_init(&root->delalloc_inodes, &splice); | 8458 | list_splice_init(&root->delalloc_inodes, &splice); |
8436 | while (!list_empty(&splice)) { | 8459 | while (!list_empty(&splice)) { |
@@ -8453,12 +8476,14 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8453 | else | 8476 | else |
8454 | iput(inode); | 8477 | iput(inode); |
8455 | ret = -ENOMEM; | 8478 | ret = -ENOMEM; |
8456 | goto out; | 8479 | break; |
8457 | } | 8480 | } |
8458 | list_add_tail(&work->list, &works); | 8481 | list_add_tail(&work->list, &works); |
8459 | btrfs_queue_worker(&root->fs_info->flush_workers, | 8482 | btrfs_queue_work(root->fs_info->flush_workers, |
8460 | &work->work); | 8483 | &work->work); |
8461 | 8484 | ret++; | |
8485 | if (nr != -1 && ret >= nr) | ||
8486 | break; | ||
8462 | cond_resched(); | 8487 | cond_resched(); |
8463 | spin_lock(&root->delalloc_lock); | 8488 | spin_lock(&root->delalloc_lock); |
8464 | } | 8489 | } |
@@ -8468,18 +8493,13 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8468 | list_del_init(&work->list); | 8493 | list_del_init(&work->list); |
8469 | btrfs_wait_and_free_delalloc_work(work); | 8494 | btrfs_wait_and_free_delalloc_work(work); |
8470 | } | 8495 | } |
8471 | return 0; | ||
8472 | out: | ||
8473 | list_for_each_entry_safe(work, next, &works, list) { | ||
8474 | list_del_init(&work->list); | ||
8475 | btrfs_wait_and_free_delalloc_work(work); | ||
8476 | } | ||
8477 | 8496 | ||
8478 | if (!list_empty_careful(&splice)) { | 8497 | if (!list_empty_careful(&splice)) { |
8479 | spin_lock(&root->delalloc_lock); | 8498 | spin_lock(&root->delalloc_lock); |
8480 | list_splice_tail(&splice, &root->delalloc_inodes); | 8499 | list_splice_tail(&splice, &root->delalloc_inodes); |
8481 | spin_unlock(&root->delalloc_lock); | 8500 | spin_unlock(&root->delalloc_lock); |
8482 | } | 8501 | } |
8502 | mutex_unlock(&root->delalloc_mutex); | ||
8483 | return ret; | 8503 | return ret; |
8484 | } | 8504 | } |
8485 | 8505 | ||
@@ -8490,7 +8510,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8490 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 8510 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
8491 | return -EROFS; | 8511 | return -EROFS; |
8492 | 8512 | ||
8493 | ret = __start_delalloc_inodes(root, delay_iput); | 8513 | ret = __start_delalloc_inodes(root, delay_iput, -1); |
8514 | if (ret > 0) | ||
8515 | ret = 0; | ||
8494 | /* | 8516 | /* |
8495 | * the filemap_flush will queue IO into the worker threads, but | 8517 | * the filemap_flush will queue IO into the worker threads, but |
8496 | * we have to make sure the IO is actually started and that | 8518 | * we have to make sure the IO is actually started and that |
@@ -8507,7 +8529,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8507 | return ret; | 8529 | return ret; |
8508 | } | 8530 | } |
8509 | 8531 | ||
8510 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | 8532 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
8533 | int nr) | ||
8511 | { | 8534 | { |
8512 | struct btrfs_root *root; | 8535 | struct btrfs_root *root; |
8513 | struct list_head splice; | 8536 | struct list_head splice; |
@@ -8518,9 +8541,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
8518 | 8541 | ||
8519 | INIT_LIST_HEAD(&splice); | 8542 | INIT_LIST_HEAD(&splice); |
8520 | 8543 | ||
8544 | mutex_lock(&fs_info->delalloc_root_mutex); | ||
8521 | spin_lock(&fs_info->delalloc_root_lock); | 8545 | spin_lock(&fs_info->delalloc_root_lock); |
8522 | list_splice_init(&fs_info->delalloc_roots, &splice); | 8546 | list_splice_init(&fs_info->delalloc_roots, &splice); |
8523 | while (!list_empty(&splice)) { | 8547 | while (!list_empty(&splice) && nr) { |
8524 | root = list_first_entry(&splice, struct btrfs_root, | 8548 | root = list_first_entry(&splice, struct btrfs_root, |
8525 | delalloc_root); | 8549 | delalloc_root); |
8526 | root = btrfs_grab_fs_root(root); | 8550 | root = btrfs_grab_fs_root(root); |
@@ -8529,15 +8553,20 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
8529 | &fs_info->delalloc_roots); | 8553 | &fs_info->delalloc_roots); |
8530 | spin_unlock(&fs_info->delalloc_root_lock); | 8554 | spin_unlock(&fs_info->delalloc_root_lock); |
8531 | 8555 | ||
8532 | ret = __start_delalloc_inodes(root, delay_iput); | 8556 | ret = __start_delalloc_inodes(root, delay_iput, nr); |
8533 | btrfs_put_fs_root(root); | 8557 | btrfs_put_fs_root(root); |
8534 | if (ret) | 8558 | if (ret < 0) |
8535 | goto out; | 8559 | goto out; |
8536 | 8560 | ||
8561 | if (nr != -1) { | ||
8562 | nr -= ret; | ||
8563 | WARN_ON(nr < 0); | ||
8564 | } | ||
8537 | spin_lock(&fs_info->delalloc_root_lock); | 8565 | spin_lock(&fs_info->delalloc_root_lock); |
8538 | } | 8566 | } |
8539 | spin_unlock(&fs_info->delalloc_root_lock); | 8567 | spin_unlock(&fs_info->delalloc_root_lock); |
8540 | 8568 | ||
8569 | ret = 0; | ||
8541 | atomic_inc(&fs_info->async_submit_draining); | 8570 | atomic_inc(&fs_info->async_submit_draining); |
8542 | while (atomic_read(&fs_info->nr_async_submits) || | 8571 | while (atomic_read(&fs_info->nr_async_submits) || |
8543 | atomic_read(&fs_info->async_delalloc_pages)) { | 8572 | atomic_read(&fs_info->async_delalloc_pages)) { |
@@ -8546,13 +8575,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
8546 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | 8575 | atomic_read(&fs_info->async_delalloc_pages) == 0)); |
8547 | } | 8576 | } |
8548 | atomic_dec(&fs_info->async_submit_draining); | 8577 | atomic_dec(&fs_info->async_submit_draining); |
8549 | return 0; | ||
8550 | out: | 8578 | out: |
8551 | if (!list_empty_careful(&splice)) { | 8579 | if (!list_empty_careful(&splice)) { |
8552 | spin_lock(&fs_info->delalloc_root_lock); | 8580 | spin_lock(&fs_info->delalloc_root_lock); |
8553 | list_splice_tail(&splice, &fs_info->delalloc_roots); | 8581 | list_splice_tail(&splice, &fs_info->delalloc_roots); |
8554 | spin_unlock(&fs_info->delalloc_root_lock); | 8582 | spin_unlock(&fs_info->delalloc_root_lock); |
8555 | } | 8583 | } |
8584 | mutex_unlock(&fs_info->delalloc_root_mutex); | ||
8556 | return ret; | 8585 | return ret; |
8557 | } | 8586 | } |
8558 | 8587 | ||
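Every conversion in this file follows the same pattern: the open-coded
btrfs_work setup moves behind btrfs_init_work(), and the workqueues become
pointers. A before/after sketch:

	/* before */
	work->work.func = btrfs_run_delalloc_work;
	work->work.flags = 0;
	btrfs_queue_worker(&root->fs_info->flush_workers, &work->work);

	/* after */
	btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
	btrfs_queue_work(root->fs_info->flush_workers, &work->work);

The two NULLs are the ordered_func and ordered_free hooks; only async_cow in
this file needs them (async_cow_submit and async_cow_free).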
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a6d8efa46bfe..0401397b5c92 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -59,6 +59,32 @@ | |||
59 | #include "props.h" | 59 | #include "props.h" |
60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
61 | 61 | ||
62 | #ifdef CONFIG_64BIT | ||
63 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | ||
64 | * structures are incorrect, as the timespec structure from userspace | ||
65 | * is 4 bytes too small. We define these alternatives here to teach | ||
66 | * the kernel about the 32-bit struct packing. | ||
67 | */ | ||
68 | struct btrfs_ioctl_timespec_32 { | ||
69 | __u64 sec; | ||
70 | __u32 nsec; | ||
71 | } __attribute__ ((__packed__)); | ||
72 | |||
73 | struct btrfs_ioctl_received_subvol_args_32 { | ||
74 | char uuid[BTRFS_UUID_SIZE]; /* in */ | ||
75 | __u64 stransid; /* in */ | ||
76 | __u64 rtransid; /* out */ | ||
77 | struct btrfs_ioctl_timespec_32 stime; /* in */ | ||
78 | struct btrfs_ioctl_timespec_32 rtime; /* out */ | ||
79 | __u64 flags; /* in */ | ||
80 | __u64 reserved[16]; /* in */ | ||
81 | } __attribute__ ((__packed__)); | ||
82 | |||
83 | #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ | ||
84 | struct btrfs_ioctl_received_subvol_args_32) | ||
85 | #endif | ||
86 | |||
87 | |||
62 | static int btrfs_clone(struct inode *src, struct inode *inode, | 88 | static int btrfs_clone(struct inode *src, struct inode *inode, |
63 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); | 89 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); |
64 | 90 | ||
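With the packed 32-bit layout defined, a 64-bit kernel can accept the ioctl
number that 32-bit userspace actually computes. A hypothetical dispatch sketch
(the _32 handler name is an assumption; this hunk only defines the structures
and the ioctl number):

#ifdef CONFIG_64BIT
	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
		return btrfs_ioctl_set_received_subvol_32(file, argp);
#endif
	case BTRFS_IOC_SET_RECEIVED_SUBVOL:
		return btrfs_ioctl_set_received_subvol(file, argp);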
@@ -585,6 +611,23 @@ fail: | |||
585 | return ret; | 611 | return ret; |
586 | } | 612 | } |
587 | 613 | ||
614 | static void btrfs_wait_nocow_write(struct btrfs_root *root) | ||
615 | { | ||
616 | s64 writers; | ||
617 | DEFINE_WAIT(wait); | ||
618 | |||
619 | do { | ||
620 | prepare_to_wait(&root->subv_writers->wait, &wait, | ||
621 | TASK_UNINTERRUPTIBLE); | ||
622 | |||
623 | writers = percpu_counter_sum(&root->subv_writers->counter); | ||
624 | if (writers) | ||
625 | schedule(); | ||
626 | |||
627 | finish_wait(&root->subv_writers->wait, &wait); | ||
628 | } while (writers); | ||
629 | } | ||
630 | |||
588 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, | 631 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, |
589 | struct dentry *dentry, char *name, int namelen, | 632 | struct dentry *dentry, char *name, int namelen, |
590 | u64 *async_transid, bool readonly, | 633 | u64 *async_transid, bool readonly, |
@@ -598,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
598 | if (!root->ref_cows) | 641 | if (!root->ref_cows) |
599 | return -EINVAL; | 642 | return -EINVAL; |
600 | 643 | ||
644 | atomic_inc(&root->will_be_snapshoted); | ||
645 | smp_mb__after_atomic_inc(); | ||
646 | btrfs_wait_nocow_write(root); | ||
647 | |||
601 | ret = btrfs_start_delalloc_inodes(root, 0); | 648 | ret = btrfs_start_delalloc_inodes(root, 0); |
602 | if (ret) | 649 | if (ret) |
603 | return ret; | 650 | goto out; |
604 | 651 | ||
605 | btrfs_wait_ordered_extents(root, -1); | 652 | btrfs_wait_ordered_extents(root, -1); |
606 | 653 | ||
607 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 654 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
608 | if (!pending_snapshot) | 655 | if (!pending_snapshot) { |
609 | return -ENOMEM; | 656 | ret = -ENOMEM; |
657 | goto out; | ||
658 | } | ||
610 | 659 | ||
611 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, | 660 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
612 | BTRFS_BLOCK_RSV_TEMP); | 661 | BTRFS_BLOCK_RSV_TEMP); |
@@ -623,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
623 | &pending_snapshot->qgroup_reserved, | 672 | &pending_snapshot->qgroup_reserved, |
624 | false); | 673 | false); |
625 | if (ret) | 674 | if (ret) |
626 | goto out; | 675 | goto free; |
627 | 676 | ||
628 | pending_snapshot->dentry = dentry; | 677 | pending_snapshot->dentry = dentry; |
629 | pending_snapshot->root = root; | 678 | pending_snapshot->root = root; |
@@ -674,8 +723,10 @@ fail: | |||
674 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, | 723 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, |
675 | &pending_snapshot->block_rsv, | 724 | &pending_snapshot->block_rsv, |
676 | pending_snapshot->qgroup_reserved); | 725 | pending_snapshot->qgroup_reserved); |
677 | out: | 726 | free: |
678 | kfree(pending_snapshot); | 727 | kfree(pending_snapshot); |
728 | out: | ||
729 | atomic_dec(&root->will_be_snapshoted); | ||
679 | return ret; | 730 | return ret; |
680 | } | 731 | } |
681 | 732 | ||
@@ -884,12 +935,14 @@ static int find_new_extents(struct btrfs_root *root, | |||
884 | min_key.type = BTRFS_EXTENT_DATA_KEY; | 935 | min_key.type = BTRFS_EXTENT_DATA_KEY; |
885 | min_key.offset = *off; | 936 | min_key.offset = *off; |
886 | 937 | ||
887 | path->keep_locks = 1; | ||
888 | |||
889 | while (1) { | 938 | while (1) { |
939 | path->keep_locks = 1; | ||
890 | ret = btrfs_search_forward(root, &min_key, path, newer_than); | 940 | ret = btrfs_search_forward(root, &min_key, path, newer_than); |
891 | if (ret != 0) | 941 | if (ret != 0) |
892 | goto none; | 942 | goto none; |
943 | path->keep_locks = 0; | ||
944 | btrfs_unlock_up_safe(path, 1); | ||
945 | process_slot: | ||
893 | if (min_key.objectid != ino) | 946 | if (min_key.objectid != ino) |
894 | goto none; | 947 | goto none; |
895 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | 948 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) |
@@ -908,6 +961,12 @@ static int find_new_extents(struct btrfs_root *root, | |||
908 | return 0; | 961 | return 0; |
909 | } | 962 | } |
910 | 963 | ||
964 | path->slots[0]++; | ||
965 | if (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
966 | btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); | ||
967 | goto process_slot; | ||
968 | } | ||
969 | |||
911 | if (min_key.offset == (u64)-1) | 970 | if (min_key.offset == (u64)-1) |
912 | goto none; | 971 | goto none; |
913 | 972 | ||
@@ -935,10 +994,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) | |||
935 | read_unlock(&em_tree->lock); | 994 | read_unlock(&em_tree->lock); |
936 | 995 | ||
937 | if (!em) { | 996 | if (!em) { |
997 | struct extent_state *cached = NULL; | ||
998 | u64 end = start + len - 1; | ||
999 | |||
938 | /* get the big lock and read metadata off disk */ | 1000 | /* get the big lock and read metadata off disk */ |
939 | lock_extent(io_tree, start, start + len - 1); | 1001 | lock_extent_bits(io_tree, start, end, 0, &cached); |
940 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 1002 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
941 | unlock_extent(io_tree, start, start + len - 1); | 1003 | unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); |
942 | 1004 | ||
943 | if (IS_ERR(em)) | 1005 | if (IS_ERR(em)) |
944 | return NULL; | 1006 | return NULL; |
@@ -957,7 +1019,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
957 | return false; | 1019 | return false; |
958 | 1020 | ||
959 | next = defrag_lookup_extent(inode, em->start + em->len); | 1021 | next = defrag_lookup_extent(inode, em->start + em->len); |
960 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) | 1022 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || |
1023 | (em->block_start + em->block_len == next->block_start)) | ||
961 | ret = false; | 1024 | ret = false; |
962 | 1025 | ||
963 | free_extent_map(next); | 1026 | free_extent_map(next); |
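The extra condition added to defrag_check_next_extent() above treats a physically adjacent next extent as not worth merging. A small self-contained illustration of the test, using demo types rather than the kernel's extent_map:

	#include <stdbool.h>
	#include <stdint.h>

	struct em_demo { uint64_t block_start, block_len; };

	/* extents whose disk ranges touch gain nothing from defrag */
	static bool physically_contiguous(const struct em_demo *a,
					  const struct em_demo *b)
	{
		return a->block_start + a->block_len == b->block_start;
	}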
@@ -1076,10 +1139,12 @@ again: | |||
1076 | page_start = page_offset(page); | 1139 | page_start = page_offset(page); |
1077 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 1140 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
1078 | while (1) { | 1141 | while (1) { |
1079 | lock_extent(tree, page_start, page_end); | 1142 | lock_extent_bits(tree, page_start, page_end, |
1143 | 0, &cached_state); | ||
1080 | ordered = btrfs_lookup_ordered_extent(inode, | 1144 | ordered = btrfs_lookup_ordered_extent(inode, |
1081 | page_start); | 1145 | page_start); |
1082 | unlock_extent(tree, page_start, page_end); | 1146 | unlock_extent_cached(tree, page_start, page_end, |
1147 | &cached_state, GFP_NOFS); | ||
1083 | if (!ordered) | 1148 | if (!ordered) |
1084 | break; | 1149 | break; |
1085 | 1150 | ||
@@ -1356,8 +1421,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1356 | } | 1421 | } |
1357 | } | 1422 | } |
1358 | 1423 | ||
1359 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | 1424 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { |
1360 | filemap_flush(inode->i_mapping); | 1425 | filemap_flush(inode->i_mapping); |
1426 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1427 | &BTRFS_I(inode)->runtime_flags)) | ||
1428 | filemap_flush(inode->i_mapping); | ||
1429 | } | ||
1361 | 1430 | ||
1362 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | 1431 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { |
1363 | /* the filemap_flush will queue IO into the worker threads, but | 1432 | /* the filemap_flush will queue IO into the worker threads, but |
@@ -1573,7 +1642,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1573 | if (src_inode->i_sb != file_inode(file)->i_sb) { | 1642 | if (src_inode->i_sb != file_inode(file)->i_sb) { |
1574 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, | 1643 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, |
1575 | "Snapshot src from another FS"); | 1644 | "Snapshot src from another FS"); |
1576 | ret = -EINVAL; | 1645 | ret = -EXDEV; |
1577 | } else if (!inode_owner_or_capable(src_inode)) { | 1646 | } else if (!inode_owner_or_capable(src_inode)) { |
1578 | /* | 1647 | /* |
1579 | * Subvolume creation is not restricted, but snapshots | 1648 | * Subvolume creation is not restricted, but snapshots |
@@ -1797,7 +1866,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) | |||
1797 | if (di && !IS_ERR(di)) { | 1866 | if (di && !IS_ERR(di)) { |
1798 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | 1867 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
1799 | if (key.objectid == root->root_key.objectid) { | 1868 | if (key.objectid == root->root_key.objectid) { |
1800 | ret = -ENOTEMPTY; | 1869 | ret = -EPERM; |
1870 | btrfs_err(root->fs_info, "deleting default subvolume " | ||
1871 | "%llu is not allowed", key.objectid); | ||
1801 | goto out; | 1872 | goto out; |
1802 | } | 1873 | } |
1803 | btrfs_release_path(path); | 1874 | btrfs_release_path(path); |
@@ -2994,8 +3065,9 @@ process_slot: | |||
2994 | new_key.offset + datal, | 3065 | new_key.offset + datal, |
2995 | 1); | 3066 | 1); |
2996 | if (ret) { | 3067 | if (ret) { |
2997 | btrfs_abort_transaction(trans, root, | 3068 | if (ret != -EINVAL) |
2998 | ret); | 3069 | btrfs_abort_transaction(trans, |
3070 | root, ret); | ||
2999 | btrfs_end_transaction(trans, root); | 3071 | btrfs_end_transaction(trans, root); |
3000 | goto out; | 3072 | goto out; |
3001 | } | 3073 | } |
@@ -3153,8 +3225,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
3153 | * decompress into destination's address_space (the file offset | 3225 | * decompress into destination's address_space (the file offset |
3154 | * may change, so source mapping won't do), then recompress (or | 3226 | * may change, so source mapping won't do), then recompress (or |
3155 | * otherwise reinsert) a subrange. | 3227 | * otherwise reinsert) a subrange. |
3156 | * - allow ranges within the same file to be cloned (provided | 3228 | * |
3157 | * they don't overlap)? | 3229 | * - split destination inode's inline extents. The inline extents can |
3230 | * be either compressed or non-compressed. | ||
3158 | */ | 3231 | */ |
3159 | 3232 | ||
3160 | /* the destination must be opened for writing */ | 3233 | /* the destination must be opened for writing */ |
@@ -4353,10 +4426,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | |||
4353 | return btrfs_qgroup_wait_for_completion(root->fs_info); | 4426 | return btrfs_qgroup_wait_for_completion(root->fs_info); |
4354 | } | 4427 | } |
4355 | 4428 | ||
4356 | static long btrfs_ioctl_set_received_subvol(struct file *file, | 4429 | static long _btrfs_ioctl_set_received_subvol(struct file *file, |
4357 | void __user *arg) | 4430 | struct btrfs_ioctl_received_subvol_args *sa) |
4358 | { | 4431 | { |
4359 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
4360 | struct inode *inode = file_inode(file); | 4432 | struct inode *inode = file_inode(file); |
4361 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4433 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4362 | struct btrfs_root_item *root_item = &root->root_item; | 4434 | struct btrfs_root_item *root_item = &root->root_item; |
@@ -4384,13 +4456,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
4384 | goto out; | 4456 | goto out; |
4385 | } | 4457 | } |
4386 | 4458 | ||
4387 | sa = memdup_user(arg, sizeof(*sa)); | ||
4388 | if (IS_ERR(sa)) { | ||
4389 | ret = PTR_ERR(sa); | ||
4390 | sa = NULL; | ||
4391 | goto out; | ||
4392 | } | ||
4393 | |||
4394 | /* | 4459 | /* |
4395 | * 1 - root item | 4460 | * 1 - root item |
4396 | * 2 - uuid items (received uuid + subvol uuid) | 4461 | * 2 - uuid items (received uuid + subvol uuid) |
@@ -4444,14 +4509,91 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
4444 | goto out; | 4509 | goto out; |
4445 | } | 4510 | } |
4446 | 4511 | ||
4512 | out: | ||
4513 | up_write(&root->fs_info->subvol_sem); | ||
4514 | mnt_drop_write_file(file); | ||
4515 | return ret; | ||
4516 | } | ||
4517 | |||
4518 | #ifdef CONFIG_64BIT | ||
4519 | static long btrfs_ioctl_set_received_subvol_32(struct file *file, | ||
4520 | void __user *arg) | ||
4521 | { | ||
4522 | struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; | ||
4523 | struct btrfs_ioctl_received_subvol_args *args64 = NULL; | ||
4524 | int ret = 0; | ||
4525 | |||
4526 | args32 = memdup_user(arg, sizeof(*args32)); | ||
4527 | if (IS_ERR(args32)) { | ||
4528 | ret = PTR_ERR(args32); | ||
4529 | args32 = NULL; | ||
4530 | goto out; | ||
4531 | } | ||
4532 | |||
4533 | args64 = kmalloc(sizeof(*args64), GFP_NOFS); | ||
4534 | if (!args64) { | ||
4535 | /* kmalloc() returns NULL on failure, not an ERR_PTR */ | ||
4536 | ret = -ENOMEM; | ||
4537 | goto out; | ||
4538 | } | ||
4539 | |||
4540 | memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); | ||
4541 | args64->stransid = args32->stransid; | ||
4542 | args64->rtransid = args32->rtransid; | ||
4543 | args64->stime.sec = args32->stime.sec; | ||
4544 | args64->stime.nsec = args32->stime.nsec; | ||
4545 | args64->rtime.sec = args32->rtime.sec; | ||
4546 | args64->rtime.nsec = args32->rtime.nsec; | ||
4547 | args64->flags = args32->flags; | ||
4548 | |||
4549 | ret = _btrfs_ioctl_set_received_subvol(file, args64); | ||
4550 | if (ret) | ||
4551 | goto out; | ||
4552 | |||
4553 | memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); | ||
4554 | args32->stransid = args64->stransid; | ||
4555 | args32->rtransid = args64->rtransid; | ||
4556 | args32->stime.sec = args64->stime.sec; | ||
4557 | args32->stime.nsec = args64->stime.nsec; | ||
4558 | args32->rtime.sec = args64->rtime.sec; | ||
4559 | args32->rtime.nsec = args64->rtime.nsec; | ||
4560 | args32->flags = args64->flags; | ||
4561 | |||
4562 | ret = copy_to_user(arg, args32, sizeof(*args32)); | ||
4563 | if (ret) | ||
4564 | ret = -EFAULT; | ||
4565 | |||
4566 | out: | ||
4567 | kfree(args32); | ||
4568 | kfree(args64); | ||
4569 | return ret; | ||
4570 | } | ||
4571 | #endif | ||
4572 | |||
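The _32 variant above exists because an ioctl number encodes the size of its argument, and u64 alignment differs between 32-bit and 64-bit ABIs, so the same structure definition compiles to two sizes and therefore two ioctl numbers. A runnable userspace illustration (the field layout is simplified and only mirrors the real btrfs_ioctl_received_subvol_args):

	#include <stdint.h>
	#include <stdio.h>

	/* 8+4 bytes: padded to 16 on x86-64, packed to 12 on i386 */
	struct ts_demo { uint64_t sec; uint32_t nsec; };

	struct recv_args_demo {
		uint8_t		uuid[16];
		uint64_t	stransid, rtransid;
		struct ts_demo	stime, rtime;
		uint64_t	flags;
	};

	int main(void)
	{
		/* e.g. 72 on x86-64 vs 64 on i386, hence two ioctl numbers */
		printf("sizeof = %zu\n", sizeof(struct recv_args_demo));
		return 0;
	}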
4573 | static long btrfs_ioctl_set_received_subvol(struct file *file, | ||
4574 | void __user *arg) | ||
4575 | { | ||
4576 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
4577 | int ret = 0; | ||
4578 | |||
4579 | sa = memdup_user(arg, sizeof(*sa)); | ||
4580 | if (IS_ERR(sa)) { | ||
4581 | ret = PTR_ERR(sa); | ||
4582 | sa = NULL; | ||
4583 | goto out; | ||
4584 | } | ||
4585 | |||
4586 | ret = _btrfs_ioctl_set_received_subvol(file, sa); | ||
4587 | |||
4588 | if (ret) | ||
4589 | goto out; | ||
4590 | |||
4447 | ret = copy_to_user(arg, sa, sizeof(*sa)); | 4591 | ret = copy_to_user(arg, sa, sizeof(*sa)); |
4448 | if (ret) | 4592 | if (ret) |
4449 | ret = -EFAULT; | 4593 | ret = -EFAULT; |
4450 | 4594 | ||
4451 | out: | 4595 | out: |
4452 | kfree(sa); | 4596 | kfree(sa); |
4453 | up_write(&root->fs_info->subvol_sem); | ||
4454 | mnt_drop_write_file(file); | ||
4455 | return ret; | 4597 | return ret; |
4456 | } | 4598 | } |
4457 | 4599 | ||
@@ -4746,7 +4888,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
4746 | case BTRFS_IOC_SYNC: { | 4888 | case BTRFS_IOC_SYNC: { |
4747 | int ret; | 4889 | int ret; |
4748 | 4890 | ||
4749 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 4891 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
4750 | if (ret) | 4892 | if (ret) |
4751 | return ret; | 4893 | return ret; |
4752 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); | 4894 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); |
@@ -4770,6 +4912,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
4770 | return btrfs_ioctl_balance_progress(root, argp); | 4912 | return btrfs_ioctl_balance_progress(root, argp); |
4771 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: | 4913 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: |
4772 | return btrfs_ioctl_set_received_subvol(file, argp); | 4914 | return btrfs_ioctl_set_received_subvol(file, argp); |
4915 | #ifdef CONFIG_64BIT | ||
4916 | case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: | ||
4917 | return btrfs_ioctl_set_received_subvol_32(file, argp); | ||
4918 | #endif | ||
4773 | case BTRFS_IOC_SEND: | 4919 | case BTRFS_IOC_SEND: |
4774 | return btrfs_ioctl_send(file, argp); | 4920 | return btrfs_ioctl_send(file, argp); |
4775 | case BTRFS_IOC_GET_DEV_STATS: | 4921 | case BTRFS_IOC_GET_DEV_STATS: |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b16450b840e7..a94b05f72869 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -349,10 +349,13 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
349 | if (!uptodate) | 349 | if (!uptodate) |
350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
351 | 351 | ||
352 | if (entry->bytes_left == 0) | 352 | if (entry->bytes_left == 0) { |
353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
354 | else | 354 | if (waitqueue_active(&entry->wait)) |
355 | wake_up(&entry->wait); | ||
356 | } else { | ||
355 | ret = 1; | 357 | ret = 1; |
358 | } | ||
356 | out: | 359 | out: |
357 | if (!ret && cached && entry) { | 360 | if (!ret && cached && entry) { |
358 | *cached = entry; | 361 | *cached = entry; |
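The wake_up() added above follows a common pattern: test waitqueue_active() first so the frequent no-waiter case skips taking the waitqueue lock. The pattern is only safe when an ordering point already separates the condition update from the waiter check; here the test_and_set_bit() immediately before implies a full barrier. Schematically (a sketch of the pattern, not new code):

	/* completion side: entry->bytes_left has just reached 0 */
	ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); /* full barrier */
	if (waitqueue_active(&entry->wait))	/* cheap check */
		wake_up(&entry->wait);		/* only take the lock if needed */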
@@ -410,10 +413,13 @@ have_entry: | |||
410 | if (!uptodate) | 413 | if (!uptodate) |
411 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 414 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
412 | 415 | ||
413 | if (entry->bytes_left == 0) | 416 | if (entry->bytes_left == 0) { |
414 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 417 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
415 | else | 418 | if (waitqueue_active(&entry->wait)) |
419 | wake_up(&entry->wait); | ||
420 | } else { | ||
416 | ret = 1; | 421 | ret = 1; |
422 | } | ||
417 | out: | 423 | out: |
418 | if (!ret && cached && entry) { | 424 | if (!ret && cached && entry) { |
419 | *cached = entry; | 425 | *cached = entry; |
@@ -424,27 +430,48 @@ out: | |||
424 | } | 430 | } |
425 | 431 | ||
426 | /* Needs to either be called under a log transaction or the log_mutex */ | 432 | /* Needs to either be called under a log transaction or the log_mutex */ |
427 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode) | 433 | void btrfs_get_logged_extents(struct inode *inode, |
434 | struct list_head *logged_list) | ||
428 | { | 435 | { |
429 | struct btrfs_ordered_inode_tree *tree; | 436 | struct btrfs_ordered_inode_tree *tree; |
430 | struct btrfs_ordered_extent *ordered; | 437 | struct btrfs_ordered_extent *ordered; |
431 | struct rb_node *n; | 438 | struct rb_node *n; |
432 | int index = log->log_transid % 2; | ||
433 | 439 | ||
434 | tree = &BTRFS_I(inode)->ordered_tree; | 440 | tree = &BTRFS_I(inode)->ordered_tree; |
435 | spin_lock_irq(&tree->lock); | 441 | spin_lock_irq(&tree->lock); |
436 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { | 442 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { |
437 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); | 443 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); |
438 | spin_lock(&log->log_extents_lock[index]); | 444 | if (!list_empty(&ordered->log_list)) |
439 | if (list_empty(&ordered->log_list)) { | 445 | continue; |
440 | list_add_tail(&ordered->log_list, &log->logged_list[index]); | 446 | list_add_tail(&ordered->log_list, logged_list); |
441 | atomic_inc(&ordered->refs); | 447 | atomic_inc(&ordered->refs); |
442 | } | ||
443 | spin_unlock(&log->log_extents_lock[index]); | ||
444 | } | 448 | } |
445 | spin_unlock_irq(&tree->lock); | 449 | spin_unlock_irq(&tree->lock); |
446 | } | 450 | } |
447 | 451 | ||
452 | void btrfs_put_logged_extents(struct list_head *logged_list) | ||
453 | { | ||
454 | struct btrfs_ordered_extent *ordered; | ||
455 | |||
456 | while (!list_empty(logged_list)) { | ||
457 | ordered = list_first_entry(logged_list, | ||
458 | struct btrfs_ordered_extent, | ||
459 | log_list); | ||
460 | list_del_init(&ordered->log_list); | ||
461 | btrfs_put_ordered_extent(ordered); | ||
462 | } | ||
463 | } | ||
464 | |||
465 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
466 | struct btrfs_root *log) | ||
467 | { | ||
468 | int index = log->log_transid % 2; | ||
469 | |||
470 | spin_lock_irq(&log->log_extents_lock[index]); | ||
471 | list_splice_tail(logged_list, &log->logged_list[index]); | ||
472 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
473 | } | ||
474 | |||
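Splitting the old log-time collection into the three helpers above separates gathering ordered extents from publishing them to the log tree. The intended call order during an fsync pass, as a hedged sketch (the helper names are from this patch; the logging step in the middle is paraphrased):

	LIST_HEAD(logged_list);

	btrfs_get_logged_extents(inode, &logged_list);	/* collect and pin */
	ret = copy_extents_to_log(trans, log, inode);	/* hypothetical logging step */
	if (ret)
		btrfs_put_logged_extents(&logged_list);	/* error: just drop the refs */
	else
		btrfs_submit_logged_extents(&logged_list, log);	/* publish to log tree */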
448 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | 475 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) |
449 | { | 476 | { |
450 | struct btrfs_ordered_extent *ordered; | 477 | struct btrfs_ordered_extent *ordered; |
@@ -577,7 +604,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
577 | INIT_LIST_HEAD(&splice); | 604 | INIT_LIST_HEAD(&splice); |
578 | INIT_LIST_HEAD(&works); | 605 | INIT_LIST_HEAD(&works); |
579 | 606 | ||
580 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 607 | mutex_lock(&root->ordered_extent_mutex); |
581 | spin_lock(&root->ordered_extent_lock); | 608 | spin_lock(&root->ordered_extent_lock); |
582 | list_splice_init(&root->ordered_extents, &splice); | 609 | list_splice_init(&root->ordered_extents, &splice); |
583 | while (!list_empty(&splice) && nr) { | 610 | while (!list_empty(&splice) && nr) { |
@@ -588,10 +615,11 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
588 | atomic_inc(&ordered->refs); | 615 | atomic_inc(&ordered->refs); |
589 | spin_unlock(&root->ordered_extent_lock); | 616 | spin_unlock(&root->ordered_extent_lock); |
590 | 617 | ||
591 | ordered->flush_work.func = btrfs_run_ordered_extent_work; | 618 | btrfs_init_work(&ordered->flush_work, |
619 | btrfs_run_ordered_extent_work, NULL, NULL); | ||
592 | list_add_tail(&ordered->work_list, &works); | 620 | list_add_tail(&ordered->work_list, &works); |
593 | btrfs_queue_worker(&root->fs_info->flush_workers, | 621 | btrfs_queue_work(root->fs_info->flush_workers, |
594 | &ordered->flush_work); | 622 | &ordered->flush_work); |
595 | 623 | ||
596 | cond_resched(); | 624 | cond_resched(); |
597 | spin_lock(&root->ordered_extent_lock); | 625 | spin_lock(&root->ordered_extent_lock); |
@@ -608,7 +636,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
608 | btrfs_put_ordered_extent(ordered); | 636 | btrfs_put_ordered_extent(ordered); |
609 | cond_resched(); | 637 | cond_resched(); |
610 | } | 638 | } |
611 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 639 | mutex_unlock(&root->ordered_extent_mutex); |
612 | 640 | ||
613 | return count; | 641 | return count; |
614 | } | 642 | } |
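This hunk shows the shape of the workqueue conversion repeated throughout the patch: rather than poking .func and .flags on a struct btrfs_work and feeding an embedded worker pool to btrfs_queue_worker(), callers now initialize the work item with btrfs_init_work() and queue it on a btrfs_workqueue pointer. The two trailing NULLs are the ordered-completion callbacks, which this call site does not need:

	btrfs_init_work(&ordered->flush_work,
			btrfs_run_ordered_extent_work,	/* normal work function */
			NULL,				/* ordered function, unused here */
			NULL);				/* ordered free, unused here */
	btrfs_queue_work(root->fs_info->flush_workers, &ordered->flush_work);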
@@ -621,6 +649,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
621 | 649 | ||
622 | INIT_LIST_HEAD(&splice); | 650 | INIT_LIST_HEAD(&splice); |
623 | 651 | ||
652 | mutex_lock(&fs_info->ordered_operations_mutex); | ||
624 | spin_lock(&fs_info->ordered_root_lock); | 653 | spin_lock(&fs_info->ordered_root_lock); |
625 | list_splice_init(&fs_info->ordered_roots, &splice); | 654 | list_splice_init(&fs_info->ordered_roots, &splice); |
626 | while (!list_empty(&splice) && nr) { | 655 | while (!list_empty(&splice) && nr) { |
@@ -643,6 +672,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
643 | } | 672 | } |
644 | list_splice_tail(&splice, &fs_info->ordered_roots); | 673 | list_splice_tail(&splice, &fs_info->ordered_roots); |
645 | spin_unlock(&fs_info->ordered_root_lock); | 674 | spin_unlock(&fs_info->ordered_root_lock); |
675 | mutex_unlock(&fs_info->ordered_operations_mutex); | ||
646 | } | 676 | } |
647 | 677 | ||
648 | /* | 678 | /* |
@@ -704,8 +734,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
704 | goto out; | 734 | goto out; |
705 | } | 735 | } |
706 | list_add_tail(&work->list, &works); | 736 | list_add_tail(&work->list, &works); |
707 | btrfs_queue_worker(&root->fs_info->flush_workers, | 737 | btrfs_queue_work(root->fs_info->flush_workers, |
708 | &work->work); | 738 | &work->work); |
709 | 739 | ||
710 | cond_resched(); | 740 | cond_resched(); |
711 | spin_lock(&root->fs_info->ordered_root_lock); | 741 | spin_lock(&root->fs_info->ordered_root_lock); |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 9b0450f7ac20..246897058efb 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -197,7 +197,11 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
197 | struct inode *inode); | 197 | struct inode *inode); |
198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); | 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); |
199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); | 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); |
200 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | 200 | void btrfs_get_logged_extents(struct inode *inode, |
201 | struct list_head *logged_list); | ||
202 | void btrfs_put_logged_extents(struct list_head *logged_list); | ||
203 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
204 | struct btrfs_root *log); | ||
201 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 205 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); |
202 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 206 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
203 | int __init ordered_data_init(void); | 207 | int __init ordered_data_init(void); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 472302a2d745..2cf905877aaf 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1509,8 +1509,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
1509 | ret = qgroup_rescan_init(fs_info, 0, 1); | 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); |
1510 | if (!ret) { | 1510 | if (!ret) { |
1511 | qgroup_rescan_zero_tracking(fs_info); | 1511 | qgroup_rescan_zero_tracking(fs_info); |
1512 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 1512 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
1513 | &fs_info->qgroup_rescan_work); | 1513 | &fs_info->qgroup_rescan_work); |
1514 | } | 1514 | } |
1515 | ret = 0; | 1515 | ret = 0; |
1516 | } | 1516 | } |
@@ -2095,7 +2095,8 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | |||
2095 | 2095 | ||
2096 | memset(&fs_info->qgroup_rescan_work, 0, | 2096 | memset(&fs_info->qgroup_rescan_work, 0, |
2097 | sizeof(fs_info->qgroup_rescan_work)); | 2097 | sizeof(fs_info->qgroup_rescan_work)); |
2098 | fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker; | 2098 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
2099 | btrfs_qgroup_rescan_worker, NULL, NULL); | ||
2099 | 2100 | ||
2100 | if (ret) { | 2101 | if (ret) { |
2101 | err: | 2102 | err: |
@@ -2158,8 +2159,8 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
2158 | 2159 | ||
2159 | qgroup_rescan_zero_tracking(fs_info); | 2160 | qgroup_rescan_zero_tracking(fs_info); |
2160 | 2161 | ||
2161 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2162 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
2162 | &fs_info->qgroup_rescan_work); | 2163 | &fs_info->qgroup_rescan_work); |
2163 | 2164 | ||
2164 | return 0; | 2165 | return 0; |
2165 | } | 2166 | } |
@@ -2190,6 +2191,6 @@ void | |||
2190 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | 2191 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) |
2191 | { | 2192 | { |
2192 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | 2193 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) |
2193 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2194 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
2194 | &fs_info->qgroup_rescan_work); | 2195 | &fs_info->qgroup_rescan_work); |
2195 | } | 2196 | } |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9af0b25d991a..4055291a523e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -1416,20 +1416,18 @@ cleanup: | |||
1416 | 1416 | ||
1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) | 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) |
1418 | { | 1418 | { |
1419 | rbio->work.flags = 0; | 1419 | btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); |
1420 | rbio->work.func = rmw_work; | ||
1421 | 1420 | ||
1422 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1421 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
1423 | &rbio->work); | 1422 | &rbio->work); |
1424 | } | 1423 | } |
1425 | 1424 | ||
1426 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) | 1425 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) |
1427 | { | 1426 | { |
1428 | rbio->work.flags = 0; | 1427 | btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); |
1429 | rbio->work.func = read_rebuild_work; | ||
1430 | 1428 | ||
1431 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1429 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
1432 | &rbio->work); | 1430 | &rbio->work); |
1433 | } | 1431 | } |
1434 | 1432 | ||
1435 | /* | 1433 | /* |
@@ -1667,10 +1665,9 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
1667 | plug = container_of(cb, struct btrfs_plug_cb, cb); | 1665 | plug = container_of(cb, struct btrfs_plug_cb, cb); |
1668 | 1666 | ||
1669 | if (from_schedule) { | 1667 | if (from_schedule) { |
1670 | plug->work.flags = 0; | 1668 | btrfs_init_work(&plug->work, unplug_work, NULL, NULL); |
1671 | plug->work.func = unplug_work; | 1669 | btrfs_queue_work(plug->info->rmw_workers, |
1672 | btrfs_queue_worker(&plug->info->rmw_workers, | 1670 | &plug->work); |
1673 | &plug->work); | ||
1674 | return; | 1671 | return; |
1675 | } | 1672 | } |
1676 | run_plug(plug); | 1673 | run_plug(plug); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 31c797c48c3e..30947f923620 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -793,10 +793,10 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
793 | /* FIXME we cannot handle this properly right now */ | 793 | /* FIXME we cannot handle this properly right now */ |
794 | BUG(); | 794 | BUG(); |
795 | } | 795 | } |
796 | rmw->work.func = reada_start_machine_worker; | 796 | btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); |
797 | rmw->fs_info = fs_info; | 797 | rmw->fs_info = fs_info; |
798 | 798 | ||
799 | btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); | 799 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
800 | } | 800 | } |
801 | 801 | ||
802 | #ifdef DEBUG | 802 | #ifdef DEBUG |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 07b3b36f40ee..def428a25b2a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -4248,7 +4248,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", | 4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", |
4249 | rc->block_group->key.objectid, rc->block_group->flags); | 4249 | rc->block_group->key.objectid, rc->block_group->flags); |
4250 | 4250 | ||
4251 | ret = btrfs_start_delalloc_roots(fs_info, 0); | 4251 | ret = btrfs_start_delalloc_roots(fs_info, 0, -1); |
4252 | if (ret < 0) { | 4252 | if (ret < 0) { |
4253 | err = ret; | 4253 | err = ret; |
4254 | goto out; | 4254 | goto out; |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 1389b69059de..38bb47e7d6b1 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -16,6 +16,7 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/err.h> | ||
19 | #include <linux/uuid.h> | 20 | #include <linux/uuid.h> |
20 | #include "ctree.h" | 21 | #include "ctree.h" |
21 | #include "transaction.h" | 22 | #include "transaction.h" |
@@ -271,7 +272,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
271 | key.offset++; | 272 | key.offset++; |
272 | 273 | ||
273 | root = btrfs_read_fs_root(tree_root, &root_key); | 274 | root = btrfs_read_fs_root(tree_root, &root_key); |
274 | err = PTR_RET(root); | 275 | err = PTR_ERR_OR_ZERO(root); |
275 | if (err && err != -ENOENT) { | 276 | if (err && err != -ENOENT) { |
276 | break; | 277 | break; |
277 | } else if (err == -ENOENT) { | 278 | } else if (err == -ENOENT) { |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index efba5d1282ee..93e6d7172844 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -315,6 +315,16 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | |||
315 | atomic_inc(&fs_info->scrubs_running); | 315 | atomic_inc(&fs_info->scrubs_running); |
316 | atomic_inc(&fs_info->scrubs_paused); | 316 | atomic_inc(&fs_info->scrubs_paused); |
317 | mutex_unlock(&fs_info->scrub_lock); | 317 | mutex_unlock(&fs_info->scrub_lock); |
318 | |||
319 | /* | ||
320 | * Checking the @scrubs_running == @scrubs_paused condition | ||
321 | * inside wait_event() is not an atomic operation, | ||
322 | * which means we may inc/dec @scrubs_running/paused | ||
323 | * at any time. Wake up @scrub_pause_wait as often as | ||
324 | * we can so that a blocked transaction commit waits less. | ||
325 | */ | ||
326 | wake_up(&fs_info->scrub_pause_wait); | ||
327 | |||
318 | atomic_inc(&sctx->workers_pending); | 328 | atomic_inc(&sctx->workers_pending); |
319 | } | 329 | } |
320 | 330 | ||
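The wake_up() added above targets the transaction-commit side, which sleeps until every running scrub has checked in as paused; spurious wake-ups merely force a recheck. A hedged sketch of the assumed waiter, with the condition inferred from the comment (the exact call site is outside this hunk):

	wait_event(fs_info->scrub_pause_wait,
		   atomic_read(&fs_info->scrubs_running) ==
		   atomic_read(&fs_info->scrubs_paused));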
@@ -418,7 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
418 | sbio->index = i; | 428 | sbio->index = i; |
419 | sbio->sctx = sctx; | 429 | sbio->sctx = sctx; |
420 | sbio->page_count = 0; | 430 | sbio->page_count = 0; |
421 | sbio->work.func = scrub_bio_end_io_worker; | 431 | btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, |
432 | NULL, NULL); | ||
422 | 433 | ||
423 | if (i != SCRUB_BIOS_PER_SCTX - 1) | 434 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
424 | sctx->bios[i]->next_free = i + 1; | 435 | sctx->bios[i]->next_free = i + 1; |
@@ -987,9 +998,10 @@ nodatasum_case: | |||
987 | fixup_nodatasum->root = fs_info->extent_root; | 998 | fixup_nodatasum->root = fs_info->extent_root; |
988 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 999 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
989 | scrub_pending_trans_workers_inc(sctx); | 1000 | scrub_pending_trans_workers_inc(sctx); |
990 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; | 1001 | btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, |
991 | btrfs_queue_worker(&fs_info->scrub_workers, | 1002 | NULL, NULL); |
992 | &fixup_nodatasum->work); | 1003 | btrfs_queue_work(fs_info->scrub_workers, |
1004 | &fixup_nodatasum->work); | ||
993 | goto out; | 1005 | goto out; |
994 | } | 1006 | } |
995 | 1007 | ||
@@ -1603,8 +1615,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) | |||
1603 | sbio->err = err; | 1615 | sbio->err = err; |
1604 | sbio->bio = bio; | 1616 | sbio->bio = bio; |
1605 | 1617 | ||
1606 | sbio->work.func = scrub_wr_bio_end_io_worker; | 1618 | btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); |
1607 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | 1619 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); |
1608 | } | 1620 | } |
1609 | 1621 | ||
1610 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | 1622 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) |
@@ -2072,7 +2084,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
2072 | sbio->err = err; | 2084 | sbio->err = err; |
2073 | sbio->bio = bio; | 2085 | sbio->bio = bio; |
2074 | 2086 | ||
2075 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | 2087 | btrfs_queue_work(fs_info->scrub_workers, &sbio->work); |
2076 | } | 2088 | } |
2077 | 2089 | ||
2078 | static void scrub_bio_end_io_worker(struct btrfs_work *work) | 2090 | static void scrub_bio_end_io_worker(struct btrfs_work *work) |
@@ -2686,10 +2698,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
2686 | 2698 | ||
2687 | wait_event(sctx->list_wait, | 2699 | wait_event(sctx->list_wait, |
2688 | atomic_read(&sctx->bios_in_flight) == 0); | 2700 | atomic_read(&sctx->bios_in_flight) == 0); |
2689 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | 2701 | atomic_inc(&fs_info->scrubs_paused); |
2702 | wake_up(&fs_info->scrub_pause_wait); | ||
2703 | |||
2704 | /* | ||
2705 | * This must be done before we decrease @scrubs_paused, to | ||
2706 | * make sure we don't block transaction commit while we are | ||
2707 | * waiting for the pending workers to finish. | ||
2708 | */ | ||
2690 | wait_event(sctx->list_wait, | 2709 | wait_event(sctx->list_wait, |
2691 | atomic_read(&sctx->workers_pending) == 0); | 2710 | atomic_read(&sctx->workers_pending) == 0); |
2692 | scrub_blocked_if_needed(fs_info); | 2711 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); |
2712 | |||
2713 | mutex_lock(&fs_info->scrub_lock); | ||
2714 | __scrub_blocked_if_needed(fs_info); | ||
2715 | atomic_dec(&fs_info->scrubs_paused); | ||
2716 | mutex_unlock(&fs_info->scrub_lock); | ||
2717 | wake_up(&fs_info->scrub_pause_wait); | ||
2693 | 2718 | ||
2694 | btrfs_put_block_group(cache); | 2719 | btrfs_put_block_group(cache); |
2695 | if (ret) | 2720 | if (ret) |
@@ -2757,33 +2782,35 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | |||
2757 | int is_dev_replace) | 2782 | int is_dev_replace) |
2758 | { | 2783 | { |
2759 | int ret = 0; | 2784 | int ret = 0; |
2785 | int flags = WQ_FREEZABLE | WQ_UNBOUND; | ||
2786 | int max_active = fs_info->thread_pool_size; | ||
2760 | 2787 | ||
2761 | if (fs_info->scrub_workers_refcnt == 0) { | 2788 | if (fs_info->scrub_workers_refcnt == 0) { |
2762 | if (is_dev_replace) | 2789 | if (is_dev_replace) |
2763 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, | 2790 | fs_info->scrub_workers = |
2764 | &fs_info->generic_worker); | 2791 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
2792 | 1, 4); | ||
2765 | else | 2793 | else |
2766 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2794 | fs_info->scrub_workers = |
2767 | fs_info->thread_pool_size, | 2795 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
2768 | &fs_info->generic_worker); | 2796 | max_active, 4); |
2769 | fs_info->scrub_workers.idle_thresh = 4; | 2797 | if (!fs_info->scrub_workers) { |
2770 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2798 | ret = -ENOMEM; |
2771 | if (ret) | ||
2772 | goto out; | 2799 | goto out; |
2773 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | 2800 | } |
2774 | "scrubwrc", | 2801 | fs_info->scrub_wr_completion_workers = |
2775 | fs_info->thread_pool_size, | 2802 | btrfs_alloc_workqueue("btrfs-scrubwrc", flags, |
2776 | &fs_info->generic_worker); | 2803 | max_active, 2); |
2777 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | 2804 | if (!fs_info->scrub_wr_completion_workers) { |
2778 | ret = btrfs_start_workers( | 2805 | ret = -ENOMEM; |
2779 | &fs_info->scrub_wr_completion_workers); | ||
2780 | if (ret) | ||
2781 | goto out; | 2806 | goto out; |
2782 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | 2807 | } |
2783 | &fs_info->generic_worker); | 2808 | fs_info->scrub_nocow_workers = |
2784 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | 2809 | btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); |
2785 | if (ret) | 2810 | if (!fs_info->scrub_nocow_workers) { |
2811 | ret = -ENOMEM; | ||
2786 | goto out; | 2812 | goto out; |
2813 | } | ||
2787 | } | 2814 | } |
2788 | ++fs_info->scrub_workers_refcnt; | 2815 | ++fs_info->scrub_workers_refcnt; |
2789 | out: | 2816 | out: |
@@ -2793,9 +2820,9 @@ out: | |||
2793 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) | 2820 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
2794 | { | 2821 | { |
2795 | if (--fs_info->scrub_workers_refcnt == 0) { | 2822 | if (--fs_info->scrub_workers_refcnt == 0) { |
2796 | btrfs_stop_workers(&fs_info->scrub_workers); | 2823 | btrfs_destroy_workqueue(fs_info->scrub_workers); |
2797 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | 2824 | btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); |
2798 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | 2825 | btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); |
2799 | } | 2826 | } |
2800 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2827 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
2801 | } | 2828 | } |
@@ -3106,10 +3133,10 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
3106 | nocow_ctx->len = len; | 3133 | nocow_ctx->len = len; |
3107 | nocow_ctx->mirror_num = mirror_num; | 3134 | nocow_ctx->mirror_num = mirror_num; |
3108 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | 3135 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; |
3109 | nocow_ctx->work.func = copy_nocow_pages_worker; | 3136 | btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); |
3110 | INIT_LIST_HEAD(&nocow_ctx->inodes); | 3137 | INIT_LIST_HEAD(&nocow_ctx->inodes); |
3111 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | 3138 | btrfs_queue_work(fs_info->scrub_nocow_workers, |
3112 | &nocow_ctx->work); | 3139 | &nocow_ctx->work); |
3113 | 3140 | ||
3114 | return 0; | 3141 | return 0; |
3115 | } | 3142 | } |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9dde9717c1b9..9b6da9d55f9a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -51,15 +51,18 @@ struct fs_path { | |||
51 | struct { | 51 | struct { |
52 | char *start; | 52 | char *start; |
53 | char *end; | 53 | char *end; |
54 | char *prepared; | ||
55 | 54 | ||
56 | char *buf; | 55 | char *buf; |
57 | int buf_len; | 56 | unsigned short buf_len:15; |
58 | unsigned int reversed:1; | 57 | unsigned short reversed:1; |
59 | unsigned int virtual_mem:1; | ||
60 | char inline_buf[]; | 58 | char inline_buf[]; |
61 | }; | 59 | }; |
62 | char pad[PAGE_SIZE]; | 60 | /* |
61 | * Average path length does not exceed 200 bytes, we'll have | ||
62 | * better packing in the slab and higher chance to satisfy | ||
63 | * a allocation later during send. | ||
64 | */ | ||
65 | char pad[256]; | ||
63 | }; | 66 | }; |
64 | }; | 67 | }; |
65 | #define FS_PATH_INLINE_SIZE \ | 68 | #define FS_PATH_INLINE_SIZE \ |
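The union above caps struct fs_path at 256 bytes: inline_buf grows into whatever the pad leaves after the header fields, so typical paths need no separate allocation, and FS_PATH_INLINE_SIZE (whose definition continues past this hunk) is just that remainder. A self-contained illustration with a demo struct (compiles with GCC/Clang, which accept a flexible array member in this position as the kernel does):

	#include <stddef.h>
	#include <stdio.h>

	struct fs_path_demo {
		union {
			struct {
				char *start, *end, *buf;
				unsigned short buf_len:15;
				unsigned short reversed:1;
				char inline_buf[];
			};
			char pad[256];
		};
	};

	#define DEMO_INLINE_SIZE \
		(sizeof(struct fs_path_demo) - offsetof(struct fs_path_demo, inline_buf))

	int main(void)
	{
		/* 256 bytes total; roughly 230 of them inline path space on 64-bit */
		printf("%zu inline bytes\n", DEMO_INLINE_SIZE);
		return 0;
	}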
@@ -109,6 +112,7 @@ struct send_ctx { | |||
109 | int cur_inode_deleted; | 112 | int cur_inode_deleted; |
110 | u64 cur_inode_size; | 113 | u64 cur_inode_size; |
111 | u64 cur_inode_mode; | 114 | u64 cur_inode_mode; |
115 | u64 cur_inode_rdev; | ||
112 | u64 cur_inode_last_extent; | 116 | u64 cur_inode_last_extent; |
113 | 117 | ||
114 | u64 send_progress; | 118 | u64 send_progress; |
@@ -120,6 +124,8 @@ struct send_ctx { | |||
120 | struct list_head name_cache_list; | 124 | struct list_head name_cache_list; |
121 | int name_cache_size; | 125 | int name_cache_size; |
122 | 126 | ||
127 | struct file_ra_state ra; | ||
128 | |||
123 | char *read_buf; | 129 | char *read_buf; |
124 | 130 | ||
125 | /* | 131 | /* |
@@ -175,6 +181,47 @@ struct send_ctx { | |||
175 | * own move/rename can be performed. | 181 | * own move/rename can be performed. |
176 | */ | 182 | */ |
177 | struct rb_root waiting_dir_moves; | 183 | struct rb_root waiting_dir_moves; |
184 | |||
185 | /* | ||
186 | * A directory that is going to be rm'ed might have a child directory | ||
187 | * which is in the pending directory moves index above. In this case, | ||
188 | * the directory can only be removed after the move/rename of its child | ||
189 | * is performed. Example: | ||
190 | * | ||
191 | * Parent snapshot: | ||
192 | * | ||
193 | * . (ino 256) | ||
194 | * |-- a/ (ino 257) | ||
195 | * |-- b/ (ino 258) | ||
196 | * |-- c/ (ino 259) | ||
197 | * | |-- x/ (ino 260) | ||
198 | * | | ||
199 | * |-- y/ (ino 261) | ||
200 | * | ||
201 | * Send snapshot: | ||
202 | * | ||
203 | * . (ino 256) | ||
204 | * |-- a/ (ino 257) | ||
205 | * |-- b/ (ino 258) | ||
206 | * |-- YY/ (ino 261) | ||
207 | * |-- x/ (ino 260) | ||
208 | * | ||
209 | * Sequence of steps that lead to the send snapshot: | ||
210 | * rm -f /a/b/c/foo.txt | ||
211 | * mv /a/b/y /a/b/YY | ||
212 | * mv /a/b/c/x /a/b/YY | ||
213 | * rmdir /a/b/c | ||
214 | * | ||
215 | * When the child is processed, its move/rename is delayed until its | ||
216 | * parent is processed (as explained above), but all other operations | ||
217 | * like updating utimes, chown, chgrp, etc., are performed and the paths | ||
218 | * that it uses for those operations must use the orphanized name of | ||
219 | * its parent (the directory we're going to rm later), so we need to | ||
220 | * memorize that name. | ||
221 | * | ||
222 | * Indexed by the inode number of the directory to be deleted. | ||
223 | */ | ||
224 | struct rb_root orphan_dirs; | ||
178 | }; | 225 | }; |
179 | 226 | ||
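Like waiting_dir_moves, orphan_dirs is keyed by inode number. A hypothetical sketch of the lookup the index implies (the real helper is added elsewhere in this patch and may differ):

	static struct orphan_dir_info *
	get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
	{
		struct rb_node *n = sctx->orphan_dirs.rb_node;

		while (n) {
			struct orphan_dir_info *odi;

			odi = rb_entry(n, struct orphan_dir_info, node);
			if (dir_ino < odi->ino)
				n = n->rb_left;
			else if (dir_ino > odi->ino)
				n = n->rb_right;
			else
				return odi;
		}
		return NULL;
	}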
180 | struct pending_dir_move { | 227 | struct pending_dir_move { |
@@ -189,6 +236,18 @@ struct pending_dir_move { | |||
189 | struct waiting_dir_move { | 236 | struct waiting_dir_move { |
190 | struct rb_node node; | 237 | struct rb_node node; |
191 | u64 ino; | 238 | u64 ino; |
239 | /* | ||
240 | * There might be some directory that could not be removed because it | ||
241 | * was waiting for this directory inode to be moved first. Therefore | ||
242 | * after this directory is moved, we can try to rmdir the ino rmdir_ino. | ||
243 | */ | ||
244 | u64 rmdir_ino; | ||
245 | }; | ||
246 | |||
247 | struct orphan_dir_info { | ||
248 | struct rb_node node; | ||
249 | u64 ino; | ||
250 | u64 gen; | ||
192 | }; | 251 | }; |
193 | 252 | ||
194 | struct name_cache_entry { | 253 | struct name_cache_entry { |
@@ -214,6 +273,11 @@ struct name_cache_entry { | |||
214 | 273 | ||
215 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); | 274 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); |
216 | 275 | ||
276 | static struct waiting_dir_move * | ||
277 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino); | ||
278 | |||
279 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); | ||
280 | |||
217 | static int need_send_hole(struct send_ctx *sctx) | 281 | static int need_send_hole(struct send_ctx *sctx) |
218 | { | 282 | { |
219 | return (sctx->parent_root && !sctx->cur_inode_new && | 283 | return (sctx->parent_root && !sctx->cur_inode_new && |
@@ -242,7 +306,6 @@ static struct fs_path *fs_path_alloc(void) | |||
242 | if (!p) | 306 | if (!p) |
243 | return NULL; | 307 | return NULL; |
244 | p->reversed = 0; | 308 | p->reversed = 0; |
245 | p->virtual_mem = 0; | ||
246 | p->buf = p->inline_buf; | 309 | p->buf = p->inline_buf; |
247 | p->buf_len = FS_PATH_INLINE_SIZE; | 310 | p->buf_len = FS_PATH_INLINE_SIZE; |
248 | fs_path_reset(p); | 311 | fs_path_reset(p); |
@@ -265,12 +328,8 @@ static void fs_path_free(struct fs_path *p) | |||
265 | { | 328 | { |
266 | if (!p) | 329 | if (!p) |
267 | return; | 330 | return; |
268 | if (p->buf != p->inline_buf) { | 331 | if (p->buf != p->inline_buf) |
269 | if (p->virtual_mem) | 332 | kfree(p->buf); |
270 | vfree(p->buf); | ||
271 | else | ||
272 | kfree(p->buf); | ||
273 | } | ||
274 | kfree(p); | 333 | kfree(p); |
275 | } | 334 | } |
276 | 335 | ||
@@ -292,40 +351,23 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
292 | 351 | ||
293 | path_len = p->end - p->start; | 352 | path_len = p->end - p->start; |
294 | old_buf_len = p->buf_len; | 353 | old_buf_len = p->buf_len; |
295 | len = PAGE_ALIGN(len); | 354 | |
296 | 355 | /* | |
297 | if (p->buf == p->inline_buf) { | 356 | * First time the inline_buf does not suffice |
298 | tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN); | 357 | */ |
299 | if (!tmp_buf) { | 358 | if (p->buf == p->inline_buf) |
300 | tmp_buf = vmalloc(len); | 359 | tmp_buf = kmalloc(len, GFP_NOFS); |
301 | if (!tmp_buf) | 360 | else |
302 | return -ENOMEM; | 361 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); |
303 | p->virtual_mem = 1; | 362 | if (!tmp_buf) |
304 | } | 363 | return -ENOMEM; |
305 | memcpy(tmp_buf, p->buf, p->buf_len); | 364 | p->buf = tmp_buf; |
306 | p->buf = tmp_buf; | 365 | /* |
307 | p->buf_len = len; | 366 | * The real size of the buffer is bigger, this will let the fast path |
308 | } else { | 367 | * happen most of the time |
309 | if (p->virtual_mem) { | 368 | */ |
310 | tmp_buf = vmalloc(len); | 369 | p->buf_len = ksize(p->buf); |
311 | if (!tmp_buf) | 370 | |
312 | return -ENOMEM; | ||
313 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
314 | vfree(p->buf); | ||
315 | } else { | ||
316 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | ||
317 | if (!tmp_buf) { | ||
318 | tmp_buf = vmalloc(len); | ||
319 | if (!tmp_buf) | ||
320 | return -ENOMEM; | ||
321 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
322 | kfree(p->buf); | ||
323 | p->virtual_mem = 1; | ||
324 | } | ||
325 | } | ||
326 | p->buf = tmp_buf; | ||
327 | p->buf_len = len; | ||
328 | } | ||
329 | if (p->reversed) { | 371 | if (p->reversed) { |
330 | tmp_buf = p->buf + old_buf_len - path_len - 1; | 372 | tmp_buf = p->buf + old_buf_len - path_len - 1; |
331 | p->end = p->buf + p->buf_len - 1; | 373 | p->end = p->buf + p->buf_len - 1; |
@@ -338,7 +380,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
338 | return 0; | 380 | return 0; |
339 | } | 381 | } |
340 | 382 | ||
341 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | 383 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len, |
384 | char **prepared) | ||
342 | { | 385 | { |
343 | int ret; | 386 | int ret; |
344 | int new_len; | 387 | int new_len; |
@@ -354,11 +397,11 @@ static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | |||
354 | if (p->start != p->end) | 397 | if (p->start != p->end) |
355 | *--p->start = '/'; | 398 | *--p->start = '/'; |
356 | p->start -= name_len; | 399 | p->start -= name_len; |
357 | p->prepared = p->start; | 400 | *prepared = p->start; |
358 | } else { | 401 | } else { |
359 | if (p->start != p->end) | 402 | if (p->start != p->end) |
360 | *p->end++ = '/'; | 403 | *p->end++ = '/'; |
361 | p->prepared = p->end; | 404 | *prepared = p->end; |
362 | p->end += name_len; | 405 | p->end += name_len; |
363 | *p->end = 0; | 406 | *p->end = 0; |
364 | } | 407 | } |
@@ -370,12 +413,12 @@ out: | |||
370 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) | 413 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) |
371 | { | 414 | { |
372 | int ret; | 415 | int ret; |
416 | char *prepared; | ||
373 | 417 | ||
374 | ret = fs_path_prepare_for_add(p, name_len); | 418 | ret = fs_path_prepare_for_add(p, name_len, &prepared); |
375 | if (ret < 0) | 419 | if (ret < 0) |
376 | goto out; | 420 | goto out; |
377 | memcpy(p->prepared, name, name_len); | 421 | memcpy(prepared, name, name_len); |
378 | p->prepared = NULL; | ||
379 | 422 | ||
380 | out: | 423 | out: |
381 | return ret; | 424 | return ret; |
@@ -384,12 +427,12 @@ out: | |||
384 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) | 427 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) |
385 | { | 428 | { |
386 | int ret; | 429 | int ret; |
430 | char *prepared; | ||
387 | 431 | ||
388 | ret = fs_path_prepare_for_add(p, p2->end - p2->start); | 432 | ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); |
389 | if (ret < 0) | 433 | if (ret < 0) |
390 | goto out; | 434 | goto out; |
391 | memcpy(p->prepared, p2->start, p2->end - p2->start); | 435 | memcpy(prepared, p2->start, p2->end - p2->start); |
392 | p->prepared = NULL; | ||
393 | 436 | ||
394 | out: | 437 | out: |
395 | return ret; | 438 | return ret; |
@@ -400,13 +443,13 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p, | |||
400 | unsigned long off, int len) | 443 | unsigned long off, int len) |
401 | { | 444 | { |
402 | int ret; | 445 | int ret; |
446 | char *prepared; | ||
403 | 447 | ||
404 | ret = fs_path_prepare_for_add(p, len); | 448 | ret = fs_path_prepare_for_add(p, len, &prepared); |
405 | if (ret < 0) | 449 | if (ret < 0) |
406 | goto out; | 450 | goto out; |
407 | 451 | ||
408 | read_extent_buffer(eb, p->prepared, off, len); | 452 | read_extent_buffer(eb, prepared, off, len); |
409 | p->prepared = NULL; | ||
410 | 453 | ||
411 | out: | 454 | out: |
412 | return ret; | 455 | return ret; |
@@ -915,9 +958,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
915 | struct btrfs_dir_item *di; | 958 | struct btrfs_dir_item *di; |
916 | struct btrfs_key di_key; | 959 | struct btrfs_key di_key; |
917 | char *buf = NULL; | 960 | char *buf = NULL; |
918 | char *buf2 = NULL; | 961 | const int buf_len = PATH_MAX; |
919 | int buf_len; | ||
920 | int buf_virtual = 0; | ||
921 | u32 name_len; | 962 | u32 name_len; |
922 | u32 data_len; | 963 | u32 data_len; |
923 | u32 cur; | 964 | u32 cur; |
@@ -927,7 +968,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
927 | int num; | 968 | int num; |
928 | u8 type; | 969 | u8 type; |
929 | 970 | ||
930 | buf_len = PAGE_SIZE; | ||
931 | buf = kmalloc(buf_len, GFP_NOFS); | 971 | buf = kmalloc(buf_len, GFP_NOFS); |
932 | if (!buf) { | 972 | if (!buf) { |
933 | ret = -ENOMEM; | 973 | ret = -ENOMEM; |
@@ -949,30 +989,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
949 | type = btrfs_dir_type(eb, di); | 989 | type = btrfs_dir_type(eb, di); |
950 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | 990 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); |
951 | 991 | ||
992 | /* | ||
993 | * Path too long | ||
994 | */ | ||
952 | if (name_len + data_len > buf_len) { | 995 | if (name_len + data_len > buf_len) { |
953 | buf_len = PAGE_ALIGN(name_len + data_len); | 996 | ret = -ENAMETOOLONG; |
954 | if (buf_virtual) { | 997 | goto out; |
955 | buf2 = vmalloc(buf_len); | ||
956 | if (!buf2) { | ||
957 | ret = -ENOMEM; | ||
958 | goto out; | ||
959 | } | ||
960 | vfree(buf); | ||
961 | } else { | ||
962 | buf2 = krealloc(buf, buf_len, GFP_NOFS); | ||
963 | if (!buf2) { | ||
964 | buf2 = vmalloc(buf_len); | ||
965 | if (!buf2) { | ||
966 | ret = -ENOMEM; | ||
967 | goto out; | ||
968 | } | ||
969 | kfree(buf); | ||
970 | buf_virtual = 1; | ||
971 | } | ||
972 | } | ||
973 | |||
974 | buf = buf2; | ||
975 | buf2 = NULL; | ||
976 | } | 998 | } |
977 | 999 | ||
978 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1000 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
@@ -995,10 +1017,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
995 | } | 1017 | } |
996 | 1018 | ||
997 | out: | 1019 | out: |
998 | if (buf_virtual) | 1020 | kfree(buf); |
999 | vfree(buf); | ||
1000 | else | ||
1001 | kfree(buf); | ||
1002 | return ret; | 1021 | return ret; |
1003 | } | 1022 | } |
1004 | 1023 | ||
@@ -1292,8 +1311,6 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1292 | extent_item_pos = logical - found_key.objectid; | 1311 | extent_item_pos = logical - found_key.objectid; |
1293 | else | 1312 | else |
1294 | extent_item_pos = 0; | 1313 | extent_item_pos = 0; |
1295 | |||
1296 | extent_item_pos = logical - found_key.objectid; | ||
1297 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1314 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
1298 | found_key.objectid, extent_item_pos, 1, | 1315 | found_key.objectid, extent_item_pos, 1, |
1299 | __iterate_backrefs, backref_ctx); | 1316 | __iterate_backrefs, backref_ctx); |
@@ -1418,11 +1435,7 @@ static int gen_unique_name(struct send_ctx *sctx, | |||
1418 | while (1) { | 1435 | while (1) { |
1419 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", | 1436 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", |
1420 | ino, gen, idx); | 1437 | ino, gen, idx); |
1421 | if (len >= sizeof(tmp)) { | 1438 | ASSERT(len < sizeof(tmp)); |
1422 | /* should really not happen */ | ||
1423 | ret = -EOVERFLOW; | ||
1424 | goto out; | ||
1425 | } | ||
1426 | 1439 | ||
1427 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, | 1440 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, |
1428 | path, BTRFS_FIRST_FREE_OBJECTID, | 1441 | path, BTRFS_FIRST_FREE_OBJECTID, |
@@ -1898,13 +1911,20 @@ static void name_cache_delete(struct send_ctx *sctx, | |||
1898 | 1911 | ||
1899 | nce_head = radix_tree_lookup(&sctx->name_cache, | 1912 | nce_head = radix_tree_lookup(&sctx->name_cache, |
1900 | (unsigned long)nce->ino); | 1913 | (unsigned long)nce->ino); |
1901 | BUG_ON(!nce_head); | 1914 | if (!nce_head) { |
1915 | btrfs_err(sctx->send_root->fs_info, | ||
1916 | "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", | ||
1917 | nce->ino, sctx->name_cache_size); | ||
1918 | } | ||
1902 | 1919 | ||
1903 | list_del(&nce->radix_list); | 1920 | list_del(&nce->radix_list); |
1904 | list_del(&nce->list); | 1921 | list_del(&nce->list); |
1905 | sctx->name_cache_size--; | 1922 | sctx->name_cache_size--; |
1906 | 1923 | ||
1907 | if (list_empty(nce_head)) { | 1924 | /* |
1925 | * We may not get to the final release of nce_head if the lookup fails | ||
1926 | */ | ||
1927 | if (nce_head && list_empty(nce_head)) { | ||
1908 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | 1928 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); |
1909 | kfree(nce_head); | 1929 | kfree(nce_head); |
1910 | } | 1930 | } |
@@ -1977,7 +1997,6 @@ static void name_cache_free(struct send_ctx *sctx) | |||
1977 | */ | 1997 | */ |
1978 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 1998 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
1979 | u64 ino, u64 gen, | 1999 | u64 ino, u64 gen, |
1980 | int skip_name_cache, | ||
1981 | u64 *parent_ino, | 2000 | u64 *parent_ino, |
1982 | u64 *parent_gen, | 2001 | u64 *parent_gen, |
1983 | struct fs_path *dest) | 2002 | struct fs_path *dest) |
@@ -1987,8 +2006,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1987 | struct btrfs_path *path = NULL; | 2006 | struct btrfs_path *path = NULL; |
1988 | struct name_cache_entry *nce = NULL; | 2007 | struct name_cache_entry *nce = NULL; |
1989 | 2008 | ||
1990 | if (skip_name_cache) | ||
1991 | goto get_ref; | ||
1992 | /* | 2009 | /* |
1993 | * First check if we already did a call to this function with the same | 2010 | * First check if we already did a call to this function with the same |
1994 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | 2011 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes |
@@ -2033,12 +2050,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
2033 | goto out_cache; | 2050 | goto out_cache; |
2034 | } | 2051 | } |
2035 | 2052 | ||
2036 | get_ref: | ||
2037 | /* | 2053 | /* |
2038 | * Depending on whether the inode was already processed or not, use | 2054 | * Depending on whether the inode was already processed or not, use |
2039 | * send_root or parent_root for ref lookup. | 2055 | * send_root or parent_root for ref lookup. |
2040 | */ | 2056 | */ |
2041 | if (ino < sctx->send_progress && !skip_name_cache) | 2057 | if (ino < sctx->send_progress) |
2042 | ret = get_first_ref(sctx->send_root, ino, | 2058 | ret = get_first_ref(sctx->send_root, ino, |
2043 | parent_ino, parent_gen, dest); | 2059 | parent_ino, parent_gen, dest); |
2044 | else | 2060 | else |
@@ -2062,8 +2078,6 @@ get_ref: | |||
2062 | goto out; | 2078 | goto out; |
2063 | ret = 1; | 2079 | ret = 1; |
2064 | } | 2080 | } |
2065 | if (skip_name_cache) | ||
2066 | goto out; | ||
2067 | 2081 | ||
2068 | out_cache: | 2082 | out_cache: |
2069 | /* | 2083 | /* |
@@ -2131,9 +2145,6 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2131 | u64 parent_inode = 0; | 2145 | u64 parent_inode = 0; |
2132 | u64 parent_gen = 0; | 2146 | u64 parent_gen = 0; |
2133 | int stop = 0; | 2147 | int stop = 0; |
2134 | u64 start_ino = ino; | ||
2135 | u64 start_gen = gen; | ||
2136 | int skip_name_cache = 0; | ||
2137 | 2148 | ||
2138 | name = fs_path_alloc(); | 2149 | name = fs_path_alloc(); |
2139 | if (!name) { | 2150 | if (!name) { |
@@ -2141,31 +2152,33 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2141 | goto out; | 2152 | goto out; |
2142 | } | 2153 | } |
2143 | 2154 | ||
2144 | if (is_waiting_for_move(sctx, ino)) | ||
2145 | skip_name_cache = 1; | ||
2146 | |||
2147 | again: | ||
2148 | dest->reversed = 1; | 2155 | dest->reversed = 1; |
2149 | fs_path_reset(dest); | 2156 | fs_path_reset(dest); |
2150 | 2157 | ||
2151 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { | 2158 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { |
2152 | fs_path_reset(name); | 2159 | fs_path_reset(name); |
2153 | 2160 | ||
2154 | ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache, | 2161 | if (is_waiting_for_rm(sctx, ino)) { |
2155 | &parent_inode, &parent_gen, name); | 2162 | ret = gen_unique_name(sctx, ino, gen, name); |
2163 | if (ret < 0) | ||
2164 | goto out; | ||
2165 | ret = fs_path_add_path(dest, name); | ||
2166 | break; | ||
2167 | } | ||
2168 | |||
2169 | if (is_waiting_for_move(sctx, ino)) { | ||
2170 | ret = get_first_ref(sctx->parent_root, ino, | ||
2171 | &parent_inode, &parent_gen, name); | ||
2172 | } else { | ||
2173 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
2174 | &parent_inode, | ||
2175 | &parent_gen, name); | ||
2176 | if (ret) | ||
2177 | stop = 1; | ||
2178 | } | ||
2179 | |||
2156 | if (ret < 0) | 2180 | if (ret < 0) |
2157 | goto out; | 2181 | goto out; |
2158 | if (ret) | ||
2159 | stop = 1; | ||
2160 | |||
2161 | if (!skip_name_cache && | ||
2162 | is_waiting_for_move(sctx, parent_inode)) { | ||
2163 | ino = start_ino; | ||
2164 | gen = start_gen; | ||
2165 | stop = 0; | ||
2166 | skip_name_cache = 1; | ||
2167 | goto again; | ||
2168 | } | ||
2169 | 2182 | ||
2170 | ret = fs_path_add_path(dest, name); | 2183 | ret = fs_path_add_path(dest, name); |
2171 | if (ret < 0) | 2184 | if (ret < 0) |
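
With the again:/skip_name_cache retry gone, the rewritten loop decides per component how to resolve the name: a directory kept alive only for a pending rmdir is referenced by its generated orphan name, a directory waiting to be moved is resolved from its old location in the parent root, and everything else still goes through the name cache. Condensed, with error handling trimmed:

    /* Sketch of the per-component dispatch in the new get_cur_path() loop. */
    if (is_waiting_for_rm(sctx, ino)) {
            ret = gen_unique_name(sctx, ino, gen, name);  /* orphan name */
            ret = fs_path_add_path(dest, name);
            break;                        /* orphan dirs live in the root */
    }
    if (is_waiting_for_move(sctx, ino))
            ret = get_first_ref(sctx->parent_root, ino,   /* old location */
                                &parent_ino, &parent_gen, name);
    else
            ret = __get_cur_name_and_parent(sctx, ino, gen, &parent_ino,
                                            &parent_gen, name);
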
@@ -2429,10 +2442,16 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
2429 | if (!p) | 2442 | if (!p) |
2430 | return -ENOMEM; | 2443 | return -ENOMEM; |
2431 | 2444 | ||
2432 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, | 2445 | if (ino != sctx->cur_ino) { |
2433 | NULL, &rdev); | 2446 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, |
2434 | if (ret < 0) | 2447 | NULL, NULL, &rdev); |
2435 | goto out; | 2448 | if (ret < 0) |
2449 | goto out; | ||
2450 | } else { | ||
2451 | gen = sctx->cur_inode_gen; | ||
2452 | mode = sctx->cur_inode_mode; | ||
2453 | rdev = sctx->cur_inode_rdev; | ||
2454 | } | ||
2436 | 2455 | ||
2437 | if (S_ISREG(mode)) { | 2456 | if (S_ISREG(mode)) { |
2438 | cmd = BTRFS_SEND_C_MKFILE; | 2457 | cmd = BTRFS_SEND_C_MKFILE; |
@@ -2512,17 +2531,26 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
2512 | key.objectid = dir; | 2531 | key.objectid = dir; |
2513 | key.type = BTRFS_DIR_INDEX_KEY; | 2532 | key.type = BTRFS_DIR_INDEX_KEY; |
2514 | key.offset = 0; | 2533 | key.offset = 0; |
2534 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | ||
2535 | if (ret < 0) | ||
2536 | goto out; | ||
2537 | |||
2515 | while (1) { | 2538 | while (1) { |
2516 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | 2539 | eb = path->nodes[0]; |
2517 | 1, 0); | 2540 | slot = path->slots[0]; |
2518 | if (ret < 0) | 2541 | if (slot >= btrfs_header_nritems(eb)) { |
2519 | goto out; | 2542 | ret = btrfs_next_leaf(sctx->send_root, path); |
2520 | if (!ret) { | 2543 | if (ret < 0) { |
2521 | eb = path->nodes[0]; | 2544 | goto out; |
2522 | slot = path->slots[0]; | 2545 | } else if (ret > 0) { |
2523 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 2546 | ret = 0; |
2547 | break; | ||
2548 | } | ||
2549 | continue; | ||
2524 | } | 2550 | } |
2525 | if (ret || found_key.objectid != key.objectid || | 2551 | |
2552 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
2553 | if (found_key.objectid != key.objectid || | ||
2526 | found_key.type != key.type) { | 2554 | found_key.type != key.type) { |
2527 | ret = 0; | 2555 | ret = 0; |
2528 | goto out; | 2556 | goto out; |
@@ -2537,8 +2565,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
2537 | goto out; | 2565 | goto out; |
2538 | } | 2566 | } |
2539 | 2567 | ||
2540 | key.offset = found_key.offset + 1; | 2568 | path->slots[0]++; |
2541 | btrfs_release_path(path); | ||
2542 | } | 2569 | } |
2543 | 2570 | ||
2544 | out: | 2571 | out: |
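
Several hunks in this patch (did_create_dir, can_rmdir, process_all_refs, process_all_new_xattrs) replace the old per-item loop of btrfs_search_slot_for_read() + btrfs_release_path() + re-search at key.offset + 1 with one btrfs_search_slot() followed by in-leaf slot advancement. The shared shape, as a sketch with error handling condensed:

    /* Sketch: walk every item matching (objectid, type) without re-searching. */
    key.offset = 0;
    ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
    if (ret < 0)
            goto out;
    while (1) {
            struct extent_buffer *eb = path->nodes[0];
            int slot = path->slots[0];

            if (slot >= btrfs_header_nritems(eb)) {
                    ret = btrfs_next_leaf(root, path);  /* hop one leaf right */
                    if (ret < 0)
                            goto out;
                    else if (ret > 0)
                            break;                      /* tree exhausted */
                    continue;
            }
            btrfs_item_key_to_cpu(eb, &found_key, slot);
            if (found_key.objectid != key.objectid ||
                found_key.type != key.type)
                    break;                              /* left our range */
            /* ... process item at (eb, slot) ... */
            path->slots[0]++;
    }
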
@@ -2590,7 +2617,7 @@ struct recorded_ref { | |||
2590 | * everything mixed. So we first record all refs and later process them. | 2617 | * everything mixed. So we first record all refs and later process them. |
2591 | * This function is a helper to record one ref. | 2618 | * This function is a helper to record one ref. |
2592 | */ | 2619 | */ |
2593 | static int record_ref(struct list_head *head, u64 dir, | 2620 | static int __record_ref(struct list_head *head, u64 dir, |
2594 | u64 dir_gen, struct fs_path *path) | 2621 | u64 dir_gen, struct fs_path *path) |
2595 | { | 2622 | { |
2596 | struct recorded_ref *ref; | 2623 | struct recorded_ref *ref; |
@@ -2676,12 +2703,78 @@ out: | |||
2676 | return ret; | 2703 | return ret; |
2677 | } | 2704 | } |
2678 | 2705 | ||
2706 | static struct orphan_dir_info * | ||
2707 | add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
2708 | { | ||
2709 | struct rb_node **p = &sctx->orphan_dirs.rb_node; | ||
2710 | struct rb_node *parent = NULL; | ||
2711 | struct orphan_dir_info *entry, *odi; | ||
2712 | |||
2713 | odi = kmalloc(sizeof(*odi), GFP_NOFS); | ||
2714 | if (!odi) | ||
2715 | return ERR_PTR(-ENOMEM); | ||
2716 | odi->ino = dir_ino; | ||
2717 | odi->gen = 0; | ||
2718 | |||
2719 | while (*p) { | ||
2720 | parent = *p; | ||
2721 | entry = rb_entry(parent, struct orphan_dir_info, node); | ||
2722 | if (dir_ino < entry->ino) { | ||
2723 | p = &(*p)->rb_left; | ||
2724 | } else if (dir_ino > entry->ino) { | ||
2725 | p = &(*p)->rb_right; | ||
2726 | } else { | ||
2727 | kfree(odi); | ||
2728 | return entry; | ||
2729 | } | ||
2730 | } | ||
2731 | |||
2732 | rb_link_node(&odi->node, parent, p); | ||
2733 | rb_insert_color(&odi->node, &sctx->orphan_dirs); | ||
2734 | return odi; | ||
2735 | } | ||
2736 | |||
2737 | static struct orphan_dir_info * | ||
2738 | get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
2739 | { | ||
2740 | struct rb_node *n = sctx->orphan_dirs.rb_node; | ||
2741 | struct orphan_dir_info *entry; | ||
2742 | |||
2743 | while (n) { | ||
2744 | entry = rb_entry(n, struct orphan_dir_info, node); | ||
2745 | if (dir_ino < entry->ino) | ||
2746 | n = n->rb_left; | ||
2747 | else if (dir_ino > entry->ino) | ||
2748 | n = n->rb_right; | ||
2749 | else | ||
2750 | return entry; | ||
2751 | } | ||
2752 | return NULL; | ||
2753 | } | ||
2754 | |||
2755 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) | ||
2756 | { | ||
2757 | struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); | ||
2758 | |||
2759 | return odi != NULL; | ||
2760 | } | ||
2761 | |||
2762 | static void free_orphan_dir_info(struct send_ctx *sctx, | ||
2763 | struct orphan_dir_info *odi) | ||
2764 | { | ||
2765 | if (!odi) | ||
2766 | return; | ||
2767 | rb_erase(&odi->node, &sctx->orphan_dirs); | ||
2768 | kfree(odi); | ||
2769 | } | ||
2770 | |||
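
orphan_dir_info entries are kept in an rb-tree keyed by inode number, using the kernel's standard open-coded insert that returns an existing node rather than adding a duplicate; waiting_dir_move below follows the same idiom. A generic sketch (struct item and the tree root are illustrative):

    /* Sketch: rbtree insert keyed by u64, returning any existing node. */
    struct rb_node **p = &root->rb_node, *parent = NULL;
    struct item *entry, *new = kmalloc(sizeof(*new), GFP_NOFS);

    if (!new)
            return ERR_PTR(-ENOMEM);
    new->ino = ino;
    while (*p) {
            parent = *p;
            entry = rb_entry(parent, struct item, node);
            if (ino < entry->ino)
                    p = &(*p)->rb_left;
            else if (ino > entry->ino)
                    p = &(*p)->rb_right;
            else {
                    kfree(new);     /* key present: hand back the old node */
                    return entry;
            }
    }
    rb_link_node(&new->node, parent, p);
    rb_insert_color(&new->node, root);
    return new;
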
2679 | /* | 2771 | /* |
2680 | * Returns 1 if a directory can be removed at this point in time. | 2772 | * Returns 1 if a directory can be removed at this point in time. |
2681 | * We check this by iterating all dir items and checking if the inode behind | 2773 | * We check this by iterating all dir items and checking if the inode behind |
2682 | * the dir item was already processed. | 2774 | * the dir item was already processed. |
2683 | */ | 2775 | */ |
2684 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | 2776 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
2777 | u64 send_progress) | ||
2685 | { | 2778 | { |
2686 | int ret = 0; | 2779 | int ret = 0; |
2687 | struct btrfs_root *root = sctx->parent_root; | 2780 | struct btrfs_root *root = sctx->parent_root; |
@@ -2704,31 +2797,52 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
2704 | key.objectid = dir; | 2797 | key.objectid = dir; |
2705 | key.type = BTRFS_DIR_INDEX_KEY; | 2798 | key.type = BTRFS_DIR_INDEX_KEY; |
2706 | key.offset = 0; | 2799 | key.offset = 0; |
2800 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
2801 | if (ret < 0) | ||
2802 | goto out; | ||
2707 | 2803 | ||
2708 | while (1) { | 2804 | while (1) { |
2709 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 2805 | struct waiting_dir_move *dm; |
2710 | if (ret < 0) | 2806 | |
2711 | goto out; | 2807 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { |
2712 | if (!ret) { | 2808 | ret = btrfs_next_leaf(root, path); |
2713 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 2809 | if (ret < 0) |
2714 | path->slots[0]); | 2810 | goto out; |
2811 | else if (ret > 0) | ||
2812 | break; | ||
2813 | continue; | ||
2715 | } | 2814 | } |
2716 | if (ret || found_key.objectid != key.objectid || | 2815 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
2717 | found_key.type != key.type) { | 2816 | path->slots[0]); |
2817 | if (found_key.objectid != key.objectid || | ||
2818 | found_key.type != key.type) | ||
2718 | break; | 2819 | break; |
2719 | } | ||
2720 | 2820 | ||
2721 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2821 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], |
2722 | struct btrfs_dir_item); | 2822 | struct btrfs_dir_item); |
2723 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); | 2823 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); |
2724 | 2824 | ||
2825 | dm = get_waiting_dir_move(sctx, loc.objectid); | ||
2826 | if (dm) { | ||
2827 | struct orphan_dir_info *odi; | ||
2828 | |||
2829 | odi = add_orphan_dir_info(sctx, dir); | ||
2830 | if (IS_ERR(odi)) { | ||
2831 | ret = PTR_ERR(odi); | ||
2832 | goto out; | ||
2833 | } | ||
2834 | odi->gen = dir_gen; | ||
2835 | dm->rmdir_ino = dir; | ||
2836 | ret = 0; | ||
2837 | goto out; | ||
2838 | } | ||
2839 | |||
2725 | if (loc.objectid > send_progress) { | 2840 | if (loc.objectid > send_progress) { |
2726 | ret = 0; | 2841 | ret = 0; |
2727 | goto out; | 2842 | goto out; |
2728 | } | 2843 | } |
2729 | 2844 | ||
2730 | btrfs_release_path(path); | 2845 | path->slots[0]++; |
2731 | key.offset = found_key.offset + 1; | ||
2732 | } | 2846 | } |
2733 | 2847 | ||
2734 | ret = 1; | 2848 | ret = 1; |
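
The reworked can_rmdir() does more than answer yes/no: when one of the directory's children is itself waiting to be moved, the directory is recorded as an orphan and the child's waiting_dir_move node is told to retry the rmdir once its move is applied. The decision, extracted from the hunk above:

    /* Sketch: defer the rmdir onto a child's pending move. */
    dm = get_waiting_dir_move(sctx, loc.objectid);
    if (dm) {
            struct orphan_dir_info *odi = add_orphan_dir_info(sctx, dir);

            if (IS_ERR(odi))
                    return PTR_ERR(odi);
            odi->gen = dir_gen;     /* remember the generation for later */
            dm->rmdir_ino = dir;    /* apply_dir_move() will retry rmdir */
            return 0;               /* cannot remove the directory yet   */
    }
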
@@ -2740,19 +2854,9 @@ out: | |||
2740 | 2854 | ||
2741 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) | 2855 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) |
2742 | { | 2856 | { |
2743 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2857 | struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); |
2744 | struct waiting_dir_move *entry; | ||
2745 | 2858 | ||
2746 | while (n) { | 2859 | return entry != NULL; |
2747 | entry = rb_entry(n, struct waiting_dir_move, node); | ||
2748 | if (ino < entry->ino) | ||
2749 | n = n->rb_left; | ||
2750 | else if (ino > entry->ino) | ||
2751 | n = n->rb_right; | ||
2752 | else | ||
2753 | return 1; | ||
2754 | } | ||
2755 | return 0; | ||
2756 | } | 2860 | } |
2757 | 2861 | ||
2758 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2862 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) |
@@ -2765,6 +2869,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
2765 | if (!dm) | 2869 | if (!dm) |
2766 | return -ENOMEM; | 2870 | return -ENOMEM; |
2767 | dm->ino = ino; | 2871 | dm->ino = ino; |
2872 | dm->rmdir_ino = 0; | ||
2768 | 2873 | ||
2769 | while (*p) { | 2874 | while (*p) { |
2770 | parent = *p; | 2875 | parent = *p; |
@@ -2784,31 +2889,41 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
2784 | return 0; | 2889 | return 0; |
2785 | } | 2890 | } |
2786 | 2891 | ||
2787 | static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2892 | static struct waiting_dir_move * |
2893 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino) | ||
2788 | { | 2894 | { |
2789 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2895 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; |
2790 | struct waiting_dir_move *entry; | 2896 | struct waiting_dir_move *entry; |
2791 | 2897 | ||
2792 | while (n) { | 2898 | while (n) { |
2793 | entry = rb_entry(n, struct waiting_dir_move, node); | 2899 | entry = rb_entry(n, struct waiting_dir_move, node); |
2794 | if (ino < entry->ino) { | 2900 | if (ino < entry->ino) |
2795 | n = n->rb_left; | 2901 | n = n->rb_left; |
2796 | } else if (ino > entry->ino) { | 2902 | else if (ino > entry->ino) |
2797 | n = n->rb_right; | 2903 | n = n->rb_right; |
2798 | } else { | 2904 | else |
2799 | rb_erase(&entry->node, &sctx->waiting_dir_moves); | 2905 | return entry; |
2800 | kfree(entry); | ||
2801 | return 0; | ||
2802 | } | ||
2803 | } | 2906 | } |
2804 | return -ENOENT; | 2907 | return NULL; |
2908 | } | ||
2909 | |||
2910 | static void free_waiting_dir_move(struct send_ctx *sctx, | ||
2911 | struct waiting_dir_move *dm) | ||
2912 | { | ||
2913 | if (!dm) | ||
2914 | return; | ||
2915 | rb_erase(&dm->node, &sctx->waiting_dir_moves); | ||
2916 | kfree(dm); | ||
2805 | } | 2917 | } |
2806 | 2918 | ||
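
del_waiting_dir_move() is replaced by a get/free pair because apply_dir_move() needs to read rmdir_ino off the node before it disappears; a combined delete-while-searching helper had no way to hand that state back. Usage as in the hunk below:

    /* Sketch: look up, consume state, then free. */
    dm = get_waiting_dir_move(sctx, pm->ino);
    ASSERT(dm);                     /* a pending move implies a node */
    rmdir_ino = dm->rmdir_ino;      /* read before the node is freed */
    free_waiting_dir_move(sctx, dm);
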
2807 | static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | 2919 | static int add_pending_dir_move(struct send_ctx *sctx, |
2920 | u64 ino, | ||
2921 | u64 ino_gen, | ||
2922 | u64 parent_ino) | ||
2808 | { | 2923 | { |
2809 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2924 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
2810 | struct rb_node *parent = NULL; | 2925 | struct rb_node *parent = NULL; |
2811 | struct pending_dir_move *entry, *pm; | 2926 | struct pending_dir_move *entry = NULL, *pm; |
2812 | struct recorded_ref *cur; | 2927 | struct recorded_ref *cur; |
2813 | int exists = 0; | 2928 | int exists = 0; |
2814 | int ret; | 2929 | int ret; |
@@ -2817,8 +2932,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | |||
2817 | if (!pm) | 2932 | if (!pm) |
2818 | return -ENOMEM; | 2933 | return -ENOMEM; |
2819 | pm->parent_ino = parent_ino; | 2934 | pm->parent_ino = parent_ino; |
2820 | pm->ino = sctx->cur_ino; | 2935 | pm->ino = ino; |
2821 | pm->gen = sctx->cur_inode_gen; | 2936 | pm->gen = ino_gen; |
2822 | INIT_LIST_HEAD(&pm->list); | 2937 | INIT_LIST_HEAD(&pm->list); |
2823 | INIT_LIST_HEAD(&pm->update_refs); | 2938 | INIT_LIST_HEAD(&pm->update_refs); |
2824 | RB_CLEAR_NODE(&pm->node); | 2939 | RB_CLEAR_NODE(&pm->node); |
@@ -2888,19 +3003,52 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2888 | { | 3003 | { |
2889 | struct fs_path *from_path = NULL; | 3004 | struct fs_path *from_path = NULL; |
2890 | struct fs_path *to_path = NULL; | 3005 | struct fs_path *to_path = NULL; |
3006 | struct fs_path *name = NULL; | ||
2891 | u64 orig_progress = sctx->send_progress; | 3007 | u64 orig_progress = sctx->send_progress; |
2892 | struct recorded_ref *cur; | 3008 | struct recorded_ref *cur; |
3009 | u64 parent_ino, parent_gen; | ||
3010 | struct waiting_dir_move *dm = NULL; | ||
3011 | u64 rmdir_ino = 0; | ||
2893 | int ret; | 3012 | int ret; |
2894 | 3013 | ||
3014 | name = fs_path_alloc(); | ||
2895 | from_path = fs_path_alloc(); | 3015 | from_path = fs_path_alloc(); |
2896 | if (!from_path) | 3016 | if (!name || !from_path) { |
2897 | return -ENOMEM; | 3017 | ret = -ENOMEM; |
3018 | goto out; | ||
3019 | } | ||
2898 | 3020 | ||
2899 | sctx->send_progress = pm->ino; | 3021 | dm = get_waiting_dir_move(sctx, pm->ino); |
2900 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | 3022 | ASSERT(dm); |
3023 | rmdir_ino = dm->rmdir_ino; | ||
3024 | free_waiting_dir_move(sctx, dm); | ||
3025 | |||
3026 | ret = get_first_ref(sctx->parent_root, pm->ino, | ||
3027 | &parent_ino, &parent_gen, name); | ||
2901 | if (ret < 0) | 3028 | if (ret < 0) |
2902 | goto out; | 3029 | goto out; |
2903 | 3030 | ||
3031 | if (parent_ino == sctx->cur_ino) { | ||
3032 | /* child only renamed, not moved */ | ||
3033 | ASSERT(parent_gen == sctx->cur_inode_gen); | ||
3034 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
3035 | from_path); | ||
3036 | if (ret < 0) | ||
3037 | goto out; | ||
3038 | ret = fs_path_add_path(from_path, name); | ||
3039 | if (ret < 0) | ||
3040 | goto out; | ||
3041 | } else { | ||
3042 | /* child moved and maybe renamed too */ | ||
3043 | sctx->send_progress = pm->ino; | ||
3044 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | ||
3045 | if (ret < 0) | ||
3046 | goto out; | ||
3047 | } | ||
3048 | |||
3049 | fs_path_free(name); | ||
3050 | name = NULL; | ||
3051 | |||
2904 | to_path = fs_path_alloc(); | 3052 | to_path = fs_path_alloc(); |
2905 | if (!to_path) { | 3053 | if (!to_path) { |
2906 | ret = -ENOMEM; | 3054 | ret = -ENOMEM; |
@@ -2908,9 +3056,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2908 | } | 3056 | } |
2909 | 3057 | ||
2910 | sctx->send_progress = sctx->cur_ino + 1; | 3058 | sctx->send_progress = sctx->cur_ino + 1; |
2911 | ret = del_waiting_dir_move(sctx, pm->ino); | ||
2912 | ASSERT(ret == 0); | ||
2913 | |||
2914 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); | 3059 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); |
2915 | if (ret < 0) | 3060 | if (ret < 0) |
2916 | goto out; | 3061 | goto out; |
@@ -2919,6 +3064,35 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2919 | if (ret < 0) | 3064 | if (ret < 0) |
2920 | goto out; | 3065 | goto out; |
2921 | 3066 | ||
3067 | if (rmdir_ino) { | ||
3068 | struct orphan_dir_info *odi; | ||
3069 | |||
3070 | odi = get_orphan_dir_info(sctx, rmdir_ino); | ||
3071 | if (!odi) { | ||
3072 | /* already deleted */ | ||
3073 | goto finish; | ||
3074 | } | ||
3075 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); | ||
3076 | if (ret < 0) | ||
3077 | goto out; | ||
3078 | if (!ret) | ||
3079 | goto finish; | ||
3080 | |||
3081 | name = fs_path_alloc(); | ||
3082 | if (!name) { | ||
3083 | ret = -ENOMEM; | ||
3084 | goto out; | ||
3085 | } | ||
3086 | ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); | ||
3087 | if (ret < 0) | ||
3088 | goto out; | ||
3089 | ret = send_rmdir(sctx, name); | ||
3090 | if (ret < 0) | ||
3091 | goto out; | ||
3092 | free_orphan_dir_info(sctx, odi); | ||
3093 | } | ||
3094 | |||
3095 | finish: | ||
2922 | ret = send_utimes(sctx, pm->ino, pm->gen); | 3096 | ret = send_utimes(sctx, pm->ino, pm->gen); |
2923 | if (ret < 0) | 3097 | if (ret < 0) |
2924 | goto out; | 3098 | goto out; |
@@ -2928,12 +3102,15 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2928 | * and old parent(s). | 3102 | * and old parent(s). |
2929 | */ | 3103 | */ |
2930 | list_for_each_entry(cur, &pm->update_refs, list) { | 3104 | list_for_each_entry(cur, &pm->update_refs, list) { |
3105 | if (cur->dir == rmdir_ino) | ||
3106 | continue; | ||
2931 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3107 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
2932 | if (ret < 0) | 3108 | if (ret < 0) |
2933 | goto out; | 3109 | goto out; |
2934 | } | 3110 | } |
2935 | 3111 | ||
2936 | out: | 3112 | out: |
3113 | fs_path_free(name); | ||
2937 | fs_path_free(from_path); | 3114 | fs_path_free(from_path); |
2938 | fs_path_free(to_path); | 3115 | fs_path_free(to_path); |
2939 | sctx->send_progress = orig_progress; | 3116 | sctx->send_progress = orig_progress; |
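
apply_dir_move() now distinguishes a pure rename (the new parent is the inode currently being processed, so only the last component changed) from a real move, and after issuing the rename it retries any rmdir that was parked on this move. Heavily condensed, with lookups and error paths trimmed; path_of() is shorthand for the get_cur_path() lookup:

    /* Sketch of the new apply_dir_move() flow. */
    rmdir_ino = dm->rmdir_ino;                     /* saved before free */
    if (parent_ino == sctx->cur_ino) {
            /* rename only: current path of parent + old name */
            get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, from_path);
            fs_path_add_path(from_path, name);
    } else {
            /* moved (and maybe renamed): resolve as of pm->ino */
            sctx->send_progress = pm->ino;
            get_cur_path(sctx, pm->ino, pm->gen, from_path);
    }
    send_rename(sctx, from_path, to_path);
    if (rmdir_ino &&
        can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1) > 0)
            send_rmdir(sctx, path_of(rmdir_ino));  /* deferred removal */
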
@@ -3005,17 +3182,19 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3005 | int ret; | 3182 | int ret; |
3006 | u64 ino = parent_ref->dir; | 3183 | u64 ino = parent_ref->dir; |
3007 | u64 parent_ino_before, parent_ino_after; | 3184 | u64 parent_ino_before, parent_ino_after; |
3008 | u64 new_gen, old_gen; | 3185 | u64 old_gen; |
3009 | struct fs_path *path_before = NULL; | 3186 | struct fs_path *path_before = NULL; |
3010 | struct fs_path *path_after = NULL; | 3187 | struct fs_path *path_after = NULL; |
3011 | int len1, len2; | 3188 | int len1, len2; |
3012 | 3189 | int register_upper_dirs; | |
3013 | if (parent_ref->dir <= sctx->cur_ino) | 3190 | u64 gen; |
3014 | return 0; | ||
3015 | 3191 | ||
3016 | if (is_waiting_for_move(sctx, ino)) | 3192 | if (is_waiting_for_move(sctx, ino)) |
3017 | return 1; | 3193 | return 1; |
3018 | 3194 | ||
3195 | if (parent_ref->dir <= sctx->cur_ino) | ||
3196 | return 0; | ||
3197 | |||
3019 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, | 3198 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, |
3020 | NULL, NULL, NULL, NULL); | 3199 | NULL, NULL, NULL, NULL); |
3021 | if (ret == -ENOENT) | 3200 | if (ret == -ENOENT) |
@@ -3023,12 +3202,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3023 | else if (ret < 0) | 3202 | else if (ret < 0) |
3024 | return ret; | 3203 | return ret; |
3025 | 3204 | ||
3026 | ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, | 3205 | if (parent_ref->dir_gen != old_gen) |
3027 | NULL, NULL, NULL, NULL); | ||
3028 | if (ret < 0) | ||
3029 | return ret; | ||
3030 | |||
3031 | if (new_gen != old_gen) | ||
3032 | return 0; | 3206 | return 0; |
3033 | 3207 | ||
3034 | path_before = fs_path_alloc(); | 3208 | path_before = fs_path_alloc(); |
@@ -3051,7 +3225,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3051 | } | 3225 | } |
3052 | 3226 | ||
3053 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | 3227 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, |
3054 | NULL, path_after); | 3228 | &gen, path_after); |
3055 | if (ret == -ENOENT) { | 3229 | if (ret == -ENOENT) { |
3056 | ret = 0; | 3230 | ret = 0; |
3057 | goto out; | 3231 | goto out; |
@@ -3061,13 +3235,67 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3061 | 3235 | ||
3062 | len1 = fs_path_len(path_before); | 3236 | len1 = fs_path_len(path_before); |
3063 | len2 = fs_path_len(path_after); | 3237 | len2 = fs_path_len(path_after); |
3064 | if ((parent_ino_before != parent_ino_after) && (len1 != len2 || | 3238 | if (parent_ino_before != parent_ino_after || len1 != len2 || |
3065 | memcmp(path_before->start, path_after->start, len1))) { | 3239 | memcmp(path_before->start, path_after->start, len1)) { |
3066 | ret = 1; | 3240 | ret = 1; |
3067 | goto out; | 3241 | goto out; |
3068 | } | 3242 | } |
3069 | ret = 0; | 3243 | ret = 0; |
3070 | 3244 | ||
3245 | /* | ||
3246 | * Ok, our new most direct ancestor has a higher inode number but | ||
3247 | * wasn't moved/renamed. So maybe some of the new ancestors higher in | ||
3248 | * the hierarchy have a higher inode number too *and* were renamed | ||
3249 | * or moved - in this case we need to wait for the ancestor's rename | ||
3250 | * or move operation before we can do the move/rename for the current | ||
3251 | * inode. | ||
3252 | */ | ||
3253 | register_upper_dirs = 0; | ||
3254 | ino = parent_ino_after; | ||
3255 | again: | ||
3256 | while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { | ||
3257 | u64 parent_gen; | ||
3258 | |||
3259 | fs_path_reset(path_before); | ||
3260 | fs_path_reset(path_after); | ||
3261 | |||
3262 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | ||
3263 | &parent_gen, path_after); | ||
3264 | if (ret < 0) | ||
3265 | goto out; | ||
3266 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | ||
3267 | NULL, path_before); | ||
3268 | if (ret == -ENOENT) { | ||
3269 | ret = 0; | ||
3270 | break; | ||
3271 | } else if (ret < 0) { | ||
3272 | goto out; | ||
3273 | } | ||
3274 | |||
3275 | len1 = fs_path_len(path_before); | ||
3276 | len2 = fs_path_len(path_after); | ||
3277 | if (parent_ino_before != parent_ino_after || len1 != len2 || | ||
3278 | memcmp(path_before->start, path_after->start, len1)) { | ||
3279 | ret = 1; | ||
3280 | if (register_upper_dirs) { | ||
3281 | break; | ||
3282 | } else { | ||
3283 | register_upper_dirs = 1; | ||
3284 | ino = parent_ref->dir; | ||
3285 | gen = parent_ref->dir_gen; | ||
3286 | goto again; | ||
3287 | } | ||
3288 | } else if (register_upper_dirs) { | ||
3289 | ret = add_pending_dir_move(sctx, ino, gen, | ||
3290 | parent_ino_after); | ||
3291 | if (ret < 0 && ret != -EEXIST) | ||
3292 | goto out; | ||
3293 | } | ||
3294 | |||
3295 | ino = parent_ino_after; | ||
3296 | gen = parent_gen; | ||
3297 | } | ||
3298 | |||
3071 | out: | 3299 | out: |
3072 | fs_path_free(path_before); | 3300 | fs_path_free(path_before); |
3073 | fs_path_free(path_after); | 3301 | fs_path_free(path_after); |
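
When the direct ancestor check finds a path change, wait_for_parent_move() restarts the ancestor walk in a second pass (register_upper_dirs) that queues a pending move for each ancestor it climbs past, so renamed or moved ancestors with higher inode numbers are handled in order. The control flow, reduced to its skeleton (path_changed stands in for the len/memcmp comparison in the hunk above):

    /* Sketch: detect on pass one, register on pass two. */
    register_upper_dirs = 0;
    ino = parent_ino_after;
    again:
    while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) {
            /* compare first-ref path of ino in send_root vs parent_root */
            if (path_changed) {
                    ret = 1;
                    if (register_upper_dirs)
                            break;                    /* pass two: done  */
                    register_upper_dirs = 1;          /* restart walk,   */
                    ino = parent_ref->dir;            /* recording moves */
                    goto again;
            } else if (register_upper_dirs) {
                    add_pending_dir_move(sctx, ino, gen, parent_ino_after);
            }
            ino = parent_ino_after;                   /* climb one level */
    }
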
@@ -3089,6 +3317,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
3089 | u64 ow_gen; | 3317 | u64 ow_gen; |
3090 | int did_overwrite = 0; | 3318 | int did_overwrite = 0; |
3091 | int is_orphan = 0; | 3319 | int is_orphan = 0; |
3320 | u64 last_dir_ino_rm = 0; | ||
3092 | 3321 | ||
3093 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 3322 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
3094 | 3323 | ||
@@ -3227,9 +3456,14 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3227 | * dirs, we always have one new and one deleted | 3456 | * dirs, we always have one new and one deleted |
3228 | * ref. The deleted ref is ignored later. | 3457 | * ref. The deleted ref is ignored later. |
3229 | */ | 3458 | */ |
3230 | if (wait_for_parent_move(sctx, cur)) { | 3459 | ret = wait_for_parent_move(sctx, cur); |
3460 | if (ret < 0) | ||
3461 | goto out; | ||
3462 | if (ret) { | ||
3231 | ret = add_pending_dir_move(sctx, | 3463 | ret = add_pending_dir_move(sctx, |
3232 | cur->dir); | 3464 | sctx->cur_ino, |
3465 | sctx->cur_inode_gen, | ||
3466 | cur->dir); | ||
3233 | *pending_move = 1; | 3467 | *pending_move = 1; |
3234 | } else { | 3468 | } else { |
3235 | ret = send_rename(sctx, valid_path, | 3469 | ret = send_rename(sctx, valid_path, |
@@ -3259,7 +3493,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3259 | * later, we do this check again and rmdir it then if possible. | 3493 | * later, we do this check again and rmdir it then if possible. |
3260 | * See the use of check_dirs for more details. | 3494 | * See the use of check_dirs for more details. |
3261 | */ | 3495 | */ |
3262 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); | 3496 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, |
3497 | sctx->cur_ino); | ||
3263 | if (ret < 0) | 3498 | if (ret < 0) |
3264 | goto out; | 3499 | goto out; |
3265 | if (ret) { | 3500 | if (ret) { |
@@ -3350,8 +3585,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3350 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3585 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
3351 | if (ret < 0) | 3586 | if (ret < 0) |
3352 | goto out; | 3587 | goto out; |
3353 | } else if (ret == inode_state_did_delete) { | 3588 | } else if (ret == inode_state_did_delete && |
3354 | ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); | 3589 | cur->dir != last_dir_ino_rm) { |
3590 | ret = can_rmdir(sctx, cur->dir, cur->dir_gen, | ||
3591 | sctx->cur_ino); | ||
3355 | if (ret < 0) | 3592 | if (ret < 0) |
3356 | goto out; | 3593 | goto out; |
3357 | if (ret) { | 3594 | if (ret) { |
@@ -3362,6 +3599,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3362 | ret = send_rmdir(sctx, valid_path); | 3599 | ret = send_rmdir(sctx, valid_path); |
3363 | if (ret < 0) | 3600 | if (ret < 0) |
3364 | goto out; | 3601 | goto out; |
3602 | last_dir_ino_rm = cur->dir; | ||
3365 | } | 3603 | } |
3366 | } | 3604 | } |
3367 | } | 3605 | } |
@@ -3375,9 +3613,8 @@ out: | |||
3375 | return ret; | 3613 | return ret; |
3376 | } | 3614 | } |
3377 | 3615 | ||
3378 | static int __record_new_ref(int num, u64 dir, int index, | 3616 | static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, |
3379 | struct fs_path *name, | 3617 | struct fs_path *name, void *ctx, struct list_head *refs) |
3380 | void *ctx) | ||
3381 | { | 3618 | { |
3382 | int ret = 0; | 3619 | int ret = 0; |
3383 | struct send_ctx *sctx = ctx; | 3620 | struct send_ctx *sctx = ctx; |
@@ -3388,7 +3625,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
3388 | if (!p) | 3625 | if (!p) |
3389 | return -ENOMEM; | 3626 | return -ENOMEM; |
3390 | 3627 | ||
3391 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3628 | ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, |
3392 | NULL, NULL); | 3629 | NULL, NULL); |
3393 | if (ret < 0) | 3630 | if (ret < 0) |
3394 | goto out; | 3631 | goto out; |
@@ -3400,7 +3637,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
3400 | if (ret < 0) | 3637 | if (ret < 0) |
3401 | goto out; | 3638 | goto out; |
3402 | 3639 | ||
3403 | ret = record_ref(&sctx->new_refs, dir, gen, p); | 3640 | ret = __record_ref(refs, dir, gen, p); |
3404 | 3641 | ||
3405 | out: | 3642 | out: |
3406 | if (ret) | 3643 | if (ret) |
@@ -3408,37 +3645,23 @@ out: | |||
3408 | return ret; | 3645 | return ret; |
3409 | } | 3646 | } |
3410 | 3647 | ||
3648 | static int __record_new_ref(int num, u64 dir, int index, | ||
3649 | struct fs_path *name, | ||
3650 | void *ctx) | ||
3651 | { | ||
3652 | struct send_ctx *sctx = ctx; | ||
3653 | return record_ref(sctx->send_root, num, dir, index, name, | ||
3654 | ctx, &sctx->new_refs); | ||
3655 | } | ||
3656 | |||
3657 | |||
3411 | static int __record_deleted_ref(int num, u64 dir, int index, | 3658 | static int __record_deleted_ref(int num, u64 dir, int index, |
3412 | struct fs_path *name, | 3659 | struct fs_path *name, |
3413 | void *ctx) | 3660 | void *ctx) |
3414 | { | 3661 | { |
3415 | int ret = 0; | ||
3416 | struct send_ctx *sctx = ctx; | 3662 | struct send_ctx *sctx = ctx; |
3417 | struct fs_path *p; | 3663 | return record_ref(sctx->parent_root, num, dir, index, name, |
3418 | u64 gen; | 3664 | ctx, &sctx->deleted_refs); |
3419 | |||
3420 | p = fs_path_alloc(); | ||
3421 | if (!p) | ||
3422 | return -ENOMEM; | ||
3423 | |||
3424 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | ||
3425 | NULL, NULL); | ||
3426 | if (ret < 0) | ||
3427 | goto out; | ||
3428 | |||
3429 | ret = get_cur_path(sctx, dir, gen, p); | ||
3430 | if (ret < 0) | ||
3431 | goto out; | ||
3432 | ret = fs_path_add_path(p, name); | ||
3433 | if (ret < 0) | ||
3434 | goto out; | ||
3435 | |||
3436 | ret = record_ref(&sctx->deleted_refs, dir, gen, p); | ||
3437 | |||
3438 | out: | ||
3439 | if (ret) | ||
3440 | fs_path_free(p); | ||
3441 | return ret; | ||
3442 | } | 3665 | } |
3443 | 3666 | ||
3444 | static int record_new_ref(struct send_ctx *sctx) | 3667 | static int record_new_ref(struct send_ctx *sctx) |
@@ -3619,21 +3842,31 @@ static int process_all_refs(struct send_ctx *sctx, | |||
3619 | root = sctx->parent_root; | 3842 | root = sctx->parent_root; |
3620 | cb = __record_deleted_ref; | 3843 | cb = __record_deleted_ref; |
3621 | } else { | 3844 | } else { |
3622 | BUG(); | 3845 | btrfs_err(sctx->send_root->fs_info, |
3846 | "Wrong command %d in process_all_refs", cmd); | ||
3847 | ret = -EINVAL; | ||
3848 | goto out; | ||
3623 | } | 3849 | } |
3624 | 3850 | ||
3625 | key.objectid = sctx->cmp_key->objectid; | 3851 | key.objectid = sctx->cmp_key->objectid; |
3626 | key.type = BTRFS_INODE_REF_KEY; | 3852 | key.type = BTRFS_INODE_REF_KEY; |
3627 | key.offset = 0; | 3853 | key.offset = 0; |
3628 | while (1) { | 3854 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
3629 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3855 | if (ret < 0) |
3630 | if (ret < 0) | 3856 | goto out; |
3631 | goto out; | ||
3632 | if (ret) | ||
3633 | break; | ||
3634 | 3857 | ||
3858 | while (1) { | ||
3635 | eb = path->nodes[0]; | 3859 | eb = path->nodes[0]; |
3636 | slot = path->slots[0]; | 3860 | slot = path->slots[0]; |
3861 | if (slot >= btrfs_header_nritems(eb)) { | ||
3862 | ret = btrfs_next_leaf(root, path); | ||
3863 | if (ret < 0) | ||
3864 | goto out; | ||
3865 | else if (ret > 0) | ||
3866 | break; | ||
3867 | continue; | ||
3868 | } | ||
3869 | |||
3637 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3870 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
3638 | 3871 | ||
3639 | if (found_key.objectid != key.objectid || | 3872 | if (found_key.objectid != key.objectid || |
@@ -3642,11 +3875,10 @@ static int process_all_refs(struct send_ctx *sctx, | |||
3642 | break; | 3875 | break; |
3643 | 3876 | ||
3644 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); | 3877 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); |
3645 | btrfs_release_path(path); | ||
3646 | if (ret < 0) | 3878 | if (ret < 0) |
3647 | goto out; | 3879 | goto out; |
3648 | 3880 | ||
3649 | key.offset = found_key.offset + 1; | 3881 | path->slots[0]++; |
3650 | } | 3882 | } |
3651 | btrfs_release_path(path); | 3883 | btrfs_release_path(path); |
3652 | 3884 | ||
@@ -3927,19 +4159,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
3927 | key.objectid = sctx->cmp_key->objectid; | 4159 | key.objectid = sctx->cmp_key->objectid; |
3928 | key.type = BTRFS_XATTR_ITEM_KEY; | 4160 | key.type = BTRFS_XATTR_ITEM_KEY; |
3929 | key.offset = 0; | 4161 | key.offset = 0; |
3930 | while (1) { | 4162 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
3931 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 4163 | if (ret < 0) |
3932 | if (ret < 0) | 4164 | goto out; |
3933 | goto out; | ||
3934 | if (ret) { | ||
3935 | ret = 0; | ||
3936 | goto out; | ||
3937 | } | ||
3938 | 4165 | ||
4166 | while (1) { | ||
3939 | eb = path->nodes[0]; | 4167 | eb = path->nodes[0]; |
3940 | slot = path->slots[0]; | 4168 | slot = path->slots[0]; |
3941 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 4169 | if (slot >= btrfs_header_nritems(eb)) { |
4170 | ret = btrfs_next_leaf(root, path); | ||
4171 | if (ret < 0) { | ||
4172 | goto out; | ||
4173 | } else if (ret > 0) { | ||
4174 | ret = 0; | ||
4175 | break; | ||
4176 | } | ||
4177 | continue; | ||
4178 | } | ||
3942 | 4179 | ||
4180 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
3943 | if (found_key.objectid != key.objectid || | 4181 | if (found_key.objectid != key.objectid || |
3944 | found_key.type != key.type) { | 4182 | found_key.type != key.type) { |
3945 | ret = 0; | 4183 | ret = 0; |
@@ -3951,8 +4189,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
3951 | if (ret < 0) | 4189 | if (ret < 0) |
3952 | goto out; | 4190 | goto out; |
3953 | 4191 | ||
3954 | btrfs_release_path(path); | 4192 | path->slots[0]++; |
3955 | key.offset = found_key.offset + 1; | ||
3956 | } | 4193 | } |
3957 | 4194 | ||
3958 | out: | 4195 | out: |
@@ -3991,6 +4228,13 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) | |||
3991 | goto out; | 4228 | goto out; |
3992 | 4229 | ||
3993 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; | 4230 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; |
4231 | |||
4232 | /* initial readahead */ | ||
4233 | memset(&sctx->ra, 0, sizeof(struct file_ra_state)); | ||
4234 | file_ra_state_init(&sctx->ra, inode->i_mapping); | ||
4235 | btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, | ||
4236 | last_index - index + 1); | ||
4237 | |||
3994 | while (index <= last_index) { | 4238 | while (index <= last_index) { |
3995 | unsigned cur_len = min_t(unsigned, len, | 4239 | unsigned cur_len = min_t(unsigned, len, |
3996 | PAGE_CACHE_SIZE - pg_offset); | 4240 | PAGE_CACHE_SIZE - pg_offset); |
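
fill_read_buf() now primes readahead across the whole requested range before the page loop, so the per-page lookups mostly hit cache. btrfs_force_ra() is, to my reading, a thin wrapper over the generic sync readahead helper, so the idiom in plain form looks like this (ra embedded in a caller-owned context):

    /* Sketch: reset readahead state, then read ahead the full window. */
    memset(&ra, 0, sizeof(ra));
    file_ra_state_init(&ra, inode->i_mapping);
    page_cache_sync_readahead(inode->i_mapping, &ra, NULL /* no file */,
                              index, last_index - index + 1);
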
@@ -4763,18 +5007,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
4763 | ret = apply_children_dir_moves(sctx); | 5007 | ret = apply_children_dir_moves(sctx); |
4764 | if (ret) | 5008 | if (ret) |
4765 | goto out; | 5009 | goto out; |
5010 | /* | ||
5011 | * Need to send that every time, no matter if it actually | ||
5012 | * changed between the two trees as we have done changes to | ||
5013 | * the inode before. If our inode is a directory and it's | ||
5014 | * waiting to be moved/renamed, we will send its utimes when | ||
5015 | * it's moved/renamed, therefore we don't need to do it here. | ||
5016 | */ | ||
5017 | sctx->send_progress = sctx->cur_ino + 1; | ||
5018 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
5019 | if (ret < 0) | ||
5020 | goto out; | ||
4766 | } | 5021 | } |
4767 | 5022 | ||
4768 | /* | ||
4769 | * Need to send that every time, no matter if it actually | ||
4770 | * changed between the two trees as we have done changes to | ||
4771 | * the inode before. | ||
4772 | */ | ||
4773 | sctx->send_progress = sctx->cur_ino + 1; | ||
4774 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
4775 | if (ret < 0) | ||
4776 | goto out; | ||
4777 | |||
4778 | out: | 5023 | out: |
4779 | return ret; | 5024 | return ret; |
4780 | } | 5025 | } |
@@ -4840,6 +5085,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
4840 | sctx->left_path->nodes[0], left_ii); | 5085 | sctx->left_path->nodes[0], left_ii); |
4841 | sctx->cur_inode_mode = btrfs_inode_mode( | 5086 | sctx->cur_inode_mode = btrfs_inode_mode( |
4842 | sctx->left_path->nodes[0], left_ii); | 5087 | sctx->left_path->nodes[0], left_ii); |
5088 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
5089 | sctx->left_path->nodes[0], left_ii); | ||
4843 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 5090 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
4844 | ret = send_create_inode_if_needed(sctx); | 5091 | ret = send_create_inode_if_needed(sctx); |
4845 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 5092 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
@@ -4884,6 +5131,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
4884 | sctx->left_path->nodes[0], left_ii); | 5131 | sctx->left_path->nodes[0], left_ii); |
4885 | sctx->cur_inode_mode = btrfs_inode_mode( | 5132 | sctx->cur_inode_mode = btrfs_inode_mode( |
4886 | sctx->left_path->nodes[0], left_ii); | 5133 | sctx->left_path->nodes[0], left_ii); |
5134 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
5135 | sctx->left_path->nodes[0], left_ii); | ||
4887 | ret = send_create_inode_if_needed(sctx); | 5136 | ret = send_create_inode_if_needed(sctx); |
4888 | if (ret < 0) | 5137 | if (ret < 0) |
4889 | goto out; | 5138 | goto out; |
@@ -5118,6 +5367,7 @@ out: | |||
5118 | static int full_send_tree(struct send_ctx *sctx) | 5367 | static int full_send_tree(struct send_ctx *sctx) |
5119 | { | 5368 | { |
5120 | int ret; | 5369 | int ret; |
5370 | struct btrfs_trans_handle *trans = NULL; | ||
5121 | struct btrfs_root *send_root = sctx->send_root; | 5371 | struct btrfs_root *send_root = sctx->send_root; |
5122 | struct btrfs_key key; | 5372 | struct btrfs_key key; |
5123 | struct btrfs_key found_key; | 5373 | struct btrfs_key found_key; |
@@ -5139,6 +5389,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
5139 | key.type = BTRFS_INODE_ITEM_KEY; | 5389 | key.type = BTRFS_INODE_ITEM_KEY; |
5140 | key.offset = 0; | 5390 | key.offset = 0; |
5141 | 5391 | ||
5392 | join_trans: | ||
5393 | /* | ||
5394 | * We need to make sure the transaction does not get committed | ||
5395 | * while we do anything on commit roots. Join a transaction to prevent | ||
5396 | * this. | ||
5397 | */ | ||
5398 | trans = btrfs_join_transaction(send_root); | ||
5399 | if (IS_ERR(trans)) { | ||
5400 | ret = PTR_ERR(trans); | ||
5401 | trans = NULL; | ||
5402 | goto out; | ||
5403 | } | ||
5404 | |||
5142 | /* | 5405 | /* |
5143 | * Make sure the tree has not changed after re-joining. We detect this | 5406 | * Make sure the tree has not changed after re-joining. We detect this |
5144 | * by comparing start_ctransid and ctransid. They should always match. | 5407 | * by comparing start_ctransid and ctransid. They should always match. |
@@ -5162,6 +5425,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
5162 | goto out_finish; | 5425 | goto out_finish; |
5163 | 5426 | ||
5164 | while (1) { | 5427 | while (1) { |
5428 | /* | ||
5429 | * When someone wants to commit while we iterate, end the | ||
5430 | * joined transaction and rejoin. | ||
5431 | */ | ||
5432 | if (btrfs_should_end_transaction(trans, send_root)) { | ||
5433 | ret = btrfs_end_transaction(trans, send_root); | ||
5434 | trans = NULL; | ||
5435 | if (ret < 0) | ||
5436 | goto out; | ||
5437 | btrfs_release_path(path); | ||
5438 | goto join_trans; | ||
5439 | } | ||
5440 | |||
5165 | eb = path->nodes[0]; | 5441 | eb = path->nodes[0]; |
5166 | slot = path->slots[0]; | 5442 | slot = path->slots[0]; |
5167 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 5443 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
@@ -5189,6 +5465,12 @@ out_finish: | |||
5189 | 5465 | ||
5190 | out: | 5466 | out: |
5191 | btrfs_free_path(path); | 5467 | btrfs_free_path(path); |
5468 | if (trans) { | ||
5469 | if (!ret) | ||
5470 | ret = btrfs_end_transaction(trans, send_root); | ||
5471 | else | ||
5472 | btrfs_end_transaction(trans, send_root); | ||
5473 | } | ||
5192 | return ret; | 5474 | return ret; |
5193 | } | 5475 | } |
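
full_send_tree() pins the commit roots by holding a joined transaction for the duration of the walk, but yields whenever a commit is wanted: it ends the handle, drops the path (and its locks), and rejoins. Skeleton of the loop:

    /* Sketch: hold a joined transaction across the walk, yielding to
     * pending commits (error handling condensed). */
    join_trans:
    trans = btrfs_join_transaction(send_root);
    if (IS_ERR(trans))
            return PTR_ERR(trans);
    /* ... re-validate ctransid, re-search to the current key ... */
    while (1) {
            if (btrfs_should_end_transaction(trans, send_root)) {
                    btrfs_end_transaction(trans, send_root);
                    btrfs_release_path(path);   /* drop locks first */
                    goto join_trans;
            }
            /* ... process one item, advance the path ... */
    }
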
5194 | 5476 | ||
@@ -5340,6 +5622,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5340 | 5622 | ||
5341 | sctx->pending_dir_moves = RB_ROOT; | 5623 | sctx->pending_dir_moves = RB_ROOT; |
5342 | sctx->waiting_dir_moves = RB_ROOT; | 5624 | sctx->waiting_dir_moves = RB_ROOT; |
5625 | sctx->orphan_dirs = RB_ROOT; | ||
5343 | 5626 | ||
5344 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * | 5627 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * |
5345 | (arg->clone_sources_count + 1)); | 5628 | (arg->clone_sources_count + 1)); |
@@ -5477,6 +5760,16 @@ out: | |||
5477 | kfree(dm); | 5760 | kfree(dm); |
5478 | } | 5761 | } |
5479 | 5762 | ||
5763 | WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); | ||
5764 | while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { | ||
5765 | struct rb_node *n; | ||
5766 | struct orphan_dir_info *odi; | ||
5767 | |||
5768 | n = rb_first(&sctx->orphan_dirs); | ||
5769 | odi = rb_entry(n, struct orphan_dir_info, node); | ||
5770 | free_orphan_dir_info(sctx, odi); | ||
5771 | } | ||
5772 | |||
5480 | if (sort_clone_roots) { | 5773 | if (sort_clone_roots) { |
5481 | for (i = 0; i < sctx->clone_roots_cnt; i++) | 5774 | for (i = 0; i < sctx->clone_roots_cnt; i++) |
5482 | btrfs_root_dec_send_in_progress( | 5775 | btrfs_root_dec_send_in_progress( |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d04db817be5c..9dbf42395153 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -1305,13 +1305,6 @@ error_fs_info: | |||
1305 | return ERR_PTR(error); | 1305 | return ERR_PTR(error); |
1306 | } | 1306 | } |
1307 | 1307 | ||
1308 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
1309 | { | ||
1310 | spin_lock_irq(&workers->lock); | ||
1311 | workers->max_workers = new_limit; | ||
1312 | spin_unlock_irq(&workers->lock); | ||
1313 | } | ||
1314 | |||
1315 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | 1308 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, |
1316 | int new_pool_size, int old_pool_size) | 1309 | int new_pool_size, int old_pool_size) |
1317 | { | 1310 | { |
@@ -1323,21 +1316,20 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | |||
1323 | btrfs_info(fs_info, "resize thread pool %d -> %d", | 1316 | btrfs_info(fs_info, "resize thread pool %d -> %d", |
1324 | old_pool_size, new_pool_size); | 1317 | old_pool_size, new_pool_size); |
1325 | 1318 | ||
1326 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | 1319 | btrfs_workqueue_set_max(fs_info->workers, new_pool_size); |
1327 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | 1320 | btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); |
1328 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | 1321 | btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size); |
1329 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | 1322 | btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); |
1330 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | 1323 | btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); |
1331 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | 1324 | btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); |
1332 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | 1325 | btrfs_workqueue_set_max(fs_info->endio_meta_write_workers, |
1333 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | 1326 | new_pool_size); |
1334 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | 1327 | btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); |
1335 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | 1328 | btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); |
1336 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | 1329 | btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); |
1337 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | 1330 | btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size); |
1338 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | 1331 | btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers, |
1339 | btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers, | 1332 | new_pool_size); |
1340 | new_pool_size); | ||
1341 | } | 1333 | } |
1342 | 1334 | ||
1343 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) | 1335 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) |
@@ -1388,6 +1380,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1388 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; | 1380 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; |
1389 | int ret; | 1381 | int ret; |
1390 | 1382 | ||
1383 | sync_filesystem(sb); | ||
1391 | btrfs_remount_prepare(fs_info); | 1384 | btrfs_remount_prepare(fs_info); |
1392 | 1385 | ||
1393 | ret = btrfs_parse_options(root, data); | 1386 | ret = btrfs_parse_options(root, data); |
@@ -1479,6 +1472,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1479 | sb->s_flags &= ~MS_RDONLY; | 1472 | sb->s_flags &= ~MS_RDONLY; |
1480 | } | 1473 | } |
1481 | out: | 1474 | out: |
1475 | wake_up_process(fs_info->transaction_kthread); | ||
1482 | btrfs_remount_cleanup(fs_info, old_opts); | 1476 | btrfs_remount_cleanup(fs_info, old_opts); |
1483 | return 0; | 1477 | return 0; |
1484 | 1478 | ||
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 865f4cf9a769..c5eb2143dc66 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/kobject.h> | 24 | #include <linux/kobject.h> |
25 | #include <linux/bug.h> | 25 | #include <linux/bug.h> |
26 | #include <linux/genhd.h> | 26 | #include <linux/genhd.h> |
27 | #include <linux/debugfs.h> | ||
27 | 28 | ||
28 | #include "ctree.h" | 29 | #include "ctree.h" |
29 | #include "disk-io.h" | 30 | #include "disk-io.h" |
@@ -599,6 +600,12 @@ static int add_device_membership(struct btrfs_fs_info *fs_info) | |||
599 | /* /sys/fs/btrfs/ entry */ | 600 | /* /sys/fs/btrfs/ entry */ |
600 | static struct kset *btrfs_kset; | 601 | static struct kset *btrfs_kset; |
601 | 602 | ||
603 | /* /sys/kernel/debug/btrfs */ | ||
604 | static struct dentry *btrfs_debugfs_root_dentry; | ||
605 | |||
606 | /* Debugging tunables and exported data */ | ||
607 | u64 btrfs_debugfs_test; | ||
608 | |||
602 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) | 609 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) |
603 | { | 610 | { |
604 | int error; | 611 | int error; |
@@ -642,27 +649,41 @@ failure: | |||
642 | return error; | 649 | return error; |
643 | } | 650 | } |
644 | 651 | ||
652 | static int btrfs_init_debugfs(void) | ||
653 | { | ||
654 | #ifdef CONFIG_DEBUG_FS | ||
655 | btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL); | ||
656 | if (!btrfs_debugfs_root_dentry) | ||
657 | return -ENOMEM; | ||
658 | |||
659 | debugfs_create_u64("test", S_IRUGO | S_IWUGO, btrfs_debugfs_root_dentry, | ||
660 | &btrfs_debugfs_test); | ||
661 | #endif | ||
662 | return 0; | ||
663 | } | ||
664 | |||
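
Teardown of the new debugfs directory is a single debugfs_remove_recursive() in btrfs_exit_sysfs(), so individual files never need explicit removal. As a standalone sketch of the pattern (module and tunable names invented):

    /* Sketch: module-scoped debugfs dir with one u64 tunable. */
    #include <linux/debugfs.h>

    static struct dentry *dbg_root;
    static u64 dbg_test;

    static int __init dbg_init(void)
    {
            dbg_root = debugfs_create_dir("example", NULL);
            if (!dbg_root)
                    return -ENOMEM;
            debugfs_create_u64("test", 0644, dbg_root, &dbg_test);
            return 0;
    }

    static void __exit dbg_exit(void)
    {
            debugfs_remove_recursive(dbg_root);  /* removes "test" too */
    }
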
645 | int btrfs_init_sysfs(void) | 665 | int btrfs_init_sysfs(void) |
646 | { | 666 | { |
647 | int ret; | 667 | int ret; |
668 | |||
648 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); | 669 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); |
649 | if (!btrfs_kset) | 670 | if (!btrfs_kset) |
650 | return -ENOMEM; | 671 | return -ENOMEM; |
651 | 672 | ||
652 | init_feature_attrs(); | 673 | ret = btrfs_init_debugfs(); |
674 | if (ret) | ||
675 | return ret; | ||
653 | 676 | ||
677 | init_feature_attrs(); | ||
654 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 678 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
655 | if (ret) { | ||
656 | kset_unregister(btrfs_kset); | ||
657 | return ret; | ||
658 | } | ||
659 | 679 | ||
660 | return 0; | 680 | return ret; |
661 | } | 681 | } |
662 | 682 | ||
663 | void btrfs_exit_sysfs(void) | 683 | void btrfs_exit_sysfs(void) |
664 | { | 684 | { |
665 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 685 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
666 | kset_unregister(btrfs_kset); | 686 | kset_unregister(btrfs_kset); |
687 | debugfs_remove_recursive(btrfs_debugfs_root_dentry); | ||
667 | } | 688 | } |
668 | 689 | ||
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index f3cea3710d44..9ab576318a84 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
@@ -1,6 +1,11 @@ | |||
1 | #ifndef _BTRFS_SYSFS_H_ | 1 | #ifndef _BTRFS_SYSFS_H_ |
2 | #define _BTRFS_SYSFS_H_ | 2 | #define _BTRFS_SYSFS_H_ |
3 | 3 | ||
4 | /* | ||
5 | * Data exported through debugfs | ||
6 | */ | ||
7 | extern u64 btrfs_debugfs_test; | ||
8 | |||
4 | enum btrfs_feature_set { | 9 | enum btrfs_feature_set { |
5 | FEAT_COMPAT, | 10 | FEAT_COMPAT, |
6 | FEAT_COMPAT_RO, | 11 | FEAT_COMPAT_RO, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 34cd83184c4a..a04707f740d6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -683,7 +683,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
684 | int err = 0; | 684 | int err = 0; |
685 | 685 | ||
686 | if (--trans->use_count) { | 686 | if (trans->use_count > 1) { |
687 | trans->use_count--; | ||
687 | trans->block_rsv = trans->orig_rsv; | 688 | trans->block_rsv = trans->orig_rsv; |
688 | return 0; | 689 | return 0; |
689 | } | 690 | } |
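
The old code decremented use_count before testing it, which forced the throttled path further down to re-increment it before calling btrfs_commit_transaction() (commit ends the handle itself and would otherwise see a stale count). Testing for > 1 and decrementing only on the early return lets the last reference flow straight into the commit:

    /* before: consume a ref up front, undo it before committing */
    if (--trans->use_count) {
            /* ... nested handle, return early ... */
            return 0;
    }
    /* ... */
    trans->use_count++;                 /* re-add, commit re-ends us */
    return btrfs_commit_transaction(trans, root);

    /* after: consume a ref only when actually returning early */
    if (trans->use_count > 1) {
            trans->use_count--;
            /* ... nested handle, return early ... */
            return 0;
    }
    /* ... */
    return btrfs_commit_transaction(trans, root);  /* owns last ref */
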
@@ -731,17 +732,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
731 | } | 732 | } |
732 | 733 | ||
733 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 734 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
734 | if (throttle) { | 735 | if (throttle) |
735 | /* | ||
736 | * We may race with somebody else here so end up having | ||
737 | * to call end_transaction on ourselves again, so inc | ||
738 | * our use_count. | ||
739 | */ | ||
740 | trans->use_count++; | ||
741 | return btrfs_commit_transaction(trans, root); | 736 | return btrfs_commit_transaction(trans, root); |
742 | } else { | 737 | else |
743 | wake_up_process(info->transaction_kthread); | 738 | wake_up_process(info->transaction_kthread); |
744 | } | ||
745 | } | 739 | } |
746 | 740 | ||
747 | if (trans->type & __TRANS_FREEZABLE) | 741 | if (trans->type & __TRANS_FREEZABLE) |
@@ -1578,10 +1572,9 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
1578 | 1572 | ||
1579 | trace_btrfs_transaction_commit(root); | 1573 | trace_btrfs_transaction_commit(root); |
1580 | 1574 | ||
1581 | btrfs_scrub_continue(root); | ||
1582 | |||
1583 | if (current->journal_info == trans) | 1575 | if (current->journal_info == trans) |
1584 | current->journal_info = NULL; | 1576 | current->journal_info = NULL; |
1577 | btrfs_scrub_cancel(root->fs_info); | ||
1585 | 1578 | ||
1586 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1579 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
1587 | } | 1580 | } |
@@ -1621,7 +1614,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
1621 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1614 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
1622 | { | 1615 | { |
1623 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | 1616 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
1624 | return btrfs_start_delalloc_roots(fs_info, 1); | 1617 | return btrfs_start_delalloc_roots(fs_info, 1, -1); |
1625 | return 0; | 1618 | return 0; |
1626 | } | 1619 | } |
1627 | 1620 | ||
@@ -1754,7 +1747,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1754 | /* ->aborted might be set after the previous check, so check it */ | 1747 | /* ->aborted might be set after the previous check, so check it */ |
1755 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | 1748 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { |
1756 | ret = cur_trans->aborted; | 1749 | ret = cur_trans->aborted; |
1757 | goto cleanup_transaction; | 1750 | goto scrub_continue; |
1758 | } | 1751 | } |
1759 | /* | 1752 | /* |
1760 | * the reloc mutex makes sure that we stop | 1753 | * the reloc mutex makes sure that we stop |
@@ -1771,7 +1764,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1771 | ret = create_pending_snapshots(trans, root->fs_info); | 1764 | ret = create_pending_snapshots(trans, root->fs_info); |
1772 | if (ret) { | 1765 | if (ret) { |
1773 | mutex_unlock(&root->fs_info->reloc_mutex); | 1766 | mutex_unlock(&root->fs_info->reloc_mutex); |
1774 | goto cleanup_transaction; | 1767 | goto scrub_continue; |
1775 | } | 1768 | } |
1776 | 1769 | ||
1777 | /* | 1770 | /* |
@@ -1787,13 +1780,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1787 | ret = btrfs_run_delayed_items(trans, root); | 1780 | ret = btrfs_run_delayed_items(trans, root); |
1788 | if (ret) { | 1781 | if (ret) { |
1789 | mutex_unlock(&root->fs_info->reloc_mutex); | 1782 | mutex_unlock(&root->fs_info->reloc_mutex); |
1790 | goto cleanup_transaction; | 1783 | goto scrub_continue; |
1791 | } | 1784 | } |
1792 | 1785 | ||
1793 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1786 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
1794 | if (ret) { | 1787 | if (ret) { |
1795 | mutex_unlock(&root->fs_info->reloc_mutex); | 1788 | mutex_unlock(&root->fs_info->reloc_mutex); |
1796 | goto cleanup_transaction; | 1789 | goto scrub_continue; |
1797 | } | 1790 | } |
1798 | 1791 | ||
1799 | /* | 1792 | /* |
@@ -1823,7 +1816,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1823 | if (ret) { | 1816 | if (ret) { |
1824 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1817 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1825 | mutex_unlock(&root->fs_info->reloc_mutex); | 1818 | mutex_unlock(&root->fs_info->reloc_mutex); |
1826 | goto cleanup_transaction; | 1819 | goto scrub_continue; |
1827 | } | 1820 | } |
1828 | 1821 | ||
1829 | /* | 1822 | /* |
@@ -1844,7 +1837,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1844 | if (ret) { | 1837 | if (ret) { |
1845 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1838 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1846 | mutex_unlock(&root->fs_info->reloc_mutex); | 1839 | mutex_unlock(&root->fs_info->reloc_mutex); |
1847 | goto cleanup_transaction; | 1840 | goto scrub_continue; |
1848 | } | 1841 | } |
1849 | 1842 | ||
1850 | /* | 1843 | /* |
@@ -1855,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1855 | ret = cur_trans->aborted; | 1848 | ret = cur_trans->aborted; |
1856 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1849 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1857 | mutex_unlock(&root->fs_info->reloc_mutex); | 1850 | mutex_unlock(&root->fs_info->reloc_mutex); |
1858 | goto cleanup_transaction; | 1851 | goto scrub_continue; |
1859 | } | 1852 | } |
1860 | 1853 | ||
1861 | btrfs_prepare_extent_commit(trans, root); | 1854 | btrfs_prepare_extent_commit(trans, root); |
@@ -1891,13 +1884,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1891 | btrfs_error(root->fs_info, ret, | 1884 | btrfs_error(root->fs_info, ret, |
1892 | "Error while writing out transaction"); | 1885 | "Error while writing out transaction"); |
1893 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1886 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1894 | goto cleanup_transaction; | 1887 | goto scrub_continue; |
1895 | } | 1888 | } |
1896 | 1889 | ||
1897 | ret = write_ctree_super(trans, root, 0); | 1890 | ret = write_ctree_super(trans, root, 0); |
1898 | if (ret) { | 1891 | if (ret) { |
1899 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1892 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1900 | goto cleanup_transaction; | 1893 | goto scrub_continue; |
1901 | } | 1894 | } |
1902 | 1895 | ||
1903 | /* | 1896 | /* |
@@ -1940,6 +1933,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1940 | 1933 | ||
1941 | return ret; | 1934 | return ret; |
1942 | 1935 | ||
1936 | scrub_continue: | ||
1937 | btrfs_scrub_continue(root); | ||
1943 | cleanup_transaction: | 1938 | cleanup_transaction: |
1944 | btrfs_trans_release_metadata(trans, root); | 1939 | btrfs_trans_release_metadata(trans, root); |
1945 | trans->block_rsv = NULL; | 1940 | trans->block_rsv = NULL; |
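The hunks above reroute every failure that occurs after btrfs_commit_transaction() has paused scrub through a new scrub_continue label, which resumes scrub via btrfs_scrub_continue() and then falls through into the existing cleanup_transaction path, so an aborted commit can no longer leave scrub paused. A minimal, self-contained sketch of this chained-label unwind idiom (plain userspace C; all names are illustrative stand-ins, not btrfs's):

#include <stdio.h>

/* Stand-ins for btrfs_scrub_pause()/btrfs_scrub_continue() and the
 * work released on the cleanup_transaction path. */
static void scrub_pause(void)  { puts("scrub paused"); }
static void scrub_resume(void) { puts("scrub resumed"); }

static int commit_like(int fail_early, int fail_late)
{
	int ret = 0;

	if (fail_early) {		/* scrub not paused yet */
		ret = -1;
		goto cleanup_transaction;
	}

	scrub_pause();
	if (fail_late) {		/* paused: must resume before cleanup */
		ret = -1;
		goto scrub_continue;
	}

	puts("commit ok");
	return 0;

scrub_continue:
	scrub_resume();			/* then fall through */
cleanup_transaction:
	puts("common cleanup");
	return ret;
}

int main(void)
{
	printf("commit_like returned %d\n", commit_like(0, 1));
	return 0;
}

Ordering the labels so control falls from the most recently acquired state into the common cleanup is the usual kernel pattern for multi-stage error unwinding; the patch simply adds one more stage in front of the existing label.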
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 39d83da03e03..e2f45fc02610 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -136,13 +136,20 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
136 | * syncing the tree wait for us to finish | 136 | * syncing the tree wait for us to finish |
137 | */ | 137 | */ |
138 | static int start_log_trans(struct btrfs_trans_handle *trans, | 138 | static int start_log_trans(struct btrfs_trans_handle *trans, |
139 | struct btrfs_root *root) | 139 | struct btrfs_root *root, |
140 | struct btrfs_log_ctx *ctx) | ||
140 | { | 141 | { |
142 | int index; | ||
141 | int ret; | 143 | int ret; |
142 | int err = 0; | ||
143 | 144 | ||
144 | mutex_lock(&root->log_mutex); | 145 | mutex_lock(&root->log_mutex); |
145 | if (root->log_root) { | 146 | if (root->log_root) { |
147 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | ||
148 | trans->transid) { | ||
149 | ret = -EAGAIN; | ||
150 | goto out; | ||
151 | } | ||
152 | |||
146 | if (!root->log_start_pid) { | 153 | if (!root->log_start_pid) { |
147 | root->log_start_pid = current->pid; | 154 | root->log_start_pid = current->pid; |
148 | root->log_multiple_pids = false; | 155 | root->log_multiple_pids = false; |
@@ -152,27 +159,40 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
152 | 159 | ||
153 | atomic_inc(&root->log_batch); | 160 | atomic_inc(&root->log_batch); |
154 | atomic_inc(&root->log_writers); | 161 | atomic_inc(&root->log_writers); |
162 | if (ctx) { | ||
163 | index = root->log_transid % 2; | ||
164 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
165 | ctx->log_transid = root->log_transid; | ||
166 | } | ||
155 | mutex_unlock(&root->log_mutex); | 167 | mutex_unlock(&root->log_mutex); |
156 | return 0; | 168 | return 0; |
157 | } | 169 | } |
158 | root->log_multiple_pids = false; | 170 | |
159 | root->log_start_pid = current->pid; | 171 | ret = 0; |
160 | mutex_lock(&root->fs_info->tree_log_mutex); | 172 | mutex_lock(&root->fs_info->tree_log_mutex); |
161 | if (!root->fs_info->log_root_tree) { | 173 | if (!root->fs_info->log_root_tree) |
162 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 174 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
163 | if (ret) | 175 | mutex_unlock(&root->fs_info->tree_log_mutex); |
164 | err = ret; | 176 | if (ret) |
165 | } | 177 | goto out; |
166 | if (err == 0 && !root->log_root) { | 178 | |
179 | if (!root->log_root) { | ||
167 | ret = btrfs_add_log_tree(trans, root); | 180 | ret = btrfs_add_log_tree(trans, root); |
168 | if (ret) | 181 | if (ret) |
169 | err = ret; | 182 | goto out; |
170 | } | 183 | } |
171 | mutex_unlock(&root->fs_info->tree_log_mutex); | 184 | root->log_multiple_pids = false; |
185 | root->log_start_pid = current->pid; | ||
172 | atomic_inc(&root->log_batch); | 186 | atomic_inc(&root->log_batch); |
173 | atomic_inc(&root->log_writers); | 187 | atomic_inc(&root->log_writers); |
188 | if (ctx) { | ||
189 | index = root->log_transid % 2; | ||
190 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
191 | ctx->log_transid = root->log_transid; | ||
192 | } | ||
193 | out: | ||
174 | mutex_unlock(&root->log_mutex); | 194 | mutex_unlock(&root->log_mutex); |
175 | return err; | 195 | return ret; |
176 | } | 196 | } |
177 | 197 | ||
178 | /* | 198 | /* |
@@ -2359,8 +2379,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
2359 | return ret; | 2379 | return ret; |
2360 | } | 2380 | } |
2361 | 2381 | ||
2362 | static int wait_log_commit(struct btrfs_trans_handle *trans, | 2382 | static void wait_log_commit(struct btrfs_trans_handle *trans, |
2363 | struct btrfs_root *root, unsigned long transid) | 2383 | struct btrfs_root *root, int transid) |
2364 | { | 2384 | { |
2365 | DEFINE_WAIT(wait); | 2385 | DEFINE_WAIT(wait); |
2366 | int index = transid % 2; | 2386 | int index = transid % 2; |
@@ -2375,36 +2395,63 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, | |||
2375 | &wait, TASK_UNINTERRUPTIBLE); | 2395 | &wait, TASK_UNINTERRUPTIBLE); |
2376 | mutex_unlock(&root->log_mutex); | 2396 | mutex_unlock(&root->log_mutex); |
2377 | 2397 | ||
2378 | if (root->fs_info->last_trans_log_full_commit != | 2398 | if (root->log_transid_committed < transid && |
2379 | trans->transid && root->log_transid < transid + 2 && | ||
2380 | atomic_read(&root->log_commit[index])) | 2399 | atomic_read(&root->log_commit[index])) |
2381 | schedule(); | 2400 | schedule(); |
2382 | 2401 | ||
2383 | finish_wait(&root->log_commit_wait[index], &wait); | 2402 | finish_wait(&root->log_commit_wait[index], &wait); |
2384 | mutex_lock(&root->log_mutex); | 2403 | mutex_lock(&root->log_mutex); |
2385 | } while (root->fs_info->last_trans_log_full_commit != | 2404 | } while (root->log_transid_committed < transid && |
2386 | trans->transid && root->log_transid < transid + 2 && | ||
2387 | atomic_read(&root->log_commit[index])); | 2405 | atomic_read(&root->log_commit[index])); |
2388 | return 0; | ||
2389 | } | 2406 | } |
2390 | 2407 | ||
2391 | static void wait_for_writer(struct btrfs_trans_handle *trans, | 2408 | static void wait_for_writer(struct btrfs_trans_handle *trans, |
2392 | struct btrfs_root *root) | 2409 | struct btrfs_root *root) |
2393 | { | 2410 | { |
2394 | DEFINE_WAIT(wait); | 2411 | DEFINE_WAIT(wait); |
2395 | while (root->fs_info->last_trans_log_full_commit != | 2412 | |
2396 | trans->transid && atomic_read(&root->log_writers)) { | 2413 | while (atomic_read(&root->log_writers)) { |
2397 | prepare_to_wait(&root->log_writer_wait, | 2414 | prepare_to_wait(&root->log_writer_wait, |
2398 | &wait, TASK_UNINTERRUPTIBLE); | 2415 | &wait, TASK_UNINTERRUPTIBLE); |
2399 | mutex_unlock(&root->log_mutex); | 2416 | mutex_unlock(&root->log_mutex); |
2400 | if (root->fs_info->last_trans_log_full_commit != | 2417 | if (atomic_read(&root->log_writers)) |
2401 | trans->transid && atomic_read(&root->log_writers)) | ||
2402 | schedule(); | 2418 | schedule(); |
2403 | mutex_lock(&root->log_mutex); | 2419 | mutex_lock(&root->log_mutex); |
2404 | finish_wait(&root->log_writer_wait, &wait); | 2420 | finish_wait(&root->log_writer_wait, &wait); |
2405 | } | 2421 | } |
2406 | } | 2422 | } |
2407 | 2423 | ||
2424 | static inline void btrfs_remove_log_ctx(struct btrfs_root *root, | ||
2425 | struct btrfs_log_ctx *ctx) | ||
2426 | { | ||
2427 | if (!ctx) | ||
2428 | return; | ||
2429 | |||
2430 | mutex_lock(&root->log_mutex); | ||
2431 | list_del_init(&ctx->list); | ||
2432 | mutex_unlock(&root->log_mutex); | ||
2433 | } | ||
2434 | |||
2435 | /* | ||
2436 | * Invoked in log mutex context, or when the caller is sure no other | ||
2437 | * task can access the list. | ||
2438 | */ | ||
2439 | static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, | ||
2440 | int index, int error) | ||
2441 | { | ||
2442 | struct btrfs_log_ctx *ctx; | ||
2443 | |||
2444 | if (!error) { | ||
2445 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
2446 | return; | ||
2447 | } | ||
2448 | |||
2449 | list_for_each_entry(ctx, &root->log_ctxs[index], list) | ||
2450 | ctx->log_ret = error; | ||
2451 | |||
2452 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
2453 | } | ||
2454 | |||
2408 | /* | 2455 | /* |
2409 | * btrfs_sync_log sends a given tree log down to the disk and | 2456 | * btrfs_sync_log sends a given tree log down to the disk and |
2410 | * updates the super blocks to record it. When this call is done, | 2457 | * updates the super blocks to record it. When this call is done, |
@@ -2418,7 +2465,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, | |||
2418 | * that has happened. | 2465 | * that has happened. |
2419 | */ | 2466 | */ |
2420 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 2467 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
2421 | struct btrfs_root *root) | 2468 | struct btrfs_root *root, struct btrfs_log_ctx *ctx) |
2422 | { | 2469 | { |
2423 | int index1; | 2470 | int index1; |
2424 | int index2; | 2471 | int index2; |
@@ -2426,22 +2473,30 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2426 | int ret; | 2473 | int ret; |
2427 | struct btrfs_root *log = root->log_root; | 2474 | struct btrfs_root *log = root->log_root; |
2428 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 2475 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
2429 | unsigned long log_transid = 0; | 2476 | int log_transid = 0; |
2477 | struct btrfs_log_ctx root_log_ctx; | ||
2430 | struct blk_plug plug; | 2478 | struct blk_plug plug; |
2431 | 2479 | ||
2432 | mutex_lock(&root->log_mutex); | 2480 | mutex_lock(&root->log_mutex); |
2433 | log_transid = root->log_transid; | 2481 | log_transid = ctx->log_transid; |
2434 | index1 = root->log_transid % 2; | 2482 | if (root->log_transid_committed >= log_transid) { |
2483 | mutex_unlock(&root->log_mutex); | ||
2484 | return ctx->log_ret; | ||
2485 | } | ||
2486 | |||
2487 | index1 = log_transid % 2; | ||
2435 | if (atomic_read(&root->log_commit[index1])) { | 2488 | if (atomic_read(&root->log_commit[index1])) { |
2436 | wait_log_commit(trans, root, root->log_transid); | 2489 | wait_log_commit(trans, root, log_transid); |
2437 | mutex_unlock(&root->log_mutex); | 2490 | mutex_unlock(&root->log_mutex); |
2438 | return 0; | 2491 | return ctx->log_ret; |
2439 | } | 2492 | } |
2493 | ASSERT(log_transid == root->log_transid); | ||
2440 | atomic_set(&root->log_commit[index1], 1); | 2494 | atomic_set(&root->log_commit[index1], 1); |
2441 | 2495 | ||
2442 | /* wait for previous tree log sync to complete */ | 2496 | /* wait for previous tree log sync to complete */ |
2443 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2497 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
2444 | wait_log_commit(trans, root, root->log_transid - 1); | 2498 | wait_log_commit(trans, root, log_transid - 1); |
2499 | |||
2445 | while (1) { | 2500 | while (1) { |
2446 | int batch = atomic_read(&root->log_batch); | 2501 | int batch = atomic_read(&root->log_batch); |
2447 | /* when we're on an ssd, just kick the log commit out */ | 2502 | /* when we're on an ssd, just kick the log commit out */ |
@@ -2456,7 +2511,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2456 | } | 2511 | } |
2457 | 2512 | ||
2458 | /* bail out if we need to do a full commit */ | 2513 | /* bail out if we need to do a full commit */ |
2459 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2514 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
2515 | trans->transid) { | ||
2460 | ret = -EAGAIN; | 2516 | ret = -EAGAIN; |
2461 | btrfs_free_logged_extents(log, log_transid); | 2517 | btrfs_free_logged_extents(log, log_transid); |
2462 | mutex_unlock(&root->log_mutex); | 2518 | mutex_unlock(&root->log_mutex); |
@@ -2477,6 +2533,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2477 | blk_finish_plug(&plug); | 2533 | blk_finish_plug(&plug); |
2478 | btrfs_abort_transaction(trans, root, ret); | 2534 | btrfs_abort_transaction(trans, root, ret); |
2479 | btrfs_free_logged_extents(log, log_transid); | 2535 | btrfs_free_logged_extents(log, log_transid); |
2536 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2537 | trans->transid; | ||
2480 | mutex_unlock(&root->log_mutex); | 2538 | mutex_unlock(&root->log_mutex); |
2481 | goto out; | 2539 | goto out; |
2482 | } | 2540 | } |
@@ -2486,7 +2544,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2486 | root->log_transid++; | 2544 | root->log_transid++; |
2487 | log->log_transid = root->log_transid; | 2545 | log->log_transid = root->log_transid; |
2488 | root->log_start_pid = 0; | 2546 | root->log_start_pid = 0; |
2489 | smp_mb(); | ||
2490 | /* | 2547 | /* |
2491 | * IO has been started, blocks of the log tree have WRITTEN flag set | 2548 | * IO has been started, blocks of the log tree have WRITTEN flag set |
2492 | * in their headers. new modifications of the log will be written to | 2549 | * in their headers. new modifications of the log will be written to |
@@ -2494,9 +2551,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2494 | */ | 2551 | */ |
2495 | mutex_unlock(&root->log_mutex); | 2552 | mutex_unlock(&root->log_mutex); |
2496 | 2553 | ||
2554 | btrfs_init_log_ctx(&root_log_ctx); | ||
2555 | |||
2497 | mutex_lock(&log_root_tree->log_mutex); | 2556 | mutex_lock(&log_root_tree->log_mutex); |
2498 | atomic_inc(&log_root_tree->log_batch); | 2557 | atomic_inc(&log_root_tree->log_batch); |
2499 | atomic_inc(&log_root_tree->log_writers); | 2558 | atomic_inc(&log_root_tree->log_writers); |
2559 | |||
2560 | index2 = log_root_tree->log_transid % 2; | ||
2561 | list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); | ||
2562 | root_log_ctx.log_transid = log_root_tree->log_transid; | ||
2563 | |||
2500 | mutex_unlock(&log_root_tree->log_mutex); | 2564 | mutex_unlock(&log_root_tree->log_mutex); |
2501 | 2565 | ||
2502 | ret = update_log_root(trans, log); | 2566 | ret = update_log_root(trans, log); |
@@ -2509,13 +2573,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2509 | } | 2573 | } |
2510 | 2574 | ||
2511 | if (ret) { | 2575 | if (ret) { |
2576 | if (!list_empty(&root_log_ctx.list)) | ||
2577 | list_del_init(&root_log_ctx.list); | ||
2578 | |||
2512 | blk_finish_plug(&plug); | 2579 | blk_finish_plug(&plug); |
2580 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2581 | trans->transid; | ||
2513 | if (ret != -ENOSPC) { | 2582 | if (ret != -ENOSPC) { |
2514 | btrfs_abort_transaction(trans, root, ret); | 2583 | btrfs_abort_transaction(trans, root, ret); |
2515 | mutex_unlock(&log_root_tree->log_mutex); | 2584 | mutex_unlock(&log_root_tree->log_mutex); |
2516 | goto out; | 2585 | goto out; |
2517 | } | 2586 | } |
2518 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2519 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2587 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2520 | btrfs_free_logged_extents(log, log_transid); | 2588 | btrfs_free_logged_extents(log, log_transid); |
2521 | mutex_unlock(&log_root_tree->log_mutex); | 2589 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2523,22 +2591,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2523 | goto out; | 2591 | goto out; |
2524 | } | 2592 | } |
2525 | 2593 | ||
2526 | index2 = log_root_tree->log_transid % 2; | 2594 | if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { |
2595 | mutex_unlock(&log_root_tree->log_mutex); | ||
2596 | ret = root_log_ctx.log_ret; | ||
2597 | goto out; | ||
2598 | } | ||
2599 | |||
2600 | index2 = root_log_ctx.log_transid % 2; | ||
2527 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2601 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2528 | blk_finish_plug(&plug); | 2602 | blk_finish_plug(&plug); |
2529 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2603 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2530 | wait_log_commit(trans, log_root_tree, | 2604 | wait_log_commit(trans, log_root_tree, |
2531 | log_root_tree->log_transid); | 2605 | root_log_ctx.log_transid); |
2532 | btrfs_free_logged_extents(log, log_transid); | 2606 | btrfs_free_logged_extents(log, log_transid); |
2533 | mutex_unlock(&log_root_tree->log_mutex); | 2607 | mutex_unlock(&log_root_tree->log_mutex); |
2534 | ret = 0; | 2608 | ret = root_log_ctx.log_ret; |
2535 | goto out; | 2609 | goto out; |
2536 | } | 2610 | } |
2611 | ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); | ||
2537 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2612 | atomic_set(&log_root_tree->log_commit[index2], 1); |
2538 | 2613 | ||
2539 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { | 2614 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
2540 | wait_log_commit(trans, log_root_tree, | 2615 | wait_log_commit(trans, log_root_tree, |
2541 | log_root_tree->log_transid - 1); | 2616 | root_log_ctx.log_transid - 1); |
2542 | } | 2617 | } |
2543 | 2618 | ||
2544 | wait_for_writer(trans, log_root_tree); | 2619 | wait_for_writer(trans, log_root_tree); |
@@ -2547,7 +2622,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2547 | * now that we've moved on to the tree of log tree roots, | 2622 | * now that we've moved on to the tree of log tree roots, |
2548 | * check the full commit flag again | 2623 | * check the full commit flag again |
2549 | */ | 2624 | */ |
2550 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2625 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
2626 | trans->transid) { | ||
2551 | blk_finish_plug(&plug); | 2627 | blk_finish_plug(&plug); |
2552 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2628 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2553 | btrfs_free_logged_extents(log, log_transid); | 2629 | btrfs_free_logged_extents(log, log_transid); |
@@ -2561,6 +2637,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2561 | EXTENT_DIRTY | EXTENT_NEW); | 2637 | EXTENT_DIRTY | EXTENT_NEW); |
2562 | blk_finish_plug(&plug); | 2638 | blk_finish_plug(&plug); |
2563 | if (ret) { | 2639 | if (ret) { |
2640 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2641 | trans->transid; | ||
2564 | btrfs_abort_transaction(trans, root, ret); | 2642 | btrfs_abort_transaction(trans, root, ret); |
2565 | btrfs_free_logged_extents(log, log_transid); | 2643 | btrfs_free_logged_extents(log, log_transid); |
2566 | mutex_unlock(&log_root_tree->log_mutex); | 2644 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2578,8 +2656,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2578 | btrfs_header_level(log_root_tree->node)); | 2656 | btrfs_header_level(log_root_tree->node)); |
2579 | 2657 | ||
2580 | log_root_tree->log_transid++; | 2658 | log_root_tree->log_transid++; |
2581 | smp_mb(); | ||
2582 | |||
2583 | mutex_unlock(&log_root_tree->log_mutex); | 2659 | mutex_unlock(&log_root_tree->log_mutex); |
2584 | 2660 | ||
2585 | /* | 2661 | /* |
@@ -2591,6 +2667,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2591 | */ | 2667 | */ |
2592 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); | 2668 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
2593 | if (ret) { | 2669 | if (ret) { |
2670 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2671 | trans->transid; | ||
2594 | btrfs_abort_transaction(trans, root, ret); | 2672 | btrfs_abort_transaction(trans, root, ret); |
2595 | goto out_wake_log_root; | 2673 | goto out_wake_log_root; |
2596 | } | 2674 | } |
@@ -2601,13 +2679,28 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2601 | mutex_unlock(&root->log_mutex); | 2679 | mutex_unlock(&root->log_mutex); |
2602 | 2680 | ||
2603 | out_wake_log_root: | 2681 | out_wake_log_root: |
2682 | /* | ||
2683 | * We needn't take the log_mutex here because we are sure all | ||
2684 | * the other tasks are blocked. | ||
2685 | */ | ||
2686 | btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); | ||
2687 | |||
2688 | mutex_lock(&log_root_tree->log_mutex); | ||
2689 | log_root_tree->log_transid_committed++; | ||
2604 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2690 | atomic_set(&log_root_tree->log_commit[index2], 0); |
2605 | smp_mb(); | 2691 | mutex_unlock(&log_root_tree->log_mutex); |
2692 | |||
2606 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2693 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
2607 | wake_up(&log_root_tree->log_commit_wait[index2]); | 2694 | wake_up(&log_root_tree->log_commit_wait[index2]); |
2608 | out: | 2695 | out: |
2696 | /* See above. */ | ||
2697 | btrfs_remove_all_log_ctxs(root, index1, ret); | ||
2698 | |||
2699 | mutex_lock(&root->log_mutex); | ||
2700 | root->log_transid_committed++; | ||
2609 | atomic_set(&root->log_commit[index1], 0); | 2701 | atomic_set(&root->log_commit[index1], 0); |
2610 | smp_mb(); | 2702 | mutex_unlock(&root->log_mutex); |
2703 | |||
2611 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2704 | if (waitqueue_active(&root->log_commit_wait[index1])) |
2612 | wake_up(&root->log_commit_wait[index1]); | 2705 | wake_up(&root->log_commit_wait[index1]); |
2613 | return ret; | 2706 | return ret; |
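In the rewritten wake-up path above, the committer bumps log_transid_committed under log_mutex before waking the slot's waitqueue, and wait_log_commit() now simply sleeps until log_transid_committed reaches the transid its ctx was queued for, instead of second-guessing last_trans_log_full_commit. A hedged userspace analogue of that handshake, using a pthread condition variable where the kernel uses a waitqueue plus schedule():

#include <pthread.h>
#include <stdio.h>

/* The condvar plays the role of log_commit_wait[index]; log_mutex
 * guards the counter exactly as root->log_mutex does in the patch. */
static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t log_commit_wait = PTHREAD_COND_INITIALIZER;
static int log_transid_committed = -1;

static void *committer(void *arg)
{
	pthread_mutex_lock(&log_mutex);
	log_transid_committed++;		/* commit transid 0 */
	pthread_mutex_unlock(&log_mutex);
	pthread_cond_broadcast(&log_commit_wait);
	return arg;
}

/* wait_log_commit()-style loop: sleep until our transid is committed. */
static void wait_log_commit(int transid)
{
	pthread_mutex_lock(&log_mutex);
	while (log_transid_committed < transid)
		pthread_cond_wait(&log_commit_wait, &log_mutex);
	pthread_mutex_unlock(&log_mutex);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, committer, NULL);
	wait_log_commit(0);
	puts("log transid 0 committed");
	pthread_join(t, NULL);
	return 0;
}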
@@ -3479,7 +3572,8 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
3479 | 3572 | ||
3480 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3573 | static int log_one_extent(struct btrfs_trans_handle *trans, |
3481 | struct inode *inode, struct btrfs_root *root, | 3574 | struct inode *inode, struct btrfs_root *root, |
3482 | struct extent_map *em, struct btrfs_path *path) | 3575 | struct extent_map *em, struct btrfs_path *path, |
3576 | struct list_head *logged_list) | ||
3483 | { | 3577 | { |
3484 | struct btrfs_root *log = root->log_root; | 3578 | struct btrfs_root *log = root->log_root; |
3485 | struct btrfs_file_extent_item *fi; | 3579 | struct btrfs_file_extent_item *fi; |
@@ -3495,7 +3589,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3495 | u64 extent_offset = em->start - em->orig_start; | 3589 | u64 extent_offset = em->start - em->orig_start; |
3496 | u64 block_len; | 3590 | u64 block_len; |
3497 | int ret; | 3591 | int ret; |
3498 | int index = log->log_transid % 2; | ||
3499 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3592 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
3500 | int extent_inserted = 0; | 3593 | int extent_inserted = 0; |
3501 | 3594 | ||
@@ -3579,17 +3672,12 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3579 | * First check and see if our csums are on our outstanding ordered | 3672 | * First check and see if our csums are on our outstanding ordered |
3580 | * extents. | 3673 | * extents. |
3581 | */ | 3674 | */ |
3582 | again: | 3675 | list_for_each_entry(ordered, logged_list, log_list) { |
3583 | spin_lock_irq(&log->log_extents_lock[index]); | ||
3584 | list_for_each_entry(ordered, &log->logged_list[index], log_list) { | ||
3585 | struct btrfs_ordered_sum *sum; | 3676 | struct btrfs_ordered_sum *sum; |
3586 | 3677 | ||
3587 | if (!mod_len) | 3678 | if (!mod_len) |
3588 | break; | 3679 | break; |
3589 | 3680 | ||
3590 | if (ordered->inode != inode) | ||
3591 | continue; | ||
3592 | |||
3593 | if (ordered->file_offset + ordered->len <= mod_start || | 3681 | if (ordered->file_offset + ordered->len <= mod_start || |
3594 | mod_start + mod_len <= ordered->file_offset) | 3682 | mod_start + mod_len <= ordered->file_offset) |
3595 | continue; | 3683 | continue; |
@@ -3632,12 +3720,6 @@ again: | |||
3632 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, | 3720 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, |
3633 | &ordered->flags)) | 3721 | &ordered->flags)) |
3634 | continue; | 3722 | continue; |
3635 | atomic_inc(&ordered->refs); | ||
3636 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3637 | /* | ||
3638 | * we've dropped the lock, we must either break or | ||
3639 | * start over after this. | ||
3640 | */ | ||
3641 | 3723 | ||
3642 | if (ordered->csum_bytes_left) { | 3724 | if (ordered->csum_bytes_left) { |
3643 | btrfs_start_ordered_extent(inode, ordered, 0); | 3725 | btrfs_start_ordered_extent(inode, ordered, 0); |
@@ -3647,16 +3729,11 @@ again: | |||
3647 | 3729 | ||
3648 | list_for_each_entry(sum, &ordered->list, list) { | 3730 | list_for_each_entry(sum, &ordered->list, list) { |
3649 | ret = btrfs_csum_file_blocks(trans, log, sum); | 3731 | ret = btrfs_csum_file_blocks(trans, log, sum); |
3650 | if (ret) { | 3732 | if (ret) |
3651 | btrfs_put_ordered_extent(ordered); | ||
3652 | goto unlocked; | 3733 | goto unlocked; |
3653 | } | ||
3654 | } | 3734 | } |
3655 | btrfs_put_ordered_extent(ordered); | ||
3656 | goto again; | ||
3657 | 3735 | ||
3658 | } | 3736 | } |
3659 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3660 | unlocked: | 3737 | unlocked: |
3661 | 3738 | ||
3662 | if (!mod_len || ret) | 3739 | if (!mod_len || ret) |
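This simplification works because, as the btrfs_log_inode hunk further below shows, btrfs_get_logged_extents() now collects the inode's ordered extents onto a private logged_list up front, so log_one_extent walks a list nobody else touches: no per-log spinlock, no temporary refcount, and no goto-again restart after dropping the lock mid-iteration. A small self-contained sketch of that snapshot-then-iterate pattern (toy types, not btrfs's):

#include <pthread.h>
#include <stdio.h>

/* Toy node standing in for btrfs_ordered_extent.log_list. */
struct node {
	int value;
	struct node *next;
};

static pthread_mutex_t shared_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *shared_list;

/* Move everything onto a caller-private list while holding the lock... */
static struct node *grab_logged_extents(void)
{
	struct node *snap;

	pthread_mutex_lock(&shared_lock);
	snap = shared_list;
	shared_list = NULL;
	pthread_mutex_unlock(&shared_lock);
	return snap;
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *it;

	shared_list = &a;
	/* ...then walk the private copy lock-free: no refcount dance,
	 * no restart after dropping a spinlock mid-iteration. */
	for (it = grab_logged_extents(); it; it = it->next)
		printf("ordered extent %d\n", it->value);
	return 0;
}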
@@ -3694,7 +3771,8 @@ unlocked: | |||
3694 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3771 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
3695 | struct btrfs_root *root, | 3772 | struct btrfs_root *root, |
3696 | struct inode *inode, | 3773 | struct inode *inode, |
3697 | struct btrfs_path *path) | 3774 | struct btrfs_path *path, |
3775 | struct list_head *logged_list) | ||
3698 | { | 3776 | { |
3699 | struct extent_map *em, *n; | 3777 | struct extent_map *em, *n; |
3700 | struct list_head extents; | 3778 | struct list_head extents; |
@@ -3752,7 +3830,7 @@ process: | |||
3752 | 3830 | ||
3753 | write_unlock(&tree->lock); | 3831 | write_unlock(&tree->lock); |
3754 | 3832 | ||
3755 | ret = log_one_extent(trans, inode, root, em, path); | 3833 | ret = log_one_extent(trans, inode, root, em, path, logged_list); |
3756 | write_lock(&tree->lock); | 3834 | write_lock(&tree->lock); |
3757 | clear_em_logging(tree, em); | 3835 | clear_em_logging(tree, em); |
3758 | free_extent_map(em); | 3836 | free_extent_map(em); |
@@ -3788,6 +3866,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3788 | struct btrfs_key max_key; | 3866 | struct btrfs_key max_key; |
3789 | struct btrfs_root *log = root->log_root; | 3867 | struct btrfs_root *log = root->log_root; |
3790 | struct extent_buffer *src = NULL; | 3868 | struct extent_buffer *src = NULL; |
3869 | LIST_HEAD(logged_list); | ||
3791 | u64 last_extent = 0; | 3870 | u64 last_extent = 0; |
3792 | int err = 0; | 3871 | int err = 0; |
3793 | int ret; | 3872 | int ret; |
@@ -3836,7 +3915,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3836 | 3915 | ||
3837 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3916 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
3838 | 3917 | ||
3839 | btrfs_get_logged_extents(log, inode); | 3918 | btrfs_get_logged_extents(inode, &logged_list); |
3840 | 3919 | ||
3841 | /* | 3920 | /* |
3842 | * a brute force approach to making sure we get the most uptodate | 3921 | * a brute force approach to making sure we get the most uptodate |
@@ -3962,7 +4041,8 @@ log_extents: | |||
3962 | btrfs_release_path(path); | 4041 | btrfs_release_path(path); |
3963 | btrfs_release_path(dst_path); | 4042 | btrfs_release_path(dst_path); |
3964 | if (fast_search) { | 4043 | if (fast_search) { |
3965 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); | 4044 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
4045 | &logged_list); | ||
3966 | if (ret) { | 4046 | if (ret) { |
3967 | err = ret; | 4047 | err = ret; |
3968 | goto out_unlock; | 4048 | goto out_unlock; |
@@ -3987,8 +4067,10 @@ log_extents: | |||
3987 | BTRFS_I(inode)->logged_trans = trans->transid; | 4067 | BTRFS_I(inode)->logged_trans = trans->transid; |
3988 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4068 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
3989 | out_unlock: | 4069 | out_unlock: |
3990 | if (err) | 4070 | if (unlikely(err)) |
3991 | btrfs_free_logged_extents(log, log->log_transid); | 4071 | btrfs_put_logged_extents(&logged_list); |
4072 | else | ||
4073 | btrfs_submit_logged_extents(&logged_list, log); | ||
3992 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 4074 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
3993 | 4075 | ||
3994 | btrfs_free_path(path); | 4076 | btrfs_free_path(path); |
@@ -4079,7 +4161,8 @@ out: | |||
4079 | */ | 4161 | */ |
4080 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | 4162 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
4081 | struct btrfs_root *root, struct inode *inode, | 4163 | struct btrfs_root *root, struct inode *inode, |
4082 | struct dentry *parent, int exists_only) | 4164 | struct dentry *parent, int exists_only, |
4165 | struct btrfs_log_ctx *ctx) | ||
4083 | { | 4166 | { |
4084 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 4167 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
4085 | struct super_block *sb; | 4168 | struct super_block *sb; |
@@ -4116,9 +4199,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4116 | goto end_no_trans; | 4199 | goto end_no_trans; |
4117 | } | 4200 | } |
4118 | 4201 | ||
4119 | ret = start_log_trans(trans, root); | 4202 | ret = start_log_trans(trans, root, ctx); |
4120 | if (ret) | 4203 | if (ret) |
4121 | goto end_trans; | 4204 | goto end_no_trans; |
4122 | 4205 | ||
4123 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 4206 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
4124 | if (ret) | 4207 | if (ret) |
@@ -4166,6 +4249,9 @@ end_trans: | |||
4166 | root->fs_info->last_trans_log_full_commit = trans->transid; | 4249 | root->fs_info->last_trans_log_full_commit = trans->transid; |
4167 | ret = 1; | 4250 | ret = 1; |
4168 | } | 4251 | } |
4252 | |||
4253 | if (ret) | ||
4254 | btrfs_remove_log_ctx(root, ctx); | ||
4169 | btrfs_end_log_trans(root); | 4255 | btrfs_end_log_trans(root); |
4170 | end_no_trans: | 4256 | end_no_trans: |
4171 | return ret; | 4257 | return ret; |
@@ -4178,12 +4264,14 @@ end_no_trans: | |||
4178 | * data on disk. | 4264 | * data on disk. |
4179 | */ | 4265 | */ |
4180 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 4266 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
4181 | struct btrfs_root *root, struct dentry *dentry) | 4267 | struct btrfs_root *root, struct dentry *dentry, |
4268 | struct btrfs_log_ctx *ctx) | ||
4182 | { | 4269 | { |
4183 | struct dentry *parent = dget_parent(dentry); | 4270 | struct dentry *parent = dget_parent(dentry); |
4184 | int ret; | 4271 | int ret; |
4185 | 4272 | ||
4186 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | 4273 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, |
4274 | 0, ctx); | ||
4187 | dput(parent); | 4275 | dput(parent); |
4188 | 4276 | ||
4189 | return ret; | 4277 | return ret; |
@@ -4420,6 +4508,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, | |||
4420 | root->fs_info->last_trans_committed)) | 4508 | root->fs_info->last_trans_committed)) |
4421 | return 0; | 4509 | return 0; |
4422 | 4510 | ||
4423 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | 4511 | return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); |
4424 | } | 4512 | } |
4425 | 4513 | ||
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1d4ae0d15a70..91b145fce333 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -22,14 +22,28 @@ | |||
22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ |
23 | #define BTRFS_NO_LOG_SYNC 256 | 23 | #define BTRFS_NO_LOG_SYNC 256 |
24 | 24 | ||
25 | struct btrfs_log_ctx { | ||
26 | int log_ret; | ||
27 | int log_transid; | ||
28 | struct list_head list; | ||
29 | }; | ||
30 | |||
31 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | ||
32 | { | ||
33 | ctx->log_ret = 0; | ||
34 | ctx->log_transid = 0; | ||
35 | INIT_LIST_HEAD(&ctx->list); | ||
36 | } | ||
37 | |||
25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 38 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
26 | struct btrfs_root *root); | 39 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); |
27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 40 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | 41 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, |
29 | struct btrfs_fs_info *fs_info); | 42 | struct btrfs_fs_info *fs_info); |
30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 43 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 44 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
32 | struct btrfs_root *root, struct dentry *dentry); | 45 | struct btrfs_root *root, struct dentry *dentry, |
46 | struct btrfs_log_ctx *ctx); | ||
33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 47 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
34 | struct btrfs_root *root, | 48 | struct btrfs_root *root, |
35 | const char *name, int name_len, | 49 | const char *name, int name_len, |
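Given the declarations above, a caller on the fsync path would presumably use the context roughly as follows. This is a hypothetical sketch written against this header, not the actual fs/btrfs/file.c code, and it omits the fallback to a full transaction commit that the real sync path performs when the log cannot be used:

/* Hypothetical in-tree caller; compiles only inside fs/btrfs. */
#include "ctree.h"
#include "tree-log.h"

static int sync_one_dentry(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, struct dentry *dentry)
{
	struct btrfs_log_ctx ctx;
	int ret;

	btrfs_init_log_ctx(&ctx);	/* empty list, log_ret = 0 */

	ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
	if (ret == 0)
		/*
		 * Wait for (or join) the commit of ctx->log_transid; the
		 * committer's result comes back through ctx->log_ret.
		 */
		ret = btrfs_sync_log(trans, root, &ctx);

	return ret;	/* >0 (e.g. BTRFS_NO_LOG_SYNC): skip the log sync */
}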
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bab0b84d8f80..d241130a32fd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -415,7 +415,8 @@ loop_lock: | |||
415 | device->running_pending = 1; | 415 | device->running_pending = 1; |
416 | 416 | ||
417 | spin_unlock(&device->io_lock); | 417 | spin_unlock(&device->io_lock); |
418 | btrfs_requeue_work(&device->work); | 418 | btrfs_queue_work(fs_info->submit_workers, |
419 | &device->work); | ||
419 | goto done; | 420 | goto done; |
420 | } | 421 | } |
421 | /* unplug every 64 requests just for good measure */ | 422 | /* unplug every 64 requests just for good measure */ |
@@ -5263,6 +5264,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
5263 | static void btrfs_end_bio(struct bio *bio, int err) | 5264 | static void btrfs_end_bio(struct bio *bio, int err) |
5264 | { | 5265 | { |
5265 | struct btrfs_bio *bbio = bio->bi_private; | 5266 | struct btrfs_bio *bbio = bio->bi_private; |
5267 | struct btrfs_device *dev = bbio->stripes[0].dev; | ||
5266 | int is_orig_bio = 0; | 5268 | int is_orig_bio = 0; |
5267 | 5269 | ||
5268 | if (err) { | 5270 | if (err) { |
@@ -5270,7 +5272,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
5270 | if (err == -EIO || err == -EREMOTEIO) { | 5272 | if (err == -EIO || err == -EREMOTEIO) { |
5271 | unsigned int stripe_index = | 5273 | unsigned int stripe_index = |
5272 | btrfs_io_bio(bio)->stripe_index; | 5274 | btrfs_io_bio(bio)->stripe_index; |
5273 | struct btrfs_device *dev; | ||
5274 | 5275 | ||
5275 | BUG_ON(stripe_index >= bbio->num_stripes); | 5276 | BUG_ON(stripe_index >= bbio->num_stripes); |
5276 | dev = bbio->stripes[stripe_index].dev; | 5277 | dev = bbio->stripes[stripe_index].dev; |
@@ -5292,6 +5293,8 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
5292 | if (bio == bbio->orig_bio) | 5293 | if (bio == bbio->orig_bio) |
5293 | is_orig_bio = 1; | 5294 | is_orig_bio = 1; |
5294 | 5295 | ||
5296 | btrfs_bio_counter_dec(bbio->fs_info); | ||
5297 | |||
5295 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | 5298 | if (atomic_dec_and_test(&bbio->stripes_pending)) { |
5296 | if (!is_orig_bio) { | 5299 | if (!is_orig_bio) { |
5297 | bio_put(bio); | 5300 | bio_put(bio); |
@@ -5328,13 +5331,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
5328 | } | 5331 | } |
5329 | } | 5332 | } |
5330 | 5333 | ||
5331 | struct async_sched { | ||
5332 | struct bio *bio; | ||
5333 | int rw; | ||
5334 | struct btrfs_fs_info *info; | ||
5335 | struct btrfs_work work; | ||
5336 | }; | ||
5337 | |||
5338 | /* | 5334 | /* |
5339 | * see run_scheduled_bios for a description of why bios are collected for | 5335 | * see run_scheduled_bios for a description of why bios are collected for |
5340 | * async submit. | 5336 | * async submit. |
@@ -5391,8 +5387,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, | |||
5391 | spin_unlock(&device->io_lock); | 5387 | spin_unlock(&device->io_lock); |
5392 | 5388 | ||
5393 | if (should_queue) | 5389 | if (should_queue) |
5394 | btrfs_queue_worker(&root->fs_info->submit_workers, | 5390 | btrfs_queue_work(root->fs_info->submit_workers, |
5395 | &device->work); | 5391 | &device->work); |
5396 | } | 5392 | } |
5397 | 5393 | ||
5398 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | 5394 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, |
@@ -5447,6 +5443,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | |||
5447 | } | 5443 | } |
5448 | #endif | 5444 | #endif |
5449 | bio->bi_bdev = dev->bdev; | 5445 | bio->bi_bdev = dev->bdev; |
5446 | |||
5447 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
5448 | |||
5450 | if (async) | 5449 | if (async) |
5451 | btrfs_schedule_bio(root, dev, rw, bio); | 5450 | btrfs_schedule_bio(root, dev, rw, bio); |
5452 | else | 5451 | else |
@@ -5515,28 +5514,38 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5515 | length = bio->bi_iter.bi_size; | 5514 | length = bio->bi_iter.bi_size; |
5516 | map_length = length; | 5515 | map_length = length; |
5517 | 5516 | ||
5517 | btrfs_bio_counter_inc_blocked(root->fs_info); | ||
5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, | 5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
5519 | mirror_num, &raid_map); | 5519 | mirror_num, &raid_map); |
5520 | if (ret) /* -ENOMEM */ | 5520 | if (ret) { |
5521 | btrfs_bio_counter_dec(root->fs_info); | ||
5521 | return ret; | 5522 | return ret; |
5523 | } | ||
5522 | 5524 | ||
5523 | total_devs = bbio->num_stripes; | 5525 | total_devs = bbio->num_stripes; |
5524 | bbio->orig_bio = first_bio; | 5526 | bbio->orig_bio = first_bio; |
5525 | bbio->private = first_bio->bi_private; | 5527 | bbio->private = first_bio->bi_private; |
5526 | bbio->end_io = first_bio->bi_end_io; | 5528 | bbio->end_io = first_bio->bi_end_io; |
5529 | bbio->fs_info = root->fs_info; | ||
5527 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 5530 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
5528 | 5531 | ||
5529 | if (raid_map) { | 5532 | if (raid_map) { |
5530 | /* In this case, map_length has been set to the length of | 5533 | /* In this case, map_length has been set to the length of |
5531 | a single stripe; not the whole write */ | 5534 | a single stripe; not the whole write */ |
5532 | if (rw & WRITE) { | 5535 | if (rw & WRITE) { |
5533 | return raid56_parity_write(root, bio, bbio, | 5536 | ret = raid56_parity_write(root, bio, bbio, |
5534 | raid_map, map_length); | 5537 | raid_map, map_length); |
5535 | } else { | 5538 | } else { |
5536 | return raid56_parity_recover(root, bio, bbio, | 5539 | ret = raid56_parity_recover(root, bio, bbio, |
5537 | raid_map, map_length, | 5540 | raid_map, map_length, |
5538 | mirror_num); | 5541 | mirror_num); |
5539 | } | 5542 | } |
5543 | /* | ||
5544 | * FIXME: device replace doesn't support raid56 yet, please fix | ||
5545 | * it in the future. | ||
5546 | */ | ||
5547 | btrfs_bio_counter_dec(root->fs_info); | ||
5548 | return ret; | ||
5540 | } | 5549 | } |
5541 | 5550 | ||
5542 | if (map_length < length) { | 5551 | if (map_length < length) { |
@@ -5578,6 +5587,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5578 | async_submit); | 5587 | async_submit); |
5579 | dev_nr++; | 5588 | dev_nr++; |
5580 | } | 5589 | } |
5590 | btrfs_bio_counter_dec(root->fs_info); | ||
5581 | return 0; | 5591 | return 0; |
5582 | } | 5592 | } |
5583 | 5593 | ||
@@ -5666,7 +5676,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, | |||
5666 | else | 5676 | else |
5667 | generate_random_uuid(dev->uuid); | 5677 | generate_random_uuid(dev->uuid); |
5668 | 5678 | ||
5669 | dev->work.func = pending_bios_fn; | 5679 | btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); |
5670 | 5680 | ||
5671 | return dev; | 5681 | return dev; |
5672 | } | 5682 | } |
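Taken together, the volumes.c hunks bracket every mapped bio with a per-filesystem in-flight counter: one blocked increment before __btrfs_map_block(), one non-blocking increment per submitted stripe, a decrement on each early-return path and in btrfs_end_bio() per completion, and a final decrement when btrfs_map_bio() finishes submitting. That is what lets device replace wait for the counter to drain before tearing a device down. An illustrative userspace model with made-up names (the real blocked variant can also wait while device replace has the counter frozen; that part is omitted):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int bio_counter;	/* models the counter in btrfs_fs_info */

static void bio_counter_inc(void) { atomic_fetch_add(&bio_counter, 1); }
static void bio_counter_dec(void) { atomic_fetch_sub(&bio_counter, 1); }

/* map_bio(): one increment guards the mapping phase, plus one per
 * stripe so that every completion pairs with exactly one decrement. */
static int map_bio(int num_stripes, int map_fails)
{
	bio_counter_inc();		/* "blocked" inc before mapping */
	if (map_fails) {
		bio_counter_dec();	/* early-return path */
		return -1;
	}
	for (int i = 0; i < num_stripes; i++)
		bio_counter_inc();	/* "noblocked" inc per stripe bio */
	bio_counter_dec();		/* submission finished */
	return num_stripes;
}

static void end_bio(void)
{
	bio_counter_dec();		/* per-stripe completion */
}

int main(void)
{
	int stripes = map_bio(2, 0);

	for (int i = 0; i < stripes; i++)
		end_bio();
	printf("bios in flight: %d\n", atomic_load(&bio_counter));
	return 0;
}

Storing fs_info in struct btrfs_bio (the volumes.h hunk below) is what allows btrfs_end_bio() to reach the counter from completion context, where no root pointer is available.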
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8b3cd142b373..80754f9dd3df 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -192,6 +192,7 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | |||
192 | 192 | ||
193 | struct btrfs_bio { | 193 | struct btrfs_bio { |
194 | atomic_t stripes_pending; | 194 | atomic_t stripes_pending; |
195 | struct btrfs_fs_info *fs_info; | ||
195 | bio_end_io_t *end_io; | 196 | bio_end_io_t *end_io; |
196 | struct bio *orig_bio; | 197 | struct bio *orig_bio; |
197 | void *private; | 198 | void *private; |