diff options
Diffstat (limited to 'fs/btrfs')
35 files changed, 4013 insertions, 2326 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index f128427b995b..361604244271 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -27,7 +27,7 @@ | |||
27 | #include "btrfs_inode.h" | 27 | #include "btrfs_inode.h" |
28 | #include "xattr.h" | 28 | #include "xattr.h" |
29 | 29 | ||
30 | #ifdef CONFIG_FS_POSIX_ACL | 30 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
31 | 31 | ||
32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | 32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) |
33 | { | 33 | { |
@@ -313,7 +313,7 @@ struct xattr_handler btrfs_xattr_acl_access_handler = { | |||
313 | .set = btrfs_xattr_acl_access_set, | 313 | .set = btrfs_xattr_acl_access_set, |
314 | }; | 314 | }; |
315 | 315 | ||
316 | #else /* CONFIG_FS_POSIX_ACL */ | 316 | #else /* CONFIG_BTRFS_FS_POSIX_ACL */ |
317 | 317 | ||
318 | int btrfs_acl_chmod(struct inode *inode) | 318 | int btrfs_acl_chmod(struct inode *inode) |
319 | { | 319 | { |
@@ -325,4 +325,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) | |||
325 | return 0; | 325 | return 0; |
326 | } | 326 | } |
327 | 327 | ||
328 | #endif /* CONFIG_FS_POSIX_ACL */ | 328 | #endif /* CONFIG_BTRFS_FS_POSIX_ACL */ |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 019e8af449ab..c0861e781cdb 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -48,6 +48,9 @@ struct btrfs_worker_thread { | |||
48 | /* number of things on the pending list */ | 48 | /* number of things on the pending list */ |
49 | atomic_t num_pending; | 49 | atomic_t num_pending; |
50 | 50 | ||
51 | /* reference counter for this struct */ | ||
52 | atomic_t refs; | ||
53 | |||
51 | unsigned long sequence; | 54 | unsigned long sequence; |
52 | 55 | ||
53 | /* protects the pending list. */ | 56 | /* protects the pending list. */ |
@@ -61,6 +64,51 @@ struct btrfs_worker_thread { | |||
61 | }; | 64 | }; |
62 | 65 | ||
63 | /* | 66 | /* |
67 | * btrfs_start_workers uses kthread_run, which can block waiting for memory | ||
68 | * for a very long time. It will actually throttle on page writeback, | ||
69 | * and so it may not make progress until after our btrfs worker threads | ||
70 | * process all of the pending work structs in their queue | ||
71 | * | ||
72 | * This means we can't use btrfs_start_workers from inside a btrfs worker | ||
73 | * thread that is used as part of cleaning dirty memory, which pretty much | ||
74 | * involves all of the worker threads. | ||
75 | * | ||
76 | * Instead we have a helper queue that never has more than one thread | ||
77 | * where we schedule thread start operations. This worker_start struct | ||
78 | * is used to contain the work and hold a pointer to the queue that needs | ||
79 | * another worker. | ||
80 | */ | ||
81 | struct worker_start { | ||
82 | struct btrfs_work work; | ||
83 | struct btrfs_workers *queue; | ||
84 | }; | ||
85 | |||
86 | static void start_new_worker_func(struct btrfs_work *work) | ||
87 | { | ||
88 | struct worker_start *start; | ||
89 | start = container_of(work, struct worker_start, work); | ||
90 | btrfs_start_workers(start->queue, 1); | ||
91 | kfree(start); | ||
92 | } | ||
93 | |||
94 | static int start_new_worker(struct btrfs_workers *queue) | ||
95 | { | ||
96 | struct worker_start *start; | ||
97 | int ret; | ||
98 | |||
99 | start = kzalloc(sizeof(*start), GFP_NOFS); | ||
100 | if (!start) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | start->work.func = start_new_worker_func; | ||
104 | start->queue = queue; | ||
105 | ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work); | ||
106 | if (ret) | ||
107 | kfree(start); | ||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | /* | ||
64 | * helper function to move a thread onto the idle list after it | 112 | * helper function to move a thread onto the idle list after it |
65 | * has finished some requests. | 113 | * has finished some requests. |
66 | */ | 114 | */ |
@@ -71,7 +119,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) | |||
71 | unsigned long flags; | 119 | unsigned long flags; |
72 | spin_lock_irqsave(&worker->workers->lock, flags); | 120 | spin_lock_irqsave(&worker->workers->lock, flags); |
73 | worker->idle = 1; | 121 | worker->idle = 1; |
74 | list_move(&worker->worker_list, &worker->workers->idle_list); | 122 | |
123 | /* the list may be empty if the worker is just starting */ | ||
124 | if (!list_empty(&worker->worker_list)) { | ||
125 | list_move(&worker->worker_list, | ||
126 | &worker->workers->idle_list); | ||
127 | } | ||
75 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 128 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
76 | } | 129 | } |
77 | } | 130 | } |
@@ -87,23 +140,51 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) | |||
87 | unsigned long flags; | 140 | unsigned long flags; |
88 | spin_lock_irqsave(&worker->workers->lock, flags); | 141 | spin_lock_irqsave(&worker->workers->lock, flags); |
89 | worker->idle = 0; | 142 | worker->idle = 0; |
90 | list_move_tail(&worker->worker_list, | 143 | |
91 | &worker->workers->worker_list); | 144 | if (!list_empty(&worker->worker_list)) { |
145 | list_move_tail(&worker->worker_list, | ||
146 | &worker->workers->worker_list); | ||
147 | } | ||
92 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 148 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
93 | } | 149 | } |
94 | } | 150 | } |
95 | 151 | ||
96 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | 152 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) |
97 | struct btrfs_work *work) | ||
98 | { | 153 | { |
154 | struct btrfs_workers *workers = worker->workers; | ||
99 | unsigned long flags; | 155 | unsigned long flags; |
100 | 156 | ||
157 | rmb(); | ||
158 | if (!workers->atomic_start_pending) | ||
159 | return; | ||
160 | |||
161 | spin_lock_irqsave(&workers->lock, flags); | ||
162 | if (!workers->atomic_start_pending) | ||
163 | goto out; | ||
164 | |||
165 | workers->atomic_start_pending = 0; | ||
166 | if (workers->num_workers + workers->num_workers_starting >= | ||
167 | workers->max_workers) | ||
168 | goto out; | ||
169 | |||
170 | workers->num_workers_starting += 1; | ||
171 | spin_unlock_irqrestore(&workers->lock, flags); | ||
172 | start_new_worker(workers); | ||
173 | return; | ||
174 | |||
175 | out: | ||
176 | spin_unlock_irqrestore(&workers->lock, flags); | ||
177 | } | ||
178 | |||
179 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | ||
180 | struct btrfs_work *work) | ||
181 | { | ||
101 | if (!workers->ordered) | 182 | if (!workers->ordered) |
102 | return 0; | 183 | return 0; |
103 | 184 | ||
104 | set_bit(WORK_DONE_BIT, &work->flags); | 185 | set_bit(WORK_DONE_BIT, &work->flags); |
105 | 186 | ||
106 | spin_lock_irqsave(&workers->lock, flags); | 187 | spin_lock(&workers->order_lock); |
107 | 188 | ||
108 | while (1) { | 189 | while (1) { |
109 | if (!list_empty(&workers->prio_order_list)) { | 190 | if (!list_empty(&workers->prio_order_list)) { |
@@ -126,45 +207,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
126 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 207 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
127 | break; | 208 | break; |
128 | 209 | ||
129 | spin_unlock_irqrestore(&workers->lock, flags); | 210 | spin_unlock(&workers->order_lock); |
130 | 211 | ||
131 | work->ordered_func(work); | 212 | work->ordered_func(work); |
132 | 213 | ||
133 | /* now take the lock again and call the freeing code */ | 214 | /* now take the lock again and call the freeing code */ |
134 | spin_lock_irqsave(&workers->lock, flags); | 215 | spin_lock(&workers->order_lock); |
135 | list_del(&work->order_list); | 216 | list_del(&work->order_list); |
136 | work->ordered_free(work); | 217 | work->ordered_free(work); |
137 | } | 218 | } |
138 | 219 | ||
139 | spin_unlock_irqrestore(&workers->lock, flags); | 220 | spin_unlock(&workers->order_lock); |
140 | return 0; | 221 | return 0; |
141 | } | 222 | } |
142 | 223 | ||
224 | static void put_worker(struct btrfs_worker_thread *worker) | ||
225 | { | ||
226 | if (atomic_dec_and_test(&worker->refs)) | ||
227 | kfree(worker); | ||
228 | } | ||
229 | |||
230 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
231 | { | ||
232 | int freeit = 0; | ||
233 | |||
234 | spin_lock_irq(&worker->lock); | ||
235 | spin_lock(&worker->workers->lock); | ||
236 | if (worker->workers->num_workers > 1 && | ||
237 | worker->idle && | ||
238 | !worker->working && | ||
239 | !list_empty(&worker->worker_list) && | ||
240 | list_empty(&worker->prio_pending) && | ||
241 | list_empty(&worker->pending) && | ||
242 | atomic_read(&worker->num_pending) == 0) { | ||
243 | freeit = 1; | ||
244 | list_del_init(&worker->worker_list); | ||
245 | worker->workers->num_workers--; | ||
246 | } | ||
247 | spin_unlock(&worker->workers->lock); | ||
248 | spin_unlock_irq(&worker->lock); | ||
249 | |||
250 | if (freeit) | ||
251 | put_worker(worker); | ||
252 | return freeit; | ||
253 | } | ||
254 | |||
255 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | ||
256 | struct list_head *prio_head, | ||
257 | struct list_head *head) | ||
258 | { | ||
259 | struct btrfs_work *work = NULL; | ||
260 | struct list_head *cur = NULL; | ||
261 | |||
262 | if(!list_empty(prio_head)) | ||
263 | cur = prio_head->next; | ||
264 | |||
265 | smp_mb(); | ||
266 | if (!list_empty(&worker->prio_pending)) | ||
267 | goto refill; | ||
268 | |||
269 | if (!list_empty(head)) | ||
270 | cur = head->next; | ||
271 | |||
272 | if (cur) | ||
273 | goto out; | ||
274 | |||
275 | refill: | ||
276 | spin_lock_irq(&worker->lock); | ||
277 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
278 | list_splice_tail_init(&worker->pending, head); | ||
279 | |||
280 | if (!list_empty(prio_head)) | ||
281 | cur = prio_head->next; | ||
282 | else if (!list_empty(head)) | ||
283 | cur = head->next; | ||
284 | spin_unlock_irq(&worker->lock); | ||
285 | |||
286 | if (!cur) | ||
287 | goto out_fail; | ||
288 | |||
289 | out: | ||
290 | work = list_entry(cur, struct btrfs_work, list); | ||
291 | |||
292 | out_fail: | ||
293 | return work; | ||
294 | } | ||
295 | |||
143 | /* | 296 | /* |
144 | * main loop for servicing work items | 297 | * main loop for servicing work items |
145 | */ | 298 | */ |
146 | static int worker_loop(void *arg) | 299 | static int worker_loop(void *arg) |
147 | { | 300 | { |
148 | struct btrfs_worker_thread *worker = arg; | 301 | struct btrfs_worker_thread *worker = arg; |
149 | struct list_head *cur; | 302 | struct list_head head; |
303 | struct list_head prio_head; | ||
150 | struct btrfs_work *work; | 304 | struct btrfs_work *work; |
305 | |||
306 | INIT_LIST_HEAD(&head); | ||
307 | INIT_LIST_HEAD(&prio_head); | ||
308 | |||
151 | do { | 309 | do { |
152 | spin_lock_irq(&worker->lock); | 310 | again: |
153 | again_locked: | ||
154 | while (1) { | 311 | while (1) { |
155 | if (!list_empty(&worker->prio_pending)) | 312 | |
156 | cur = worker->prio_pending.next; | 313 | |
157 | else if (!list_empty(&worker->pending)) | 314 | work = get_next_work(worker, &prio_head, &head); |
158 | cur = worker->pending.next; | 315 | if (!work) |
159 | else | ||
160 | break; | 316 | break; |
161 | 317 | ||
162 | work = list_entry(cur, struct btrfs_work, list); | ||
163 | list_del(&work->list); | 318 | list_del(&work->list); |
164 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 319 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
165 | 320 | ||
166 | work->worker = worker; | 321 | work->worker = worker; |
167 | spin_unlock_irq(&worker->lock); | ||
168 | 322 | ||
169 | work->func(work); | 323 | work->func(work); |
170 | 324 | ||
@@ -175,9 +329,13 @@ again_locked: | |||
175 | */ | 329 | */ |
176 | run_ordered_completions(worker->workers, work); | 330 | run_ordered_completions(worker->workers, work); |
177 | 331 | ||
178 | spin_lock_irq(&worker->lock); | 332 | check_pending_worker_creates(worker); |
179 | check_idle_worker(worker); | 333 | |
180 | } | 334 | } |
335 | |||
336 | spin_lock_irq(&worker->lock); | ||
337 | check_idle_worker(worker); | ||
338 | |||
181 | if (freezing(current)) { | 339 | if (freezing(current)) { |
182 | worker->working = 0; | 340 | worker->working = 0; |
183 | spin_unlock_irq(&worker->lock); | 341 | spin_unlock_irq(&worker->lock); |
@@ -216,8 +374,10 @@ again_locked: | |||
216 | spin_lock_irq(&worker->lock); | 374 | spin_lock_irq(&worker->lock); |
217 | set_current_state(TASK_INTERRUPTIBLE); | 375 | set_current_state(TASK_INTERRUPTIBLE); |
218 | if (!list_empty(&worker->pending) || | 376 | if (!list_empty(&worker->pending) || |
219 | !list_empty(&worker->prio_pending)) | 377 | !list_empty(&worker->prio_pending)) { |
220 | goto again_locked; | 378 | spin_unlock_irq(&worker->lock); |
379 | goto again; | ||
380 | } | ||
221 | 381 | ||
222 | /* | 382 | /* |
223 | * this makes sure we get a wakeup when someone | 383 | * this makes sure we get a wakeup when someone |
@@ -226,8 +386,13 @@ again_locked: | |||
226 | worker->working = 0; | 386 | worker->working = 0; |
227 | spin_unlock_irq(&worker->lock); | 387 | spin_unlock_irq(&worker->lock); |
228 | 388 | ||
229 | if (!kthread_should_stop()) | 389 | if (!kthread_should_stop()) { |
230 | schedule(); | 390 | schedule_timeout(HZ * 120); |
391 | if (!worker->working && | ||
392 | try_worker_shutdown(worker)) { | ||
393 | return 0; | ||
394 | } | ||
395 | } | ||
231 | } | 396 | } |
232 | __set_current_state(TASK_RUNNING); | 397 | __set_current_state(TASK_RUNNING); |
233 | } | 398 | } |
@@ -242,41 +407,61 @@ int btrfs_stop_workers(struct btrfs_workers *workers) | |||
242 | { | 407 | { |
243 | struct list_head *cur; | 408 | struct list_head *cur; |
244 | struct btrfs_worker_thread *worker; | 409 | struct btrfs_worker_thread *worker; |
410 | int can_stop; | ||
245 | 411 | ||
412 | spin_lock_irq(&workers->lock); | ||
246 | list_splice_init(&workers->idle_list, &workers->worker_list); | 413 | list_splice_init(&workers->idle_list, &workers->worker_list); |
247 | while (!list_empty(&workers->worker_list)) { | 414 | while (!list_empty(&workers->worker_list)) { |
248 | cur = workers->worker_list.next; | 415 | cur = workers->worker_list.next; |
249 | worker = list_entry(cur, struct btrfs_worker_thread, | 416 | worker = list_entry(cur, struct btrfs_worker_thread, |
250 | worker_list); | 417 | worker_list); |
251 | kthread_stop(worker->task); | 418 | |
252 | list_del(&worker->worker_list); | 419 | atomic_inc(&worker->refs); |
253 | kfree(worker); | 420 | workers->num_workers -= 1; |
421 | if (!list_empty(&worker->worker_list)) { | ||
422 | list_del_init(&worker->worker_list); | ||
423 | put_worker(worker); | ||
424 | can_stop = 1; | ||
425 | } else | ||
426 | can_stop = 0; | ||
427 | spin_unlock_irq(&workers->lock); | ||
428 | if (can_stop) | ||
429 | kthread_stop(worker->task); | ||
430 | spin_lock_irq(&workers->lock); | ||
431 | put_worker(worker); | ||
254 | } | 432 | } |
433 | spin_unlock_irq(&workers->lock); | ||
255 | return 0; | 434 | return 0; |
256 | } | 435 | } |
257 | 436 | ||
258 | /* | 437 | /* |
259 | * simple init on struct btrfs_workers | 438 | * simple init on struct btrfs_workers |
260 | */ | 439 | */ |
261 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | 440 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, |
441 | struct btrfs_workers *async_helper) | ||
262 | { | 442 | { |
263 | workers->num_workers = 0; | 443 | workers->num_workers = 0; |
444 | workers->num_workers_starting = 0; | ||
264 | INIT_LIST_HEAD(&workers->worker_list); | 445 | INIT_LIST_HEAD(&workers->worker_list); |
265 | INIT_LIST_HEAD(&workers->idle_list); | 446 | INIT_LIST_HEAD(&workers->idle_list); |
266 | INIT_LIST_HEAD(&workers->order_list); | 447 | INIT_LIST_HEAD(&workers->order_list); |
267 | INIT_LIST_HEAD(&workers->prio_order_list); | 448 | INIT_LIST_HEAD(&workers->prio_order_list); |
268 | spin_lock_init(&workers->lock); | 449 | spin_lock_init(&workers->lock); |
450 | spin_lock_init(&workers->order_lock); | ||
269 | workers->max_workers = max; | 451 | workers->max_workers = max; |
270 | workers->idle_thresh = 32; | 452 | workers->idle_thresh = 32; |
271 | workers->name = name; | 453 | workers->name = name; |
272 | workers->ordered = 0; | 454 | workers->ordered = 0; |
455 | workers->atomic_start_pending = 0; | ||
456 | workers->atomic_worker_start = async_helper; | ||
273 | } | 457 | } |
274 | 458 | ||
275 | /* | 459 | /* |
276 | * starts new worker threads. This does not enforce the max worker | 460 | * starts new worker threads. This does not enforce the max worker |
277 | * count in case you need to temporarily go past it. | 461 | * count in case you need to temporarily go past it. |
278 | */ | 462 | */ |
279 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | 463 | static int __btrfs_start_workers(struct btrfs_workers *workers, |
464 | int num_workers) | ||
280 | { | 465 | { |
281 | struct btrfs_worker_thread *worker; | 466 | struct btrfs_worker_thread *worker; |
282 | int ret = 0; | 467 | int ret = 0; |
@@ -293,7 +478,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
293 | INIT_LIST_HEAD(&worker->prio_pending); | 478 | INIT_LIST_HEAD(&worker->prio_pending); |
294 | INIT_LIST_HEAD(&worker->worker_list); | 479 | INIT_LIST_HEAD(&worker->worker_list); |
295 | spin_lock_init(&worker->lock); | 480 | spin_lock_init(&worker->lock); |
481 | |||
296 | atomic_set(&worker->num_pending, 0); | 482 | atomic_set(&worker->num_pending, 0); |
483 | atomic_set(&worker->refs, 1); | ||
297 | worker->workers = workers; | 484 | worker->workers = workers; |
298 | worker->task = kthread_run(worker_loop, worker, | 485 | worker->task = kthread_run(worker_loop, worker, |
299 | "btrfs-%s-%d", workers->name, | 486 | "btrfs-%s-%d", workers->name, |
@@ -303,11 +490,12 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
303 | kfree(worker); | 490 | kfree(worker); |
304 | goto fail; | 491 | goto fail; |
305 | } | 492 | } |
306 | |||
307 | spin_lock_irq(&workers->lock); | 493 | spin_lock_irq(&workers->lock); |
308 | list_add_tail(&worker->worker_list, &workers->idle_list); | 494 | list_add_tail(&worker->worker_list, &workers->idle_list); |
309 | worker->idle = 1; | 495 | worker->idle = 1; |
310 | workers->num_workers++; | 496 | workers->num_workers++; |
497 | workers->num_workers_starting--; | ||
498 | WARN_ON(workers->num_workers_starting < 0); | ||
311 | spin_unlock_irq(&workers->lock); | 499 | spin_unlock_irq(&workers->lock); |
312 | } | 500 | } |
313 | return 0; | 501 | return 0; |
@@ -316,6 +504,14 @@ fail: | |||
316 | return ret; | 504 | return ret; |
317 | } | 505 | } |
318 | 506 | ||
507 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | ||
508 | { | ||
509 | spin_lock_irq(&workers->lock); | ||
510 | workers->num_workers_starting += num_workers; | ||
511 | spin_unlock_irq(&workers->lock); | ||
512 | return __btrfs_start_workers(workers, num_workers); | ||
513 | } | ||
514 | |||
319 | /* | 515 | /* |
320 | * run through the list and find a worker thread that doesn't have a lot | 516 | * run through the list and find a worker thread that doesn't have a lot |
321 | * to do right now. This can return null if we aren't yet at the thread | 517 | * to do right now. This can return null if we aren't yet at the thread |
@@ -325,7 +521,10 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
325 | { | 521 | { |
326 | struct btrfs_worker_thread *worker; | 522 | struct btrfs_worker_thread *worker; |
327 | struct list_head *next; | 523 | struct list_head *next; |
328 | int enforce_min = workers->num_workers < workers->max_workers; | 524 | int enforce_min; |
525 | |||
526 | enforce_min = (workers->num_workers + workers->num_workers_starting) < | ||
527 | workers->max_workers; | ||
329 | 528 | ||
330 | /* | 529 | /* |
331 | * if we find an idle thread, don't move it to the end of the | 530 | * if we find an idle thread, don't move it to the end of the |
@@ -350,7 +549,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
350 | */ | 549 | */ |
351 | next = workers->worker_list.next; | 550 | next = workers->worker_list.next; |
352 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | 551 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); |
353 | atomic_inc(&worker->num_pending); | ||
354 | worker->sequence++; | 552 | worker->sequence++; |
355 | 553 | ||
356 | if (worker->sequence % workers->idle_thresh == 0) | 554 | if (worker->sequence % workers->idle_thresh == 0) |
@@ -367,35 +565,49 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | |||
367 | { | 565 | { |
368 | struct btrfs_worker_thread *worker; | 566 | struct btrfs_worker_thread *worker; |
369 | unsigned long flags; | 567 | unsigned long flags; |
568 | struct list_head *fallback; | ||
370 | 569 | ||
371 | again: | 570 | again: |
372 | spin_lock_irqsave(&workers->lock, flags); | 571 | spin_lock_irqsave(&workers->lock, flags); |
373 | worker = next_worker(workers); | 572 | worker = next_worker(workers); |
374 | spin_unlock_irqrestore(&workers->lock, flags); | ||
375 | 573 | ||
376 | if (!worker) { | 574 | if (!worker) { |
377 | spin_lock_irqsave(&workers->lock, flags); | 575 | if (workers->num_workers + workers->num_workers_starting >= |
378 | if (workers->num_workers >= workers->max_workers) { | 576 | workers->max_workers) { |
379 | struct list_head *fallback = NULL; | 577 | goto fallback; |
380 | /* | 578 | } else if (workers->atomic_worker_start) { |
381 | * we have failed to find any workers, just | 579 | workers->atomic_start_pending = 1; |
382 | * return the force one | 580 | goto fallback; |
383 | */ | ||
384 | if (!list_empty(&workers->worker_list)) | ||
385 | fallback = workers->worker_list.next; | ||
386 | if (!list_empty(&workers->idle_list)) | ||
387 | fallback = workers->idle_list.next; | ||
388 | BUG_ON(!fallback); | ||
389 | worker = list_entry(fallback, | ||
390 | struct btrfs_worker_thread, worker_list); | ||
391 | spin_unlock_irqrestore(&workers->lock, flags); | ||
392 | } else { | 581 | } else { |
582 | workers->num_workers_starting++; | ||
393 | spin_unlock_irqrestore(&workers->lock, flags); | 583 | spin_unlock_irqrestore(&workers->lock, flags); |
394 | /* we're below the limit, start another worker */ | 584 | /* we're below the limit, start another worker */ |
395 | btrfs_start_workers(workers, 1); | 585 | __btrfs_start_workers(workers, 1); |
396 | goto again; | 586 | goto again; |
397 | } | 587 | } |
398 | } | 588 | } |
589 | goto found; | ||
590 | |||
591 | fallback: | ||
592 | fallback = NULL; | ||
593 | /* | ||
594 | * we have failed to find any workers, just | ||
595 | * return the first one we can find. | ||
596 | */ | ||
597 | if (!list_empty(&workers->worker_list)) | ||
598 | fallback = workers->worker_list.next; | ||
599 | if (!list_empty(&workers->idle_list)) | ||
600 | fallback = workers->idle_list.next; | ||
601 | BUG_ON(!fallback); | ||
602 | worker = list_entry(fallback, | ||
603 | struct btrfs_worker_thread, worker_list); | ||
604 | found: | ||
605 | /* | ||
606 | * this makes sure the worker doesn't exit before it is placed | ||
607 | * onto a busy/idle list | ||
608 | */ | ||
609 | atomic_inc(&worker->num_pending); | ||
610 | spin_unlock_irqrestore(&workers->lock, flags); | ||
399 | return worker; | 611 | return worker; |
400 | } | 612 | } |
401 | 613 | ||
@@ -427,7 +639,7 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
427 | spin_lock(&worker->workers->lock); | 639 | spin_lock(&worker->workers->lock); |
428 | worker->idle = 0; | 640 | worker->idle = 0; |
429 | list_move_tail(&worker->worker_list, | 641 | list_move_tail(&worker->worker_list, |
430 | &worker->workers->worker_list); | 642 | &worker->workers->worker_list); |
431 | spin_unlock(&worker->workers->lock); | 643 | spin_unlock(&worker->workers->lock); |
432 | } | 644 | } |
433 | if (!worker->working) { | 645 | if (!worker->working) { |
@@ -435,9 +647,9 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
435 | worker->working = 1; | 647 | worker->working = 1; |
436 | } | 648 | } |
437 | 649 | ||
438 | spin_unlock_irqrestore(&worker->lock, flags); | ||
439 | if (wake) | 650 | if (wake) |
440 | wake_up_process(worker->task); | 651 | wake_up_process(worker->task); |
652 | spin_unlock_irqrestore(&worker->lock, flags); | ||
441 | out: | 653 | out: |
442 | 654 | ||
443 | return 0; | 655 | return 0; |
@@ -463,14 +675,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
463 | 675 | ||
464 | worker = find_worker(workers); | 676 | worker = find_worker(workers); |
465 | if (workers->ordered) { | 677 | if (workers->ordered) { |
466 | spin_lock_irqsave(&workers->lock, flags); | 678 | /* |
679 | * you're not allowed to do ordered queues from an | ||
680 | * interrupt handler | ||
681 | */ | ||
682 | spin_lock(&workers->order_lock); | ||
467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | 683 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
468 | list_add_tail(&work->order_list, | 684 | list_add_tail(&work->order_list, |
469 | &workers->prio_order_list); | 685 | &workers->prio_order_list); |
470 | } else { | 686 | } else { |
471 | list_add_tail(&work->order_list, &workers->order_list); | 687 | list_add_tail(&work->order_list, &workers->order_list); |
472 | } | 688 | } |
473 | spin_unlock_irqrestore(&workers->lock, flags); | 689 | spin_unlock(&workers->order_lock); |
474 | } else { | 690 | } else { |
475 | INIT_LIST_HEAD(&work->order_list); | 691 | INIT_LIST_HEAD(&work->order_list); |
476 | } | 692 | } |
@@ -481,7 +697,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
481 | list_add_tail(&work->list, &worker->prio_pending); | 697 | list_add_tail(&work->list, &worker->prio_pending); |
482 | else | 698 | else |
483 | list_add_tail(&work->list, &worker->pending); | 699 | list_add_tail(&work->list, &worker->pending); |
484 | atomic_inc(&worker->num_pending); | ||
485 | check_busy_worker(worker); | 700 | check_busy_worker(worker); |
486 | 701 | ||
487 | /* | 702 | /* |
@@ -492,10 +707,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
492 | wake = 1; | 707 | wake = 1; |
493 | worker->working = 1; | 708 | worker->working = 1; |
494 | 709 | ||
495 | spin_unlock_irqrestore(&worker->lock, flags); | ||
496 | |||
497 | if (wake) | 710 | if (wake) |
498 | wake_up_process(worker->task); | 711 | wake_up_process(worker->task); |
712 | spin_unlock_irqrestore(&worker->lock, flags); | ||
713 | |||
499 | out: | 714 | out: |
500 | return 0; | 715 | return 0; |
501 | } | 716 | } |
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1b511c109db6..5077746cf85e 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -64,6 +64,8 @@ struct btrfs_workers { | |||
64 | /* current number of running workers */ | 64 | /* current number of running workers */ |
65 | int num_workers; | 65 | int num_workers; |
66 | 66 | ||
67 | int num_workers_starting; | ||
68 | |||
67 | /* max number of workers allowed. changed by btrfs_start_workers */ | 69 | /* max number of workers allowed. changed by btrfs_start_workers */ |
68 | int max_workers; | 70 | int max_workers; |
69 | 71 | ||
@@ -73,6 +75,16 @@ struct btrfs_workers { | |||
73 | /* force completions in the order they were queued */ | 75 | /* force completions in the order they were queued */ |
74 | int ordered; | 76 | int ordered; |
75 | 77 | ||
78 | /* more workers required, but in an interrupt handler */ | ||
79 | int atomic_start_pending; | ||
80 | |||
81 | /* | ||
82 | * are we allowed to sleep while starting workers or are we required | ||
83 | * to start them at a later time? If we can't sleep, this indicates | ||
84 | * which queue we need to use to schedule thread creation. | ||
85 | */ | ||
86 | struct btrfs_workers *atomic_worker_start; | ||
87 | |||
76 | /* list with all the work threads. The workers on the idle thread | 88 | /* list with all the work threads. The workers on the idle thread |
77 | * may be actively servicing jobs, but they haven't yet hit the | 89 | * may be actively servicing jobs, but they haven't yet hit the |
78 | * idle thresh limit above. | 90 | * idle thresh limit above. |
@@ -90,6 +102,9 @@ struct btrfs_workers { | |||
90 | /* lock for finding the next worker thread to queue on */ | 102 | /* lock for finding the next worker thread to queue on */ |
91 | spinlock_t lock; | 103 | spinlock_t lock; |
92 | 104 | ||
105 | /* lock for the ordered lists */ | ||
106 | spinlock_t order_lock; | ||
107 | |||
93 | /* extra name for this worker, used for current->name */ | 108 | /* extra name for this worker, used for current->name */ |
94 | char *name; | 109 | char *name; |
95 | }; | 110 | }; |
@@ -97,7 +112,8 @@ struct btrfs_workers { | |||
97 | int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); | 112 | int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); |
98 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); | 113 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); |
99 | int btrfs_stop_workers(struct btrfs_workers *workers); | 114 | int btrfs_stop_workers(struct btrfs_workers *workers); |
100 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); | 115 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, |
116 | struct btrfs_workers *async_starter); | ||
101 | int btrfs_requeue_work(struct btrfs_work *work); | 117 | int btrfs_requeue_work(struct btrfs_work *work); |
102 | void btrfs_set_work_high_prio(struct btrfs_work *work); | 118 | void btrfs_set_work_high_prio(struct btrfs_work *work); |
103 | #endif | 119 | #endif |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ea1ea0af8c0e..f6783a42f010 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -86,6 +86,12 @@ struct btrfs_inode { | |||
86 | * transid of the trans_handle that last modified this inode | 86 | * transid of the trans_handle that last modified this inode |
87 | */ | 87 | */ |
88 | u64 last_trans; | 88 | u64 last_trans; |
89 | |||
90 | /* | ||
91 | * log transid when this inode was last modified | ||
92 | */ | ||
93 | u64 last_sub_trans; | ||
94 | |||
89 | /* | 95 | /* |
90 | * transid that last logged this inode | 96 | * transid that last logged this inode |
91 | */ | 97 | */ |
@@ -128,6 +134,16 @@ struct btrfs_inode { | |||
128 | u64 last_unlink_trans; | 134 | u64 last_unlink_trans; |
129 | 135 | ||
130 | /* | 136 | /* |
137 | * Counters to keep track of the number of extent items we may use due | ||
138 | * to delalloc and such. outstanding_extents is the number of extent | ||
139 | * items we think we'll end up using, and reserved_extents is the number | ||
140 | * of extent items we've reserved metadata for. | ||
141 | */ | ||
142 | spinlock_t accounting_lock; | ||
143 | int reserved_extents; | ||
144 | int outstanding_extents; | ||
145 | |||
146 | /* | ||
131 | * ordered_data_close is set by truncate when a file that used | 147 | * ordered_data_close is set by truncate when a file that used |
132 | * to have good data has been truncated to zero. When it is set | 148 | * to have good data has been truncated to zero. When it is set |
133 | * the btrfs file release call will add this inode to the | 149 | * the btrfs file release call will add this inode to the |
@@ -138,6 +154,7 @@ struct btrfs_inode { | |||
138 | * of these. | 154 | * of these. |
139 | */ | 155 | */ |
140 | unsigned ordered_data_close:1; | 156 | unsigned ordered_data_close:1; |
157 | unsigned dummy_inode:1; | ||
141 | 158 | ||
142 | struct inode vfs_inode; | 159 | struct inode vfs_inode; |
143 | }; | 160 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9d8ba4d54a37..a11a32058b50 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
506 | */ | 506 | */ |
507 | set_page_extent_mapped(page); | 507 | set_page_extent_mapped(page); |
508 | lock_extent(tree, last_offset, end, GFP_NOFS); | 508 | lock_extent(tree, last_offset, end, GFP_NOFS); |
509 | spin_lock(&em_tree->lock); | 509 | read_lock(&em_tree->lock); |
510 | em = lookup_extent_mapping(em_tree, last_offset, | 510 | em = lookup_extent_mapping(em_tree, last_offset, |
511 | PAGE_CACHE_SIZE); | 511 | PAGE_CACHE_SIZE); |
512 | spin_unlock(&em_tree->lock); | 512 | read_unlock(&em_tree->lock); |
513 | 513 | ||
514 | if (!em || last_offset < em->start || | 514 | if (!em || last_offset < em->start || |
515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || | 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || |
@@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
593 | em_tree = &BTRFS_I(inode)->extent_tree; | 593 | em_tree = &BTRFS_I(inode)->extent_tree; |
594 | 594 | ||
595 | /* we need the actual starting offset of this extent in the file */ | 595 | /* we need the actual starting offset of this extent in the file */ |
596 | spin_lock(&em_tree->lock); | 596 | read_lock(&em_tree->lock); |
597 | em = lookup_extent_mapping(em_tree, | 597 | em = lookup_extent_mapping(em_tree, |
598 | page_offset(bio->bi_io_vec->bv_page), | 598 | page_offset(bio->bi_io_vec->bv_page), |
599 | PAGE_CACHE_SIZE); | 599 | PAGE_CACHE_SIZE); |
600 | spin_unlock(&em_tree->lock); | 600 | read_unlock(&em_tree->lock); |
601 | 601 | ||
602 | compressed_len = em->block_len; | 602 | compressed_len = em->block_len; |
603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fdcc0512d3a..ec96f3a6d536 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2853 | int split; | 2853 | int split; |
2854 | int num_doubles = 0; | 2854 | int num_doubles = 0; |
2855 | 2855 | ||
2856 | l = path->nodes[0]; | ||
2857 | slot = path->slots[0]; | ||
2858 | if (extend && data_size + btrfs_item_size_nr(l, slot) + | ||
2859 | sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) | ||
2860 | return -EOVERFLOW; | ||
2861 | |||
2856 | /* first try to make some room by pushing left and right */ | 2862 | /* first try to make some room by pushing left and right */ |
2857 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2863 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { |
2858 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2864 | wret = push_leaf_right(trans, root, path, data_size, 0); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 837435ce84ca..444b3e9b92a4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -114,6 +114,10 @@ struct btrfs_ordered_sum; | |||
114 | */ | 114 | */ |
115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL | 115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL |
116 | 116 | ||
117 | #define BTRFS_BTREE_INODE_OBJECTID 1 | ||
118 | |||
119 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 | ||
120 | |||
117 | /* | 121 | /* |
118 | * we can actually store much bigger names, but lets not confuse the rest | 122 | * we can actually store much bigger names, but lets not confuse the rest |
119 | * of linux | 123 | * of linux |
@@ -670,21 +674,29 @@ struct btrfs_space_info { | |||
670 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 674 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
671 | current allocations */ | 675 | current allocations */ |
672 | u64 bytes_readonly; /* total bytes that are read only */ | 676 | u64 bytes_readonly; /* total bytes that are read only */ |
673 | 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ | |
674 | /* delalloc accounting */ | 678 | u64 bytes_root; /* the number of bytes needed to commit a |
675 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | 679 | transaction */ |
676 | this space is not necessarily reserved yet | ||
677 | by the allocator */ | ||
678 | u64 bytes_may_use; /* number of bytes that may be used for | 680 | u64 bytes_may_use; /* number of bytes that may be used for |
679 | delalloc */ | 681 | delalloc/allocations */ |
682 | u64 bytes_delalloc; /* number of bytes currently reserved for | ||
683 | delayed allocation */ | ||
680 | 684 | ||
681 | int full; /* indicates that we cannot allocate any more | 685 | int full; /* indicates that we cannot allocate any more |
682 | chunks for this space */ | 686 | chunks for this space */ |
683 | int force_alloc; /* set if we need to force a chunk alloc for | 687 | int force_alloc; /* set if we need to force a chunk alloc for |
684 | this space */ | 688 | this space */ |
689 | int force_delalloc; /* make people start doing filemap_flush until | ||
690 | we're under a threshold */ | ||
685 | 691 | ||
686 | struct list_head list; | 692 | struct list_head list; |
687 | 693 | ||
694 | /* for controlling how we free up space for allocations */ | ||
695 | wait_queue_head_t allocate_wait; | ||
696 | wait_queue_head_t flush_wait; | ||
697 | int allocating_chunk; | ||
698 | int flushing; | ||
699 | |||
688 | /* for block groups in our same type */ | 700 | /* for block groups in our same type */ |
689 | struct list_head block_groups; | 701 | struct list_head block_groups; |
690 | spinlock_t lock; | 702 | spinlock_t lock; |
@@ -726,6 +738,15 @@ enum btrfs_caching_type { | |||
726 | BTRFS_CACHE_FINISHED = 2, | 738 | BTRFS_CACHE_FINISHED = 2, |
727 | }; | 739 | }; |
728 | 740 | ||
741 | struct btrfs_caching_control { | ||
742 | struct list_head list; | ||
743 | struct mutex mutex; | ||
744 | wait_queue_head_t wait; | ||
745 | struct btrfs_block_group_cache *block_group; | ||
746 | u64 progress; | ||
747 | atomic_t count; | ||
748 | }; | ||
749 | |||
729 | struct btrfs_block_group_cache { | 750 | struct btrfs_block_group_cache { |
730 | struct btrfs_key key; | 751 | struct btrfs_key key; |
731 | struct btrfs_block_group_item item; | 752 | struct btrfs_block_group_item item; |
@@ -733,6 +754,7 @@ struct btrfs_block_group_cache { | |||
733 | spinlock_t lock; | 754 | spinlock_t lock; |
734 | u64 pinned; | 755 | u64 pinned; |
735 | u64 reserved; | 756 | u64 reserved; |
757 | u64 bytes_super; | ||
736 | u64 flags; | 758 | u64 flags; |
737 | u64 sectorsize; | 759 | u64 sectorsize; |
738 | int extents_thresh; | 760 | int extents_thresh; |
@@ -742,8 +764,9 @@ struct btrfs_block_group_cache { | |||
742 | int dirty; | 764 | int dirty; |
743 | 765 | ||
744 | /* cache tracking stuff */ | 766 | /* cache tracking stuff */ |
745 | wait_queue_head_t caching_q; | ||
746 | int cached; | 767 | int cached; |
768 | struct btrfs_caching_control *caching_ctl; | ||
769 | u64 last_byte_to_unpin; | ||
747 | 770 | ||
748 | struct btrfs_space_info *space_info; | 771 | struct btrfs_space_info *space_info; |
749 | 772 | ||
@@ -782,13 +805,16 @@ struct btrfs_fs_info { | |||
782 | 805 | ||
783 | /* the log root tree is a directory of all the other log roots */ | 806 | /* the log root tree is a directory of all the other log roots */ |
784 | struct btrfs_root *log_root_tree; | 807 | struct btrfs_root *log_root_tree; |
808 | |||
809 | spinlock_t fs_roots_radix_lock; | ||
785 | struct radix_tree_root fs_roots_radix; | 810 | struct radix_tree_root fs_roots_radix; |
786 | 811 | ||
787 | /* block group cache stuff */ | 812 | /* block group cache stuff */ |
788 | spinlock_t block_group_cache_lock; | 813 | spinlock_t block_group_cache_lock; |
789 | struct rb_root block_group_cache_tree; | 814 | struct rb_root block_group_cache_tree; |
790 | 815 | ||
791 | struct extent_io_tree pinned_extents; | 816 | struct extent_io_tree freed_extents[2]; |
817 | struct extent_io_tree *pinned_extents; | ||
792 | 818 | ||
793 | /* logical->physical extent mapping */ | 819 | /* logical->physical extent mapping */ |
794 | struct btrfs_mapping_tree mapping_tree; | 820 | struct btrfs_mapping_tree mapping_tree; |
@@ -822,11 +848,7 @@ struct btrfs_fs_info { | |||
822 | struct mutex transaction_kthread_mutex; | 848 | struct mutex transaction_kthread_mutex; |
823 | struct mutex cleaner_mutex; | 849 | struct mutex cleaner_mutex; |
824 | struct mutex chunk_mutex; | 850 | struct mutex chunk_mutex; |
825 | struct mutex drop_mutex; | ||
826 | struct mutex volume_mutex; | 851 | struct mutex volume_mutex; |
827 | struct mutex tree_reloc_mutex; | ||
828 | struct rw_semaphore extent_commit_sem; | ||
829 | |||
830 | /* | 852 | /* |
831 | * this protects the ordered operations list only while we are | 853 | * this protects the ordered operations list only while we are |
832 | * processing all of the entries on it. This way we make | 854 | * processing all of the entries on it. This way we make |
@@ -835,10 +857,16 @@ struct btrfs_fs_info { | |||
835 | * before jumping into the main commit. | 857 | * before jumping into the main commit. |
836 | */ | 858 | */ |
837 | struct mutex ordered_operations_mutex; | 859 | struct mutex ordered_operations_mutex; |
860 | struct rw_semaphore extent_commit_sem; | ||
861 | |||
862 | struct rw_semaphore subvol_sem; | ||
863 | |||
864 | struct srcu_struct subvol_srcu; | ||
838 | 865 | ||
839 | struct list_head trans_list; | 866 | struct list_head trans_list; |
840 | struct list_head hashers; | 867 | struct list_head hashers; |
841 | struct list_head dead_roots; | 868 | struct list_head dead_roots; |
869 | struct list_head caching_block_groups; | ||
842 | 870 | ||
843 | atomic_t nr_async_submits; | 871 | atomic_t nr_async_submits; |
844 | atomic_t async_submit_draining; | 872 | atomic_t async_submit_draining; |
@@ -882,6 +910,7 @@ struct btrfs_fs_info { | |||
882 | * A third pool does submit_bio to avoid deadlocking with the other | 910 | * A third pool does submit_bio to avoid deadlocking with the other |
883 | * two | 911 | * two |
884 | */ | 912 | */ |
913 | struct btrfs_workers generic_worker; | ||
885 | struct btrfs_workers workers; | 914 | struct btrfs_workers workers; |
886 | struct btrfs_workers delalloc_workers; | 915 | struct btrfs_workers delalloc_workers; |
887 | struct btrfs_workers endio_workers; | 916 | struct btrfs_workers endio_workers; |
@@ -889,6 +918,7 @@ struct btrfs_fs_info { | |||
889 | struct btrfs_workers endio_meta_write_workers; | 918 | struct btrfs_workers endio_meta_write_workers; |
890 | struct btrfs_workers endio_write_workers; | 919 | struct btrfs_workers endio_write_workers; |
891 | struct btrfs_workers submit_workers; | 920 | struct btrfs_workers submit_workers; |
921 | struct btrfs_workers enospc_workers; | ||
892 | /* | 922 | /* |
893 | * fixup workers take dirty pages that didn't properly go through | 923 | * fixup workers take dirty pages that didn't properly go through |
894 | * the cow mechanism and make them safe to write. It happens | 924 | * the cow mechanism and make them safe to write. It happens |
@@ -979,7 +1009,10 @@ struct btrfs_root { | |||
979 | atomic_t log_writers; | 1009 | atomic_t log_writers; |
980 | atomic_t log_commit[2]; | 1010 | atomic_t log_commit[2]; |
981 | unsigned long log_transid; | 1011 | unsigned long log_transid; |
1012 | unsigned long last_log_commit; | ||
982 | unsigned long log_batch; | 1013 | unsigned long log_batch; |
1014 | pid_t log_start_pid; | ||
1015 | bool log_multiple_pids; | ||
983 | 1016 | ||
984 | u64 objectid; | 1017 | u64 objectid; |
985 | u64 last_trans; | 1018 | u64 last_trans; |
@@ -996,10 +1029,12 @@ struct btrfs_root { | |||
996 | u32 stripesize; | 1029 | u32 stripesize; |
997 | 1030 | ||
998 | u32 type; | 1031 | u32 type; |
999 | u64 highest_inode; | 1032 | |
1000 | u64 last_inode_alloc; | 1033 | u64 highest_objectid; |
1001 | int ref_cows; | 1034 | int ref_cows; |
1002 | int track_dirty; | 1035 | int track_dirty; |
1036 | int in_radix; | ||
1037 | |||
1003 | u64 defrag_trans_start; | 1038 | u64 defrag_trans_start; |
1004 | struct btrfs_key defrag_progress; | 1039 | struct btrfs_key defrag_progress; |
1005 | struct btrfs_key defrag_max; | 1040 | struct btrfs_key defrag_max; |
@@ -1118,6 +1153,7 @@ struct btrfs_root { | |||
1118 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) | 1153 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) |
1119 | #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) | 1154 | #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) |
1120 | #define BTRFS_MOUNT_NOSSD (1 << 9) | 1155 | #define BTRFS_MOUNT_NOSSD (1 << 9) |
1156 | #define BTRFS_MOUNT_DISCARD (1 << 10) | ||
1121 | 1157 | ||
1122 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1158 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
1123 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1159 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
@@ -1920,8 +1956,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
1920 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1956 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1921 | struct btrfs_root *root, unsigned long count); | 1957 | struct btrfs_root *root, unsigned long count); |
1922 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1958 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1923 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1959 | int btrfs_pin_extent(struct btrfs_root *root, |
1924 | u64 bytenr, u64 num, int pin); | 1960 | u64 bytenr, u64 num, int reserved); |
1925 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1961 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1926 | struct btrfs_root *root, struct extent_buffer *leaf); | 1962 | struct btrfs_root *root, struct extent_buffer *leaf); |
1927 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1963 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
@@ -1971,9 +2007,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
1971 | u64 root_objectid, u64 owner, u64 offset); | 2007 | u64 root_objectid, u64 owner, u64 offset); |
1972 | 2008 | ||
1973 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2009 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
2010 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | ||
2011 | struct btrfs_root *root); | ||
1974 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2012 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
1975 | struct btrfs_root *root, | 2013 | struct btrfs_root *root); |
1976 | struct extent_io_tree *unpin); | ||
1977 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 2014 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1978 | struct btrfs_root *root, | 2015 | struct btrfs_root *root, |
1979 | u64 bytenr, u64 num_bytes, u64 parent, | 2016 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -1984,6 +2021,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
1984 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); | 2021 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); |
1985 | int btrfs_free_block_groups(struct btrfs_fs_info *info); | 2022 | int btrfs_free_block_groups(struct btrfs_fs_info *info); |
1986 | int btrfs_read_block_groups(struct btrfs_root *root); | 2023 | int btrfs_read_block_groups(struct btrfs_root *root); |
2024 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr); | ||
1987 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | 2025 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, |
1988 | struct btrfs_root *root, u64 bytes_used, | 2026 | struct btrfs_root *root, u64 bytes_used, |
1989 | u64 type, u64 chunk_objectid, u64 chunk_offset, | 2027 | u64 type, u64 chunk_objectid, u64 chunk_offset, |
@@ -1997,7 +2035,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | |||
1997 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2035 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
1998 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2036 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
1999 | 2037 | ||
2000 | int btrfs_check_metadata_free_space(struct btrfs_root *root); | 2038 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); |
2039 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | ||
2040 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2041 | struct inode *inode, int num_items); | ||
2042 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2043 | struct inode *inode, int num_items); | ||
2001 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2044 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, |
2002 | u64 bytes); | 2045 | u64 bytes); |
2003 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2046 | void btrfs_free_reserved_data_space(struct btrfs_root *root, |
@@ -2006,7 +2049,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | |||
2006 | u64 bytes); | 2049 | u64 bytes); |
2007 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2050 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, |
2008 | u64 bytes); | 2051 | u64 bytes); |
2009 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info); | ||
2010 | /* ctree.c */ | 2052 | /* ctree.c */ |
2011 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2053 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2012 | int level, int *slot); | 2054 | int level, int *slot); |
@@ -2100,12 +2142,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
2100 | struct extent_buffer *parent); | 2142 | struct extent_buffer *parent); |
2101 | /* root-item.c */ | 2143 | /* root-item.c */ |
2102 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2144 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
2103 | struct btrfs_path *path, | 2145 | struct btrfs_path *path, |
2104 | u64 root_id, u64 ref_id); | 2146 | u64 root_id, u64 ref_id); |
2105 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 2147 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
2106 | struct btrfs_root *tree_root, | 2148 | struct btrfs_root *tree_root, |
2107 | u64 root_id, u8 type, u64 ref_id, | 2149 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
2108 | u64 dirid, u64 sequence, | 2150 | const char *name, int name_len); |
2151 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | ||
2152 | struct btrfs_root *tree_root, | ||
2153 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, | ||
2109 | const char *name, int name_len); | 2154 | const char *name, int name_len); |
2110 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2155 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
2111 | struct btrfs_key *key); | 2156 | struct btrfs_key *key); |
@@ -2120,6 +2165,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
2120 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | 2165 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, |
2121 | u64 *found_objectid); | 2166 | u64 *found_objectid); |
2122 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2167 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
2168 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | ||
2123 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2169 | int btrfs_set_root_node(struct btrfs_root_item *item, |
2124 | struct extent_buffer *node); | 2170 | struct extent_buffer *node); |
2125 | /* dir-item.c */ | 2171 | /* dir-item.c */ |
@@ -2138,6 +2184,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
2138 | struct btrfs_path *path, u64 dir, | 2184 | struct btrfs_path *path, u64 dir, |
2139 | u64 objectid, const char *name, int name_len, | 2185 | u64 objectid, const char *name, int name_len, |
2140 | int mod); | 2186 | int mod); |
2187 | struct btrfs_dir_item * | ||
2188 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
2189 | struct btrfs_path *path, u64 dirid, | ||
2190 | const char *name, int name_len); | ||
2141 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | 2191 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, |
2142 | struct btrfs_path *path, | 2192 | struct btrfs_path *path, |
2143 | const char *name, int name_len); | 2193 | const char *name, int name_len); |
@@ -2160,6 +2210,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | |||
2160 | struct btrfs_root *root, u64 offset); | 2210 | struct btrfs_root *root, u64 offset); |
2161 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | 2211 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, |
2162 | struct btrfs_root *root, u64 offset); | 2212 | struct btrfs_root *root, u64 offset); |
2213 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); | ||
2163 | 2214 | ||
2164 | /* inode-map.c */ | 2215 | /* inode-map.c */ |
2165 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 2216 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
@@ -2232,6 +2283,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2232 | int btrfs_add_link(struct btrfs_trans_handle *trans, | 2283 | int btrfs_add_link(struct btrfs_trans_handle *trans, |
2233 | struct inode *parent_inode, struct inode *inode, | 2284 | struct inode *parent_inode, struct inode *inode, |
2234 | const char *name, int name_len, int add_backref, u64 index); | 2285 | const char *name, int name_len, int add_backref, u64 index); |
2286 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
2287 | struct btrfs_root *root, | ||
2288 | struct inode *dir, u64 objectid, | ||
2289 | const char *name, int name_len); | ||
2235 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 2290 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
2236 | struct btrfs_root *root, | 2291 | struct btrfs_root *root, |
2237 | struct inode *inode, u64 new_size, | 2292 | struct inode *inode, u64 new_size, |
@@ -2242,7 +2297,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); | |||
2242 | int btrfs_writepages(struct address_space *mapping, | 2297 | int btrfs_writepages(struct address_space *mapping, |
2243 | struct writeback_control *wbc); | 2298 | struct writeback_control *wbc); |
2244 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2299 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
2245 | struct btrfs_root *new_root, struct dentry *dentry, | 2300 | struct btrfs_root *new_root, |
2246 | u64 new_dirid, u64 alloc_hint); | 2301 | u64 new_dirid, u64 alloc_hint); |
2247 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2302 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
2248 | size_t size, struct bio *bio, unsigned long bio_flags); | 2303 | size_t size, struct bio *bio, unsigned long bio_flags); |
@@ -2258,6 +2313,7 @@ int btrfs_write_inode(struct inode *inode, int wait); | |||
2258 | void btrfs_dirty_inode(struct inode *inode); | 2313 | void btrfs_dirty_inode(struct inode *inode); |
2259 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2314 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
2260 | void btrfs_destroy_inode(struct inode *inode); | 2315 | void btrfs_destroy_inode(struct inode *inode); |
2316 | void btrfs_drop_inode(struct inode *inode); | ||
2261 | int btrfs_init_cachep(void); | 2317 | int btrfs_init_cachep(void); |
2262 | void btrfs_destroy_cachep(void); | 2318 | void btrfs_destroy_cachep(void); |
2263 | long btrfs_ioctl_trans_end(struct file *file); | 2319 | long btrfs_ioctl_trans_end(struct file *file); |
@@ -2275,6 +2331,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | |||
2275 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2331 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2276 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2332 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
2277 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2333 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
2334 | int btrfs_invalidate_inodes(struct btrfs_root *root); | ||
2335 | extern const struct dentry_operations btrfs_dentry_operations; | ||
2278 | 2336 | ||
2279 | /* ioctl.c */ | 2337 | /* ioctl.c */ |
2280 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 2338 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
@@ -2286,11 +2344,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | |||
2286 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2344 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
2287 | int skip_pinned); | 2345 | int skip_pinned); |
2288 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | 2346 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); |
2289 | extern struct file_operations btrfs_file_operations; | 2347 | extern const struct file_operations btrfs_file_operations; |
2290 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2348 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
2291 | struct btrfs_root *root, struct inode *inode, | 2349 | struct btrfs_root *root, struct inode *inode, |
2292 | u64 start, u64 end, u64 locked_end, | 2350 | u64 start, u64 end, u64 locked_end, |
2293 | u64 inline_limit, u64 *hint_block); | 2351 | u64 inline_limit, u64 *hint_block, int drop_cache); |
2294 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2352 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2295 | struct btrfs_root *root, | 2353 | struct btrfs_root *root, |
2296 | struct inode *inode, u64 start, u64 end); | 2354 | struct inode *inode, u64 start, u64 end); |
@@ -2317,7 +2375,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options); | |||
2317 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2375 | int btrfs_sync_fs(struct super_block *sb, int wait); |
2318 | 2376 | ||
2319 | /* acl.c */ | 2377 | /* acl.c */ |
2320 | #ifdef CONFIG_FS_POSIX_ACL | 2378 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
2321 | int btrfs_check_acl(struct inode *inode, int mask); | 2379 | int btrfs_check_acl(struct inode *inode, int mask); |
2322 | #else | 2380 | #else |
2323 | #define btrfs_check_acl NULL | 2381 | #define btrfs_check_acl NULL |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 1d70236ba00c..f3a6075519cc 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
281 | return btrfs_match_dir_item_name(root, path, name, name_len); | 281 | return btrfs_match_dir_item_name(root, path, name, name_len); |
282 | } | 282 | } |
283 | 283 | ||
284 | struct btrfs_dir_item * | ||
285 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
286 | struct btrfs_path *path, u64 dirid, | ||
287 | const char *name, int name_len) | ||
288 | { | ||
289 | struct extent_buffer *leaf; | ||
290 | struct btrfs_dir_item *di; | ||
291 | struct btrfs_key key; | ||
292 | u32 nritems; | ||
293 | int ret; | ||
294 | |||
295 | key.objectid = dirid; | ||
296 | key.type = BTRFS_DIR_INDEX_KEY; | ||
297 | key.offset = 0; | ||
298 | |||
299 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
300 | if (ret < 0) | ||
301 | return ERR_PTR(ret); | ||
302 | |||
303 | leaf = path->nodes[0]; | ||
304 | nritems = btrfs_header_nritems(leaf); | ||
305 | |||
306 | while (1) { | ||
307 | if (path->slots[0] >= nritems) { | ||
308 | ret = btrfs_next_leaf(root, path); | ||
309 | if (ret < 0) | ||
310 | return ERR_PTR(ret); | ||
311 | if (ret > 0) | ||
312 | break; | ||
313 | leaf = path->nodes[0]; | ||
314 | nritems = btrfs_header_nritems(leaf); | ||
315 | continue; | ||
316 | } | ||
317 | |||
318 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
319 | if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY) | ||
320 | break; | ||
321 | |||
322 | di = btrfs_match_dir_item_name(root, path, name, name_len); | ||
323 | if (di) | ||
324 | return di; | ||
325 | |||
326 | path->slots[0]++; | ||
327 | } | ||
328 | return NULL; | ||
329 | } | ||
330 | |||
284 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | 331 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, |
285 | struct btrfs_root *root, | 332 | struct btrfs_root *root, |
286 | struct btrfs_path *path, u64 dir, | 333 | struct btrfs_path *path, u64 dir, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8b8192790011..02b6afbd7450 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -41,6 +41,7 @@ | |||
41 | 41 | ||
42 | static struct extent_io_ops btree_extent_io_ops; | 42 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 43 | static void end_workqueue_fn(struct btrfs_work *work); |
44 | static void free_fs_root(struct btrfs_root *root); | ||
44 | 45 | ||
45 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | 46 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); |
46 | 47 | ||
@@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
123 | struct extent_map *em; | 124 | struct extent_map *em; |
124 | int ret; | 125 | int ret; |
125 | 126 | ||
126 | spin_lock(&em_tree->lock); | 127 | read_lock(&em_tree->lock); |
127 | em = lookup_extent_mapping(em_tree, start, len); | 128 | em = lookup_extent_mapping(em_tree, start, len); |
128 | if (em) { | 129 | if (em) { |
129 | em->bdev = | 130 | em->bdev = |
130 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 131 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
131 | spin_unlock(&em_tree->lock); | 132 | read_unlock(&em_tree->lock); |
132 | goto out; | 133 | goto out; |
133 | } | 134 | } |
134 | spin_unlock(&em_tree->lock); | 135 | read_unlock(&em_tree->lock); |
135 | 136 | ||
136 | em = alloc_extent_map(GFP_NOFS); | 137 | em = alloc_extent_map(GFP_NOFS); |
137 | if (!em) { | 138 | if (!em) { |
@@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
144 | em->block_start = 0; | 145 | em->block_start = 0; |
145 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 146 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
146 | 147 | ||
147 | spin_lock(&em_tree->lock); | 148 | write_lock(&em_tree->lock); |
148 | ret = add_extent_mapping(em_tree, em); | 149 | ret = add_extent_mapping(em_tree, em); |
149 | if (ret == -EEXIST) { | 150 | if (ret == -EEXIST) { |
150 | u64 failed_start = em->start; | 151 | u64 failed_start = em->start; |
@@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
163 | free_extent_map(em); | 164 | free_extent_map(em); |
164 | em = NULL; | 165 | em = NULL; |
165 | } | 166 | } |
166 | spin_unlock(&em_tree->lock); | 167 | write_unlock(&em_tree->lock); |
167 | 168 | ||
168 | if (ret) | 169 | if (ret) |
169 | em = ERR_PTR(ret); | 170 | em = ERR_PTR(ret); |
@@ -772,7 +773,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
772 | } | 773 | } |
773 | } | 774 | } |
774 | 775 | ||
775 | static struct address_space_operations btree_aops = { | 776 | static const struct address_space_operations btree_aops = { |
776 | .readpage = btree_readpage, | 777 | .readpage = btree_readpage, |
777 | .writepage = btree_writepage, | 778 | .writepage = btree_writepage, |
778 | .writepages = btree_writepages, | 779 | .writepages = btree_writepages, |
@@ -821,14 +822,14 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
821 | 822 | ||
822 | int btrfs_write_tree_block(struct extent_buffer *buf) | 823 | int btrfs_write_tree_block(struct extent_buffer *buf) |
823 | { | 824 | { |
824 | return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, | 825 | return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, |
825 | buf->start + buf->len - 1, WB_SYNC_ALL); | 826 | buf->start + buf->len - 1); |
826 | } | 827 | } |
827 | 828 | ||
828 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | 829 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) |
829 | { | 830 | { |
830 | return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, | 831 | return filemap_fdatawait_range(buf->first_page->mapping, |
831 | buf->start, buf->start + buf->len - 1); | 832 | buf->start, buf->start + buf->len - 1); |
832 | } | 833 | } |
833 | 834 | ||
834 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 835 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
@@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
895 | root->fs_info = fs_info; | 896 | root->fs_info = fs_info; |
896 | root->objectid = objectid; | 897 | root->objectid = objectid; |
897 | root->last_trans = 0; | 898 | root->last_trans = 0; |
898 | root->highest_inode = 0; | 899 | root->highest_objectid = 0; |
899 | root->last_inode_alloc = 0; | ||
900 | root->name = NULL; | 900 | root->name = NULL; |
901 | root->in_sysfs = 0; | 901 | root->in_sysfs = 0; |
902 | root->inode_tree.rb_node = NULL; | 902 | root->inode_tree.rb_node = NULL; |
@@ -917,6 +917,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
917 | atomic_set(&root->log_writers, 0); | 917 | atomic_set(&root->log_writers, 0); |
918 | root->log_batch = 0; | 918 | root->log_batch = 0; |
919 | root->log_transid = 0; | 919 | root->log_transid = 0; |
920 | root->last_log_commit = 0; | ||
920 | extent_io_tree_init(&root->dirty_log_pages, | 921 | extent_io_tree_init(&root->dirty_log_pages, |
921 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 922 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
922 | 923 | ||
@@ -952,14 +953,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
952 | root, fs_info, objectid); | 953 | root, fs_info, objectid); |
953 | ret = btrfs_find_last_root(tree_root, objectid, | 954 | ret = btrfs_find_last_root(tree_root, objectid, |
954 | &root->root_item, &root->root_key); | 955 | &root->root_item, &root->root_key); |
956 | if (ret > 0) | ||
957 | return -ENOENT; | ||
955 | BUG_ON(ret); | 958 | BUG_ON(ret); |
956 | 959 | ||
957 | generation = btrfs_root_generation(&root->root_item); | 960 | generation = btrfs_root_generation(&root->root_item); |
958 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 961 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
959 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 962 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
960 | blocksize, generation); | 963 | blocksize, generation); |
961 | root->commit_root = btrfs_root_node(root); | ||
962 | BUG_ON(!root->node); | 964 | BUG_ON(!root->node); |
965 | root->commit_root = btrfs_root_node(root); | ||
963 | return 0; | 966 | return 0; |
964 | } | 967 | } |
965 | 968 | ||
@@ -1085,6 +1088,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1085 | WARN_ON(root->log_root); | 1088 | WARN_ON(root->log_root); |
1086 | root->log_root = log_root; | 1089 | root->log_root = log_root; |
1087 | root->log_transid = 0; | 1090 | root->log_transid = 0; |
1091 | root->last_log_commit = 0; | ||
1088 | return 0; | 1092 | return 0; |
1089 | } | 1093 | } |
1090 | 1094 | ||
@@ -1095,7 +1099,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1095 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1099 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1096 | struct btrfs_path *path; | 1100 | struct btrfs_path *path; |
1097 | struct extent_buffer *l; | 1101 | struct extent_buffer *l; |
1098 | u64 highest_inode; | ||
1099 | u64 generation; | 1102 | u64 generation; |
1100 | u32 blocksize; | 1103 | u32 blocksize; |
1101 | int ret = 0; | 1104 | int ret = 0; |
@@ -1110,7 +1113,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1110 | kfree(root); | 1113 | kfree(root); |
1111 | return ERR_PTR(ret); | 1114 | return ERR_PTR(ret); |
1112 | } | 1115 | } |
1113 | goto insert; | 1116 | goto out; |
1114 | } | 1117 | } |
1115 | 1118 | ||
1116 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1119 | __setup_root(tree_root->nodesize, tree_root->leafsize, |
@@ -1120,39 +1123,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1120 | path = btrfs_alloc_path(); | 1123 | path = btrfs_alloc_path(); |
1121 | BUG_ON(!path); | 1124 | BUG_ON(!path); |
1122 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1125 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
1123 | if (ret != 0) { | 1126 | if (ret == 0) { |
1124 | if (ret > 0) | 1127 | l = path->nodes[0]; |
1125 | ret = -ENOENT; | 1128 | read_extent_buffer(l, &root->root_item, |
1126 | goto out; | 1129 | btrfs_item_ptr_offset(l, path->slots[0]), |
1130 | sizeof(root->root_item)); | ||
1131 | memcpy(&root->root_key, location, sizeof(*location)); | ||
1127 | } | 1132 | } |
1128 | l = path->nodes[0]; | ||
1129 | read_extent_buffer(l, &root->root_item, | ||
1130 | btrfs_item_ptr_offset(l, path->slots[0]), | ||
1131 | sizeof(root->root_item)); | ||
1132 | memcpy(&root->root_key, location, sizeof(*location)); | ||
1133 | ret = 0; | ||
1134 | out: | ||
1135 | btrfs_release_path(root, path); | ||
1136 | btrfs_free_path(path); | 1133 | btrfs_free_path(path); |
1137 | if (ret) { | 1134 | if (ret) { |
1138 | kfree(root); | 1135 | if (ret > 0) |
1136 | ret = -ENOENT; | ||
1139 | return ERR_PTR(ret); | 1137 | return ERR_PTR(ret); |
1140 | } | 1138 | } |
1139 | |||
1141 | generation = btrfs_root_generation(&root->root_item); | 1140 | generation = btrfs_root_generation(&root->root_item); |
1142 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1141 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
1143 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1142 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1144 | blocksize, generation); | 1143 | blocksize, generation); |
1145 | root->commit_root = btrfs_root_node(root); | 1144 | root->commit_root = btrfs_root_node(root); |
1146 | BUG_ON(!root->node); | 1145 | BUG_ON(!root->node); |
1147 | insert: | 1146 | out: |
1148 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1147 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) |
1149 | root->ref_cows = 1; | 1148 | root->ref_cows = 1; |
1150 | ret = btrfs_find_highest_inode(root, &highest_inode); | 1149 | |
1151 | if (ret == 0) { | ||
1152 | root->highest_inode = highest_inode; | ||
1153 | root->last_inode_alloc = highest_inode; | ||
1154 | } | ||
1155 | } | ||
1156 | return root; | 1150 | return root; |
1157 | } | 1151 | } |
1158 | 1152 | ||
@@ -1187,39 +1181,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
1187 | return fs_info->dev_root; | 1181 | return fs_info->dev_root; |
1188 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | 1182 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) |
1189 | return fs_info->csum_root; | 1183 | return fs_info->csum_root; |
1190 | 1184 | again: | |
1185 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
1191 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | 1186 | root = radix_tree_lookup(&fs_info->fs_roots_radix, |
1192 | (unsigned long)location->objectid); | 1187 | (unsigned long)location->objectid); |
1188 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1193 | if (root) | 1189 | if (root) |
1194 | return root; | 1190 | return root; |
1195 | 1191 | ||
1192 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1193 | if (ret == 0) | ||
1194 | ret = -ENOENT; | ||
1195 | if (ret < 0) | ||
1196 | return ERR_PTR(ret); | ||
1197 | |||
1196 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1198 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
1197 | if (IS_ERR(root)) | 1199 | if (IS_ERR(root)) |
1198 | return root; | 1200 | return root; |
1199 | 1201 | ||
1202 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
1200 | set_anon_super(&root->anon_super, NULL); | 1203 | set_anon_super(&root->anon_super, NULL); |
1201 | 1204 | ||
1205 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
1206 | if (ret) | ||
1207 | goto fail; | ||
1208 | |||
1209 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
1202 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1210 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
1203 | (unsigned long)root->root_key.objectid, | 1211 | (unsigned long)root->root_key.objectid, |
1204 | root); | 1212 | root); |
1213 | if (ret == 0) | ||
1214 | root->in_radix = 1; | ||
1215 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
1216 | radix_tree_preload_end(); | ||
1205 | if (ret) { | 1217 | if (ret) { |
1206 | free_extent_buffer(root->node); | 1218 | if (ret == -EEXIST) { |
1207 | kfree(root); | 1219 | free_fs_root(root); |
1208 | return ERR_PTR(ret); | 1220 | goto again; |
1221 | } | ||
1222 | goto fail; | ||
1209 | } | 1223 | } |
1210 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 1224 | |
1211 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 1225 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
1212 | root->root_key.objectid); | 1226 | root->root_key.objectid); |
1213 | BUG_ON(ret); | 1227 | WARN_ON(ret); |
1228 | |||
1229 | if (!(fs_info->sb->s_flags & MS_RDONLY)) | ||
1214 | btrfs_orphan_cleanup(root); | 1230 | btrfs_orphan_cleanup(root); |
1215 | } | 1231 | |
1216 | return root; | 1232 | return root; |
1233 | fail: | ||
1234 | free_fs_root(root); | ||
1235 | return ERR_PTR(ret); | ||
1217 | } | 1236 | } |
1218 | 1237 | ||
1219 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | 1238 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, |
1220 | struct btrfs_key *location, | 1239 | struct btrfs_key *location, |
1221 | const char *name, int namelen) | 1240 | const char *name, int namelen) |
1222 | { | 1241 | { |
1242 | return btrfs_read_fs_root_no_name(fs_info, location); | ||
1243 | #if 0 | ||
1223 | struct btrfs_root *root; | 1244 | struct btrfs_root *root; |
1224 | int ret; | 1245 | int ret; |
1225 | 1246 | ||
@@ -1236,7 +1257,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
1236 | kfree(root); | 1257 | kfree(root); |
1237 | return ERR_PTR(ret); | 1258 | return ERR_PTR(ret); |
1238 | } | 1259 | } |
1239 | #if 0 | 1260 | |
1240 | ret = btrfs_sysfs_add_root(root); | 1261 | ret = btrfs_sysfs_add_root(root); |
1241 | if (ret) { | 1262 | if (ret) { |
1242 | free_extent_buffer(root->node); | 1263 | free_extent_buffer(root->node); |
@@ -1244,9 +1265,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
1244 | kfree(root); | 1265 | kfree(root); |
1245 | return ERR_PTR(ret); | 1266 | return ERR_PTR(ret); |
1246 | } | 1267 | } |
1247 | #endif | ||
1248 | root->in_sysfs = 1; | 1268 | root->in_sysfs = 1; |
1249 | return root; | 1269 | return root; |
1270 | #endif | ||
1250 | } | 1271 | } |
1251 | 1272 | ||
1252 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) | 1273 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) |
@@ -1325,9 +1346,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | |||
1325 | offset = page_offset(page); | 1346 | offset = page_offset(page); |
1326 | 1347 | ||
1327 | em_tree = &BTRFS_I(inode)->extent_tree; | 1348 | em_tree = &BTRFS_I(inode)->extent_tree; |
1328 | spin_lock(&em_tree->lock); | 1349 | read_lock(&em_tree->lock); |
1329 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | 1350 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); |
1330 | spin_unlock(&em_tree->lock); | 1351 | read_unlock(&em_tree->lock); |
1331 | if (!em) { | 1352 | if (!em) { |
1332 | __unplug_io_fn(bdi, page); | 1353 | __unplug_io_fn(bdi, page); |
1333 | return; | 1354 | return; |
@@ -1360,8 +1381,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1360 | 1381 | ||
1361 | err = bdi_register(bdi, NULL, "btrfs-%d", | 1382 | err = bdi_register(bdi, NULL, "btrfs-%d", |
1362 | atomic_inc_return(&btrfs_bdi_num)); | 1383 | atomic_inc_return(&btrfs_bdi_num)); |
1363 | if (err) | 1384 | if (err) { |
1385 | bdi_destroy(bdi); | ||
1364 | return err; | 1386 | return err; |
1387 | } | ||
1365 | 1388 | ||
1366 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1389 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
1367 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1390 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
@@ -1451,9 +1474,12 @@ static int cleaner_kthread(void *arg) | |||
1451 | break; | 1474 | break; |
1452 | 1475 | ||
1453 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1476 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1454 | mutex_lock(&root->fs_info->cleaner_mutex); | 1477 | |
1455 | btrfs_clean_old_snapshots(root); | 1478 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
1456 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1479 | mutex_trylock(&root->fs_info->cleaner_mutex)) { |
1480 | btrfs_clean_old_snapshots(root); | ||
1481 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
1482 | } | ||
1457 | 1483 | ||
1458 | if (freezing(current)) { | 1484 | if (freezing(current)) { |
1459 | refrigerator(); | 1485 | refrigerator(); |
@@ -1558,15 +1584,36 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1558 | err = -ENOMEM; | 1584 | err = -ENOMEM; |
1559 | goto fail; | 1585 | goto fail; |
1560 | } | 1586 | } |
1561 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); | 1587 | |
1588 | ret = init_srcu_struct(&fs_info->subvol_srcu); | ||
1589 | if (ret) { | ||
1590 | err = ret; | ||
1591 | goto fail; | ||
1592 | } | ||
1593 | |||
1594 | ret = setup_bdi(fs_info, &fs_info->bdi); | ||
1595 | if (ret) { | ||
1596 | err = ret; | ||
1597 | goto fail_srcu; | ||
1598 | } | ||
1599 | |||
1600 | fs_info->btree_inode = new_inode(sb); | ||
1601 | if (!fs_info->btree_inode) { | ||
1602 | err = -ENOMEM; | ||
1603 | goto fail_bdi; | ||
1604 | } | ||
1605 | |||
1606 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | ||
1562 | INIT_LIST_HEAD(&fs_info->trans_list); | 1607 | INIT_LIST_HEAD(&fs_info->trans_list); |
1563 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1608 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1564 | INIT_LIST_HEAD(&fs_info->hashers); | 1609 | INIT_LIST_HEAD(&fs_info->hashers); |
1565 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1610 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1566 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1611 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
1612 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | ||
1567 | spin_lock_init(&fs_info->delalloc_lock); | 1613 | spin_lock_init(&fs_info->delalloc_lock); |
1568 | spin_lock_init(&fs_info->new_trans_lock); | 1614 | spin_lock_init(&fs_info->new_trans_lock); |
1569 | spin_lock_init(&fs_info->ref_cache_lock); | 1615 | spin_lock_init(&fs_info->ref_cache_lock); |
1616 | spin_lock_init(&fs_info->fs_roots_radix_lock); | ||
1570 | 1617 | ||
1571 | init_completion(&fs_info->kobj_unregister); | 1618 | init_completion(&fs_info->kobj_unregister); |
1572 | fs_info->tree_root = tree_root; | 1619 | fs_info->tree_root = tree_root; |
@@ -1585,12 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1585 | fs_info->sb = sb; | 1632 | fs_info->sb = sb; |
1586 | fs_info->max_extent = (u64)-1; | 1633 | fs_info->max_extent = (u64)-1; |
1587 | fs_info->max_inline = 8192 * 1024; | 1634 | fs_info->max_inline = 8192 * 1024; |
1588 | if (setup_bdi(fs_info, &fs_info->bdi)) | 1635 | fs_info->metadata_ratio = 0; |
1589 | goto fail_bdi; | ||
1590 | fs_info->btree_inode = new_inode(sb); | ||
1591 | fs_info->btree_inode->i_ino = 1; | ||
1592 | fs_info->btree_inode->i_nlink = 1; | ||
1593 | fs_info->metadata_ratio = 8; | ||
1594 | 1636 | ||
1595 | fs_info->thread_pool_size = min_t(unsigned long, | 1637 | fs_info->thread_pool_size = min_t(unsigned long, |
1596 | num_online_cpus() + 2, 8); | 1638 | num_online_cpus() + 2, 8); |
@@ -1602,6 +1644,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1602 | sb->s_blocksize_bits = blksize_bits(4096); | 1644 | sb->s_blocksize_bits = blksize_bits(4096); |
1603 | sb->s_bdi = &fs_info->bdi; | 1645 | sb->s_bdi = &fs_info->bdi; |
1604 | 1646 | ||
1647 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; | ||
1648 | fs_info->btree_inode->i_nlink = 1; | ||
1605 | /* | 1649 | /* |
1606 | * we set the i_size on the btree inode to the max possible int. | 1650 | * we set the i_size on the btree inode to the max possible int. |
1607 | * the real end of the address space is determined by all of | 1651 | * the real end of the address space is determined by all of |
@@ -1620,28 +1664,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1620 | 1664 | ||
1621 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | 1665 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; |
1622 | 1666 | ||
1667 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
1668 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
1669 | sizeof(struct btrfs_key)); | ||
1670 | BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; | ||
1671 | insert_inode_hash(fs_info->btree_inode); | ||
1672 | |||
1623 | spin_lock_init(&fs_info->block_group_cache_lock); | 1673 | spin_lock_init(&fs_info->block_group_cache_lock); |
1624 | fs_info->block_group_cache_tree.rb_node = NULL; | 1674 | fs_info->block_group_cache_tree.rb_node = NULL; |
1625 | 1675 | ||
1626 | extent_io_tree_init(&fs_info->pinned_extents, | 1676 | extent_io_tree_init(&fs_info->freed_extents[0], |
1627 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1677 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1678 | extent_io_tree_init(&fs_info->freed_extents[1], | ||
1679 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1680 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
1628 | fs_info->do_barriers = 1; | 1681 | fs_info->do_barriers = 1; |
1629 | 1682 | ||
1630 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
1631 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
1632 | sizeof(struct btrfs_key)); | ||
1633 | insert_inode_hash(fs_info->btree_inode); | ||
1634 | 1683 | ||
1635 | mutex_init(&fs_info->trans_mutex); | 1684 | mutex_init(&fs_info->trans_mutex); |
1636 | mutex_init(&fs_info->ordered_operations_mutex); | 1685 | mutex_init(&fs_info->ordered_operations_mutex); |
1637 | mutex_init(&fs_info->tree_log_mutex); | 1686 | mutex_init(&fs_info->tree_log_mutex); |
1638 | mutex_init(&fs_info->drop_mutex); | ||
1639 | mutex_init(&fs_info->chunk_mutex); | 1687 | mutex_init(&fs_info->chunk_mutex); |
1640 | mutex_init(&fs_info->transaction_kthread_mutex); | 1688 | mutex_init(&fs_info->transaction_kthread_mutex); |
1641 | mutex_init(&fs_info->cleaner_mutex); | 1689 | mutex_init(&fs_info->cleaner_mutex); |
1642 | mutex_init(&fs_info->volume_mutex); | 1690 | mutex_init(&fs_info->volume_mutex); |
1643 | mutex_init(&fs_info->tree_reloc_mutex); | ||
1644 | init_rwsem(&fs_info->extent_commit_sem); | 1691 | init_rwsem(&fs_info->extent_commit_sem); |
1692 | init_rwsem(&fs_info->subvol_sem); | ||
1645 | 1693 | ||
1646 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 1694 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
1647 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | 1695 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); |
@@ -1701,20 +1749,24 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1701 | goto fail_iput; | 1749 | goto fail_iput; |
1702 | } | 1750 | } |
1703 | 1751 | ||
1704 | /* | 1752 | btrfs_init_workers(&fs_info->generic_worker, |
1705 | * we need to start all the end_io workers up front because the | 1753 | "genwork", 1, NULL); |
1706 | * queue work function gets called at interrupt time, and so it | 1754 | |
1707 | * cannot dynamically grow. | ||
1708 | */ | ||
1709 | btrfs_init_workers(&fs_info->workers, "worker", | 1755 | btrfs_init_workers(&fs_info->workers, "worker", |
1710 | fs_info->thread_pool_size); | 1756 | fs_info->thread_pool_size, |
1757 | &fs_info->generic_worker); | ||
1711 | 1758 | ||
1712 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 1759 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", |
1713 | fs_info->thread_pool_size); | 1760 | fs_info->thread_pool_size, |
1761 | &fs_info->generic_worker); | ||
1714 | 1762 | ||
1715 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 1763 | btrfs_init_workers(&fs_info->submit_workers, "submit", |
1716 | min_t(u64, fs_devices->num_devices, | 1764 | min_t(u64, fs_devices->num_devices, |
1717 | fs_info->thread_pool_size)); | 1765 | fs_info->thread_pool_size), |
1766 | &fs_info->generic_worker); | ||
1767 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
1768 | fs_info->thread_pool_size, | ||
1769 | &fs_info->generic_worker); | ||
1718 | 1770 | ||
1719 | /* a higher idle thresh on the submit workers makes it much more | 1771 | /* a higher idle thresh on the submit workers makes it much more |
1720 | * likely that bios will be send down in a sane order to the | 1772 | * likely that bios will be send down in a sane order to the |
@@ -1728,15 +1780,20 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1728 | fs_info->delalloc_workers.idle_thresh = 2; | 1780 | fs_info->delalloc_workers.idle_thresh = 2; |
1729 | fs_info->delalloc_workers.ordered = 1; | 1781 | fs_info->delalloc_workers.ordered = 1; |
1730 | 1782 | ||
1731 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); | 1783 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, |
1784 | &fs_info->generic_worker); | ||
1732 | btrfs_init_workers(&fs_info->endio_workers, "endio", | 1785 | btrfs_init_workers(&fs_info->endio_workers, "endio", |
1733 | fs_info->thread_pool_size); | 1786 | fs_info->thread_pool_size, |
1787 | &fs_info->generic_worker); | ||
1734 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | 1788 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", |
1735 | fs_info->thread_pool_size); | 1789 | fs_info->thread_pool_size, |
1790 | &fs_info->generic_worker); | ||
1736 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | 1791 | btrfs_init_workers(&fs_info->endio_meta_write_workers, |
1737 | "endio-meta-write", fs_info->thread_pool_size); | 1792 | "endio-meta-write", fs_info->thread_pool_size, |
1793 | &fs_info->generic_worker); | ||
1738 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | 1794 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", |
1739 | fs_info->thread_pool_size); | 1795 | fs_info->thread_pool_size, |
1796 | &fs_info->generic_worker); | ||
1740 | 1797 | ||
1741 | /* | 1798 | /* |
1742 | * endios are largely parallel and should have a very | 1799 | * endios are largely parallel and should have a very |
@@ -1745,20 +1802,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1745 | fs_info->endio_workers.idle_thresh = 4; | 1802 | fs_info->endio_workers.idle_thresh = 4; |
1746 | fs_info->endio_meta_workers.idle_thresh = 4; | 1803 | fs_info->endio_meta_workers.idle_thresh = 4; |
1747 | 1804 | ||
1748 | fs_info->endio_write_workers.idle_thresh = 64; | 1805 | fs_info->endio_write_workers.idle_thresh = 2; |
1749 | fs_info->endio_meta_write_workers.idle_thresh = 64; | 1806 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
1750 | 1807 | ||
1751 | btrfs_start_workers(&fs_info->workers, 1); | 1808 | btrfs_start_workers(&fs_info->workers, 1); |
1809 | btrfs_start_workers(&fs_info->generic_worker, 1); | ||
1752 | btrfs_start_workers(&fs_info->submit_workers, 1); | 1810 | btrfs_start_workers(&fs_info->submit_workers, 1); |
1753 | btrfs_start_workers(&fs_info->delalloc_workers, 1); | 1811 | btrfs_start_workers(&fs_info->delalloc_workers, 1); |
1754 | btrfs_start_workers(&fs_info->fixup_workers, 1); | 1812 | btrfs_start_workers(&fs_info->fixup_workers, 1); |
1755 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | 1813 | btrfs_start_workers(&fs_info->endio_workers, 1); |
1756 | btrfs_start_workers(&fs_info->endio_meta_workers, | 1814 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1757 | fs_info->thread_pool_size); | 1815 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1758 | btrfs_start_workers(&fs_info->endio_meta_write_workers, | 1816 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1759 | fs_info->thread_pool_size); | 1817 | btrfs_start_workers(&fs_info->enospc_workers, 1); |
1760 | btrfs_start_workers(&fs_info->endio_write_workers, | ||
1761 | fs_info->thread_pool_size); | ||
1762 | 1818 | ||
1763 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1819 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1764 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1820 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1918,6 +1974,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1918 | } | 1974 | } |
1919 | } | 1975 | } |
1920 | 1976 | ||
1977 | ret = btrfs_find_orphan_roots(tree_root); | ||
1978 | BUG_ON(ret); | ||
1979 | |||
1921 | if (!(sb->s_flags & MS_RDONLY)) { | 1980 | if (!(sb->s_flags & MS_RDONLY)) { |
1922 | ret = btrfs_recover_relocation(tree_root); | 1981 | ret = btrfs_recover_relocation(tree_root); |
1923 | BUG_ON(ret); | 1982 | BUG_ON(ret); |
@@ -1961,6 +2020,7 @@ fail_chunk_root: | |||
1961 | free_extent_buffer(chunk_root->node); | 2020 | free_extent_buffer(chunk_root->node); |
1962 | free_extent_buffer(chunk_root->commit_root); | 2021 | free_extent_buffer(chunk_root->commit_root); |
1963 | fail_sb_buffer: | 2022 | fail_sb_buffer: |
2023 | btrfs_stop_workers(&fs_info->generic_worker); | ||
1964 | btrfs_stop_workers(&fs_info->fixup_workers); | 2024 | btrfs_stop_workers(&fs_info->fixup_workers); |
1965 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2025 | btrfs_stop_workers(&fs_info->delalloc_workers); |
1966 | btrfs_stop_workers(&fs_info->workers); | 2026 | btrfs_stop_workers(&fs_info->workers); |
@@ -1969,6 +2029,7 @@ fail_sb_buffer: | |||
1969 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2029 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
1970 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2030 | btrfs_stop_workers(&fs_info->endio_write_workers); |
1971 | btrfs_stop_workers(&fs_info->submit_workers); | 2031 | btrfs_stop_workers(&fs_info->submit_workers); |
2032 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
1972 | fail_iput: | 2033 | fail_iput: |
1973 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2034 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
1974 | iput(fs_info->btree_inode); | 2035 | iput(fs_info->btree_inode); |
@@ -1977,6 +2038,8 @@ fail_iput: | |||
1977 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2038 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
1978 | fail_bdi: | 2039 | fail_bdi: |
1979 | bdi_destroy(&fs_info->bdi); | 2040 | bdi_destroy(&fs_info->bdi); |
2041 | fail_srcu: | ||
2042 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
1980 | fail: | 2043 | fail: |
1981 | kfree(extent_root); | 2044 | kfree(extent_root); |
1982 | kfree(tree_root); | 2045 | kfree(tree_root); |
@@ -2236,20 +2299,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
2236 | 2299 | ||
2237 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2300 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
2238 | { | 2301 | { |
2239 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 2302 | spin_lock(&fs_info->fs_roots_radix_lock); |
2240 | radix_tree_delete(&fs_info->fs_roots_radix, | 2303 | radix_tree_delete(&fs_info->fs_roots_radix, |
2241 | (unsigned long)root->root_key.objectid); | 2304 | (unsigned long)root->root_key.objectid); |
2305 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
2306 | |||
2307 | if (btrfs_root_refs(&root->root_item) == 0) | ||
2308 | synchronize_srcu(&fs_info->subvol_srcu); | ||
2309 | |||
2310 | free_fs_root(root); | ||
2311 | return 0; | ||
2312 | } | ||
2313 | |||
2314 | static void free_fs_root(struct btrfs_root *root) | ||
2315 | { | ||
2316 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | ||
2242 | if (root->anon_super.s_dev) { | 2317 | if (root->anon_super.s_dev) { |
2243 | down_write(&root->anon_super.s_umount); | 2318 | down_write(&root->anon_super.s_umount); |
2244 | kill_anon_super(&root->anon_super); | 2319 | kill_anon_super(&root->anon_super); |
2245 | } | 2320 | } |
2246 | if (root->node) | 2321 | free_extent_buffer(root->node); |
2247 | free_extent_buffer(root->node); | 2322 | free_extent_buffer(root->commit_root); |
2248 | if (root->commit_root) | ||
2249 | free_extent_buffer(root->commit_root); | ||
2250 | kfree(root->name); | 2323 | kfree(root->name); |
2251 | kfree(root); | 2324 | kfree(root); |
2252 | return 0; | ||
2253 | } | 2325 | } |
2254 | 2326 | ||
2255 | static int del_fs_roots(struct btrfs_fs_info *fs_info) | 2327 | static int del_fs_roots(struct btrfs_fs_info *fs_info) |
@@ -2258,6 +2330,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) | |||
2258 | struct btrfs_root *gang[8]; | 2330 | struct btrfs_root *gang[8]; |
2259 | int i; | 2331 | int i; |
2260 | 2332 | ||
2333 | while (!list_empty(&fs_info->dead_roots)) { | ||
2334 | gang[0] = list_entry(fs_info->dead_roots.next, | ||
2335 | struct btrfs_root, root_list); | ||
2336 | list_del(&gang[0]->root_list); | ||
2337 | |||
2338 | if (gang[0]->in_radix) { | ||
2339 | btrfs_free_fs_root(fs_info, gang[0]); | ||
2340 | } else { | ||
2341 | free_extent_buffer(gang[0]->node); | ||
2342 | free_extent_buffer(gang[0]->commit_root); | ||
2343 | kfree(gang[0]); | ||
2344 | } | ||
2345 | } | ||
2346 | |||
2261 | while (1) { | 2347 | while (1) { |
2262 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | 2348 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, |
2263 | (void **)gang, 0, | 2349 | (void **)gang, 0, |
@@ -2287,9 +2373,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
2287 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2373 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
2288 | for (i = 0; i < ret; i++) { | 2374 | for (i = 0; i < ret; i++) { |
2289 | root_objectid = gang[i]->root_key.objectid; | 2375 | root_objectid = gang[i]->root_key.objectid; |
2290 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
2291 | root_objectid); | ||
2292 | BUG_ON(ret); | ||
2293 | btrfs_orphan_cleanup(gang[i]); | 2376 | btrfs_orphan_cleanup(gang[i]); |
2294 | } | 2377 | } |
2295 | root_objectid++; | 2378 | root_objectid++; |
@@ -2359,12 +2442,12 @@ int close_ctree(struct btrfs_root *root) | |||
2359 | free_extent_buffer(root->fs_info->csum_root->commit_root); | 2442 | free_extent_buffer(root->fs_info->csum_root->commit_root); |
2360 | 2443 | ||
2361 | btrfs_free_block_groups(root->fs_info); | 2444 | btrfs_free_block_groups(root->fs_info); |
2362 | btrfs_free_pinned_extents(root->fs_info); | ||
2363 | 2445 | ||
2364 | del_fs_roots(fs_info); | 2446 | del_fs_roots(fs_info); |
2365 | 2447 | ||
2366 | iput(fs_info->btree_inode); | 2448 | iput(fs_info->btree_inode); |
2367 | 2449 | ||
2450 | btrfs_stop_workers(&fs_info->generic_worker); | ||
2368 | btrfs_stop_workers(&fs_info->fixup_workers); | 2451 | btrfs_stop_workers(&fs_info->fixup_workers); |
2369 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2452 | btrfs_stop_workers(&fs_info->delalloc_workers); |
2370 | btrfs_stop_workers(&fs_info->workers); | 2453 | btrfs_stop_workers(&fs_info->workers); |
@@ -2373,11 +2456,13 @@ int close_ctree(struct btrfs_root *root) | |||
2373 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2456 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2374 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2457 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2375 | btrfs_stop_workers(&fs_info->submit_workers); | 2458 | btrfs_stop_workers(&fs_info->submit_workers); |
2459 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2376 | 2460 | ||
2377 | btrfs_close_devices(fs_info->fs_devices); | 2461 | btrfs_close_devices(fs_info->fs_devices); |
2378 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2462 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2379 | 2463 | ||
2380 | bdi_destroy(&fs_info->bdi); | 2464 | bdi_destroy(&fs_info->bdi); |
2465 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
2381 | 2466 | ||
2382 | kfree(fs_info->extent_root); | 2467 | kfree(fs_info->extent_root); |
2383 | kfree(fs_info->tree_root); | 2468 | kfree(fs_info->tree_root); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9596b40caa4e..ba5c3fd5ab8c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | 28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; |
29 | type = FILEID_BTRFS_WITHOUT_PARENT; | 29 | type = FILEID_BTRFS_WITHOUT_PARENT; |
30 | 30 | ||
31 | fid->objectid = BTRFS_I(inode)->location.objectid; | 31 | fid->objectid = inode->i_ino; |
32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | 32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; |
33 | fid->gen = inode->i_generation; | 33 | fid->gen = inode->i_generation; |
34 | 34 | ||
@@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
60 | } | 60 | } |
61 | 61 | ||
62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | 62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, |
63 | u64 root_objectid, u32 generation) | 63 | u64 root_objectid, u32 generation, |
64 | int check_generation) | ||
64 | { | 65 | { |
66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | ||
65 | struct btrfs_root *root; | 67 | struct btrfs_root *root; |
68 | struct dentry *dentry; | ||
66 | struct inode *inode; | 69 | struct inode *inode; |
67 | struct btrfs_key key; | 70 | struct btrfs_key key; |
71 | int index; | ||
72 | int err = 0; | ||
73 | |||
74 | if (objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
75 | return ERR_PTR(-ESTALE); | ||
68 | 76 | ||
69 | key.objectid = root_objectid; | 77 | key.objectid = root_objectid; |
70 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 78 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
71 | key.offset = (u64)-1; | 79 | key.offset = (u64)-1; |
72 | 80 | ||
73 | root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); | 81 | index = srcu_read_lock(&fs_info->subvol_srcu); |
74 | if (IS_ERR(root)) | 82 | |
75 | return ERR_CAST(root); | 83 | root = btrfs_read_fs_root_no_name(fs_info, &key); |
84 | if (IS_ERR(root)) { | ||
85 | err = PTR_ERR(root); | ||
86 | goto fail; | ||
87 | } | ||
88 | |||
89 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
90 | err = -ENOENT; | ||
91 | goto fail; | ||
92 | } | ||
76 | 93 | ||
77 | key.objectid = objectid; | 94 | key.objectid = objectid; |
78 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 95 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
79 | key.offset = 0; | 96 | key.offset = 0; |
80 | 97 | ||
81 | inode = btrfs_iget(sb, &key, root); | 98 | inode = btrfs_iget(sb, &key, root); |
82 | if (IS_ERR(inode)) | 99 | if (IS_ERR(inode)) { |
83 | return (void *)inode; | 100 | err = PTR_ERR(inode); |
101 | goto fail; | ||
102 | } | ||
103 | |||
104 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
84 | 105 | ||
85 | if (generation != inode->i_generation) { | 106 | if (check_generation && generation != inode->i_generation) { |
86 | iput(inode); | 107 | iput(inode); |
87 | return ERR_PTR(-ESTALE); | 108 | return ERR_PTR(-ESTALE); |
88 | } | 109 | } |
89 | 110 | ||
90 | return d_obtain_alias(inode); | 111 | dentry = d_obtain_alias(inode); |
112 | if (!IS_ERR(dentry)) | ||
113 | dentry->d_op = &btrfs_dentry_operations; | ||
114 | return dentry; | ||
115 | fail: | ||
116 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
117 | return ERR_PTR(err); | ||
91 | } | 118 | } |
92 | 119 | ||
93 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | 120 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, |
@@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | |||
111 | objectid = fid->parent_objectid; | 138 | objectid = fid->parent_objectid; |
112 | generation = fid->parent_gen; | 139 | generation = fid->parent_gen; |
113 | 140 | ||
114 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 141 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
115 | } | 142 | } |
116 | 143 | ||
117 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | 144 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, |
@@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
133 | root_objectid = fid->root_objectid; | 160 | root_objectid = fid->root_objectid; |
134 | generation = fid->gen; | 161 | generation = fid->gen; |
135 | 162 | ||
136 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 163 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
137 | } | 164 | } |
138 | 165 | ||
139 | static struct dentry *btrfs_get_parent(struct dentry *child) | 166 | static struct dentry *btrfs_get_parent(struct dentry *child) |
140 | { | 167 | { |
141 | struct inode *dir = child->d_inode; | 168 | struct inode *dir = child->d_inode; |
169 | static struct dentry *dentry; | ||
142 | struct btrfs_root *root = BTRFS_I(dir)->root; | 170 | struct btrfs_root *root = BTRFS_I(dir)->root; |
143 | struct btrfs_key key; | ||
144 | struct btrfs_path *path; | 171 | struct btrfs_path *path; |
145 | struct extent_buffer *leaf; | 172 | struct extent_buffer *leaf; |
146 | int slot; | 173 | struct btrfs_root_ref *ref; |
147 | u64 objectid; | 174 | struct btrfs_key key; |
175 | struct btrfs_key found_key; | ||
148 | int ret; | 176 | int ret; |
149 | 177 | ||
150 | path = btrfs_alloc_path(); | 178 | path = btrfs_alloc_path(); |
151 | 179 | ||
152 | key.objectid = dir->i_ino; | 180 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { |
153 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 181 | key.objectid = root->root_key.objectid; |
154 | key.offset = (u64)-1; | 182 | key.type = BTRFS_ROOT_BACKREF_KEY; |
183 | key.offset = (u64)-1; | ||
184 | root = root->fs_info->tree_root; | ||
185 | } else { | ||
186 | key.objectid = dir->i_ino; | ||
187 | key.type = BTRFS_INODE_REF_KEY; | ||
188 | key.offset = (u64)-1; | ||
189 | } | ||
155 | 190 | ||
156 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 191 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
157 | if (ret < 0) { | 192 | if (ret < 0) |
158 | /* Error */ | 193 | goto fail; |
159 | btrfs_free_path(path); | 194 | |
160 | return ERR_PTR(ret); | 195 | BUG_ON(ret == 0); |
196 | if (path->slots[0] == 0) { | ||
197 | ret = -ENOENT; | ||
198 | goto fail; | ||
161 | } | 199 | } |
200 | |||
201 | path->slots[0]--; | ||
162 | leaf = path->nodes[0]; | 202 | leaf = path->nodes[0]; |
163 | slot = path->slots[0]; | 203 | |
164 | if (ret) { | 204 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
165 | /* btrfs_search_slot() returns the slot where we'd want to | 205 | if (found_key.objectid != key.objectid || found_key.type != key.type) { |
166 | insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. | 206 | ret = -ENOENT; |
167 | The _real_ backref, telling us what the parent inode | 207 | goto fail; |
168 | _actually_ is, will be in the slot _before_ the one | ||
169 | that btrfs_search_slot() returns. */ | ||
170 | if (!slot) { | ||
171 | /* Unless there is _no_ key in the tree before... */ | ||
172 | btrfs_free_path(path); | ||
173 | return ERR_PTR(-EIO); | ||
174 | } | ||
175 | slot--; | ||
176 | } | 208 | } |
177 | 209 | ||
178 | btrfs_item_key_to_cpu(leaf, &key, slot); | 210 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
211 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
212 | struct btrfs_root_ref); | ||
213 | key.objectid = btrfs_root_ref_dirid(leaf, ref); | ||
214 | } else { | ||
215 | key.objectid = found_key.offset; | ||
216 | } | ||
179 | btrfs_free_path(path); | 217 | btrfs_free_path(path); |
180 | 218 | ||
181 | if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) | 219 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
182 | return ERR_PTR(-EINVAL); | 220 | return btrfs_get_dentry(root->fs_info->sb, key.objectid, |
183 | 221 | found_key.offset, 0, 0); | |
184 | objectid = key.offset; | 222 | } |
185 | |||
186 | /* If we are already at the root of a subvol, return the real root */ | ||
187 | if (objectid == dir->i_ino) | ||
188 | return dget(dir->i_sb->s_root); | ||
189 | 223 | ||
190 | /* Build a new key for the inode item */ | 224 | key.type = BTRFS_INODE_ITEM_KEY; |
191 | key.objectid = objectid; | ||
192 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
193 | key.offset = 0; | 225 | key.offset = 0; |
194 | 226 | dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | |
195 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | 227 | if (!IS_ERR(dentry)) |
228 | dentry->d_op = &btrfs_dentry_operations; | ||
229 | return dentry; | ||
230 | fail: | ||
231 | btrfs_free_path(path); | ||
232 | return ERR_PTR(ret); | ||
196 | } | 233 | } |
197 | 234 | ||
198 | const struct export_operations btrfs_export_ops = { | 235 | const struct export_operations btrfs_export_ops = { |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 535f85ba104f..e238a0cdac67 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -32,12 +32,12 @@ | |||
32 | #include "locking.h" | 32 | #include "locking.h" |
33 | #include "free-space-cache.h" | 33 | #include "free-space-cache.h" |
34 | 34 | ||
35 | static int update_reserved_extents(struct btrfs_root *root, | ||
36 | u64 bytenr, u64 num, int reserve); | ||
37 | static int update_block_group(struct btrfs_trans_handle *trans, | 35 | static int update_block_group(struct btrfs_trans_handle *trans, |
38 | struct btrfs_root *root, | 36 | struct btrfs_root *root, |
39 | u64 bytenr, u64 num_bytes, int alloc, | 37 | u64 bytenr, u64 num_bytes, int alloc, |
40 | int mark_free); | 38 | int mark_free); |
39 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | ||
40 | u64 num_bytes, int reserve); | ||
41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
42 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
43 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -57,10 +57,19 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
57 | u64 parent, u64 root_objectid, | 57 | u64 parent, u64 root_objectid, |
58 | u64 flags, struct btrfs_disk_key *key, | 58 | u64 flags, struct btrfs_disk_key *key, |
59 | int level, struct btrfs_key *ins); | 59 | int level, struct btrfs_key *ins); |
60 | |||
61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
63 | u64 flags, int force); | 62 | u64 flags, int force); |
63 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
64 | struct btrfs_root *root, | ||
65 | struct btrfs_path *path, | ||
66 | u64 bytenr, u64 num_bytes, | ||
67 | int is_data, int reserved, | ||
68 | struct extent_buffer **must_clean); | ||
69 | static int find_next_key(struct btrfs_path *path, int level, | ||
70 | struct btrfs_key *key); | ||
71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
72 | int dump_block_groups); | ||
64 | 73 | ||
65 | static noinline int | 74 | static noinline int |
66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -153,34 +162,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
153 | return ret; | 162 | return ret; |
154 | } | 163 | } |
155 | 164 | ||
156 | /* | 165 | static int add_excluded_extent(struct btrfs_root *root, |
157 | * We always set EXTENT_LOCKED for the super mirror extents so we don't | 166 | u64 start, u64 num_bytes) |
158 | * overwrite them, so those bits need to be unset. Also, if we are unmounting | ||
159 | * with pinned extents still sitting there because we had a block group caching, | ||
160 | * we need to clear those now, since we are done. | ||
161 | */ | ||
162 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info) | ||
163 | { | 167 | { |
164 | u64 start, end, last = 0; | 168 | u64 end = start + num_bytes - 1; |
165 | int ret; | 169 | set_extent_bits(&root->fs_info->freed_extents[0], |
170 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
171 | set_extent_bits(&root->fs_info->freed_extents[1], | ||
172 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
173 | return 0; | ||
174 | } | ||
166 | 175 | ||
167 | while (1) { | 176 | static void free_excluded_extents(struct btrfs_root *root, |
168 | ret = find_first_extent_bit(&info->pinned_extents, last, | 177 | struct btrfs_block_group_cache *cache) |
169 | &start, &end, | 178 | { |
170 | EXTENT_LOCKED|EXTENT_DIRTY); | 179 | u64 start, end; |
171 | if (ret) | ||
172 | break; | ||
173 | 180 | ||
174 | clear_extent_bits(&info->pinned_extents, start, end, | 181 | start = cache->key.objectid; |
175 | EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); | 182 | end = start + cache->key.offset - 1; |
176 | last = end+1; | 183 | |
177 | } | 184 | clear_extent_bits(&root->fs_info->freed_extents[0], |
185 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
186 | clear_extent_bits(&root->fs_info->freed_extents[1], | ||
187 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
178 | } | 188 | } |
179 | 189 | ||
180 | static int remove_sb_from_cache(struct btrfs_root *root, | 190 | static int exclude_super_stripes(struct btrfs_root *root, |
181 | struct btrfs_block_group_cache *cache) | 191 | struct btrfs_block_group_cache *cache) |
182 | { | 192 | { |
183 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
184 | u64 bytenr; | 193 | u64 bytenr; |
185 | u64 *logical; | 194 | u64 *logical; |
186 | int stripe_len; | 195 | int stripe_len; |
@@ -192,17 +201,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, | |||
192 | cache->key.objectid, bytenr, | 201 | cache->key.objectid, bytenr, |
193 | 0, &logical, &nr, &stripe_len); | 202 | 0, &logical, &nr, &stripe_len); |
194 | BUG_ON(ret); | 203 | BUG_ON(ret); |
204 | |||
195 | while (nr--) { | 205 | while (nr--) { |
196 | try_lock_extent(&fs_info->pinned_extents, | 206 | cache->bytes_super += stripe_len; |
197 | logical[nr], | 207 | ret = add_excluded_extent(root, logical[nr], |
198 | logical[nr] + stripe_len - 1, GFP_NOFS); | 208 | stripe_len); |
209 | BUG_ON(ret); | ||
199 | } | 210 | } |
211 | |||
200 | kfree(logical); | 212 | kfree(logical); |
201 | } | 213 | } |
202 | |||
203 | return 0; | 214 | return 0; |
204 | } | 215 | } |
205 | 216 | ||
217 | static struct btrfs_caching_control * | ||
218 | get_caching_control(struct btrfs_block_group_cache *cache) | ||
219 | { | ||
220 | struct btrfs_caching_control *ctl; | ||
221 | |||
222 | spin_lock(&cache->lock); | ||
223 | if (cache->cached != BTRFS_CACHE_STARTED) { | ||
224 | spin_unlock(&cache->lock); | ||
225 | return NULL; | ||
226 | } | ||
227 | |||
228 | ctl = cache->caching_ctl; | ||
229 | atomic_inc(&ctl->count); | ||
230 | spin_unlock(&cache->lock); | ||
231 | return ctl; | ||
232 | } | ||
233 | |||
234 | static void put_caching_control(struct btrfs_caching_control *ctl) | ||
235 | { | ||
236 | if (atomic_dec_and_test(&ctl->count)) | ||
237 | kfree(ctl); | ||
238 | } | ||
239 | |||
206 | /* | 240 | /* |
207 | * this is only called by cache_block_group, since we could have freed extents | 241 | * this is only called by cache_block_group, since we could have freed extents |
208 | * we need to check the pinned_extents for any extents that can't be used yet | 242 | * we need to check the pinned_extents for any extents that can't be used yet |
@@ -215,9 +249,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
215 | int ret; | 249 | int ret; |
216 | 250 | ||
217 | while (start < end) { | 251 | while (start < end) { |
218 | ret = find_first_extent_bit(&info->pinned_extents, start, | 252 | ret = find_first_extent_bit(info->pinned_extents, start, |
219 | &extent_start, &extent_end, | 253 | &extent_start, &extent_end, |
220 | EXTENT_DIRTY|EXTENT_LOCKED); | 254 | EXTENT_DIRTY | EXTENT_UPTODATE); |
221 | if (ret) | 255 | if (ret) |
222 | break; | 256 | break; |
223 | 257 | ||
@@ -249,22 +283,27 @@ static int caching_kthread(void *data) | |||
249 | { | 283 | { |
250 | struct btrfs_block_group_cache *block_group = data; | 284 | struct btrfs_block_group_cache *block_group = data; |
251 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 285 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
252 | u64 last = 0; | 286 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; |
287 | struct btrfs_root *extent_root = fs_info->extent_root; | ||
253 | struct btrfs_path *path; | 288 | struct btrfs_path *path; |
254 | int ret = 0; | ||
255 | struct btrfs_key key; | ||
256 | struct extent_buffer *leaf; | 289 | struct extent_buffer *leaf; |
257 | int slot; | 290 | struct btrfs_key key; |
258 | u64 total_found = 0; | 291 | u64 total_found = 0; |
259 | 292 | u64 last = 0; | |
260 | BUG_ON(!fs_info); | 293 | u32 nritems; |
294 | int ret = 0; | ||
261 | 295 | ||
262 | path = btrfs_alloc_path(); | 296 | path = btrfs_alloc_path(); |
263 | if (!path) | 297 | if (!path) |
264 | return -ENOMEM; | 298 | return -ENOMEM; |
265 | 299 | ||
266 | atomic_inc(&block_group->space_info->caching_threads); | 300 | exclude_super_stripes(extent_root, block_group); |
301 | spin_lock(&block_group->space_info->lock); | ||
302 | block_group->space_info->bytes_super += block_group->bytes_super; | ||
303 | spin_unlock(&block_group->space_info->lock); | ||
304 | |||
267 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 305 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
306 | |||
268 | /* | 307 | /* |
269 | * We don't want to deadlock with somebody trying to allocate a new | 308 | * We don't want to deadlock with somebody trying to allocate a new |
270 | * extent for the extent root while also trying to search the extent | 309 | * extent for the extent root while also trying to search the extent |
@@ -277,74 +316,64 @@ static int caching_kthread(void *data) | |||
277 | 316 | ||
278 | key.objectid = last; | 317 | key.objectid = last; |
279 | key.offset = 0; | 318 | key.offset = 0; |
280 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 319 | key.type = BTRFS_EXTENT_ITEM_KEY; |
281 | again: | 320 | again: |
321 | mutex_lock(&caching_ctl->mutex); | ||
282 | /* need to make sure the commit_root doesn't disappear */ | 322 | /* need to make sure the commit_root doesn't disappear */ |
283 | down_read(&fs_info->extent_commit_sem); | 323 | down_read(&fs_info->extent_commit_sem); |
284 | 324 | ||
285 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | 325 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
286 | if (ret < 0) | 326 | if (ret < 0) |
287 | goto err; | 327 | goto err; |
288 | 328 | ||
329 | leaf = path->nodes[0]; | ||
330 | nritems = btrfs_header_nritems(leaf); | ||
331 | |||
289 | while (1) { | 332 | while (1) { |
290 | smp_mb(); | 333 | smp_mb(); |
291 | if (block_group->fs_info->closing > 1) { | 334 | if (fs_info->closing > 1) { |
292 | last = (u64)-1; | 335 | last = (u64)-1; |
293 | break; | 336 | break; |
294 | } | 337 | } |
295 | 338 | ||
296 | leaf = path->nodes[0]; | 339 | if (path->slots[0] < nritems) { |
297 | slot = path->slots[0]; | 340 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
298 | if (slot >= btrfs_header_nritems(leaf)) { | 341 | } else { |
299 | ret = btrfs_next_leaf(fs_info->extent_root, path); | 342 | ret = find_next_key(path, 0, &key); |
300 | if (ret < 0) | 343 | if (ret) |
301 | goto err; | ||
302 | else if (ret) | ||
303 | break; | 344 | break; |
304 | 345 | ||
305 | if (need_resched() || | 346 | caching_ctl->progress = last; |
306 | btrfs_transaction_in_commit(fs_info)) { | 347 | btrfs_release_path(extent_root, path); |
307 | leaf = path->nodes[0]; | 348 | up_read(&fs_info->extent_commit_sem); |
308 | 349 | mutex_unlock(&caching_ctl->mutex); | |
309 | /* this shouldn't happen, but if the | 350 | if (btrfs_transaction_in_commit(fs_info)) |
310 | * leaf is empty just move on. | ||
311 | */ | ||
312 | if (btrfs_header_nritems(leaf) == 0) | ||
313 | break; | ||
314 | /* | ||
315 | * we need to copy the key out so that | ||
316 | * we are sure the next search advances | ||
317 | * us forward in the btree. | ||
318 | */ | ||
319 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
320 | btrfs_release_path(fs_info->extent_root, path); | ||
321 | up_read(&fs_info->extent_commit_sem); | ||
322 | schedule_timeout(1); | 351 | schedule_timeout(1); |
323 | goto again; | 352 | else |
324 | } | 353 | cond_resched(); |
354 | goto again; | ||
355 | } | ||
325 | 356 | ||
357 | if (key.objectid < block_group->key.objectid) { | ||
358 | path->slots[0]++; | ||
326 | continue; | 359 | continue; |
327 | } | 360 | } |
328 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
329 | if (key.objectid < block_group->key.objectid) | ||
330 | goto next; | ||
331 | 361 | ||
332 | if (key.objectid >= block_group->key.objectid + | 362 | if (key.objectid >= block_group->key.objectid + |
333 | block_group->key.offset) | 363 | block_group->key.offset) |
334 | break; | 364 | break; |
335 | 365 | ||
336 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 366 | if (key.type == BTRFS_EXTENT_ITEM_KEY) { |
337 | total_found += add_new_free_space(block_group, | 367 | total_found += add_new_free_space(block_group, |
338 | fs_info, last, | 368 | fs_info, last, |
339 | key.objectid); | 369 | key.objectid); |
340 | last = key.objectid + key.offset; | 370 | last = key.objectid + key.offset; |
341 | } | ||
342 | 371 | ||
343 | if (total_found > (1024 * 1024 * 2)) { | 372 | if (total_found > (1024 * 1024 * 2)) { |
344 | total_found = 0; | 373 | total_found = 0; |
345 | wake_up(&block_group->caching_q); | 374 | wake_up(&caching_ctl->wait); |
375 | } | ||
346 | } | 376 | } |
347 | next: | ||
348 | path->slots[0]++; | 377 | path->slots[0]++; |
349 | } | 378 | } |
350 | ret = 0; | 379 | ret = 0; |
@@ -352,33 +381,65 @@ next: | |||
352 | total_found += add_new_free_space(block_group, fs_info, last, | 381 | total_found += add_new_free_space(block_group, fs_info, last, |
353 | block_group->key.objectid + | 382 | block_group->key.objectid + |
354 | block_group->key.offset); | 383 | block_group->key.offset); |
384 | caching_ctl->progress = (u64)-1; | ||
355 | 385 | ||
356 | spin_lock(&block_group->lock); | 386 | spin_lock(&block_group->lock); |
387 | block_group->caching_ctl = NULL; | ||
357 | block_group->cached = BTRFS_CACHE_FINISHED; | 388 | block_group->cached = BTRFS_CACHE_FINISHED; |
358 | spin_unlock(&block_group->lock); | 389 | spin_unlock(&block_group->lock); |
359 | 390 | ||
360 | err: | 391 | err: |
361 | btrfs_free_path(path); | 392 | btrfs_free_path(path); |
362 | up_read(&fs_info->extent_commit_sem); | 393 | up_read(&fs_info->extent_commit_sem); |
363 | atomic_dec(&block_group->space_info->caching_threads); | ||
364 | wake_up(&block_group->caching_q); | ||
365 | 394 | ||
395 | free_excluded_extents(extent_root, block_group); | ||
396 | |||
397 | mutex_unlock(&caching_ctl->mutex); | ||
398 | wake_up(&caching_ctl->wait); | ||
399 | |||
400 | put_caching_control(caching_ctl); | ||
401 | atomic_dec(&block_group->space_info->caching_threads); | ||
366 | return 0; | 402 | return 0; |
367 | } | 403 | } |
368 | 404 | ||
369 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 405 | static int cache_block_group(struct btrfs_block_group_cache *cache) |
370 | { | 406 | { |
407 | struct btrfs_fs_info *fs_info = cache->fs_info; | ||
408 | struct btrfs_caching_control *caching_ctl; | ||
371 | struct task_struct *tsk; | 409 | struct task_struct *tsk; |
372 | int ret = 0; | 410 | int ret = 0; |
373 | 411 | ||
412 | smp_mb(); | ||
413 | if (cache->cached != BTRFS_CACHE_NO) | ||
414 | return 0; | ||
415 | |||
416 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | ||
417 | BUG_ON(!caching_ctl); | ||
418 | |||
419 | INIT_LIST_HEAD(&caching_ctl->list); | ||
420 | mutex_init(&caching_ctl->mutex); | ||
421 | init_waitqueue_head(&caching_ctl->wait); | ||
422 | caching_ctl->block_group = cache; | ||
423 | caching_ctl->progress = cache->key.objectid; | ||
424 | /* one for caching kthread, one for caching block group list */ | ||
425 | atomic_set(&caching_ctl->count, 2); | ||
426 | |||
374 | spin_lock(&cache->lock); | 427 | spin_lock(&cache->lock); |
375 | if (cache->cached != BTRFS_CACHE_NO) { | 428 | if (cache->cached != BTRFS_CACHE_NO) { |
376 | spin_unlock(&cache->lock); | 429 | spin_unlock(&cache->lock); |
377 | return ret; | 430 | kfree(caching_ctl); |
431 | return 0; | ||
378 | } | 432 | } |
433 | cache->caching_ctl = caching_ctl; | ||
379 | cache->cached = BTRFS_CACHE_STARTED; | 434 | cache->cached = BTRFS_CACHE_STARTED; |
380 | spin_unlock(&cache->lock); | 435 | spin_unlock(&cache->lock); |
381 | 436 | ||
437 | down_write(&fs_info->extent_commit_sem); | ||
438 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | ||
439 | up_write(&fs_info->extent_commit_sem); | ||
440 | |||
441 | atomic_inc(&cache->space_info->caching_threads); | ||
442 | |||
382 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 443 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", |
383 | cache->key.objectid); | 444 | cache->key.objectid); |
384 | if (IS_ERR(tsk)) { | 445 | if (IS_ERR(tsk)) { |
@@ -1507,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1507 | return ret; | 1568 | return ret; |
1508 | } | 1569 | } |
1509 | 1570 | ||
1510 | #ifdef BIO_RW_DISCARD | ||
1511 | static void btrfs_issue_discard(struct block_device *bdev, | 1571 | static void btrfs_issue_discard(struct block_device *bdev, |
1512 | u64 start, u64 len) | 1572 | u64 start, u64 len) |
1513 | { | 1573 | { |
1514 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1574 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
1515 | DISCARD_FL_BARRIER); | 1575 | DISCARD_FL_BARRIER); |
1516 | } | 1576 | } |
1517 | #endif | ||
1518 | 1577 | ||
1519 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1578 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
1520 | u64 num_bytes) | 1579 | u64 num_bytes) |
1521 | { | 1580 | { |
1522 | #ifdef BIO_RW_DISCARD | ||
1523 | int ret; | 1581 | int ret; |
1524 | u64 map_length = num_bytes; | 1582 | u64 map_length = num_bytes; |
1525 | struct btrfs_multi_bio *multi = NULL; | 1583 | struct btrfs_multi_bio *multi = NULL; |
1526 | 1584 | ||
1585 | if (!btrfs_test_opt(root, DISCARD)) | ||
1586 | return 0; | ||
1587 | |||
1527 | /* Tell the block device(s) that the sectors can be discarded */ | 1588 | /* Tell the block device(s) that the sectors can be discarded */ |
1528 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1589 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, |
1529 | bytenr, &map_length, &multi, 0); | 1590 | bytenr, &map_length, &multi, 0); |
@@ -1543,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
1543 | } | 1604 | } |
1544 | 1605 | ||
1545 | return ret; | 1606 | return ret; |
1546 | #else | ||
1547 | return 0; | ||
1548 | #endif | ||
1549 | } | 1607 | } |
1550 | 1608 | ||
1551 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1609 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
@@ -1657,7 +1715,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
1657 | parent, ref_root, flags, | 1715 | parent, ref_root, flags, |
1658 | ref->objectid, ref->offset, | 1716 | ref->objectid, ref->offset, |
1659 | &ins, node->ref_mod); | 1717 | &ins, node->ref_mod); |
1660 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1661 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1718 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
1662 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1719 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
1663 | node->num_bytes, parent, | 1720 | node->num_bytes, parent, |
@@ -1783,7 +1840,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
1783 | extent_op->flags_to_set, | 1840 | extent_op->flags_to_set, |
1784 | &extent_op->key, | 1841 | &extent_op->key, |
1785 | ref->level, &ins); | 1842 | ref->level, &ins); |
1786 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
1787 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1843 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
1788 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1844 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
1789 | node->num_bytes, parent, ref_root, | 1845 | node->num_bytes, parent, ref_root, |
@@ -1818,16 +1874,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1818 | BUG_ON(extent_op); | 1874 | BUG_ON(extent_op); |
1819 | head = btrfs_delayed_node_to_head(node); | 1875 | head = btrfs_delayed_node_to_head(node); |
1820 | if (insert_reserved) { | 1876 | if (insert_reserved) { |
1877 | int mark_free = 0; | ||
1878 | struct extent_buffer *must_clean = NULL; | ||
1879 | |||
1880 | ret = pin_down_bytes(trans, root, NULL, | ||
1881 | node->bytenr, node->num_bytes, | ||
1882 | head->is_data, 1, &must_clean); | ||
1883 | if (ret > 0) | ||
1884 | mark_free = 1; | ||
1885 | |||
1886 | if (must_clean) { | ||
1887 | clean_tree_block(NULL, root, must_clean); | ||
1888 | btrfs_tree_unlock(must_clean); | ||
1889 | free_extent_buffer(must_clean); | ||
1890 | } | ||
1821 | if (head->is_data) { | 1891 | if (head->is_data) { |
1822 | ret = btrfs_del_csums(trans, root, | 1892 | ret = btrfs_del_csums(trans, root, |
1823 | node->bytenr, | 1893 | node->bytenr, |
1824 | node->num_bytes); | 1894 | node->num_bytes); |
1825 | BUG_ON(ret); | 1895 | BUG_ON(ret); |
1826 | } | 1896 | } |
1827 | btrfs_update_pinned_extents(root, node->bytenr, | 1897 | if (mark_free) { |
1828 | node->num_bytes, 1); | 1898 | ret = btrfs_free_reserved_extent(root, |
1829 | update_reserved_extents(root, node->bytenr, | 1899 | node->bytenr, |
1830 | node->num_bytes, 0); | 1900 | node->num_bytes); |
1901 | BUG_ON(ret); | ||
1902 | } | ||
1831 | } | 1903 | } |
1832 | mutex_unlock(&head->mutex); | 1904 | mutex_unlock(&head->mutex); |
1833 | return 0; | 1905 | return 0; |
@@ -2692,60 +2764,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
2692 | alloc_target); | 2764 | alloc_target); |
2693 | } | 2765 | } |
2694 | 2766 | ||
2767 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
2768 | { | ||
2769 | u64 num_bytes; | ||
2770 | int level; | ||
2771 | |||
2772 | level = BTRFS_MAX_LEVEL - 2; | ||
2773 | /* | ||
2774 | * NOTE: these calculations are absolutely the worst possible case. | ||
2775 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2776 | * that the tree has grown to its maximum level size. | ||
2777 | */ | ||
2778 | |||
2779 | /* | ||
2780 | * for every item we insert we could insert both an extent item and an | ||
2781 | * extent ref item. Then for every item we insert, we will need to cow | ||
2782 | * both the original leaf, plus the leaf to the left and right of it. | ||
2783 | * | ||
2784 | * Unless we are talking about the extent root, then we just want the | ||
2785 | * number of items * 2, since we just need the extent item plus its ref. | ||
2786 | */ | ||
2787 | if (root == root->fs_info->extent_root) | ||
2788 | num_bytes = num_items * 2; | ||
2789 | else | ||
2790 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2791 | |||
2792 | /* | ||
2793 | * num_bytes is total number of leaves we could need times the leaf | ||
2794 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
2795 | * level, down to the leaf level. | ||
2796 | */ | ||
2797 | num_bytes = (num_bytes * root->leafsize) + | ||
2798 | (num_bytes * (level * 2)) * root->nodesize; | ||
2799 | |||
2800 | return num_bytes; | ||
2801 | } | ||
2802 | |||
2695 | /* | 2803 | /* |
2696 | * for now this just makes sure we have at least 5% of our metadata space free | 2804 | * Unreserve metadata space for delalloc. If we have less reserved credits than |
2697 | * for use. | 2805 | * we have extents, this function does nothing. |
2698 | */ | 2806 | */ |
2699 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2807 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
2808 | struct inode *inode, int num_items) | ||
2700 | { | 2809 | { |
2701 | struct btrfs_fs_info *info = root->fs_info; | 2810 | struct btrfs_fs_info *info = root->fs_info; |
2702 | struct btrfs_space_info *meta_sinfo; | 2811 | struct btrfs_space_info *meta_sinfo; |
2703 | u64 alloc_target, thresh; | 2812 | u64 num_bytes; |
2704 | int committed = 0, ret; | 2813 | u64 alloc_target; |
2814 | bool bug = false; | ||
2705 | 2815 | ||
2706 | /* get the space info for where the metadata will live */ | 2816 | /* get the space info for where the metadata will live */ |
2707 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2817 | alloc_target = btrfs_get_alloc_profile(root, 0); |
2708 | meta_sinfo = __find_space_info(info, alloc_target); | 2818 | meta_sinfo = __find_space_info(info, alloc_target); |
2709 | 2819 | ||
2710 | again: | 2820 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
2821 | num_items); | ||
2822 | |||
2711 | spin_lock(&meta_sinfo->lock); | 2823 | spin_lock(&meta_sinfo->lock); |
2712 | if (!meta_sinfo->full) | 2824 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
2713 | thresh = meta_sinfo->total_bytes * 80; | 2825 | if (BTRFS_I(inode)->reserved_extents <= |
2714 | else | 2826 | BTRFS_I(inode)->outstanding_extents) { |
2715 | thresh = meta_sinfo->total_bytes * 95; | 2827 | spin_unlock(&BTRFS_I(inode)->accounting_lock); |
2828 | spin_unlock(&meta_sinfo->lock); | ||
2829 | return 0; | ||
2830 | } | ||
2831 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2832 | |||
2833 | BTRFS_I(inode)->reserved_extents--; | ||
2834 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
2835 | |||
2836 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
2837 | bug = true; | ||
2838 | meta_sinfo->bytes_delalloc = 0; | ||
2839 | } else { | ||
2840 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2841 | } | ||
2842 | spin_unlock(&meta_sinfo->lock); | ||
2843 | |||
2844 | BUG_ON(bug); | ||
2845 | |||
2846 | return 0; | ||
2847 | } | ||
2848 | |||
2849 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
2850 | { | ||
2851 | u64 thresh; | ||
2852 | |||
2853 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
2854 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
2855 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
2856 | meta_sinfo->bytes_may_use; | ||
2716 | 2857 | ||
2858 | thresh = meta_sinfo->total_bytes - thresh; | ||
2859 | thresh *= 80; | ||
2717 | do_div(thresh, 100); | 2860 | do_div(thresh, 100); |
2861 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
2862 | meta_sinfo->force_delalloc = 1; | ||
2863 | else | ||
2864 | meta_sinfo->force_delalloc = 0; | ||
2865 | } | ||
2718 | 2866 | ||
2719 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2867 | struct async_flush { |
2720 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { | 2868 | struct btrfs_root *root; |
2721 | struct btrfs_trans_handle *trans; | 2869 | struct btrfs_space_info *info; |
2722 | if (!meta_sinfo->full) { | 2870 | struct btrfs_work work; |
2723 | meta_sinfo->force_alloc = 1; | 2871 | }; |
2724 | spin_unlock(&meta_sinfo->lock); | ||
2725 | 2872 | ||
2726 | trans = btrfs_start_transaction(root, 1); | 2873 | static noinline void flush_delalloc_async(struct btrfs_work *work) |
2727 | if (!trans) | 2874 | { |
2728 | return -ENOMEM; | 2875 | struct async_flush *async; |
2876 | struct btrfs_root *root; | ||
2877 | struct btrfs_space_info *info; | ||
2729 | 2878 | ||
2730 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2879 | async = container_of(work, struct async_flush, work); |
2731 | 2 * 1024 * 1024, alloc_target, 0); | 2880 | root = async->root; |
2732 | btrfs_end_transaction(trans, root); | 2881 | info = async->info; |
2882 | |||
2883 | btrfs_start_delalloc_inodes(root); | ||
2884 | wake_up(&info->flush_wait); | ||
2885 | btrfs_wait_ordered_extents(root, 0); | ||
2886 | |||
2887 | spin_lock(&info->lock); | ||
2888 | info->flushing = 0; | ||
2889 | spin_unlock(&info->lock); | ||
2890 | wake_up(&info->flush_wait); | ||
2891 | |||
2892 | kfree(async); | ||
2893 | } | ||
2894 | |||
2895 | static void wait_on_flush(struct btrfs_space_info *info) | ||
2896 | { | ||
2897 | DEFINE_WAIT(wait); | ||
2898 | u64 used; | ||
2899 | |||
2900 | while (1) { | ||
2901 | prepare_to_wait(&info->flush_wait, &wait, | ||
2902 | TASK_UNINTERRUPTIBLE); | ||
2903 | spin_lock(&info->lock); | ||
2904 | if (!info->flushing) { | ||
2905 | spin_unlock(&info->lock); | ||
2906 | break; | ||
2907 | } | ||
2908 | |||
2909 | used = info->bytes_used + info->bytes_reserved + | ||
2910 | info->bytes_pinned + info->bytes_readonly + | ||
2911 | info->bytes_super + info->bytes_root + | ||
2912 | info->bytes_may_use + info->bytes_delalloc; | ||
2913 | if (used < info->total_bytes) { | ||
2914 | spin_unlock(&info->lock); | ||
2915 | break; | ||
2916 | } | ||
2917 | spin_unlock(&info->lock); | ||
2918 | schedule(); | ||
2919 | } | ||
2920 | finish_wait(&info->flush_wait, &wait); | ||
2921 | } | ||
2922 | |||
2923 | static void flush_delalloc(struct btrfs_root *root, | ||
2924 | struct btrfs_space_info *info) | ||
2925 | { | ||
2926 | struct async_flush *async; | ||
2927 | bool wait = false; | ||
2928 | |||
2929 | spin_lock(&info->lock); | ||
2930 | |||
2931 | if (!info->flushing) { | ||
2932 | info->flushing = 1; | ||
2933 | init_waitqueue_head(&info->flush_wait); | ||
2934 | } else { | ||
2935 | wait = true; | ||
2936 | } | ||
2937 | |||
2938 | spin_unlock(&info->lock); | ||
2939 | |||
2940 | if (wait) { | ||
2941 | wait_on_flush(info); | ||
2942 | return; | ||
2943 | } | ||
2944 | |||
2945 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
2946 | if (!async) | ||
2947 | goto flush; | ||
2948 | |||
2949 | async->root = root; | ||
2950 | async->info = info; | ||
2951 | async->work.func = flush_delalloc_async; | ||
2952 | |||
2953 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
2954 | &async->work); | ||
2955 | wait_on_flush(info); | ||
2956 | return; | ||
2957 | |||
2958 | flush: | ||
2959 | btrfs_start_delalloc_inodes(root); | ||
2960 | btrfs_wait_ordered_extents(root, 0); | ||
2961 | |||
2962 | spin_lock(&info->lock); | ||
2963 | info->flushing = 0; | ||
2964 | spin_unlock(&info->lock); | ||
2965 | wake_up(&info->flush_wait); | ||
2966 | } | ||
2967 | |||
2968 | static int maybe_allocate_chunk(struct btrfs_root *root, | ||
2969 | struct btrfs_space_info *info) | ||
2970 | { | ||
2971 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
2972 | struct btrfs_trans_handle *trans; | ||
2973 | bool wait = false; | ||
2974 | int ret = 0; | ||
2975 | u64 min_metadata; | ||
2976 | u64 free_space; | ||
2977 | |||
2978 | free_space = btrfs_super_total_bytes(disk_super); | ||
2979 | /* | ||
2980 | * we allow the metadata to grow to a max of either 5gb or 5% of the | ||
2981 | * space in the volume. | ||
2982 | */ | ||
2983 | min_metadata = min((u64)5 * 1024 * 1024 * 1024, | ||
2984 | div64_u64(free_space * 5, 100)); | ||
2985 | if (info->total_bytes >= min_metadata) { | ||
2986 | spin_unlock(&info->lock); | ||
2987 | return 0; | ||
2988 | } | ||
2989 | |||
2990 | if (info->full) { | ||
2991 | spin_unlock(&info->lock); | ||
2992 | return 0; | ||
2993 | } | ||
2994 | |||
2995 | if (!info->allocating_chunk) { | ||
2996 | info->force_alloc = 1; | ||
2997 | info->allocating_chunk = 1; | ||
2998 | init_waitqueue_head(&info->allocate_wait); | ||
2999 | } else { | ||
3000 | wait = true; | ||
3001 | } | ||
3002 | |||
3003 | spin_unlock(&info->lock); | ||
3004 | |||
3005 | if (wait) { | ||
3006 | wait_event(info->allocate_wait, | ||
3007 | !info->allocating_chunk); | ||
3008 | return 1; | ||
3009 | } | ||
3010 | |||
3011 | trans = btrfs_start_transaction(root, 1); | ||
3012 | if (!trans) { | ||
3013 | ret = -ENOMEM; | ||
3014 | goto out; | ||
3015 | } | ||
3016 | |||
3017 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3018 | 4096 + 2 * 1024 * 1024, | ||
3019 | info->flags, 0); | ||
3020 | btrfs_end_transaction(trans, root); | ||
3021 | if (ret) | ||
3022 | goto out; | ||
3023 | out: | ||
3024 | spin_lock(&info->lock); | ||
3025 | info->allocating_chunk = 0; | ||
3026 | spin_unlock(&info->lock); | ||
3027 | wake_up(&info->allocate_wait); | ||
3028 | |||
3029 | if (ret) | ||
3030 | return 0; | ||
3031 | return 1; | ||
3032 | } | ||
3033 | |||
3034 | /* | ||
3035 | * Reserve metadata space for delalloc. | ||
3036 | */ | ||
3037 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
3038 | struct inode *inode, int num_items) | ||
3039 | { | ||
3040 | struct btrfs_fs_info *info = root->fs_info; | ||
3041 | struct btrfs_space_info *meta_sinfo; | ||
3042 | u64 num_bytes; | ||
3043 | u64 used; | ||
3044 | u64 alloc_target; | ||
3045 | int flushed = 0; | ||
3046 | int force_delalloc; | ||
3047 | |||
3048 | /* get the space info for where the metadata will live */ | ||
3049 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3050 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3051 | |||
3052 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
3053 | num_items); | ||
3054 | again: | ||
3055 | spin_lock(&meta_sinfo->lock); | ||
3056 | |||
3057 | force_delalloc = meta_sinfo->force_delalloc; | ||
3058 | |||
3059 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3060 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3061 | |||
3062 | if (!flushed) | ||
3063 | meta_sinfo->bytes_delalloc += num_bytes; | ||
3064 | |||
3065 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3066 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3067 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3068 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3069 | |||
3070 | if (used > meta_sinfo->total_bytes) { | ||
3071 | flushed++; | ||
3072 | |||
3073 | if (flushed == 1) { | ||
3074 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3075 | goto again; | ||
3076 | flushed++; | ||
3077 | } else { | ||
3078 | spin_unlock(&meta_sinfo->lock); | ||
3079 | } | ||
3080 | |||
3081 | if (flushed == 2) { | ||
3082 | filemap_flush(inode->i_mapping); | ||
3083 | goto again; | ||
3084 | } else if (flushed == 3) { | ||
3085 | flush_delalloc(root, meta_sinfo); | ||
2733 | goto again; | 3086 | goto again; |
2734 | } | 3087 | } |
3088 | spin_lock(&meta_sinfo->lock); | ||
3089 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2735 | spin_unlock(&meta_sinfo->lock); | 3090 | spin_unlock(&meta_sinfo->lock); |
3091 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
3092 | BTRFS_I(inode)->outstanding_extents, | ||
3093 | BTRFS_I(inode)->reserved_extents); | ||
3094 | dump_space_info(meta_sinfo, 0, 0); | ||
3095 | return -ENOSPC; | ||
3096 | } | ||
2736 | 3097 | ||
2737 | if (!committed) { | 3098 | BTRFS_I(inode)->reserved_extents++; |
2738 | committed = 1; | 3099 | check_force_delalloc(meta_sinfo); |
2739 | trans = btrfs_join_transaction(root, 1); | 3100 | spin_unlock(&meta_sinfo->lock); |
2740 | if (!trans) | 3101 | |
2741 | return -ENOMEM; | 3102 | if (!flushed && force_delalloc) |
2742 | ret = btrfs_commit_transaction(trans, root); | 3103 | filemap_flush(inode->i_mapping); |
2743 | if (ret) | 3104 | |
2744 | return ret; | 3105 | return 0; |
3106 | } | ||
3107 | |||
3108 | /* | ||
3109 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3110 | * be paired with btrfs_reserve_metadata_space. | ||
3111 | * | ||
3112 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3113 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3114 | * operations which will result in more used metadata, so we want to make sure we | ||
3115 | * can do that without issue. | ||
3116 | */ | ||
3117 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3118 | { | ||
3119 | struct btrfs_fs_info *info = root->fs_info; | ||
3120 | struct btrfs_space_info *meta_sinfo; | ||
3121 | u64 num_bytes; | ||
3122 | u64 alloc_target; | ||
3123 | bool bug = false; | ||
3124 | |||
3125 | /* get the space info for where the metadata will live */ | ||
3126 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3127 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3128 | |||
3129 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3130 | |||
3131 | spin_lock(&meta_sinfo->lock); | ||
3132 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
3133 | bug = true; | ||
3134 | meta_sinfo->bytes_may_use = 0; | ||
3135 | } else { | ||
3136 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3137 | } | ||
3138 | spin_unlock(&meta_sinfo->lock); | ||
3139 | |||
3140 | BUG_ON(bug); | ||
3141 | |||
3142 | return 0; | ||
3143 | } | ||
3144 | |||
3145 | /* | ||
3146 | * Reserve some metadata space for use. We'll calculate the worst case number | ||
3147 | * of bytes that would be needed to modify num_items number of items. If we | ||
3148 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
3149 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3150 | * items you reserved, since whatever metadata you needed should have already | ||
3151 | * been allocated. | ||
3152 | * | ||
3153 | * This will commit the transaction to make more space if we don't have enough | ||
3154 | * metadata space. The only time we don't do this is if we're reserving space | ||
3155 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
3156 | * caller's responsibility to handle it properly. | ||
3157 | */ | ||
3158 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3159 | { | ||
3160 | struct btrfs_fs_info *info = root->fs_info; | ||
3161 | struct btrfs_space_info *meta_sinfo; | ||
3162 | u64 num_bytes; | ||
3163 | u64 used; | ||
3164 | u64 alloc_target; | ||
3165 | int retries = 0; | ||
3166 | |||
3167 | /* get the space info for where the metadata will live */ | ||
3168 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3169 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3170 | |||
3171 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3172 | again: | ||
3173 | spin_lock(&meta_sinfo->lock); | ||
3174 | |||
3175 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3176 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3177 | |||
3178 | if (!retries) | ||
3179 | meta_sinfo->bytes_may_use += num_bytes; | ||
3180 | |||
3181 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3182 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3183 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3184 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3185 | |||
3186 | if (used > meta_sinfo->total_bytes) { | ||
3187 | retries++; | ||
3188 | if (retries == 1) { | ||
3189 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3190 | goto again; | ||
3191 | retries++; | ||
3192 | } else { | ||
3193 | spin_unlock(&meta_sinfo->lock); | ||
3194 | } | ||
3195 | |||
3196 | if (retries == 2) { | ||
3197 | flush_delalloc(root, meta_sinfo); | ||
2745 | goto again; | 3198 | goto again; |
2746 | } | 3199 | } |
3200 | spin_lock(&meta_sinfo->lock); | ||
3201 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3202 | spin_unlock(&meta_sinfo->lock); | ||
3203 | |||
3204 | dump_space_info(meta_sinfo, 0, 0); | ||
2747 | return -ENOSPC; | 3205 | return -ENOSPC; |
2748 | } | 3206 | } |
3207 | |||
3208 | check_force_delalloc(meta_sinfo); | ||
2749 | spin_unlock(&meta_sinfo->lock); | 3209 | spin_unlock(&meta_sinfo->lock); |
2750 | 3210 | ||
2751 | return 0; | 3211 | return 0; |
@@ -2765,13 +3225,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
2765 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3225 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
2766 | 3226 | ||
2767 | data_sinfo = BTRFS_I(inode)->space_info; | 3227 | data_sinfo = BTRFS_I(inode)->space_info; |
3228 | if (!data_sinfo) | ||
3229 | goto alloc; | ||
3230 | |||
2768 | again: | 3231 | again: |
2769 | /* make sure we have enough space to handle the data first */ | 3232 | /* make sure we have enough space to handle the data first */ |
2770 | spin_lock(&data_sinfo->lock); | 3233 | spin_lock(&data_sinfo->lock); |
2771 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | 3234 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - |
2772 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | 3235 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - |
2773 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | 3236 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - |
2774 | data_sinfo->bytes_may_use < bytes) { | 3237 | data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { |
2775 | struct btrfs_trans_handle *trans; | 3238 | struct btrfs_trans_handle *trans; |
2776 | 3239 | ||
2777 | /* | 3240 | /* |
@@ -2783,7 +3246,7 @@ again: | |||
2783 | 3246 | ||
2784 | data_sinfo->force_alloc = 1; | 3247 | data_sinfo->force_alloc = 1; |
2785 | spin_unlock(&data_sinfo->lock); | 3248 | spin_unlock(&data_sinfo->lock); |
2786 | 3249 | alloc: | |
2787 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3250 | alloc_target = btrfs_get_alloc_profile(root, 1); |
2788 | trans = btrfs_start_transaction(root, 1); | 3251 | trans = btrfs_start_transaction(root, 1); |
2789 | if (!trans) | 3252 | if (!trans) |
@@ -2795,12 +3258,17 @@ again: | |||
2795 | btrfs_end_transaction(trans, root); | 3258 | btrfs_end_transaction(trans, root); |
2796 | if (ret) | 3259 | if (ret) |
2797 | return ret; | 3260 | return ret; |
3261 | |||
3262 | if (!data_sinfo) { | ||
3263 | btrfs_set_inode_space_info(root, inode); | ||
3264 | data_sinfo = BTRFS_I(inode)->space_info; | ||
3265 | } | ||
2798 | goto again; | 3266 | goto again; |
2799 | } | 3267 | } |
2800 | spin_unlock(&data_sinfo->lock); | 3268 | spin_unlock(&data_sinfo->lock); |
2801 | 3269 | ||
2802 | /* commit the current transaction and try again */ | 3270 | /* commit the current transaction and try again */ |
2803 | if (!committed) { | 3271 | if (!committed && !root->fs_info->open_ioctl_trans) { |
2804 | committed = 1; | 3272 | committed = 1; |
2805 | trans = btrfs_join_transaction(root, 1); | 3273 | trans = btrfs_join_transaction(root, 1); |
2806 | if (!trans) | 3274 | if (!trans) |
@@ -2828,7 +3296,7 @@ again: | |||
2828 | BTRFS_I(inode)->reserved_bytes += bytes; | 3296 | BTRFS_I(inode)->reserved_bytes += bytes; |
2829 | spin_unlock(&data_sinfo->lock); | 3297 | spin_unlock(&data_sinfo->lock); |
2830 | 3298 | ||
2831 | return btrfs_check_metadata_free_space(root); | 3299 | return 0; |
2832 | } | 3300 | } |
2833 | 3301 | ||
2834 | /* | 3302 | /* |
@@ -2927,17 +3395,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2927 | BUG_ON(!space_info); | 3395 | BUG_ON(!space_info); |
2928 | 3396 | ||
2929 | spin_lock(&space_info->lock); | 3397 | spin_lock(&space_info->lock); |
2930 | if (space_info->force_alloc) { | 3398 | if (space_info->force_alloc) |
2931 | force = 1; | 3399 | force = 1; |
2932 | space_info->force_alloc = 0; | ||
2933 | } | ||
2934 | if (space_info->full) { | 3400 | if (space_info->full) { |
2935 | spin_unlock(&space_info->lock); | 3401 | spin_unlock(&space_info->lock); |
2936 | goto out; | 3402 | goto out; |
2937 | } | 3403 | } |
2938 | 3404 | ||
2939 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3405 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
2940 | thresh = div_factor(thresh, 6); | 3406 | thresh = div_factor(thresh, 8); |
2941 | if (!force && | 3407 | if (!force && |
2942 | (space_info->bytes_used + space_info->bytes_pinned + | 3408 | (space_info->bytes_used + space_info->bytes_pinned + |
2943 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3409 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
@@ -2951,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2951 | * we keep a reasonable number of metadata chunks allocated in the | 3417 | * we keep a reasonable number of metadata chunks allocated in the |
2952 | * FS as well. | 3418 | * FS as well. |
2953 | */ | 3419 | */ |
2954 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3420 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
2955 | fs_info->data_chunk_allocations++; | 3421 | fs_info->data_chunk_allocations++; |
2956 | if (!(fs_info->data_chunk_allocations % | 3422 | if (!(fs_info->data_chunk_allocations % |
2957 | fs_info->metadata_ratio)) | 3423 | fs_info->metadata_ratio)) |
@@ -2959,8 +3425,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
2959 | } | 3425 | } |
2960 | 3426 | ||
2961 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3427 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3428 | spin_lock(&space_info->lock); | ||
2962 | if (ret) | 3429 | if (ret) |
2963 | space_info->full = 1; | 3430 | space_info->full = 1; |
3431 | space_info->force_alloc = 0; | ||
3432 | spin_unlock(&space_info->lock); | ||
2964 | out: | 3433 | out: |
2965 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3434 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
2966 | return ret; | 3435 | return ret; |
@@ -3009,10 +3478,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3009 | num_bytes = min(total, cache->key.offset - byte_in_group); | 3478 | num_bytes = min(total, cache->key.offset - byte_in_group); |
3010 | if (alloc) { | 3479 | if (alloc) { |
3011 | old_val += num_bytes; | 3480 | old_val += num_bytes; |
3481 | btrfs_set_block_group_used(&cache->item, old_val); | ||
3482 | cache->reserved -= num_bytes; | ||
3012 | cache->space_info->bytes_used += num_bytes; | 3483 | cache->space_info->bytes_used += num_bytes; |
3484 | cache->space_info->bytes_reserved -= num_bytes; | ||
3013 | if (cache->ro) | 3485 | if (cache->ro) |
3014 | cache->space_info->bytes_readonly -= num_bytes; | 3486 | cache->space_info->bytes_readonly -= num_bytes; |
3015 | btrfs_set_block_group_used(&cache->item, old_val); | ||
3016 | spin_unlock(&cache->lock); | 3487 | spin_unlock(&cache->lock); |
3017 | spin_unlock(&cache->space_info->lock); | 3488 | spin_unlock(&cache->space_info->lock); |
3018 | } else { | 3489 | } else { |
@@ -3057,127 +3528,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
3057 | return bytenr; | 3528 | return bytenr; |
3058 | } | 3529 | } |
3059 | 3530 | ||
3060 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 3531 | /* |
3061 | u64 bytenr, u64 num, int pin) | 3532 | * this function must be called within transaction |
3533 | */ | ||
3534 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3535 | u64 bytenr, u64 num_bytes, int reserved) | ||
3062 | { | 3536 | { |
3063 | u64 len; | ||
3064 | struct btrfs_block_group_cache *cache; | ||
3065 | struct btrfs_fs_info *fs_info = root->fs_info; | 3537 | struct btrfs_fs_info *fs_info = root->fs_info; |
3538 | struct btrfs_block_group_cache *cache; | ||
3066 | 3539 | ||
3067 | if (pin) | 3540 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
3068 | set_extent_dirty(&fs_info->pinned_extents, | 3541 | BUG_ON(!cache); |
3069 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
3070 | |||
3071 | while (num > 0) { | ||
3072 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3073 | BUG_ON(!cache); | ||
3074 | len = min(num, cache->key.offset - | ||
3075 | (bytenr - cache->key.objectid)); | ||
3076 | if (pin) { | ||
3077 | spin_lock(&cache->space_info->lock); | ||
3078 | spin_lock(&cache->lock); | ||
3079 | cache->pinned += len; | ||
3080 | cache->space_info->bytes_pinned += len; | ||
3081 | spin_unlock(&cache->lock); | ||
3082 | spin_unlock(&cache->space_info->lock); | ||
3083 | fs_info->total_pinned += len; | ||
3084 | } else { | ||
3085 | int unpin = 0; | ||
3086 | 3542 | ||
3087 | /* | 3543 | spin_lock(&cache->space_info->lock); |
3088 | * in order to not race with the block group caching, we | 3544 | spin_lock(&cache->lock); |
3089 | * only want to unpin the extent if we are cached. If | 3545 | cache->pinned += num_bytes; |
3090 | * we aren't cached, we want to start async caching this | 3546 | cache->space_info->bytes_pinned += num_bytes; |
3091 | * block group so we can free the extent the next time | 3547 | if (reserved) { |
3092 | * around. | 3548 | cache->reserved -= num_bytes; |
3093 | */ | 3549 | cache->space_info->bytes_reserved -= num_bytes; |
3094 | spin_lock(&cache->space_info->lock); | 3550 | } |
3095 | spin_lock(&cache->lock); | 3551 | spin_unlock(&cache->lock); |
3096 | unpin = (cache->cached == BTRFS_CACHE_FINISHED); | 3552 | spin_unlock(&cache->space_info->lock); |
3097 | if (likely(unpin)) { | ||
3098 | cache->pinned -= len; | ||
3099 | cache->space_info->bytes_pinned -= len; | ||
3100 | fs_info->total_pinned -= len; | ||
3101 | } | ||
3102 | spin_unlock(&cache->lock); | ||
3103 | spin_unlock(&cache->space_info->lock); | ||
3104 | 3553 | ||
3105 | if (likely(unpin)) | 3554 | btrfs_put_block_group(cache); |
3106 | clear_extent_dirty(&fs_info->pinned_extents, | ||
3107 | bytenr, bytenr + len -1, | ||
3108 | GFP_NOFS); | ||
3109 | else | ||
3110 | cache_block_group(cache); | ||
3111 | 3555 | ||
3112 | if (unpin) | 3556 | set_extent_dirty(fs_info->pinned_extents, |
3113 | btrfs_add_free_space(cache, bytenr, len); | 3557 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); |
3114 | } | 3558 | return 0; |
3115 | btrfs_put_block_group(cache); | 3559 | } |
3116 | bytenr += len; | 3560 | |
3117 | num -= len; | 3561 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, |
3562 | u64 num_bytes, int reserve) | ||
3563 | { | ||
3564 | spin_lock(&cache->space_info->lock); | ||
3565 | spin_lock(&cache->lock); | ||
3566 | if (reserve) { | ||
3567 | cache->reserved += num_bytes; | ||
3568 | cache->space_info->bytes_reserved += num_bytes; | ||
3569 | } else { | ||
3570 | cache->reserved -= num_bytes; | ||
3571 | cache->space_info->bytes_reserved -= num_bytes; | ||
3118 | } | 3572 | } |
3573 | spin_unlock(&cache->lock); | ||
3574 | spin_unlock(&cache->space_info->lock); | ||
3119 | return 0; | 3575 | return 0; |
3120 | } | 3576 | } |
3121 | 3577 | ||
3122 | static int update_reserved_extents(struct btrfs_root *root, | 3578 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
3123 | u64 bytenr, u64 num, int reserve) | 3579 | struct btrfs_root *root) |
3124 | { | 3580 | { |
3125 | u64 len; | ||
3126 | struct btrfs_block_group_cache *cache; | ||
3127 | struct btrfs_fs_info *fs_info = root->fs_info; | 3581 | struct btrfs_fs_info *fs_info = root->fs_info; |
3582 | struct btrfs_caching_control *next; | ||
3583 | struct btrfs_caching_control *caching_ctl; | ||
3584 | struct btrfs_block_group_cache *cache; | ||
3128 | 3585 | ||
3129 | while (num > 0) { | 3586 | down_write(&fs_info->extent_commit_sem); |
3130 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3131 | BUG_ON(!cache); | ||
3132 | len = min(num, cache->key.offset - | ||
3133 | (bytenr - cache->key.objectid)); | ||
3134 | 3587 | ||
3135 | spin_lock(&cache->space_info->lock); | 3588 | list_for_each_entry_safe(caching_ctl, next, |
3136 | spin_lock(&cache->lock); | 3589 | &fs_info->caching_block_groups, list) { |
3137 | if (reserve) { | 3590 | cache = caching_ctl->block_group; |
3138 | cache->reserved += len; | 3591 | if (block_group_cache_done(cache)) { |
3139 | cache->space_info->bytes_reserved += len; | 3592 | cache->last_byte_to_unpin = (u64)-1; |
3593 | list_del_init(&caching_ctl->list); | ||
3594 | put_caching_control(caching_ctl); | ||
3140 | } else { | 3595 | } else { |
3141 | cache->reserved -= len; | 3596 | cache->last_byte_to_unpin = caching_ctl->progress; |
3142 | cache->space_info->bytes_reserved -= len; | ||
3143 | } | 3597 | } |
3144 | spin_unlock(&cache->lock); | ||
3145 | spin_unlock(&cache->space_info->lock); | ||
3146 | btrfs_put_block_group(cache); | ||
3147 | bytenr += len; | ||
3148 | num -= len; | ||
3149 | } | 3598 | } |
3599 | |||
3600 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
3601 | fs_info->pinned_extents = &fs_info->freed_extents[1]; | ||
3602 | else | ||
3603 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
3604 | |||
3605 | up_write(&fs_info->extent_commit_sem); | ||
3150 | return 0; | 3606 | return 0; |
3151 | } | 3607 | } |
3152 | 3608 | ||
3153 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | 3609 | static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) |
3154 | { | 3610 | { |
3155 | u64 last = 0; | 3611 | struct btrfs_fs_info *fs_info = root->fs_info; |
3156 | u64 start; | 3612 | struct btrfs_block_group_cache *cache = NULL; |
3157 | u64 end; | 3613 | u64 len; |
3158 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | ||
3159 | int ret; | ||
3160 | 3614 | ||
3161 | while (1) { | 3615 | while (start <= end) { |
3162 | ret = find_first_extent_bit(pinned_extents, last, | 3616 | if (!cache || |
3163 | &start, &end, EXTENT_DIRTY); | 3617 | start >= cache->key.objectid + cache->key.offset) { |
3164 | if (ret) | 3618 | if (cache) |
3165 | break; | 3619 | btrfs_put_block_group(cache); |
3620 | cache = btrfs_lookup_block_group(fs_info, start); | ||
3621 | BUG_ON(!cache); | ||
3622 | } | ||
3166 | 3623 | ||
3167 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3624 | len = cache->key.objectid + cache->key.offset - start; |
3168 | last = end + 1; | 3625 | len = min(len, end + 1 - start); |
3626 | |||
3627 | if (start < cache->last_byte_to_unpin) { | ||
3628 | len = min(len, cache->last_byte_to_unpin - start); | ||
3629 | btrfs_add_free_space(cache, start, len); | ||
3630 | } | ||
3631 | |||
3632 | spin_lock(&cache->space_info->lock); | ||
3633 | spin_lock(&cache->lock); | ||
3634 | cache->pinned -= len; | ||
3635 | cache->space_info->bytes_pinned -= len; | ||
3636 | spin_unlock(&cache->lock); | ||
3637 | spin_unlock(&cache->space_info->lock); | ||
3638 | |||
3639 | start += len; | ||
3169 | } | 3640 | } |
3641 | |||
3642 | if (cache) | ||
3643 | btrfs_put_block_group(cache); | ||
3170 | return 0; | 3644 | return 0; |
3171 | } | 3645 | } |
3172 | 3646 | ||
3173 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 3647 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
3174 | struct btrfs_root *root, | 3648 | struct btrfs_root *root) |
3175 | struct extent_io_tree *unpin) | ||
3176 | { | 3649 | { |
3650 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3651 | struct extent_io_tree *unpin; | ||
3177 | u64 start; | 3652 | u64 start; |
3178 | u64 end; | 3653 | u64 end; |
3179 | int ret; | 3654 | int ret; |
3180 | 3655 | ||
3656 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
3657 | unpin = &fs_info->freed_extents[1]; | ||
3658 | else | ||
3659 | unpin = &fs_info->freed_extents[0]; | ||
3660 | |||
3181 | while (1) { | 3661 | while (1) { |
3182 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3662 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
3183 | EXTENT_DIRTY); | 3663 | EXTENT_DIRTY); |
@@ -3186,10 +3666,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3186 | 3666 | ||
3187 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 3667 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
3188 | 3668 | ||
3189 | /* unlocks the pinned mutex */ | ||
3190 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | ||
3191 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 3669 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
3192 | 3670 | unpin_extent_range(root, start, end); | |
3193 | cond_resched(); | 3671 | cond_resched(); |
3194 | } | 3672 | } |
3195 | 3673 | ||
@@ -3199,7 +3677,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3199 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 3677 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
3200 | struct btrfs_root *root, | 3678 | struct btrfs_root *root, |
3201 | struct btrfs_path *path, | 3679 | struct btrfs_path *path, |
3202 | u64 bytenr, u64 num_bytes, int is_data, | 3680 | u64 bytenr, u64 num_bytes, |
3681 | int is_data, int reserved, | ||
3203 | struct extent_buffer **must_clean) | 3682 | struct extent_buffer **must_clean) |
3204 | { | 3683 | { |
3205 | int err = 0; | 3684 | int err = 0; |
@@ -3208,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3208 | if (is_data) | 3687 | if (is_data) |
3209 | goto pinit; | 3688 | goto pinit; |
3210 | 3689 | ||
3690 | /* | ||
3691 | * discard is sloooow, and so triggering discards on | ||
3692 | * individual btree blocks isn't a good plan. Just | ||
3693 | * pin everything in discard mode. | ||
3694 | */ | ||
3695 | if (btrfs_test_opt(root, DISCARD)) | ||
3696 | goto pinit; | ||
3697 | |||
3211 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | 3698 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); |
3212 | if (!buf) | 3699 | if (!buf) |
3213 | goto pinit; | 3700 | goto pinit; |
@@ -3231,15 +3718,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3231 | } | 3718 | } |
3232 | free_extent_buffer(buf); | 3719 | free_extent_buffer(buf); |
3233 | pinit: | 3720 | pinit: |
3234 | btrfs_set_path_blocking(path); | 3721 | if (path) |
3722 | btrfs_set_path_blocking(path); | ||
3235 | /* unlocks the pinned mutex */ | 3723 | /* unlocks the pinned mutex */ |
3236 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3724 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); |
3237 | 3725 | ||
3238 | BUG_ON(err < 0); | 3726 | BUG_ON(err < 0); |
3239 | return 0; | 3727 | return 0; |
3240 | } | 3728 | } |
3241 | 3729 | ||
3242 | |||
3243 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 3730 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
3244 | struct btrfs_root *root, | 3731 | struct btrfs_root *root, |
3245 | u64 bytenr, u64 num_bytes, u64 parent, | 3732 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -3413,7 +3900,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3413 | } | 3900 | } |
3414 | 3901 | ||
3415 | ret = pin_down_bytes(trans, root, path, bytenr, | 3902 | ret = pin_down_bytes(trans, root, path, bytenr, |
3416 | num_bytes, is_data, &must_clean); | 3903 | num_bytes, is_data, 0, &must_clean); |
3417 | if (ret > 0) | 3904 | if (ret > 0) |
3418 | mark_free = 1; | 3905 | mark_free = 1; |
3419 | BUG_ON(ret < 0); | 3906 | BUG_ON(ret < 0); |
@@ -3544,8 +4031,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3544 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { | 4031 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
3545 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 4032 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
3546 | /* unlocks the pinned mutex */ | 4033 | /* unlocks the pinned mutex */ |
3547 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 4034 | btrfs_pin_extent(root, bytenr, num_bytes, 1); |
3548 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
3549 | ret = 0; | 4035 | ret = 0; |
3550 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 4036 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
3551 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 4037 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
@@ -3585,19 +4071,33 @@ static noinline int | |||
3585 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | 4071 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, |
3586 | u64 num_bytes) | 4072 | u64 num_bytes) |
3587 | { | 4073 | { |
4074 | struct btrfs_caching_control *caching_ctl; | ||
3588 | DEFINE_WAIT(wait); | 4075 | DEFINE_WAIT(wait); |
3589 | 4076 | ||
3590 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | 4077 | caching_ctl = get_caching_control(cache); |
3591 | 4078 | if (!caching_ctl) | |
3592 | if (block_group_cache_done(cache)) { | ||
3593 | finish_wait(&cache->caching_q, &wait); | ||
3594 | return 0; | 4079 | return 0; |
3595 | } | ||
3596 | schedule(); | ||
3597 | finish_wait(&cache->caching_q, &wait); | ||
3598 | 4080 | ||
3599 | wait_event(cache->caching_q, block_group_cache_done(cache) || | 4081 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
3600 | (cache->free_space >= num_bytes)); | 4082 | (cache->free_space >= num_bytes)); |
4083 | |||
4084 | put_caching_control(caching_ctl); | ||
4085 | return 0; | ||
4086 | } | ||
4087 | |||
4088 | static noinline int | ||
4089 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
4090 | { | ||
4091 | struct btrfs_caching_control *caching_ctl; | ||
4092 | DEFINE_WAIT(wait); | ||
4093 | |||
4094 | caching_ctl = get_caching_control(cache); | ||
4095 | if (!caching_ctl) | ||
4096 | return 0; | ||
4097 | |||
4098 | wait_event(caching_ctl->wait, block_group_cache_done(cache)); | ||
4099 | |||
4100 | put_caching_control(caching_ctl); | ||
3601 | return 0; | 4101 | return 0; |
3602 | } | 4102 | } |
3603 | 4103 | ||
@@ -3635,6 +4135,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3635 | int last_ptr_loop = 0; | 4135 | int last_ptr_loop = 0; |
3636 | int loop = 0; | 4136 | int loop = 0; |
3637 | bool found_uncached_bg = false; | 4137 | bool found_uncached_bg = false; |
4138 | bool failed_cluster_refill = false; | ||
4139 | bool failed_alloc = false; | ||
3638 | 4140 | ||
3639 | WARN_ON(num_bytes < root->sectorsize); | 4141 | WARN_ON(num_bytes < root->sectorsize); |
3640 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 4142 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3732,7 +4234,16 @@ have_block_group: | |||
3732 | if (unlikely(block_group->ro)) | 4234 | if (unlikely(block_group->ro)) |
3733 | goto loop; | 4235 | goto loop; |
3734 | 4236 | ||
3735 | if (last_ptr) { | 4237 | /* |
4238 | * Ok we want to try and use the cluster allocator, so lets look | ||
4239 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | ||
4240 | * have tried the cluster allocator plenty of times at this | ||
4241 | * point and not have found anything, so we are likely way too | ||
4242 | * fragmented for the clustering stuff to find anything, so lets | ||
4243 | * just skip it and let the allocator find whatever block it can | ||
4244 | * find | ||
4245 | */ | ||
4246 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | ||
3736 | /* | 4247 | /* |
3737 | * the refill lock keeps out other | 4248 | * the refill lock keeps out other |
3738 | * people trying to start a new cluster | 4249 | * people trying to start a new cluster |
@@ -3807,9 +4318,11 @@ refill_cluster: | |||
3807 | spin_unlock(&last_ptr->refill_lock); | 4318 | spin_unlock(&last_ptr->refill_lock); |
3808 | goto checks; | 4319 | goto checks; |
3809 | } | 4320 | } |
3810 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | 4321 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
4322 | && !failed_cluster_refill) { | ||
3811 | spin_unlock(&last_ptr->refill_lock); | 4323 | spin_unlock(&last_ptr->refill_lock); |
3812 | 4324 | ||
4325 | failed_cluster_refill = true; | ||
3813 | wait_block_group_cache_progress(block_group, | 4326 | wait_block_group_cache_progress(block_group, |
3814 | num_bytes + empty_cluster + empty_size); | 4327 | num_bytes + empty_cluster + empty_size); |
3815 | goto have_block_group; | 4328 | goto have_block_group; |
@@ -3821,25 +4334,30 @@ refill_cluster: | |||
3821 | * cluster. Free the cluster we've been trying | 4334 | * cluster. Free the cluster we've been trying |
3822 | * to use, and go to the next block group | 4335 | * to use, and go to the next block group |
3823 | */ | 4336 | */ |
3824 | if (loop < LOOP_NO_EMPTY_SIZE) { | 4337 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
3825 | btrfs_return_cluster_to_free_space(NULL, | ||
3826 | last_ptr); | ||
3827 | spin_unlock(&last_ptr->refill_lock); | ||
3828 | goto loop; | ||
3829 | } | ||
3830 | spin_unlock(&last_ptr->refill_lock); | 4338 | spin_unlock(&last_ptr->refill_lock); |
4339 | goto loop; | ||
3831 | } | 4340 | } |
3832 | 4341 | ||
3833 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 4342 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3834 | num_bytes, empty_size); | 4343 | num_bytes, empty_size); |
3835 | if (!offset && (cached || (!cached && | 4344 | /* |
3836 | loop == LOOP_CACHING_NOWAIT))) { | 4345 | * If we didn't find a chunk, and we haven't failed on this |
3837 | goto loop; | 4346 | * block group before, and this block group is in the middle of |
3838 | } else if (!offset && (!cached && | 4347 | * caching and we are ok with waiting, then go ahead and wait |
3839 | loop > LOOP_CACHING_NOWAIT)) { | 4348 | * for progress to be made, and set failed_alloc to true. |
4349 | * | ||
4350 | * If failed_alloc is true then we've already waited on this | ||
4351 | * block group once and should move on to the next block group. | ||
4352 | */ | ||
4353 | if (!offset && !failed_alloc && !cached && | ||
4354 | loop > LOOP_CACHING_NOWAIT) { | ||
3840 | wait_block_group_cache_progress(block_group, | 4355 | wait_block_group_cache_progress(block_group, |
3841 | num_bytes + empty_size); | 4356 | num_bytes + empty_size); |
4357 | failed_alloc = true; | ||
3842 | goto have_block_group; | 4358 | goto have_block_group; |
4359 | } else if (!offset) { | ||
4360 | goto loop; | ||
3843 | } | 4361 | } |
3844 | checks: | 4362 | checks: |
3845 | search_start = stripe_align(root, offset); | 4363 | search_start = stripe_align(root, offset); |
@@ -3881,9 +4399,13 @@ checks: | |||
3881 | search_start - offset); | 4399 | search_start - offset); |
3882 | BUG_ON(offset > search_start); | 4400 | BUG_ON(offset > search_start); |
3883 | 4401 | ||
4402 | update_reserved_extents(block_group, num_bytes, 1); | ||
4403 | |||
3884 | /* we are all good, lets return */ | 4404 | /* we are all good, lets return */ |
3885 | break; | 4405 | break; |
3886 | loop: | 4406 | loop: |
4407 | failed_cluster_refill = false; | ||
4408 | failed_alloc = false; | ||
3887 | btrfs_put_block_group(block_group); | 4409 | btrfs_put_block_group(block_group); |
3888 | } | 4410 | } |
3889 | up_read(&space_info->groups_sem); | 4411 | up_read(&space_info->groups_sem); |
@@ -3941,21 +4463,32 @@ loop: | |||
3941 | return ret; | 4463 | return ret; |
3942 | } | 4464 | } |
3943 | 4465 | ||
3944 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4466 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
4467 | int dump_block_groups) | ||
3945 | { | 4468 | { |
3946 | struct btrfs_block_group_cache *cache; | 4469 | struct btrfs_block_group_cache *cache; |
3947 | 4470 | ||
4471 | spin_lock(&info->lock); | ||
3948 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4472 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
3949 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4473 | (unsigned long long)(info->total_bytes - info->bytes_used - |
3950 | info->bytes_pinned - info->bytes_reserved), | 4474 | info->bytes_pinned - info->bytes_reserved - |
4475 | info->bytes_super), | ||
3951 | (info->full) ? "" : "not "); | 4476 | (info->full) ? "" : "not "); |
3952 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4477 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
3953 | " may_use=%llu, used=%llu\n", | 4478 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
4479 | "\n", | ||
3954 | (unsigned long long)info->total_bytes, | 4480 | (unsigned long long)info->total_bytes, |
3955 | (unsigned long long)info->bytes_pinned, | 4481 | (unsigned long long)info->bytes_pinned, |
3956 | (unsigned long long)info->bytes_delalloc, | 4482 | (unsigned long long)info->bytes_delalloc, |
3957 | (unsigned long long)info->bytes_may_use, | 4483 | (unsigned long long)info->bytes_may_use, |
3958 | (unsigned long long)info->bytes_used); | 4484 | (unsigned long long)info->bytes_used, |
4485 | (unsigned long long)info->bytes_root, | ||
4486 | (unsigned long long)info->bytes_super, | ||
4487 | (unsigned long long)info->bytes_reserved); | ||
4488 | spin_unlock(&info->lock); | ||
4489 | |||
4490 | if (!dump_block_groups) | ||
4491 | return; | ||
3959 | 4492 | ||
3960 | down_read(&info->groups_sem); | 4493 | down_read(&info->groups_sem); |
3961 | list_for_each_entry(cache, &info->block_groups, list) { | 4494 | list_for_each_entry(cache, &info->block_groups, list) { |
@@ -3973,12 +4506,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
3973 | up_read(&info->groups_sem); | 4506 | up_read(&info->groups_sem); |
3974 | } | 4507 | } |
3975 | 4508 | ||
3976 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | 4509 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
3977 | struct btrfs_root *root, | 4510 | struct btrfs_root *root, |
3978 | u64 num_bytes, u64 min_alloc_size, | 4511 | u64 num_bytes, u64 min_alloc_size, |
3979 | u64 empty_size, u64 hint_byte, | 4512 | u64 empty_size, u64 hint_byte, |
3980 | u64 search_end, struct btrfs_key *ins, | 4513 | u64 search_end, struct btrfs_key *ins, |
3981 | u64 data) | 4514 | u64 data) |
3982 | { | 4515 | { |
3983 | int ret; | 4516 | int ret; |
3984 | u64 search_start = 0; | 4517 | u64 search_start = 0; |
@@ -4023,7 +4556,7 @@ again: | |||
4023 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4556 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
4024 | "wanted %llu\n", (unsigned long long)data, | 4557 | "wanted %llu\n", (unsigned long long)data, |
4025 | (unsigned long long)num_bytes); | 4558 | (unsigned long long)num_bytes); |
4026 | dump_space_info(sinfo, num_bytes); | 4559 | dump_space_info(sinfo, num_bytes, 1); |
4027 | } | 4560 | } |
4028 | 4561 | ||
4029 | return ret; | 4562 | return ret; |
@@ -4044,25 +4577,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4044 | ret = btrfs_discard_extent(root, start, len); | 4577 | ret = btrfs_discard_extent(root, start, len); |
4045 | 4578 | ||
4046 | btrfs_add_free_space(cache, start, len); | 4579 | btrfs_add_free_space(cache, start, len); |
4580 | update_reserved_extents(cache, len, 0); | ||
4047 | btrfs_put_block_group(cache); | 4581 | btrfs_put_block_group(cache); |
4048 | update_reserved_extents(root, start, len, 0); | ||
4049 | |||
4050 | return ret; | ||
4051 | } | ||
4052 | |||
4053 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
4054 | struct btrfs_root *root, | ||
4055 | u64 num_bytes, u64 min_alloc_size, | ||
4056 | u64 empty_size, u64 hint_byte, | ||
4057 | u64 search_end, struct btrfs_key *ins, | ||
4058 | u64 data) | ||
4059 | { | ||
4060 | int ret; | ||
4061 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
4062 | empty_size, hint_byte, search_end, ins, | ||
4063 | data); | ||
4064 | if (!ret) | ||
4065 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4066 | 4582 | ||
4067 | return ret; | 4583 | return ret; |
4068 | } | 4584 | } |
@@ -4223,15 +4739,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4223 | { | 4739 | { |
4224 | int ret; | 4740 | int ret; |
4225 | struct btrfs_block_group_cache *block_group; | 4741 | struct btrfs_block_group_cache *block_group; |
4742 | struct btrfs_caching_control *caching_ctl; | ||
4743 | u64 start = ins->objectid; | ||
4744 | u64 num_bytes = ins->offset; | ||
4226 | 4745 | ||
4227 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4746 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4228 | cache_block_group(block_group); | 4747 | cache_block_group(block_group); |
4229 | wait_event(block_group->caching_q, | 4748 | caching_ctl = get_caching_control(block_group); |
4230 | block_group_cache_done(block_group)); | ||
4231 | 4749 | ||
4232 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4750 | if (!caching_ctl) { |
4233 | ins->offset); | 4751 | BUG_ON(!block_group_cache_done(block_group)); |
4234 | BUG_ON(ret); | 4752 | ret = btrfs_remove_free_space(block_group, start, num_bytes); |
4753 | BUG_ON(ret); | ||
4754 | } else { | ||
4755 | mutex_lock(&caching_ctl->mutex); | ||
4756 | |||
4757 | if (start >= caching_ctl->progress) { | ||
4758 | ret = add_excluded_extent(root, start, num_bytes); | ||
4759 | BUG_ON(ret); | ||
4760 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
4761 | ret = btrfs_remove_free_space(block_group, | ||
4762 | start, num_bytes); | ||
4763 | BUG_ON(ret); | ||
4764 | } else { | ||
4765 | num_bytes = caching_ctl->progress - start; | ||
4766 | ret = btrfs_remove_free_space(block_group, | ||
4767 | start, num_bytes); | ||
4768 | BUG_ON(ret); | ||
4769 | |||
4770 | start = caching_ctl->progress; | ||
4771 | num_bytes = ins->objectid + ins->offset - | ||
4772 | caching_ctl->progress; | ||
4773 | ret = add_excluded_extent(root, start, num_bytes); | ||
4774 | BUG_ON(ret); | ||
4775 | } | ||
4776 | |||
4777 | mutex_unlock(&caching_ctl->mutex); | ||
4778 | put_caching_control(caching_ctl); | ||
4779 | } | ||
4780 | |||
4781 | update_reserved_extents(block_group, ins->offset, 1); | ||
4235 | btrfs_put_block_group(block_group); | 4782 | btrfs_put_block_group(block_group); |
4236 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 4783 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4237 | 0, owner, offset, ins, 1); | 4784 | 0, owner, offset, ins, 1); |
@@ -4255,9 +4802,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4255 | int ret; | 4802 | int ret; |
4256 | u64 flags = 0; | 4803 | u64 flags = 0; |
4257 | 4804 | ||
4258 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4805 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4259 | empty_size, hint_byte, search_end, | 4806 | empty_size, hint_byte, search_end, |
4260 | ins, 0); | 4807 | ins, 0); |
4261 | if (ret) | 4808 | if (ret) |
4262 | return ret; | 4809 | return ret; |
4263 | 4810 | ||
@@ -4268,7 +4815,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4268 | } else | 4815 | } else |
4269 | BUG_ON(parent > 0); | 4816 | BUG_ON(parent > 0); |
4270 | 4817 | ||
4271 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4272 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 4818 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
4273 | struct btrfs_delayed_extent_op *extent_op; | 4819 | struct btrfs_delayed_extent_op *extent_op; |
4274 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 4820 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); |
@@ -4347,452 +4893,108 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4347 | return buf; | 4893 | return buf; |
4348 | } | 4894 | } |
4349 | 4895 | ||
4350 | #if 0 | 4896 | struct walk_control { |
4351 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4897 | u64 refs[BTRFS_MAX_LEVEL]; |
4352 | struct btrfs_root *root, struct extent_buffer *leaf) | 4898 | u64 flags[BTRFS_MAX_LEVEL]; |
4353 | { | 4899 | struct btrfs_key update_progress; |
4354 | u64 disk_bytenr; | 4900 | int stage; |
4355 | u64 num_bytes; | 4901 | int level; |
4356 | struct btrfs_key key; | 4902 | int shared_level; |
4357 | struct btrfs_file_extent_item *fi; | 4903 | int update_ref; |
4358 | u32 nritems; | 4904 | int keep_locks; |
4359 | int i; | 4905 | int reada_slot; |
4360 | int ret; | 4906 | int reada_count; |
4361 | 4907 | }; | |
4362 | BUG_ON(!btrfs_is_leaf(leaf)); | ||
4363 | nritems = btrfs_header_nritems(leaf); | ||
4364 | |||
4365 | for (i = 0; i < nritems; i++) { | ||
4366 | cond_resched(); | ||
4367 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
4368 | |||
4369 | /* only extents have references, skip everything else */ | ||
4370 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
4371 | continue; | ||
4372 | |||
4373 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
4374 | |||
4375 | /* inline extents live in the btree, they don't have refs */ | ||
4376 | if (btrfs_file_extent_type(leaf, fi) == | ||
4377 | BTRFS_FILE_EXTENT_INLINE) | ||
4378 | continue; | ||
4379 | |||
4380 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
4381 | |||
4382 | /* holes don't have refs */ | ||
4383 | if (disk_bytenr == 0) | ||
4384 | continue; | ||
4385 | |||
4386 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
4387 | ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, | ||
4388 | leaf->start, 0, key.objectid, 0); | ||
4389 | BUG_ON(ret); | ||
4390 | } | ||
4391 | return 0; | ||
4392 | } | ||
4393 | |||
4394 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
4395 | struct btrfs_root *root, | ||
4396 | struct btrfs_leaf_ref *ref) | ||
4397 | { | ||
4398 | int i; | ||
4399 | int ret; | ||
4400 | struct btrfs_extent_info *info; | ||
4401 | struct refsort *sorted; | ||
4402 | |||
4403 | if (ref->nritems == 0) | ||
4404 | return 0; | ||
4405 | |||
4406 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
4407 | for (i = 0; i < ref->nritems; i++) { | ||
4408 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
4409 | sorted[i].slot = i; | ||
4410 | } | ||
4411 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
4412 | |||
4413 | /* | ||
4414 | * the items in the ref were sorted when the ref was inserted | ||
4415 | * into the ref cache, so this is already in order | ||
4416 | */ | ||
4417 | for (i = 0; i < ref->nritems; i++) { | ||
4418 | info = ref->extents + sorted[i].slot; | ||
4419 | ret = btrfs_free_extent(trans, root, info->bytenr, | ||
4420 | info->num_bytes, ref->bytenr, | ||
4421 | ref->owner, ref->generation, | ||
4422 | info->objectid, 0); | ||
4423 | |||
4424 | atomic_inc(&root->fs_info->throttle_gen); | ||
4425 | wake_up(&root->fs_info->transaction_throttle); | ||
4426 | cond_resched(); | ||
4427 | |||
4428 | BUG_ON(ret); | ||
4429 | info++; | ||
4430 | } | ||
4431 | |||
4432 | kfree(sorted); | ||
4433 | return 0; | ||
4434 | } | ||
4435 | |||
4436 | |||
4437 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | ||
4438 | struct btrfs_root *root, u64 start, | ||
4439 | u64 len, u32 *refs) | ||
4440 | { | ||
4441 | int ret; | ||
4442 | |||
4443 | ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); | ||
4444 | BUG_ON(ret); | ||
4445 | |||
4446 | #if 0 /* some debugging code in case we see problems here */ | ||
4447 | /* if the refs count is one, it won't get increased again. But | ||
4448 | * if the ref count is > 1, someone may be decreasing it at | ||
4449 | * the same time we are. | ||
4450 | */ | ||
4451 | if (*refs != 1) { | ||
4452 | struct extent_buffer *eb = NULL; | ||
4453 | eb = btrfs_find_create_tree_block(root, start, len); | ||
4454 | if (eb) | ||
4455 | btrfs_tree_lock(eb); | ||
4456 | |||
4457 | mutex_lock(&root->fs_info->alloc_mutex); | ||
4458 | ret = lookup_extent_ref(NULL, root, start, len, refs); | ||
4459 | BUG_ON(ret); | ||
4460 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
4461 | |||
4462 | if (eb) { | ||
4463 | btrfs_tree_unlock(eb); | ||
4464 | free_extent_buffer(eb); | ||
4465 | } | ||
4466 | if (*refs == 1) { | ||
4467 | printk(KERN_ERR "btrfs block %llu went down to one " | ||
4468 | "during drop_snap\n", (unsigned long long)start); | ||
4469 | } | ||
4470 | |||
4471 | } | ||
4472 | #endif | ||
4473 | |||
4474 | cond_resched(); | ||
4475 | return ret; | ||
4476 | } | ||
4477 | 4908 | ||
4909 | #define DROP_REFERENCE 1 | ||
4910 | #define UPDATE_BACKREF 2 | ||
4478 | 4911 | ||
4479 | /* | 4912 | static noinline void reada_walk_down(struct btrfs_trans_handle *trans, |
4480 | * this is used while deleting old snapshots, and it drops the refs | 4913 | struct btrfs_root *root, |
4481 | * on a whole subtree starting from a level 1 node. | 4914 | struct walk_control *wc, |
4482 | * | 4915 | struct btrfs_path *path) |
4483 | * The idea is to sort all the leaf pointers, and then drop the | ||
4484 | * ref on all the leaves in order. Most of the time the leaves | ||
4485 | * will have ref cache entries, so no leaf IOs will be required to | ||
4486 | * find the extents they have references on. | ||
4487 | * | ||
4488 | * For each leaf, any references it has are also dropped in order | ||
4489 | * | ||
4490 | * This ends up dropping the references in something close to optimal | ||
4491 | * order for reading and modifying the extent allocation tree. | ||
4492 | */ | ||
4493 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
4494 | struct btrfs_root *root, | ||
4495 | struct btrfs_path *path) | ||
4496 | { | 4916 | { |
4497 | u64 bytenr; | 4917 | u64 bytenr; |
4498 | u64 root_owner; | 4918 | u64 generation; |
4499 | u64 root_gen; | 4919 | u64 refs; |
4500 | struct extent_buffer *eb = path->nodes[1]; | 4920 | u64 flags; |
4501 | struct extent_buffer *leaf; | 4921 | u64 last = 0; |
4502 | struct btrfs_leaf_ref *ref; | 4922 | u32 nritems; |
4503 | struct refsort *sorted = NULL; | 4923 | u32 blocksize; |
4504 | int nritems = btrfs_header_nritems(eb); | 4924 | struct btrfs_key key; |
4925 | struct extent_buffer *eb; | ||
4505 | int ret; | 4926 | int ret; |
4506 | int i; | 4927 | int slot; |
4507 | int refi = 0; | 4928 | int nread = 0; |
4508 | int slot = path->slots[1]; | ||
4509 | u32 blocksize = btrfs_level_size(root, 0); | ||
4510 | u32 refs; | ||
4511 | |||
4512 | if (nritems == 0) | ||
4513 | goto out; | ||
4514 | |||
4515 | root_owner = btrfs_header_owner(eb); | ||
4516 | root_gen = btrfs_header_generation(eb); | ||
4517 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
4518 | 4929 | ||
4519 | /* | 4930 | if (path->slots[wc->level] < wc->reada_slot) { |
4520 | * step one, sort all the leaf pointers so we don't scribble | 4931 | wc->reada_count = wc->reada_count * 2 / 3; |
4521 | * randomly into the extent allocation tree | 4932 | wc->reada_count = max(wc->reada_count, 2); |
4522 | */ | 4933 | } else { |
4523 | for (i = slot; i < nritems; i++) { | 4934 | wc->reada_count = wc->reada_count * 3 / 2; |
4524 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | 4935 | wc->reada_count = min_t(int, wc->reada_count, |
4525 | sorted[refi].slot = i; | 4936 | BTRFS_NODEPTRS_PER_BLOCK(root)); |
4526 | refi++; | ||
4527 | } | 4937 | } |
4528 | 4938 | ||
4529 | /* | 4939 | eb = path->nodes[wc->level]; |
4530 | * nritems won't be zero, but if we're picking up drop_snapshot | 4940 | nritems = btrfs_header_nritems(eb); |
4531 | * after a crash, slot might be > 0, so double check things | 4941 | blocksize = btrfs_level_size(root, wc->level - 1); |
4532 | * just in case. | ||
4533 | */ | ||
4534 | if (refi == 0) | ||
4535 | goto out; | ||
4536 | 4942 | ||
4537 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | 4943 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
4944 | if (nread >= wc->reada_count) | ||
4945 | break; | ||
4538 | 4946 | ||
4539 | /* | 4947 | cond_resched(); |
4540 | * the first loop frees everything the leaves point to | 4948 | bytenr = btrfs_node_blockptr(eb, slot); |
4541 | */ | 4949 | generation = btrfs_node_ptr_generation(eb, slot); |
4542 | for (i = 0; i < refi; i++) { | ||
4543 | u64 ptr_gen; | ||
4544 | 4950 | ||
4545 | bytenr = sorted[i].bytenr; | 4951 | if (slot == path->slots[wc->level]) |
4952 | goto reada; | ||
4546 | 4953 | ||
4547 | /* | 4954 | if (wc->stage == UPDATE_BACKREF && |
4548 | * check the reference count on this leaf. If it is > 1 | 4955 | generation <= root->root_key.offset) |
4549 | * we just decrement it below and don't update any | ||
4550 | * of the refs the leaf points to. | ||
4551 | */ | ||
4552 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4553 | blocksize, &refs); | ||
4554 | BUG_ON(ret); | ||
4555 | if (refs != 1) | ||
4556 | continue; | 4956 | continue; |
4557 | 4957 | ||
4558 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | 4958 | /* We don't lock the tree block, it's OK to be racy here */ |
4559 | 4959 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | |
4560 | /* | 4960 | &refs, &flags); |
4561 | * the leaf only had one reference, which means the | ||
4562 | * only thing pointing to this leaf is the snapshot | ||
4563 | * we're deleting. It isn't possible for the reference | ||
4564 | * count to increase again later | ||
4565 | * | ||
4566 | * The reference cache is checked for the leaf, | ||
4567 | * and if found we'll be able to drop any refs held by | ||
4568 | * the leaf without needing to read it in. | ||
4569 | */ | ||
4570 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
4571 | if (ref && ref->generation != ptr_gen) { | ||
4572 | btrfs_free_leaf_ref(root, ref); | ||
4573 | ref = NULL; | ||
4574 | } | ||
4575 | if (ref) { | ||
4576 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
4577 | BUG_ON(ret); | ||
4578 | btrfs_remove_leaf_ref(root, ref); | ||
4579 | btrfs_free_leaf_ref(root, ref); | ||
4580 | } else { | ||
4581 | /* | ||
4582 | * the leaf wasn't in the reference cache, so | ||
4583 | * we have to read it. | ||
4584 | */ | ||
4585 | leaf = read_tree_block(root, bytenr, blocksize, | ||
4586 | ptr_gen); | ||
4587 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
4588 | BUG_ON(ret); | ||
4589 | free_extent_buffer(leaf); | ||
4590 | } | ||
4591 | atomic_inc(&root->fs_info->throttle_gen); | ||
4592 | wake_up(&root->fs_info->transaction_throttle); | ||
4593 | cond_resched(); | ||
4594 | } | ||
4595 | |||
4596 | /* | ||
4597 | * run through the loop again to free the refs on the leaves. | ||
4598 | * This is faster than doing it in the loop above because | ||
4599 | * the leaves are likely to be clustered together. We end up | ||
4600 | * working in nice chunks on the extent allocation tree. | ||
4601 | */ | ||
4602 | for (i = 0; i < refi; i++) { | ||
4603 | bytenr = sorted[i].bytenr; | ||
4604 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4605 | blocksize, eb->start, | ||
4606 | root_owner, root_gen, 0, 1); | ||
4607 | BUG_ON(ret); | 4961 | BUG_ON(ret); |
4962 | BUG_ON(refs == 0); | ||
4608 | 4963 | ||
4609 | atomic_inc(&root->fs_info->throttle_gen); | 4964 | if (wc->stage == DROP_REFERENCE) { |
4610 | wake_up(&root->fs_info->transaction_throttle); | 4965 | if (refs == 1) |
4611 | cond_resched(); | 4966 | goto reada; |
4612 | } | ||
4613 | out: | ||
4614 | kfree(sorted); | ||
4615 | |||
4616 | /* | ||
4617 | * update the path to show we've processed the entire level 1 | ||
4618 | * node. This will get saved into the root's drop_snapshot_progress | ||
4619 | * field so these drops are not repeated again if this transaction | ||
4620 | * commits. | ||
4621 | */ | ||
4622 | path->slots[1] = nritems; | ||
4623 | return 0; | ||
4624 | } | ||
4625 | |||
4626 | /* | ||
4627 | * helper function for drop_snapshot, this walks down the tree dropping ref | ||
4628 | * counts as it goes. | ||
4629 | */ | ||
4630 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | ||
4631 | struct btrfs_root *root, | ||
4632 | struct btrfs_path *path, int *level) | ||
4633 | { | ||
4634 | u64 root_owner; | ||
4635 | u64 root_gen; | ||
4636 | u64 bytenr; | ||
4637 | u64 ptr_gen; | ||
4638 | struct extent_buffer *next; | ||
4639 | struct extent_buffer *cur; | ||
4640 | struct extent_buffer *parent; | ||
4641 | u32 blocksize; | ||
4642 | int ret; | ||
4643 | u32 refs; | ||
4644 | |||
4645 | WARN_ON(*level < 0); | ||
4646 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4647 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, | ||
4648 | path->nodes[*level]->len, &refs); | ||
4649 | BUG_ON(ret); | ||
4650 | if (refs > 1) | ||
4651 | goto out; | ||
4652 | |||
4653 | /* | ||
4654 | * walk down to the last node level and free all the leaves | ||
4655 | */ | ||
4656 | while (*level >= 0) { | ||
4657 | WARN_ON(*level < 0); | ||
4658 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4659 | cur = path->nodes[*level]; | ||
4660 | |||
4661 | if (btrfs_header_level(cur) != *level) | ||
4662 | WARN_ON(1); | ||
4663 | |||
4664 | if (path->slots[*level] >= | ||
4665 | btrfs_header_nritems(cur)) | ||
4666 | break; | ||
4667 | 4967 | ||
4668 | /* the new code goes down to level 1 and does all the | 4968 | if (wc->level == 1 && |
4669 | * leaves pointed to that node in bulk. So, this check | 4969 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
4670 | * for level 0 will always be false. | 4970 | continue; |
4671 | * | 4971 | if (!wc->update_ref || |
4672 | * But, the disk format allows the drop_snapshot_progress | 4972 | generation <= root->root_key.offset) |
4673 | * field in the root to leave things in a state where | 4973 | continue; |
4674 | * a leaf will need cleaning up here. If someone crashes | 4974 | btrfs_node_key_to_cpu(eb, &key, slot); |
4675 | * with the old code and then boots with the new code, | 4975 | ret = btrfs_comp_cpu_keys(&key, |
4676 | * we might find a leaf here. | 4976 | &wc->update_progress); |
4677 | */ | 4977 | if (ret < 0) |
4678 | if (*level == 0) { | 4978 | continue; |
4679 | ret = btrfs_drop_leaf_ref(trans, root, cur); | 4979 | } else { |
4680 | BUG_ON(ret); | 4980 | if (wc->level == 1 && |
4681 | break; | 4981 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
4982 | continue; | ||
4682 | } | 4983 | } |
4683 | 4984 | reada: | |
4684 | /* | 4985 | ret = readahead_tree_block(root, bytenr, blocksize, |
4685 | * once we get to level one, process the whole node | 4986 | generation); |
4686 | * at once, including everything below it. | 4987 | if (ret) |
4687 | */ | ||
4688 | if (*level == 1) { | ||
4689 | ret = drop_level_one_refs(trans, root, path); | ||
4690 | BUG_ON(ret); | ||
4691 | break; | 4988 | break; |
4692 | } | 4989 | last = bytenr + blocksize; |
4693 | 4990 | nread++; | |
4694 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
4695 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
4696 | blocksize = btrfs_level_size(root, *level - 1); | ||
4697 | |||
4698 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
4699 | blocksize, &refs); | ||
4700 | BUG_ON(ret); | ||
4701 | |||
4702 | /* | ||
4703 | * if there is more than one reference, we don't need | ||
4704 | * to read that node to drop any references it has. We | ||
4705 | * just drop the ref we hold on that node and move on to the | ||
4706 | * next slot in this level. | ||
4707 | */ | ||
4708 | if (refs != 1) { | ||
4709 | parent = path->nodes[*level]; | ||
4710 | root_owner = btrfs_header_owner(parent); | ||
4711 | root_gen = btrfs_header_generation(parent); | ||
4712 | path->slots[*level]++; | ||
4713 | |||
4714 | ret = btrfs_free_extent(trans, root, bytenr, | ||
4715 | blocksize, parent->start, | ||
4716 | root_owner, root_gen, | ||
4717 | *level - 1, 1); | ||
4718 | BUG_ON(ret); | ||
4719 | |||
4720 | atomic_inc(&root->fs_info->throttle_gen); | ||
4721 | wake_up(&root->fs_info->transaction_throttle); | ||
4722 | cond_resched(); | ||
4723 | |||
4724 | continue; | ||
4725 | } | ||
4726 | |||
4727 | /* | ||
4728 | * we need to keep freeing things in the next level down. | ||
4729 | * read the block and loop around to process it | ||
4730 | */ | ||
4731 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
4732 | WARN_ON(*level <= 0); | ||
4733 | if (path->nodes[*level-1]) | ||
4734 | free_extent_buffer(path->nodes[*level-1]); | ||
4735 | path->nodes[*level-1] = next; | ||
4736 | *level = btrfs_header_level(next); | ||
4737 | path->slots[*level] = 0; | ||
4738 | cond_resched(); | ||
4739 | } | 4991 | } |
4740 | out: | 4992 | wc->reada_slot = slot; |
4741 | WARN_ON(*level < 0); | ||
4742 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
4743 | |||
4744 | if (path->nodes[*level] == root->node) { | ||
4745 | parent = path->nodes[*level]; | ||
4746 | bytenr = path->nodes[*level]->start; | ||
4747 | } else { | ||
4748 | parent = path->nodes[*level + 1]; | ||
4749 | bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); | ||
4750 | } | ||
4751 | |||
4752 | blocksize = btrfs_level_size(root, *level); | ||
4753 | root_owner = btrfs_header_owner(parent); | ||
4754 | root_gen = btrfs_header_generation(parent); | ||
4755 | |||
4756 | /* | ||
4757 | * cleanup and free the reference on the last node | ||
4758 | * we processed | ||
4759 | */ | ||
4760 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4761 | parent->start, root_owner, root_gen, | ||
4762 | *level, 1); | ||
4763 | free_extent_buffer(path->nodes[*level]); | ||
4764 | path->nodes[*level] = NULL; | ||
4765 | |||
4766 | *level += 1; | ||
4767 | BUG_ON(ret); | ||
4768 | |||
4769 | cond_resched(); | ||
4770 | return 0; | ||
4771 | } | 4993 | } |
4772 | #endif | ||
4773 | |||
4774 | struct walk_control { | ||
4775 | u64 refs[BTRFS_MAX_LEVEL]; | ||
4776 | u64 flags[BTRFS_MAX_LEVEL]; | ||
4777 | struct btrfs_key update_progress; | ||
4778 | int stage; | ||
4779 | int level; | ||
4780 | int shared_level; | ||
4781 | int update_ref; | ||
4782 | int keep_locks; | ||
4783 | }; | ||
4784 | |||
4785 | #define DROP_REFERENCE 1 | ||
4786 | #define UPDATE_BACKREF 2 | ||
4787 | 4994 | ||
4788 | /* | 4995 | /* |
4789 | * hepler to process tree block while walking down the tree. | 4996 | * hepler to process tree block while walking down the tree. |
4790 | * | 4997 | * |
4791 | * when wc->stage == DROP_REFERENCE, this function checks | ||
4792 | * reference count of the block. if the block is shared and | ||
4793 | * we need update back refs for the subtree rooted at the | ||
4794 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
4795 | * | ||
4796 | * when wc->stage == UPDATE_BACKREF, this function updates | 4998 | * when wc->stage == UPDATE_BACKREF, this function updates |
4797 | * back refs for pointers in the block. | 4999 | * back refs for pointers in the block. |
4798 | * | 5000 | * |
@@ -4801,11 +5003,10 @@ struct walk_control { | |||
4801 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | 5003 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, |
4802 | struct btrfs_root *root, | 5004 | struct btrfs_root *root, |
4803 | struct btrfs_path *path, | 5005 | struct btrfs_path *path, |
4804 | struct walk_control *wc) | 5006 | struct walk_control *wc, int lookup_info) |
4805 | { | 5007 | { |
4806 | int level = wc->level; | 5008 | int level = wc->level; |
4807 | struct extent_buffer *eb = path->nodes[level]; | 5009 | struct extent_buffer *eb = path->nodes[level]; |
4808 | struct btrfs_key key; | ||
4809 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 5010 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
4810 | int ret; | 5011 | int ret; |
4811 | 5012 | ||
@@ -4817,8 +5018,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4817 | * when reference count of tree block is 1, it won't increase | 5018 | * when reference count of tree block is 1, it won't increase |
4818 | * again. once full backref flag is set, we never clear it. | 5019 | * again. once full backref flag is set, we never clear it. |
4819 | */ | 5020 | */ |
4820 | if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || | 5021 | if (lookup_info && |
4821 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { | 5022 | ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || |
5023 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { | ||
4822 | BUG_ON(!path->locks[level]); | 5024 | BUG_ON(!path->locks[level]); |
4823 | ret = btrfs_lookup_extent_info(trans, root, | 5025 | ret = btrfs_lookup_extent_info(trans, root, |
4824 | eb->start, eb->len, | 5026 | eb->start, eb->len, |
@@ -4828,21 +5030,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4828 | BUG_ON(wc->refs[level] == 0); | 5030 | BUG_ON(wc->refs[level] == 0); |
4829 | } | 5031 | } |
4830 | 5032 | ||
4831 | if (wc->stage == DROP_REFERENCE && | ||
4832 | wc->update_ref && wc->refs[level] > 1) { | ||
4833 | BUG_ON(eb == root->node); | ||
4834 | BUG_ON(path->slots[level] > 0); | ||
4835 | if (level == 0) | ||
4836 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); | ||
4837 | else | ||
4838 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); | ||
4839 | if (btrfs_header_owner(eb) == root->root_key.objectid && | ||
4840 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { | ||
4841 | wc->stage = UPDATE_BACKREF; | ||
4842 | wc->shared_level = level; | ||
4843 | } | ||
4844 | } | ||
4845 | |||
4846 | if (wc->stage == DROP_REFERENCE) { | 5033 | if (wc->stage == DROP_REFERENCE) { |
4847 | if (wc->refs[level] > 1) | 5034 | if (wc->refs[level] > 1) |
4848 | return 1; | 5035 | return 1; |
@@ -4879,6 +5066,136 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
4879 | } | 5066 | } |
4880 | 5067 | ||
4881 | /* | 5068 | /* |
5069 | * hepler to process tree block pointer. | ||
5070 | * | ||
5071 | * when wc->stage == DROP_REFERENCE, this function checks | ||
5072 | * reference count of the block pointed to. if the block | ||
5073 | * is shared and we need update back refs for the subtree | ||
5074 | * rooted at the block, this function changes wc->stage to | ||
5075 | * UPDATE_BACKREF. if the block is shared and there is no | ||
5076 | * need to update back, this function drops the reference | ||
5077 | * to the block. | ||
5078 | * | ||
5079 | * NOTE: return value 1 means we should stop walking down. | ||
5080 | */ | ||
5081 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | ||
5082 | struct btrfs_root *root, | ||
5083 | struct btrfs_path *path, | ||
5084 | struct walk_control *wc, int *lookup_info) | ||
5085 | { | ||
5086 | u64 bytenr; | ||
5087 | u64 generation; | ||
5088 | u64 parent; | ||
5089 | u32 blocksize; | ||
5090 | struct btrfs_key key; | ||
5091 | struct extent_buffer *next; | ||
5092 | int level = wc->level; | ||
5093 | int reada = 0; | ||
5094 | int ret = 0; | ||
5095 | |||
5096 | generation = btrfs_node_ptr_generation(path->nodes[level], | ||
5097 | path->slots[level]); | ||
5098 | /* | ||
5099 | * if the lower level block was created before the snapshot | ||
5100 | * was created, we know there is no need to update back refs | ||
5101 | * for the subtree | ||
5102 | */ | ||
5103 | if (wc->stage == UPDATE_BACKREF && | ||
5104 | generation <= root->root_key.offset) { | ||
5105 | *lookup_info = 1; | ||
5106 | return 1; | ||
5107 | } | ||
5108 | |||
5109 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||
5110 | blocksize = btrfs_level_size(root, level - 1); | ||
5111 | |||
5112 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
5113 | if (!next) { | ||
5114 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
5115 | reada = 1; | ||
5116 | } | ||
5117 | btrfs_tree_lock(next); | ||
5118 | btrfs_set_lock_blocking(next); | ||
5119 | |||
5120 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
5121 | &wc->refs[level - 1], | ||
5122 | &wc->flags[level - 1]); | ||
5123 | BUG_ON(ret); | ||
5124 | BUG_ON(wc->refs[level - 1] == 0); | ||
5125 | *lookup_info = 0; | ||
5126 | |||
5127 | if (wc->stage == DROP_REFERENCE) { | ||
5128 | if (wc->refs[level - 1] > 1) { | ||
5129 | if (level == 1 && | ||
5130 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5131 | goto skip; | ||
5132 | |||
5133 | if (!wc->update_ref || | ||
5134 | generation <= root->root_key.offset) | ||
5135 | goto skip; | ||
5136 | |||
5137 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
5138 | path->slots[level]); | ||
5139 | ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); | ||
5140 | if (ret < 0) | ||
5141 | goto skip; | ||
5142 | |||
5143 | wc->stage = UPDATE_BACKREF; | ||
5144 | wc->shared_level = level - 1; | ||
5145 | } | ||
5146 | } else { | ||
5147 | if (level == 1 && | ||
5148 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
5149 | goto skip; | ||
5150 | } | ||
5151 | |||
5152 | if (!btrfs_buffer_uptodate(next, generation)) { | ||
5153 | btrfs_tree_unlock(next); | ||
5154 | free_extent_buffer(next); | ||
5155 | next = NULL; | ||
5156 | *lookup_info = 1; | ||
5157 | } | ||
5158 | |||
5159 | if (!next) { | ||
5160 | if (reada && level == 1) | ||
5161 | reada_walk_down(trans, root, wc, path); | ||
5162 | next = read_tree_block(root, bytenr, blocksize, generation); | ||
5163 | btrfs_tree_lock(next); | ||
5164 | btrfs_set_lock_blocking(next); | ||
5165 | } | ||
5166 | |||
5167 | level--; | ||
5168 | BUG_ON(level != btrfs_header_level(next)); | ||
5169 | path->nodes[level] = next; | ||
5170 | path->slots[level] = 0; | ||
5171 | path->locks[level] = 1; | ||
5172 | wc->level = level; | ||
5173 | if (wc->level == 1) | ||
5174 | wc->reada_slot = 0; | ||
5175 | return 0; | ||
5176 | skip: | ||
5177 | wc->refs[level - 1] = 0; | ||
5178 | wc->flags[level - 1] = 0; | ||
5179 | if (wc->stage == DROP_REFERENCE) { | ||
5180 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
5181 | parent = path->nodes[level]->start; | ||
5182 | } else { | ||
5183 | BUG_ON(root->root_key.objectid != | ||
5184 | btrfs_header_owner(path->nodes[level])); | ||
5185 | parent = 0; | ||
5186 | } | ||
5187 | |||
5188 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
5189 | root->root_key.objectid, level - 1, 0); | ||
5190 | BUG_ON(ret); | ||
5191 | } | ||
5192 | btrfs_tree_unlock(next); | ||
5193 | free_extent_buffer(next); | ||
5194 | *lookup_info = 1; | ||
5195 | return 1; | ||
5196 | } | ||
5197 | |||
5198 | /* | ||
4882 | * hepler to process tree block while walking up the tree. | 5199 | * hepler to process tree block while walking up the tree. |
4883 | * | 5200 | * |
4884 | * when wc->stage == DROP_REFERENCE, this function drops | 5201 | * when wc->stage == DROP_REFERENCE, this function drops |
@@ -4905,7 +5222,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4905 | if (level < wc->shared_level) | 5222 | if (level < wc->shared_level) |
4906 | goto out; | 5223 | goto out; |
4907 | 5224 | ||
4908 | BUG_ON(wc->refs[level] <= 1); | ||
4909 | ret = find_next_key(path, level + 1, &wc->update_progress); | 5225 | ret = find_next_key(path, level + 1, &wc->update_progress); |
4910 | if (ret > 0) | 5226 | if (ret > 0) |
4911 | wc->update_ref = 0; | 5227 | wc->update_ref = 0; |
@@ -4936,8 +5252,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
4936 | path->locks[level] = 0; | 5252 | path->locks[level] = 0; |
4937 | return 1; | 5253 | return 1; |
4938 | } | 5254 | } |
4939 | } else { | ||
4940 | BUG_ON(level != 0); | ||
4941 | } | 5255 | } |
4942 | } | 5256 | } |
4943 | 5257 | ||
@@ -4990,39 +5304,28 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4990 | struct btrfs_path *path, | 5304 | struct btrfs_path *path, |
4991 | struct walk_control *wc) | 5305 | struct walk_control *wc) |
4992 | { | 5306 | { |
4993 | struct extent_buffer *next; | ||
4994 | struct extent_buffer *cur; | ||
4995 | u64 bytenr; | ||
4996 | u64 ptr_gen; | ||
4997 | u32 blocksize; | ||
4998 | int level = wc->level; | 5307 | int level = wc->level; |
5308 | int lookup_info = 1; | ||
4999 | int ret; | 5309 | int ret; |
5000 | 5310 | ||
5001 | while (level >= 0) { | 5311 | while (level >= 0) { |
5002 | cur = path->nodes[level]; | 5312 | if (path->slots[level] >= |
5003 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | 5313 | btrfs_header_nritems(path->nodes[level])) |
5314 | break; | ||
5004 | 5315 | ||
5005 | ret = walk_down_proc(trans, root, path, wc); | 5316 | ret = walk_down_proc(trans, root, path, wc, lookup_info); |
5006 | if (ret > 0) | 5317 | if (ret > 0) |
5007 | break; | 5318 | break; |
5008 | 5319 | ||
5009 | if (level == 0) | 5320 | if (level == 0) |
5010 | break; | 5321 | break; |
5011 | 5322 | ||
5012 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | 5323 | ret = do_walk_down(trans, root, path, wc, &lookup_info); |
5013 | blocksize = btrfs_level_size(root, level - 1); | 5324 | if (ret > 0) { |
5014 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | 5325 | path->slots[level]++; |
5015 | 5326 | continue; | |
5016 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 5327 | } |
5017 | btrfs_tree_lock(next); | 5328 | level = wc->level; |
5018 | btrfs_set_lock_blocking(next); | ||
5019 | |||
5020 | level--; | ||
5021 | BUG_ON(level != btrfs_header_level(next)); | ||
5022 | path->nodes[level] = next; | ||
5023 | path->slots[level] = 0; | ||
5024 | path->locks[level] = 1; | ||
5025 | wc->level = level; | ||
5026 | } | 5329 | } |
5027 | return 0; | 5330 | return 0; |
5028 | } | 5331 | } |
@@ -5112,9 +5415,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5112 | err = ret; | 5415 | err = ret; |
5113 | goto out; | 5416 | goto out; |
5114 | } | 5417 | } |
5115 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 5418 | WARN_ON(ret > 0); |
5116 | path->slots[level]); | ||
5117 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); | ||
5118 | 5419 | ||
5119 | /* | 5420 | /* |
5120 | * unlock our path, this is safe because only this | 5421 | * unlock our path, this is safe because only this |
@@ -5149,6 +5450,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5149 | wc->stage = DROP_REFERENCE; | 5450 | wc->stage = DROP_REFERENCE; |
5150 | wc->update_ref = update_ref; | 5451 | wc->update_ref = update_ref; |
5151 | wc->keep_locks = 0; | 5452 | wc->keep_locks = 0; |
5453 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5152 | 5454 | ||
5153 | while (1) { | 5455 | while (1) { |
5154 | ret = walk_down_tree(trans, root, path, wc); | 5456 | ret = walk_down_tree(trans, root, path, wc); |
@@ -5201,9 +5503,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5201 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 5503 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
5202 | BUG_ON(ret); | 5504 | BUG_ON(ret); |
5203 | 5505 | ||
5204 | free_extent_buffer(root->node); | 5506 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
5205 | free_extent_buffer(root->commit_root); | 5507 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, |
5206 | kfree(root); | 5508 | NULL, NULL); |
5509 | BUG_ON(ret < 0); | ||
5510 | if (ret > 0) { | ||
5511 | ret = btrfs_del_orphan_item(trans, tree_root, | ||
5512 | root->root_key.objectid); | ||
5513 | BUG_ON(ret); | ||
5514 | } | ||
5515 | } | ||
5516 | |||
5517 | if (root->in_radix) { | ||
5518 | btrfs_free_fs_root(tree_root->fs_info, root); | ||
5519 | } else { | ||
5520 | free_extent_buffer(root->node); | ||
5521 | free_extent_buffer(root->commit_root); | ||
5522 | kfree(root); | ||
5523 | } | ||
5207 | out: | 5524 | out: |
5208 | btrfs_end_transaction(trans, tree_root); | 5525 | btrfs_end_transaction(trans, tree_root); |
5209 | kfree(wc); | 5526 | kfree(wc); |
@@ -5255,6 +5572,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
5255 | wc->stage = DROP_REFERENCE; | 5572 | wc->stage = DROP_REFERENCE; |
5256 | wc->update_ref = 0; | 5573 | wc->update_ref = 0; |
5257 | wc->keep_locks = 1; | 5574 | wc->keep_locks = 1; |
5575 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
5258 | 5576 | ||
5259 | while (1) { | 5577 | while (1) { |
5260 | wret = walk_down_tree(trans, root, path, wc); | 5578 | wret = walk_down_tree(trans, root, path, wc); |
@@ -5397,9 +5715,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
5397 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | 5715 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
5398 | while (1) { | 5716 | while (1) { |
5399 | int ret; | 5717 | int ret; |
5400 | spin_lock(&em_tree->lock); | 5718 | write_lock(&em_tree->lock); |
5401 | ret = add_extent_mapping(em_tree, em); | 5719 | ret = add_extent_mapping(em_tree, em); |
5402 | spin_unlock(&em_tree->lock); | 5720 | write_unlock(&em_tree->lock); |
5403 | if (ret != -EEXIST) { | 5721 | if (ret != -EEXIST) { |
5404 | free_extent_map(em); | 5722 | free_extent_map(em); |
5405 | break; | 5723 | break; |
@@ -6842,287 +7160,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | |||
6842 | return 0; | 7160 | return 0; |
6843 | } | 7161 | } |
6844 | 7162 | ||
6845 | #if 0 | 7163 | /* |
6846 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 7164 | * checks to see if its even possible to relocate this block group. |
6847 | struct btrfs_root *root, | 7165 | * |
6848 | u64 objectid, u64 size) | 7166 | * @return - -1 if it's not a good idea to relocate this block group, 0 if its |
6849 | { | 7167 | * ok to go ahead and try. |
6850 | struct btrfs_path *path; | 7168 | */ |
6851 | struct btrfs_inode_item *item; | 7169 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) |
6852 | struct extent_buffer *leaf; | ||
6853 | int ret; | ||
6854 | |||
6855 | path = btrfs_alloc_path(); | ||
6856 | if (!path) | ||
6857 | return -ENOMEM; | ||
6858 | |||
6859 | path->leave_spinning = 1; | ||
6860 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
6861 | if (ret) | ||
6862 | goto out; | ||
6863 | |||
6864 | leaf = path->nodes[0]; | ||
6865 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
6866 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
6867 | btrfs_set_inode_generation(leaf, item, 1); | ||
6868 | btrfs_set_inode_size(leaf, item, size); | ||
6869 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
6870 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | ||
6871 | btrfs_mark_buffer_dirty(leaf); | ||
6872 | btrfs_release_path(root, path); | ||
6873 | out: | ||
6874 | btrfs_free_path(path); | ||
6875 | return ret; | ||
6876 | } | ||
6877 | |||
6878 | static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
6879 | struct btrfs_block_group_cache *group) | ||
6880 | { | 7170 | { |
6881 | struct inode *inode = NULL; | 7171 | struct btrfs_block_group_cache *block_group; |
6882 | struct btrfs_trans_handle *trans; | 7172 | struct btrfs_space_info *space_info; |
6883 | struct btrfs_root *root; | 7173 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
6884 | struct btrfs_key root_key; | 7174 | struct btrfs_device *device; |
6885 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 7175 | int full = 0; |
6886 | int err = 0; | 7176 | int ret = 0; |
6887 | 7177 | ||
6888 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | 7178 | block_group = btrfs_lookup_block_group(root->fs_info, bytenr); |
6889 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
6890 | root_key.offset = (u64)-1; | ||
6891 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
6892 | if (IS_ERR(root)) | ||
6893 | return ERR_CAST(root); | ||
6894 | 7179 | ||
6895 | trans = btrfs_start_transaction(root, 1); | 7180 | /* odd, couldn't find the block group, leave it alone */ |
6896 | BUG_ON(!trans); | 7181 | if (!block_group) |
7182 | return -1; | ||
6897 | 7183 | ||
6898 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 7184 | /* no bytes used, we're good */ |
6899 | if (err) | 7185 | if (!btrfs_block_group_used(&block_group->item)) |
6900 | goto out; | 7186 | goto out; |
6901 | 7187 | ||
6902 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 7188 | space_info = block_group->space_info; |
6903 | BUG_ON(err); | 7189 | spin_lock(&space_info->lock); |
6904 | |||
6905 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
6906 | group->key.offset, 0, group->key.offset, | ||
6907 | 0, 0, 0); | ||
6908 | BUG_ON(err); | ||
6909 | |||
6910 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
6911 | if (inode->i_state & I_NEW) { | ||
6912 | BTRFS_I(inode)->root = root; | ||
6913 | BTRFS_I(inode)->location.objectid = objectid; | ||
6914 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
6915 | BTRFS_I(inode)->location.offset = 0; | ||
6916 | btrfs_read_locked_inode(inode); | ||
6917 | unlock_new_inode(inode); | ||
6918 | BUG_ON(is_bad_inode(inode)); | ||
6919 | } else { | ||
6920 | BUG_ON(1); | ||
6921 | } | ||
6922 | BTRFS_I(inode)->index_cnt = group->key.objectid; | ||
6923 | |||
6924 | err = btrfs_orphan_add(trans, inode); | ||
6925 | out: | ||
6926 | btrfs_end_transaction(trans, root); | ||
6927 | if (err) { | ||
6928 | if (inode) | ||
6929 | iput(inode); | ||
6930 | inode = ERR_PTR(err); | ||
6931 | } | ||
6932 | return inode; | ||
6933 | } | ||
6934 | |||
6935 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | ||
6936 | { | ||
6937 | |||
6938 | struct btrfs_ordered_sum *sums; | ||
6939 | struct btrfs_sector_sum *sector_sum; | ||
6940 | struct btrfs_ordered_extent *ordered; | ||
6941 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
6942 | struct list_head list; | ||
6943 | size_t offset; | ||
6944 | int ret; | ||
6945 | u64 disk_bytenr; | ||
6946 | |||
6947 | INIT_LIST_HEAD(&list); | ||
6948 | |||
6949 | ordered = btrfs_lookup_ordered_extent(inode, file_pos); | ||
6950 | BUG_ON(ordered->file_offset != file_pos || ordered->len != len); | ||
6951 | |||
6952 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | ||
6953 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | ||
6954 | disk_bytenr + len - 1, &list); | ||
6955 | |||
6956 | while (!list_empty(&list)) { | ||
6957 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
6958 | list_del_init(&sums->list); | ||
6959 | |||
6960 | sector_sum = sums->sums; | ||
6961 | sums->bytenr = ordered->start; | ||
6962 | 7190 | ||
6963 | offset = 0; | 7191 | full = space_info->full; |
6964 | while (offset < sums->len) { | ||
6965 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
6966 | sector_sum++; | ||
6967 | offset += root->sectorsize; | ||
6968 | } | ||
6969 | 7192 | ||
6970 | btrfs_add_ordered_sum(inode, ordered, sums); | 7193 | /* |
7194 | * if this is the last block group we have in this space, we can't | ||
7195 | * relocate it unless we're able to allocate a new chunk below. | ||
7196 | * | ||
7197 | * Otherwise, we need to make sure we have room in the space to handle | ||
7198 | * all of the extents from this block group. If we can, we're good | ||
7199 | */ | ||
7200 | if ((space_info->total_bytes != block_group->key.offset) && | ||
7201 | (space_info->bytes_used + space_info->bytes_reserved + | ||
7202 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
7203 | btrfs_block_group_used(&block_group->item) < | ||
7204 | space_info->total_bytes)) { | ||
7205 | spin_unlock(&space_info->lock); | ||
7206 | goto out; | ||
6971 | } | 7207 | } |
6972 | btrfs_put_ordered_extent(ordered); | 7208 | spin_unlock(&space_info->lock); |
6973 | return 0; | ||
6974 | } | ||
6975 | |||
6976 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
6977 | { | ||
6978 | struct btrfs_trans_handle *trans; | ||
6979 | struct btrfs_path *path; | ||
6980 | struct btrfs_fs_info *info = root->fs_info; | ||
6981 | struct extent_buffer *leaf; | ||
6982 | struct inode *reloc_inode; | ||
6983 | struct btrfs_block_group_cache *block_group; | ||
6984 | struct btrfs_key key; | ||
6985 | u64 skipped; | ||
6986 | u64 cur_byte; | ||
6987 | u64 total_found; | ||
6988 | u32 nritems; | ||
6989 | int ret; | ||
6990 | int progress; | ||
6991 | int pass = 0; | ||
6992 | |||
6993 | root = root->fs_info->extent_root; | ||
6994 | |||
6995 | block_group = btrfs_lookup_block_group(info, group_start); | ||
6996 | BUG_ON(!block_group); | ||
6997 | |||
6998 | printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", | ||
6999 | (unsigned long long)block_group->key.objectid, | ||
7000 | (unsigned long long)block_group->flags); | ||
7001 | |||
7002 | path = btrfs_alloc_path(); | ||
7003 | BUG_ON(!path); | ||
7004 | |||
7005 | reloc_inode = create_reloc_inode(info, block_group); | ||
7006 | BUG_ON(IS_ERR(reloc_inode)); | ||
7007 | |||
7008 | __alloc_chunk_for_shrink(root, block_group, 1); | ||
7009 | set_block_group_readonly(block_group); | ||
7010 | |||
7011 | btrfs_start_delalloc_inodes(info->tree_root); | ||
7012 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
7013 | again: | ||
7014 | skipped = 0; | ||
7015 | total_found = 0; | ||
7016 | progress = 0; | ||
7017 | key.objectid = block_group->key.objectid; | ||
7018 | key.offset = 0; | ||
7019 | key.type = 0; | ||
7020 | cur_byte = key.objectid; | ||
7021 | |||
7022 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7023 | btrfs_commit_transaction(trans, info->tree_root); | ||
7024 | 7209 | ||
7025 | mutex_lock(&root->fs_info->cleaner_mutex); | 7210 | /* |
7026 | btrfs_clean_old_snapshots(info->tree_root); | 7211 | * ok we don't have enough space, but maybe we have free space on our |
7027 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 7212 | * devices to allocate new chunks for relocation, so loop through our |
7028 | mutex_unlock(&root->fs_info->cleaner_mutex); | 7213 | * alloc devices and guess if we have enough space. However, if we |
7214 | * were marked as full, then we know there aren't enough chunks, and we | ||
7215 | * can just return. | ||
7216 | */ | ||
7217 | ret = -1; | ||
7218 | if (full) | ||
7219 | goto out; | ||
7029 | 7220 | ||
7030 | trans = btrfs_start_transaction(info->tree_root, 1); | 7221 | mutex_lock(&root->fs_info->chunk_mutex); |
7031 | btrfs_commit_transaction(trans, info->tree_root); | 7222 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
7223 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
7224 | u64 dev_offset, max_avail; | ||
7032 | 7225 | ||
7033 | while (1) { | 7226 | /* |
7034 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 7227 | * check to make sure we can actually find a chunk with enough |
7035 | if (ret < 0) | 7228 | * space to fit our block group in. |
7036 | goto out; | 7229 | */ |
7037 | next: | 7230 | if (device->total_bytes > device->bytes_used + min_free) { |
7038 | leaf = path->nodes[0]; | 7231 | ret = find_free_dev_extent(NULL, device, min_free, |
7039 | nritems = btrfs_header_nritems(leaf); | 7232 | &dev_offset, &max_avail); |
7040 | if (path->slots[0] >= nritems) { | 7233 | if (!ret) |
7041 | ret = btrfs_next_leaf(root, path); | ||
7042 | if (ret < 0) | ||
7043 | goto out; | ||
7044 | if (ret == 1) { | ||
7045 | ret = 0; | ||
7046 | break; | 7234 | break; |
7047 | } | 7235 | ret = -1; |
7048 | leaf = path->nodes[0]; | ||
7049 | nritems = btrfs_header_nritems(leaf); | ||
7050 | } | ||
7051 | |||
7052 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
7053 | |||
7054 | if (key.objectid >= block_group->key.objectid + | ||
7055 | block_group->key.offset) | ||
7056 | break; | ||
7057 | |||
7058 | if (progress && need_resched()) { | ||
7059 | btrfs_release_path(root, path); | ||
7060 | cond_resched(); | ||
7061 | progress = 0; | ||
7062 | continue; | ||
7063 | } | ||
7064 | progress = 1; | ||
7065 | |||
7066 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || | ||
7067 | key.objectid + key.offset <= cur_byte) { | ||
7068 | path->slots[0]++; | ||
7069 | goto next; | ||
7070 | } | 7236 | } |
7071 | |||
7072 | total_found++; | ||
7073 | cur_byte = key.objectid + key.offset; | ||
7074 | btrfs_release_path(root, path); | ||
7075 | |||
7076 | __alloc_chunk_for_shrink(root, block_group, 0); | ||
7077 | ret = relocate_one_extent(root, path, &key, block_group, | ||
7078 | reloc_inode, pass); | ||
7079 | BUG_ON(ret < 0); | ||
7080 | if (ret > 0) | ||
7081 | skipped++; | ||
7082 | |||
7083 | key.objectid = cur_byte; | ||
7084 | key.type = 0; | ||
7085 | key.offset = 0; | ||
7086 | } | 7237 | } |
7087 | 7238 | mutex_unlock(&root->fs_info->chunk_mutex); | |
7088 | btrfs_release_path(root, path); | ||
7089 | |||
7090 | if (pass == 0) { | ||
7091 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | ||
7092 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); | ||
7093 | } | ||
7094 | |||
7095 | if (total_found > 0) { | ||
7096 | printk(KERN_INFO "btrfs found %llu extents in pass %d\n", | ||
7097 | (unsigned long long)total_found, pass); | ||
7098 | pass++; | ||
7099 | if (total_found == skipped && pass > 2) { | ||
7100 | iput(reloc_inode); | ||
7101 | reloc_inode = create_reloc_inode(info, block_group); | ||
7102 | pass = 0; | ||
7103 | } | ||
7104 | goto again; | ||
7105 | } | ||
7106 | |||
7107 | /* delete reloc_inode */ | ||
7108 | iput(reloc_inode); | ||
7109 | |||
7110 | /* unpin extents in this range */ | ||
7111 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
7112 | btrfs_commit_transaction(trans, info->tree_root); | ||
7113 | |||
7114 | spin_lock(&block_group->lock); | ||
7115 | WARN_ON(block_group->pinned > 0); | ||
7116 | WARN_ON(block_group->reserved > 0); | ||
7117 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | ||
7118 | spin_unlock(&block_group->lock); | ||
7119 | btrfs_put_block_group(block_group); | ||
7120 | ret = 0; | ||
7121 | out: | 7239 | out: |
7122 | btrfs_free_path(path); | 7240 | btrfs_put_block_group(block_group); |
7123 | return ret; | 7241 | return ret; |
7124 | } | 7242 | } |
7125 | #endif | ||
7126 | 7243 | ||
7127 | static int find_first_block_group(struct btrfs_root *root, | 7244 | static int find_first_block_group(struct btrfs_root *root, |
7128 | struct btrfs_path *path, struct btrfs_key *key) | 7245 | struct btrfs_path *path, struct btrfs_key *key) |
@@ -7165,8 +7282,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7165 | { | 7282 | { |
7166 | struct btrfs_block_group_cache *block_group; | 7283 | struct btrfs_block_group_cache *block_group; |
7167 | struct btrfs_space_info *space_info; | 7284 | struct btrfs_space_info *space_info; |
7285 | struct btrfs_caching_control *caching_ctl; | ||
7168 | struct rb_node *n; | 7286 | struct rb_node *n; |
7169 | 7287 | ||
7288 | down_write(&info->extent_commit_sem); | ||
7289 | while (!list_empty(&info->caching_block_groups)) { | ||
7290 | caching_ctl = list_entry(info->caching_block_groups.next, | ||
7291 | struct btrfs_caching_control, list); | ||
7292 | list_del(&caching_ctl->list); | ||
7293 | put_caching_control(caching_ctl); | ||
7294 | } | ||
7295 | up_write(&info->extent_commit_sem); | ||
7296 | |||
7170 | spin_lock(&info->block_group_cache_lock); | 7297 | spin_lock(&info->block_group_cache_lock); |
7171 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 7298 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
7172 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 7299 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
@@ -7180,8 +7307,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7180 | up_write(&block_group->space_info->groups_sem); | 7307 | up_write(&block_group->space_info->groups_sem); |
7181 | 7308 | ||
7182 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7309 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7183 | wait_event(block_group->caching_q, | 7310 | wait_block_group_cache_done(block_group); |
7184 | block_group_cache_done(block_group)); | ||
7185 | 7311 | ||
7186 | btrfs_remove_free_space_cache(block_group); | 7312 | btrfs_remove_free_space_cache(block_group); |
7187 | 7313 | ||
@@ -7251,7 +7377,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7251 | spin_lock_init(&cache->lock); | 7377 | spin_lock_init(&cache->lock); |
7252 | spin_lock_init(&cache->tree_lock); | 7378 | spin_lock_init(&cache->tree_lock); |
7253 | cache->fs_info = info; | 7379 | cache->fs_info = info; |
7254 | init_waitqueue_head(&cache->caching_q); | ||
7255 | INIT_LIST_HEAD(&cache->list); | 7380 | INIT_LIST_HEAD(&cache->list); |
7256 | INIT_LIST_HEAD(&cache->cluster_list); | 7381 | INIT_LIST_HEAD(&cache->cluster_list); |
7257 | 7382 | ||
@@ -7273,8 +7398,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7273 | cache->flags = btrfs_block_group_flags(&cache->item); | 7398 | cache->flags = btrfs_block_group_flags(&cache->item); |
7274 | cache->sectorsize = root->sectorsize; | 7399 | cache->sectorsize = root->sectorsize; |
7275 | 7400 | ||
7276 | remove_sb_from_cache(root, cache); | ||
7277 | |||
7278 | /* | 7401 | /* |
7279 | * check for two cases, either we are full, and therefore | 7402 | * check for two cases, either we are full, and therefore |
7280 | * don't need to bother with the caching work since we won't | 7403 | * don't need to bother with the caching work since we won't |
@@ -7283,13 +7406,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7283 | * time, particularly in the full case. | 7406 | * time, particularly in the full case. |
7284 | */ | 7407 | */ |
7285 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 7408 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
7409 | exclude_super_stripes(root, cache); | ||
7410 | cache->last_byte_to_unpin = (u64)-1; | ||
7286 | cache->cached = BTRFS_CACHE_FINISHED; | 7411 | cache->cached = BTRFS_CACHE_FINISHED; |
7412 | free_excluded_extents(root, cache); | ||
7287 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 7413 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
7414 | exclude_super_stripes(root, cache); | ||
7415 | cache->last_byte_to_unpin = (u64)-1; | ||
7288 | cache->cached = BTRFS_CACHE_FINISHED; | 7416 | cache->cached = BTRFS_CACHE_FINISHED; |
7289 | add_new_free_space(cache, root->fs_info, | 7417 | add_new_free_space(cache, root->fs_info, |
7290 | found_key.objectid, | 7418 | found_key.objectid, |
7291 | found_key.objectid + | 7419 | found_key.objectid + |
7292 | found_key.offset); | 7420 | found_key.offset); |
7421 | free_excluded_extents(root, cache); | ||
7293 | } | 7422 | } |
7294 | 7423 | ||
7295 | ret = update_space_info(info, cache->flags, found_key.offset, | 7424 | ret = update_space_info(info, cache->flags, found_key.offset, |
@@ -7297,6 +7426,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7297 | &space_info); | 7426 | &space_info); |
7298 | BUG_ON(ret); | 7427 | BUG_ON(ret); |
7299 | cache->space_info = space_info; | 7428 | cache->space_info = space_info; |
7429 | spin_lock(&cache->space_info->lock); | ||
7430 | cache->space_info->bytes_super += cache->bytes_super; | ||
7431 | spin_unlock(&cache->space_info->lock); | ||
7432 | |||
7300 | down_write(&space_info->groups_sem); | 7433 | down_write(&space_info->groups_sem); |
7301 | list_add_tail(&cache->list, &space_info->block_groups); | 7434 | list_add_tail(&cache->list, &space_info->block_groups); |
7302 | up_write(&space_info->groups_sem); | 7435 | up_write(&space_info->groups_sem); |
@@ -7346,7 +7479,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7346 | atomic_set(&cache->count, 1); | 7479 | atomic_set(&cache->count, 1); |
7347 | spin_lock_init(&cache->lock); | 7480 | spin_lock_init(&cache->lock); |
7348 | spin_lock_init(&cache->tree_lock); | 7481 | spin_lock_init(&cache->tree_lock); |
7349 | init_waitqueue_head(&cache->caching_q); | ||
7350 | INIT_LIST_HEAD(&cache->list); | 7482 | INIT_LIST_HEAD(&cache->list); |
7351 | INIT_LIST_HEAD(&cache->cluster_list); | 7483 | INIT_LIST_HEAD(&cache->cluster_list); |
7352 | 7484 | ||
@@ -7355,15 +7487,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7355 | cache->flags = type; | 7487 | cache->flags = type; |
7356 | btrfs_set_block_group_flags(&cache->item, type); | 7488 | btrfs_set_block_group_flags(&cache->item, type); |
7357 | 7489 | ||
7490 | cache->last_byte_to_unpin = (u64)-1; | ||
7358 | cache->cached = BTRFS_CACHE_FINISHED; | 7491 | cache->cached = BTRFS_CACHE_FINISHED; |
7359 | remove_sb_from_cache(root, cache); | 7492 | exclude_super_stripes(root, cache); |
7360 | 7493 | ||
7361 | add_new_free_space(cache, root->fs_info, chunk_offset, | 7494 | add_new_free_space(cache, root->fs_info, chunk_offset, |
7362 | chunk_offset + size); | 7495 | chunk_offset + size); |
7363 | 7496 | ||
7497 | free_excluded_extents(root, cache); | ||
7498 | |||
7364 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7499 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
7365 | &cache->space_info); | 7500 | &cache->space_info); |
7366 | BUG_ON(ret); | 7501 | BUG_ON(ret); |
7502 | |||
7503 | spin_lock(&cache->space_info->lock); | ||
7504 | cache->space_info->bytes_super += cache->bytes_super; | ||
7505 | spin_unlock(&cache->space_info->lock); | ||
7506 | |||
7367 | down_write(&cache->space_info->groups_sem); | 7507 | down_write(&cache->space_info->groups_sem); |
7368 | list_add_tail(&cache->list, &cache->space_info->block_groups); | 7508 | list_add_tail(&cache->list, &cache->space_info->block_groups); |
7369 | up_write(&cache->space_info->groups_sem); | 7509 | up_write(&cache->space_info->groups_sem); |
@@ -7429,8 +7569,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7429 | up_write(&block_group->space_info->groups_sem); | 7569 | up_write(&block_group->space_info->groups_sem); |
7430 | 7570 | ||
7431 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7571 | if (block_group->cached == BTRFS_CACHE_STARTED) |
7432 | wait_event(block_group->caching_q, | 7572 | wait_block_group_cache_done(block_group); |
7433 | block_group_cache_done(block_group)); | ||
7434 | 7573 | ||
7435 | btrfs_remove_free_space_cache(block_group); | 7574 | btrfs_remove_free_space_cache(block_group); |
7436 | 7575 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68260180f587..96577e8bf9fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | |||
280 | return NULL; | 280 | return NULL; |
281 | } | 281 | } |
282 | 282 | ||
283 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | ||
284 | struct extent_state *other) | ||
285 | { | ||
286 | if (tree->ops && tree->ops->merge_extent_hook) | ||
287 | tree->ops->merge_extent_hook(tree->mapping->host, new, | ||
288 | other); | ||
289 | } | ||
290 | |||
283 | /* | 291 | /* |
284 | * utility function to look for merge candidates inside a given range. | 292 | * utility function to look for merge candidates inside a given range. |
285 | * Any extents with matching state are merged together into a single | 293 | * Any extents with matching state are merged together into a single |
@@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree, | |||
303 | other = rb_entry(other_node, struct extent_state, rb_node); | 311 | other = rb_entry(other_node, struct extent_state, rb_node); |
304 | if (other->end == state->start - 1 && | 312 | if (other->end == state->start - 1 && |
305 | other->state == state->state) { | 313 | other->state == state->state) { |
314 | merge_cb(tree, state, other); | ||
306 | state->start = other->start; | 315 | state->start = other->start; |
307 | other->tree = NULL; | 316 | other->tree = NULL; |
308 | rb_erase(&other->rb_node, &tree->state); | 317 | rb_erase(&other->rb_node, &tree->state); |
@@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree, | |||
314 | other = rb_entry(other_node, struct extent_state, rb_node); | 323 | other = rb_entry(other_node, struct extent_state, rb_node); |
315 | if (other->start == state->end + 1 && | 324 | if (other->start == state->end + 1 && |
316 | other->state == state->state) { | 325 | other->state == state->state) { |
326 | merge_cb(tree, state, other); | ||
317 | other->start = state->start; | 327 | other->start = state->start; |
318 | state->tree = NULL; | 328 | state->tree = NULL; |
319 | rb_erase(&state->rb_node, &tree->state); | 329 | rb_erase(&state->rb_node, &tree->state); |
320 | free_extent_state(state); | 330 | free_extent_state(state); |
331 | state = NULL; | ||
321 | } | 332 | } |
322 | } | 333 | } |
334 | |||
323 | return 0; | 335 | return 0; |
324 | } | 336 | } |
325 | 337 | ||
326 | static void set_state_cb(struct extent_io_tree *tree, | 338 | static int set_state_cb(struct extent_io_tree *tree, |
327 | struct extent_state *state, | 339 | struct extent_state *state, |
328 | unsigned long bits) | 340 | unsigned long bits) |
329 | { | 341 | { |
330 | if (tree->ops && tree->ops->set_bit_hook) { | 342 | if (tree->ops && tree->ops->set_bit_hook) { |
331 | tree->ops->set_bit_hook(tree->mapping->host, state->start, | 343 | return tree->ops->set_bit_hook(tree->mapping->host, |
332 | state->end, state->state, bits); | 344 | state->start, state->end, |
345 | state->state, bits); | ||
333 | } | 346 | } |
347 | |||
348 | return 0; | ||
334 | } | 349 | } |
335 | 350 | ||
336 | static void clear_state_cb(struct extent_io_tree *tree, | 351 | static void clear_state_cb(struct extent_io_tree *tree, |
337 | struct extent_state *state, | 352 | struct extent_state *state, |
338 | unsigned long bits) | 353 | unsigned long bits) |
339 | { | 354 | { |
340 | if (tree->ops && tree->ops->clear_bit_hook) { | 355 | if (tree->ops && tree->ops->clear_bit_hook) |
341 | tree->ops->clear_bit_hook(tree->mapping->host, state->start, | 356 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
342 | state->end, state->state, bits); | ||
343 | } | ||
344 | } | 357 | } |
345 | 358 | ||
346 | /* | 359 | /* |
@@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
358 | int bits) | 371 | int bits) |
359 | { | 372 | { |
360 | struct rb_node *node; | 373 | struct rb_node *node; |
374 | int ret; | ||
361 | 375 | ||
362 | if (end < start) { | 376 | if (end < start) { |
363 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 377 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
@@ -365,12 +379,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
365 | (unsigned long long)start); | 379 | (unsigned long long)start); |
366 | WARN_ON(1); | 380 | WARN_ON(1); |
367 | } | 381 | } |
382 | state->start = start; | ||
383 | state->end = end; | ||
384 | ret = set_state_cb(tree, state, bits); | ||
385 | if (ret) | ||
386 | return ret; | ||
387 | |||
368 | if (bits & EXTENT_DIRTY) | 388 | if (bits & EXTENT_DIRTY) |
369 | tree->dirty_bytes += end - start + 1; | 389 | tree->dirty_bytes += end - start + 1; |
370 | set_state_cb(tree, state, bits); | ||
371 | state->state |= bits; | 390 | state->state |= bits; |
372 | state->start = start; | ||
373 | state->end = end; | ||
374 | node = tree_insert(&tree->state, end, &state->rb_node); | 391 | node = tree_insert(&tree->state, end, &state->rb_node); |
375 | if (node) { | 392 | if (node) { |
376 | struct extent_state *found; | 393 | struct extent_state *found; |
@@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
387 | return 0; | 404 | return 0; |
388 | } | 405 | } |
389 | 406 | ||
407 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | ||
408 | u64 split) | ||
409 | { | ||
410 | if (tree->ops && tree->ops->split_extent_hook) | ||
411 | return tree->ops->split_extent_hook(tree->mapping->host, | ||
412 | orig, split); | ||
413 | return 0; | ||
414 | } | ||
415 | |||
390 | /* | 416 | /* |
391 | * split a given extent state struct in two, inserting the preallocated | 417 | * split a given extent state struct in two, inserting the preallocated |
392 | * struct 'prealloc' as the newly created second half. 'split' indicates an | 418 | * struct 'prealloc' as the newly created second half. 'split' indicates an |
@@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
405 | struct extent_state *prealloc, u64 split) | 431 | struct extent_state *prealloc, u64 split) |
406 | { | 432 | { |
407 | struct rb_node *node; | 433 | struct rb_node *node; |
434 | |||
435 | split_cb(tree, orig, split); | ||
436 | |||
408 | prealloc->start = orig->start; | 437 | prealloc->start = orig->start; |
409 | prealloc->end = split - 1; | 438 | prealloc->end = split - 1; |
410 | prealloc->state = orig->state; | 439 | prealloc->state = orig->state; |
@@ -431,7 +460,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
431 | struct extent_state *state, int bits, int wake, | 460 | struct extent_state *state, int bits, int wake, |
432 | int delete) | 461 | int delete) |
433 | { | 462 | { |
434 | int ret = state->state & bits; | 463 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; |
464 | int ret = state->state & bits_to_clear; | ||
435 | 465 | ||
436 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 466 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
437 | u64 range = state->end - state->start + 1; | 467 | u64 range = state->end - state->start + 1; |
@@ -439,7 +469,7 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
439 | tree->dirty_bytes -= range; | 469 | tree->dirty_bytes -= range; |
440 | } | 470 | } |
441 | clear_state_cb(tree, state, bits); | 471 | clear_state_cb(tree, state, bits); |
442 | state->state &= ~bits; | 472 | state->state &= ~bits_to_clear; |
443 | if (wake) | 473 | if (wake) |
444 | wake_up(&state->wq); | 474 | wake_up(&state->wq); |
445 | if (delete || state->state == 0) { | 475 | if (delete || state->state == 0) { |
@@ -471,10 +501,14 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
471 | * bits were already set, or zero if none of the bits were already set. | 501 | * bits were already set, or zero if none of the bits were already set. |
472 | */ | 502 | */ |
473 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 503 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
474 | int bits, int wake, int delete, gfp_t mask) | 504 | int bits, int wake, int delete, |
505 | struct extent_state **cached_state, | ||
506 | gfp_t mask) | ||
475 | { | 507 | { |
476 | struct extent_state *state; | 508 | struct extent_state *state; |
509 | struct extent_state *cached; | ||
477 | struct extent_state *prealloc = NULL; | 510 | struct extent_state *prealloc = NULL; |
511 | struct rb_node *next_node; | ||
478 | struct rb_node *node; | 512 | struct rb_node *node; |
479 | u64 last_end; | 513 | u64 last_end; |
480 | int err; | 514 | int err; |
@@ -488,6 +522,17 @@ again: | |||
488 | } | 522 | } |
489 | 523 | ||
490 | spin_lock(&tree->lock); | 524 | spin_lock(&tree->lock); |
525 | if (cached_state) { | ||
526 | cached = *cached_state; | ||
527 | *cached_state = NULL; | ||
528 | cached_state = NULL; | ||
529 | if (cached && cached->tree && cached->start == start) { | ||
530 | atomic_dec(&cached->refs); | ||
531 | state = cached; | ||
532 | goto hit_next; | ||
533 | } | ||
534 | free_extent_state(cached); | ||
535 | } | ||
491 | /* | 536 | /* |
492 | * this search will find the extents that end after | 537 | * this search will find the extents that end after |
493 | * our range starts | 538 | * our range starts |
@@ -496,6 +541,7 @@ again: | |||
496 | if (!node) | 541 | if (!node) |
497 | goto out; | 542 | goto out; |
498 | state = rb_entry(node, struct extent_state, rb_node); | 543 | state = rb_entry(node, struct extent_state, rb_node); |
544 | hit_next: | ||
499 | if (state->start > end) | 545 | if (state->start > end) |
500 | goto out; | 546 | goto out; |
501 | WARN_ON(state->end < start); | 547 | WARN_ON(state->end < start); |
@@ -526,13 +572,11 @@ again: | |||
526 | if (err) | 572 | if (err) |
527 | goto out; | 573 | goto out; |
528 | if (state->end <= end) { | 574 | if (state->end <= end) { |
529 | set |= clear_state_bit(tree, state, bits, | 575 | set |= clear_state_bit(tree, state, bits, wake, |
530 | wake, delete); | 576 | delete); |
531 | if (last_end == (u64)-1) | 577 | if (last_end == (u64)-1) |
532 | goto out; | 578 | goto out; |
533 | start = last_end + 1; | 579 | start = last_end + 1; |
534 | } else { | ||
535 | start = state->start; | ||
536 | } | 580 | } |
537 | goto search_again; | 581 | goto search_again; |
538 | } | 582 | } |
@@ -547,19 +591,30 @@ again: | |||
547 | prealloc = alloc_extent_state(GFP_ATOMIC); | 591 | prealloc = alloc_extent_state(GFP_ATOMIC); |
548 | err = split_state(tree, state, prealloc, end + 1); | 592 | err = split_state(tree, state, prealloc, end + 1); |
549 | BUG_ON(err == -EEXIST); | 593 | BUG_ON(err == -EEXIST); |
550 | |||
551 | if (wake) | 594 | if (wake) |
552 | wake_up(&state->wq); | 595 | wake_up(&state->wq); |
553 | set |= clear_state_bit(tree, prealloc, bits, | 596 | |
554 | wake, delete); | 597 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); |
598 | |||
555 | prealloc = NULL; | 599 | prealloc = NULL; |
556 | goto out; | 600 | goto out; |
557 | } | 601 | } |
558 | 602 | ||
603 | if (state->end < end && prealloc && !need_resched()) | ||
604 | next_node = rb_next(&state->rb_node); | ||
605 | else | ||
606 | next_node = NULL; | ||
607 | |||
559 | set |= clear_state_bit(tree, state, bits, wake, delete); | 608 | set |= clear_state_bit(tree, state, bits, wake, delete); |
560 | if (last_end == (u64)-1) | 609 | if (last_end == (u64)-1) |
561 | goto out; | 610 | goto out; |
562 | start = last_end + 1; | 611 | start = last_end + 1; |
612 | if (start <= end && next_node) { | ||
613 | state = rb_entry(next_node, struct extent_state, | ||
614 | rb_node); | ||
615 | if (state->start == start) | ||
616 | goto hit_next; | ||
617 | } | ||
563 | goto search_again; | 618 | goto search_again; |
564 | 619 | ||
565 | out: | 620 | out: |
@@ -641,40 +696,59 @@ out: | |||
641 | return 0; | 696 | return 0; |
642 | } | 697 | } |
643 | 698 | ||
644 | static void set_state_bits(struct extent_io_tree *tree, | 699 | static int set_state_bits(struct extent_io_tree *tree, |
645 | struct extent_state *state, | 700 | struct extent_state *state, |
646 | int bits) | 701 | int bits) |
647 | { | 702 | { |
703 | int ret; | ||
704 | |||
705 | ret = set_state_cb(tree, state, bits); | ||
706 | if (ret) | ||
707 | return ret; | ||
708 | |||
648 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 709 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
649 | u64 range = state->end - state->start + 1; | 710 | u64 range = state->end - state->start + 1; |
650 | tree->dirty_bytes += range; | 711 | tree->dirty_bytes += range; |
651 | } | 712 | } |
652 | set_state_cb(tree, state, bits); | ||
653 | state->state |= bits; | 713 | state->state |= bits; |
714 | |||
715 | return 0; | ||
716 | } | ||
717 | |||
718 | static void cache_state(struct extent_state *state, | ||
719 | struct extent_state **cached_ptr) | ||
720 | { | ||
721 | if (cached_ptr && !(*cached_ptr)) { | ||
722 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { | ||
723 | *cached_ptr = state; | ||
724 | atomic_inc(&state->refs); | ||
725 | } | ||
726 | } | ||
654 | } | 727 | } |
655 | 728 | ||
656 | /* | 729 | /* |
657 | * set some bits on a range in the tree. This may require allocations | 730 | * set some bits on a range in the tree. This may require allocations or |
658 | * or sleeping, so the gfp mask is used to indicate what is allowed. | 731 | * sleeping, so the gfp mask is used to indicate what is allowed. |
659 | * | 732 | * |
660 | * If 'exclusive' == 1, this will fail with -EEXIST if some part of the | 733 | * If any of the exclusive bits are set, this will fail with -EEXIST if some |
661 | * range already has the desired bits set. The start of the existing | 734 | * part of the range already has the desired bits set. The start of the |
662 | * range is returned in failed_start in this case. | 735 | * existing range is returned in failed_start in this case. |
663 | * | 736 | * |
664 | * [start, end] is inclusive | 737 | * [start, end] is inclusive This takes the tree lock. |
665 | * This takes the tree lock. | ||
666 | */ | 738 | */ |
739 | |||
667 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 740 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
668 | int bits, int exclusive, u64 *failed_start, | 741 | int bits, int exclusive_bits, u64 *failed_start, |
742 | struct extent_state **cached_state, | ||
669 | gfp_t mask) | 743 | gfp_t mask) |
670 | { | 744 | { |
671 | struct extent_state *state; | 745 | struct extent_state *state; |
672 | struct extent_state *prealloc = NULL; | 746 | struct extent_state *prealloc = NULL; |
673 | struct rb_node *node; | 747 | struct rb_node *node; |
674 | int err = 0; | 748 | int err = 0; |
675 | int set; | ||
676 | u64 last_start; | 749 | u64 last_start; |
677 | u64 last_end; | 750 | u64 last_end; |
751 | |||
678 | again: | 752 | again: |
679 | if (!prealloc && (mask & __GFP_WAIT)) { | 753 | if (!prealloc && (mask & __GFP_WAIT)) { |
680 | prealloc = alloc_extent_state(mask); | 754 | prealloc = alloc_extent_state(mask); |
@@ -683,6 +757,13 @@ again: | |||
683 | } | 757 | } |
684 | 758 | ||
685 | spin_lock(&tree->lock); | 759 | spin_lock(&tree->lock); |
760 | if (cached_state && *cached_state) { | ||
761 | state = *cached_state; | ||
762 | if (state->start == start && state->tree) { | ||
763 | node = &state->rb_node; | ||
764 | goto hit_next; | ||
765 | } | ||
766 | } | ||
686 | /* | 767 | /* |
687 | * this search will find all the extents that end after | 768 | * this search will find all the extents that end after |
688 | * our range starts. | 769 | * our range starts. |
@@ -694,8 +775,8 @@ again: | |||
694 | BUG_ON(err == -EEXIST); | 775 | BUG_ON(err == -EEXIST); |
695 | goto out; | 776 | goto out; |
696 | } | 777 | } |
697 | |||
698 | state = rb_entry(node, struct extent_state, rb_node); | 778 | state = rb_entry(node, struct extent_state, rb_node); |
779 | hit_next: | ||
699 | last_start = state->start; | 780 | last_start = state->start; |
700 | last_end = state->end; | 781 | last_end = state->end; |
701 | 782 | ||
@@ -706,17 +787,32 @@ again: | |||
706 | * Just lock what we found and keep going | 787 | * Just lock what we found and keep going |
707 | */ | 788 | */ |
708 | if (state->start == start && state->end <= end) { | 789 | if (state->start == start && state->end <= end) { |
709 | set = state->state & bits; | 790 | struct rb_node *next_node; |
710 | if (set && exclusive) { | 791 | if (state->state & exclusive_bits) { |
711 | *failed_start = state->start; | 792 | *failed_start = state->start; |
712 | err = -EEXIST; | 793 | err = -EEXIST; |
713 | goto out; | 794 | goto out; |
714 | } | 795 | } |
715 | set_state_bits(tree, state, bits); | 796 | |
797 | err = set_state_bits(tree, state, bits); | ||
798 | if (err) | ||
799 | goto out; | ||
800 | |||
801 | cache_state(state, cached_state); | ||
716 | merge_state(tree, state); | 802 | merge_state(tree, state); |
717 | if (last_end == (u64)-1) | 803 | if (last_end == (u64)-1) |
718 | goto out; | 804 | goto out; |
805 | |||
719 | start = last_end + 1; | 806 | start = last_end + 1; |
807 | if (start < end && prealloc && !need_resched()) { | ||
808 | next_node = rb_next(node); | ||
809 | if (next_node) { | ||
810 | state = rb_entry(next_node, struct extent_state, | ||
811 | rb_node); | ||
812 | if (state->start == start) | ||
813 | goto hit_next; | ||
814 | } | ||
815 | } | ||
720 | goto search_again; | 816 | goto search_again; |
721 | } | 817 | } |
722 | 818 | ||
@@ -737,8 +833,7 @@ again: | |||
737 | * desired bit on it. | 833 | * desired bit on it. |
738 | */ | 834 | */ |
739 | if (state->start < start) { | 835 | if (state->start < start) { |
740 | set = state->state & bits; | 836 | if (state->state & exclusive_bits) { |
741 | if (exclusive && set) { | ||
742 | *failed_start = start; | 837 | *failed_start = start; |
743 | err = -EEXIST; | 838 | err = -EEXIST; |
744 | goto out; | 839 | goto out; |
@@ -749,13 +844,14 @@ again: | |||
749 | if (err) | 844 | if (err) |
750 | goto out; | 845 | goto out; |
751 | if (state->end <= end) { | 846 | if (state->end <= end) { |
752 | set_state_bits(tree, state, bits); | 847 | err = set_state_bits(tree, state, bits); |
848 | if (err) | ||
849 | goto out; | ||
850 | cache_state(state, cached_state); | ||
753 | merge_state(tree, state); | 851 | merge_state(tree, state); |
754 | if (last_end == (u64)-1) | 852 | if (last_end == (u64)-1) |
755 | goto out; | 853 | goto out; |
756 | start = last_end + 1; | 854 | start = last_end + 1; |
757 | } else { | ||
758 | start = state->start; | ||
759 | } | 855 | } |
760 | goto search_again; | 856 | goto search_again; |
761 | } | 857 | } |
@@ -774,10 +870,13 @@ again: | |||
774 | this_end = last_start - 1; | 870 | this_end = last_start - 1; |
775 | err = insert_state(tree, prealloc, start, this_end, | 871 | err = insert_state(tree, prealloc, start, this_end, |
776 | bits); | 872 | bits); |
777 | prealloc = NULL; | ||
778 | BUG_ON(err == -EEXIST); | 873 | BUG_ON(err == -EEXIST); |
779 | if (err) | 874 | if (err) { |
875 | prealloc = NULL; | ||
780 | goto out; | 876 | goto out; |
877 | } | ||
878 | cache_state(prealloc, cached_state); | ||
879 | prealloc = NULL; | ||
781 | start = this_end + 1; | 880 | start = this_end + 1; |
782 | goto search_again; | 881 | goto search_again; |
783 | } | 882 | } |
@@ -788,8 +887,7 @@ again: | |||
788 | * on the first half | 887 | * on the first half |
789 | */ | 888 | */ |
790 | if (state->start <= end && state->end > end) { | 889 | if (state->start <= end && state->end > end) { |
791 | set = state->state & bits; | 890 | if (state->state & exclusive_bits) { |
792 | if (exclusive && set) { | ||
793 | *failed_start = start; | 891 | *failed_start = start; |
794 | err = -EEXIST; | 892 | err = -EEXIST; |
795 | goto out; | 893 | goto out; |
@@ -797,7 +895,12 @@ again: | |||
797 | err = split_state(tree, state, prealloc, end + 1); | 895 | err = split_state(tree, state, prealloc, end + 1); |
798 | BUG_ON(err == -EEXIST); | 896 | BUG_ON(err == -EEXIST); |
799 | 897 | ||
800 | set_state_bits(tree, prealloc, bits); | 898 | err = set_state_bits(tree, prealloc, bits); |
899 | if (err) { | ||
900 | prealloc = NULL; | ||
901 | goto out; | ||
902 | } | ||
903 | cache_state(prealloc, cached_state); | ||
801 | merge_state(tree, prealloc); | 904 | merge_state(tree, prealloc); |
802 | prealloc = NULL; | 905 | prealloc = NULL; |
803 | goto out; | 906 | goto out; |
@@ -826,86 +929,65 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
826 | gfp_t mask) | 929 | gfp_t mask) |
827 | { | 930 | { |
828 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, | 931 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, |
829 | mask); | 932 | NULL, mask); |
830 | } | ||
831 | |||
832 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
833 | gfp_t mask) | ||
834 | { | ||
835 | return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); | ||
836 | } | 933 | } |
837 | 934 | ||
838 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 935 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
839 | int bits, gfp_t mask) | 936 | int bits, gfp_t mask) |
840 | { | 937 | { |
841 | return set_extent_bit(tree, start, end, bits, 0, NULL, | 938 | return set_extent_bit(tree, start, end, bits, 0, NULL, |
842 | mask); | 939 | NULL, mask); |
843 | } | 940 | } |
844 | 941 | ||
845 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 942 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
846 | int bits, gfp_t mask) | 943 | int bits, gfp_t mask) |
847 | { | 944 | { |
848 | return clear_extent_bit(tree, start, end, bits, 0, 0, mask); | 945 | return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); |
849 | } | 946 | } |
850 | 947 | ||
851 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 948 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
852 | gfp_t mask) | 949 | gfp_t mask) |
853 | { | 950 | { |
854 | return set_extent_bit(tree, start, end, | 951 | return set_extent_bit(tree, start, end, |
855 | EXTENT_DELALLOC | EXTENT_DIRTY, | 952 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, |
856 | 0, NULL, mask); | 953 | 0, NULL, NULL, mask); |
857 | } | 954 | } |
858 | 955 | ||
859 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 956 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
860 | gfp_t mask) | 957 | gfp_t mask) |
861 | { | 958 | { |
862 | return clear_extent_bit(tree, start, end, | 959 | return clear_extent_bit(tree, start, end, |
863 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); | 960 | EXTENT_DIRTY | EXTENT_DELALLOC | |
864 | } | 961 | EXTENT_DO_ACCOUNTING, 0, 0, |
865 | 962 | NULL, mask); | |
866 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
867 | gfp_t mask) | ||
868 | { | ||
869 | return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); | ||
870 | } | 963 | } |
871 | 964 | ||
872 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 965 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
873 | gfp_t mask) | 966 | gfp_t mask) |
874 | { | 967 | { |
875 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, | 968 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, |
876 | mask); | 969 | NULL, mask); |
877 | } | 970 | } |
878 | 971 | ||
879 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
880 | gfp_t mask) | 973 | gfp_t mask) |
881 | { | 974 | { |
882 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); | 975 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, |
976 | NULL, mask); | ||
883 | } | 977 | } |
884 | 978 | ||
885 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 979 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
886 | gfp_t mask) | 980 | gfp_t mask) |
887 | { | 981 | { |
888 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 982 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, |
889 | mask); | 983 | NULL, mask); |
890 | } | 984 | } |
891 | 985 | ||
892 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 986 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
893 | u64 end, gfp_t mask) | 987 | u64 end, gfp_t mask) |
894 | { | 988 | { |
895 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); | 989 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
896 | } | 990 | NULL, mask); |
897 | |||
898 | static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, | ||
899 | gfp_t mask) | ||
900 | { | ||
901 | return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, | ||
902 | 0, NULL, mask); | ||
903 | } | ||
904 | |||
905 | static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, | ||
906 | u64 end, gfp_t mask) | ||
907 | { | ||
908 | return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); | ||
909 | } | 991 | } |
910 | 992 | ||
911 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 993 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
@@ -917,13 +999,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
917 | * either insert or lock state struct between start and end use mask to tell | 999 | * either insert or lock state struct between start and end use mask to tell |
918 | * us if waiting is desired. | 1000 | * us if waiting is desired. |
919 | */ | 1001 | */ |
920 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | 1002 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
1003 | int bits, struct extent_state **cached_state, gfp_t mask) | ||
921 | { | 1004 | { |
922 | int err; | 1005 | int err; |
923 | u64 failed_start; | 1006 | u64 failed_start; |
924 | while (1) { | 1007 | while (1) { |
925 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 1008 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, |
926 | &failed_start, mask); | 1009 | EXTENT_LOCKED, &failed_start, |
1010 | cached_state, mask); | ||
927 | if (err == -EEXIST && (mask & __GFP_WAIT)) { | 1011 | if (err == -EEXIST && (mask & __GFP_WAIT)) { |
928 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); | 1012 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); |
929 | start = failed_start; | 1013 | start = failed_start; |
@@ -935,27 +1019,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | |||
935 | return err; | 1019 | return err; |
936 | } | 1020 | } |
937 | 1021 | ||
1022 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | ||
1023 | { | ||
1024 | return lock_extent_bits(tree, start, end, 0, NULL, mask); | ||
1025 | } | ||
1026 | |||
938 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1027 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
939 | gfp_t mask) | 1028 | gfp_t mask) |
940 | { | 1029 | { |
941 | int err; | 1030 | int err; |
942 | u64 failed_start; | 1031 | u64 failed_start; |
943 | 1032 | ||
944 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 1033 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, |
945 | &failed_start, mask); | 1034 | &failed_start, NULL, mask); |
946 | if (err == -EEXIST) { | 1035 | if (err == -EEXIST) { |
947 | if (failed_start > start) | 1036 | if (failed_start > start) |
948 | clear_extent_bit(tree, start, failed_start - 1, | 1037 | clear_extent_bit(tree, start, failed_start - 1, |
949 | EXTENT_LOCKED, 1, 0, mask); | 1038 | EXTENT_LOCKED, 1, 0, NULL, mask); |
950 | return 0; | 1039 | return 0; |
951 | } | 1040 | } |
952 | return 1; | 1041 | return 1; |
953 | } | 1042 | } |
954 | 1043 | ||
1044 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | ||
1045 | struct extent_state **cached, gfp_t mask) | ||
1046 | { | ||
1047 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, | ||
1048 | mask); | ||
1049 | } | ||
1050 | |||
955 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1051 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
956 | gfp_t mask) | 1052 | gfp_t mask) |
957 | { | 1053 | { |
958 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); | 1054 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
1055 | mask); | ||
959 | } | 1056 | } |
960 | 1057 | ||
961 | /* | 1058 | /* |
@@ -974,7 +1071,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) | |||
974 | page_cache_release(page); | 1071 | page_cache_release(page); |
975 | index++; | 1072 | index++; |
976 | } | 1073 | } |
977 | set_extent_dirty(tree, start, end, GFP_NOFS); | ||
978 | return 0; | 1074 | return 0; |
979 | } | 1075 | } |
980 | 1076 | ||
@@ -994,7 +1090,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
994 | page_cache_release(page); | 1090 | page_cache_release(page); |
995 | index++; | 1091 | index++; |
996 | } | 1092 | } |
997 | set_extent_writeback(tree, start, end, GFP_NOFS); | ||
998 | return 0; | 1093 | return 0; |
999 | } | 1094 | } |
1000 | 1095 | ||
@@ -1232,6 +1327,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
1232 | u64 delalloc_start; | 1327 | u64 delalloc_start; |
1233 | u64 delalloc_end; | 1328 | u64 delalloc_end; |
1234 | u64 found; | 1329 | u64 found; |
1330 | struct extent_state *cached_state = NULL; | ||
1235 | int ret; | 1331 | int ret; |
1236 | int loops = 0; | 1332 | int loops = 0; |
1237 | 1333 | ||
@@ -1269,6 +1365,7 @@ again: | |||
1269 | /* some of the pages are gone, lets avoid looping by | 1365 | /* some of the pages are gone, lets avoid looping by |
1270 | * shortening the size of the delalloc range we're searching | 1366 | * shortening the size of the delalloc range we're searching |
1271 | */ | 1367 | */ |
1368 | free_extent_state(cached_state); | ||
1272 | if (!loops) { | 1369 | if (!loops) { |
1273 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); | 1370 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); |
1274 | max_bytes = PAGE_CACHE_SIZE - offset; | 1371 | max_bytes = PAGE_CACHE_SIZE - offset; |
@@ -1282,18 +1379,21 @@ again: | |||
1282 | BUG_ON(ret); | 1379 | BUG_ON(ret); |
1283 | 1380 | ||
1284 | /* step three, lock the state bits for the whole range */ | 1381 | /* step three, lock the state bits for the whole range */ |
1285 | lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1382 | lock_extent_bits(tree, delalloc_start, delalloc_end, |
1383 | 0, &cached_state, GFP_NOFS); | ||
1286 | 1384 | ||
1287 | /* then test to make sure it is all still delalloc */ | 1385 | /* then test to make sure it is all still delalloc */ |
1288 | ret = test_range_bit(tree, delalloc_start, delalloc_end, | 1386 | ret = test_range_bit(tree, delalloc_start, delalloc_end, |
1289 | EXTENT_DELALLOC, 1); | 1387 | EXTENT_DELALLOC, 1, cached_state); |
1290 | if (!ret) { | 1388 | if (!ret) { |
1291 | unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1389 | unlock_extent_cached(tree, delalloc_start, delalloc_end, |
1390 | &cached_state, GFP_NOFS); | ||
1292 | __unlock_for_delalloc(inode, locked_page, | 1391 | __unlock_for_delalloc(inode, locked_page, |
1293 | delalloc_start, delalloc_end); | 1392 | delalloc_start, delalloc_end); |
1294 | cond_resched(); | 1393 | cond_resched(); |
1295 | goto again; | 1394 | goto again; |
1296 | } | 1395 | } |
1396 | free_extent_state(cached_state); | ||
1297 | *start = delalloc_start; | 1397 | *start = delalloc_start; |
1298 | *end = delalloc_end; | 1398 | *end = delalloc_end; |
1299 | out_failed: | 1399 | out_failed: |
@@ -1303,11 +1403,7 @@ out_failed: | |||
1303 | int extent_clear_unlock_delalloc(struct inode *inode, | 1403 | int extent_clear_unlock_delalloc(struct inode *inode, |
1304 | struct extent_io_tree *tree, | 1404 | struct extent_io_tree *tree, |
1305 | u64 start, u64 end, struct page *locked_page, | 1405 | u64 start, u64 end, struct page *locked_page, |
1306 | int unlock_pages, | 1406 | unsigned long op) |
1307 | int clear_unlock, | ||
1308 | int clear_delalloc, int clear_dirty, | ||
1309 | int set_writeback, | ||
1310 | int end_writeback) | ||
1311 | { | 1407 | { |
1312 | int ret; | 1408 | int ret; |
1313 | struct page *pages[16]; | 1409 | struct page *pages[16]; |
@@ -1317,16 +1413,21 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1317 | int i; | 1413 | int i; |
1318 | int clear_bits = 0; | 1414 | int clear_bits = 0; |
1319 | 1415 | ||
1320 | if (clear_unlock) | 1416 | if (op & EXTENT_CLEAR_UNLOCK) |
1321 | clear_bits |= EXTENT_LOCKED; | 1417 | clear_bits |= EXTENT_LOCKED; |
1322 | if (clear_dirty) | 1418 | if (op & EXTENT_CLEAR_DIRTY) |
1323 | clear_bits |= EXTENT_DIRTY; | 1419 | clear_bits |= EXTENT_DIRTY; |
1324 | 1420 | ||
1325 | if (clear_delalloc) | 1421 | if (op & EXTENT_CLEAR_DELALLOC) |
1326 | clear_bits |= EXTENT_DELALLOC; | 1422 | clear_bits |= EXTENT_DELALLOC; |
1327 | 1423 | ||
1328 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); | 1424 | if (op & EXTENT_CLEAR_ACCOUNTING) |
1329 | if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) | 1425 | clear_bits |= EXTENT_DO_ACCOUNTING; |
1426 | |||
1427 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | ||
1428 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | ||
1429 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | ||
1430 | EXTENT_SET_PRIVATE2))) | ||
1330 | return 0; | 1431 | return 0; |
1331 | 1432 | ||
1332 | while (nr_pages > 0) { | 1433 | while (nr_pages > 0) { |
@@ -1334,17 +1435,21 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1334 | min_t(unsigned long, | 1435 | min_t(unsigned long, |
1335 | nr_pages, ARRAY_SIZE(pages)), pages); | 1436 | nr_pages, ARRAY_SIZE(pages)), pages); |
1336 | for (i = 0; i < ret; i++) { | 1437 | for (i = 0; i < ret; i++) { |
1438 | |||
1439 | if (op & EXTENT_SET_PRIVATE2) | ||
1440 | SetPagePrivate2(pages[i]); | ||
1441 | |||
1337 | if (pages[i] == locked_page) { | 1442 | if (pages[i] == locked_page) { |
1338 | page_cache_release(pages[i]); | 1443 | page_cache_release(pages[i]); |
1339 | continue; | 1444 | continue; |
1340 | } | 1445 | } |
1341 | if (clear_dirty) | 1446 | if (op & EXTENT_CLEAR_DIRTY) |
1342 | clear_page_dirty_for_io(pages[i]); | 1447 | clear_page_dirty_for_io(pages[i]); |
1343 | if (set_writeback) | 1448 | if (op & EXTENT_SET_WRITEBACK) |
1344 | set_page_writeback(pages[i]); | 1449 | set_page_writeback(pages[i]); |
1345 | if (end_writeback) | 1450 | if (op & EXTENT_END_WRITEBACK) |
1346 | end_page_writeback(pages[i]); | 1451 | end_page_writeback(pages[i]); |
1347 | if (unlock_pages) | 1452 | if (op & EXTENT_CLEAR_UNLOCK_PAGE) |
1348 | unlock_page(pages[i]); | 1453 | unlock_page(pages[i]); |
1349 | page_cache_release(pages[i]); | 1454 | page_cache_release(pages[i]); |
1350 | } | 1455 | } |
@@ -1476,14 +1581,17 @@ out: | |||
1476 | * range is found set. | 1581 | * range is found set. |
1477 | */ | 1582 | */ |
1478 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1583 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
1479 | int bits, int filled) | 1584 | int bits, int filled, struct extent_state *cached) |
1480 | { | 1585 | { |
1481 | struct extent_state *state = NULL; | 1586 | struct extent_state *state = NULL; |
1482 | struct rb_node *node; | 1587 | struct rb_node *node; |
1483 | int bitset = 0; | 1588 | int bitset = 0; |
1484 | 1589 | ||
1485 | spin_lock(&tree->lock); | 1590 | spin_lock(&tree->lock); |
1486 | node = tree_search(tree, start); | 1591 | if (cached && cached->tree && cached->start == start) |
1592 | node = &cached->rb_node; | ||
1593 | else | ||
1594 | node = tree_search(tree, start); | ||
1487 | while (node && start <= end) { | 1595 | while (node && start <= end) { |
1488 | state = rb_entry(node, struct extent_state, rb_node); | 1596 | state = rb_entry(node, struct extent_state, rb_node); |
1489 | 1597 | ||
@@ -1503,6 +1611,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1503 | bitset = 0; | 1611 | bitset = 0; |
1504 | break; | 1612 | break; |
1505 | } | 1613 | } |
1614 | |||
1615 | if (state->end == (u64)-1) | ||
1616 | break; | ||
1617 | |||
1506 | start = state->end + 1; | 1618 | start = state->end + 1; |
1507 | if (start > end) | 1619 | if (start > end) |
1508 | break; | 1620 | break; |
@@ -1526,7 +1638,7 @@ static int check_page_uptodate(struct extent_io_tree *tree, | |||
1526 | { | 1638 | { |
1527 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1639 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
1528 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1640 | u64 end = start + PAGE_CACHE_SIZE - 1; |
1529 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) | 1641 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) |
1530 | SetPageUptodate(page); | 1642 | SetPageUptodate(page); |
1531 | return 0; | 1643 | return 0; |
1532 | } | 1644 | } |
@@ -1540,7 +1652,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
1540 | { | 1652 | { |
1541 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1653 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
1542 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1654 | u64 end = start + PAGE_CACHE_SIZE - 1; |
1543 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) | 1655 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) |
1544 | unlock_page(page); | 1656 | unlock_page(page); |
1545 | return 0; | 1657 | return 0; |
1546 | } | 1658 | } |
@@ -1552,10 +1664,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
1552 | static int check_page_writeback(struct extent_io_tree *tree, | 1664 | static int check_page_writeback(struct extent_io_tree *tree, |
1553 | struct page *page) | 1665 | struct page *page) |
1554 | { | 1666 | { |
1555 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1667 | end_page_writeback(page); |
1556 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
1557 | if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) | ||
1558 | end_page_writeback(page); | ||
1559 | return 0; | 1668 | return 0; |
1560 | } | 1669 | } |
1561 | 1670 | ||
@@ -1613,13 +1722,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
1613 | } | 1722 | } |
1614 | 1723 | ||
1615 | if (!uptodate) { | 1724 | if (!uptodate) { |
1616 | clear_extent_uptodate(tree, start, end, GFP_ATOMIC); | 1725 | clear_extent_uptodate(tree, start, end, GFP_NOFS); |
1617 | ClearPageUptodate(page); | 1726 | ClearPageUptodate(page); |
1618 | SetPageError(page); | 1727 | SetPageError(page); |
1619 | } | 1728 | } |
1620 | 1729 | ||
1621 | clear_extent_writeback(tree, start, end, GFP_ATOMIC); | ||
1622 | |||
1623 | if (whole_page) | 1730 | if (whole_page) |
1624 | end_page_writeback(page); | 1731 | end_page_writeback(page); |
1625 | else | 1732 | else |
@@ -1983,7 +2090,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
1983 | continue; | 2090 | continue; |
1984 | } | 2091 | } |
1985 | /* the get_extent function already copied into the page */ | 2092 | /* the get_extent function already copied into the page */ |
1986 | if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { | 2093 | if (test_range_bit(tree, cur, cur_end, |
2094 | EXTENT_UPTODATE, 1, NULL)) { | ||
1987 | check_page_uptodate(tree, page); | 2095 | check_page_uptodate(tree, page); |
1988 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2096 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); |
1989 | cur = cur + iosize; | 2097 | cur = cur + iosize; |
@@ -2078,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2078 | u64 iosize; | 2186 | u64 iosize; |
2079 | u64 unlock_start; | 2187 | u64 unlock_start; |
2080 | sector_t sector; | 2188 | sector_t sector; |
2189 | struct extent_state *cached_state = NULL; | ||
2081 | struct extent_map *em; | 2190 | struct extent_map *em; |
2082 | struct block_device *bdev; | 2191 | struct block_device *bdev; |
2083 | int ret; | 2192 | int ret; |
@@ -2124,6 +2233,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2124 | delalloc_end = 0; | 2233 | delalloc_end = 0; |
2125 | page_started = 0; | 2234 | page_started = 0; |
2126 | if (!epd->extent_locked) { | 2235 | if (!epd->extent_locked) { |
2236 | u64 delalloc_to_write = 0; | ||
2127 | /* | 2237 | /* |
2128 | * make sure the wbc mapping index is at least updated | 2238 | * make sure the wbc mapping index is at least updated |
2129 | * to this page. | 2239 | * to this page. |
@@ -2143,8 +2253,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2143 | tree->ops->fill_delalloc(inode, page, delalloc_start, | 2253 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
2144 | delalloc_end, &page_started, | 2254 | delalloc_end, &page_started, |
2145 | &nr_written); | 2255 | &nr_written); |
2256 | /* | ||
2257 | * delalloc_end is already one less than the total | ||
2258 | * length, so we don't subtract one from | ||
2259 | * PAGE_CACHE_SIZE | ||
2260 | */ | ||
2261 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
2262 | PAGE_CACHE_SIZE) >> | ||
2263 | PAGE_CACHE_SHIFT; | ||
2146 | delalloc_start = delalloc_end + 1; | 2264 | delalloc_start = delalloc_end + 1; |
2147 | } | 2265 | } |
2266 | if (wbc->nr_to_write < delalloc_to_write) { | ||
2267 | int thresh = 8192; | ||
2268 | |||
2269 | if (delalloc_to_write < thresh * 2) | ||
2270 | thresh = delalloc_to_write; | ||
2271 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
2272 | thresh); | ||
2273 | } | ||
2148 | 2274 | ||
2149 | /* did the fill delalloc function already unlock and start | 2275 | /* did the fill delalloc function already unlock and start |
2150 | * the IO? | 2276 | * the IO? |
@@ -2160,15 +2286,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2160 | goto done_unlocked; | 2286 | goto done_unlocked; |
2161 | } | 2287 | } |
2162 | } | 2288 | } |
2163 | lock_extent(tree, start, page_end, GFP_NOFS); | ||
2164 | |||
2165 | unlock_start = start; | ||
2166 | |||
2167 | if (tree->ops && tree->ops->writepage_start_hook) { | 2289 | if (tree->ops && tree->ops->writepage_start_hook) { |
2168 | ret = tree->ops->writepage_start_hook(page, start, | 2290 | ret = tree->ops->writepage_start_hook(page, start, |
2169 | page_end); | 2291 | page_end); |
2170 | if (ret == -EAGAIN) { | 2292 | if (ret == -EAGAIN) { |
2171 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
2172 | redirty_page_for_writepage(wbc, page); | 2293 | redirty_page_for_writepage(wbc, page); |
2173 | update_nr_written(page, wbc, nr_written); | 2294 | update_nr_written(page, wbc, nr_written); |
2174 | unlock_page(page); | 2295 | unlock_page(page); |
@@ -2184,12 +2305,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2184 | update_nr_written(page, wbc, nr_written + 1); | 2305 | update_nr_written(page, wbc, nr_written + 1); |
2185 | 2306 | ||
2186 | end = page_end; | 2307 | end = page_end; |
2187 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | ||
2188 | printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); | ||
2189 | |||
2190 | if (last_byte <= start) { | 2308 | if (last_byte <= start) { |
2191 | clear_extent_dirty(tree, start, page_end, GFP_NOFS); | ||
2192 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
2193 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2309 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2194 | tree->ops->writepage_end_io_hook(page, start, | 2310 | tree->ops->writepage_end_io_hook(page, start, |
2195 | page_end, NULL, 1); | 2311 | page_end, NULL, 1); |
@@ -2197,13 +2313,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2197 | goto done; | 2313 | goto done; |
2198 | } | 2314 | } |
2199 | 2315 | ||
2200 | set_extent_uptodate(tree, start, page_end, GFP_NOFS); | ||
2201 | blocksize = inode->i_sb->s_blocksize; | 2316 | blocksize = inode->i_sb->s_blocksize; |
2202 | 2317 | ||
2203 | while (cur <= end) { | 2318 | while (cur <= end) { |
2204 | if (cur >= last_byte) { | 2319 | if (cur >= last_byte) { |
2205 | clear_extent_dirty(tree, cur, page_end, GFP_NOFS); | ||
2206 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
2207 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2320 | if (tree->ops && tree->ops->writepage_end_io_hook) |
2208 | tree->ops->writepage_end_io_hook(page, cur, | 2321 | tree->ops->writepage_end_io_hook(page, cur, |
2209 | page_end, NULL, 1); | 2322 | page_end, NULL, 1); |
@@ -2235,12 +2348,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2235 | */ | 2348 | */ |
2236 | if (compressed || block_start == EXTENT_MAP_HOLE || | 2349 | if (compressed || block_start == EXTENT_MAP_HOLE || |
2237 | block_start == EXTENT_MAP_INLINE) { | 2350 | block_start == EXTENT_MAP_INLINE) { |
2238 | clear_extent_dirty(tree, cur, | ||
2239 | cur + iosize - 1, GFP_NOFS); | ||
2240 | |||
2241 | unlock_extent(tree, unlock_start, cur + iosize - 1, | ||
2242 | GFP_NOFS); | ||
2243 | |||
2244 | /* | 2351 | /* |
2245 | * end_io notification does not happen here for | 2352 | * end_io notification does not happen here for |
2246 | * compressed extents | 2353 | * compressed extents |
@@ -2265,13 +2372,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2265 | } | 2372 | } |
2266 | /* leave this out until we have a page_mkwrite call */ | 2373 | /* leave this out until we have a page_mkwrite call */ |
2267 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | 2374 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, |
2268 | EXTENT_DIRTY, 0)) { | 2375 | EXTENT_DIRTY, 0, NULL)) { |
2269 | cur = cur + iosize; | 2376 | cur = cur + iosize; |
2270 | pg_offset += iosize; | 2377 | pg_offset += iosize; |
2271 | continue; | 2378 | continue; |
2272 | } | 2379 | } |
2273 | 2380 | ||
2274 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
2275 | if (tree->ops && tree->ops->writepage_io_hook) { | 2381 | if (tree->ops && tree->ops->writepage_io_hook) { |
2276 | ret = tree->ops->writepage_io_hook(page, cur, | 2382 | ret = tree->ops->writepage_io_hook(page, cur, |
2277 | cur + iosize - 1); | 2383 | cur + iosize - 1); |
@@ -2309,12 +2415,12 @@ done: | |||
2309 | set_page_writeback(page); | 2415 | set_page_writeback(page); |
2310 | end_page_writeback(page); | 2416 | end_page_writeback(page); |
2311 | } | 2417 | } |
2312 | if (unlock_start <= page_end) | ||
2313 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
2314 | unlock_page(page); | 2418 | unlock_page(page); |
2315 | 2419 | ||
2316 | done_unlocked: | 2420 | done_unlocked: |
2317 | 2421 | ||
2422 | /* drop our reference on any cached states */ | ||
2423 | free_extent_state(cached_state); | ||
2318 | return 0; | 2424 | return 0; |
2319 | } | 2425 | } |
2320 | 2426 | ||
@@ -2339,9 +2445,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2339 | writepage_t writepage, void *data, | 2445 | writepage_t writepage, void *data, |
2340 | void (*flush_fn)(void *)) | 2446 | void (*flush_fn)(void *)) |
2341 | { | 2447 | { |
2342 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
2343 | int ret = 0; | 2448 | int ret = 0; |
2344 | int done = 0; | 2449 | int done = 0; |
2450 | int nr_to_write_done = 0; | ||
2345 | struct pagevec pvec; | 2451 | struct pagevec pvec; |
2346 | int nr_pages; | 2452 | int nr_pages; |
2347 | pgoff_t index; | 2453 | pgoff_t index; |
@@ -2361,7 +2467,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2361 | scanned = 1; | 2467 | scanned = 1; |
2362 | } | 2468 | } |
2363 | retry: | 2469 | retry: |
2364 | while (!done && (index <= end) && | 2470 | while (!done && !nr_to_write_done && (index <= end) && |
2365 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2471 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
2366 | PAGECACHE_TAG_DIRTY, min(end - index, | 2472 | PAGECACHE_TAG_DIRTY, min(end - index, |
2367 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 2473 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
@@ -2412,12 +2518,15 @@ retry: | |||
2412 | unlock_page(page); | 2518 | unlock_page(page); |
2413 | ret = 0; | 2519 | ret = 0; |
2414 | } | 2520 | } |
2415 | if (ret || wbc->nr_to_write <= 0) | 2521 | if (ret) |
2416 | done = 1; | ||
2417 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
2418 | wbc->encountered_congestion = 1; | ||
2419 | done = 1; | 2522 | done = 1; |
2420 | } | 2523 | |
2524 | /* | ||
2525 | * the filesystem may choose to bump up nr_to_write. | ||
2526 | * We have to make sure to honor the new nr_to_write | ||
2527 | * at any time | ||
2528 | */ | ||
2529 | nr_to_write_done = wbc->nr_to_write <= 0; | ||
2421 | } | 2530 | } |
2422 | pagevec_release(&pvec); | 2531 | pagevec_release(&pvec); |
2423 | cond_resched(); | 2532 | cond_resched(); |
@@ -2604,10 +2713,11 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
2604 | return 0; | 2713 | return 0; |
2605 | 2714 | ||
2606 | lock_extent(tree, start, end, GFP_NOFS); | 2715 | lock_extent(tree, start, end, GFP_NOFS); |
2607 | wait_on_extent_writeback(tree, start, end); | 2716 | wait_on_page_writeback(page); |
2608 | clear_extent_bit(tree, start, end, | 2717 | clear_extent_bit(tree, start, end, |
2609 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, | 2718 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
2610 | 1, 1, GFP_NOFS); | 2719 | EXTENT_DO_ACCOUNTING, |
2720 | 1, 1, NULL, GFP_NOFS); | ||
2611 | return 0; | 2721 | return 0; |
2612 | } | 2722 | } |
2613 | 2723 | ||
@@ -2687,7 +2797,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2687 | !isnew && !PageUptodate(page) && | 2797 | !isnew && !PageUptodate(page) && |
2688 | (block_off_end > to || block_off_start < from) && | 2798 | (block_off_end > to || block_off_start < from) && |
2689 | !test_range_bit(tree, block_start, cur_end, | 2799 | !test_range_bit(tree, block_start, cur_end, |
2690 | EXTENT_UPTODATE, 1)) { | 2800 | EXTENT_UPTODATE, 1, NULL)) { |
2691 | u64 sector; | 2801 | u64 sector; |
2692 | u64 extent_offset = block_start - em->start; | 2802 | u64 extent_offset = block_start - em->start; |
2693 | size_t iosize; | 2803 | size_t iosize; |
@@ -2701,7 +2811,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
2701 | */ | 2811 | */ |
2702 | set_extent_bit(tree, block_start, | 2812 | set_extent_bit(tree, block_start, |
2703 | block_start + iosize - 1, | 2813 | block_start + iosize - 1, |
2704 | EXTENT_LOCKED, 0, NULL, GFP_NOFS); | 2814 | EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); |
2705 | ret = submit_extent_page(READ, tree, page, | 2815 | ret = submit_extent_page(READ, tree, page, |
2706 | sector, iosize, page_offset, em->bdev, | 2816 | sector, iosize, page_offset, em->bdev, |
2707 | NULL, 1, | 2817 | NULL, 1, |
@@ -2742,13 +2852,18 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
2742 | int ret = 1; | 2852 | int ret = 1; |
2743 | 2853 | ||
2744 | if (test_range_bit(tree, start, end, | 2854 | if (test_range_bit(tree, start, end, |
2745 | EXTENT_IOBITS | EXTENT_ORDERED, 0)) | 2855 | EXTENT_IOBITS, 0, NULL)) |
2746 | ret = 0; | 2856 | ret = 0; |
2747 | else { | 2857 | else { |
2748 | if ((mask & GFP_NOFS) == GFP_NOFS) | 2858 | if ((mask & GFP_NOFS) == GFP_NOFS) |
2749 | mask = GFP_NOFS; | 2859 | mask = GFP_NOFS; |
2750 | clear_extent_bit(tree, start, end, EXTENT_UPTODATE, | 2860 | /* |
2751 | 1, 1, mask); | 2861 | * at this point we can safely clear everything except the |
2862 | * locked bit and the nodatasum bit | ||
2863 | */ | ||
2864 | clear_extent_bit(tree, start, end, | ||
2865 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), | ||
2866 | 0, 0, NULL, mask); | ||
2752 | } | 2867 | } |
2753 | return ret; | 2868 | return ret; |
2754 | } | 2869 | } |
@@ -2771,29 +2886,28 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
2771 | u64 len; | 2886 | u64 len; |
2772 | while (start <= end) { | 2887 | while (start <= end) { |
2773 | len = end - start + 1; | 2888 | len = end - start + 1; |
2774 | spin_lock(&map->lock); | 2889 | write_lock(&map->lock); |
2775 | em = lookup_extent_mapping(map, start, len); | 2890 | em = lookup_extent_mapping(map, start, len); |
2776 | if (!em || IS_ERR(em)) { | 2891 | if (!em || IS_ERR(em)) { |
2777 | spin_unlock(&map->lock); | 2892 | write_unlock(&map->lock); |
2778 | break; | 2893 | break; |
2779 | } | 2894 | } |
2780 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || | 2895 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || |
2781 | em->start != start) { | 2896 | em->start != start) { |
2782 | spin_unlock(&map->lock); | 2897 | write_unlock(&map->lock); |
2783 | free_extent_map(em); | 2898 | free_extent_map(em); |
2784 | break; | 2899 | break; |
2785 | } | 2900 | } |
2786 | if (!test_range_bit(tree, em->start, | 2901 | if (!test_range_bit(tree, em->start, |
2787 | extent_map_end(em) - 1, | 2902 | extent_map_end(em) - 1, |
2788 | EXTENT_LOCKED | EXTENT_WRITEBACK | | 2903 | EXTENT_LOCKED | EXTENT_WRITEBACK, |
2789 | EXTENT_ORDERED, | 2904 | 0, NULL)) { |
2790 | 0)) { | ||
2791 | remove_extent_mapping(map, em); | 2905 | remove_extent_mapping(map, em); |
2792 | /* once for the rb tree */ | 2906 | /* once for the rb tree */ |
2793 | free_extent_map(em); | 2907 | free_extent_map(em); |
2794 | } | 2908 | } |
2795 | start = extent_map_end(em); | 2909 | start = extent_map_end(em); |
2796 | spin_unlock(&map->lock); | 2910 | write_unlock(&map->lock); |
2797 | 2911 | ||
2798 | /* once for us */ | 2912 | /* once for us */ |
2799 | free_extent_map(em); | 2913 | free_extent_map(em); |
@@ -3203,7 +3317,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3203 | int uptodate; | 3317 | int uptodate; |
3204 | unsigned long index; | 3318 | unsigned long index; |
3205 | 3319 | ||
3206 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); | 3320 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); |
3207 | if (ret) | 3321 | if (ret) |
3208 | return 1; | 3322 | return 1; |
3209 | while (start <= end) { | 3323 | while (start <= end) { |
@@ -3233,7 +3347,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3233 | return 1; | 3347 | return 1; |
3234 | 3348 | ||
3235 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3349 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3236 | EXTENT_UPTODATE, 1); | 3350 | EXTENT_UPTODATE, 1, NULL); |
3237 | if (ret) | 3351 | if (ret) |
3238 | return ret; | 3352 | return ret; |
3239 | 3353 | ||
@@ -3269,7 +3383,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3269 | return 0; | 3383 | return 0; |
3270 | 3384 | ||
3271 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3385 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3272 | EXTENT_UPTODATE, 1)) { | 3386 | EXTENT_UPTODATE, 1, NULL)) { |
3273 | return 0; | 3387 | return 0; |
3274 | } | 3388 | } |
3275 | 3389 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5bc20abf3f3d..36de250a7b2b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -13,10 +13,9 @@ | |||
13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1 << 6) |
14 | #define EXTENT_DEFRAG_DONE (1 << 7) | 14 | #define EXTENT_DEFRAG_DONE (1 << 7) |
15 | #define EXTENT_BUFFER_FILLED (1 << 8) | 15 | #define EXTENT_BUFFER_FILLED (1 << 8) |
16 | #define EXTENT_ORDERED (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_ORDERED_METADATA (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_BOUNDARY (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
19 | #define EXTENT_NODATASUM (1 << 12) | ||
20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | 20 | ||
22 | /* flags for bio submission */ | 21 | /* flags for bio submission */ |
@@ -27,6 +26,16 @@ | |||
27 | #define EXTENT_BUFFER_BLOCKING 1 | 26 | #define EXTENT_BUFFER_BLOCKING 1 |
28 | #define EXTENT_BUFFER_DIRTY 2 | 27 | #define EXTENT_BUFFER_DIRTY 2 |
29 | 28 | ||
29 | /* these are flags for extent_clear_unlock_delalloc */ | ||
30 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | ||
31 | #define EXTENT_CLEAR_UNLOCK 0x2 | ||
32 | #define EXTENT_CLEAR_DELALLOC 0x4 | ||
33 | #define EXTENT_CLEAR_DIRTY 0x8 | ||
34 | #define EXTENT_SET_WRITEBACK 0x10 | ||
35 | #define EXTENT_END_WRITEBACK 0x20 | ||
36 | #define EXTENT_SET_PRIVATE2 0x40 | ||
37 | #define EXTENT_CLEAR_ACCOUNTING 0x80 | ||
38 | |||
30 | /* | 39 | /* |
31 | * page->private values. Every page that is controlled by the extent | 40 | * page->private values. Every page that is controlled by the extent |
32 | * map has page->private set to one. | 41 | * map has page->private set to one. |
@@ -62,8 +71,13 @@ struct extent_io_ops { | |||
62 | struct extent_state *state, int uptodate); | 71 | struct extent_state *state, int uptodate); |
63 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, |
64 | unsigned long old, unsigned long bits); | 73 | unsigned long old, unsigned long bits); |
65 | int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
66 | unsigned long old, unsigned long bits); | 75 | unsigned long bits); |
76 | int (*merge_extent_hook)(struct inode *inode, | ||
77 | struct extent_state *new, | ||
78 | struct extent_state *other); | ||
79 | int (*split_extent_hook)(struct inode *inode, | ||
80 | struct extent_state *orig, u64 split); | ||
67 | int (*write_cache_pages_lock_hook)(struct page *page); | 81 | int (*write_cache_pages_lock_hook)(struct page *page); |
68 | }; | 82 | }; |
69 | 83 | ||
@@ -81,10 +95,14 @@ struct extent_state { | |||
81 | u64 start; | 95 | u64 start; |
82 | u64 end; /* inclusive */ | 96 | u64 end; /* inclusive */ |
83 | struct rb_node rb_node; | 97 | struct rb_node rb_node; |
98 | |||
99 | /* ADD NEW ELEMENTS AFTER THIS */ | ||
84 | struct extent_io_tree *tree; | 100 | struct extent_io_tree *tree; |
85 | wait_queue_head_t wq; | 101 | wait_queue_head_t wq; |
86 | atomic_t refs; | 102 | atomic_t refs; |
87 | unsigned long state; | 103 | unsigned long state; |
104 | u64 split_start; | ||
105 | u64 split_end; | ||
88 | 106 | ||
89 | /* for use by the FS */ | 107 | /* for use by the FS */ |
90 | u64 private; | 108 | u64 private; |
@@ -142,6 +160,8 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
142 | struct extent_io_tree *tree, struct page *page, | 160 | struct extent_io_tree *tree, struct page *page, |
143 | gfp_t mask); | 161 | gfp_t mask); |
144 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 162 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
163 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
164 | int bits, struct extent_state **cached, gfp_t mask); | ||
145 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 165 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
146 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 166 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
147 | gfp_t mask); | 167 | gfp_t mask); |
@@ -155,11 +175,12 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
155 | u64 max_bytes, unsigned long bits); | 175 | u64 max_bytes, unsigned long bits); |
156 | 176 | ||
157 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 177 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
158 | int bits, int filled); | 178 | int bits, int filled, struct extent_state *cached_state); |
159 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 179 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
160 | int bits, gfp_t mask); | 180 | int bits, gfp_t mask); |
161 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 181 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
162 | int bits, int wake, int delete, gfp_t mask); | 182 | int bits, int wake, int delete, struct extent_state **cached, |
183 | gfp_t mask); | ||
163 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
164 | int bits, gfp_t mask); | 185 | int bits, gfp_t mask); |
165 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 186 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -278,9 +299,5 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
278 | int extent_clear_unlock_delalloc(struct inode *inode, | 299 | int extent_clear_unlock_delalloc(struct inode *inode, |
279 | struct extent_io_tree *tree, | 300 | struct extent_io_tree *tree, |
280 | u64 start, u64 end, struct page *locked_page, | 301 | u64 start, u64 end, struct page *locked_page, |
281 | int unlock_page, | 302 | unsigned long op); |
282 | int clear_unlock, | ||
283 | int clear_delalloc, int clear_dirty, | ||
284 | int set_writeback, | ||
285 | int end_writeback); | ||
286 | #endif | 303 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 30c9365861e6..2c726b7b9faa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -36,7 +36,7 @@ void extent_map_exit(void) | |||
36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) |
37 | { | 37 | { |
38 | tree->map.rb_node = NULL; | 38 | tree->map.rb_node = NULL; |
39 | spin_lock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
40 | } | 40 | } |
41 | 41 | ||
42 | /** | 42 | /** |
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
198 | return 0; | 198 | return 0; |
199 | } | 199 | } |
200 | 200 | ||
201 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
202 | { | ||
203 | int ret = 0; | ||
204 | struct extent_map *merge = NULL; | ||
205 | struct rb_node *rb; | ||
206 | struct extent_map *em; | ||
207 | |||
208 | write_lock(&tree->lock); | ||
209 | em = lookup_extent_mapping(tree, start, len); | ||
210 | |||
211 | WARN_ON(em->start != start || !em); | ||
212 | |||
213 | if (!em) | ||
214 | goto out; | ||
215 | |||
216 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
217 | |||
218 | if (em->start != 0) { | ||
219 | rb = rb_prev(&em->rb_node); | ||
220 | if (rb) | ||
221 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
222 | if (rb && mergable_maps(merge, em)) { | ||
223 | em->start = merge->start; | ||
224 | em->len += merge->len; | ||
225 | em->block_len += merge->block_len; | ||
226 | em->block_start = merge->block_start; | ||
227 | merge->in_tree = 0; | ||
228 | rb_erase(&merge->rb_node, &tree->map); | ||
229 | free_extent_map(merge); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | rb = rb_next(&em->rb_node); | ||
234 | if (rb) | ||
235 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
236 | if (rb && mergable_maps(em, merge)) { | ||
237 | em->len += merge->len; | ||
238 | em->block_len += merge->len; | ||
239 | rb_erase(&merge->rb_node, &tree->map); | ||
240 | merge->in_tree = 0; | ||
241 | free_extent_map(merge); | ||
242 | } | ||
243 | |||
244 | free_extent_map(em); | ||
245 | out: | ||
246 | write_unlock(&tree->lock); | ||
247 | return ret; | ||
248 | |||
249 | } | ||
250 | |||
201 | /** | 251 | /** |
202 | * add_extent_mapping - add new extent map to the extent tree | 252 | * add_extent_mapping - add new extent map to the extent tree |
203 | * @tree: tree to insert new map in | 253 | * @tree: tree to insert new map in |
@@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
222 | ret = -EEXIST; | 272 | ret = -EEXIST; |
223 | goto out; | 273 | goto out; |
224 | } | 274 | } |
225 | assert_spin_locked(&tree->lock); | ||
226 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 275 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
227 | if (rb) { | 276 | if (rb) { |
228 | ret = -EEXIST; | 277 | ret = -EEXIST; |
@@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
285 | struct rb_node *next = NULL; | 334 | struct rb_node *next = NULL; |
286 | u64 end = range_end(start, len); | 335 | u64 end = range_end(start, len); |
287 | 336 | ||
288 | assert_spin_locked(&tree->lock); | ||
289 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 337 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
290 | if (!rb_node && prev) { | 338 | if (!rb_node && prev) { |
291 | em = rb_entry(prev, struct extent_map, rb_node); | 339 | em = rb_entry(prev, struct extent_map, rb_node); |
@@ -319,6 +367,54 @@ out: | |||
319 | } | 367 | } |
320 | 368 | ||
321 | /** | 369 | /** |
370 | * search_extent_mapping - find a nearby extent map | ||
371 | * @tree: tree to lookup in | ||
372 | * @start: byte offset to start the search | ||
373 | * @len: length of the lookup range | ||
374 | * | ||
375 | * Find and return the first extent_map struct in @tree that intersects the | ||
376 | * [start, len] range. | ||
377 | * | ||
378 | * If one can't be found, any nearby extent may be returned | ||
379 | */ | ||
380 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
381 | u64 start, u64 len) | ||
382 | { | ||
383 | struct extent_map *em; | ||
384 | struct rb_node *rb_node; | ||
385 | struct rb_node *prev = NULL; | ||
386 | struct rb_node *next = NULL; | ||
387 | |||
388 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
389 | if (!rb_node && prev) { | ||
390 | em = rb_entry(prev, struct extent_map, rb_node); | ||
391 | goto found; | ||
392 | } | ||
393 | if (!rb_node && next) { | ||
394 | em = rb_entry(next, struct extent_map, rb_node); | ||
395 | goto found; | ||
396 | } | ||
397 | if (!rb_node) { | ||
398 | em = NULL; | ||
399 | goto out; | ||
400 | } | ||
401 | if (IS_ERR(rb_node)) { | ||
402 | em = ERR_PTR(PTR_ERR(rb_node)); | ||
403 | goto out; | ||
404 | } | ||
405 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
406 | goto found; | ||
407 | |||
408 | em = NULL; | ||
409 | goto out; | ||
410 | |||
411 | found: | ||
412 | atomic_inc(&em->refs); | ||
413 | out: | ||
414 | return em; | ||
415 | } | ||
416 | |||
417 | /** | ||
322 | * remove_extent_mapping - removes an extent_map from the extent tree | 418 | * remove_extent_mapping - removes an extent_map from the extent tree |
323 | * @tree: extent tree to remove from | 419 | * @tree: extent tree to remove from |
324 | * @em: extent map being removed | 420 | * @em: extent map being removed |
@@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
331 | int ret = 0; | 427 | int ret = 0; |
332 | 428 | ||
333 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 429 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
334 | assert_spin_locked(&tree->lock); | ||
335 | rb_erase(&em->rb_node, &tree->map); | 430 | rb_erase(&em->rb_node, &tree->map); |
336 | em->in_tree = 0; | 431 | em->in_tree = 0; |
337 | return ret; | 432 | return ret; |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fb6eeef06bb0..ab6d74b6e647 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -31,7 +31,7 @@ struct extent_map { | |||
31 | 31 | ||
32 | struct extent_map_tree { | 32 | struct extent_map_tree { |
33 | struct rb_root map; | 33 | struct rb_root map; |
34 | spinlock_t lock; | 34 | rwlock_t lock; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | static inline u64 extent_map_end(struct extent_map *em) | 37 | static inline u64 extent_map_end(struct extent_map *em) |
@@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); | |||
59 | void free_extent_map(struct extent_map *em); | 59 | void free_extent_map(struct extent_map *em); |
60 | int __init extent_map_init(void); | 60 | int __init extent_map_init(void); |
61 | void extent_map_exit(void); | 61 | void extent_map_exit(void); |
62 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | ||
63 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
64 | u64 start, u64 len); | ||
62 | #endif | 65 | #endif |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4b833972273a..06550affbd27 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
112 | int err = 0; | 112 | int err = 0; |
113 | int i; | 113 | int i; |
114 | struct inode *inode = fdentry(file)->d_inode; | 114 | struct inode *inode = fdentry(file)->d_inode; |
115 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
116 | u64 hint_byte; | ||
117 | u64 num_bytes; | 115 | u64 num_bytes; |
118 | u64 start_pos; | 116 | u64 start_pos; |
119 | u64 end_of_last_block; | 117 | u64 end_of_last_block; |
@@ -125,23 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
125 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
126 | 124 | ||
127 | end_of_last_block = start_pos + num_bytes - 1; | 125 | end_of_last_block = start_pos + num_bytes - 1; |
126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
127 | if (err) | ||
128 | return err; | ||
128 | 129 | ||
129 | lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
130 | trans = btrfs_join_transaction(root, 1); | ||
131 | if (!trans) { | ||
132 | err = -ENOMEM; | ||
133 | goto out_unlock; | ||
134 | } | ||
135 | btrfs_set_trans_block_group(trans, inode); | ||
136 | hint_byte = 0; | ||
137 | |||
138 | set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
139 | |||
140 | /* check for reserved extents on each page, we don't want | ||
141 | * to reset the delalloc bit on things that already have | ||
142 | * extents reserved. | ||
143 | */ | ||
144 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
145 | for (i = 0; i < num_pages; i++) { | 130 | for (i = 0; i < num_pages; i++) { |
146 | struct page *p = pages[i]; | 131 | struct page *p = pages[i]; |
147 | SetPageUptodate(p); | 132 | SetPageUptodate(p); |
@@ -155,9 +140,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
155 | * at this time. | 140 | * at this time. |
156 | */ | 141 | */ |
157 | } | 142 | } |
158 | err = btrfs_end_transaction(trans, root); | ||
159 | out_unlock: | ||
160 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
161 | return err; | 143 | return err; |
162 | } | 144 | } |
163 | 145 | ||
@@ -189,18 +171,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
189 | if (!split2) | 171 | if (!split2) |
190 | split2 = alloc_extent_map(GFP_NOFS); | 172 | split2 = alloc_extent_map(GFP_NOFS); |
191 | 173 | ||
192 | spin_lock(&em_tree->lock); | 174 | write_lock(&em_tree->lock); |
193 | em = lookup_extent_mapping(em_tree, start, len); | 175 | em = lookup_extent_mapping(em_tree, start, len); |
194 | if (!em) { | 176 | if (!em) { |
195 | spin_unlock(&em_tree->lock); | 177 | write_unlock(&em_tree->lock); |
196 | break; | 178 | break; |
197 | } | 179 | } |
198 | flags = em->flags; | 180 | flags = em->flags; |
199 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 181 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
200 | spin_unlock(&em_tree->lock); | ||
201 | if (em->start <= start && | 182 | if (em->start <= start && |
202 | (!testend || em->start + em->len >= start + len)) { | 183 | (!testend || em->start + em->len >= start + len)) { |
203 | free_extent_map(em); | 184 | free_extent_map(em); |
185 | write_unlock(&em_tree->lock); | ||
204 | break; | 186 | break; |
205 | } | 187 | } |
206 | if (start < em->start) { | 188 | if (start < em->start) { |
@@ -210,6 +192,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
210 | start = em->start + em->len; | 192 | start = em->start + em->len; |
211 | } | 193 | } |
212 | free_extent_map(em); | 194 | free_extent_map(em); |
195 | write_unlock(&em_tree->lock); | ||
213 | continue; | 196 | continue; |
214 | } | 197 | } |
215 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 198 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
@@ -260,7 +243,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
260 | free_extent_map(split); | 243 | free_extent_map(split); |
261 | split = NULL; | 244 | split = NULL; |
262 | } | 245 | } |
263 | spin_unlock(&em_tree->lock); | 246 | write_unlock(&em_tree->lock); |
264 | 247 | ||
265 | /* once for us */ | 248 | /* once for us */ |
266 | free_extent_map(em); | 249 | free_extent_map(em); |
@@ -289,7 +272,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
289 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 272 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
290 | struct btrfs_root *root, struct inode *inode, | 273 | struct btrfs_root *root, struct inode *inode, |
291 | u64 start, u64 end, u64 locked_end, | 274 | u64 start, u64 end, u64 locked_end, |
292 | u64 inline_limit, u64 *hint_byte) | 275 | u64 inline_limit, u64 *hint_byte, int drop_cache) |
293 | { | 276 | { |
294 | u64 extent_end = 0; | 277 | u64 extent_end = 0; |
295 | u64 search_start = start; | 278 | u64 search_start = start; |
@@ -314,7 +297,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
314 | int ret; | 297 | int ret; |
315 | 298 | ||
316 | inline_limit = 0; | 299 | inline_limit = 0; |
317 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 300 | if (drop_cache) |
301 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
318 | 302 | ||
319 | path = btrfs_alloc_path(); | 303 | path = btrfs_alloc_path(); |
320 | if (!path) | 304 | if (!path) |
@@ -894,7 +878,8 @@ again: | |||
894 | btrfs_put_ordered_extent(ordered); | 878 | btrfs_put_ordered_extent(ordered); |
895 | 879 | ||
896 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, | 880 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, |
897 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, | 881 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
882 | EXTENT_DO_ACCOUNTING, | ||
898 | GFP_NOFS); | 883 | GFP_NOFS); |
899 | unlock_extent(&BTRFS_I(inode)->io_tree, | 884 | unlock_extent(&BTRFS_I(inode)->io_tree, |
900 | start_pos, last_pos - 1, GFP_NOFS); | 885 | start_pos, last_pos - 1, GFP_NOFS); |
@@ -936,21 +921,35 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
936 | start_pos = pos; | 921 | start_pos = pos; |
937 | 922 | ||
938 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 923 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
924 | |||
925 | /* do the reserve before the mutex lock in case we have to do some | ||
926 | * flushing. We wouldn't deadlock, but this is more polite. | ||
927 | */ | ||
928 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
929 | if (err) | ||
930 | goto out_nolock; | ||
931 | |||
932 | mutex_lock(&inode->i_mutex); | ||
933 | |||
939 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 934 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
940 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 935 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
941 | if (err) | 936 | if (err) |
942 | goto out_nolock; | 937 | goto out; |
938 | |||
943 | if (count == 0) | 939 | if (count == 0) |
944 | goto out_nolock; | 940 | goto out; |
945 | 941 | ||
946 | err = file_remove_suid(file); | 942 | err = file_remove_suid(file); |
947 | if (err) | 943 | if (err) |
948 | goto out_nolock; | 944 | goto out; |
945 | |||
949 | file_update_time(file); | 946 | file_update_time(file); |
950 | 947 | ||
951 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 948 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
952 | 949 | ||
953 | mutex_lock(&inode->i_mutex); | 950 | /* generic_write_checks can change our pos */ |
951 | start_pos = pos; | ||
952 | |||
954 | BTRFS_I(inode)->sequence++; | 953 | BTRFS_I(inode)->sequence++; |
955 | first_index = pos >> PAGE_CACHE_SHIFT; | 954 | first_index = pos >> PAGE_CACHE_SHIFT; |
956 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 955 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; |
@@ -1024,9 +1023,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1024 | } | 1023 | } |
1025 | 1024 | ||
1026 | if (will_write) { | 1025 | if (will_write) { |
1027 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1026 | filemap_fdatawrite_range(inode->i_mapping, pos, |
1028 | pos + write_bytes - 1, | 1027 | pos + write_bytes - 1); |
1029 | WB_SYNC_ALL); | ||
1030 | } else { | 1028 | } else { |
1031 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1029 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1032 | num_pages); | 1030 | num_pages); |
@@ -1047,6 +1045,7 @@ out: | |||
1047 | mutex_unlock(&inode->i_mutex); | 1045 | mutex_unlock(&inode->i_mutex); |
1048 | if (ret) | 1046 | if (ret) |
1049 | err = ret; | 1047 | err = ret; |
1048 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
1050 | 1049 | ||
1051 | out_nolock: | 1050 | out_nolock: |
1052 | kfree(pages); | 1051 | kfree(pages); |
@@ -1087,8 +1086,10 @@ out_nolock: | |||
1087 | btrfs_end_transaction(trans, root); | 1086 | btrfs_end_transaction(trans, root); |
1088 | else | 1087 | else |
1089 | btrfs_commit_transaction(trans, root); | 1088 | btrfs_commit_transaction(trans, root); |
1090 | } else { | 1089 | } else if (ret != BTRFS_NO_LOG_SYNC) { |
1091 | btrfs_commit_transaction(trans, root); | 1090 | btrfs_commit_transaction(trans, root); |
1091 | } else { | ||
1092 | btrfs_end_transaction(trans, root); | ||
1092 | } | 1093 | } |
1093 | } | 1094 | } |
1094 | if (file->f_flags & O_DIRECT) { | 1095 | if (file->f_flags & O_DIRECT) { |
@@ -1138,6 +1139,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1138 | int ret = 0; | 1139 | int ret = 0; |
1139 | struct btrfs_trans_handle *trans; | 1140 | struct btrfs_trans_handle *trans; |
1140 | 1141 | ||
1142 | |||
1143 | /* we wait first, since the writeback may change the inode */ | ||
1144 | root->log_batch++; | ||
1145 | /* the VFS called filemap_fdatawrite for us */ | ||
1146 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
1147 | root->log_batch++; | ||
1148 | |||
1141 | /* | 1149 | /* |
1142 | * check the transaction that last modified this inode | 1150 | * check the transaction that last modified this inode |
1143 | * and see if its already been committed | 1151 | * and see if its already been committed |
@@ -1145,6 +1153,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1145 | if (!BTRFS_I(inode)->last_trans) | 1153 | if (!BTRFS_I(inode)->last_trans) |
1146 | goto out; | 1154 | goto out; |
1147 | 1155 | ||
1156 | /* | ||
1157 | * if the last transaction that changed this file was before | ||
1158 | * the current transaction, we can bail out now without any | ||
1159 | * syncing | ||
1160 | */ | ||
1148 | mutex_lock(&root->fs_info->trans_mutex); | 1161 | mutex_lock(&root->fs_info->trans_mutex); |
1149 | if (BTRFS_I(inode)->last_trans <= | 1162 | if (BTRFS_I(inode)->last_trans <= |
1150 | root->fs_info->last_trans_committed) { | 1163 | root->fs_info->last_trans_committed) { |
@@ -1154,13 +1167,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1154 | } | 1167 | } |
1155 | mutex_unlock(&root->fs_info->trans_mutex); | 1168 | mutex_unlock(&root->fs_info->trans_mutex); |
1156 | 1169 | ||
1157 | root->log_batch++; | ||
1158 | filemap_fdatawrite(inode->i_mapping); | ||
1159 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
1160 | root->log_batch++; | ||
1161 | |||
1162 | if (datasync && !(inode->i_state & I_DIRTY_PAGES)) | ||
1163 | goto out; | ||
1164 | /* | 1170 | /* |
1165 | * ok we haven't committed the transaction yet, lets do a commit | 1171 | * ok we haven't committed the transaction yet, lets do a commit |
1166 | */ | 1172 | */ |
@@ -1189,21 +1195,25 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1189 | */ | 1195 | */ |
1190 | mutex_unlock(&dentry->d_inode->i_mutex); | 1196 | mutex_unlock(&dentry->d_inode->i_mutex); |
1191 | 1197 | ||
1192 | if (ret > 0) { | 1198 | if (ret != BTRFS_NO_LOG_SYNC) { |
1193 | ret = btrfs_commit_transaction(trans, root); | 1199 | if (ret > 0) { |
1194 | } else { | ||
1195 | ret = btrfs_sync_log(trans, root); | ||
1196 | if (ret == 0) | ||
1197 | ret = btrfs_end_transaction(trans, root); | ||
1198 | else | ||
1199 | ret = btrfs_commit_transaction(trans, root); | 1200 | ret = btrfs_commit_transaction(trans, root); |
1201 | } else { | ||
1202 | ret = btrfs_sync_log(trans, root); | ||
1203 | if (ret == 0) | ||
1204 | ret = btrfs_end_transaction(trans, root); | ||
1205 | else | ||
1206 | ret = btrfs_commit_transaction(trans, root); | ||
1207 | } | ||
1208 | } else { | ||
1209 | ret = btrfs_end_transaction(trans, root); | ||
1200 | } | 1210 | } |
1201 | mutex_lock(&dentry->d_inode->i_mutex); | 1211 | mutex_lock(&dentry->d_inode->i_mutex); |
1202 | out: | 1212 | out: |
1203 | return ret > 0 ? EIO : ret; | 1213 | return ret > 0 ? EIO : ret; |
1204 | } | 1214 | } |
1205 | 1215 | ||
1206 | static struct vm_operations_struct btrfs_file_vm_ops = { | 1216 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
1207 | .fault = filemap_fault, | 1217 | .fault = filemap_fault, |
1208 | .page_mkwrite = btrfs_page_mkwrite, | 1218 | .page_mkwrite = btrfs_page_mkwrite, |
1209 | }; | 1219 | }; |
@@ -1215,7 +1225,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
1215 | return 0; | 1225 | return 0; |
1216 | } | 1226 | } |
1217 | 1227 | ||
1218 | struct file_operations btrfs_file_operations = { | 1228 | const struct file_operations btrfs_file_operations = { |
1219 | .llseek = generic_file_llseek, | 1229 | .llseek = generic_file_llseek, |
1220 | .read = do_sync_read, | 1230 | .read = do_sync_read, |
1221 | .aio_read = generic_file_aio_read, | 1231 | .aio_read = generic_file_aio_read, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5edcee3a617f..5c2caad76212 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
259 | 259 | ||
260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | 260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) |
261 | { | 261 | { |
262 | u64 max_bytes, possible_bytes; | 262 | u64 max_bytes; |
263 | u64 bitmap_bytes; | ||
264 | u64 extent_bytes; | ||
263 | 265 | ||
264 | /* | 266 | /* |
265 | * The goal is to keep the total amount of memory used per 1gb of space | 267 | * The goal is to keep the total amount of memory used per 1gb of space |
@@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
269 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 271 | max_bytes = MAX_CACHE_BYTES_PER_GIG * |
270 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | 272 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); |
271 | 273 | ||
272 | possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + | 274 | /* |
273 | (sizeof(struct btrfs_free_space) * | 275 | * we want to account for 1 more bitmap than what we have so we can make |
274 | block_group->extents_thresh); | 276 | * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as |
277 | * we add more bitmaps. | ||
278 | */ | ||
279 | bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; | ||
275 | 280 | ||
276 | if (possible_bytes > max_bytes) { | 281 | if (bitmap_bytes >= max_bytes) { |
277 | int extent_bytes = max_bytes - | 282 | block_group->extents_thresh = 0; |
278 | (block_group->total_bitmaps * PAGE_CACHE_SIZE); | 283 | return; |
284 | } | ||
279 | 285 | ||
280 | if (extent_bytes <= 0) { | 286 | /* |
281 | block_group->extents_thresh = 0; | 287 | * we want the extent entry threshold to always be at most 1/2 the maxw |
282 | return; | 288 | * bytes we can have, or whatever is less than that. |
283 | } | 289 | */ |
290 | extent_bytes = max_bytes - bitmap_bytes; | ||
291 | extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); | ||
284 | 292 | ||
285 | block_group->extents_thresh = extent_bytes / | 293 | block_group->extents_thresh = |
286 | (sizeof(struct btrfs_free_space)); | 294 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
287 | } | ||
288 | } | 295 | } |
289 | 296 | ||
290 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, | 297 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, |
@@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | |||
403 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); | 410 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); |
404 | 411 | ||
405 | info->offset = offset_to_bitmap(block_group, offset); | 412 | info->offset = offset_to_bitmap(block_group, offset); |
413 | info->bytes = 0; | ||
406 | link_free_space(block_group, info); | 414 | link_free_space(block_group, info); |
407 | block_group->total_bitmaps++; | 415 | block_group->total_bitmaps++; |
408 | 416 | ||
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6b627c611808..72ce3c173d6a 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
149 | ptr = (unsigned long)(ref + 1); | 149 | ptr = (unsigned long)(ref + 1); |
150 | ret = 0; | 150 | ret = 0; |
151 | } else if (ret < 0) { | 151 | } else if (ret < 0) { |
152 | if (ret == -EOVERFLOW) | ||
153 | ret = -EMLINK; | ||
152 | goto out; | 154 | goto out; |
153 | } else { | 155 | } else { |
154 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 156 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
@@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | |||
177 | 179 | ||
178 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 180 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
179 | sizeof(struct btrfs_inode_item)); | 181 | sizeof(struct btrfs_inode_item)); |
180 | if (ret == 0 && objectid > root->highest_inode) | ||
181 | root->highest_inode = objectid; | ||
182 | return ret; | 182 | return ret; |
183 | } | 183 | } |
184 | 184 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 9abbced1123d..c56eb5909172 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
43 | slot = path->slots[0] - 1; | 43 | slot = path->slots[0] - 1; |
44 | l = path->nodes[0]; | 44 | l = path->nodes[0]; |
45 | btrfs_item_key_to_cpu(l, &found_key, slot); | 45 | btrfs_item_key_to_cpu(l, &found_key, slot); |
46 | *objectid = found_key.objectid; | 46 | *objectid = max_t(u64, found_key.objectid, |
47 | BTRFS_FIRST_FREE_OBJECTID - 1); | ||
47 | } else { | 48 | } else { |
48 | *objectid = BTRFS_FIRST_FREE_OBJECTID; | 49 | *objectid = BTRFS_FIRST_FREE_OBJECTID - 1; |
49 | } | 50 | } |
50 | ret = 0; | 51 | ret = 0; |
51 | error: | 52 | error: |
@@ -53,91 +54,27 @@ error: | |||
53 | return ret; | 54 | return ret; |
54 | } | 55 | } |
55 | 56 | ||
56 | /* | ||
57 | * walks the btree of allocated inodes and find a hole. | ||
58 | */ | ||
59 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 57 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
60 | struct btrfs_root *root, | 58 | struct btrfs_root *root, |
61 | u64 dirid, u64 *objectid) | 59 | u64 dirid, u64 *objectid) |
62 | { | 60 | { |
63 | struct btrfs_path *path; | ||
64 | struct btrfs_key key; | ||
65 | int ret; | 61 | int ret; |
66 | int slot = 0; | ||
67 | u64 last_ino = 0; | ||
68 | int start_found; | ||
69 | struct extent_buffer *l; | ||
70 | struct btrfs_key search_key; | ||
71 | u64 search_start = dirid; | ||
72 | |||
73 | mutex_lock(&root->objectid_mutex); | 62 | mutex_lock(&root->objectid_mutex); |
74 | if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && | ||
75 | root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { | ||
76 | *objectid = ++root->last_inode_alloc; | ||
77 | mutex_unlock(&root->objectid_mutex); | ||
78 | return 0; | ||
79 | } | ||
80 | path = btrfs_alloc_path(); | ||
81 | BUG_ON(!path); | ||
82 | search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); | ||
83 | search_key.objectid = search_start; | ||
84 | search_key.type = 0; | ||
85 | search_key.offset = 0; | ||
86 | |||
87 | start_found = 0; | ||
88 | ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); | ||
89 | if (ret < 0) | ||
90 | goto error; | ||
91 | 63 | ||
92 | while (1) { | 64 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { |
93 | l = path->nodes[0]; | 65 | ret = btrfs_find_highest_inode(root, &root->highest_objectid); |
94 | slot = path->slots[0]; | 66 | if (ret) |
95 | if (slot >= btrfs_header_nritems(l)) { | 67 | goto out; |
96 | ret = btrfs_next_leaf(root, path); | 68 | } |
97 | if (ret == 0) | ||
98 | continue; | ||
99 | if (ret < 0) | ||
100 | goto error; | ||
101 | if (!start_found) { | ||
102 | *objectid = search_start; | ||
103 | start_found = 1; | ||
104 | goto found; | ||
105 | } | ||
106 | *objectid = last_ino > search_start ? | ||
107 | last_ino : search_start; | ||
108 | goto found; | ||
109 | } | ||
110 | btrfs_item_key_to_cpu(l, &key, slot); | ||
111 | if (key.objectid >= search_start) { | ||
112 | if (start_found) { | ||
113 | if (last_ino < search_start) | ||
114 | last_ino = search_start; | ||
115 | if (key.objectid > last_ino) { | ||
116 | *objectid = last_ino; | ||
117 | goto found; | ||
118 | } | ||
119 | } else if (key.objectid > search_start) { | ||
120 | *objectid = search_start; | ||
121 | goto found; | ||
122 | } | ||
123 | } | ||
124 | if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) | ||
125 | break; | ||
126 | 69 | ||
127 | start_found = 1; | 70 | if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { |
128 | last_ino = key.objectid + 1; | 71 | ret = -ENOSPC; |
129 | path->slots[0]++; | 72 | goto out; |
130 | } | 73 | } |
131 | BUG_ON(1); | 74 | |
132 | found: | 75 | *objectid = ++root->highest_objectid; |
133 | btrfs_release_path(root, path); | 76 | ret = 0; |
134 | btrfs_free_path(path); | 77 | out: |
135 | BUG_ON(*objectid < search_start); | ||
136 | mutex_unlock(&root->objectid_mutex); | ||
137 | return 0; | ||
138 | error: | ||
139 | btrfs_release_path(root, path); | ||
140 | btrfs_free_path(path); | ||
141 | mutex_unlock(&root->objectid_mutex); | 78 | mutex_unlock(&root->objectid_mutex); |
142 | return ret; | 79 | return ret; |
143 | } | 80 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59cba180fe83..dae12dc7e159 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -55,14 +55,14 @@ struct btrfs_iget_args { | |||
55 | struct btrfs_root *root; | 55 | struct btrfs_root *root; |
56 | }; | 56 | }; |
57 | 57 | ||
58 | static struct inode_operations btrfs_dir_inode_operations; | 58 | static const struct inode_operations btrfs_dir_inode_operations; |
59 | static struct inode_operations btrfs_symlink_inode_operations; | 59 | static const struct inode_operations btrfs_symlink_inode_operations; |
60 | static struct inode_operations btrfs_dir_ro_inode_operations; | 60 | static const struct inode_operations btrfs_dir_ro_inode_operations; |
61 | static struct inode_operations btrfs_special_inode_operations; | 61 | static const struct inode_operations btrfs_special_inode_operations; |
62 | static struct inode_operations btrfs_file_inode_operations; | 62 | static const struct inode_operations btrfs_file_inode_operations; |
63 | static struct address_space_operations btrfs_aops; | 63 | static const struct address_space_operations btrfs_aops; |
64 | static struct address_space_operations btrfs_symlink_aops; | 64 | static const struct address_space_operations btrfs_symlink_aops; |
65 | static struct file_operations btrfs_dir_file_operations; | 65 | static const struct file_operations btrfs_dir_file_operations; |
66 | static struct extent_io_ops btrfs_extent_io_ops; | 66 | static struct extent_io_ops btrfs_extent_io_ops; |
67 | 67 | ||
68 | static struct kmem_cache *btrfs_inode_cachep; | 68 | static struct kmem_cache *btrfs_inode_cachep; |
@@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
231 | } | 231 | } |
232 | 232 | ||
233 | ret = btrfs_drop_extents(trans, root, inode, start, | 233 | ret = btrfs_drop_extents(trans, root, inode, start, |
234 | aligned_end, aligned_end, start, &hint_byte); | 234 | aligned_end, aligned_end, start, |
235 | &hint_byte, 1); | ||
235 | BUG_ON(ret); | 236 | BUG_ON(ret); |
236 | 237 | ||
237 | if (isize > actual_end) | 238 | if (isize > actual_end) |
@@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
240 | inline_len, compressed_size, | 241 | inline_len, compressed_size, |
241 | compressed_pages); | 242 | compressed_pages); |
242 | BUG_ON(ret); | 243 | BUG_ON(ret); |
243 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | 244 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
244 | return 0; | 245 | return 0; |
245 | } | 246 | } |
246 | 247 | ||
@@ -423,9 +424,12 @@ again: | |||
423 | * and free up our temp pages. | 424 | * and free up our temp pages. |
424 | */ | 425 | */ |
425 | extent_clear_unlock_delalloc(inode, | 426 | extent_clear_unlock_delalloc(inode, |
426 | &BTRFS_I(inode)->io_tree, | 427 | &BTRFS_I(inode)->io_tree, |
427 | start, end, NULL, 1, 0, | 428 | start, end, NULL, |
428 | 0, 1, 1, 1); | 429 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
430 | EXTENT_CLEAR_DELALLOC | | ||
431 | EXTENT_CLEAR_ACCOUNTING | | ||
432 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | ||
429 | ret = 0; | 433 | ret = 0; |
430 | goto free_pages_out; | 434 | goto free_pages_out; |
431 | } | 435 | } |
@@ -611,9 +615,9 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
611 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 615 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
612 | 616 | ||
613 | while (1) { | 617 | while (1) { |
614 | spin_lock(&em_tree->lock); | 618 | write_lock(&em_tree->lock); |
615 | ret = add_extent_mapping(em_tree, em); | 619 | ret = add_extent_mapping(em_tree, em); |
616 | spin_unlock(&em_tree->lock); | 620 | write_unlock(&em_tree->lock); |
617 | if (ret != -EEXIST) { | 621 | if (ret != -EEXIST) { |
618 | free_extent_map(em); | 622 | free_extent_map(em); |
619 | break; | 623 | break; |
@@ -636,11 +640,14 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
636 | * clear dirty, set writeback and unlock the pages. | 640 | * clear dirty, set writeback and unlock the pages. |
637 | */ | 641 | */ |
638 | extent_clear_unlock_delalloc(inode, | 642 | extent_clear_unlock_delalloc(inode, |
639 | &BTRFS_I(inode)->io_tree, | 643 | &BTRFS_I(inode)->io_tree, |
640 | async_extent->start, | 644 | async_extent->start, |
641 | async_extent->start + | 645 | async_extent->start + |
642 | async_extent->ram_size - 1, | 646 | async_extent->ram_size - 1, |
643 | NULL, 1, 1, 0, 1, 1, 0); | 647 | NULL, EXTENT_CLEAR_UNLOCK_PAGE | |
648 | EXTENT_CLEAR_UNLOCK | | ||
649 | EXTENT_CLEAR_DELALLOC | | ||
650 | EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); | ||
644 | 651 | ||
645 | ret = btrfs_submit_compressed_write(inode, | 652 | ret = btrfs_submit_compressed_write(inode, |
646 | async_extent->start, | 653 | async_extent->start, |
@@ -711,9 +718,15 @@ static noinline int cow_file_range(struct inode *inode, | |||
711 | start, end, 0, NULL); | 718 | start, end, 0, NULL); |
712 | if (ret == 0) { | 719 | if (ret == 0) { |
713 | extent_clear_unlock_delalloc(inode, | 720 | extent_clear_unlock_delalloc(inode, |
714 | &BTRFS_I(inode)->io_tree, | 721 | &BTRFS_I(inode)->io_tree, |
715 | start, end, NULL, 1, 1, | 722 | start, end, NULL, |
716 | 1, 1, 1, 1); | 723 | EXTENT_CLEAR_UNLOCK_PAGE | |
724 | EXTENT_CLEAR_UNLOCK | | ||
725 | EXTENT_CLEAR_DELALLOC | | ||
726 | EXTENT_CLEAR_ACCOUNTING | | ||
727 | EXTENT_CLEAR_DIRTY | | ||
728 | EXTENT_SET_WRITEBACK | | ||
729 | EXTENT_END_WRITEBACK); | ||
717 | *nr_written = *nr_written + | 730 | *nr_written = *nr_written + |
718 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; | 731 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; |
719 | *page_started = 1; | 732 | *page_started = 1; |
@@ -725,9 +738,20 @@ static noinline int cow_file_range(struct inode *inode, | |||
725 | BUG_ON(disk_num_bytes > | 738 | BUG_ON(disk_num_bytes > |
726 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 739 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
727 | 740 | ||
741 | |||
742 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
743 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
744 | start, num_bytes); | ||
745 | if (em) { | ||
746 | alloc_hint = em->block_start; | ||
747 | free_extent_map(em); | ||
748 | } | ||
749 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
728 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 750 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
729 | 751 | ||
730 | while (disk_num_bytes > 0) { | 752 | while (disk_num_bytes > 0) { |
753 | unsigned long op; | ||
754 | |||
731 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | 755 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); |
732 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 756 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
733 | root->sectorsize, 0, alloc_hint, | 757 | root->sectorsize, 0, alloc_hint, |
@@ -737,7 +761,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
737 | em = alloc_extent_map(GFP_NOFS); | 761 | em = alloc_extent_map(GFP_NOFS); |
738 | em->start = start; | 762 | em->start = start; |
739 | em->orig_start = em->start; | 763 | em->orig_start = em->start; |
740 | |||
741 | ram_size = ins.offset; | 764 | ram_size = ins.offset; |
742 | em->len = ins.offset; | 765 | em->len = ins.offset; |
743 | 766 | ||
@@ -747,9 +770,9 @@ static noinline int cow_file_range(struct inode *inode, | |||
747 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 770 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
748 | 771 | ||
749 | while (1) { | 772 | while (1) { |
750 | spin_lock(&em_tree->lock); | 773 | write_lock(&em_tree->lock); |
751 | ret = add_extent_mapping(em_tree, em); | 774 | ret = add_extent_mapping(em_tree, em); |
752 | spin_unlock(&em_tree->lock); | 775 | write_unlock(&em_tree->lock); |
753 | if (ret != -EEXIST) { | 776 | if (ret != -EEXIST) { |
754 | free_extent_map(em); | 777 | free_extent_map(em); |
755 | break; | 778 | break; |
@@ -776,11 +799,17 @@ static noinline int cow_file_range(struct inode *inode, | |||
776 | /* we're not doing compressed IO, don't unlock the first | 799 | /* we're not doing compressed IO, don't unlock the first |
777 | * page (which the caller expects to stay locked), don't | 800 | * page (which the caller expects to stay locked), don't |
778 | * clear any dirty bits and don't set any writeback bits | 801 | * clear any dirty bits and don't set any writeback bits |
802 | * | ||
803 | * Do set the Private2 bit so we know this page was properly | ||
804 | * setup for writepage | ||
779 | */ | 805 | */ |
806 | op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; | ||
807 | op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | | ||
808 | EXTENT_SET_PRIVATE2; | ||
809 | |||
780 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 810 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
781 | start, start + ram_size - 1, | 811 | start, start + ram_size - 1, |
782 | locked_page, unlock, 1, | 812 | locked_page, op); |
783 | 1, 0, 0, 0); | ||
784 | disk_num_bytes -= cur_alloc_size; | 813 | disk_num_bytes -= cur_alloc_size; |
785 | num_bytes -= cur_alloc_size; | 814 | num_bytes -= cur_alloc_size; |
786 | alloc_hint = ins.objectid + ins.offset; | 815 | alloc_hint = ins.objectid + ins.offset; |
@@ -852,8 +881,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
852 | u64 cur_end; | 881 | u64 cur_end; |
853 | int limit = 10 * 1024 * 1042; | 882 | int limit = 10 * 1024 * 1042; |
854 | 883 | ||
855 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 884 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, |
856 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 885 | 1, 0, NULL, GFP_NOFS); |
857 | while (start < end) { | 886 | while (start < end) { |
858 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); | 887 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
859 | async_cow->inode = inode; | 888 | async_cow->inode = inode; |
@@ -994,6 +1023,7 @@ next_slot: | |||
994 | 1023 | ||
995 | if (found_key.offset > cur_offset) { | 1024 | if (found_key.offset > cur_offset) { |
996 | extent_end = found_key.offset; | 1025 | extent_end = found_key.offset; |
1026 | extent_type = 0; | ||
997 | goto out_check; | 1027 | goto out_check; |
998 | } | 1028 | } |
999 | 1029 | ||
@@ -1080,9 +1110,9 @@ out_check: | |||
1080 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1110 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
1081 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1111 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
1082 | while (1) { | 1112 | while (1) { |
1083 | spin_lock(&em_tree->lock); | 1113 | write_lock(&em_tree->lock); |
1084 | ret = add_extent_mapping(em_tree, em); | 1114 | ret = add_extent_mapping(em_tree, em); |
1085 | spin_unlock(&em_tree->lock); | 1115 | write_unlock(&em_tree->lock); |
1086 | if (ret != -EEXIST) { | 1116 | if (ret != -EEXIST) { |
1087 | free_extent_map(em); | 1117 | free_extent_map(em); |
1088 | break; | 1118 | break; |
@@ -1100,8 +1130,10 @@ out_check: | |||
1100 | BUG_ON(ret); | 1130 | BUG_ON(ret); |
1101 | 1131 | ||
1102 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1132 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
1103 | cur_offset, cur_offset + num_bytes - 1, | 1133 | cur_offset, cur_offset + num_bytes - 1, |
1104 | locked_page, 1, 1, 1, 0, 0, 0); | 1134 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
1135 | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | | ||
1136 | EXTENT_SET_PRIVATE2); | ||
1105 | cur_offset = extent_end; | 1137 | cur_offset = extent_end; |
1106 | if (cur_offset > end) | 1138 | if (cur_offset > end) |
1107 | break; | 1139 | break; |
@@ -1147,6 +1179,89 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1147 | return ret; | 1179 | return ret; |
1148 | } | 1180 | } |
1149 | 1181 | ||
1182 | static int btrfs_split_extent_hook(struct inode *inode, | ||
1183 | struct extent_state *orig, u64 split) | ||
1184 | { | ||
1185 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1186 | u64 size; | ||
1187 | |||
1188 | if (!(orig->state & EXTENT_DELALLOC)) | ||
1189 | return 0; | ||
1190 | |||
1191 | size = orig->end - orig->start + 1; | ||
1192 | if (size > root->fs_info->max_extent) { | ||
1193 | u64 num_extents; | ||
1194 | u64 new_size; | ||
1195 | |||
1196 | new_size = orig->end - split + 1; | ||
1197 | num_extents = div64_u64(size + root->fs_info->max_extent - 1, | ||
1198 | root->fs_info->max_extent); | ||
1199 | |||
1200 | /* | ||
1201 | * if we break a large extent up then leave oustanding_extents | ||
1202 | * be, since we've already accounted for the large extent. | ||
1203 | */ | ||
1204 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
1205 | root->fs_info->max_extent) < num_extents) | ||
1206 | return 0; | ||
1207 | } | ||
1208 | |||
1209 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1210 | BTRFS_I(inode)->outstanding_extents++; | ||
1211 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1212 | |||
1213 | return 0; | ||
1214 | } | ||
1215 | |||
1216 | /* | ||
1217 | * extent_io.c merge_extent_hook, used to track merged delayed allocation | ||
1218 | * extents so we can keep track of new extents that are just merged onto old | ||
1219 | * extents, such as when we are doing sequential writes, so we can properly | ||
1220 | * account for the metadata space we'll need. | ||
1221 | */ | ||
1222 | static int btrfs_merge_extent_hook(struct inode *inode, | ||
1223 | struct extent_state *new, | ||
1224 | struct extent_state *other) | ||
1225 | { | ||
1226 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1227 | u64 new_size, old_size; | ||
1228 | u64 num_extents; | ||
1229 | |||
1230 | /* not delalloc, ignore it */ | ||
1231 | if (!(other->state & EXTENT_DELALLOC)) | ||
1232 | return 0; | ||
1233 | |||
1234 | old_size = other->end - other->start + 1; | ||
1235 | if (new->start < other->start) | ||
1236 | new_size = other->end - new->start + 1; | ||
1237 | else | ||
1238 | new_size = new->end - other->start + 1; | ||
1239 | |||
1240 | /* we're not bigger than the max, unreserve the space and go */ | ||
1241 | if (new_size <= root->fs_info->max_extent) { | ||
1242 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1243 | BTRFS_I(inode)->outstanding_extents--; | ||
1244 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1245 | return 0; | ||
1246 | } | ||
1247 | |||
1248 | /* | ||
1249 | * If we grew by another max_extent, just return, we want to keep that | ||
1250 | * reserved amount. | ||
1251 | */ | ||
1252 | num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, | ||
1253 | root->fs_info->max_extent); | ||
1254 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
1255 | root->fs_info->max_extent) > num_extents) | ||
1256 | return 0; | ||
1257 | |||
1258 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1259 | BTRFS_I(inode)->outstanding_extents--; | ||
1260 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1261 | |||
1262 | return 0; | ||
1263 | } | ||
1264 | |||
1150 | /* | 1265 | /* |
1151 | * extent_io.c set_bit_hook, used to track delayed allocation | 1266 | * extent_io.c set_bit_hook, used to track delayed allocation |
1152 | * bytes in this file, and to maintain the list of inodes that | 1267 | * bytes in this file, and to maintain the list of inodes that |
@@ -1155,6 +1270,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1155 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1270 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, |
1156 | unsigned long old, unsigned long bits) | 1271 | unsigned long old, unsigned long bits) |
1157 | { | 1272 | { |
1273 | |||
1158 | /* | 1274 | /* |
1159 | * set_bit and clear bit hooks normally require _irqsave/restore | 1275 | * set_bit and clear bit hooks normally require _irqsave/restore |
1160 | * but in this case, we are only testeing for the DELALLOC | 1276 | * but in this case, we are only testeing for the DELALLOC |
@@ -1162,6 +1278,10 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1162 | */ | 1278 | */ |
1163 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1279 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
1164 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1280 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1281 | |||
1282 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1283 | BTRFS_I(inode)->outstanding_extents++; | ||
1284 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1165 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1285 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); |
1166 | spin_lock(&root->fs_info->delalloc_lock); | 1286 | spin_lock(&root->fs_info->delalloc_lock); |
1167 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1287 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; |
@@ -1178,22 +1298,31 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1178 | /* | 1298 | /* |
1179 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1299 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1180 | */ | 1300 | */ |
1181 | static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | 1301 | static int btrfs_clear_bit_hook(struct inode *inode, |
1182 | unsigned long old, unsigned long bits) | 1302 | struct extent_state *state, unsigned long bits) |
1183 | { | 1303 | { |
1184 | /* | 1304 | /* |
1185 | * set_bit and clear bit hooks normally require _irqsave/restore | 1305 | * set_bit and clear bit hooks normally require _irqsave/restore |
1186 | * but in this case, we are only testeing for the DELALLOC | 1306 | * but in this case, we are only testeing for the DELALLOC |
1187 | * bit, which is only set or cleared with irqs on | 1307 | * bit, which is only set or cleared with irqs on |
1188 | */ | 1308 | */ |
1189 | if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1309 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
1190 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1310 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1191 | 1311 | ||
1312 | if (bits & EXTENT_DO_ACCOUNTING) { | ||
1313 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
1314 | BTRFS_I(inode)->outstanding_extents--; | ||
1315 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1316 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
1317 | } | ||
1318 | |||
1192 | spin_lock(&root->fs_info->delalloc_lock); | 1319 | spin_lock(&root->fs_info->delalloc_lock); |
1193 | if (end - start + 1 > root->fs_info->delalloc_bytes) { | 1320 | if (state->end - state->start + 1 > |
1321 | root->fs_info->delalloc_bytes) { | ||
1194 | printk(KERN_INFO "btrfs warning: delalloc account " | 1322 | printk(KERN_INFO "btrfs warning: delalloc account " |
1195 | "%llu %llu\n", | 1323 | "%llu %llu\n", |
1196 | (unsigned long long)end - start + 1, | 1324 | (unsigned long long) |
1325 | state->end - state->start + 1, | ||
1197 | (unsigned long long) | 1326 | (unsigned long long) |
1198 | root->fs_info->delalloc_bytes); | 1327 | root->fs_info->delalloc_bytes); |
1199 | btrfs_delalloc_free_space(root, inode, (u64)-1); | 1328 | btrfs_delalloc_free_space(root, inode, (u64)-1); |
@@ -1201,9 +1330,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1201 | BTRFS_I(inode)->delalloc_bytes = 0; | 1330 | BTRFS_I(inode)->delalloc_bytes = 0; |
1202 | } else { | 1331 | } else { |
1203 | btrfs_delalloc_free_space(root, inode, | 1332 | btrfs_delalloc_free_space(root, inode, |
1204 | end - start + 1); | 1333 | state->end - |
1205 | root->fs_info->delalloc_bytes -= end - start + 1; | 1334 | state->start + 1); |
1206 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; | 1335 | root->fs_info->delalloc_bytes -= state->end - |
1336 | state->start + 1; | ||
1337 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
1338 | state->start + 1; | ||
1207 | } | 1339 | } |
1208 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
1209 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
@@ -1374,10 +1506,8 @@ again: | |||
1374 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 1506 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); |
1375 | 1507 | ||
1376 | /* already ordered? We're done */ | 1508 | /* already ordered? We're done */ |
1377 | if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 1509 | if (PagePrivate2(page)) |
1378 | EXTENT_ORDERED, 0)) { | ||
1379 | goto out; | 1510 | goto out; |
1380 | } | ||
1381 | 1511 | ||
1382 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1512 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
1383 | if (ordered) { | 1513 | if (ordered) { |
@@ -1413,11 +1543,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
1413 | struct inode *inode = page->mapping->host; | 1543 | struct inode *inode = page->mapping->host; |
1414 | struct btrfs_writepage_fixup *fixup; | 1544 | struct btrfs_writepage_fixup *fixup; |
1415 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1545 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1416 | int ret; | ||
1417 | 1546 | ||
1418 | ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | 1547 | /* this page is properly in the ordered list */ |
1419 | EXTENT_ORDERED, 0); | 1548 | if (TestClearPagePrivate2(page)) |
1420 | if (ret) | ||
1421 | return 0; | 1549 | return 0; |
1422 | 1550 | ||
1423 | if (PageChecked(page)) | 1551 | if (PageChecked(page)) |
@@ -1455,9 +1583,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1455 | BUG_ON(!path); | 1583 | BUG_ON(!path); |
1456 | 1584 | ||
1457 | path->leave_spinning = 1; | 1585 | path->leave_spinning = 1; |
1586 | |||
1587 | /* | ||
1588 | * we may be replacing one extent in the tree with another. | ||
1589 | * The new extent is pinned in the extent map, and we don't want | ||
1590 | * to drop it from the cache until it is completely in the btree. | ||
1591 | * | ||
1592 | * So, tell btrfs_drop_extents to leave this extent in the cache. | ||
1593 | * the caller is expected to unpin it and allow it to be merged | ||
1594 | * with the others. | ||
1595 | */ | ||
1458 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1596 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1459 | file_pos + num_bytes, locked_end, | 1597 | file_pos + num_bytes, locked_end, |
1460 | file_pos, &hint); | 1598 | file_pos, &hint, 0); |
1461 | BUG_ON(ret); | 1599 | BUG_ON(ret); |
1462 | 1600 | ||
1463 | ins.objectid = inode->i_ino; | 1601 | ins.objectid = inode->i_ino; |
@@ -1485,7 +1623,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1485 | btrfs_mark_buffer_dirty(leaf); | 1623 | btrfs_mark_buffer_dirty(leaf); |
1486 | 1624 | ||
1487 | inode_add_bytes(inode, num_bytes); | 1625 | inode_add_bytes(inode, num_bytes); |
1488 | btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); | ||
1489 | 1626 | ||
1490 | ins.objectid = disk_bytenr; | 1627 | ins.objectid = disk_bytenr; |
1491 | ins.offset = disk_num_bytes; | 1628 | ins.offset = disk_num_bytes; |
@@ -1596,6 +1733,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1596 | ordered_extent->len, | 1733 | ordered_extent->len, |
1597 | compressed, 0, 0, | 1734 | compressed, 0, 0, |
1598 | BTRFS_FILE_EXTENT_REG); | 1735 | BTRFS_FILE_EXTENT_REG); |
1736 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
1737 | ordered_extent->file_offset, | ||
1738 | ordered_extent->len); | ||
1599 | BUG_ON(ret); | 1739 | BUG_ON(ret); |
1600 | } | 1740 | } |
1601 | unlock_extent(io_tree, ordered_extent->file_offset, | 1741 | unlock_extent(io_tree, ordered_extent->file_offset, |
@@ -1623,6 +1763,7 @@ nocow: | |||
1623 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1763 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
1624 | struct extent_state *state, int uptodate) | 1764 | struct extent_state *state, int uptodate) |
1625 | { | 1765 | { |
1766 | ClearPagePrivate2(page); | ||
1626 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1767 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
1627 | } | 1768 | } |
1628 | 1769 | ||
@@ -1669,13 +1810,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1669 | failrec->last_mirror = 0; | 1810 | failrec->last_mirror = 0; |
1670 | failrec->bio_flags = 0; | 1811 | failrec->bio_flags = 0; |
1671 | 1812 | ||
1672 | spin_lock(&em_tree->lock); | 1813 | read_lock(&em_tree->lock); |
1673 | em = lookup_extent_mapping(em_tree, start, failrec->len); | 1814 | em = lookup_extent_mapping(em_tree, start, failrec->len); |
1674 | if (em->start > start || em->start + em->len < start) { | 1815 | if (em->start > start || em->start + em->len < start) { |
1675 | free_extent_map(em); | 1816 | free_extent_map(em); |
1676 | em = NULL; | 1817 | em = NULL; |
1677 | } | 1818 | } |
1678 | spin_unlock(&em_tree->lock); | 1819 | read_unlock(&em_tree->lock); |
1679 | 1820 | ||
1680 | if (!em || IS_ERR(em)) { | 1821 | if (!em || IS_ERR(em)) { |
1681 | kfree(failrec); | 1822 | kfree(failrec); |
@@ -1794,7 +1935,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
1794 | return 0; | 1935 | return 0; |
1795 | 1936 | ||
1796 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1937 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
1797 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { | 1938 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
1798 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, | 1939 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, |
1799 | GFP_NOFS); | 1940 | GFP_NOFS); |
1800 | return 0; | 1941 | return 0; |
@@ -2352,6 +2493,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2352 | return ret; | 2493 | return ret; |
2353 | } | 2494 | } |
2354 | 2495 | ||
2496 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
2497 | struct btrfs_root *root, | ||
2498 | struct inode *dir, u64 objectid, | ||
2499 | const char *name, int name_len) | ||
2500 | { | ||
2501 | struct btrfs_path *path; | ||
2502 | struct extent_buffer *leaf; | ||
2503 | struct btrfs_dir_item *di; | ||
2504 | struct btrfs_key key; | ||
2505 | u64 index; | ||
2506 | int ret; | ||
2507 | |||
2508 | path = btrfs_alloc_path(); | ||
2509 | if (!path) | ||
2510 | return -ENOMEM; | ||
2511 | |||
2512 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
2513 | name, name_len, -1); | ||
2514 | BUG_ON(!di || IS_ERR(di)); | ||
2515 | |||
2516 | leaf = path->nodes[0]; | ||
2517 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
2518 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
2519 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
2520 | BUG_ON(ret); | ||
2521 | btrfs_release_path(root, path); | ||
2522 | |||
2523 | ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, | ||
2524 | objectid, root->root_key.objectid, | ||
2525 | dir->i_ino, &index, name, name_len); | ||
2526 | if (ret < 0) { | ||
2527 | BUG_ON(ret != -ENOENT); | ||
2528 | di = btrfs_search_dir_index_item(root, path, dir->i_ino, | ||
2529 | name, name_len); | ||
2530 | BUG_ON(!di || IS_ERR(di)); | ||
2531 | |||
2532 | leaf = path->nodes[0]; | ||
2533 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
2534 | btrfs_release_path(root, path); | ||
2535 | index = key.offset; | ||
2536 | } | ||
2537 | |||
2538 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
2539 | index, name, name_len, -1); | ||
2540 | BUG_ON(!di || IS_ERR(di)); | ||
2541 | |||
2542 | leaf = path->nodes[0]; | ||
2543 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
2544 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
2545 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
2546 | BUG_ON(ret); | ||
2547 | btrfs_release_path(root, path); | ||
2548 | |||
2549 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | ||
2550 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
2551 | ret = btrfs_update_inode(trans, root, dir); | ||
2552 | BUG_ON(ret); | ||
2553 | dir->i_sb->s_dirt = 1; | ||
2554 | |||
2555 | btrfs_free_path(path); | ||
2556 | return 0; | ||
2557 | } | ||
2558 | |||
2355 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | 2559 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) |
2356 | { | 2560 | { |
2357 | struct inode *inode = dentry->d_inode; | 2561 | struct inode *inode = dentry->d_inode; |
@@ -2361,29 +2565,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2361 | struct btrfs_trans_handle *trans; | 2565 | struct btrfs_trans_handle *trans; |
2362 | unsigned long nr = 0; | 2566 | unsigned long nr = 0; |
2363 | 2567 | ||
2364 | /* | ||
2365 | * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir | ||
2366 | * the root of a subvolume or snapshot | ||
2367 | */ | ||
2368 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 2568 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || |
2369 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | 2569 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
2370 | return -ENOTEMPTY; | 2570 | return -ENOTEMPTY; |
2371 | } | ||
2372 | 2571 | ||
2373 | trans = btrfs_start_transaction(root, 1); | 2572 | trans = btrfs_start_transaction(root, 1); |
2374 | btrfs_set_trans_block_group(trans, dir); | 2573 | btrfs_set_trans_block_group(trans, dir); |
2375 | 2574 | ||
2575 | if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | ||
2576 | err = btrfs_unlink_subvol(trans, root, dir, | ||
2577 | BTRFS_I(inode)->location.objectid, | ||
2578 | dentry->d_name.name, | ||
2579 | dentry->d_name.len); | ||
2580 | goto out; | ||
2581 | } | ||
2582 | |||
2376 | err = btrfs_orphan_add(trans, inode); | 2583 | err = btrfs_orphan_add(trans, inode); |
2377 | if (err) | 2584 | if (err) |
2378 | goto fail_trans; | 2585 | goto out; |
2379 | 2586 | ||
2380 | /* now the directory is empty */ | 2587 | /* now the directory is empty */ |
2381 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2588 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2382 | dentry->d_name.name, dentry->d_name.len); | 2589 | dentry->d_name.name, dentry->d_name.len); |
2383 | if (!err) | 2590 | if (!err) |
2384 | btrfs_i_size_write(inode, 0); | 2591 | btrfs_i_size_write(inode, 0); |
2385 | 2592 | out: | |
2386 | fail_trans: | ||
2387 | nr = trans->blocks_used; | 2593 | nr = trans->blocks_used; |
2388 | ret = btrfs_end_transaction_throttle(trans, root); | 2594 | ret = btrfs_end_transaction_throttle(trans, root); |
2389 | btrfs_btree_balance_dirty(root, nr); | 2595 | btrfs_btree_balance_dirty(root, nr); |
@@ -2826,12 +3032,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
2826 | 3032 | ||
2827 | if ((offset & (blocksize - 1)) == 0) | 3033 | if ((offset & (blocksize - 1)) == 0) |
2828 | goto out; | 3034 | goto out; |
3035 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | ||
3036 | if (ret) | ||
3037 | goto out; | ||
3038 | |||
3039 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
3040 | if (ret) | ||
3041 | goto out; | ||
2829 | 3042 | ||
2830 | ret = -ENOMEM; | 3043 | ret = -ENOMEM; |
2831 | again: | 3044 | again: |
2832 | page = grab_cache_page(mapping, index); | 3045 | page = grab_cache_page(mapping, index); |
2833 | if (!page) | 3046 | if (!page) { |
3047 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
3048 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
2834 | goto out; | 3049 | goto out; |
3050 | } | ||
2835 | 3051 | ||
2836 | page_start = page_offset(page); | 3052 | page_start = page_offset(page); |
2837 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 3053 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
@@ -2864,7 +3080,16 @@ again: | |||
2864 | goto again; | 3080 | goto again; |
2865 | } | 3081 | } |
2866 | 3082 | ||
2867 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 3083 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, |
3084 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | ||
3085 | GFP_NOFS); | ||
3086 | |||
3087 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
3088 | if (ret) { | ||
3089 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
3090 | goto out_unlock; | ||
3091 | } | ||
3092 | |||
2868 | ret = 0; | 3093 | ret = 0; |
2869 | if (offset != PAGE_CACHE_SIZE) { | 3094 | if (offset != PAGE_CACHE_SIZE) { |
2870 | kaddr = kmap(page); | 3095 | kaddr = kmap(page); |
@@ -2877,6 +3102,9 @@ again: | |||
2877 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 3102 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
2878 | 3103 | ||
2879 | out_unlock: | 3104 | out_unlock: |
3105 | if (ret) | ||
3106 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
3107 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
2880 | unlock_page(page); | 3108 | unlock_page(page); |
2881 | page_cache_release(page); | 3109 | page_cache_release(page); |
2882 | out: | 3110 | out: |
@@ -2895,17 +3123,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2895 | u64 last_byte; | 3123 | u64 last_byte; |
2896 | u64 cur_offset; | 3124 | u64 cur_offset; |
2897 | u64 hole_size; | 3125 | u64 hole_size; |
2898 | int err; | 3126 | int err = 0; |
2899 | 3127 | ||
2900 | if (size <= hole_start) | 3128 | if (size <= hole_start) |
2901 | return 0; | 3129 | return 0; |
2902 | 3130 | ||
2903 | err = btrfs_check_metadata_free_space(root); | 3131 | err = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
2904 | if (err) | 3132 | if (err) |
2905 | return err; | 3133 | return err; |
2906 | 3134 | ||
2907 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | ||
2908 | |||
2909 | while (1) { | 3135 | while (1) { |
2910 | struct btrfs_ordered_extent *ordered; | 3136 | struct btrfs_ordered_extent *ordered; |
2911 | btrfs_wait_ordered_range(inode, hole_start, | 3137 | btrfs_wait_ordered_range(inode, hole_start, |
@@ -2935,15 +3161,21 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2935 | cur_offset, | 3161 | cur_offset, |
2936 | cur_offset + hole_size, | 3162 | cur_offset + hole_size, |
2937 | block_end, | 3163 | block_end, |
2938 | cur_offset, &hint_byte); | 3164 | cur_offset, &hint_byte, 1); |
3165 | if (err) | ||
3166 | break; | ||
3167 | |||
3168 | err = btrfs_reserve_metadata_space(root, 1); | ||
2939 | if (err) | 3169 | if (err) |
2940 | break; | 3170 | break; |
3171 | |||
2941 | err = btrfs_insert_file_extent(trans, root, | 3172 | err = btrfs_insert_file_extent(trans, root, |
2942 | inode->i_ino, cur_offset, 0, | 3173 | inode->i_ino, cur_offset, 0, |
2943 | 0, hole_size, 0, hole_size, | 3174 | 0, hole_size, 0, hole_size, |
2944 | 0, 0, 0); | 3175 | 0, 0, 0); |
2945 | btrfs_drop_extent_cache(inode, hole_start, | 3176 | btrfs_drop_extent_cache(inode, hole_start, |
2946 | last_byte - 1, 0); | 3177 | last_byte - 1, 0); |
3178 | btrfs_unreserve_metadata_space(root, 1); | ||
2947 | } | 3179 | } |
2948 | free_extent_map(em); | 3180 | free_extent_map(em); |
2949 | cur_offset = last_byte; | 3181 | cur_offset = last_byte; |
@@ -3003,6 +3235,11 @@ void btrfs_delete_inode(struct inode *inode) | |||
3003 | } | 3235 | } |
3004 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3236 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
3005 | 3237 | ||
3238 | if (inode->i_nlink > 0) { | ||
3239 | BUG_ON(btrfs_root_refs(&root->root_item) != 0); | ||
3240 | goto no_delete; | ||
3241 | } | ||
3242 | |||
3006 | btrfs_i_size_write(inode, 0); | 3243 | btrfs_i_size_write(inode, 0); |
3007 | trans = btrfs_join_transaction(root, 1); | 3244 | trans = btrfs_join_transaction(root, 1); |
3008 | 3245 | ||
@@ -3070,29 +3307,67 @@ out_err: | |||
3070 | * is kind of like crossing a mount point. | 3307 | * is kind of like crossing a mount point. |
3071 | */ | 3308 | */ |
3072 | static int fixup_tree_root_location(struct btrfs_root *root, | 3309 | static int fixup_tree_root_location(struct btrfs_root *root, |
3073 | struct btrfs_key *location, | 3310 | struct inode *dir, |
3074 | struct btrfs_root **sub_root, | 3311 | struct dentry *dentry, |
3075 | struct dentry *dentry) | 3312 | struct btrfs_key *location, |
3313 | struct btrfs_root **sub_root) | ||
3076 | { | 3314 | { |
3077 | struct btrfs_root_item *ri; | 3315 | struct btrfs_path *path; |
3316 | struct btrfs_root *new_root; | ||
3317 | struct btrfs_root_ref *ref; | ||
3318 | struct extent_buffer *leaf; | ||
3319 | int ret; | ||
3320 | int err = 0; | ||
3078 | 3321 | ||
3079 | if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) | 3322 | path = btrfs_alloc_path(); |
3080 | return 0; | 3323 | if (!path) { |
3081 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | 3324 | err = -ENOMEM; |
3082 | return 0; | 3325 | goto out; |
3326 | } | ||
3083 | 3327 | ||
3084 | *sub_root = btrfs_read_fs_root(root->fs_info, location, | 3328 | err = -ENOENT; |
3085 | dentry->d_name.name, | 3329 | ret = btrfs_find_root_ref(root->fs_info->tree_root, path, |
3086 | dentry->d_name.len); | 3330 | BTRFS_I(dir)->root->root_key.objectid, |
3087 | if (IS_ERR(*sub_root)) | 3331 | location->objectid); |
3088 | return PTR_ERR(*sub_root); | 3332 | if (ret) { |
3333 | if (ret < 0) | ||
3334 | err = ret; | ||
3335 | goto out; | ||
3336 | } | ||
3089 | 3337 | ||
3090 | ri = &(*sub_root)->root_item; | 3338 | leaf = path->nodes[0]; |
3091 | location->objectid = btrfs_root_dirid(ri); | 3339 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); |
3092 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | 3340 | if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || |
3093 | location->offset = 0; | 3341 | btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) |
3342 | goto out; | ||
3094 | 3343 | ||
3095 | return 0; | 3344 | ret = memcmp_extent_buffer(leaf, dentry->d_name.name, |
3345 | (unsigned long)(ref + 1), | ||
3346 | dentry->d_name.len); | ||
3347 | if (ret) | ||
3348 | goto out; | ||
3349 | |||
3350 | btrfs_release_path(root->fs_info->tree_root, path); | ||
3351 | |||
3352 | new_root = btrfs_read_fs_root_no_name(root->fs_info, location); | ||
3353 | if (IS_ERR(new_root)) { | ||
3354 | err = PTR_ERR(new_root); | ||
3355 | goto out; | ||
3356 | } | ||
3357 | |||
3358 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
3359 | err = -ENOENT; | ||
3360 | goto out; | ||
3361 | } | ||
3362 | |||
3363 | *sub_root = new_root; | ||
3364 | location->objectid = btrfs_root_dirid(&new_root->root_item); | ||
3365 | location->type = BTRFS_INODE_ITEM_KEY; | ||
3366 | location->offset = 0; | ||
3367 | err = 0; | ||
3368 | out: | ||
3369 | btrfs_free_path(path); | ||
3370 | return err; | ||
3096 | } | 3371 | } |
3097 | 3372 | ||
3098 | static void inode_tree_add(struct inode *inode) | 3373 | static void inode_tree_add(struct inode *inode) |
@@ -3101,11 +3376,13 @@ static void inode_tree_add(struct inode *inode) | |||
3101 | struct btrfs_inode *entry; | 3376 | struct btrfs_inode *entry; |
3102 | struct rb_node **p; | 3377 | struct rb_node **p; |
3103 | struct rb_node *parent; | 3378 | struct rb_node *parent; |
3104 | |||
3105 | again: | 3379 | again: |
3106 | p = &root->inode_tree.rb_node; | 3380 | p = &root->inode_tree.rb_node; |
3107 | parent = NULL; | 3381 | parent = NULL; |
3108 | 3382 | ||
3383 | if (hlist_unhashed(&inode->i_hash)) | ||
3384 | return; | ||
3385 | |||
3109 | spin_lock(&root->inode_lock); | 3386 | spin_lock(&root->inode_lock); |
3110 | while (*p) { | 3387 | while (*p) { |
3111 | parent = *p; | 3388 | parent = *p; |
@@ -3132,13 +3409,87 @@ again: | |||
3132 | static void inode_tree_del(struct inode *inode) | 3409 | static void inode_tree_del(struct inode *inode) |
3133 | { | 3410 | { |
3134 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3411 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3412 | int empty = 0; | ||
3135 | 3413 | ||
3136 | spin_lock(&root->inode_lock); | 3414 | spin_lock(&root->inode_lock); |
3137 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | 3415 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
3138 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | 3416 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
3139 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 3417 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
3418 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
3140 | } | 3419 | } |
3141 | spin_unlock(&root->inode_lock); | 3420 | spin_unlock(&root->inode_lock); |
3421 | |||
3422 | if (empty && btrfs_root_refs(&root->root_item) == 0) { | ||
3423 | synchronize_srcu(&root->fs_info->subvol_srcu); | ||
3424 | spin_lock(&root->inode_lock); | ||
3425 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
3426 | spin_unlock(&root->inode_lock); | ||
3427 | if (empty) | ||
3428 | btrfs_add_dead_root(root); | ||
3429 | } | ||
3430 | } | ||
3431 | |||
3432 | int btrfs_invalidate_inodes(struct btrfs_root *root) | ||
3433 | { | ||
3434 | struct rb_node *node; | ||
3435 | struct rb_node *prev; | ||
3436 | struct btrfs_inode *entry; | ||
3437 | struct inode *inode; | ||
3438 | u64 objectid = 0; | ||
3439 | |||
3440 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
3441 | |||
3442 | spin_lock(&root->inode_lock); | ||
3443 | again: | ||
3444 | node = root->inode_tree.rb_node; | ||
3445 | prev = NULL; | ||
3446 | while (node) { | ||
3447 | prev = node; | ||
3448 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
3449 | |||
3450 | if (objectid < entry->vfs_inode.i_ino) | ||
3451 | node = node->rb_left; | ||
3452 | else if (objectid > entry->vfs_inode.i_ino) | ||
3453 | node = node->rb_right; | ||
3454 | else | ||
3455 | break; | ||
3456 | } | ||
3457 | if (!node) { | ||
3458 | while (prev) { | ||
3459 | entry = rb_entry(prev, struct btrfs_inode, rb_node); | ||
3460 | if (objectid <= entry->vfs_inode.i_ino) { | ||
3461 | node = prev; | ||
3462 | break; | ||
3463 | } | ||
3464 | prev = rb_next(prev); | ||
3465 | } | ||
3466 | } | ||
3467 | while (node) { | ||
3468 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
3469 | objectid = entry->vfs_inode.i_ino + 1; | ||
3470 | inode = igrab(&entry->vfs_inode); | ||
3471 | if (inode) { | ||
3472 | spin_unlock(&root->inode_lock); | ||
3473 | if (atomic_read(&inode->i_count) > 1) | ||
3474 | d_prune_aliases(inode); | ||
3475 | /* | ||
3476 | * btrfs_drop_inode will remove it from | ||
3477 | * the inode cache when its usage count | ||
3478 | * hits zero. | ||
3479 | */ | ||
3480 | iput(inode); | ||
3481 | cond_resched(); | ||
3482 | spin_lock(&root->inode_lock); | ||
3483 | goto again; | ||
3484 | } | ||
3485 | |||
3486 | if (cond_resched_lock(&root->inode_lock)) | ||
3487 | goto again; | ||
3488 | |||
3489 | node = rb_next(node); | ||
3490 | } | ||
3491 | spin_unlock(&root->inode_lock); | ||
3492 | return 0; | ||
3142 | } | 3493 | } |
3143 | 3494 | ||
3144 | static noinline void init_btrfs_i(struct inode *inode) | 3495 | static noinline void init_btrfs_i(struct inode *inode) |
@@ -3148,6 +3499,7 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
3148 | bi->generation = 0; | 3499 | bi->generation = 0; |
3149 | bi->sequence = 0; | 3500 | bi->sequence = 0; |
3150 | bi->last_trans = 0; | 3501 | bi->last_trans = 0; |
3502 | bi->last_sub_trans = 0; | ||
3151 | bi->logged_trans = 0; | 3503 | bi->logged_trans = 0; |
3152 | bi->delalloc_bytes = 0; | 3504 | bi->delalloc_bytes = 0; |
3153 | bi->reserved_bytes = 0; | 3505 | bi->reserved_bytes = 0; |
@@ -3225,15 +3577,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
3225 | return inode; | 3577 | return inode; |
3226 | } | 3578 | } |
3227 | 3579 | ||
3580 | static struct inode *new_simple_dir(struct super_block *s, | ||
3581 | struct btrfs_key *key, | ||
3582 | struct btrfs_root *root) | ||
3583 | { | ||
3584 | struct inode *inode = new_inode(s); | ||
3585 | |||
3586 | if (!inode) | ||
3587 | return ERR_PTR(-ENOMEM); | ||
3588 | |||
3589 | init_btrfs_i(inode); | ||
3590 | |||
3591 | BTRFS_I(inode)->root = root; | ||
3592 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | ||
3593 | BTRFS_I(inode)->dummy_inode = 1; | ||
3594 | |||
3595 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; | ||
3596 | inode->i_op = &simple_dir_inode_operations; | ||
3597 | inode->i_fop = &simple_dir_operations; | ||
3598 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; | ||
3599 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
3600 | |||
3601 | return inode; | ||
3602 | } | ||
3603 | |||
3228 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | 3604 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) |
3229 | { | 3605 | { |
3230 | struct inode *inode; | 3606 | struct inode *inode; |
3231 | struct btrfs_inode *bi = BTRFS_I(dir); | 3607 | struct btrfs_root *root = BTRFS_I(dir)->root; |
3232 | struct btrfs_root *root = bi->root; | ||
3233 | struct btrfs_root *sub_root = root; | 3608 | struct btrfs_root *sub_root = root; |
3234 | struct btrfs_key location; | 3609 | struct btrfs_key location; |
3610 | int index; | ||
3235 | int ret; | 3611 | int ret; |
3236 | 3612 | ||
3613 | dentry->d_op = &btrfs_dentry_operations; | ||
3614 | |||
3237 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 3615 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
3238 | return ERR_PTR(-ENAMETOOLONG); | 3616 | return ERR_PTR(-ENAMETOOLONG); |
3239 | 3617 | ||
@@ -3242,29 +3620,52 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
3242 | if (ret < 0) | 3620 | if (ret < 0) |
3243 | return ERR_PTR(ret); | 3621 | return ERR_PTR(ret); |
3244 | 3622 | ||
3245 | inode = NULL; | 3623 | if (location.objectid == 0) |
3246 | if (location.objectid) { | 3624 | return NULL; |
3247 | ret = fixup_tree_root_location(root, &location, &sub_root, | 3625 | |
3248 | dentry); | 3626 | if (location.type == BTRFS_INODE_ITEM_KEY) { |
3249 | if (ret < 0) | 3627 | inode = btrfs_iget(dir->i_sb, &location, root); |
3250 | return ERR_PTR(ret); | 3628 | return inode; |
3251 | if (ret > 0) | 3629 | } |
3252 | return ERR_PTR(-ENOENT); | 3630 | |
3631 | BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); | ||
3632 | |||
3633 | index = srcu_read_lock(&root->fs_info->subvol_srcu); | ||
3634 | ret = fixup_tree_root_location(root, dir, dentry, | ||
3635 | &location, &sub_root); | ||
3636 | if (ret < 0) { | ||
3637 | if (ret != -ENOENT) | ||
3638 | inode = ERR_PTR(ret); | ||
3639 | else | ||
3640 | inode = new_simple_dir(dir->i_sb, &location, sub_root); | ||
3641 | } else { | ||
3253 | inode = btrfs_iget(dir->i_sb, &location, sub_root); | 3642 | inode = btrfs_iget(dir->i_sb, &location, sub_root); |
3254 | if (IS_ERR(inode)) | ||
3255 | return ERR_CAST(inode); | ||
3256 | } | 3643 | } |
3644 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | ||
3645 | |||
3257 | return inode; | 3646 | return inode; |
3258 | } | 3647 | } |
3259 | 3648 | ||
3649 | static int btrfs_dentry_delete(struct dentry *dentry) | ||
3650 | { | ||
3651 | struct btrfs_root *root; | ||
3652 | |||
3653 | if (!dentry->d_inode && !IS_ROOT(dentry)) | ||
3654 | dentry = dentry->d_parent; | ||
3655 | |||
3656 | if (dentry->d_inode) { | ||
3657 | root = BTRFS_I(dentry->d_inode)->root; | ||
3658 | if (btrfs_root_refs(&root->root_item) == 0) | ||
3659 | return 1; | ||
3660 | } | ||
3661 | return 0; | ||
3662 | } | ||
3663 | |||
3260 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 3664 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
3261 | struct nameidata *nd) | 3665 | struct nameidata *nd) |
3262 | { | 3666 | { |
3263 | struct inode *inode; | 3667 | struct inode *inode; |
3264 | 3668 | ||
3265 | if (dentry->d_name.len > BTRFS_NAME_LEN) | ||
3266 | return ERR_PTR(-ENAMETOOLONG); | ||
3267 | |||
3268 | inode = btrfs_lookup_dentry(dir, dentry); | 3669 | inode = btrfs_lookup_dentry(dir, dentry); |
3269 | if (IS_ERR(inode)) | 3670 | if (IS_ERR(inode)) |
3270 | return ERR_CAST(inode); | 3671 | return ERR_CAST(inode); |
@@ -3603,9 +4004,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3603 | if (ret != 0) | 4004 | if (ret != 0) |
3604 | goto fail; | 4005 | goto fail; |
3605 | 4006 | ||
3606 | if (objectid > root->highest_inode) | ||
3607 | root->highest_inode = objectid; | ||
3608 | |||
3609 | inode->i_uid = current_fsuid(); | 4007 | inode->i_uid = current_fsuid(); |
3610 | 4008 | ||
3611 | if (dir && (dir->i_mode & S_ISGID)) { | 4009 | if (dir && (dir->i_mode & S_ISGID)) { |
@@ -3673,26 +4071,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
3673 | struct inode *parent_inode, struct inode *inode, | 4071 | struct inode *parent_inode, struct inode *inode, |
3674 | const char *name, int name_len, int add_backref, u64 index) | 4072 | const char *name, int name_len, int add_backref, u64 index) |
3675 | { | 4073 | { |
3676 | int ret; | 4074 | int ret = 0; |
3677 | struct btrfs_key key; | 4075 | struct btrfs_key key; |
3678 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; | 4076 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; |
3679 | 4077 | ||
3680 | key.objectid = inode->i_ino; | 4078 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
3681 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 4079 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); |
3682 | key.offset = 0; | 4080 | } else { |
4081 | key.objectid = inode->i_ino; | ||
4082 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
4083 | key.offset = 0; | ||
4084 | } | ||
4085 | |||
4086 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
4087 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
4088 | key.objectid, root->root_key.objectid, | ||
4089 | parent_inode->i_ino, | ||
4090 | index, name, name_len); | ||
4091 | } else if (add_backref) { | ||
4092 | ret = btrfs_insert_inode_ref(trans, root, | ||
4093 | name, name_len, inode->i_ino, | ||
4094 | parent_inode->i_ino, index); | ||
4095 | } | ||
3683 | 4096 | ||
3684 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | ||
3685 | parent_inode->i_ino, | ||
3686 | &key, btrfs_inode_type(inode), | ||
3687 | index); | ||
3688 | if (ret == 0) { | 4097 | if (ret == 0) { |
3689 | if (add_backref) { | 4098 | ret = btrfs_insert_dir_item(trans, root, name, name_len, |
3690 | ret = btrfs_insert_inode_ref(trans, root, | 4099 | parent_inode->i_ino, &key, |
3691 | name, name_len, | 4100 | btrfs_inode_type(inode), index); |
3692 | inode->i_ino, | 4101 | BUG_ON(ret); |
3693 | parent_inode->i_ino, | 4102 | |
3694 | index); | ||
3695 | } | ||
3696 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 4103 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
3697 | name_len * 2); | 4104 | name_len * 2); |
3698 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 4105 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
@@ -3732,11 +4139,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
3732 | if (!new_valid_dev(rdev)) | 4139 | if (!new_valid_dev(rdev)) |
3733 | return -EINVAL; | 4140 | return -EINVAL; |
3734 | 4141 | ||
3735 | err = btrfs_check_metadata_free_space(root); | 4142 | /* |
4143 | * 2 for inode item and ref | ||
4144 | * 2 for dir items | ||
4145 | * 1 for xattr if selinux is on | ||
4146 | */ | ||
4147 | err = btrfs_reserve_metadata_space(root, 5); | ||
3736 | if (err) | 4148 | if (err) |
3737 | goto fail; | 4149 | return err; |
3738 | 4150 | ||
3739 | trans = btrfs_start_transaction(root, 1); | 4151 | trans = btrfs_start_transaction(root, 1); |
4152 | if (!trans) | ||
4153 | goto fail; | ||
3740 | btrfs_set_trans_block_group(trans, dir); | 4154 | btrfs_set_trans_block_group(trans, dir); |
3741 | 4155 | ||
3742 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4156 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
@@ -3774,6 +4188,7 @@ out_unlock: | |||
3774 | nr = trans->blocks_used; | 4188 | nr = trans->blocks_used; |
3775 | btrfs_end_transaction_throttle(trans, root); | 4189 | btrfs_end_transaction_throttle(trans, root); |
3776 | fail: | 4190 | fail: |
4191 | btrfs_unreserve_metadata_space(root, 5); | ||
3777 | if (drop_inode) { | 4192 | if (drop_inode) { |
3778 | inode_dec_link_count(inode); | 4193 | inode_dec_link_count(inode); |
3779 | iput(inode); | 4194 | iput(inode); |
@@ -3794,10 +4209,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
3794 | u64 objectid; | 4209 | u64 objectid; |
3795 | u64 index = 0; | 4210 | u64 index = 0; |
3796 | 4211 | ||
3797 | err = btrfs_check_metadata_free_space(root); | 4212 | /* |
4213 | * 2 for inode item and ref | ||
4214 | * 2 for dir items | ||
4215 | * 1 for xattr if selinux is on | ||
4216 | */ | ||
4217 | err = btrfs_reserve_metadata_space(root, 5); | ||
3798 | if (err) | 4218 | if (err) |
3799 | goto fail; | 4219 | return err; |
4220 | |||
3800 | trans = btrfs_start_transaction(root, 1); | 4221 | trans = btrfs_start_transaction(root, 1); |
4222 | if (!trans) | ||
4223 | goto fail; | ||
3801 | btrfs_set_trans_block_group(trans, dir); | 4224 | btrfs_set_trans_block_group(trans, dir); |
3802 | 4225 | ||
3803 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4226 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
@@ -3838,6 +4261,7 @@ out_unlock: | |||
3838 | nr = trans->blocks_used; | 4261 | nr = trans->blocks_used; |
3839 | btrfs_end_transaction_throttle(trans, root); | 4262 | btrfs_end_transaction_throttle(trans, root); |
3840 | fail: | 4263 | fail: |
4264 | btrfs_unreserve_metadata_space(root, 5); | ||
3841 | if (drop_inode) { | 4265 | if (drop_inode) { |
3842 | inode_dec_link_count(inode); | 4266 | inode_dec_link_count(inode); |
3843 | iput(inode); | 4267 | iput(inode); |
@@ -3860,10 +4284,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3860 | if (inode->i_nlink == 0) | 4284 | if (inode->i_nlink == 0) |
3861 | return -ENOENT; | 4285 | return -ENOENT; |
3862 | 4286 | ||
3863 | btrfs_inc_nlink(inode); | 4287 | /* |
3864 | err = btrfs_check_metadata_free_space(root); | 4288 | * 1 item for inode ref |
4289 | * 2 items for dir items | ||
4290 | */ | ||
4291 | err = btrfs_reserve_metadata_space(root, 3); | ||
3865 | if (err) | 4292 | if (err) |
3866 | goto fail; | 4293 | return err; |
4294 | |||
4295 | btrfs_inc_nlink(inode); | ||
4296 | |||
3867 | err = btrfs_set_inode_index(dir, &index); | 4297 | err = btrfs_set_inode_index(dir, &index); |
3868 | if (err) | 4298 | if (err) |
3869 | goto fail; | 4299 | goto fail; |
@@ -3875,20 +4305,19 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3875 | 4305 | ||
3876 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4306 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); |
3877 | 4307 | ||
3878 | if (err) | 4308 | if (err) { |
3879 | drop_inode = 1; | ||
3880 | |||
3881 | btrfs_update_inode_block_group(trans, dir); | ||
3882 | err = btrfs_update_inode(trans, root, inode); | ||
3883 | |||
3884 | if (err) | ||
3885 | drop_inode = 1; | 4309 | drop_inode = 1; |
4310 | } else { | ||
4311 | btrfs_update_inode_block_group(trans, dir); | ||
4312 | err = btrfs_update_inode(trans, root, inode); | ||
4313 | BUG_ON(err); | ||
4314 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
4315 | } | ||
3886 | 4316 | ||
3887 | nr = trans->blocks_used; | 4317 | nr = trans->blocks_used; |
3888 | |||
3889 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
3890 | btrfs_end_transaction_throttle(trans, root); | 4318 | btrfs_end_transaction_throttle(trans, root); |
3891 | fail: | 4319 | fail: |
4320 | btrfs_unreserve_metadata_space(root, 3); | ||
3892 | if (drop_inode) { | 4321 | if (drop_inode) { |
3893 | inode_dec_link_count(inode); | 4322 | inode_dec_link_count(inode); |
3894 | iput(inode); | 4323 | iput(inode); |
@@ -3908,17 +4337,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
3908 | u64 index = 0; | 4337 | u64 index = 0; |
3909 | unsigned long nr = 1; | 4338 | unsigned long nr = 1; |
3910 | 4339 | ||
3911 | err = btrfs_check_metadata_free_space(root); | 4340 | /* |
4341 | * 2 items for inode and ref | ||
4342 | * 2 items for dir items | ||
4343 | * 1 for xattr if selinux is on | ||
4344 | */ | ||
4345 | err = btrfs_reserve_metadata_space(root, 5); | ||
3912 | if (err) | 4346 | if (err) |
3913 | goto out_unlock; | 4347 | return err; |
3914 | 4348 | ||
3915 | trans = btrfs_start_transaction(root, 1); | 4349 | trans = btrfs_start_transaction(root, 1); |
3916 | btrfs_set_trans_block_group(trans, dir); | 4350 | if (!trans) { |
3917 | 4351 | err = -ENOMEM; | |
3918 | if (IS_ERR(trans)) { | ||
3919 | err = PTR_ERR(trans); | ||
3920 | goto out_unlock; | 4352 | goto out_unlock; |
3921 | } | 4353 | } |
4354 | btrfs_set_trans_block_group(trans, dir); | ||
3922 | 4355 | ||
3923 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4356 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
3924 | if (err) { | 4357 | if (err) { |
@@ -3967,6 +4400,7 @@ out_fail: | |||
3967 | btrfs_end_transaction_throttle(trans, root); | 4400 | btrfs_end_transaction_throttle(trans, root); |
3968 | 4401 | ||
3969 | out_unlock: | 4402 | out_unlock: |
4403 | btrfs_unreserve_metadata_space(root, 5); | ||
3970 | if (drop_on_err) | 4404 | if (drop_on_err) |
3971 | iput(inode); | 4405 | iput(inode); |
3972 | btrfs_btree_balance_dirty(root, nr); | 4406 | btrfs_btree_balance_dirty(root, nr); |
@@ -4064,11 +4498,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
4064 | int compressed; | 4498 | int compressed; |
4065 | 4499 | ||
4066 | again: | 4500 | again: |
4067 | spin_lock(&em_tree->lock); | 4501 | read_lock(&em_tree->lock); |
4068 | em = lookup_extent_mapping(em_tree, start, len); | 4502 | em = lookup_extent_mapping(em_tree, start, len); |
4069 | if (em) | 4503 | if (em) |
4070 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 4504 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
4071 | spin_unlock(&em_tree->lock); | 4505 | read_unlock(&em_tree->lock); |
4072 | 4506 | ||
4073 | if (em) { | 4507 | if (em) { |
4074 | if (em->start > start || em->start + em->len <= start) | 4508 | if (em->start > start || em->start + em->len <= start) |
@@ -4215,6 +4649,11 @@ again: | |||
4215 | map = kmap(page); | 4649 | map = kmap(page); |
4216 | read_extent_buffer(leaf, map + pg_offset, ptr, | 4650 | read_extent_buffer(leaf, map + pg_offset, ptr, |
4217 | copy_size); | 4651 | copy_size); |
4652 | if (pg_offset + copy_size < PAGE_CACHE_SIZE) { | ||
4653 | memset(map + pg_offset + copy_size, 0, | ||
4654 | PAGE_CACHE_SIZE - pg_offset - | ||
4655 | copy_size); | ||
4656 | } | ||
4218 | kunmap(page); | 4657 | kunmap(page); |
4219 | } | 4658 | } |
4220 | flush_dcache_page(page); | 4659 | flush_dcache_page(page); |
@@ -4259,7 +4698,7 @@ insert: | |||
4259 | } | 4698 | } |
4260 | 4699 | ||
4261 | err = 0; | 4700 | err = 0; |
4262 | spin_lock(&em_tree->lock); | 4701 | write_lock(&em_tree->lock); |
4263 | ret = add_extent_mapping(em_tree, em); | 4702 | ret = add_extent_mapping(em_tree, em); |
4264 | /* it is possible that someone inserted the extent into the tree | 4703 | /* it is possible that someone inserted the extent into the tree |
4265 | * while we had the lock dropped. It is also possible that | 4704 | * while we had the lock dropped. It is also possible that |
@@ -4299,7 +4738,7 @@ insert: | |||
4299 | err = 0; | 4738 | err = 0; |
4300 | } | 4739 | } |
4301 | } | 4740 | } |
4302 | spin_unlock(&em_tree->lock); | 4741 | write_unlock(&em_tree->lock); |
4303 | out: | 4742 | out: |
4304 | if (path) | 4743 | if (path) |
4305 | btrfs_free_path(path); | 4744 | btrfs_free_path(path); |
@@ -4398,13 +4837,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
4398 | u64 page_start = page_offset(page); | 4837 | u64 page_start = page_offset(page); |
4399 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 4838 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
4400 | 4839 | ||
4840 | |||
4841 | /* | ||
4842 | * we have the page locked, so new writeback can't start, | ||
4843 | * and the dirty bit won't be cleared while we are here. | ||
4844 | * | ||
4845 | * Wait for IO on this page so that we can safely clear | ||
4846 | * the PagePrivate2 bit and do ordered accounting | ||
4847 | */ | ||
4401 | wait_on_page_writeback(page); | 4848 | wait_on_page_writeback(page); |
4849 | |||
4402 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 4850 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
4403 | if (offset) { | 4851 | if (offset) { |
4404 | btrfs_releasepage(page, GFP_NOFS); | 4852 | btrfs_releasepage(page, GFP_NOFS); |
4405 | return; | 4853 | return; |
4406 | } | 4854 | } |
4407 | |||
4408 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4855 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
4409 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 4856 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, |
4410 | page_offset(page)); | 4857 | page_offset(page)); |
@@ -4415,16 +4862,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
4415 | */ | 4862 | */ |
4416 | clear_extent_bit(tree, page_start, page_end, | 4863 | clear_extent_bit(tree, page_start, page_end, |
4417 | EXTENT_DIRTY | EXTENT_DELALLOC | | 4864 | EXTENT_DIRTY | EXTENT_DELALLOC | |
4418 | EXTENT_LOCKED, 1, 0, GFP_NOFS); | 4865 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, |
4419 | btrfs_finish_ordered_io(page->mapping->host, | 4866 | NULL, GFP_NOFS); |
4420 | page_start, page_end); | 4867 | /* |
4868 | * whoever cleared the private bit is responsible | ||
4869 | * for the finish_ordered_io | ||
4870 | */ | ||
4871 | if (TestClearPagePrivate2(page)) { | ||
4872 | btrfs_finish_ordered_io(page->mapping->host, | ||
4873 | page_start, page_end); | ||
4874 | } | ||
4421 | btrfs_put_ordered_extent(ordered); | 4875 | btrfs_put_ordered_extent(ordered); |
4422 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4876 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
4423 | } | 4877 | } |
4424 | clear_extent_bit(tree, page_start, page_end, | 4878 | clear_extent_bit(tree, page_start, page_end, |
4425 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 4879 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
4426 | EXTENT_ORDERED, | 4880 | EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); |
4427 | 1, 1, GFP_NOFS); | ||
4428 | __btrfs_releasepage(page, GFP_NOFS); | 4881 | __btrfs_releasepage(page, GFP_NOFS); |
4429 | 4882 | ||
4430 | ClearPageChecked(page); | 4883 | ClearPageChecked(page); |
@@ -4473,6 +4926,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
4473 | goto out; | 4926 | goto out; |
4474 | } | 4927 | } |
4475 | 4928 | ||
4929 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
4930 | if (ret) { | ||
4931 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
4932 | ret = VM_FAULT_SIGBUS; | ||
4933 | goto out; | ||
4934 | } | ||
4935 | |||
4476 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 4936 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
4477 | again: | 4937 | again: |
4478 | lock_page(page); | 4938 | lock_page(page); |
@@ -4504,7 +4964,24 @@ again: | |||
4504 | goto again; | 4964 | goto again; |
4505 | } | 4965 | } |
4506 | 4966 | ||
4507 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 4967 | /* |
4968 | * XXX - page_mkwrite gets called every time the page is dirtied, even | ||
4969 | * if it was already dirty, so for space accounting reasons we need to | ||
4970 | * clear any delalloc bits for the range we are fixing to save. There | ||
4971 | * is probably a better way to do this, but for now keep consistent with | ||
4972 | * prepare_pages in the normal write path. | ||
4973 | */ | ||
4974 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
4975 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | ||
4976 | GFP_NOFS); | ||
4977 | |||
4978 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
4979 | if (ret) { | ||
4980 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
4981 | ret = VM_FAULT_SIGBUS; | ||
4982 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
4983 | goto out_unlock; | ||
4984 | } | ||
4508 | ret = 0; | 4985 | ret = 0; |
4509 | 4986 | ||
4510 | /* page is wholly or partially inside EOF */ | 4987 | /* page is wholly or partially inside EOF */ |
@@ -4521,11 +4998,17 @@ again: | |||
4521 | } | 4998 | } |
4522 | ClearPageChecked(page); | 4999 | ClearPageChecked(page); |
4523 | set_page_dirty(page); | 5000 | set_page_dirty(page); |
5001 | SetPageUptodate(page); | ||
5002 | |||
5003 | BTRFS_I(inode)->last_trans = root->fs_info->generation; | ||
5004 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | ||
4524 | 5005 | ||
4525 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
4526 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 5006 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
4527 | 5007 | ||
4528 | out_unlock: | 5008 | out_unlock: |
5009 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
5010 | if (!ret) | ||
5011 | return VM_FAULT_LOCKED; | ||
4529 | unlock_page(page); | 5012 | unlock_page(page); |
4530 | out: | 5013 | out: |
4531 | return ret; | 5014 | return ret; |
@@ -4544,7 +5027,9 @@ static void btrfs_truncate(struct inode *inode) | |||
4544 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 5027 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
4545 | return; | 5028 | return; |
4546 | 5029 | ||
4547 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | 5030 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
5031 | if (ret) | ||
5032 | return; | ||
4548 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 5033 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
4549 | 5034 | ||
4550 | trans = btrfs_start_transaction(root, 1); | 5035 | trans = btrfs_start_transaction(root, 1); |
@@ -4594,11 +5079,11 @@ out: | |||
4594 | * create a new subvolume directory/inode (helper for the ioctl). | 5079 | * create a new subvolume directory/inode (helper for the ioctl). |
4595 | */ | 5080 | */ |
4596 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 5081 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
4597 | struct btrfs_root *new_root, struct dentry *dentry, | 5082 | struct btrfs_root *new_root, |
4598 | u64 new_dirid, u64 alloc_hint) | 5083 | u64 new_dirid, u64 alloc_hint) |
4599 | { | 5084 | { |
4600 | struct inode *inode; | 5085 | struct inode *inode; |
4601 | int error; | 5086 | int err; |
4602 | u64 index = 0; | 5087 | u64 index = 0; |
4603 | 5088 | ||
4604 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | 5089 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, |
@@ -4611,11 +5096,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
4611 | inode->i_nlink = 1; | 5096 | inode->i_nlink = 1; |
4612 | btrfs_i_size_write(inode, 0); | 5097 | btrfs_i_size_write(inode, 0); |
4613 | 5098 | ||
4614 | error = btrfs_update_inode(trans, new_root, inode); | 5099 | err = btrfs_update_inode(trans, new_root, inode); |
4615 | if (error) | 5100 | BUG_ON(err); |
4616 | return error; | ||
4617 | 5101 | ||
4618 | d_instantiate(dentry, inode); | 5102 | iput(inode); |
4619 | return 0; | 5103 | return 0; |
4620 | } | 5104 | } |
4621 | 5105 | ||
@@ -4640,7 +5124,11 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
4640 | if (!ei) | 5124 | if (!ei) |
4641 | return NULL; | 5125 | return NULL; |
4642 | ei->last_trans = 0; | 5126 | ei->last_trans = 0; |
5127 | ei->last_sub_trans = 0; | ||
4643 | ei->logged_trans = 0; | 5128 | ei->logged_trans = 0; |
5129 | ei->outstanding_extents = 0; | ||
5130 | ei->reserved_extents = 0; | ||
5131 | spin_lock_init(&ei->accounting_lock); | ||
4644 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 5132 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
4645 | INIT_LIST_HEAD(&ei->i_orphan); | 5133 | INIT_LIST_HEAD(&ei->i_orphan); |
4646 | INIT_LIST_HEAD(&ei->ordered_operations); | 5134 | INIT_LIST_HEAD(&ei->ordered_operations); |
@@ -4693,6 +5181,16 @@ void btrfs_destroy_inode(struct inode *inode) | |||
4693 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 5181 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
4694 | } | 5182 | } |
4695 | 5183 | ||
5184 | void btrfs_drop_inode(struct inode *inode) | ||
5185 | { | ||
5186 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5187 | |||
5188 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) | ||
5189 | generic_delete_inode(inode); | ||
5190 | else | ||
5191 | generic_drop_inode(inode); | ||
5192 | } | ||
5193 | |||
4696 | static void init_once(void *foo) | 5194 | static void init_once(void *foo) |
4697 | { | 5195 | { |
4698 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; | 5196 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; |
@@ -4761,31 +5259,37 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4761 | { | 5259 | { |
4762 | struct btrfs_trans_handle *trans; | 5260 | struct btrfs_trans_handle *trans; |
4763 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | 5261 | struct btrfs_root *root = BTRFS_I(old_dir)->root; |
5262 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | ||
4764 | struct inode *new_inode = new_dentry->d_inode; | 5263 | struct inode *new_inode = new_dentry->d_inode; |
4765 | struct inode *old_inode = old_dentry->d_inode; | 5264 | struct inode *old_inode = old_dentry->d_inode; |
4766 | struct timespec ctime = CURRENT_TIME; | 5265 | struct timespec ctime = CURRENT_TIME; |
4767 | u64 index = 0; | 5266 | u64 index = 0; |
5267 | u64 root_objectid; | ||
4768 | int ret; | 5268 | int ret; |
4769 | 5269 | ||
4770 | /* we're not allowed to rename between subvolumes */ | 5270 | if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
4771 | if (BTRFS_I(old_inode)->root->root_key.objectid != | 5271 | return -EPERM; |
4772 | BTRFS_I(new_dir)->root->root_key.objectid) | 5272 | |
5273 | /* we only allow rename subvolume link between subvolumes */ | ||
5274 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) | ||
4773 | return -EXDEV; | 5275 | return -EXDEV; |
4774 | 5276 | ||
5277 | if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || | ||
5278 | (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) | ||
5279 | return -ENOTEMPTY; | ||
5280 | |||
4775 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 5281 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
4776 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { | 5282 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
4777 | return -ENOTEMPTY; | 5283 | return -ENOTEMPTY; |
4778 | } | ||
4779 | 5284 | ||
4780 | /* to rename a snapshot or subvolume, we need to juggle the | 5285 | /* |
4781 | * backrefs. This isn't coded yet | 5286 | * 2 items for dir items |
5287 | * 1 item for orphan entry | ||
5288 | * 1 item for ref | ||
4782 | */ | 5289 | */ |
4783 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 5290 | ret = btrfs_reserve_metadata_space(root, 4); |
4784 | return -EXDEV; | ||
4785 | |||
4786 | ret = btrfs_check_metadata_free_space(root); | ||
4787 | if (ret) | 5291 | if (ret) |
4788 | goto out_unlock; | 5292 | return ret; |
4789 | 5293 | ||
4790 | /* | 5294 | /* |
4791 | * we're using rename to replace one file with another. | 5295 | * we're using rename to replace one file with another. |
@@ -4796,8 +5300,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4796 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 5300 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
4797 | filemap_flush(old_inode->i_mapping); | 5301 | filemap_flush(old_inode->i_mapping); |
4798 | 5302 | ||
5303 | /* close the racy window with snapshot create/destroy ioctl */ | ||
5304 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
5305 | down_read(&root->fs_info->subvol_sem); | ||
5306 | |||
4799 | trans = btrfs_start_transaction(root, 1); | 5307 | trans = btrfs_start_transaction(root, 1); |
5308 | btrfs_set_trans_block_group(trans, new_dir); | ||
5309 | |||
5310 | if (dest != root) | ||
5311 | btrfs_record_root_in_trans(trans, dest); | ||
4800 | 5312 | ||
5313 | ret = btrfs_set_inode_index(new_dir, &index); | ||
5314 | if (ret) | ||
5315 | goto out_fail; | ||
5316 | |||
5317 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
5318 | /* force full log commit if subvolume involved. */ | ||
5319 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
5320 | } else { | ||
5321 | ret = btrfs_insert_inode_ref(trans, dest, | ||
5322 | new_dentry->d_name.name, | ||
5323 | new_dentry->d_name.len, | ||
5324 | old_inode->i_ino, | ||
5325 | new_dir->i_ino, index); | ||
5326 | if (ret) | ||
5327 | goto out_fail; | ||
5328 | /* | ||
5329 | * this is an ugly little race, but the rename is required | ||
5330 | * to make sure that if we crash, the inode is either at the | ||
5331 | * old name or the new one. pinning the log transaction lets | ||
5332 | * us make sure we don't allow a log commit to come in after | ||
5333 | * we unlink the name but before we add the new name back in. | ||
5334 | */ | ||
5335 | btrfs_pin_log_trans(root); | ||
5336 | } | ||
4801 | /* | 5337 | /* |
4802 | * make sure the inode gets flushed if it is replacing | 5338 | * make sure the inode gets flushed if it is replacing |
4803 | * something. | 5339 | * something. |
@@ -4807,18 +5343,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4807 | btrfs_add_ordered_operation(trans, root, old_inode); | 5343 | btrfs_add_ordered_operation(trans, root, old_inode); |
4808 | } | 5344 | } |
4809 | 5345 | ||
4810 | /* | ||
4811 | * this is an ugly little race, but the rename is required to make | ||
4812 | * sure that if we crash, the inode is either at the old name | ||
4813 | * or the new one. pinning the log transaction lets us make sure | ||
4814 | * we don't allow a log commit to come in after we unlink the | ||
4815 | * name but before we add the new name back in. | ||
4816 | */ | ||
4817 | btrfs_pin_log_trans(root); | ||
4818 | |||
4819 | btrfs_set_trans_block_group(trans, new_dir); | ||
4820 | |||
4821 | btrfs_inc_nlink(old_dentry->d_inode); | ||
4822 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 5346 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
4823 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 5347 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
4824 | old_inode->i_ctime = ctime; | 5348 | old_inode->i_ctime = ctime; |
@@ -4826,47 +5350,60 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4826 | if (old_dentry->d_parent != new_dentry->d_parent) | 5350 | if (old_dentry->d_parent != new_dentry->d_parent) |
4827 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | 5351 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); |
4828 | 5352 | ||
4829 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | 5353 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
4830 | old_dentry->d_name.name, | 5354 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; |
4831 | old_dentry->d_name.len); | 5355 | ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, |
4832 | if (ret) | 5356 | old_dentry->d_name.name, |
4833 | goto out_fail; | 5357 | old_dentry->d_name.len); |
5358 | } else { | ||
5359 | btrfs_inc_nlink(old_dentry->d_inode); | ||
5360 | ret = btrfs_unlink_inode(trans, root, old_dir, | ||
5361 | old_dentry->d_inode, | ||
5362 | old_dentry->d_name.name, | ||
5363 | old_dentry->d_name.len); | ||
5364 | } | ||
5365 | BUG_ON(ret); | ||
4834 | 5366 | ||
4835 | if (new_inode) { | 5367 | if (new_inode) { |
4836 | new_inode->i_ctime = CURRENT_TIME; | 5368 | new_inode->i_ctime = CURRENT_TIME; |
4837 | ret = btrfs_unlink_inode(trans, root, new_dir, | 5369 | if (unlikely(new_inode->i_ino == |
4838 | new_dentry->d_inode, | 5370 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
4839 | new_dentry->d_name.name, | 5371 | root_objectid = BTRFS_I(new_inode)->location.objectid; |
4840 | new_dentry->d_name.len); | 5372 | ret = btrfs_unlink_subvol(trans, dest, new_dir, |
4841 | if (ret) | 5373 | root_objectid, |
4842 | goto out_fail; | 5374 | new_dentry->d_name.name, |
5375 | new_dentry->d_name.len); | ||
5376 | BUG_ON(new_inode->i_nlink == 0); | ||
5377 | } else { | ||
5378 | ret = btrfs_unlink_inode(trans, dest, new_dir, | ||
5379 | new_dentry->d_inode, | ||
5380 | new_dentry->d_name.name, | ||
5381 | new_dentry->d_name.len); | ||
5382 | } | ||
5383 | BUG_ON(ret); | ||
4843 | if (new_inode->i_nlink == 0) { | 5384 | if (new_inode->i_nlink == 0) { |
4844 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); | 5385 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); |
4845 | if (ret) | 5386 | BUG_ON(ret); |
4846 | goto out_fail; | ||
4847 | } | 5387 | } |
4848 | |||
4849 | } | 5388 | } |
4850 | ret = btrfs_set_inode_index(new_dir, &index); | ||
4851 | if (ret) | ||
4852 | goto out_fail; | ||
4853 | 5389 | ||
4854 | ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, | 5390 | ret = btrfs_add_link(trans, new_dir, old_inode, |
4855 | old_inode, new_dentry->d_name.name, | 5391 | new_dentry->d_name.name, |
4856 | new_dentry->d_name.len, 1, index); | 5392 | new_dentry->d_name.len, 0, index); |
4857 | if (ret) | 5393 | BUG_ON(ret); |
4858 | goto out_fail; | ||
4859 | 5394 | ||
4860 | btrfs_log_new_name(trans, old_inode, old_dir, | 5395 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
4861 | new_dentry->d_parent); | 5396 | btrfs_log_new_name(trans, old_inode, old_dir, |
5397 | new_dentry->d_parent); | ||
5398 | btrfs_end_log_trans(root); | ||
5399 | } | ||
4862 | out_fail: | 5400 | out_fail: |
4863 | |||
4864 | /* this btrfs_end_log_trans just allows the current | ||
4865 | * log-sub transaction to complete | ||
4866 | */ | ||
4867 | btrfs_end_log_trans(root); | ||
4868 | btrfs_end_transaction_throttle(trans, root); | 5401 | btrfs_end_transaction_throttle(trans, root); |
4869 | out_unlock: | 5402 | |
5403 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
5404 | up_read(&root->fs_info->subvol_sem); | ||
5405 | |||
5406 | btrfs_unreserve_metadata_space(root, 4); | ||
4870 | return ret; | 5407 | return ret; |
4871 | } | 5408 | } |
4872 | 5409 | ||
@@ -4938,11 +5475,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
4938 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 5475 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
4939 | return -ENAMETOOLONG; | 5476 | return -ENAMETOOLONG; |
4940 | 5477 | ||
4941 | err = btrfs_check_metadata_free_space(root); | 5478 | /* |
5479 | * 2 items for inode item and ref | ||
5480 | * 2 items for dir items | ||
5481 | * 1 item for xattr if selinux is on | ||
5482 | */ | ||
5483 | err = btrfs_reserve_metadata_space(root, 5); | ||
4942 | if (err) | 5484 | if (err) |
4943 | goto out_fail; | 5485 | return err; |
4944 | 5486 | ||
4945 | trans = btrfs_start_transaction(root, 1); | 5487 | trans = btrfs_start_transaction(root, 1); |
5488 | if (!trans) | ||
5489 | goto out_fail; | ||
4946 | btrfs_set_trans_block_group(trans, dir); | 5490 | btrfs_set_trans_block_group(trans, dir); |
4947 | 5491 | ||
4948 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 5492 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
@@ -5023,6 +5567,7 @@ out_unlock: | |||
5023 | nr = trans->blocks_used; | 5567 | nr = trans->blocks_used; |
5024 | btrfs_end_transaction_throttle(trans, root); | 5568 | btrfs_end_transaction_throttle(trans, root); |
5025 | out_fail: | 5569 | out_fail: |
5570 | btrfs_unreserve_metadata_space(root, 5); | ||
5026 | if (drop_inode) { | 5571 | if (drop_inode) { |
5027 | inode_dec_link_count(inode); | 5572 | inode_dec_link_count(inode); |
5028 | iput(inode); | 5573 | iput(inode); |
@@ -5044,6 +5589,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
5044 | 5589 | ||
5045 | while (num_bytes > 0) { | 5590 | while (num_bytes > 0) { |
5046 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5591 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
5592 | |||
5593 | ret = btrfs_reserve_metadata_space(root, 1); | ||
5594 | if (ret) | ||
5595 | goto out; | ||
5596 | |||
5047 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5597 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
5048 | root->sectorsize, 0, alloc_hint, | 5598 | root->sectorsize, 0, alloc_hint, |
5049 | (u64)-1, &ins, 1); | 5599 | (u64)-1, &ins, 1); |
@@ -5058,9 +5608,12 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
5058 | 0, 0, 0, | 5608 | 0, 0, 0, |
5059 | BTRFS_FILE_EXTENT_PREALLOC); | 5609 | BTRFS_FILE_EXTENT_PREALLOC); |
5060 | BUG_ON(ret); | 5610 | BUG_ON(ret); |
5611 | btrfs_drop_extent_cache(inode, cur_offset, | ||
5612 | cur_offset + ins.offset -1, 0); | ||
5061 | num_bytes -= ins.offset; | 5613 | num_bytes -= ins.offset; |
5062 | cur_offset += ins.offset; | 5614 | cur_offset += ins.offset; |
5063 | alloc_hint = ins.objectid + ins.offset; | 5615 | alloc_hint = ins.objectid + ins.offset; |
5616 | btrfs_unreserve_metadata_space(root, 1); | ||
5064 | } | 5617 | } |
5065 | out: | 5618 | out: |
5066 | if (cur_offset > start) { | 5619 | if (cur_offset > start) { |
@@ -5201,7 +5754,7 @@ static int btrfs_permission(struct inode *inode, int mask) | |||
5201 | return generic_permission(inode, mask, btrfs_check_acl); | 5754 | return generic_permission(inode, mask, btrfs_check_acl); |
5202 | } | 5755 | } |
5203 | 5756 | ||
5204 | static struct inode_operations btrfs_dir_inode_operations = { | 5757 | static const struct inode_operations btrfs_dir_inode_operations = { |
5205 | .getattr = btrfs_getattr, | 5758 | .getattr = btrfs_getattr, |
5206 | .lookup = btrfs_lookup, | 5759 | .lookup = btrfs_lookup, |
5207 | .create = btrfs_create, | 5760 | .create = btrfs_create, |
@@ -5219,11 +5772,12 @@ static struct inode_operations btrfs_dir_inode_operations = { | |||
5219 | .removexattr = btrfs_removexattr, | 5772 | .removexattr = btrfs_removexattr, |
5220 | .permission = btrfs_permission, | 5773 | .permission = btrfs_permission, |
5221 | }; | 5774 | }; |
5222 | static struct inode_operations btrfs_dir_ro_inode_operations = { | 5775 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
5223 | .lookup = btrfs_lookup, | 5776 | .lookup = btrfs_lookup, |
5224 | .permission = btrfs_permission, | 5777 | .permission = btrfs_permission, |
5225 | }; | 5778 | }; |
5226 | static struct file_operations btrfs_dir_file_operations = { | 5779 | |
5780 | static const struct file_operations btrfs_dir_file_operations = { | ||
5227 | .llseek = generic_file_llseek, | 5781 | .llseek = generic_file_llseek, |
5228 | .read = generic_read_dir, | 5782 | .read = generic_read_dir, |
5229 | .readdir = btrfs_real_readdir, | 5783 | .readdir = btrfs_real_readdir, |
@@ -5245,6 +5799,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
5245 | .readpage_io_failed_hook = btrfs_io_failed_hook, | 5799 | .readpage_io_failed_hook = btrfs_io_failed_hook, |
5246 | .set_bit_hook = btrfs_set_bit_hook, | 5800 | .set_bit_hook = btrfs_set_bit_hook, |
5247 | .clear_bit_hook = btrfs_clear_bit_hook, | 5801 | .clear_bit_hook = btrfs_clear_bit_hook, |
5802 | .merge_extent_hook = btrfs_merge_extent_hook, | ||
5803 | .split_extent_hook = btrfs_split_extent_hook, | ||
5248 | }; | 5804 | }; |
5249 | 5805 | ||
5250 | /* | 5806 | /* |
@@ -5259,7 +5815,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
5259 | * | 5815 | * |
5260 | * For now we're avoiding this by dropping bmap. | 5816 | * For now we're avoiding this by dropping bmap. |
5261 | */ | 5817 | */ |
5262 | static struct address_space_operations btrfs_aops = { | 5818 | static const struct address_space_operations btrfs_aops = { |
5263 | .readpage = btrfs_readpage, | 5819 | .readpage = btrfs_readpage, |
5264 | .writepage = btrfs_writepage, | 5820 | .writepage = btrfs_writepage, |
5265 | .writepages = btrfs_writepages, | 5821 | .writepages = btrfs_writepages, |
@@ -5269,16 +5825,17 @@ static struct address_space_operations btrfs_aops = { | |||
5269 | .invalidatepage = btrfs_invalidatepage, | 5825 | .invalidatepage = btrfs_invalidatepage, |
5270 | .releasepage = btrfs_releasepage, | 5826 | .releasepage = btrfs_releasepage, |
5271 | .set_page_dirty = btrfs_set_page_dirty, | 5827 | .set_page_dirty = btrfs_set_page_dirty, |
5828 | .error_remove_page = generic_error_remove_page, | ||
5272 | }; | 5829 | }; |
5273 | 5830 | ||
5274 | static struct address_space_operations btrfs_symlink_aops = { | 5831 | static const struct address_space_operations btrfs_symlink_aops = { |
5275 | .readpage = btrfs_readpage, | 5832 | .readpage = btrfs_readpage, |
5276 | .writepage = btrfs_writepage, | 5833 | .writepage = btrfs_writepage, |
5277 | .invalidatepage = btrfs_invalidatepage, | 5834 | .invalidatepage = btrfs_invalidatepage, |
5278 | .releasepage = btrfs_releasepage, | 5835 | .releasepage = btrfs_releasepage, |
5279 | }; | 5836 | }; |
5280 | 5837 | ||
5281 | static struct inode_operations btrfs_file_inode_operations = { | 5838 | static const struct inode_operations btrfs_file_inode_operations = { |
5282 | .truncate = btrfs_truncate, | 5839 | .truncate = btrfs_truncate, |
5283 | .getattr = btrfs_getattr, | 5840 | .getattr = btrfs_getattr, |
5284 | .setattr = btrfs_setattr, | 5841 | .setattr = btrfs_setattr, |
@@ -5290,7 +5847,7 @@ static struct inode_operations btrfs_file_inode_operations = { | |||
5290 | .fallocate = btrfs_fallocate, | 5847 | .fallocate = btrfs_fallocate, |
5291 | .fiemap = btrfs_fiemap, | 5848 | .fiemap = btrfs_fiemap, |
5292 | }; | 5849 | }; |
5293 | static struct inode_operations btrfs_special_inode_operations = { | 5850 | static const struct inode_operations btrfs_special_inode_operations = { |
5294 | .getattr = btrfs_getattr, | 5851 | .getattr = btrfs_getattr, |
5295 | .setattr = btrfs_setattr, | 5852 | .setattr = btrfs_setattr, |
5296 | .permission = btrfs_permission, | 5853 | .permission = btrfs_permission, |
@@ -5299,7 +5856,7 @@ static struct inode_operations btrfs_special_inode_operations = { | |||
5299 | .listxattr = btrfs_listxattr, | 5856 | .listxattr = btrfs_listxattr, |
5300 | .removexattr = btrfs_removexattr, | 5857 | .removexattr = btrfs_removexattr, |
5301 | }; | 5858 | }; |
5302 | static struct inode_operations btrfs_symlink_inode_operations = { | 5859 | static const struct inode_operations btrfs_symlink_inode_operations = { |
5303 | .readlink = generic_readlink, | 5860 | .readlink = generic_readlink, |
5304 | .follow_link = page_follow_link_light, | 5861 | .follow_link = page_follow_link_light, |
5305 | .put_link = page_put_link, | 5862 | .put_link = page_put_link, |
@@ -5309,3 +5866,7 @@ static struct inode_operations btrfs_symlink_inode_operations = { | |||
5309 | .listxattr = btrfs_listxattr, | 5866 | .listxattr = btrfs_listxattr, |
5310 | .removexattr = btrfs_removexattr, | 5867 | .removexattr = btrfs_removexattr, |
5311 | }; | 5868 | }; |
5869 | |||
5870 | const struct dentry_operations btrfs_dentry_operations = { | ||
5871 | .d_delete = btrfs_dentry_delete, | ||
5872 | }; | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd88f25889f7..cdbb054102b9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
230 | struct btrfs_root_item root_item; | 230 | struct btrfs_root_item root_item; |
231 | struct btrfs_inode_item *inode_item; | 231 | struct btrfs_inode_item *inode_item; |
232 | struct extent_buffer *leaf; | 232 | struct extent_buffer *leaf; |
233 | struct btrfs_root *new_root = root; | 233 | struct btrfs_root *new_root; |
234 | struct inode *dir; | 234 | struct inode *dir = dentry->d_parent->d_inode; |
235 | int ret; | 235 | int ret; |
236 | int err; | 236 | int err; |
237 | u64 objectid; | 237 | u64 objectid; |
@@ -239,9 +239,15 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
239 | u64 index = 0; | 239 | u64 index = 0; |
240 | unsigned long nr = 1; | 240 | unsigned long nr = 1; |
241 | 241 | ||
242 | ret = btrfs_check_metadata_free_space(root); | 242 | /* |
243 | * 1 - inode item | ||
244 | * 2 - refs | ||
245 | * 1 - root item | ||
246 | * 2 - dir items | ||
247 | */ | ||
248 | ret = btrfs_reserve_metadata_space(root, 6); | ||
243 | if (ret) | 249 | if (ret) |
244 | goto fail_commit; | 250 | return ret; |
245 | 251 | ||
246 | trans = btrfs_start_transaction(root, 1); | 252 | trans = btrfs_start_transaction(root, 1); |
247 | BUG_ON(!trans); | 253 | BUG_ON(!trans); |
@@ -304,11 +310,17 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
304 | if (ret) | 310 | if (ret) |
305 | goto fail; | 311 | goto fail; |
306 | 312 | ||
313 | key.offset = (u64)-1; | ||
314 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
315 | BUG_ON(IS_ERR(new_root)); | ||
316 | |||
317 | btrfs_record_root_in_trans(trans, new_root); | ||
318 | |||
319 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid, | ||
320 | BTRFS_I(dir)->block_group); | ||
307 | /* | 321 | /* |
308 | * insert the directory item | 322 | * insert the directory item |
309 | */ | 323 | */ |
310 | key.offset = (u64)-1; | ||
311 | dir = dentry->d_parent->d_inode; | ||
312 | ret = btrfs_set_inode_index(dir, &index); | 324 | ret = btrfs_set_inode_index(dir, &index); |
313 | BUG_ON(ret); | 325 | BUG_ON(ret); |
314 | 326 | ||
@@ -322,43 +334,20 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
322 | ret = btrfs_update_inode(trans, root, dir); | 334 | ret = btrfs_update_inode(trans, root, dir); |
323 | BUG_ON(ret); | 335 | BUG_ON(ret); |
324 | 336 | ||
325 | /* add the backref first */ | ||
326 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | 337 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, |
327 | objectid, BTRFS_ROOT_BACKREF_KEY, | 338 | objectid, root->root_key.objectid, |
328 | root->root_key.objectid, | ||
329 | dir->i_ino, index, name, namelen); | 339 | dir->i_ino, index, name, namelen); |
330 | 340 | ||
331 | BUG_ON(ret); | 341 | BUG_ON(ret); |
332 | 342 | ||
333 | /* now add the forward ref */ | 343 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
334 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
335 | root->root_key.objectid, BTRFS_ROOT_REF_KEY, | ||
336 | objectid, | ||
337 | dir->i_ino, index, name, namelen); | ||
338 | |||
339 | BUG_ON(ret); | ||
340 | |||
341 | ret = btrfs_commit_transaction(trans, root); | ||
342 | if (ret) | ||
343 | goto fail_commit; | ||
344 | |||
345 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
346 | BUG_ON(!new_root); | ||
347 | |||
348 | trans = btrfs_start_transaction(new_root, 1); | ||
349 | BUG_ON(!trans); | ||
350 | |||
351 | ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, | ||
352 | BTRFS_I(dir)->block_group); | ||
353 | if (ret) | ||
354 | goto fail; | ||
355 | |||
356 | fail: | 344 | fail: |
357 | nr = trans->blocks_used; | 345 | nr = trans->blocks_used; |
358 | err = btrfs_commit_transaction(trans, new_root); | 346 | err = btrfs_commit_transaction(trans, root); |
359 | if (err && !ret) | 347 | if (err && !ret) |
360 | ret = err; | 348 | ret = err; |
361 | fail_commit: | 349 | |
350 | btrfs_unreserve_metadata_space(root, 6); | ||
362 | btrfs_btree_balance_dirty(root, nr); | 351 | btrfs_btree_balance_dirty(root, nr); |
363 | return ret; | 352 | return ret; |
364 | } | 353 | } |
@@ -375,19 +364,27 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
375 | if (!root->ref_cows) | 364 | if (!root->ref_cows) |
376 | return -EINVAL; | 365 | return -EINVAL; |
377 | 366 | ||
378 | ret = btrfs_check_metadata_free_space(root); | 367 | /* |
368 | * 1 - inode item | ||
369 | * 2 - refs | ||
370 | * 1 - root item | ||
371 | * 2 - dir items | ||
372 | */ | ||
373 | ret = btrfs_reserve_metadata_space(root, 6); | ||
379 | if (ret) | 374 | if (ret) |
380 | goto fail_unlock; | 375 | goto fail_unlock; |
381 | 376 | ||
382 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 377 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
383 | if (!pending_snapshot) { | 378 | if (!pending_snapshot) { |
384 | ret = -ENOMEM; | 379 | ret = -ENOMEM; |
380 | btrfs_unreserve_metadata_space(root, 6); | ||
385 | goto fail_unlock; | 381 | goto fail_unlock; |
386 | } | 382 | } |
387 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | 383 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); |
388 | if (!pending_snapshot->name) { | 384 | if (!pending_snapshot->name) { |
389 | ret = -ENOMEM; | 385 | ret = -ENOMEM; |
390 | kfree(pending_snapshot); | 386 | kfree(pending_snapshot); |
387 | btrfs_unreserve_metadata_space(root, 6); | ||
391 | goto fail_unlock; | 388 | goto fail_unlock; |
392 | } | 389 | } |
393 | memcpy(pending_snapshot->name, name, namelen); | 390 | memcpy(pending_snapshot->name, name, namelen); |
@@ -420,14 +417,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
420 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup | 417 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup |
421 | * inside this filesystem so it's quite a bit simpler. | 418 | * inside this filesystem so it's quite a bit simpler. |
422 | */ | 419 | */ |
423 | static noinline int btrfs_mksubvol(struct path *parent, char *name, | 420 | static noinline int btrfs_mksubvol(struct path *parent, |
424 | int mode, int namelen, | 421 | char *name, int namelen, |
425 | struct btrfs_root *snap_src) | 422 | struct btrfs_root *snap_src) |
426 | { | 423 | { |
424 | struct inode *dir = parent->dentry->d_inode; | ||
427 | struct dentry *dentry; | 425 | struct dentry *dentry; |
428 | int error; | 426 | int error; |
429 | 427 | ||
430 | mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 428 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
431 | 429 | ||
432 | dentry = lookup_one_len(name, parent->dentry, namelen); | 430 | dentry = lookup_one_len(name, parent->dentry, namelen); |
433 | error = PTR_ERR(dentry); | 431 | error = PTR_ERR(dentry); |
@@ -438,99 +436,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, | |||
438 | if (dentry->d_inode) | 436 | if (dentry->d_inode) |
439 | goto out_dput; | 437 | goto out_dput; |
440 | 438 | ||
441 | if (!IS_POSIXACL(parent->dentry->d_inode)) | ||
442 | mode &= ~current_umask(); | ||
443 | |||
444 | error = mnt_want_write(parent->mnt); | 439 | error = mnt_want_write(parent->mnt); |
445 | if (error) | 440 | if (error) |
446 | goto out_dput; | 441 | goto out_dput; |
447 | 442 | ||
448 | error = btrfs_may_create(parent->dentry->d_inode, dentry); | 443 | error = btrfs_may_create(dir, dentry); |
449 | if (error) | 444 | if (error) |
450 | goto out_drop_write; | 445 | goto out_drop_write; |
451 | 446 | ||
452 | /* | 447 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
453 | * Actually perform the low-level subvolume creation after all | 448 | |
454 | * this VFS fuzz. | 449 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) |
455 | * | 450 | goto out_up_read; |
456 | * Eventually we want to pass in an inode under which we create this | 451 | |
457 | * subvolume, but for now all are under the filesystem root. | ||
458 | * | ||
459 | * Also we should pass on the mode eventually to allow creating new | ||
460 | * subvolume with specific mode bits. | ||
461 | */ | ||
462 | if (snap_src) { | 452 | if (snap_src) { |
463 | struct dentry *dir = dentry->d_parent; | 453 | error = create_snapshot(snap_src, dentry, |
464 | struct dentry *test = dir->d_parent; | 454 | name, namelen); |
465 | struct btrfs_path *path = btrfs_alloc_path(); | ||
466 | int ret; | ||
467 | u64 test_oid; | ||
468 | u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid; | ||
469 | |||
470 | test_oid = snap_src->root_key.objectid; | ||
471 | |||
472 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
473 | path, parent_oid, test_oid); | ||
474 | if (ret == 0) | ||
475 | goto create; | ||
476 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
477 | |||
478 | /* we need to make sure we aren't creating a directory loop | ||
479 | * by taking a snapshot of something that has our current | ||
480 | * subvol in its directory tree. So, this loops through | ||
481 | * the dentries and checks the forward refs for each subvolume | ||
482 | * to see if is references the subvolume where we are | ||
483 | * placing this new snapshot. | ||
484 | */ | ||
485 | while (1) { | ||
486 | if (!test || | ||
487 | dir == snap_src->fs_info->sb->s_root || | ||
488 | test == snap_src->fs_info->sb->s_root || | ||
489 | test->d_inode->i_sb != snap_src->fs_info->sb) { | ||
490 | break; | ||
491 | } | ||
492 | if (S_ISLNK(test->d_inode->i_mode)) { | ||
493 | printk(KERN_INFO "Btrfs symlink in snapshot " | ||
494 | "path, failed\n"); | ||
495 | error = -EMLINK; | ||
496 | btrfs_free_path(path); | ||
497 | goto out_drop_write; | ||
498 | } | ||
499 | test_oid = | ||
500 | BTRFS_I(test->d_inode)->root->root_key.objectid; | ||
501 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
502 | path, test_oid, parent_oid); | ||
503 | if (ret == 0) { | ||
504 | printk(KERN_INFO "Btrfs snapshot creation " | ||
505 | "failed, looping\n"); | ||
506 | error = -EMLINK; | ||
507 | btrfs_free_path(path); | ||
508 | goto out_drop_write; | ||
509 | } | ||
510 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
511 | test = test->d_parent; | ||
512 | } | ||
513 | create: | ||
514 | btrfs_free_path(path); | ||
515 | error = create_snapshot(snap_src, dentry, name, namelen); | ||
516 | } else { | 455 | } else { |
517 | error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, | 456 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
518 | dentry, name, namelen); | 457 | name, namelen); |
519 | } | 458 | } |
520 | if (error) | 459 | if (!error) |
521 | goto out_drop_write; | 460 | fsnotify_mkdir(dir, dentry); |
522 | 461 | out_up_read: | |
523 | fsnotify_mkdir(parent->dentry->d_inode, dentry); | 462 | up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
524 | out_drop_write: | 463 | out_drop_write: |
525 | mnt_drop_write(parent->mnt); | 464 | mnt_drop_write(parent->mnt); |
526 | out_dput: | 465 | out_dput: |
527 | dput(dentry); | 466 | dput(dentry); |
528 | out_unlock: | 467 | out_unlock: |
529 | mutex_unlock(&parent->dentry->d_inode->i_mutex); | 468 | mutex_unlock(&dir->i_mutex); |
530 | return error; | 469 | return error; |
531 | } | 470 | } |
532 | 471 | ||
533 | |||
534 | static int btrfs_defrag_file(struct file *file) | 472 | static int btrfs_defrag_file(struct file *file) |
535 | { | 473 | { |
536 | struct inode *inode = fdentry(file)->d_inode; | 474 | struct inode *inode = fdentry(file)->d_inode; |
@@ -596,9 +534,8 @@ again: | |||
596 | clear_page_dirty_for_io(page); | 534 | clear_page_dirty_for_io(page); |
597 | 535 | ||
598 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 536 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
599 | |||
600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
601 | set_page_dirty(page); | 537 | set_page_dirty(page); |
538 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
602 | unlock_page(page); | 539 | unlock_page(page); |
603 | page_cache_release(page); | 540 | page_cache_release(page); |
604 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 541 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
@@ -609,7 +546,8 @@ out_unlock: | |||
609 | return 0; | 546 | return 0; |
610 | } | 547 | } |
611 | 548 | ||
612 | static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | 549 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, |
550 | void __user *arg) | ||
613 | { | 551 | { |
614 | u64 new_size; | 552 | u64 new_size; |
615 | u64 old_size; | 553 | u64 old_size; |
@@ -718,10 +656,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
718 | { | 656 | { |
719 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 657 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
720 | struct btrfs_ioctl_vol_args *vol_args; | 658 | struct btrfs_ioctl_vol_args *vol_args; |
721 | struct btrfs_dir_item *di; | ||
722 | struct btrfs_path *path; | ||
723 | struct file *src_file; | 659 | struct file *src_file; |
724 | u64 root_dirid; | ||
725 | int namelen; | 660 | int namelen; |
726 | int ret = 0; | 661 | int ret = 0; |
727 | 662 | ||
@@ -739,32 +674,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
739 | goto out; | 674 | goto out; |
740 | } | 675 | } |
741 | 676 | ||
742 | path = btrfs_alloc_path(); | ||
743 | if (!path) { | ||
744 | ret = -ENOMEM; | ||
745 | goto out; | ||
746 | } | ||
747 | |||
748 | root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, | ||
749 | di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, | ||
750 | path, root_dirid, | ||
751 | vol_args->name, namelen, 0); | ||
752 | btrfs_free_path(path); | ||
753 | |||
754 | if (di && !IS_ERR(di)) { | ||
755 | ret = -EEXIST; | ||
756 | goto out; | ||
757 | } | ||
758 | |||
759 | if (IS_ERR(di)) { | ||
760 | ret = PTR_ERR(di); | ||
761 | goto out; | ||
762 | } | ||
763 | |||
764 | if (subvol) { | 677 | if (subvol) { |
765 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 678 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
766 | file->f_path.dentry->d_inode->i_mode, | 679 | NULL); |
767 | namelen, NULL); | ||
768 | } else { | 680 | } else { |
769 | struct inode *src_inode; | 681 | struct inode *src_inode; |
770 | src_file = fget(vol_args->fd); | 682 | src_file = fget(vol_args->fd); |
@@ -781,17 +693,157 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
781 | fput(src_file); | 693 | fput(src_file); |
782 | goto out; | 694 | goto out; |
783 | } | 695 | } |
784 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 696 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
785 | file->f_path.dentry->d_inode->i_mode, | 697 | BTRFS_I(src_inode)->root); |
786 | namelen, BTRFS_I(src_inode)->root); | ||
787 | fput(src_file); | 698 | fput(src_file); |
788 | } | 699 | } |
789 | |||
790 | out: | 700 | out: |
791 | kfree(vol_args); | 701 | kfree(vol_args); |
792 | return ret; | 702 | return ret; |
793 | } | 703 | } |
794 | 704 | ||
705 | /* | ||
706 | * helper to check if the subvolume references other subvolumes | ||
707 | */ | ||
708 | static noinline int may_destroy_subvol(struct btrfs_root *root) | ||
709 | { | ||
710 | struct btrfs_path *path; | ||
711 | struct btrfs_key key; | ||
712 | int ret; | ||
713 | |||
714 | path = btrfs_alloc_path(); | ||
715 | if (!path) | ||
716 | return -ENOMEM; | ||
717 | |||
718 | key.objectid = root->root_key.objectid; | ||
719 | key.type = BTRFS_ROOT_REF_KEY; | ||
720 | key.offset = (u64)-1; | ||
721 | |||
722 | ret = btrfs_search_slot(NULL, root->fs_info->tree_root, | ||
723 | &key, path, 0, 0); | ||
724 | if (ret < 0) | ||
725 | goto out; | ||
726 | BUG_ON(ret == 0); | ||
727 | |||
728 | ret = 0; | ||
729 | if (path->slots[0] > 0) { | ||
730 | path->slots[0]--; | ||
731 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
732 | if (key.objectid == root->root_key.objectid && | ||
733 | key.type == BTRFS_ROOT_REF_KEY) | ||
734 | ret = -ENOTEMPTY; | ||
735 | } | ||
736 | out: | ||
737 | btrfs_free_path(path); | ||
738 | return ret; | ||
739 | } | ||
740 | |||
741 | static noinline int btrfs_ioctl_snap_destroy(struct file *file, | ||
742 | void __user *arg) | ||
743 | { | ||
744 | struct dentry *parent = fdentry(file); | ||
745 | struct dentry *dentry; | ||
746 | struct inode *dir = parent->d_inode; | ||
747 | struct inode *inode; | ||
748 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
749 | struct btrfs_root *dest = NULL; | ||
750 | struct btrfs_ioctl_vol_args *vol_args; | ||
751 | struct btrfs_trans_handle *trans; | ||
752 | int namelen; | ||
753 | int ret; | ||
754 | int err = 0; | ||
755 | |||
756 | if (!capable(CAP_SYS_ADMIN)) | ||
757 | return -EPERM; | ||
758 | |||
759 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
760 | if (IS_ERR(vol_args)) | ||
761 | return PTR_ERR(vol_args); | ||
762 | |||
763 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
764 | namelen = strlen(vol_args->name); | ||
765 | if (strchr(vol_args->name, '/') || | ||
766 | strncmp(vol_args->name, "..", namelen) == 0) { | ||
767 | err = -EINVAL; | ||
768 | goto out; | ||
769 | } | ||
770 | |||
771 | err = mnt_want_write(file->f_path.mnt); | ||
772 | if (err) | ||
773 | goto out; | ||
774 | |||
775 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
776 | dentry = lookup_one_len(vol_args->name, parent, namelen); | ||
777 | if (IS_ERR(dentry)) { | ||
778 | err = PTR_ERR(dentry); | ||
779 | goto out_unlock_dir; | ||
780 | } | ||
781 | |||
782 | if (!dentry->d_inode) { | ||
783 | err = -ENOENT; | ||
784 | goto out_dput; | ||
785 | } | ||
786 | |||
787 | inode = dentry->d_inode; | ||
788 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
789 | err = -EINVAL; | ||
790 | goto out_dput; | ||
791 | } | ||
792 | |||
793 | dest = BTRFS_I(inode)->root; | ||
794 | |||
795 | mutex_lock(&inode->i_mutex); | ||
796 | err = d_invalidate(dentry); | ||
797 | if (err) | ||
798 | goto out_unlock; | ||
799 | |||
800 | down_write(&root->fs_info->subvol_sem); | ||
801 | |||
802 | err = may_destroy_subvol(dest); | ||
803 | if (err) | ||
804 | goto out_up_write; | ||
805 | |||
806 | trans = btrfs_start_transaction(root, 1); | ||
807 | ret = btrfs_unlink_subvol(trans, root, dir, | ||
808 | dest->root_key.objectid, | ||
809 | dentry->d_name.name, | ||
810 | dentry->d_name.len); | ||
811 | BUG_ON(ret); | ||
812 | |||
813 | btrfs_record_root_in_trans(trans, dest); | ||
814 | |||
815 | memset(&dest->root_item.drop_progress, 0, | ||
816 | sizeof(dest->root_item.drop_progress)); | ||
817 | dest->root_item.drop_level = 0; | ||
818 | btrfs_set_root_refs(&dest->root_item, 0); | ||
819 | |||
820 | ret = btrfs_insert_orphan_item(trans, | ||
821 | root->fs_info->tree_root, | ||
822 | dest->root_key.objectid); | ||
823 | BUG_ON(ret); | ||
824 | |||
825 | ret = btrfs_commit_transaction(trans, root); | ||
826 | BUG_ON(ret); | ||
827 | inode->i_flags |= S_DEAD; | ||
828 | out_up_write: | ||
829 | up_write(&root->fs_info->subvol_sem); | ||
830 | out_unlock: | ||
831 | mutex_unlock(&inode->i_mutex); | ||
832 | if (!err) { | ||
833 | shrink_dcache_sb(root->fs_info->sb); | ||
834 | btrfs_invalidate_inodes(dest); | ||
835 | d_delete(dentry); | ||
836 | } | ||
837 | out_dput: | ||
838 | dput(dentry); | ||
839 | out_unlock_dir: | ||
840 | mutex_unlock(&dir->i_mutex); | ||
841 | mnt_drop_write(file->f_path.mnt); | ||
842 | out: | ||
843 | kfree(vol_args); | ||
844 | return err; | ||
845 | } | ||
846 | |||
795 | static int btrfs_ioctl_defrag(struct file *file) | 847 | static int btrfs_ioctl_defrag(struct file *file) |
796 | { | 848 | { |
797 | struct inode *inode = fdentry(file)->d_inode; | 849 | struct inode *inode = fdentry(file)->d_inode; |
@@ -865,8 +917,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
865 | return ret; | 917 | return ret; |
866 | } | 918 | } |
867 | 919 | ||
868 | static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | 920 | static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, |
869 | u64 off, u64 olen, u64 destoff) | 921 | u64 off, u64 olen, u64 destoff) |
870 | { | 922 | { |
871 | struct inode *inode = fdentry(file)->d_inode; | 923 | struct inode *inode = fdentry(file)->d_inode; |
872 | struct btrfs_root *root = BTRFS_I(inode)->root; | 924 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -976,7 +1028,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
976 | 1028 | ||
977 | /* punch hole in destination first */ | 1029 | /* punch hole in destination first */ |
978 | btrfs_drop_extents(trans, root, inode, off, off + len, | 1030 | btrfs_drop_extents(trans, root, inode, off, off + len, |
979 | off + len, 0, &hint_byte); | 1031 | off + len, 0, &hint_byte, 1); |
980 | 1032 | ||
981 | /* clone data */ | 1033 | /* clone data */ |
982 | key.objectid = src->i_ino; | 1034 | key.objectid = src->i_ino; |
@@ -1071,9 +1123,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1071 | datao += off - key.offset; | 1123 | datao += off - key.offset; |
1072 | datal -= off - key.offset; | 1124 | datal -= off - key.offset; |
1073 | } | 1125 | } |
1074 | if (key.offset + datao + datal + key.offset > | 1126 | |
1075 | off + len) | 1127 | if (key.offset + datal > off + len) |
1076 | datal = off + len - key.offset - datao; | 1128 | datal = off + len - key.offset; |
1129 | |||
1077 | /* disko == 0 means it's a hole */ | 1130 | /* disko == 0 means it's a hole */ |
1078 | if (!disko) | 1131 | if (!disko) |
1079 | datao = 0; | 1132 | datao = 0; |
@@ -1182,15 +1235,15 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1182 | struct inode *inode = fdentry(file)->d_inode; | 1235 | struct inode *inode = fdentry(file)->d_inode; |
1183 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1236 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1184 | struct btrfs_trans_handle *trans; | 1237 | struct btrfs_trans_handle *trans; |
1185 | int ret = 0; | 1238 | int ret; |
1186 | 1239 | ||
1240 | ret = -EPERM; | ||
1187 | if (!capable(CAP_SYS_ADMIN)) | 1241 | if (!capable(CAP_SYS_ADMIN)) |
1188 | return -EPERM; | 1242 | goto out; |
1189 | 1243 | ||
1190 | if (file->private_data) { | 1244 | ret = -EINPROGRESS; |
1191 | ret = -EINPROGRESS; | 1245 | if (file->private_data) |
1192 | goto out; | 1246 | goto out; |
1193 | } | ||
1194 | 1247 | ||
1195 | ret = mnt_want_write(file->f_path.mnt); | 1248 | ret = mnt_want_write(file->f_path.mnt); |
1196 | if (ret) | 1249 | if (ret) |
@@ -1200,12 +1253,19 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
1200 | root->fs_info->open_ioctl_trans++; | 1253 | root->fs_info->open_ioctl_trans++; |
1201 | mutex_unlock(&root->fs_info->trans_mutex); | 1254 | mutex_unlock(&root->fs_info->trans_mutex); |
1202 | 1255 | ||
1256 | ret = -ENOMEM; | ||
1203 | trans = btrfs_start_ioctl_transaction(root, 0); | 1257 | trans = btrfs_start_ioctl_transaction(root, 0); |
1204 | if (trans) | 1258 | if (!trans) |
1205 | file->private_data = trans; | 1259 | goto out_drop; |
1206 | else | 1260 | |
1207 | ret = -ENOMEM; | 1261 | file->private_data = trans; |
1208 | /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ | 1262 | return 0; |
1263 | |||
1264 | out_drop: | ||
1265 | mutex_lock(&root->fs_info->trans_mutex); | ||
1266 | root->fs_info->open_ioctl_trans--; | ||
1267 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1268 | mnt_drop_write(file->f_path.mnt); | ||
1209 | out: | 1269 | out: |
1210 | return ret; | 1270 | return ret; |
1211 | } | 1271 | } |
@@ -1221,24 +1281,20 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
1221 | struct inode *inode = fdentry(file)->d_inode; | 1281 | struct inode *inode = fdentry(file)->d_inode; |
1222 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1282 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1223 | struct btrfs_trans_handle *trans; | 1283 | struct btrfs_trans_handle *trans; |
1224 | int ret = 0; | ||
1225 | 1284 | ||
1226 | trans = file->private_data; | 1285 | trans = file->private_data; |
1227 | if (!trans) { | 1286 | if (!trans) |
1228 | ret = -EINVAL; | 1287 | return -EINVAL; |
1229 | goto out; | ||
1230 | } | ||
1231 | btrfs_end_transaction(trans, root); | ||
1232 | file->private_data = NULL; | 1288 | file->private_data = NULL; |
1233 | 1289 | ||
1290 | btrfs_end_transaction(trans, root); | ||
1291 | |||
1234 | mutex_lock(&root->fs_info->trans_mutex); | 1292 | mutex_lock(&root->fs_info->trans_mutex); |
1235 | root->fs_info->open_ioctl_trans--; | 1293 | root->fs_info->open_ioctl_trans--; |
1236 | mutex_unlock(&root->fs_info->trans_mutex); | 1294 | mutex_unlock(&root->fs_info->trans_mutex); |
1237 | 1295 | ||
1238 | mnt_drop_write(file->f_path.mnt); | 1296 | mnt_drop_write(file->f_path.mnt); |
1239 | 1297 | return 0; | |
1240 | out: | ||
1241 | return ret; | ||
1242 | } | 1298 | } |
1243 | 1299 | ||
1244 | long btrfs_ioctl(struct file *file, unsigned int | 1300 | long btrfs_ioctl(struct file *file, unsigned int |
@@ -1258,6 +1314,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
1258 | return btrfs_ioctl_snap_create(file, argp, 0); | 1314 | return btrfs_ioctl_snap_create(file, argp, 0); |
1259 | case BTRFS_IOC_SUBVOL_CREATE: | 1315 | case BTRFS_IOC_SUBVOL_CREATE: |
1260 | return btrfs_ioctl_snap_create(file, argp, 1); | 1316 | return btrfs_ioctl_snap_create(file, argp, 1); |
1317 | case BTRFS_IOC_SNAP_DESTROY: | ||
1318 | return btrfs_ioctl_snap_destroy(file, argp); | ||
1261 | case BTRFS_IOC_DEFRAG: | 1319 | case BTRFS_IOC_DEFRAG: |
1262 | return btrfs_ioctl_defrag(file); | 1320 | return btrfs_ioctl_defrag(file); |
1263 | case BTRFS_IOC_RESIZE: | 1321 | case BTRFS_IOC_RESIZE: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b320b103fa13..bc49914475eb 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args { | |||
65 | 65 | ||
66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ | 66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ |
67 | struct btrfs_ioctl_vol_args) | 67 | struct btrfs_ioctl_vol_args) |
68 | 68 | #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ | |
69 | struct btrfs_ioctl_vol_args) | ||
69 | #endif | 70 | #endif |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7b2f401e604e..5799bc46a309 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
159 | * | 159 | * |
160 | * len is the length of the extent | 160 | * len is the length of the extent |
161 | * | 161 | * |
162 | * This also sets the EXTENT_ORDERED bit on the range in the inode. | ||
163 | * | ||
164 | * The tree is given a single reference on the ordered extent that was | 162 | * The tree is given a single reference on the ordered extent that was |
165 | * inserted. | 163 | * inserted. |
166 | */ | 164 | */ |
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
181 | entry->start = start; | 179 | entry->start = start; |
182 | entry->len = len; | 180 | entry->len = len; |
183 | entry->disk_len = disk_len; | 181 | entry->disk_len = disk_len; |
182 | entry->bytes_left = len; | ||
184 | entry->inode = inode; | 183 | entry->inode = inode; |
185 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 184 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
186 | set_bit(type, &entry->flags); | 185 | set_bit(type, &entry->flags); |
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
195 | &entry->rb_node); | 194 | &entry->rb_node); |
196 | BUG_ON(node); | 195 | BUG_ON(node); |
197 | 196 | ||
198 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | ||
199 | entry_end(entry) - 1, GFP_NOFS); | ||
200 | |||
201 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 197 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
202 | list_add_tail(&entry->root_extent_list, | 198 | list_add_tail(&entry->root_extent_list, |
203 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 199 | &BTRFS_I(inode)->root->fs_info->ordered_extents); |
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
241 | struct btrfs_ordered_inode_tree *tree; | 237 | struct btrfs_ordered_inode_tree *tree; |
242 | struct rb_node *node; | 238 | struct rb_node *node; |
243 | struct btrfs_ordered_extent *entry; | 239 | struct btrfs_ordered_extent *entry; |
244 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
245 | int ret; | 240 | int ret; |
246 | 241 | ||
247 | tree = &BTRFS_I(inode)->ordered_tree; | 242 | tree = &BTRFS_I(inode)->ordered_tree; |
248 | mutex_lock(&tree->mutex); | 243 | mutex_lock(&tree->mutex); |
249 | clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, | ||
250 | GFP_NOFS); | ||
251 | node = tree_search(tree, file_offset); | 244 | node = tree_search(tree, file_offset); |
252 | if (!node) { | 245 | if (!node) { |
253 | ret = 1; | 246 | ret = 1; |
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
260 | goto out; | 253 | goto out; |
261 | } | 254 | } |
262 | 255 | ||
263 | ret = test_range_bit(io_tree, entry->file_offset, | 256 | if (io_size > entry->bytes_left) { |
264 | entry->file_offset + entry->len - 1, | 257 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", |
265 | EXTENT_ORDERED, 0); | 258 | (unsigned long long)entry->bytes_left, |
266 | if (ret == 0) | 259 | (unsigned long long)io_size); |
260 | } | ||
261 | entry->bytes_left -= io_size; | ||
262 | if (entry->bytes_left == 0) | ||
267 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 263 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
264 | else | ||
265 | ret = 1; | ||
268 | out: | 266 | out: |
269 | mutex_unlock(&tree->mutex); | 267 | mutex_unlock(&tree->mutex); |
270 | return ret == 0; | 268 | return ret == 0; |
@@ -308,6 +306,12 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
308 | tree->last = NULL; | 306 | tree->last = NULL; |
309 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 307 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
310 | 308 | ||
309 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
310 | BTRFS_I(inode)->outstanding_extents--; | ||
311 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
312 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
313 | inode, 1); | ||
314 | |||
311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 315 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
312 | list_del_init(&entry->root_extent_list); | 316 | list_del_init(&entry->root_extent_list); |
313 | 317 | ||
@@ -460,7 +464,7 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
460 | * start IO on any dirty ones so the wait doesn't stall waiting | 464 | * start IO on any dirty ones so the wait doesn't stall waiting |
461 | * for pdflush to find them | 465 | * for pdflush to find them |
462 | */ | 466 | */ |
463 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); | 467 | filemap_fdatawrite_range(inode->i_mapping, start, end); |
464 | if (wait) { | 468 | if (wait) { |
465 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 469 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
466 | &entry->flags)); | 470 | &entry->flags)); |
@@ -476,6 +480,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
476 | u64 orig_end; | 480 | u64 orig_end; |
477 | u64 wait_end; | 481 | u64 wait_end; |
478 | struct btrfs_ordered_extent *ordered; | 482 | struct btrfs_ordered_extent *ordered; |
483 | int found; | ||
479 | 484 | ||
480 | if (start + len < start) { | 485 | if (start + len < start) { |
481 | orig_end = INT_LIMIT(loff_t); | 486 | orig_end = INT_LIMIT(loff_t); |
@@ -489,19 +494,18 @@ again: | |||
489 | /* start IO across the range first to instantiate any delalloc | 494 | /* start IO across the range first to instantiate any delalloc |
490 | * extents | 495 | * extents |
491 | */ | 496 | */ |
492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 497 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
493 | 498 | ||
494 | /* The compression code will leave pages locked but return from | 499 | /* The compression code will leave pages locked but return from |
495 | * writepage without setting the page writeback. Starting again | 500 | * writepage without setting the page writeback. Starting again |
496 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | 501 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. |
497 | */ | 502 | */ |
498 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 503 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
499 | 504 | ||
500 | btrfs_wait_on_page_writeback_range(inode->i_mapping, | 505 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
501 | start >> PAGE_CACHE_SHIFT, | ||
502 | orig_end >> PAGE_CACHE_SHIFT); | ||
503 | 506 | ||
504 | end = orig_end; | 507 | end = orig_end; |
508 | found = 0; | ||
505 | while (1) { | 509 | while (1) { |
506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 510 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
507 | if (!ordered) | 511 | if (!ordered) |
@@ -514,6 +518,7 @@ again: | |||
514 | btrfs_put_ordered_extent(ordered); | 518 | btrfs_put_ordered_extent(ordered); |
515 | break; | 519 | break; |
516 | } | 520 | } |
521 | found++; | ||
517 | btrfs_start_ordered_extent(inode, ordered, 1); | 522 | btrfs_start_ordered_extent(inode, ordered, 1); |
518 | end = ordered->file_offset; | 523 | end = ordered->file_offset; |
519 | btrfs_put_ordered_extent(ordered); | 524 | btrfs_put_ordered_extent(ordered); |
@@ -521,8 +526,8 @@ again: | |||
521 | break; | 526 | break; |
522 | end--; | 527 | end--; |
523 | } | 528 | } |
524 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | 529 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, |
525 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | 530 | EXTENT_DELALLOC, 0, NULL)) { |
526 | schedule_timeout(1); | 531 | schedule_timeout(1); |
527 | goto again; | 532 | goto again; |
528 | } | 533 | } |
@@ -613,7 +618,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
613 | */ | 618 | */ |
614 | if (test_range_bit(io_tree, disk_i_size, | 619 | if (test_range_bit(io_tree, disk_i_size, |
615 | ordered->file_offset + ordered->len - 1, | 620 | ordered->file_offset + ordered->len - 1, |
616 | EXTENT_DELALLOC, 0)) { | 621 | EXTENT_DELALLOC, 0, NULL)) { |
617 | goto out; | 622 | goto out; |
618 | } | 623 | } |
619 | /* | 624 | /* |
@@ -664,7 +669,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
664 | */ | 669 | */ |
665 | if (i_size_test > entry_end(ordered) && | 670 | if (i_size_test > entry_end(ordered) && |
666 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, | 671 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, |
667 | EXTENT_DELALLOC, 0)) { | 672 | EXTENT_DELALLOC, 0, NULL)) { |
668 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | 673 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); |
669 | } | 674 | } |
670 | BTRFS_I(inode)->disk_i_size = new_i_size; | 675 | BTRFS_I(inode)->disk_i_size = new_i_size; |
@@ -715,89 +720,6 @@ out: | |||
715 | } | 720 | } |
716 | 721 | ||
717 | 722 | ||
718 | /** | ||
719 | * taken from mm/filemap.c because it isn't exported | ||
720 | * | ||
721 | * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range | ||
722 | * @mapping: address space structure to write | ||
723 | * @start: offset in bytes where the range starts | ||
724 | * @end: offset in bytes where the range ends (inclusive) | ||
725 | * @sync_mode: enable synchronous operation | ||
726 | * | ||
727 | * Start writeback against all of a mapping's dirty pages that lie | ||
728 | * within the byte offsets <start, end> inclusive. | ||
729 | * | ||
730 | * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as | ||
731 | * opposed to a regular memory cleansing writeback. The difference between | ||
732 | * these two operations is that if a dirty page/buffer is encountered, it must | ||
733 | * be waited upon, and not just skipped over. | ||
734 | */ | ||
735 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
736 | loff_t end, int sync_mode) | ||
737 | { | ||
738 | struct writeback_control wbc = { | ||
739 | .sync_mode = sync_mode, | ||
740 | .nr_to_write = mapping->nrpages * 2, | ||
741 | .range_start = start, | ||
742 | .range_end = end, | ||
743 | }; | ||
744 | return btrfs_writepages(mapping, &wbc); | ||
745 | } | ||
746 | |||
747 | /** | ||
748 | * taken from mm/filemap.c because it isn't exported | ||
749 | * | ||
750 | * wait_on_page_writeback_range - wait for writeback to complete | ||
751 | * @mapping: target address_space | ||
752 | * @start: beginning page index | ||
753 | * @end: ending page index | ||
754 | * | ||
755 | * Wait for writeback to complete against pages indexed by start->end | ||
756 | * inclusive | ||
757 | */ | ||
758 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
759 | pgoff_t start, pgoff_t end) | ||
760 | { | ||
761 | struct pagevec pvec; | ||
762 | int nr_pages; | ||
763 | int ret = 0; | ||
764 | pgoff_t index; | ||
765 | |||
766 | if (end < start) | ||
767 | return 0; | ||
768 | |||
769 | pagevec_init(&pvec, 0); | ||
770 | index = start; | ||
771 | while ((index <= end) && | ||
772 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
773 | PAGECACHE_TAG_WRITEBACK, | ||
774 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | ||
775 | unsigned i; | ||
776 | |||
777 | for (i = 0; i < nr_pages; i++) { | ||
778 | struct page *page = pvec.pages[i]; | ||
779 | |||
780 | /* until radix tree lookup accepts end_index */ | ||
781 | if (page->index > end) | ||
782 | continue; | ||
783 | |||
784 | wait_on_page_writeback(page); | ||
785 | if (PageError(page)) | ||
786 | ret = -EIO; | ||
787 | } | ||
788 | pagevec_release(&pvec); | ||
789 | cond_resched(); | ||
790 | } | ||
791 | |||
792 | /* Check for outstanding write errors */ | ||
793 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | ||
794 | ret = -ENOSPC; | ||
795 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | ||
796 | ret = -EIO; | ||
797 | |||
798 | return ret; | ||
799 | } | ||
800 | |||
801 | /* | 723 | /* |
802 | * add a given inode to the list of inodes that must be fully on | 724 | * add a given inode to the list of inodes that must be fully on |
803 | * disk before a transaction commit finishes. | 725 | * disk before a transaction commit finishes. |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3d31c8827b01..f82e87488ca8 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -85,6 +85,9 @@ struct btrfs_ordered_extent { | |||
85 | /* extent length on disk */ | 85 | /* extent length on disk */ |
86 | u64 disk_len; | 86 | u64 disk_len; |
87 | 87 | ||
88 | /* number of bytes that still need writing */ | ||
89 | u64 bytes_left; | ||
90 | |||
88 | /* flags (described above) */ | 91 | /* flags (described above) */ |
89 | unsigned long flags; | 92 | unsigned long flags; |
90 | 93 | ||
@@ -150,10 +153,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | |||
150 | int btrfs_ordered_update_i_size(struct inode *inode, | 153 | int btrfs_ordered_update_i_size(struct inode *inode, |
151 | struct btrfs_ordered_extent *ordered); | 154 | struct btrfs_ordered_extent *ordered); |
152 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
153 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
154 | pgoff_t start, pgoff_t end); | ||
155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
156 | loff_t end, int sync_mode); | ||
157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | 156 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); |
158 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 157 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); |
159 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 158 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 3c0d52af4f80..79cba5fbc28e 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
@@ -65,3 +65,23 @@ out: | |||
65 | btrfs_free_path(path); | 65 | btrfs_free_path(path); |
66 | return ret; | 66 | return ret; |
67 | } | 67 | } |
68 | |||
69 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset) | ||
70 | { | ||
71 | struct btrfs_path *path; | ||
72 | struct btrfs_key key; | ||
73 | int ret; | ||
74 | |||
75 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
76 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
77 | key.offset = offset; | ||
78 | |||
79 | path = btrfs_alloc_path(); | ||
80 | if (!path) | ||
81 | return -ENOMEM; | ||
82 | |||
83 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
84 | |||
85 | btrfs_free_path(path); | ||
86 | return ret; | ||
87 | } | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c04f7f212602..cfcc93c93a7b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -121,6 +121,15 @@ struct inodevec { | |||
121 | int nr; | 121 | int nr; |
122 | }; | 122 | }; |
123 | 123 | ||
124 | #define MAX_EXTENTS 128 | ||
125 | |||
126 | struct file_extent_cluster { | ||
127 | u64 start; | ||
128 | u64 end; | ||
129 | u64 boundary[MAX_EXTENTS]; | ||
130 | unsigned int nr; | ||
131 | }; | ||
132 | |||
124 | struct reloc_control { | 133 | struct reloc_control { |
125 | /* block group to relocate */ | 134 | /* block group to relocate */ |
126 | struct btrfs_block_group_cache *block_group; | 135 | struct btrfs_block_group_cache *block_group; |
@@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
2180 | struct reloc_control *rc) | 2189 | struct reloc_control *rc) |
2181 | { | 2190 | { |
2182 | if (test_range_bit(&rc->processed_blocks, bytenr, | 2191 | if (test_range_bit(&rc->processed_blocks, bytenr, |
2183 | bytenr + blocksize - 1, EXTENT_DIRTY, 1)) | 2192 | bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) |
2184 | return 1; | 2193 | return 1; |
2185 | return 0; | 2194 | return 0; |
2186 | } | 2195 | } |
@@ -2529,56 +2538,94 @@ out: | |||
2529 | } | 2538 | } |
2530 | 2539 | ||
2531 | static noinline_for_stack | 2540 | static noinline_for_stack |
2532 | int relocate_inode_pages(struct inode *inode, u64 start, u64 len) | 2541 | int setup_extent_mapping(struct inode *inode, u64 start, u64 end, |
2542 | u64 block_start) | ||
2543 | { | ||
2544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2545 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
2546 | struct extent_map *em; | ||
2547 | int ret = 0; | ||
2548 | |||
2549 | em = alloc_extent_map(GFP_NOFS); | ||
2550 | if (!em) | ||
2551 | return -ENOMEM; | ||
2552 | |||
2553 | em->start = start; | ||
2554 | em->len = end + 1 - start; | ||
2555 | em->block_len = em->len; | ||
2556 | em->block_start = block_start; | ||
2557 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
2558 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
2559 | |||
2560 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2561 | while (1) { | ||
2562 | write_lock(&em_tree->lock); | ||
2563 | ret = add_extent_mapping(em_tree, em); | ||
2564 | write_unlock(&em_tree->lock); | ||
2565 | if (ret != -EEXIST) { | ||
2566 | free_extent_map(em); | ||
2567 | break; | ||
2568 | } | ||
2569 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
2570 | } | ||
2571 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2572 | return ret; | ||
2573 | } | ||
2574 | |||
2575 | static int relocate_file_extent_cluster(struct inode *inode, | ||
2576 | struct file_extent_cluster *cluster) | ||
2533 | { | 2577 | { |
2534 | u64 page_start; | 2578 | u64 page_start; |
2535 | u64 page_end; | 2579 | u64 page_end; |
2536 | unsigned long i; | 2580 | u64 offset = BTRFS_I(inode)->index_cnt; |
2537 | unsigned long first_index; | 2581 | unsigned long index; |
2538 | unsigned long last_index; | 2582 | unsigned long last_index; |
2539 | unsigned int total_read = 0; | 2583 | unsigned int dirty_page = 0; |
2540 | unsigned int total_dirty = 0; | ||
2541 | struct page *page; | 2584 | struct page *page; |
2542 | struct file_ra_state *ra; | 2585 | struct file_ra_state *ra; |
2543 | struct btrfs_ordered_extent *ordered; | 2586 | int nr = 0; |
2544 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
2545 | int ret = 0; | 2587 | int ret = 0; |
2546 | 2588 | ||
2589 | if (!cluster->nr) | ||
2590 | return 0; | ||
2591 | |||
2547 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 2592 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
2548 | if (!ra) | 2593 | if (!ra) |
2549 | return -ENOMEM; | 2594 | return -ENOMEM; |
2550 | 2595 | ||
2596 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | ||
2597 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
2598 | |||
2551 | mutex_lock(&inode->i_mutex); | 2599 | mutex_lock(&inode->i_mutex); |
2552 | first_index = start >> PAGE_CACHE_SHIFT; | ||
2553 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
2554 | 2600 | ||
2555 | /* make sure the dirty trick played by the caller work */ | 2601 | i_size_write(inode, cluster->end + 1 - offset); |
2556 | while (1) { | 2602 | ret = setup_extent_mapping(inode, cluster->start - offset, |
2557 | ret = invalidate_inode_pages2_range(inode->i_mapping, | 2603 | cluster->end - offset, cluster->start); |
2558 | first_index, last_index); | ||
2559 | if (ret != -EBUSY) | ||
2560 | break; | ||
2561 | schedule_timeout(HZ/10); | ||
2562 | } | ||
2563 | if (ret) | 2604 | if (ret) |
2564 | goto out_unlock; | 2605 | goto out_unlock; |
2565 | 2606 | ||
2566 | file_ra_state_init(ra, inode->i_mapping); | 2607 | file_ra_state_init(ra, inode->i_mapping); |
2567 | 2608 | ||
2568 | for (i = first_index ; i <= last_index; i++) { | 2609 | WARN_ON(cluster->start != cluster->boundary[0]); |
2569 | if (total_read % ra->ra_pages == 0) { | 2610 | while (index <= last_index) { |
2570 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | 2611 | page = find_lock_page(inode->i_mapping, index); |
2571 | min(last_index, ra->ra_pages + i - 1)); | ||
2572 | } | ||
2573 | total_read++; | ||
2574 | again: | ||
2575 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
2576 | BUG_ON(1); | ||
2577 | page = grab_cache_page(inode->i_mapping, i); | ||
2578 | if (!page) { | 2612 | if (!page) { |
2579 | ret = -ENOMEM; | 2613 | page_cache_sync_readahead(inode->i_mapping, |
2580 | goto out_unlock; | 2614 | ra, NULL, index, |
2615 | last_index + 1 - index); | ||
2616 | page = grab_cache_page(inode->i_mapping, index); | ||
2617 | if (!page) { | ||
2618 | ret = -ENOMEM; | ||
2619 | goto out_unlock; | ||
2620 | } | ||
2621 | } | ||
2622 | |||
2623 | if (PageReadahead(page)) { | ||
2624 | page_cache_async_readahead(inode->i_mapping, | ||
2625 | ra, NULL, page, index, | ||
2626 | last_index + 1 - index); | ||
2581 | } | 2627 | } |
2628 | |||
2582 | if (!PageUptodate(page)) { | 2629 | if (!PageUptodate(page)) { |
2583 | btrfs_readpage(NULL, page); | 2630 | btrfs_readpage(NULL, page); |
2584 | lock_page(page); | 2631 | lock_page(page); |
@@ -2589,75 +2636,79 @@ again: | |||
2589 | goto out_unlock; | 2636 | goto out_unlock; |
2590 | } | 2637 | } |
2591 | } | 2638 | } |
2592 | wait_on_page_writeback(page); | ||
2593 | 2639 | ||
2594 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2640 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
2595 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2641 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
2596 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2642 | |
2597 | 2643 | lock_extent(&BTRFS_I(inode)->io_tree, | |
2598 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 2644 | page_start, page_end, GFP_NOFS); |
2599 | if (ordered) { | 2645 | |
2600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
2601 | unlock_page(page); | ||
2602 | page_cache_release(page); | ||
2603 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2604 | btrfs_put_ordered_extent(ordered); | ||
2605 | goto again; | ||
2606 | } | ||
2607 | set_page_extent_mapped(page); | 2646 | set_page_extent_mapped(page); |
2608 | 2647 | ||
2609 | if (i == first_index) | 2648 | if (nr < cluster->nr && |
2610 | set_extent_bits(io_tree, page_start, page_end, | 2649 | page_start + offset == cluster->boundary[nr]) { |
2650 | set_extent_bits(&BTRFS_I(inode)->io_tree, | ||
2651 | page_start, page_end, | ||
2611 | EXTENT_BOUNDARY, GFP_NOFS); | 2652 | EXTENT_BOUNDARY, GFP_NOFS); |
2653 | nr++; | ||
2654 | } | ||
2612 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 2655 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
2613 | 2656 | ||
2614 | set_page_dirty(page); | 2657 | set_page_dirty(page); |
2615 | total_dirty++; | 2658 | dirty_page++; |
2616 | 2659 | ||
2617 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2660 | unlock_extent(&BTRFS_I(inode)->io_tree, |
2661 | page_start, page_end, GFP_NOFS); | ||
2618 | unlock_page(page); | 2662 | unlock_page(page); |
2619 | page_cache_release(page); | 2663 | page_cache_release(page); |
2664 | |||
2665 | index++; | ||
2666 | if (nr < cluster->nr && | ||
2667 | page_end + 1 + offset == cluster->boundary[nr]) { | ||
2668 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2669 | dirty_page); | ||
2670 | dirty_page = 0; | ||
2671 | } | ||
2672 | } | ||
2673 | if (dirty_page) { | ||
2674 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2675 | dirty_page); | ||
2620 | } | 2676 | } |
2677 | WARN_ON(nr != cluster->nr); | ||
2621 | out_unlock: | 2678 | out_unlock: |
2622 | mutex_unlock(&inode->i_mutex); | 2679 | mutex_unlock(&inode->i_mutex); |
2623 | kfree(ra); | 2680 | kfree(ra); |
2624 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
2625 | return ret; | 2681 | return ret; |
2626 | } | 2682 | } |
2627 | 2683 | ||
2628 | static noinline_for_stack | 2684 | static noinline_for_stack |
2629 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) | 2685 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, |
2686 | struct file_extent_cluster *cluster) | ||
2630 | { | 2687 | { |
2631 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2688 | int ret; |
2632 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
2633 | struct extent_map *em; | ||
2634 | u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; | ||
2635 | u64 end = start + extent_key->offset - 1; | ||
2636 | |||
2637 | em = alloc_extent_map(GFP_NOFS); | ||
2638 | em->start = start; | ||
2639 | em->len = extent_key->offset; | ||
2640 | em->block_len = extent_key->offset; | ||
2641 | em->block_start = extent_key->objectid; | ||
2642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
2643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
2644 | 2689 | ||
2645 | /* setup extent map to cheat btrfs_readpage */ | 2690 | if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { |
2646 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | 2691 | ret = relocate_file_extent_cluster(inode, cluster); |
2647 | while (1) { | 2692 | if (ret) |
2648 | int ret; | 2693 | return ret; |
2649 | spin_lock(&em_tree->lock); | 2694 | cluster->nr = 0; |
2650 | ret = add_extent_mapping(em_tree, em); | ||
2651 | spin_unlock(&em_tree->lock); | ||
2652 | if (ret != -EEXIST) { | ||
2653 | free_extent_map(em); | ||
2654 | break; | ||
2655 | } | ||
2656 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
2657 | } | 2695 | } |
2658 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2659 | 2696 | ||
2660 | return relocate_inode_pages(inode, start, extent_key->offset); | 2697 | if (!cluster->nr) |
2698 | cluster->start = extent_key->objectid; | ||
2699 | else | ||
2700 | BUG_ON(cluster->nr >= MAX_EXTENTS); | ||
2701 | cluster->end = extent_key->objectid + extent_key->offset - 1; | ||
2702 | cluster->boundary[cluster->nr] = extent_key->objectid; | ||
2703 | cluster->nr++; | ||
2704 | |||
2705 | if (cluster->nr >= MAX_EXTENTS) { | ||
2706 | ret = relocate_file_extent_cluster(inode, cluster); | ||
2707 | if (ret) | ||
2708 | return ret; | ||
2709 | cluster->nr = 0; | ||
2710 | } | ||
2711 | return 0; | ||
2661 | } | 2712 | } |
2662 | 2713 | ||
2663 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 2714 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
@@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags) | |||
3203 | return 0; | 3254 | return 0; |
3204 | } | 3255 | } |
3205 | 3256 | ||
3257 | |||
3206 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3258 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3207 | { | 3259 | { |
3208 | struct rb_root blocks = RB_ROOT; | 3260 | struct rb_root blocks = RB_ROOT; |
3209 | struct btrfs_key key; | 3261 | struct btrfs_key key; |
3262 | struct file_extent_cluster *cluster; | ||
3210 | struct btrfs_trans_handle *trans = NULL; | 3263 | struct btrfs_trans_handle *trans = NULL; |
3211 | struct btrfs_path *path; | 3264 | struct btrfs_path *path; |
3212 | struct btrfs_extent_item *ei; | 3265 | struct btrfs_extent_item *ei; |
@@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3216 | int ret; | 3269 | int ret; |
3217 | int err = 0; | 3270 | int err = 0; |
3218 | 3271 | ||
3272 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
3273 | if (!cluster) | ||
3274 | return -ENOMEM; | ||
3275 | |||
3219 | path = btrfs_alloc_path(); | 3276 | path = btrfs_alloc_path(); |
3220 | if (!path) | 3277 | if (!path) |
3221 | return -ENOMEM; | 3278 | return -ENOMEM; |
3222 | 3279 | ||
3280 | rc->extents_found = 0; | ||
3281 | rc->extents_skipped = 0; | ||
3282 | |||
3223 | rc->search_start = rc->block_group->key.objectid; | 3283 | rc->search_start = rc->block_group->key.objectid; |
3224 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | 3284 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, |
3225 | GFP_NOFS); | 3285 | GFP_NOFS); |
@@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3306 | } | 3366 | } |
3307 | 3367 | ||
3308 | nr = trans->blocks_used; | 3368 | nr = trans->blocks_used; |
3309 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3369 | btrfs_end_transaction(trans, rc->extent_root); |
3310 | trans = NULL; | 3370 | trans = NULL; |
3311 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3371 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3312 | 3372 | ||
3313 | if (rc->stage == MOVE_DATA_EXTENTS && | 3373 | if (rc->stage == MOVE_DATA_EXTENTS && |
3314 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3374 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3315 | rc->found_file_extent = 1; | 3375 | rc->found_file_extent = 1; |
3316 | ret = relocate_data_extent(rc->data_inode, &key); | 3376 | ret = relocate_data_extent(rc->data_inode, |
3377 | &key, cluster); | ||
3317 | if (ret < 0) { | 3378 | if (ret < 0) { |
3318 | err = ret; | 3379 | err = ret; |
3319 | break; | 3380 | break; |
@@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3328 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3389 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3329 | } | 3390 | } |
3330 | 3391 | ||
3392 | if (!err) { | ||
3393 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | ||
3394 | if (ret < 0) | ||
3395 | err = ret; | ||
3396 | } | ||
3397 | |||
3398 | kfree(cluster); | ||
3399 | |||
3331 | rc->create_reloc_root = 0; | 3400 | rc->create_reloc_root = 0; |
3332 | smp_mb(); | 3401 | smp_mb(); |
3333 | 3402 | ||
@@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3348 | } | 3417 | } |
3349 | 3418 | ||
3350 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 3419 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, |
3351 | struct btrfs_root *root, | 3420 | struct btrfs_root *root, u64 objectid) |
3352 | u64 objectid, u64 size) | ||
3353 | { | 3421 | { |
3354 | struct btrfs_path *path; | 3422 | struct btrfs_path *path; |
3355 | struct btrfs_inode_item *item; | 3423 | struct btrfs_inode_item *item; |
@@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3368 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | 3436 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); |
3369 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | 3437 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); |
3370 | btrfs_set_inode_generation(leaf, item, 1); | 3438 | btrfs_set_inode_generation(leaf, item, 1); |
3371 | btrfs_set_inode_size(leaf, item, size); | 3439 | btrfs_set_inode_size(leaf, item, 0); |
3372 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3440 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
3373 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3441 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); |
3374 | btrfs_mark_buffer_dirty(leaf); | 3442 | btrfs_mark_buffer_dirty(leaf); |
@@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3404 | if (err) | 3472 | if (err) |
3405 | goto out; | 3473 | goto out; |
3406 | 3474 | ||
3407 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 3475 | err = __insert_orphan_inode(trans, root, objectid); |
3408 | BUG_ON(err); | ||
3409 | |||
3410 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
3411 | group->key.offset, 0, group->key.offset, | ||
3412 | 0, 0, 0); | ||
3413 | BUG_ON(err); | 3476 | BUG_ON(err); |
3414 | 3477 | ||
3415 | key.objectid = objectid; | 3478 | key.objectid = objectid; |
@@ -3455,7 +3518,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3455 | BUG_ON(!rc->block_group); | 3518 | BUG_ON(!rc->block_group); |
3456 | 3519 | ||
3457 | btrfs_init_workers(&rc->workers, "relocate", | 3520 | btrfs_init_workers(&rc->workers, "relocate", |
3458 | fs_info->thread_pool_size); | 3521 | fs_info->thread_pool_size, NULL); |
3459 | 3522 | ||
3460 | rc->extent_root = extent_root; | 3523 | rc->extent_root = extent_root; |
3461 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3524 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); |
@@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3475 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 3538 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); |
3476 | 3539 | ||
3477 | while (1) { | 3540 | while (1) { |
3478 | mutex_lock(&fs_info->cleaner_mutex); | ||
3479 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
3480 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3481 | |||
3482 | rc->extents_found = 0; | 3541 | rc->extents_found = 0; |
3483 | rc->extents_skipped = 0; | 3542 | rc->extents_skipped = 0; |
3484 | 3543 | ||
3544 | mutex_lock(&fs_info->cleaner_mutex); | ||
3545 | |||
3546 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
3485 | ret = relocate_block_group(rc); | 3547 | ret = relocate_block_group(rc); |
3548 | |||
3549 | mutex_unlock(&fs_info->cleaner_mutex); | ||
3486 | if (ret < 0) { | 3550 | if (ret < 0) { |
3487 | err = ret; | 3551 | err = ret; |
3488 | break; | 3552 | break; |
@@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3514 | } | 3578 | } |
3515 | } | 3579 | } |
3516 | 3580 | ||
3517 | filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, | 3581 | filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, |
3518 | rc->block_group->key.objectid, | 3582 | rc->block_group->key.objectid, |
3519 | rc->block_group->key.objectid + | 3583 | rc->block_group->key.objectid + |
3520 | rc->block_group->key.offset - 1); | 3584 | rc->block_group->key.offset - 1); |
3521 | 3585 | ||
3522 | WARN_ON(rc->block_group->pinned > 0); | 3586 | WARN_ON(rc->block_group->pinned > 0); |
3523 | WARN_ON(rc->block_group->reserved > 0); | 3587 | WARN_ON(rc->block_group->reserved > 0); |
@@ -3530,6 +3594,26 @@ out: | |||
3530 | return err; | 3594 | return err; |
3531 | } | 3595 | } |
3532 | 3596 | ||
3597 | static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | ||
3598 | { | ||
3599 | struct btrfs_trans_handle *trans; | ||
3600 | int ret; | ||
3601 | |||
3602 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | ||
3603 | |||
3604 | memset(&root->root_item.drop_progress, 0, | ||
3605 | sizeof(root->root_item.drop_progress)); | ||
3606 | root->root_item.drop_level = 0; | ||
3607 | btrfs_set_root_refs(&root->root_item, 0); | ||
3608 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
3609 | &root->root_key, &root->root_item); | ||
3610 | BUG_ON(ret); | ||
3611 | |||
3612 | ret = btrfs_end_transaction(trans, root->fs_info->tree_root); | ||
3613 | BUG_ON(ret); | ||
3614 | return 0; | ||
3615 | } | ||
3616 | |||
3533 | /* | 3617 | /* |
3534 | * recover relocation interrupted by system crash. | 3618 | * recover relocation interrupted by system crash. |
3535 | * | 3619 | * |
@@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3589 | fs_root = read_fs_root(root->fs_info, | 3673 | fs_root = read_fs_root(root->fs_info, |
3590 | reloc_root->root_key.offset); | 3674 | reloc_root->root_key.offset); |
3591 | if (IS_ERR(fs_root)) { | 3675 | if (IS_ERR(fs_root)) { |
3592 | err = PTR_ERR(fs_root); | 3676 | ret = PTR_ERR(fs_root); |
3593 | goto out; | 3677 | if (ret != -ENOENT) { |
3678 | err = ret; | ||
3679 | goto out; | ||
3680 | } | ||
3681 | mark_garbage_root(reloc_root); | ||
3594 | } | 3682 | } |
3595 | } | 3683 | } |
3596 | 3684 | ||
@@ -3613,7 +3701,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3613 | mapping_tree_init(&rc->reloc_root_tree); | 3701 | mapping_tree_init(&rc->reloc_root_tree); |
3614 | INIT_LIST_HEAD(&rc->reloc_roots); | 3702 | INIT_LIST_HEAD(&rc->reloc_roots); |
3615 | btrfs_init_workers(&rc->workers, "relocate", | 3703 | btrfs_init_workers(&rc->workers, "relocate", |
3616 | root->fs_info->thread_pool_size); | 3704 | root->fs_info->thread_pool_size, NULL); |
3617 | rc->extent_root = root->fs_info->extent_root; | 3705 | rc->extent_root = root->fs_info->extent_root; |
3618 | 3706 | ||
3619 | set_reloc_control(rc); | 3707 | set_reloc_control(rc); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ddc6d61c55a..9351428f30e2 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
94 | goto out; | 94 | goto out; |
95 | 95 | ||
96 | BUG_ON(ret == 0); | 96 | BUG_ON(ret == 0); |
97 | if (path->slots[0] == 0) { | ||
98 | ret = 1; | ||
99 | goto out; | ||
100 | } | ||
97 | l = path->nodes[0]; | 101 | l = path->nodes[0]; |
98 | BUG_ON(path->slots[0] == 0); | ||
99 | slot = path->slots[0] - 1; | 102 | slot = path->slots[0] - 1; |
100 | btrfs_item_key_to_cpu(l, &found_key, slot); | 103 | btrfs_item_key_to_cpu(l, &found_key, slot); |
101 | if (found_key.objectid != objectid) { | 104 | if (found_key.objectid != objectid || |
105 | found_key.type != BTRFS_ROOT_ITEM_KEY) { | ||
102 | ret = 1; | 106 | ret = 1; |
103 | goto out; | 107 | goto out; |
104 | } | 108 | } |
105 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), | 109 | if (item) |
106 | sizeof(*item)); | 110 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), |
107 | memcpy(key, &found_key, sizeof(found_key)); | 111 | sizeof(*item)); |
112 | if (key) | ||
113 | memcpy(key, &found_key, sizeof(found_key)); | ||
108 | ret = 0; | 114 | ret = 0; |
109 | out: | 115 | out: |
110 | btrfs_free_path(path); | 116 | btrfs_free_path(path); |
@@ -249,6 +255,59 @@ err: | |||
249 | return ret; | 255 | return ret; |
250 | } | 256 | } |
251 | 257 | ||
258 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | ||
259 | { | ||
260 | struct extent_buffer *leaf; | ||
261 | struct btrfs_path *path; | ||
262 | struct btrfs_key key; | ||
263 | int err = 0; | ||
264 | int ret; | ||
265 | |||
266 | path = btrfs_alloc_path(); | ||
267 | if (!path) | ||
268 | return -ENOMEM; | ||
269 | |||
270 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
271 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
272 | key.offset = 0; | ||
273 | |||
274 | while (1) { | ||
275 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | ||
276 | if (ret < 0) { | ||
277 | err = ret; | ||
278 | break; | ||
279 | } | ||
280 | |||
281 | leaf = path->nodes[0]; | ||
282 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
283 | ret = btrfs_next_leaf(tree_root, path); | ||
284 | if (ret < 0) | ||
285 | err = ret; | ||
286 | if (ret != 0) | ||
287 | break; | ||
288 | leaf = path->nodes[0]; | ||
289 | } | ||
290 | |||
291 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
292 | btrfs_release_path(tree_root, path); | ||
293 | |||
294 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || | ||
295 | key.type != BTRFS_ORPHAN_ITEM_KEY) | ||
296 | break; | ||
297 | |||
298 | ret = btrfs_find_dead_roots(tree_root, key.offset); | ||
299 | if (ret) { | ||
300 | err = ret; | ||
301 | break; | ||
302 | } | ||
303 | |||
304 | key.offset++; | ||
305 | } | ||
306 | |||
307 | btrfs_free_path(path); | ||
308 | return err; | ||
309 | } | ||
310 | |||
252 | /* drop the root item for 'key' from 'root' */ | 311 | /* drop the root item for 'key' from 'root' */ |
253 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 312 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
254 | struct btrfs_key *key) | 313 | struct btrfs_key *key) |
@@ -278,31 +337,57 @@ out: | |||
278 | return ret; | 337 | return ret; |
279 | } | 338 | } |
280 | 339 | ||
281 | #if 0 /* this will get used when snapshot deletion is implemented */ | ||
282 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | 340 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, |
283 | struct btrfs_root *tree_root, | 341 | struct btrfs_root *tree_root, |
284 | u64 root_id, u8 type, u64 ref_id) | 342 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, |
343 | const char *name, int name_len) | ||
344 | |||
285 | { | 345 | { |
346 | struct btrfs_path *path; | ||
347 | struct btrfs_root_ref *ref; | ||
348 | struct extent_buffer *leaf; | ||
286 | struct btrfs_key key; | 349 | struct btrfs_key key; |
350 | unsigned long ptr; | ||
351 | int err = 0; | ||
287 | int ret; | 352 | int ret; |
288 | struct btrfs_path *path; | ||
289 | 353 | ||
290 | path = btrfs_alloc_path(); | 354 | path = btrfs_alloc_path(); |
355 | if (!path) | ||
356 | return -ENOMEM; | ||
291 | 357 | ||
292 | key.objectid = root_id; | 358 | key.objectid = root_id; |
293 | key.type = type; | 359 | key.type = BTRFS_ROOT_BACKREF_KEY; |
294 | key.offset = ref_id; | 360 | key.offset = ref_id; |
295 | 361 | again: | |
296 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | 362 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); |
297 | BUG_ON(ret); | 363 | BUG_ON(ret < 0); |
298 | 364 | if (ret == 0) { | |
299 | ret = btrfs_del_item(trans, tree_root, path); | 365 | leaf = path->nodes[0]; |
300 | BUG_ON(ret); | 366 | ref = btrfs_item_ptr(leaf, path->slots[0], |
367 | struct btrfs_root_ref); | ||
368 | |||
369 | WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); | ||
370 | WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); | ||
371 | ptr = (unsigned long)(ref + 1); | ||
372 | WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); | ||
373 | *sequence = btrfs_root_ref_sequence(leaf, ref); | ||
374 | |||
375 | ret = btrfs_del_item(trans, tree_root, path); | ||
376 | BUG_ON(ret); | ||
377 | } else | ||
378 | err = -ENOENT; | ||
379 | |||
380 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
381 | btrfs_release_path(tree_root, path); | ||
382 | key.objectid = ref_id; | ||
383 | key.type = BTRFS_ROOT_REF_KEY; | ||
384 | key.offset = root_id; | ||
385 | goto again; | ||
386 | } | ||
301 | 387 | ||
302 | btrfs_free_path(path); | 388 | btrfs_free_path(path); |
303 | return ret; | 389 | return err; |
304 | } | 390 | } |
305 | #endif | ||
306 | 391 | ||
307 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 392 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
308 | struct btrfs_path *path, | 393 | struct btrfs_path *path, |
@@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
319 | return ret; | 404 | return ret; |
320 | } | 405 | } |
321 | 406 | ||
322 | |||
323 | /* | 407 | /* |
324 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY | 408 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY |
325 | * or BTRFS_ROOT_BACKREF_KEY. | 409 | * or BTRFS_ROOT_BACKREF_KEY. |
@@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
335 | */ | 419 | */ |
336 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 420 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
337 | struct btrfs_root *tree_root, | 421 | struct btrfs_root *tree_root, |
338 | u64 root_id, u8 type, u64 ref_id, | 422 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
339 | u64 dirid, u64 sequence, | ||
340 | const char *name, int name_len) | 423 | const char *name, int name_len) |
341 | { | 424 | { |
342 | struct btrfs_key key; | 425 | struct btrfs_key key; |
@@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
346 | struct extent_buffer *leaf; | 429 | struct extent_buffer *leaf; |
347 | unsigned long ptr; | 430 | unsigned long ptr; |
348 | 431 | ||
349 | |||
350 | path = btrfs_alloc_path(); | 432 | path = btrfs_alloc_path(); |
433 | if (!path) | ||
434 | return -ENOMEM; | ||
351 | 435 | ||
352 | key.objectid = root_id; | 436 | key.objectid = root_id; |
353 | key.type = type; | 437 | key.type = BTRFS_ROOT_BACKREF_KEY; |
354 | key.offset = ref_id; | 438 | key.offset = ref_id; |
355 | 439 | again: | |
356 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, | 440 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, |
357 | sizeof(*ref) + name_len); | 441 | sizeof(*ref) + name_len); |
358 | BUG_ON(ret); | 442 | BUG_ON(ret); |
@@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
366 | write_extent_buffer(leaf, name, ptr, name_len); | 450 | write_extent_buffer(leaf, name, ptr, name_len); |
367 | btrfs_mark_buffer_dirty(leaf); | 451 | btrfs_mark_buffer_dirty(leaf); |
368 | 452 | ||
453 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
454 | btrfs_release_path(tree_root, path); | ||
455 | key.objectid = ref_id; | ||
456 | key.type = BTRFS_ROOT_REF_KEY; | ||
457 | key.offset = root_id; | ||
458 | goto again; | ||
459 | } | ||
460 | |||
369 | btrfs_free_path(path); | 461 | btrfs_free_path(path); |
370 | return ret; | 462 | return 0; |
371 | } | 463 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6d6d06cb6dfc..752a5463bf53 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -51,7 +51,7 @@ | |||
51 | #include "export.h" | 51 | #include "export.h" |
52 | #include "compression.h" | 52 | #include "compression.h" |
53 | 53 | ||
54 | static struct super_operations btrfs_super_ops; | 54 | static const struct super_operations btrfs_super_ops; |
55 | 55 | ||
56 | static void btrfs_put_super(struct super_block *sb) | 56 | static void btrfs_put_super(struct super_block *sb) |
57 | { | 57 | { |
@@ -66,7 +66,8 @@ enum { | |||
66 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 66 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
67 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 67 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
68 | Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, | 68 | Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, |
69 | Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err, | 69 | Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, |
70 | Opt_discard, Opt_err, | ||
70 | }; | 71 | }; |
71 | 72 | ||
72 | static match_table_t tokens = { | 73 | static match_table_t tokens = { |
@@ -88,6 +89,7 @@ static match_table_t tokens = { | |||
88 | {Opt_notreelog, "notreelog"}, | 89 | {Opt_notreelog, "notreelog"}, |
89 | {Opt_flushoncommit, "flushoncommit"}, | 90 | {Opt_flushoncommit, "flushoncommit"}, |
90 | {Opt_ratio, "metadata_ratio=%d"}, | 91 | {Opt_ratio, "metadata_ratio=%d"}, |
92 | {Opt_discard, "discard"}, | ||
91 | {Opt_err, NULL}, | 93 | {Opt_err, NULL}, |
92 | }; | 94 | }; |
93 | 95 | ||
@@ -257,6 +259,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
257 | info->metadata_ratio); | 259 | info->metadata_ratio); |
258 | } | 260 | } |
259 | break; | 261 | break; |
262 | case Opt_discard: | ||
263 | btrfs_set_opt(info->mount_opt, DISCARD); | ||
264 | break; | ||
260 | default: | 265 | default: |
261 | break; | 266 | break; |
262 | } | 267 | } |
@@ -344,7 +349,9 @@ static int btrfs_fill_super(struct super_block *sb, | |||
344 | sb->s_export_op = &btrfs_export_ops; | 349 | sb->s_export_op = &btrfs_export_ops; |
345 | sb->s_xattr = btrfs_xattr_handlers; | 350 | sb->s_xattr = btrfs_xattr_handlers; |
346 | sb->s_time_gran = 1; | 351 | sb->s_time_gran = 1; |
352 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | ||
347 | sb->s_flags |= MS_POSIXACL; | 353 | sb->s_flags |= MS_POSIXACL; |
354 | #endif | ||
348 | 355 | ||
349 | tree_root = open_ctree(sb, fs_devices, (char *)data); | 356 | tree_root = open_ctree(sb, fs_devices, (char *)data); |
350 | 357 | ||
@@ -675,7 +682,8 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
675 | return 0; | 682 | return 0; |
676 | } | 683 | } |
677 | 684 | ||
678 | static struct super_operations btrfs_super_ops = { | 685 | static const struct super_operations btrfs_super_ops = { |
686 | .drop_inode = btrfs_drop_inode, | ||
679 | .delete_inode = btrfs_delete_inode, | 687 | .delete_inode = btrfs_delete_inode, |
680 | .put_super = btrfs_put_super, | 688 | .put_super = btrfs_put_super, |
681 | .sync_fs = btrfs_sync_fs, | 689 | .sync_fs = btrfs_sync_fs, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cdbb5022da52..bca82a4ca8e6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
104 | { | 104 | { |
105 | if (root->ref_cows && root->last_trans < trans->transid) { | 105 | if (root->ref_cows && root->last_trans < trans->transid) { |
106 | WARN_ON(root == root->fs_info->extent_root); | 106 | WARN_ON(root == root->fs_info->extent_root); |
107 | WARN_ON(root->root_item.refs == 0); | ||
108 | WARN_ON(root->commit_root != root->node); | 107 | WARN_ON(root->commit_root != root->node); |
109 | 108 | ||
110 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 109 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
@@ -187,6 +186,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
187 | h->alloc_exclude_start = 0; | 186 | h->alloc_exclude_start = 0; |
188 | h->delayed_ref_updates = 0; | 187 | h->delayed_ref_updates = 0; |
189 | 188 | ||
189 | if (!current->journal_info) | ||
190 | current->journal_info = h; | ||
191 | |||
190 | root->fs_info->running_transaction->use_count++; | 192 | root->fs_info->running_transaction->use_count++; |
191 | record_root_in_trans(h, root); | 193 | record_root_in_trans(h, root); |
192 | mutex_unlock(&root->fs_info->trans_mutex); | 194 | mutex_unlock(&root->fs_info->trans_mutex); |
@@ -318,6 +320,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
318 | wake_up(&cur_trans->writer_wait); | 320 | wake_up(&cur_trans->writer_wait); |
319 | put_transaction(cur_trans); | 321 | put_transaction(cur_trans); |
320 | mutex_unlock(&info->trans_mutex); | 322 | mutex_unlock(&info->trans_mutex); |
323 | |||
324 | if (current->journal_info == trans) | ||
325 | current->journal_info = NULL; | ||
321 | memset(trans, 0, sizeof(*trans)); | 326 | memset(trans, 0, sizeof(*trans)); |
322 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 327 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
323 | 328 | ||
@@ -339,10 +344,10 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | |||
339 | /* | 344 | /* |
340 | * when btree blocks are allocated, they have some corresponding bits set for | 345 | * when btree blocks are allocated, they have some corresponding bits set for |
341 | * them in one of two extent_io trees. This is used to make sure all of | 346 | * them in one of two extent_io trees. This is used to make sure all of |
342 | * those extents are on disk for transaction or log commit | 347 | * those extents are sent to disk but does not wait on them |
343 | */ | 348 | */ |
344 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 349 | int btrfs_write_marked_extents(struct btrfs_root *root, |
345 | struct extent_io_tree *dirty_pages) | 350 | struct extent_io_tree *dirty_pages) |
346 | { | 351 | { |
347 | int ret; | 352 | int ret; |
348 | int err = 0; | 353 | int err = 0; |
@@ -389,6 +394,29 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
389 | page_cache_release(page); | 394 | page_cache_release(page); |
390 | } | 395 | } |
391 | } | 396 | } |
397 | if (err) | ||
398 | werr = err; | ||
399 | return werr; | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * when btree blocks are allocated, they have some corresponding bits set for | ||
404 | * them in one of two extent_io trees. This is used to make sure all of | ||
405 | * those extents are on disk for transaction or log commit. We wait | ||
406 | * on all the pages and clear them from the dirty pages state tree | ||
407 | */ | ||
408 | int btrfs_wait_marked_extents(struct btrfs_root *root, | ||
409 | struct extent_io_tree *dirty_pages) | ||
410 | { | ||
411 | int ret; | ||
412 | int err = 0; | ||
413 | int werr = 0; | ||
414 | struct page *page; | ||
415 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
416 | u64 start = 0; | ||
417 | u64 end; | ||
418 | unsigned long index; | ||
419 | |||
392 | while (1) { | 420 | while (1) { |
393 | ret = find_first_extent_bit(dirty_pages, 0, &start, &end, | 421 | ret = find_first_extent_bit(dirty_pages, 0, &start, &end, |
394 | EXTENT_DIRTY); | 422 | EXTENT_DIRTY); |
@@ -419,6 +447,22 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
419 | return werr; | 447 | return werr; |
420 | } | 448 | } |
421 | 449 | ||
450 | /* | ||
451 | * when btree blocks are allocated, they have some corresponding bits set for | ||
452 | * them in one of two extent_io trees. This is used to make sure all of | ||
453 | * those extents are on disk for transaction or log commit | ||
454 | */ | ||
455 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | ||
456 | struct extent_io_tree *dirty_pages) | ||
457 | { | ||
458 | int ret; | ||
459 | int ret2; | ||
460 | |||
461 | ret = btrfs_write_marked_extents(root, dirty_pages); | ||
462 | ret2 = btrfs_wait_marked_extents(root, dirty_pages); | ||
463 | return ret || ret2; | ||
464 | } | ||
465 | |||
422 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 466 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
423 | struct btrfs_root *root) | 467 | struct btrfs_root *root) |
424 | { | 468 | { |
@@ -720,7 +764,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
720 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 764 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
721 | 765 | ||
722 | key.objectid = objectid; | 766 | key.objectid = objectid; |
723 | key.offset = 0; | 767 | /* record when the snapshot was created in key.offset */ |
768 | key.offset = trans->transid; | ||
724 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 769 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
725 | 770 | ||
726 | old = btrfs_lock_root_node(root); | 771 | old = btrfs_lock_root_node(root); |
@@ -743,6 +788,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
743 | memcpy(&pending->root_key, &key, sizeof(key)); | 788 | memcpy(&pending->root_key, &key, sizeof(key)); |
744 | fail: | 789 | fail: |
745 | kfree(new_root_item); | 790 | kfree(new_root_item); |
791 | btrfs_unreserve_metadata_space(root, 6); | ||
746 | return ret; | 792 | return ret; |
747 | } | 793 | } |
748 | 794 | ||
@@ -778,24 +824,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, | |||
778 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 824 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
779 | BUG_ON(ret); | 825 | BUG_ON(ret); |
780 | 826 | ||
781 | /* add the backref first */ | ||
782 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 827 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, |
783 | pending->root_key.objectid, | 828 | pending->root_key.objectid, |
784 | BTRFS_ROOT_BACKREF_KEY, | ||
785 | parent_root->root_key.objectid, | 829 | parent_root->root_key.objectid, |
786 | parent_inode->i_ino, index, pending->name, | 830 | parent_inode->i_ino, index, pending->name, |
787 | namelen); | 831 | namelen); |
788 | 832 | ||
789 | BUG_ON(ret); | 833 | BUG_ON(ret); |
790 | 834 | ||
791 | /* now add the forward ref */ | ||
792 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | ||
793 | parent_root->root_key.objectid, | ||
794 | BTRFS_ROOT_REF_KEY, | ||
795 | pending->root_key.objectid, | ||
796 | parent_inode->i_ino, index, pending->name, | ||
797 | namelen); | ||
798 | |||
799 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); | 835 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); |
800 | d_instantiate(pending->dentry, inode); | 836 | d_instantiate(pending->dentry, inode); |
801 | fail: | 837 | fail: |
@@ -874,7 +910,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
874 | unsigned long timeout = 1; | 910 | unsigned long timeout = 1; |
875 | struct btrfs_transaction *cur_trans; | 911 | struct btrfs_transaction *cur_trans; |
876 | struct btrfs_transaction *prev_trans = NULL; | 912 | struct btrfs_transaction *prev_trans = NULL; |
877 | struct extent_io_tree *pinned_copy; | ||
878 | DEFINE_WAIT(wait); | 913 | DEFINE_WAIT(wait); |
879 | int ret; | 914 | int ret; |
880 | int should_grow = 0; | 915 | int should_grow = 0; |
@@ -915,13 +950,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
915 | return 0; | 950 | return 0; |
916 | } | 951 | } |
917 | 952 | ||
918 | pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); | ||
919 | if (!pinned_copy) | ||
920 | return -ENOMEM; | ||
921 | |||
922 | extent_io_tree_init(pinned_copy, | ||
923 | root->fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
924 | |||
925 | trans->transaction->in_commit = 1; | 953 | trans->transaction->in_commit = 1; |
926 | trans->transaction->blocked = 1; | 954 | trans->transaction->blocked = 1; |
927 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 955 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
@@ -1019,6 +1047,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1019 | ret = commit_cowonly_roots(trans, root); | 1047 | ret = commit_cowonly_roots(trans, root); |
1020 | BUG_ON(ret); | 1048 | BUG_ON(ret); |
1021 | 1049 | ||
1050 | btrfs_prepare_extent_commit(trans, root); | ||
1051 | |||
1022 | cur_trans = root->fs_info->running_transaction; | 1052 | cur_trans = root->fs_info->running_transaction; |
1023 | spin_lock(&root->fs_info->new_trans_lock); | 1053 | spin_lock(&root->fs_info->new_trans_lock); |
1024 | root->fs_info->running_transaction = NULL; | 1054 | root->fs_info->running_transaction = NULL; |
@@ -1042,8 +1072,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1042 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | 1072 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, |
1043 | sizeof(root->fs_info->super_copy)); | 1073 | sizeof(root->fs_info->super_copy)); |
1044 | 1074 | ||
1045 | btrfs_copy_pinned(root, pinned_copy); | ||
1046 | |||
1047 | trans->transaction->blocked = 0; | 1075 | trans->transaction->blocked = 0; |
1048 | 1076 | ||
1049 | wake_up(&root->fs_info->transaction_wait); | 1077 | wake_up(&root->fs_info->transaction_wait); |
@@ -1059,8 +1087,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1059 | */ | 1087 | */ |
1060 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1088 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1061 | 1089 | ||
1062 | btrfs_finish_extent_commit(trans, root, pinned_copy); | 1090 | btrfs_finish_extent_commit(trans, root); |
1063 | kfree(pinned_copy); | ||
1064 | 1091 | ||
1065 | /* do the directory inserts of any pending snapshot creations */ | 1092 | /* do the directory inserts of any pending snapshot creations */ |
1066 | finish_pending_snapshots(trans, root->fs_info); | 1093 | finish_pending_snapshots(trans, root->fs_info); |
@@ -1078,6 +1105,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1078 | 1105 | ||
1079 | mutex_unlock(&root->fs_info->trans_mutex); | 1106 | mutex_unlock(&root->fs_info->trans_mutex); |
1080 | 1107 | ||
1108 | if (current->journal_info == trans) | ||
1109 | current->journal_info = NULL; | ||
1110 | |||
1081 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1111 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
1082 | return ret; | 1112 | return ret; |
1083 | } | 1113 | } |
@@ -1096,8 +1126,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1096 | 1126 | ||
1097 | while (!list_empty(&list)) { | 1127 | while (!list_empty(&list)) { |
1098 | root = list_entry(list.next, struct btrfs_root, root_list); | 1128 | root = list_entry(list.next, struct btrfs_root, root_list); |
1099 | list_del_init(&root->root_list); | 1129 | list_del(&root->root_list); |
1100 | btrfs_drop_snapshot(root, 0); | 1130 | |
1131 | if (btrfs_header_backref_rev(root->node) < | ||
1132 | BTRFS_MIXED_BACKREF_REV) | ||
1133 | btrfs_drop_snapshot(root, 0); | ||
1134 | else | ||
1135 | btrfs_drop_snapshot(root, 1); | ||
1101 | } | 1136 | } |
1102 | return 0; | 1137 | return 0; |
1103 | } | 1138 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 663c67404918..d4e3e7a6938c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
79 | struct inode *inode) | 79 | struct inode *inode) |
80 | { | 80 | { |
81 | BTRFS_I(inode)->last_trans = trans->transaction->transid; | 81 | BTRFS_I(inode)->last_trans = trans->transaction->transid; |
82 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | ||
82 | } | 83 | } |
83 | 84 | ||
84 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
@@ -107,5 +108,9 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | |||
107 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
108 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 109 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, |
109 | struct extent_io_tree *dirty_pages); | 110 | struct extent_io_tree *dirty_pages); |
111 | int btrfs_write_marked_extents(struct btrfs_root *root, | ||
112 | struct extent_io_tree *dirty_pages); | ||
113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | ||
114 | struct extent_io_tree *dirty_pages); | ||
110 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
111 | #endif | 116 | #endif |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d91b0de7c502..741666a7676a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -137,11 +137,20 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
137 | 137 | ||
138 | mutex_lock(&root->log_mutex); | 138 | mutex_lock(&root->log_mutex); |
139 | if (root->log_root) { | 139 | if (root->log_root) { |
140 | if (!root->log_start_pid) { | ||
141 | root->log_start_pid = current->pid; | ||
142 | root->log_multiple_pids = false; | ||
143 | } else if (root->log_start_pid != current->pid) { | ||
144 | root->log_multiple_pids = true; | ||
145 | } | ||
146 | |||
140 | root->log_batch++; | 147 | root->log_batch++; |
141 | atomic_inc(&root->log_writers); | 148 | atomic_inc(&root->log_writers); |
142 | mutex_unlock(&root->log_mutex); | 149 | mutex_unlock(&root->log_mutex); |
143 | return 0; | 150 | return 0; |
144 | } | 151 | } |
152 | root->log_multiple_pids = false; | ||
153 | root->log_start_pid = current->pid; | ||
145 | mutex_lock(&root->fs_info->tree_log_mutex); | 154 | mutex_lock(&root->fs_info->tree_log_mutex); |
146 | if (!root->fs_info->log_root_tree) { | 155 | if (!root->fs_info->log_root_tree) { |
147 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 156 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
@@ -263,8 +272,8 @@ static int process_one_buffer(struct btrfs_root *log, | |||
263 | struct walk_control *wc, u64 gen) | 272 | struct walk_control *wc, u64 gen) |
264 | { | 273 | { |
265 | if (wc->pin) | 274 | if (wc->pin) |
266 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 275 | btrfs_pin_extent(log->fs_info->extent_root, |
267 | eb->start, eb->len, 1); | 276 | eb->start, eb->len, 0); |
268 | 277 | ||
269 | if (btrfs_buffer_uptodate(eb, gen)) { | 278 | if (btrfs_buffer_uptodate(eb, gen)) { |
270 | if (wc->write) | 279 | if (wc->write) |
@@ -534,7 +543,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
534 | saved_nbytes = inode_get_bytes(inode); | 543 | saved_nbytes = inode_get_bytes(inode); |
535 | /* drop any overlapping extents */ | 544 | /* drop any overlapping extents */ |
536 | ret = btrfs_drop_extents(trans, root, inode, | 545 | ret = btrfs_drop_extents(trans, root, inode, |
537 | start, extent_end, extent_end, start, &alloc_hint); | 546 | start, extent_end, extent_end, start, &alloc_hint, 1); |
538 | BUG_ON(ret); | 547 | BUG_ON(ret); |
539 | 548 | ||
540 | if (found_type == BTRFS_FILE_EXTENT_REG || | 549 | if (found_type == BTRFS_FILE_EXTENT_REG || |
@@ -1971,6 +1980,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1971 | int ret; | 1980 | int ret; |
1972 | struct btrfs_root *log = root->log_root; | 1981 | struct btrfs_root *log = root->log_root; |
1973 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 1982 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
1983 | u64 log_transid = 0; | ||
1974 | 1984 | ||
1975 | mutex_lock(&root->log_mutex); | 1985 | mutex_lock(&root->log_mutex); |
1976 | index1 = root->log_transid % 2; | 1986 | index1 = root->log_transid % 2; |
@@ -1987,10 +1997,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1987 | 1997 | ||
1988 | while (1) { | 1998 | while (1) { |
1989 | unsigned long batch = root->log_batch; | 1999 | unsigned long batch = root->log_batch; |
1990 | mutex_unlock(&root->log_mutex); | 2000 | if (root->log_multiple_pids) { |
1991 | schedule_timeout_uninterruptible(1); | 2001 | mutex_unlock(&root->log_mutex); |
1992 | mutex_lock(&root->log_mutex); | 2002 | schedule_timeout_uninterruptible(1); |
1993 | 2003 | mutex_lock(&root->log_mutex); | |
2004 | } | ||
1994 | wait_for_writer(trans, root); | 2005 | wait_for_writer(trans, root); |
1995 | if (batch == root->log_batch) | 2006 | if (batch == root->log_batch) |
1996 | break; | 2007 | break; |
@@ -2003,14 +2014,19 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2003 | goto out; | 2014 | goto out; |
2004 | } | 2015 | } |
2005 | 2016 | ||
2006 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); | 2017 | /* we start IO on all the marked extents here, but we don't actually |
2018 | * wait for them until later. | ||
2019 | */ | ||
2020 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); | ||
2007 | BUG_ON(ret); | 2021 | BUG_ON(ret); |
2008 | 2022 | ||
2009 | btrfs_set_root_node(&log->root_item, log->node); | 2023 | btrfs_set_root_node(&log->root_item, log->node); |
2010 | 2024 | ||
2011 | root->log_batch = 0; | 2025 | root->log_batch = 0; |
2026 | log_transid = root->log_transid; | ||
2012 | root->log_transid++; | 2027 | root->log_transid++; |
2013 | log->log_transid = root->log_transid; | 2028 | log->log_transid = root->log_transid; |
2029 | root->log_start_pid = 0; | ||
2014 | smp_mb(); | 2030 | smp_mb(); |
2015 | /* | 2031 | /* |
2016 | * log tree has been flushed to disk, new modifications of | 2032 | * log tree has been flushed to disk, new modifications of |
@@ -2036,6 +2052,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2036 | 2052 | ||
2037 | index2 = log_root_tree->log_transid % 2; | 2053 | index2 = log_root_tree->log_transid % 2; |
2038 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2054 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2055 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | ||
2039 | wait_log_commit(trans, log_root_tree, | 2056 | wait_log_commit(trans, log_root_tree, |
2040 | log_root_tree->log_transid); | 2057 | log_root_tree->log_transid); |
2041 | mutex_unlock(&log_root_tree->log_mutex); | 2058 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2055,6 +2072,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2055 | * check the full commit flag again | 2072 | * check the full commit flag again |
2056 | */ | 2073 | */ |
2057 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2074 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
2075 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | ||
2058 | mutex_unlock(&log_root_tree->log_mutex); | 2076 | mutex_unlock(&log_root_tree->log_mutex); |
2059 | ret = -EAGAIN; | 2077 | ret = -EAGAIN; |
2060 | goto out_wake_log_root; | 2078 | goto out_wake_log_root; |
@@ -2063,6 +2081,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2063 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2081 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, |
2064 | &log_root_tree->dirty_log_pages); | 2082 | &log_root_tree->dirty_log_pages); |
2065 | BUG_ON(ret); | 2083 | BUG_ON(ret); |
2084 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | ||
2066 | 2085 | ||
2067 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, | 2086 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, |
2068 | log_root_tree->node->start); | 2087 | log_root_tree->node->start); |
@@ -2082,9 +2101,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2082 | * the running transaction open, so a full commit can't hop | 2101 | * the running transaction open, so a full commit can't hop |
2083 | * in and cause problems either. | 2102 | * in and cause problems either. |
2084 | */ | 2103 | */ |
2085 | write_ctree_super(trans, root->fs_info->tree_root, 2); | 2104 | write_ctree_super(trans, root->fs_info->tree_root, 1); |
2086 | ret = 0; | 2105 | ret = 0; |
2087 | 2106 | ||
2107 | mutex_lock(&root->log_mutex); | ||
2108 | if (root->last_log_commit < log_transid) | ||
2109 | root->last_log_commit = log_transid; | ||
2110 | mutex_unlock(&root->log_mutex); | ||
2111 | |||
2088 | out_wake_log_root: | 2112 | out_wake_log_root: |
2089 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2113 | atomic_set(&log_root_tree->log_commit[index2], 0); |
2090 | smp_mb(); | 2114 | smp_mb(); |
@@ -2605,7 +2629,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2605 | extent); | 2629 | extent); |
2606 | cs = btrfs_file_extent_offset(src, extent); | 2630 | cs = btrfs_file_extent_offset(src, extent); |
2607 | cl = btrfs_file_extent_num_bytes(src, | 2631 | cl = btrfs_file_extent_num_bytes(src, |
2608 | extent);; | 2632 | extent); |
2609 | if (btrfs_file_extent_compression(src, | 2633 | if (btrfs_file_extent_compression(src, |
2610 | extent)) { | 2634 | extent)) { |
2611 | cs = 0; | 2635 | cs = 0; |
@@ -2841,7 +2865,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
2841 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 2865 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
2842 | break; | 2866 | break; |
2843 | 2867 | ||
2844 | if (parent == sb->s_root) | 2868 | if (IS_ROOT(parent)) |
2845 | break; | 2869 | break; |
2846 | 2870 | ||
2847 | parent = parent->d_parent; | 2871 | parent = parent->d_parent; |
@@ -2852,6 +2876,21 @@ out: | |||
2852 | return ret; | 2876 | return ret; |
2853 | } | 2877 | } |
2854 | 2878 | ||
2879 | static int inode_in_log(struct btrfs_trans_handle *trans, | ||
2880 | struct inode *inode) | ||
2881 | { | ||
2882 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2883 | int ret = 0; | ||
2884 | |||
2885 | mutex_lock(&root->log_mutex); | ||
2886 | if (BTRFS_I(inode)->logged_trans == trans->transid && | ||
2887 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
2888 | ret = 1; | ||
2889 | mutex_unlock(&root->log_mutex); | ||
2890 | return ret; | ||
2891 | } | ||
2892 | |||
2893 | |||
2855 | /* | 2894 | /* |
2856 | * helper function around btrfs_log_inode to make sure newly created | 2895 | * helper function around btrfs_log_inode to make sure newly created |
2857 | * parent directories also end up in the log. A minimal inode and backref | 2896 | * parent directories also end up in the log. A minimal inode and backref |
@@ -2880,11 +2919,22 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2880 | goto end_no_trans; | 2919 | goto end_no_trans; |
2881 | } | 2920 | } |
2882 | 2921 | ||
2922 | if (root != BTRFS_I(inode)->root || | ||
2923 | btrfs_root_refs(&root->root_item) == 0) { | ||
2924 | ret = 1; | ||
2925 | goto end_no_trans; | ||
2926 | } | ||
2927 | |||
2883 | ret = check_parent_dirs_for_sync(trans, inode, parent, | 2928 | ret = check_parent_dirs_for_sync(trans, inode, parent, |
2884 | sb, last_committed); | 2929 | sb, last_committed); |
2885 | if (ret) | 2930 | if (ret) |
2886 | goto end_no_trans; | 2931 | goto end_no_trans; |
2887 | 2932 | ||
2933 | if (inode_in_log(trans, inode)) { | ||
2934 | ret = BTRFS_NO_LOG_SYNC; | ||
2935 | goto end_no_trans; | ||
2936 | } | ||
2937 | |||
2888 | start_log_trans(trans, root); | 2938 | start_log_trans(trans, root); |
2889 | 2939 | ||
2890 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2940 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
@@ -2907,12 +2957,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2907 | break; | 2957 | break; |
2908 | 2958 | ||
2909 | inode = parent->d_inode; | 2959 | inode = parent->d_inode; |
2960 | if (root != BTRFS_I(inode)->root) | ||
2961 | break; | ||
2962 | |||
2910 | if (BTRFS_I(inode)->generation > | 2963 | if (BTRFS_I(inode)->generation > |
2911 | root->fs_info->last_trans_committed) { | 2964 | root->fs_info->last_trans_committed) { |
2912 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2965 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2913 | BUG_ON(ret); | 2966 | BUG_ON(ret); |
2914 | } | 2967 | } |
2915 | if (parent == sb->s_root) | 2968 | if (IS_ROOT(parent)) |
2916 | break; | 2969 | break; |
2917 | 2970 | ||
2918 | parent = parent->d_parent; | 2971 | parent = parent->d_parent; |
@@ -2951,7 +3004,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
2951 | struct btrfs_key tmp_key; | 3004 | struct btrfs_key tmp_key; |
2952 | struct btrfs_root *log; | 3005 | struct btrfs_root *log; |
2953 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; | 3006 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; |
2954 | u64 highest_inode; | ||
2955 | struct walk_control wc = { | 3007 | struct walk_control wc = { |
2956 | .process_func = process_one_buffer, | 3008 | .process_func = process_one_buffer, |
2957 | .stage = 0, | 3009 | .stage = 0, |
@@ -3010,11 +3062,6 @@ again: | |||
3010 | path); | 3062 | path); |
3011 | BUG_ON(ret); | 3063 | BUG_ON(ret); |
3012 | } | 3064 | } |
3013 | ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); | ||
3014 | if (ret == 0) { | ||
3015 | wc.replay_dest->highest_inode = highest_inode; | ||
3016 | wc.replay_dest->last_inode_alloc = highest_inode; | ||
3017 | } | ||
3018 | 3065 | ||
3019 | key.offset = found_key.offset - 1; | 3066 | key.offset = found_key.offset - 1; |
3020 | wc.replay_dest->log_root = NULL; | 3067 | wc.replay_dest->log_root = NULL; |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index d09c7609e16b..0776eacb5083 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -19,6 +19,9 @@ | |||
19 | #ifndef __TREE_LOG_ | 19 | #ifndef __TREE_LOG_ |
20 | #define __TREE_LOG_ | 20 | #define __TREE_LOG_ |
21 | 21 | ||
22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | ||
23 | #define BTRFS_NO_LOG_SYNC 256 | ||
24 | |||
22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
23 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cf405b0828d..7eda483d7b5a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -276,7 +276,7 @@ loop_lock: | |||
276 | * is now congested. Back off and let other work structs | 276 | * is now congested. Back off and let other work structs |
277 | * run instead | 277 | * run instead |
278 | */ | 278 | */ |
279 | if (pending && bdi_write_congested(bdi) && batch_run > 32 && | 279 | if (pending && bdi_write_congested(bdi) && batch_run > 8 && |
280 | fs_info->fs_devices->open_devices > 1) { | 280 | fs_info->fs_devices->open_devices > 1) { |
281 | struct io_context *ioc; | 281 | struct io_context *ioc; |
282 | 282 | ||
@@ -446,8 +446,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
446 | goto error; | 446 | goto error; |
447 | 447 | ||
448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); | 448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); |
449 | if (!device->name) | 449 | if (!device->name) { |
450 | kfree(device); | ||
450 | goto error; | 451 | goto error; |
452 | } | ||
451 | 453 | ||
452 | device->devid = orig_dev->devid; | 454 | device->devid = orig_dev->devid; |
453 | device->work.func = pending_bios_fn; | 455 | device->work.func = pending_bios_fn; |
@@ -719,10 +721,9 @@ error: | |||
719 | * called very infrequently and that a given device has a small number | 721 | * called very infrequently and that a given device has a small number |
720 | * of extents | 722 | * of extents |
721 | */ | 723 | */ |
722 | static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, | 724 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
723 | struct btrfs_device *device, | 725 | struct btrfs_device *device, u64 num_bytes, |
724 | u64 num_bytes, u64 *start, | 726 | u64 *start, u64 *max_avail) |
725 | u64 *max_avail) | ||
726 | { | 727 | { |
727 | struct btrfs_key key; | 728 | struct btrfs_key key; |
728 | struct btrfs_root *root = device->dev_root; | 729 | struct btrfs_root *root = device->dev_root; |
@@ -1736,6 +1737,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1736 | extent_root = root->fs_info->extent_root; | 1737 | extent_root = root->fs_info->extent_root; |
1737 | em_tree = &root->fs_info->mapping_tree.map_tree; | 1738 | em_tree = &root->fs_info->mapping_tree.map_tree; |
1738 | 1739 | ||
1740 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
1741 | if (ret) | ||
1742 | return -ENOSPC; | ||
1743 | |||
1739 | /* step one, relocate all the extents inside this chunk */ | 1744 | /* step one, relocate all the extents inside this chunk */ |
1740 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1745 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
1741 | BUG_ON(ret); | 1746 | BUG_ON(ret); |
@@ -1749,9 +1754,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1749 | * step two, delete the device extents and the | 1754 | * step two, delete the device extents and the |
1750 | * chunk tree entries | 1755 | * chunk tree entries |
1751 | */ | 1756 | */ |
1752 | spin_lock(&em_tree->lock); | 1757 | read_lock(&em_tree->lock); |
1753 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | 1758 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
1754 | spin_unlock(&em_tree->lock); | 1759 | read_unlock(&em_tree->lock); |
1755 | 1760 | ||
1756 | BUG_ON(em->start > chunk_offset || | 1761 | BUG_ON(em->start > chunk_offset || |
1757 | em->start + em->len < chunk_offset); | 1762 | em->start + em->len < chunk_offset); |
@@ -1780,9 +1785,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1780 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 1785 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); |
1781 | BUG_ON(ret); | 1786 | BUG_ON(ret); |
1782 | 1787 | ||
1783 | spin_lock(&em_tree->lock); | 1788 | write_lock(&em_tree->lock); |
1784 | remove_extent_mapping(em_tree, em); | 1789 | remove_extent_mapping(em_tree, em); |
1785 | spin_unlock(&em_tree->lock); | 1790 | write_unlock(&em_tree->lock); |
1786 | 1791 | ||
1787 | kfree(map); | 1792 | kfree(map); |
1788 | em->bdev = NULL; | 1793 | em->bdev = NULL; |
@@ -1807,12 +1812,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
1807 | struct btrfs_key found_key; | 1812 | struct btrfs_key found_key; |
1808 | u64 chunk_tree = chunk_root->root_key.objectid; | 1813 | u64 chunk_tree = chunk_root->root_key.objectid; |
1809 | u64 chunk_type; | 1814 | u64 chunk_type; |
1815 | bool retried = false; | ||
1816 | int failed = 0; | ||
1810 | int ret; | 1817 | int ret; |
1811 | 1818 | ||
1812 | path = btrfs_alloc_path(); | 1819 | path = btrfs_alloc_path(); |
1813 | if (!path) | 1820 | if (!path) |
1814 | return -ENOMEM; | 1821 | return -ENOMEM; |
1815 | 1822 | ||
1823 | again: | ||
1816 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 1824 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
1817 | key.offset = (u64)-1; | 1825 | key.offset = (u64)-1; |
1818 | key.type = BTRFS_CHUNK_ITEM_KEY; | 1826 | key.type = BTRFS_CHUNK_ITEM_KEY; |
@@ -1842,7 +1850,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
1842 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, | 1850 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, |
1843 | found_key.objectid, | 1851 | found_key.objectid, |
1844 | found_key.offset); | 1852 | found_key.offset); |
1845 | BUG_ON(ret); | 1853 | if (ret == -ENOSPC) |
1854 | failed++; | ||
1855 | else if (ret) | ||
1856 | BUG(); | ||
1846 | } | 1857 | } |
1847 | 1858 | ||
1848 | if (found_key.offset == 0) | 1859 | if (found_key.offset == 0) |
@@ -1850,6 +1861,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
1850 | key.offset = found_key.offset - 1; | 1861 | key.offset = found_key.offset - 1; |
1851 | } | 1862 | } |
1852 | ret = 0; | 1863 | ret = 0; |
1864 | if (failed && !retried) { | ||
1865 | failed = 0; | ||
1866 | retried = true; | ||
1867 | goto again; | ||
1868 | } else if (failed && retried) { | ||
1869 | WARN_ON(1); | ||
1870 | ret = -ENOSPC; | ||
1871 | } | ||
1853 | error: | 1872 | error: |
1854 | btrfs_free_path(path); | 1873 | btrfs_free_path(path); |
1855 | return ret; | 1874 | return ret; |
@@ -1894,6 +1913,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1894 | continue; | 1913 | continue; |
1895 | 1914 | ||
1896 | ret = btrfs_shrink_device(device, old_size - size_to_free); | 1915 | ret = btrfs_shrink_device(device, old_size - size_to_free); |
1916 | if (ret == -ENOSPC) | ||
1917 | break; | ||
1897 | BUG_ON(ret); | 1918 | BUG_ON(ret); |
1898 | 1919 | ||
1899 | trans = btrfs_start_transaction(dev_root, 1); | 1920 | trans = btrfs_start_transaction(dev_root, 1); |
@@ -1938,9 +1959,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1938 | chunk = btrfs_item_ptr(path->nodes[0], | 1959 | chunk = btrfs_item_ptr(path->nodes[0], |
1939 | path->slots[0], | 1960 | path->slots[0], |
1940 | struct btrfs_chunk); | 1961 | struct btrfs_chunk); |
1941 | key.offset = found_key.offset; | ||
1942 | /* chunk zero is special */ | 1962 | /* chunk zero is special */ |
1943 | if (key.offset == 0) | 1963 | if (found_key.offset == 0) |
1944 | break; | 1964 | break; |
1945 | 1965 | ||
1946 | btrfs_release_path(chunk_root, path); | 1966 | btrfs_release_path(chunk_root, path); |
@@ -1948,7 +1968,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1948 | chunk_root->root_key.objectid, | 1968 | chunk_root->root_key.objectid, |
1949 | found_key.objectid, | 1969 | found_key.objectid, |
1950 | found_key.offset); | 1970 | found_key.offset); |
1951 | BUG_ON(ret); | 1971 | BUG_ON(ret && ret != -ENOSPC); |
1972 | key.offset = found_key.offset - 1; | ||
1952 | } | 1973 | } |
1953 | ret = 0; | 1974 | ret = 0; |
1954 | error: | 1975 | error: |
@@ -1974,10 +1995,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1974 | u64 chunk_offset; | 1995 | u64 chunk_offset; |
1975 | int ret; | 1996 | int ret; |
1976 | int slot; | 1997 | int slot; |
1998 | int failed = 0; | ||
1999 | bool retried = false; | ||
1977 | struct extent_buffer *l; | 2000 | struct extent_buffer *l; |
1978 | struct btrfs_key key; | 2001 | struct btrfs_key key; |
1979 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2002 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; |
1980 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2003 | u64 old_total = btrfs_super_total_bytes(super_copy); |
2004 | u64 old_size = device->total_bytes; | ||
1981 | u64 diff = device->total_bytes - new_size; | 2005 | u64 diff = device->total_bytes - new_size; |
1982 | 2006 | ||
1983 | if (new_size >= device->total_bytes) | 2007 | if (new_size >= device->total_bytes) |
@@ -1987,12 +2011,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1987 | if (!path) | 2011 | if (!path) |
1988 | return -ENOMEM; | 2012 | return -ENOMEM; |
1989 | 2013 | ||
1990 | trans = btrfs_start_transaction(root, 1); | ||
1991 | if (!trans) { | ||
1992 | ret = -ENOMEM; | ||
1993 | goto done; | ||
1994 | } | ||
1995 | |||
1996 | path->reada = 2; | 2014 | path->reada = 2; |
1997 | 2015 | ||
1998 | lock_chunks(root); | 2016 | lock_chunks(root); |
@@ -2001,8 +2019,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2001 | if (device->writeable) | 2019 | if (device->writeable) |
2002 | device->fs_devices->total_rw_bytes -= diff; | 2020 | device->fs_devices->total_rw_bytes -= diff; |
2003 | unlock_chunks(root); | 2021 | unlock_chunks(root); |
2004 | btrfs_end_transaction(trans, root); | ||
2005 | 2022 | ||
2023 | again: | ||
2006 | key.objectid = device->devid; | 2024 | key.objectid = device->devid; |
2007 | key.offset = (u64)-1; | 2025 | key.offset = (u64)-1; |
2008 | key.type = BTRFS_DEV_EXTENT_KEY; | 2026 | key.type = BTRFS_DEV_EXTENT_KEY; |
@@ -2017,6 +2035,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2017 | goto done; | 2035 | goto done; |
2018 | if (ret) { | 2036 | if (ret) { |
2019 | ret = 0; | 2037 | ret = 0; |
2038 | btrfs_release_path(root, path); | ||
2020 | break; | 2039 | break; |
2021 | } | 2040 | } |
2022 | 2041 | ||
@@ -2024,14 +2043,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2024 | slot = path->slots[0]; | 2043 | slot = path->slots[0]; |
2025 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 2044 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
2026 | 2045 | ||
2027 | if (key.objectid != device->devid) | 2046 | if (key.objectid != device->devid) { |
2047 | btrfs_release_path(root, path); | ||
2028 | break; | 2048 | break; |
2049 | } | ||
2029 | 2050 | ||
2030 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 2051 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
2031 | length = btrfs_dev_extent_length(l, dev_extent); | 2052 | length = btrfs_dev_extent_length(l, dev_extent); |
2032 | 2053 | ||
2033 | if (key.offset + length <= new_size) | 2054 | if (key.offset + length <= new_size) { |
2055 | btrfs_release_path(root, path); | ||
2034 | break; | 2056 | break; |
2057 | } | ||
2035 | 2058 | ||
2036 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 2059 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
2037 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 2060 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
@@ -2040,8 +2063,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
2040 | 2063 | ||
2041 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | 2064 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, |
2042 | chunk_offset); | 2065 | chunk_offset); |
2043 | if (ret) | 2066 | if (ret && ret != -ENOSPC) |
2044 | goto done; | 2067 | goto done; |
2068 | if (ret == -ENOSPC) | ||
2069 | failed++; | ||
2070 | key.offset -= 1; | ||
2071 | } | ||
2072 | |||
2073 | if (failed && !retried) { | ||
2074 | failed = 0; | ||
2075 | retried = true; | ||
2076 | goto again; | ||
2077 | } else if (failed && retried) { | ||
2078 | ret = -ENOSPC; | ||
2079 | lock_chunks(root); | ||
2080 | |||
2081 | device->total_bytes = old_size; | ||
2082 | if (device->writeable) | ||
2083 | device->fs_devices->total_rw_bytes += diff; | ||
2084 | unlock_chunks(root); | ||
2085 | goto done; | ||
2045 | } | 2086 | } |
2046 | 2087 | ||
2047 | /* Shrinking succeeded, else we would be at "done". */ | 2088 | /* Shrinking succeeded, else we would be at "done". */ |
@@ -2294,9 +2335,9 @@ again: | |||
2294 | em->block_len = em->len; | 2335 | em->block_len = em->len; |
2295 | 2336 | ||
2296 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 2337 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
2297 | spin_lock(&em_tree->lock); | 2338 | write_lock(&em_tree->lock); |
2298 | ret = add_extent_mapping(em_tree, em); | 2339 | ret = add_extent_mapping(em_tree, em); |
2299 | spin_unlock(&em_tree->lock); | 2340 | write_unlock(&em_tree->lock); |
2300 | BUG_ON(ret); | 2341 | BUG_ON(ret); |
2301 | free_extent_map(em); | 2342 | free_extent_map(em); |
2302 | 2343 | ||
@@ -2491,9 +2532,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
2491 | int readonly = 0; | 2532 | int readonly = 0; |
2492 | int i; | 2533 | int i; |
2493 | 2534 | ||
2494 | spin_lock(&map_tree->map_tree.lock); | 2535 | read_lock(&map_tree->map_tree.lock); |
2495 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2536 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
2496 | spin_unlock(&map_tree->map_tree.lock); | 2537 | read_unlock(&map_tree->map_tree.lock); |
2497 | if (!em) | 2538 | if (!em) |
2498 | return 1; | 2539 | return 1; |
2499 | 2540 | ||
@@ -2518,11 +2559,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
2518 | struct extent_map *em; | 2559 | struct extent_map *em; |
2519 | 2560 | ||
2520 | while (1) { | 2561 | while (1) { |
2521 | spin_lock(&tree->map_tree.lock); | 2562 | write_lock(&tree->map_tree.lock); |
2522 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); | 2563 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); |
2523 | if (em) | 2564 | if (em) |
2524 | remove_extent_mapping(&tree->map_tree, em); | 2565 | remove_extent_mapping(&tree->map_tree, em); |
2525 | spin_unlock(&tree->map_tree.lock); | 2566 | write_unlock(&tree->map_tree.lock); |
2526 | if (!em) | 2567 | if (!em) |
2527 | break; | 2568 | break; |
2528 | kfree(em->bdev); | 2569 | kfree(em->bdev); |
@@ -2540,9 +2581,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | |||
2540 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2581 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
2541 | int ret; | 2582 | int ret; |
2542 | 2583 | ||
2543 | spin_lock(&em_tree->lock); | 2584 | read_lock(&em_tree->lock); |
2544 | em = lookup_extent_mapping(em_tree, logical, len); | 2585 | em = lookup_extent_mapping(em_tree, logical, len); |
2545 | spin_unlock(&em_tree->lock); | 2586 | read_unlock(&em_tree->lock); |
2546 | BUG_ON(!em); | 2587 | BUG_ON(!em); |
2547 | 2588 | ||
2548 | BUG_ON(em->start > logical || em->start + em->len < logical); | 2589 | BUG_ON(em->start > logical || em->start + em->len < logical); |
@@ -2604,9 +2645,9 @@ again: | |||
2604 | atomic_set(&multi->error, 0); | 2645 | atomic_set(&multi->error, 0); |
2605 | } | 2646 | } |
2606 | 2647 | ||
2607 | spin_lock(&em_tree->lock); | 2648 | read_lock(&em_tree->lock); |
2608 | em = lookup_extent_mapping(em_tree, logical, *length); | 2649 | em = lookup_extent_mapping(em_tree, logical, *length); |
2609 | spin_unlock(&em_tree->lock); | 2650 | read_unlock(&em_tree->lock); |
2610 | 2651 | ||
2611 | if (!em && unplug_page) | 2652 | if (!em && unplug_page) |
2612 | return 0; | 2653 | return 0; |
@@ -2763,9 +2804,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
2763 | u64 stripe_nr; | 2804 | u64 stripe_nr; |
2764 | int i, j, nr = 0; | 2805 | int i, j, nr = 0; |
2765 | 2806 | ||
2766 | spin_lock(&em_tree->lock); | 2807 | read_lock(&em_tree->lock); |
2767 | em = lookup_extent_mapping(em_tree, chunk_start, 1); | 2808 | em = lookup_extent_mapping(em_tree, chunk_start, 1); |
2768 | spin_unlock(&em_tree->lock); | 2809 | read_unlock(&em_tree->lock); |
2769 | 2810 | ||
2770 | BUG_ON(!em || em->start != chunk_start); | 2811 | BUG_ON(!em || em->start != chunk_start); |
2771 | map = (struct map_lookup *)em->bdev; | 2812 | map = (struct map_lookup *)em->bdev; |
@@ -3053,9 +3094,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
3053 | logical = key->offset; | 3094 | logical = key->offset; |
3054 | length = btrfs_chunk_length(leaf, chunk); | 3095 | length = btrfs_chunk_length(leaf, chunk); |
3055 | 3096 | ||
3056 | spin_lock(&map_tree->map_tree.lock); | 3097 | read_lock(&map_tree->map_tree.lock); |
3057 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); | 3098 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); |
3058 | spin_unlock(&map_tree->map_tree.lock); | 3099 | read_unlock(&map_tree->map_tree.lock); |
3059 | 3100 | ||
3060 | /* already mapped? */ | 3101 | /* already mapped? */ |
3061 | if (em && em->start <= logical && em->start + em->len > logical) { | 3102 | if (em && em->start <= logical && em->start + em->len > logical) { |
@@ -3114,9 +3155,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
3114 | map->stripes[i].dev->in_fs_metadata = 1; | 3155 | map->stripes[i].dev->in_fs_metadata = 1; |
3115 | } | 3156 | } |
3116 | 3157 | ||
3117 | spin_lock(&map_tree->map_tree.lock); | 3158 | write_lock(&map_tree->map_tree.lock); |
3118 | ret = add_extent_mapping(&map_tree->map_tree, em); | 3159 | ret = add_extent_mapping(&map_tree->map_tree, em); |
3119 | spin_unlock(&map_tree->map_tree.lock); | 3160 | write_unlock(&map_tree->map_tree.lock); |
3120 | BUG_ON(ret); | 3161 | BUG_ON(ret); |
3121 | free_extent_map(em); | 3162 | free_extent_map(em); |
3122 | 3163 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5139a833f721..31b0fabdd2ea 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root); | |||
181 | void btrfs_unlock_volumes(void); | 181 | void btrfs_unlock_volumes(void); |
182 | void btrfs_lock_volumes(void); | 182 | void btrfs_lock_volumes(void); |
183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
184 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | ||
185 | struct btrfs_device *device, u64 num_bytes, | ||
186 | u64 *start, u64 *max_avail); | ||
184 | #endif | 187 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index a9d3bf4d2689..b6dd5967c48a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -260,7 +260,7 @@ err: | |||
260 | * attributes are handled directly. | 260 | * attributes are handled directly. |
261 | */ | 261 | */ |
262 | struct xattr_handler *btrfs_xattr_handlers[] = { | 262 | struct xattr_handler *btrfs_xattr_handlers[] = { |
263 | #ifdef CONFIG_FS_POSIX_ACL | 263 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
264 | &btrfs_xattr_acl_access_handler, | 264 | &btrfs_xattr_acl_access_handler, |
265 | &btrfs_xattr_acl_default_handler, | 265 | &btrfs_xattr_acl_default_handler, |
266 | #endif | 266 | #endif |