diff options
Diffstat (limited to 'fs/btrfs')
35 files changed, 4185 insertions, 2365 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index f128427b995b..361604244271 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | #include "btrfs_inode.h" | 27 | #include "btrfs_inode.h" |
| 28 | #include "xattr.h" | 28 | #include "xattr.h" |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_FS_POSIX_ACL | 30 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
| 31 | 31 | ||
| 32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | 32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) |
| 33 | { | 33 | { |
| @@ -313,7 +313,7 @@ struct xattr_handler btrfs_xattr_acl_access_handler = { | |||
| 313 | .set = btrfs_xattr_acl_access_set, | 313 | .set = btrfs_xattr_acl_access_set, |
| 314 | }; | 314 | }; |
| 315 | 315 | ||
| 316 | #else /* CONFIG_FS_POSIX_ACL */ | 316 | #else /* CONFIG_BTRFS_FS_POSIX_ACL */ |
| 317 | 317 | ||
| 318 | int btrfs_acl_chmod(struct inode *inode) | 318 | int btrfs_acl_chmod(struct inode *inode) |
| 319 | { | 319 | { |
| @@ -325,4 +325,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) | |||
| 325 | return 0; | 325 | return 0; |
| 326 | } | 326 | } |
| 327 | 327 | ||
| 328 | #endif /* CONFIG_FS_POSIX_ACL */ | 328 | #endif /* CONFIG_BTRFS_FS_POSIX_ACL */ |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 019e8af449ab..c0861e781cdb 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -48,6 +48,9 @@ struct btrfs_worker_thread { | |||
| 48 | /* number of things on the pending list */ | 48 | /* number of things on the pending list */ |
| 49 | atomic_t num_pending; | 49 | atomic_t num_pending; |
| 50 | 50 | ||
| 51 | /* reference counter for this struct */ | ||
| 52 | atomic_t refs; | ||
| 53 | |||
| 51 | unsigned long sequence; | 54 | unsigned long sequence; |
| 52 | 55 | ||
| 53 | /* protects the pending list. */ | 56 | /* protects the pending list. */ |
| @@ -61,6 +64,51 @@ struct btrfs_worker_thread { | |||
| 61 | }; | 64 | }; |
| 62 | 65 | ||
| 63 | /* | 66 | /* |
| 67 | * btrfs_start_workers uses kthread_run, which can block waiting for memory | ||
| 68 | * for a very long time. It will actually throttle on page writeback, | ||
| 69 | * and so it may not make progress until after our btrfs worker threads | ||
| 70 | * process all of the pending work structs in their queue | ||
| 71 | * | ||
| 72 | * This means we can't use btrfs_start_workers from inside a btrfs worker | ||
| 73 | * thread that is used as part of cleaning dirty memory, which pretty much | ||
| 74 | * involves all of the worker threads. | ||
| 75 | * | ||
| 76 | * Instead we have a helper queue that never has more than one thread | ||
| 77 | * where we schedule thread start operations. This worker_start struct | ||
| 78 | * is used to contain the work and hold a pointer to the queue that needs | ||
| 79 | * another worker. | ||
| 80 | */ | ||
| 81 | struct worker_start { | ||
| 82 | struct btrfs_work work; | ||
| 83 | struct btrfs_workers *queue; | ||
| 84 | }; | ||
| 85 | |||
| 86 | static void start_new_worker_func(struct btrfs_work *work) | ||
| 87 | { | ||
| 88 | struct worker_start *start; | ||
| 89 | start = container_of(work, struct worker_start, work); | ||
| 90 | btrfs_start_workers(start->queue, 1); | ||
| 91 | kfree(start); | ||
| 92 | } | ||
| 93 | |||
| 94 | static int start_new_worker(struct btrfs_workers *queue) | ||
| 95 | { | ||
| 96 | struct worker_start *start; | ||
| 97 | int ret; | ||
| 98 | |||
| 99 | start = kzalloc(sizeof(*start), GFP_NOFS); | ||
| 100 | if (!start) | ||
| 101 | return -ENOMEM; | ||
| 102 | |||
| 103 | start->work.func = start_new_worker_func; | ||
| 104 | start->queue = queue; | ||
| 105 | ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work); | ||
| 106 | if (ret) | ||
| 107 | kfree(start); | ||
| 108 | return ret; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* | ||
| 64 | * helper function to move a thread onto the idle list after it | 112 | * helper function to move a thread onto the idle list after it |
| 65 | * has finished some requests. | 113 | * has finished some requests. |
| 66 | */ | 114 | */ |
| @@ -71,7 +119,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) | |||
| 71 | unsigned long flags; | 119 | unsigned long flags; |
| 72 | spin_lock_irqsave(&worker->workers->lock, flags); | 120 | spin_lock_irqsave(&worker->workers->lock, flags); |
| 73 | worker->idle = 1; | 121 | worker->idle = 1; |
| 74 | list_move(&worker->worker_list, &worker->workers->idle_list); | 122 | |
| 123 | /* the list may be empty if the worker is just starting */ | ||
| 124 | if (!list_empty(&worker->worker_list)) { | ||
| 125 | list_move(&worker->worker_list, | ||
| 126 | &worker->workers->idle_list); | ||
| 127 | } | ||
| 75 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 128 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
| 76 | } | 129 | } |
| 77 | } | 130 | } |
| @@ -87,23 +140,51 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) | |||
| 87 | unsigned long flags; | 140 | unsigned long flags; |
| 88 | spin_lock_irqsave(&worker->workers->lock, flags); | 141 | spin_lock_irqsave(&worker->workers->lock, flags); |
| 89 | worker->idle = 0; | 142 | worker->idle = 0; |
| 90 | list_move_tail(&worker->worker_list, | 143 | |
| 91 | &worker->workers->worker_list); | 144 | if (!list_empty(&worker->worker_list)) { |
| 145 | list_move_tail(&worker->worker_list, | ||
| 146 | &worker->workers->worker_list); | ||
| 147 | } | ||
| 92 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 148 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
| 93 | } | 149 | } |
| 94 | } | 150 | } |
| 95 | 151 | ||
| 96 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | 152 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) |
| 97 | struct btrfs_work *work) | ||
| 98 | { | 153 | { |
| 154 | struct btrfs_workers *workers = worker->workers; | ||
| 99 | unsigned long flags; | 155 | unsigned long flags; |
| 100 | 156 | ||
| 157 | rmb(); | ||
| 158 | if (!workers->atomic_start_pending) | ||
| 159 | return; | ||
| 160 | |||
| 161 | spin_lock_irqsave(&workers->lock, flags); | ||
| 162 | if (!workers->atomic_start_pending) | ||
| 163 | goto out; | ||
| 164 | |||
| 165 | workers->atomic_start_pending = 0; | ||
| 166 | if (workers->num_workers + workers->num_workers_starting >= | ||
| 167 | workers->max_workers) | ||
| 168 | goto out; | ||
| 169 | |||
| 170 | workers->num_workers_starting += 1; | ||
| 171 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 172 | start_new_worker(workers); | ||
| 173 | return; | ||
| 174 | |||
| 175 | out: | ||
| 176 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 177 | } | ||
| 178 | |||
| 179 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | ||
| 180 | struct btrfs_work *work) | ||
| 181 | { | ||
| 101 | if (!workers->ordered) | 182 | if (!workers->ordered) |
| 102 | return 0; | 183 | return 0; |
| 103 | 184 | ||
| 104 | set_bit(WORK_DONE_BIT, &work->flags); | 185 | set_bit(WORK_DONE_BIT, &work->flags); |
| 105 | 186 | ||
| 106 | spin_lock_irqsave(&workers->lock, flags); | 187 | spin_lock(&workers->order_lock); |
| 107 | 188 | ||
| 108 | while (1) { | 189 | while (1) { |
| 109 | if (!list_empty(&workers->prio_order_list)) { | 190 | if (!list_empty(&workers->prio_order_list)) { |
| @@ -126,45 +207,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
| 126 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 207 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
| 127 | break; | 208 | break; |
| 128 | 209 | ||
| 129 | spin_unlock_irqrestore(&workers->lock, flags); | 210 | spin_unlock(&workers->order_lock); |
| 130 | 211 | ||
| 131 | work->ordered_func(work); | 212 | work->ordered_func(work); |
| 132 | 213 | ||
| 133 | /* now take the lock again and call the freeing code */ | 214 | /* now take the lock again and call the freeing code */ |
| 134 | spin_lock_irqsave(&workers->lock, flags); | 215 | spin_lock(&workers->order_lock); |
| 135 | list_del(&work->order_list); | 216 | list_del(&work->order_list); |
| 136 | work->ordered_free(work); | 217 | work->ordered_free(work); |
| 137 | } | 218 | } |
| 138 | 219 | ||
| 139 | spin_unlock_irqrestore(&workers->lock, flags); | 220 | spin_unlock(&workers->order_lock); |
| 140 | return 0; | 221 | return 0; |
| 141 | } | 222 | } |
| 142 | 223 | ||
| 224 | static void put_worker(struct btrfs_worker_thread *worker) | ||
| 225 | { | ||
| 226 | if (atomic_dec_and_test(&worker->refs)) | ||
| 227 | kfree(worker); | ||
| 228 | } | ||
| 229 | |||
| 230 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
| 231 | { | ||
| 232 | int freeit = 0; | ||
| 233 | |||
| 234 | spin_lock_irq(&worker->lock); | ||
| 235 | spin_lock(&worker->workers->lock); | ||
| 236 | if (worker->workers->num_workers > 1 && | ||
| 237 | worker->idle && | ||
| 238 | !worker->working && | ||
| 239 | !list_empty(&worker->worker_list) && | ||
| 240 | list_empty(&worker->prio_pending) && | ||
| 241 | list_empty(&worker->pending) && | ||
| 242 | atomic_read(&worker->num_pending) == 0) { | ||
| 243 | freeit = 1; | ||
| 244 | list_del_init(&worker->worker_list); | ||
| 245 | worker->workers->num_workers--; | ||
| 246 | } | ||
| 247 | spin_unlock(&worker->workers->lock); | ||
| 248 | spin_unlock_irq(&worker->lock); | ||
| 249 | |||
| 250 | if (freeit) | ||
| 251 | put_worker(worker); | ||
| 252 | return freeit; | ||
| 253 | } | ||
| 254 | |||
| 255 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | ||
| 256 | struct list_head *prio_head, | ||
| 257 | struct list_head *head) | ||
| 258 | { | ||
| 259 | struct btrfs_work *work = NULL; | ||
| 260 | struct list_head *cur = NULL; | ||
| 261 | |||
| 262 | if(!list_empty(prio_head)) | ||
| 263 | cur = prio_head->next; | ||
| 264 | |||
| 265 | smp_mb(); | ||
| 266 | if (!list_empty(&worker->prio_pending)) | ||
| 267 | goto refill; | ||
| 268 | |||
| 269 | if (!list_empty(head)) | ||
| 270 | cur = head->next; | ||
| 271 | |||
| 272 | if (cur) | ||
| 273 | goto out; | ||
| 274 | |||
| 275 | refill: | ||
| 276 | spin_lock_irq(&worker->lock); | ||
| 277 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
| 278 | list_splice_tail_init(&worker->pending, head); | ||
| 279 | |||
| 280 | if (!list_empty(prio_head)) | ||
| 281 | cur = prio_head->next; | ||
| 282 | else if (!list_empty(head)) | ||
| 283 | cur = head->next; | ||
| 284 | spin_unlock_irq(&worker->lock); | ||
| 285 | |||
| 286 | if (!cur) | ||
| 287 | goto out_fail; | ||
| 288 | |||
| 289 | out: | ||
| 290 | work = list_entry(cur, struct btrfs_work, list); | ||
| 291 | |||
| 292 | out_fail: | ||
| 293 | return work; | ||
| 294 | } | ||
| 295 | |||
| 143 | /* | 296 | /* |
| 144 | * main loop for servicing work items | 297 | * main loop for servicing work items |
| 145 | */ | 298 | */ |
| 146 | static int worker_loop(void *arg) | 299 | static int worker_loop(void *arg) |
| 147 | { | 300 | { |
| 148 | struct btrfs_worker_thread *worker = arg; | 301 | struct btrfs_worker_thread *worker = arg; |
| 149 | struct list_head *cur; | 302 | struct list_head head; |
| 303 | struct list_head prio_head; | ||
| 150 | struct btrfs_work *work; | 304 | struct btrfs_work *work; |
| 305 | |||
| 306 | INIT_LIST_HEAD(&head); | ||
| 307 | INIT_LIST_HEAD(&prio_head); | ||
| 308 | |||
| 151 | do { | 309 | do { |
| 152 | spin_lock_irq(&worker->lock); | 310 | again: |
| 153 | again_locked: | ||
| 154 | while (1) { | 311 | while (1) { |
| 155 | if (!list_empty(&worker->prio_pending)) | 312 | |
| 156 | cur = worker->prio_pending.next; | 313 | |
| 157 | else if (!list_empty(&worker->pending)) | 314 | work = get_next_work(worker, &prio_head, &head); |
| 158 | cur = worker->pending.next; | 315 | if (!work) |
| 159 | else | ||
| 160 | break; | 316 | break; |
| 161 | 317 | ||
| 162 | work = list_entry(cur, struct btrfs_work, list); | ||
| 163 | list_del(&work->list); | 318 | list_del(&work->list); |
| 164 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 319 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
| 165 | 320 | ||
| 166 | work->worker = worker; | 321 | work->worker = worker; |
| 167 | spin_unlock_irq(&worker->lock); | ||
| 168 | 322 | ||
| 169 | work->func(work); | 323 | work->func(work); |
| 170 | 324 | ||
| @@ -175,9 +329,13 @@ again_locked: | |||
| 175 | */ | 329 | */ |
| 176 | run_ordered_completions(worker->workers, work); | 330 | run_ordered_completions(worker->workers, work); |
| 177 | 331 | ||
| 178 | spin_lock_irq(&worker->lock); | 332 | check_pending_worker_creates(worker); |
| 179 | check_idle_worker(worker); | 333 | |
| 180 | } | 334 | } |
| 335 | |||
| 336 | spin_lock_irq(&worker->lock); | ||
| 337 | check_idle_worker(worker); | ||
| 338 | |||
| 181 | if (freezing(current)) { | 339 | if (freezing(current)) { |
| 182 | worker->working = 0; | 340 | worker->working = 0; |
| 183 | spin_unlock_irq(&worker->lock); | 341 | spin_unlock_irq(&worker->lock); |
| @@ -216,8 +374,10 @@ again_locked: | |||
| 216 | spin_lock_irq(&worker->lock); | 374 | spin_lock_irq(&worker->lock); |
| 217 | set_current_state(TASK_INTERRUPTIBLE); | 375 | set_current_state(TASK_INTERRUPTIBLE); |
| 218 | if (!list_empty(&worker->pending) || | 376 | if (!list_empty(&worker->pending) || |
| 219 | !list_empty(&worker->prio_pending)) | 377 | !list_empty(&worker->prio_pending)) { |
| 220 | goto again_locked; | 378 | spin_unlock_irq(&worker->lock); |
| 379 | goto again; | ||
| 380 | } | ||
| 221 | 381 | ||
| 222 | /* | 382 | /* |
| 223 | * this makes sure we get a wakeup when someone | 383 | * this makes sure we get a wakeup when someone |
| @@ -226,8 +386,13 @@ again_locked: | |||
| 226 | worker->working = 0; | 386 | worker->working = 0; |
| 227 | spin_unlock_irq(&worker->lock); | 387 | spin_unlock_irq(&worker->lock); |
| 228 | 388 | ||
| 229 | if (!kthread_should_stop()) | 389 | if (!kthread_should_stop()) { |
| 230 | schedule(); | 390 | schedule_timeout(HZ * 120); |
| 391 | if (!worker->working && | ||
| 392 | try_worker_shutdown(worker)) { | ||
| 393 | return 0; | ||
| 394 | } | ||
| 395 | } | ||
| 231 | } | 396 | } |
| 232 | __set_current_state(TASK_RUNNING); | 397 | __set_current_state(TASK_RUNNING); |
| 233 | } | 398 | } |
| @@ -242,41 +407,61 @@ int btrfs_stop_workers(struct btrfs_workers *workers) | |||
| 242 | { | 407 | { |
| 243 | struct list_head *cur; | 408 | struct list_head *cur; |
| 244 | struct btrfs_worker_thread *worker; | 409 | struct btrfs_worker_thread *worker; |
| 410 | int can_stop; | ||
| 245 | 411 | ||
| 412 | spin_lock_irq(&workers->lock); | ||
| 246 | list_splice_init(&workers->idle_list, &workers->worker_list); | 413 | list_splice_init(&workers->idle_list, &workers->worker_list); |
| 247 | while (!list_empty(&workers->worker_list)) { | 414 | while (!list_empty(&workers->worker_list)) { |
| 248 | cur = workers->worker_list.next; | 415 | cur = workers->worker_list.next; |
| 249 | worker = list_entry(cur, struct btrfs_worker_thread, | 416 | worker = list_entry(cur, struct btrfs_worker_thread, |
| 250 | worker_list); | 417 | worker_list); |
| 251 | kthread_stop(worker->task); | 418 | |
| 252 | list_del(&worker->worker_list); | 419 | atomic_inc(&worker->refs); |
| 253 | kfree(worker); | 420 | workers->num_workers -= 1; |
| 421 | if (!list_empty(&worker->worker_list)) { | ||
| 422 | list_del_init(&worker->worker_list); | ||
| 423 | put_worker(worker); | ||
| 424 | can_stop = 1; | ||
| 425 | } else | ||
| 426 | can_stop = 0; | ||
| 427 | spin_unlock_irq(&workers->lock); | ||
| 428 | if (can_stop) | ||
| 429 | kthread_stop(worker->task); | ||
| 430 | spin_lock_irq(&workers->lock); | ||
| 431 | put_worker(worker); | ||
| 254 | } | 432 | } |
| 433 | spin_unlock_irq(&workers->lock); | ||
| 255 | return 0; | 434 | return 0; |
| 256 | } | 435 | } |
| 257 | 436 | ||
| 258 | /* | 437 | /* |
| 259 | * simple init on struct btrfs_workers | 438 | * simple init on struct btrfs_workers |
| 260 | */ | 439 | */ |
| 261 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | 440 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, |
| 441 | struct btrfs_workers *async_helper) | ||
| 262 | { | 442 | { |
| 263 | workers->num_workers = 0; | 443 | workers->num_workers = 0; |
| 444 | workers->num_workers_starting = 0; | ||
| 264 | INIT_LIST_HEAD(&workers->worker_list); | 445 | INIT_LIST_HEAD(&workers->worker_list); |
| 265 | INIT_LIST_HEAD(&workers->idle_list); | 446 | INIT_LIST_HEAD(&workers->idle_list); |
| 266 | INIT_LIST_HEAD(&workers->order_list); | 447 | INIT_LIST_HEAD(&workers->order_list); |
| 267 | INIT_LIST_HEAD(&workers->prio_order_list); | 448 | INIT_LIST_HEAD(&workers->prio_order_list); |
| 268 | spin_lock_init(&workers->lock); | 449 | spin_lock_init(&workers->lock); |
| 450 | spin_lock_init(&workers->order_lock); | ||
| 269 | workers->max_workers = max; | 451 | workers->max_workers = max; |
| 270 | workers->idle_thresh = 32; | 452 | workers->idle_thresh = 32; |
| 271 | workers->name = name; | 453 | workers->name = name; |
| 272 | workers->ordered = 0; | 454 | workers->ordered = 0; |
| 455 | workers->atomic_start_pending = 0; | ||
| 456 | workers->atomic_worker_start = async_helper; | ||
| 273 | } | 457 | } |
| 274 | 458 | ||
| 275 | /* | 459 | /* |
| 276 | * starts new worker threads. This does not enforce the max worker | 460 | * starts new worker threads. This does not enforce the max worker |
| 277 | * count in case you need to temporarily go past it. | 461 | * count in case you need to temporarily go past it. |
| 278 | */ | 462 | */ |
| 279 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | 463 | static int __btrfs_start_workers(struct btrfs_workers *workers, |
| 464 | int num_workers) | ||
| 280 | { | 465 | { |
| 281 | struct btrfs_worker_thread *worker; | 466 | struct btrfs_worker_thread *worker; |
| 282 | int ret = 0; | 467 | int ret = 0; |
| @@ -293,7 +478,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
| 293 | INIT_LIST_HEAD(&worker->prio_pending); | 478 | INIT_LIST_HEAD(&worker->prio_pending); |
| 294 | INIT_LIST_HEAD(&worker->worker_list); | 479 | INIT_LIST_HEAD(&worker->worker_list); |
| 295 | spin_lock_init(&worker->lock); | 480 | spin_lock_init(&worker->lock); |
| 481 | |||
| 296 | atomic_set(&worker->num_pending, 0); | 482 | atomic_set(&worker->num_pending, 0); |
| 483 | atomic_set(&worker->refs, 1); | ||
| 297 | worker->workers = workers; | 484 | worker->workers = workers; |
| 298 | worker->task = kthread_run(worker_loop, worker, | 485 | worker->task = kthread_run(worker_loop, worker, |
| 299 | "btrfs-%s-%d", workers->name, | 486 | "btrfs-%s-%d", workers->name, |
| @@ -303,11 +490,12 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
| 303 | kfree(worker); | 490 | kfree(worker); |
| 304 | goto fail; | 491 | goto fail; |
| 305 | } | 492 | } |
| 306 | |||
| 307 | spin_lock_irq(&workers->lock); | 493 | spin_lock_irq(&workers->lock); |
| 308 | list_add_tail(&worker->worker_list, &workers->idle_list); | 494 | list_add_tail(&worker->worker_list, &workers->idle_list); |
| 309 | worker->idle = 1; | 495 | worker->idle = 1; |
| 310 | workers->num_workers++; | 496 | workers->num_workers++; |
| 497 | workers->num_workers_starting--; | ||
| 498 | WARN_ON(workers->num_workers_starting < 0); | ||
| 311 | spin_unlock_irq(&workers->lock); | 499 | spin_unlock_irq(&workers->lock); |
| 312 | } | 500 | } |
| 313 | return 0; | 501 | return 0; |
| @@ -316,6 +504,14 @@ fail: | |||
| 316 | return ret; | 504 | return ret; |
| 317 | } | 505 | } |
| 318 | 506 | ||
| 507 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | ||
| 508 | { | ||
| 509 | spin_lock_irq(&workers->lock); | ||
| 510 | workers->num_workers_starting += num_workers; | ||
| 511 | spin_unlock_irq(&workers->lock); | ||
| 512 | return __btrfs_start_workers(workers, num_workers); | ||
| 513 | } | ||
| 514 | |||
| 319 | /* | 515 | /* |
| 320 | * run through the list and find a worker thread that doesn't have a lot | 516 | * run through the list and find a worker thread that doesn't have a lot |
| 321 | * to do right now. This can return null if we aren't yet at the thread | 517 | * to do right now. This can return null if we aren't yet at the thread |
| @@ -325,7 +521,10 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
| 325 | { | 521 | { |
| 326 | struct btrfs_worker_thread *worker; | 522 | struct btrfs_worker_thread *worker; |
| 327 | struct list_head *next; | 523 | struct list_head *next; |
| 328 | int enforce_min = workers->num_workers < workers->max_workers; | 524 | int enforce_min; |
| 525 | |||
| 526 | enforce_min = (workers->num_workers + workers->num_workers_starting) < | ||
| 527 | workers->max_workers; | ||
| 329 | 528 | ||
| 330 | /* | 529 | /* |
| 331 | * if we find an idle thread, don't move it to the end of the | 530 | * if we find an idle thread, don't move it to the end of the |
| @@ -350,7 +549,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
| 350 | */ | 549 | */ |
| 351 | next = workers->worker_list.next; | 550 | next = workers->worker_list.next; |
| 352 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | 551 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); |
| 353 | atomic_inc(&worker->num_pending); | ||
| 354 | worker->sequence++; | 552 | worker->sequence++; |
| 355 | 553 | ||
| 356 | if (worker->sequence % workers->idle_thresh == 0) | 554 | if (worker->sequence % workers->idle_thresh == 0) |
| @@ -367,35 +565,49 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | |||
| 367 | { | 565 | { |
| 368 | struct btrfs_worker_thread *worker; | 566 | struct btrfs_worker_thread *worker; |
| 369 | unsigned long flags; | 567 | unsigned long flags; |
| 568 | struct list_head *fallback; | ||
| 370 | 569 | ||
| 371 | again: | 570 | again: |
| 372 | spin_lock_irqsave(&workers->lock, flags); | 571 | spin_lock_irqsave(&workers->lock, flags); |
| 373 | worker = next_worker(workers); | 572 | worker = next_worker(workers); |
| 374 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 375 | 573 | ||
| 376 | if (!worker) { | 574 | if (!worker) { |
| 377 | spin_lock_irqsave(&workers->lock, flags); | 575 | if (workers->num_workers + workers->num_workers_starting >= |
| 378 | if (workers->num_workers >= workers->max_workers) { | 576 | workers->max_workers) { |
| 379 | struct list_head *fallback = NULL; | 577 | goto fallback; |
| 380 | /* | 578 | } else if (workers->atomic_worker_start) { |
| 381 | * we have failed to find any workers, just | 579 | workers->atomic_start_pending = 1; |
| 382 | * return the force one | 580 | goto fallback; |
| 383 | */ | ||
| 384 | if (!list_empty(&workers->worker_list)) | ||
| 385 | fallback = workers->worker_list.next; | ||
| 386 | if (!list_empty(&workers->idle_list)) | ||
| 387 | fallback = workers->idle_list.next; | ||
| 388 | BUG_ON(!fallback); | ||
| 389 | worker = list_entry(fallback, | ||
| 390 | struct btrfs_worker_thread, worker_list); | ||
| 391 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 392 | } else { | 581 | } else { |
| 582 | workers->num_workers_starting++; | ||
| 393 | spin_unlock_irqrestore(&workers->lock, flags); | 583 | spin_unlock_irqrestore(&workers->lock, flags); |
| 394 | /* we're below the limit, start another worker */ | 584 | /* we're below the limit, start another worker */ |
| 395 | btrfs_start_workers(workers, 1); | 585 | __btrfs_start_workers(workers, 1); |
| 396 | goto again; | 586 | goto again; |
| 397 | } | 587 | } |
| 398 | } | 588 | } |
| 589 | goto found; | ||
| 590 | |||
| 591 | fallback: | ||
| 592 | fallback = NULL; | ||
| 593 | /* | ||
| 594 | * we have failed to find any workers, just | ||
| 595 | * return the first one we can find. | ||
| 596 | */ | ||
| 597 | if (!list_empty(&workers->worker_list)) | ||
| 598 | fallback = workers->worker_list.next; | ||
| 599 | if (!list_empty(&workers->idle_list)) | ||
| 600 | fallback = workers->idle_list.next; | ||
| 601 | BUG_ON(!fallback); | ||
| 602 | worker = list_entry(fallback, | ||
| 603 | struct btrfs_worker_thread, worker_list); | ||
| 604 | found: | ||
| 605 | /* | ||
| 606 | * this makes sure the worker doesn't exit before it is placed | ||
| 607 | * onto a busy/idle list | ||
| 608 | */ | ||
| 609 | atomic_inc(&worker->num_pending); | ||
| 610 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 399 | return worker; | 611 | return worker; |
| 400 | } | 612 | } |
| 401 | 613 | ||
| @@ -427,7 +639,7 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
| 427 | spin_lock(&worker->workers->lock); | 639 | spin_lock(&worker->workers->lock); |
| 428 | worker->idle = 0; | 640 | worker->idle = 0; |
| 429 | list_move_tail(&worker->worker_list, | 641 | list_move_tail(&worker->worker_list, |
| 430 | &worker->workers->worker_list); | 642 | &worker->workers->worker_list); |
| 431 | spin_unlock(&worker->workers->lock); | 643 | spin_unlock(&worker->workers->lock); |
| 432 | } | 644 | } |
| 433 | if (!worker->working) { | 645 | if (!worker->working) { |
| @@ -435,9 +647,9 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
| 435 | worker->working = 1; | 647 | worker->working = 1; |
| 436 | } | 648 | } |
| 437 | 649 | ||
| 438 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 439 | if (wake) | 650 | if (wake) |
| 440 | wake_up_process(worker->task); | 651 | wake_up_process(worker->task); |
| 652 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 441 | out: | 653 | out: |
| 442 | 654 | ||
| 443 | return 0; | 655 | return 0; |
| @@ -463,14 +675,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 463 | 675 | ||
| 464 | worker = find_worker(workers); | 676 | worker = find_worker(workers); |
| 465 | if (workers->ordered) { | 677 | if (workers->ordered) { |
| 466 | spin_lock_irqsave(&workers->lock, flags); | 678 | /* |
| 679 | * you're not allowed to do ordered queues from an | ||
| 680 | * interrupt handler | ||
| 681 | */ | ||
| 682 | spin_lock(&workers->order_lock); | ||
| 467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | 683 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
| 468 | list_add_tail(&work->order_list, | 684 | list_add_tail(&work->order_list, |
| 469 | &workers->prio_order_list); | 685 | &workers->prio_order_list); |
| 470 | } else { | 686 | } else { |
| 471 | list_add_tail(&work->order_list, &workers->order_list); | 687 | list_add_tail(&work->order_list, &workers->order_list); |
| 472 | } | 688 | } |
| 473 | spin_unlock_irqrestore(&workers->lock, flags); | 689 | spin_unlock(&workers->order_lock); |
| 474 | } else { | 690 | } else { |
| 475 | INIT_LIST_HEAD(&work->order_list); | 691 | INIT_LIST_HEAD(&work->order_list); |
| 476 | } | 692 | } |
| @@ -481,7 +697,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 481 | list_add_tail(&work->list, &worker->prio_pending); | 697 | list_add_tail(&work->list, &worker->prio_pending); |
| 482 | else | 698 | else |
| 483 | list_add_tail(&work->list, &worker->pending); | 699 | list_add_tail(&work->list, &worker->pending); |
| 484 | atomic_inc(&worker->num_pending); | ||
| 485 | check_busy_worker(worker); | 700 | check_busy_worker(worker); |
| 486 | 701 | ||
| 487 | /* | 702 | /* |
| @@ -492,10 +707,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 492 | wake = 1; | 707 | wake = 1; |
| 493 | worker->working = 1; | 708 | worker->working = 1; |
| 494 | 709 | ||
| 495 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 496 | |||
| 497 | if (wake) | 710 | if (wake) |
| 498 | wake_up_process(worker->task); | 711 | wake_up_process(worker->task); |
| 712 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 713 | |||
| 499 | out: | 714 | out: |
| 500 | return 0; | 715 | return 0; |
| 501 | } | 716 | } |
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1b511c109db6..5077746cf85e 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
| @@ -64,6 +64,8 @@ struct btrfs_workers { | |||
| 64 | /* current number of running workers */ | 64 | /* current number of running workers */ |
| 65 | int num_workers; | 65 | int num_workers; |
| 66 | 66 | ||
| 67 | int num_workers_starting; | ||
| 68 | |||
| 67 | /* max number of workers allowed. changed by btrfs_start_workers */ | 69 | /* max number of workers allowed. changed by btrfs_start_workers */ |
| 68 | int max_workers; | 70 | int max_workers; |
| 69 | 71 | ||
| @@ -73,6 +75,16 @@ struct btrfs_workers { | |||
| 73 | /* force completions in the order they were queued */ | 75 | /* force completions in the order they were queued */ |
| 74 | int ordered; | 76 | int ordered; |
| 75 | 77 | ||
| 78 | /* more workers required, but in an interrupt handler */ | ||
| 79 | int atomic_start_pending; | ||
| 80 | |||
| 81 | /* | ||
| 82 | * are we allowed to sleep while starting workers or are we required | ||
| 83 | * to start them at a later time? If we can't sleep, this indicates | ||
| 84 | * which queue we need to use to schedule thread creation. | ||
| 85 | */ | ||
| 86 | struct btrfs_workers *atomic_worker_start; | ||
| 87 | |||
| 76 | /* list with all the work threads. The workers on the idle thread | 88 | /* list with all the work threads. The workers on the idle thread |
| 77 | * may be actively servicing jobs, but they haven't yet hit the | 89 | * may be actively servicing jobs, but they haven't yet hit the |
| 78 | * idle thresh limit above. | 90 | * idle thresh limit above. |
| @@ -90,6 +102,9 @@ struct btrfs_workers { | |||
| 90 | /* lock for finding the next worker thread to queue on */ | 102 | /* lock for finding the next worker thread to queue on */ |
| 91 | spinlock_t lock; | 103 | spinlock_t lock; |
| 92 | 104 | ||
| 105 | /* lock for the ordered lists */ | ||
| 106 | spinlock_t order_lock; | ||
| 107 | |||
| 93 | /* extra name for this worker, used for current->name */ | 108 | /* extra name for this worker, used for current->name */ |
| 94 | char *name; | 109 | char *name; |
| 95 | }; | 110 | }; |
| @@ -97,7 +112,8 @@ struct btrfs_workers { | |||
| 97 | int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); | 112 | int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); |
| 98 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); | 113 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); |
| 99 | int btrfs_stop_workers(struct btrfs_workers *workers); | 114 | int btrfs_stop_workers(struct btrfs_workers *workers); |
| 100 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); | 115 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, |
| 116 | struct btrfs_workers *async_starter); | ||
| 101 | int btrfs_requeue_work(struct btrfs_work *work); | 117 | int btrfs_requeue_work(struct btrfs_work *work); |
| 102 | void btrfs_set_work_high_prio(struct btrfs_work *work); | 118 | void btrfs_set_work_high_prio(struct btrfs_work *work); |
| 103 | #endif | 119 | #endif |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ea1ea0af8c0e..f6783a42f010 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -86,6 +86,12 @@ struct btrfs_inode { | |||
| 86 | * transid of the trans_handle that last modified this inode | 86 | * transid of the trans_handle that last modified this inode |
| 87 | */ | 87 | */ |
| 88 | u64 last_trans; | 88 | u64 last_trans; |
| 89 | |||
| 90 | /* | ||
| 91 | * log transid when this inode was last modified | ||
| 92 | */ | ||
| 93 | u64 last_sub_trans; | ||
| 94 | |||
| 89 | /* | 95 | /* |
| 90 | * transid that last logged this inode | 96 | * transid that last logged this inode |
| 91 | */ | 97 | */ |
| @@ -128,6 +134,16 @@ struct btrfs_inode { | |||
| 128 | u64 last_unlink_trans; | 134 | u64 last_unlink_trans; |
| 129 | 135 | ||
| 130 | /* | 136 | /* |
| 137 | * Counters to keep track of the number of extent item's we may use due | ||
| 138 | * to delalloc and such. outstanding_extents is the number of extent | ||
| 139 | * items we think we'll end up using, and reserved_extents is the number | ||
| 140 | * of extent items we've reserved metadata for. | ||
| 141 | */ | ||
| 142 | spinlock_t accounting_lock; | ||
| 143 | int reserved_extents; | ||
| 144 | int outstanding_extents; | ||
| 145 | |||
| 146 | /* | ||
| 131 | * ordered_data_close is set by truncate when a file that used | 147 | * ordered_data_close is set by truncate when a file that used |
| 132 | * to have good data has been truncated to zero. When it is set | 148 | * to have good data has been truncated to zero. When it is set |
| 133 | * the btrfs file release call will add this inode to the | 149 | * the btrfs file release call will add this inode to the |
| @@ -138,6 +154,7 @@ struct btrfs_inode { | |||
| 138 | * of these. | 154 | * of these. |
| 139 | */ | 155 | */ |
| 140 | unsigned ordered_data_close:1; | 156 | unsigned ordered_data_close:1; |
| 157 | unsigned dummy_inode:1; | ||
| 141 | 158 | ||
| 142 | struct inode vfs_inode; | 159 | struct inode vfs_inode; |
| 143 | }; | 160 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9d8ba4d54a37..a11a32058b50 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 506 | */ | 506 | */ |
| 507 | set_page_extent_mapped(page); | 507 | set_page_extent_mapped(page); |
| 508 | lock_extent(tree, last_offset, end, GFP_NOFS); | 508 | lock_extent(tree, last_offset, end, GFP_NOFS); |
| 509 | spin_lock(&em_tree->lock); | 509 | read_lock(&em_tree->lock); |
| 510 | em = lookup_extent_mapping(em_tree, last_offset, | 510 | em = lookup_extent_mapping(em_tree, last_offset, |
| 511 | PAGE_CACHE_SIZE); | 511 | PAGE_CACHE_SIZE); |
| 512 | spin_unlock(&em_tree->lock); | 512 | read_unlock(&em_tree->lock); |
| 513 | 513 | ||
| 514 | if (!em || last_offset < em->start || | 514 | if (!em || last_offset < em->start || |
| 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || | 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || |
| @@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 593 | em_tree = &BTRFS_I(inode)->extent_tree; | 593 | em_tree = &BTRFS_I(inode)->extent_tree; |
| 594 | 594 | ||
| 595 | /* we need the actual starting offset of this extent in the file */ | 595 | /* we need the actual starting offset of this extent in the file */ |
| 596 | spin_lock(&em_tree->lock); | 596 | read_lock(&em_tree->lock); |
| 597 | em = lookup_extent_mapping(em_tree, | 597 | em = lookup_extent_mapping(em_tree, |
| 598 | page_offset(bio->bi_io_vec->bv_page), | 598 | page_offset(bio->bi_io_vec->bv_page), |
| 599 | PAGE_CACHE_SIZE); | 599 | PAGE_CACHE_SIZE); |
| 600 | spin_unlock(&em_tree->lock); | 600 | read_unlock(&em_tree->lock); |
| 601 | 601 | ||
| 602 | compressed_len = em->block_len; | 602 | compressed_len = em->block_len; |
| 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fdcc0512d3a..ec96f3a6d536 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
| 2853 | int split; | 2853 | int split; |
| 2854 | int num_doubles = 0; | 2854 | int num_doubles = 0; |
| 2855 | 2855 | ||
| 2856 | l = path->nodes[0]; | ||
| 2857 | slot = path->slots[0]; | ||
| 2858 | if (extend && data_size + btrfs_item_size_nr(l, slot) + | ||
| 2859 | sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) | ||
| 2860 | return -EOVERFLOW; | ||
| 2861 | |||
| 2856 | /* first try to make some room by pushing left and right */ | 2862 | /* first try to make some room by pushing left and right */ |
| 2857 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2863 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { |
| 2858 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2864 | wret = push_leaf_right(trans, root, path, data_size, 0); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 837435ce84ca..444b3e9b92a4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -114,6 +114,10 @@ struct btrfs_ordered_sum; | |||
| 114 | */ | 114 | */ |
| 115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL | 115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL |
| 116 | 116 | ||
| 117 | #define BTRFS_BTREE_INODE_OBJECTID 1 | ||
| 118 | |||
| 119 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 | ||
| 120 | |||
| 117 | /* | 121 | /* |
| 118 | * we can actually store much bigger names, but lets not confuse the rest | 122 | * we can actually store much bigger names, but lets not confuse the rest |
| 119 | * of linux | 123 | * of linux |
| @@ -670,21 +674,29 @@ struct btrfs_space_info { | |||
| 670 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 674 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
| 671 | current allocations */ | 675 | current allocations */ |
| 672 | u64 bytes_readonly; /* total bytes that are read only */ | 676 | u64 bytes_readonly; /* total bytes that are read only */ |
| 673 | 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ | |
| 674 | /* delalloc accounting */ | 678 | u64 bytes_root; /* the number of bytes needed to commit a |
| 675 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | 679 | transaction */ |
| 676 | this space is not necessarily reserved yet | ||
| 677 | by the allocator */ | ||
| 678 | u64 bytes_may_use; /* number of bytes that may be used for | 680 | u64 bytes_may_use; /* number of bytes that may be used for |
| 679 | delalloc */ | 681 | delalloc/allocations */ |
| 682 | u64 bytes_delalloc; /* number of bytes currently reserved for | ||
| 683 | delayed allocation */ | ||
| 680 | 684 | ||
| 681 | int full; /* indicates that we cannot allocate any more | 685 | int full; /* indicates that we cannot allocate any more |
| 682 | chunks for this space */ | 686 | chunks for this space */ |
| 683 | int force_alloc; /* set if we need to force a chunk alloc for | 687 | int force_alloc; /* set if we need to force a chunk alloc for |
| 684 | this space */ | 688 | this space */ |
| 689 | int force_delalloc; /* make people start doing filemap_flush until | ||
| 690 | we're under a threshold */ | ||
| 685 | 691 | ||
| 686 | struct list_head list; | 692 | struct list_head list; |
| 687 | 693 | ||
| 694 | /* for controlling how we free up space for allocations */ | ||
| 695 | wait_queue_head_t allocate_wait; | ||
| 696 | wait_queue_head_t flush_wait; | ||
| 697 | int allocating_chunk; | ||
| 698 | int flushing; | ||
| 699 | |||
| 688 | /* for block groups in our same type */ | 700 | /* for block groups in our same type */ |
| 689 | struct list_head block_groups; | 701 | struct list_head block_groups; |
| 690 | spinlock_t lock; | 702 | spinlock_t lock; |
| @@ -726,6 +738,15 @@ enum btrfs_caching_type { | |||
| 726 | BTRFS_CACHE_FINISHED = 2, | 738 | BTRFS_CACHE_FINISHED = 2, |
| 727 | }; | 739 | }; |
| 728 | 740 | ||
| 741 | struct btrfs_caching_control { | ||
| 742 | struct list_head list; | ||
| 743 | struct mutex mutex; | ||
| 744 | wait_queue_head_t wait; | ||
| 745 | struct btrfs_block_group_cache *block_group; | ||
| 746 | u64 progress; | ||
| 747 | atomic_t count; | ||
| 748 | }; | ||
| 749 | |||
| 729 | struct btrfs_block_group_cache { | 750 | struct btrfs_block_group_cache { |
| 730 | struct btrfs_key key; | 751 | struct btrfs_key key; |
| 731 | struct btrfs_block_group_item item; | 752 | struct btrfs_block_group_item item; |
| @@ -733,6 +754,7 @@ struct btrfs_block_group_cache { | |||
| 733 | spinlock_t lock; | 754 | spinlock_t lock; |
| 734 | u64 pinned; | 755 | u64 pinned; |
| 735 | u64 reserved; | 756 | u64 reserved; |
| 757 | u64 bytes_super; | ||
| 736 | u64 flags; | 758 | u64 flags; |
| 737 | u64 sectorsize; | 759 | u64 sectorsize; |
| 738 | int extents_thresh; | 760 | int extents_thresh; |
| @@ -742,8 +764,9 @@ struct btrfs_block_group_cache { | |||
| 742 | int dirty; | 764 | int dirty; |
| 743 | 765 | ||
| 744 | /* cache tracking stuff */ | 766 | /* cache tracking stuff */ |
| 745 | wait_queue_head_t caching_q; | ||
| 746 | int cached; | 767 | int cached; |
| 768 | struct btrfs_caching_control *caching_ctl; | ||
| 769 | u64 last_byte_to_unpin; | ||
| 747 | 770 | ||
| 748 | struct btrfs_space_info *space_info; | 771 | struct btrfs_space_info *space_info; |
| 749 | 772 | ||
| @@ -782,13 +805,16 @@ struct btrfs_fs_info { | |||
| 782 | 805 | ||
| 783 | /* the log root tree is a directory of all the other log roots */ | 806 | /* the log root tree is a directory of all the other log roots */ |
| 784 | struct btrfs_root *log_root_tree; | 807 | struct btrfs_root *log_root_tree; |
| 808 | |||
| 809 | spinlock_t fs_roots_radix_lock; | ||
| 785 | struct radix_tree_root fs_roots_radix; | 810 | struct radix_tree_root fs_roots_radix; |
| 786 | 811 | ||
| 787 | /* block group cache stuff */ | 812 | /* block group cache stuff */ |
| 788 | spinlock_t block_group_cache_lock; | 813 | spinlock_t block_group_cache_lock; |
| 789 | struct rb_root block_group_cache_tree; | 814 | struct rb_root block_group_cache_tree; |
| 790 | 815 | ||
| 791 | struct extent_io_tree pinned_extents; | 816 | struct extent_io_tree freed_extents[2]; |
| 817 | struct extent_io_tree *pinned_extents; | ||
| 792 | 818 | ||
| 793 | /* logical->physical extent mapping */ | 819 | /* logical->physical extent mapping */ |
| 794 | struct btrfs_mapping_tree mapping_tree; | 820 | struct btrfs_mapping_tree mapping_tree; |
| @@ -822,11 +848,7 @@ struct btrfs_fs_info { | |||
| 822 | struct mutex transaction_kthread_mutex; | 848 | struct mutex transaction_kthread_mutex; |
| 823 | struct mutex cleaner_mutex; | 849 | struct mutex cleaner_mutex; |
| 824 | struct mutex chunk_mutex; | 850 | struct mutex chunk_mutex; |
| 825 | struct mutex drop_mutex; | ||
| 826 | struct mutex volume_mutex; | 851 | struct mutex volume_mutex; |
| 827 | struct mutex tree_reloc_mutex; | ||
| 828 | struct rw_semaphore extent_commit_sem; | ||
| 829 | |||
| 830 | /* | 852 | /* |
| 831 | * this protects the ordered operations list only while we are | 853 | * this protects the ordered operations list only while we are |
| 832 | * processing all of the entries on it. This way we make | 854 | * processing all of the entries on it. This way we make |
| @@ -835,10 +857,16 @@ struct btrfs_fs_info { | |||
| 835 | * before jumping into the main commit. | 857 | * before jumping into the main commit. |
| 836 | */ | 858 | */ |
| 837 | struct mutex ordered_operations_mutex; | 859 | struct mutex ordered_operations_mutex; |
| 860 | struct rw_semaphore extent_commit_sem; | ||
| 861 | |||
| 862 | struct rw_semaphore subvol_sem; | ||
| 863 | |||
| 864 | struct srcu_struct subvol_srcu; | ||
| 838 | 865 | ||
| 839 | struct list_head trans_list; | 866 | struct list_head trans_list; |
| 840 | struct list_head hashers; | 867 | struct list_head hashers; |
| 841 | struct list_head dead_roots; | 868 | struct list_head dead_roots; |
| 869 | struct list_head caching_block_groups; | ||
| 842 | 870 | ||
| 843 | atomic_t nr_async_submits; | 871 | atomic_t nr_async_submits; |
| 844 | atomic_t async_submit_draining; | 872 | atomic_t async_submit_draining; |
| @@ -882,6 +910,7 @@ struct btrfs_fs_info { | |||
| 882 | * A third pool does submit_bio to avoid deadlocking with the other | 910 | * A third pool does submit_bio to avoid deadlocking with the other |
| 883 | * two | 911 | * two |
| 884 | */ | 912 | */ |
| 913 | struct btrfs_workers generic_worker; | ||
| 885 | struct btrfs_workers workers; | 914 | struct btrfs_workers workers; |
| 886 | struct btrfs_workers delalloc_workers; | 915 | struct btrfs_workers delalloc_workers; |
| 887 | struct btrfs_workers endio_workers; | 916 | struct btrfs_workers endio_workers; |
| @@ -889,6 +918,7 @@ struct btrfs_fs_info { | |||
| 889 | struct btrfs_workers endio_meta_write_workers; | 918 | struct btrfs_workers endio_meta_write_workers; |
| 890 | struct btrfs_workers endio_write_workers; | 919 | struct btrfs_workers endio_write_workers; |
| 891 | struct btrfs_workers submit_workers; | 920 | struct btrfs_workers submit_workers; |
| 921 | struct btrfs_workers enospc_workers; | ||
| 892 | /* | 922 | /* |
| 893 | * fixup workers take dirty pages that didn't properly go through | 923 | * fixup workers take dirty pages that didn't properly go through |
| 894 | * the cow mechanism and make them safe to write. It happens | 924 | * the cow mechanism and make them safe to write. It happens |
| @@ -979,7 +1009,10 @@ struct btrfs_root { | |||
| 979 | atomic_t log_writers; | 1009 | atomic_t log_writers; |
| 980 | atomic_t log_commit[2]; | 1010 | atomic_t log_commit[2]; |
| 981 | unsigned long log_transid; | 1011 | unsigned long log_transid; |
| 1012 | unsigned long last_log_commit; | ||
| 982 | unsigned long log_batch; | 1013 | unsigned long log_batch; |
| 1014 | pid_t log_start_pid; | ||
| 1015 | bool log_multiple_pids; | ||
| 983 | 1016 | ||
| 984 | u64 objectid; | 1017 | u64 objectid; |
| 985 | u64 last_trans; | 1018 | u64 last_trans; |
| @@ -996,10 +1029,12 @@ struct btrfs_root { | |||
| 996 | u32 stripesize; | 1029 | u32 stripesize; |
| 997 | 1030 | ||
| 998 | u32 type; | 1031 | u32 type; |
| 999 | u64 highest_inode; | 1032 | |
| 1000 | u64 last_inode_alloc; | 1033 | u64 highest_objectid; |
| 1001 | int ref_cows; | 1034 | int ref_cows; |
| 1002 | int track_dirty; | 1035 | int track_dirty; |
| 1036 | int in_radix; | ||
| 1037 | |||
| 1003 | u64 defrag_trans_start; | 1038 | u64 defrag_trans_start; |
| 1004 | struct btrfs_key defrag_progress; | 1039 | struct btrfs_key defrag_progress; |
| 1005 | struct btrfs_key defrag_max; | 1040 | struct btrfs_key defrag_max; |
| @@ -1118,6 +1153,7 @@ struct btrfs_root { | |||
| 1118 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) | 1153 | #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) |
| 1119 | #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) | 1154 | #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) |
| 1120 | #define BTRFS_MOUNT_NOSSD (1 << 9) | 1155 | #define BTRFS_MOUNT_NOSSD (1 << 9) |
| 1156 | #define BTRFS_MOUNT_DISCARD (1 << 10) | ||
| 1121 | 1157 | ||
| 1122 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) | 1158 | #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) |
| 1123 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) | 1159 | #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) |
| @@ -1920,8 +1956,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
| 1920 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1956 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
| 1921 | struct btrfs_root *root, unsigned long count); | 1957 | struct btrfs_root *root, unsigned long count); |
| 1922 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1958 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
| 1923 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1959 | int btrfs_pin_extent(struct btrfs_root *root, |
| 1924 | u64 bytenr, u64 num, int pin); | 1960 | u64 bytenr, u64 num, int reserved); |
| 1925 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1961 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
| 1926 | struct btrfs_root *root, struct extent_buffer *leaf); | 1962 | struct btrfs_root *root, struct extent_buffer *leaf); |
| 1927 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1963 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
| @@ -1971,9 +2007,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 1971 | u64 root_objectid, u64 owner, u64 offset); | 2007 | u64 root_objectid, u64 owner, u64 offset); |
| 1972 | 2008 | ||
| 1973 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2009 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
| 2010 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | ||
| 2011 | struct btrfs_root *root); | ||
| 1974 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2012 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
| 1975 | struct btrfs_root *root, | 2013 | struct btrfs_root *root); |
| 1976 | struct extent_io_tree *unpin); | ||
| 1977 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 2014 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
| 1978 | struct btrfs_root *root, | 2015 | struct btrfs_root *root, |
| 1979 | u64 bytenr, u64 num_bytes, u64 parent, | 2016 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -1984,6 +2021,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
| 1984 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); | 2021 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); |
| 1985 | int btrfs_free_block_groups(struct btrfs_fs_info *info); | 2022 | int btrfs_free_block_groups(struct btrfs_fs_info *info); |
| 1986 | int btrfs_read_block_groups(struct btrfs_root *root); | 2023 | int btrfs_read_block_groups(struct btrfs_root *root); |
| 2024 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr); | ||
| 1987 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | 2025 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, |
| 1988 | struct btrfs_root *root, u64 bytes_used, | 2026 | struct btrfs_root *root, u64 bytes_used, |
| 1989 | u64 type, u64 chunk_objectid, u64 chunk_offset, | 2027 | u64 type, u64 chunk_objectid, u64 chunk_offset, |
| @@ -1997,7 +2035,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | |||
| 1997 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2035 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
| 1998 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2036 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 1999 | 2037 | ||
| 2000 | int btrfs_check_metadata_free_space(struct btrfs_root *root); | 2038 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); |
| 2039 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | ||
| 2040 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2041 | struct inode *inode, int num_items); | ||
| 2042 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2043 | struct inode *inode, int num_items); | ||
| 2001 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2044 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, |
| 2002 | u64 bytes); | 2045 | u64 bytes); |
| 2003 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2046 | void btrfs_free_reserved_data_space(struct btrfs_root *root, |
| @@ -2006,7 +2049,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | |||
| 2006 | u64 bytes); | 2049 | u64 bytes); |
| 2007 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2050 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, |
| 2008 | u64 bytes); | 2051 | u64 bytes); |
| 2009 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info); | ||
| 2010 | /* ctree.c */ | 2052 | /* ctree.c */ |
| 2011 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2053 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 2012 | int level, int *slot); | 2054 | int level, int *slot); |
| @@ -2100,12 +2142,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
| 2100 | struct extent_buffer *parent); | 2142 | struct extent_buffer *parent); |
| 2101 | /* root-item.c */ | 2143 | /* root-item.c */ |
| 2102 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2144 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
| 2103 | struct btrfs_path *path, | 2145 | struct btrfs_path *path, |
| 2104 | u64 root_id, u64 ref_id); | 2146 | u64 root_id, u64 ref_id); |
| 2105 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 2147 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
| 2106 | struct btrfs_root *tree_root, | 2148 | struct btrfs_root *tree_root, |
| 2107 | u64 root_id, u8 type, u64 ref_id, | 2149 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
| 2108 | u64 dirid, u64 sequence, | 2150 | const char *name, int name_len); |
| 2151 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | ||
| 2152 | struct btrfs_root *tree_root, | ||
| 2153 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, | ||
| 2109 | const char *name, int name_len); | 2154 | const char *name, int name_len); |
| 2110 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2155 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 2111 | struct btrfs_key *key); | 2156 | struct btrfs_key *key); |
| @@ -2120,6 +2165,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
| 2120 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | 2165 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, |
| 2121 | u64 *found_objectid); | 2166 | u64 *found_objectid); |
| 2122 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2167 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
| 2168 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | ||
| 2123 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2169 | int btrfs_set_root_node(struct btrfs_root_item *item, |
| 2124 | struct extent_buffer *node); | 2170 | struct extent_buffer *node); |
| 2125 | /* dir-item.c */ | 2171 | /* dir-item.c */ |
| @@ -2138,6 +2184,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
| 2138 | struct btrfs_path *path, u64 dir, | 2184 | struct btrfs_path *path, u64 dir, |
| 2139 | u64 objectid, const char *name, int name_len, | 2185 | u64 objectid, const char *name, int name_len, |
| 2140 | int mod); | 2186 | int mod); |
| 2187 | struct btrfs_dir_item * | ||
| 2188 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
| 2189 | struct btrfs_path *path, u64 dirid, | ||
| 2190 | const char *name, int name_len); | ||
| 2141 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | 2191 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, |
| 2142 | struct btrfs_path *path, | 2192 | struct btrfs_path *path, |
| 2143 | const char *name, int name_len); | 2193 | const char *name, int name_len); |
| @@ -2160,6 +2210,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | |||
| 2160 | struct btrfs_root *root, u64 offset); | 2210 | struct btrfs_root *root, u64 offset); |
| 2161 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | 2211 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, |
| 2162 | struct btrfs_root *root, u64 offset); | 2212 | struct btrfs_root *root, u64 offset); |
| 2213 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); | ||
| 2163 | 2214 | ||
| 2164 | /* inode-map.c */ | 2215 | /* inode-map.c */ |
| 2165 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 2216 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
| @@ -2232,6 +2283,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
| 2232 | int btrfs_add_link(struct btrfs_trans_handle *trans, | 2283 | int btrfs_add_link(struct btrfs_trans_handle *trans, |
| 2233 | struct inode *parent_inode, struct inode *inode, | 2284 | struct inode *parent_inode, struct inode *inode, |
| 2234 | const char *name, int name_len, int add_backref, u64 index); | 2285 | const char *name, int name_len, int add_backref, u64 index); |
| 2286 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
| 2287 | struct btrfs_root *root, | ||
| 2288 | struct inode *dir, u64 objectid, | ||
| 2289 | const char *name, int name_len); | ||
| 2235 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 2290 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
| 2236 | struct btrfs_root *root, | 2291 | struct btrfs_root *root, |
| 2237 | struct inode *inode, u64 new_size, | 2292 | struct inode *inode, u64 new_size, |
| @@ -2242,7 +2297,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); | |||
| 2242 | int btrfs_writepages(struct address_space *mapping, | 2297 | int btrfs_writepages(struct address_space *mapping, |
| 2243 | struct writeback_control *wbc); | 2298 | struct writeback_control *wbc); |
| 2244 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2299 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| 2245 | struct btrfs_root *new_root, struct dentry *dentry, | 2300 | struct btrfs_root *new_root, |
| 2246 | u64 new_dirid, u64 alloc_hint); | 2301 | u64 new_dirid, u64 alloc_hint); |
| 2247 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2302 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
| 2248 | size_t size, struct bio *bio, unsigned long bio_flags); | 2303 | size_t size, struct bio *bio, unsigned long bio_flags); |
| @@ -2258,6 +2313,7 @@ int btrfs_write_inode(struct inode *inode, int wait); | |||
| 2258 | void btrfs_dirty_inode(struct inode *inode); | 2313 | void btrfs_dirty_inode(struct inode *inode); |
| 2259 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2314 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
| 2260 | void btrfs_destroy_inode(struct inode *inode); | 2315 | void btrfs_destroy_inode(struct inode *inode); |
| 2316 | void btrfs_drop_inode(struct inode *inode); | ||
| 2261 | int btrfs_init_cachep(void); | 2317 | int btrfs_init_cachep(void); |
| 2262 | void btrfs_destroy_cachep(void); | 2318 | void btrfs_destroy_cachep(void); |
| 2263 | long btrfs_ioctl_trans_end(struct file *file); | 2319 | long btrfs_ioctl_trans_end(struct file *file); |
| @@ -2275,6 +2331,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | |||
| 2275 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2331 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2276 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2332 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
| 2277 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2333 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
| 2334 | int btrfs_invalidate_inodes(struct btrfs_root *root); | ||
| 2335 | extern const struct dentry_operations btrfs_dentry_operations; | ||
| 2278 | 2336 | ||
| 2279 | /* ioctl.c */ | 2337 | /* ioctl.c */ |
| 2280 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 2338 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
| @@ -2286,11 +2344,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | |||
| 2286 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2344 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 2287 | int skip_pinned); | 2345 | int skip_pinned); |
| 2288 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | 2346 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); |
| 2289 | extern struct file_operations btrfs_file_operations; | 2347 | extern const struct file_operations btrfs_file_operations; |
| 2290 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2348 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 2291 | struct btrfs_root *root, struct inode *inode, | 2349 | struct btrfs_root *root, struct inode *inode, |
| 2292 | u64 start, u64 end, u64 locked_end, | 2350 | u64 start, u64 end, u64 locked_end, |
| 2293 | u64 inline_limit, u64 *hint_block); | 2351 | u64 inline_limit, u64 *hint_block, int drop_cache); |
| 2294 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2352 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
| 2295 | struct btrfs_root *root, | 2353 | struct btrfs_root *root, |
| 2296 | struct inode *inode, u64 start, u64 end); | 2354 | struct inode *inode, u64 start, u64 end); |
| @@ -2317,7 +2375,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options); | |||
| 2317 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2375 | int btrfs_sync_fs(struct super_block *sb, int wait); |
| 2318 | 2376 | ||
| 2319 | /* acl.c */ | 2377 | /* acl.c */ |
| 2320 | #ifdef CONFIG_FS_POSIX_ACL | 2378 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
| 2321 | int btrfs_check_acl(struct inode *inode, int mask); | 2379 | int btrfs_check_acl(struct inode *inode, int mask); |
| 2322 | #else | 2380 | #else |
| 2323 | #define btrfs_check_acl NULL | 2381 | #define btrfs_check_acl NULL |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 1d70236ba00c..f3a6075519cc 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
| @@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
| 281 | return btrfs_match_dir_item_name(root, path, name, name_len); | 281 | return btrfs_match_dir_item_name(root, path, name, name_len); |
| 282 | } | 282 | } |
| 283 | 283 | ||
| 284 | struct btrfs_dir_item * | ||
| 285 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
| 286 | struct btrfs_path *path, u64 dirid, | ||
| 287 | const char *name, int name_len) | ||
| 288 | { | ||
| 289 | struct extent_buffer *leaf; | ||
| 290 | struct btrfs_dir_item *di; | ||
| 291 | struct btrfs_key key; | ||
| 292 | u32 nritems; | ||
| 293 | int ret; | ||
| 294 | |||
| 295 | key.objectid = dirid; | ||
| 296 | key.type = BTRFS_DIR_INDEX_KEY; | ||
| 297 | key.offset = 0; | ||
| 298 | |||
| 299 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 300 | if (ret < 0) | ||
| 301 | return ERR_PTR(ret); | ||
| 302 | |||
| 303 | leaf = path->nodes[0]; | ||
| 304 | nritems = btrfs_header_nritems(leaf); | ||
| 305 | |||
| 306 | while (1) { | ||
| 307 | if (path->slots[0] >= nritems) { | ||
| 308 | ret = btrfs_next_leaf(root, path); | ||
| 309 | if (ret < 0) | ||
| 310 | return ERR_PTR(ret); | ||
| 311 | if (ret > 0) | ||
| 312 | break; | ||
| 313 | leaf = path->nodes[0]; | ||
| 314 | nritems = btrfs_header_nritems(leaf); | ||
| 315 | continue; | ||
| 316 | } | ||
| 317 | |||
| 318 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 319 | if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY) | ||
| 320 | break; | ||
| 321 | |||
| 322 | di = btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 323 | if (di) | ||
| 324 | return di; | ||
| 325 | |||
| 326 | path->slots[0]++; | ||
| 327 | } | ||
| 328 | return NULL; | ||
| 329 | } | ||
| 330 | |||
| 284 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | 331 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, |
| 285 | struct btrfs_root *root, | 332 | struct btrfs_root *root, |
| 286 | struct btrfs_path *path, u64 dir, | 333 | struct btrfs_path *path, u64 dir, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8b8192790011..02b6afbd7450 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | 41 | ||
| 42 | static struct extent_io_ops btree_extent_io_ops; | 42 | static struct extent_io_ops btree_extent_io_ops; |
| 43 | static void end_workqueue_fn(struct btrfs_work *work); | 43 | static void end_workqueue_fn(struct btrfs_work *work); |
| 44 | static void free_fs_root(struct btrfs_root *root); | ||
| 44 | 45 | ||
| 45 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | 46 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); |
| 46 | 47 | ||
| @@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 123 | struct extent_map *em; | 124 | struct extent_map *em; |
| 124 | int ret; | 125 | int ret; |
| 125 | 126 | ||
| 126 | spin_lock(&em_tree->lock); | 127 | read_lock(&em_tree->lock); |
| 127 | em = lookup_extent_mapping(em_tree, start, len); | 128 | em = lookup_extent_mapping(em_tree, start, len); |
| 128 | if (em) { | 129 | if (em) { |
| 129 | em->bdev = | 130 | em->bdev = |
| 130 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 131 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
| 131 | spin_unlock(&em_tree->lock); | 132 | read_unlock(&em_tree->lock); |
| 132 | goto out; | 133 | goto out; |
| 133 | } | 134 | } |
| 134 | spin_unlock(&em_tree->lock); | 135 | read_unlock(&em_tree->lock); |
| 135 | 136 | ||
| 136 | em = alloc_extent_map(GFP_NOFS); | 137 | em = alloc_extent_map(GFP_NOFS); |
| 137 | if (!em) { | 138 | if (!em) { |
| @@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 144 | em->block_start = 0; | 145 | em->block_start = 0; |
| 145 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 146 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
| 146 | 147 | ||
| 147 | spin_lock(&em_tree->lock); | 148 | write_lock(&em_tree->lock); |
| 148 | ret = add_extent_mapping(em_tree, em); | 149 | ret = add_extent_mapping(em_tree, em); |
| 149 | if (ret == -EEXIST) { | 150 | if (ret == -EEXIST) { |
| 150 | u64 failed_start = em->start; | 151 | u64 failed_start = em->start; |
| @@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 163 | free_extent_map(em); | 164 | free_extent_map(em); |
| 164 | em = NULL; | 165 | em = NULL; |
| 165 | } | 166 | } |
| 166 | spin_unlock(&em_tree->lock); | 167 | write_unlock(&em_tree->lock); |
| 167 | 168 | ||
| 168 | if (ret) | 169 | if (ret) |
| 169 | em = ERR_PTR(ret); | 170 | em = ERR_PTR(ret); |
| @@ -772,7 +773,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
| 772 | } | 773 | } |
| 773 | } | 774 | } |
| 774 | 775 | ||
| 775 | static struct address_space_operations btree_aops = { | 776 | static const struct address_space_operations btree_aops = { |
| 776 | .readpage = btree_readpage, | 777 | .readpage = btree_readpage, |
| 777 | .writepage = btree_writepage, | 778 | .writepage = btree_writepage, |
| 778 | .writepages = btree_writepages, | 779 | .writepages = btree_writepages, |
| @@ -821,14 +822,14 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
| 821 | 822 | ||
| 822 | int btrfs_write_tree_block(struct extent_buffer *buf) | 823 | int btrfs_write_tree_block(struct extent_buffer *buf) |
| 823 | { | 824 | { |
| 824 | return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, | 825 | return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, |
| 825 | buf->start + buf->len - 1, WB_SYNC_ALL); | 826 | buf->start + buf->len - 1); |
| 826 | } | 827 | } |
| 827 | 828 | ||
| 828 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | 829 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) |
| 829 | { | 830 | { |
| 830 | return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, | 831 | return filemap_fdatawait_range(buf->first_page->mapping, |
| 831 | buf->start, buf->start + buf->len - 1); | 832 | buf->start, buf->start + buf->len - 1); |
| 832 | } | 833 | } |
| 833 | 834 | ||
| 834 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 835 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
| @@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 895 | root->fs_info = fs_info; | 896 | root->fs_info = fs_info; |
| 896 | root->objectid = objectid; | 897 | root->objectid = objectid; |
| 897 | root->last_trans = 0; | 898 | root->last_trans = 0; |
| 898 | root->highest_inode = 0; | 899 | root->highest_objectid = 0; |
| 899 | root->last_inode_alloc = 0; | ||
| 900 | root->name = NULL; | 900 | root->name = NULL; |
| 901 | root->in_sysfs = 0; | 901 | root->in_sysfs = 0; |
| 902 | root->inode_tree.rb_node = NULL; | 902 | root->inode_tree.rb_node = NULL; |
| @@ -917,6 +917,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 917 | atomic_set(&root->log_writers, 0); | 917 | atomic_set(&root->log_writers, 0); |
| 918 | root->log_batch = 0; | 918 | root->log_batch = 0; |
| 919 | root->log_transid = 0; | 919 | root->log_transid = 0; |
| 920 | root->last_log_commit = 0; | ||
| 920 | extent_io_tree_init(&root->dirty_log_pages, | 921 | extent_io_tree_init(&root->dirty_log_pages, |
| 921 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 922 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
| 922 | 923 | ||
| @@ -952,14 +953,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
| 952 | root, fs_info, objectid); | 953 | root, fs_info, objectid); |
| 953 | ret = btrfs_find_last_root(tree_root, objectid, | 954 | ret = btrfs_find_last_root(tree_root, objectid, |
| 954 | &root->root_item, &root->root_key); | 955 | &root->root_item, &root->root_key); |
| 956 | if (ret > 0) | ||
| 957 | return -ENOENT; | ||
| 955 | BUG_ON(ret); | 958 | BUG_ON(ret); |
| 956 | 959 | ||
| 957 | generation = btrfs_root_generation(&root->root_item); | 960 | generation = btrfs_root_generation(&root->root_item); |
| 958 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 961 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
| 959 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 962 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
| 960 | blocksize, generation); | 963 | blocksize, generation); |
| 961 | root->commit_root = btrfs_root_node(root); | ||
| 962 | BUG_ON(!root->node); | 964 | BUG_ON(!root->node); |
| 965 | root->commit_root = btrfs_root_node(root); | ||
| 963 | return 0; | 966 | return 0; |
| 964 | } | 967 | } |
| 965 | 968 | ||
| @@ -1085,6 +1088,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
| 1085 | WARN_ON(root->log_root); | 1088 | WARN_ON(root->log_root); |
| 1086 | root->log_root = log_root; | 1089 | root->log_root = log_root; |
| 1087 | root->log_transid = 0; | 1090 | root->log_transid = 0; |
| 1091 | root->last_log_commit = 0; | ||
| 1088 | return 0; | 1092 | return 0; |
| 1089 | } | 1093 | } |
| 1090 | 1094 | ||
| @@ -1095,7 +1099,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 1095 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1099 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
| 1096 | struct btrfs_path *path; | 1100 | struct btrfs_path *path; |
| 1097 | struct extent_buffer *l; | 1101 | struct extent_buffer *l; |
| 1098 | u64 highest_inode; | ||
| 1099 | u64 generation; | 1102 | u64 generation; |
| 1100 | u32 blocksize; | 1103 | u32 blocksize; |
| 1101 | int ret = 0; | 1104 | int ret = 0; |
| @@ -1110,7 +1113,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 1110 | kfree(root); | 1113 | kfree(root); |
| 1111 | return ERR_PTR(ret); | 1114 | return ERR_PTR(ret); |
| 1112 | } | 1115 | } |
| 1113 | goto insert; | 1116 | goto out; |
| 1114 | } | 1117 | } |
| 1115 | 1118 | ||
| 1116 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1119 | __setup_root(tree_root->nodesize, tree_root->leafsize, |
| @@ -1120,39 +1123,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 1120 | path = btrfs_alloc_path(); | 1123 | path = btrfs_alloc_path(); |
| 1121 | BUG_ON(!path); | 1124 | BUG_ON(!path); |
| 1122 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1125 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
| 1123 | if (ret != 0) { | 1126 | if (ret == 0) { |
| 1124 | if (ret > 0) | 1127 | l = path->nodes[0]; |
| 1125 | ret = -ENOENT; | 1128 | read_extent_buffer(l, &root->root_item, |
| 1126 | goto out; | 1129 | btrfs_item_ptr_offset(l, path->slots[0]), |
| 1130 | sizeof(root->root_item)); | ||
| 1131 | memcpy(&root->root_key, location, sizeof(*location)); | ||
| 1127 | } | 1132 | } |
| 1128 | l = path->nodes[0]; | ||
| 1129 | read_extent_buffer(l, &root->root_item, | ||
| 1130 | btrfs_item_ptr_offset(l, path->slots[0]), | ||
| 1131 | sizeof(root->root_item)); | ||
| 1132 | memcpy(&root->root_key, location, sizeof(*location)); | ||
| 1133 | ret = 0; | ||
| 1134 | out: | ||
| 1135 | btrfs_release_path(root, path); | ||
| 1136 | btrfs_free_path(path); | 1133 | btrfs_free_path(path); |
| 1137 | if (ret) { | 1134 | if (ret) { |
| 1138 | kfree(root); | 1135 | if (ret > 0) |
| 1136 | ret = -ENOENT; | ||
| 1139 | return ERR_PTR(ret); | 1137 | return ERR_PTR(ret); |
| 1140 | } | 1138 | } |
| 1139 | |||
| 1141 | generation = btrfs_root_generation(&root->root_item); | 1140 | generation = btrfs_root_generation(&root->root_item); |
| 1142 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1141 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
| 1143 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1142 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
| 1144 | blocksize, generation); | 1143 | blocksize, generation); |
| 1145 | root->commit_root = btrfs_root_node(root); | 1144 | root->commit_root = btrfs_root_node(root); |
| 1146 | BUG_ON(!root->node); | 1145 | BUG_ON(!root->node); |
| 1147 | insert: | 1146 | out: |
| 1148 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1147 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) |
| 1149 | root->ref_cows = 1; | 1148 | root->ref_cows = 1; |
| 1150 | ret = btrfs_find_highest_inode(root, &highest_inode); | 1149 | |
| 1151 | if (ret == 0) { | ||
| 1152 | root->highest_inode = highest_inode; | ||
| 1153 | root->last_inode_alloc = highest_inode; | ||
| 1154 | } | ||
| 1155 | } | ||
| 1156 | return root; | 1150 | return root; |
| 1157 | } | 1151 | } |
| 1158 | 1152 | ||
| @@ -1187,39 +1181,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
| 1187 | return fs_info->dev_root; | 1181 | return fs_info->dev_root; |
| 1188 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | 1182 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) |
| 1189 | return fs_info->csum_root; | 1183 | return fs_info->csum_root; |
| 1190 | 1184 | again: | |
| 1185 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
| 1191 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | 1186 | root = radix_tree_lookup(&fs_info->fs_roots_radix, |
| 1192 | (unsigned long)location->objectid); | 1187 | (unsigned long)location->objectid); |
| 1188 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1193 | if (root) | 1189 | if (root) |
| 1194 | return root; | 1190 | return root; |
| 1195 | 1191 | ||
| 1192 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
| 1193 | if (ret == 0) | ||
| 1194 | ret = -ENOENT; | ||
| 1195 | if (ret < 0) | ||
| 1196 | return ERR_PTR(ret); | ||
| 1197 | |||
| 1196 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1198 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
| 1197 | if (IS_ERR(root)) | 1199 | if (IS_ERR(root)) |
| 1198 | return root; | 1200 | return root; |
| 1199 | 1201 | ||
| 1202 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
| 1200 | set_anon_super(&root->anon_super, NULL); | 1203 | set_anon_super(&root->anon_super, NULL); |
| 1201 | 1204 | ||
| 1205 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
| 1206 | if (ret) | ||
| 1207 | goto fail; | ||
| 1208 | |||
| 1209 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
| 1202 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1210 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
| 1203 | (unsigned long)root->root_key.objectid, | 1211 | (unsigned long)root->root_key.objectid, |
| 1204 | root); | 1212 | root); |
| 1213 | if (ret == 0) | ||
| 1214 | root->in_radix = 1; | ||
| 1215 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1216 | radix_tree_preload_end(); | ||
| 1205 | if (ret) { | 1217 | if (ret) { |
| 1206 | free_extent_buffer(root->node); | 1218 | if (ret == -EEXIST) { |
| 1207 | kfree(root); | 1219 | free_fs_root(root); |
| 1208 | return ERR_PTR(ret); | 1220 | goto again; |
| 1221 | } | ||
| 1222 | goto fail; | ||
| 1209 | } | 1223 | } |
| 1210 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 1224 | |
| 1211 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 1225 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
| 1212 | root->root_key.objectid); | 1226 | root->root_key.objectid); |
| 1213 | BUG_ON(ret); | 1227 | WARN_ON(ret); |
| 1228 | |||
| 1229 | if (!(fs_info->sb->s_flags & MS_RDONLY)) | ||
| 1214 | btrfs_orphan_cleanup(root); | 1230 | btrfs_orphan_cleanup(root); |
| 1215 | } | 1231 | |
| 1216 | return root; | 1232 | return root; |
| 1233 | fail: | ||
| 1234 | free_fs_root(root); | ||
| 1235 | return ERR_PTR(ret); | ||
| 1217 | } | 1236 | } |
| 1218 | 1237 | ||
| 1219 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | 1238 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, |
| 1220 | struct btrfs_key *location, | 1239 | struct btrfs_key *location, |
| 1221 | const char *name, int namelen) | 1240 | const char *name, int namelen) |
| 1222 | { | 1241 | { |
| 1242 | return btrfs_read_fs_root_no_name(fs_info, location); | ||
| 1243 | #if 0 | ||
| 1223 | struct btrfs_root *root; | 1244 | struct btrfs_root *root; |
| 1224 | int ret; | 1245 | int ret; |
| 1225 | 1246 | ||
| @@ -1236,7 +1257,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
| 1236 | kfree(root); | 1257 | kfree(root); |
| 1237 | return ERR_PTR(ret); | 1258 | return ERR_PTR(ret); |
| 1238 | } | 1259 | } |
| 1239 | #if 0 | 1260 | |
| 1240 | ret = btrfs_sysfs_add_root(root); | 1261 | ret = btrfs_sysfs_add_root(root); |
| 1241 | if (ret) { | 1262 | if (ret) { |
| 1242 | free_extent_buffer(root->node); | 1263 | free_extent_buffer(root->node); |
| @@ -1244,9 +1265,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
| 1244 | kfree(root); | 1265 | kfree(root); |
| 1245 | return ERR_PTR(ret); | 1266 | return ERR_PTR(ret); |
| 1246 | } | 1267 | } |
| 1247 | #endif | ||
| 1248 | root->in_sysfs = 1; | 1268 | root->in_sysfs = 1; |
| 1249 | return root; | 1269 | return root; |
| 1270 | #endif | ||
| 1250 | } | 1271 | } |
| 1251 | 1272 | ||
| 1252 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) | 1273 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) |
| @@ -1325,9 +1346,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | |||
| 1325 | offset = page_offset(page); | 1346 | offset = page_offset(page); |
| 1326 | 1347 | ||
| 1327 | em_tree = &BTRFS_I(inode)->extent_tree; | 1348 | em_tree = &BTRFS_I(inode)->extent_tree; |
| 1328 | spin_lock(&em_tree->lock); | 1349 | read_lock(&em_tree->lock); |
| 1329 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | 1350 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); |
| 1330 | spin_unlock(&em_tree->lock); | 1351 | read_unlock(&em_tree->lock); |
| 1331 | if (!em) { | 1352 | if (!em) { |
| 1332 | __unplug_io_fn(bdi, page); | 1353 | __unplug_io_fn(bdi, page); |
| 1333 | return; | 1354 | return; |
| @@ -1360,8 +1381,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
| 1360 | 1381 | ||
| 1361 | err = bdi_register(bdi, NULL, "btrfs-%d", | 1382 | err = bdi_register(bdi, NULL, "btrfs-%d", |
| 1362 | atomic_inc_return(&btrfs_bdi_num)); | 1383 | atomic_inc_return(&btrfs_bdi_num)); |
| 1363 | if (err) | 1384 | if (err) { |
| 1385 | bdi_destroy(bdi); | ||
| 1364 | return err; | 1386 | return err; |
| 1387 | } | ||
| 1365 | 1388 | ||
| 1366 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1389 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
| 1367 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1390 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
| @@ -1451,9 +1474,12 @@ static int cleaner_kthread(void *arg) | |||
| 1451 | break; | 1474 | break; |
| 1452 | 1475 | ||
| 1453 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1476 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
| 1454 | mutex_lock(&root->fs_info->cleaner_mutex); | 1477 | |
| 1455 | btrfs_clean_old_snapshots(root); | 1478 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
| 1456 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1479 | mutex_trylock(&root->fs_info->cleaner_mutex)) { |
| 1480 | btrfs_clean_old_snapshots(root); | ||
| 1481 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 1482 | } | ||
| 1457 | 1483 | ||
| 1458 | if (freezing(current)) { | 1484 | if (freezing(current)) { |
| 1459 | refrigerator(); | 1485 | refrigerator(); |
| @@ -1558,15 +1584,36 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1558 | err = -ENOMEM; | 1584 | err = -ENOMEM; |
| 1559 | goto fail; | 1585 | goto fail; |
| 1560 | } | 1586 | } |
| 1561 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); | 1587 | |
| 1588 | ret = init_srcu_struct(&fs_info->subvol_srcu); | ||
| 1589 | if (ret) { | ||
| 1590 | err = ret; | ||
| 1591 | goto fail; | ||
| 1592 | } | ||
| 1593 | |||
| 1594 | ret = setup_bdi(fs_info, &fs_info->bdi); | ||
| 1595 | if (ret) { | ||
| 1596 | err = ret; | ||
| 1597 | goto fail_srcu; | ||
| 1598 | } | ||
| 1599 | |||
| 1600 | fs_info->btree_inode = new_inode(sb); | ||
| 1601 | if (!fs_info->btree_inode) { | ||
| 1602 | err = -ENOMEM; | ||
| 1603 | goto fail_bdi; | ||
| 1604 | } | ||
| 1605 | |||
| 1606 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | ||
| 1562 | INIT_LIST_HEAD(&fs_info->trans_list); | 1607 | INIT_LIST_HEAD(&fs_info->trans_list); |
| 1563 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1608 | INIT_LIST_HEAD(&fs_info->dead_roots); |
| 1564 | INIT_LIST_HEAD(&fs_info->hashers); | 1609 | INIT_LIST_HEAD(&fs_info->hashers); |
| 1565 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1610 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
| 1566 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1611 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
| 1612 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | ||
| 1567 | spin_lock_init(&fs_info->delalloc_lock); | 1613 | spin_lock_init(&fs_info->delalloc_lock); |
| 1568 | spin_lock_init(&fs_info->new_trans_lock); | 1614 | spin_lock_init(&fs_info->new_trans_lock); |
| 1569 | spin_lock_init(&fs_info->ref_cache_lock); | 1615 | spin_lock_init(&fs_info->ref_cache_lock); |
| 1616 | spin_lock_init(&fs_info->fs_roots_radix_lock); | ||
| 1570 | 1617 | ||
| 1571 | init_completion(&fs_info->kobj_unregister); | 1618 | init_completion(&fs_info->kobj_unregister); |
| 1572 | fs_info->tree_root = tree_root; | 1619 | fs_info->tree_root = tree_root; |
| @@ -1585,12 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1585 | fs_info->sb = sb; | 1632 | fs_info->sb = sb; |
| 1586 | fs_info->max_extent = (u64)-1; | 1633 | fs_info->max_extent = (u64)-1; |
| 1587 | fs_info->max_inline = 8192 * 1024; | 1634 | fs_info->max_inline = 8192 * 1024; |
| 1588 | if (setup_bdi(fs_info, &fs_info->bdi)) | 1635 | fs_info->metadata_ratio = 0; |
| 1589 | goto fail_bdi; | ||
| 1590 | fs_info->btree_inode = new_inode(sb); | ||
| 1591 | fs_info->btree_inode->i_ino = 1; | ||
| 1592 | fs_info->btree_inode->i_nlink = 1; | ||
| 1593 | fs_info->metadata_ratio = 8; | ||
| 1594 | 1636 | ||
| 1595 | fs_info->thread_pool_size = min_t(unsigned long, | 1637 | fs_info->thread_pool_size = min_t(unsigned long, |
| 1596 | num_online_cpus() + 2, 8); | 1638 | num_online_cpus() + 2, 8); |
| @@ -1602,6 +1644,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1602 | sb->s_blocksize_bits = blksize_bits(4096); | 1644 | sb->s_blocksize_bits = blksize_bits(4096); |
| 1603 | sb->s_bdi = &fs_info->bdi; | 1645 | sb->s_bdi = &fs_info->bdi; |
| 1604 | 1646 | ||
| 1647 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; | ||
| 1648 | fs_info->btree_inode->i_nlink = 1; | ||
| 1605 | /* | 1649 | /* |
| 1606 | * we set the i_size on the btree inode to the max possible int. | 1650 | * we set the i_size on the btree inode to the max possible int. |
| 1607 | * the real end of the address space is determined by all of | 1651 | * the real end of the address space is determined by all of |
| @@ -1620,28 +1664,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1620 | 1664 | ||
| 1621 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | 1665 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; |
| 1622 | 1666 | ||
| 1667 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
| 1668 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
| 1669 | sizeof(struct btrfs_key)); | ||
| 1670 | BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; | ||
| 1671 | insert_inode_hash(fs_info->btree_inode); | ||
| 1672 | |||
| 1623 | spin_lock_init(&fs_info->block_group_cache_lock); | 1673 | spin_lock_init(&fs_info->block_group_cache_lock); |
| 1624 | fs_info->block_group_cache_tree.rb_node = NULL; | 1674 | fs_info->block_group_cache_tree.rb_node = NULL; |
| 1625 | 1675 | ||
| 1626 | extent_io_tree_init(&fs_info->pinned_extents, | 1676 | extent_io_tree_init(&fs_info->freed_extents[0], |
| 1627 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1677 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
| 1678 | extent_io_tree_init(&fs_info->freed_extents[1], | ||
| 1679 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1680 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
| 1628 | fs_info->do_barriers = 1; | 1681 | fs_info->do_barriers = 1; |
| 1629 | 1682 | ||
| 1630 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
| 1631 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
| 1632 | sizeof(struct btrfs_key)); | ||
| 1633 | insert_inode_hash(fs_info->btree_inode); | ||
| 1634 | 1683 | ||
| 1635 | mutex_init(&fs_info->trans_mutex); | 1684 | mutex_init(&fs_info->trans_mutex); |
| 1636 | mutex_init(&fs_info->ordered_operations_mutex); | 1685 | mutex_init(&fs_info->ordered_operations_mutex); |
| 1637 | mutex_init(&fs_info->tree_log_mutex); | 1686 | mutex_init(&fs_info->tree_log_mutex); |
| 1638 | mutex_init(&fs_info->drop_mutex); | ||
| 1639 | mutex_init(&fs_info->chunk_mutex); | 1687 | mutex_init(&fs_info->chunk_mutex); |
| 1640 | mutex_init(&fs_info->transaction_kthread_mutex); | 1688 | mutex_init(&fs_info->transaction_kthread_mutex); |
| 1641 | mutex_init(&fs_info->cleaner_mutex); | 1689 | mutex_init(&fs_info->cleaner_mutex); |
| 1642 | mutex_init(&fs_info->volume_mutex); | 1690 | mutex_init(&fs_info->volume_mutex); |
| 1643 | mutex_init(&fs_info->tree_reloc_mutex); | ||
| 1644 | init_rwsem(&fs_info->extent_commit_sem); | 1691 | init_rwsem(&fs_info->extent_commit_sem); |
| 1692 | init_rwsem(&fs_info->subvol_sem); | ||
| 1645 | 1693 | ||
| 1646 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 1694 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
| 1647 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | 1695 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); |
| @@ -1701,20 +1749,24 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1701 | goto fail_iput; | 1749 | goto fail_iput; |
| 1702 | } | 1750 | } |
| 1703 | 1751 | ||
| 1704 | /* | 1752 | btrfs_init_workers(&fs_info->generic_worker, |
| 1705 | * we need to start all the end_io workers up front because the | 1753 | "genwork", 1, NULL); |
| 1706 | * queue work function gets called at interrupt time, and so it | 1754 | |
| 1707 | * cannot dynamically grow. | ||
| 1708 | */ | ||
| 1709 | btrfs_init_workers(&fs_info->workers, "worker", | 1755 | btrfs_init_workers(&fs_info->workers, "worker", |
| 1710 | fs_info->thread_pool_size); | 1756 | fs_info->thread_pool_size, |
| 1757 | &fs_info->generic_worker); | ||
| 1711 | 1758 | ||
| 1712 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 1759 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", |
| 1713 | fs_info->thread_pool_size); | 1760 | fs_info->thread_pool_size, |
| 1761 | &fs_info->generic_worker); | ||
| 1714 | 1762 | ||
| 1715 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 1763 | btrfs_init_workers(&fs_info->submit_workers, "submit", |
| 1716 | min_t(u64, fs_devices->num_devices, | 1764 | min_t(u64, fs_devices->num_devices, |
| 1717 | fs_info->thread_pool_size)); | 1765 | fs_info->thread_pool_size), |
| 1766 | &fs_info->generic_worker); | ||
| 1767 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
| 1768 | fs_info->thread_pool_size, | ||
| 1769 | &fs_info->generic_worker); | ||
| 1718 | 1770 | ||
| 1719 | /* a higher idle thresh on the submit workers makes it much more | 1771 | /* a higher idle thresh on the submit workers makes it much more |
| 1720 | * likely that bios will be send down in a sane order to the | 1772 | * likely that bios will be send down in a sane order to the |
| @@ -1728,15 +1780,20 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1728 | fs_info->delalloc_workers.idle_thresh = 2; | 1780 | fs_info->delalloc_workers.idle_thresh = 2; |
| 1729 | fs_info->delalloc_workers.ordered = 1; | 1781 | fs_info->delalloc_workers.ordered = 1; |
| 1730 | 1782 | ||
| 1731 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); | 1783 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, |
| 1784 | &fs_info->generic_worker); | ||
| 1732 | btrfs_init_workers(&fs_info->endio_workers, "endio", | 1785 | btrfs_init_workers(&fs_info->endio_workers, "endio", |
| 1733 | fs_info->thread_pool_size); | 1786 | fs_info->thread_pool_size, |
| 1787 | &fs_info->generic_worker); | ||
| 1734 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | 1788 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", |
| 1735 | fs_info->thread_pool_size); | 1789 | fs_info->thread_pool_size, |
| 1790 | &fs_info->generic_worker); | ||
| 1736 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | 1791 | btrfs_init_workers(&fs_info->endio_meta_write_workers, |
| 1737 | "endio-meta-write", fs_info->thread_pool_size); | 1792 | "endio-meta-write", fs_info->thread_pool_size, |
| 1793 | &fs_info->generic_worker); | ||
| 1738 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | 1794 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", |
| 1739 | fs_info->thread_pool_size); | 1795 | fs_info->thread_pool_size, |
| 1796 | &fs_info->generic_worker); | ||
| 1740 | 1797 | ||
| 1741 | /* | 1798 | /* |
| 1742 | * endios are largely parallel and should have a very | 1799 | * endios are largely parallel and should have a very |
| @@ -1745,20 +1802,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1745 | fs_info->endio_workers.idle_thresh = 4; | 1802 | fs_info->endio_workers.idle_thresh = 4; |
| 1746 | fs_info->endio_meta_workers.idle_thresh = 4; | 1803 | fs_info->endio_meta_workers.idle_thresh = 4; |
| 1747 | 1804 | ||
| 1748 | fs_info->endio_write_workers.idle_thresh = 64; | 1805 | fs_info->endio_write_workers.idle_thresh = 2; |
| 1749 | fs_info->endio_meta_write_workers.idle_thresh = 64; | 1806 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
| 1750 | 1807 | ||
| 1751 | btrfs_start_workers(&fs_info->workers, 1); | 1808 | btrfs_start_workers(&fs_info->workers, 1); |
| 1809 | btrfs_start_workers(&fs_info->generic_worker, 1); | ||
| 1752 | btrfs_start_workers(&fs_info->submit_workers, 1); | 1810 | btrfs_start_workers(&fs_info->submit_workers, 1); |
| 1753 | btrfs_start_workers(&fs_info->delalloc_workers, 1); | 1811 | btrfs_start_workers(&fs_info->delalloc_workers, 1); |
| 1754 | btrfs_start_workers(&fs_info->fixup_workers, 1); | 1812 | btrfs_start_workers(&fs_info->fixup_workers, 1); |
| 1755 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | 1813 | btrfs_start_workers(&fs_info->endio_workers, 1); |
| 1756 | btrfs_start_workers(&fs_info->endio_meta_workers, | 1814 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
| 1757 | fs_info->thread_pool_size); | 1815 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
| 1758 | btrfs_start_workers(&fs_info->endio_meta_write_workers, | 1816 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
| 1759 | fs_info->thread_pool_size); | 1817 | btrfs_start_workers(&fs_info->enospc_workers, 1); |
| 1760 | btrfs_start_workers(&fs_info->endio_write_workers, | ||
| 1761 | fs_info->thread_pool_size); | ||
| 1762 | 1818 | ||
| 1763 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1819 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1764 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1820 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
| @@ -1918,6 +1974,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1918 | } | 1974 | } |
| 1919 | } | 1975 | } |
| 1920 | 1976 | ||
| 1977 | ret = btrfs_find_orphan_roots(tree_root); | ||
| 1978 | BUG_ON(ret); | ||
| 1979 | |||
| 1921 | if (!(sb->s_flags & MS_RDONLY)) { | 1980 | if (!(sb->s_flags & MS_RDONLY)) { |
| 1922 | ret = btrfs_recover_relocation(tree_root); | 1981 | ret = btrfs_recover_relocation(tree_root); |
| 1923 | BUG_ON(ret); | 1982 | BUG_ON(ret); |
| @@ -1961,6 +2020,7 @@ fail_chunk_root: | |||
| 1961 | free_extent_buffer(chunk_root->node); | 2020 | free_extent_buffer(chunk_root->node); |
| 1962 | free_extent_buffer(chunk_root->commit_root); | 2021 | free_extent_buffer(chunk_root->commit_root); |
| 1963 | fail_sb_buffer: | 2022 | fail_sb_buffer: |
| 2023 | btrfs_stop_workers(&fs_info->generic_worker); | ||
| 1964 | btrfs_stop_workers(&fs_info->fixup_workers); | 2024 | btrfs_stop_workers(&fs_info->fixup_workers); |
| 1965 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2025 | btrfs_stop_workers(&fs_info->delalloc_workers); |
| 1966 | btrfs_stop_workers(&fs_info->workers); | 2026 | btrfs_stop_workers(&fs_info->workers); |
| @@ -1969,6 +2029,7 @@ fail_sb_buffer: | |||
| 1969 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2029 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 1970 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2030 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 1971 | btrfs_stop_workers(&fs_info->submit_workers); | 2031 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2032 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 1972 | fail_iput: | 2033 | fail_iput: |
| 1973 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2034 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
| 1974 | iput(fs_info->btree_inode); | 2035 | iput(fs_info->btree_inode); |
| @@ -1977,6 +2038,8 @@ fail_iput: | |||
| 1977 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2038 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| 1978 | fail_bdi: | 2039 | fail_bdi: |
| 1979 | bdi_destroy(&fs_info->bdi); | 2040 | bdi_destroy(&fs_info->bdi); |
| 2041 | fail_srcu: | ||
| 2042 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
| 1980 | fail: | 2043 | fail: |
| 1981 | kfree(extent_root); | 2044 | kfree(extent_root); |
| 1982 | kfree(tree_root); | 2045 | kfree(tree_root); |
| @@ -2236,20 +2299,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
| 2236 | 2299 | ||
| 2237 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2300 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
| 2238 | { | 2301 | { |
| 2239 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 2302 | spin_lock(&fs_info->fs_roots_radix_lock); |
| 2240 | radix_tree_delete(&fs_info->fs_roots_radix, | 2303 | radix_tree_delete(&fs_info->fs_roots_radix, |
| 2241 | (unsigned long)root->root_key.objectid); | 2304 | (unsigned long)root->root_key.objectid); |
| 2305 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 2306 | |||
| 2307 | if (btrfs_root_refs(&root->root_item) == 0) | ||
| 2308 | synchronize_srcu(&fs_info->subvol_srcu); | ||
| 2309 | |||
| 2310 | free_fs_root(root); | ||
| 2311 | return 0; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | static void free_fs_root(struct btrfs_root *root) | ||
| 2315 | { | ||
| 2316 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | ||
| 2242 | if (root->anon_super.s_dev) { | 2317 | if (root->anon_super.s_dev) { |
| 2243 | down_write(&root->anon_super.s_umount); | 2318 | down_write(&root->anon_super.s_umount); |
| 2244 | kill_anon_super(&root->anon_super); | 2319 | kill_anon_super(&root->anon_super); |
| 2245 | } | 2320 | } |
| 2246 | if (root->node) | 2321 | free_extent_buffer(root->node); |
| 2247 | free_extent_buffer(root->node); | 2322 | free_extent_buffer(root->commit_root); |
| 2248 | if (root->commit_root) | ||
| 2249 | free_extent_buffer(root->commit_root); | ||
| 2250 | kfree(root->name); | 2323 | kfree(root->name); |
| 2251 | kfree(root); | 2324 | kfree(root); |
| 2252 | return 0; | ||
| 2253 | } | 2325 | } |
| 2254 | 2326 | ||
| 2255 | static int del_fs_roots(struct btrfs_fs_info *fs_info) | 2327 | static int del_fs_roots(struct btrfs_fs_info *fs_info) |
| @@ -2258,6 +2330,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 2258 | struct btrfs_root *gang[8]; | 2330 | struct btrfs_root *gang[8]; |
| 2259 | int i; | 2331 | int i; |
| 2260 | 2332 | ||
| 2333 | while (!list_empty(&fs_info->dead_roots)) { | ||
| 2334 | gang[0] = list_entry(fs_info->dead_roots.next, | ||
| 2335 | struct btrfs_root, root_list); | ||
| 2336 | list_del(&gang[0]->root_list); | ||
| 2337 | |||
| 2338 | if (gang[0]->in_radix) { | ||
| 2339 | btrfs_free_fs_root(fs_info, gang[0]); | ||
| 2340 | } else { | ||
| 2341 | free_extent_buffer(gang[0]->node); | ||
| 2342 | free_extent_buffer(gang[0]->commit_root); | ||
| 2343 | kfree(gang[0]); | ||
| 2344 | } | ||
| 2345 | } | ||
| 2346 | |||
| 2261 | while (1) { | 2347 | while (1) { |
| 2262 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | 2348 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, |
| 2263 | (void **)gang, 0, | 2349 | (void **)gang, 0, |
| @@ -2287,9 +2373,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 2287 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2373 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
| 2288 | for (i = 0; i < ret; i++) { | 2374 | for (i = 0; i < ret; i++) { |
| 2289 | root_objectid = gang[i]->root_key.objectid; | 2375 | root_objectid = gang[i]->root_key.objectid; |
| 2290 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
| 2291 | root_objectid); | ||
| 2292 | BUG_ON(ret); | ||
| 2293 | btrfs_orphan_cleanup(gang[i]); | 2376 | btrfs_orphan_cleanup(gang[i]); |
| 2294 | } | 2377 | } |
| 2295 | root_objectid++; | 2378 | root_objectid++; |
| @@ -2359,12 +2442,12 @@ int close_ctree(struct btrfs_root *root) | |||
| 2359 | free_extent_buffer(root->fs_info->csum_root->commit_root); | 2442 | free_extent_buffer(root->fs_info->csum_root->commit_root); |
| 2360 | 2443 | ||
| 2361 | btrfs_free_block_groups(root->fs_info); | 2444 | btrfs_free_block_groups(root->fs_info); |
| 2362 | btrfs_free_pinned_extents(root->fs_info); | ||
| 2363 | 2445 | ||
| 2364 | del_fs_roots(fs_info); | 2446 | del_fs_roots(fs_info); |
| 2365 | 2447 | ||
| 2366 | iput(fs_info->btree_inode); | 2448 | iput(fs_info->btree_inode); |
| 2367 | 2449 | ||
| 2450 | btrfs_stop_workers(&fs_info->generic_worker); | ||
| 2368 | btrfs_stop_workers(&fs_info->fixup_workers); | 2451 | btrfs_stop_workers(&fs_info->fixup_workers); |
| 2369 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2452 | btrfs_stop_workers(&fs_info->delalloc_workers); |
| 2370 | btrfs_stop_workers(&fs_info->workers); | 2453 | btrfs_stop_workers(&fs_info->workers); |
| @@ -2373,11 +2456,13 @@ int close_ctree(struct btrfs_root *root) | |||
| 2373 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2456 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2374 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2457 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2375 | btrfs_stop_workers(&fs_info->submit_workers); | 2458 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2459 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2376 | 2460 | ||
| 2377 | btrfs_close_devices(fs_info->fs_devices); | 2461 | btrfs_close_devices(fs_info->fs_devices); |
| 2378 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2462 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| 2379 | 2463 | ||
| 2380 | bdi_destroy(&fs_info->bdi); | 2464 | bdi_destroy(&fs_info->bdi); |
| 2465 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
| 2381 | 2466 | ||
| 2382 | kfree(fs_info->extent_root); | 2467 | kfree(fs_info->extent_root); |
| 2383 | kfree(fs_info->tree_root); | 2468 | kfree(fs_info->tree_root); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9596b40caa4e..ba5c3fd5ab8c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
| @@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
| 28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | 28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; |
| 29 | type = FILEID_BTRFS_WITHOUT_PARENT; | 29 | type = FILEID_BTRFS_WITHOUT_PARENT; |
| 30 | 30 | ||
| 31 | fid->objectid = BTRFS_I(inode)->location.objectid; | 31 | fid->objectid = inode->i_ino; |
| 32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | 32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; |
| 33 | fid->gen = inode->i_generation; | 33 | fid->gen = inode->i_generation; |
| 34 | 34 | ||
| @@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | 62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, |
| 63 | u64 root_objectid, u32 generation) | 63 | u64 root_objectid, u32 generation, |
| 64 | int check_generation) | ||
| 64 | { | 65 | { |
| 66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | ||
| 65 | struct btrfs_root *root; | 67 | struct btrfs_root *root; |
| 68 | struct dentry *dentry; | ||
| 66 | struct inode *inode; | 69 | struct inode *inode; |
| 67 | struct btrfs_key key; | 70 | struct btrfs_key key; |
| 71 | int index; | ||
| 72 | int err = 0; | ||
| 73 | |||
| 74 | if (objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
| 75 | return ERR_PTR(-ESTALE); | ||
| 68 | 76 | ||
| 69 | key.objectid = root_objectid; | 77 | key.objectid = root_objectid; |
| 70 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 78 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
| 71 | key.offset = (u64)-1; | 79 | key.offset = (u64)-1; |
| 72 | 80 | ||
| 73 | root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); | 81 | index = srcu_read_lock(&fs_info->subvol_srcu); |
| 74 | if (IS_ERR(root)) | 82 | |
| 75 | return ERR_CAST(root); | 83 | root = btrfs_read_fs_root_no_name(fs_info, &key); |
| 84 | if (IS_ERR(root)) { | ||
| 85 | err = PTR_ERR(root); | ||
| 86 | goto fail; | ||
| 87 | } | ||
| 88 | |||
| 89 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
| 90 | err = -ENOENT; | ||
| 91 | goto fail; | ||
| 92 | } | ||
| 76 | 93 | ||
| 77 | key.objectid = objectid; | 94 | key.objectid = objectid; |
| 78 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 95 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
| 79 | key.offset = 0; | 96 | key.offset = 0; |
| 80 | 97 | ||
| 81 | inode = btrfs_iget(sb, &key, root); | 98 | inode = btrfs_iget(sb, &key, root); |
| 82 | if (IS_ERR(inode)) | 99 | if (IS_ERR(inode)) { |
| 83 | return (void *)inode; | 100 | err = PTR_ERR(inode); |
| 101 | goto fail; | ||
| 102 | } | ||
| 103 | |||
| 104 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 84 | 105 | ||
| 85 | if (generation != inode->i_generation) { | 106 | if (check_generation && generation != inode->i_generation) { |
| 86 | iput(inode); | 107 | iput(inode); |
| 87 | return ERR_PTR(-ESTALE); | 108 | return ERR_PTR(-ESTALE); |
| 88 | } | 109 | } |
| 89 | 110 | ||
| 90 | return d_obtain_alias(inode); | 111 | dentry = d_obtain_alias(inode); |
| 112 | if (!IS_ERR(dentry)) | ||
| 113 | dentry->d_op = &btrfs_dentry_operations; | ||
| 114 | return dentry; | ||
| 115 | fail: | ||
| 116 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 117 | return ERR_PTR(err); | ||
| 91 | } | 118 | } |
| 92 | 119 | ||
| 93 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | 120 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, |
| @@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | |||
| 111 | objectid = fid->parent_objectid; | 138 | objectid = fid->parent_objectid; |
| 112 | generation = fid->parent_gen; | 139 | generation = fid->parent_gen; |
| 113 | 140 | ||
| 114 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 141 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
| 115 | } | 142 | } |
| 116 | 143 | ||
| 117 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | 144 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, |
| @@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
| 133 | root_objectid = fid->root_objectid; | 160 | root_objectid = fid->root_objectid; |
| 134 | generation = fid->gen; | 161 | generation = fid->gen; |
| 135 | 162 | ||
| 136 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 163 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
| 137 | } | 164 | } |
| 138 | 165 | ||
| 139 | static struct dentry *btrfs_get_parent(struct dentry *child) | 166 | static struct dentry *btrfs_get_parent(struct dentry *child) |
| 140 | { | 167 | { |
| 141 | struct inode *dir = child->d_inode; | 168 | struct inode *dir = child->d_inode; |
| 169 | static struct dentry *dentry; | ||
| 142 | struct btrfs_root *root = BTRFS_I(dir)->root; | 170 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 143 | struct btrfs_key key; | ||
| 144 | struct btrfs_path *path; | 171 | struct btrfs_path *path; |
| 145 | struct extent_buffer *leaf; | 172 | struct extent_buffer *leaf; |
| 146 | int slot; | 173 | struct btrfs_root_ref *ref; |
| 147 | u64 objectid; | 174 | struct btrfs_key key; |
| 175 | struct btrfs_key found_key; | ||
| 148 | int ret; | 176 | int ret; |
| 149 | 177 | ||
| 150 | path = btrfs_alloc_path(); | 178 | path = btrfs_alloc_path(); |
| 151 | 179 | ||
| 152 | key.objectid = dir->i_ino; | 180 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { |
| 153 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 181 | key.objectid = root->root_key.objectid; |
| 154 | key.offset = (u64)-1; | 182 | key.type = BTRFS_ROOT_BACKREF_KEY; |
| 183 | key.offset = (u64)-1; | ||
| 184 | root = root->fs_info->tree_root; | ||
| 185 | } else { | ||
| 186 | key.objectid = dir->i_ino; | ||
| 187 | key.type = BTRFS_INODE_REF_KEY; | ||
| 188 | key.offset = (u64)-1; | ||
| 189 | } | ||
| 155 | 190 | ||
| 156 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 191 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 157 | if (ret < 0) { | 192 | if (ret < 0) |
| 158 | /* Error */ | 193 | goto fail; |
| 159 | btrfs_free_path(path); | 194 | |
| 160 | return ERR_PTR(ret); | 195 | BUG_ON(ret == 0); |
| 196 | if (path->slots[0] == 0) { | ||
| 197 | ret = -ENOENT; | ||
| 198 | goto fail; | ||
| 161 | } | 199 | } |
| 200 | |||
| 201 | path->slots[0]--; | ||
| 162 | leaf = path->nodes[0]; | 202 | leaf = path->nodes[0]; |
| 163 | slot = path->slots[0]; | 203 | |
| 164 | if (ret) { | 204 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
| 165 | /* btrfs_search_slot() returns the slot where we'd want to | 205 | if (found_key.objectid != key.objectid || found_key.type != key.type) { |
| 166 | insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. | 206 | ret = -ENOENT; |
| 167 | The _real_ backref, telling us what the parent inode | 207 | goto fail; |
| 168 | _actually_ is, will be in the slot _before_ the one | ||
| 169 | that btrfs_search_slot() returns. */ | ||
| 170 | if (!slot) { | ||
| 171 | /* Unless there is _no_ key in the tree before... */ | ||
| 172 | btrfs_free_path(path); | ||
| 173 | return ERR_PTR(-EIO); | ||
| 174 | } | ||
| 175 | slot--; | ||
| 176 | } | 208 | } |
| 177 | 209 | ||
| 178 | btrfs_item_key_to_cpu(leaf, &key, slot); | 210 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
| 211 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
| 212 | struct btrfs_root_ref); | ||
| 213 | key.objectid = btrfs_root_ref_dirid(leaf, ref); | ||
| 214 | } else { | ||
| 215 | key.objectid = found_key.offset; | ||
| 216 | } | ||
| 179 | btrfs_free_path(path); | 217 | btrfs_free_path(path); |
| 180 | 218 | ||
| 181 | if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) | 219 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
| 182 | return ERR_PTR(-EINVAL); | 220 | return btrfs_get_dentry(root->fs_info->sb, key.objectid, |
| 183 | 221 | found_key.offset, 0, 0); | |
| 184 | objectid = key.offset; | 222 | } |
| 185 | |||
| 186 | /* If we are already at the root of a subvol, return the real root */ | ||
| 187 | if (objectid == dir->i_ino) | ||
| 188 | return dget(dir->i_sb->s_root); | ||
| 189 | 223 | ||
| 190 | /* Build a new key for the inode item */ | 224 | key.type = BTRFS_INODE_ITEM_KEY; |
| 191 | key.objectid = objectid; | ||
| 192 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 193 | key.offset = 0; | 225 | key.offset = 0; |
| 194 | 226 | dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | |
| 195 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | 227 | if (!IS_ERR(dentry)) |
| 228 | dentry->d_op = &btrfs_dentry_operations; | ||
| 229 | return dentry; | ||
| 230 | fail: | ||
| 231 | btrfs_free_path(path); | ||
| 232 | return ERR_PTR(ret); | ||
| 196 | } | 233 | } |
| 197 | 234 | ||
| 198 | const struct export_operations btrfs_export_ops = { | 235 | const struct export_operations btrfs_export_ops = { |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 535f85ba104f..94627c4cc193 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -32,12 +32,12 @@ | |||
| 32 | #include "locking.h" | 32 | #include "locking.h" |
| 33 | #include "free-space-cache.h" | 33 | #include "free-space-cache.h" |
| 34 | 34 | ||
| 35 | static int update_reserved_extents(struct btrfs_root *root, | ||
| 36 | u64 bytenr, u64 num, int reserve); | ||
| 37 | static int update_block_group(struct btrfs_trans_handle *trans, | 35 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 38 | struct btrfs_root *root, | 36 | struct btrfs_root *root, |
| 39 | u64 bytenr, u64 num_bytes, int alloc, | 37 | u64 bytenr, u64 num_bytes, int alloc, |
| 40 | int mark_free); | 38 | int mark_free); |
| 39 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | ||
| 40 | u64 num_bytes, int reserve); | ||
| 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 42 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
| 43 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -57,10 +57,19 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 57 | u64 parent, u64 root_objectid, | 57 | u64 parent, u64 root_objectid, |
| 58 | u64 flags, struct btrfs_disk_key *key, | 58 | u64 flags, struct btrfs_disk_key *key, |
| 59 | int level, struct btrfs_key *ins); | 59 | int level, struct btrfs_key *ins); |
| 60 | |||
| 61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
| 63 | u64 flags, int force); | 62 | u64 flags, int force); |
| 63 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
| 64 | struct btrfs_root *root, | ||
| 65 | struct btrfs_path *path, | ||
| 66 | u64 bytenr, u64 num_bytes, | ||
| 67 | int is_data, int reserved, | ||
| 68 | struct extent_buffer **must_clean); | ||
| 69 | static int find_next_key(struct btrfs_path *path, int level, | ||
| 70 | struct btrfs_key *key); | ||
| 71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
| 72 | int dump_block_groups); | ||
| 64 | 73 | ||
| 65 | static noinline int | 74 | static noinline int |
| 66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
| @@ -153,34 +162,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
| 153 | return ret; | 162 | return ret; |
| 154 | } | 163 | } |
| 155 | 164 | ||
| 156 | /* | 165 | static int add_excluded_extent(struct btrfs_root *root, |
| 157 | * We always set EXTENT_LOCKED for the super mirror extents so we don't | 166 | u64 start, u64 num_bytes) |
| 158 | * overwrite them, so those bits need to be unset. Also, if we are unmounting | ||
| 159 | * with pinned extents still sitting there because we had a block group caching, | ||
| 160 | * we need to clear those now, since we are done. | ||
| 161 | */ | ||
| 162 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info) | ||
| 163 | { | 167 | { |
| 164 | u64 start, end, last = 0; | 168 | u64 end = start + num_bytes - 1; |
| 165 | int ret; | 169 | set_extent_bits(&root->fs_info->freed_extents[0], |
| 170 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 171 | set_extent_bits(&root->fs_info->freed_extents[1], | ||
| 172 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 173 | return 0; | ||
| 174 | } | ||
| 166 | 175 | ||
| 167 | while (1) { | 176 | static void free_excluded_extents(struct btrfs_root *root, |
| 168 | ret = find_first_extent_bit(&info->pinned_extents, last, | 177 | struct btrfs_block_group_cache *cache) |
| 169 | &start, &end, | 178 | { |
| 170 | EXTENT_LOCKED|EXTENT_DIRTY); | 179 | u64 start, end; |
| 171 | if (ret) | ||
| 172 | break; | ||
| 173 | 180 | ||
| 174 | clear_extent_bits(&info->pinned_extents, start, end, | 181 | start = cache->key.objectid; |
| 175 | EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); | 182 | end = start + cache->key.offset - 1; |
| 176 | last = end+1; | 183 | |
| 177 | } | 184 | clear_extent_bits(&root->fs_info->freed_extents[0], |
| 185 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 186 | clear_extent_bits(&root->fs_info->freed_extents[1], | ||
| 187 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 178 | } | 188 | } |
| 179 | 189 | ||
| 180 | static int remove_sb_from_cache(struct btrfs_root *root, | 190 | static int exclude_super_stripes(struct btrfs_root *root, |
| 181 | struct btrfs_block_group_cache *cache) | 191 | struct btrfs_block_group_cache *cache) |
| 182 | { | 192 | { |
| 183 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 184 | u64 bytenr; | 193 | u64 bytenr; |
| 185 | u64 *logical; | 194 | u64 *logical; |
| 186 | int stripe_len; | 195 | int stripe_len; |
| @@ -192,17 +201,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, | |||
| 192 | cache->key.objectid, bytenr, | 201 | cache->key.objectid, bytenr, |
| 193 | 0, &logical, &nr, &stripe_len); | 202 | 0, &logical, &nr, &stripe_len); |
| 194 | BUG_ON(ret); | 203 | BUG_ON(ret); |
| 204 | |||
| 195 | while (nr--) { | 205 | while (nr--) { |
| 196 | try_lock_extent(&fs_info->pinned_extents, | 206 | cache->bytes_super += stripe_len; |
| 197 | logical[nr], | 207 | ret = add_excluded_extent(root, logical[nr], |
| 198 | logical[nr] + stripe_len - 1, GFP_NOFS); | 208 | stripe_len); |
| 209 | BUG_ON(ret); | ||
| 199 | } | 210 | } |
| 211 | |||
| 200 | kfree(logical); | 212 | kfree(logical); |
| 201 | } | 213 | } |
| 202 | |||
| 203 | return 0; | 214 | return 0; |
| 204 | } | 215 | } |
| 205 | 216 | ||
| 217 | static struct btrfs_caching_control * | ||
| 218 | get_caching_control(struct btrfs_block_group_cache *cache) | ||
| 219 | { | ||
| 220 | struct btrfs_caching_control *ctl; | ||
| 221 | |||
| 222 | spin_lock(&cache->lock); | ||
| 223 | if (cache->cached != BTRFS_CACHE_STARTED) { | ||
| 224 | spin_unlock(&cache->lock); | ||
| 225 | return NULL; | ||
| 226 | } | ||
| 227 | |||
| 228 | ctl = cache->caching_ctl; | ||
| 229 | atomic_inc(&ctl->count); | ||
| 230 | spin_unlock(&cache->lock); | ||
| 231 | return ctl; | ||
| 232 | } | ||
| 233 | |||
| 234 | static void put_caching_control(struct btrfs_caching_control *ctl) | ||
| 235 | { | ||
| 236 | if (atomic_dec_and_test(&ctl->count)) | ||
| 237 | kfree(ctl); | ||
| 238 | } | ||
| 239 | |||
| 206 | /* | 240 | /* |
| 207 | * this is only called by cache_block_group, since we could have freed extents | 241 | * this is only called by cache_block_group, since we could have freed extents |
| 208 | * we need to check the pinned_extents for any extents that can't be used yet | 242 | * we need to check the pinned_extents for any extents that can't be used yet |
| @@ -215,9 +249,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
| 215 | int ret; | 249 | int ret; |
| 216 | 250 | ||
| 217 | while (start < end) { | 251 | while (start < end) { |
| 218 | ret = find_first_extent_bit(&info->pinned_extents, start, | 252 | ret = find_first_extent_bit(info->pinned_extents, start, |
| 219 | &extent_start, &extent_end, | 253 | &extent_start, &extent_end, |
| 220 | EXTENT_DIRTY|EXTENT_LOCKED); | 254 | EXTENT_DIRTY | EXTENT_UPTODATE); |
| 221 | if (ret) | 255 | if (ret) |
| 222 | break; | 256 | break; |
| 223 | 257 | ||
| @@ -249,22 +283,27 @@ static int caching_kthread(void *data) | |||
| 249 | { | 283 | { |
| 250 | struct btrfs_block_group_cache *block_group = data; | 284 | struct btrfs_block_group_cache *block_group = data; |
| 251 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 285 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
| 252 | u64 last = 0; | 286 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; |
| 287 | struct btrfs_root *extent_root = fs_info->extent_root; | ||
| 253 | struct btrfs_path *path; | 288 | struct btrfs_path *path; |
| 254 | int ret = 0; | ||
| 255 | struct btrfs_key key; | ||
| 256 | struct extent_buffer *leaf; | 289 | struct extent_buffer *leaf; |
| 257 | int slot; | 290 | struct btrfs_key key; |
| 258 | u64 total_found = 0; | 291 | u64 total_found = 0; |
| 259 | 292 | u64 last = 0; | |
| 260 | BUG_ON(!fs_info); | 293 | u32 nritems; |
| 294 | int ret = 0; | ||
| 261 | 295 | ||
| 262 | path = btrfs_alloc_path(); | 296 | path = btrfs_alloc_path(); |
| 263 | if (!path) | 297 | if (!path) |
| 264 | return -ENOMEM; | 298 | return -ENOMEM; |
| 265 | 299 | ||
| 266 | atomic_inc(&block_group->space_info->caching_threads); | 300 | exclude_super_stripes(extent_root, block_group); |
| 301 | spin_lock(&block_group->space_info->lock); | ||
| 302 | block_group->space_info->bytes_super += block_group->bytes_super; | ||
| 303 | spin_unlock(&block_group->space_info->lock); | ||
| 304 | |||
| 267 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 305 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
| 306 | |||
| 268 | /* | 307 | /* |
| 269 | * We don't want to deadlock with somebody trying to allocate a new | 308 | * We don't want to deadlock with somebody trying to allocate a new |
| 270 | * extent for the extent root while also trying to search the extent | 309 | * extent for the extent root while also trying to search the extent |
| @@ -277,74 +316,64 @@ static int caching_kthread(void *data) | |||
| 277 | 316 | ||
| 278 | key.objectid = last; | 317 | key.objectid = last; |
| 279 | key.offset = 0; | 318 | key.offset = 0; |
| 280 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 319 | key.type = BTRFS_EXTENT_ITEM_KEY; |
| 281 | again: | 320 | again: |
| 321 | mutex_lock(&caching_ctl->mutex); | ||
| 282 | /* need to make sure the commit_root doesn't disappear */ | 322 | /* need to make sure the commit_root doesn't disappear */ |
| 283 | down_read(&fs_info->extent_commit_sem); | 323 | down_read(&fs_info->extent_commit_sem); |
| 284 | 324 | ||
| 285 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | 325 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
| 286 | if (ret < 0) | 326 | if (ret < 0) |
| 287 | goto err; | 327 | goto err; |
| 288 | 328 | ||
| 329 | leaf = path->nodes[0]; | ||
| 330 | nritems = btrfs_header_nritems(leaf); | ||
| 331 | |||
| 289 | while (1) { | 332 | while (1) { |
| 290 | smp_mb(); | 333 | smp_mb(); |
| 291 | if (block_group->fs_info->closing > 1) { | 334 | if (fs_info->closing > 1) { |
| 292 | last = (u64)-1; | 335 | last = (u64)-1; |
| 293 | break; | 336 | break; |
| 294 | } | 337 | } |
| 295 | 338 | ||
| 296 | leaf = path->nodes[0]; | 339 | if (path->slots[0] < nritems) { |
| 297 | slot = path->slots[0]; | 340 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
| 298 | if (slot >= btrfs_header_nritems(leaf)) { | 341 | } else { |
| 299 | ret = btrfs_next_leaf(fs_info->extent_root, path); | 342 | ret = find_next_key(path, 0, &key); |
| 300 | if (ret < 0) | 343 | if (ret) |
| 301 | goto err; | ||
| 302 | else if (ret) | ||
| 303 | break; | 344 | break; |
| 304 | 345 | ||
| 305 | if (need_resched() || | 346 | caching_ctl->progress = last; |
| 306 | btrfs_transaction_in_commit(fs_info)) { | 347 | btrfs_release_path(extent_root, path); |
| 307 | leaf = path->nodes[0]; | 348 | up_read(&fs_info->extent_commit_sem); |
| 308 | 349 | mutex_unlock(&caching_ctl->mutex); | |
| 309 | /* this shouldn't happen, but if the | 350 | if (btrfs_transaction_in_commit(fs_info)) |
| 310 | * leaf is empty just move on. | ||
| 311 | */ | ||
| 312 | if (btrfs_header_nritems(leaf) == 0) | ||
| 313 | break; | ||
| 314 | /* | ||
| 315 | * we need to copy the key out so that | ||
| 316 | * we are sure the next search advances | ||
| 317 | * us forward in the btree. | ||
| 318 | */ | ||
| 319 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
| 320 | btrfs_release_path(fs_info->extent_root, path); | ||
| 321 | up_read(&fs_info->extent_commit_sem); | ||
| 322 | schedule_timeout(1); | 351 | schedule_timeout(1); |
| 323 | goto again; | 352 | else |
| 324 | } | 353 | cond_resched(); |
| 354 | goto again; | ||
| 355 | } | ||
| 325 | 356 | ||
| 357 | if (key.objectid < block_group->key.objectid) { | ||
| 358 | path->slots[0]++; | ||
| 326 | continue; | 359 | continue; |
| 327 | } | 360 | } |
| 328 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 329 | if (key.objectid < block_group->key.objectid) | ||
| 330 | goto next; | ||
| 331 | 361 | ||
| 332 | if (key.objectid >= block_group->key.objectid + | 362 | if (key.objectid >= block_group->key.objectid + |
| 333 | block_group->key.offset) | 363 | block_group->key.offset) |
| 334 | break; | 364 | break; |
| 335 | 365 | ||
| 336 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 366 | if (key.type == BTRFS_EXTENT_ITEM_KEY) { |
| 337 | total_found += add_new_free_space(block_group, | 367 | total_found += add_new_free_space(block_group, |
| 338 | fs_info, last, | 368 | fs_info, last, |
| 339 | key.objectid); | 369 | key.objectid); |
| 340 | last = key.objectid + key.offset; | 370 | last = key.objectid + key.offset; |
| 341 | } | ||
| 342 | 371 | ||
| 343 | if (total_found > (1024 * 1024 * 2)) { | 372 | if (total_found > (1024 * 1024 * 2)) { |
| 344 | total_found = 0; | 373 | total_found = 0; |
| 345 | wake_up(&block_group->caching_q); | 374 | wake_up(&caching_ctl->wait); |
| 375 | } | ||
| 346 | } | 376 | } |
| 347 | next: | ||
| 348 | path->slots[0]++; | 377 | path->slots[0]++; |
| 349 | } | 378 | } |
| 350 | ret = 0; | 379 | ret = 0; |
| @@ -352,33 +381,65 @@ next: | |||
| 352 | total_found += add_new_free_space(block_group, fs_info, last, | 381 | total_found += add_new_free_space(block_group, fs_info, last, |
| 353 | block_group->key.objectid + | 382 | block_group->key.objectid + |
| 354 | block_group->key.offset); | 383 | block_group->key.offset); |
| 384 | caching_ctl->progress = (u64)-1; | ||
| 355 | 385 | ||
| 356 | spin_lock(&block_group->lock); | 386 | spin_lock(&block_group->lock); |
| 387 | block_group->caching_ctl = NULL; | ||
| 357 | block_group->cached = BTRFS_CACHE_FINISHED; | 388 | block_group->cached = BTRFS_CACHE_FINISHED; |
| 358 | spin_unlock(&block_group->lock); | 389 | spin_unlock(&block_group->lock); |
| 359 | 390 | ||
| 360 | err: | 391 | err: |
| 361 | btrfs_free_path(path); | 392 | btrfs_free_path(path); |
| 362 | up_read(&fs_info->extent_commit_sem); | 393 | up_read(&fs_info->extent_commit_sem); |
| 363 | atomic_dec(&block_group->space_info->caching_threads); | ||
| 364 | wake_up(&block_group->caching_q); | ||
| 365 | 394 | ||
| 395 | free_excluded_extents(extent_root, block_group); | ||
| 396 | |||
| 397 | mutex_unlock(&caching_ctl->mutex); | ||
| 398 | wake_up(&caching_ctl->wait); | ||
| 399 | |||
| 400 | put_caching_control(caching_ctl); | ||
| 401 | atomic_dec(&block_group->space_info->caching_threads); | ||
| 366 | return 0; | 402 | return 0; |
| 367 | } | 403 | } |
| 368 | 404 | ||
| 369 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 405 | static int cache_block_group(struct btrfs_block_group_cache *cache) |
| 370 | { | 406 | { |
| 407 | struct btrfs_fs_info *fs_info = cache->fs_info; | ||
| 408 | struct btrfs_caching_control *caching_ctl; | ||
| 371 | struct task_struct *tsk; | 409 | struct task_struct *tsk; |
| 372 | int ret = 0; | 410 | int ret = 0; |
| 373 | 411 | ||
| 412 | smp_mb(); | ||
| 413 | if (cache->cached != BTRFS_CACHE_NO) | ||
| 414 | return 0; | ||
| 415 | |||
| 416 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | ||
| 417 | BUG_ON(!caching_ctl); | ||
| 418 | |||
| 419 | INIT_LIST_HEAD(&caching_ctl->list); | ||
| 420 | mutex_init(&caching_ctl->mutex); | ||
| 421 | init_waitqueue_head(&caching_ctl->wait); | ||
| 422 | caching_ctl->block_group = cache; | ||
| 423 | caching_ctl->progress = cache->key.objectid; | ||
| 424 | /* one for caching kthread, one for caching block group list */ | ||
| 425 | atomic_set(&caching_ctl->count, 2); | ||
| 426 | |||
| 374 | spin_lock(&cache->lock); | 427 | spin_lock(&cache->lock); |
| 375 | if (cache->cached != BTRFS_CACHE_NO) { | 428 | if (cache->cached != BTRFS_CACHE_NO) { |
| 376 | spin_unlock(&cache->lock); | 429 | spin_unlock(&cache->lock); |
| 377 | return ret; | 430 | kfree(caching_ctl); |
| 431 | return 0; | ||
| 378 | } | 432 | } |
| 433 | cache->caching_ctl = caching_ctl; | ||
| 379 | cache->cached = BTRFS_CACHE_STARTED; | 434 | cache->cached = BTRFS_CACHE_STARTED; |
| 380 | spin_unlock(&cache->lock); | 435 | spin_unlock(&cache->lock); |
| 381 | 436 | ||
| 437 | down_write(&fs_info->extent_commit_sem); | ||
| 438 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | ||
| 439 | up_write(&fs_info->extent_commit_sem); | ||
| 440 | |||
| 441 | atomic_inc(&cache->space_info->caching_threads); | ||
| 442 | |||
| 382 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 443 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", |
| 383 | cache->key.objectid); | 444 | cache->key.objectid); |
| 384 | if (IS_ERR(tsk)) { | 445 | if (IS_ERR(tsk)) { |
| @@ -1507,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
| 1507 | return ret; | 1568 | return ret; |
| 1508 | } | 1569 | } |
| 1509 | 1570 | ||
| 1510 | #ifdef BIO_RW_DISCARD | ||
| 1511 | static void btrfs_issue_discard(struct block_device *bdev, | 1571 | static void btrfs_issue_discard(struct block_device *bdev, |
| 1512 | u64 start, u64 len) | 1572 | u64 start, u64 len) |
| 1513 | { | 1573 | { |
| 1514 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1574 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
| 1515 | DISCARD_FL_BARRIER); | 1575 | DISCARD_FL_BARRIER); |
| 1516 | } | 1576 | } |
| 1517 | #endif | ||
| 1518 | 1577 | ||
| 1519 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1578 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
| 1520 | u64 num_bytes) | 1579 | u64 num_bytes) |
| 1521 | { | 1580 | { |
| 1522 | #ifdef BIO_RW_DISCARD | ||
| 1523 | int ret; | 1581 | int ret; |
| 1524 | u64 map_length = num_bytes; | 1582 | u64 map_length = num_bytes; |
| 1525 | struct btrfs_multi_bio *multi = NULL; | 1583 | struct btrfs_multi_bio *multi = NULL; |
| 1526 | 1584 | ||
| 1585 | if (!btrfs_test_opt(root, DISCARD)) | ||
| 1586 | return 0; | ||
| 1587 | |||
| 1527 | /* Tell the block device(s) that the sectors can be discarded */ | 1588 | /* Tell the block device(s) that the sectors can be discarded */ |
| 1528 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, | 1589 | ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, |
| 1529 | bytenr, &map_length, &multi, 0); | 1590 | bytenr, &map_length, &multi, 0); |
| @@ -1543,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
| 1543 | } | 1604 | } |
| 1544 | 1605 | ||
| 1545 | return ret; | 1606 | return ret; |
| 1546 | #else | ||
| 1547 | return 0; | ||
| 1548 | #endif | ||
| 1549 | } | 1607 | } |
| 1550 | 1608 | ||
| 1551 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 1609 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
| @@ -1657,7 +1715,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
| 1657 | parent, ref_root, flags, | 1715 | parent, ref_root, flags, |
| 1658 | ref->objectid, ref->offset, | 1716 | ref->objectid, ref->offset, |
| 1659 | &ins, node->ref_mod); | 1717 | &ins, node->ref_mod); |
| 1660 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
| 1661 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1718 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
| 1662 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1719 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
| 1663 | node->num_bytes, parent, | 1720 | node->num_bytes, parent, |
| @@ -1783,7 +1840,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
| 1783 | extent_op->flags_to_set, | 1840 | extent_op->flags_to_set, |
| 1784 | &extent_op->key, | 1841 | &extent_op->key, |
| 1785 | ref->level, &ins); | 1842 | ref->level, &ins); |
| 1786 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
| 1787 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1843 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
| 1788 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1844 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
| 1789 | node->num_bytes, parent, ref_root, | 1845 | node->num_bytes, parent, ref_root, |
| @@ -1818,16 +1874,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
| 1818 | BUG_ON(extent_op); | 1874 | BUG_ON(extent_op); |
| 1819 | head = btrfs_delayed_node_to_head(node); | 1875 | head = btrfs_delayed_node_to_head(node); |
| 1820 | if (insert_reserved) { | 1876 | if (insert_reserved) { |
| 1877 | int mark_free = 0; | ||
| 1878 | struct extent_buffer *must_clean = NULL; | ||
| 1879 | |||
| 1880 | ret = pin_down_bytes(trans, root, NULL, | ||
| 1881 | node->bytenr, node->num_bytes, | ||
| 1882 | head->is_data, 1, &must_clean); | ||
| 1883 | if (ret > 0) | ||
| 1884 | mark_free = 1; | ||
| 1885 | |||
| 1886 | if (must_clean) { | ||
| 1887 | clean_tree_block(NULL, root, must_clean); | ||
| 1888 | btrfs_tree_unlock(must_clean); | ||
| 1889 | free_extent_buffer(must_clean); | ||
| 1890 | } | ||
| 1821 | if (head->is_data) { | 1891 | if (head->is_data) { |
| 1822 | ret = btrfs_del_csums(trans, root, | 1892 | ret = btrfs_del_csums(trans, root, |
| 1823 | node->bytenr, | 1893 | node->bytenr, |
| 1824 | node->num_bytes); | 1894 | node->num_bytes); |
| 1825 | BUG_ON(ret); | 1895 | BUG_ON(ret); |
| 1826 | } | 1896 | } |
| 1827 | btrfs_update_pinned_extents(root, node->bytenr, | 1897 | if (mark_free) { |
| 1828 | node->num_bytes, 1); | 1898 | ret = btrfs_free_reserved_extent(root, |
| 1829 | update_reserved_extents(root, node->bytenr, | 1899 | node->bytenr, |
| 1830 | node->num_bytes, 0); | 1900 | node->num_bytes); |
| 1901 | BUG_ON(ret); | ||
| 1902 | } | ||
| 1831 | } | 1903 | } |
| 1832 | mutex_unlock(&head->mutex); | 1904 | mutex_unlock(&head->mutex); |
| 1833 | return 0; | 1905 | return 0; |
| @@ -2692,60 +2764,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
| 2692 | alloc_target); | 2764 | alloc_target); |
| 2693 | } | 2765 | } |
| 2694 | 2766 | ||
| 2767 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
| 2768 | { | ||
| 2769 | u64 num_bytes; | ||
| 2770 | int level; | ||
| 2771 | |||
| 2772 | level = BTRFS_MAX_LEVEL - 2; | ||
| 2773 | /* | ||
| 2774 | * NOTE: these calculations are absolutely the worst possible case. | ||
| 2775 | * This assumes that _every_ item we insert will require a new leaf, and | ||
| 2776 | * that the tree has grown to its maximum level size. | ||
| 2777 | */ | ||
| 2778 | |||
| 2779 | /* | ||
| 2780 | * for every item we insert we could insert both an extent item and an | ||
| 2781 | * extent ref item. Then for every item we insert, we will need to cow | ||
| 2782 | * both the original leaf, plus the leaf to the left and right of it. | ||
| 2783 | * | ||
| 2784 | * Unless we are talking about the extent root, then we just want the | ||
| 2785 | * number of items * 2, since we just need the extent item plus its ref. | ||
| 2786 | */ | ||
| 2787 | if (root == root->fs_info->extent_root) | ||
| 2788 | num_bytes = num_items * 2; | ||
| 2789 | else | ||
| 2790 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
| 2791 | |||
| 2792 | /* | ||
| 2793 | * num_bytes is total number of leaves we could need times the leaf | ||
| 2794 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
| 2795 | * level, down to the leaf level. | ||
| 2796 | */ | ||
| 2797 | num_bytes = (num_bytes * root->leafsize) + | ||
| 2798 | (num_bytes * (level * 2)) * root->nodesize; | ||
| 2799 | |||
| 2800 | return num_bytes; | ||
| 2801 | } | ||
| 2802 | |||
| 2695 | /* | 2803 | /* |
| 2696 | * for now this just makes sure we have at least 5% of our metadata space free | 2804 | * Unreserve metadata space for delalloc. If we have less reserved credits than |
| 2697 | * for use. | 2805 | * we have extents, this function does nothing. |
| 2698 | */ | 2806 | */ |
| 2699 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2807 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
| 2808 | struct inode *inode, int num_items) | ||
| 2700 | { | 2809 | { |
| 2701 | struct btrfs_fs_info *info = root->fs_info; | 2810 | struct btrfs_fs_info *info = root->fs_info; |
| 2702 | struct btrfs_space_info *meta_sinfo; | 2811 | struct btrfs_space_info *meta_sinfo; |
| 2703 | u64 alloc_target, thresh; | 2812 | u64 num_bytes; |
| 2704 | int committed = 0, ret; | 2813 | u64 alloc_target; |
| 2814 | bool bug = false; | ||
| 2705 | 2815 | ||
| 2706 | /* get the space info for where the metadata will live */ | 2816 | /* get the space info for where the metadata will live */ |
| 2707 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2817 | alloc_target = btrfs_get_alloc_profile(root, 0); |
| 2708 | meta_sinfo = __find_space_info(info, alloc_target); | 2818 | meta_sinfo = __find_space_info(info, alloc_target); |
| 2709 | 2819 | ||
| 2710 | again: | 2820 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
| 2821 | num_items); | ||
| 2822 | |||
| 2711 | spin_lock(&meta_sinfo->lock); | 2823 | spin_lock(&meta_sinfo->lock); |
| 2712 | if (!meta_sinfo->full) | 2824 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
| 2713 | thresh = meta_sinfo->total_bytes * 80; | 2825 | if (BTRFS_I(inode)->reserved_extents <= |
| 2714 | else | 2826 | BTRFS_I(inode)->outstanding_extents) { |
| 2715 | thresh = meta_sinfo->total_bytes * 95; | 2827 | spin_unlock(&BTRFS_I(inode)->accounting_lock); |
| 2828 | spin_unlock(&meta_sinfo->lock); | ||
| 2829 | return 0; | ||
| 2830 | } | ||
| 2831 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 2832 | |||
| 2833 | BTRFS_I(inode)->reserved_extents--; | ||
| 2834 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
| 2835 | |||
| 2836 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
| 2837 | bug = true; | ||
| 2838 | meta_sinfo->bytes_delalloc = 0; | ||
| 2839 | } else { | ||
| 2840 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2841 | } | ||
| 2842 | spin_unlock(&meta_sinfo->lock); | ||
| 2843 | |||
| 2844 | BUG_ON(bug); | ||
| 2845 | |||
| 2846 | return 0; | ||
| 2847 | } | ||
| 2716 | 2848 | ||
| 2849 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
| 2850 | { | ||
| 2851 | u64 thresh; | ||
| 2852 | |||
| 2853 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 2854 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 2855 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 2856 | meta_sinfo->bytes_may_use; | ||
| 2857 | |||
| 2858 | thresh = meta_sinfo->total_bytes - thresh; | ||
| 2859 | thresh *= 80; | ||
| 2717 | do_div(thresh, 100); | 2860 | do_div(thresh, 100); |
| 2861 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
| 2862 | meta_sinfo->force_delalloc = 1; | ||
| 2863 | else | ||
| 2864 | meta_sinfo->force_delalloc = 0; | ||
| 2865 | } | ||
| 2718 | 2866 | ||
| 2719 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2867 | struct async_flush { |
| 2720 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { | 2868 | struct btrfs_root *root; |
| 2721 | struct btrfs_trans_handle *trans; | 2869 | struct btrfs_space_info *info; |
| 2722 | if (!meta_sinfo->full) { | 2870 | struct btrfs_work work; |
| 2723 | meta_sinfo->force_alloc = 1; | 2871 | }; |
| 2724 | spin_unlock(&meta_sinfo->lock); | ||
| 2725 | 2872 | ||
| 2726 | trans = btrfs_start_transaction(root, 1); | 2873 | static noinline void flush_delalloc_async(struct btrfs_work *work) |
| 2727 | if (!trans) | 2874 | { |
| 2728 | return -ENOMEM; | 2875 | struct async_flush *async; |
| 2876 | struct btrfs_root *root; | ||
| 2877 | struct btrfs_space_info *info; | ||
| 2729 | 2878 | ||
| 2730 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2879 | async = container_of(work, struct async_flush, work); |
| 2731 | 2 * 1024 * 1024, alloc_target, 0); | 2880 | root = async->root; |
| 2732 | btrfs_end_transaction(trans, root); | 2881 | info = async->info; |
| 2882 | |||
| 2883 | btrfs_start_delalloc_inodes(root); | ||
| 2884 | wake_up(&info->flush_wait); | ||
| 2885 | btrfs_wait_ordered_extents(root, 0); | ||
| 2886 | |||
| 2887 | spin_lock(&info->lock); | ||
| 2888 | info->flushing = 0; | ||
| 2889 | spin_unlock(&info->lock); | ||
| 2890 | wake_up(&info->flush_wait); | ||
| 2891 | |||
| 2892 | kfree(async); | ||
| 2893 | } | ||
| 2894 | |||
| 2895 | static void wait_on_flush(struct btrfs_space_info *info) | ||
| 2896 | { | ||
| 2897 | DEFINE_WAIT(wait); | ||
| 2898 | u64 used; | ||
| 2899 | |||
| 2900 | while (1) { | ||
| 2901 | prepare_to_wait(&info->flush_wait, &wait, | ||
| 2902 | TASK_UNINTERRUPTIBLE); | ||
| 2903 | spin_lock(&info->lock); | ||
| 2904 | if (!info->flushing) { | ||
| 2905 | spin_unlock(&info->lock); | ||
| 2906 | break; | ||
| 2907 | } | ||
| 2908 | |||
| 2909 | used = info->bytes_used + info->bytes_reserved + | ||
| 2910 | info->bytes_pinned + info->bytes_readonly + | ||
| 2911 | info->bytes_super + info->bytes_root + | ||
| 2912 | info->bytes_may_use + info->bytes_delalloc; | ||
| 2913 | if (used < info->total_bytes) { | ||
| 2914 | spin_unlock(&info->lock); | ||
| 2915 | break; | ||
| 2916 | } | ||
| 2917 | spin_unlock(&info->lock); | ||
| 2918 | schedule(); | ||
| 2919 | } | ||
| 2920 | finish_wait(&info->flush_wait, &wait); | ||
| 2921 | } | ||
| 2922 | |||
| 2923 | static void flush_delalloc(struct btrfs_root *root, | ||
| 2924 | struct btrfs_space_info *info) | ||
| 2925 | { | ||
| 2926 | struct async_flush *async; | ||
| 2927 | bool wait = false; | ||
| 2928 | |||
| 2929 | spin_lock(&info->lock); | ||
| 2930 | |||
| 2931 | if (!info->flushing) { | ||
| 2932 | info->flushing = 1; | ||
| 2933 | init_waitqueue_head(&info->flush_wait); | ||
| 2934 | } else { | ||
| 2935 | wait = true; | ||
| 2936 | } | ||
| 2937 | |||
| 2938 | spin_unlock(&info->lock); | ||
| 2939 | |||
| 2940 | if (wait) { | ||
| 2941 | wait_on_flush(info); | ||
| 2942 | return; | ||
| 2943 | } | ||
| 2944 | |||
| 2945 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
| 2946 | if (!async) | ||
| 2947 | goto flush; | ||
| 2948 | |||
| 2949 | async->root = root; | ||
| 2950 | async->info = info; | ||
| 2951 | async->work.func = flush_delalloc_async; | ||
| 2952 | |||
| 2953 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
| 2954 | &async->work); | ||
| 2955 | wait_on_flush(info); | ||
| 2956 | return; | ||
| 2957 | |||
| 2958 | flush: | ||
| 2959 | btrfs_start_delalloc_inodes(root); | ||
| 2960 | btrfs_wait_ordered_extents(root, 0); | ||
| 2961 | |||
| 2962 | spin_lock(&info->lock); | ||
| 2963 | info->flushing = 0; | ||
| 2964 | spin_unlock(&info->lock); | ||
| 2965 | wake_up(&info->flush_wait); | ||
| 2966 | } | ||
| 2967 | |||
| 2968 | static int maybe_allocate_chunk(struct btrfs_root *root, | ||
| 2969 | struct btrfs_space_info *info) | ||
| 2970 | { | ||
| 2971 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
| 2972 | struct btrfs_trans_handle *trans; | ||
| 2973 | bool wait = false; | ||
| 2974 | int ret = 0; | ||
| 2975 | u64 min_metadata; | ||
| 2976 | u64 free_space; | ||
| 2977 | |||
| 2978 | free_space = btrfs_super_total_bytes(disk_super); | ||
| 2979 | /* | ||
| 2980 | * we allow the metadata to grow to a max of either 10gb or 5% of the | ||
| 2981 | * space in the volume. | ||
| 2982 | */ | ||
| 2983 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | ||
| 2984 | div64_u64(free_space * 5, 100)); | ||
| 2985 | if (info->total_bytes >= min_metadata) { | ||
| 2986 | spin_unlock(&info->lock); | ||
| 2987 | return 0; | ||
| 2988 | } | ||
| 2989 | |||
| 2990 | if (info->full) { | ||
| 2991 | spin_unlock(&info->lock); | ||
| 2992 | return 0; | ||
| 2993 | } | ||
| 2994 | |||
| 2995 | if (!info->allocating_chunk) { | ||
| 2996 | info->force_alloc = 1; | ||
| 2997 | info->allocating_chunk = 1; | ||
| 2998 | init_waitqueue_head(&info->allocate_wait); | ||
| 2999 | } else { | ||
| 3000 | wait = true; | ||
| 3001 | } | ||
| 3002 | |||
| 3003 | spin_unlock(&info->lock); | ||
| 3004 | |||
| 3005 | if (wait) { | ||
| 3006 | wait_event(info->allocate_wait, | ||
| 3007 | !info->allocating_chunk); | ||
| 3008 | return 1; | ||
| 3009 | } | ||
| 3010 | |||
| 3011 | trans = btrfs_start_transaction(root, 1); | ||
| 3012 | if (!trans) { | ||
| 3013 | ret = -ENOMEM; | ||
| 3014 | goto out; | ||
| 3015 | } | ||
| 3016 | |||
| 3017 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3018 | 4096 + 2 * 1024 * 1024, | ||
| 3019 | info->flags, 0); | ||
| 3020 | btrfs_end_transaction(trans, root); | ||
| 3021 | if (ret) | ||
| 3022 | goto out; | ||
| 3023 | out: | ||
| 3024 | spin_lock(&info->lock); | ||
| 3025 | info->allocating_chunk = 0; | ||
| 3026 | spin_unlock(&info->lock); | ||
| 3027 | wake_up(&info->allocate_wait); | ||
| 3028 | |||
| 3029 | if (ret) | ||
| 3030 | return 0; | ||
| 3031 | return 1; | ||
| 3032 | } | ||
| 3033 | |||
| 3034 | /* | ||
| 3035 | * Reserve metadata space for delalloc. | ||
| 3036 | */ | ||
| 3037 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 3038 | struct inode *inode, int num_items) | ||
| 3039 | { | ||
| 3040 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3041 | struct btrfs_space_info *meta_sinfo; | ||
| 3042 | u64 num_bytes; | ||
| 3043 | u64 used; | ||
| 3044 | u64 alloc_target; | ||
| 3045 | int flushed = 0; | ||
| 3046 | int force_delalloc; | ||
| 3047 | |||
| 3048 | /* get the space info for where the metadata will live */ | ||
| 3049 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3050 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3051 | |||
| 3052 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 3053 | num_items); | ||
| 3054 | again: | ||
| 3055 | spin_lock(&meta_sinfo->lock); | ||
| 3056 | |||
| 3057 | force_delalloc = meta_sinfo->force_delalloc; | ||
| 3058 | |||
| 3059 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3060 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3061 | |||
| 3062 | if (!flushed) | ||
| 3063 | meta_sinfo->bytes_delalloc += num_bytes; | ||
| 3064 | |||
| 3065 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3066 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3067 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3068 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3069 | |||
| 3070 | if (used > meta_sinfo->total_bytes) { | ||
| 3071 | flushed++; | ||
| 3072 | |||
| 3073 | if (flushed == 1) { | ||
| 3074 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3075 | goto again; | ||
| 3076 | flushed++; | ||
| 3077 | } else { | ||
| 3078 | spin_unlock(&meta_sinfo->lock); | ||
| 3079 | } | ||
| 3080 | |||
| 3081 | if (flushed == 2) { | ||
| 3082 | filemap_flush(inode->i_mapping); | ||
| 3083 | goto again; | ||
| 3084 | } else if (flushed == 3) { | ||
| 3085 | flush_delalloc(root, meta_sinfo); | ||
| 2733 | goto again; | 3086 | goto again; |
| 2734 | } | 3087 | } |
| 3088 | spin_lock(&meta_sinfo->lock); | ||
| 3089 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2735 | spin_unlock(&meta_sinfo->lock); | 3090 | spin_unlock(&meta_sinfo->lock); |
| 3091 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
| 3092 | BTRFS_I(inode)->outstanding_extents, | ||
| 3093 | BTRFS_I(inode)->reserved_extents); | ||
| 3094 | dump_space_info(meta_sinfo, 0, 0); | ||
| 3095 | return -ENOSPC; | ||
| 3096 | } | ||
| 2736 | 3097 | ||
| 2737 | if (!committed) { | 3098 | BTRFS_I(inode)->reserved_extents++; |
| 2738 | committed = 1; | 3099 | check_force_delalloc(meta_sinfo); |
| 2739 | trans = btrfs_join_transaction(root, 1); | 3100 | spin_unlock(&meta_sinfo->lock); |
| 2740 | if (!trans) | 3101 | |
| 2741 | return -ENOMEM; | 3102 | if (!flushed && force_delalloc) |
| 2742 | ret = btrfs_commit_transaction(trans, root); | 3103 | filemap_flush(inode->i_mapping); |
| 2743 | if (ret) | 3104 | |
| 2744 | return ret; | 3105 | return 0; |
| 3106 | } | ||
| 3107 | |||
| 3108 | /* | ||
| 3109 | * unreserve num_items number of items worth of metadata space. This needs to | ||
| 3110 | * be paired with btrfs_reserve_metadata_space. | ||
| 3111 | * | ||
| 3112 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
| 3113 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
| 3114 | * operations which will result in more used metadata, so we want to make sure we | ||
| 3115 | * can do that without issue. | ||
| 3116 | */ | ||
| 3117 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3118 | { | ||
| 3119 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3120 | struct btrfs_space_info *meta_sinfo; | ||
| 3121 | u64 num_bytes; | ||
| 3122 | u64 alloc_target; | ||
| 3123 | bool bug = false; | ||
| 3124 | |||
| 3125 | /* get the space info for where the metadata will live */ | ||
| 3126 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3127 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3128 | |||
| 3129 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3130 | |||
| 3131 | spin_lock(&meta_sinfo->lock); | ||
| 3132 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
| 3133 | bug = true; | ||
| 3134 | meta_sinfo->bytes_may_use = 0; | ||
| 3135 | } else { | ||
| 3136 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3137 | } | ||
| 3138 | spin_unlock(&meta_sinfo->lock); | ||
| 3139 | |||
| 3140 | BUG_ON(bug); | ||
| 3141 | |||
| 3142 | return 0; | ||
| 3143 | } | ||
| 3144 | |||
| 3145 | /* | ||
| 3146 | * Reserve some metadata space for use. We'll calculate the worst case number | ||
| 3147 | * of bytes that would be needed to modify num_items number of items. If we | ||
| 3148 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
| 3149 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
| 3150 | * items you reserved, since whatever metadata you needed should have already | ||
| 3151 | * been allocated. | ||
| 3152 | * | ||
| 3153 | * This will commit the transaction to make more space if we don't have enough | ||
| 3154 | * metadata space. The only time we don't do this is if we're reserving space | ||
| 3155 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
| 3156 | * callers responsibility to handle it properly. | ||
| 3157 | */ | ||
| 3158 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3159 | { | ||
| 3160 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3161 | struct btrfs_space_info *meta_sinfo; | ||
| 3162 | u64 num_bytes; | ||
| 3163 | u64 used; | ||
| 3164 | u64 alloc_target; | ||
| 3165 | int retries = 0; | ||
| 3166 | |||
| 3167 | /* get the space info for where the metadata will live */ | ||
| 3168 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3169 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3170 | |||
| 3171 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3172 | again: | ||
| 3173 | spin_lock(&meta_sinfo->lock); | ||
| 3174 | |||
| 3175 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3176 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3177 | |||
| 3178 | if (!retries) | ||
| 3179 | meta_sinfo->bytes_may_use += num_bytes; | ||
| 3180 | |||
| 3181 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3182 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3183 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3184 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3185 | |||
| 3186 | if (used > meta_sinfo->total_bytes) { | ||
| 3187 | retries++; | ||
| 3188 | if (retries == 1) { | ||
| 3189 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3190 | goto again; | ||
| 3191 | retries++; | ||
| 3192 | } else { | ||
| 3193 | spin_unlock(&meta_sinfo->lock); | ||
| 3194 | } | ||
| 3195 | |||
| 3196 | if (retries == 2) { | ||
| 3197 | flush_delalloc(root, meta_sinfo); | ||
| 2745 | goto again; | 3198 | goto again; |
| 2746 | } | 3199 | } |
| 3200 | spin_lock(&meta_sinfo->lock); | ||
| 3201 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3202 | spin_unlock(&meta_sinfo->lock); | ||
| 3203 | |||
| 3204 | dump_space_info(meta_sinfo, 0, 0); | ||
| 2747 | return -ENOSPC; | 3205 | return -ENOSPC; |
| 2748 | } | 3206 | } |
| 3207 | |||
| 3208 | check_force_delalloc(meta_sinfo); | ||
| 2749 | spin_unlock(&meta_sinfo->lock); | 3209 | spin_unlock(&meta_sinfo->lock); |
| 2750 | 3210 | ||
| 2751 | return 0; | 3211 | return 0; |
| @@ -2765,13 +3225,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
| 2765 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3225 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| 2766 | 3226 | ||
| 2767 | data_sinfo = BTRFS_I(inode)->space_info; | 3227 | data_sinfo = BTRFS_I(inode)->space_info; |
| 3228 | if (!data_sinfo) | ||
| 3229 | goto alloc; | ||
| 3230 | |||
| 2768 | again: | 3231 | again: |
| 2769 | /* make sure we have enough space to handle the data first */ | 3232 | /* make sure we have enough space to handle the data first */ |
| 2770 | spin_lock(&data_sinfo->lock); | 3233 | spin_lock(&data_sinfo->lock); |
| 2771 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | 3234 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - |
| 2772 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | 3235 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - |
| 2773 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | 3236 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - |
| 2774 | data_sinfo->bytes_may_use < bytes) { | 3237 | data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { |
| 2775 | struct btrfs_trans_handle *trans; | 3238 | struct btrfs_trans_handle *trans; |
| 2776 | 3239 | ||
| 2777 | /* | 3240 | /* |
| @@ -2783,7 +3246,7 @@ again: | |||
| 2783 | 3246 | ||
| 2784 | data_sinfo->force_alloc = 1; | 3247 | data_sinfo->force_alloc = 1; |
| 2785 | spin_unlock(&data_sinfo->lock); | 3248 | spin_unlock(&data_sinfo->lock); |
| 2786 | 3249 | alloc: | |
| 2787 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3250 | alloc_target = btrfs_get_alloc_profile(root, 1); |
| 2788 | trans = btrfs_start_transaction(root, 1); | 3251 | trans = btrfs_start_transaction(root, 1); |
| 2789 | if (!trans) | 3252 | if (!trans) |
| @@ -2795,12 +3258,17 @@ again: | |||
| 2795 | btrfs_end_transaction(trans, root); | 3258 | btrfs_end_transaction(trans, root); |
| 2796 | if (ret) | 3259 | if (ret) |
| 2797 | return ret; | 3260 | return ret; |
| 3261 | |||
| 3262 | if (!data_sinfo) { | ||
| 3263 | btrfs_set_inode_space_info(root, inode); | ||
| 3264 | data_sinfo = BTRFS_I(inode)->space_info; | ||
| 3265 | } | ||
| 2798 | goto again; | 3266 | goto again; |
| 2799 | } | 3267 | } |
| 2800 | spin_unlock(&data_sinfo->lock); | 3268 | spin_unlock(&data_sinfo->lock); |
| 2801 | 3269 | ||
| 2802 | /* commit the current transaction and try again */ | 3270 | /* commit the current transaction and try again */ |
| 2803 | if (!committed) { | 3271 | if (!committed && !root->fs_info->open_ioctl_trans) { |
| 2804 | committed = 1; | 3272 | committed = 1; |
| 2805 | trans = btrfs_join_transaction(root, 1); | 3273 | trans = btrfs_join_transaction(root, 1); |
| 2806 | if (!trans) | 3274 | if (!trans) |
| @@ -2828,7 +3296,7 @@ again: | |||
| 2828 | BTRFS_I(inode)->reserved_bytes += bytes; | 3296 | BTRFS_I(inode)->reserved_bytes += bytes; |
| 2829 | spin_unlock(&data_sinfo->lock); | 3297 | spin_unlock(&data_sinfo->lock); |
| 2830 | 3298 | ||
| 2831 | return btrfs_check_metadata_free_space(root); | 3299 | return 0; |
| 2832 | } | 3300 | } |
| 2833 | 3301 | ||
| 2834 | /* | 3302 | /* |
| @@ -2927,17 +3395,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 2927 | BUG_ON(!space_info); | 3395 | BUG_ON(!space_info); |
| 2928 | 3396 | ||
| 2929 | spin_lock(&space_info->lock); | 3397 | spin_lock(&space_info->lock); |
| 2930 | if (space_info->force_alloc) { | 3398 | if (space_info->force_alloc) |
| 2931 | force = 1; | 3399 | force = 1; |
| 2932 | space_info->force_alloc = 0; | ||
| 2933 | } | ||
| 2934 | if (space_info->full) { | 3400 | if (space_info->full) { |
| 2935 | spin_unlock(&space_info->lock); | 3401 | spin_unlock(&space_info->lock); |
| 2936 | goto out; | 3402 | goto out; |
| 2937 | } | 3403 | } |
| 2938 | 3404 | ||
| 2939 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3405 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
| 2940 | thresh = div_factor(thresh, 6); | 3406 | thresh = div_factor(thresh, 8); |
| 2941 | if (!force && | 3407 | if (!force && |
| 2942 | (space_info->bytes_used + space_info->bytes_pinned + | 3408 | (space_info->bytes_used + space_info->bytes_pinned + |
| 2943 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3409 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
| @@ -2951,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 2951 | * we keep a reasonable number of metadata chunks allocated in the | 3417 | * we keep a reasonable number of metadata chunks allocated in the |
| 2952 | * FS as well. | 3418 | * FS as well. |
| 2953 | */ | 3419 | */ |
| 2954 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3420 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
| 2955 | fs_info->data_chunk_allocations++; | 3421 | fs_info->data_chunk_allocations++; |
| 2956 | if (!(fs_info->data_chunk_allocations % | 3422 | if (!(fs_info->data_chunk_allocations % |
| 2957 | fs_info->metadata_ratio)) | 3423 | fs_info->metadata_ratio)) |
| @@ -2959,8 +3425,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 2959 | } | 3425 | } |
| 2960 | 3426 | ||
| 2961 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3427 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
| 3428 | spin_lock(&space_info->lock); | ||
| 2962 | if (ret) | 3429 | if (ret) |
| 2963 | space_info->full = 1; | 3430 | space_info->full = 1; |
| 3431 | space_info->force_alloc = 0; | ||
| 3432 | spin_unlock(&space_info->lock); | ||
| 2964 | out: | 3433 | out: |
| 2965 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3434 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
| 2966 | return ret; | 3435 | return ret; |
| @@ -3009,10 +3478,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 3009 | num_bytes = min(total, cache->key.offset - byte_in_group); | 3478 | num_bytes = min(total, cache->key.offset - byte_in_group); |
| 3010 | if (alloc) { | 3479 | if (alloc) { |
| 3011 | old_val += num_bytes; | 3480 | old_val += num_bytes; |
| 3481 | btrfs_set_block_group_used(&cache->item, old_val); | ||
| 3482 | cache->reserved -= num_bytes; | ||
| 3012 | cache->space_info->bytes_used += num_bytes; | 3483 | cache->space_info->bytes_used += num_bytes; |
| 3484 | cache->space_info->bytes_reserved -= num_bytes; | ||
| 3013 | if (cache->ro) | 3485 | if (cache->ro) |
| 3014 | cache->space_info->bytes_readonly -= num_bytes; | 3486 | cache->space_info->bytes_readonly -= num_bytes; |
| 3015 | btrfs_set_block_group_used(&cache->item, old_val); | ||
| 3016 | spin_unlock(&cache->lock); | 3487 | spin_unlock(&cache->lock); |
| 3017 | spin_unlock(&cache->space_info->lock); | 3488 | spin_unlock(&cache->space_info->lock); |
| 3018 | } else { | 3489 | } else { |
| @@ -3057,127 +3528,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
| 3057 | return bytenr; | 3528 | return bytenr; |
| 3058 | } | 3529 | } |
| 3059 | 3530 | ||
| 3060 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 3531 | /* |
| 3061 | u64 bytenr, u64 num, int pin) | 3532 | * this function must be called within transaction |
| 3533 | */ | ||
| 3534 | int btrfs_pin_extent(struct btrfs_root *root, | ||
| 3535 | u64 bytenr, u64 num_bytes, int reserved) | ||
| 3062 | { | 3536 | { |
| 3063 | u64 len; | ||
| 3064 | struct btrfs_block_group_cache *cache; | ||
| 3065 | struct btrfs_fs_info *fs_info = root->fs_info; | 3537 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3538 | struct btrfs_block_group_cache *cache; | ||
| 3066 | 3539 | ||
| 3067 | if (pin) | 3540 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
| 3068 | set_extent_dirty(&fs_info->pinned_extents, | 3541 | BUG_ON(!cache); |
| 3069 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
| 3070 | |||
| 3071 | while (num > 0) { | ||
| 3072 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 3073 | BUG_ON(!cache); | ||
| 3074 | len = min(num, cache->key.offset - | ||
| 3075 | (bytenr - cache->key.objectid)); | ||
| 3076 | if (pin) { | ||
| 3077 | spin_lock(&cache->space_info->lock); | ||
| 3078 | spin_lock(&cache->lock); | ||
| 3079 | cache->pinned += len; | ||
| 3080 | cache->space_info->bytes_pinned += len; | ||
| 3081 | spin_unlock(&cache->lock); | ||
| 3082 | spin_unlock(&cache->space_info->lock); | ||
| 3083 | fs_info->total_pinned += len; | ||
| 3084 | } else { | ||
| 3085 | int unpin = 0; | ||
| 3086 | 3542 | ||
| 3087 | /* | 3543 | spin_lock(&cache->space_info->lock); |
| 3088 | * in order to not race with the block group caching, we | 3544 | spin_lock(&cache->lock); |
| 3089 | * only want to unpin the extent if we are cached. If | 3545 | cache->pinned += num_bytes; |
| 3090 | * we aren't cached, we want to start async caching this | 3546 | cache->space_info->bytes_pinned += num_bytes; |
| 3091 | * block group so we can free the extent the next time | 3547 | if (reserved) { |
| 3092 | * around. | 3548 | cache->reserved -= num_bytes; |
| 3093 | */ | 3549 | cache->space_info->bytes_reserved -= num_bytes; |
| 3094 | spin_lock(&cache->space_info->lock); | 3550 | } |
| 3095 | spin_lock(&cache->lock); | 3551 | spin_unlock(&cache->lock); |
| 3096 | unpin = (cache->cached == BTRFS_CACHE_FINISHED); | 3552 | spin_unlock(&cache->space_info->lock); |
| 3097 | if (likely(unpin)) { | ||
| 3098 | cache->pinned -= len; | ||
| 3099 | cache->space_info->bytes_pinned -= len; | ||
| 3100 | fs_info->total_pinned -= len; | ||
| 3101 | } | ||
| 3102 | spin_unlock(&cache->lock); | ||
| 3103 | spin_unlock(&cache->space_info->lock); | ||
| 3104 | 3553 | ||
| 3105 | if (likely(unpin)) | 3554 | btrfs_put_block_group(cache); |
| 3106 | clear_extent_dirty(&fs_info->pinned_extents, | ||
| 3107 | bytenr, bytenr + len -1, | ||
| 3108 | GFP_NOFS); | ||
| 3109 | else | ||
| 3110 | cache_block_group(cache); | ||
| 3111 | 3555 | ||
| 3112 | if (unpin) | 3556 | set_extent_dirty(fs_info->pinned_extents, |
| 3113 | btrfs_add_free_space(cache, bytenr, len); | 3557 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); |
| 3114 | } | 3558 | return 0; |
| 3115 | btrfs_put_block_group(cache); | 3559 | } |
| 3116 | bytenr += len; | 3560 | |
| 3117 | num -= len; | 3561 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, |
| 3562 | u64 num_bytes, int reserve) | ||
| 3563 | { | ||
| 3564 | spin_lock(&cache->space_info->lock); | ||
| 3565 | spin_lock(&cache->lock); | ||
| 3566 | if (reserve) { | ||
| 3567 | cache->reserved += num_bytes; | ||
| 3568 | cache->space_info->bytes_reserved += num_bytes; | ||
| 3569 | } else { | ||
| 3570 | cache->reserved -= num_bytes; | ||
| 3571 | cache->space_info->bytes_reserved -= num_bytes; | ||
| 3118 | } | 3572 | } |
| 3573 | spin_unlock(&cache->lock); | ||
| 3574 | spin_unlock(&cache->space_info->lock); | ||
| 3119 | return 0; | 3575 | return 0; |
| 3120 | } | 3576 | } |
| 3121 | 3577 | ||
| 3122 | static int update_reserved_extents(struct btrfs_root *root, | 3578 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
| 3123 | u64 bytenr, u64 num, int reserve) | 3579 | struct btrfs_root *root) |
| 3124 | { | 3580 | { |
| 3125 | u64 len; | ||
| 3126 | struct btrfs_block_group_cache *cache; | ||
| 3127 | struct btrfs_fs_info *fs_info = root->fs_info; | 3581 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3582 | struct btrfs_caching_control *next; | ||
| 3583 | struct btrfs_caching_control *caching_ctl; | ||
| 3584 | struct btrfs_block_group_cache *cache; | ||
| 3128 | 3585 | ||
| 3129 | while (num > 0) { | 3586 | down_write(&fs_info->extent_commit_sem); |
| 3130 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 3131 | BUG_ON(!cache); | ||
| 3132 | len = min(num, cache->key.offset - | ||
| 3133 | (bytenr - cache->key.objectid)); | ||
| 3134 | 3587 | ||
| 3135 | spin_lock(&cache->space_info->lock); | 3588 | list_for_each_entry_safe(caching_ctl, next, |
| 3136 | spin_lock(&cache->lock); | 3589 | &fs_info->caching_block_groups, list) { |
| 3137 | if (reserve) { | 3590 | cache = caching_ctl->block_group; |
| 3138 | cache->reserved += len; | 3591 | if (block_group_cache_done(cache)) { |
| 3139 | cache->space_info->bytes_reserved += len; | 3592 | cache->last_byte_to_unpin = (u64)-1; |
| 3593 | list_del_init(&caching_ctl->list); | ||
| 3594 | put_caching_control(caching_ctl); | ||
| 3140 | } else { | 3595 | } else { |
| 3141 | cache->reserved -= len; | 3596 | cache->last_byte_to_unpin = caching_ctl->progress; |
| 3142 | cache->space_info->bytes_reserved -= len; | ||
| 3143 | } | 3597 | } |
| 3144 | spin_unlock(&cache->lock); | ||
| 3145 | spin_unlock(&cache->space_info->lock); | ||
| 3146 | btrfs_put_block_group(cache); | ||
| 3147 | bytenr += len; | ||
| 3148 | num -= len; | ||
| 3149 | } | 3598 | } |
| 3599 | |||
| 3600 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
| 3601 | fs_info->pinned_extents = &fs_info->freed_extents[1]; | ||
| 3602 | else | ||
| 3603 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
| 3604 | |||
| 3605 | up_write(&fs_info->extent_commit_sem); | ||
| 3150 | return 0; | 3606 | return 0; |
| 3151 | } | 3607 | } |
| 3152 | 3608 | ||
| 3153 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | 3609 | static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) |
| 3154 | { | 3610 | { |
| 3155 | u64 last = 0; | 3611 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3156 | u64 start; | 3612 | struct btrfs_block_group_cache *cache = NULL; |
| 3157 | u64 end; | 3613 | u64 len; |
| 3158 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | ||
| 3159 | int ret; | ||
| 3160 | 3614 | ||
| 3161 | while (1) { | 3615 | while (start <= end) { |
| 3162 | ret = find_first_extent_bit(pinned_extents, last, | 3616 | if (!cache || |
| 3163 | &start, &end, EXTENT_DIRTY); | 3617 | start >= cache->key.objectid + cache->key.offset) { |
| 3164 | if (ret) | 3618 | if (cache) |
| 3165 | break; | 3619 | btrfs_put_block_group(cache); |
| 3620 | cache = btrfs_lookup_block_group(fs_info, start); | ||
| 3621 | BUG_ON(!cache); | ||
| 3622 | } | ||
| 3623 | |||
| 3624 | len = cache->key.objectid + cache->key.offset - start; | ||
| 3625 | len = min(len, end + 1 - start); | ||
| 3626 | |||
| 3627 | if (start < cache->last_byte_to_unpin) { | ||
| 3628 | len = min(len, cache->last_byte_to_unpin - start); | ||
| 3629 | btrfs_add_free_space(cache, start, len); | ||
| 3630 | } | ||
| 3166 | 3631 | ||
| 3167 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3632 | spin_lock(&cache->space_info->lock); |
| 3168 | last = end + 1; | 3633 | spin_lock(&cache->lock); |
| 3634 | cache->pinned -= len; | ||
| 3635 | cache->space_info->bytes_pinned -= len; | ||
| 3636 | spin_unlock(&cache->lock); | ||
| 3637 | spin_unlock(&cache->space_info->lock); | ||
| 3638 | |||
| 3639 | start += len; | ||
| 3169 | } | 3640 | } |
| 3641 | |||
| 3642 | if (cache) | ||
| 3643 | btrfs_put_block_group(cache); | ||
| 3170 | return 0; | 3644 | return 0; |
| 3171 | } | 3645 | } |
| 3172 | 3646 | ||
| 3173 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 3647 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
| 3174 | struct btrfs_root *root, | 3648 | struct btrfs_root *root) |
| 3175 | struct extent_io_tree *unpin) | ||
| 3176 | { | 3649 | { |
| 3650 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 3651 | struct extent_io_tree *unpin; | ||
| 3177 | u64 start; | 3652 | u64 start; |
| 3178 | u64 end; | 3653 | u64 end; |
| 3179 | int ret; | 3654 | int ret; |
| 3180 | 3655 | ||
| 3656 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
| 3657 | unpin = &fs_info->freed_extents[1]; | ||
| 3658 | else | ||
| 3659 | unpin = &fs_info->freed_extents[0]; | ||
| 3660 | |||
| 3181 | while (1) { | 3661 | while (1) { |
| 3182 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3662 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
| 3183 | EXTENT_DIRTY); | 3663 | EXTENT_DIRTY); |
| @@ -3186,10 +3666,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3186 | 3666 | ||
| 3187 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 3667 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
| 3188 | 3668 | ||
| 3189 | /* unlocks the pinned mutex */ | ||
| 3190 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | ||
| 3191 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 3669 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
| 3192 | 3670 | unpin_extent_range(root, start, end); | |
| 3193 | cond_resched(); | 3671 | cond_resched(); |
| 3194 | } | 3672 | } |
| 3195 | 3673 | ||
| @@ -3199,7 +3677,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3199 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 3677 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
| 3200 | struct btrfs_root *root, | 3678 | struct btrfs_root *root, |
| 3201 | struct btrfs_path *path, | 3679 | struct btrfs_path *path, |
| 3202 | u64 bytenr, u64 num_bytes, int is_data, | 3680 | u64 bytenr, u64 num_bytes, |
| 3681 | int is_data, int reserved, | ||
| 3203 | struct extent_buffer **must_clean) | 3682 | struct extent_buffer **must_clean) |
| 3204 | { | 3683 | { |
| 3205 | int err = 0; | 3684 | int err = 0; |
| @@ -3208,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
| 3208 | if (is_data) | 3687 | if (is_data) |
| 3209 | goto pinit; | 3688 | goto pinit; |
| 3210 | 3689 | ||
| 3690 | /* | ||
| 3691 | * discard is sloooow, and so triggering discards on | ||
| 3692 | * individual btree blocks isn't a good plan. Just | ||
| 3693 | * pin everything in discard mode. | ||
| 3694 | */ | ||
| 3695 | if (btrfs_test_opt(root, DISCARD)) | ||
| 3696 | goto pinit; | ||
| 3697 | |||
| 3211 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | 3698 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); |
| 3212 | if (!buf) | 3699 | if (!buf) |
| 3213 | goto pinit; | 3700 | goto pinit; |
| @@ -3231,15 +3718,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
| 3231 | } | 3718 | } |
| 3232 | free_extent_buffer(buf); | 3719 | free_extent_buffer(buf); |
| 3233 | pinit: | 3720 | pinit: |
| 3234 | btrfs_set_path_blocking(path); | 3721 | if (path) |
| 3722 | btrfs_set_path_blocking(path); | ||
| 3235 | /* unlocks the pinned mutex */ | 3723 | /* unlocks the pinned mutex */ |
| 3236 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3724 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); |
| 3237 | 3725 | ||
| 3238 | BUG_ON(err < 0); | 3726 | BUG_ON(err < 0); |
| 3239 | return 0; | 3727 | return 0; |
| 3240 | } | 3728 | } |
| 3241 | 3729 | ||
| 3242 | |||
| 3243 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 3730 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 3244 | struct btrfs_root *root, | 3731 | struct btrfs_root *root, |
| 3245 | u64 bytenr, u64 num_bytes, u64 parent, | 3732 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -3413,7 +3900,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3413 | } | 3900 | } |
| 3414 | 3901 | ||
| 3415 | ret = pin_down_bytes(trans, root, path, bytenr, | 3902 | ret = pin_down_bytes(trans, root, path, bytenr, |
| 3416 | num_bytes, is_data, &must_clean); | 3903 | num_bytes, is_data, 0, &must_clean); |
| 3417 | if (ret > 0) | 3904 | if (ret > 0) |
| 3418 | mark_free = 1; | 3905 | mark_free = 1; |
| 3419 | BUG_ON(ret < 0); | 3906 | BUG_ON(ret < 0); |
| @@ -3544,8 +4031,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3544 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { | 4031 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
| 3545 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 4032 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
| 3546 | /* unlocks the pinned mutex */ | 4033 | /* unlocks the pinned mutex */ |
| 3547 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 4034 | btrfs_pin_extent(root, bytenr, num_bytes, 1); |
| 3548 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
| 3549 | ret = 0; | 4035 | ret = 0; |
| 3550 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 4036 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
| 3551 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 4037 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
| @@ -3585,24 +4071,38 @@ static noinline int | |||
| 3585 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | 4071 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, |
| 3586 | u64 num_bytes) | 4072 | u64 num_bytes) |
| 3587 | { | 4073 | { |
| 4074 | struct btrfs_caching_control *caching_ctl; | ||
| 3588 | DEFINE_WAIT(wait); | 4075 | DEFINE_WAIT(wait); |
| 3589 | 4076 | ||
| 3590 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | 4077 | caching_ctl = get_caching_control(cache); |
| 3591 | 4078 | if (!caching_ctl) | |
| 3592 | if (block_group_cache_done(cache)) { | ||
| 3593 | finish_wait(&cache->caching_q, &wait); | ||
| 3594 | return 0; | 4079 | return 0; |
| 3595 | } | ||
| 3596 | schedule(); | ||
| 3597 | finish_wait(&cache->caching_q, &wait); | ||
| 3598 | 4080 | ||
| 3599 | wait_event(cache->caching_q, block_group_cache_done(cache) || | 4081 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
| 3600 | (cache->free_space >= num_bytes)); | 4082 | (cache->free_space >= num_bytes)); |
| 4083 | |||
| 4084 | put_caching_control(caching_ctl); | ||
| 4085 | return 0; | ||
| 4086 | } | ||
| 4087 | |||
| 4088 | static noinline int | ||
| 4089 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
| 4090 | { | ||
| 4091 | struct btrfs_caching_control *caching_ctl; | ||
| 4092 | DEFINE_WAIT(wait); | ||
| 4093 | |||
| 4094 | caching_ctl = get_caching_control(cache); | ||
| 4095 | if (!caching_ctl) | ||
| 4096 | return 0; | ||
| 4097 | |||
| 4098 | wait_event(caching_ctl->wait, block_group_cache_done(cache)); | ||
| 4099 | |||
| 4100 | put_caching_control(caching_ctl); | ||
| 3601 | return 0; | 4101 | return 0; |
| 3602 | } | 4102 | } |
| 3603 | 4103 | ||
| 3604 | enum btrfs_loop_type { | 4104 | enum btrfs_loop_type { |
| 3605 | LOOP_CACHED_ONLY = 0, | 4105 | LOOP_FIND_IDEAL = 0, |
| 3606 | LOOP_CACHING_NOWAIT = 1, | 4106 | LOOP_CACHING_NOWAIT = 1, |
| 3607 | LOOP_CACHING_WAIT = 2, | 4107 | LOOP_CACHING_WAIT = 2, |
| 3608 | LOOP_ALLOC_CHUNK = 3, | 4108 | LOOP_ALLOC_CHUNK = 3, |
| @@ -3631,10 +4131,15 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 3631 | struct btrfs_block_group_cache *block_group = NULL; | 4131 | struct btrfs_block_group_cache *block_group = NULL; |
| 3632 | int empty_cluster = 2 * 1024 * 1024; | 4132 | int empty_cluster = 2 * 1024 * 1024; |
| 3633 | int allowed_chunk_alloc = 0; | 4133 | int allowed_chunk_alloc = 0; |
| 4134 | int done_chunk_alloc = 0; | ||
| 3634 | struct btrfs_space_info *space_info; | 4135 | struct btrfs_space_info *space_info; |
| 3635 | int last_ptr_loop = 0; | 4136 | int last_ptr_loop = 0; |
| 3636 | int loop = 0; | 4137 | int loop = 0; |
| 3637 | bool found_uncached_bg = false; | 4138 | bool found_uncached_bg = false; |
| 4139 | bool failed_cluster_refill = false; | ||
| 4140 | bool failed_alloc = false; | ||
| 4141 | u64 ideal_cache_percent = 0; | ||
| 4142 | u64 ideal_cache_offset = 0; | ||
| 3638 | 4143 | ||
| 3639 | WARN_ON(num_bytes < root->sectorsize); | 4144 | WARN_ON(num_bytes < root->sectorsize); |
| 3640 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 4145 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
| @@ -3670,14 +4175,19 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 3670 | empty_cluster = 0; | 4175 | empty_cluster = 0; |
| 3671 | 4176 | ||
| 3672 | if (search_start == hint_byte) { | 4177 | if (search_start == hint_byte) { |
| 4178 | ideal_cache: | ||
| 3673 | block_group = btrfs_lookup_block_group(root->fs_info, | 4179 | block_group = btrfs_lookup_block_group(root->fs_info, |
| 3674 | search_start); | 4180 | search_start); |
| 3675 | /* | 4181 | /* |
| 3676 | * we don't want to use the block group if it doesn't match our | 4182 | * we don't want to use the block group if it doesn't match our |
| 3677 | * allocation bits, or if its not cached. | 4183 | * allocation bits, or if its not cached. |
| 4184 | * | ||
| 4185 | * However if we are re-searching with an ideal block group | ||
| 4186 | * picked out then we don't care that the block group is cached. | ||
| 3678 | */ | 4187 | */ |
| 3679 | if (block_group && block_group_bits(block_group, data) && | 4188 | if (block_group && block_group_bits(block_group, data) && |
| 3680 | block_group_cache_done(block_group)) { | 4189 | (block_group->cached != BTRFS_CACHE_NO || |
| 4190 | search_start == ideal_cache_offset)) { | ||
| 3681 | down_read(&space_info->groups_sem); | 4191 | down_read(&space_info->groups_sem); |
| 3682 | if (list_empty(&block_group->list) || | 4192 | if (list_empty(&block_group->list) || |
| 3683 | block_group->ro) { | 4193 | block_group->ro) { |
| @@ -3689,13 +4199,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 3689 | */ | 4199 | */ |
| 3690 | btrfs_put_block_group(block_group); | 4200 | btrfs_put_block_group(block_group); |
| 3691 | up_read(&space_info->groups_sem); | 4201 | up_read(&space_info->groups_sem); |
| 3692 | } else | 4202 | } else { |
| 3693 | goto have_block_group; | 4203 | goto have_block_group; |
| 4204 | } | ||
| 3694 | } else if (block_group) { | 4205 | } else if (block_group) { |
| 3695 | btrfs_put_block_group(block_group); | 4206 | btrfs_put_block_group(block_group); |
| 3696 | } | 4207 | } |
| 3697 | } | 4208 | } |
| 3698 | |||
| 3699 | search: | 4209 | search: |
| 3700 | down_read(&space_info->groups_sem); | 4210 | down_read(&space_info->groups_sem); |
| 3701 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4211 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
| @@ -3707,32 +4217,58 @@ search: | |||
| 3707 | 4217 | ||
| 3708 | have_block_group: | 4218 | have_block_group: |
| 3709 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { | 4219 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
| 4220 | u64 free_percent; | ||
| 4221 | |||
| 4222 | free_percent = btrfs_block_group_used(&block_group->item); | ||
| 4223 | free_percent *= 100; | ||
| 4224 | free_percent = div64_u64(free_percent, | ||
| 4225 | block_group->key.offset); | ||
| 4226 | free_percent = 100 - free_percent; | ||
| 4227 | if (free_percent > ideal_cache_percent && | ||
| 4228 | likely(!block_group->ro)) { | ||
| 4229 | ideal_cache_offset = block_group->key.objectid; | ||
| 4230 | ideal_cache_percent = free_percent; | ||
| 4231 | } | ||
| 4232 | |||
| 3710 | /* | 4233 | /* |
| 3711 | * we want to start caching kthreads, but not too many | 4234 | * We only want to start kthread caching if we are at |
| 3712 | * right off the bat so we don't overwhelm the system, | 4235 | * the point where we will wait for caching to make |
| 3713 | * so only start them if there are less than 2 and we're | 4236 | * progress, or if our ideal search is over and we've |
| 3714 | * in the initial allocation phase. | 4237 | * found somebody to start caching. |
| 3715 | */ | 4238 | */ |
| 3716 | if (loop > LOOP_CACHING_NOWAIT || | 4239 | if (loop > LOOP_CACHING_NOWAIT || |
| 3717 | atomic_read(&space_info->caching_threads) < 2) { | 4240 | (loop > LOOP_FIND_IDEAL && |
| 4241 | atomic_read(&space_info->caching_threads) < 2)) { | ||
| 3718 | ret = cache_block_group(block_group); | 4242 | ret = cache_block_group(block_group); |
| 3719 | BUG_ON(ret); | 4243 | BUG_ON(ret); |
| 3720 | } | 4244 | } |
| 3721 | } | ||
| 3722 | |||
| 3723 | cached = block_group_cache_done(block_group); | ||
| 3724 | if (unlikely(!cached)) { | ||
| 3725 | found_uncached_bg = true; | 4245 | found_uncached_bg = true; |
| 3726 | 4246 | ||
| 3727 | /* if we only want cached bgs, loop */ | 4247 | /* |
| 3728 | if (loop == LOOP_CACHED_ONLY) | 4248 | * If loop is set for cached only, try the next block |
| 4249 | * group. | ||
| 4250 | */ | ||
| 4251 | if (loop == LOOP_FIND_IDEAL) | ||
| 3729 | goto loop; | 4252 | goto loop; |
| 3730 | } | 4253 | } |
| 3731 | 4254 | ||
| 4255 | cached = block_group_cache_done(block_group); | ||
| 4256 | if (unlikely(!cached)) | ||
| 4257 | found_uncached_bg = true; | ||
| 4258 | |||
| 3732 | if (unlikely(block_group->ro)) | 4259 | if (unlikely(block_group->ro)) |
| 3733 | goto loop; | 4260 | goto loop; |
| 3734 | 4261 | ||
| 3735 | if (last_ptr) { | 4262 | /* |
| 4263 | * Ok we want to try and use the cluster allocator, so lets look | ||
| 4264 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | ||
| 4265 | * have tried the cluster allocator plenty of times at this | ||
| 4266 | * point and not have found anything, so we are likely way too | ||
| 4267 | * fragmented for the clustering stuff to find anything, so lets | ||
| 4268 | * just skip it and let the allocator find whatever block it can | ||
| 4269 | * find | ||
| 4270 | */ | ||
| 4271 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | ||
| 3736 | /* | 4272 | /* |
| 3737 | * the refill lock keeps out other | 4273 | * the refill lock keeps out other |
| 3738 | * people trying to start a new cluster | 4274 | * people trying to start a new cluster |
| @@ -3807,9 +4343,11 @@ refill_cluster: | |||
| 3807 | spin_unlock(&last_ptr->refill_lock); | 4343 | spin_unlock(&last_ptr->refill_lock); |
| 3808 | goto checks; | 4344 | goto checks; |
| 3809 | } | 4345 | } |
| 3810 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | 4346 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
| 4347 | && !failed_cluster_refill) { | ||
| 3811 | spin_unlock(&last_ptr->refill_lock); | 4348 | spin_unlock(&last_ptr->refill_lock); |
| 3812 | 4349 | ||
| 4350 | failed_cluster_refill = true; | ||
| 3813 | wait_block_group_cache_progress(block_group, | 4351 | wait_block_group_cache_progress(block_group, |
| 3814 | num_bytes + empty_cluster + empty_size); | 4352 | num_bytes + empty_cluster + empty_size); |
| 3815 | goto have_block_group; | 4353 | goto have_block_group; |
| @@ -3821,25 +4359,30 @@ refill_cluster: | |||
| 3821 | * cluster. Free the cluster we've been trying | 4359 | * cluster. Free the cluster we've been trying |
| 3822 | * to use, and go to the next block group | 4360 | * to use, and go to the next block group |
| 3823 | */ | 4361 | */ |
| 3824 | if (loop < LOOP_NO_EMPTY_SIZE) { | 4362 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
| 3825 | btrfs_return_cluster_to_free_space(NULL, | ||
| 3826 | last_ptr); | ||
| 3827 | spin_unlock(&last_ptr->refill_lock); | ||
| 3828 | goto loop; | ||
| 3829 | } | ||
| 3830 | spin_unlock(&last_ptr->refill_lock); | 4363 | spin_unlock(&last_ptr->refill_lock); |
| 4364 | goto loop; | ||
| 3831 | } | 4365 | } |
| 3832 | 4366 | ||
| 3833 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 4367 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
| 3834 | num_bytes, empty_size); | 4368 | num_bytes, empty_size); |
| 3835 | if (!offset && (cached || (!cached && | 4369 | /* |
| 3836 | loop == LOOP_CACHING_NOWAIT))) { | 4370 | * If we didn't find a chunk, and we haven't failed on this |
| 3837 | goto loop; | 4371 | * block group before, and this block group is in the middle of |
| 3838 | } else if (!offset && (!cached && | 4372 | * caching and we are ok with waiting, then go ahead and wait |
| 3839 | loop > LOOP_CACHING_NOWAIT)) { | 4373 | * for progress to be made, and set failed_alloc to true. |
| 4374 | * | ||
| 4375 | * If failed_alloc is true then we've already waited on this | ||
| 4376 | * block group once and should move on to the next block group. | ||
| 4377 | */ | ||
| 4378 | if (!offset && !failed_alloc && !cached && | ||
| 4379 | loop > LOOP_CACHING_NOWAIT) { | ||
| 3840 | wait_block_group_cache_progress(block_group, | 4380 | wait_block_group_cache_progress(block_group, |
| 3841 | num_bytes + empty_size); | 4381 | num_bytes + empty_size); |
| 4382 | failed_alloc = true; | ||
| 3842 | goto have_block_group; | 4383 | goto have_block_group; |
| 4384 | } else if (!offset) { | ||
| 4385 | goto loop; | ||
| 3843 | } | 4386 | } |
| 3844 | checks: | 4387 | checks: |
| 3845 | search_start = stripe_align(root, offset); | 4388 | search_start = stripe_align(root, offset); |
| @@ -3881,16 +4424,22 @@ checks: | |||
| 3881 | search_start - offset); | 4424 | search_start - offset); |
| 3882 | BUG_ON(offset > search_start); | 4425 | BUG_ON(offset > search_start); |
| 3883 | 4426 | ||
| 4427 | update_reserved_extents(block_group, num_bytes, 1); | ||
| 4428 | |||
| 3884 | /* we are all good, lets return */ | 4429 | /* we are all good, lets return */ |
| 3885 | break; | 4430 | break; |
| 3886 | loop: | 4431 | loop: |
| 4432 | failed_cluster_refill = false; | ||
| 4433 | failed_alloc = false; | ||
| 3887 | btrfs_put_block_group(block_group); | 4434 | btrfs_put_block_group(block_group); |
| 3888 | } | 4435 | } |
| 3889 | up_read(&space_info->groups_sem); | 4436 | up_read(&space_info->groups_sem); |
| 3890 | 4437 | ||
| 3891 | /* LOOP_CACHED_ONLY, only search fully cached block groups | 4438 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
| 3892 | * LOOP_CACHING_NOWAIT, search partially cached block groups, but | 4439 | * for them to make caching progress. Also |
| 3893 | * dont wait foR them to finish caching | 4440 | * determine the best possible bg to cache |
| 4441 | * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking | ||
| 4442 | * caching kthreads as we move along | ||
| 3894 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching | 4443 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching |
| 3895 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again | 4444 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again |
| 3896 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | 4445 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try |
| @@ -3899,12 +4448,47 @@ loop: | |||
| 3899 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4448 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
| 3900 | (found_uncached_bg || empty_size || empty_cluster || | 4449 | (found_uncached_bg || empty_size || empty_cluster || |
| 3901 | allowed_chunk_alloc)) { | 4450 | allowed_chunk_alloc)) { |
| 3902 | if (found_uncached_bg) { | 4451 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
| 3903 | found_uncached_bg = false; | 4452 | found_uncached_bg = false; |
| 3904 | if (loop < LOOP_CACHING_WAIT) { | 4453 | loop++; |
| 3905 | loop++; | 4454 | if (!ideal_cache_percent && |
| 4455 | atomic_read(&space_info->caching_threads)) | ||
| 3906 | goto search; | 4456 | goto search; |
| 3907 | } | 4457 | |
| 4458 | /* | ||
| 4459 | * 1 of the following 2 things have happened so far | ||
| 4460 | * | ||
| 4461 | * 1) We found an ideal block group for caching that | ||
| 4462 | * is mostly full and will cache quickly, so we might | ||
| 4463 | * as well wait for it. | ||
| 4464 | * | ||
| 4465 | * 2) We searched for cached only and we didn't find | ||
| 4466 | * anything, and we didn't start any caching kthreads | ||
| 4467 | * either, so chances are we will loop through and | ||
| 4468 | * start a couple caching kthreads, and then come back | ||
| 4469 | * around and just wait for them. This will be slower | ||
| 4470 | * because we will have 2 caching kthreads reading at | ||
| 4471 | * the same time when we could have just started one | ||
| 4472 | * and waited for it to get far enough to give us an | ||
| 4473 | * allocation, so go ahead and go to the wait caching | ||
| 4474 | * loop. | ||
| 4475 | */ | ||
| 4476 | loop = LOOP_CACHING_WAIT; | ||
| 4477 | search_start = ideal_cache_offset; | ||
| 4478 | ideal_cache_percent = 0; | ||
| 4479 | goto ideal_cache; | ||
| 4480 | } else if (loop == LOOP_FIND_IDEAL) { | ||
| 4481 | /* | ||
| 4482 | * Didn't find a uncached bg, wait on anything we find | ||
| 4483 | * next. | ||
| 4484 | */ | ||
| 4485 | loop = LOOP_CACHING_WAIT; | ||
| 4486 | goto search; | ||
| 4487 | } | ||
| 4488 | |||
| 4489 | if (loop < LOOP_CACHING_WAIT) { | ||
| 4490 | loop++; | ||
| 4491 | goto search; | ||
| 3908 | } | 4492 | } |
| 3909 | 4493 | ||
| 3910 | if (loop == LOOP_ALLOC_CHUNK) { | 4494 | if (loop == LOOP_ALLOC_CHUNK) { |
| @@ -3916,7 +4500,8 @@ loop: | |||
| 3916 | ret = do_chunk_alloc(trans, root, num_bytes + | 4500 | ret = do_chunk_alloc(trans, root, num_bytes + |
| 3917 | 2 * 1024 * 1024, data, 1); | 4501 | 2 * 1024 * 1024, data, 1); |
| 3918 | allowed_chunk_alloc = 0; | 4502 | allowed_chunk_alloc = 0; |
| 3919 | } else { | 4503 | done_chunk_alloc = 1; |
| 4504 | } else if (!done_chunk_alloc) { | ||
| 3920 | space_info->force_alloc = 1; | 4505 | space_info->force_alloc = 1; |
| 3921 | } | 4506 | } |
| 3922 | 4507 | ||
| @@ -3941,21 +4526,32 @@ loop: | |||
| 3941 | return ret; | 4526 | return ret; |
| 3942 | } | 4527 | } |
| 3943 | 4528 | ||
| 3944 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4529 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| 4530 | int dump_block_groups) | ||
| 3945 | { | 4531 | { |
| 3946 | struct btrfs_block_group_cache *cache; | 4532 | struct btrfs_block_group_cache *cache; |
| 3947 | 4533 | ||
| 4534 | spin_lock(&info->lock); | ||
| 3948 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4535 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
| 3949 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4536 | (unsigned long long)(info->total_bytes - info->bytes_used - |
| 3950 | info->bytes_pinned - info->bytes_reserved), | 4537 | info->bytes_pinned - info->bytes_reserved - |
| 4538 | info->bytes_super), | ||
| 3951 | (info->full) ? "" : "not "); | 4539 | (info->full) ? "" : "not "); |
| 3952 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4540 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
| 3953 | " may_use=%llu, used=%llu\n", | 4541 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
| 4542 | "\n", | ||
| 3954 | (unsigned long long)info->total_bytes, | 4543 | (unsigned long long)info->total_bytes, |
| 3955 | (unsigned long long)info->bytes_pinned, | 4544 | (unsigned long long)info->bytes_pinned, |
| 3956 | (unsigned long long)info->bytes_delalloc, | 4545 | (unsigned long long)info->bytes_delalloc, |
| 3957 | (unsigned long long)info->bytes_may_use, | 4546 | (unsigned long long)info->bytes_may_use, |
| 3958 | (unsigned long long)info->bytes_used); | 4547 | (unsigned long long)info->bytes_used, |
| 4548 | (unsigned long long)info->bytes_root, | ||
| 4549 | (unsigned long long)info->bytes_super, | ||
| 4550 | (unsigned long long)info->bytes_reserved); | ||
| 4551 | spin_unlock(&info->lock); | ||
| 4552 | |||
| 4553 | if (!dump_block_groups) | ||
| 4554 | return; | ||
| 3959 | 4555 | ||
| 3960 | down_read(&info->groups_sem); | 4556 | down_read(&info->groups_sem); |
| 3961 | list_for_each_entry(cache, &info->block_groups, list) { | 4557 | list_for_each_entry(cache, &info->block_groups, list) { |
| @@ -3973,12 +4569,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
| 3973 | up_read(&info->groups_sem); | 4569 | up_read(&info->groups_sem); |
| 3974 | } | 4570 | } |
| 3975 | 4571 | ||
| 3976 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | 4572 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
| 3977 | struct btrfs_root *root, | 4573 | struct btrfs_root *root, |
| 3978 | u64 num_bytes, u64 min_alloc_size, | 4574 | u64 num_bytes, u64 min_alloc_size, |
| 3979 | u64 empty_size, u64 hint_byte, | 4575 | u64 empty_size, u64 hint_byte, |
| 3980 | u64 search_end, struct btrfs_key *ins, | 4576 | u64 search_end, struct btrfs_key *ins, |
| 3981 | u64 data) | 4577 | u64 data) |
| 3982 | { | 4578 | { |
| 3983 | int ret; | 4579 | int ret; |
| 3984 | u64 search_start = 0; | 4580 | u64 search_start = 0; |
| @@ -4023,7 +4619,7 @@ again: | |||
| 4023 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4619 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
| 4024 | "wanted %llu\n", (unsigned long long)data, | 4620 | "wanted %llu\n", (unsigned long long)data, |
| 4025 | (unsigned long long)num_bytes); | 4621 | (unsigned long long)num_bytes); |
| 4026 | dump_space_info(sinfo, num_bytes); | 4622 | dump_space_info(sinfo, num_bytes, 1); |
| 4027 | } | 4623 | } |
| 4028 | 4624 | ||
| 4029 | return ret; | 4625 | return ret; |
| @@ -4044,25 +4640,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 4044 | ret = btrfs_discard_extent(root, start, len); | 4640 | ret = btrfs_discard_extent(root, start, len); |
| 4045 | 4641 | ||
| 4046 | btrfs_add_free_space(cache, start, len); | 4642 | btrfs_add_free_space(cache, start, len); |
| 4643 | update_reserved_extents(cache, len, 0); | ||
| 4047 | btrfs_put_block_group(cache); | 4644 | btrfs_put_block_group(cache); |
| 4048 | update_reserved_extents(root, start, len, 0); | ||
| 4049 | |||
| 4050 | return ret; | ||
| 4051 | } | ||
| 4052 | |||
| 4053 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
| 4054 | struct btrfs_root *root, | ||
| 4055 | u64 num_bytes, u64 min_alloc_size, | ||
| 4056 | u64 empty_size, u64 hint_byte, | ||
| 4057 | u64 search_end, struct btrfs_key *ins, | ||
| 4058 | u64 data) | ||
| 4059 | { | ||
| 4060 | int ret; | ||
| 4061 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
| 4062 | empty_size, hint_byte, search_end, ins, | ||
| 4063 | data); | ||
| 4064 | if (!ret) | ||
| 4065 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
| 4066 | 4645 | ||
| 4067 | return ret; | 4646 | return ret; |
| 4068 | } | 4647 | } |
| @@ -4223,15 +4802,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 4223 | { | 4802 | { |
| 4224 | int ret; | 4803 | int ret; |
| 4225 | struct btrfs_block_group_cache *block_group; | 4804 | struct btrfs_block_group_cache *block_group; |
| 4805 | struct btrfs_caching_control *caching_ctl; | ||
| 4806 | u64 start = ins->objectid; | ||
| 4807 | u64 num_bytes = ins->offset; | ||
| 4226 | 4808 | ||
| 4227 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4809 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
| 4228 | cache_block_group(block_group); | 4810 | cache_block_group(block_group); |
| 4229 | wait_event(block_group->caching_q, | 4811 | caching_ctl = get_caching_control(block_group); |
| 4230 | block_group_cache_done(block_group)); | ||
| 4231 | 4812 | ||
| 4232 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4813 | if (!caching_ctl) { |
| 4233 | ins->offset); | 4814 | BUG_ON(!block_group_cache_done(block_group)); |
| 4234 | BUG_ON(ret); | 4815 | ret = btrfs_remove_free_space(block_group, start, num_bytes); |
| 4816 | BUG_ON(ret); | ||
| 4817 | } else { | ||
| 4818 | mutex_lock(&caching_ctl->mutex); | ||
| 4819 | |||
| 4820 | if (start >= caching_ctl->progress) { | ||
| 4821 | ret = add_excluded_extent(root, start, num_bytes); | ||
| 4822 | BUG_ON(ret); | ||
| 4823 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
| 4824 | ret = btrfs_remove_free_space(block_group, | ||
| 4825 | start, num_bytes); | ||
| 4826 | BUG_ON(ret); | ||
| 4827 | } else { | ||
| 4828 | num_bytes = caching_ctl->progress - start; | ||
| 4829 | ret = btrfs_remove_free_space(block_group, | ||
| 4830 | start, num_bytes); | ||
| 4831 | BUG_ON(ret); | ||
| 4832 | |||
| 4833 | start = caching_ctl->progress; | ||
| 4834 | num_bytes = ins->objectid + ins->offset - | ||
| 4835 | caching_ctl->progress; | ||
| 4836 | ret = add_excluded_extent(root, start, num_bytes); | ||
| 4837 | BUG_ON(ret); | ||
| 4838 | } | ||
| 4839 | |||
| 4840 | mutex_unlock(&caching_ctl->mutex); | ||
| 4841 | put_caching_control(caching_ctl); | ||
| 4842 | } | ||
| 4843 | |||
| 4844 | update_reserved_extents(block_group, ins->offset, 1); | ||
| 4235 | btrfs_put_block_group(block_group); | 4845 | btrfs_put_block_group(block_group); |
| 4236 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 4846 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
| 4237 | 0, owner, offset, ins, 1); | 4847 | 0, owner, offset, ins, 1); |
| @@ -4255,9 +4865,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
| 4255 | int ret; | 4865 | int ret; |
| 4256 | u64 flags = 0; | 4866 | u64 flags = 0; |
| 4257 | 4867 | ||
| 4258 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4868 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
| 4259 | empty_size, hint_byte, search_end, | 4869 | empty_size, hint_byte, search_end, |
| 4260 | ins, 0); | 4870 | ins, 0); |
| 4261 | if (ret) | 4871 | if (ret) |
| 4262 | return ret; | 4872 | return ret; |
| 4263 | 4873 | ||
| @@ -4268,7 +4878,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
| 4268 | } else | 4878 | } else |
| 4269 | BUG_ON(parent > 0); | 4879 | BUG_ON(parent > 0); |
| 4270 | 4880 | ||
| 4271 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
| 4272 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 4881 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
| 4273 | struct btrfs_delayed_extent_op *extent_op; | 4882 | struct btrfs_delayed_extent_op *extent_op; |
| 4274 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 4883 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); |
| @@ -4347,452 +4956,108 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 4347 | return buf; | 4956 | return buf; |
| 4348 | } | 4957 | } |
| 4349 | 4958 | ||
| 4350 | #if 0 | 4959 | struct walk_control { |
| 4351 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4960 | u64 refs[BTRFS_MAX_LEVEL]; |
| 4352 | struct btrfs_root *root, struct extent_buffer *leaf) | 4961 | u64 flags[BTRFS_MAX_LEVEL]; |
| 4353 | { | 4962 | struct btrfs_key update_progress; |
| 4354 | u64 disk_bytenr; | 4963 | int stage; |
| 4355 | u64 num_bytes; | 4964 | int level; |
| 4356 | struct btrfs_key key; | 4965 | int shared_level; |
| 4357 | struct btrfs_file_extent_item *fi; | 4966 | int update_ref; |
| 4358 | u32 nritems; | 4967 | int keep_locks; |
| 4359 | int i; | 4968 | int reada_slot; |
| 4360 | int ret; | 4969 | int reada_count; |
| 4361 | 4970 | }; | |
| 4362 | BUG_ON(!btrfs_is_leaf(leaf)); | ||
| 4363 | nritems = btrfs_header_nritems(leaf); | ||
| 4364 | |||
| 4365 | for (i = 0; i < nritems; i++) { | ||
| 4366 | cond_resched(); | ||
| 4367 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
| 4368 | |||
| 4369 | /* only extents have references, skip everything else */ | ||
| 4370 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
| 4371 | continue; | ||
| 4372 | |||
| 4373 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
| 4374 | |||
| 4375 | /* inline extents live in the btree, they don't have refs */ | ||
| 4376 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 4377 | BTRFS_FILE_EXTENT_INLINE) | ||
| 4378 | continue; | ||
| 4379 | |||
| 4380 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 4381 | |||
| 4382 | /* holes don't have refs */ | ||
| 4383 | if (disk_bytenr == 0) | ||
| 4384 | continue; | ||
| 4385 | |||
| 4386 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
| 4387 | ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, | ||
| 4388 | leaf->start, 0, key.objectid, 0); | ||
| 4389 | BUG_ON(ret); | ||
| 4390 | } | ||
| 4391 | return 0; | ||
| 4392 | } | ||
| 4393 | |||
| 4394 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
| 4395 | struct btrfs_root *root, | ||
| 4396 | struct btrfs_leaf_ref *ref) | ||
| 4397 | { | ||
| 4398 | int i; | ||
| 4399 | int ret; | ||
| 4400 | struct btrfs_extent_info *info; | ||
| 4401 | struct refsort *sorted; | ||
| 4402 | |||
| 4403 | if (ref->nritems == 0) | ||
| 4404 | return 0; | ||
| 4405 | |||
| 4406 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
| 4407 | for (i = 0; i < ref->nritems; i++) { | ||
| 4408 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
| 4409 | sorted[i].slot = i; | ||
| 4410 | } | ||
| 4411 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
| 4412 | |||
| 4413 | /* | ||
| 4414 | * the items in the ref were sorted when the ref was inserted | ||
| 4415 | * into the ref cache, so this is already in order | ||
| 4416 | */ | ||
| 4417 | for (i = 0; i < ref->nritems; i++) { | ||
| 4418 | info = ref->extents + sorted[i].slot; | ||
| 4419 | ret = btrfs_free_extent(trans, root, info->bytenr, | ||
| 4420 | info->num_bytes, ref->bytenr, | ||
| 4421 | ref->owner, ref->generation, | ||
| 4422 | info->objectid, 0); | ||
| 4423 | |||
| 4424 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 4425 | wake_up(&root->fs_info->transaction_throttle); | ||
| 4426 | cond_resched(); | ||
| 4427 | |||
| 4428 | BUG_ON(ret); | ||
| 4429 | info++; | ||
| 4430 | } | ||
| 4431 | |||
| 4432 | kfree(sorted); | ||
| 4433 | return 0; | ||
| 4434 | } | ||
| 4435 | |||
| 4436 | |||
| 4437 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | ||
| 4438 | struct btrfs_root *root, u64 start, | ||
| 4439 | u64 len, u32 *refs) | ||
| 4440 | { | ||
| 4441 | int ret; | ||
| 4442 | |||
| 4443 | ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); | ||
| 4444 | BUG_ON(ret); | ||
| 4445 | |||
| 4446 | #if 0 /* some debugging code in case we see problems here */ | ||
| 4447 | /* if the refs count is one, it won't get increased again. But | ||
| 4448 | * if the ref count is > 1, someone may be decreasing it at | ||
| 4449 | * the same time we are. | ||
| 4450 | */ | ||
| 4451 | if (*refs != 1) { | ||
| 4452 | struct extent_buffer *eb = NULL; | ||
| 4453 | eb = btrfs_find_create_tree_block(root, start, len); | ||
| 4454 | if (eb) | ||
| 4455 | btrfs_tree_lock(eb); | ||
| 4456 | |||
| 4457 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4458 | ret = lookup_extent_ref(NULL, root, start, len, refs); | ||
| 4459 | BUG_ON(ret); | ||
| 4460 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4461 | |||
| 4462 | if (eb) { | ||
| 4463 | btrfs_tree_unlock(eb); | ||
| 4464 | free_extent_buffer(eb); | ||
| 4465 | } | ||
| 4466 | if (*refs == 1) { | ||
| 4467 | printk(KERN_ERR "btrfs block %llu went down to one " | ||
| 4468 | "during drop_snap\n", (unsigned long long)start); | ||
| 4469 | } | ||
| 4470 | |||
| 4471 | } | ||
| 4472 | #endif | ||
| 4473 | |||
| 4474 | cond_resched(); | ||
| 4475 | return ret; | ||
| 4476 | } | ||
| 4477 | 4971 | ||
| 4972 | #define DROP_REFERENCE 1 | ||
| 4973 | #define UPDATE_BACKREF 2 | ||
| 4478 | 4974 | ||
| 4479 | /* | 4975 | static noinline void reada_walk_down(struct btrfs_trans_handle *trans, |
| 4480 | * this is used while deleting old snapshots, and it drops the refs | 4976 | struct btrfs_root *root, |
| 4481 | * on a whole subtree starting from a level 1 node. | 4977 | struct walk_control *wc, |
| 4482 | * | 4978 | struct btrfs_path *path) |
| 4483 | * The idea is to sort all the leaf pointers, and then drop the | ||
| 4484 | * ref on all the leaves in order. Most of the time the leaves | ||
| 4485 | * will have ref cache entries, so no leaf IOs will be required to | ||
| 4486 | * find the extents they have references on. | ||
| 4487 | * | ||
| 4488 | * For each leaf, any references it has are also dropped in order | ||
| 4489 | * | ||
| 4490 | * This ends up dropping the references in something close to optimal | ||
| 4491 | * order for reading and modifying the extent allocation tree. | ||
| 4492 | */ | ||
| 4493 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
| 4494 | struct btrfs_root *root, | ||
| 4495 | struct btrfs_path *path) | ||
| 4496 | { | 4979 | { |
| 4497 | u64 bytenr; | 4980 | u64 bytenr; |
| 4498 | u64 root_owner; | 4981 | u64 generation; |
| 4499 | u64 root_gen; | 4982 | u64 refs; |
| 4500 | struct extent_buffer *eb = path->nodes[1]; | 4983 | u64 flags; |
| 4501 | struct extent_buffer *leaf; | 4984 | u64 last = 0; |
| 4502 | struct btrfs_leaf_ref *ref; | 4985 | u32 nritems; |
| 4503 | struct refsort *sorted = NULL; | 4986 | u32 blocksize; |
| 4504 | int nritems = btrfs_header_nritems(eb); | 4987 | struct btrfs_key key; |
| 4988 | struct extent_buffer *eb; | ||
| 4505 | int ret; | 4989 | int ret; |
| 4506 | int i; | 4990 | int slot; |
| 4507 | int refi = 0; | 4991 | int nread = 0; |
| 4508 | int slot = path->slots[1]; | ||
| 4509 | u32 blocksize = btrfs_level_size(root, 0); | ||
| 4510 | u32 refs; | ||
| 4511 | |||
| 4512 | if (nritems == 0) | ||
| 4513 | goto out; | ||
| 4514 | |||
| 4515 | root_owner = btrfs_header_owner(eb); | ||
| 4516 | root_gen = btrfs_header_generation(eb); | ||
| 4517 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
| 4518 | 4992 | ||
| 4519 | /* | 4993 | if (path->slots[wc->level] < wc->reada_slot) { |
| 4520 | * step one, sort all the leaf pointers so we don't scribble | 4994 | wc->reada_count = wc->reada_count * 2 / 3; |
| 4521 | * randomly into the extent allocation tree | 4995 | wc->reada_count = max(wc->reada_count, 2); |
| 4522 | */ | 4996 | } else { |
| 4523 | for (i = slot; i < nritems; i++) { | 4997 | wc->reada_count = wc->reada_count * 3 / 2; |
| 4524 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | 4998 | wc->reada_count = min_t(int, wc->reada_count, |
| 4525 | sorted[refi].slot = i; | 4999 | BTRFS_NODEPTRS_PER_BLOCK(root)); |
| 4526 | refi++; | ||
| 4527 | } | 5000 | } |
| 4528 | 5001 | ||
| 4529 | /* | 5002 | eb = path->nodes[wc->level]; |
| 4530 | * nritems won't be zero, but if we're picking up drop_snapshot | 5003 | nritems = btrfs_header_nritems(eb); |
| 4531 | * after a crash, slot might be > 0, so double check things | 5004 | blocksize = btrfs_level_size(root, wc->level - 1); |
| 4532 | * just in case. | ||
| 4533 | */ | ||
| 4534 | if (refi == 0) | ||
| 4535 | goto out; | ||
| 4536 | 5005 | ||
| 4537 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | 5006 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
| 5007 | if (nread >= wc->reada_count) | ||
| 5008 | break; | ||
| 4538 | 5009 | ||
| 4539 | /* | 5010 | cond_resched(); |
| 4540 | * the first loop frees everything the leaves point to | 5011 | bytenr = btrfs_node_blockptr(eb, slot); |
| 4541 | */ | 5012 | generation = btrfs_node_ptr_generation(eb, slot); |
| 4542 | for (i = 0; i < refi; i++) { | ||
| 4543 | u64 ptr_gen; | ||
| 4544 | 5013 | ||
| 4545 | bytenr = sorted[i].bytenr; | 5014 | if (slot == path->slots[wc->level]) |
| 5015 | goto reada; | ||
| 4546 | 5016 | ||
| 4547 | /* | 5017 | if (wc->stage == UPDATE_BACKREF && |
| 4548 | * check the reference count on this leaf. If it is > 1 | 5018 | generation <= root->root_key.offset) |
| 4549 | * we just decrement it below and don't update any | ||
| 4550 | * of the refs the leaf points to. | ||
| 4551 | */ | ||
| 4552 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
| 4553 | blocksize, &refs); | ||
| 4554 | BUG_ON(ret); | ||
| 4555 | if (refs != 1) | ||
| 4556 | continue; | 5019 | continue; |
| 4557 | 5020 | ||
| 4558 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | 5021 | /* We don't lock the tree block, it's OK to be racy here */ |
| 4559 | 5022 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | |
| 4560 | /* | 5023 | &refs, &flags); |
| 4561 | * the leaf only had one reference, which means the | ||
| 4562 | * only thing pointing to this leaf is the snapshot | ||
| 4563 | * we're deleting. It isn't possible for the reference | ||
| 4564 | * count to increase again later | ||
| 4565 | * | ||
| 4566 | * The reference cache is checked for the leaf, | ||
| 4567 | * and if found we'll be able to drop any refs held by | ||
| 4568 | * the leaf without needing to read it in. | ||
| 4569 | */ | ||
| 4570 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
| 4571 | if (ref && ref->generation != ptr_gen) { | ||
| 4572 | btrfs_free_leaf_ref(root, ref); | ||
| 4573 | ref = NULL; | ||
| 4574 | } | ||
| 4575 | if (ref) { | ||
| 4576 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
| 4577 | BUG_ON(ret); | ||
| 4578 | btrfs_remove_leaf_ref(root, ref); | ||
| 4579 | btrfs_free_leaf_ref(root, ref); | ||
| 4580 | } else { | ||
| 4581 | /* | ||
| 4582 | * the leaf wasn't in the reference cache, so | ||
| 4583 | * we have to read it. | ||
| 4584 | */ | ||
| 4585 | leaf = read_tree_block(root, bytenr, blocksize, | ||
| 4586 | ptr_gen); | ||
| 4587 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
| 4588 | BUG_ON(ret); | ||
| 4589 | free_extent_buffer(leaf); | ||
| 4590 | } | ||
| 4591 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 4592 | wake_up(&root->fs_info->transaction_throttle); | ||
| 4593 | cond_resched(); | ||
| 4594 | } | ||
| 4595 | |||
| 4596 | /* | ||
| 4597 | * run through the loop again to free the refs on the leaves. | ||
| 4598 | * This is faster than doing it in the loop above because | ||
| 4599 | * the leaves are likely to be clustered together. We end up | ||
| 4600 | * working in nice chunks on the extent allocation tree. | ||
| 4601 | */ | ||
| 4602 | for (i = 0; i < refi; i++) { | ||
| 4603 | bytenr = sorted[i].bytenr; | ||
| 4604 | ret = btrfs_free_extent(trans, root, bytenr, | ||
| 4605 | blocksize, eb->start, | ||
| 4606 | root_owner, root_gen, 0, 1); | ||
| 4607 | BUG_ON(ret); | 5024 | BUG_ON(ret); |
| 5025 | BUG_ON(refs == 0); | ||
| 4608 | 5026 | ||
| 4609 | atomic_inc(&root->fs_info->throttle_gen); | 5027 | if (wc->stage == DROP_REFERENCE) { |
| 4610 | wake_up(&root->fs_info->transaction_throttle); | 5028 | if (refs == 1) |
| 4611 | cond_resched(); | 5029 | goto reada; |
| 4612 | } | ||
| 4613 | out: | ||
| 4614 | kfree(sorted); | ||
| 4615 | |||
| 4616 | /* | ||
| 4617 | * update the path to show we've processed the entire level 1 | ||
| 4618 | * node. This will get saved into the root's drop_snapshot_progress | ||
| 4619 | * field so these drops are not repeated again if this transaction | ||
| 4620 | * commits. | ||
| 4621 | */ | ||
| 4622 | path->slots[1] = nritems; | ||
| 4623 | return 0; | ||
| 4624 | } | ||
| 4625 | |||
| 4626 | /* | ||
| 4627 | * helper function for drop_snapshot, this walks down the tree dropping ref | ||
| 4628 | * counts as it goes. | ||
| 4629 | */ | ||
| 4630 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | ||
| 4631 | struct btrfs_root *root, | ||
| 4632 | struct btrfs_path *path, int *level) | ||
| 4633 | { | ||
| 4634 | u64 root_owner; | ||
| 4635 | u64 root_gen; | ||
| 4636 | u64 bytenr; | ||
| 4637 | u64 ptr_gen; | ||
| 4638 | struct extent_buffer *next; | ||
| 4639 | struct extent_buffer *cur; | ||
| 4640 | struct extent_buffer *parent; | ||
| 4641 | u32 blocksize; | ||
| 4642 | int ret; | ||
| 4643 | u32 refs; | ||
| 4644 | |||
| 4645 | WARN_ON(*level < 0); | ||
| 4646 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 4647 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, | ||
| 4648 | path->nodes[*level]->len, &refs); | ||
| 4649 | BUG_ON(ret); | ||
| 4650 | if (refs > 1) | ||
| 4651 | goto out; | ||
| 4652 | |||
| 4653 | /* | ||
| 4654 | * walk down to the last node level and free all the leaves | ||
| 4655 | */ | ||
| 4656 | while (*level >= 0) { | ||
| 4657 | WARN_ON(*level < 0); | ||
| 4658 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 4659 | cur = path->nodes[*level]; | ||
| 4660 | |||
| 4661 | if (btrfs_header_level(cur) != *level) | ||
| 4662 | WARN_ON(1); | ||
| 4663 | |||
| 4664 | if (path->slots[*level] >= | ||
| 4665 | btrfs_header_nritems(cur)) | ||
| 4666 | break; | ||
| 4667 | 5030 | ||
| 4668 | /* the new code goes down to level 1 and does all the | 5031 | if (wc->level == 1 && |
| 4669 | * leaves pointed to that node in bulk. So, this check | 5032 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
| 4670 | * for level 0 will always be false. | 5033 | continue; |
| 4671 | * | 5034 | if (!wc->update_ref || |
| 4672 | * But, the disk format allows the drop_snapshot_progress | 5035 | generation <= root->root_key.offset) |
| 4673 | * field in the root to leave things in a state where | 5036 | continue; |
| 4674 | * a leaf will need cleaning up here. If someone crashes | 5037 | btrfs_node_key_to_cpu(eb, &key, slot); |
| 4675 | * with the old code and then boots with the new code, | 5038 | ret = btrfs_comp_cpu_keys(&key, |
| 4676 | * we might find a leaf here. | 5039 | &wc->update_progress); |
| 4677 | */ | 5040 | if (ret < 0) |
| 4678 | if (*level == 0) { | 5041 | continue; |
| 4679 | ret = btrfs_drop_leaf_ref(trans, root, cur); | 5042 | } else { |
| 4680 | BUG_ON(ret); | 5043 | if (wc->level == 1 && |
| 4681 | break; | 5044 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
| 5045 | continue; | ||
| 4682 | } | 5046 | } |
| 4683 | 5047 | reada: | |
| 4684 | /* | 5048 | ret = readahead_tree_block(root, bytenr, blocksize, |
| 4685 | * once we get to level one, process the whole node | 5049 | generation); |
| 4686 | * at once, including everything below it. | 5050 | if (ret) |
| 4687 | */ | ||
| 4688 | if (*level == 1) { | ||
| 4689 | ret = drop_level_one_refs(trans, root, path); | ||
| 4690 | BUG_ON(ret); | ||
| 4691 | break; | 5051 | break; |
| 4692 | } | 5052 | last = bytenr + blocksize; |
| 4693 | 5053 | nread++; | |
| 4694 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
| 4695 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
| 4696 | blocksize = btrfs_level_size(root, *level - 1); | ||
| 4697 | |||
| 4698 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
| 4699 | blocksize, &refs); | ||
| 4700 | BUG_ON(ret); | ||
| 4701 | |||
| 4702 | /* | ||
| 4703 | * if there is more than one reference, we don't need | ||
| 4704 | * to read that node to drop any references it has. We | ||
| 4705 | * just drop the ref we hold on that node and move on to the | ||
| 4706 | * next slot in this level. | ||
| 4707 | */ | ||
| 4708 | if (refs != 1) { | ||
| 4709 | parent = path->nodes[*level]; | ||
| 4710 | root_owner = btrfs_header_owner(parent); | ||
| 4711 | root_gen = btrfs_header_generation(parent); | ||
| 4712 | path->slots[*level]++; | ||
| 4713 | |||
| 4714 | ret = btrfs_free_extent(trans, root, bytenr, | ||
| 4715 | blocksize, parent->start, | ||
| 4716 | root_owner, root_gen, | ||
| 4717 | *level - 1, 1); | ||
| 4718 | BUG_ON(ret); | ||
| 4719 | |||
| 4720 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 4721 | wake_up(&root->fs_info->transaction_throttle); | ||
| 4722 | cond_resched(); | ||
| 4723 | |||
| 4724 | continue; | ||
| 4725 | } | ||
| 4726 | |||
| 4727 | /* | ||
| 4728 | * we need to keep freeing things in the next level down. | ||
| 4729 | * read the block and loop around to process it | ||
| 4730 | */ | ||
| 4731 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
| 4732 | WARN_ON(*level <= 0); | ||
| 4733 | if (path->nodes[*level-1]) | ||
| 4734 | free_extent_buffer(path->nodes[*level-1]); | ||
| 4735 | path->nodes[*level-1] = next; | ||
| 4736 | *level = btrfs_header_level(next); | ||
| 4737 | path->slots[*level] = 0; | ||
| 4738 | cond_resched(); | ||
| 4739 | } | 5054 | } |
| 4740 | out: | 5055 | wc->reada_slot = slot; |
| 4741 | WARN_ON(*level < 0); | ||
| 4742 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 4743 | |||
| 4744 | if (path->nodes[*level] == root->node) { | ||
| 4745 | parent = path->nodes[*level]; | ||
| 4746 | bytenr = path->nodes[*level]->start; | ||
| 4747 | } else { | ||
| 4748 | parent = path->nodes[*level + 1]; | ||
| 4749 | bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); | ||
| 4750 | } | ||
| 4751 | |||
| 4752 | blocksize = btrfs_level_size(root, *level); | ||
| 4753 | root_owner = btrfs_header_owner(parent); | ||
| 4754 | root_gen = btrfs_header_generation(parent); | ||
| 4755 | |||
| 4756 | /* | ||
| 4757 | * cleanup and free the reference on the last node | ||
| 4758 | * we processed | ||
| 4759 | */ | ||
| 4760 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
| 4761 | parent->start, root_owner, root_gen, | ||
| 4762 | *level, 1); | ||
| 4763 | free_extent_buffer(path->nodes[*level]); | ||
| 4764 | path->nodes[*level] = NULL; | ||
| 4765 | |||
| 4766 | *level += 1; | ||
| 4767 | BUG_ON(ret); | ||
| 4768 | |||
| 4769 | cond_resched(); | ||
| 4770 | return 0; | ||
| 4771 | } | 5056 | } |
| 4772 | #endif | ||
| 4773 | |||
| 4774 | struct walk_control { | ||
| 4775 | u64 refs[BTRFS_MAX_LEVEL]; | ||
| 4776 | u64 flags[BTRFS_MAX_LEVEL]; | ||
| 4777 | struct btrfs_key update_progress; | ||
| 4778 | int stage; | ||
| 4779 | int level; | ||
| 4780 | int shared_level; | ||
| 4781 | int update_ref; | ||
| 4782 | int keep_locks; | ||
| 4783 | }; | ||
| 4784 | |||
| 4785 | #define DROP_REFERENCE 1 | ||
| 4786 | #define UPDATE_BACKREF 2 | ||
| 4787 | 5057 | ||
| 4788 | /* | 5058 | /* |
| 4789 | * helper to process tree block while walking down the tree. | 5059 | * helper to process tree block while walking down the tree. |
| 4790 | * | 5060 | * |
| 4791 | * when wc->stage == DROP_REFERENCE, this function checks | ||
| 4792 | * reference count of the block. if the block is shared and | ||
| 4793 | * we need update back refs for the subtree rooted at the | ||
| 4794 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
| 4795 | * | ||
| 4796 | * when wc->stage == UPDATE_BACKREF, this function updates | 5061 | * when wc->stage == UPDATE_BACKREF, this function updates |
| 4797 | * back refs for pointers in the block. | 5062 | * back refs for pointers in the block. |
| 4798 | * | 5063 | * |
| @@ -4801,11 +5066,10 @@ struct walk_control { | |||
| 4801 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | 5066 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, |
| 4802 | struct btrfs_root *root, | 5067 | struct btrfs_root *root, |
| 4803 | struct btrfs_path *path, | 5068 | struct btrfs_path *path, |
| 4804 | struct walk_control *wc) | 5069 | struct walk_control *wc, int lookup_info) |
| 4805 | { | 5070 | { |
| 4806 | int level = wc->level; | 5071 | int level = wc->level; |
| 4807 | struct extent_buffer *eb = path->nodes[level]; | 5072 | struct extent_buffer *eb = path->nodes[level]; |
| 4808 | struct btrfs_key key; | ||
| 4809 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 5073 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
| 4810 | int ret; | 5074 | int ret; |
| 4811 | 5075 | ||
| @@ -4817,8 +5081,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4817 | * when reference count of tree block is 1, it won't increase | 5081 | * when reference count of tree block is 1, it won't increase |
| 4818 | * again. once full backref flag is set, we never clear it. | 5082 | * again. once full backref flag is set, we never clear it. |
| 4819 | */ | 5083 | */ |
| 4820 | if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || | 5084 | if (lookup_info && |
| 4821 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { | 5085 | ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || |
| 5086 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { | ||
| 4822 | BUG_ON(!path->locks[level]); | 5087 | BUG_ON(!path->locks[level]); |
| 4823 | ret = btrfs_lookup_extent_info(trans, root, | 5088 | ret = btrfs_lookup_extent_info(trans, root, |
| 4824 | eb->start, eb->len, | 5089 | eb->start, eb->len, |
| @@ -4828,21 +5093,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4828 | BUG_ON(wc->refs[level] == 0); | 5093 | BUG_ON(wc->refs[level] == 0); |
| 4829 | } | 5094 | } |
| 4830 | 5095 | ||
| 4831 | if (wc->stage == DROP_REFERENCE && | ||
| 4832 | wc->update_ref && wc->refs[level] > 1) { | ||
| 4833 | BUG_ON(eb == root->node); | ||
| 4834 | BUG_ON(path->slots[level] > 0); | ||
| 4835 | if (level == 0) | ||
| 4836 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); | ||
| 4837 | else | ||
| 4838 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); | ||
| 4839 | if (btrfs_header_owner(eb) == root->root_key.objectid && | ||
| 4840 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { | ||
| 4841 | wc->stage = UPDATE_BACKREF; | ||
| 4842 | wc->shared_level = level; | ||
| 4843 | } | ||
| 4844 | } | ||
| 4845 | |||
| 4846 | if (wc->stage == DROP_REFERENCE) { | 5096 | if (wc->stage == DROP_REFERENCE) { |
| 4847 | if (wc->refs[level] > 1) | 5097 | if (wc->refs[level] > 1) |
| 4848 | return 1; | 5098 | return 1; |
| @@ -4879,6 +5129,136 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4879 | } | 5129 | } |
| 4880 | 5130 | ||
| 4881 | /* | 5131 | /* |
| 5132 | * helper to process tree block pointer. | ||
| 5133 | * | ||
| 5134 | * when wc->stage == DROP_REFERENCE, this function checks | ||
| 5135 | * reference count of the block pointed to. if the block | ||
| 5136 | * is shared and we need to update back refs for the subtree | ||
| 5137 | * rooted at the block, this function changes wc->stage to | ||
| 5138 | * UPDATE_BACKREF. if the block is shared and there is no | ||
| 5139 | * need to update back, this function drops the reference | ||
| 5140 | * to the block. | ||
| 5141 | * | ||
| 5142 | * NOTE: return value 1 means we should stop walking down. | ||
| 5143 | */ | ||
| 5144 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | ||
| 5145 | struct btrfs_root *root, | ||
| 5146 | struct btrfs_path *path, | ||
| 5147 | struct walk_control *wc, int *lookup_info) | ||
| 5148 | { | ||
| 5149 | u64 bytenr; | ||
| 5150 | u64 generation; | ||
| 5151 | u64 parent; | ||
| 5152 | u32 blocksize; | ||
| 5153 | struct btrfs_key key; | ||
| 5154 | struct extent_buffer *next; | ||
| 5155 | int level = wc->level; | ||
| 5156 | int reada = 0; | ||
| 5157 | int ret = 0; | ||
| 5158 | |||
| 5159 | generation = btrfs_node_ptr_generation(path->nodes[level], | ||
| 5160 | path->slots[level]); | ||
| 5161 | /* | ||
| 5162 | * if the lower level block was created before the snapshot | ||
| 5163 | * was created, we know there is no need to update back refs | ||
| 5164 | * for the subtree | ||
| 5165 | */ | ||
| 5166 | if (wc->stage == UPDATE_BACKREF && | ||
| 5167 | generation <= root->root_key.offset) { | ||
| 5168 | *lookup_info = 1; | ||
| 5169 | return 1; | ||
| 5170 | } | ||
| 5171 | |||
| 5172 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||
| 5173 | blocksize = btrfs_level_size(root, level - 1); | ||
| 5174 | |||
| 5175 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
| 5176 | if (!next) { | ||
| 5177 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 5178 | reada = 1; | ||
| 5179 | } | ||
| 5180 | btrfs_tree_lock(next); | ||
| 5181 | btrfs_set_lock_blocking(next); | ||
| 5182 | |||
| 5183 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
| 5184 | &wc->refs[level - 1], | ||
| 5185 | &wc->flags[level - 1]); | ||
| 5186 | BUG_ON(ret); | ||
| 5187 | BUG_ON(wc->refs[level - 1] == 0); | ||
| 5188 | *lookup_info = 0; | ||
| 5189 | |||
| 5190 | if (wc->stage == DROP_REFERENCE) { | ||
| 5191 | if (wc->refs[level - 1] > 1) { | ||
| 5192 | if (level == 1 && | ||
| 5193 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
| 5194 | goto skip; | ||
| 5195 | |||
| 5196 | if (!wc->update_ref || | ||
| 5197 | generation <= root->root_key.offset) | ||
| 5198 | goto skip; | ||
| 5199 | |||
| 5200 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
| 5201 | path->slots[level]); | ||
| 5202 | ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); | ||
| 5203 | if (ret < 0) | ||
| 5204 | goto skip; | ||
| 5205 | |||
| 5206 | wc->stage = UPDATE_BACKREF; | ||
| 5207 | wc->shared_level = level - 1; | ||
| 5208 | } | ||
| 5209 | } else { | ||
| 5210 | if (level == 1 && | ||
| 5211 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
| 5212 | goto skip; | ||
| 5213 | } | ||
| 5214 | |||
| 5215 | if (!btrfs_buffer_uptodate(next, generation)) { | ||
| 5216 | btrfs_tree_unlock(next); | ||
| 5217 | free_extent_buffer(next); | ||
| 5218 | next = NULL; | ||
| 5219 | *lookup_info = 1; | ||
| 5220 | } | ||
| 5221 | |||
| 5222 | if (!next) { | ||
| 5223 | if (reada && level == 1) | ||
| 5224 | reada_walk_down(trans, root, wc, path); | ||
| 5225 | next = read_tree_block(root, bytenr, blocksize, generation); | ||
| 5226 | btrfs_tree_lock(next); | ||
| 5227 | btrfs_set_lock_blocking(next); | ||
| 5228 | } | ||
| 5229 | |||
| 5230 | level--; | ||
| 5231 | BUG_ON(level != btrfs_header_level(next)); | ||
| 5232 | path->nodes[level] = next; | ||
| 5233 | path->slots[level] = 0; | ||
| 5234 | path->locks[level] = 1; | ||
| 5235 | wc->level = level; | ||
| 5236 | if (wc->level == 1) | ||
| 5237 | wc->reada_slot = 0; | ||
| 5238 | return 0; | ||
| 5239 | skip: | ||
| 5240 | wc->refs[level - 1] = 0; | ||
| 5241 | wc->flags[level - 1] = 0; | ||
| 5242 | if (wc->stage == DROP_REFERENCE) { | ||
| 5243 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
| 5244 | parent = path->nodes[level]->start; | ||
| 5245 | } else { | ||
| 5246 | BUG_ON(root->root_key.objectid != | ||
| 5247 | btrfs_header_owner(path->nodes[level])); | ||
| 5248 | parent = 0; | ||
| 5249 | } | ||
| 5250 | |||
| 5251 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
| 5252 | root->root_key.objectid, level - 1, 0); | ||
| 5253 | BUG_ON(ret); | ||
| 5254 | } | ||
| 5255 | btrfs_tree_unlock(next); | ||
| 5256 | free_extent_buffer(next); | ||
| 5257 | *lookup_info = 1; | ||
| 5258 | return 1; | ||
| 5259 | } | ||
| 5260 | |||
| 5261 | /* | ||
| 4882 | * hepler to process tree block while walking up the tree. | 5262 | * hepler to process tree block while walking up the tree. |
| 4883 | * | 5263 | * |
| 4884 | * when wc->stage == DROP_REFERENCE, this function drops | 5264 | * when wc->stage == DROP_REFERENCE, this function drops |
| @@ -4905,7 +5285,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 4905 | if (level < wc->shared_level) | 5285 | if (level < wc->shared_level) |
| 4906 | goto out; | 5286 | goto out; |
| 4907 | 5287 | ||
| 4908 | BUG_ON(wc->refs[level] <= 1); | ||
| 4909 | ret = find_next_key(path, level + 1, &wc->update_progress); | 5288 | ret = find_next_key(path, level + 1, &wc->update_progress); |
| 4910 | if (ret > 0) | 5289 | if (ret > 0) |
| 4911 | wc->update_ref = 0; | 5290 | wc->update_ref = 0; |
| @@ -4936,8 +5315,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 4936 | path->locks[level] = 0; | 5315 | path->locks[level] = 0; |
| 4937 | return 1; | 5316 | return 1; |
| 4938 | } | 5317 | } |
| 4939 | } else { | ||
| 4940 | BUG_ON(level != 0); | ||
| 4941 | } | 5318 | } |
| 4942 | } | 5319 | } |
| 4943 | 5320 | ||
| @@ -4990,39 +5367,28 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
| 4990 | struct btrfs_path *path, | 5367 | struct btrfs_path *path, |
| 4991 | struct walk_control *wc) | 5368 | struct walk_control *wc) |
| 4992 | { | 5369 | { |
| 4993 | struct extent_buffer *next; | ||
| 4994 | struct extent_buffer *cur; | ||
| 4995 | u64 bytenr; | ||
| 4996 | u64 ptr_gen; | ||
| 4997 | u32 blocksize; | ||
| 4998 | int level = wc->level; | 5370 | int level = wc->level; |
| 5371 | int lookup_info = 1; | ||
| 4999 | int ret; | 5372 | int ret; |
| 5000 | 5373 | ||
| 5001 | while (level >= 0) { | 5374 | while (level >= 0) { |
| 5002 | cur = path->nodes[level]; | 5375 | if (path->slots[level] >= |
| 5003 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | 5376 | btrfs_header_nritems(path->nodes[level])) |
| 5377 | break; | ||
| 5004 | 5378 | ||
| 5005 | ret = walk_down_proc(trans, root, path, wc); | 5379 | ret = walk_down_proc(trans, root, path, wc, lookup_info); |
| 5006 | if (ret > 0) | 5380 | if (ret > 0) |
| 5007 | break; | 5381 | break; |
| 5008 | 5382 | ||
| 5009 | if (level == 0) | 5383 | if (level == 0) |
| 5010 | break; | 5384 | break; |
| 5011 | 5385 | ||
| 5012 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | 5386 | ret = do_walk_down(trans, root, path, wc, &lookup_info); |
| 5013 | blocksize = btrfs_level_size(root, level - 1); | 5387 | if (ret > 0) { |
| 5014 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | 5388 | path->slots[level]++; |
| 5015 | 5389 | continue; | |
| 5016 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 5390 | } |
| 5017 | btrfs_tree_lock(next); | 5391 | level = wc->level; |
| 5018 | btrfs_set_lock_blocking(next); | ||
| 5019 | |||
| 5020 | level--; | ||
| 5021 | BUG_ON(level != btrfs_header_level(next)); | ||
| 5022 | path->nodes[level] = next; | ||
| 5023 | path->slots[level] = 0; | ||
| 5024 | path->locks[level] = 1; | ||
| 5025 | wc->level = level; | ||
| 5026 | } | 5392 | } |
| 5027 | return 0; | 5393 | return 0; |
| 5028 | } | 5394 | } |
| @@ -5112,9 +5478,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5112 | err = ret; | 5478 | err = ret; |
| 5113 | goto out; | 5479 | goto out; |
| 5114 | } | 5480 | } |
| 5115 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 5481 | WARN_ON(ret > 0); |
| 5116 | path->slots[level]); | ||
| 5117 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); | ||
| 5118 | 5482 | ||
| 5119 | /* | 5483 | /* |
| 5120 | * unlock our path, this is safe because only this | 5484 | * unlock our path, this is safe because only this |
| @@ -5149,6 +5513,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5149 | wc->stage = DROP_REFERENCE; | 5513 | wc->stage = DROP_REFERENCE; |
| 5150 | wc->update_ref = update_ref; | 5514 | wc->update_ref = update_ref; |
| 5151 | wc->keep_locks = 0; | 5515 | wc->keep_locks = 0; |
| 5516 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
| 5152 | 5517 | ||
| 5153 | while (1) { | 5518 | while (1) { |
| 5154 | ret = walk_down_tree(trans, root, path, wc); | 5519 | ret = walk_down_tree(trans, root, path, wc); |
| @@ -5201,9 +5566,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5201 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 5566 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
| 5202 | BUG_ON(ret); | 5567 | BUG_ON(ret); |
| 5203 | 5568 | ||
| 5204 | free_extent_buffer(root->node); | 5569 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
| 5205 | free_extent_buffer(root->commit_root); | 5570 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, |
| 5206 | kfree(root); | 5571 | NULL, NULL); |
| 5572 | BUG_ON(ret < 0); | ||
| 5573 | if (ret > 0) { | ||
| 5574 | ret = btrfs_del_orphan_item(trans, tree_root, | ||
| 5575 | root->root_key.objectid); | ||
| 5576 | BUG_ON(ret); | ||
| 5577 | } | ||
| 5578 | } | ||
| 5579 | |||
| 5580 | if (root->in_radix) { | ||
| 5581 | btrfs_free_fs_root(tree_root->fs_info, root); | ||
| 5582 | } else { | ||
| 5583 | free_extent_buffer(root->node); | ||
| 5584 | free_extent_buffer(root->commit_root); | ||
| 5585 | kfree(root); | ||
| 5586 | } | ||
| 5207 | out: | 5587 | out: |
| 5208 | btrfs_end_transaction(trans, tree_root); | 5588 | btrfs_end_transaction(trans, tree_root); |
| 5209 | kfree(wc); | 5589 | kfree(wc); |
| @@ -5255,6 +5635,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
| 5255 | wc->stage = DROP_REFERENCE; | 5635 | wc->stage = DROP_REFERENCE; |
| 5256 | wc->update_ref = 0; | 5636 | wc->update_ref = 0; |
| 5257 | wc->keep_locks = 1; | 5637 | wc->keep_locks = 1; |
| 5638 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
| 5258 | 5639 | ||
| 5259 | while (1) { | 5640 | while (1) { |
| 5260 | wret = walk_down_tree(trans, root, path, wc); | 5641 | wret = walk_down_tree(trans, root, path, wc); |
| @@ -5397,9 +5778,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
| 5397 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | 5778 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
| 5398 | while (1) { | 5779 | while (1) { |
| 5399 | int ret; | 5780 | int ret; |
| 5400 | spin_lock(&em_tree->lock); | 5781 | write_lock(&em_tree->lock); |
| 5401 | ret = add_extent_mapping(em_tree, em); | 5782 | ret = add_extent_mapping(em_tree, em); |
| 5402 | spin_unlock(&em_tree->lock); | 5783 | write_unlock(&em_tree->lock); |
| 5403 | if (ret != -EEXIST) { | 5784 | if (ret != -EEXIST) { |
| 5404 | free_extent_map(em); | 5785 | free_extent_map(em); |
| 5405 | break; | 5786 | break; |
| @@ -6842,287 +7223,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | |||
| 6842 | return 0; | 7223 | return 0; |
| 6843 | } | 7224 | } |
| 6844 | 7225 | ||
| 6845 | #if 0 | 7226 | /* |
| 6846 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 7227 | * checks to see if its even possible to relocate this block group. |
| 6847 | struct btrfs_root *root, | 7228 | * |
| 6848 | u64 objectid, u64 size) | 7229 | * @return - -1 if it's not a good idea to relocate this block group, 0 if its |
| 6849 | { | 7230 | * ok to go ahead and try. |
| 6850 | struct btrfs_path *path; | 7231 | */ |
| 6851 | struct btrfs_inode_item *item; | 7232 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) |
| 6852 | struct extent_buffer *leaf; | ||
| 6853 | int ret; | ||
| 6854 | |||
| 6855 | path = btrfs_alloc_path(); | ||
| 6856 | if (!path) | ||
| 6857 | return -ENOMEM; | ||
| 6858 | |||
| 6859 | path->leave_spinning = 1; | ||
| 6860 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
| 6861 | if (ret) | ||
| 6862 | goto out; | ||
| 6863 | |||
| 6864 | leaf = path->nodes[0]; | ||
| 6865 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
| 6866 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
| 6867 | btrfs_set_inode_generation(leaf, item, 1); | ||
| 6868 | btrfs_set_inode_size(leaf, item, size); | ||
| 6869 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
| 6870 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | ||
| 6871 | btrfs_mark_buffer_dirty(leaf); | ||
| 6872 | btrfs_release_path(root, path); | ||
| 6873 | out: | ||
| 6874 | btrfs_free_path(path); | ||
| 6875 | return ret; | ||
| 6876 | } | ||
| 6877 | |||
| 6878 | static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
| 6879 | struct btrfs_block_group_cache *group) | ||
| 6880 | { | 7233 | { |
| 6881 | struct inode *inode = NULL; | 7234 | struct btrfs_block_group_cache *block_group; |
| 6882 | struct btrfs_trans_handle *trans; | 7235 | struct btrfs_space_info *space_info; |
| 6883 | struct btrfs_root *root; | 7236 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
| 6884 | struct btrfs_key root_key; | 7237 | struct btrfs_device *device; |
| 6885 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 7238 | int full = 0; |
| 6886 | int err = 0; | 7239 | int ret = 0; |
| 6887 | 7240 | ||
| 6888 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | 7241 | block_group = btrfs_lookup_block_group(root->fs_info, bytenr); |
| 6889 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 6890 | root_key.offset = (u64)-1; | ||
| 6891 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
| 6892 | if (IS_ERR(root)) | ||
| 6893 | return ERR_CAST(root); | ||
| 6894 | 7242 | ||
| 6895 | trans = btrfs_start_transaction(root, 1); | 7243 | /* odd, couldn't find the block group, leave it alone */ |
| 6896 | BUG_ON(!trans); | 7244 | if (!block_group) |
| 7245 | return -1; | ||
| 6897 | 7246 | ||
| 6898 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 7247 | /* no bytes used, we're good */ |
| 6899 | if (err) | 7248 | if (!btrfs_block_group_used(&block_group->item)) |
| 6900 | goto out; | 7249 | goto out; |
| 6901 | 7250 | ||
| 6902 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 7251 | space_info = block_group->space_info; |
| 6903 | BUG_ON(err); | 7252 | spin_lock(&space_info->lock); |
| 6904 | |||
| 6905 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
| 6906 | group->key.offset, 0, group->key.offset, | ||
| 6907 | 0, 0, 0); | ||
| 6908 | BUG_ON(err); | ||
| 6909 | |||
| 6910 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
| 6911 | if (inode->i_state & I_NEW) { | ||
| 6912 | BTRFS_I(inode)->root = root; | ||
| 6913 | BTRFS_I(inode)->location.objectid = objectid; | ||
| 6914 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
| 6915 | BTRFS_I(inode)->location.offset = 0; | ||
| 6916 | btrfs_read_locked_inode(inode); | ||
| 6917 | unlock_new_inode(inode); | ||
| 6918 | BUG_ON(is_bad_inode(inode)); | ||
| 6919 | } else { | ||
| 6920 | BUG_ON(1); | ||
| 6921 | } | ||
| 6922 | BTRFS_I(inode)->index_cnt = group->key.objectid; | ||
| 6923 | |||
| 6924 | err = btrfs_orphan_add(trans, inode); | ||
| 6925 | out: | ||
| 6926 | btrfs_end_transaction(trans, root); | ||
| 6927 | if (err) { | ||
| 6928 | if (inode) | ||
| 6929 | iput(inode); | ||
| 6930 | inode = ERR_PTR(err); | ||
| 6931 | } | ||
| 6932 | return inode; | ||
| 6933 | } | ||
| 6934 | |||
| 6935 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | ||
| 6936 | { | ||
| 6937 | |||
| 6938 | struct btrfs_ordered_sum *sums; | ||
| 6939 | struct btrfs_sector_sum *sector_sum; | ||
| 6940 | struct btrfs_ordered_extent *ordered; | ||
| 6941 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 6942 | struct list_head list; | ||
| 6943 | size_t offset; | ||
| 6944 | int ret; | ||
| 6945 | u64 disk_bytenr; | ||
| 6946 | |||
| 6947 | INIT_LIST_HEAD(&list); | ||
| 6948 | |||
| 6949 | ordered = btrfs_lookup_ordered_extent(inode, file_pos); | ||
| 6950 | BUG_ON(ordered->file_offset != file_pos || ordered->len != len); | ||
| 6951 | |||
| 6952 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | ||
| 6953 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | ||
| 6954 | disk_bytenr + len - 1, &list); | ||
| 6955 | |||
| 6956 | while (!list_empty(&list)) { | ||
| 6957 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
| 6958 | list_del_init(&sums->list); | ||
| 6959 | |||
| 6960 | sector_sum = sums->sums; | ||
| 6961 | sums->bytenr = ordered->start; | ||
| 6962 | 7253 | ||
| 6963 | offset = 0; | 7254 | full = space_info->full; |
| 6964 | while (offset < sums->len) { | ||
| 6965 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
| 6966 | sector_sum++; | ||
| 6967 | offset += root->sectorsize; | ||
| 6968 | } | ||
| 6969 | 7255 | ||
| 6970 | btrfs_add_ordered_sum(inode, ordered, sums); | 7256 | /* |
| 7257 | * if this is the last block group we have in this space, we can't | ||
| 7258 | * relocate it unless we're able to allocate a new chunk below. | ||
| 7259 | * | ||
| 7260 | * Otherwise, we need to make sure we have room in the space to handle | ||
| 7261 | * all of the extents from this block group. If we can, we're good | ||
| 7262 | */ | ||
| 7263 | if ((space_info->total_bytes != block_group->key.offset) && | ||
| 7264 | (space_info->bytes_used + space_info->bytes_reserved + | ||
| 7265 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 7266 | btrfs_block_group_used(&block_group->item) < | ||
| 7267 | space_info->total_bytes)) { | ||
| 7268 | spin_unlock(&space_info->lock); | ||
| 7269 | goto out; | ||
| 6971 | } | 7270 | } |
| 6972 | btrfs_put_ordered_extent(ordered); | 7271 | spin_unlock(&space_info->lock); |
| 6973 | return 0; | ||
| 6974 | } | ||
| 6975 | |||
| 6976 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
| 6977 | { | ||
| 6978 | struct btrfs_trans_handle *trans; | ||
| 6979 | struct btrfs_path *path; | ||
| 6980 | struct btrfs_fs_info *info = root->fs_info; | ||
| 6981 | struct extent_buffer *leaf; | ||
| 6982 | struct inode *reloc_inode; | ||
| 6983 | struct btrfs_block_group_cache *block_group; | ||
| 6984 | struct btrfs_key key; | ||
| 6985 | u64 skipped; | ||
| 6986 | u64 cur_byte; | ||
| 6987 | u64 total_found; | ||
| 6988 | u32 nritems; | ||
| 6989 | int ret; | ||
| 6990 | int progress; | ||
| 6991 | int pass = 0; | ||
| 6992 | |||
| 6993 | root = root->fs_info->extent_root; | ||
| 6994 | |||
| 6995 | block_group = btrfs_lookup_block_group(info, group_start); | ||
| 6996 | BUG_ON(!block_group); | ||
| 6997 | |||
| 6998 | printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", | ||
| 6999 | (unsigned long long)block_group->key.objectid, | ||
| 7000 | (unsigned long long)block_group->flags); | ||
| 7001 | |||
| 7002 | path = btrfs_alloc_path(); | ||
| 7003 | BUG_ON(!path); | ||
| 7004 | |||
| 7005 | reloc_inode = create_reloc_inode(info, block_group); | ||
| 7006 | BUG_ON(IS_ERR(reloc_inode)); | ||
| 7007 | |||
| 7008 | __alloc_chunk_for_shrink(root, block_group, 1); | ||
| 7009 | set_block_group_readonly(block_group); | ||
| 7010 | |||
| 7011 | btrfs_start_delalloc_inodes(info->tree_root); | ||
| 7012 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
| 7013 | again: | ||
| 7014 | skipped = 0; | ||
| 7015 | total_found = 0; | ||
| 7016 | progress = 0; | ||
| 7017 | key.objectid = block_group->key.objectid; | ||
| 7018 | key.offset = 0; | ||
| 7019 | key.type = 0; | ||
| 7020 | cur_byte = key.objectid; | ||
| 7021 | |||
| 7022 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
| 7023 | btrfs_commit_transaction(trans, info->tree_root); | ||
| 7024 | 7272 | ||
| 7025 | mutex_lock(&root->fs_info->cleaner_mutex); | 7273 | /* |
| 7026 | btrfs_clean_old_snapshots(info->tree_root); | 7274 | * ok we don't have enough space, but maybe we have free space on our |
| 7027 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 7275 | * devices to allocate new chunks for relocation, so loop through our |
| 7028 | mutex_unlock(&root->fs_info->cleaner_mutex); | 7276 | * alloc devices and guess if we have enough space. However, if we |
| 7277 | * were marked as full, then we know there aren't enough chunks, and we | ||
| 7278 | * can just return. | ||
| 7279 | */ | ||
| 7280 | ret = -1; | ||
| 7281 | if (full) | ||
| 7282 | goto out; | ||
| 7029 | 7283 | ||
| 7030 | trans = btrfs_start_transaction(info->tree_root, 1); | 7284 | mutex_lock(&root->fs_info->chunk_mutex); |
| 7031 | btrfs_commit_transaction(trans, info->tree_root); | 7285 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
| 7286 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
| 7287 | u64 dev_offset, max_avail; | ||
| 7032 | 7288 | ||
| 7033 | while (1) { | 7289 | /* |
| 7034 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 7290 | * check to make sure we can actually find a chunk with enough |
| 7035 | if (ret < 0) | 7291 | * space to fit our block group in. |
| 7036 | goto out; | 7292 | */ |
| 7037 | next: | 7293 | if (device->total_bytes > device->bytes_used + min_free) { |
| 7038 | leaf = path->nodes[0]; | 7294 | ret = find_free_dev_extent(NULL, device, min_free, |
| 7039 | nritems = btrfs_header_nritems(leaf); | 7295 | &dev_offset, &max_avail); |
| 7040 | if (path->slots[0] >= nritems) { | 7296 | if (!ret) |
| 7041 | ret = btrfs_next_leaf(root, path); | ||
| 7042 | if (ret < 0) | ||
| 7043 | goto out; | ||
| 7044 | if (ret == 1) { | ||
| 7045 | ret = 0; | ||
| 7046 | break; | 7297 | break; |
| 7047 | } | 7298 | ret = -1; |
| 7048 | leaf = path->nodes[0]; | ||
| 7049 | nritems = btrfs_header_nritems(leaf); | ||
| 7050 | } | 7299 | } |
| 7051 | |||
| 7052 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 7053 | |||
| 7054 | if (key.objectid >= block_group->key.objectid + | ||
| 7055 | block_group->key.offset) | ||
| 7056 | break; | ||
| 7057 | |||
| 7058 | if (progress && need_resched()) { | ||
| 7059 | btrfs_release_path(root, path); | ||
| 7060 | cond_resched(); | ||
| 7061 | progress = 0; | ||
| 7062 | continue; | ||
| 7063 | } | ||
| 7064 | progress = 1; | ||
| 7065 | |||
| 7066 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || | ||
| 7067 | key.objectid + key.offset <= cur_byte) { | ||
| 7068 | path->slots[0]++; | ||
| 7069 | goto next; | ||
| 7070 | } | ||
| 7071 | |||
| 7072 | total_found++; | ||
| 7073 | cur_byte = key.objectid + key.offset; | ||
| 7074 | btrfs_release_path(root, path); | ||
| 7075 | |||
| 7076 | __alloc_chunk_for_shrink(root, block_group, 0); | ||
| 7077 | ret = relocate_one_extent(root, path, &key, block_group, | ||
| 7078 | reloc_inode, pass); | ||
| 7079 | BUG_ON(ret < 0); | ||
| 7080 | if (ret > 0) | ||
| 7081 | skipped++; | ||
| 7082 | |||
| 7083 | key.objectid = cur_byte; | ||
| 7084 | key.type = 0; | ||
| 7085 | key.offset = 0; | ||
| 7086 | } | ||
| 7087 | |||
| 7088 | btrfs_release_path(root, path); | ||
| 7089 | |||
| 7090 | if (pass == 0) { | ||
| 7091 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | ||
| 7092 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); | ||
| 7093 | } | 7300 | } |
| 7094 | 7301 | mutex_unlock(&root->fs_info->chunk_mutex); | |
| 7095 | if (total_found > 0) { | ||
| 7096 | printk(KERN_INFO "btrfs found %llu extents in pass %d\n", | ||
| 7097 | (unsigned long long)total_found, pass); | ||
| 7098 | pass++; | ||
| 7099 | if (total_found == skipped && pass > 2) { | ||
| 7100 | iput(reloc_inode); | ||
| 7101 | reloc_inode = create_reloc_inode(info, block_group); | ||
| 7102 | pass = 0; | ||
| 7103 | } | ||
| 7104 | goto again; | ||
| 7105 | } | ||
| 7106 | |||
| 7107 | /* delete reloc_inode */ | ||
| 7108 | iput(reloc_inode); | ||
| 7109 | |||
| 7110 | /* unpin extents in this range */ | ||
| 7111 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
| 7112 | btrfs_commit_transaction(trans, info->tree_root); | ||
| 7113 | |||
| 7114 | spin_lock(&block_group->lock); | ||
| 7115 | WARN_ON(block_group->pinned > 0); | ||
| 7116 | WARN_ON(block_group->reserved > 0); | ||
| 7117 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | ||
| 7118 | spin_unlock(&block_group->lock); | ||
| 7119 | btrfs_put_block_group(block_group); | ||
| 7120 | ret = 0; | ||
| 7121 | out: | 7302 | out: |
| 7122 | btrfs_free_path(path); | 7303 | btrfs_put_block_group(block_group); |
| 7123 | return ret; | 7304 | return ret; |
| 7124 | } | 7305 | } |
| 7125 | #endif | ||
| 7126 | 7306 | ||
| 7127 | static int find_first_block_group(struct btrfs_root *root, | 7307 | static int find_first_block_group(struct btrfs_root *root, |
| 7128 | struct btrfs_path *path, struct btrfs_key *key) | 7308 | struct btrfs_path *path, struct btrfs_key *key) |
| @@ -7165,8 +7345,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 7165 | { | 7345 | { |
| 7166 | struct btrfs_block_group_cache *block_group; | 7346 | struct btrfs_block_group_cache *block_group; |
| 7167 | struct btrfs_space_info *space_info; | 7347 | struct btrfs_space_info *space_info; |
| 7348 | struct btrfs_caching_control *caching_ctl; | ||
| 7168 | struct rb_node *n; | 7349 | struct rb_node *n; |
| 7169 | 7350 | ||
| 7351 | down_write(&info->extent_commit_sem); | ||
| 7352 | while (!list_empty(&info->caching_block_groups)) { | ||
| 7353 | caching_ctl = list_entry(info->caching_block_groups.next, | ||
| 7354 | struct btrfs_caching_control, list); | ||
| 7355 | list_del(&caching_ctl->list); | ||
| 7356 | put_caching_control(caching_ctl); | ||
| 7357 | } | ||
| 7358 | up_write(&info->extent_commit_sem); | ||
| 7359 | |||
| 7170 | spin_lock(&info->block_group_cache_lock); | 7360 | spin_lock(&info->block_group_cache_lock); |
| 7171 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 7361 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
| 7172 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 7362 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
| @@ -7180,8 +7370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 7180 | up_write(&block_group->space_info->groups_sem); | 7370 | up_write(&block_group->space_info->groups_sem); |
| 7181 | 7371 | ||
| 7182 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7372 | if (block_group->cached == BTRFS_CACHE_STARTED) |
| 7183 | wait_event(block_group->caching_q, | 7373 | wait_block_group_cache_done(block_group); |
| 7184 | block_group_cache_done(block_group)); | ||
| 7185 | 7374 | ||
| 7186 | btrfs_remove_free_space_cache(block_group); | 7375 | btrfs_remove_free_space_cache(block_group); |
| 7187 | 7376 | ||
| @@ -7251,7 +7440,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7251 | spin_lock_init(&cache->lock); | 7440 | spin_lock_init(&cache->lock); |
| 7252 | spin_lock_init(&cache->tree_lock); | 7441 | spin_lock_init(&cache->tree_lock); |
| 7253 | cache->fs_info = info; | 7442 | cache->fs_info = info; |
| 7254 | init_waitqueue_head(&cache->caching_q); | ||
| 7255 | INIT_LIST_HEAD(&cache->list); | 7443 | INIT_LIST_HEAD(&cache->list); |
| 7256 | INIT_LIST_HEAD(&cache->cluster_list); | 7444 | INIT_LIST_HEAD(&cache->cluster_list); |
| 7257 | 7445 | ||
| @@ -7273,8 +7461,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7273 | cache->flags = btrfs_block_group_flags(&cache->item); | 7461 | cache->flags = btrfs_block_group_flags(&cache->item); |
| 7274 | cache->sectorsize = root->sectorsize; | 7462 | cache->sectorsize = root->sectorsize; |
| 7275 | 7463 | ||
| 7276 | remove_sb_from_cache(root, cache); | ||
| 7277 | |||
| 7278 | /* | 7464 | /* |
| 7279 | * check for two cases, either we are full, and therefore | 7465 | * check for two cases, either we are full, and therefore |
| 7280 | * don't need to bother with the caching work since we won't | 7466 | * don't need to bother with the caching work since we won't |
| @@ -7283,13 +7469,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7283 | * time, particularly in the full case. | 7469 | * time, particularly in the full case. |
| 7284 | */ | 7470 | */ |
| 7285 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 7471 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
| 7472 | exclude_super_stripes(root, cache); | ||
| 7473 | cache->last_byte_to_unpin = (u64)-1; | ||
| 7286 | cache->cached = BTRFS_CACHE_FINISHED; | 7474 | cache->cached = BTRFS_CACHE_FINISHED; |
| 7475 | free_excluded_extents(root, cache); | ||
| 7287 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 7476 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
| 7477 | exclude_super_stripes(root, cache); | ||
| 7478 | cache->last_byte_to_unpin = (u64)-1; | ||
| 7288 | cache->cached = BTRFS_CACHE_FINISHED; | 7479 | cache->cached = BTRFS_CACHE_FINISHED; |
| 7289 | add_new_free_space(cache, root->fs_info, | 7480 | add_new_free_space(cache, root->fs_info, |
| 7290 | found_key.objectid, | 7481 | found_key.objectid, |
| 7291 | found_key.objectid + | 7482 | found_key.objectid + |
| 7292 | found_key.offset); | 7483 | found_key.offset); |
| 7484 | free_excluded_extents(root, cache); | ||
| 7293 | } | 7485 | } |
| 7294 | 7486 | ||
| 7295 | ret = update_space_info(info, cache->flags, found_key.offset, | 7487 | ret = update_space_info(info, cache->flags, found_key.offset, |
| @@ -7297,6 +7489,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7297 | &space_info); | 7489 | &space_info); |
| 7298 | BUG_ON(ret); | 7490 | BUG_ON(ret); |
| 7299 | cache->space_info = space_info; | 7491 | cache->space_info = space_info; |
| 7492 | spin_lock(&cache->space_info->lock); | ||
| 7493 | cache->space_info->bytes_super += cache->bytes_super; | ||
| 7494 | spin_unlock(&cache->space_info->lock); | ||
| 7495 | |||
| 7300 | down_write(&space_info->groups_sem); | 7496 | down_write(&space_info->groups_sem); |
| 7301 | list_add_tail(&cache->list, &space_info->block_groups); | 7497 | list_add_tail(&cache->list, &space_info->block_groups); |
| 7302 | up_write(&space_info->groups_sem); | 7498 | up_write(&space_info->groups_sem); |
| @@ -7346,7 +7542,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7346 | atomic_set(&cache->count, 1); | 7542 | atomic_set(&cache->count, 1); |
| 7347 | spin_lock_init(&cache->lock); | 7543 | spin_lock_init(&cache->lock); |
| 7348 | spin_lock_init(&cache->tree_lock); | 7544 | spin_lock_init(&cache->tree_lock); |
| 7349 | init_waitqueue_head(&cache->caching_q); | ||
| 7350 | INIT_LIST_HEAD(&cache->list); | 7545 | INIT_LIST_HEAD(&cache->list); |
| 7351 | INIT_LIST_HEAD(&cache->cluster_list); | 7546 | INIT_LIST_HEAD(&cache->cluster_list); |
| 7352 | 7547 | ||
| @@ -7355,15 +7550,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7355 | cache->flags = type; | 7550 | cache->flags = type; |
| 7356 | btrfs_set_block_group_flags(&cache->item, type); | 7551 | btrfs_set_block_group_flags(&cache->item, type); |
| 7357 | 7552 | ||
| 7553 | cache->last_byte_to_unpin = (u64)-1; | ||
| 7358 | cache->cached = BTRFS_CACHE_FINISHED; | 7554 | cache->cached = BTRFS_CACHE_FINISHED; |
| 7359 | remove_sb_from_cache(root, cache); | 7555 | exclude_super_stripes(root, cache); |
| 7360 | 7556 | ||
| 7361 | add_new_free_space(cache, root->fs_info, chunk_offset, | 7557 | add_new_free_space(cache, root->fs_info, chunk_offset, |
| 7362 | chunk_offset + size); | 7558 | chunk_offset + size); |
| 7363 | 7559 | ||
| 7560 | free_excluded_extents(root, cache); | ||
| 7561 | |||
| 7364 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7562 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
| 7365 | &cache->space_info); | 7563 | &cache->space_info); |
| 7366 | BUG_ON(ret); | 7564 | BUG_ON(ret); |
| 7565 | |||
| 7566 | spin_lock(&cache->space_info->lock); | ||
| 7567 | cache->space_info->bytes_super += cache->bytes_super; | ||
| 7568 | spin_unlock(&cache->space_info->lock); | ||
| 7569 | |||
| 7367 | down_write(&cache->space_info->groups_sem); | 7570 | down_write(&cache->space_info->groups_sem); |
| 7368 | list_add_tail(&cache->list, &cache->space_info->block_groups); | 7571 | list_add_tail(&cache->list, &cache->space_info->block_groups); |
| 7369 | up_write(&cache->space_info->groups_sem); | 7572 | up_write(&cache->space_info->groups_sem); |
| @@ -7429,8 +7632,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 7429 | up_write(&block_group->space_info->groups_sem); | 7632 | up_write(&block_group->space_info->groups_sem); |
| 7430 | 7633 | ||
| 7431 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7634 | if (block_group->cached == BTRFS_CACHE_STARTED) |
| 7432 | wait_event(block_group->caching_q, | 7635 | wait_block_group_cache_done(block_group); |
| 7433 | block_group_cache_done(block_group)); | ||
| 7434 | 7636 | ||
| 7435 | btrfs_remove_free_space_cache(block_group); | 7637 | btrfs_remove_free_space_cache(block_group); |
| 7436 | 7638 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68260180f587..96577e8bf9fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | |||
| 280 | return NULL; | 280 | return NULL; |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | ||
| 284 | struct extent_state *other) | ||
| 285 | { | ||
| 286 | if (tree->ops && tree->ops->merge_extent_hook) | ||
| 287 | tree->ops->merge_extent_hook(tree->mapping->host, new, | ||
| 288 | other); | ||
| 289 | } | ||
| 290 | |||
| 283 | /* | 291 | /* |
| 284 | * utility function to look for merge candidates inside a given range. | 292 | * utility function to look for merge candidates inside a given range. |
| 285 | * Any extents with matching state are merged together into a single | 293 | * Any extents with matching state are merged together into a single |
| @@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 303 | other = rb_entry(other_node, struct extent_state, rb_node); | 311 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 304 | if (other->end == state->start - 1 && | 312 | if (other->end == state->start - 1 && |
| 305 | other->state == state->state) { | 313 | other->state == state->state) { |
| 314 | merge_cb(tree, state, other); | ||
| 306 | state->start = other->start; | 315 | state->start = other->start; |
| 307 | other->tree = NULL; | 316 | other->tree = NULL; |
| 308 | rb_erase(&other->rb_node, &tree->state); | 317 | rb_erase(&other->rb_node, &tree->state); |
| @@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 314 | other = rb_entry(other_node, struct extent_state, rb_node); | 323 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 315 | if (other->start == state->end + 1 && | 324 | if (other->start == state->end + 1 && |
| 316 | other->state == state->state) { | 325 | other->state == state->state) { |
| 326 | merge_cb(tree, state, other); | ||
| 317 | other->start = state->start; | 327 | other->start = state->start; |
| 318 | state->tree = NULL; | 328 | state->tree = NULL; |
| 319 | rb_erase(&state->rb_node, &tree->state); | 329 | rb_erase(&state->rb_node, &tree->state); |
| 320 | free_extent_state(state); | 330 | free_extent_state(state); |
| 331 | state = NULL; | ||
| 321 | } | 332 | } |
| 322 | } | 333 | } |
| 334 | |||
| 323 | return 0; | 335 | return 0; |
| 324 | } | 336 | } |
| 325 | 337 | ||
| 326 | static void set_state_cb(struct extent_io_tree *tree, | 338 | static int set_state_cb(struct extent_io_tree *tree, |
| 327 | struct extent_state *state, | 339 | struct extent_state *state, |
| 328 | unsigned long bits) | 340 | unsigned long bits) |
| 329 | { | 341 | { |
| 330 | if (tree->ops && tree->ops->set_bit_hook) { | 342 | if (tree->ops && tree->ops->set_bit_hook) { |
| 331 | tree->ops->set_bit_hook(tree->mapping->host, state->start, | 343 | return tree->ops->set_bit_hook(tree->mapping->host, |
| 332 | state->end, state->state, bits); | 344 | state->start, state->end, |
| 345 | state->state, bits); | ||
| 333 | } | 346 | } |
| 347 | |||
| 348 | return 0; | ||
| 334 | } | 349 | } |
| 335 | 350 | ||
| 336 | static void clear_state_cb(struct extent_io_tree *tree, | 351 | static void clear_state_cb(struct extent_io_tree *tree, |
| 337 | struct extent_state *state, | 352 | struct extent_state *state, |
| 338 | unsigned long bits) | 353 | unsigned long bits) |
| 339 | { | 354 | { |
| 340 | if (tree->ops && tree->ops->clear_bit_hook) { | 355 | if (tree->ops && tree->ops->clear_bit_hook) |
| 341 | tree->ops->clear_bit_hook(tree->mapping->host, state->start, | 356 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
| 342 | state->end, state->state, bits); | ||
| 343 | } | ||
| 344 | } | 357 | } |
| 345 | 358 | ||
| 346 | /* | 359 | /* |
| @@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 358 | int bits) | 371 | int bits) |
| 359 | { | 372 | { |
| 360 | struct rb_node *node; | 373 | struct rb_node *node; |
| 374 | int ret; | ||
| 361 | 375 | ||
| 362 | if (end < start) { | 376 | if (end < start) { |
| 363 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 377 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
| @@ -365,12 +379,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 365 | (unsigned long long)start); | 379 | (unsigned long long)start); |
| 366 | WARN_ON(1); | 380 | WARN_ON(1); |
| 367 | } | 381 | } |
| 382 | state->start = start; | ||
| 383 | state->end = end; | ||
| 384 | ret = set_state_cb(tree, state, bits); | ||
| 385 | if (ret) | ||
| 386 | return ret; | ||
| 387 | |||
| 368 | if (bits & EXTENT_DIRTY) | 388 | if (bits & EXTENT_DIRTY) |
| 369 | tree->dirty_bytes += end - start + 1; | 389 | tree->dirty_bytes += end - start + 1; |
| 370 | set_state_cb(tree, state, bits); | ||
| 371 | state->state |= bits; | 390 | state->state |= bits; |
| 372 | state->start = start; | ||
| 373 | state->end = end; | ||
| 374 | node = tree_insert(&tree->state, end, &state->rb_node); | 391 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 375 | if (node) { | 392 | if (node) { |
| 376 | struct extent_state *found; | 393 | struct extent_state *found; |
| @@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 387 | return 0; | 404 | return 0; |
| 388 | } | 405 | } |
| 389 | 406 | ||
| 407 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | ||
| 408 | u64 split) | ||
| 409 | { | ||
| 410 | if (tree->ops && tree->ops->split_extent_hook) | ||
| 411 | return tree->ops->split_extent_hook(tree->mapping->host, | ||
| 412 | orig, split); | ||
| 413 | return 0; | ||
| 414 | } | ||
| 415 | |||
| 390 | /* | 416 | /* |
| 391 | * split a given extent state struct in two, inserting the preallocated | 417 | * split a given extent state struct in two, inserting the preallocated |
| 392 | * struct 'prealloc' as the newly created second half. 'split' indicates an | 418 | * struct 'prealloc' as the newly created second half. 'split' indicates an |
| @@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 405 | struct extent_state *prealloc, u64 split) | 431 | struct extent_state *prealloc, u64 split) |
| 406 | { | 432 | { |
| 407 | struct rb_node *node; | 433 | struct rb_node *node; |
| 434 | |||
| 435 | split_cb(tree, orig, split); | ||
| 436 | |||
| 408 | prealloc->start = orig->start; | 437 | prealloc->start = orig->start; |
| 409 | prealloc->end = split - 1; | 438 | prealloc->end = split - 1; |
| 410 | prealloc->state = orig->state; | 439 | prealloc->state = orig->state; |
| @@ -431,7 +460,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 431 | struct extent_state *state, int bits, int wake, | 460 | struct extent_state *state, int bits, int wake, |
| 432 | int delete) | 461 | int delete) |
| 433 | { | 462 | { |
| 434 | int ret = state->state & bits; | 463 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; |
| 464 | int ret = state->state & bits_to_clear; | ||
| 435 | 465 | ||
| 436 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 466 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
| 437 | u64 range = state->end - state->start + 1; | 467 | u64 range = state->end - state->start + 1; |
| @@ -439,7 +469,7 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 439 | tree->dirty_bytes -= range; | 469 | tree->dirty_bytes -= range; |
| 440 | } | 470 | } |
| 441 | clear_state_cb(tree, state, bits); | 471 | clear_state_cb(tree, state, bits); |
| 442 | state->state &= ~bits; | 472 | state->state &= ~bits_to_clear; |
| 443 | if (wake) | 473 | if (wake) |
| 444 | wake_up(&state->wq); | 474 | wake_up(&state->wq); |
| 445 | if (delete || state->state == 0) { | 475 | if (delete || state->state == 0) { |
| @@ -471,10 +501,14 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 471 | * bits were already set, or zero if none of the bits were already set. | 501 | * bits were already set, or zero if none of the bits were already set. |
| 472 | */ | 502 | */ |
| 473 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 503 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 474 | int bits, int wake, int delete, gfp_t mask) | 504 | int bits, int wake, int delete, |
| 505 | struct extent_state **cached_state, | ||
| 506 | gfp_t mask) | ||
| 475 | { | 507 | { |
| 476 | struct extent_state *state; | 508 | struct extent_state *state; |
| 509 | struct extent_state *cached; | ||
| 477 | struct extent_state *prealloc = NULL; | 510 | struct extent_state *prealloc = NULL; |
| 511 | struct rb_node *next_node; | ||
| 478 | struct rb_node *node; | 512 | struct rb_node *node; |
| 479 | u64 last_end; | 513 | u64 last_end; |
| 480 | int err; | 514 | int err; |
| @@ -488,6 +522,17 @@ again: | |||
| 488 | } | 522 | } |
| 489 | 523 | ||
| 490 | spin_lock(&tree->lock); | 524 | spin_lock(&tree->lock); |
| 525 | if (cached_state) { | ||
| 526 | cached = *cached_state; | ||
| 527 | *cached_state = NULL; | ||
| 528 | cached_state = NULL; | ||
| 529 | if (cached && cached->tree && cached->start == start) { | ||
| 530 | atomic_dec(&cached->refs); | ||
| 531 | state = cached; | ||
| 532 | goto hit_next; | ||
| 533 | } | ||
| 534 | free_extent_state(cached); | ||
| 535 | } | ||
| 491 | /* | 536 | /* |
| 492 | * this search will find the extents that end after | 537 | * this search will find the extents that end after |
| 493 | * our range starts | 538 | * our range starts |
| @@ -496,6 +541,7 @@ again: | |||
| 496 | if (!node) | 541 | if (!node) |
| 497 | goto out; | 542 | goto out; |
| 498 | state = rb_entry(node, struct extent_state, rb_node); | 543 | state = rb_entry(node, struct extent_state, rb_node); |
| 544 | hit_next: | ||
| 499 | if (state->start > end) | 545 | if (state->start > end) |
| 500 | goto out; | 546 | goto out; |
| 501 | WARN_ON(state->end < start); | 547 | WARN_ON(state->end < start); |
| @@ -526,13 +572,11 @@ again: | |||
| 526 | if (err) | 572 | if (err) |
| 527 | goto out; | 573 | goto out; |
| 528 | if (state->end <= end) { | 574 | if (state->end <= end) { |
| 529 | set |= clear_state_bit(tree, state, bits, | 575 | set |= clear_state_bit(tree, state, bits, wake, |
| 530 | wake, delete); | 576 | delete); |
| 531 | if (last_end == (u64)-1) | 577 | if (last_end == (u64)-1) |
| 532 | goto out; | 578 | goto out; |
| 533 | start = last_end + 1; | 579 | start = last_end + 1; |
| 534 | } else { | ||
| 535 | start = state->start; | ||
| 536 | } | 580 | } |
| 537 | goto search_again; | 581 | goto search_again; |
| 538 | } | 582 | } |
| @@ -547,19 +591,30 @@ again: | |||
| 547 | prealloc = alloc_extent_state(GFP_ATOMIC); | 591 | prealloc = alloc_extent_state(GFP_ATOMIC); |
| 548 | err = split_state(tree, state, prealloc, end + 1); | 592 | err = split_state(tree, state, prealloc, end + 1); |
| 549 | BUG_ON(err == -EEXIST); | 593 | BUG_ON(err == -EEXIST); |
| 550 | |||
| 551 | if (wake) | 594 | if (wake) |
| 552 | wake_up(&state->wq); | 595 | wake_up(&state->wq); |
| 553 | set |= clear_state_bit(tree, prealloc, bits, | 596 | |
| 554 | wake, delete); | 597 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); |
| 598 | |||
| 555 | prealloc = NULL; | 599 | prealloc = NULL; |
| 556 | goto out; | 600 | goto out; |
| 557 | } | 601 | } |
| 558 | 602 | ||
| 603 | if (state->end < end && prealloc && !need_resched()) | ||
| 604 | next_node = rb_next(&state->rb_node); | ||
| 605 | else | ||
| 606 | next_node = NULL; | ||
| 607 | |||
| 559 | set |= clear_state_bit(tree, state, bits, wake, delete); | 608 | set |= clear_state_bit(tree, state, bits, wake, delete); |
| 560 | if (last_end == (u64)-1) | 609 | if (last_end == (u64)-1) |
| 561 | goto out; | 610 | goto out; |
| 562 | start = last_end + 1; | 611 | start = last_end + 1; |
| 612 | if (start <= end && next_node) { | ||
| 613 | state = rb_entry(next_node, struct extent_state, | ||
| 614 | rb_node); | ||
| 615 | if (state->start == start) | ||
| 616 | goto hit_next; | ||
| 617 | } | ||
| 563 | goto search_again; | 618 | goto search_again; |
| 564 | 619 | ||
| 565 | out: | 620 | out: |
| @@ -641,40 +696,59 @@ out: | |||
| 641 | return 0; | 696 | return 0; |
| 642 | } | 697 | } |
| 643 | 698 | ||
| 644 | static void set_state_bits(struct extent_io_tree *tree, | 699 | static int set_state_bits(struct extent_io_tree *tree, |
| 645 | struct extent_state *state, | 700 | struct extent_state *state, |
| 646 | int bits) | 701 | int bits) |
| 647 | { | 702 | { |
| 703 | int ret; | ||
| 704 | |||
| 705 | ret = set_state_cb(tree, state, bits); | ||
| 706 | if (ret) | ||
| 707 | return ret; | ||
| 708 | |||
| 648 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 709 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
| 649 | u64 range = state->end - state->start + 1; | 710 | u64 range = state->end - state->start + 1; |
| 650 | tree->dirty_bytes += range; | 711 | tree->dirty_bytes += range; |
| 651 | } | 712 | } |
| 652 | set_state_cb(tree, state, bits); | ||
| 653 | state->state |= bits; | 713 | state->state |= bits; |
| 714 | |||
| 715 | return 0; | ||
| 716 | } | ||
| 717 | |||
| 718 | static void cache_state(struct extent_state *state, | ||
| 719 | struct extent_state **cached_ptr) | ||
| 720 | { | ||
| 721 | if (cached_ptr && !(*cached_ptr)) { | ||
| 722 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { | ||
| 723 | *cached_ptr = state; | ||
| 724 | atomic_inc(&state->refs); | ||
| 725 | } | ||
| 726 | } | ||
| 654 | } | 727 | } |
| 655 | 728 | ||
| 656 | /* | 729 | /* |
| 657 | * set some bits on a range in the tree. This may require allocations | 730 | * set some bits on a range in the tree. This may require allocations or |
| 658 | * or sleeping, so the gfp mask is used to indicate what is allowed. | 731 | * sleeping, so the gfp mask is used to indicate what is allowed. |
| 659 | * | 732 | * |
| 660 | * If 'exclusive' == 1, this will fail with -EEXIST if some part of the | 733 | * If any of the exclusive bits are set, this will fail with -EEXIST if some |
| 661 | * range already has the desired bits set. The start of the existing | 734 | * part of the range already has the desired bits set. The start of the |
| 662 | * range is returned in failed_start in this case. | 735 | * existing range is returned in failed_start in this case. |
| 663 | * | 736 | * |
| 664 | * [start, end] is inclusive | 737 | * [start, end] is inclusive This takes the tree lock. |
| 665 | * This takes the tree lock. | ||
| 666 | */ | 738 | */ |
| 739 | |||
| 667 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 740 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 668 | int bits, int exclusive, u64 *failed_start, | 741 | int bits, int exclusive_bits, u64 *failed_start, |
| 742 | struct extent_state **cached_state, | ||
| 669 | gfp_t mask) | 743 | gfp_t mask) |
| 670 | { | 744 | { |
| 671 | struct extent_state *state; | 745 | struct extent_state *state; |
| 672 | struct extent_state *prealloc = NULL; | 746 | struct extent_state *prealloc = NULL; |
| 673 | struct rb_node *node; | 747 | struct rb_node *node; |
| 674 | int err = 0; | 748 | int err = 0; |
| 675 | int set; | ||
| 676 | u64 last_start; | 749 | u64 last_start; |
| 677 | u64 last_end; | 750 | u64 last_end; |
| 751 | |||
| 678 | again: | 752 | again: |
| 679 | if (!prealloc && (mask & __GFP_WAIT)) { | 753 | if (!prealloc && (mask & __GFP_WAIT)) { |
| 680 | prealloc = alloc_extent_state(mask); | 754 | prealloc = alloc_extent_state(mask); |
| @@ -683,6 +757,13 @@ again: | |||
| 683 | } | 757 | } |
| 684 | 758 | ||
| 685 | spin_lock(&tree->lock); | 759 | spin_lock(&tree->lock); |
| 760 | if (cached_state && *cached_state) { | ||
| 761 | state = *cached_state; | ||
| 762 | if (state->start == start && state->tree) { | ||
| 763 | node = &state->rb_node; | ||
| 764 | goto hit_next; | ||
| 765 | } | ||
| 766 | } | ||
| 686 | /* | 767 | /* |
| 687 | * this search will find all the extents that end after | 768 | * this search will find all the extents that end after |
| 688 | * our range starts. | 769 | * our range starts. |
| @@ -694,8 +775,8 @@ again: | |||
| 694 | BUG_ON(err == -EEXIST); | 775 | BUG_ON(err == -EEXIST); |
| 695 | goto out; | 776 | goto out; |
| 696 | } | 777 | } |
| 697 | |||
| 698 | state = rb_entry(node, struct extent_state, rb_node); | 778 | state = rb_entry(node, struct extent_state, rb_node); |
| 779 | hit_next: | ||
| 699 | last_start = state->start; | 780 | last_start = state->start; |
| 700 | last_end = state->end; | 781 | last_end = state->end; |
| 701 | 782 | ||
| @@ -706,17 +787,32 @@ again: | |||
| 706 | * Just lock what we found and keep going | 787 | * Just lock what we found and keep going |
| 707 | */ | 788 | */ |
| 708 | if (state->start == start && state->end <= end) { | 789 | if (state->start == start && state->end <= end) { |
| 709 | set = state->state & bits; | 790 | struct rb_node *next_node; |
| 710 | if (set && exclusive) { | 791 | if (state->state & exclusive_bits) { |
| 711 | *failed_start = state->start; | 792 | *failed_start = state->start; |
| 712 | err = -EEXIST; | 793 | err = -EEXIST; |
| 713 | goto out; | 794 | goto out; |
| 714 | } | 795 | } |
| 715 | set_state_bits(tree, state, bits); | 796 | |
| 797 | err = set_state_bits(tree, state, bits); | ||
| 798 | if (err) | ||
| 799 | goto out; | ||
| 800 | |||
| 801 | cache_state(state, cached_state); | ||
| 716 | merge_state(tree, state); | 802 | merge_state(tree, state); |
| 717 | if (last_end == (u64)-1) | 803 | if (last_end == (u64)-1) |
| 718 | goto out; | 804 | goto out; |
| 805 | |||
| 719 | start = last_end + 1; | 806 | start = last_end + 1; |
| 807 | if (start < end && prealloc && !need_resched()) { | ||
| 808 | next_node = rb_next(node); | ||
| 809 | if (next_node) { | ||
| 810 | state = rb_entry(next_node, struct extent_state, | ||
| 811 | rb_node); | ||
| 812 | if (state->start == start) | ||
| 813 | goto hit_next; | ||
| 814 | } | ||
| 815 | } | ||
| 720 | goto search_again; | 816 | goto search_again; |
| 721 | } | 817 | } |
| 722 | 818 | ||
| @@ -737,8 +833,7 @@ again: | |||
| 737 | * desired bit on it. | 833 | * desired bit on it. |
| 738 | */ | 834 | */ |
| 739 | if (state->start < start) { | 835 | if (state->start < start) { |
| 740 | set = state->state & bits; | 836 | if (state->state & exclusive_bits) { |
| 741 | if (exclusive && set) { | ||
| 742 | *failed_start = start; | 837 | *failed_start = start; |
| 743 | err = -EEXIST; | 838 | err = -EEXIST; |
| 744 | goto out; | 839 | goto out; |
| @@ -749,13 +844,14 @@ again: | |||
| 749 | if (err) | 844 | if (err) |
| 750 | goto out; | 845 | goto out; |
| 751 | if (state->end <= end) { | 846 | if (state->end <= end) { |
| 752 | set_state_bits(tree, state, bits); | 847 | err = set_state_bits(tree, state, bits); |
| 848 | if (err) | ||
| 849 | goto out; | ||
| 850 | cache_state(state, cached_state); | ||
| 753 | merge_state(tree, state); | 851 | merge_state(tree, state); |
| 754 | if (last_end == (u64)-1) | 852 | if (last_end == (u64)-1) |
| 755 | goto out; | 853 | goto out; |
| 756 | start = last_end + 1; | 854 | start = last_end + 1; |
| 757 | } else { | ||
| 758 | start = state->start; | ||
| 759 | } | 855 | } |
| 760 | goto search_again; | 856 | goto search_again; |
| 761 | } | 857 | } |
| @@ -774,10 +870,13 @@ again: | |||
| 774 | this_end = last_start - 1; | 870 | this_end = last_start - 1; |
| 775 | err = insert_state(tree, prealloc, start, this_end, | 871 | err = insert_state(tree, prealloc, start, this_end, |
| 776 | bits); | 872 | bits); |
| 777 | prealloc = NULL; | ||
| 778 | BUG_ON(err == -EEXIST); | 873 | BUG_ON(err == -EEXIST); |
| 779 | if (err) | 874 | if (err) { |
| 875 | prealloc = NULL; | ||
| 780 | goto out; | 876 | goto out; |
| 877 | } | ||
| 878 | cache_state(prealloc, cached_state); | ||
| 879 | prealloc = NULL; | ||
| 781 | start = this_end + 1; | 880 | start = this_end + 1; |
| 782 | goto search_again; | 881 | goto search_again; |
| 783 | } | 882 | } |
| @@ -788,8 +887,7 @@ again: | |||
| 788 | * on the first half | 887 | * on the first half |
| 789 | */ | 888 | */ |
| 790 | if (state->start <= end && state->end > end) { | 889 | if (state->start <= end && state->end > end) { |
| 791 | set = state->state & bits; | 890 | if (state->state & exclusive_bits) { |
| 792 | if (exclusive && set) { | ||
| 793 | *failed_start = start; | 891 | *failed_start = start; |
| 794 | err = -EEXIST; | 892 | err = -EEXIST; |
| 795 | goto out; | 893 | goto out; |
| @@ -797,7 +895,12 @@ again: | |||
| 797 | err = split_state(tree, state, prealloc, end + 1); | 895 | err = split_state(tree, state, prealloc, end + 1); |
| 798 | BUG_ON(err == -EEXIST); | 896 | BUG_ON(err == -EEXIST); |
| 799 | 897 | ||
| 800 | set_state_bits(tree, prealloc, bits); | 898 | err = set_state_bits(tree, prealloc, bits); |
| 899 | if (err) { | ||
| 900 | prealloc = NULL; | ||
| 901 | goto out; | ||
| 902 | } | ||
| 903 | cache_state(prealloc, cached_state); | ||
| 801 | merge_state(tree, prealloc); | 904 | merge_state(tree, prealloc); |
| 802 | prealloc = NULL; | 905 | prealloc = NULL; |
| 803 | goto out; | 906 | goto out; |
| @@ -826,86 +929,65 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 826 | gfp_t mask) | 929 | gfp_t mask) |
| 827 | { | 930 | { |
| 828 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, | 931 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, |
| 829 | mask); | 932 | NULL, mask); |
| 830 | } | ||
| 831 | |||
| 832 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 833 | gfp_t mask) | ||
| 834 | { | ||
| 835 | return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); | ||
| 836 | } | 933 | } |
| 837 | 934 | ||
| 838 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 935 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 839 | int bits, gfp_t mask) | 936 | int bits, gfp_t mask) |
| 840 | { | 937 | { |
| 841 | return set_extent_bit(tree, start, end, bits, 0, NULL, | 938 | return set_extent_bit(tree, start, end, bits, 0, NULL, |
| 842 | mask); | 939 | NULL, mask); |
| 843 | } | 940 | } |
| 844 | 941 | ||
| 845 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 942 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 846 | int bits, gfp_t mask) | 943 | int bits, gfp_t mask) |
| 847 | { | 944 | { |
| 848 | return clear_extent_bit(tree, start, end, bits, 0, 0, mask); | 945 | return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); |
| 849 | } | 946 | } |
| 850 | 947 | ||
| 851 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 948 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
| 852 | gfp_t mask) | 949 | gfp_t mask) |
| 853 | { | 950 | { |
| 854 | return set_extent_bit(tree, start, end, | 951 | return set_extent_bit(tree, start, end, |
| 855 | EXTENT_DELALLOC | EXTENT_DIRTY, | 952 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, |
| 856 | 0, NULL, mask); | 953 | 0, NULL, NULL, mask); |
| 857 | } | 954 | } |
| 858 | 955 | ||
| 859 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 956 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 860 | gfp_t mask) | 957 | gfp_t mask) |
| 861 | { | 958 | { |
| 862 | return clear_extent_bit(tree, start, end, | 959 | return clear_extent_bit(tree, start, end, |
| 863 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); | 960 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 864 | } | 961 | EXTENT_DO_ACCOUNTING, 0, 0, |
| 865 | 962 | NULL, mask); | |
| 866 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 867 | gfp_t mask) | ||
| 868 | { | ||
| 869 | return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); | ||
| 870 | } | 963 | } |
| 871 | 964 | ||
| 872 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 965 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 873 | gfp_t mask) | 966 | gfp_t mask) |
| 874 | { | 967 | { |
| 875 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, | 968 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, |
| 876 | mask); | 969 | NULL, mask); |
| 877 | } | 970 | } |
| 878 | 971 | ||
| 879 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 880 | gfp_t mask) | 973 | gfp_t mask) |
| 881 | { | 974 | { |
| 882 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); | 975 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, |
| 976 | NULL, mask); | ||
| 883 | } | 977 | } |
| 884 | 978 | ||
| 885 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 979 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 886 | gfp_t mask) | 980 | gfp_t mask) |
| 887 | { | 981 | { |
| 888 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 982 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, |
| 889 | mask); | 983 | NULL, mask); |
| 890 | } | 984 | } |
| 891 | 985 | ||
| 892 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 986 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
| 893 | u64 end, gfp_t mask) | 987 | u64 end, gfp_t mask) |
| 894 | { | 988 | { |
| 895 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); | 989 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
| 896 | } | 990 | NULL, mask); |
| 897 | |||
| 898 | static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 899 | gfp_t mask) | ||
| 900 | { | ||
| 901 | return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, | ||
| 902 | 0, NULL, mask); | ||
| 903 | } | ||
| 904 | |||
| 905 | static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, | ||
| 906 | u64 end, gfp_t mask) | ||
| 907 | { | ||
| 908 | return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); | ||
| 909 | } | 991 | } |
| 910 | 992 | ||
| 911 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 993 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
| @@ -917,13 +999,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 917 | * either insert or lock state struct between start and end use mask to tell | 999 | * either insert or lock state struct between start and end use mask to tell |
| 918 | * us if waiting is desired. | 1000 | * us if waiting is desired. |
| 919 | */ | 1001 | */ |
| 920 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | 1002 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 1003 | int bits, struct extent_state **cached_state, gfp_t mask) | ||
| 921 | { | 1004 | { |
| 922 | int err; | 1005 | int err; |
| 923 | u64 failed_start; | 1006 | u64 failed_start; |
| 924 | while (1) { | 1007 | while (1) { |
| 925 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 1008 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, |
| 926 | &failed_start, mask); | 1009 | EXTENT_LOCKED, &failed_start, |
| 1010 | cached_state, mask); | ||
| 927 | if (err == -EEXIST && (mask & __GFP_WAIT)) { | 1011 | if (err == -EEXIST && (mask & __GFP_WAIT)) { |
| 928 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); | 1012 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); |
| 929 | start = failed_start; | 1013 | start = failed_start; |
| @@ -935,27 +1019,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | |||
| 935 | return err; | 1019 | return err; |
| 936 | } | 1020 | } |
| 937 | 1021 | ||
| 1022 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | ||
| 1023 | { | ||
| 1024 | return lock_extent_bits(tree, start, end, 0, NULL, mask); | ||
| 1025 | } | ||
| 1026 | |||
| 938 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1027 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 939 | gfp_t mask) | 1028 | gfp_t mask) |
| 940 | { | 1029 | { |
| 941 | int err; | 1030 | int err; |
| 942 | u64 failed_start; | 1031 | u64 failed_start; |
| 943 | 1032 | ||
| 944 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 1033 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, |
| 945 | &failed_start, mask); | 1034 | &failed_start, NULL, mask); |
| 946 | if (err == -EEXIST) { | 1035 | if (err == -EEXIST) { |
| 947 | if (failed_start > start) | 1036 | if (failed_start > start) |
| 948 | clear_extent_bit(tree, start, failed_start - 1, | 1037 | clear_extent_bit(tree, start, failed_start - 1, |
| 949 | EXTENT_LOCKED, 1, 0, mask); | 1038 | EXTENT_LOCKED, 1, 0, NULL, mask); |
| 950 | return 0; | 1039 | return 0; |
| 951 | } | 1040 | } |
| 952 | return 1; | 1041 | return 1; |
| 953 | } | 1042 | } |
| 954 | 1043 | ||
| 1044 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 1045 | struct extent_state **cached, gfp_t mask) | ||
| 1046 | { | ||
| 1047 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, | ||
| 1048 | mask); | ||
| 1049 | } | ||
| 1050 | |||
| 955 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1051 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 956 | gfp_t mask) | 1052 | gfp_t mask) |
| 957 | { | 1053 | { |
| 958 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); | 1054 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
| 1055 | mask); | ||
| 959 | } | 1056 | } |
| 960 | 1057 | ||
| 961 | /* | 1058 | /* |
| @@ -974,7 +1071,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 974 | page_cache_release(page); | 1071 | page_cache_release(page); |
| 975 | index++; | 1072 | index++; |
| 976 | } | 1073 | } |
| 977 | set_extent_dirty(tree, start, end, GFP_NOFS); | ||
| 978 | return 0; | 1074 | return 0; |
| 979 | } | 1075 | } |
| 980 | 1076 | ||
| @@ -994,7 +1090,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 994 | page_cache_release(page); | 1090 | page_cache_release(page); |
| 995 | index++; | 1091 | index++; |
| 996 | } | 1092 | } |
| 997 | set_extent_writeback(tree, start, end, GFP_NOFS); | ||
| 998 | return 0; | 1093 | return 0; |
| 999 | } | 1094 | } |
| 1000 | 1095 | ||
| @@ -1232,6 +1327,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
| 1232 | u64 delalloc_start; | 1327 | u64 delalloc_start; |
| 1233 | u64 delalloc_end; | 1328 | u64 delalloc_end; |
| 1234 | u64 found; | 1329 | u64 found; |
| 1330 | struct extent_state *cached_state = NULL; | ||
| 1235 | int ret; | 1331 | int ret; |
| 1236 | int loops = 0; | 1332 | int loops = 0; |
| 1237 | 1333 | ||
| @@ -1269,6 +1365,7 @@ again: | |||
| 1269 | /* some of the pages are gone, lets avoid looping by | 1365 | /* some of the pages are gone, lets avoid looping by |
| 1270 | * shortening the size of the delalloc range we're searching | 1366 | * shortening the size of the delalloc range we're searching |
| 1271 | */ | 1367 | */ |
| 1368 | free_extent_state(cached_state); | ||
| 1272 | if (!loops) { | 1369 | if (!loops) { |
| 1273 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); | 1370 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); |
| 1274 | max_bytes = PAGE_CACHE_SIZE - offset; | 1371 | max_bytes = PAGE_CACHE_SIZE - offset; |
| @@ -1282,18 +1379,21 @@ again: | |||
| 1282 | BUG_ON(ret); | 1379 | BUG_ON(ret); |
| 1283 | 1380 | ||
| 1284 | /* step three, lock the state bits for the whole range */ | 1381 | /* step three, lock the state bits for the whole range */ |
| 1285 | lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1382 | lock_extent_bits(tree, delalloc_start, delalloc_end, |
| 1383 | 0, &cached_state, GFP_NOFS); | ||
| 1286 | 1384 | ||
| 1287 | /* then test to make sure it is all still delalloc */ | 1385 | /* then test to make sure it is all still delalloc */ |
| 1288 | ret = test_range_bit(tree, delalloc_start, delalloc_end, | 1386 | ret = test_range_bit(tree, delalloc_start, delalloc_end, |
| 1289 | EXTENT_DELALLOC, 1); | 1387 | EXTENT_DELALLOC, 1, cached_state); |
| 1290 | if (!ret) { | 1388 | if (!ret) { |
| 1291 | unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1389 | unlock_extent_cached(tree, delalloc_start, delalloc_end, |
| 1390 | &cached_state, GFP_NOFS); | ||
| 1292 | __unlock_for_delalloc(inode, locked_page, | 1391 | __unlock_for_delalloc(inode, locked_page, |
| 1293 | delalloc_start, delalloc_end); | 1392 | delalloc_start, delalloc_end); |
| 1294 | cond_resched(); | 1393 | cond_resched(); |
| 1295 | goto again; | 1394 | goto again; |
| 1296 | } | 1395 | } |
| 1396 | free_extent_state(cached_state); | ||
| 1297 | *start = delalloc_start; | 1397 | *start = delalloc_start; |
| 1298 | *end = delalloc_end; | 1398 | *end = delalloc_end; |
| 1299 | out_failed: | 1399 | out_failed: |
| @@ -1303,11 +1403,7 @@ out_failed: | |||
| 1303 | int extent_clear_unlock_delalloc(struct inode *inode, | 1403 | int extent_clear_unlock_delalloc(struct inode *inode, |
| 1304 | struct extent_io_tree *tree, | 1404 | struct extent_io_tree *tree, |
| 1305 | u64 start, u64 end, struct page *locked_page, | 1405 | u64 start, u64 end, struct page *locked_page, |
| 1306 | int unlock_pages, | 1406 | unsigned long op) |
| 1307 | int clear_unlock, | ||
| 1308 | int clear_delalloc, int clear_dirty, | ||
| 1309 | int set_writeback, | ||
| 1310 | int end_writeback) | ||
| 1311 | { | 1407 | { |
| 1312 | int ret; | 1408 | int ret; |
| 1313 | struct page *pages[16]; | 1409 | struct page *pages[16]; |
| @@ -1317,16 +1413,21 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1317 | int i; | 1413 | int i; |
| 1318 | int clear_bits = 0; | 1414 | int clear_bits = 0; |
| 1319 | 1415 | ||
| 1320 | if (clear_unlock) | 1416 | if (op & EXTENT_CLEAR_UNLOCK) |
| 1321 | clear_bits |= EXTENT_LOCKED; | 1417 | clear_bits |= EXTENT_LOCKED; |
| 1322 | if (clear_dirty) | 1418 | if (op & EXTENT_CLEAR_DIRTY) |
| 1323 | clear_bits |= EXTENT_DIRTY; | 1419 | clear_bits |= EXTENT_DIRTY; |
| 1324 | 1420 | ||
| 1325 | if (clear_delalloc) | 1421 | if (op & EXTENT_CLEAR_DELALLOC) |
| 1326 | clear_bits |= EXTENT_DELALLOC; | 1422 | clear_bits |= EXTENT_DELALLOC; |
| 1327 | 1423 | ||
| 1328 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); | 1424 | if (op & EXTENT_CLEAR_ACCOUNTING) |
| 1329 | if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) | 1425 | clear_bits |= EXTENT_DO_ACCOUNTING; |
| 1426 | |||
| 1427 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | ||
| 1428 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | ||
| 1429 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | ||
| 1430 | EXTENT_SET_PRIVATE2))) | ||
| 1330 | return 0; | 1431 | return 0; |
| 1331 | 1432 | ||
| 1332 | while (nr_pages > 0) { | 1433 | while (nr_pages > 0) { |
| @@ -1334,17 +1435,21 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1334 | min_t(unsigned long, | 1435 | min_t(unsigned long, |
| 1335 | nr_pages, ARRAY_SIZE(pages)), pages); | 1436 | nr_pages, ARRAY_SIZE(pages)), pages); |
| 1336 | for (i = 0; i < ret; i++) { | 1437 | for (i = 0; i < ret; i++) { |
| 1438 | |||
| 1439 | if (op & EXTENT_SET_PRIVATE2) | ||
| 1440 | SetPagePrivate2(pages[i]); | ||
| 1441 | |||
| 1337 | if (pages[i] == locked_page) { | 1442 | if (pages[i] == locked_page) { |
| 1338 | page_cache_release(pages[i]); | 1443 | page_cache_release(pages[i]); |
| 1339 | continue; | 1444 | continue; |
| 1340 | } | 1445 | } |
| 1341 | if (clear_dirty) | 1446 | if (op & EXTENT_CLEAR_DIRTY) |
| 1342 | clear_page_dirty_for_io(pages[i]); | 1447 | clear_page_dirty_for_io(pages[i]); |
| 1343 | if (set_writeback) | 1448 | if (op & EXTENT_SET_WRITEBACK) |
| 1344 | set_page_writeback(pages[i]); | 1449 | set_page_writeback(pages[i]); |
| 1345 | if (end_writeback) | 1450 | if (op & EXTENT_END_WRITEBACK) |
| 1346 | end_page_writeback(pages[i]); | 1451 | end_page_writeback(pages[i]); |
| 1347 | if (unlock_pages) | 1452 | if (op & EXTENT_CLEAR_UNLOCK_PAGE) |
| 1348 | unlock_page(pages[i]); | 1453 | unlock_page(pages[i]); |
| 1349 | page_cache_release(pages[i]); | 1454 | page_cache_release(pages[i]); |
| 1350 | } | 1455 | } |
| @@ -1476,14 +1581,17 @@ out: | |||
| 1476 | * range is found set. | 1581 | * range is found set. |
| 1477 | */ | 1582 | */ |
| 1478 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1583 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 1479 | int bits, int filled) | 1584 | int bits, int filled, struct extent_state *cached) |
| 1480 | { | 1585 | { |
| 1481 | struct extent_state *state = NULL; | 1586 | struct extent_state *state = NULL; |
| 1482 | struct rb_node *node; | 1587 | struct rb_node *node; |
| 1483 | int bitset = 0; | 1588 | int bitset = 0; |
| 1484 | 1589 | ||
| 1485 | spin_lock(&tree->lock); | 1590 | spin_lock(&tree->lock); |
| 1486 | node = tree_search(tree, start); | 1591 | if (cached && cached->tree && cached->start == start) |
| 1592 | node = &cached->rb_node; | ||
| 1593 | else | ||
| 1594 | node = tree_search(tree, start); | ||
| 1487 | while (node && start <= end) { | 1595 | while (node && start <= end) { |
| 1488 | state = rb_entry(node, struct extent_state, rb_node); | 1596 | state = rb_entry(node, struct extent_state, rb_node); |
| 1489 | 1597 | ||
| @@ -1503,6 +1611,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 1503 | bitset = 0; | 1611 | bitset = 0; |
| 1504 | break; | 1612 | break; |
| 1505 | } | 1613 | } |
| 1614 | |||
| 1615 | if (state->end == (u64)-1) | ||
| 1616 | break; | ||
| 1617 | |||
| 1506 | start = state->end + 1; | 1618 | start = state->end + 1; |
| 1507 | if (start > end) | 1619 | if (start > end) |
| 1508 | break; | 1620 | break; |
| @@ -1526,7 +1638,7 @@ static int check_page_uptodate(struct extent_io_tree *tree, | |||
| 1526 | { | 1638 | { |
| 1527 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1639 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 1528 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1640 | u64 end = start + PAGE_CACHE_SIZE - 1; |
| 1529 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) | 1641 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) |
| 1530 | SetPageUptodate(page); | 1642 | SetPageUptodate(page); |
| 1531 | return 0; | 1643 | return 0; |
| 1532 | } | 1644 | } |
| @@ -1540,7 +1652,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
| 1540 | { | 1652 | { |
| 1541 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1653 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 1542 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1654 | u64 end = start + PAGE_CACHE_SIZE - 1; |
| 1543 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) | 1655 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) |
| 1544 | unlock_page(page); | 1656 | unlock_page(page); |
| 1545 | return 0; | 1657 | return 0; |
| 1546 | } | 1658 | } |
| @@ -1552,10 +1664,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
| 1552 | static int check_page_writeback(struct extent_io_tree *tree, | 1664 | static int check_page_writeback(struct extent_io_tree *tree, |
| 1553 | struct page *page) | 1665 | struct page *page) |
| 1554 | { | 1666 | { |
| 1555 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1667 | end_page_writeback(page); |
| 1556 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 1557 | if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) | ||
| 1558 | end_page_writeback(page); | ||
| 1559 | return 0; | 1668 | return 0; |
| 1560 | } | 1669 | } |
| 1561 | 1670 | ||
| @@ -1613,13 +1722,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
| 1613 | } | 1722 | } |
| 1614 | 1723 | ||
| 1615 | if (!uptodate) { | 1724 | if (!uptodate) { |
| 1616 | clear_extent_uptodate(tree, start, end, GFP_ATOMIC); | 1725 | clear_extent_uptodate(tree, start, end, GFP_NOFS); |
| 1617 | ClearPageUptodate(page); | 1726 | ClearPageUptodate(page); |
| 1618 | SetPageError(page); | 1727 | SetPageError(page); |
| 1619 | } | 1728 | } |
| 1620 | 1729 | ||
| 1621 | clear_extent_writeback(tree, start, end, GFP_ATOMIC); | ||
| 1622 | |||
| 1623 | if (whole_page) | 1730 | if (whole_page) |
| 1624 | end_page_writeback(page); | 1731 | end_page_writeback(page); |
| 1625 | else | 1732 | else |
| @@ -1983,7 +2090,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 1983 | continue; | 2090 | continue; |
| 1984 | } | 2091 | } |
| 1985 | /* the get_extent function already copied into the page */ | 2092 | /* the get_extent function already copied into the page */ |
| 1986 | if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { | 2093 | if (test_range_bit(tree, cur, cur_end, |
| 2094 | EXTENT_UPTODATE, 1, NULL)) { | ||
| 1987 | check_page_uptodate(tree, page); | 2095 | check_page_uptodate(tree, page); |
| 1988 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2096 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); |
| 1989 | cur = cur + iosize; | 2097 | cur = cur + iosize; |
| @@ -2078,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2078 | u64 iosize; | 2186 | u64 iosize; |
| 2079 | u64 unlock_start; | 2187 | u64 unlock_start; |
| 2080 | sector_t sector; | 2188 | sector_t sector; |
| 2189 | struct extent_state *cached_state = NULL; | ||
| 2081 | struct extent_map *em; | 2190 | struct extent_map *em; |
| 2082 | struct block_device *bdev; | 2191 | struct block_device *bdev; |
| 2083 | int ret; | 2192 | int ret; |
| @@ -2124,6 +2233,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2124 | delalloc_end = 0; | 2233 | delalloc_end = 0; |
| 2125 | page_started = 0; | 2234 | page_started = 0; |
| 2126 | if (!epd->extent_locked) { | 2235 | if (!epd->extent_locked) { |
| 2236 | u64 delalloc_to_write = 0; | ||
| 2127 | /* | 2237 | /* |
| 2128 | * make sure the wbc mapping index is at least updated | 2238 | * make sure the wbc mapping index is at least updated |
| 2129 | * to this page. | 2239 | * to this page. |
| @@ -2143,8 +2253,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2143 | tree->ops->fill_delalloc(inode, page, delalloc_start, | 2253 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
| 2144 | delalloc_end, &page_started, | 2254 | delalloc_end, &page_started, |
| 2145 | &nr_written); | 2255 | &nr_written); |
| 2256 | /* | ||
| 2257 | * delalloc_end is already one less than the total | ||
| 2258 | * length, so we don't subtract one from | ||
| 2259 | * PAGE_CACHE_SIZE | ||
| 2260 | */ | ||
| 2261 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
| 2262 | PAGE_CACHE_SIZE) >> | ||
| 2263 | PAGE_CACHE_SHIFT; | ||
| 2146 | delalloc_start = delalloc_end + 1; | 2264 | delalloc_start = delalloc_end + 1; |
| 2147 | } | 2265 | } |
| 2266 | if (wbc->nr_to_write < delalloc_to_write) { | ||
| 2267 | int thresh = 8192; | ||
| 2268 | |||
| 2269 | if (delalloc_to_write < thresh * 2) | ||
| 2270 | thresh = delalloc_to_write; | ||
| 2271 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
| 2272 | thresh); | ||
| 2273 | } | ||
| 2148 | 2274 | ||
| 2149 | /* did the fill delalloc function already unlock and start | 2275 | /* did the fill delalloc function already unlock and start |
| 2150 | * the IO? | 2276 | * the IO? |
| @@ -2160,15 +2286,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2160 | goto done_unlocked; | 2286 | goto done_unlocked; |
| 2161 | } | 2287 | } |
| 2162 | } | 2288 | } |
| 2163 | lock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2164 | |||
| 2165 | unlock_start = start; | ||
| 2166 | |||
| 2167 | if (tree->ops && tree->ops->writepage_start_hook) { | 2289 | if (tree->ops && tree->ops->writepage_start_hook) { |
| 2168 | ret = tree->ops->writepage_start_hook(page, start, | 2290 | ret = tree->ops->writepage_start_hook(page, start, |
| 2169 | page_end); | 2291 | page_end); |
| 2170 | if (ret == -EAGAIN) { | 2292 | if (ret == -EAGAIN) { |
| 2171 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2172 | redirty_page_for_writepage(wbc, page); | 2293 | redirty_page_for_writepage(wbc, page); |
| 2173 | update_nr_written(page, wbc, nr_written); | 2294 | update_nr_written(page, wbc, nr_written); |
| 2174 | unlock_page(page); | 2295 | unlock_page(page); |
| @@ -2184,12 +2305,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2184 | update_nr_written(page, wbc, nr_written + 1); | 2305 | update_nr_written(page, wbc, nr_written + 1); |
| 2185 | 2306 | ||
| 2186 | end = page_end; | 2307 | end = page_end; |
| 2187 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | ||
| 2188 | printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); | ||
| 2189 | |||
| 2190 | if (last_byte <= start) { | 2308 | if (last_byte <= start) { |
| 2191 | clear_extent_dirty(tree, start, page_end, GFP_NOFS); | ||
| 2192 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2193 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2309 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 2194 | tree->ops->writepage_end_io_hook(page, start, | 2310 | tree->ops->writepage_end_io_hook(page, start, |
| 2195 | page_end, NULL, 1); | 2311 | page_end, NULL, 1); |
| @@ -2197,13 +2313,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2197 | goto done; | 2313 | goto done; |
| 2198 | } | 2314 | } |
| 2199 | 2315 | ||
| 2200 | set_extent_uptodate(tree, start, page_end, GFP_NOFS); | ||
| 2201 | blocksize = inode->i_sb->s_blocksize; | 2316 | blocksize = inode->i_sb->s_blocksize; |
| 2202 | 2317 | ||
| 2203 | while (cur <= end) { | 2318 | while (cur <= end) { |
| 2204 | if (cur >= last_byte) { | 2319 | if (cur >= last_byte) { |
| 2205 | clear_extent_dirty(tree, cur, page_end, GFP_NOFS); | ||
| 2206 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 2207 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2320 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 2208 | tree->ops->writepage_end_io_hook(page, cur, | 2321 | tree->ops->writepage_end_io_hook(page, cur, |
| 2209 | page_end, NULL, 1); | 2322 | page_end, NULL, 1); |
| @@ -2235,12 +2348,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2235 | */ | 2348 | */ |
| 2236 | if (compressed || block_start == EXTENT_MAP_HOLE || | 2349 | if (compressed || block_start == EXTENT_MAP_HOLE || |
| 2237 | block_start == EXTENT_MAP_INLINE) { | 2350 | block_start == EXTENT_MAP_INLINE) { |
| 2238 | clear_extent_dirty(tree, cur, | ||
| 2239 | cur + iosize - 1, GFP_NOFS); | ||
| 2240 | |||
| 2241 | unlock_extent(tree, unlock_start, cur + iosize - 1, | ||
| 2242 | GFP_NOFS); | ||
| 2243 | |||
| 2244 | /* | 2351 | /* |
| 2245 | * end_io notification does not happen here for | 2352 | * end_io notification does not happen here for |
| 2246 | * compressed extents | 2353 | * compressed extents |
| @@ -2265,13 +2372,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2265 | } | 2372 | } |
| 2266 | /* leave this out until we have a page_mkwrite call */ | 2373 | /* leave this out until we have a page_mkwrite call */ |
| 2267 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | 2374 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, |
| 2268 | EXTENT_DIRTY, 0)) { | 2375 | EXTENT_DIRTY, 0, NULL)) { |
| 2269 | cur = cur + iosize; | 2376 | cur = cur + iosize; |
| 2270 | pg_offset += iosize; | 2377 | pg_offset += iosize; |
| 2271 | continue; | 2378 | continue; |
| 2272 | } | 2379 | } |
| 2273 | 2380 | ||
| 2274 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 2275 | if (tree->ops && tree->ops->writepage_io_hook) { | 2381 | if (tree->ops && tree->ops->writepage_io_hook) { |
| 2276 | ret = tree->ops->writepage_io_hook(page, cur, | 2382 | ret = tree->ops->writepage_io_hook(page, cur, |
| 2277 | cur + iosize - 1); | 2383 | cur + iosize - 1); |
| @@ -2309,12 +2415,12 @@ done: | |||
| 2309 | set_page_writeback(page); | 2415 | set_page_writeback(page); |
| 2310 | end_page_writeback(page); | 2416 | end_page_writeback(page); |
| 2311 | } | 2417 | } |
| 2312 | if (unlock_start <= page_end) | ||
| 2313 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 2314 | unlock_page(page); | 2418 | unlock_page(page); |
| 2315 | 2419 | ||
| 2316 | done_unlocked: | 2420 | done_unlocked: |
| 2317 | 2421 | ||
| 2422 | /* drop our reference on any cached states */ | ||
| 2423 | free_extent_state(cached_state); | ||
| 2318 | return 0; | 2424 | return 0; |
| 2319 | } | 2425 | } |
| 2320 | 2426 | ||
| @@ -2339,9 +2445,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 2339 | writepage_t writepage, void *data, | 2445 | writepage_t writepage, void *data, |
| 2340 | void (*flush_fn)(void *)) | 2446 | void (*flush_fn)(void *)) |
| 2341 | { | 2447 | { |
| 2342 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
| 2343 | int ret = 0; | 2448 | int ret = 0; |
| 2344 | int done = 0; | 2449 | int done = 0; |
| 2450 | int nr_to_write_done = 0; | ||
| 2345 | struct pagevec pvec; | 2451 | struct pagevec pvec; |
| 2346 | int nr_pages; | 2452 | int nr_pages; |
| 2347 | pgoff_t index; | 2453 | pgoff_t index; |
| @@ -2361,7 +2467,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 2361 | scanned = 1; | 2467 | scanned = 1; |
| 2362 | } | 2468 | } |
| 2363 | retry: | 2469 | retry: |
| 2364 | while (!done && (index <= end) && | 2470 | while (!done && !nr_to_write_done && (index <= end) && |
| 2365 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2471 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
| 2366 | PAGECACHE_TAG_DIRTY, min(end - index, | 2472 | PAGECACHE_TAG_DIRTY, min(end - index, |
| 2367 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 2473 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
| @@ -2412,12 +2518,15 @@ retry: | |||
| 2412 | unlock_page(page); | 2518 | unlock_page(page); |
| 2413 | ret = 0; | 2519 | ret = 0; |
| 2414 | } | 2520 | } |
| 2415 | if (ret || wbc->nr_to_write <= 0) | 2521 | if (ret) |
| 2416 | done = 1; | ||
| 2417 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
| 2418 | wbc->encountered_congestion = 1; | ||
| 2419 | done = 1; | 2522 | done = 1; |
| 2420 | } | 2523 | |
| 2524 | /* | ||
| 2525 | * the filesystem may choose to bump up nr_to_write. | ||
| 2526 | * We have to make sure to honor the new nr_to_write | ||
| 2527 | * at any time | ||
| 2528 | */ | ||
| 2529 | nr_to_write_done = wbc->nr_to_write <= 0; | ||
| 2421 | } | 2530 | } |
| 2422 | pagevec_release(&pvec); | 2531 | pagevec_release(&pvec); |
| 2423 | cond_resched(); | 2532 | cond_resched(); |
| @@ -2604,10 +2713,11 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
| 2604 | return 0; | 2713 | return 0; |
| 2605 | 2714 | ||
| 2606 | lock_extent(tree, start, end, GFP_NOFS); | 2715 | lock_extent(tree, start, end, GFP_NOFS); |
| 2607 | wait_on_extent_writeback(tree, start, end); | 2716 | wait_on_page_writeback(page); |
| 2608 | clear_extent_bit(tree, start, end, | 2717 | clear_extent_bit(tree, start, end, |
| 2609 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, | 2718 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 2610 | 1, 1, GFP_NOFS); | 2719 | EXTENT_DO_ACCOUNTING, |
| 2720 | 1, 1, NULL, GFP_NOFS); | ||
| 2611 | return 0; | 2721 | return 0; |
| 2612 | } | 2722 | } |
| 2613 | 2723 | ||
| @@ -2687,7 +2797,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
| 2687 | !isnew && !PageUptodate(page) && | 2797 | !isnew && !PageUptodate(page) && |
| 2688 | (block_off_end > to || block_off_start < from) && | 2798 | (block_off_end > to || block_off_start < from) && |
| 2689 | !test_range_bit(tree, block_start, cur_end, | 2799 | !test_range_bit(tree, block_start, cur_end, |
| 2690 | EXTENT_UPTODATE, 1)) { | 2800 | EXTENT_UPTODATE, 1, NULL)) { |
| 2691 | u64 sector; | 2801 | u64 sector; |
| 2692 | u64 extent_offset = block_start - em->start; | 2802 | u64 extent_offset = block_start - em->start; |
| 2693 | size_t iosize; | 2803 | size_t iosize; |
| @@ -2701,7 +2811,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
| 2701 | */ | 2811 | */ |
| 2702 | set_extent_bit(tree, block_start, | 2812 | set_extent_bit(tree, block_start, |
| 2703 | block_start + iosize - 1, | 2813 | block_start + iosize - 1, |
| 2704 | EXTENT_LOCKED, 0, NULL, GFP_NOFS); | 2814 | EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); |
| 2705 | ret = submit_extent_page(READ, tree, page, | 2815 | ret = submit_extent_page(READ, tree, page, |
| 2706 | sector, iosize, page_offset, em->bdev, | 2816 | sector, iosize, page_offset, em->bdev, |
| 2707 | NULL, 1, | 2817 | NULL, 1, |
| @@ -2742,13 +2852,18 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
| 2742 | int ret = 1; | 2852 | int ret = 1; |
| 2743 | 2853 | ||
| 2744 | if (test_range_bit(tree, start, end, | 2854 | if (test_range_bit(tree, start, end, |
| 2745 | EXTENT_IOBITS | EXTENT_ORDERED, 0)) | 2855 | EXTENT_IOBITS, 0, NULL)) |
| 2746 | ret = 0; | 2856 | ret = 0; |
| 2747 | else { | 2857 | else { |
| 2748 | if ((mask & GFP_NOFS) == GFP_NOFS) | 2858 | if ((mask & GFP_NOFS) == GFP_NOFS) |
| 2749 | mask = GFP_NOFS; | 2859 | mask = GFP_NOFS; |
| 2750 | clear_extent_bit(tree, start, end, EXTENT_UPTODATE, | 2860 | /* |
| 2751 | 1, 1, mask); | 2861 | * at this point we can safely clear everything except the |
| 2862 | * locked bit and the nodatasum bit | ||
| 2863 | */ | ||
| 2864 | clear_extent_bit(tree, start, end, | ||
| 2865 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), | ||
| 2866 | 0, 0, NULL, mask); | ||
| 2752 | } | 2867 | } |
| 2753 | return ret; | 2868 | return ret; |
| 2754 | } | 2869 | } |
| @@ -2771,29 +2886,28 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
| 2771 | u64 len; | 2886 | u64 len; |
| 2772 | while (start <= end) { | 2887 | while (start <= end) { |
| 2773 | len = end - start + 1; | 2888 | len = end - start + 1; |
| 2774 | spin_lock(&map->lock); | 2889 | write_lock(&map->lock); |
| 2775 | em = lookup_extent_mapping(map, start, len); | 2890 | em = lookup_extent_mapping(map, start, len); |
| 2776 | if (!em || IS_ERR(em)) { | 2891 | if (!em || IS_ERR(em)) { |
| 2777 | spin_unlock(&map->lock); | 2892 | write_unlock(&map->lock); |
| 2778 | break; | 2893 | break; |
| 2779 | } | 2894 | } |
| 2780 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || | 2895 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || |
| 2781 | em->start != start) { | 2896 | em->start != start) { |
| 2782 | spin_unlock(&map->lock); | 2897 | write_unlock(&map->lock); |
| 2783 | free_extent_map(em); | 2898 | free_extent_map(em); |
| 2784 | break; | 2899 | break; |
| 2785 | } | 2900 | } |
| 2786 | if (!test_range_bit(tree, em->start, | 2901 | if (!test_range_bit(tree, em->start, |
| 2787 | extent_map_end(em) - 1, | 2902 | extent_map_end(em) - 1, |
| 2788 | EXTENT_LOCKED | EXTENT_WRITEBACK | | 2903 | EXTENT_LOCKED | EXTENT_WRITEBACK, |
| 2789 | EXTENT_ORDERED, | 2904 | 0, NULL)) { |
| 2790 | 0)) { | ||
| 2791 | remove_extent_mapping(map, em); | 2905 | remove_extent_mapping(map, em); |
| 2792 | /* once for the rb tree */ | 2906 | /* once for the rb tree */ |
| 2793 | free_extent_map(em); | 2907 | free_extent_map(em); |
| 2794 | } | 2908 | } |
| 2795 | start = extent_map_end(em); | 2909 | start = extent_map_end(em); |
| 2796 | spin_unlock(&map->lock); | 2910 | write_unlock(&map->lock); |
| 2797 | 2911 | ||
| 2798 | /* once for us */ | 2912 | /* once for us */ |
| 2799 | free_extent_map(em); | 2913 | free_extent_map(em); |
| @@ -3203,7 +3317,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
| 3203 | int uptodate; | 3317 | int uptodate; |
| 3204 | unsigned long index; | 3318 | unsigned long index; |
| 3205 | 3319 | ||
| 3206 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); | 3320 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); |
| 3207 | if (ret) | 3321 | if (ret) |
| 3208 | return 1; | 3322 | return 1; |
| 3209 | while (start <= end) { | 3323 | while (start <= end) { |
| @@ -3233,7 +3347,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3233 | return 1; | 3347 | return 1; |
| 3234 | 3348 | ||
| 3235 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3349 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3236 | EXTENT_UPTODATE, 1); | 3350 | EXTENT_UPTODATE, 1, NULL); |
| 3237 | if (ret) | 3351 | if (ret) |
| 3238 | return ret; | 3352 | return ret; |
| 3239 | 3353 | ||
| @@ -3269,7 +3383,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3269 | return 0; | 3383 | return 0; |
| 3270 | 3384 | ||
| 3271 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3385 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3272 | EXTENT_UPTODATE, 1)) { | 3386 | EXTENT_UPTODATE, 1, NULL)) { |
| 3273 | return 0; | 3387 | return 0; |
| 3274 | } | 3388 | } |
| 3275 | 3389 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5bc20abf3f3d..36de250a7b2b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -13,10 +13,9 @@ | |||
| 13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1 << 6) |
| 14 | #define EXTENT_DEFRAG_DONE (1 << 7) | 14 | #define EXTENT_DEFRAG_DONE (1 << 7) |
| 15 | #define EXTENT_BUFFER_FILLED (1 << 8) | 15 | #define EXTENT_BUFFER_FILLED (1 << 8) |
| 16 | #define EXTENT_ORDERED (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
| 17 | #define EXTENT_ORDERED_METADATA (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
| 18 | #define EXTENT_BOUNDARY (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
| 19 | #define EXTENT_NODATASUM (1 << 12) | ||
| 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 21 | 20 | ||
| 22 | /* flags for bio submission */ | 21 | /* flags for bio submission */ |
| @@ -27,6 +26,16 @@ | |||
| 27 | #define EXTENT_BUFFER_BLOCKING 1 | 26 | #define EXTENT_BUFFER_BLOCKING 1 |
| 28 | #define EXTENT_BUFFER_DIRTY 2 | 27 | #define EXTENT_BUFFER_DIRTY 2 |
| 29 | 28 | ||
| 29 | /* these are flags for extent_clear_unlock_delalloc */ | ||
| 30 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | ||
| 31 | #define EXTENT_CLEAR_UNLOCK 0x2 | ||
| 32 | #define EXTENT_CLEAR_DELALLOC 0x4 | ||
| 33 | #define EXTENT_CLEAR_DIRTY 0x8 | ||
| 34 | #define EXTENT_SET_WRITEBACK 0x10 | ||
| 35 | #define EXTENT_END_WRITEBACK 0x20 | ||
| 36 | #define EXTENT_SET_PRIVATE2 0x40 | ||
| 37 | #define EXTENT_CLEAR_ACCOUNTING 0x80 | ||
| 38 | |||
| 30 | /* | 39 | /* |
| 31 | * page->private values. Every page that is controlled by the extent | 40 | * page->private values. Every page that is controlled by the extent |
| 32 | * map has page->private set to one. | 41 | * map has page->private set to one. |
| @@ -62,8 +71,13 @@ struct extent_io_ops { | |||
| 62 | struct extent_state *state, int uptodate); | 71 | struct extent_state *state, int uptodate); |
| 63 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, |
| 64 | unsigned long old, unsigned long bits); | 73 | unsigned long old, unsigned long bits); |
| 65 | int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
| 66 | unsigned long old, unsigned long bits); | 75 | unsigned long bits); |
| 76 | int (*merge_extent_hook)(struct inode *inode, | ||
| 77 | struct extent_state *new, | ||
| 78 | struct extent_state *other); | ||
| 79 | int (*split_extent_hook)(struct inode *inode, | ||
| 80 | struct extent_state *orig, u64 split); | ||
| 67 | int (*write_cache_pages_lock_hook)(struct page *page); | 81 | int (*write_cache_pages_lock_hook)(struct page *page); |
| 68 | }; | 82 | }; |
| 69 | 83 | ||
| @@ -81,10 +95,14 @@ struct extent_state { | |||
| 81 | u64 start; | 95 | u64 start; |
| 82 | u64 end; /* inclusive */ | 96 | u64 end; /* inclusive */ |
| 83 | struct rb_node rb_node; | 97 | struct rb_node rb_node; |
| 98 | |||
| 99 | /* ADD NEW ELEMENTS AFTER THIS */ | ||
| 84 | struct extent_io_tree *tree; | 100 | struct extent_io_tree *tree; |
| 85 | wait_queue_head_t wq; | 101 | wait_queue_head_t wq; |
| 86 | atomic_t refs; | 102 | atomic_t refs; |
| 87 | unsigned long state; | 103 | unsigned long state; |
| 104 | u64 split_start; | ||
| 105 | u64 split_end; | ||
| 88 | 106 | ||
| 89 | /* for use by the FS */ | 107 | /* for use by the FS */ |
| 90 | u64 private; | 108 | u64 private; |
| @@ -142,6 +160,8 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
| 142 | struct extent_io_tree *tree, struct page *page, | 160 | struct extent_io_tree *tree, struct page *page, |
| 143 | gfp_t mask); | 161 | gfp_t mask); |
| 144 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 162 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
| 163 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 164 | int bits, struct extent_state **cached, gfp_t mask); | ||
| 145 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 165 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
| 146 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 166 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 147 | gfp_t mask); | 167 | gfp_t mask); |
| @@ -155,11 +175,12 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
| 155 | u64 max_bytes, unsigned long bits); | 175 | u64 max_bytes, unsigned long bits); |
| 156 | 176 | ||
| 157 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 177 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 158 | int bits, int filled); | 178 | int bits, int filled, struct extent_state *cached_state); |
| 159 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 179 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 160 | int bits, gfp_t mask); | 180 | int bits, gfp_t mask); |
| 161 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 181 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 162 | int bits, int wake, int delete, gfp_t mask); | 182 | int bits, int wake, int delete, struct extent_state **cached, |
| 183 | gfp_t mask); | ||
| 163 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 164 | int bits, gfp_t mask); | 185 | int bits, gfp_t mask); |
| 165 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 186 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -278,9 +299,5 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
| 278 | int extent_clear_unlock_delalloc(struct inode *inode, | 299 | int extent_clear_unlock_delalloc(struct inode *inode, |
| 279 | struct extent_io_tree *tree, | 300 | struct extent_io_tree *tree, |
| 280 | u64 start, u64 end, struct page *locked_page, | 301 | u64 start, u64 end, struct page *locked_page, |
| 281 | int unlock_page, | 302 | unsigned long op); |
| 282 | int clear_unlock, | ||
| 283 | int clear_delalloc, int clear_dirty, | ||
| 284 | int set_writeback, | ||
| 285 | int end_writeback); | ||
| 286 | #endif | 303 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 30c9365861e6..ccbdcb54ec5d 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -36,7 +36,7 @@ void extent_map_exit(void) | |||
| 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) |
| 37 | { | 37 | { |
| 38 | tree->map.rb_node = NULL; | 38 | tree->map.rb_node = NULL; |
| 39 | spin_lock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | /** | 42 | /** |
| @@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
| 198 | return 0; | 198 | return 0; |
| 199 | } | 199 | } |
| 200 | 200 | ||
| 201 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
| 202 | { | ||
| 203 | int ret = 0; | ||
| 204 | struct extent_map *merge = NULL; | ||
| 205 | struct rb_node *rb; | ||
| 206 | struct extent_map *em; | ||
| 207 | |||
| 208 | write_lock(&tree->lock); | ||
| 209 | em = lookup_extent_mapping(tree, start, len); | ||
| 210 | |||
| 211 | WARN_ON(!em || em->start != start); | ||
| 212 | |||
| 213 | if (!em) | ||
| 214 | goto out; | ||
| 215 | |||
| 216 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 217 | |||
| 218 | if (em->start != 0) { | ||
| 219 | rb = rb_prev(&em->rb_node); | ||
| 220 | if (rb) | ||
| 221 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 222 | if (rb && mergable_maps(merge, em)) { | ||
| 223 | em->start = merge->start; | ||
| 224 | em->len += merge->len; | ||
| 225 | em->block_len += merge->block_len; | ||
| 226 | em->block_start = merge->block_start; | ||
| 227 | merge->in_tree = 0; | ||
| 228 | rb_erase(&merge->rb_node, &tree->map); | ||
| 229 | free_extent_map(merge); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | rb = rb_next(&em->rb_node); | ||
| 234 | if (rb) | ||
| 235 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 236 | if (rb && mergable_maps(em, merge)) { | ||
| 237 | em->len += merge->len; | ||
| 238 | em->block_len += merge->len; | ||
| 239 | rb_erase(&merge->rb_node, &tree->map); | ||
| 240 | merge->in_tree = 0; | ||
| 241 | free_extent_map(merge); | ||
| 242 | } | ||
| 243 | |||
| 244 | free_extent_map(em); | ||
| 245 | out: | ||
| 246 | write_unlock(&tree->lock); | ||
| 247 | return ret; | ||
| 248 | |||
| 249 | } | ||
| 250 | |||
| 201 | /** | 251 | /** |
| 202 | * add_extent_mapping - add new extent map to the extent tree | 252 | * add_extent_mapping - add new extent map to the extent tree |
| 203 | * @tree: tree to insert new map in | 253 | * @tree: tree to insert new map in |
| @@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 222 | ret = -EEXIST; | 272 | ret = -EEXIST; |
| 223 | goto out; | 273 | goto out; |
| 224 | } | 274 | } |
| 225 | assert_spin_locked(&tree->lock); | ||
| 226 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 275 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
| 227 | if (rb) { | 276 | if (rb) { |
| 228 | ret = -EEXIST; | 277 | ret = -EEXIST; |
| @@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 285 | struct rb_node *next = NULL; | 334 | struct rb_node *next = NULL; |
| 286 | u64 end = range_end(start, len); | 335 | u64 end = range_end(start, len); |
| 287 | 336 | ||
| 288 | assert_spin_locked(&tree->lock); | ||
| 289 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 337 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
| 290 | if (!rb_node && prev) { | 338 | if (!rb_node && prev) { |
| 291 | em = rb_entry(prev, struct extent_map, rb_node); | 339 | em = rb_entry(prev, struct extent_map, rb_node); |
| @@ -319,6 +367,54 @@ out: | |||
| 319 | } | 367 | } |
| 320 | 368 | ||
| 321 | /** | 369 | /** |
| 370 | * search_extent_mapping - find a nearby extent map | ||
| 371 | * @tree: tree to lookup in | ||
| 372 | * @start: byte offset to start the search | ||
| 373 | * @len: length of the lookup range | ||
| 374 | * | ||
| 375 | * Find and return the first extent_map struct in @tree that intersects the | ||
| 376 | * [start, len] range. | ||
| 377 | * | ||
| 378 | * If one can't be found, any nearby extent may be returned | ||
| 379 | */ | ||
| 380 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
| 381 | u64 start, u64 len) | ||
| 382 | { | ||
| 383 | struct extent_map *em; | ||
| 384 | struct rb_node *rb_node; | ||
| 385 | struct rb_node *prev = NULL; | ||
| 386 | struct rb_node *next = NULL; | ||
| 387 | |||
| 388 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
| 389 | if (!rb_node && prev) { | ||
| 390 | em = rb_entry(prev, struct extent_map, rb_node); | ||
| 391 | goto found; | ||
| 392 | } | ||
| 393 | if (!rb_node && next) { | ||
| 394 | em = rb_entry(next, struct extent_map, rb_node); | ||
| 395 | goto found; | ||
| 396 | } | ||
| 397 | if (!rb_node) { | ||
| 398 | em = NULL; | ||
| 399 | goto out; | ||
| 400 | } | ||
| 401 | if (IS_ERR(rb_node)) { | ||
| 402 | em = ERR_PTR(PTR_ERR(rb_node)); | ||
| 403 | goto out; | ||
| 404 | } | ||
| 405 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
| 406 | goto found; | ||
| 407 | |||
| 408 | em = NULL; | ||
| 409 | goto out; | ||
| 410 | |||
| 411 | found: | ||
| 412 | atomic_inc(&em->refs); | ||
| 413 | out: | ||
| 414 | return em; | ||
| 415 | } | ||
| 416 | |||
| 417 | /** | ||
| 322 | * remove_extent_mapping - removes an extent_map from the extent tree | 418 | * remove_extent_mapping - removes an extent_map from the extent tree |
| 323 | * @tree: extent tree to remove from | 419 | * @tree: extent tree to remove from |
| 324 | * @em: extent map being removed | 420 | * @em: extent map being removed |
| @@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 331 | int ret = 0; | 427 | int ret = 0; |
| 332 | 428 | ||
| 333 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 429 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
| 334 | assert_spin_locked(&tree->lock); | ||
| 335 | rb_erase(&em->rb_node, &tree->map); | 430 | rb_erase(&em->rb_node, &tree->map); |
| 336 | em->in_tree = 0; | 431 | em->in_tree = 0; |
| 337 | return ret; | 432 | return ret; |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fb6eeef06bb0..ab6d74b6e647 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -31,7 +31,7 @@ struct extent_map { | |||
| 31 | 31 | ||
| 32 | struct extent_map_tree { | 32 | struct extent_map_tree { |
| 33 | struct rb_root map; | 33 | struct rb_root map; |
| 34 | spinlock_t lock; | 34 | rwlock_t lock; |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | static inline u64 extent_map_end(struct extent_map *em) | 37 | static inline u64 extent_map_end(struct extent_map *em) |
| @@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); | |||
| 59 | void free_extent_map(struct extent_map *em); | 59 | void free_extent_map(struct extent_map *em); |
| 60 | int __init extent_map_init(void); | 60 | int __init extent_map_init(void); |
| 61 | void extent_map_exit(void); | 61 | void extent_map_exit(void); |
| 62 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | ||
| 63 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
| 64 | u64 start, u64 len); | ||
| 62 | #endif | 65 | #endif |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4b833972273a..06550affbd27 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 112 | int err = 0; | 112 | int err = 0; |
| 113 | int i; | 113 | int i; |
| 114 | struct inode *inode = fdentry(file)->d_inode; | 114 | struct inode *inode = fdentry(file)->d_inode; |
| 115 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 116 | u64 hint_byte; | ||
| 117 | u64 num_bytes; | 115 | u64 num_bytes; |
| 118 | u64 start_pos; | 116 | u64 start_pos; |
| 119 | u64 end_of_last_block; | 117 | u64 end_of_last_block; |
| @@ -125,23 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 125 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| 126 | 124 | ||
| 127 | end_of_last_block = start_pos + num_bytes - 1; | 125 | end_of_last_block = start_pos + num_bytes - 1; |
| 126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
| 127 | if (err) | ||
| 128 | return err; | ||
| 128 | 129 | ||
| 129 | lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 130 | trans = btrfs_join_transaction(root, 1); | ||
| 131 | if (!trans) { | ||
| 132 | err = -ENOMEM; | ||
| 133 | goto out_unlock; | ||
| 134 | } | ||
| 135 | btrfs_set_trans_block_group(trans, inode); | ||
| 136 | hint_byte = 0; | ||
| 137 | |||
| 138 | set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 139 | |||
| 140 | /* check for reserved extents on each page, we don't want | ||
| 141 | * to reset the delalloc bit on things that already have | ||
| 142 | * extents reserved. | ||
| 143 | */ | ||
| 144 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
| 145 | for (i = 0; i < num_pages; i++) { | 130 | for (i = 0; i < num_pages; i++) { |
| 146 | struct page *p = pages[i]; | 131 | struct page *p = pages[i]; |
| 147 | SetPageUptodate(p); | 132 | SetPageUptodate(p); |
| @@ -155,9 +140,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 155 | * at this time. | 140 | * at this time. |
| 156 | */ | 141 | */ |
| 157 | } | 142 | } |
| 158 | err = btrfs_end_transaction(trans, root); | ||
| 159 | out_unlock: | ||
| 160 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 161 | return err; | 143 | return err; |
| 162 | } | 144 | } |
| 163 | 145 | ||
| @@ -189,18 +171,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 189 | if (!split2) | 171 | if (!split2) |
| 190 | split2 = alloc_extent_map(GFP_NOFS); | 172 | split2 = alloc_extent_map(GFP_NOFS); |
| 191 | 173 | ||
| 192 | spin_lock(&em_tree->lock); | 174 | write_lock(&em_tree->lock); |
| 193 | em = lookup_extent_mapping(em_tree, start, len); | 175 | em = lookup_extent_mapping(em_tree, start, len); |
| 194 | if (!em) { | 176 | if (!em) { |
| 195 | spin_unlock(&em_tree->lock); | 177 | write_unlock(&em_tree->lock); |
| 196 | break; | 178 | break; |
| 197 | } | 179 | } |
| 198 | flags = em->flags; | 180 | flags = em->flags; |
| 199 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 181 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
| 200 | spin_unlock(&em_tree->lock); | ||
| 201 | if (em->start <= start && | 182 | if (em->start <= start && |
| 202 | (!testend || em->start + em->len >= start + len)) { | 183 | (!testend || em->start + em->len >= start + len)) { |
| 203 | free_extent_map(em); | 184 | free_extent_map(em); |
| 185 | write_unlock(&em_tree->lock); | ||
| 204 | break; | 186 | break; |
| 205 | } | 187 | } |
| 206 | if (start < em->start) { | 188 | if (start < em->start) { |
| @@ -210,6 +192,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 210 | start = em->start + em->len; | 192 | start = em->start + em->len; |
| 211 | } | 193 | } |
| 212 | free_extent_map(em); | 194 | free_extent_map(em); |
| 195 | write_unlock(&em_tree->lock); | ||
| 213 | continue; | 196 | continue; |
| 214 | } | 197 | } |
| 215 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 198 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| @@ -260,7 +243,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 260 | free_extent_map(split); | 243 | free_extent_map(split); |
| 261 | split = NULL; | 244 | split = NULL; |
| 262 | } | 245 | } |
| 263 | spin_unlock(&em_tree->lock); | 246 | write_unlock(&em_tree->lock); |
| 264 | 247 | ||
| 265 | /* once for us */ | 248 | /* once for us */ |
| 266 | free_extent_map(em); | 249 | free_extent_map(em); |
| @@ -289,7 +272,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 289 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 272 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 290 | struct btrfs_root *root, struct inode *inode, | 273 | struct btrfs_root *root, struct inode *inode, |
| 291 | u64 start, u64 end, u64 locked_end, | 274 | u64 start, u64 end, u64 locked_end, |
| 292 | u64 inline_limit, u64 *hint_byte) | 275 | u64 inline_limit, u64 *hint_byte, int drop_cache) |
| 293 | { | 276 | { |
| 294 | u64 extent_end = 0; | 277 | u64 extent_end = 0; |
| 295 | u64 search_start = start; | 278 | u64 search_start = start; |
| @@ -314,7 +297,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 314 | int ret; | 297 | int ret; |
| 315 | 298 | ||
| 316 | inline_limit = 0; | 299 | inline_limit = 0; |
| 317 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 300 | if (drop_cache) |
| 301 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
| 318 | 302 | ||
| 319 | path = btrfs_alloc_path(); | 303 | path = btrfs_alloc_path(); |
| 320 | if (!path) | 304 | if (!path) |
| @@ -894,7 +878,8 @@ again: | |||
| 894 | btrfs_put_ordered_extent(ordered); | 878 | btrfs_put_ordered_extent(ordered); |
| 895 | 879 | ||
| 896 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, | 880 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, |
| 897 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, | 881 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
| 882 | EXTENT_DO_ACCOUNTING, | ||
| 898 | GFP_NOFS); | 883 | GFP_NOFS); |
| 899 | unlock_extent(&BTRFS_I(inode)->io_tree, | 884 | unlock_extent(&BTRFS_I(inode)->io_tree, |
| 900 | start_pos, last_pos - 1, GFP_NOFS); | 885 | start_pos, last_pos - 1, GFP_NOFS); |
| @@ -936,21 +921,35 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 936 | start_pos = pos; | 921 | start_pos = pos; |
| 937 | 922 | ||
| 938 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 923 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
| 924 | |||
| 925 | /* do the reserve before the mutex lock in case we have to do some | ||
| 926 | * flushing. We wouldn't deadlock, but this is more polite. | ||
| 927 | */ | ||
| 928 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 929 | if (err) | ||
| 930 | goto out_nolock; | ||
| 931 | |||
| 932 | mutex_lock(&inode->i_mutex); | ||
| 933 | |||
| 939 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 934 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 940 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 935 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 941 | if (err) | 936 | if (err) |
| 942 | goto out_nolock; | 937 | goto out; |
| 938 | |||
| 943 | if (count == 0) | 939 | if (count == 0) |
| 944 | goto out_nolock; | 940 | goto out; |
| 945 | 941 | ||
| 946 | err = file_remove_suid(file); | 942 | err = file_remove_suid(file); |
| 947 | if (err) | 943 | if (err) |
| 948 | goto out_nolock; | 944 | goto out; |
| 945 | |||
| 949 | file_update_time(file); | 946 | file_update_time(file); |
| 950 | 947 | ||
| 951 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 948 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
| 952 | 949 | ||
| 953 | mutex_lock(&inode->i_mutex); | 950 | /* generic_write_checks can change our pos */ |
| 951 | start_pos = pos; | ||
| 952 | |||
| 954 | BTRFS_I(inode)->sequence++; | 953 | BTRFS_I(inode)->sequence++; |
| 955 | first_index = pos >> PAGE_CACHE_SHIFT; | 954 | first_index = pos >> PAGE_CACHE_SHIFT; |
| 956 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 955 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; |
| @@ -1024,9 +1023,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 1024 | } | 1023 | } |
| 1025 | 1024 | ||
| 1026 | if (will_write) { | 1025 | if (will_write) { |
| 1027 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1026 | filemap_fdatawrite_range(inode->i_mapping, pos, |
| 1028 | pos + write_bytes - 1, | 1027 | pos + write_bytes - 1); |
| 1029 | WB_SYNC_ALL); | ||
| 1030 | } else { | 1028 | } else { |
| 1031 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1029 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
| 1032 | num_pages); | 1030 | num_pages); |
| @@ -1047,6 +1045,7 @@ out: | |||
| 1047 | mutex_unlock(&inode->i_mutex); | 1045 | mutex_unlock(&inode->i_mutex); |
| 1048 | if (ret) | 1046 | if (ret) |
| 1049 | err = ret; | 1047 | err = ret; |
| 1048 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1050 | 1049 | ||
| 1051 | out_nolock: | 1050 | out_nolock: |
| 1052 | kfree(pages); | 1051 | kfree(pages); |
| @@ -1087,8 +1086,10 @@ out_nolock: | |||
| 1087 | btrfs_end_transaction(trans, root); | 1086 | btrfs_end_transaction(trans, root); |
| 1088 | else | 1087 | else |
| 1089 | btrfs_commit_transaction(trans, root); | 1088 | btrfs_commit_transaction(trans, root); |
| 1090 | } else { | 1089 | } else if (ret != BTRFS_NO_LOG_SYNC) { |
| 1091 | btrfs_commit_transaction(trans, root); | 1090 | btrfs_commit_transaction(trans, root); |
| 1091 | } else { | ||
| 1092 | btrfs_end_transaction(trans, root); | ||
| 1092 | } | 1093 | } |
| 1093 | } | 1094 | } |
| 1094 | if (file->f_flags & O_DIRECT) { | 1095 | if (file->f_flags & O_DIRECT) { |
| @@ -1138,6 +1139,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 1138 | int ret = 0; | 1139 | int ret = 0; |
| 1139 | struct btrfs_trans_handle *trans; | 1140 | struct btrfs_trans_handle *trans; |
| 1140 | 1141 | ||
| 1142 | |||
| 1143 | /* we wait first, since the writeback may change the inode */ | ||
| 1144 | root->log_batch++; | ||
| 1145 | /* the VFS called filemap_fdatawrite for us */ | ||
| 1146 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
| 1147 | root->log_batch++; | ||
| 1148 | |||
| 1141 | /* | 1149 | /* |
| 1142 | * check the transaction that last modified this inode | 1150 | * check the transaction that last modified this inode |
| 1143 | * and see if it's already been committed | 1151 | * and see if it's already been committed |
| @@ -1145,6 +1153,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 1145 | if (!BTRFS_I(inode)->last_trans) | 1153 | if (!BTRFS_I(inode)->last_trans) |
| 1146 | goto out; | 1154 | goto out; |
| 1147 | 1155 | ||
| 1156 | /* | ||
| 1157 | * if the last transaction that changed this file was before | ||
| 1158 | * the current transaction, we can bail out now without any | ||
| 1159 | * syncing | ||
| 1160 | */ | ||
| 1148 | mutex_lock(&root->fs_info->trans_mutex); | 1161 | mutex_lock(&root->fs_info->trans_mutex); |
| 1149 | if (BTRFS_I(inode)->last_trans <= | 1162 | if (BTRFS_I(inode)->last_trans <= |
| 1150 | root->fs_info->last_trans_committed) { | 1163 | root->fs_info->last_trans_committed) { |
| @@ -1154,13 +1167,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 1154 | } | 1167 | } |
| 1155 | mutex_unlock(&root->fs_info->trans_mutex); | 1168 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1156 | 1169 | ||
| 1157 | root->log_batch++; | ||
| 1158 | filemap_fdatawrite(inode->i_mapping); | ||
| 1159 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
| 1160 | root->log_batch++; | ||
| 1161 | |||
| 1162 | if (datasync && !(inode->i_state & I_DIRTY_PAGES)) | ||
| 1163 | goto out; | ||
| 1164 | /* | 1170 | /* |
| 1165 | * ok we haven't committed the transaction yet, let's do a commit | 1171 | * ok we haven't committed the transaction yet, let's do a commit |
| 1166 | */ | 1172 | */ |
| @@ -1189,21 +1195,25 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 1189 | */ | 1195 | */ |
| 1190 | mutex_unlock(&dentry->d_inode->i_mutex); | 1196 | mutex_unlock(&dentry->d_inode->i_mutex); |
| 1191 | 1197 | ||
| 1192 | if (ret > 0) { | 1198 | if (ret != BTRFS_NO_LOG_SYNC) { |
| 1193 | ret = btrfs_commit_transaction(trans, root); | 1199 | if (ret > 0) { |
| 1194 | } else { | ||
| 1195 | ret = btrfs_sync_log(trans, root); | ||
| 1196 | if (ret == 0) | ||
| 1197 | ret = btrfs_end_transaction(trans, root); | ||
| 1198 | else | ||
| 1199 | ret = btrfs_commit_transaction(trans, root); | 1200 | ret = btrfs_commit_transaction(trans, root); |
| 1201 | } else { | ||
| 1202 | ret = btrfs_sync_log(trans, root); | ||
| 1203 | if (ret == 0) | ||
| 1204 | ret = btrfs_end_transaction(trans, root); | ||
| 1205 | else | ||
| 1206 | ret = btrfs_commit_transaction(trans, root); | ||
| 1207 | } | ||
| 1208 | } else { | ||
| 1209 | ret = btrfs_end_transaction(trans, root); | ||
| 1200 | } | 1210 | } |
| 1201 | mutex_lock(&dentry->d_inode->i_mutex); | 1211 | mutex_lock(&dentry->d_inode->i_mutex); |
| 1202 | out: | 1212 | out: |
| 1203 | return ret > 0 ? EIO : ret; | 1213 | return ret > 0 ? EIO : ret; |
| 1204 | } | 1214 | } |
| 1205 | 1215 | ||
| 1206 | static struct vm_operations_struct btrfs_file_vm_ops = { | 1216 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
| 1207 | .fault = filemap_fault, | 1217 | .fault = filemap_fault, |
| 1208 | .page_mkwrite = btrfs_page_mkwrite, | 1218 | .page_mkwrite = btrfs_page_mkwrite, |
| 1209 | }; | 1219 | }; |
| @@ -1215,7 +1225,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
| 1215 | return 0; | 1225 | return 0; |
| 1216 | } | 1226 | } |
| 1217 | 1227 | ||
| 1218 | struct file_operations btrfs_file_operations = { | 1228 | const struct file_operations btrfs_file_operations = { |
| 1219 | .llseek = generic_file_llseek, | 1229 | .llseek = generic_file_llseek, |
| 1220 | .read = do_sync_read, | 1230 | .read = do_sync_read, |
| 1221 | .aio_read = generic_file_aio_read, | 1231 | .aio_read = generic_file_aio_read, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5edcee3a617f..cb2849f03251 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
| 259 | 259 | ||
| 260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | 260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) |
| 261 | { | 261 | { |
| 262 | u64 max_bytes, possible_bytes; | 262 | u64 max_bytes; |
| 263 | u64 bitmap_bytes; | ||
| 264 | u64 extent_bytes; | ||
| 263 | 265 | ||
| 264 | /* | 266 | /* |
| 265 | * The goal is to keep the total amount of memory used per 1gb of space | 267 | * The goal is to keep the total amount of memory used per 1gb of space |
| @@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
| 269 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 271 | max_bytes = MAX_CACHE_BYTES_PER_GIG * |
| 270 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | 272 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); |
| 271 | 273 | ||
| 272 | possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + | 274 | /* |
| 273 | (sizeof(struct btrfs_free_space) * | 275 | * we want to account for 1 more bitmap than what we have so we can make |
| 274 | block_group->extents_thresh); | 276 | * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as |
| 277 | * we add more bitmaps. | ||
| 278 | */ | ||
| 279 | bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; | ||
| 275 | 280 | ||
| 276 | if (possible_bytes > max_bytes) { | 281 | if (bitmap_bytes >= max_bytes) { |
| 277 | int extent_bytes = max_bytes - | 282 | block_group->extents_thresh = 0; |
| 278 | (block_group->total_bitmaps * PAGE_CACHE_SIZE); | 283 | return; |
| 284 | } | ||
| 279 | 285 | ||
| 280 | if (extent_bytes <= 0) { | 286 | /* |
| 281 | block_group->extents_thresh = 0; | 287 | * we want the extent entry threshold to always be at most 1/2 the max |
| 282 | return; | 288 | * bytes we can have, or whatever is less than that. |
| 283 | } | 289 | */ |
| 290 | extent_bytes = max_bytes - bitmap_bytes; | ||
| 291 | extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); | ||
| 284 | 292 | ||
| 285 | block_group->extents_thresh = extent_bytes / | 293 | block_group->extents_thresh = |
| 286 | (sizeof(struct btrfs_free_space)); | 294 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
| 287 | } | ||
| 288 | } | 295 | } |
| 289 | 296 | ||
| 290 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, | 297 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, |
| @@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | |||
| 403 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); | 410 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); |
| 404 | 411 | ||
| 405 | info->offset = offset_to_bitmap(block_group, offset); | 412 | info->offset = offset_to_bitmap(block_group, offset); |
| 413 | info->bytes = 0; | ||
| 406 | link_free_space(block_group, info); | 414 | link_free_space(block_group, info); |
| 407 | block_group->total_bitmaps++; | 415 | block_group->total_bitmaps++; |
| 408 | 416 | ||
| @@ -1288,7 +1296,7 @@ again: | |||
| 1288 | window_start = entry->offset; | 1296 | window_start = entry->offset; |
| 1289 | window_free = entry->bytes; | 1297 | window_free = entry->bytes; |
| 1290 | last = entry; | 1298 | last = entry; |
| 1291 | max_extent = 0; | 1299 | max_extent = entry->bytes; |
| 1292 | } else { | 1300 | } else { |
| 1293 | last = next; | 1301 | last = next; |
| 1294 | window_free += next->bytes; | 1302 | window_free += next->bytes; |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6b627c611808..72ce3c173d6a 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
| @@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
| 149 | ptr = (unsigned long)(ref + 1); | 149 | ptr = (unsigned long)(ref + 1); |
| 150 | ret = 0; | 150 | ret = 0; |
| 151 | } else if (ret < 0) { | 151 | } else if (ret < 0) { |
| 152 | if (ret == -EOVERFLOW) | ||
| 153 | ret = -EMLINK; | ||
| 152 | goto out; | 154 | goto out; |
| 153 | } else { | 155 | } else { |
| 154 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 156 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| @@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | |||
| 177 | 179 | ||
| 178 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 180 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
| 179 | sizeof(struct btrfs_inode_item)); | 181 | sizeof(struct btrfs_inode_item)); |
| 180 | if (ret == 0 && objectid > root->highest_inode) | ||
| 181 | root->highest_inode = objectid; | ||
| 182 | return ret; | 182 | return ret; |
| 183 | } | 183 | } |
| 184 | 184 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 9abbced1123d..c56eb5909172 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
| @@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
| 43 | slot = path->slots[0] - 1; | 43 | slot = path->slots[0] - 1; |
| 44 | l = path->nodes[0]; | 44 | l = path->nodes[0]; |
| 45 | btrfs_item_key_to_cpu(l, &found_key, slot); | 45 | btrfs_item_key_to_cpu(l, &found_key, slot); |
| 46 | *objectid = found_key.objectid; | 46 | *objectid = max_t(u64, found_key.objectid, |
| 47 | BTRFS_FIRST_FREE_OBJECTID - 1); | ||
| 47 | } else { | 48 | } else { |
| 48 | *objectid = BTRFS_FIRST_FREE_OBJECTID; | 49 | *objectid = BTRFS_FIRST_FREE_OBJECTID - 1; |
| 49 | } | 50 | } |
| 50 | ret = 0; | 51 | ret = 0; |
| 51 | error: | 52 | error: |
| @@ -53,91 +54,27 @@ error: | |||
| 53 | return ret; | 54 | return ret; |
| 54 | } | 55 | } |
| 55 | 56 | ||
| 56 | /* | ||
| 57 | * walks the btree of allocated inodes and find a hole. | ||
| 58 | */ | ||
| 59 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 57 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
| 60 | struct btrfs_root *root, | 58 | struct btrfs_root *root, |
| 61 | u64 dirid, u64 *objectid) | 59 | u64 dirid, u64 *objectid) |
| 62 | { | 60 | { |
| 63 | struct btrfs_path *path; | ||
| 64 | struct btrfs_key key; | ||
| 65 | int ret; | 61 | int ret; |
| 66 | int slot = 0; | ||
| 67 | u64 last_ino = 0; | ||
| 68 | int start_found; | ||
| 69 | struct extent_buffer *l; | ||
| 70 | struct btrfs_key search_key; | ||
| 71 | u64 search_start = dirid; | ||
| 72 | |||
| 73 | mutex_lock(&root->objectid_mutex); | 62 | mutex_lock(&root->objectid_mutex); |
| 74 | if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && | ||
| 75 | root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { | ||
| 76 | *objectid = ++root->last_inode_alloc; | ||
| 77 | mutex_unlock(&root->objectid_mutex); | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | path = btrfs_alloc_path(); | ||
| 81 | BUG_ON(!path); | ||
| 82 | search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); | ||
| 83 | search_key.objectid = search_start; | ||
| 84 | search_key.type = 0; | ||
| 85 | search_key.offset = 0; | ||
| 86 | |||
| 87 | start_found = 0; | ||
| 88 | ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); | ||
| 89 | if (ret < 0) | ||
| 90 | goto error; | ||
| 91 | 63 | ||
| 92 | while (1) { | 64 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { |
| 93 | l = path->nodes[0]; | 65 | ret = btrfs_find_highest_inode(root, &root->highest_objectid); |
| 94 | slot = path->slots[0]; | 66 | if (ret) |
| 95 | if (slot >= btrfs_header_nritems(l)) { | 67 | goto out; |
| 96 | ret = btrfs_next_leaf(root, path); | 68 | } |
| 97 | if (ret == 0) | ||
| 98 | continue; | ||
| 99 | if (ret < 0) | ||
| 100 | goto error; | ||
| 101 | if (!start_found) { | ||
| 102 | *objectid = search_start; | ||
| 103 | start_found = 1; | ||
| 104 | goto found; | ||
| 105 | } | ||
| 106 | *objectid = last_ino > search_start ? | ||
| 107 | last_ino : search_start; | ||
| 108 | goto found; | ||
| 109 | } | ||
| 110 | btrfs_item_key_to_cpu(l, &key, slot); | ||
| 111 | if (key.objectid >= search_start) { | ||
| 112 | if (start_found) { | ||
| 113 | if (last_ino < search_start) | ||
| 114 | last_ino = search_start; | ||
| 115 | if (key.objectid > last_ino) { | ||
| 116 | *objectid = last_ino; | ||
| 117 | goto found; | ||
| 118 | } | ||
| 119 | } else if (key.objectid > search_start) { | ||
| 120 | *objectid = search_start; | ||
| 121 | goto found; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) | ||
| 125 | break; | ||
| 126 | 69 | ||
| 127 | start_found = 1; | 70 | if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { |
| 128 | last_ino = key.objectid + 1; | 71 | ret = -ENOSPC; |
| 129 | path->slots[0]++; | 72 | goto out; |
| 130 | } | 73 | } |
| 131 | BUG_ON(1); | 74 | |
| 132 | found: | 75 | *objectid = ++root->highest_objectid; |
| 133 | btrfs_release_path(root, path); | 76 | ret = 0; |
| 134 | btrfs_free_path(path); | 77 | out: |
| 135 | BUG_ON(*objectid < search_start); | ||
| 136 | mutex_unlock(&root->objectid_mutex); | ||
| 137 | return 0; | ||
| 138 | error: | ||
| 139 | btrfs_release_path(root, path); | ||
| 140 | btrfs_free_path(path); | ||
| 141 | mutex_unlock(&root->objectid_mutex); | 78 | mutex_unlock(&root->objectid_mutex); |
| 142 | return ret; | 79 | return ret; |
| 143 | } | 80 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59cba180fe83..b3ad168a0bfc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -55,14 +55,14 @@ struct btrfs_iget_args { | |||
| 55 | struct btrfs_root *root; | 55 | struct btrfs_root *root; |
| 56 | }; | 56 | }; |
| 57 | 57 | ||
| 58 | static struct inode_operations btrfs_dir_inode_operations; | 58 | static const struct inode_operations btrfs_dir_inode_operations; |
| 59 | static struct inode_operations btrfs_symlink_inode_operations; | 59 | static const struct inode_operations btrfs_symlink_inode_operations; |
| 60 | static struct inode_operations btrfs_dir_ro_inode_operations; | 60 | static const struct inode_operations btrfs_dir_ro_inode_operations; |
| 61 | static struct inode_operations btrfs_special_inode_operations; | 61 | static const struct inode_operations btrfs_special_inode_operations; |
| 62 | static struct inode_operations btrfs_file_inode_operations; | 62 | static const struct inode_operations btrfs_file_inode_operations; |
| 63 | static struct address_space_operations btrfs_aops; | 63 | static const struct address_space_operations btrfs_aops; |
| 64 | static struct address_space_operations btrfs_symlink_aops; | 64 | static const struct address_space_operations btrfs_symlink_aops; |
| 65 | static struct file_operations btrfs_dir_file_operations; | 65 | static const struct file_operations btrfs_dir_file_operations; |
| 66 | static struct extent_io_ops btrfs_extent_io_ops; | 66 | static struct extent_io_ops btrfs_extent_io_ops; |
| 67 | 67 | ||
| 68 | static struct kmem_cache *btrfs_inode_cachep; | 68 | static struct kmem_cache *btrfs_inode_cachep; |
| @@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 231 | } | 231 | } |
| 232 | 232 | ||
| 233 | ret = btrfs_drop_extents(trans, root, inode, start, | 233 | ret = btrfs_drop_extents(trans, root, inode, start, |
| 234 | aligned_end, aligned_end, start, &hint_byte); | 234 | aligned_end, aligned_end, start, |
| 235 | &hint_byte, 1); | ||
| 235 | BUG_ON(ret); | 236 | BUG_ON(ret); |
| 236 | 237 | ||
| 237 | if (isize > actual_end) | 238 | if (isize > actual_end) |
| @@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 240 | inline_len, compressed_size, | 241 | inline_len, compressed_size, |
| 241 | compressed_pages); | 242 | compressed_pages); |
| 242 | BUG_ON(ret); | 243 | BUG_ON(ret); |
| 243 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | 244 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
| 244 | return 0; | 245 | return 0; |
| 245 | } | 246 | } |
| 246 | 247 | ||
| @@ -423,9 +424,12 @@ again: | |||
| 423 | * and free up our temp pages. | 424 | * and free up our temp pages. |
| 424 | */ | 425 | */ |
| 425 | extent_clear_unlock_delalloc(inode, | 426 | extent_clear_unlock_delalloc(inode, |
| 426 | &BTRFS_I(inode)->io_tree, | 427 | &BTRFS_I(inode)->io_tree, |
| 427 | start, end, NULL, 1, 0, | 428 | start, end, NULL, |
| 428 | 0, 1, 1, 1); | 429 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
| 430 | EXTENT_CLEAR_DELALLOC | | ||
| 431 | EXTENT_CLEAR_ACCOUNTING | | ||
| 432 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | ||
| 429 | ret = 0; | 433 | ret = 0; |
| 430 | goto free_pages_out; | 434 | goto free_pages_out; |
| 431 | } | 435 | } |
| @@ -534,7 +538,7 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 534 | struct btrfs_root *root = BTRFS_I(inode)->root; | 538 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 535 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 539 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
| 536 | struct extent_io_tree *io_tree; | 540 | struct extent_io_tree *io_tree; |
| 537 | int ret; | 541 | int ret = 0; |
| 538 | 542 | ||
| 539 | if (list_empty(&async_cow->extents)) | 543 | if (list_empty(&async_cow->extents)) |
| 540 | return 0; | 544 | return 0; |
| @@ -548,6 +552,7 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 548 | 552 | ||
| 549 | io_tree = &BTRFS_I(inode)->io_tree; | 553 | io_tree = &BTRFS_I(inode)->io_tree; |
| 550 | 554 | ||
| 555 | retry: | ||
| 551 | /* did the compression code fall back to uncompressed IO? */ | 556 | /* did the compression code fall back to uncompressed IO? */ |
| 552 | if (!async_extent->pages) { | 557 | if (!async_extent->pages) { |
| 553 | int page_started = 0; | 558 | int page_started = 0; |
| @@ -558,11 +563,11 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 558 | async_extent->ram_size - 1, GFP_NOFS); | 563 | async_extent->ram_size - 1, GFP_NOFS); |
| 559 | 564 | ||
| 560 | /* allocate blocks */ | 565 | /* allocate blocks */ |
| 561 | cow_file_range(inode, async_cow->locked_page, | 566 | ret = cow_file_range(inode, async_cow->locked_page, |
| 562 | async_extent->start, | 567 | async_extent->start, |
| 563 | async_extent->start + | 568 | async_extent->start + |
| 564 | async_extent->ram_size - 1, | 569 | async_extent->ram_size - 1, |
| 565 | &page_started, &nr_written, 0); | 570 | &page_started, &nr_written, 0); |
| 566 | 571 | ||
| 567 | /* | 572 | /* |
| 568 | * if page_started, cow_file_range inserted an | 573 | * if page_started, cow_file_range inserted an |
| @@ -570,7 +575,7 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 570 | * and IO for us. Otherwise, we need to submit | 575 | * and IO for us. Otherwise, we need to submit |
| 571 | * all those pages down to the drive. | 576 | * all those pages down to the drive. |
| 572 | */ | 577 | */ |
| 573 | if (!page_started) | 578 | if (!page_started && !ret) |
| 574 | extent_write_locked_range(io_tree, | 579 | extent_write_locked_range(io_tree, |
| 575 | inode, async_extent->start, | 580 | inode, async_extent->start, |
| 576 | async_extent->start + | 581 | async_extent->start + |
| @@ -598,7 +603,21 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 598 | async_extent->compressed_size, | 603 | async_extent->compressed_size, |
| 599 | 0, alloc_hint, | 604 | 0, alloc_hint, |
| 600 | (u64)-1, &ins, 1); | 605 | (u64)-1, &ins, 1); |
| 601 | BUG_ON(ret); | 606 | if (ret) { |
| 607 | int i; | ||
| 608 | for (i = 0; i < async_extent->nr_pages; i++) { | ||
| 609 | WARN_ON(async_extent->pages[i]->mapping); | ||
| 610 | page_cache_release(async_extent->pages[i]); | ||
| 611 | } | ||
| 612 | kfree(async_extent->pages); | ||
| 613 | async_extent->nr_pages = 0; | ||
| 614 | async_extent->pages = NULL; | ||
| 615 | unlock_extent(io_tree, async_extent->start, | ||
| 616 | async_extent->start + | ||
| 617 | async_extent->ram_size - 1, GFP_NOFS); | ||
| 618 | goto retry; | ||
| 619 | } | ||
| 620 | |||
| 602 | em = alloc_extent_map(GFP_NOFS); | 621 | em = alloc_extent_map(GFP_NOFS); |
| 603 | em->start = async_extent->start; | 622 | em->start = async_extent->start; |
| 604 | em->len = async_extent->ram_size; | 623 | em->len = async_extent->ram_size; |
| @@ -611,9 +630,9 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 611 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 630 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 612 | 631 | ||
| 613 | while (1) { | 632 | while (1) { |
| 614 | spin_lock(&em_tree->lock); | 633 | write_lock(&em_tree->lock); |
| 615 | ret = add_extent_mapping(em_tree, em); | 634 | ret = add_extent_mapping(em_tree, em); |
| 616 | spin_unlock(&em_tree->lock); | 635 | write_unlock(&em_tree->lock); |
| 617 | if (ret != -EEXIST) { | 636 | if (ret != -EEXIST) { |
| 618 | free_extent_map(em); | 637 | free_extent_map(em); |
| 619 | break; | 638 | break; |
| @@ -636,11 +655,14 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 636 | * clear dirty, set writeback and unlock the pages. | 655 | * clear dirty, set writeback and unlock the pages. |
| 637 | */ | 656 | */ |
| 638 | extent_clear_unlock_delalloc(inode, | 657 | extent_clear_unlock_delalloc(inode, |
| 639 | &BTRFS_I(inode)->io_tree, | 658 | &BTRFS_I(inode)->io_tree, |
| 640 | async_extent->start, | 659 | async_extent->start, |
| 641 | async_extent->start + | 660 | async_extent->start + |
| 642 | async_extent->ram_size - 1, | 661 | async_extent->ram_size - 1, |
| 643 | NULL, 1, 1, 0, 1, 1, 0); | 662 | NULL, EXTENT_CLEAR_UNLOCK_PAGE | |
| 663 | EXTENT_CLEAR_UNLOCK | | ||
| 664 | EXTENT_CLEAR_DELALLOC | | ||
| 665 | EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); | ||
| 644 | 666 | ||
| 645 | ret = btrfs_submit_compressed_write(inode, | 667 | ret = btrfs_submit_compressed_write(inode, |
| 646 | async_extent->start, | 668 | async_extent->start, |
| @@ -711,9 +733,15 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 711 | start, end, 0, NULL); | 733 | start, end, 0, NULL); |
| 712 | if (ret == 0) { | 734 | if (ret == 0) { |
| 713 | extent_clear_unlock_delalloc(inode, | 735 | extent_clear_unlock_delalloc(inode, |
| 714 | &BTRFS_I(inode)->io_tree, | 736 | &BTRFS_I(inode)->io_tree, |
| 715 | start, end, NULL, 1, 1, | 737 | start, end, NULL, |
| 716 | 1, 1, 1, 1); | 738 | EXTENT_CLEAR_UNLOCK_PAGE | |
| 739 | EXTENT_CLEAR_UNLOCK | | ||
| 740 | EXTENT_CLEAR_DELALLOC | | ||
| 741 | EXTENT_CLEAR_ACCOUNTING | | ||
| 742 | EXTENT_CLEAR_DIRTY | | ||
| 743 | EXTENT_SET_WRITEBACK | | ||
| 744 | EXTENT_END_WRITEBACK); | ||
| 717 | *nr_written = *nr_written + | 745 | *nr_written = *nr_written + |
| 718 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; | 746 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; |
| 719 | *page_started = 1; | 747 | *page_started = 1; |
| @@ -725,9 +753,34 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 725 | BUG_ON(disk_num_bytes > | 753 | BUG_ON(disk_num_bytes > |
| 726 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 754 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
| 727 | 755 | ||
| 756 | |||
| 757 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 758 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
| 759 | start, num_bytes); | ||
| 760 | if (em) { | ||
| 761 | /* | ||
| 762 | * if block start isn't an actual block number then find the | ||
| 763 | * first block in this inode and use that as a hint. If that | ||
| 764 | * block is also bogus then just don't worry about it. | ||
| 765 | */ | ||
| 766 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
| 767 | free_extent_map(em); | ||
| 768 | em = search_extent_mapping(em_tree, 0, 0); | ||
| 769 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
| 770 | alloc_hint = em->block_start; | ||
| 771 | if (em) | ||
| 772 | free_extent_map(em); | ||
| 773 | } else { | ||
| 774 | alloc_hint = em->block_start; | ||
| 775 | free_extent_map(em); | ||
| 776 | } | ||
| 777 | } | ||
| 778 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 728 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 779 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
| 729 | 780 | ||
| 730 | while (disk_num_bytes > 0) { | 781 | while (disk_num_bytes > 0) { |
| 782 | unsigned long op; | ||
| 783 | |||
| 731 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | 784 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); |
| 732 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 785 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
| 733 | root->sectorsize, 0, alloc_hint, | 786 | root->sectorsize, 0, alloc_hint, |
| @@ -737,7 +790,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 737 | em = alloc_extent_map(GFP_NOFS); | 790 | em = alloc_extent_map(GFP_NOFS); |
| 738 | em->start = start; | 791 | em->start = start; |
| 739 | em->orig_start = em->start; | 792 | em->orig_start = em->start; |
| 740 | |||
| 741 | ram_size = ins.offset; | 793 | ram_size = ins.offset; |
| 742 | em->len = ins.offset; | 794 | em->len = ins.offset; |
| 743 | 795 | ||
| @@ -747,9 +799,9 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 747 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 799 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 748 | 800 | ||
| 749 | while (1) { | 801 | while (1) { |
| 750 | spin_lock(&em_tree->lock); | 802 | write_lock(&em_tree->lock); |
| 751 | ret = add_extent_mapping(em_tree, em); | 803 | ret = add_extent_mapping(em_tree, em); |
| 752 | spin_unlock(&em_tree->lock); | 804 | write_unlock(&em_tree->lock); |
| 753 | if (ret != -EEXIST) { | 805 | if (ret != -EEXIST) { |
| 754 | free_extent_map(em); | 806 | free_extent_map(em); |
| 755 | break; | 807 | break; |
| @@ -776,11 +828,17 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 776 | /* we're not doing compressed IO, don't unlock the first | 828 | /* we're not doing compressed IO, don't unlock the first |
| 777 | * page (which the caller expects to stay locked), don't | 829 | * page (which the caller expects to stay locked), don't |
| 778 | * clear any dirty bits and don't set any writeback bits | 830 | * clear any dirty bits and don't set any writeback bits |
| 831 | * | ||
| 832 | * Do set the Private2 bit so we know this page was properly | ||
| 833 | * set up for writepage | ||
| 779 | */ | 834 | */ |
| 835 | op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; | ||
| 836 | op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | | ||
| 837 | EXTENT_SET_PRIVATE2; | ||
| 838 | |||
| 780 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 839 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 781 | start, start + ram_size - 1, | 840 | start, start + ram_size - 1, |
| 782 | locked_page, unlock, 1, | 841 | locked_page, op); |
| 783 | 1, 0, 0, 0); | ||
| 784 | disk_num_bytes -= cur_alloc_size; | 842 | disk_num_bytes -= cur_alloc_size; |
| 785 | num_bytes -= cur_alloc_size; | 843 | num_bytes -= cur_alloc_size; |
| 786 | alloc_hint = ins.objectid + ins.offset; | 844 | alloc_hint = ins.objectid + ins.offset; |
| @@ -852,8 +910,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
| 852 | u64 cur_end; | 910 | u64 cur_end; |
| 853 | int limit = 10 * 1024 * 1042; | 911 | int limit = 10 * 1024 * 1042; |
| 854 | 912 | ||
| 855 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 913 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, |
| 856 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 914 | 1, 0, NULL, GFP_NOFS); |
| 857 | while (start < end) { | 915 | while (start < end) { |
| 858 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); | 916 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
| 859 | async_cow->inode = inode; | 917 | async_cow->inode = inode; |
| @@ -994,6 +1052,7 @@ next_slot: | |||
| 994 | 1052 | ||
| 995 | if (found_key.offset > cur_offset) { | 1053 | if (found_key.offset > cur_offset) { |
| 996 | extent_end = found_key.offset; | 1054 | extent_end = found_key.offset; |
| 1055 | extent_type = 0; | ||
| 997 | goto out_check; | 1056 | goto out_check; |
| 998 | } | 1057 | } |
| 999 | 1058 | ||
| @@ -1080,9 +1139,9 @@ out_check: | |||
| 1080 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1139 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 1081 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1140 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 1082 | while (1) { | 1141 | while (1) { |
| 1083 | spin_lock(&em_tree->lock); | 1142 | write_lock(&em_tree->lock); |
| 1084 | ret = add_extent_mapping(em_tree, em); | 1143 | ret = add_extent_mapping(em_tree, em); |
| 1085 | spin_unlock(&em_tree->lock); | 1144 | write_unlock(&em_tree->lock); |
| 1086 | if (ret != -EEXIST) { | 1145 | if (ret != -EEXIST) { |
| 1087 | free_extent_map(em); | 1146 | free_extent_map(em); |
| 1088 | break; | 1147 | break; |
| @@ -1100,8 +1159,10 @@ out_check: | |||
| 1100 | BUG_ON(ret); | 1159 | BUG_ON(ret); |
| 1101 | 1160 | ||
| 1102 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1161 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 1103 | cur_offset, cur_offset + num_bytes - 1, | 1162 | cur_offset, cur_offset + num_bytes - 1, |
| 1104 | locked_page, 1, 1, 1, 0, 0, 0); | 1163 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
| 1164 | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | | ||
| 1165 | EXTENT_SET_PRIVATE2); | ||
| 1105 | cur_offset = extent_end; | 1166 | cur_offset = extent_end; |
| 1106 | if (cur_offset > end) | 1167 | if (cur_offset > end) |
| 1107 | break; | 1168 | break; |
| @@ -1147,6 +1208,89 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1147 | return ret; | 1208 | return ret; |
| 1148 | } | 1209 | } |
| 1149 | 1210 | ||
| 1211 | static int btrfs_split_extent_hook(struct inode *inode, | ||
| 1212 | struct extent_state *orig, u64 split) | ||
| 1213 | { | ||
| 1214 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1215 | u64 size; | ||
| 1216 | |||
| 1217 | if (!(orig->state & EXTENT_DELALLOC)) | ||
| 1218 | return 0; | ||
| 1219 | |||
| 1220 | size = orig->end - orig->start + 1; | ||
| 1221 | if (size > root->fs_info->max_extent) { | ||
| 1222 | u64 num_extents; | ||
| 1223 | u64 new_size; | ||
| 1224 | |||
| 1225 | new_size = orig->end - split + 1; | ||
| 1226 | num_extents = div64_u64(size + root->fs_info->max_extent - 1, | ||
| 1227 | root->fs_info->max_extent); | ||
| 1228 | |||
| 1229 | /* | ||
| 1230 | * if we break a large extent up then leave outstanding_extents | ||
| 1231 | * be, since we've already accounted for the large extent. | ||
| 1232 | */ | ||
| 1233 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1234 | root->fs_info->max_extent) < num_extents) | ||
| 1235 | return 0; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1239 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1240 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1241 | |||
| 1242 | return 0; | ||
| 1243 | } | ||
| 1244 | |||
| 1245 | /* | ||
| 1246 | * extent_io.c merge_extent_hook, used to track merged delayed allocation | ||
| 1247 | * extents so we can keep track of new extents that are just merged onto old | ||
| 1248 | * extents, such as when we are doing sequential writes, so we can properly | ||
| 1249 | * account for the metadata space we'll need. | ||
| 1250 | */ | ||
| 1251 | static int btrfs_merge_extent_hook(struct inode *inode, | ||
| 1252 | struct extent_state *new, | ||
| 1253 | struct extent_state *other) | ||
| 1254 | { | ||
| 1255 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1256 | u64 new_size, old_size; | ||
| 1257 | u64 num_extents; | ||
| 1258 | |||
| 1259 | /* not delalloc, ignore it */ | ||
| 1260 | if (!(other->state & EXTENT_DELALLOC)) | ||
| 1261 | return 0; | ||
| 1262 | |||
| 1263 | old_size = other->end - other->start + 1; | ||
| 1264 | if (new->start < other->start) | ||
| 1265 | new_size = other->end - new->start + 1; | ||
| 1266 | else | ||
| 1267 | new_size = new->end - other->start + 1; | ||
| 1268 | |||
| 1269 | /* we're not bigger than the max, unreserve the space and go */ | ||
| 1270 | if (new_size <= root->fs_info->max_extent) { | ||
| 1271 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1272 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1273 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1274 | return 0; | ||
| 1275 | } | ||
| 1276 | |||
| 1277 | /* | ||
| 1278 | * If we grew by another max_extent, just return, we want to keep that | ||
| 1279 | * reserved amount. | ||
| 1280 | */ | ||
| 1281 | num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, | ||
| 1282 | root->fs_info->max_extent); | ||
| 1283 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1284 | root->fs_info->max_extent) > num_extents) | ||
| 1285 | return 0; | ||
| 1286 | |||
| 1287 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1288 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1289 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1290 | |||
| 1291 | return 0; | ||
| 1292 | } | ||
| 1293 | |||
| 1150 | /* | 1294 | /* |
| 1151 | * extent_io.c set_bit_hook, used to track delayed allocation | 1295 | * extent_io.c set_bit_hook, used to track delayed allocation |
| 1152 | * bytes in this file, and to maintain the list of inodes that | 1296 | * bytes in this file, and to maintain the list of inodes that |
| @@ -1155,6 +1299,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1155 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1299 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, |
| 1156 | unsigned long old, unsigned long bits) | 1300 | unsigned long old, unsigned long bits) |
| 1157 | { | 1301 | { |
| 1302 | |||
| 1158 | /* | 1303 | /* |
| 1159 | * set_bit and clear bit hooks normally require _irqsave/restore | 1304 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1160 | * but in this case, we are only testing for the DELALLOC | 1305 | * but in this case, we are only testing for the DELALLOC |
| @@ -1162,6 +1307,10 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1162 | */ | 1307 | */ |
| 1163 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1308 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1164 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1309 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1310 | |||
| 1311 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1312 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1313 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1165 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1314 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); |
| 1166 | spin_lock(&root->fs_info->delalloc_lock); | 1315 | spin_lock(&root->fs_info->delalloc_lock); |
| 1167 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1316 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; |
| @@ -1178,22 +1327,31 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1178 | /* | 1327 | /* |
| 1179 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1328 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
| 1180 | */ | 1329 | */ |
| 1181 | static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | 1330 | static int btrfs_clear_bit_hook(struct inode *inode, |
| 1182 | unsigned long old, unsigned long bits) | 1331 | struct extent_state *state, unsigned long bits) |
| 1183 | { | 1332 | { |
| 1184 | /* | 1333 | /* |
| 1185 | * set_bit and clear bit hooks normally require _irqsave/restore | 1334 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1186 | * but in this case, we are only testing for the DELALLOC | 1335 | * but in this case, we are only testing for the DELALLOC |
| 1187 | * bit, which is only set or cleared with irqs on | 1336 | * bit, which is only set or cleared with irqs on |
| 1188 | */ | 1337 | */ |
| 1189 | if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1338 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1190 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1339 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1191 | 1340 | ||
| 1341 | if (bits & EXTENT_DO_ACCOUNTING) { | ||
| 1342 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1343 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1344 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1345 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1346 | } | ||
| 1347 | |||
| 1192 | spin_lock(&root->fs_info->delalloc_lock); | 1348 | spin_lock(&root->fs_info->delalloc_lock); |
| 1193 | if (end - start + 1 > root->fs_info->delalloc_bytes) { | 1349 | if (state->end - state->start + 1 > |
| 1350 | root->fs_info->delalloc_bytes) { | ||
| 1194 | printk(KERN_INFO "btrfs warning: delalloc account " | 1351 | printk(KERN_INFO "btrfs warning: delalloc account " |
| 1195 | "%llu %llu\n", | 1352 | "%llu %llu\n", |
| 1196 | (unsigned long long)end - start + 1, | 1353 | (unsigned long long) |
| 1354 | state->end - state->start + 1, | ||
| 1197 | (unsigned long long) | 1355 | (unsigned long long) |
| 1198 | root->fs_info->delalloc_bytes); | 1356 | root->fs_info->delalloc_bytes); |
| 1199 | btrfs_delalloc_free_space(root, inode, (u64)-1); | 1357 | btrfs_delalloc_free_space(root, inode, (u64)-1); |
| @@ -1201,9 +1359,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1201 | BTRFS_I(inode)->delalloc_bytes = 0; | 1359 | BTRFS_I(inode)->delalloc_bytes = 0; |
| 1202 | } else { | 1360 | } else { |
| 1203 | btrfs_delalloc_free_space(root, inode, | 1361 | btrfs_delalloc_free_space(root, inode, |
| 1204 | end - start + 1); | 1362 | state->end - |
| 1205 | root->fs_info->delalloc_bytes -= end - start + 1; | 1363 | state->start + 1); |
| 1206 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; | 1364 | root->fs_info->delalloc_bytes -= state->end - |
| 1365 | state->start + 1; | ||
| 1366 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
| 1367 | state->start + 1; | ||
| 1207 | } | 1368 | } |
| 1208 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1369 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
| 1209 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1370 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| @@ -1374,10 +1535,8 @@ again: | |||
| 1374 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 1535 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); |
| 1375 | 1536 | ||
| 1376 | /* already ordered? We're done */ | 1537 | /* already ordered? We're done */ |
| 1377 | if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 1538 | if (PagePrivate2(page)) |
| 1378 | EXTENT_ORDERED, 0)) { | ||
| 1379 | goto out; | 1539 | goto out; |
| 1380 | } | ||
| 1381 | 1540 | ||
| 1382 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1541 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
| 1383 | if (ordered) { | 1542 | if (ordered) { |
| @@ -1413,11 +1572,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
| 1413 | struct inode *inode = page->mapping->host; | 1572 | struct inode *inode = page->mapping->host; |
| 1414 | struct btrfs_writepage_fixup *fixup; | 1573 | struct btrfs_writepage_fixup *fixup; |
| 1415 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1574 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1416 | int ret; | ||
| 1417 | 1575 | ||
| 1418 | ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | 1576 | /* this page is properly in the ordered list */ |
| 1419 | EXTENT_ORDERED, 0); | 1577 | if (TestClearPagePrivate2(page)) |
| 1420 | if (ret) | ||
| 1421 | return 0; | 1578 | return 0; |
| 1422 | 1579 | ||
| 1423 | if (PageChecked(page)) | 1580 | if (PageChecked(page)) |
| @@ -1455,9 +1612,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1455 | BUG_ON(!path); | 1612 | BUG_ON(!path); |
| 1456 | 1613 | ||
| 1457 | path->leave_spinning = 1; | 1614 | path->leave_spinning = 1; |
| 1615 | |||
| 1616 | /* | ||
| 1617 | * we may be replacing one extent in the tree with another. | ||
| 1618 | * The new extent is pinned in the extent map, and we don't want | ||
| 1619 | * to drop it from the cache until it is completely in the btree. | ||
| 1620 | * | ||
| 1621 | * So, tell btrfs_drop_extents to leave this extent in the cache. | ||
| 1622 | * the caller is expected to unpin it and allow it to be merged | ||
| 1623 | * with the others. | ||
| 1624 | */ | ||
| 1458 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1625 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
| 1459 | file_pos + num_bytes, locked_end, | 1626 | file_pos + num_bytes, locked_end, |
| 1460 | file_pos, &hint); | 1627 | file_pos, &hint, 0); |
| 1461 | BUG_ON(ret); | 1628 | BUG_ON(ret); |
| 1462 | 1629 | ||
| 1463 | ins.objectid = inode->i_ino; | 1630 | ins.objectid = inode->i_ino; |
| @@ -1485,7 +1652,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1485 | btrfs_mark_buffer_dirty(leaf); | 1652 | btrfs_mark_buffer_dirty(leaf); |
| 1486 | 1653 | ||
| 1487 | inode_add_bytes(inode, num_bytes); | 1654 | inode_add_bytes(inode, num_bytes); |
| 1488 | btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); | ||
| 1489 | 1655 | ||
| 1490 | ins.objectid = disk_bytenr; | 1656 | ins.objectid = disk_bytenr; |
| 1491 | ins.offset = disk_num_bytes; | 1657 | ins.offset = disk_num_bytes; |
| @@ -1596,6 +1762,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1596 | ordered_extent->len, | 1762 | ordered_extent->len, |
| 1597 | compressed, 0, 0, | 1763 | compressed, 0, 0, |
| 1598 | BTRFS_FILE_EXTENT_REG); | 1764 | BTRFS_FILE_EXTENT_REG); |
| 1765 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 1766 | ordered_extent->file_offset, | ||
| 1767 | ordered_extent->len); | ||
| 1599 | BUG_ON(ret); | 1768 | BUG_ON(ret); |
| 1600 | } | 1769 | } |
| 1601 | unlock_extent(io_tree, ordered_extent->file_offset, | 1770 | unlock_extent(io_tree, ordered_extent->file_offset, |
| @@ -1623,6 +1792,7 @@ nocow: | |||
| 1623 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1792 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
| 1624 | struct extent_state *state, int uptodate) | 1793 | struct extent_state *state, int uptodate) |
| 1625 | { | 1794 | { |
| 1795 | ClearPagePrivate2(page); | ||
| 1626 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1796 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
| 1627 | } | 1797 | } |
| 1628 | 1798 | ||
| @@ -1669,13 +1839,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 1669 | failrec->last_mirror = 0; | 1839 | failrec->last_mirror = 0; |
| 1670 | failrec->bio_flags = 0; | 1840 | failrec->bio_flags = 0; |
| 1671 | 1841 | ||
| 1672 | spin_lock(&em_tree->lock); | 1842 | read_lock(&em_tree->lock); |
| 1673 | em = lookup_extent_mapping(em_tree, start, failrec->len); | 1843 | em = lookup_extent_mapping(em_tree, start, failrec->len); |
| 1674 | if (em->start > start || em->start + em->len < start) { | 1844 | if (em->start > start || em->start + em->len < start) { |
| 1675 | free_extent_map(em); | 1845 | free_extent_map(em); |
| 1676 | em = NULL; | 1846 | em = NULL; |
| 1677 | } | 1847 | } |
| 1678 | spin_unlock(&em_tree->lock); | 1848 | read_unlock(&em_tree->lock); |
| 1679 | 1849 | ||
| 1680 | if (!em || IS_ERR(em)) { | 1850 | if (!em || IS_ERR(em)) { |
| 1681 | kfree(failrec); | 1851 | kfree(failrec); |
| @@ -1794,7 +1964,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 1794 | return 0; | 1964 | return 0; |
| 1795 | 1965 | ||
| 1796 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1966 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
| 1797 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { | 1967 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
| 1798 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, | 1968 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, |
| 1799 | GFP_NOFS); | 1969 | GFP_NOFS); |
| 1800 | return 0; | 1970 | return 0; |
| @@ -2333,7 +2503,19 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 2333 | 2503 | ||
| 2334 | root = BTRFS_I(dir)->root; | 2504 | root = BTRFS_I(dir)->root; |
| 2335 | 2505 | ||
| 2506 | /* | ||
| 2507 | * 5 items for unlink inode | ||
| 2508 | * 1 for orphan | ||
| 2509 | */ | ||
| 2510 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 2511 | if (ret) | ||
| 2512 | return ret; | ||
| 2513 | |||
| 2336 | trans = btrfs_start_transaction(root, 1); | 2514 | trans = btrfs_start_transaction(root, 1); |
| 2515 | if (IS_ERR(trans)) { | ||
| 2516 | btrfs_unreserve_metadata_space(root, 6); | ||
| 2517 | return PTR_ERR(trans); | ||
| 2518 | } | ||
| 2337 | 2519 | ||
| 2338 | btrfs_set_trans_block_group(trans, dir); | 2520 | btrfs_set_trans_block_group(trans, dir); |
| 2339 | 2521 | ||
| @@ -2348,10 +2530,74 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 2348 | nr = trans->blocks_used; | 2530 | nr = trans->blocks_used; |
| 2349 | 2531 | ||
| 2350 | btrfs_end_transaction_throttle(trans, root); | 2532 | btrfs_end_transaction_throttle(trans, root); |
| 2533 | btrfs_unreserve_metadata_space(root, 6); | ||
| 2351 | btrfs_btree_balance_dirty(root, nr); | 2534 | btrfs_btree_balance_dirty(root, nr); |
| 2352 | return ret; | 2535 | return ret; |
| 2353 | } | 2536 | } |
| 2354 | 2537 | ||
| 2538 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
| 2539 | struct btrfs_root *root, | ||
| 2540 | struct inode *dir, u64 objectid, | ||
| 2541 | const char *name, int name_len) | ||
| 2542 | { | ||
| 2543 | struct btrfs_path *path; | ||
| 2544 | struct extent_buffer *leaf; | ||
| 2545 | struct btrfs_dir_item *di; | ||
| 2546 | struct btrfs_key key; | ||
| 2547 | u64 index; | ||
| 2548 | int ret; | ||
| 2549 | |||
| 2550 | path = btrfs_alloc_path(); | ||
| 2551 | if (!path) | ||
| 2552 | return -ENOMEM; | ||
| 2553 | |||
| 2554 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
| 2555 | name, name_len, -1); | ||
| 2556 | BUG_ON(!di || IS_ERR(di)); | ||
| 2557 | |||
| 2558 | leaf = path->nodes[0]; | ||
| 2559 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
| 2560 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
| 2561 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
| 2562 | BUG_ON(ret); | ||
| 2563 | btrfs_release_path(root, path); | ||
| 2564 | |||
| 2565 | ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, | ||
| 2566 | objectid, root->root_key.objectid, | ||
| 2567 | dir->i_ino, &index, name, name_len); | ||
| 2568 | if (ret < 0) { | ||
| 2569 | BUG_ON(ret != -ENOENT); | ||
| 2570 | di = btrfs_search_dir_index_item(root, path, dir->i_ino, | ||
| 2571 | name, name_len); | ||
| 2572 | BUG_ON(!di || IS_ERR(di)); | ||
| 2573 | |||
| 2574 | leaf = path->nodes[0]; | ||
| 2575 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 2576 | btrfs_release_path(root, path); | ||
| 2577 | index = key.offset; | ||
| 2578 | } | ||
| 2579 | |||
| 2580 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
| 2581 | index, name, name_len, -1); | ||
| 2582 | BUG_ON(!di || IS_ERR(di)); | ||
| 2583 | |||
| 2584 | leaf = path->nodes[0]; | ||
| 2585 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
| 2586 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
| 2587 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
| 2588 | BUG_ON(ret); | ||
| 2589 | btrfs_release_path(root, path); | ||
| 2590 | |||
| 2591 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | ||
| 2592 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
| 2593 | ret = btrfs_update_inode(trans, root, dir); | ||
| 2594 | BUG_ON(ret); | ||
| 2595 | dir->i_sb->s_dirt = 1; | ||
| 2596 | |||
| 2597 | btrfs_free_path(path); | ||
| 2598 | return 0; | ||
| 2599 | } | ||
| 2600 | |||
| 2355 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | 2601 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) |
| 2356 | { | 2602 | { |
| 2357 | struct inode *inode = dentry->d_inode; | 2603 | struct inode *inode = dentry->d_inode; |
| @@ -2361,31 +2607,43 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2361 | struct btrfs_trans_handle *trans; | 2607 | struct btrfs_trans_handle *trans; |
| 2362 | unsigned long nr = 0; | 2608 | unsigned long nr = 0; |
| 2363 | 2609 | ||
| 2364 | /* | ||
| 2365 | * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir | ||
| 2366 | * the root of a subvolume or snapshot | ||
| 2367 | */ | ||
| 2368 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 2610 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || |
| 2369 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | 2611 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 2370 | return -ENOTEMPTY; | 2612 | return -ENOTEMPTY; |
| 2371 | } | 2613 | |
| 2614 | ret = btrfs_reserve_metadata_space(root, 5); | ||
| 2615 | if (ret) | ||
| 2616 | return ret; | ||
| 2372 | 2617 | ||
| 2373 | trans = btrfs_start_transaction(root, 1); | 2618 | trans = btrfs_start_transaction(root, 1); |
| 2619 | if (IS_ERR(trans)) { | ||
| 2620 | btrfs_unreserve_metadata_space(root, 5); | ||
| 2621 | return PTR_ERR(trans); | ||
| 2622 | } | ||
| 2623 | |||
| 2374 | btrfs_set_trans_block_group(trans, dir); | 2624 | btrfs_set_trans_block_group(trans, dir); |
| 2375 | 2625 | ||
| 2626 | if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | ||
| 2627 | err = btrfs_unlink_subvol(trans, root, dir, | ||
| 2628 | BTRFS_I(inode)->location.objectid, | ||
| 2629 | dentry->d_name.name, | ||
| 2630 | dentry->d_name.len); | ||
| 2631 | goto out; | ||
| 2632 | } | ||
| 2633 | |||
| 2376 | err = btrfs_orphan_add(trans, inode); | 2634 | err = btrfs_orphan_add(trans, inode); |
| 2377 | if (err) | 2635 | if (err) |
| 2378 | goto fail_trans; | 2636 | goto out; |
| 2379 | 2637 | ||
| 2380 | /* now the directory is empty */ | 2638 | /* now the directory is empty */ |
| 2381 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2639 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
| 2382 | dentry->d_name.name, dentry->d_name.len); | 2640 | dentry->d_name.name, dentry->d_name.len); |
| 2383 | if (!err) | 2641 | if (!err) |
| 2384 | btrfs_i_size_write(inode, 0); | 2642 | btrfs_i_size_write(inode, 0); |
| 2385 | 2643 | out: | |
| 2386 | fail_trans: | ||
| 2387 | nr = trans->blocks_used; | 2644 | nr = trans->blocks_used; |
| 2388 | ret = btrfs_end_transaction_throttle(trans, root); | 2645 | ret = btrfs_end_transaction_throttle(trans, root); |
| 2646 | btrfs_unreserve_metadata_space(root, 5); | ||
| 2389 | btrfs_btree_balance_dirty(root, nr); | 2647 | btrfs_btree_balance_dirty(root, nr); |
| 2390 | 2648 | ||
| 2391 | if (ret && !err) | 2649 | if (ret && !err) |
| @@ -2826,12 +3084,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 2826 | 3084 | ||
| 2827 | if ((offset & (blocksize - 1)) == 0) | 3085 | if ((offset & (blocksize - 1)) == 0) |
| 2828 | goto out; | 3086 | goto out; |
| 3087 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | ||
| 3088 | if (ret) | ||
| 3089 | goto out; | ||
| 3090 | |||
| 3091 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 3092 | if (ret) | ||
| 3093 | goto out; | ||
| 2829 | 3094 | ||
| 2830 | ret = -ENOMEM; | 3095 | ret = -ENOMEM; |
| 2831 | again: | 3096 | again: |
| 2832 | page = grab_cache_page(mapping, index); | 3097 | page = grab_cache_page(mapping, index); |
| 2833 | if (!page) | 3098 | if (!page) { |
| 3099 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 3100 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 2834 | goto out; | 3101 | goto out; |
| 3102 | } | ||
| 2835 | 3103 | ||
| 2836 | page_start = page_offset(page); | 3104 | page_start = page_offset(page); |
| 2837 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 3105 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
| @@ -2864,7 +3132,16 @@ again: | |||
| 2864 | goto again; | 3132 | goto again; |
| 2865 | } | 3133 | } |
| 2866 | 3134 | ||
| 2867 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 3135 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, |
| 3136 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | ||
| 3137 | GFP_NOFS); | ||
| 3138 | |||
| 3139 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 3140 | if (ret) { | ||
| 3141 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3142 | goto out_unlock; | ||
| 3143 | } | ||
| 3144 | |||
| 2868 | ret = 0; | 3145 | ret = 0; |
| 2869 | if (offset != PAGE_CACHE_SIZE) { | 3146 | if (offset != PAGE_CACHE_SIZE) { |
| 2870 | kaddr = kmap(page); | 3147 | kaddr = kmap(page); |
| @@ -2877,6 +3154,9 @@ again: | |||
| 2877 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 3154 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 2878 | 3155 | ||
| 2879 | out_unlock: | 3156 | out_unlock: |
| 3157 | if (ret) | ||
| 3158 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 3159 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 2880 | unlock_page(page); | 3160 | unlock_page(page); |
| 2881 | page_cache_release(page); | 3161 | page_cache_release(page); |
| 2882 | out: | 3162 | out: |
| @@ -2895,17 +3175,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 2895 | u64 last_byte; | 3175 | u64 last_byte; |
| 2896 | u64 cur_offset; | 3176 | u64 cur_offset; |
| 2897 | u64 hole_size; | 3177 | u64 hole_size; |
| 2898 | int err; | 3178 | int err = 0; |
| 2899 | 3179 | ||
| 2900 | if (size <= hole_start) | 3180 | if (size <= hole_start) |
| 2901 | return 0; | 3181 | return 0; |
| 2902 | 3182 | ||
| 2903 | err = btrfs_check_metadata_free_space(root); | 3183 | err = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
| 2904 | if (err) | 3184 | if (err) |
| 2905 | return err; | 3185 | return err; |
| 2906 | 3186 | ||
| 2907 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | ||
| 2908 | |||
| 2909 | while (1) { | 3187 | while (1) { |
| 2910 | struct btrfs_ordered_extent *ordered; | 3188 | struct btrfs_ordered_extent *ordered; |
| 2911 | btrfs_wait_ordered_range(inode, hole_start, | 3189 | btrfs_wait_ordered_range(inode, hole_start, |
| @@ -2935,15 +3213,21 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 2935 | cur_offset, | 3213 | cur_offset, |
| 2936 | cur_offset + hole_size, | 3214 | cur_offset + hole_size, |
| 2937 | block_end, | 3215 | block_end, |
| 2938 | cur_offset, &hint_byte); | 3216 | cur_offset, &hint_byte, 1); |
| 2939 | if (err) | 3217 | if (err) |
| 2940 | break; | 3218 | break; |
| 3219 | |||
| 3220 | err = btrfs_reserve_metadata_space(root, 1); | ||
| 3221 | if (err) | ||
| 3222 | break; | ||
| 3223 | |||
| 2941 | err = btrfs_insert_file_extent(trans, root, | 3224 | err = btrfs_insert_file_extent(trans, root, |
| 2942 | inode->i_ino, cur_offset, 0, | 3225 | inode->i_ino, cur_offset, 0, |
| 2943 | 0, hole_size, 0, hole_size, | 3226 | 0, hole_size, 0, hole_size, |
| 2944 | 0, 0, 0); | 3227 | 0, 0, 0); |
| 2945 | btrfs_drop_extent_cache(inode, hole_start, | 3228 | btrfs_drop_extent_cache(inode, hole_start, |
| 2946 | last_byte - 1, 0); | 3229 | last_byte - 1, 0); |
| 3230 | btrfs_unreserve_metadata_space(root, 1); | ||
| 2947 | } | 3231 | } |
| 2948 | free_extent_map(em); | 3232 | free_extent_map(em); |
| 2949 | cur_offset = last_byte; | 3233 | cur_offset = last_byte; |
| @@ -3003,6 +3287,11 @@ void btrfs_delete_inode(struct inode *inode) | |||
| 3003 | } | 3287 | } |
| 3004 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3288 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
| 3005 | 3289 | ||
| 3290 | if (inode->i_nlink > 0) { | ||
| 3291 | BUG_ON(btrfs_root_refs(&root->root_item) != 0); | ||
| 3292 | goto no_delete; | ||
| 3293 | } | ||
| 3294 | |||
| 3006 | btrfs_i_size_write(inode, 0); | 3295 | btrfs_i_size_write(inode, 0); |
| 3007 | trans = btrfs_join_transaction(root, 1); | 3296 | trans = btrfs_join_transaction(root, 1); |
| 3008 | 3297 | ||
| @@ -3070,29 +3359,67 @@ out_err: | |||
| 3070 | * is kind of like crossing a mount point. | 3359 | * is kind of like crossing a mount point. |
| 3071 | */ | 3360 | */ |
| 3072 | static int fixup_tree_root_location(struct btrfs_root *root, | 3361 | static int fixup_tree_root_location(struct btrfs_root *root, |
| 3073 | struct btrfs_key *location, | 3362 | struct inode *dir, |
| 3074 | struct btrfs_root **sub_root, | 3363 | struct dentry *dentry, |
| 3075 | struct dentry *dentry) | 3364 | struct btrfs_key *location, |
| 3365 | struct btrfs_root **sub_root) | ||
| 3076 | { | 3366 | { |
| 3077 | struct btrfs_root_item *ri; | 3367 | struct btrfs_path *path; |
| 3368 | struct btrfs_root *new_root; | ||
| 3369 | struct btrfs_root_ref *ref; | ||
| 3370 | struct extent_buffer *leaf; | ||
| 3371 | int ret; | ||
| 3372 | int err = 0; | ||
| 3078 | 3373 | ||
| 3079 | if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) | 3374 | path = btrfs_alloc_path(); |
| 3080 | return 0; | 3375 | if (!path) { |
| 3081 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | 3376 | err = -ENOMEM; |
| 3082 | return 0; | 3377 | goto out; |
| 3378 | } | ||
| 3083 | 3379 | ||
| 3084 | *sub_root = btrfs_read_fs_root(root->fs_info, location, | 3380 | err = -ENOENT; |
| 3085 | dentry->d_name.name, | 3381 | ret = btrfs_find_root_ref(root->fs_info->tree_root, path, |
| 3086 | dentry->d_name.len); | 3382 | BTRFS_I(dir)->root->root_key.objectid, |
| 3087 | if (IS_ERR(*sub_root)) | 3383 | location->objectid); |
| 3088 | return PTR_ERR(*sub_root); | 3384 | if (ret) { |
| 3385 | if (ret < 0) | ||
| 3386 | err = ret; | ||
| 3387 | goto out; | ||
| 3388 | } | ||
| 3089 | 3389 | ||
| 3090 | ri = &(*sub_root)->root_item; | 3390 | leaf = path->nodes[0]; |
| 3091 | location->objectid = btrfs_root_dirid(ri); | 3391 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); |
| 3092 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | 3392 | if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || |
| 3093 | location->offset = 0; | 3393 | btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) |
| 3394 | goto out; | ||
| 3094 | 3395 | ||
| 3095 | return 0; | 3396 | ret = memcmp_extent_buffer(leaf, dentry->d_name.name, |
| 3397 | (unsigned long)(ref + 1), | ||
| 3398 | dentry->d_name.len); | ||
| 3399 | if (ret) | ||
| 3400 | goto out; | ||
| 3401 | |||
| 3402 | btrfs_release_path(root->fs_info->tree_root, path); | ||
| 3403 | |||
| 3404 | new_root = btrfs_read_fs_root_no_name(root->fs_info, location); | ||
| 3405 | if (IS_ERR(new_root)) { | ||
| 3406 | err = PTR_ERR(new_root); | ||
| 3407 | goto out; | ||
| 3408 | } | ||
| 3409 | |||
| 3410 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
| 3411 | err = -ENOENT; | ||
| 3412 | goto out; | ||
| 3413 | } | ||
| 3414 | |||
| 3415 | *sub_root = new_root; | ||
| 3416 | location->objectid = btrfs_root_dirid(&new_root->root_item); | ||
| 3417 | location->type = BTRFS_INODE_ITEM_KEY; | ||
| 3418 | location->offset = 0; | ||
| 3419 | err = 0; | ||
| 3420 | out: | ||
| 3421 | btrfs_free_path(path); | ||
| 3422 | return err; | ||
| 3096 | } | 3423 | } |
| 3097 | 3424 | ||
| 3098 | static void inode_tree_add(struct inode *inode) | 3425 | static void inode_tree_add(struct inode *inode) |
| @@ -3101,11 +3428,13 @@ static void inode_tree_add(struct inode *inode) | |||
| 3101 | struct btrfs_inode *entry; | 3428 | struct btrfs_inode *entry; |
| 3102 | struct rb_node **p; | 3429 | struct rb_node **p; |
| 3103 | struct rb_node *parent; | 3430 | struct rb_node *parent; |
| 3104 | |||
| 3105 | again: | 3431 | again: |
| 3106 | p = &root->inode_tree.rb_node; | 3432 | p = &root->inode_tree.rb_node; |
| 3107 | parent = NULL; | 3433 | parent = NULL; |
| 3108 | 3434 | ||
| 3435 | if (hlist_unhashed(&inode->i_hash)) | ||
| 3436 | return; | ||
| 3437 | |||
| 3109 | spin_lock(&root->inode_lock); | 3438 | spin_lock(&root->inode_lock); |
| 3110 | while (*p) { | 3439 | while (*p) { |
| 3111 | parent = *p; | 3440 | parent = *p; |
| @@ -3132,13 +3461,87 @@ again: | |||
| 3132 | static void inode_tree_del(struct inode *inode) | 3461 | static void inode_tree_del(struct inode *inode) |
| 3133 | { | 3462 | { |
| 3134 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3463 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3464 | int empty = 0; | ||
| 3135 | 3465 | ||
| 3136 | spin_lock(&root->inode_lock); | 3466 | spin_lock(&root->inode_lock); |
| 3137 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | 3467 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
| 3138 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | 3468 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
| 3139 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 3469 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
| 3470 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
| 3471 | } | ||
| 3472 | spin_unlock(&root->inode_lock); | ||
| 3473 | |||
| 3474 | if (empty && btrfs_root_refs(&root->root_item) == 0) { | ||
| 3475 | synchronize_srcu(&root->fs_info->subvol_srcu); | ||
| 3476 | spin_lock(&root->inode_lock); | ||
| 3477 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
| 3478 | spin_unlock(&root->inode_lock); | ||
| 3479 | if (empty) | ||
| 3480 | btrfs_add_dead_root(root); | ||
| 3481 | } | ||
| 3482 | } | ||
| 3483 | |||
| 3484 | int btrfs_invalidate_inodes(struct btrfs_root *root) | ||
| 3485 | { | ||
| 3486 | struct rb_node *node; | ||
| 3487 | struct rb_node *prev; | ||
| 3488 | struct btrfs_inode *entry; | ||
| 3489 | struct inode *inode; | ||
| 3490 | u64 objectid = 0; | ||
| 3491 | |||
| 3492 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
| 3493 | |||
| 3494 | spin_lock(&root->inode_lock); | ||
| 3495 | again: | ||
| 3496 | node = root->inode_tree.rb_node; | ||
| 3497 | prev = NULL; | ||
| 3498 | while (node) { | ||
| 3499 | prev = node; | ||
| 3500 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
| 3501 | |||
| 3502 | if (objectid < entry->vfs_inode.i_ino) | ||
| 3503 | node = node->rb_left; | ||
| 3504 | else if (objectid > entry->vfs_inode.i_ino) | ||
| 3505 | node = node->rb_right; | ||
| 3506 | else | ||
| 3507 | break; | ||
| 3508 | } | ||
| 3509 | if (!node) { | ||
| 3510 | while (prev) { | ||
| 3511 | entry = rb_entry(prev, struct btrfs_inode, rb_node); | ||
| 3512 | if (objectid <= entry->vfs_inode.i_ino) { | ||
| 3513 | node = prev; | ||
| 3514 | break; | ||
| 3515 | } | ||
| 3516 | prev = rb_next(prev); | ||
| 3517 | } | ||
| 3518 | } | ||
| 3519 | while (node) { | ||
| 3520 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
| 3521 | objectid = entry->vfs_inode.i_ino + 1; | ||
| 3522 | inode = igrab(&entry->vfs_inode); | ||
| 3523 | if (inode) { | ||
| 3524 | spin_unlock(&root->inode_lock); | ||
| 3525 | if (atomic_read(&inode->i_count) > 1) | ||
| 3526 | d_prune_aliases(inode); | ||
| 3527 | /* | ||
| 3528 | * btrfs_drop_inode will remove it from | ||
| 3529 | * the inode cache when its usage count | ||
| 3530 | * hits zero. | ||
| 3531 | */ | ||
| 3532 | iput(inode); | ||
| 3533 | cond_resched(); | ||
| 3534 | spin_lock(&root->inode_lock); | ||
| 3535 | goto again; | ||
| 3536 | } | ||
| 3537 | |||
| 3538 | if (cond_resched_lock(&root->inode_lock)) | ||
| 3539 | goto again; | ||
| 3540 | |||
| 3541 | node = rb_next(node); | ||
| 3140 | } | 3542 | } |
| 3141 | spin_unlock(&root->inode_lock); | 3543 | spin_unlock(&root->inode_lock); |
| 3544 | return 0; | ||
| 3142 | } | 3545 | } |
| 3143 | 3546 | ||
| 3144 | static noinline void init_btrfs_i(struct inode *inode) | 3547 | static noinline void init_btrfs_i(struct inode *inode) |
| @@ -3148,6 +3551,7 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
| 3148 | bi->generation = 0; | 3551 | bi->generation = 0; |
| 3149 | bi->sequence = 0; | 3552 | bi->sequence = 0; |
| 3150 | bi->last_trans = 0; | 3553 | bi->last_trans = 0; |
| 3554 | bi->last_sub_trans = 0; | ||
| 3151 | bi->logged_trans = 0; | 3555 | bi->logged_trans = 0; |
| 3152 | bi->delalloc_bytes = 0; | 3556 | bi->delalloc_bytes = 0; |
| 3153 | bi->reserved_bytes = 0; | 3557 | bi->reserved_bytes = 0; |
| @@ -3225,15 +3629,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
| 3225 | return inode; | 3629 | return inode; |
| 3226 | } | 3630 | } |
| 3227 | 3631 | ||
| 3632 | static struct inode *new_simple_dir(struct super_block *s, | ||
| 3633 | struct btrfs_key *key, | ||
| 3634 | struct btrfs_root *root) | ||
| 3635 | { | ||
| 3636 | struct inode *inode = new_inode(s); | ||
| 3637 | |||
| 3638 | if (!inode) | ||
| 3639 | return ERR_PTR(-ENOMEM); | ||
| 3640 | |||
| 3641 | init_btrfs_i(inode); | ||
| 3642 | |||
| 3643 | BTRFS_I(inode)->root = root; | ||
| 3644 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | ||
| 3645 | BTRFS_I(inode)->dummy_inode = 1; | ||
| 3646 | |||
| 3647 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; | ||
| 3648 | inode->i_op = &simple_dir_inode_operations; | ||
| 3649 | inode->i_fop = &simple_dir_operations; | ||
| 3650 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; | ||
| 3651 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
| 3652 | |||
| 3653 | return inode; | ||
| 3654 | } | ||
| 3655 | |||
| 3228 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | 3656 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) |
| 3229 | { | 3657 | { |
| 3230 | struct inode *inode; | 3658 | struct inode *inode; |
| 3231 | struct btrfs_inode *bi = BTRFS_I(dir); | 3659 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 3232 | struct btrfs_root *root = bi->root; | ||
| 3233 | struct btrfs_root *sub_root = root; | 3660 | struct btrfs_root *sub_root = root; |
| 3234 | struct btrfs_key location; | 3661 | struct btrfs_key location; |
| 3662 | int index; | ||
| 3235 | int ret; | 3663 | int ret; |
| 3236 | 3664 | ||
| 3665 | dentry->d_op = &btrfs_dentry_operations; | ||
| 3666 | |||
| 3237 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 3667 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
| 3238 | return ERR_PTR(-ENAMETOOLONG); | 3668 | return ERR_PTR(-ENAMETOOLONG); |
| 3239 | 3669 | ||
| @@ -3242,29 +3672,52 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
| 3242 | if (ret < 0) | 3672 | if (ret < 0) |
| 3243 | return ERR_PTR(ret); | 3673 | return ERR_PTR(ret); |
| 3244 | 3674 | ||
| 3245 | inode = NULL; | 3675 | if (location.objectid == 0) |
| 3246 | if (location.objectid) { | 3676 | return NULL; |
| 3247 | ret = fixup_tree_root_location(root, &location, &sub_root, | 3677 | |
| 3248 | dentry); | 3678 | if (location.type == BTRFS_INODE_ITEM_KEY) { |
| 3249 | if (ret < 0) | 3679 | inode = btrfs_iget(dir->i_sb, &location, root); |
| 3250 | return ERR_PTR(ret); | 3680 | return inode; |
| 3251 | if (ret > 0) | 3681 | } |
| 3252 | return ERR_PTR(-ENOENT); | 3682 | |
| 3683 | BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); | ||
| 3684 | |||
| 3685 | index = srcu_read_lock(&root->fs_info->subvol_srcu); | ||
| 3686 | ret = fixup_tree_root_location(root, dir, dentry, | ||
| 3687 | &location, &sub_root); | ||
| 3688 | if (ret < 0) { | ||
| 3689 | if (ret != -ENOENT) | ||
| 3690 | inode = ERR_PTR(ret); | ||
| 3691 | else | ||
| 3692 | inode = new_simple_dir(dir->i_sb, &location, sub_root); | ||
| 3693 | } else { | ||
| 3253 | inode = btrfs_iget(dir->i_sb, &location, sub_root); | 3694 | inode = btrfs_iget(dir->i_sb, &location, sub_root); |
| 3254 | if (IS_ERR(inode)) | ||
| 3255 | return ERR_CAST(inode); | ||
| 3256 | } | 3695 | } |
| 3696 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | ||
| 3697 | |||
| 3257 | return inode; | 3698 | return inode; |
| 3258 | } | 3699 | } |
| 3259 | 3700 | ||
| 3701 | static int btrfs_dentry_delete(struct dentry *dentry) | ||
| 3702 | { | ||
| 3703 | struct btrfs_root *root; | ||
| 3704 | |||
| 3705 | if (!dentry->d_inode && !IS_ROOT(dentry)) | ||
| 3706 | dentry = dentry->d_parent; | ||
| 3707 | |||
| 3708 | if (dentry->d_inode) { | ||
| 3709 | root = BTRFS_I(dentry->d_inode)->root; | ||
| 3710 | if (btrfs_root_refs(&root->root_item) == 0) | ||
| 3711 | return 1; | ||
| 3712 | } | ||
| 3713 | return 0; | ||
| 3714 | } | ||
| 3715 | |||
| 3260 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 3716 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
| 3261 | struct nameidata *nd) | 3717 | struct nameidata *nd) |
| 3262 | { | 3718 | { |
| 3263 | struct inode *inode; | 3719 | struct inode *inode; |
| 3264 | 3720 | ||
| 3265 | if (dentry->d_name.len > BTRFS_NAME_LEN) | ||
| 3266 | return ERR_PTR(-ENAMETOOLONG); | ||
| 3267 | |||
| 3268 | inode = btrfs_lookup_dentry(dir, dentry); | 3721 | inode = btrfs_lookup_dentry(dir, dentry); |
| 3269 | if (IS_ERR(inode)) | 3722 | if (IS_ERR(inode)) |
| 3270 | return ERR_CAST(inode); | 3723 | return ERR_CAST(inode); |
| @@ -3603,9 +4056,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 3603 | if (ret != 0) | 4056 | if (ret != 0) |
| 3604 | goto fail; | 4057 | goto fail; |
| 3605 | 4058 | ||
| 3606 | if (objectid > root->highest_inode) | ||
| 3607 | root->highest_inode = objectid; | ||
| 3608 | |||
| 3609 | inode->i_uid = current_fsuid(); | 4059 | inode->i_uid = current_fsuid(); |
| 3610 | 4060 | ||
| 3611 | if (dir && (dir->i_mode & S_ISGID)) { | 4061 | if (dir && (dir->i_mode & S_ISGID)) { |
| @@ -3673,26 +4123,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
| 3673 | struct inode *parent_inode, struct inode *inode, | 4123 | struct inode *parent_inode, struct inode *inode, |
| 3674 | const char *name, int name_len, int add_backref, u64 index) | 4124 | const char *name, int name_len, int add_backref, u64 index) |
| 3675 | { | 4125 | { |
| 3676 | int ret; | 4126 | int ret = 0; |
| 3677 | struct btrfs_key key; | 4127 | struct btrfs_key key; |
| 3678 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; | 4128 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; |
| 3679 | 4129 | ||
| 3680 | key.objectid = inode->i_ino; | 4130 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
| 3681 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 4131 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); |
| 3682 | key.offset = 0; | 4132 | } else { |
| 4133 | key.objectid = inode->i_ino; | ||
| 4134 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 4135 | key.offset = 0; | ||
| 4136 | } | ||
| 4137 | |||
| 4138 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
| 4139 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
| 4140 | key.objectid, root->root_key.objectid, | ||
| 4141 | parent_inode->i_ino, | ||
| 4142 | index, name, name_len); | ||
| 4143 | } else if (add_backref) { | ||
| 4144 | ret = btrfs_insert_inode_ref(trans, root, | ||
| 4145 | name, name_len, inode->i_ino, | ||
| 4146 | parent_inode->i_ino, index); | ||
| 4147 | } | ||
| 3683 | 4148 | ||
| 3684 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | ||
| 3685 | parent_inode->i_ino, | ||
| 3686 | &key, btrfs_inode_type(inode), | ||
| 3687 | index); | ||
| 3688 | if (ret == 0) { | 4149 | if (ret == 0) { |
| 3689 | if (add_backref) { | 4150 | ret = btrfs_insert_dir_item(trans, root, name, name_len, |
| 3690 | ret = btrfs_insert_inode_ref(trans, root, | 4151 | parent_inode->i_ino, &key, |
| 3691 | name, name_len, | 4152 | btrfs_inode_type(inode), index); |
| 3692 | inode->i_ino, | 4153 | BUG_ON(ret); |
| 3693 | parent_inode->i_ino, | 4154 | |
| 3694 | index); | ||
| 3695 | } | ||
| 3696 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 4155 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
| 3697 | name_len * 2); | 4156 | name_len * 2); |
| 3698 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 4157 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
| @@ -3732,11 +4191,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 3732 | if (!new_valid_dev(rdev)) | 4191 | if (!new_valid_dev(rdev)) |
| 3733 | return -EINVAL; | 4192 | return -EINVAL; |
| 3734 | 4193 | ||
| 3735 | err = btrfs_check_metadata_free_space(root); | 4194 | /* |
| 4195 | * 2 for inode item and ref | ||
| 4196 | * 2 for dir items | ||
| 4197 | * 1 for xattr if selinux is on | ||
| 4198 | */ | ||
| 4199 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3736 | if (err) | 4200 | if (err) |
| 3737 | goto fail; | 4201 | return err; |
| 3738 | 4202 | ||
| 3739 | trans = btrfs_start_transaction(root, 1); | 4203 | trans = btrfs_start_transaction(root, 1); |
| 4204 | if (!trans) | ||
| 4205 | goto fail; | ||
| 3740 | btrfs_set_trans_block_group(trans, dir); | 4206 | btrfs_set_trans_block_group(trans, dir); |
| 3741 | 4207 | ||
| 3742 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4208 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -3774,6 +4240,7 @@ out_unlock: | |||
| 3774 | nr = trans->blocks_used; | 4240 | nr = trans->blocks_used; |
| 3775 | btrfs_end_transaction_throttle(trans, root); | 4241 | btrfs_end_transaction_throttle(trans, root); |
| 3776 | fail: | 4242 | fail: |
| 4243 | btrfs_unreserve_metadata_space(root, 5); | ||
| 3777 | if (drop_inode) { | 4244 | if (drop_inode) { |
| 3778 | inode_dec_link_count(inode); | 4245 | inode_dec_link_count(inode); |
| 3779 | iput(inode); | 4246 | iput(inode); |
| @@ -3794,10 +4261,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 3794 | u64 objectid; | 4261 | u64 objectid; |
| 3795 | u64 index = 0; | 4262 | u64 index = 0; |
| 3796 | 4263 | ||
| 3797 | err = btrfs_check_metadata_free_space(root); | 4264 | /* |
| 4265 | * 2 for inode item and ref | ||
| 4266 | * 2 for dir items | ||
| 4267 | * 1 for xattr if selinux is on | ||
| 4268 | */ | ||
| 4269 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3798 | if (err) | 4270 | if (err) |
| 3799 | goto fail; | 4271 | return err; |
| 4272 | |||
| 3800 | trans = btrfs_start_transaction(root, 1); | 4273 | trans = btrfs_start_transaction(root, 1); |
| 4274 | if (!trans) | ||
| 4275 | goto fail; | ||
| 3801 | btrfs_set_trans_block_group(trans, dir); | 4276 | btrfs_set_trans_block_group(trans, dir); |
| 3802 | 4277 | ||
| 3803 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4278 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -3838,6 +4313,7 @@ out_unlock: | |||
| 3838 | nr = trans->blocks_used; | 4313 | nr = trans->blocks_used; |
| 3839 | btrfs_end_transaction_throttle(trans, root); | 4314 | btrfs_end_transaction_throttle(trans, root); |
| 3840 | fail: | 4315 | fail: |
| 4316 | btrfs_unreserve_metadata_space(root, 5); | ||
| 3841 | if (drop_inode) { | 4317 | if (drop_inode) { |
| 3842 | inode_dec_link_count(inode); | 4318 | inode_dec_link_count(inode); |
| 3843 | iput(inode); | 4319 | iput(inode); |
| @@ -3860,10 +4336,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 3860 | if (inode->i_nlink == 0) | 4336 | if (inode->i_nlink == 0) |
| 3861 | return -ENOENT; | 4337 | return -ENOENT; |
| 3862 | 4338 | ||
| 3863 | btrfs_inc_nlink(inode); | 4339 | /* |
| 3864 | err = btrfs_check_metadata_free_space(root); | 4340 | * 1 item for inode ref |
| 4341 | * 2 items for dir items | ||
| 4342 | */ | ||
| 4343 | err = btrfs_reserve_metadata_space(root, 3); | ||
| 3865 | if (err) | 4344 | if (err) |
| 3866 | goto fail; | 4345 | return err; |
| 4346 | |||
| 4347 | btrfs_inc_nlink(inode); | ||
| 4348 | |||
| 3867 | err = btrfs_set_inode_index(dir, &index); | 4349 | err = btrfs_set_inode_index(dir, &index); |
| 3868 | if (err) | 4350 | if (err) |
| 3869 | goto fail; | 4351 | goto fail; |
| @@ -3875,20 +4357,19 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 3875 | 4357 | ||
| 3876 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4358 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); |
| 3877 | 4359 | ||
| 3878 | if (err) | 4360 | if (err) { |
| 3879 | drop_inode = 1; | ||
| 3880 | |||
| 3881 | btrfs_update_inode_block_group(trans, dir); | ||
| 3882 | err = btrfs_update_inode(trans, root, inode); | ||
| 3883 | |||
| 3884 | if (err) | ||
| 3885 | drop_inode = 1; | 4361 | drop_inode = 1; |
| 4362 | } else { | ||
| 4363 | btrfs_update_inode_block_group(trans, dir); | ||
| 4364 | err = btrfs_update_inode(trans, root, inode); | ||
| 4365 | BUG_ON(err); | ||
| 4366 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
| 4367 | } | ||
| 3886 | 4368 | ||
| 3887 | nr = trans->blocks_used; | 4369 | nr = trans->blocks_used; |
| 3888 | |||
| 3889 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
| 3890 | btrfs_end_transaction_throttle(trans, root); | 4370 | btrfs_end_transaction_throttle(trans, root); |
| 3891 | fail: | 4371 | fail: |
| 4372 | btrfs_unreserve_metadata_space(root, 3); | ||
| 3892 | if (drop_inode) { | 4373 | if (drop_inode) { |
| 3893 | inode_dec_link_count(inode); | 4374 | inode_dec_link_count(inode); |
| 3894 | iput(inode); | 4375 | iput(inode); |
| @@ -3908,17 +4389,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 3908 | u64 index = 0; | 4389 | u64 index = 0; |
| 3909 | unsigned long nr = 1; | 4390 | unsigned long nr = 1; |
| 3910 | 4391 | ||
| 3911 | err = btrfs_check_metadata_free_space(root); | 4392 | /* |
| 4393 | * 2 items for inode and ref | ||
| 4394 | * 2 items for dir items | ||
| 4395 | * 1 for xattr if selinux is on | ||
| 4396 | */ | ||
| 4397 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3912 | if (err) | 4398 | if (err) |
| 3913 | goto out_unlock; | 4399 | return err; |
| 3914 | 4400 | ||
| 3915 | trans = btrfs_start_transaction(root, 1); | 4401 | trans = btrfs_start_transaction(root, 1); |
| 3916 | btrfs_set_trans_block_group(trans, dir); | 4402 | if (!trans) { |
| 3917 | 4403 | err = -ENOMEM; | |
| 3918 | if (IS_ERR(trans)) { | ||
| 3919 | err = PTR_ERR(trans); | ||
| 3920 | goto out_unlock; | 4404 | goto out_unlock; |
| 3921 | } | 4405 | } |
| 4406 | btrfs_set_trans_block_group(trans, dir); | ||
| 3922 | 4407 | ||
| 3923 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4408 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| 3924 | if (err) { | 4409 | if (err) { |
| @@ -3967,6 +4452,7 @@ out_fail: | |||
| 3967 | btrfs_end_transaction_throttle(trans, root); | 4452 | btrfs_end_transaction_throttle(trans, root); |
| 3968 | 4453 | ||
| 3969 | out_unlock: | 4454 | out_unlock: |
| 4455 | btrfs_unreserve_metadata_space(root, 5); | ||
| 3970 | if (drop_on_err) | 4456 | if (drop_on_err) |
| 3971 | iput(inode); | 4457 | iput(inode); |
| 3972 | btrfs_btree_balance_dirty(root, nr); | 4458 | btrfs_btree_balance_dirty(root, nr); |
| @@ -4064,11 +4550,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 4064 | int compressed; | 4550 | int compressed; |
| 4065 | 4551 | ||
| 4066 | again: | 4552 | again: |
| 4067 | spin_lock(&em_tree->lock); | 4553 | read_lock(&em_tree->lock); |
| 4068 | em = lookup_extent_mapping(em_tree, start, len); | 4554 | em = lookup_extent_mapping(em_tree, start, len); |
| 4069 | if (em) | 4555 | if (em) |
| 4070 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 4556 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 4071 | spin_unlock(&em_tree->lock); | 4557 | read_unlock(&em_tree->lock); |
| 4072 | 4558 | ||
| 4073 | if (em) { | 4559 | if (em) { |
| 4074 | if (em->start > start || em->start + em->len <= start) | 4560 | if (em->start > start || em->start + em->len <= start) |
| @@ -4215,6 +4701,11 @@ again: | |||
| 4215 | map = kmap(page); | 4701 | map = kmap(page); |
| 4216 | read_extent_buffer(leaf, map + pg_offset, ptr, | 4702 | read_extent_buffer(leaf, map + pg_offset, ptr, |
| 4217 | copy_size); | 4703 | copy_size); |
| 4704 | if (pg_offset + copy_size < PAGE_CACHE_SIZE) { | ||
| 4705 | memset(map + pg_offset + copy_size, 0, | ||
| 4706 | PAGE_CACHE_SIZE - pg_offset - | ||
| 4707 | copy_size); | ||
| 4708 | } | ||
| 4218 | kunmap(page); | 4709 | kunmap(page); |
| 4219 | } | 4710 | } |
| 4220 | flush_dcache_page(page); | 4711 | flush_dcache_page(page); |
| @@ -4259,7 +4750,7 @@ insert: | |||
| 4259 | } | 4750 | } |
| 4260 | 4751 | ||
| 4261 | err = 0; | 4752 | err = 0; |
| 4262 | spin_lock(&em_tree->lock); | 4753 | write_lock(&em_tree->lock); |
| 4263 | ret = add_extent_mapping(em_tree, em); | 4754 | ret = add_extent_mapping(em_tree, em); |
| 4264 | /* it is possible that someone inserted the extent into the tree | 4755 | /* it is possible that someone inserted the extent into the tree |
| 4265 | * while we had the lock dropped. It is also possible that | 4756 | * while we had the lock dropped. It is also possible that |
| @@ -4299,7 +4790,7 @@ insert: | |||
| 4299 | err = 0; | 4790 | err = 0; |
| 4300 | } | 4791 | } |
| 4301 | } | 4792 | } |
| 4302 | spin_unlock(&em_tree->lock); | 4793 | write_unlock(&em_tree->lock); |
| 4303 | out: | 4794 | out: |
| 4304 | if (path) | 4795 | if (path) |
| 4305 | btrfs_free_path(path); | 4796 | btrfs_free_path(path); |
| @@ -4398,13 +4889,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 4398 | u64 page_start = page_offset(page); | 4889 | u64 page_start = page_offset(page); |
| 4399 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 4890 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 4400 | 4891 | ||
| 4892 | |||
| 4893 | /* | ||
| 4894 | * we have the page locked, so new writeback can't start, | ||
| 4895 | * and the dirty bit won't be cleared while we are here. | ||
| 4896 | * | ||
| 4897 | * Wait for IO on this page so that we can safely clear | ||
| 4898 | * the PagePrivate2 bit and do ordered accounting | ||
| 4899 | */ | ||
| 4401 | wait_on_page_writeback(page); | 4900 | wait_on_page_writeback(page); |
| 4901 | |||
| 4402 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 4902 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 4403 | if (offset) { | 4903 | if (offset) { |
| 4404 | btrfs_releasepage(page, GFP_NOFS); | 4904 | btrfs_releasepage(page, GFP_NOFS); |
| 4405 | return; | 4905 | return; |
| 4406 | } | 4906 | } |
| 4407 | |||
| 4408 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4907 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
| 4409 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 4908 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, |
| 4410 | page_offset(page)); | 4909 | page_offset(page)); |
| @@ -4415,16 +4914,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 4415 | */ | 4914 | */ |
| 4416 | clear_extent_bit(tree, page_start, page_end, | 4915 | clear_extent_bit(tree, page_start, page_end, |
| 4417 | EXTENT_DIRTY | EXTENT_DELALLOC | | 4916 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 4418 | EXTENT_LOCKED, 1, 0, GFP_NOFS); | 4917 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, |
| 4419 | btrfs_finish_ordered_io(page->mapping->host, | 4918 | NULL, GFP_NOFS); |
| 4420 | page_start, page_end); | 4919 | /* |
| 4920 | * whoever cleared the private bit is responsible | ||
| 4921 | * for the finish_ordered_io | ||
| 4922 | */ | ||
| 4923 | if (TestClearPagePrivate2(page)) { | ||
| 4924 | btrfs_finish_ordered_io(page->mapping->host, | ||
| 4925 | page_start, page_end); | ||
| 4926 | } | ||
| 4421 | btrfs_put_ordered_extent(ordered); | 4927 | btrfs_put_ordered_extent(ordered); |
| 4422 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4928 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
| 4423 | } | 4929 | } |
| 4424 | clear_extent_bit(tree, page_start, page_end, | 4930 | clear_extent_bit(tree, page_start, page_end, |
| 4425 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 4931 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 4426 | EXTENT_ORDERED, | 4932 | EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); |
| 4427 | 1, 1, GFP_NOFS); | ||
| 4428 | __btrfs_releasepage(page, GFP_NOFS); | 4933 | __btrfs_releasepage(page, GFP_NOFS); |
| 4429 | 4934 | ||
| 4430 | ClearPageChecked(page); | 4935 | ClearPageChecked(page); |
| @@ -4473,6 +4978,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 4473 | goto out; | 4978 | goto out; |
| 4474 | } | 4979 | } |
| 4475 | 4980 | ||
| 4981 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 4982 | if (ret) { | ||
| 4983 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 4984 | ret = VM_FAULT_SIGBUS; | ||
| 4985 | goto out; | ||
| 4986 | } | ||
| 4987 | |||
| 4476 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 4988 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
| 4477 | again: | 4989 | again: |
| 4478 | lock_page(page); | 4990 | lock_page(page); |
| @@ -4504,7 +5016,24 @@ again: | |||
| 4504 | goto again; | 5016 | goto again; |
| 4505 | } | 5017 | } |
| 4506 | 5018 | ||
| 4507 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 5019 | /* |
| 5020 | * XXX - page_mkwrite gets called every time the page is dirtied, even | ||
| 5021 | * if it was already dirty, so for space accounting reasons we need to | ||
| 5022 | * clear any delalloc bits for the range we are fixing to save. There | ||
| 5023 | * is probably a better way to do this, but for now keep consistent with | ||
| 5024 | * prepare_pages in the normal write path. | ||
| 5025 | */ | ||
| 5026 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
| 5027 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | ||
| 5028 | GFP_NOFS); | ||
| 5029 | |||
| 5030 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 5031 | if (ret) { | ||
| 5032 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 5033 | ret = VM_FAULT_SIGBUS; | ||
| 5034 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 5035 | goto out_unlock; | ||
| 5036 | } | ||
| 4508 | ret = 0; | 5037 | ret = 0; |
| 4509 | 5038 | ||
| 4510 | /* page is wholly or partially inside EOF */ | 5039 | /* page is wholly or partially inside EOF */ |
| @@ -4521,11 +5050,17 @@ again: | |||
| 4521 | } | 5050 | } |
| 4522 | ClearPageChecked(page); | 5051 | ClearPageChecked(page); |
| 4523 | set_page_dirty(page); | 5052 | set_page_dirty(page); |
| 5053 | SetPageUptodate(page); | ||
| 5054 | |||
| 5055 | BTRFS_I(inode)->last_trans = root->fs_info->generation; | ||
| 5056 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | ||
| 4524 | 5057 | ||
| 4525 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
| 4526 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 5058 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 4527 | 5059 | ||
| 4528 | out_unlock: | 5060 | out_unlock: |
| 5061 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 5062 | if (!ret) | ||
| 5063 | return VM_FAULT_LOCKED; | ||
| 4529 | unlock_page(page); | 5064 | unlock_page(page); |
| 4530 | out: | 5065 | out: |
| 4531 | return ret; | 5066 | return ret; |
| @@ -4544,7 +5079,9 @@ static void btrfs_truncate(struct inode *inode) | |||
| 4544 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | 5079 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) |
| 4545 | return; | 5080 | return; |
| 4546 | 5081 | ||
| 4547 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | 5082 | ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); |
| 5083 | if (ret) | ||
| 5084 | return; | ||
| 4548 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 5085 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
| 4549 | 5086 | ||
| 4550 | trans = btrfs_start_transaction(root, 1); | 5087 | trans = btrfs_start_transaction(root, 1); |
| @@ -4594,11 +5131,11 @@ out: | |||
| 4594 | * create a new subvolume directory/inode (helper for the ioctl). | 5131 | * create a new subvolume directory/inode (helper for the ioctl). |
| 4595 | */ | 5132 | */ |
| 4596 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 5133 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| 4597 | struct btrfs_root *new_root, struct dentry *dentry, | 5134 | struct btrfs_root *new_root, |
| 4598 | u64 new_dirid, u64 alloc_hint) | 5135 | u64 new_dirid, u64 alloc_hint) |
| 4599 | { | 5136 | { |
| 4600 | struct inode *inode; | 5137 | struct inode *inode; |
| 4601 | int error; | 5138 | int err; |
| 4602 | u64 index = 0; | 5139 | u64 index = 0; |
| 4603 | 5140 | ||
| 4604 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | 5141 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, |
| @@ -4611,11 +5148,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
| 4611 | inode->i_nlink = 1; | 5148 | inode->i_nlink = 1; |
| 4612 | btrfs_i_size_write(inode, 0); | 5149 | btrfs_i_size_write(inode, 0); |
| 4613 | 5150 | ||
| 4614 | error = btrfs_update_inode(trans, new_root, inode); | 5151 | err = btrfs_update_inode(trans, new_root, inode); |
| 4615 | if (error) | 5152 | BUG_ON(err); |
| 4616 | return error; | ||
| 4617 | 5153 | ||
| 4618 | d_instantiate(dentry, inode); | 5154 | iput(inode); |
| 4619 | return 0; | 5155 | return 0; |
| 4620 | } | 5156 | } |
| 4621 | 5157 | ||
| @@ -4640,7 +5176,12 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 4640 | if (!ei) | 5176 | if (!ei) |
| 4641 | return NULL; | 5177 | return NULL; |
| 4642 | ei->last_trans = 0; | 5178 | ei->last_trans = 0; |
| 5179 | ei->last_sub_trans = 0; | ||
| 4643 | ei->logged_trans = 0; | 5180 | ei->logged_trans = 0; |
| 5181 | ei->outstanding_extents = 0; | ||
| 5182 | ei->reserved_extents = 0; | ||
| 5183 | ei->root = NULL; | ||
| 5184 | spin_lock_init(&ei->accounting_lock); | ||
| 4644 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 5185 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| 4645 | INIT_LIST_HEAD(&ei->i_orphan); | 5186 | INIT_LIST_HEAD(&ei->i_orphan); |
| 4646 | INIT_LIST_HEAD(&ei->ordered_operations); | 5187 | INIT_LIST_HEAD(&ei->ordered_operations); |
| @@ -4656,6 +5197,14 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 4656 | WARN_ON(inode->i_data.nrpages); | 5197 | WARN_ON(inode->i_data.nrpages); |
| 4657 | 5198 | ||
| 4658 | /* | 5199 | /* |
| 5200 | * This can happen where we create an inode, but somebody else also | ||
| 5201 | * created the same inode and we need to destroy the one we already | ||
| 5202 | * created. | ||
| 5203 | */ | ||
| 5204 | if (!root) | ||
| 5205 | goto free; | ||
| 5206 | |||
| 5207 | /* | ||
| 4659 | * Make sure we're properly removed from the ordered operation | 5208 | * Make sure we're properly removed from the ordered operation |
| 4660 | * lists. | 5209 | * lists. |
| 4661 | */ | 5210 | */ |
| @@ -4690,9 +5239,20 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 4690 | } | 5239 | } |
| 4691 | inode_tree_del(inode); | 5240 | inode_tree_del(inode); |
| 4692 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); | 5241 | btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); |
| 5242 | free: | ||
| 4693 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 5243 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
| 4694 | } | 5244 | } |
| 4695 | 5245 | ||
| 5246 | void btrfs_drop_inode(struct inode *inode) | ||
| 5247 | { | ||
| 5248 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5249 | |||
| 5250 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) | ||
| 5251 | generic_delete_inode(inode); | ||
| 5252 | else | ||
| 5253 | generic_drop_inode(inode); | ||
| 5254 | } | ||
| 5255 | |||
| 4696 | static void init_once(void *foo) | 5256 | static void init_once(void *foo) |
| 4697 | { | 5257 | { |
| 4698 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; | 5258 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; |
| @@ -4761,31 +5321,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4761 | { | 5321 | { |
| 4762 | struct btrfs_trans_handle *trans; | 5322 | struct btrfs_trans_handle *trans; |
| 4763 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | 5323 | struct btrfs_root *root = BTRFS_I(old_dir)->root; |
| 5324 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | ||
| 4764 | struct inode *new_inode = new_dentry->d_inode; | 5325 | struct inode *new_inode = new_dentry->d_inode; |
| 4765 | struct inode *old_inode = old_dentry->d_inode; | 5326 | struct inode *old_inode = old_dentry->d_inode; |
| 4766 | struct timespec ctime = CURRENT_TIME; | 5327 | struct timespec ctime = CURRENT_TIME; |
| 4767 | u64 index = 0; | 5328 | u64 index = 0; |
| 5329 | u64 root_objectid; | ||
| 4768 | int ret; | 5330 | int ret; |
| 4769 | 5331 | ||
| 4770 | /* we're not allowed to rename between subvolumes */ | 5332 | if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
| 4771 | if (BTRFS_I(old_inode)->root->root_key.objectid != | 5333 | return -EPERM; |
| 4772 | BTRFS_I(new_dir)->root->root_key.objectid) | 5334 | |
| 5335 | /* we only allow renaming a subvolume link between subvolumes */ | ||
| 5336 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) | ||
| 4773 | return -EXDEV; | 5337 | return -EXDEV; |
| 4774 | 5338 | ||
| 5339 | if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || | ||
| 5340 | (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) | ||
| 5341 | return -ENOTEMPTY; | ||
| 5342 | |||
| 4775 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 5343 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
| 4776 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { | 5344 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 4777 | return -ENOTEMPTY; | 5345 | return -ENOTEMPTY; |
| 4778 | } | ||
| 4779 | 5346 | ||
| 4780 | /* to rename a snapshot or subvolume, we need to juggle the | 5347 | /* |
| 4781 | * backrefs. This isn't coded yet | 5348 | * We want to reserve the absolute worst case amount of items. So if |
| 5349 | * both inodes are subvols and we need to unlink them then that would | ||
| 5350 | * require 4 item modifications, but if they are both normal inodes it | ||
| 5351 | * would require 5 item modifications, so we'll assume they're normal | ||
| 5352 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
| 5353 | * should cover the worst case number of items we'll modify. | ||
| 4782 | */ | 5354 | */ |
| 4783 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 5355 | ret = btrfs_reserve_metadata_space(root, 11); |
| 4784 | return -EXDEV; | ||
| 4785 | |||
| 4786 | ret = btrfs_check_metadata_free_space(root); | ||
| 4787 | if (ret) | 5356 | if (ret) |
| 4788 | goto out_unlock; | 5357 | return ret; |
| 4789 | 5358 | ||
| 4790 | /* | 5359 | /* |
| 4791 | * we're using rename to replace one file with another. | 5360 | * we're using rename to replace one file with another. |
| @@ -4796,8 +5365,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4796 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 5365 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
| 4797 | filemap_flush(old_inode->i_mapping); | 5366 | filemap_flush(old_inode->i_mapping); |
| 4798 | 5367 | ||
| 5368 | /* close the racy window with snapshot create/destroy ioctl */ | ||
| 5369 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
| 5370 | down_read(&root->fs_info->subvol_sem); | ||
| 5371 | |||
| 4799 | trans = btrfs_start_transaction(root, 1); | 5372 | trans = btrfs_start_transaction(root, 1); |
| 5373 | btrfs_set_trans_block_group(trans, new_dir); | ||
| 5374 | |||
| 5375 | if (dest != root) | ||
| 5376 | btrfs_record_root_in_trans(trans, dest); | ||
| 4800 | 5377 | ||
| 5378 | ret = btrfs_set_inode_index(new_dir, &index); | ||
| 5379 | if (ret) | ||
| 5380 | goto out_fail; | ||
| 5381 | |||
| 5382 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
| 5383 | /* force full log commit if subvolume involved. */ | ||
| 5384 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 5385 | } else { | ||
| 5386 | ret = btrfs_insert_inode_ref(trans, dest, | ||
| 5387 | new_dentry->d_name.name, | ||
| 5388 | new_dentry->d_name.len, | ||
| 5389 | old_inode->i_ino, | ||
| 5390 | new_dir->i_ino, index); | ||
| 5391 | if (ret) | ||
| 5392 | goto out_fail; | ||
| 5393 | /* | ||
| 5394 | * this is an ugly little race, but the rename is required | ||
| 5395 | * to make sure that if we crash, the inode is either at the | ||
| 5396 | * old name or the new one. pinning the log transaction lets | ||
| 5397 | * us make sure we don't allow a log commit to come in after | ||
| 5398 | * we unlink the name but before we add the new name back in. | ||
| 5399 | */ | ||
| 5400 | btrfs_pin_log_trans(root); | ||
| 5401 | } | ||
| 4801 | /* | 5402 | /* |
| 4802 | * make sure the inode gets flushed if it is replacing | 5403 | * make sure the inode gets flushed if it is replacing |
| 4803 | * something. | 5404 | * something. |
| @@ -4807,18 +5408,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4807 | btrfs_add_ordered_operation(trans, root, old_inode); | 5408 | btrfs_add_ordered_operation(trans, root, old_inode); |
| 4808 | } | 5409 | } |
| 4809 | 5410 | ||
| 4810 | /* | ||
| 4811 | * this is an ugly little race, but the rename is required to make | ||
| 4812 | * sure that if we crash, the inode is either at the old name | ||
| 4813 | * or the new one. pinning the log transaction lets us make sure | ||
| 4814 | * we don't allow a log commit to come in after we unlink the | ||
| 4815 | * name but before we add the new name back in. | ||
| 4816 | */ | ||
| 4817 | btrfs_pin_log_trans(root); | ||
| 4818 | |||
| 4819 | btrfs_set_trans_block_group(trans, new_dir); | ||
| 4820 | |||
| 4821 | btrfs_inc_nlink(old_dentry->d_inode); | ||
| 4822 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 5411 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
| 4823 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 5412 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
| 4824 | old_inode->i_ctime = ctime; | 5413 | old_inode->i_ctime = ctime; |
| @@ -4826,47 +5415,60 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4826 | if (old_dentry->d_parent != new_dentry->d_parent) | 5415 | if (old_dentry->d_parent != new_dentry->d_parent) |
| 4827 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | 5416 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); |
| 4828 | 5417 | ||
| 4829 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | 5418 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
| 4830 | old_dentry->d_name.name, | 5419 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; |
| 4831 | old_dentry->d_name.len); | 5420 | ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, |
| 4832 | if (ret) | 5421 | old_dentry->d_name.name, |
| 4833 | goto out_fail; | 5422 | old_dentry->d_name.len); |
| 5423 | } else { | ||
| 5424 | btrfs_inc_nlink(old_dentry->d_inode); | ||
| 5425 | ret = btrfs_unlink_inode(trans, root, old_dir, | ||
| 5426 | old_dentry->d_inode, | ||
| 5427 | old_dentry->d_name.name, | ||
| 5428 | old_dentry->d_name.len); | ||
| 5429 | } | ||
| 5430 | BUG_ON(ret); | ||
| 4834 | 5431 | ||
| 4835 | if (new_inode) { | 5432 | if (new_inode) { |
| 4836 | new_inode->i_ctime = CURRENT_TIME; | 5433 | new_inode->i_ctime = CURRENT_TIME; |
| 4837 | ret = btrfs_unlink_inode(trans, root, new_dir, | 5434 | if (unlikely(new_inode->i_ino == |
| 4838 | new_dentry->d_inode, | 5435 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
| 4839 | new_dentry->d_name.name, | 5436 | root_objectid = BTRFS_I(new_inode)->location.objectid; |
| 4840 | new_dentry->d_name.len); | 5437 | ret = btrfs_unlink_subvol(trans, dest, new_dir, |
| 4841 | if (ret) | 5438 | root_objectid, |
| 4842 | goto out_fail; | 5439 | new_dentry->d_name.name, |
| 5440 | new_dentry->d_name.len); | ||
| 5441 | BUG_ON(new_inode->i_nlink == 0); | ||
| 5442 | } else { | ||
| 5443 | ret = btrfs_unlink_inode(trans, dest, new_dir, | ||
| 5444 | new_dentry->d_inode, | ||
| 5445 | new_dentry->d_name.name, | ||
| 5446 | new_dentry->d_name.len); | ||
| 5447 | } | ||
| 5448 | BUG_ON(ret); | ||
| 4843 | if (new_inode->i_nlink == 0) { | 5449 | if (new_inode->i_nlink == 0) { |
| 4844 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); | 5450 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); |
| 4845 | if (ret) | 5451 | BUG_ON(ret); |
| 4846 | goto out_fail; | ||
| 4847 | } | 5452 | } |
| 4848 | |||
| 4849 | } | 5453 | } |
| 4850 | ret = btrfs_set_inode_index(new_dir, &index); | ||
| 4851 | if (ret) | ||
| 4852 | goto out_fail; | ||
| 4853 | 5454 | ||
| 4854 | ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, | 5455 | ret = btrfs_add_link(trans, new_dir, old_inode, |
| 4855 | old_inode, new_dentry->d_name.name, | 5456 | new_dentry->d_name.name, |
| 4856 | new_dentry->d_name.len, 1, index); | 5457 | new_dentry->d_name.len, 0, index); |
| 4857 | if (ret) | 5458 | BUG_ON(ret); |
| 4858 | goto out_fail; | ||
| 4859 | 5459 | ||
| 4860 | btrfs_log_new_name(trans, old_inode, old_dir, | 5460 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
| 4861 | new_dentry->d_parent); | 5461 | btrfs_log_new_name(trans, old_inode, old_dir, |
| 5462 | new_dentry->d_parent); | ||
| 5463 | btrfs_end_log_trans(root); | ||
| 5464 | } | ||
| 4862 | out_fail: | 5465 | out_fail: |
| 4863 | |||
| 4864 | /* this btrfs_end_log_trans just allows the current | ||
| 4865 | * log-sub transaction to complete | ||
| 4866 | */ | ||
| 4867 | btrfs_end_log_trans(root); | ||
| 4868 | btrfs_end_transaction_throttle(trans, root); | 5466 | btrfs_end_transaction_throttle(trans, root); |
| 4869 | out_unlock: | 5467 | |
| 5468 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
| 5469 | up_read(&root->fs_info->subvol_sem); | ||
| 5470 | |||
| 5471 | btrfs_unreserve_metadata_space(root, 11); | ||
| 4870 | return ret; | 5472 | return ret; |
| 4871 | } | 5473 | } |
| 4872 | 5474 | ||
| @@ -4938,11 +5540,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 4938 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 5540 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| 4939 | return -ENAMETOOLONG; | 5541 | return -ENAMETOOLONG; |
| 4940 | 5542 | ||
| 4941 | err = btrfs_check_metadata_free_space(root); | 5543 | /* |
| 5544 | * 2 items for inode item and ref | ||
| 5545 | * 2 items for dir items | ||
| 5546 | * 1 item for xattr if selinux is on | ||
| 5547 | */ | ||
| 5548 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 4942 | if (err) | 5549 | if (err) |
| 4943 | goto out_fail; | 5550 | return err; |
| 4944 | 5551 | ||
| 4945 | trans = btrfs_start_transaction(root, 1); | 5552 | trans = btrfs_start_transaction(root, 1); |
| 5553 | if (!trans) | ||
| 5554 | goto out_fail; | ||
| 4946 | btrfs_set_trans_block_group(trans, dir); | 5555 | btrfs_set_trans_block_group(trans, dir); |
| 4947 | 5556 | ||
| 4948 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 5557 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -5023,6 +5632,7 @@ out_unlock: | |||
| 5023 | nr = trans->blocks_used; | 5632 | nr = trans->blocks_used; |
| 5024 | btrfs_end_transaction_throttle(trans, root); | 5633 | btrfs_end_transaction_throttle(trans, root); |
| 5025 | out_fail: | 5634 | out_fail: |
| 5635 | btrfs_unreserve_metadata_space(root, 5); | ||
| 5026 | if (drop_inode) { | 5636 | if (drop_inode) { |
| 5027 | inode_dec_link_count(inode); | 5637 | inode_dec_link_count(inode); |
| 5028 | iput(inode); | 5638 | iput(inode); |
| @@ -5044,6 +5654,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5044 | 5654 | ||
| 5045 | while (num_bytes > 0) { | 5655 | while (num_bytes > 0) { |
| 5046 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5656 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
| 5657 | |||
| 5658 | ret = btrfs_reserve_metadata_space(root, 1); | ||
| 5659 | if (ret) | ||
| 5660 | goto out; | ||
| 5661 | |||
| 5047 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5662 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
| 5048 | root->sectorsize, 0, alloc_hint, | 5663 | root->sectorsize, 0, alloc_hint, |
| 5049 | (u64)-1, &ins, 1); | 5664 | (u64)-1, &ins, 1); |
| @@ -5058,9 +5673,12 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5058 | 0, 0, 0, | 5673 | 0, 0, 0, |
| 5059 | BTRFS_FILE_EXTENT_PREALLOC); | 5674 | BTRFS_FILE_EXTENT_PREALLOC); |
| 5060 | BUG_ON(ret); | 5675 | BUG_ON(ret); |
| 5676 | btrfs_drop_extent_cache(inode, cur_offset, | ||
| 5677 | cur_offset + ins.offset -1, 0); | ||
| 5061 | num_bytes -= ins.offset; | 5678 | num_bytes -= ins.offset; |
| 5062 | cur_offset += ins.offset; | 5679 | cur_offset += ins.offset; |
| 5063 | alloc_hint = ins.objectid + ins.offset; | 5680 | alloc_hint = ins.objectid + ins.offset; |
| 5681 | btrfs_unreserve_metadata_space(root, 1); | ||
| 5064 | } | 5682 | } |
| 5065 | out: | 5683 | out: |
| 5066 | if (cur_offset > start) { | 5684 | if (cur_offset > start) { |
| @@ -5201,7 +5819,7 @@ static int btrfs_permission(struct inode *inode, int mask) | |||
| 5201 | return generic_permission(inode, mask, btrfs_check_acl); | 5819 | return generic_permission(inode, mask, btrfs_check_acl); |
| 5202 | } | 5820 | } |
| 5203 | 5821 | ||
| 5204 | static struct inode_operations btrfs_dir_inode_operations = { | 5822 | static const struct inode_operations btrfs_dir_inode_operations = { |
| 5205 | .getattr = btrfs_getattr, | 5823 | .getattr = btrfs_getattr, |
| 5206 | .lookup = btrfs_lookup, | 5824 | .lookup = btrfs_lookup, |
| 5207 | .create = btrfs_create, | 5825 | .create = btrfs_create, |
| @@ -5219,11 +5837,12 @@ static struct inode_operations btrfs_dir_inode_operations = { | |||
| 5219 | .removexattr = btrfs_removexattr, | 5837 | .removexattr = btrfs_removexattr, |
| 5220 | .permission = btrfs_permission, | 5838 | .permission = btrfs_permission, |
| 5221 | }; | 5839 | }; |
| 5222 | static struct inode_operations btrfs_dir_ro_inode_operations = { | 5840 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
| 5223 | .lookup = btrfs_lookup, | 5841 | .lookup = btrfs_lookup, |
| 5224 | .permission = btrfs_permission, | 5842 | .permission = btrfs_permission, |
| 5225 | }; | 5843 | }; |
| 5226 | static struct file_operations btrfs_dir_file_operations = { | 5844 | |
| 5845 | static const struct file_operations btrfs_dir_file_operations = { | ||
| 5227 | .llseek = generic_file_llseek, | 5846 | .llseek = generic_file_llseek, |
| 5228 | .read = generic_read_dir, | 5847 | .read = generic_read_dir, |
| 5229 | .readdir = btrfs_real_readdir, | 5848 | .readdir = btrfs_real_readdir, |
| @@ -5245,6 +5864,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
| 5245 | .readpage_io_failed_hook = btrfs_io_failed_hook, | 5864 | .readpage_io_failed_hook = btrfs_io_failed_hook, |
| 5246 | .set_bit_hook = btrfs_set_bit_hook, | 5865 | .set_bit_hook = btrfs_set_bit_hook, |
| 5247 | .clear_bit_hook = btrfs_clear_bit_hook, | 5866 | .clear_bit_hook = btrfs_clear_bit_hook, |
| 5867 | .merge_extent_hook = btrfs_merge_extent_hook, | ||
| 5868 | .split_extent_hook = btrfs_split_extent_hook, | ||
| 5248 | }; | 5869 | }; |
| 5249 | 5870 | ||
| 5250 | /* | 5871 | /* |
| @@ -5259,7 +5880,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
| 5259 | * | 5880 | * |
| 5260 | * For now we're avoiding this by dropping bmap. | 5881 | * For now we're avoiding this by dropping bmap. |
| 5261 | */ | 5882 | */ |
| 5262 | static struct address_space_operations btrfs_aops = { | 5883 | static const struct address_space_operations btrfs_aops = { |
| 5263 | .readpage = btrfs_readpage, | 5884 | .readpage = btrfs_readpage, |
| 5264 | .writepage = btrfs_writepage, | 5885 | .writepage = btrfs_writepage, |
| 5265 | .writepages = btrfs_writepages, | 5886 | .writepages = btrfs_writepages, |
| @@ -5269,16 +5890,17 @@ static struct address_space_operations btrfs_aops = { | |||
| 5269 | .invalidatepage = btrfs_invalidatepage, | 5890 | .invalidatepage = btrfs_invalidatepage, |
| 5270 | .releasepage = btrfs_releasepage, | 5891 | .releasepage = btrfs_releasepage, |
| 5271 | .set_page_dirty = btrfs_set_page_dirty, | 5892 | .set_page_dirty = btrfs_set_page_dirty, |
| 5893 | .error_remove_page = generic_error_remove_page, | ||
| 5272 | }; | 5894 | }; |
| 5273 | 5895 | ||
| 5274 | static struct address_space_operations btrfs_symlink_aops = { | 5896 | static const struct address_space_operations btrfs_symlink_aops = { |
| 5275 | .readpage = btrfs_readpage, | 5897 | .readpage = btrfs_readpage, |
| 5276 | .writepage = btrfs_writepage, | 5898 | .writepage = btrfs_writepage, |
| 5277 | .invalidatepage = btrfs_invalidatepage, | 5899 | .invalidatepage = btrfs_invalidatepage, |
| 5278 | .releasepage = btrfs_releasepage, | 5900 | .releasepage = btrfs_releasepage, |
| 5279 | }; | 5901 | }; |
| 5280 | 5902 | ||
| 5281 | static struct inode_operations btrfs_file_inode_operations = { | 5903 | static const struct inode_operations btrfs_file_inode_operations = { |
| 5282 | .truncate = btrfs_truncate, | 5904 | .truncate = btrfs_truncate, |
| 5283 | .getattr = btrfs_getattr, | 5905 | .getattr = btrfs_getattr, |
| 5284 | .setattr = btrfs_setattr, | 5906 | .setattr = btrfs_setattr, |
| @@ -5290,7 +5912,7 @@ static struct inode_operations btrfs_file_inode_operations = { | |||
| 5290 | .fallocate = btrfs_fallocate, | 5912 | .fallocate = btrfs_fallocate, |
| 5291 | .fiemap = btrfs_fiemap, | 5913 | .fiemap = btrfs_fiemap, |
| 5292 | }; | 5914 | }; |
| 5293 | static struct inode_operations btrfs_special_inode_operations = { | 5915 | static const struct inode_operations btrfs_special_inode_operations = { |
| 5294 | .getattr = btrfs_getattr, | 5916 | .getattr = btrfs_getattr, |
| 5295 | .setattr = btrfs_setattr, | 5917 | .setattr = btrfs_setattr, |
| 5296 | .permission = btrfs_permission, | 5918 | .permission = btrfs_permission, |
| @@ -5299,7 +5921,7 @@ static struct inode_operations btrfs_special_inode_operations = { | |||
| 5299 | .listxattr = btrfs_listxattr, | 5921 | .listxattr = btrfs_listxattr, |
| 5300 | .removexattr = btrfs_removexattr, | 5922 | .removexattr = btrfs_removexattr, |
| 5301 | }; | 5923 | }; |
| 5302 | static struct inode_operations btrfs_symlink_inode_operations = { | 5924 | static const struct inode_operations btrfs_symlink_inode_operations = { |
| 5303 | .readlink = generic_readlink, | 5925 | .readlink = generic_readlink, |
| 5304 | .follow_link = page_follow_link_light, | 5926 | .follow_link = page_follow_link_light, |
| 5305 | .put_link = page_put_link, | 5927 | .put_link = page_put_link, |
| @@ -5309,3 +5931,7 @@ static struct inode_operations btrfs_symlink_inode_operations = { | |||
| 5309 | .listxattr = btrfs_listxattr, | 5931 | .listxattr = btrfs_listxattr, |
| 5310 | .removexattr = btrfs_removexattr, | 5932 | .removexattr = btrfs_removexattr, |
| 5311 | }; | 5933 | }; |
| 5934 | |||
| 5935 | const struct dentry_operations btrfs_dentry_operations = { | ||
| 5936 | .d_delete = btrfs_dentry_delete, | ||
| 5937 | }; | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd88f25889f7..cdbb054102b9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 230 | struct btrfs_root_item root_item; | 230 | struct btrfs_root_item root_item; |
| 231 | struct btrfs_inode_item *inode_item; | 231 | struct btrfs_inode_item *inode_item; |
| 232 | struct extent_buffer *leaf; | 232 | struct extent_buffer *leaf; |
| 233 | struct btrfs_root *new_root = root; | 233 | struct btrfs_root *new_root; |
| 234 | struct inode *dir; | 234 | struct inode *dir = dentry->d_parent->d_inode; |
| 235 | int ret; | 235 | int ret; |
| 236 | int err; | 236 | int err; |
| 237 | u64 objectid; | 237 | u64 objectid; |
| @@ -239,9 +239,15 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 239 | u64 index = 0; | 239 | u64 index = 0; |
| 240 | unsigned long nr = 1; | 240 | unsigned long nr = 1; |
| 241 | 241 | ||
| 242 | ret = btrfs_check_metadata_free_space(root); | 242 | /* |
| 243 | * 1 - inode item | ||
| 244 | * 2 - refs | ||
| 245 | * 1 - root item | ||
| 246 | * 2 - dir items | ||
| 247 | */ | ||
| 248 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 243 | if (ret) | 249 | if (ret) |
| 244 | goto fail_commit; | 250 | return ret; |
| 245 | 251 | ||
| 246 | trans = btrfs_start_transaction(root, 1); | 252 | trans = btrfs_start_transaction(root, 1); |
| 247 | BUG_ON(!trans); | 253 | BUG_ON(!trans); |
| @@ -304,11 +310,17 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 304 | if (ret) | 310 | if (ret) |
| 305 | goto fail; | 311 | goto fail; |
| 306 | 312 | ||
| 313 | key.offset = (u64)-1; | ||
| 314 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
| 315 | BUG_ON(IS_ERR(new_root)); | ||
| 316 | |||
| 317 | btrfs_record_root_in_trans(trans, new_root); | ||
| 318 | |||
| 319 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid, | ||
| 320 | BTRFS_I(dir)->block_group); | ||
| 307 | /* | 321 | /* |
| 308 | * insert the directory item | 322 | * insert the directory item |
| 309 | */ | 323 | */ |
| 310 | key.offset = (u64)-1; | ||
| 311 | dir = dentry->d_parent->d_inode; | ||
| 312 | ret = btrfs_set_inode_index(dir, &index); | 324 | ret = btrfs_set_inode_index(dir, &index); |
| 313 | BUG_ON(ret); | 325 | BUG_ON(ret); |
| 314 | 326 | ||
| @@ -322,43 +334,20 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 322 | ret = btrfs_update_inode(trans, root, dir); | 334 | ret = btrfs_update_inode(trans, root, dir); |
| 323 | BUG_ON(ret); | 335 | BUG_ON(ret); |
| 324 | 336 | ||
| 325 | /* add the backref first */ | ||
| 326 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | 337 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, |
| 327 | objectid, BTRFS_ROOT_BACKREF_KEY, | 338 | objectid, root->root_key.objectid, |
| 328 | root->root_key.objectid, | ||
| 329 | dir->i_ino, index, name, namelen); | 339 | dir->i_ino, index, name, namelen); |
| 330 | 340 | ||
| 331 | BUG_ON(ret); | 341 | BUG_ON(ret); |
| 332 | 342 | ||
| 333 | /* now add the forward ref */ | 343 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
| 334 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
| 335 | root->root_key.objectid, BTRFS_ROOT_REF_KEY, | ||
| 336 | objectid, | ||
| 337 | dir->i_ino, index, name, namelen); | ||
| 338 | |||
| 339 | BUG_ON(ret); | ||
| 340 | |||
| 341 | ret = btrfs_commit_transaction(trans, root); | ||
| 342 | if (ret) | ||
| 343 | goto fail_commit; | ||
| 344 | |||
| 345 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
| 346 | BUG_ON(!new_root); | ||
| 347 | |||
| 348 | trans = btrfs_start_transaction(new_root, 1); | ||
| 349 | BUG_ON(!trans); | ||
| 350 | |||
| 351 | ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, | ||
| 352 | BTRFS_I(dir)->block_group); | ||
| 353 | if (ret) | ||
| 354 | goto fail; | ||
| 355 | |||
| 356 | fail: | 344 | fail: |
| 357 | nr = trans->blocks_used; | 345 | nr = trans->blocks_used; |
| 358 | err = btrfs_commit_transaction(trans, new_root); | 346 | err = btrfs_commit_transaction(trans, root); |
| 359 | if (err && !ret) | 347 | if (err && !ret) |
| 360 | ret = err; | 348 | ret = err; |
| 361 | fail_commit: | 349 | |
| 350 | btrfs_unreserve_metadata_space(root, 6); | ||
| 362 | btrfs_btree_balance_dirty(root, nr); | 351 | btrfs_btree_balance_dirty(root, nr); |
| 363 | return ret; | 352 | return ret; |
| 364 | } | 353 | } |
| @@ -375,19 +364,27 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 375 | if (!root->ref_cows) | 364 | if (!root->ref_cows) |
| 376 | return -EINVAL; | 365 | return -EINVAL; |
| 377 | 366 | ||
| 378 | ret = btrfs_check_metadata_free_space(root); | 367 | /* |
| 368 | * 1 - inode item | ||
| 369 | * 2 - refs | ||
| 370 | * 1 - root item | ||
| 371 | * 2 - dir items | ||
| 372 | */ | ||
| 373 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 379 | if (ret) | 374 | if (ret) |
| 380 | goto fail_unlock; | 375 | goto fail_unlock; |
| 381 | 376 | ||
| 382 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 377 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 383 | if (!pending_snapshot) { | 378 | if (!pending_snapshot) { |
| 384 | ret = -ENOMEM; | 379 | ret = -ENOMEM; |
| 380 | btrfs_unreserve_metadata_space(root, 6); | ||
| 385 | goto fail_unlock; | 381 | goto fail_unlock; |
| 386 | } | 382 | } |
| 387 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | 383 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); |
| 388 | if (!pending_snapshot->name) { | 384 | if (!pending_snapshot->name) { |
| 389 | ret = -ENOMEM; | 385 | ret = -ENOMEM; |
| 390 | kfree(pending_snapshot); | 386 | kfree(pending_snapshot); |
| 387 | btrfs_unreserve_metadata_space(root, 6); | ||
| 391 | goto fail_unlock; | 388 | goto fail_unlock; |
| 392 | } | 389 | } |
| 393 | memcpy(pending_snapshot->name, name, namelen); | 390 | memcpy(pending_snapshot->name, name, namelen); |
| @@ -420,14 +417,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
| 420 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup | 417 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup |
| 421 | * inside this filesystem so it's quite a bit simpler. | 418 | * inside this filesystem so it's quite a bit simpler. |
| 422 | */ | 419 | */ |
| 423 | static noinline int btrfs_mksubvol(struct path *parent, char *name, | 420 | static noinline int btrfs_mksubvol(struct path *parent, |
| 424 | int mode, int namelen, | 421 | char *name, int namelen, |
| 425 | struct btrfs_root *snap_src) | 422 | struct btrfs_root *snap_src) |
| 426 | { | 423 | { |
| 424 | struct inode *dir = parent->dentry->d_inode; | ||
| 427 | struct dentry *dentry; | 425 | struct dentry *dentry; |
| 428 | int error; | 426 | int error; |
| 429 | 427 | ||
| 430 | mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 428 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
| 431 | 429 | ||
| 432 | dentry = lookup_one_len(name, parent->dentry, namelen); | 430 | dentry = lookup_one_len(name, parent->dentry, namelen); |
| 433 | error = PTR_ERR(dentry); | 431 | error = PTR_ERR(dentry); |
| @@ -438,99 +436,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, | |||
| 438 | if (dentry->d_inode) | 436 | if (dentry->d_inode) |
| 439 | goto out_dput; | 437 | goto out_dput; |
| 440 | 438 | ||
| 441 | if (!IS_POSIXACL(parent->dentry->d_inode)) | ||
| 442 | mode &= ~current_umask(); | ||
| 443 | |||
| 444 | error = mnt_want_write(parent->mnt); | 439 | error = mnt_want_write(parent->mnt); |
| 445 | if (error) | 440 | if (error) |
| 446 | goto out_dput; | 441 | goto out_dput; |
| 447 | 442 | ||
| 448 | error = btrfs_may_create(parent->dentry->d_inode, dentry); | 443 | error = btrfs_may_create(dir, dentry); |
| 449 | if (error) | 444 | if (error) |
| 450 | goto out_drop_write; | 445 | goto out_drop_write; |
| 451 | 446 | ||
| 452 | /* | 447 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
| 453 | * Actually perform the low-level subvolume creation after all | 448 | |
| 454 | * this VFS fuzz. | 449 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) |
| 455 | * | 450 | goto out_up_read; |
| 456 | * Eventually we want to pass in an inode under which we create this | 451 | |
| 457 | * subvolume, but for now all are under the filesystem root. | ||
| 458 | * | ||
| 459 | * Also we should pass on the mode eventually to allow creating new | ||
| 460 | * subvolume with specific mode bits. | ||
| 461 | */ | ||
| 462 | if (snap_src) { | 452 | if (snap_src) { |
| 463 | struct dentry *dir = dentry->d_parent; | 453 | error = create_snapshot(snap_src, dentry, |
| 464 | struct dentry *test = dir->d_parent; | 454 | name, namelen); |
| 465 | struct btrfs_path *path = btrfs_alloc_path(); | ||
| 466 | int ret; | ||
| 467 | u64 test_oid; | ||
| 468 | u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid; | ||
| 469 | |||
| 470 | test_oid = snap_src->root_key.objectid; | ||
| 471 | |||
| 472 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
| 473 | path, parent_oid, test_oid); | ||
| 474 | if (ret == 0) | ||
| 475 | goto create; | ||
| 476 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
| 477 | |||
| 478 | /* we need to make sure we aren't creating a directory loop | ||
| 479 | * by taking a snapshot of something that has our current | ||
| 480 | * subvol in its directory tree. So, this loops through | ||
| 481 | * the dentries and checks the forward refs for each subvolume | ||
| 482 | * to see if is references the subvolume where we are | ||
| 483 | * placing this new snapshot. | ||
| 484 | */ | ||
| 485 | while (1) { | ||
| 486 | if (!test || | ||
| 487 | dir == snap_src->fs_info->sb->s_root || | ||
| 488 | test == snap_src->fs_info->sb->s_root || | ||
| 489 | test->d_inode->i_sb != snap_src->fs_info->sb) { | ||
| 490 | break; | ||
| 491 | } | ||
| 492 | if (S_ISLNK(test->d_inode->i_mode)) { | ||
| 493 | printk(KERN_INFO "Btrfs symlink in snapshot " | ||
| 494 | "path, failed\n"); | ||
| 495 | error = -EMLINK; | ||
| 496 | btrfs_free_path(path); | ||
| 497 | goto out_drop_write; | ||
| 498 | } | ||
| 499 | test_oid = | ||
| 500 | BTRFS_I(test->d_inode)->root->root_key.objectid; | ||
| 501 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
| 502 | path, test_oid, parent_oid); | ||
| 503 | if (ret == 0) { | ||
| 504 | printk(KERN_INFO "Btrfs snapshot creation " | ||
| 505 | "failed, looping\n"); | ||
| 506 | error = -EMLINK; | ||
| 507 | btrfs_free_path(path); | ||
| 508 | goto out_drop_write; | ||
| 509 | } | ||
| 510 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
| 511 | test = test->d_parent; | ||
| 512 | } | ||
| 513 | create: | ||
| 514 | btrfs_free_path(path); | ||
| 515 | error = create_snapshot(snap_src, dentry, name, namelen); | ||
| 516 | } else { | 455 | } else { |
| 517 | error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, | 456 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
| 518 | dentry, name, namelen); | 457 | name, namelen); |
| 519 | } | 458 | } |
| 520 | if (error) | 459 | if (!error) |
| 521 | goto out_drop_write; | 460 | fsnotify_mkdir(dir, dentry); |
| 522 | 461 | out_up_read: | |
| 523 | fsnotify_mkdir(parent->dentry->d_inode, dentry); | 462 | up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
| 524 | out_drop_write: | 463 | out_drop_write: |
| 525 | mnt_drop_write(parent->mnt); | 464 | mnt_drop_write(parent->mnt); |
| 526 | out_dput: | 465 | out_dput: |
| 527 | dput(dentry); | 466 | dput(dentry); |
| 528 | out_unlock: | 467 | out_unlock: |
| 529 | mutex_unlock(&parent->dentry->d_inode->i_mutex); | 468 | mutex_unlock(&dir->i_mutex); |
| 530 | return error; | 469 | return error; |
| 531 | } | 470 | } |
| 532 | 471 | ||
| 533 | |||
| 534 | static int btrfs_defrag_file(struct file *file) | 472 | static int btrfs_defrag_file(struct file *file) |
| 535 | { | 473 | { |
| 536 | struct inode *inode = fdentry(file)->d_inode; | 474 | struct inode *inode = fdentry(file)->d_inode; |
| @@ -596,9 +534,8 @@ again: | |||
| 596 | clear_page_dirty_for_io(page); | 534 | clear_page_dirty_for_io(page); |
| 597 | 535 | ||
| 598 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 536 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 599 | |||
| 600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 601 | set_page_dirty(page); | 537 | set_page_dirty(page); |
| 538 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 602 | unlock_page(page); | 539 | unlock_page(page); |
| 603 | page_cache_release(page); | 540 | page_cache_release(page); |
| 604 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 541 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
| @@ -609,7 +546,8 @@ out_unlock: | |||
| 609 | return 0; | 546 | return 0; |
| 610 | } | 547 | } |
| 611 | 548 | ||
| 612 | static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | 549 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, |
| 550 | void __user *arg) | ||
| 613 | { | 551 | { |
| 614 | u64 new_size; | 552 | u64 new_size; |
| 615 | u64 old_size; | 553 | u64 old_size; |
| @@ -718,10 +656,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
| 718 | { | 656 | { |
| 719 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 657 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
| 720 | struct btrfs_ioctl_vol_args *vol_args; | 658 | struct btrfs_ioctl_vol_args *vol_args; |
| 721 | struct btrfs_dir_item *di; | ||
| 722 | struct btrfs_path *path; | ||
| 723 | struct file *src_file; | 659 | struct file *src_file; |
| 724 | u64 root_dirid; | ||
| 725 | int namelen; | 660 | int namelen; |
| 726 | int ret = 0; | 661 | int ret = 0; |
| 727 | 662 | ||
| @@ -739,32 +674,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
| 739 | goto out; | 674 | goto out; |
| 740 | } | 675 | } |
| 741 | 676 | ||
| 742 | path = btrfs_alloc_path(); | ||
| 743 | if (!path) { | ||
| 744 | ret = -ENOMEM; | ||
| 745 | goto out; | ||
| 746 | } | ||
| 747 | |||
| 748 | root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, | ||
| 749 | di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, | ||
| 750 | path, root_dirid, | ||
| 751 | vol_args->name, namelen, 0); | ||
| 752 | btrfs_free_path(path); | ||
| 753 | |||
| 754 | if (di && !IS_ERR(di)) { | ||
| 755 | ret = -EEXIST; | ||
| 756 | goto out; | ||
| 757 | } | ||
| 758 | |||
| 759 | if (IS_ERR(di)) { | ||
| 760 | ret = PTR_ERR(di); | ||
| 761 | goto out; | ||
| 762 | } | ||
| 763 | |||
| 764 | if (subvol) { | 677 | if (subvol) { |
| 765 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 678 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
| 766 | file->f_path.dentry->d_inode->i_mode, | 679 | NULL); |
| 767 | namelen, NULL); | ||
| 768 | } else { | 680 | } else { |
| 769 | struct inode *src_inode; | 681 | struct inode *src_inode; |
| 770 | src_file = fget(vol_args->fd); | 682 | src_file = fget(vol_args->fd); |
| @@ -781,17 +693,157 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
| 781 | fput(src_file); | 693 | fput(src_file); |
| 782 | goto out; | 694 | goto out; |
| 783 | } | 695 | } |
| 784 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 696 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
| 785 | file->f_path.dentry->d_inode->i_mode, | 697 | BTRFS_I(src_inode)->root); |
| 786 | namelen, BTRFS_I(src_inode)->root); | ||
| 787 | fput(src_file); | 698 | fput(src_file); |
| 788 | } | 699 | } |
| 789 | |||
| 790 | out: | 700 | out: |
| 791 | kfree(vol_args); | 701 | kfree(vol_args); |
| 792 | return ret; | 702 | return ret; |
| 793 | } | 703 | } |
| 794 | 704 | ||
| 705 | /* | ||
| 706 | * helper to check if the subvolume references other subvolumes | ||
| 707 | */ | ||
| 708 | static noinline int may_destroy_subvol(struct btrfs_root *root) | ||
| 709 | { | ||
| 710 | struct btrfs_path *path; | ||
| 711 | struct btrfs_key key; | ||
| 712 | int ret; | ||
| 713 | |||
| 714 | path = btrfs_alloc_path(); | ||
| 715 | if (!path) | ||
| 716 | return -ENOMEM; | ||
| 717 | |||
| 718 | key.objectid = root->root_key.objectid; | ||
| 719 | key.type = BTRFS_ROOT_REF_KEY; | ||
| 720 | key.offset = (u64)-1; | ||
| 721 | |||
| 722 | ret = btrfs_search_slot(NULL, root->fs_info->tree_root, | ||
| 723 | &key, path, 0, 0); | ||
| 724 | if (ret < 0) | ||
| 725 | goto out; | ||
| 726 | BUG_ON(ret == 0); | ||
| 727 | |||
| 728 | ret = 0; | ||
| 729 | if (path->slots[0] > 0) { | ||
| 730 | path->slots[0]--; | ||
| 731 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 732 | if (key.objectid == root->root_key.objectid && | ||
| 733 | key.type == BTRFS_ROOT_REF_KEY) | ||
| 734 | ret = -ENOTEMPTY; | ||
| 735 | } | ||
| 736 | out: | ||
| 737 | btrfs_free_path(path); | ||
| 738 | return ret; | ||
| 739 | } | ||
| 740 | |||
| 741 | static noinline int btrfs_ioctl_snap_destroy(struct file *file, | ||
| 742 | void __user *arg) | ||
| 743 | { | ||
| 744 | struct dentry *parent = fdentry(file); | ||
| 745 | struct dentry *dentry; | ||
| 746 | struct inode *dir = parent->d_inode; | ||
| 747 | struct inode *inode; | ||
| 748 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 749 | struct btrfs_root *dest = NULL; | ||
| 750 | struct btrfs_ioctl_vol_args *vol_args; | ||
| 751 | struct btrfs_trans_handle *trans; | ||
| 752 | int namelen; | ||
| 753 | int ret; | ||
| 754 | int err = 0; | ||
| 755 | |||
| 756 | if (!capable(CAP_SYS_ADMIN)) | ||
| 757 | return -EPERM; | ||
| 758 | |||
| 759 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
| 760 | if (IS_ERR(vol_args)) | ||
| 761 | return PTR_ERR(vol_args); | ||
| 762 | |||
| 763 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 764 | namelen = strlen(vol_args->name); | ||
| 765 | if (strchr(vol_args->name, '/') || | ||
| 766 | strncmp(vol_args->name, "..", namelen) == 0) { | ||
| 767 | err = -EINVAL; | ||
| 768 | goto out; | ||
| 769 | } | ||
| 770 | |||
| 771 | err = mnt_want_write(file->f_path.mnt); | ||
| 772 | if (err) | ||
| 773 | goto out; | ||
| 774 | |||
| 775 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
| 776 | dentry = lookup_one_len(vol_args->name, parent, namelen); | ||
| 777 | if (IS_ERR(dentry)) { | ||
| 778 | err = PTR_ERR(dentry); | ||
| 779 | goto out_unlock_dir; | ||
| 780 | } | ||
| 781 | |||
| 782 | if (!dentry->d_inode) { | ||
| 783 | err = -ENOENT; | ||
| 784 | goto out_dput; | ||
| 785 | } | ||
| 786 | |||
| 787 | inode = dentry->d_inode; | ||
| 788 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
| 789 | err = -EINVAL; | ||
| 790 | goto out_dput; | ||
| 791 | } | ||
| 792 | |||
| 793 | dest = BTRFS_I(inode)->root; | ||
| 794 | |||
| 795 | mutex_lock(&inode->i_mutex); | ||
| 796 | err = d_invalidate(dentry); | ||
| 797 | if (err) | ||
| 798 | goto out_unlock; | ||
| 799 | |||
| 800 | down_write(&root->fs_info->subvol_sem); | ||
| 801 | |||
| 802 | err = may_destroy_subvol(dest); | ||
| 803 | if (err) | ||
| 804 | goto out_up_write; | ||
| 805 | |||
| 806 | trans = btrfs_start_transaction(root, 1); | ||
| 807 | ret = btrfs_unlink_subvol(trans, root, dir, | ||
| 808 | dest->root_key.objectid, | ||
| 809 | dentry->d_name.name, | ||
| 810 | dentry->d_name.len); | ||
| 811 | BUG_ON(ret); | ||
| 812 | |||
| 813 | btrfs_record_root_in_trans(trans, dest); | ||
| 814 | |||
| 815 | memset(&dest->root_item.drop_progress, 0, | ||
| 816 | sizeof(dest->root_item.drop_progress)); | ||
| 817 | dest->root_item.drop_level = 0; | ||
| 818 | btrfs_set_root_refs(&dest->root_item, 0); | ||
| 819 | |||
| 820 | ret = btrfs_insert_orphan_item(trans, | ||
| 821 | root->fs_info->tree_root, | ||
| 822 | dest->root_key.objectid); | ||
| 823 | BUG_ON(ret); | ||
| 824 | |||
| 825 | ret = btrfs_commit_transaction(trans, root); | ||
| 826 | BUG_ON(ret); | ||
| 827 | inode->i_flags |= S_DEAD; | ||
| 828 | out_up_write: | ||
| 829 | up_write(&root->fs_info->subvol_sem); | ||
| 830 | out_unlock: | ||
| 831 | mutex_unlock(&inode->i_mutex); | ||
| 832 | if (!err) { | ||
| 833 | shrink_dcache_sb(root->fs_info->sb); | ||
| 834 | btrfs_invalidate_inodes(dest); | ||
| 835 | d_delete(dentry); | ||
| 836 | } | ||
| 837 | out_dput: | ||
| 838 | dput(dentry); | ||
| 839 | out_unlock_dir: | ||
| 840 | mutex_unlock(&dir->i_mutex); | ||
| 841 | mnt_drop_write(file->f_path.mnt); | ||
| 842 | out: | ||
| 843 | kfree(vol_args); | ||
| 844 | return err; | ||
| 845 | } | ||
| 846 | |||
| 795 | static int btrfs_ioctl_defrag(struct file *file) | 847 | static int btrfs_ioctl_defrag(struct file *file) |
| 796 | { | 848 | { |
| 797 | struct inode *inode = fdentry(file)->d_inode; | 849 | struct inode *inode = fdentry(file)->d_inode; |
| @@ -865,8 +917,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
| 865 | return ret; | 917 | return ret; |
| 866 | } | 918 | } |
| 867 | 919 | ||
| 868 | static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | 920 | static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, |
| 869 | u64 off, u64 olen, u64 destoff) | 921 | u64 off, u64 olen, u64 destoff) |
| 870 | { | 922 | { |
| 871 | struct inode *inode = fdentry(file)->d_inode; | 923 | struct inode *inode = fdentry(file)->d_inode; |
| 872 | struct btrfs_root *root = BTRFS_I(inode)->root; | 924 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| @@ -976,7 +1028,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 976 | 1028 | ||
| 977 | /* punch hole in destination first */ | 1029 | /* punch hole in destination first */ |
| 978 | btrfs_drop_extents(trans, root, inode, off, off + len, | 1030 | btrfs_drop_extents(trans, root, inode, off, off + len, |
| 979 | off + len, 0, &hint_byte); | 1031 | off + len, 0, &hint_byte, 1); |
| 980 | 1032 | ||
| 981 | /* clone data */ | 1033 | /* clone data */ |
| 982 | key.objectid = src->i_ino; | 1034 | key.objectid = src->i_ino; |
| @@ -1071,9 +1123,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1071 | datao += off - key.offset; | 1123 | datao += off - key.offset; |
| 1072 | datal -= off - key.offset; | 1124 | datal -= off - key.offset; |
| 1073 | } | 1125 | } |
| 1074 | if (key.offset + datao + datal + key.offset > | 1126 | |
| 1075 | off + len) | 1127 | if (key.offset + datal > off + len) |
| 1076 | datal = off + len - key.offset - datao; | 1128 | datal = off + len - key.offset; |
| 1129 | |||
| 1077 | /* disko == 0 means it's a hole */ | 1130 | /* disko == 0 means it's a hole */ |
| 1078 | if (!disko) | 1131 | if (!disko) |
| 1079 | datao = 0; | 1132 | datao = 0; |
| @@ -1182,15 +1235,15 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
| 1182 | struct inode *inode = fdentry(file)->d_inode; | 1235 | struct inode *inode = fdentry(file)->d_inode; |
| 1183 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1236 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1184 | struct btrfs_trans_handle *trans; | 1237 | struct btrfs_trans_handle *trans; |
| 1185 | int ret = 0; | 1238 | int ret; |
| 1186 | 1239 | ||
| 1240 | ret = -EPERM; | ||
| 1187 | if (!capable(CAP_SYS_ADMIN)) | 1241 | if (!capable(CAP_SYS_ADMIN)) |
| 1188 | return -EPERM; | 1242 | goto out; |
| 1189 | 1243 | ||
| 1190 | if (file->private_data) { | 1244 | ret = -EINPROGRESS; |
| 1191 | ret = -EINPROGRESS; | 1245 | if (file->private_data) |
| 1192 | goto out; | 1246 | goto out; |
| 1193 | } | ||
| 1194 | 1247 | ||
| 1195 | ret = mnt_want_write(file->f_path.mnt); | 1248 | ret = mnt_want_write(file->f_path.mnt); |
| 1196 | if (ret) | 1249 | if (ret) |
| @@ -1200,12 +1253,19 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
| 1200 | root->fs_info->open_ioctl_trans++; | 1253 | root->fs_info->open_ioctl_trans++; |
| 1201 | mutex_unlock(&root->fs_info->trans_mutex); | 1254 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1202 | 1255 | ||
| 1256 | ret = -ENOMEM; | ||
| 1203 | trans = btrfs_start_ioctl_transaction(root, 0); | 1257 | trans = btrfs_start_ioctl_transaction(root, 0); |
| 1204 | if (trans) | 1258 | if (!trans) |
| 1205 | file->private_data = trans; | 1259 | goto out_drop; |
| 1206 | else | 1260 | |
| 1207 | ret = -ENOMEM; | 1261 | file->private_data = trans; |
| 1208 | /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ | 1262 | return 0; |
| 1263 | |||
| 1264 | out_drop: | ||
| 1265 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 1266 | root->fs_info->open_ioctl_trans--; | ||
| 1267 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1268 | mnt_drop_write(file->f_path.mnt); | ||
| 1209 | out: | 1269 | out: |
| 1210 | return ret; | 1270 | return ret; |
| 1211 | } | 1271 | } |
| @@ -1221,24 +1281,20 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
| 1221 | struct inode *inode = fdentry(file)->d_inode; | 1281 | struct inode *inode = fdentry(file)->d_inode; |
| 1222 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1282 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1223 | struct btrfs_trans_handle *trans; | 1283 | struct btrfs_trans_handle *trans; |
| 1224 | int ret = 0; | ||
| 1225 | 1284 | ||
| 1226 | trans = file->private_data; | 1285 | trans = file->private_data; |
| 1227 | if (!trans) { | 1286 | if (!trans) |
| 1228 | ret = -EINVAL; | 1287 | return -EINVAL; |
| 1229 | goto out; | ||
| 1230 | } | ||
| 1231 | btrfs_end_transaction(trans, root); | ||
| 1232 | file->private_data = NULL; | 1288 | file->private_data = NULL; |
| 1233 | 1289 | ||
| 1290 | btrfs_end_transaction(trans, root); | ||
| 1291 | |||
| 1234 | mutex_lock(&root->fs_info->trans_mutex); | 1292 | mutex_lock(&root->fs_info->trans_mutex); |
| 1235 | root->fs_info->open_ioctl_trans--; | 1293 | root->fs_info->open_ioctl_trans--; |
| 1236 | mutex_unlock(&root->fs_info->trans_mutex); | 1294 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1237 | 1295 | ||
| 1238 | mnt_drop_write(file->f_path.mnt); | 1296 | mnt_drop_write(file->f_path.mnt); |
| 1239 | 1297 | return 0; | |
| 1240 | out: | ||
| 1241 | return ret; | ||
| 1242 | } | 1298 | } |
| 1243 | 1299 | ||
| 1244 | long btrfs_ioctl(struct file *file, unsigned int | 1300 | long btrfs_ioctl(struct file *file, unsigned int |
| @@ -1258,6 +1314,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 1258 | return btrfs_ioctl_snap_create(file, argp, 0); | 1314 | return btrfs_ioctl_snap_create(file, argp, 0); |
| 1259 | case BTRFS_IOC_SUBVOL_CREATE: | 1315 | case BTRFS_IOC_SUBVOL_CREATE: |
| 1260 | return btrfs_ioctl_snap_create(file, argp, 1); | 1316 | return btrfs_ioctl_snap_create(file, argp, 1); |
| 1317 | case BTRFS_IOC_SNAP_DESTROY: | ||
| 1318 | return btrfs_ioctl_snap_destroy(file, argp); | ||
| 1261 | case BTRFS_IOC_DEFRAG: | 1319 | case BTRFS_IOC_DEFRAG: |
| 1262 | return btrfs_ioctl_defrag(file); | 1320 | return btrfs_ioctl_defrag(file); |
| 1263 | case BTRFS_IOC_RESIZE: | 1321 | case BTRFS_IOC_RESIZE: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b320b103fa13..bc49914475eb 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args { | |||
| 65 | 65 | ||
| 66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ | 66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ |
| 67 | struct btrfs_ioctl_vol_args) | 67 | struct btrfs_ioctl_vol_args) |
| 68 | 68 | #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ | |
| 69 | struct btrfs_ioctl_vol_args) | ||
| 69 | #endif | 70 | #endif |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7b2f401e604e..5799bc46a309 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
| 159 | * | 159 | * |
| 160 | * len is the length of the extent | 160 | * len is the length of the extent |
| 161 | * | 161 | * |
| 162 | * This also sets the EXTENT_ORDERED bit on the range in the inode. | ||
| 163 | * | ||
| 164 | * The tree is given a single reference on the ordered extent that was | 162 | * The tree is given a single reference on the ordered extent that was |
| 165 | * inserted. | 163 | * inserted. |
| 166 | */ | 164 | */ |
| @@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 181 | entry->start = start; | 179 | entry->start = start; |
| 182 | entry->len = len; | 180 | entry->len = len; |
| 183 | entry->disk_len = disk_len; | 181 | entry->disk_len = disk_len; |
| 182 | entry->bytes_left = len; | ||
| 184 | entry->inode = inode; | 183 | entry->inode = inode; |
| 185 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 184 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
| 186 | set_bit(type, &entry->flags); | 185 | set_bit(type, &entry->flags); |
| @@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 195 | &entry->rb_node); | 194 | &entry->rb_node); |
| 196 | BUG_ON(node); | 195 | BUG_ON(node); |
| 197 | 196 | ||
| 198 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | ||
| 199 | entry_end(entry) - 1, GFP_NOFS); | ||
| 200 | |||
| 201 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 197 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
| 202 | list_add_tail(&entry->root_extent_list, | 198 | list_add_tail(&entry->root_extent_list, |
| 203 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 199 | &BTRFS_I(inode)->root->fs_info->ordered_extents); |
| @@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 241 | struct btrfs_ordered_inode_tree *tree; | 237 | struct btrfs_ordered_inode_tree *tree; |
| 242 | struct rb_node *node; | 238 | struct rb_node *node; |
| 243 | struct btrfs_ordered_extent *entry; | 239 | struct btrfs_ordered_extent *entry; |
| 244 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 245 | int ret; | 240 | int ret; |
| 246 | 241 | ||
| 247 | tree = &BTRFS_I(inode)->ordered_tree; | 242 | tree = &BTRFS_I(inode)->ordered_tree; |
| 248 | mutex_lock(&tree->mutex); | 243 | mutex_lock(&tree->mutex); |
| 249 | clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, | ||
| 250 | GFP_NOFS); | ||
| 251 | node = tree_search(tree, file_offset); | 244 | node = tree_search(tree, file_offset); |
| 252 | if (!node) { | 245 | if (!node) { |
| 253 | ret = 1; | 246 | ret = 1; |
| @@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 260 | goto out; | 253 | goto out; |
| 261 | } | 254 | } |
| 262 | 255 | ||
| 263 | ret = test_range_bit(io_tree, entry->file_offset, | 256 | if (io_size > entry->bytes_left) { |
| 264 | entry->file_offset + entry->len - 1, | 257 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", |
| 265 | EXTENT_ORDERED, 0); | 258 | (unsigned long long)entry->bytes_left, |
| 266 | if (ret == 0) | 259 | (unsigned long long)io_size); |
| 260 | } | ||
| 261 | entry->bytes_left -= io_size; | ||
| 262 | if (entry->bytes_left == 0) | ||
| 267 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 263 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 264 | else | ||
| 265 | ret = 1; | ||
| 268 | out: | 266 | out: |
| 269 | mutex_unlock(&tree->mutex); | 267 | mutex_unlock(&tree->mutex); |
| 270 | return ret == 0; | 268 | return ret == 0; |
| @@ -308,6 +306,12 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
| 308 | tree->last = NULL; | 306 | tree->last = NULL; |
| 309 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 307 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
| 310 | 308 | ||
| 309 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 310 | BTRFS_I(inode)->outstanding_extents--; | ||
| 311 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 312 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
| 313 | inode, 1); | ||
| 314 | |||
| 311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 315 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
| 312 | list_del_init(&entry->root_extent_list); | 316 | list_del_init(&entry->root_extent_list); |
| 313 | 317 | ||
| @@ -460,7 +464,7 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 460 | * start IO on any dirty ones so the wait doesn't stall waiting | 464 | * start IO on any dirty ones so the wait doesn't stall waiting |
| 461 | * for pdflush to find them | 465 | * for pdflush to find them |
| 462 | */ | 466 | */ |
| 463 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); | 467 | filemap_fdatawrite_range(inode->i_mapping, start, end); |
| 464 | if (wait) { | 468 | if (wait) { |
| 465 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 469 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
| 466 | &entry->flags)); | 470 | &entry->flags)); |
| @@ -476,6 +480,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 476 | u64 orig_end; | 480 | u64 orig_end; |
| 477 | u64 wait_end; | 481 | u64 wait_end; |
| 478 | struct btrfs_ordered_extent *ordered; | 482 | struct btrfs_ordered_extent *ordered; |
| 483 | int found; | ||
| 479 | 484 | ||
| 480 | if (start + len < start) { | 485 | if (start + len < start) { |
| 481 | orig_end = INT_LIMIT(loff_t); | 486 | orig_end = INT_LIMIT(loff_t); |
| @@ -489,19 +494,18 @@ again: | |||
| 489 | /* start IO across the range first to instantiate any delalloc | 494 | /* start IO across the range first to instantiate any delalloc |
| 490 | * extents | 495 | * extents |
| 491 | */ | 496 | */ |
| 492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 497 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
| 493 | 498 | ||
| 494 | /* The compression code will leave pages locked but return from | 499 | /* The compression code will leave pages locked but return from |
| 495 | * writepage without setting the page writeback. Starting again | 500 | * writepage without setting the page writeback. Starting again |
| 496 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | 501 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. |
| 497 | */ | 502 | */ |
| 498 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 503 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
| 499 | 504 | ||
| 500 | btrfs_wait_on_page_writeback_range(inode->i_mapping, | 505 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
| 501 | start >> PAGE_CACHE_SHIFT, | ||
| 502 | orig_end >> PAGE_CACHE_SHIFT); | ||
| 503 | 506 | ||
| 504 | end = orig_end; | 507 | end = orig_end; |
| 508 | found = 0; | ||
| 505 | while (1) { | 509 | while (1) { |
| 506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 510 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
| 507 | if (!ordered) | 511 | if (!ordered) |
| @@ -514,6 +518,7 @@ again: | |||
| 514 | btrfs_put_ordered_extent(ordered); | 518 | btrfs_put_ordered_extent(ordered); |
| 515 | break; | 519 | break; |
| 516 | } | 520 | } |
| 521 | found++; | ||
| 517 | btrfs_start_ordered_extent(inode, ordered, 1); | 522 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 518 | end = ordered->file_offset; | 523 | end = ordered->file_offset; |
| 519 | btrfs_put_ordered_extent(ordered); | 524 | btrfs_put_ordered_extent(ordered); |
| @@ -521,8 +526,8 @@ again: | |||
| 521 | break; | 526 | break; |
| 522 | end--; | 527 | end--; |
| 523 | } | 528 | } |
| 524 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | 529 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, |
| 525 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | 530 | EXTENT_DELALLOC, 0, NULL)) { |
| 526 | schedule_timeout(1); | 531 | schedule_timeout(1); |
| 527 | goto again; | 532 | goto again; |
| 528 | } | 533 | } |
| @@ -613,7 +618,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
| 613 | */ | 618 | */ |
| 614 | if (test_range_bit(io_tree, disk_i_size, | 619 | if (test_range_bit(io_tree, disk_i_size, |
| 615 | ordered->file_offset + ordered->len - 1, | 620 | ordered->file_offset + ordered->len - 1, |
| 616 | EXTENT_DELALLOC, 0)) { | 621 | EXTENT_DELALLOC, 0, NULL)) { |
| 617 | goto out; | 622 | goto out; |
| 618 | } | 623 | } |
| 619 | /* | 624 | /* |
| @@ -664,7 +669,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
| 664 | */ | 669 | */ |
| 665 | if (i_size_test > entry_end(ordered) && | 670 | if (i_size_test > entry_end(ordered) && |
| 666 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, | 671 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, |
| 667 | EXTENT_DELALLOC, 0)) { | 672 | EXTENT_DELALLOC, 0, NULL)) { |
| 668 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | 673 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); |
| 669 | } | 674 | } |
| 670 | BTRFS_I(inode)->disk_i_size = new_i_size; | 675 | BTRFS_I(inode)->disk_i_size = new_i_size; |
| @@ -715,89 +720,6 @@ out: | |||
| 715 | } | 720 | } |
| 716 | 721 | ||
| 717 | 722 | ||
| 718 | /** | ||
| 719 | * taken from mm/filemap.c because it isn't exported | ||
| 720 | * | ||
| 721 | * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range | ||
| 722 | * @mapping: address space structure to write | ||
| 723 | * @start: offset in bytes where the range starts | ||
| 724 | * @end: offset in bytes where the range ends (inclusive) | ||
| 725 | * @sync_mode: enable synchronous operation | ||
| 726 | * | ||
| 727 | * Start writeback against all of a mapping's dirty pages that lie | ||
| 728 | * within the byte offsets <start, end> inclusive. | ||
| 729 | * | ||
| 730 | * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as | ||
| 731 | * opposed to a regular memory cleansing writeback. The difference between | ||
| 732 | * these two operations is that if a dirty page/buffer is encountered, it must | ||
| 733 | * be waited upon, and not just skipped over. | ||
| 734 | */ | ||
| 735 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 736 | loff_t end, int sync_mode) | ||
| 737 | { | ||
| 738 | struct writeback_control wbc = { | ||
| 739 | .sync_mode = sync_mode, | ||
| 740 | .nr_to_write = mapping->nrpages * 2, | ||
| 741 | .range_start = start, | ||
| 742 | .range_end = end, | ||
| 743 | }; | ||
| 744 | return btrfs_writepages(mapping, &wbc); | ||
| 745 | } | ||
| 746 | |||
| 747 | /** | ||
| 748 | * taken from mm/filemap.c because it isn't exported | ||
| 749 | * | ||
| 750 | * wait_on_page_writeback_range - wait for writeback to complete | ||
| 751 | * @mapping: target address_space | ||
| 752 | * @start: beginning page index | ||
| 753 | * @end: ending page index | ||
| 754 | * | ||
| 755 | * Wait for writeback to complete against pages indexed by start->end | ||
| 756 | * inclusive | ||
| 757 | */ | ||
| 758 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 759 | pgoff_t start, pgoff_t end) | ||
| 760 | { | ||
| 761 | struct pagevec pvec; | ||
| 762 | int nr_pages; | ||
| 763 | int ret = 0; | ||
| 764 | pgoff_t index; | ||
| 765 | |||
| 766 | if (end < start) | ||
| 767 | return 0; | ||
| 768 | |||
| 769 | pagevec_init(&pvec, 0); | ||
| 770 | index = start; | ||
| 771 | while ((index <= end) && | ||
| 772 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 773 | PAGECACHE_TAG_WRITEBACK, | ||
| 774 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | ||
| 775 | unsigned i; | ||
| 776 | |||
| 777 | for (i = 0; i < nr_pages; i++) { | ||
| 778 | struct page *page = pvec.pages[i]; | ||
| 779 | |||
| 780 | /* until radix tree lookup accepts end_index */ | ||
| 781 | if (page->index > end) | ||
| 782 | continue; | ||
| 783 | |||
| 784 | wait_on_page_writeback(page); | ||
| 785 | if (PageError(page)) | ||
| 786 | ret = -EIO; | ||
| 787 | } | ||
| 788 | pagevec_release(&pvec); | ||
| 789 | cond_resched(); | ||
| 790 | } | ||
| 791 | |||
| 792 | /* Check for outstanding write errors */ | ||
| 793 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | ||
| 794 | ret = -ENOSPC; | ||
| 795 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | ||
| 796 | ret = -EIO; | ||
| 797 | |||
| 798 | return ret; | ||
| 799 | } | ||
| 800 | |||
| 801 | /* | 723 | /* |
| 802 | * add a given inode to the list of inodes that must be fully on | 724 | * add a given inode to the list of inodes that must be fully on |
| 803 | * disk before a transaction commit finishes. | 725 | * disk before a transaction commit finishes. |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3d31c8827b01..f82e87488ca8 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -85,6 +85,9 @@ struct btrfs_ordered_extent { | |||
| 85 | /* extent length on disk */ | 85 | /* extent length on disk */ |
| 86 | u64 disk_len; | 86 | u64 disk_len; |
| 87 | 87 | ||
| 88 | /* number of bytes that still need writing */ | ||
| 89 | u64 bytes_left; | ||
| 90 | |||
| 88 | /* flags (described above) */ | 91 | /* flags (described above) */ |
| 89 | unsigned long flags; | 92 | unsigned long flags; |
| 90 | 93 | ||
| @@ -150,10 +153,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | |||
| 150 | int btrfs_ordered_update_i_size(struct inode *inode, | 153 | int btrfs_ordered_update_i_size(struct inode *inode, |
| 151 | struct btrfs_ordered_extent *ordered); | 154 | struct btrfs_ordered_extent *ordered); |
| 152 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
| 153 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 154 | pgoff_t start, pgoff_t end); | ||
| 155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 156 | loff_t end, int sync_mode); | ||
| 157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | 156 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); |
| 158 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 157 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); |
| 159 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 158 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 3c0d52af4f80..79cba5fbc28e 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
| @@ -65,3 +65,23 @@ out: | |||
| 65 | btrfs_free_path(path); | 65 | btrfs_free_path(path); |
| 66 | return ret; | 66 | return ret; |
| 67 | } | 67 | } |
| 68 | |||
| 69 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset) | ||
| 70 | { | ||
| 71 | struct btrfs_path *path; | ||
| 72 | struct btrfs_key key; | ||
| 73 | int ret; | ||
| 74 | |||
| 75 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
| 76 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
| 77 | key.offset = offset; | ||
| 78 | |||
| 79 | path = btrfs_alloc_path(); | ||
| 80 | if (!path) | ||
| 81 | return -ENOMEM; | ||
| 82 | |||
| 83 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 84 | |||
| 85 | btrfs_free_path(path); | ||
| 86 | return ret; | ||
| 87 | } | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c04f7f212602..cfcc93c93a7b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -121,6 +121,15 @@ struct inodevec { | |||
| 121 | int nr; | 121 | int nr; |
| 122 | }; | 122 | }; |
| 123 | 123 | ||
| 124 | #define MAX_EXTENTS 128 | ||
| 125 | |||
| 126 | struct file_extent_cluster { | ||
| 127 | u64 start; | ||
| 128 | u64 end; | ||
| 129 | u64 boundary[MAX_EXTENTS]; | ||
| 130 | unsigned int nr; | ||
| 131 | }; | ||
| 132 | |||
| 124 | struct reloc_control { | 133 | struct reloc_control { |
| 125 | /* block group to relocate */ | 134 | /* block group to relocate */ |
| 126 | struct btrfs_block_group_cache *block_group; | 135 | struct btrfs_block_group_cache *block_group; |
| @@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
| 2180 | struct reloc_control *rc) | 2189 | struct reloc_control *rc) |
| 2181 | { | 2190 | { |
| 2182 | if (test_range_bit(&rc->processed_blocks, bytenr, | 2191 | if (test_range_bit(&rc->processed_blocks, bytenr, |
| 2183 | bytenr + blocksize - 1, EXTENT_DIRTY, 1)) | 2192 | bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) |
| 2184 | return 1; | 2193 | return 1; |
| 2185 | return 0; | 2194 | return 0; |
| 2186 | } | 2195 | } |
| @@ -2529,56 +2538,94 @@ out: | |||
| 2529 | } | 2538 | } |
| 2530 | 2539 | ||
| 2531 | static noinline_for_stack | 2540 | static noinline_for_stack |
| 2532 | int relocate_inode_pages(struct inode *inode, u64 start, u64 len) | 2541 | int setup_extent_mapping(struct inode *inode, u64 start, u64 end, |
| 2542 | u64 block_start) | ||
| 2543 | { | ||
| 2544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2545 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 2546 | struct extent_map *em; | ||
| 2547 | int ret = 0; | ||
| 2548 | |||
| 2549 | em = alloc_extent_map(GFP_NOFS); | ||
| 2550 | if (!em) | ||
| 2551 | return -ENOMEM; | ||
| 2552 | |||
| 2553 | em->start = start; | ||
| 2554 | em->len = end + 1 - start; | ||
| 2555 | em->block_len = em->len; | ||
| 2556 | em->block_start = block_start; | ||
| 2557 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 2558 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 2559 | |||
| 2560 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2561 | while (1) { | ||
| 2562 | write_lock(&em_tree->lock); | ||
| 2563 | ret = add_extent_mapping(em_tree, em); | ||
| 2564 | write_unlock(&em_tree->lock); | ||
| 2565 | if (ret != -EEXIST) { | ||
| 2566 | free_extent_map(em); | ||
| 2567 | break; | ||
| 2568 | } | ||
| 2569 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
| 2570 | } | ||
| 2571 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2572 | return ret; | ||
| 2573 | } | ||
| 2574 | |||
| 2575 | static int relocate_file_extent_cluster(struct inode *inode, | ||
| 2576 | struct file_extent_cluster *cluster) | ||
| 2533 | { | 2577 | { |
| 2534 | u64 page_start; | 2578 | u64 page_start; |
| 2535 | u64 page_end; | 2579 | u64 page_end; |
| 2536 | unsigned long i; | 2580 | u64 offset = BTRFS_I(inode)->index_cnt; |
| 2537 | unsigned long first_index; | 2581 | unsigned long index; |
| 2538 | unsigned long last_index; | 2582 | unsigned long last_index; |
| 2539 | unsigned int total_read = 0; | 2583 | unsigned int dirty_page = 0; |
| 2540 | unsigned int total_dirty = 0; | ||
| 2541 | struct page *page; | 2584 | struct page *page; |
| 2542 | struct file_ra_state *ra; | 2585 | struct file_ra_state *ra; |
| 2543 | struct btrfs_ordered_extent *ordered; | 2586 | int nr = 0; |
| 2544 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 2545 | int ret = 0; | 2587 | int ret = 0; |
| 2546 | 2588 | ||
| 2589 | if (!cluster->nr) | ||
| 2590 | return 0; | ||
| 2591 | |||
| 2547 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 2592 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
| 2548 | if (!ra) | 2593 | if (!ra) |
| 2549 | return -ENOMEM; | 2594 | return -ENOMEM; |
| 2550 | 2595 | ||
| 2596 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | ||
| 2597 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
| 2598 | |||
| 2551 | mutex_lock(&inode->i_mutex); | 2599 | mutex_lock(&inode->i_mutex); |
| 2552 | first_index = start >> PAGE_CACHE_SHIFT; | ||
| 2553 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
| 2554 | 2600 | ||
| 2555 | /* make sure the dirty trick played by the caller work */ | 2601 | i_size_write(inode, cluster->end + 1 - offset); |
| 2556 | while (1) { | 2602 | ret = setup_extent_mapping(inode, cluster->start - offset, |
| 2557 | ret = invalidate_inode_pages2_range(inode->i_mapping, | 2603 | cluster->end - offset, cluster->start); |
| 2558 | first_index, last_index); | ||
| 2559 | if (ret != -EBUSY) | ||
| 2560 | break; | ||
| 2561 | schedule_timeout(HZ/10); | ||
| 2562 | } | ||
| 2563 | if (ret) | 2604 | if (ret) |
| 2564 | goto out_unlock; | 2605 | goto out_unlock; |
| 2565 | 2606 | ||
| 2566 | file_ra_state_init(ra, inode->i_mapping); | 2607 | file_ra_state_init(ra, inode->i_mapping); |
| 2567 | 2608 | ||
| 2568 | for (i = first_index ; i <= last_index; i++) { | 2609 | WARN_ON(cluster->start != cluster->boundary[0]); |
| 2569 | if (total_read % ra->ra_pages == 0) { | 2610 | while (index <= last_index) { |
| 2570 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | 2611 | page = find_lock_page(inode->i_mapping, index); |
| 2571 | min(last_index, ra->ra_pages + i - 1)); | ||
| 2572 | } | ||
| 2573 | total_read++; | ||
| 2574 | again: | ||
| 2575 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
| 2576 | BUG_ON(1); | ||
| 2577 | page = grab_cache_page(inode->i_mapping, i); | ||
| 2578 | if (!page) { | 2612 | if (!page) { |
| 2579 | ret = -ENOMEM; | 2613 | page_cache_sync_readahead(inode->i_mapping, |
| 2580 | goto out_unlock; | 2614 | ra, NULL, index, |
| 2615 | last_index + 1 - index); | ||
| 2616 | page = grab_cache_page(inode->i_mapping, index); | ||
| 2617 | if (!page) { | ||
| 2618 | ret = -ENOMEM; | ||
| 2619 | goto out_unlock; | ||
| 2620 | } | ||
| 2621 | } | ||
| 2622 | |||
| 2623 | if (PageReadahead(page)) { | ||
| 2624 | page_cache_async_readahead(inode->i_mapping, | ||
| 2625 | ra, NULL, page, index, | ||
| 2626 | last_index + 1 - index); | ||
| 2581 | } | 2627 | } |
| 2628 | |||
| 2582 | if (!PageUptodate(page)) { | 2629 | if (!PageUptodate(page)) { |
| 2583 | btrfs_readpage(NULL, page); | 2630 | btrfs_readpage(NULL, page); |
| 2584 | lock_page(page); | 2631 | lock_page(page); |
| @@ -2589,75 +2636,79 @@ again: | |||
| 2589 | goto out_unlock; | 2636 | goto out_unlock; |
| 2590 | } | 2637 | } |
| 2591 | } | 2638 | } |
| 2592 | wait_on_page_writeback(page); | ||
| 2593 | 2639 | ||
| 2594 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2640 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 2595 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2641 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 2596 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2642 | |
| 2597 | 2643 | lock_extent(&BTRFS_I(inode)->io_tree, | |
| 2598 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 2644 | page_start, page_end, GFP_NOFS); |
| 2599 | if (ordered) { | 2645 | |
| 2600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 2601 | unlock_page(page); | ||
| 2602 | page_cache_release(page); | ||
| 2603 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 2604 | btrfs_put_ordered_extent(ordered); | ||
| 2605 | goto again; | ||
| 2606 | } | ||
| 2607 | set_page_extent_mapped(page); | 2646 | set_page_extent_mapped(page); |
| 2608 | 2647 | ||
| 2609 | if (i == first_index) | 2648 | if (nr < cluster->nr && |
| 2610 | set_extent_bits(io_tree, page_start, page_end, | 2649 | page_start + offset == cluster->boundary[nr]) { |
| 2650 | set_extent_bits(&BTRFS_I(inode)->io_tree, | ||
| 2651 | page_start, page_end, | ||
| 2611 | EXTENT_BOUNDARY, GFP_NOFS); | 2652 | EXTENT_BOUNDARY, GFP_NOFS); |
| 2653 | nr++; | ||
| 2654 | } | ||
| 2612 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 2655 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 2613 | 2656 | ||
| 2614 | set_page_dirty(page); | 2657 | set_page_dirty(page); |
| 2615 | total_dirty++; | 2658 | dirty_page++; |
| 2616 | 2659 | ||
| 2617 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2660 | unlock_extent(&BTRFS_I(inode)->io_tree, |
| 2661 | page_start, page_end, GFP_NOFS); | ||
| 2618 | unlock_page(page); | 2662 | unlock_page(page); |
| 2619 | page_cache_release(page); | 2663 | page_cache_release(page); |
| 2664 | |||
| 2665 | index++; | ||
| 2666 | if (nr < cluster->nr && | ||
| 2667 | page_end + 1 + offset == cluster->boundary[nr]) { | ||
| 2668 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2669 | dirty_page); | ||
| 2670 | dirty_page = 0; | ||
| 2671 | } | ||
| 2672 | } | ||
| 2673 | if (dirty_page) { | ||
| 2674 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2675 | dirty_page); | ||
| 2620 | } | 2676 | } |
| 2677 | WARN_ON(nr != cluster->nr); | ||
| 2621 | out_unlock: | 2678 | out_unlock: |
| 2622 | mutex_unlock(&inode->i_mutex); | 2679 | mutex_unlock(&inode->i_mutex); |
| 2623 | kfree(ra); | 2680 | kfree(ra); |
| 2624 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
| 2625 | return ret; | 2681 | return ret; |
| 2626 | } | 2682 | } |
| 2627 | 2683 | ||
| 2628 | static noinline_for_stack | 2684 | static noinline_for_stack |
| 2629 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) | 2685 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, |
| 2686 | struct file_extent_cluster *cluster) | ||
| 2630 | { | 2687 | { |
| 2631 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2688 | int ret; |
| 2632 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 2633 | struct extent_map *em; | ||
| 2634 | u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; | ||
| 2635 | u64 end = start + extent_key->offset - 1; | ||
| 2636 | |||
| 2637 | em = alloc_extent_map(GFP_NOFS); | ||
| 2638 | em->start = start; | ||
| 2639 | em->len = extent_key->offset; | ||
| 2640 | em->block_len = extent_key->offset; | ||
| 2641 | em->block_start = extent_key->objectid; | ||
| 2642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 2643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 2644 | 2689 | ||
| 2645 | /* setup extent map to cheat btrfs_readpage */ | 2690 | if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { |
| 2646 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | 2691 | ret = relocate_file_extent_cluster(inode, cluster); |
| 2647 | while (1) { | 2692 | if (ret) |
| 2648 | int ret; | 2693 | return ret; |
| 2649 | spin_lock(&em_tree->lock); | 2694 | cluster->nr = 0; |
| 2650 | ret = add_extent_mapping(em_tree, em); | ||
| 2651 | spin_unlock(&em_tree->lock); | ||
| 2652 | if (ret != -EEXIST) { | ||
| 2653 | free_extent_map(em); | ||
| 2654 | break; | ||
| 2655 | } | ||
| 2656 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
| 2657 | } | 2695 | } |
| 2658 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2659 | 2696 | ||
| 2660 | return relocate_inode_pages(inode, start, extent_key->offset); | 2697 | if (!cluster->nr) |
| 2698 | cluster->start = extent_key->objectid; | ||
| 2699 | else | ||
| 2700 | BUG_ON(cluster->nr >= MAX_EXTENTS); | ||
| 2701 | cluster->end = extent_key->objectid + extent_key->offset - 1; | ||
| 2702 | cluster->boundary[cluster->nr] = extent_key->objectid; | ||
| 2703 | cluster->nr++; | ||
| 2704 | |||
| 2705 | if (cluster->nr >= MAX_EXTENTS) { | ||
| 2706 | ret = relocate_file_extent_cluster(inode, cluster); | ||
| 2707 | if (ret) | ||
| 2708 | return ret; | ||
| 2709 | cluster->nr = 0; | ||
| 2710 | } | ||
| 2711 | return 0; | ||
| 2661 | } | 2712 | } |
| 2662 | 2713 | ||
| 2663 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 2714 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
| @@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags) | |||
| 3203 | return 0; | 3254 | return 0; |
| 3204 | } | 3255 | } |
| 3205 | 3256 | ||
| 3257 | |||
| 3206 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3258 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
| 3207 | { | 3259 | { |
| 3208 | struct rb_root blocks = RB_ROOT; | 3260 | struct rb_root blocks = RB_ROOT; |
| 3209 | struct btrfs_key key; | 3261 | struct btrfs_key key; |
| 3262 | struct file_extent_cluster *cluster; | ||
| 3210 | struct btrfs_trans_handle *trans = NULL; | 3263 | struct btrfs_trans_handle *trans = NULL; |
| 3211 | struct btrfs_path *path; | 3264 | struct btrfs_path *path; |
| 3212 | struct btrfs_extent_item *ei; | 3265 | struct btrfs_extent_item *ei; |
| @@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3216 | int ret; | 3269 | int ret; |
| 3217 | int err = 0; | 3270 | int err = 0; |
| 3218 | 3271 | ||
| 3272 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
| 3273 | if (!cluster) | ||
| 3274 | return -ENOMEM; | ||
| 3275 | |||
| 3219 | path = btrfs_alloc_path(); | 3276 | path = btrfs_alloc_path(); |
| 3220 | if (!path) | 3277 | if (!path) |
| 3221 | return -ENOMEM; | 3278 | return -ENOMEM; |
| 3222 | 3279 | ||
| 3280 | rc->extents_found = 0; | ||
| 3281 | rc->extents_skipped = 0; | ||
| 3282 | |||
| 3223 | rc->search_start = rc->block_group->key.objectid; | 3283 | rc->search_start = rc->block_group->key.objectid; |
| 3224 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | 3284 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, |
| 3225 | GFP_NOFS); | 3285 | GFP_NOFS); |
| @@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3306 | } | 3366 | } |
| 3307 | 3367 | ||
| 3308 | nr = trans->blocks_used; | 3368 | nr = trans->blocks_used; |
| 3309 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3369 | btrfs_end_transaction(trans, rc->extent_root); |
| 3310 | trans = NULL; | 3370 | trans = NULL; |
| 3311 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3371 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
| 3312 | 3372 | ||
| 3313 | if (rc->stage == MOVE_DATA_EXTENTS && | 3373 | if (rc->stage == MOVE_DATA_EXTENTS && |
| 3314 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3374 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
| 3315 | rc->found_file_extent = 1; | 3375 | rc->found_file_extent = 1; |
| 3316 | ret = relocate_data_extent(rc->data_inode, &key); | 3376 | ret = relocate_data_extent(rc->data_inode, |
| 3377 | &key, cluster); | ||
| 3317 | if (ret < 0) { | 3378 | if (ret < 0) { |
| 3318 | err = ret; | 3379 | err = ret; |
| 3319 | break; | 3380 | break; |
| @@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3328 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3389 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
| 3329 | } | 3390 | } |
| 3330 | 3391 | ||
| 3392 | if (!err) { | ||
| 3393 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | ||
| 3394 | if (ret < 0) | ||
| 3395 | err = ret; | ||
| 3396 | } | ||
| 3397 | |||
| 3398 | kfree(cluster); | ||
| 3399 | |||
| 3331 | rc->create_reloc_root = 0; | 3400 | rc->create_reloc_root = 0; |
| 3332 | smp_mb(); | 3401 | smp_mb(); |
| 3333 | 3402 | ||
| @@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3348 | } | 3417 | } |
| 3349 | 3418 | ||
| 3350 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 3419 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, |
| 3351 | struct btrfs_root *root, | 3420 | struct btrfs_root *root, u64 objectid) |
| 3352 | u64 objectid, u64 size) | ||
| 3353 | { | 3421 | { |
| 3354 | struct btrfs_path *path; | 3422 | struct btrfs_path *path; |
| 3355 | struct btrfs_inode_item *item; | 3423 | struct btrfs_inode_item *item; |
| @@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
| 3368 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | 3436 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); |
| 3369 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | 3437 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); |
| 3370 | btrfs_set_inode_generation(leaf, item, 1); | 3438 | btrfs_set_inode_generation(leaf, item, 1); |
| 3371 | btrfs_set_inode_size(leaf, item, size); | 3439 | btrfs_set_inode_size(leaf, item, 0); |
| 3372 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3440 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
| 3373 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3441 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); |
| 3374 | btrfs_mark_buffer_dirty(leaf); | 3442 | btrfs_mark_buffer_dirty(leaf); |
| @@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3404 | if (err) | 3472 | if (err) |
| 3405 | goto out; | 3473 | goto out; |
| 3406 | 3474 | ||
| 3407 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 3475 | err = __insert_orphan_inode(trans, root, objectid); |
| 3408 | BUG_ON(err); | ||
| 3409 | |||
| 3410 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
| 3411 | group->key.offset, 0, group->key.offset, | ||
| 3412 | 0, 0, 0); | ||
| 3413 | BUG_ON(err); | 3476 | BUG_ON(err); |
| 3414 | 3477 | ||
| 3415 | key.objectid = objectid; | 3478 | key.objectid = objectid; |
| @@ -3455,7 +3518,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3455 | BUG_ON(!rc->block_group); | 3518 | BUG_ON(!rc->block_group); |
| 3456 | 3519 | ||
| 3457 | btrfs_init_workers(&rc->workers, "relocate", | 3520 | btrfs_init_workers(&rc->workers, "relocate", |
| 3458 | fs_info->thread_pool_size); | 3521 | fs_info->thread_pool_size, NULL); |
| 3459 | 3522 | ||
| 3460 | rc->extent_root = extent_root; | 3523 | rc->extent_root = extent_root; |
| 3461 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3524 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); |
| @@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3475 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 3538 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); |
| 3476 | 3539 | ||
| 3477 | while (1) { | 3540 | while (1) { |
| 3478 | mutex_lock(&fs_info->cleaner_mutex); | ||
| 3479 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
| 3480 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 3481 | |||
| 3482 | rc->extents_found = 0; | 3541 | rc->extents_found = 0; |
| 3483 | rc->extents_skipped = 0; | 3542 | rc->extents_skipped = 0; |
| 3484 | 3543 | ||
| 3544 | mutex_lock(&fs_info->cleaner_mutex); | ||
| 3545 | |||
| 3546 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
| 3485 | ret = relocate_block_group(rc); | 3547 | ret = relocate_block_group(rc); |
| 3548 | |||
| 3549 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 3486 | if (ret < 0) { | 3550 | if (ret < 0) { |
| 3487 | err = ret; | 3551 | err = ret; |
| 3488 | break; | 3552 | break; |
| @@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3514 | } | 3578 | } |
| 3515 | } | 3579 | } |
| 3516 | 3580 | ||
| 3517 | filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, | 3581 | filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, |
| 3518 | rc->block_group->key.objectid, | 3582 | rc->block_group->key.objectid, |
| 3519 | rc->block_group->key.objectid + | 3583 | rc->block_group->key.objectid + |
| 3520 | rc->block_group->key.offset - 1); | 3584 | rc->block_group->key.offset - 1); |
| 3521 | 3585 | ||
| 3522 | WARN_ON(rc->block_group->pinned > 0); | 3586 | WARN_ON(rc->block_group->pinned > 0); |
| 3523 | WARN_ON(rc->block_group->reserved > 0); | 3587 | WARN_ON(rc->block_group->reserved > 0); |
| @@ -3530,6 +3594,26 @@ out: | |||
| 3530 | return err; | 3594 | return err; |
| 3531 | } | 3595 | } |
| 3532 | 3596 | ||
| 3597 | static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | ||
| 3598 | { | ||
| 3599 | struct btrfs_trans_handle *trans; | ||
| 3600 | int ret; | ||
| 3601 | |||
| 3602 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | ||
| 3603 | |||
| 3604 | memset(&root->root_item.drop_progress, 0, | ||
| 3605 | sizeof(root->root_item.drop_progress)); | ||
| 3606 | root->root_item.drop_level = 0; | ||
| 3607 | btrfs_set_root_refs(&root->root_item, 0); | ||
| 3608 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
| 3609 | &root->root_key, &root->root_item); | ||
| 3610 | BUG_ON(ret); | ||
| 3611 | |||
| 3612 | ret = btrfs_end_transaction(trans, root->fs_info->tree_root); | ||
| 3613 | BUG_ON(ret); | ||
| 3614 | return 0; | ||
| 3615 | } | ||
| 3616 | |||
| 3533 | /* | 3617 | /* |
| 3534 | * recover relocation interrupted by system crash. | 3618 | * recover relocation interrupted by system crash. |
| 3535 | * | 3619 | * |
| @@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3589 | fs_root = read_fs_root(root->fs_info, | 3673 | fs_root = read_fs_root(root->fs_info, |
| 3590 | reloc_root->root_key.offset); | 3674 | reloc_root->root_key.offset); |
| 3591 | if (IS_ERR(fs_root)) { | 3675 | if (IS_ERR(fs_root)) { |
| 3592 | err = PTR_ERR(fs_root); | 3676 | ret = PTR_ERR(fs_root); |
| 3593 | goto out; | 3677 | if (ret != -ENOENT) { |
| 3678 | err = ret; | ||
| 3679 | goto out; | ||
| 3680 | } | ||
| 3681 | mark_garbage_root(reloc_root); | ||
| 3594 | } | 3682 | } |
| 3595 | } | 3683 | } |
| 3596 | 3684 | ||
| @@ -3613,7 +3701,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3613 | mapping_tree_init(&rc->reloc_root_tree); | 3701 | mapping_tree_init(&rc->reloc_root_tree); |
| 3614 | INIT_LIST_HEAD(&rc->reloc_roots); | 3702 | INIT_LIST_HEAD(&rc->reloc_roots); |
| 3615 | btrfs_init_workers(&rc->workers, "relocate", | 3703 | btrfs_init_workers(&rc->workers, "relocate", |
| 3616 | root->fs_info->thread_pool_size); | 3704 | root->fs_info->thread_pool_size, NULL); |
| 3617 | rc->extent_root = root->fs_info->extent_root; | 3705 | rc->extent_root = root->fs_info->extent_root; |
| 3618 | 3706 | ||
| 3619 | set_reloc_control(rc); | 3707 | set_reloc_control(rc); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ddc6d61c55a..67fa2d29d663 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
| 94 | goto out; | 94 | goto out; |
| 95 | 95 | ||
| 96 | BUG_ON(ret == 0); | 96 | BUG_ON(ret == 0); |
| 97 | if (path->slots[0] == 0) { | ||
| 98 | ret = 1; | ||
| 99 | goto out; | ||
| 100 | } | ||
| 97 | l = path->nodes[0]; | 101 | l = path->nodes[0]; |
| 98 | BUG_ON(path->slots[0] == 0); | ||
| 99 | slot = path->slots[0] - 1; | 102 | slot = path->slots[0] - 1; |
| 100 | btrfs_item_key_to_cpu(l, &found_key, slot); | 103 | btrfs_item_key_to_cpu(l, &found_key, slot); |
| 101 | if (found_key.objectid != objectid) { | 104 | if (found_key.objectid != objectid || |
| 105 | found_key.type != BTRFS_ROOT_ITEM_KEY) { | ||
| 102 | ret = 1; | 106 | ret = 1; |
| 103 | goto out; | 107 | goto out; |
| 104 | } | 108 | } |
| 105 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), | 109 | if (item) |
| 106 | sizeof(*item)); | 110 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), |
| 107 | memcpy(key, &found_key, sizeof(found_key)); | 111 | sizeof(*item)); |
| 112 | if (key) | ||
| 113 | memcpy(key, &found_key, sizeof(found_key)); | ||
| 108 | ret = 0; | 114 | ret = 0; |
| 109 | out: | 115 | out: |
| 110 | btrfs_free_path(path); | 116 | btrfs_free_path(path); |
| @@ -153,7 +159,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 153 | write_extent_buffer(l, item, ptr, sizeof(*item)); | 159 | write_extent_buffer(l, item, ptr, sizeof(*item)); |
| 154 | btrfs_mark_buffer_dirty(path->nodes[0]); | 160 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 155 | out: | 161 | out: |
| 156 | btrfs_release_path(root, path); | ||
| 157 | btrfs_free_path(path); | 162 | btrfs_free_path(path); |
| 158 | return ret; | 163 | return ret; |
| 159 | } | 164 | } |
| @@ -249,6 +254,59 @@ err: | |||
| 249 | return ret; | 254 | return ret; |
| 250 | } | 255 | } |
| 251 | 256 | ||
| 257 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | ||
| 258 | { | ||
| 259 | struct extent_buffer *leaf; | ||
| 260 | struct btrfs_path *path; | ||
| 261 | struct btrfs_key key; | ||
| 262 | int err = 0; | ||
| 263 | int ret; | ||
| 264 | |||
| 265 | path = btrfs_alloc_path(); | ||
| 266 | if (!path) | ||
| 267 | return -ENOMEM; | ||
| 268 | |||
| 269 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
| 270 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
| 271 | key.offset = 0; | ||
| 272 | |||
| 273 | while (1) { | ||
| 274 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | ||
| 275 | if (ret < 0) { | ||
| 276 | err = ret; | ||
| 277 | break; | ||
| 278 | } | ||
| 279 | |||
| 280 | leaf = path->nodes[0]; | ||
| 281 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
| 282 | ret = btrfs_next_leaf(tree_root, path); | ||
| 283 | if (ret < 0) | ||
| 284 | err = ret; | ||
| 285 | if (ret != 0) | ||
| 286 | break; | ||
| 287 | leaf = path->nodes[0]; | ||
| 288 | } | ||
| 289 | |||
| 290 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 291 | btrfs_release_path(tree_root, path); | ||
| 292 | |||
| 293 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || | ||
| 294 | key.type != BTRFS_ORPHAN_ITEM_KEY) | ||
| 295 | break; | ||
| 296 | |||
| 297 | ret = btrfs_find_dead_roots(tree_root, key.offset); | ||
| 298 | if (ret) { | ||
| 299 | err = ret; | ||
| 300 | break; | ||
| 301 | } | ||
| 302 | |||
| 303 | key.offset++; | ||
| 304 | } | ||
| 305 | |||
| 306 | btrfs_free_path(path); | ||
| 307 | return err; | ||
| 308 | } | ||
| 309 | |||
| 252 | /* drop the root item for 'key' from 'root' */ | 310 | /* drop the root item for 'key' from 'root' */ |
| 253 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 311 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 254 | struct btrfs_key *key) | 312 | struct btrfs_key *key) |
| @@ -273,36 +331,61 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 273 | BUG_ON(refs != 0); | 331 | BUG_ON(refs != 0); |
| 274 | ret = btrfs_del_item(trans, root, path); | 332 | ret = btrfs_del_item(trans, root, path); |
| 275 | out: | 333 | out: |
| 276 | btrfs_release_path(root, path); | ||
| 277 | btrfs_free_path(path); | 334 | btrfs_free_path(path); |
| 278 | return ret; | 335 | return ret; |
| 279 | } | 336 | } |
| 280 | 337 | ||
| 281 | #if 0 /* this will get used when snapshot deletion is implemented */ | ||
| 282 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | 338 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, |
| 283 | struct btrfs_root *tree_root, | 339 | struct btrfs_root *tree_root, |
| 284 | u64 root_id, u8 type, u64 ref_id) | 340 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, |
| 341 | const char *name, int name_len) | ||
| 342 | |||
| 285 | { | 343 | { |
| 344 | struct btrfs_path *path; | ||
| 345 | struct btrfs_root_ref *ref; | ||
| 346 | struct extent_buffer *leaf; | ||
| 286 | struct btrfs_key key; | 347 | struct btrfs_key key; |
| 348 | unsigned long ptr; | ||
| 349 | int err = 0; | ||
| 287 | int ret; | 350 | int ret; |
| 288 | struct btrfs_path *path; | ||
| 289 | 351 | ||
| 290 | path = btrfs_alloc_path(); | 352 | path = btrfs_alloc_path(); |
| 353 | if (!path) | ||
| 354 | return -ENOMEM; | ||
| 291 | 355 | ||
| 292 | key.objectid = root_id; | 356 | key.objectid = root_id; |
| 293 | key.type = type; | 357 | key.type = BTRFS_ROOT_BACKREF_KEY; |
| 294 | key.offset = ref_id; | 358 | key.offset = ref_id; |
| 295 | 359 | again: | |
| 296 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | 360 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); |
| 297 | BUG_ON(ret); | 361 | BUG_ON(ret < 0); |
| 298 | 362 | if (ret == 0) { | |
| 299 | ret = btrfs_del_item(trans, tree_root, path); | 363 | leaf = path->nodes[0]; |
| 300 | BUG_ON(ret); | 364 | ref = btrfs_item_ptr(leaf, path->slots[0], |
| 365 | struct btrfs_root_ref); | ||
| 366 | |||
| 367 | WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); | ||
| 368 | WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); | ||
| 369 | ptr = (unsigned long)(ref + 1); | ||
| 370 | WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); | ||
| 371 | *sequence = btrfs_root_ref_sequence(leaf, ref); | ||
| 372 | |||
| 373 | ret = btrfs_del_item(trans, tree_root, path); | ||
| 374 | BUG_ON(ret); | ||
| 375 | } else | ||
| 376 | err = -ENOENT; | ||
| 377 | |||
| 378 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
| 379 | btrfs_release_path(tree_root, path); | ||
| 380 | key.objectid = ref_id; | ||
| 381 | key.type = BTRFS_ROOT_REF_KEY; | ||
| 382 | key.offset = root_id; | ||
| 383 | goto again; | ||
| 384 | } | ||
| 301 | 385 | ||
| 302 | btrfs_free_path(path); | 386 | btrfs_free_path(path); |
| 303 | return ret; | 387 | return err; |
| 304 | } | 388 | } |
| 305 | #endif | ||
| 306 | 389 | ||
| 307 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 390 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
| 308 | struct btrfs_path *path, | 391 | struct btrfs_path *path, |
| @@ -319,7 +402,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
| 319 | return ret; | 402 | return ret; |
| 320 | } | 403 | } |
| 321 | 404 | ||
| 322 | |||
| 323 | /* | 405 | /* |
| 324 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY | 406 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY |
| 325 | * or BTRFS_ROOT_BACKREF_KEY. | 407 | * or BTRFS_ROOT_BACKREF_KEY. |
| @@ -335,8 +417,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
| 335 | */ | 417 | */ |
| 336 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 418 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
| 337 | struct btrfs_root *tree_root, | 419 | struct btrfs_root *tree_root, |
| 338 | u64 root_id, u8 type, u64 ref_id, | 420 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
| 339 | u64 dirid, u64 sequence, | ||
| 340 | const char *name, int name_len) | 421 | const char *name, int name_len) |
| 341 | { | 422 | { |
| 342 | struct btrfs_key key; | 423 | struct btrfs_key key; |
| @@ -346,13 +427,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
| 346 | struct extent_buffer *leaf; | 427 | struct extent_buffer *leaf; |
| 347 | unsigned long ptr; | 428 | unsigned long ptr; |
| 348 | 429 | ||
| 349 | |||
| 350 | path = btrfs_alloc_path(); | 430 | path = btrfs_alloc_path(); |
| 431 | if (!path) | ||
| 432 | return -ENOMEM; | ||
| 351 | 433 | ||
| 352 | key.objectid = root_id; | 434 | key.objectid = root_id; |
| 353 | key.type = type; | 435 | key.type = BTRFS_ROOT_BACKREF_KEY; |
| 354 | key.offset = ref_id; | 436 | key.offset = ref_id; |
| 355 | 437 | again: | |
| 356 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, | 438 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, |
| 357 | sizeof(*ref) + name_len); | 439 | sizeof(*ref) + name_len); |
| 358 | BUG_ON(ret); | 440 | BUG_ON(ret); |
| @@ -366,6 +448,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
| 366 | write_extent_buffer(leaf, name, ptr, name_len); | 448 | write_extent_buffer(leaf, name, ptr, name_len); |
| 367 | btrfs_mark_buffer_dirty(leaf); | 449 | btrfs_mark_buffer_dirty(leaf); |
| 368 | 450 | ||
| 451 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
| 452 | btrfs_release_path(tree_root, path); | ||
| 453 | key.objectid = ref_id; | ||
| 454 | key.type = BTRFS_ROOT_REF_KEY; | ||
| 455 | key.offset = root_id; | ||
| 456 | goto again; | ||
| 457 | } | ||
| 458 | |||
| 369 | btrfs_free_path(path); | 459 | btrfs_free_path(path); |
| 370 | return ret; | 460 | return 0; |
| 371 | } | 461 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6d6d06cb6dfc..752a5463bf53 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -51,7 +51,7 @@ | |||
| 51 | #include "export.h" | 51 | #include "export.h" |
| 52 | #include "compression.h" | 52 | #include "compression.h" |
| 53 | 53 | ||
| 54 | static struct super_operations btrfs_super_ops; | 54 | static const struct super_operations btrfs_super_ops; |
| 55 | 55 | ||
| 56 | static void btrfs_put_super(struct super_block *sb) | 56 | static void btrfs_put_super(struct super_block *sb) |
| 57 | { | 57 | { |
| @@ -66,7 +66,8 @@ enum { | |||
| 66 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 66 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
| 67 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 67 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
| 68 | Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, | 68 | Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, |
| 69 | Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err, | 69 | Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, |
| 70 | Opt_discard, Opt_err, | ||
| 70 | }; | 71 | }; |
| 71 | 72 | ||
| 72 | static match_table_t tokens = { | 73 | static match_table_t tokens = { |
| @@ -88,6 +89,7 @@ static match_table_t tokens = { | |||
| 88 | {Opt_notreelog, "notreelog"}, | 89 | {Opt_notreelog, "notreelog"}, |
| 89 | {Opt_flushoncommit, "flushoncommit"}, | 90 | {Opt_flushoncommit, "flushoncommit"}, |
| 90 | {Opt_ratio, "metadata_ratio=%d"}, | 91 | {Opt_ratio, "metadata_ratio=%d"}, |
| 92 | {Opt_discard, "discard"}, | ||
| 91 | {Opt_err, NULL}, | 93 | {Opt_err, NULL}, |
| 92 | }; | 94 | }; |
| 93 | 95 | ||
| @@ -257,6 +259,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 257 | info->metadata_ratio); | 259 | info->metadata_ratio); |
| 258 | } | 260 | } |
| 259 | break; | 261 | break; |
| 262 | case Opt_discard: | ||
| 263 | btrfs_set_opt(info->mount_opt, DISCARD); | ||
| 264 | break; | ||
| 260 | default: | 265 | default: |
| 261 | break; | 266 | break; |
| 262 | } | 267 | } |
| @@ -344,7 +349,9 @@ static int btrfs_fill_super(struct super_block *sb, | |||
| 344 | sb->s_export_op = &btrfs_export_ops; | 349 | sb->s_export_op = &btrfs_export_ops; |
| 345 | sb->s_xattr = btrfs_xattr_handlers; | 350 | sb->s_xattr = btrfs_xattr_handlers; |
| 346 | sb->s_time_gran = 1; | 351 | sb->s_time_gran = 1; |
| 352 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | ||
| 347 | sb->s_flags |= MS_POSIXACL; | 353 | sb->s_flags |= MS_POSIXACL; |
| 354 | #endif | ||
| 348 | 355 | ||
| 349 | tree_root = open_ctree(sb, fs_devices, (char *)data); | 356 | tree_root = open_ctree(sb, fs_devices, (char *)data); |
| 350 | 357 | ||
| @@ -675,7 +682,8 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
| 675 | return 0; | 682 | return 0; |
| 676 | } | 683 | } |
| 677 | 684 | ||
| 678 | static struct super_operations btrfs_super_ops = { | 685 | static const struct super_operations btrfs_super_ops = { |
| 686 | .drop_inode = btrfs_drop_inode, | ||
| 679 | .delete_inode = btrfs_delete_inode, | 687 | .delete_inode = btrfs_delete_inode, |
| 680 | .put_super = btrfs_put_super, | 688 | .put_super = btrfs_put_super, |
| 681 | .sync_fs = btrfs_sync_fs, | 689 | .sync_fs = btrfs_sync_fs, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cdbb5022da52..c207e8c32c9b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 104 | { | 104 | { |
| 105 | if (root->ref_cows && root->last_trans < trans->transid) { | 105 | if (root->ref_cows && root->last_trans < trans->transid) { |
| 106 | WARN_ON(root == root->fs_info->extent_root); | 106 | WARN_ON(root == root->fs_info->extent_root); |
| 107 | WARN_ON(root->root_item.refs == 0); | ||
| 108 | WARN_ON(root->commit_root != root->node); | 107 | WARN_ON(root->commit_root != root->node); |
| 109 | 108 | ||
| 110 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 109 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
| @@ -164,8 +163,14 @@ static void wait_current_trans(struct btrfs_root *root) | |||
| 164 | } | 163 | } |
| 165 | } | 164 | } |
| 166 | 165 | ||
| 166 | enum btrfs_trans_type { | ||
| 167 | TRANS_START, | ||
| 168 | TRANS_JOIN, | ||
| 169 | TRANS_USERSPACE, | ||
| 170 | }; | ||
| 171 | |||
| 167 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 172 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
| 168 | int num_blocks, int wait) | 173 | int num_blocks, int type) |
| 169 | { | 174 | { |
| 170 | struct btrfs_trans_handle *h = | 175 | struct btrfs_trans_handle *h = |
| 171 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 176 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
| @@ -173,7 +178,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 173 | 178 | ||
| 174 | mutex_lock(&root->fs_info->trans_mutex); | 179 | mutex_lock(&root->fs_info->trans_mutex); |
| 175 | if (!root->fs_info->log_root_recovering && | 180 | if (!root->fs_info->log_root_recovering && |
| 176 | ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2)) | 181 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || |
| 182 | type == TRANS_USERSPACE)) | ||
| 177 | wait_current_trans(root); | 183 | wait_current_trans(root); |
| 178 | ret = join_transaction(root); | 184 | ret = join_transaction(root); |
| 179 | BUG_ON(ret); | 185 | BUG_ON(ret); |
| @@ -187,6 +193,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 187 | h->alloc_exclude_start = 0; | 193 | h->alloc_exclude_start = 0; |
| 188 | h->delayed_ref_updates = 0; | 194 | h->delayed_ref_updates = 0; |
| 189 | 195 | ||
| 196 | if (!current->journal_info && type != TRANS_USERSPACE) | ||
| 197 | current->journal_info = h; | ||
| 198 | |||
| 190 | root->fs_info->running_transaction->use_count++; | 199 | root->fs_info->running_transaction->use_count++; |
| 191 | record_root_in_trans(h, root); | 200 | record_root_in_trans(h, root); |
| 192 | mutex_unlock(&root->fs_info->trans_mutex); | 201 | mutex_unlock(&root->fs_info->trans_mutex); |
| @@ -196,18 +205,18 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 196 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 205 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 197 | int num_blocks) | 206 | int num_blocks) |
| 198 | { | 207 | { |
| 199 | return start_transaction(root, num_blocks, 1); | 208 | return start_transaction(root, num_blocks, TRANS_START); |
| 200 | } | 209 | } |
| 201 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 210 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
| 202 | int num_blocks) | 211 | int num_blocks) |
| 203 | { | 212 | { |
| 204 | return start_transaction(root, num_blocks, 0); | 213 | return start_transaction(root, num_blocks, TRANS_JOIN); |
| 205 | } | 214 | } |
| 206 | 215 | ||
| 207 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 216 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
| 208 | int num_blocks) | 217 | int num_blocks) |
| 209 | { | 218 | { |
| 210 | return start_transaction(r, num_blocks, 2); | 219 | return start_transaction(r, num_blocks, TRANS_USERSPACE); |
| 211 | } | 220 | } |
| 212 | 221 | ||
| 213 | /* wait for a transaction commit to be fully complete */ | 222 | /* wait for a transaction commit to be fully complete */ |
| @@ -318,6 +327,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 318 | wake_up(&cur_trans->writer_wait); | 327 | wake_up(&cur_trans->writer_wait); |
| 319 | put_transaction(cur_trans); | 328 | put_transaction(cur_trans); |
| 320 | mutex_unlock(&info->trans_mutex); | 329 | mutex_unlock(&info->trans_mutex); |
| 330 | |||
| 331 | if (current->journal_info == trans) | ||
| 332 | current->journal_info = NULL; | ||
| 321 | memset(trans, 0, sizeof(*trans)); | 333 | memset(trans, 0, sizeof(*trans)); |
| 322 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 334 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 323 | 335 | ||
| @@ -339,10 +351,10 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | |||
| 339 | /* | 351 | /* |
| 340 | * when btree blocks are allocated, they have some corresponding bits set for | 352 | * when btree blocks are allocated, they have some corresponding bits set for |
| 341 | * them in one of two extent_io trees. This is used to make sure all of | 353 | * them in one of two extent_io trees. This is used to make sure all of |
| 342 | * those extents are on disk for transaction or log commit | 354 | * those extents are sent to disk but does not wait on them |
| 343 | */ | 355 | */ |
| 344 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 356 | int btrfs_write_marked_extents(struct btrfs_root *root, |
| 345 | struct extent_io_tree *dirty_pages) | 357 | struct extent_io_tree *dirty_pages) |
| 346 | { | 358 | { |
| 347 | int ret; | 359 | int ret; |
| 348 | int err = 0; | 360 | int err = 0; |
| @@ -389,6 +401,29 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
| 389 | page_cache_release(page); | 401 | page_cache_release(page); |
| 390 | } | 402 | } |
| 391 | } | 403 | } |
| 404 | if (err) | ||
| 405 | werr = err; | ||
| 406 | return werr; | ||
| 407 | } | ||
| 408 | |||
| 409 | /* | ||
| 410 | * when btree blocks are allocated, they have some corresponding bits set for | ||
| 411 | * them in one of two extent_io trees. This is used to make sure all of | ||
| 412 | * those extents are on disk for transaction or log commit. We wait | ||
| 413 | * on all the pages and clear them from the dirty pages state tree | ||
| 414 | */ | ||
| 415 | int btrfs_wait_marked_extents(struct btrfs_root *root, | ||
| 416 | struct extent_io_tree *dirty_pages) | ||
| 417 | { | ||
| 418 | int ret; | ||
| 419 | int err = 0; | ||
| 420 | int werr = 0; | ||
| 421 | struct page *page; | ||
| 422 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
| 423 | u64 start = 0; | ||
| 424 | u64 end; | ||
| 425 | unsigned long index; | ||
| 426 | |||
| 392 | while (1) { | 427 | while (1) { |
| 393 | ret = find_first_extent_bit(dirty_pages, 0, &start, &end, | 428 | ret = find_first_extent_bit(dirty_pages, 0, &start, &end, |
| 394 | EXTENT_DIRTY); | 429 | EXTENT_DIRTY); |
| @@ -419,6 +454,22 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | |||
| 419 | return werr; | 454 | return werr; |
| 420 | } | 455 | } |
| 421 | 456 | ||
| 457 | /* | ||
| 458 | * when btree blocks are allocated, they have some corresponding bits set for | ||
| 459 | * them in one of two extent_io trees. This is used to make sure all of | ||
| 460 | * those extents are on disk for transaction or log commit | ||
| 461 | */ | ||
| 462 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | ||
| 463 | struct extent_io_tree *dirty_pages) | ||
| 464 | { | ||
| 465 | int ret; | ||
| 466 | int ret2; | ||
| 467 | |||
| 468 | ret = btrfs_write_marked_extents(root, dirty_pages); | ||
| 469 | ret2 = btrfs_wait_marked_extents(root, dirty_pages); | ||
| 470 | return ret || ret2; | ||
| 471 | } | ||
| 472 | |||
| 422 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 473 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
| 423 | struct btrfs_root *root) | 474 | struct btrfs_root *root) |
| 424 | { | 475 | { |
| @@ -720,7 +771,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 720 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 771 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
| 721 | 772 | ||
| 722 | key.objectid = objectid; | 773 | key.objectid = objectid; |
| 723 | key.offset = 0; | 774 | /* record when the snapshot was created in key.offset */ |
| 775 | key.offset = trans->transid; | ||
| 724 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 776 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
| 725 | 777 | ||
| 726 | old = btrfs_lock_root_node(root); | 778 | old = btrfs_lock_root_node(root); |
| @@ -743,6 +795,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 743 | memcpy(&pending->root_key, &key, sizeof(key)); | 795 | memcpy(&pending->root_key, &key, sizeof(key)); |
| 744 | fail: | 796 | fail: |
| 745 | kfree(new_root_item); | 797 | kfree(new_root_item); |
| 798 | btrfs_unreserve_metadata_space(root, 6); | ||
| 746 | return ret; | 799 | return ret; |
| 747 | } | 800 | } |
| 748 | 801 | ||
| @@ -778,24 +831,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, | |||
| 778 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 831 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
| 779 | BUG_ON(ret); | 832 | BUG_ON(ret); |
| 780 | 833 | ||
| 781 | /* add the backref first */ | ||
| 782 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 834 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, |
| 783 | pending->root_key.objectid, | 835 | pending->root_key.objectid, |
| 784 | BTRFS_ROOT_BACKREF_KEY, | ||
| 785 | parent_root->root_key.objectid, | 836 | parent_root->root_key.objectid, |
| 786 | parent_inode->i_ino, index, pending->name, | 837 | parent_inode->i_ino, index, pending->name, |
| 787 | namelen); | 838 | namelen); |
| 788 | 839 | ||
| 789 | BUG_ON(ret); | 840 | BUG_ON(ret); |
| 790 | 841 | ||
| 791 | /* now add the forward ref */ | ||
| 792 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | ||
| 793 | parent_root->root_key.objectid, | ||
| 794 | BTRFS_ROOT_REF_KEY, | ||
| 795 | pending->root_key.objectid, | ||
| 796 | parent_inode->i_ino, index, pending->name, | ||
| 797 | namelen); | ||
| 798 | |||
| 799 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); | 842 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); |
| 800 | d_instantiate(pending->dentry, inode); | 843 | d_instantiate(pending->dentry, inode); |
| 801 | fail: | 844 | fail: |
| @@ -874,7 +917,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 874 | unsigned long timeout = 1; | 917 | unsigned long timeout = 1; |
| 875 | struct btrfs_transaction *cur_trans; | 918 | struct btrfs_transaction *cur_trans; |
| 876 | struct btrfs_transaction *prev_trans = NULL; | 919 | struct btrfs_transaction *prev_trans = NULL; |
| 877 | struct extent_io_tree *pinned_copy; | ||
| 878 | DEFINE_WAIT(wait); | 920 | DEFINE_WAIT(wait); |
| 879 | int ret; | 921 | int ret; |
| 880 | int should_grow = 0; | 922 | int should_grow = 0; |
| @@ -915,13 +957,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 915 | return 0; | 957 | return 0; |
| 916 | } | 958 | } |
| 917 | 959 | ||
| 918 | pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); | ||
| 919 | if (!pinned_copy) | ||
| 920 | return -ENOMEM; | ||
| 921 | |||
| 922 | extent_io_tree_init(pinned_copy, | ||
| 923 | root->fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 924 | |||
| 925 | trans->transaction->in_commit = 1; | 960 | trans->transaction->in_commit = 1; |
| 926 | trans->transaction->blocked = 1; | 961 | trans->transaction->blocked = 1; |
| 927 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 962 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
| @@ -1019,6 +1054,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1019 | ret = commit_cowonly_roots(trans, root); | 1054 | ret = commit_cowonly_roots(trans, root); |
| 1020 | BUG_ON(ret); | 1055 | BUG_ON(ret); |
| 1021 | 1056 | ||
| 1057 | btrfs_prepare_extent_commit(trans, root); | ||
| 1058 | |||
| 1022 | cur_trans = root->fs_info->running_transaction; | 1059 | cur_trans = root->fs_info->running_transaction; |
| 1023 | spin_lock(&root->fs_info->new_trans_lock); | 1060 | spin_lock(&root->fs_info->new_trans_lock); |
| 1024 | root->fs_info->running_transaction = NULL; | 1061 | root->fs_info->running_transaction = NULL; |
| @@ -1042,8 +1079,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1042 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | 1079 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, |
| 1043 | sizeof(root->fs_info->super_copy)); | 1080 | sizeof(root->fs_info->super_copy)); |
| 1044 | 1081 | ||
| 1045 | btrfs_copy_pinned(root, pinned_copy); | ||
| 1046 | |||
| 1047 | trans->transaction->blocked = 0; | 1082 | trans->transaction->blocked = 0; |
| 1048 | 1083 | ||
| 1049 | wake_up(&root->fs_info->transaction_wait); | 1084 | wake_up(&root->fs_info->transaction_wait); |
| @@ -1059,8 +1094,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1059 | */ | 1094 | */ |
| 1060 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1095 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1061 | 1096 | ||
| 1062 | btrfs_finish_extent_commit(trans, root, pinned_copy); | 1097 | btrfs_finish_extent_commit(trans, root); |
| 1063 | kfree(pinned_copy); | ||
| 1064 | 1098 | ||
| 1065 | /* do the directory inserts of any pending snapshot creations */ | 1099 | /* do the directory inserts of any pending snapshot creations */ |
| 1066 | finish_pending_snapshots(trans, root->fs_info); | 1100 | finish_pending_snapshots(trans, root->fs_info); |
| @@ -1078,6 +1112,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1078 | 1112 | ||
| 1079 | mutex_unlock(&root->fs_info->trans_mutex); | 1113 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1080 | 1114 | ||
| 1115 | if (current->journal_info == trans) | ||
| 1116 | current->journal_info = NULL; | ||
| 1117 | |||
| 1081 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1118 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1082 | return ret; | 1119 | return ret; |
| 1083 | } | 1120 | } |
| @@ -1096,8 +1133,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
| 1096 | 1133 | ||
| 1097 | while (!list_empty(&list)) { | 1134 | while (!list_empty(&list)) { |
| 1098 | root = list_entry(list.next, struct btrfs_root, root_list); | 1135 | root = list_entry(list.next, struct btrfs_root, root_list); |
| 1099 | list_del_init(&root->root_list); | 1136 | list_del(&root->root_list); |
| 1100 | btrfs_drop_snapshot(root, 0); | 1137 | |
| 1138 | if (btrfs_header_backref_rev(root->node) < | ||
| 1139 | BTRFS_MIXED_BACKREF_REV) | ||
| 1140 | btrfs_drop_snapshot(root, 0); | ||
| 1141 | else | ||
| 1142 | btrfs_drop_snapshot(root, 1); | ||
| 1101 | } | 1143 | } |
| 1102 | return 0; | 1144 | return 0; |
| 1103 | } | 1145 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 663c67404918..d4e3e7a6938c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
| 79 | struct inode *inode) | 79 | struct inode *inode) |
| 80 | { | 80 | { |
| 81 | BTRFS_I(inode)->last_trans = trans->transaction->transid; | 81 | BTRFS_I(inode)->last_trans = trans->transaction->transid; |
| 82 | BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; | ||
| 82 | } | 83 | } |
| 83 | 84 | ||
| 84 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| @@ -107,5 +108,9 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 107 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
| 108 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, | 109 | int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, |
| 109 | struct extent_io_tree *dirty_pages); | 110 | struct extent_io_tree *dirty_pages); |
| 111 | int btrfs_write_marked_extents(struct btrfs_root *root, | ||
| 112 | struct extent_io_tree *dirty_pages); | ||
| 113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | ||
| 114 | struct extent_io_tree *dirty_pages); | ||
| 110 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
| 111 | #endif | 116 | #endif |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d91b0de7c502..741666a7676a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -137,11 +137,20 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 137 | 137 | ||
| 138 | mutex_lock(&root->log_mutex); | 138 | mutex_lock(&root->log_mutex); |
| 139 | if (root->log_root) { | 139 | if (root->log_root) { |
| 140 | if (!root->log_start_pid) { | ||
| 141 | root->log_start_pid = current->pid; | ||
| 142 | root->log_multiple_pids = false; | ||
| 143 | } else if (root->log_start_pid != current->pid) { | ||
| 144 | root->log_multiple_pids = true; | ||
| 145 | } | ||
| 146 | |||
| 140 | root->log_batch++; | 147 | root->log_batch++; |
| 141 | atomic_inc(&root->log_writers); | 148 | atomic_inc(&root->log_writers); |
| 142 | mutex_unlock(&root->log_mutex); | 149 | mutex_unlock(&root->log_mutex); |
| 143 | return 0; | 150 | return 0; |
| 144 | } | 151 | } |
| 152 | root->log_multiple_pids = false; | ||
| 153 | root->log_start_pid = current->pid; | ||
| 145 | mutex_lock(&root->fs_info->tree_log_mutex); | 154 | mutex_lock(&root->fs_info->tree_log_mutex); |
| 146 | if (!root->fs_info->log_root_tree) { | 155 | if (!root->fs_info->log_root_tree) { |
| 147 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 156 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
| @@ -263,8 +272,8 @@ static int process_one_buffer(struct btrfs_root *log, | |||
| 263 | struct walk_control *wc, u64 gen) | 272 | struct walk_control *wc, u64 gen) |
| 264 | { | 273 | { |
| 265 | if (wc->pin) | 274 | if (wc->pin) |
| 266 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 275 | btrfs_pin_extent(log->fs_info->extent_root, |
| 267 | eb->start, eb->len, 1); | 276 | eb->start, eb->len, 0); |
| 268 | 277 | ||
| 269 | if (btrfs_buffer_uptodate(eb, gen)) { | 278 | if (btrfs_buffer_uptodate(eb, gen)) { |
| 270 | if (wc->write) | 279 | if (wc->write) |
| @@ -534,7 +543,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 534 | saved_nbytes = inode_get_bytes(inode); | 543 | saved_nbytes = inode_get_bytes(inode); |
| 535 | /* drop any overlapping extents */ | 544 | /* drop any overlapping extents */ |
| 536 | ret = btrfs_drop_extents(trans, root, inode, | 545 | ret = btrfs_drop_extents(trans, root, inode, |
| 537 | start, extent_end, extent_end, start, &alloc_hint); | 546 | start, extent_end, extent_end, start, &alloc_hint, 1); |
| 538 | BUG_ON(ret); | 547 | BUG_ON(ret); |
| 539 | 548 | ||
| 540 | if (found_type == BTRFS_FILE_EXTENT_REG || | 549 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| @@ -1971,6 +1980,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 1971 | int ret; | 1980 | int ret; |
| 1972 | struct btrfs_root *log = root->log_root; | 1981 | struct btrfs_root *log = root->log_root; |
| 1973 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 1982 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
| 1983 | u64 log_transid = 0; | ||
| 1974 | 1984 | ||
| 1975 | mutex_lock(&root->log_mutex); | 1985 | mutex_lock(&root->log_mutex); |
| 1976 | index1 = root->log_transid % 2; | 1986 | index1 = root->log_transid % 2; |
| @@ -1987,10 +1997,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 1987 | 1997 | ||
| 1988 | while (1) { | 1998 | while (1) { |
| 1989 | unsigned long batch = root->log_batch; | 1999 | unsigned long batch = root->log_batch; |
| 1990 | mutex_unlock(&root->log_mutex); | 2000 | if (root->log_multiple_pids) { |
| 1991 | schedule_timeout_uninterruptible(1); | 2001 | mutex_unlock(&root->log_mutex); |
| 1992 | mutex_lock(&root->log_mutex); | 2002 | schedule_timeout_uninterruptible(1); |
| 1993 | 2003 | mutex_lock(&root->log_mutex); | |
| 2004 | } | ||
| 1994 | wait_for_writer(trans, root); | 2005 | wait_for_writer(trans, root); |
| 1995 | if (batch == root->log_batch) | 2006 | if (batch == root->log_batch) |
| 1996 | break; | 2007 | break; |
| @@ -2003,14 +2014,19 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2003 | goto out; | 2014 | goto out; |
| 2004 | } | 2015 | } |
| 2005 | 2016 | ||
| 2006 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); | 2017 | /* we start IO on all the marked extents here, but we don't actually |
| 2018 | * wait for them until later. | ||
| 2019 | */ | ||
| 2020 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); | ||
| 2007 | BUG_ON(ret); | 2021 | BUG_ON(ret); |
| 2008 | 2022 | ||
| 2009 | btrfs_set_root_node(&log->root_item, log->node); | 2023 | btrfs_set_root_node(&log->root_item, log->node); |
| 2010 | 2024 | ||
| 2011 | root->log_batch = 0; | 2025 | root->log_batch = 0; |
| 2026 | log_transid = root->log_transid; | ||
| 2012 | root->log_transid++; | 2027 | root->log_transid++; |
| 2013 | log->log_transid = root->log_transid; | 2028 | log->log_transid = root->log_transid; |
| 2029 | root->log_start_pid = 0; | ||
| 2014 | smp_mb(); | 2030 | smp_mb(); |
| 2015 | /* | 2031 | /* |
| 2016 | * log tree has been flushed to disk, new modifications of | 2032 | * log tree has been flushed to disk, new modifications of |
| @@ -2036,6 +2052,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2036 | 2052 | ||
| 2037 | index2 = log_root_tree->log_transid % 2; | 2053 | index2 = log_root_tree->log_transid % 2; |
| 2038 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2054 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
| 2055 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | ||
| 2039 | wait_log_commit(trans, log_root_tree, | 2056 | wait_log_commit(trans, log_root_tree, |
| 2040 | log_root_tree->log_transid); | 2057 | log_root_tree->log_transid); |
| 2041 | mutex_unlock(&log_root_tree->log_mutex); | 2058 | mutex_unlock(&log_root_tree->log_mutex); |
| @@ -2055,6 +2072,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2055 | * check the full commit flag again | 2072 | * check the full commit flag again |
| 2056 | */ | 2073 | */ |
| 2057 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2074 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
| 2075 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | ||
| 2058 | mutex_unlock(&log_root_tree->log_mutex); | 2076 | mutex_unlock(&log_root_tree->log_mutex); |
| 2059 | ret = -EAGAIN; | 2077 | ret = -EAGAIN; |
| 2060 | goto out_wake_log_root; | 2078 | goto out_wake_log_root; |
| @@ -2063,6 +2081,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2063 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2081 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, |
| 2064 | &log_root_tree->dirty_log_pages); | 2082 | &log_root_tree->dirty_log_pages); |
| 2065 | BUG_ON(ret); | 2083 | BUG_ON(ret); |
| 2084 | btrfs_wait_marked_extents(log, &log->dirty_log_pages); | ||
| 2066 | 2085 | ||
| 2067 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, | 2086 | btrfs_set_super_log_root(&root->fs_info->super_for_commit, |
| 2068 | log_root_tree->node->start); | 2087 | log_root_tree->node->start); |
| @@ -2082,9 +2101,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2082 | * the running transaction open, so a full commit can't hop | 2101 | * the running transaction open, so a full commit can't hop |
| 2083 | * in and cause problems either. | 2102 | * in and cause problems either. |
| 2084 | */ | 2103 | */ |
| 2085 | write_ctree_super(trans, root->fs_info->tree_root, 2); | 2104 | write_ctree_super(trans, root->fs_info->tree_root, 1); |
| 2086 | ret = 0; | 2105 | ret = 0; |
| 2087 | 2106 | ||
| 2107 | mutex_lock(&root->log_mutex); | ||
| 2108 | if (root->last_log_commit < log_transid) | ||
| 2109 | root->last_log_commit = log_transid; | ||
| 2110 | mutex_unlock(&root->log_mutex); | ||
| 2111 | |||
| 2088 | out_wake_log_root: | 2112 | out_wake_log_root: |
| 2089 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2113 | atomic_set(&log_root_tree->log_commit[index2], 0); |
| 2090 | smp_mb(); | 2114 | smp_mb(); |
| @@ -2605,7 +2629,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2605 | extent); | 2629 | extent); |
| 2606 | cs = btrfs_file_extent_offset(src, extent); | 2630 | cs = btrfs_file_extent_offset(src, extent); |
| 2607 | cl = btrfs_file_extent_num_bytes(src, | 2631 | cl = btrfs_file_extent_num_bytes(src, |
| 2608 | extent);; | 2632 | extent); |
| 2609 | if (btrfs_file_extent_compression(src, | 2633 | if (btrfs_file_extent_compression(src, |
| 2610 | extent)) { | 2634 | extent)) { |
| 2611 | cs = 0; | 2635 | cs = 0; |
| @@ -2841,7 +2865,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
| 2841 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 2865 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
| 2842 | break; | 2866 | break; |
| 2843 | 2867 | ||
| 2844 | if (parent == sb->s_root) | 2868 | if (IS_ROOT(parent)) |
| 2845 | break; | 2869 | break; |
| 2846 | 2870 | ||
| 2847 | parent = parent->d_parent; | 2871 | parent = parent->d_parent; |
| @@ -2852,6 +2876,21 @@ out: | |||
| 2852 | return ret; | 2876 | return ret; |
| 2853 | } | 2877 | } |
| 2854 | 2878 | ||
| 2879 | static int inode_in_log(struct btrfs_trans_handle *trans, | ||
| 2880 | struct inode *inode) | ||
| 2881 | { | ||
| 2882 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2883 | int ret = 0; | ||
| 2884 | |||
| 2885 | mutex_lock(&root->log_mutex); | ||
| 2886 | if (BTRFS_I(inode)->logged_trans == trans->transid && | ||
| 2887 | BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) | ||
| 2888 | ret = 1; | ||
| 2889 | mutex_unlock(&root->log_mutex); | ||
| 2890 | return ret; | ||
| 2891 | } | ||
| 2892 | |||
| 2893 | |||
| 2855 | /* | 2894 | /* |
| 2856 | * helper function around btrfs_log_inode to make sure newly created | 2895 | * helper function around btrfs_log_inode to make sure newly created |
| 2857 | * parent directories also end up in the log. A minimal inode and backref | 2896 | * parent directories also end up in the log. A minimal inode and backref |
| @@ -2880,11 +2919,22 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2880 | goto end_no_trans; | 2919 | goto end_no_trans; |
| 2881 | } | 2920 | } |
| 2882 | 2921 | ||
| 2922 | if (root != BTRFS_I(inode)->root || | ||
| 2923 | btrfs_root_refs(&root->root_item) == 0) { | ||
| 2924 | ret = 1; | ||
| 2925 | goto end_no_trans; | ||
| 2926 | } | ||
| 2927 | |||
| 2883 | ret = check_parent_dirs_for_sync(trans, inode, parent, | 2928 | ret = check_parent_dirs_for_sync(trans, inode, parent, |
| 2884 | sb, last_committed); | 2929 | sb, last_committed); |
| 2885 | if (ret) | 2930 | if (ret) |
| 2886 | goto end_no_trans; | 2931 | goto end_no_trans; |
| 2887 | 2932 | ||
| 2933 | if (inode_in_log(trans, inode)) { | ||
| 2934 | ret = BTRFS_NO_LOG_SYNC; | ||
| 2935 | goto end_no_trans; | ||
| 2936 | } | ||
| 2937 | |||
| 2888 | start_log_trans(trans, root); | 2938 | start_log_trans(trans, root); |
| 2889 | 2939 | ||
| 2890 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2940 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| @@ -2907,12 +2957,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2907 | break; | 2957 | break; |
| 2908 | 2958 | ||
| 2909 | inode = parent->d_inode; | 2959 | inode = parent->d_inode; |
| 2960 | if (root != BTRFS_I(inode)->root) | ||
| 2961 | break; | ||
| 2962 | |||
| 2910 | if (BTRFS_I(inode)->generation > | 2963 | if (BTRFS_I(inode)->generation > |
| 2911 | root->fs_info->last_trans_committed) { | 2964 | root->fs_info->last_trans_committed) { |
| 2912 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2965 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 2913 | BUG_ON(ret); | 2966 | BUG_ON(ret); |
| 2914 | } | 2967 | } |
| 2915 | if (parent == sb->s_root) | 2968 | if (IS_ROOT(parent)) |
| 2916 | break; | 2969 | break; |
| 2917 | 2970 | ||
| 2918 | parent = parent->d_parent; | 2971 | parent = parent->d_parent; |
| @@ -2951,7 +3004,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
| 2951 | struct btrfs_key tmp_key; | 3004 | struct btrfs_key tmp_key; |
| 2952 | struct btrfs_root *log; | 3005 | struct btrfs_root *log; |
| 2953 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; | 3006 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; |
| 2954 | u64 highest_inode; | ||
| 2955 | struct walk_control wc = { | 3007 | struct walk_control wc = { |
| 2956 | .process_func = process_one_buffer, | 3008 | .process_func = process_one_buffer, |
| 2957 | .stage = 0, | 3009 | .stage = 0, |
| @@ -3010,11 +3062,6 @@ again: | |||
| 3010 | path); | 3062 | path); |
| 3011 | BUG_ON(ret); | 3063 | BUG_ON(ret); |
| 3012 | } | 3064 | } |
| 3013 | ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); | ||
| 3014 | if (ret == 0) { | ||
| 3015 | wc.replay_dest->highest_inode = highest_inode; | ||
| 3016 | wc.replay_dest->last_inode_alloc = highest_inode; | ||
| 3017 | } | ||
| 3018 | 3065 | ||
| 3019 | key.offset = found_key.offset - 1; | 3066 | key.offset = found_key.offset - 1; |
| 3020 | wc.replay_dest->log_root = NULL; | 3067 | wc.replay_dest->log_root = NULL; |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index d09c7609e16b..0776eacb5083 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
| @@ -19,6 +19,9 @@ | |||
| 19 | #ifndef __TREE_LOG_ | 19 | #ifndef __TREE_LOG_ |
| 20 | #define __TREE_LOG_ | 20 | #define __TREE_LOG_ |
| 21 | 21 | ||
| 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | ||
| 23 | #define BTRFS_NO_LOG_SYNC 256 | ||
| 24 | |||
| 22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 23 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
| 24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cf405b0828d..7eda483d7b5a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -276,7 +276,7 @@ loop_lock: | |||
| 276 | * is now congested. Back off and let other work structs | 276 | * is now congested. Back off and let other work structs |
| 277 | * run instead | 277 | * run instead |
| 278 | */ | 278 | */ |
| 279 | if (pending && bdi_write_congested(bdi) && batch_run > 32 && | 279 | if (pending && bdi_write_congested(bdi) && batch_run > 8 && |
| 280 | fs_info->fs_devices->open_devices > 1) { | 280 | fs_info->fs_devices->open_devices > 1) { |
| 281 | struct io_context *ioc; | 281 | struct io_context *ioc; |
| 282 | 282 | ||
| @@ -446,8 +446,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
| 446 | goto error; | 446 | goto error; |
| 447 | 447 | ||
| 448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); | 448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); |
| 449 | if (!device->name) | 449 | if (!device->name) { |
| 450 | kfree(device); | ||
| 450 | goto error; | 451 | goto error; |
| 452 | } | ||
| 451 | 453 | ||
| 452 | device->devid = orig_dev->devid; | 454 | device->devid = orig_dev->devid; |
| 453 | device->work.func = pending_bios_fn; | 455 | device->work.func = pending_bios_fn; |
| @@ -719,10 +721,9 @@ error: | |||
| 719 | * called very infrequently and that a given device has a small number | 721 | * called very infrequently and that a given device has a small number |
| 720 | * of extents | 722 | * of extents |
| 721 | */ | 723 | */ |
| 722 | static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, | 724 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
| 723 | struct btrfs_device *device, | 725 | struct btrfs_device *device, u64 num_bytes, |
| 724 | u64 num_bytes, u64 *start, | 726 | u64 *start, u64 *max_avail) |
| 725 | u64 *max_avail) | ||
| 726 | { | 727 | { |
| 727 | struct btrfs_key key; | 728 | struct btrfs_key key; |
| 728 | struct btrfs_root *root = device->dev_root; | 729 | struct btrfs_root *root = device->dev_root; |
| @@ -1736,6 +1737,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1736 | extent_root = root->fs_info->extent_root; | 1737 | extent_root = root->fs_info->extent_root; |
| 1737 | em_tree = &root->fs_info->mapping_tree.map_tree; | 1738 | em_tree = &root->fs_info->mapping_tree.map_tree; |
| 1738 | 1739 | ||
| 1740 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
| 1741 | if (ret) | ||
| 1742 | return -ENOSPC; | ||
| 1743 | |||
| 1739 | /* step one, relocate all the extents inside this chunk */ | 1744 | /* step one, relocate all the extents inside this chunk */ |
| 1740 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1745 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
| 1741 | BUG_ON(ret); | 1746 | BUG_ON(ret); |
| @@ -1749,9 +1754,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1749 | * step two, delete the device extents and the | 1754 | * step two, delete the device extents and the |
| 1750 | * chunk tree entries | 1755 | * chunk tree entries |
| 1751 | */ | 1756 | */ |
| 1752 | spin_lock(&em_tree->lock); | 1757 | read_lock(&em_tree->lock); |
| 1753 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | 1758 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
| 1754 | spin_unlock(&em_tree->lock); | 1759 | read_unlock(&em_tree->lock); |
| 1755 | 1760 | ||
| 1756 | BUG_ON(em->start > chunk_offset || | 1761 | BUG_ON(em->start > chunk_offset || |
| 1757 | em->start + em->len < chunk_offset); | 1762 | em->start + em->len < chunk_offset); |
| @@ -1780,9 +1785,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1780 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 1785 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); |
| 1781 | BUG_ON(ret); | 1786 | BUG_ON(ret); |
| 1782 | 1787 | ||
| 1783 | spin_lock(&em_tree->lock); | 1788 | write_lock(&em_tree->lock); |
| 1784 | remove_extent_mapping(em_tree, em); | 1789 | remove_extent_mapping(em_tree, em); |
| 1785 | spin_unlock(&em_tree->lock); | 1790 | write_unlock(&em_tree->lock); |
| 1786 | 1791 | ||
| 1787 | kfree(map); | 1792 | kfree(map); |
| 1788 | em->bdev = NULL; | 1793 | em->bdev = NULL; |
| @@ -1807,12 +1812,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
| 1807 | struct btrfs_key found_key; | 1812 | struct btrfs_key found_key; |
| 1808 | u64 chunk_tree = chunk_root->root_key.objectid; | 1813 | u64 chunk_tree = chunk_root->root_key.objectid; |
| 1809 | u64 chunk_type; | 1814 | u64 chunk_type; |
| 1815 | bool retried = false; | ||
| 1816 | int failed = 0; | ||
| 1810 | int ret; | 1817 | int ret; |
| 1811 | 1818 | ||
| 1812 | path = btrfs_alloc_path(); | 1819 | path = btrfs_alloc_path(); |
| 1813 | if (!path) | 1820 | if (!path) |
| 1814 | return -ENOMEM; | 1821 | return -ENOMEM; |
| 1815 | 1822 | ||
| 1823 | again: | ||
| 1816 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 1824 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
| 1817 | key.offset = (u64)-1; | 1825 | key.offset = (u64)-1; |
| 1818 | key.type = BTRFS_CHUNK_ITEM_KEY; | 1826 | key.type = BTRFS_CHUNK_ITEM_KEY; |
| @@ -1842,7 +1850,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
| 1842 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, | 1850 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, |
| 1843 | found_key.objectid, | 1851 | found_key.objectid, |
| 1844 | found_key.offset); | 1852 | found_key.offset); |
| 1845 | BUG_ON(ret); | 1853 | if (ret == -ENOSPC) |
| 1854 | failed++; | ||
| 1855 | else if (ret) | ||
| 1856 | BUG(); | ||
| 1846 | } | 1857 | } |
| 1847 | 1858 | ||
| 1848 | if (found_key.offset == 0) | 1859 | if (found_key.offset == 0) |
| @@ -1850,6 +1861,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
| 1850 | key.offset = found_key.offset - 1; | 1861 | key.offset = found_key.offset - 1; |
| 1851 | } | 1862 | } |
| 1852 | ret = 0; | 1863 | ret = 0; |
| 1864 | if (failed && !retried) { | ||
| 1865 | failed = 0; | ||
| 1866 | retried = true; | ||
| 1867 | goto again; | ||
| 1868 | } else if (failed && retried) { | ||
| 1869 | WARN_ON(1); | ||
| 1870 | ret = -ENOSPC; | ||
| 1871 | } | ||
| 1853 | error: | 1872 | error: |
| 1854 | btrfs_free_path(path); | 1873 | btrfs_free_path(path); |
| 1855 | return ret; | 1874 | return ret; |
| @@ -1894,6 +1913,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1894 | continue; | 1913 | continue; |
| 1895 | 1914 | ||
| 1896 | ret = btrfs_shrink_device(device, old_size - size_to_free); | 1915 | ret = btrfs_shrink_device(device, old_size - size_to_free); |
| 1916 | if (ret == -ENOSPC) | ||
| 1917 | break; | ||
| 1897 | BUG_ON(ret); | 1918 | BUG_ON(ret); |
| 1898 | 1919 | ||
| 1899 | trans = btrfs_start_transaction(dev_root, 1); | 1920 | trans = btrfs_start_transaction(dev_root, 1); |
| @@ -1938,9 +1959,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1938 | chunk = btrfs_item_ptr(path->nodes[0], | 1959 | chunk = btrfs_item_ptr(path->nodes[0], |
| 1939 | path->slots[0], | 1960 | path->slots[0], |
| 1940 | struct btrfs_chunk); | 1961 | struct btrfs_chunk); |
| 1941 | key.offset = found_key.offset; | ||
| 1942 | /* chunk zero is special */ | 1962 | /* chunk zero is special */ |
| 1943 | if (key.offset == 0) | 1963 | if (found_key.offset == 0) |
| 1944 | break; | 1964 | break; |
| 1945 | 1965 | ||
| 1946 | btrfs_release_path(chunk_root, path); | 1966 | btrfs_release_path(chunk_root, path); |
| @@ -1948,7 +1968,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1948 | chunk_root->root_key.objectid, | 1968 | chunk_root->root_key.objectid, |
| 1949 | found_key.objectid, | 1969 | found_key.objectid, |
| 1950 | found_key.offset); | 1970 | found_key.offset); |
| 1951 | BUG_ON(ret); | 1971 | BUG_ON(ret && ret != -ENOSPC); |
| 1972 | key.offset = found_key.offset - 1; | ||
| 1952 | } | 1973 | } |
| 1953 | ret = 0; | 1974 | ret = 0; |
| 1954 | error: | 1975 | error: |
| @@ -1974,10 +1995,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 1974 | u64 chunk_offset; | 1995 | u64 chunk_offset; |
| 1975 | int ret; | 1996 | int ret; |
| 1976 | int slot; | 1997 | int slot; |
| 1998 | int failed = 0; | ||
| 1999 | bool retried = false; | ||
| 1977 | struct extent_buffer *l; | 2000 | struct extent_buffer *l; |
| 1978 | struct btrfs_key key; | 2001 | struct btrfs_key key; |
| 1979 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2002 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; |
| 1980 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2003 | u64 old_total = btrfs_super_total_bytes(super_copy); |
| 2004 | u64 old_size = device->total_bytes; | ||
| 1981 | u64 diff = device->total_bytes - new_size; | 2005 | u64 diff = device->total_bytes - new_size; |
| 1982 | 2006 | ||
| 1983 | if (new_size >= device->total_bytes) | 2007 | if (new_size >= device->total_bytes) |
| @@ -1987,12 +2011,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 1987 | if (!path) | 2011 | if (!path) |
| 1988 | return -ENOMEM; | 2012 | return -ENOMEM; |
| 1989 | 2013 | ||
| 1990 | trans = btrfs_start_transaction(root, 1); | ||
| 1991 | if (!trans) { | ||
| 1992 | ret = -ENOMEM; | ||
| 1993 | goto done; | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | path->reada = 2; | 2014 | path->reada = 2; |
| 1997 | 2015 | ||
| 1998 | lock_chunks(root); | 2016 | lock_chunks(root); |
| @@ -2001,8 +2019,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2001 | if (device->writeable) | 2019 | if (device->writeable) |
| 2002 | device->fs_devices->total_rw_bytes -= diff; | 2020 | device->fs_devices->total_rw_bytes -= diff; |
| 2003 | unlock_chunks(root); | 2021 | unlock_chunks(root); |
| 2004 | btrfs_end_transaction(trans, root); | ||
| 2005 | 2022 | ||
| 2023 | again: | ||
| 2006 | key.objectid = device->devid; | 2024 | key.objectid = device->devid; |
| 2007 | key.offset = (u64)-1; | 2025 | key.offset = (u64)-1; |
| 2008 | key.type = BTRFS_DEV_EXTENT_KEY; | 2026 | key.type = BTRFS_DEV_EXTENT_KEY; |
| @@ -2017,6 +2035,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2017 | goto done; | 2035 | goto done; |
| 2018 | if (ret) { | 2036 | if (ret) { |
| 2019 | ret = 0; | 2037 | ret = 0; |
| 2038 | btrfs_release_path(root, path); | ||
| 2020 | break; | 2039 | break; |
| 2021 | } | 2040 | } |
| 2022 | 2041 | ||
| @@ -2024,14 +2043,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2024 | slot = path->slots[0]; | 2043 | slot = path->slots[0]; |
| 2025 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 2044 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
| 2026 | 2045 | ||
| 2027 | if (key.objectid != device->devid) | 2046 | if (key.objectid != device->devid) { |
| 2047 | btrfs_release_path(root, path); | ||
| 2028 | break; | 2048 | break; |
| 2049 | } | ||
| 2029 | 2050 | ||
| 2030 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 2051 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
| 2031 | length = btrfs_dev_extent_length(l, dev_extent); | 2052 | length = btrfs_dev_extent_length(l, dev_extent); |
| 2032 | 2053 | ||
| 2033 | if (key.offset + length <= new_size) | 2054 | if (key.offset + length <= new_size) { |
| 2055 | btrfs_release_path(root, path); | ||
| 2034 | break; | 2056 | break; |
| 2057 | } | ||
| 2035 | 2058 | ||
| 2036 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 2059 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
| 2037 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 2060 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
| @@ -2040,8 +2063,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2040 | 2063 | ||
| 2041 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | 2064 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, |
| 2042 | chunk_offset); | 2065 | chunk_offset); |
| 2043 | if (ret) | 2066 | if (ret && ret != -ENOSPC) |
| 2044 | goto done; | 2067 | goto done; |
| 2068 | if (ret == -ENOSPC) | ||
| 2069 | failed++; | ||
| 2070 | key.offset -= 1; | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | if (failed && !retried) { | ||
| 2074 | failed = 0; | ||
| 2075 | retried = true; | ||
| 2076 | goto again; | ||
| 2077 | } else if (failed && retried) { | ||
| 2078 | ret = -ENOSPC; | ||
| 2079 | lock_chunks(root); | ||
| 2080 | |||
| 2081 | device->total_bytes = old_size; | ||
| 2082 | if (device->writeable) | ||
| 2083 | device->fs_devices->total_rw_bytes += diff; | ||
| 2084 | unlock_chunks(root); | ||
| 2085 | goto done; | ||
| 2045 | } | 2086 | } |
| 2046 | 2087 | ||
| 2047 | /* Shrinking succeeded, else we would be at "done". */ | 2088 | /* Shrinking succeeded, else we would be at "done". */ |
| @@ -2294,9 +2335,9 @@ again: | |||
| 2294 | em->block_len = em->len; | 2335 | em->block_len = em->len; |
| 2295 | 2336 | ||
| 2296 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 2337 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
| 2297 | spin_lock(&em_tree->lock); | 2338 | write_lock(&em_tree->lock); |
| 2298 | ret = add_extent_mapping(em_tree, em); | 2339 | ret = add_extent_mapping(em_tree, em); |
| 2299 | spin_unlock(&em_tree->lock); | 2340 | write_unlock(&em_tree->lock); |
| 2300 | BUG_ON(ret); | 2341 | BUG_ON(ret); |
| 2301 | free_extent_map(em); | 2342 | free_extent_map(em); |
| 2302 | 2343 | ||
| @@ -2491,9 +2532,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
| 2491 | int readonly = 0; | 2532 | int readonly = 0; |
| 2492 | int i; | 2533 | int i; |
| 2493 | 2534 | ||
| 2494 | spin_lock(&map_tree->map_tree.lock); | 2535 | read_lock(&map_tree->map_tree.lock); |
| 2495 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2536 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
| 2496 | spin_unlock(&map_tree->map_tree.lock); | 2537 | read_unlock(&map_tree->map_tree.lock); |
| 2497 | if (!em) | 2538 | if (!em) |
| 2498 | return 1; | 2539 | return 1; |
| 2499 | 2540 | ||
| @@ -2518,11 +2559,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
| 2518 | struct extent_map *em; | 2559 | struct extent_map *em; |
| 2519 | 2560 | ||
| 2520 | while (1) { | 2561 | while (1) { |
| 2521 | spin_lock(&tree->map_tree.lock); | 2562 | write_lock(&tree->map_tree.lock); |
| 2522 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); | 2563 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); |
| 2523 | if (em) | 2564 | if (em) |
| 2524 | remove_extent_mapping(&tree->map_tree, em); | 2565 | remove_extent_mapping(&tree->map_tree, em); |
| 2525 | spin_unlock(&tree->map_tree.lock); | 2566 | write_unlock(&tree->map_tree.lock); |
| 2526 | if (!em) | 2567 | if (!em) |
| 2527 | break; | 2568 | break; |
| 2528 | kfree(em->bdev); | 2569 | kfree(em->bdev); |
| @@ -2540,9 +2581,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | |||
| 2540 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2581 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
| 2541 | int ret; | 2582 | int ret; |
| 2542 | 2583 | ||
| 2543 | spin_lock(&em_tree->lock); | 2584 | read_lock(&em_tree->lock); |
| 2544 | em = lookup_extent_mapping(em_tree, logical, len); | 2585 | em = lookup_extent_mapping(em_tree, logical, len); |
| 2545 | spin_unlock(&em_tree->lock); | 2586 | read_unlock(&em_tree->lock); |
| 2546 | BUG_ON(!em); | 2587 | BUG_ON(!em); |
| 2547 | 2588 | ||
| 2548 | BUG_ON(em->start > logical || em->start + em->len < logical); | 2589 | BUG_ON(em->start > logical || em->start + em->len < logical); |
| @@ -2604,9 +2645,9 @@ again: | |||
| 2604 | atomic_set(&multi->error, 0); | 2645 | atomic_set(&multi->error, 0); |
| 2605 | } | 2646 | } |
| 2606 | 2647 | ||
| 2607 | spin_lock(&em_tree->lock); | 2648 | read_lock(&em_tree->lock); |
| 2608 | em = lookup_extent_mapping(em_tree, logical, *length); | 2649 | em = lookup_extent_mapping(em_tree, logical, *length); |
| 2609 | spin_unlock(&em_tree->lock); | 2650 | read_unlock(&em_tree->lock); |
| 2610 | 2651 | ||
| 2611 | if (!em && unplug_page) | 2652 | if (!em && unplug_page) |
| 2612 | return 0; | 2653 | return 0; |
| @@ -2763,9 +2804,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 2763 | u64 stripe_nr; | 2804 | u64 stripe_nr; |
| 2764 | int i, j, nr = 0; | 2805 | int i, j, nr = 0; |
| 2765 | 2806 | ||
| 2766 | spin_lock(&em_tree->lock); | 2807 | read_lock(&em_tree->lock); |
| 2767 | em = lookup_extent_mapping(em_tree, chunk_start, 1); | 2808 | em = lookup_extent_mapping(em_tree, chunk_start, 1); |
| 2768 | spin_unlock(&em_tree->lock); | 2809 | read_unlock(&em_tree->lock); |
| 2769 | 2810 | ||
| 2770 | BUG_ON(!em || em->start != chunk_start); | 2811 | BUG_ON(!em || em->start != chunk_start); |
| 2771 | map = (struct map_lookup *)em->bdev; | 2812 | map = (struct map_lookup *)em->bdev; |
| @@ -3053,9 +3094,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 3053 | logical = key->offset; | 3094 | logical = key->offset; |
| 3054 | length = btrfs_chunk_length(leaf, chunk); | 3095 | length = btrfs_chunk_length(leaf, chunk); |
| 3055 | 3096 | ||
| 3056 | spin_lock(&map_tree->map_tree.lock); | 3097 | read_lock(&map_tree->map_tree.lock); |
| 3057 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); | 3098 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); |
| 3058 | spin_unlock(&map_tree->map_tree.lock); | 3099 | read_unlock(&map_tree->map_tree.lock); |
| 3059 | 3100 | ||
| 3060 | /* already mapped? */ | 3101 | /* already mapped? */ |
| 3061 | if (em && em->start <= logical && em->start + em->len > logical) { | 3102 | if (em && em->start <= logical && em->start + em->len > logical) { |
| @@ -3114,9 +3155,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 3114 | map->stripes[i].dev->in_fs_metadata = 1; | 3155 | map->stripes[i].dev->in_fs_metadata = 1; |
| 3115 | } | 3156 | } |
| 3116 | 3157 | ||
| 3117 | spin_lock(&map_tree->map_tree.lock); | 3158 | write_lock(&map_tree->map_tree.lock); |
| 3118 | ret = add_extent_mapping(&map_tree->map_tree, em); | 3159 | ret = add_extent_mapping(&map_tree->map_tree, em); |
| 3119 | spin_unlock(&map_tree->map_tree.lock); | 3160 | write_unlock(&map_tree->map_tree.lock); |
| 3120 | BUG_ON(ret); | 3161 | BUG_ON(ret); |
| 3121 | free_extent_map(em); | 3162 | free_extent_map(em); |
| 3122 | 3163 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5139a833f721..31b0fabdd2ea 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root); | |||
| 181 | void btrfs_unlock_volumes(void); | 181 | void btrfs_unlock_volumes(void); |
| 182 | void btrfs_lock_volumes(void); | 182 | void btrfs_lock_volumes(void); |
| 183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
| 184 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | ||
| 185 | struct btrfs_device *device, u64 num_bytes, | ||
| 186 | u64 *start, u64 *max_avail); | ||
| 184 | #endif | 187 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index a9d3bf4d2689..b6dd5967c48a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -260,7 +260,7 @@ err: | |||
| 260 | * attributes are handled directly. | 260 | * attributes are handled directly. |
| 261 | */ | 261 | */ |
| 262 | struct xattr_handler *btrfs_xattr_handlers[] = { | 262 | struct xattr_handler *btrfs_xattr_handlers[] = { |
| 263 | #ifdef CONFIG_FS_POSIX_ACL | 263 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
| 264 | &btrfs_xattr_acl_access_handler, | 264 | &btrfs_xattr_acl_access_handler, |
| 265 | &btrfs_xattr_acl_default_handler, | 265 | &btrfs_xattr_acl_default_handler, |
| 266 | #endif | 266 | #endif |
