diff options
Diffstat (limited to 'fs/btrfs')
33 files changed, 3850 insertions, 2298 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index f128427b995b..69b355ae7f49 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -27,7 +27,7 @@ | |||
| 27 | #include "btrfs_inode.h" | 27 | #include "btrfs_inode.h" |
| 28 | #include "xattr.h" | 28 | #include "xattr.h" |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_FS_POSIX_ACL | 30 | #ifdef CONFIG_BTRFS_POSIX_ACL |
| 31 | 31 | ||
| 32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | 32 | static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) |
| 33 | { | 33 | { |
| @@ -313,7 +313,7 @@ struct xattr_handler btrfs_xattr_acl_access_handler = { | |||
| 313 | .set = btrfs_xattr_acl_access_set, | 313 | .set = btrfs_xattr_acl_access_set, |
| 314 | }; | 314 | }; |
| 315 | 315 | ||
| 316 | #else /* CONFIG_FS_POSIX_ACL */ | 316 | #else /* CONFIG_BTRFS_POSIX_ACL */ |
| 317 | 317 | ||
| 318 | int btrfs_acl_chmod(struct inode *inode) | 318 | int btrfs_acl_chmod(struct inode *inode) |
| 319 | { | 319 | { |
| @@ -325,4 +325,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) | |||
| 325 | return 0; | 325 | return 0; |
| 326 | } | 326 | } |
| 327 | 327 | ||
| 328 | #endif /* CONFIG_FS_POSIX_ACL */ | 328 | #endif /* CONFIG_BTRFS_POSIX_ACL */ |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 019e8af449ab..c0861e781cdb 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -48,6 +48,9 @@ struct btrfs_worker_thread { | |||
| 48 | /* number of things on the pending list */ | 48 | /* number of things on the pending list */ |
| 49 | atomic_t num_pending; | 49 | atomic_t num_pending; |
| 50 | 50 | ||
| 51 | /* reference counter for this struct */ | ||
| 52 | atomic_t refs; | ||
| 53 | |||
| 51 | unsigned long sequence; | 54 | unsigned long sequence; |
| 52 | 55 | ||
| 53 | /* protects the pending list. */ | 56 | /* protects the pending list. */ |
| @@ -61,6 +64,51 @@ struct btrfs_worker_thread { | |||
| 61 | }; | 64 | }; |
| 62 | 65 | ||
| 63 | /* | 66 | /* |
| 67 | * btrfs_start_workers uses kthread_run, which can block waiting for memory | ||
| 68 | * for a very long time. It will actually throttle on page writeback, | ||
| 69 | * and so it may not make progress until after our btrfs worker threads | ||
| 70 | * process all of the pending work structs in their queue. | ||
| 71 | * | ||
| 72 | * This means we can't use btrfs_start_workers from inside a btrfs worker | ||
| 73 | * thread that is used as part of cleaning dirty memory, which pretty much | ||
| 74 | * involves all of the worker threads. | ||
| 75 | * | ||
| 76 | * Instead we have a helper queue that never has more than one thread | ||
| 77 | * where we schedule thread start operations. This worker_start struct | ||
| 78 | * is used to contain the work and hold a pointer to the queue that needs | ||
| 79 | * another worker. | ||
| 80 | */ | ||
| 81 | struct worker_start { | ||
| 82 | struct btrfs_work work; | ||
| 83 | struct btrfs_workers *queue; | ||
| 84 | }; | ||
| 85 | |||
| 86 | static void start_new_worker_func(struct btrfs_work *work) | ||
| 87 | { | ||
| 88 | struct worker_start *start; | ||
| 89 | start = container_of(work, struct worker_start, work); | ||
| 90 | btrfs_start_workers(start->queue, 1); | ||
| 91 | kfree(start); | ||
| 92 | } | ||
| 93 | |||
| 94 | static int start_new_worker(struct btrfs_workers *queue) | ||
| 95 | { | ||
| 96 | struct worker_start *start; | ||
| 97 | int ret; | ||
| 98 | |||
| 99 | start = kzalloc(sizeof(*start), GFP_NOFS); | ||
| 100 | if (!start) | ||
| 101 | return -ENOMEM; | ||
| 102 | |||
| 103 | start->work.func = start_new_worker_func; | ||
| 104 | start->queue = queue; | ||
| 105 | ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work); | ||
| 106 | if (ret) | ||
| 107 | kfree(start); | ||
| 108 | return ret; | ||
| 109 | } | ||
| 110 | |||
| 111 | /* | ||
| 64 | * helper function to move a thread onto the idle list after it | 112 | * helper function to move a thread onto the idle list after it |
| 65 | * has finished some requests. | 113 | * has finished some requests. |
| 66 | */ | 114 | */ |
| @@ -71,7 +119,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker) | |||
| 71 | unsigned long flags; | 119 | unsigned long flags; |
| 72 | spin_lock_irqsave(&worker->workers->lock, flags); | 120 | spin_lock_irqsave(&worker->workers->lock, flags); |
| 73 | worker->idle = 1; | 121 | worker->idle = 1; |
| 74 | list_move(&worker->worker_list, &worker->workers->idle_list); | 122 | |
| 123 | /* the list may be empty if the worker is just starting */ | ||
| 124 | if (!list_empty(&worker->worker_list)) { | ||
| 125 | list_move(&worker->worker_list, | ||
| 126 | &worker->workers->idle_list); | ||
| 127 | } | ||
| 75 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 128 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
| 76 | } | 129 | } |
| 77 | } | 130 | } |
| @@ -87,23 +140,51 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) | |||
| 87 | unsigned long flags; | 140 | unsigned long flags; |
| 88 | spin_lock_irqsave(&worker->workers->lock, flags); | 141 | spin_lock_irqsave(&worker->workers->lock, flags); |
| 89 | worker->idle = 0; | 142 | worker->idle = 0; |
| 90 | list_move_tail(&worker->worker_list, | 143 | |
| 91 | &worker->workers->worker_list); | 144 | if (!list_empty(&worker->worker_list)) { |
| 145 | list_move_tail(&worker->worker_list, | ||
| 146 | &worker->workers->worker_list); | ||
| 147 | } | ||
| 92 | spin_unlock_irqrestore(&worker->workers->lock, flags); | 148 | spin_unlock_irqrestore(&worker->workers->lock, flags); |
| 93 | } | 149 | } |
| 94 | } | 150 | } |
| 95 | 151 | ||
| 96 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | 152 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) |
| 97 | struct btrfs_work *work) | ||
| 98 | { | 153 | { |
| 154 | struct btrfs_workers *workers = worker->workers; | ||
| 99 | unsigned long flags; | 155 | unsigned long flags; |
| 100 | 156 | ||
| 157 | rmb(); | ||
| 158 | if (!workers->atomic_start_pending) | ||
| 159 | return; | ||
| 160 | |||
| 161 | spin_lock_irqsave(&workers->lock, flags); | ||
| 162 | if (!workers->atomic_start_pending) | ||
| 163 | goto out; | ||
| 164 | |||
| 165 | workers->atomic_start_pending = 0; | ||
| 166 | if (workers->num_workers + workers->num_workers_starting >= | ||
| 167 | workers->max_workers) | ||
| 168 | goto out; | ||
| 169 | |||
| 170 | workers->num_workers_starting += 1; | ||
| 171 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 172 | start_new_worker(workers); | ||
| 173 | return; | ||
| 174 | |||
| 175 | out: | ||
| 176 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 177 | } | ||
| 178 | |||
| 179 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | ||
| 180 | struct btrfs_work *work) | ||
| 181 | { | ||
| 101 | if (!workers->ordered) | 182 | if (!workers->ordered) |
| 102 | return 0; | 183 | return 0; |
| 103 | 184 | ||
| 104 | set_bit(WORK_DONE_BIT, &work->flags); | 185 | set_bit(WORK_DONE_BIT, &work->flags); |
| 105 | 186 | ||
| 106 | spin_lock_irqsave(&workers->lock, flags); | 187 | spin_lock(&workers->order_lock); |
| 107 | 188 | ||
| 108 | while (1) { | 189 | while (1) { |
| 109 | if (!list_empty(&workers->prio_order_list)) { | 190 | if (!list_empty(&workers->prio_order_list)) { |
| @@ -126,45 +207,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
| 126 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 207 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
| 127 | break; | 208 | break; |
| 128 | 209 | ||
| 129 | spin_unlock_irqrestore(&workers->lock, flags); | 210 | spin_unlock(&workers->order_lock); |
| 130 | 211 | ||
| 131 | work->ordered_func(work); | 212 | work->ordered_func(work); |
| 132 | 213 | ||
| 133 | /* now take the lock again and call the freeing code */ | 214 | /* now take the lock again and call the freeing code */ |
| 134 | spin_lock_irqsave(&workers->lock, flags); | 215 | spin_lock(&workers->order_lock); |
| 135 | list_del(&work->order_list); | 216 | list_del(&work->order_list); |
| 136 | work->ordered_free(work); | 217 | work->ordered_free(work); |
| 137 | } | 218 | } |
| 138 | 219 | ||
| 139 | spin_unlock_irqrestore(&workers->lock, flags); | 220 | spin_unlock(&workers->order_lock); |
| 140 | return 0; | 221 | return 0; |
| 141 | } | 222 | } |
| 142 | 223 | ||
| 224 | static void put_worker(struct btrfs_worker_thread *worker) | ||
| 225 | { | ||
| 226 | if (atomic_dec_and_test(&worker->refs)) | ||
| 227 | kfree(worker); | ||
| 228 | } | ||
| 229 | |||
| 230 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
| 231 | { | ||
| 232 | int freeit = 0; | ||
| 233 | |||
| 234 | spin_lock_irq(&worker->lock); | ||
| 235 | spin_lock(&worker->workers->lock); | ||
| 236 | if (worker->workers->num_workers > 1 && | ||
| 237 | worker->idle && | ||
| 238 | !worker->working && | ||
| 239 | !list_empty(&worker->worker_list) && | ||
| 240 | list_empty(&worker->prio_pending) && | ||
| 241 | list_empty(&worker->pending) && | ||
| 242 | atomic_read(&worker->num_pending) == 0) { | ||
| 243 | freeit = 1; | ||
| 244 | list_del_init(&worker->worker_list); | ||
| 245 | worker->workers->num_workers--; | ||
| 246 | } | ||
| 247 | spin_unlock(&worker->workers->lock); | ||
| 248 | spin_unlock_irq(&worker->lock); | ||
| 249 | |||
| 250 | if (freeit) | ||
| 251 | put_worker(worker); | ||
| 252 | return freeit; | ||
| 253 | } | ||
| 254 | |||
| 255 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | ||
| 256 | struct list_head *prio_head, | ||
| 257 | struct list_head *head) | ||
| 258 | { | ||
| 259 | struct btrfs_work *work = NULL; | ||
| 260 | struct list_head *cur = NULL; | ||
| 261 | |||
| 262 | if(!list_empty(prio_head)) | ||
| 263 | cur = prio_head->next; | ||
| 264 | |||
| 265 | smp_mb(); | ||
| 266 | if (!list_empty(&worker->prio_pending)) | ||
| 267 | goto refill; | ||
| 268 | |||
| 269 | if (!list_empty(head)) | ||
| 270 | cur = head->next; | ||
| 271 | |||
| 272 | if (cur) | ||
| 273 | goto out; | ||
| 274 | |||
| 275 | refill: | ||
| 276 | spin_lock_irq(&worker->lock); | ||
| 277 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
| 278 | list_splice_tail_init(&worker->pending, head); | ||
| 279 | |||
| 280 | if (!list_empty(prio_head)) | ||
| 281 | cur = prio_head->next; | ||
| 282 | else if (!list_empty(head)) | ||
| 283 | cur = head->next; | ||
| 284 | spin_unlock_irq(&worker->lock); | ||
| 285 | |||
| 286 | if (!cur) | ||
| 287 | goto out_fail; | ||
| 288 | |||
| 289 | out: | ||
| 290 | work = list_entry(cur, struct btrfs_work, list); | ||
| 291 | |||
| 292 | out_fail: | ||
| 293 | return work; | ||
| 294 | } | ||
| 295 | |||
| 143 | /* | 296 | /* |
| 144 | * main loop for servicing work items | 297 | * main loop for servicing work items |
| 145 | */ | 298 | */ |
| 146 | static int worker_loop(void *arg) | 299 | static int worker_loop(void *arg) |
| 147 | { | 300 | { |
| 148 | struct btrfs_worker_thread *worker = arg; | 301 | struct btrfs_worker_thread *worker = arg; |
| 149 | struct list_head *cur; | 302 | struct list_head head; |
| 303 | struct list_head prio_head; | ||
| 150 | struct btrfs_work *work; | 304 | struct btrfs_work *work; |
| 305 | |||
| 306 | INIT_LIST_HEAD(&head); | ||
| 307 | INIT_LIST_HEAD(&prio_head); | ||
| 308 | |||
| 151 | do { | 309 | do { |
| 152 | spin_lock_irq(&worker->lock); | 310 | again: |
| 153 | again_locked: | ||
| 154 | while (1) { | 311 | while (1) { |
| 155 | if (!list_empty(&worker->prio_pending)) | 312 | |
| 156 | cur = worker->prio_pending.next; | 313 | |
| 157 | else if (!list_empty(&worker->pending)) | 314 | work = get_next_work(worker, &prio_head, &head); |
| 158 | cur = worker->pending.next; | 315 | if (!work) |
| 159 | else | ||
| 160 | break; | 316 | break; |
| 161 | 317 | ||
| 162 | work = list_entry(cur, struct btrfs_work, list); | ||
| 163 | list_del(&work->list); | 318 | list_del(&work->list); |
| 164 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 319 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
| 165 | 320 | ||
| 166 | work->worker = worker; | 321 | work->worker = worker; |
| 167 | spin_unlock_irq(&worker->lock); | ||
| 168 | 322 | ||
| 169 | work->func(work); | 323 | work->func(work); |
| 170 | 324 | ||
| @@ -175,9 +329,13 @@ again_locked: | |||
| 175 | */ | 329 | */ |
| 176 | run_ordered_completions(worker->workers, work); | 330 | run_ordered_completions(worker->workers, work); |
| 177 | 331 | ||
| 178 | spin_lock_irq(&worker->lock); | 332 | check_pending_worker_creates(worker); |
| 179 | check_idle_worker(worker); | 333 | |
| 180 | } | 334 | } |
| 335 | |||
| 336 | spin_lock_irq(&worker->lock); | ||
| 337 | check_idle_worker(worker); | ||
| 338 | |||
| 181 | if (freezing(current)) { | 339 | if (freezing(current)) { |
| 182 | worker->working = 0; | 340 | worker->working = 0; |
| 183 | spin_unlock_irq(&worker->lock); | 341 | spin_unlock_irq(&worker->lock); |
| @@ -216,8 +374,10 @@ again_locked: | |||
| 216 | spin_lock_irq(&worker->lock); | 374 | spin_lock_irq(&worker->lock); |
| 217 | set_current_state(TASK_INTERRUPTIBLE); | 375 | set_current_state(TASK_INTERRUPTIBLE); |
| 218 | if (!list_empty(&worker->pending) || | 376 | if (!list_empty(&worker->pending) || |
| 219 | !list_empty(&worker->prio_pending)) | 377 | !list_empty(&worker->prio_pending)) { |
| 220 | goto again_locked; | 378 | spin_unlock_irq(&worker->lock); |
| 379 | goto again; | ||
| 380 | } | ||
| 221 | 381 | ||
| 222 | /* | 382 | /* |
| 223 | * this makes sure we get a wakeup when someone | 383 | * this makes sure we get a wakeup when someone |
| @@ -226,8 +386,13 @@ again_locked: | |||
| 226 | worker->working = 0; | 386 | worker->working = 0; |
| 227 | spin_unlock_irq(&worker->lock); | 387 | spin_unlock_irq(&worker->lock); |
| 228 | 388 | ||
| 229 | if (!kthread_should_stop()) | 389 | if (!kthread_should_stop()) { |
| 230 | schedule(); | 390 | schedule_timeout(HZ * 120); |
| 391 | if (!worker->working && | ||
| 392 | try_worker_shutdown(worker)) { | ||
| 393 | return 0; | ||
| 394 | } | ||
| 395 | } | ||
| 231 | } | 396 | } |
| 232 | __set_current_state(TASK_RUNNING); | 397 | __set_current_state(TASK_RUNNING); |
| 233 | } | 398 | } |
| @@ -242,41 +407,61 @@ int btrfs_stop_workers(struct btrfs_workers *workers) | |||
| 242 | { | 407 | { |
| 243 | struct list_head *cur; | 408 | struct list_head *cur; |
| 244 | struct btrfs_worker_thread *worker; | 409 | struct btrfs_worker_thread *worker; |
| 410 | int can_stop; | ||
| 245 | 411 | ||
| 412 | spin_lock_irq(&workers->lock); | ||
| 246 | list_splice_init(&workers->idle_list, &workers->worker_list); | 413 | list_splice_init(&workers->idle_list, &workers->worker_list); |
| 247 | while (!list_empty(&workers->worker_list)) { | 414 | while (!list_empty(&workers->worker_list)) { |
| 248 | cur = workers->worker_list.next; | 415 | cur = workers->worker_list.next; |
| 249 | worker = list_entry(cur, struct btrfs_worker_thread, | 416 | worker = list_entry(cur, struct btrfs_worker_thread, |
| 250 | worker_list); | 417 | worker_list); |
| 251 | kthread_stop(worker->task); | 418 | |
| 252 | list_del(&worker->worker_list); | 419 | atomic_inc(&worker->refs); |
| 253 | kfree(worker); | 420 | workers->num_workers -= 1; |
| 421 | if (!list_empty(&worker->worker_list)) { | ||
| 422 | list_del_init(&worker->worker_list); | ||
| 423 | put_worker(worker); | ||
| 424 | can_stop = 1; | ||
| 425 | } else | ||
| 426 | can_stop = 0; | ||
| 427 | spin_unlock_irq(&workers->lock); | ||
| 428 | if (can_stop) | ||
| 429 | kthread_stop(worker->task); | ||
| 430 | spin_lock_irq(&workers->lock); | ||
| 431 | put_worker(worker); | ||
| 254 | } | 432 | } |
| 433 | spin_unlock_irq(&workers->lock); | ||
| 255 | return 0; | 434 | return 0; |
| 256 | } | 435 | } |
| 257 | 436 | ||
| 258 | /* | 437 | /* |
| 259 | * simple init on struct btrfs_workers | 438 | * simple init on struct btrfs_workers |
| 260 | */ | 439 | */ |
| 261 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | 440 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, |
| 441 | struct btrfs_workers *async_helper) | ||
| 262 | { | 442 | { |
| 263 | workers->num_workers = 0; | 443 | workers->num_workers = 0; |
| 444 | workers->num_workers_starting = 0; | ||
| 264 | INIT_LIST_HEAD(&workers->worker_list); | 445 | INIT_LIST_HEAD(&workers->worker_list); |
| 265 | INIT_LIST_HEAD(&workers->idle_list); | 446 | INIT_LIST_HEAD(&workers->idle_list); |
| 266 | INIT_LIST_HEAD(&workers->order_list); | 447 | INIT_LIST_HEAD(&workers->order_list); |
| 267 | INIT_LIST_HEAD(&workers->prio_order_list); | 448 | INIT_LIST_HEAD(&workers->prio_order_list); |
| 268 | spin_lock_init(&workers->lock); | 449 | spin_lock_init(&workers->lock); |
| 450 | spin_lock_init(&workers->order_lock); | ||
| 269 | workers->max_workers = max; | 451 | workers->max_workers = max; |
| 270 | workers->idle_thresh = 32; | 452 | workers->idle_thresh = 32; |
| 271 | workers->name = name; | 453 | workers->name = name; |
| 272 | workers->ordered = 0; | 454 | workers->ordered = 0; |
| 455 | workers->atomic_start_pending = 0; | ||
| 456 | workers->atomic_worker_start = async_helper; | ||
| 273 | } | 457 | } |
| 274 | 458 | ||
| 275 | /* | 459 | /* |
| 276 | * starts new worker threads. This does not enforce the max worker | 460 | * starts new worker threads. This does not enforce the max worker |
| 277 | * count in case you need to temporarily go past it. | 461 | * count in case you need to temporarily go past it. |
| 278 | */ | 462 | */ |
| 279 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | 463 | static int __btrfs_start_workers(struct btrfs_workers *workers, |
| 464 | int num_workers) | ||
| 280 | { | 465 | { |
| 281 | struct btrfs_worker_thread *worker; | 466 | struct btrfs_worker_thread *worker; |
| 282 | int ret = 0; | 467 | int ret = 0; |
| @@ -293,7 +478,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
| 293 | INIT_LIST_HEAD(&worker->prio_pending); | 478 | INIT_LIST_HEAD(&worker->prio_pending); |
| 294 | INIT_LIST_HEAD(&worker->worker_list); | 479 | INIT_LIST_HEAD(&worker->worker_list); |
| 295 | spin_lock_init(&worker->lock); | 480 | spin_lock_init(&worker->lock); |
| 481 | |||
| 296 | atomic_set(&worker->num_pending, 0); | 482 | atomic_set(&worker->num_pending, 0); |
| 483 | atomic_set(&worker->refs, 1); | ||
| 297 | worker->workers = workers; | 484 | worker->workers = workers; |
| 298 | worker->task = kthread_run(worker_loop, worker, | 485 | worker->task = kthread_run(worker_loop, worker, |
| 299 | "btrfs-%s-%d", workers->name, | 486 | "btrfs-%s-%d", workers->name, |
| @@ -303,11 +490,12 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
| 303 | kfree(worker); | 490 | kfree(worker); |
| 304 | goto fail; | 491 | goto fail; |
| 305 | } | 492 | } |
| 306 | |||
| 307 | spin_lock_irq(&workers->lock); | 493 | spin_lock_irq(&workers->lock); |
| 308 | list_add_tail(&worker->worker_list, &workers->idle_list); | 494 | list_add_tail(&worker->worker_list, &workers->idle_list); |
| 309 | worker->idle = 1; | 495 | worker->idle = 1; |
| 310 | workers->num_workers++; | 496 | workers->num_workers++; |
| 497 | workers->num_workers_starting--; | ||
| 498 | WARN_ON(workers->num_workers_starting < 0); | ||
| 311 | spin_unlock_irq(&workers->lock); | 499 | spin_unlock_irq(&workers->lock); |
| 312 | } | 500 | } |
| 313 | return 0; | 501 | return 0; |
| @@ -316,6 +504,14 @@ fail: | |||
| 316 | return ret; | 504 | return ret; |
| 317 | } | 505 | } |
| 318 | 506 | ||
| 507 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | ||
| 508 | { | ||
| 509 | spin_lock_irq(&workers->lock); | ||
| 510 | workers->num_workers_starting += num_workers; | ||
| 511 | spin_unlock_irq(&workers->lock); | ||
| 512 | return __btrfs_start_workers(workers, num_workers); | ||
| 513 | } | ||
| 514 | |||
| 319 | /* | 515 | /* |
| 320 | * run through the list and find a worker thread that doesn't have a lot | 516 | * run through the list and find a worker thread that doesn't have a lot |
| 321 | * to do right now. This can return null if we aren't yet at the thread | 517 | * to do right now. This can return null if we aren't yet at the thread |
| @@ -325,7 +521,10 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
| 325 | { | 521 | { |
| 326 | struct btrfs_worker_thread *worker; | 522 | struct btrfs_worker_thread *worker; |
| 327 | struct list_head *next; | 523 | struct list_head *next; |
| 328 | int enforce_min = workers->num_workers < workers->max_workers; | 524 | int enforce_min; |
| 525 | |||
| 526 | enforce_min = (workers->num_workers + workers->num_workers_starting) < | ||
| 527 | workers->max_workers; | ||
| 329 | 528 | ||
| 330 | /* | 529 | /* |
| 331 | * if we find an idle thread, don't move it to the end of the | 530 | * if we find an idle thread, don't move it to the end of the |
| @@ -350,7 +549,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | |||
| 350 | */ | 549 | */ |
| 351 | next = workers->worker_list.next; | 550 | next = workers->worker_list.next; |
| 352 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | 551 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); |
| 353 | atomic_inc(&worker->num_pending); | ||
| 354 | worker->sequence++; | 552 | worker->sequence++; |
| 355 | 553 | ||
| 356 | if (worker->sequence % workers->idle_thresh == 0) | 554 | if (worker->sequence % workers->idle_thresh == 0) |
| @@ -367,35 +565,49 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | |||
| 367 | { | 565 | { |
| 368 | struct btrfs_worker_thread *worker; | 566 | struct btrfs_worker_thread *worker; |
| 369 | unsigned long flags; | 567 | unsigned long flags; |
| 568 | struct list_head *fallback; | ||
| 370 | 569 | ||
| 371 | again: | 570 | again: |
| 372 | spin_lock_irqsave(&workers->lock, flags); | 571 | spin_lock_irqsave(&workers->lock, flags); |
| 373 | worker = next_worker(workers); | 572 | worker = next_worker(workers); |
| 374 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 375 | 573 | ||
| 376 | if (!worker) { | 574 | if (!worker) { |
| 377 | spin_lock_irqsave(&workers->lock, flags); | 575 | if (workers->num_workers + workers->num_workers_starting >= |
| 378 | if (workers->num_workers >= workers->max_workers) { | 576 | workers->max_workers) { |
| 379 | struct list_head *fallback = NULL; | 577 | goto fallback; |
| 380 | /* | 578 | } else if (workers->atomic_worker_start) { |
| 381 | * we have failed to find any workers, just | 579 | workers->atomic_start_pending = 1; |
| 382 | * return the force one | 580 | goto fallback; |
| 383 | */ | ||
| 384 | if (!list_empty(&workers->worker_list)) | ||
| 385 | fallback = workers->worker_list.next; | ||
| 386 | if (!list_empty(&workers->idle_list)) | ||
| 387 | fallback = workers->idle_list.next; | ||
| 388 | BUG_ON(!fallback); | ||
| 389 | worker = list_entry(fallback, | ||
| 390 | struct btrfs_worker_thread, worker_list); | ||
| 391 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 392 | } else { | 581 | } else { |
| 582 | workers->num_workers_starting++; | ||
| 393 | spin_unlock_irqrestore(&workers->lock, flags); | 583 | spin_unlock_irqrestore(&workers->lock, flags); |
| 394 | /* we're below the limit, start another worker */ | 584 | /* we're below the limit, start another worker */ |
| 395 | btrfs_start_workers(workers, 1); | 585 | __btrfs_start_workers(workers, 1); |
| 396 | goto again; | 586 | goto again; |
| 397 | } | 587 | } |
| 398 | } | 588 | } |
| 589 | goto found; | ||
| 590 | |||
| 591 | fallback: | ||
| 592 | fallback = NULL; | ||
| 593 | /* | ||
| 594 | * we have failed to find any workers, just | ||
| 595 | * return the first one we can find. | ||
| 596 | */ | ||
| 597 | if (!list_empty(&workers->worker_list)) | ||
| 598 | fallback = workers->worker_list.next; | ||
| 599 | if (!list_empty(&workers->idle_list)) | ||
| 600 | fallback = workers->idle_list.next; | ||
| 601 | BUG_ON(!fallback); | ||
| 602 | worker = list_entry(fallback, | ||
| 603 | struct btrfs_worker_thread, worker_list); | ||
| 604 | found: | ||
| 605 | /* | ||
| 606 | * this makes sure the worker doesn't exit before it is placed | ||
| 607 | * onto a busy/idle list | ||
| 608 | */ | ||
| 609 | atomic_inc(&worker->num_pending); | ||
| 610 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 399 | return worker; | 611 | return worker; |
| 400 | } | 612 | } |
| 401 | 613 | ||
| @@ -427,7 +639,7 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
| 427 | spin_lock(&worker->workers->lock); | 639 | spin_lock(&worker->workers->lock); |
| 428 | worker->idle = 0; | 640 | worker->idle = 0; |
| 429 | list_move_tail(&worker->worker_list, | 641 | list_move_tail(&worker->worker_list, |
| 430 | &worker->workers->worker_list); | 642 | &worker->workers->worker_list); |
| 431 | spin_unlock(&worker->workers->lock); | 643 | spin_unlock(&worker->workers->lock); |
| 432 | } | 644 | } |
| 433 | if (!worker->working) { | 645 | if (!worker->working) { |
| @@ -435,9 +647,9 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
| 435 | worker->working = 1; | 647 | worker->working = 1; |
| 436 | } | 648 | } |
| 437 | 649 | ||
| 438 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 439 | if (wake) | 650 | if (wake) |
| 440 | wake_up_process(worker->task); | 651 | wake_up_process(worker->task); |
| 652 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 441 | out: | 653 | out: |
| 442 | 654 | ||
| 443 | return 0; | 655 | return 0; |
| @@ -463,14 +675,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 463 | 675 | ||
| 464 | worker = find_worker(workers); | 676 | worker = find_worker(workers); |
| 465 | if (workers->ordered) { | 677 | if (workers->ordered) { |
| 466 | spin_lock_irqsave(&workers->lock, flags); | 678 | /* |
| 679 | * you're not allowed to do ordered queues from an | ||
| 680 | * interrupt handler | ||
| 681 | */ | ||
| 682 | spin_lock(&workers->order_lock); | ||
| 467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | 683 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
| 468 | list_add_tail(&work->order_list, | 684 | list_add_tail(&work->order_list, |
| 469 | &workers->prio_order_list); | 685 | &workers->prio_order_list); |
| 470 | } else { | 686 | } else { |
| 471 | list_add_tail(&work->order_list, &workers->order_list); | 687 | list_add_tail(&work->order_list, &workers->order_list); |
| 472 | } | 688 | } |
| 473 | spin_unlock_irqrestore(&workers->lock, flags); | 689 | spin_unlock(&workers->order_lock); |
| 474 | } else { | 690 | } else { |
| 475 | INIT_LIST_HEAD(&work->order_list); | 691 | INIT_LIST_HEAD(&work->order_list); |
| 476 | } | 692 | } |
| @@ -481,7 +697,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 481 | list_add_tail(&work->list, &worker->prio_pending); | 697 | list_add_tail(&work->list, &worker->prio_pending); |
| 482 | else | 698 | else |
| 483 | list_add_tail(&work->list, &worker->pending); | 699 | list_add_tail(&work->list, &worker->pending); |
| 484 | atomic_inc(&worker->num_pending); | ||
| 485 | check_busy_worker(worker); | 700 | check_busy_worker(worker); |
| 486 | 701 | ||
| 487 | /* | 702 | /* |
| @@ -492,10 +707,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 492 | wake = 1; | 707 | wake = 1; |
| 493 | worker->working = 1; | 708 | worker->working = 1; |
| 494 | 709 | ||
| 495 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 496 | |||
| 497 | if (wake) | 710 | if (wake) |
| 498 | wake_up_process(worker->task); | 711 | wake_up_process(worker->task); |
| 712 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 713 | |||
| 499 | out: | 714 | out: |
| 500 | return 0; | 715 | return 0; |
| 501 | } | 716 | } |
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1b511c109db6..5077746cf85e 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
| @@ -64,6 +64,8 @@ struct btrfs_workers { | |||
| 64 | /* current number of running workers */ | 64 | /* current number of running workers */ |
| 65 | int num_workers; | 65 | int num_workers; |
| 66 | 66 | ||
| 67 | int num_workers_starting; | ||
| 68 | |||
| 67 | /* max number of workers allowed. changed by btrfs_start_workers */ | 69 | /* max number of workers allowed. changed by btrfs_start_workers */ |
| 68 | int max_workers; | 70 | int max_workers; |
| 69 | 71 | ||
| @@ -73,6 +75,16 @@ struct btrfs_workers { | |||
| 73 | /* force completions in the order they were queued */ | 75 | /* force completions in the order they were queued */ |
| 74 | int ordered; | 76 | int ordered; |
| 75 | 77 | ||
| 78 | /* more workers required, but in an interrupt handler */ | ||
| 79 | int atomic_start_pending; | ||
| 80 | |||
| 81 | /* | ||
| 82 | * are we allowed to sleep while starting workers or are we required | ||
| 83 | * to start them at a later time? If we can't sleep, this indicates | ||
| 84 | * which queue we need to use to schedule thread creation. | ||
| 85 | */ | ||
| 86 | struct btrfs_workers *atomic_worker_start; | ||
| 87 | |||
| 76 | /* list with all the work threads. The workers on the idle thread | 88 | /* list with all the work threads. The workers on the idle thread |
| 77 | * may be actively servicing jobs, but they haven't yet hit the | 89 | * may be actively servicing jobs, but they haven't yet hit the |
| 78 | * idle thresh limit above. | 90 | * idle thresh limit above. |
| @@ -90,6 +102,9 @@ struct btrfs_workers { | |||
| 90 | /* lock for finding the next worker thread to queue on */ | 102 | /* lock for finding the next worker thread to queue on */ |
| 91 | spinlock_t lock; | 103 | spinlock_t lock; |
| 92 | 104 | ||
| 105 | /* lock for the ordered lists */ | ||
| 106 | spinlock_t order_lock; | ||
| 107 | |||
| 93 | /* extra name for this worker, used for current->name */ | 108 | /* extra name for this worker, used for current->name */ |
| 94 | char *name; | 109 | char *name; |
| 95 | }; | 110 | }; |
| @@ -97,7 +112,8 @@ struct btrfs_workers { | |||
| 97 | int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); | 112 | int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); |
| 98 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); | 113 | int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); |
| 99 | int btrfs_stop_workers(struct btrfs_workers *workers); | 114 | int btrfs_stop_workers(struct btrfs_workers *workers); |
| 100 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); | 115 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, |
| 116 | struct btrfs_workers *async_starter); | ||
| 101 | int btrfs_requeue_work(struct btrfs_work *work); | 117 | int btrfs_requeue_work(struct btrfs_work *work); |
| 102 | void btrfs_set_work_high_prio(struct btrfs_work *work); | 118 | void btrfs_set_work_high_prio(struct btrfs_work *work); |
| 103 | #endif | 119 | #endif |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ea1ea0af8c0e..c71abec0ab90 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -128,6 +128,16 @@ struct btrfs_inode { | |||
| 128 | u64 last_unlink_trans; | 128 | u64 last_unlink_trans; |
| 129 | 129 | ||
| 130 | /* | 130 | /* |
| 131 | * Counters to keep track of the number of extent items we may use due | ||
| 132 | * to delalloc and such. outstanding_extents is the number of extent | ||
| 133 | * items we think we'll end up using, and reserved_extents is the number | ||
| 134 | * of extent items we've reserved metadata for. | ||
| 135 | */ | ||
| 136 | spinlock_t accounting_lock; | ||
| 137 | int reserved_extents; | ||
| 138 | int outstanding_extents; | ||
| 139 | |||
| 140 | /* | ||
| 131 | * ordered_data_close is set by truncate when a file that used | 141 | * ordered_data_close is set by truncate when a file that used |
| 132 | * to have good data has been truncated to zero. When it is set | 142 | * to have good data has been truncated to zero. When it is set |
| 133 | * the btrfs file release call will add this inode to the | 143 | * the btrfs file release call will add this inode to the |
| @@ -138,6 +148,7 @@ struct btrfs_inode { | |||
| 138 | * of these. | 148 | * of these. |
| 139 | */ | 149 | */ |
| 140 | unsigned ordered_data_close:1; | 150 | unsigned ordered_data_close:1; |
| 151 | unsigned dummy_inode:1; | ||
| 141 | 152 | ||
| 142 | struct inode vfs_inode; | 153 | struct inode vfs_inode; |
| 143 | }; | 154 | }; |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9d8ba4d54a37..a11a32058b50 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 506 | */ | 506 | */ |
| 507 | set_page_extent_mapped(page); | 507 | set_page_extent_mapped(page); |
| 508 | lock_extent(tree, last_offset, end, GFP_NOFS); | 508 | lock_extent(tree, last_offset, end, GFP_NOFS); |
| 509 | spin_lock(&em_tree->lock); | 509 | read_lock(&em_tree->lock); |
| 510 | em = lookup_extent_mapping(em_tree, last_offset, | 510 | em = lookup_extent_mapping(em_tree, last_offset, |
| 511 | PAGE_CACHE_SIZE); | 511 | PAGE_CACHE_SIZE); |
| 512 | spin_unlock(&em_tree->lock); | 512 | read_unlock(&em_tree->lock); |
| 513 | 513 | ||
| 514 | if (!em || last_offset < em->start || | 514 | if (!em || last_offset < em->start || |
| 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || | 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || |
| @@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 593 | em_tree = &BTRFS_I(inode)->extent_tree; | 593 | em_tree = &BTRFS_I(inode)->extent_tree; |
| 594 | 594 | ||
| 595 | /* we need the actual starting offset of this extent in the file */ | 595 | /* we need the actual starting offset of this extent in the file */ |
| 596 | spin_lock(&em_tree->lock); | 596 | read_lock(&em_tree->lock); |
| 597 | em = lookup_extent_mapping(em_tree, | 597 | em = lookup_extent_mapping(em_tree, |
| 598 | page_offset(bio->bi_io_vec->bv_page), | 598 | page_offset(bio->bi_io_vec->bv_page), |
| 599 | PAGE_CACHE_SIZE); | 599 | PAGE_CACHE_SIZE); |
| 600 | spin_unlock(&em_tree->lock); | 600 | read_unlock(&em_tree->lock); |
| 601 | 601 | ||
| 602 | compressed_len = em->block_len; | 602 | compressed_len = em->block_len; |
| 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fdcc0512d3a..ec96f3a6d536 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
| 2853 | int split; | 2853 | int split; |
| 2854 | int num_doubles = 0; | 2854 | int num_doubles = 0; |
| 2855 | 2855 | ||
| 2856 | l = path->nodes[0]; | ||
| 2857 | slot = path->slots[0]; | ||
| 2858 | if (extend && data_size + btrfs_item_size_nr(l, slot) + | ||
| 2859 | sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) | ||
| 2860 | return -EOVERFLOW; | ||
| 2861 | |||
| 2856 | /* first try to make some room by pushing left and right */ | 2862 | /* first try to make some room by pushing left and right */ |
| 2857 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2863 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { |
| 2858 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2864 | wret = push_leaf_right(trans, root, path, data_size, 0); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 837435ce84ca..1bb897ecdeeb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -114,6 +114,10 @@ struct btrfs_ordered_sum; | |||
| 114 | */ | 114 | */ |
| 115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL | 115 | #define BTRFS_DEV_ITEMS_OBJECTID 1ULL |
| 116 | 116 | ||
| 117 | #define BTRFS_BTREE_INODE_OBJECTID 1 | ||
| 118 | |||
| 119 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 | ||
| 120 | |||
| 117 | /* | 121 | /* |
| 118 | * we can actually store much bigger names, but lets not confuse the rest | 122 | * we can actually store much bigger names, but lets not confuse the rest |
| 119 | * of linux | 123 | * of linux |
| @@ -670,21 +674,29 @@ struct btrfs_space_info { | |||
| 670 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 674 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
| 671 | current allocations */ | 675 | current allocations */ |
| 672 | u64 bytes_readonly; /* total bytes that are read only */ | 676 | u64 bytes_readonly; /* total bytes that are read only */ |
| 673 | 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ | |
| 674 | /* delalloc accounting */ | 678 | u64 bytes_root; /* the number of bytes needed to commit a |
| 675 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | 679 | transaction */ |
| 676 | this space is not necessarily reserved yet | ||
| 677 | by the allocator */ | ||
| 678 | u64 bytes_may_use; /* number of bytes that may be used for | 680 | u64 bytes_may_use; /* number of bytes that may be used for |
| 679 | delalloc */ | 681 | delalloc/allocations */ |
| 682 | u64 bytes_delalloc; /* number of bytes currently reserved for | ||
| 683 | delayed allocation */ | ||
| 680 | 684 | ||
| 681 | int full; /* indicates that we cannot allocate any more | 685 | int full; /* indicates that we cannot allocate any more |
| 682 | chunks for this space */ | 686 | chunks for this space */ |
| 683 | int force_alloc; /* set if we need to force a chunk alloc for | 687 | int force_alloc; /* set if we need to force a chunk alloc for |
| 684 | this space */ | 688 | this space */ |
| 689 | int force_delalloc; /* make people start doing filemap_flush until | ||
| 690 | we're under a threshold */ | ||
| 685 | 691 | ||
| 686 | struct list_head list; | 692 | struct list_head list; |
| 687 | 693 | ||
| 694 | /* for controlling how we free up space for allocations */ | ||
| 695 | wait_queue_head_t allocate_wait; | ||
| 696 | wait_queue_head_t flush_wait; | ||
| 697 | int allocating_chunk; | ||
| 698 | int flushing; | ||
| 699 | |||
| 688 | /* for block groups in our same type */ | 700 | /* for block groups in our same type */ |
| 689 | struct list_head block_groups; | 701 | struct list_head block_groups; |
| 690 | spinlock_t lock; | 702 | spinlock_t lock; |
| @@ -726,6 +738,15 @@ enum btrfs_caching_type { | |||
| 726 | BTRFS_CACHE_FINISHED = 2, | 738 | BTRFS_CACHE_FINISHED = 2, |
| 727 | }; | 739 | }; |
| 728 | 740 | ||
| 741 | struct btrfs_caching_control { | ||
| 742 | struct list_head list; | ||
| 743 | struct mutex mutex; | ||
| 744 | wait_queue_head_t wait; | ||
| 745 | struct btrfs_block_group_cache *block_group; | ||
| 746 | u64 progress; | ||
| 747 | atomic_t count; | ||
| 748 | }; | ||
| 749 | |||
| 729 | struct btrfs_block_group_cache { | 750 | struct btrfs_block_group_cache { |
| 730 | struct btrfs_key key; | 751 | struct btrfs_key key; |
| 731 | struct btrfs_block_group_item item; | 752 | struct btrfs_block_group_item item; |
| @@ -733,6 +754,7 @@ struct btrfs_block_group_cache { | |||
| 733 | spinlock_t lock; | 754 | spinlock_t lock; |
| 734 | u64 pinned; | 755 | u64 pinned; |
| 735 | u64 reserved; | 756 | u64 reserved; |
| 757 | u64 bytes_super; | ||
| 736 | u64 flags; | 758 | u64 flags; |
| 737 | u64 sectorsize; | 759 | u64 sectorsize; |
| 738 | int extents_thresh; | 760 | int extents_thresh; |
| @@ -742,8 +764,9 @@ struct btrfs_block_group_cache { | |||
| 742 | int dirty; | 764 | int dirty; |
| 743 | 765 | ||
| 744 | /* cache tracking stuff */ | 766 | /* cache tracking stuff */ |
| 745 | wait_queue_head_t caching_q; | ||
| 746 | int cached; | 767 | int cached; |
| 768 | struct btrfs_caching_control *caching_ctl; | ||
| 769 | u64 last_byte_to_unpin; | ||
| 747 | 770 | ||
| 748 | struct btrfs_space_info *space_info; | 771 | struct btrfs_space_info *space_info; |
| 749 | 772 | ||
| @@ -782,13 +805,16 @@ struct btrfs_fs_info { | |||
| 782 | 805 | ||
| 783 | /* the log root tree is a directory of all the other log roots */ | 806 | /* the log root tree is a directory of all the other log roots */ |
| 784 | struct btrfs_root *log_root_tree; | 807 | struct btrfs_root *log_root_tree; |
| 808 | |||
| 809 | spinlock_t fs_roots_radix_lock; | ||
| 785 | struct radix_tree_root fs_roots_radix; | 810 | struct radix_tree_root fs_roots_radix; |
| 786 | 811 | ||
| 787 | /* block group cache stuff */ | 812 | /* block group cache stuff */ |
| 788 | spinlock_t block_group_cache_lock; | 813 | spinlock_t block_group_cache_lock; |
| 789 | struct rb_root block_group_cache_tree; | 814 | struct rb_root block_group_cache_tree; |
| 790 | 815 | ||
| 791 | struct extent_io_tree pinned_extents; | 816 | struct extent_io_tree freed_extents[2]; |
| 817 | struct extent_io_tree *pinned_extents; | ||
| 792 | 818 | ||
| 793 | /* logical->physical extent mapping */ | 819 | /* logical->physical extent mapping */ |
| 794 | struct btrfs_mapping_tree mapping_tree; | 820 | struct btrfs_mapping_tree mapping_tree; |
| @@ -822,11 +848,7 @@ struct btrfs_fs_info { | |||
| 822 | struct mutex transaction_kthread_mutex; | 848 | struct mutex transaction_kthread_mutex; |
| 823 | struct mutex cleaner_mutex; | 849 | struct mutex cleaner_mutex; |
| 824 | struct mutex chunk_mutex; | 850 | struct mutex chunk_mutex; |
| 825 | struct mutex drop_mutex; | ||
| 826 | struct mutex volume_mutex; | 851 | struct mutex volume_mutex; |
| 827 | struct mutex tree_reloc_mutex; | ||
| 828 | struct rw_semaphore extent_commit_sem; | ||
| 829 | |||
| 830 | /* | 852 | /* |
| 831 | * this protects the ordered operations list only while we are | 853 | * this protects the ordered operations list only while we are |
| 832 | * processing all of the entries on it. This way we make | 854 | * processing all of the entries on it. This way we make |
| @@ -835,10 +857,16 @@ struct btrfs_fs_info { | |||
| 835 | * before jumping into the main commit. | 857 | * before jumping into the main commit. |
| 836 | */ | 858 | */ |
| 837 | struct mutex ordered_operations_mutex; | 859 | struct mutex ordered_operations_mutex; |
| 860 | struct rw_semaphore extent_commit_sem; | ||
| 861 | |||
| 862 | struct rw_semaphore subvol_sem; | ||
| 863 | |||
| 864 | struct srcu_struct subvol_srcu; | ||
| 838 | 865 | ||
| 839 | struct list_head trans_list; | 866 | struct list_head trans_list; |
| 840 | struct list_head hashers; | 867 | struct list_head hashers; |
| 841 | struct list_head dead_roots; | 868 | struct list_head dead_roots; |
| 869 | struct list_head caching_block_groups; | ||
| 842 | 870 | ||
| 843 | atomic_t nr_async_submits; | 871 | atomic_t nr_async_submits; |
| 844 | atomic_t async_submit_draining; | 872 | atomic_t async_submit_draining; |
| @@ -882,6 +910,7 @@ struct btrfs_fs_info { | |||
| 882 | * A third pool does submit_bio to avoid deadlocking with the other | 910 | * A third pool does submit_bio to avoid deadlocking with the other |
| 883 | * two | 911 | * two |
| 884 | */ | 912 | */ |
| 913 | struct btrfs_workers generic_worker; | ||
| 885 | struct btrfs_workers workers; | 914 | struct btrfs_workers workers; |
| 886 | struct btrfs_workers delalloc_workers; | 915 | struct btrfs_workers delalloc_workers; |
| 887 | struct btrfs_workers endio_workers; | 916 | struct btrfs_workers endio_workers; |
| @@ -889,6 +918,7 @@ struct btrfs_fs_info { | |||
| 889 | struct btrfs_workers endio_meta_write_workers; | 918 | struct btrfs_workers endio_meta_write_workers; |
| 890 | struct btrfs_workers endio_write_workers; | 919 | struct btrfs_workers endio_write_workers; |
| 891 | struct btrfs_workers submit_workers; | 920 | struct btrfs_workers submit_workers; |
| 921 | struct btrfs_workers enospc_workers; | ||
| 892 | /* | 922 | /* |
| 893 | * fixup workers take dirty pages that didn't properly go through | 923 | * fixup workers take dirty pages that didn't properly go through |
| 894 | * the cow mechanism and make them safe to write. It happens | 924 | * the cow mechanism and make them safe to write. It happens |
| @@ -980,6 +1010,8 @@ struct btrfs_root { | |||
| 980 | atomic_t log_commit[2]; | 1010 | atomic_t log_commit[2]; |
| 981 | unsigned long log_transid; | 1011 | unsigned long log_transid; |
| 982 | unsigned long log_batch; | 1012 | unsigned long log_batch; |
| 1013 | pid_t log_start_pid; | ||
| 1014 | bool log_multiple_pids; | ||
| 983 | 1015 | ||
| 984 | u64 objectid; | 1016 | u64 objectid; |
| 985 | u64 last_trans; | 1017 | u64 last_trans; |
| @@ -996,10 +1028,12 @@ struct btrfs_root { | |||
| 996 | u32 stripesize; | 1028 | u32 stripesize; |
| 997 | 1029 | ||
| 998 | u32 type; | 1030 | u32 type; |
| 999 | u64 highest_inode; | 1031 | |
| 1000 | u64 last_inode_alloc; | 1032 | u64 highest_objectid; |
| 1001 | int ref_cows; | 1033 | int ref_cows; |
| 1002 | int track_dirty; | 1034 | int track_dirty; |
| 1035 | int in_radix; | ||
| 1036 | |||
| 1003 | u64 defrag_trans_start; | 1037 | u64 defrag_trans_start; |
| 1004 | struct btrfs_key defrag_progress; | 1038 | struct btrfs_key defrag_progress; |
| 1005 | struct btrfs_key defrag_max; | 1039 | struct btrfs_key defrag_max; |
| @@ -1920,8 +1954,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
| 1920 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1954 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
| 1921 | struct btrfs_root *root, unsigned long count); | 1955 | struct btrfs_root *root, unsigned long count); |
| 1922 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1956 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
| 1923 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1957 | int btrfs_pin_extent(struct btrfs_root *root, |
| 1924 | u64 bytenr, u64 num, int pin); | 1958 | u64 bytenr, u64 num, int reserved); |
| 1925 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1959 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
| 1926 | struct btrfs_root *root, struct extent_buffer *leaf); | 1960 | struct btrfs_root *root, struct extent_buffer *leaf); |
| 1927 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1961 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
| @@ -1971,9 +2005,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 1971 | u64 root_objectid, u64 owner, u64 offset); | 2005 | u64 root_objectid, u64 owner, u64 offset); |
| 1972 | 2006 | ||
| 1973 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); | 2007 | int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); |
| 2008 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | ||
| 2009 | struct btrfs_root *root); | ||
| 1974 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2010 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
| 1975 | struct btrfs_root *root, | 2011 | struct btrfs_root *root); |
| 1976 | struct extent_io_tree *unpin); | ||
| 1977 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 2012 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
| 1978 | struct btrfs_root *root, | 2013 | struct btrfs_root *root, |
| 1979 | u64 bytenr, u64 num_bytes, u64 parent, | 2014 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -1984,6 +2019,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, | |||
| 1984 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); | 2019 | int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); |
| 1985 | int btrfs_free_block_groups(struct btrfs_fs_info *info); | 2020 | int btrfs_free_block_groups(struct btrfs_fs_info *info); |
| 1986 | int btrfs_read_block_groups(struct btrfs_root *root); | 2021 | int btrfs_read_block_groups(struct btrfs_root *root); |
| 2022 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr); | ||
| 1987 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, | 2023 | int btrfs_make_block_group(struct btrfs_trans_handle *trans, |
| 1988 | struct btrfs_root *root, u64 bytes_used, | 2024 | struct btrfs_root *root, u64 bytes_used, |
| 1989 | u64 type, u64 chunk_objectid, u64 chunk_offset, | 2025 | u64 type, u64 chunk_objectid, u64 chunk_offset, |
| @@ -1997,7 +2033,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | |||
| 1997 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2033 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
| 1998 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2034 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 1999 | 2035 | ||
| 2000 | int btrfs_check_metadata_free_space(struct btrfs_root *root); | 2036 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); |
| 2037 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | ||
| 2038 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2039 | struct inode *inode, int num_items); | ||
| 2040 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2041 | struct inode *inode, int num_items); | ||
| 2001 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2042 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, |
| 2002 | u64 bytes); | 2043 | u64 bytes); |
| 2003 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2044 | void btrfs_free_reserved_data_space(struct btrfs_root *root, |
| @@ -2006,7 +2047,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | |||
| 2006 | u64 bytes); | 2047 | u64 bytes); |
| 2007 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2048 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, |
| 2008 | u64 bytes); | 2049 | u64 bytes); |
| 2009 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info); | ||
| 2010 | /* ctree.c */ | 2050 | /* ctree.c */ |
| 2011 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2051 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 2012 | int level, int *slot); | 2052 | int level, int *slot); |
| @@ -2100,12 +2140,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
| 2100 | struct extent_buffer *parent); | 2140 | struct extent_buffer *parent); |
| 2101 | /* root-item.c */ | 2141 | /* root-item.c */ |
| 2102 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 2142 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
| 2103 | struct btrfs_path *path, | 2143 | struct btrfs_path *path, |
| 2104 | u64 root_id, u64 ref_id); | 2144 | u64 root_id, u64 ref_id); |
| 2105 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 2145 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
| 2106 | struct btrfs_root *tree_root, | 2146 | struct btrfs_root *tree_root, |
| 2107 | u64 root_id, u8 type, u64 ref_id, | 2147 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
| 2108 | u64 dirid, u64 sequence, | 2148 | const char *name, int name_len); |
| 2149 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | ||
| 2150 | struct btrfs_root *tree_root, | ||
| 2151 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, | ||
| 2109 | const char *name, int name_len); | 2152 | const char *name, int name_len); |
| 2110 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 2153 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 2111 | struct btrfs_key *key); | 2154 | struct btrfs_key *key); |
| @@ -2120,6 +2163,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct | |||
| 2120 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, | 2163 | int btrfs_search_root(struct btrfs_root *root, u64 search_start, |
| 2121 | u64 *found_objectid); | 2164 | u64 *found_objectid); |
| 2122 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); | 2165 | int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid); |
| 2166 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root); | ||
| 2123 | int btrfs_set_root_node(struct btrfs_root_item *item, | 2167 | int btrfs_set_root_node(struct btrfs_root_item *item, |
| 2124 | struct extent_buffer *node); | 2168 | struct extent_buffer *node); |
| 2125 | /* dir-item.c */ | 2169 | /* dir-item.c */ |
| @@ -2138,6 +2182,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
| 2138 | struct btrfs_path *path, u64 dir, | 2182 | struct btrfs_path *path, u64 dir, |
| 2139 | u64 objectid, const char *name, int name_len, | 2183 | u64 objectid, const char *name, int name_len, |
| 2140 | int mod); | 2184 | int mod); |
| 2185 | struct btrfs_dir_item * | ||
| 2186 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
| 2187 | struct btrfs_path *path, u64 dirid, | ||
| 2188 | const char *name, int name_len); | ||
| 2141 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, | 2189 | struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, |
| 2142 | struct btrfs_path *path, | 2190 | struct btrfs_path *path, |
| 2143 | const char *name, int name_len); | 2191 | const char *name, int name_len); |
| @@ -2160,6 +2208,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, | |||
| 2160 | struct btrfs_root *root, u64 offset); | 2208 | struct btrfs_root *root, u64 offset); |
| 2161 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, | 2209 | int btrfs_del_orphan_item(struct btrfs_trans_handle *trans, |
| 2162 | struct btrfs_root *root, u64 offset); | 2210 | struct btrfs_root *root, u64 offset); |
| 2211 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset); | ||
| 2163 | 2212 | ||
| 2164 | /* inode-map.c */ | 2213 | /* inode-map.c */ |
| 2165 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 2214 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
| @@ -2232,6 +2281,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
| 2232 | int btrfs_add_link(struct btrfs_trans_handle *trans, | 2281 | int btrfs_add_link(struct btrfs_trans_handle *trans, |
| 2233 | struct inode *parent_inode, struct inode *inode, | 2282 | struct inode *parent_inode, struct inode *inode, |
| 2234 | const char *name, int name_len, int add_backref, u64 index); | 2283 | const char *name, int name_len, int add_backref, u64 index); |
| 2284 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
| 2285 | struct btrfs_root *root, | ||
| 2286 | struct inode *dir, u64 objectid, | ||
| 2287 | const char *name, int name_len); | ||
| 2235 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | 2288 | int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, |
| 2236 | struct btrfs_root *root, | 2289 | struct btrfs_root *root, |
| 2237 | struct inode *inode, u64 new_size, | 2290 | struct inode *inode, u64 new_size, |
| @@ -2242,7 +2295,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); | |||
| 2242 | int btrfs_writepages(struct address_space *mapping, | 2295 | int btrfs_writepages(struct address_space *mapping, |
| 2243 | struct writeback_control *wbc); | 2296 | struct writeback_control *wbc); |
| 2244 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 2297 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| 2245 | struct btrfs_root *new_root, struct dentry *dentry, | 2298 | struct btrfs_root *new_root, |
| 2246 | u64 new_dirid, u64 alloc_hint); | 2299 | u64 new_dirid, u64 alloc_hint); |
| 2247 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | 2300 | int btrfs_merge_bio_hook(struct page *page, unsigned long offset, |
| 2248 | size_t size, struct bio *bio, unsigned long bio_flags); | 2301 | size_t size, struct bio *bio, unsigned long bio_flags); |
| @@ -2258,6 +2311,7 @@ int btrfs_write_inode(struct inode *inode, int wait); | |||
| 2258 | void btrfs_dirty_inode(struct inode *inode); | 2311 | void btrfs_dirty_inode(struct inode *inode); |
| 2259 | struct inode *btrfs_alloc_inode(struct super_block *sb); | 2312 | struct inode *btrfs_alloc_inode(struct super_block *sb); |
| 2260 | void btrfs_destroy_inode(struct inode *inode); | 2313 | void btrfs_destroy_inode(struct inode *inode); |
| 2314 | void btrfs_drop_inode(struct inode *inode); | ||
| 2261 | int btrfs_init_cachep(void); | 2315 | int btrfs_init_cachep(void); |
| 2262 | void btrfs_destroy_cachep(void); | 2316 | void btrfs_destroy_cachep(void); |
| 2263 | long btrfs_ioctl_trans_end(struct file *file); | 2317 | long btrfs_ioctl_trans_end(struct file *file); |
| @@ -2275,6 +2329,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | |||
| 2275 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2329 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2276 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2330 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
| 2277 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2331 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
| 2332 | int btrfs_invalidate_inodes(struct btrfs_root *root); | ||
| 2333 | extern const struct dentry_operations btrfs_dentry_operations; | ||
| 2278 | 2334 | ||
| 2279 | /* ioctl.c */ | 2335 | /* ioctl.c */ |
| 2280 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 2336 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
| @@ -2286,11 +2342,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | |||
| 2286 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2342 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 2287 | int skip_pinned); | 2343 | int skip_pinned); |
| 2288 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | 2344 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); |
| 2289 | extern struct file_operations btrfs_file_operations; | 2345 | extern const struct file_operations btrfs_file_operations; |
| 2290 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2346 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 2291 | struct btrfs_root *root, struct inode *inode, | 2347 | struct btrfs_root *root, struct inode *inode, |
| 2292 | u64 start, u64 end, u64 locked_end, | 2348 | u64 start, u64 end, u64 locked_end, |
| 2293 | u64 inline_limit, u64 *hint_block); | 2349 | u64 inline_limit, u64 *hint_block, int drop_cache); |
| 2294 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2350 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
| 2295 | struct btrfs_root *root, | 2351 | struct btrfs_root *root, |
| 2296 | struct inode *inode, u64 start, u64 end); | 2352 | struct inode *inode, u64 start, u64 end); |
| @@ -2317,7 +2373,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options); | |||
| 2317 | int btrfs_sync_fs(struct super_block *sb, int wait); | 2373 | int btrfs_sync_fs(struct super_block *sb, int wait); |
| 2318 | 2374 | ||
| 2319 | /* acl.c */ | 2375 | /* acl.c */ |
| 2320 | #ifdef CONFIG_FS_POSIX_ACL | 2376 | #ifdef CONFIG_BTRFS_POSIX_ACL |
| 2321 | int btrfs_check_acl(struct inode *inode, int mask); | 2377 | int btrfs_check_acl(struct inode *inode, int mask); |
| 2322 | #else | 2378 | #else |
| 2323 | #define btrfs_check_acl NULL | 2379 | #define btrfs_check_acl NULL |
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 1d70236ba00c..f3a6075519cc 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
| @@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, | |||
| 281 | return btrfs_match_dir_item_name(root, path, name, name_len); | 281 | return btrfs_match_dir_item_name(root, path, name, name_len); |
| 282 | } | 282 | } |
| 283 | 283 | ||
| 284 | struct btrfs_dir_item * | ||
| 285 | btrfs_search_dir_index_item(struct btrfs_root *root, | ||
| 286 | struct btrfs_path *path, u64 dirid, | ||
| 287 | const char *name, int name_len) | ||
| 288 | { | ||
| 289 | struct extent_buffer *leaf; | ||
| 290 | struct btrfs_dir_item *di; | ||
| 291 | struct btrfs_key key; | ||
| 292 | u32 nritems; | ||
| 293 | int ret; | ||
| 294 | |||
| 295 | key.objectid = dirid; | ||
| 296 | key.type = BTRFS_DIR_INDEX_KEY; | ||
| 297 | key.offset = 0; | ||
| 298 | |||
| 299 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 300 | if (ret < 0) | ||
| 301 | return ERR_PTR(ret); | ||
| 302 | |||
| 303 | leaf = path->nodes[0]; | ||
| 304 | nritems = btrfs_header_nritems(leaf); | ||
| 305 | |||
| 306 | while (1) { | ||
| 307 | if (path->slots[0] >= nritems) { | ||
| 308 | ret = btrfs_next_leaf(root, path); | ||
| 309 | if (ret < 0) | ||
| 310 | return ERR_PTR(ret); | ||
| 311 | if (ret > 0) | ||
| 312 | break; | ||
| 313 | leaf = path->nodes[0]; | ||
| 314 | nritems = btrfs_header_nritems(leaf); | ||
| 315 | continue; | ||
| 316 | } | ||
| 317 | |||
| 318 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 319 | if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY) | ||
| 320 | break; | ||
| 321 | |||
| 322 | di = btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 323 | if (di) | ||
| 324 | return di; | ||
| 325 | |||
| 326 | path->slots[0]++; | ||
| 327 | } | ||
| 328 | return NULL; | ||
| 329 | } | ||
| 330 | |||
| 284 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, | 331 | struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, |
| 285 | struct btrfs_root *root, | 332 | struct btrfs_root *root, |
| 286 | struct btrfs_path *path, u64 dir, | 333 | struct btrfs_path *path, u64 dir, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e83be2e4602c..100551a66c46 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | 41 | ||
| 42 | static struct extent_io_ops btree_extent_io_ops; | 42 | static struct extent_io_ops btree_extent_io_ops; |
| 43 | static void end_workqueue_fn(struct btrfs_work *work); | 43 | static void end_workqueue_fn(struct btrfs_work *work); |
| 44 | static void free_fs_root(struct btrfs_root *root); | ||
| 44 | 45 | ||
| 45 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | 46 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); |
| 46 | 47 | ||
| @@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 123 | struct extent_map *em; | 124 | struct extent_map *em; |
| 124 | int ret; | 125 | int ret; |
| 125 | 126 | ||
| 126 | spin_lock(&em_tree->lock); | 127 | read_lock(&em_tree->lock); |
| 127 | em = lookup_extent_mapping(em_tree, start, len); | 128 | em = lookup_extent_mapping(em_tree, start, len); |
| 128 | if (em) { | 129 | if (em) { |
| 129 | em->bdev = | 130 | em->bdev = |
| 130 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 131 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
| 131 | spin_unlock(&em_tree->lock); | 132 | read_unlock(&em_tree->lock); |
| 132 | goto out; | 133 | goto out; |
| 133 | } | 134 | } |
| 134 | spin_unlock(&em_tree->lock); | 135 | read_unlock(&em_tree->lock); |
| 135 | 136 | ||
| 136 | em = alloc_extent_map(GFP_NOFS); | 137 | em = alloc_extent_map(GFP_NOFS); |
| 137 | if (!em) { | 138 | if (!em) { |
| @@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 144 | em->block_start = 0; | 145 | em->block_start = 0; |
| 145 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 146 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
| 146 | 147 | ||
| 147 | spin_lock(&em_tree->lock); | 148 | write_lock(&em_tree->lock); |
| 148 | ret = add_extent_mapping(em_tree, em); | 149 | ret = add_extent_mapping(em_tree, em); |
| 149 | if (ret == -EEXIST) { | 150 | if (ret == -EEXIST) { |
| 150 | u64 failed_start = em->start; | 151 | u64 failed_start = em->start; |
| @@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 163 | free_extent_map(em); | 164 | free_extent_map(em); |
| 164 | em = NULL; | 165 | em = NULL; |
| 165 | } | 166 | } |
| 166 | spin_unlock(&em_tree->lock); | 167 | write_unlock(&em_tree->lock); |
| 167 | 168 | ||
| 168 | if (ret) | 169 | if (ret) |
| 169 | em = ERR_PTR(ret); | 170 | em = ERR_PTR(ret); |
| @@ -772,7 +773,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
| 772 | } | 773 | } |
| 773 | } | 774 | } |
| 774 | 775 | ||
| 775 | static struct address_space_operations btree_aops = { | 776 | static const struct address_space_operations btree_aops = { |
| 776 | .readpage = btree_readpage, | 777 | .readpage = btree_readpage, |
| 777 | .writepage = btree_writepage, | 778 | .writepage = btree_writepage, |
| 778 | .writepages = btree_writepages, | 779 | .writepages = btree_writepages, |
| @@ -821,14 +822,14 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
| 821 | 822 | ||
| 822 | int btrfs_write_tree_block(struct extent_buffer *buf) | 823 | int btrfs_write_tree_block(struct extent_buffer *buf) |
| 823 | { | 824 | { |
| 824 | return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, | 825 | return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, |
| 825 | buf->start + buf->len - 1, WB_SYNC_ALL); | 826 | buf->start + buf->len - 1); |
| 826 | } | 827 | } |
| 827 | 828 | ||
| 828 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | 829 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) |
| 829 | { | 830 | { |
| 830 | return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, | 831 | return filemap_fdatawait_range(buf->first_page->mapping, |
| 831 | buf->start, buf->start + buf->len - 1); | 832 | buf->start, buf->start + buf->len - 1); |
| 832 | } | 833 | } |
| 833 | 834 | ||
| 834 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | 835 | struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, |
| @@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 895 | root->fs_info = fs_info; | 896 | root->fs_info = fs_info; |
| 896 | root->objectid = objectid; | 897 | root->objectid = objectid; |
| 897 | root->last_trans = 0; | 898 | root->last_trans = 0; |
| 898 | root->highest_inode = 0; | 899 | root->highest_objectid = 0; |
| 899 | root->last_inode_alloc = 0; | ||
| 900 | root->name = NULL; | 900 | root->name = NULL; |
| 901 | root->in_sysfs = 0; | 901 | root->in_sysfs = 0; |
| 902 | root->inode_tree.rb_node = NULL; | 902 | root->inode_tree.rb_node = NULL; |
| @@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
| 952 | root, fs_info, objectid); | 952 | root, fs_info, objectid); |
| 953 | ret = btrfs_find_last_root(tree_root, objectid, | 953 | ret = btrfs_find_last_root(tree_root, objectid, |
| 954 | &root->root_item, &root->root_key); | 954 | &root->root_item, &root->root_key); |
| 955 | if (ret > 0) | ||
| 956 | return -ENOENT; | ||
| 955 | BUG_ON(ret); | 957 | BUG_ON(ret); |
| 956 | 958 | ||
| 957 | generation = btrfs_root_generation(&root->root_item); | 959 | generation = btrfs_root_generation(&root->root_item); |
| 958 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 960 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
| 959 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 961 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
| 960 | blocksize, generation); | 962 | blocksize, generation); |
| 961 | root->commit_root = btrfs_root_node(root); | ||
| 962 | BUG_ON(!root->node); | 963 | BUG_ON(!root->node); |
| 964 | root->commit_root = btrfs_root_node(root); | ||
| 963 | return 0; | 965 | return 0; |
| 964 | } | 966 | } |
| 965 | 967 | ||
| @@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 1095 | struct btrfs_fs_info *fs_info = tree_root->fs_info; | 1097 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
| 1096 | struct btrfs_path *path; | 1098 | struct btrfs_path *path; |
| 1097 | struct extent_buffer *l; | 1099 | struct extent_buffer *l; |
| 1098 | u64 highest_inode; | ||
| 1099 | u64 generation; | 1100 | u64 generation; |
| 1100 | u32 blocksize; | 1101 | u32 blocksize; |
| 1101 | int ret = 0; | 1102 | int ret = 0; |
| @@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 1110 | kfree(root); | 1111 | kfree(root); |
| 1111 | return ERR_PTR(ret); | 1112 | return ERR_PTR(ret); |
| 1112 | } | 1113 | } |
| 1113 | goto insert; | 1114 | goto out; |
| 1114 | } | 1115 | } |
| 1115 | 1116 | ||
| 1116 | __setup_root(tree_root->nodesize, tree_root->leafsize, | 1117 | __setup_root(tree_root->nodesize, tree_root->leafsize, |
| @@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 1120 | path = btrfs_alloc_path(); | 1121 | path = btrfs_alloc_path(); |
| 1121 | BUG_ON(!path); | 1122 | BUG_ON(!path); |
| 1122 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); | 1123 | ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); |
| 1123 | if (ret != 0) { | 1124 | if (ret == 0) { |
| 1124 | if (ret > 0) | 1125 | l = path->nodes[0]; |
| 1125 | ret = -ENOENT; | 1126 | read_extent_buffer(l, &root->root_item, |
| 1126 | goto out; | 1127 | btrfs_item_ptr_offset(l, path->slots[0]), |
| 1128 | sizeof(root->root_item)); | ||
| 1129 | memcpy(&root->root_key, location, sizeof(*location)); | ||
| 1127 | } | 1130 | } |
| 1128 | l = path->nodes[0]; | ||
| 1129 | read_extent_buffer(l, &root->root_item, | ||
| 1130 | btrfs_item_ptr_offset(l, path->slots[0]), | ||
| 1131 | sizeof(root->root_item)); | ||
| 1132 | memcpy(&root->root_key, location, sizeof(*location)); | ||
| 1133 | ret = 0; | ||
| 1134 | out: | ||
| 1135 | btrfs_release_path(root, path); | ||
| 1136 | btrfs_free_path(path); | 1131 | btrfs_free_path(path); |
| 1137 | if (ret) { | 1132 | if (ret) { |
| 1138 | kfree(root); | 1133 | if (ret > 0) |
| 1134 | ret = -ENOENT; | ||
| 1139 | return ERR_PTR(ret); | 1135 | return ERR_PTR(ret); |
| 1140 | } | 1136 | } |
| 1137 | |||
| 1141 | generation = btrfs_root_generation(&root->root_item); | 1138 | generation = btrfs_root_generation(&root->root_item); |
| 1142 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1139 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
| 1143 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1140 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
| 1144 | blocksize, generation); | 1141 | blocksize, generation); |
| 1145 | root->commit_root = btrfs_root_node(root); | 1142 | root->commit_root = btrfs_root_node(root); |
| 1146 | BUG_ON(!root->node); | 1143 | BUG_ON(!root->node); |
| 1147 | insert: | 1144 | out: |
| 1148 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1145 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) |
| 1149 | root->ref_cows = 1; | 1146 | root->ref_cows = 1; |
| 1150 | ret = btrfs_find_highest_inode(root, &highest_inode); | 1147 | |
| 1151 | if (ret == 0) { | ||
| 1152 | root->highest_inode = highest_inode; | ||
| 1153 | root->last_inode_alloc = highest_inode; | ||
| 1154 | } | ||
| 1155 | } | ||
| 1156 | return root; | 1148 | return root; |
| 1157 | } | 1149 | } |
| 1158 | 1150 | ||
| @@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | |||
| 1187 | return fs_info->dev_root; | 1179 | return fs_info->dev_root; |
| 1188 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) | 1180 | if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) |
| 1189 | return fs_info->csum_root; | 1181 | return fs_info->csum_root; |
| 1190 | 1182 | again: | |
| 1183 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
| 1191 | root = radix_tree_lookup(&fs_info->fs_roots_radix, | 1184 | root = radix_tree_lookup(&fs_info->fs_roots_radix, |
| 1192 | (unsigned long)location->objectid); | 1185 | (unsigned long)location->objectid); |
| 1186 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1193 | if (root) | 1187 | if (root) |
| 1194 | return root; | 1188 | return root; |
| 1195 | 1189 | ||
| 1190 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
| 1191 | if (ret == 0) | ||
| 1192 | ret = -ENOENT; | ||
| 1193 | if (ret < 0) | ||
| 1194 | return ERR_PTR(ret); | ||
| 1195 | |||
| 1196 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1196 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
| 1197 | if (IS_ERR(root)) | 1197 | if (IS_ERR(root)) |
| 1198 | return root; | 1198 | return root; |
| 1199 | 1199 | ||
| 1200 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
| 1200 | set_anon_super(&root->anon_super, NULL); | 1201 | set_anon_super(&root->anon_super, NULL); |
| 1201 | 1202 | ||
| 1203 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | ||
| 1204 | if (ret) | ||
| 1205 | goto fail; | ||
| 1206 | |||
| 1207 | spin_lock(&fs_info->fs_roots_radix_lock); | ||
| 1202 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1208 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
| 1203 | (unsigned long)root->root_key.objectid, | 1209 | (unsigned long)root->root_key.objectid, |
| 1204 | root); | 1210 | root); |
| 1211 | if (ret == 0) | ||
| 1212 | root->in_radix = 1; | ||
| 1213 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 1214 | radix_tree_preload_end(); | ||
| 1205 | if (ret) { | 1215 | if (ret) { |
| 1206 | free_extent_buffer(root->node); | 1216 | if (ret == -EEXIST) { |
| 1207 | kfree(root); | 1217 | free_fs_root(root); |
| 1208 | return ERR_PTR(ret); | 1218 | goto again; |
| 1219 | } | ||
| 1220 | goto fail; | ||
| 1209 | } | 1221 | } |
| 1210 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 1222 | |
| 1211 | ret = btrfs_find_dead_roots(fs_info->tree_root, | 1223 | ret = btrfs_find_dead_roots(fs_info->tree_root, |
| 1212 | root->root_key.objectid); | 1224 | root->root_key.objectid); |
| 1213 | BUG_ON(ret); | 1225 | WARN_ON(ret); |
| 1226 | |||
| 1227 | if (!(fs_info->sb->s_flags & MS_RDONLY)) | ||
| 1214 | btrfs_orphan_cleanup(root); | 1228 | btrfs_orphan_cleanup(root); |
| 1215 | } | 1229 | |
| 1216 | return root; | 1230 | return root; |
| 1231 | fail: | ||
| 1232 | free_fs_root(root); | ||
| 1233 | return ERR_PTR(ret); | ||
| 1217 | } | 1234 | } |
| 1218 | 1235 | ||
| 1219 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | 1236 | struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, |
| 1220 | struct btrfs_key *location, | 1237 | struct btrfs_key *location, |
| 1221 | const char *name, int namelen) | 1238 | const char *name, int namelen) |
| 1222 | { | 1239 | { |
| 1240 | return btrfs_read_fs_root_no_name(fs_info, location); | ||
| 1241 | #if 0 | ||
| 1223 | struct btrfs_root *root; | 1242 | struct btrfs_root *root; |
| 1224 | int ret; | 1243 | int ret; |
| 1225 | 1244 | ||
| @@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
| 1236 | kfree(root); | 1255 | kfree(root); |
| 1237 | return ERR_PTR(ret); | 1256 | return ERR_PTR(ret); |
| 1238 | } | 1257 | } |
| 1239 | #if 0 | 1258 | |
| 1240 | ret = btrfs_sysfs_add_root(root); | 1259 | ret = btrfs_sysfs_add_root(root); |
| 1241 | if (ret) { | 1260 | if (ret) { |
| 1242 | free_extent_buffer(root->node); | 1261 | free_extent_buffer(root->node); |
| @@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, | |||
| 1244 | kfree(root); | 1263 | kfree(root); |
| 1245 | return ERR_PTR(ret); | 1264 | return ERR_PTR(ret); |
| 1246 | } | 1265 | } |
| 1247 | #endif | ||
| 1248 | root->in_sysfs = 1; | 1266 | root->in_sysfs = 1; |
| 1249 | return root; | 1267 | return root; |
| 1268 | #endif | ||
| 1250 | } | 1269 | } |
| 1251 | 1270 | ||
| 1252 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) | 1271 | static int btrfs_congested_fn(void *congested_data, int bdi_bits) |
| @@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | |||
| 1325 | offset = page_offset(page); | 1344 | offset = page_offset(page); |
| 1326 | 1345 | ||
| 1327 | em_tree = &BTRFS_I(inode)->extent_tree; | 1346 | em_tree = &BTRFS_I(inode)->extent_tree; |
| 1328 | spin_lock(&em_tree->lock); | 1347 | read_lock(&em_tree->lock); |
| 1329 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | 1348 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); |
| 1330 | spin_unlock(&em_tree->lock); | 1349 | read_unlock(&em_tree->lock); |
| 1331 | if (!em) { | 1350 | if (!em) { |
| 1332 | __unplug_io_fn(bdi, page); | 1351 | __unplug_io_fn(bdi, page); |
| 1333 | return; | 1352 | return; |
| @@ -1352,6 +1371,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
| 1352 | { | 1371 | { |
| 1353 | int err; | 1372 | int err; |
| 1354 | 1373 | ||
| 1374 | bdi->name = "btrfs"; | ||
| 1355 | bdi->capabilities = BDI_CAP_MAP_COPY; | 1375 | bdi->capabilities = BDI_CAP_MAP_COPY; |
| 1356 | err = bdi_init(bdi); | 1376 | err = bdi_init(bdi); |
| 1357 | if (err) | 1377 | if (err) |
| @@ -1359,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
| 1359 | 1379 | ||
| 1360 | err = bdi_register(bdi, NULL, "btrfs-%d", | 1380 | err = bdi_register(bdi, NULL, "btrfs-%d", |
| 1361 | atomic_inc_return(&btrfs_bdi_num)); | 1381 | atomic_inc_return(&btrfs_bdi_num)); |
| 1362 | if (err) | 1382 | if (err) { |
| 1383 | bdi_destroy(bdi); | ||
| 1363 | return err; | 1384 | return err; |
| 1385 | } | ||
| 1364 | 1386 | ||
| 1365 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1387 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
| 1366 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1388 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
| @@ -1450,9 +1472,12 @@ static int cleaner_kthread(void *arg) | |||
| 1450 | break; | 1472 | break; |
| 1451 | 1473 | ||
| 1452 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1474 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
| 1453 | mutex_lock(&root->fs_info->cleaner_mutex); | 1475 | |
| 1454 | btrfs_clean_old_snapshots(root); | 1476 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
| 1455 | mutex_unlock(&root->fs_info->cleaner_mutex); | 1477 | mutex_trylock(&root->fs_info->cleaner_mutex)) { |
| 1478 | btrfs_clean_old_snapshots(root); | ||
| 1479 | mutex_unlock(&root->fs_info->cleaner_mutex); | ||
| 1480 | } | ||
| 1456 | 1481 | ||
| 1457 | if (freezing(current)) { | 1482 | if (freezing(current)) { |
| 1458 | refrigerator(); | 1483 | refrigerator(); |
| @@ -1557,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1557 | err = -ENOMEM; | 1582 | err = -ENOMEM; |
| 1558 | goto fail; | 1583 | goto fail; |
| 1559 | } | 1584 | } |
| 1560 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); | 1585 | |
| 1586 | ret = init_srcu_struct(&fs_info->subvol_srcu); | ||
| 1587 | if (ret) { | ||
| 1588 | err = ret; | ||
| 1589 | goto fail; | ||
| 1590 | } | ||
| 1591 | |||
| 1592 | ret = setup_bdi(fs_info, &fs_info->bdi); | ||
| 1593 | if (ret) { | ||
| 1594 | err = ret; | ||
| 1595 | goto fail_srcu; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | fs_info->btree_inode = new_inode(sb); | ||
| 1599 | if (!fs_info->btree_inode) { | ||
| 1600 | err = -ENOMEM; | ||
| 1601 | goto fail_bdi; | ||
| 1602 | } | ||
| 1603 | |||
| 1604 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | ||
| 1561 | INIT_LIST_HEAD(&fs_info->trans_list); | 1605 | INIT_LIST_HEAD(&fs_info->trans_list); |
| 1562 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1606 | INIT_LIST_HEAD(&fs_info->dead_roots); |
| 1563 | INIT_LIST_HEAD(&fs_info->hashers); | 1607 | INIT_LIST_HEAD(&fs_info->hashers); |
| 1564 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1608 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
| 1565 | INIT_LIST_HEAD(&fs_info->ordered_operations); | 1609 | INIT_LIST_HEAD(&fs_info->ordered_operations); |
| 1610 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | ||
| 1566 | spin_lock_init(&fs_info->delalloc_lock); | 1611 | spin_lock_init(&fs_info->delalloc_lock); |
| 1567 | spin_lock_init(&fs_info->new_trans_lock); | 1612 | spin_lock_init(&fs_info->new_trans_lock); |
| 1568 | spin_lock_init(&fs_info->ref_cache_lock); | 1613 | spin_lock_init(&fs_info->ref_cache_lock); |
| 1614 | spin_lock_init(&fs_info->fs_roots_radix_lock); | ||
| 1569 | 1615 | ||
| 1570 | init_completion(&fs_info->kobj_unregister); | 1616 | init_completion(&fs_info->kobj_unregister); |
| 1571 | fs_info->tree_root = tree_root; | 1617 | fs_info->tree_root = tree_root; |
| @@ -1584,12 +1630,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1584 | fs_info->sb = sb; | 1630 | fs_info->sb = sb; |
| 1585 | fs_info->max_extent = (u64)-1; | 1631 | fs_info->max_extent = (u64)-1; |
| 1586 | fs_info->max_inline = 8192 * 1024; | 1632 | fs_info->max_inline = 8192 * 1024; |
| 1587 | if (setup_bdi(fs_info, &fs_info->bdi)) | 1633 | fs_info->metadata_ratio = 0; |
| 1588 | goto fail_bdi; | ||
| 1589 | fs_info->btree_inode = new_inode(sb); | ||
| 1590 | fs_info->btree_inode->i_ino = 1; | ||
| 1591 | fs_info->btree_inode->i_nlink = 1; | ||
| 1592 | fs_info->metadata_ratio = 8; | ||
| 1593 | 1634 | ||
| 1594 | fs_info->thread_pool_size = min_t(unsigned long, | 1635 | fs_info->thread_pool_size = min_t(unsigned long, |
| 1595 | num_online_cpus() + 2, 8); | 1636 | num_online_cpus() + 2, 8); |
| @@ -1599,7 +1640,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1599 | 1640 | ||
| 1600 | sb->s_blocksize = 4096; | 1641 | sb->s_blocksize = 4096; |
| 1601 | sb->s_blocksize_bits = blksize_bits(4096); | 1642 | sb->s_blocksize_bits = blksize_bits(4096); |
| 1643 | sb->s_bdi = &fs_info->bdi; | ||
| 1602 | 1644 | ||
| 1645 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; | ||
| 1646 | fs_info->btree_inode->i_nlink = 1; | ||
| 1603 | /* | 1647 | /* |
| 1604 | * we set the i_size on the btree inode to the max possible int. | 1648 | * we set the i_size on the btree inode to the max possible int. |
| 1605 | * the real end of the address space is determined by all of | 1649 | * the real end of the address space is determined by all of |
| @@ -1618,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1618 | 1662 | ||
| 1619 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | 1663 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; |
| 1620 | 1664 | ||
| 1665 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
| 1666 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
| 1667 | sizeof(struct btrfs_key)); | ||
| 1668 | BTRFS_I(fs_info->btree_inode)->dummy_inode = 1; | ||
| 1669 | insert_inode_hash(fs_info->btree_inode); | ||
| 1670 | |||
| 1621 | spin_lock_init(&fs_info->block_group_cache_lock); | 1671 | spin_lock_init(&fs_info->block_group_cache_lock); |
| 1622 | fs_info->block_group_cache_tree.rb_node = NULL; | 1672 | fs_info->block_group_cache_tree.rb_node = NULL; |
| 1623 | 1673 | ||
| 1624 | extent_io_tree_init(&fs_info->pinned_extents, | 1674 | extent_io_tree_init(&fs_info->freed_extents[0], |
| 1625 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1675 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
| 1676 | extent_io_tree_init(&fs_info->freed_extents[1], | ||
| 1677 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 1678 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
| 1626 | fs_info->do_barriers = 1; | 1679 | fs_info->do_barriers = 1; |
| 1627 | 1680 | ||
| 1628 | BTRFS_I(fs_info->btree_inode)->root = tree_root; | ||
| 1629 | memset(&BTRFS_I(fs_info->btree_inode)->location, 0, | ||
| 1630 | sizeof(struct btrfs_key)); | ||
| 1631 | insert_inode_hash(fs_info->btree_inode); | ||
| 1632 | 1681 | ||
| 1633 | mutex_init(&fs_info->trans_mutex); | 1682 | mutex_init(&fs_info->trans_mutex); |
| 1634 | mutex_init(&fs_info->ordered_operations_mutex); | 1683 | mutex_init(&fs_info->ordered_operations_mutex); |
| 1635 | mutex_init(&fs_info->tree_log_mutex); | 1684 | mutex_init(&fs_info->tree_log_mutex); |
| 1636 | mutex_init(&fs_info->drop_mutex); | ||
| 1637 | mutex_init(&fs_info->chunk_mutex); | 1685 | mutex_init(&fs_info->chunk_mutex); |
| 1638 | mutex_init(&fs_info->transaction_kthread_mutex); | 1686 | mutex_init(&fs_info->transaction_kthread_mutex); |
| 1639 | mutex_init(&fs_info->cleaner_mutex); | 1687 | mutex_init(&fs_info->cleaner_mutex); |
| 1640 | mutex_init(&fs_info->volume_mutex); | 1688 | mutex_init(&fs_info->volume_mutex); |
| 1641 | mutex_init(&fs_info->tree_reloc_mutex); | ||
| 1642 | init_rwsem(&fs_info->extent_commit_sem); | 1689 | init_rwsem(&fs_info->extent_commit_sem); |
| 1690 | init_rwsem(&fs_info->subvol_sem); | ||
| 1643 | 1691 | ||
| 1644 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | 1692 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); |
| 1645 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | 1693 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); |
| @@ -1699,20 +1747,24 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1699 | goto fail_iput; | 1747 | goto fail_iput; |
| 1700 | } | 1748 | } |
| 1701 | 1749 | ||
| 1702 | /* | 1750 | btrfs_init_workers(&fs_info->generic_worker, |
| 1703 | * we need to start all the end_io workers up front because the | 1751 | "genwork", 1, NULL); |
| 1704 | * queue work function gets called at interrupt time, and so it | 1752 | |
| 1705 | * cannot dynamically grow. | ||
| 1706 | */ | ||
| 1707 | btrfs_init_workers(&fs_info->workers, "worker", | 1753 | btrfs_init_workers(&fs_info->workers, "worker", |
| 1708 | fs_info->thread_pool_size); | 1754 | fs_info->thread_pool_size, |
| 1755 | &fs_info->generic_worker); | ||
| 1709 | 1756 | ||
| 1710 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 1757 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", |
| 1711 | fs_info->thread_pool_size); | 1758 | fs_info->thread_pool_size, |
| 1759 | &fs_info->generic_worker); | ||
| 1712 | 1760 | ||
| 1713 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 1761 | btrfs_init_workers(&fs_info->submit_workers, "submit", |
| 1714 | min_t(u64, fs_devices->num_devices, | 1762 | min_t(u64, fs_devices->num_devices, |
| 1715 | fs_info->thread_pool_size)); | 1763 | fs_info->thread_pool_size), |
| 1764 | &fs_info->generic_worker); | ||
| 1765 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
| 1766 | fs_info->thread_pool_size, | ||
| 1767 | &fs_info->generic_worker); | ||
| 1716 | 1768 | ||
| 1717 | /* a higher idle thresh on the submit workers makes it much more | 1769 | /* a higher idle thresh on the submit workers makes it much more |
| 1718 | * likely that bios will be send down in a sane order to the | 1770 | * likely that bios will be send down in a sane order to the |
| @@ -1726,15 +1778,20 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1726 | fs_info->delalloc_workers.idle_thresh = 2; | 1778 | fs_info->delalloc_workers.idle_thresh = 2; |
| 1727 | fs_info->delalloc_workers.ordered = 1; | 1779 | fs_info->delalloc_workers.ordered = 1; |
| 1728 | 1780 | ||
| 1729 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); | 1781 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, |
| 1782 | &fs_info->generic_worker); | ||
| 1730 | btrfs_init_workers(&fs_info->endio_workers, "endio", | 1783 | btrfs_init_workers(&fs_info->endio_workers, "endio", |
| 1731 | fs_info->thread_pool_size); | 1784 | fs_info->thread_pool_size, |
| 1785 | &fs_info->generic_worker); | ||
| 1732 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | 1786 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", |
| 1733 | fs_info->thread_pool_size); | 1787 | fs_info->thread_pool_size, |
| 1788 | &fs_info->generic_worker); | ||
| 1734 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | 1789 | btrfs_init_workers(&fs_info->endio_meta_write_workers, |
| 1735 | "endio-meta-write", fs_info->thread_pool_size); | 1790 | "endio-meta-write", fs_info->thread_pool_size, |
| 1791 | &fs_info->generic_worker); | ||
| 1736 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | 1792 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", |
| 1737 | fs_info->thread_pool_size); | 1793 | fs_info->thread_pool_size, |
| 1794 | &fs_info->generic_worker); | ||
| 1738 | 1795 | ||
| 1739 | /* | 1796 | /* |
| 1740 | * endios are largely parallel and should have a very | 1797 | * endios are largely parallel and should have a very |
| @@ -1743,20 +1800,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1743 | fs_info->endio_workers.idle_thresh = 4; | 1800 | fs_info->endio_workers.idle_thresh = 4; |
| 1744 | fs_info->endio_meta_workers.idle_thresh = 4; | 1801 | fs_info->endio_meta_workers.idle_thresh = 4; |
| 1745 | 1802 | ||
| 1746 | fs_info->endio_write_workers.idle_thresh = 64; | 1803 | fs_info->endio_write_workers.idle_thresh = 2; |
| 1747 | fs_info->endio_meta_write_workers.idle_thresh = 64; | 1804 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
| 1748 | 1805 | ||
| 1749 | btrfs_start_workers(&fs_info->workers, 1); | 1806 | btrfs_start_workers(&fs_info->workers, 1); |
| 1807 | btrfs_start_workers(&fs_info->generic_worker, 1); | ||
| 1750 | btrfs_start_workers(&fs_info->submit_workers, 1); | 1808 | btrfs_start_workers(&fs_info->submit_workers, 1); |
| 1751 | btrfs_start_workers(&fs_info->delalloc_workers, 1); | 1809 | btrfs_start_workers(&fs_info->delalloc_workers, 1); |
| 1752 | btrfs_start_workers(&fs_info->fixup_workers, 1); | 1810 | btrfs_start_workers(&fs_info->fixup_workers, 1); |
| 1753 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | 1811 | btrfs_start_workers(&fs_info->endio_workers, 1); |
| 1754 | btrfs_start_workers(&fs_info->endio_meta_workers, | 1812 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
| 1755 | fs_info->thread_pool_size); | 1813 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
| 1756 | btrfs_start_workers(&fs_info->endio_meta_write_workers, | 1814 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
| 1757 | fs_info->thread_pool_size); | 1815 | btrfs_start_workers(&fs_info->enospc_workers, 1); |
| 1758 | btrfs_start_workers(&fs_info->endio_write_workers, | ||
| 1759 | fs_info->thread_pool_size); | ||
| 1760 | 1816 | ||
| 1761 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1817 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1762 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1818 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
| @@ -1916,6 +1972,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1916 | } | 1972 | } |
| 1917 | } | 1973 | } |
| 1918 | 1974 | ||
| 1975 | ret = btrfs_find_orphan_roots(tree_root); | ||
| 1976 | BUG_ON(ret); | ||
| 1977 | |||
| 1919 | if (!(sb->s_flags & MS_RDONLY)) { | 1978 | if (!(sb->s_flags & MS_RDONLY)) { |
| 1920 | ret = btrfs_recover_relocation(tree_root); | 1979 | ret = btrfs_recover_relocation(tree_root); |
| 1921 | BUG_ON(ret); | 1980 | BUG_ON(ret); |
| @@ -1959,6 +2018,7 @@ fail_chunk_root: | |||
| 1959 | free_extent_buffer(chunk_root->node); | 2018 | free_extent_buffer(chunk_root->node); |
| 1960 | free_extent_buffer(chunk_root->commit_root); | 2019 | free_extent_buffer(chunk_root->commit_root); |
| 1961 | fail_sb_buffer: | 2020 | fail_sb_buffer: |
| 2021 | btrfs_stop_workers(&fs_info->generic_worker); | ||
| 1962 | btrfs_stop_workers(&fs_info->fixup_workers); | 2022 | btrfs_stop_workers(&fs_info->fixup_workers); |
| 1963 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2023 | btrfs_stop_workers(&fs_info->delalloc_workers); |
| 1964 | btrfs_stop_workers(&fs_info->workers); | 2024 | btrfs_stop_workers(&fs_info->workers); |
| @@ -1967,6 +2027,7 @@ fail_sb_buffer: | |||
| 1967 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2027 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 1968 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2028 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 1969 | btrfs_stop_workers(&fs_info->submit_workers); | 2029 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2030 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 1970 | fail_iput: | 2031 | fail_iput: |
| 1971 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2032 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
| 1972 | iput(fs_info->btree_inode); | 2033 | iput(fs_info->btree_inode); |
| @@ -1975,6 +2036,8 @@ fail_iput: | |||
| 1975 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2036 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| 1976 | fail_bdi: | 2037 | fail_bdi: |
| 1977 | bdi_destroy(&fs_info->bdi); | 2038 | bdi_destroy(&fs_info->bdi); |
| 2039 | fail_srcu: | ||
| 2040 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
| 1978 | fail: | 2041 | fail: |
| 1979 | kfree(extent_root); | 2042 | kfree(extent_root); |
| 1980 | kfree(tree_root); | 2043 | kfree(tree_root); |
| @@ -2234,20 +2297,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
| 2234 | 2297 | ||
| 2235 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2298 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) |
| 2236 | { | 2299 | { |
| 2237 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | 2300 | spin_lock(&fs_info->fs_roots_radix_lock); |
| 2238 | radix_tree_delete(&fs_info->fs_roots_radix, | 2301 | radix_tree_delete(&fs_info->fs_roots_radix, |
| 2239 | (unsigned long)root->root_key.objectid); | 2302 | (unsigned long)root->root_key.objectid); |
| 2303 | spin_unlock(&fs_info->fs_roots_radix_lock); | ||
| 2304 | |||
| 2305 | if (btrfs_root_refs(&root->root_item) == 0) | ||
| 2306 | synchronize_srcu(&fs_info->subvol_srcu); | ||
| 2307 | |||
| 2308 | free_fs_root(root); | ||
| 2309 | return 0; | ||
| 2310 | } | ||
| 2311 | |||
| 2312 | static void free_fs_root(struct btrfs_root *root) | ||
| 2313 | { | ||
| 2314 | WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); | ||
| 2240 | if (root->anon_super.s_dev) { | 2315 | if (root->anon_super.s_dev) { |
| 2241 | down_write(&root->anon_super.s_umount); | 2316 | down_write(&root->anon_super.s_umount); |
| 2242 | kill_anon_super(&root->anon_super); | 2317 | kill_anon_super(&root->anon_super); |
| 2243 | } | 2318 | } |
| 2244 | if (root->node) | 2319 | free_extent_buffer(root->node); |
| 2245 | free_extent_buffer(root->node); | 2320 | free_extent_buffer(root->commit_root); |
| 2246 | if (root->commit_root) | ||
| 2247 | free_extent_buffer(root->commit_root); | ||
| 2248 | kfree(root->name); | 2321 | kfree(root->name); |
| 2249 | kfree(root); | 2322 | kfree(root); |
| 2250 | return 0; | ||
| 2251 | } | 2323 | } |
| 2252 | 2324 | ||
| 2253 | static int del_fs_roots(struct btrfs_fs_info *fs_info) | 2325 | static int del_fs_roots(struct btrfs_fs_info *fs_info) |
| @@ -2256,6 +2328,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 2256 | struct btrfs_root *gang[8]; | 2328 | struct btrfs_root *gang[8]; |
| 2257 | int i; | 2329 | int i; |
| 2258 | 2330 | ||
| 2331 | while (!list_empty(&fs_info->dead_roots)) { | ||
| 2332 | gang[0] = list_entry(fs_info->dead_roots.next, | ||
| 2333 | struct btrfs_root, root_list); | ||
| 2334 | list_del(&gang[0]->root_list); | ||
| 2335 | |||
| 2336 | if (gang[0]->in_radix) { | ||
| 2337 | btrfs_free_fs_root(fs_info, gang[0]); | ||
| 2338 | } else { | ||
| 2339 | free_extent_buffer(gang[0]->node); | ||
| 2340 | free_extent_buffer(gang[0]->commit_root); | ||
| 2341 | kfree(gang[0]); | ||
| 2342 | } | ||
| 2343 | } | ||
| 2344 | |||
| 2259 | while (1) { | 2345 | while (1) { |
| 2260 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, | 2346 | ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, |
| 2261 | (void **)gang, 0, | 2347 | (void **)gang, 0, |
| @@ -2285,9 +2371,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 2285 | root_objectid = gang[ret - 1]->root_key.objectid + 1; | 2371 | root_objectid = gang[ret - 1]->root_key.objectid + 1; |
| 2286 | for (i = 0; i < ret; i++) { | 2372 | for (i = 0; i < ret; i++) { |
| 2287 | root_objectid = gang[i]->root_key.objectid; | 2373 | root_objectid = gang[i]->root_key.objectid; |
| 2288 | ret = btrfs_find_dead_roots(fs_info->tree_root, | ||
| 2289 | root_objectid); | ||
| 2290 | BUG_ON(ret); | ||
| 2291 | btrfs_orphan_cleanup(gang[i]); | 2374 | btrfs_orphan_cleanup(gang[i]); |
| 2292 | } | 2375 | } |
| 2293 | root_objectid++; | 2376 | root_objectid++; |
| @@ -2357,12 +2440,12 @@ int close_ctree(struct btrfs_root *root) | |||
| 2357 | free_extent_buffer(root->fs_info->csum_root->commit_root); | 2440 | free_extent_buffer(root->fs_info->csum_root->commit_root); |
| 2358 | 2441 | ||
| 2359 | btrfs_free_block_groups(root->fs_info); | 2442 | btrfs_free_block_groups(root->fs_info); |
| 2360 | btrfs_free_pinned_extents(root->fs_info); | ||
| 2361 | 2443 | ||
| 2362 | del_fs_roots(fs_info); | 2444 | del_fs_roots(fs_info); |
| 2363 | 2445 | ||
| 2364 | iput(fs_info->btree_inode); | 2446 | iput(fs_info->btree_inode); |
| 2365 | 2447 | ||
| 2448 | btrfs_stop_workers(&fs_info->generic_worker); | ||
| 2366 | btrfs_stop_workers(&fs_info->fixup_workers); | 2449 | btrfs_stop_workers(&fs_info->fixup_workers); |
| 2367 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2450 | btrfs_stop_workers(&fs_info->delalloc_workers); |
| 2368 | btrfs_stop_workers(&fs_info->workers); | 2451 | btrfs_stop_workers(&fs_info->workers); |
| @@ -2371,11 +2454,13 @@ int close_ctree(struct btrfs_root *root) | |||
| 2371 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2454 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2372 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2455 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2373 | btrfs_stop_workers(&fs_info->submit_workers); | 2456 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2457 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2374 | 2458 | ||
| 2375 | btrfs_close_devices(fs_info->fs_devices); | 2459 | btrfs_close_devices(fs_info->fs_devices); |
| 2376 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2460 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| 2377 | 2461 | ||
| 2378 | bdi_destroy(&fs_info->bdi); | 2462 | bdi_destroy(&fs_info->bdi); |
| 2463 | cleanup_srcu_struct(&fs_info->subvol_srcu); | ||
| 2379 | 2464 | ||
| 2380 | kfree(fs_info->extent_root); | 2465 | kfree(fs_info->extent_root); |
| 2381 | kfree(fs_info->tree_root); | 2466 | kfree(fs_info->tree_root); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 9596b40caa4e..ba5c3fd5ab8c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
| @@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
| 28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | 28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; |
| 29 | type = FILEID_BTRFS_WITHOUT_PARENT; | 29 | type = FILEID_BTRFS_WITHOUT_PARENT; |
| 30 | 30 | ||
| 31 | fid->objectid = BTRFS_I(inode)->location.objectid; | 31 | fid->objectid = inode->i_ino; |
| 32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; | 32 | fid->root_objectid = BTRFS_I(inode)->root->objectid; |
| 33 | fid->gen = inode->i_generation; | 33 | fid->gen = inode->i_generation; |
| 34 | 34 | ||
| @@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, | 62 | static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, |
| 63 | u64 root_objectid, u32 generation) | 63 | u64 root_objectid, u32 generation, |
| 64 | int check_generation) | ||
| 64 | { | 65 | { |
| 66 | struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info; | ||
| 65 | struct btrfs_root *root; | 67 | struct btrfs_root *root; |
| 68 | struct dentry *dentry; | ||
| 66 | struct inode *inode; | 69 | struct inode *inode; |
| 67 | struct btrfs_key key; | 70 | struct btrfs_key key; |
| 71 | int index; | ||
| 72 | int err = 0; | ||
| 73 | |||
| 74 | if (objectid < BTRFS_FIRST_FREE_OBJECTID) | ||
| 75 | return ERR_PTR(-ESTALE); | ||
| 68 | 76 | ||
| 69 | key.objectid = root_objectid; | 77 | key.objectid = root_objectid; |
| 70 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 78 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
| 71 | key.offset = (u64)-1; | 79 | key.offset = (u64)-1; |
| 72 | 80 | ||
| 73 | root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key); | 81 | index = srcu_read_lock(&fs_info->subvol_srcu); |
| 74 | if (IS_ERR(root)) | 82 | |
| 75 | return ERR_CAST(root); | 83 | root = btrfs_read_fs_root_no_name(fs_info, &key); |
| 84 | if (IS_ERR(root)) { | ||
| 85 | err = PTR_ERR(root); | ||
| 86 | goto fail; | ||
| 87 | } | ||
| 88 | |||
| 89 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
| 90 | err = -ENOENT; | ||
| 91 | goto fail; | ||
| 92 | } | ||
| 76 | 93 | ||
| 77 | key.objectid = objectid; | 94 | key.objectid = objectid; |
| 78 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 95 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
| 79 | key.offset = 0; | 96 | key.offset = 0; |
| 80 | 97 | ||
| 81 | inode = btrfs_iget(sb, &key, root); | 98 | inode = btrfs_iget(sb, &key, root); |
| 82 | if (IS_ERR(inode)) | 99 | if (IS_ERR(inode)) { |
| 83 | return (void *)inode; | 100 | err = PTR_ERR(inode); |
| 101 | goto fail; | ||
| 102 | } | ||
| 103 | |||
| 104 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 84 | 105 | ||
| 85 | if (generation != inode->i_generation) { | 106 | if (check_generation && generation != inode->i_generation) { |
| 86 | iput(inode); | 107 | iput(inode); |
| 87 | return ERR_PTR(-ESTALE); | 108 | return ERR_PTR(-ESTALE); |
| 88 | } | 109 | } |
| 89 | 110 | ||
| 90 | return d_obtain_alias(inode); | 111 | dentry = d_obtain_alias(inode); |
| 112 | if (!IS_ERR(dentry)) | ||
| 113 | dentry->d_op = &btrfs_dentry_operations; | ||
| 114 | return dentry; | ||
| 115 | fail: | ||
| 116 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 117 | return ERR_PTR(err); | ||
| 91 | } | 118 | } |
| 92 | 119 | ||
| 93 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | 120 | static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, |
| @@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, | |||
| 111 | objectid = fid->parent_objectid; | 138 | objectid = fid->parent_objectid; |
| 112 | generation = fid->parent_gen; | 139 | generation = fid->parent_gen; |
| 113 | 140 | ||
| 114 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 141 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
| 115 | } | 142 | } |
| 116 | 143 | ||
| 117 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | 144 | static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, |
| @@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | |||
| 133 | root_objectid = fid->root_objectid; | 160 | root_objectid = fid->root_objectid; |
| 134 | generation = fid->gen; | 161 | generation = fid->gen; |
| 135 | 162 | ||
| 136 | return btrfs_get_dentry(sb, objectid, root_objectid, generation); | 163 | return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1); |
| 137 | } | 164 | } |
| 138 | 165 | ||
| 139 | static struct dentry *btrfs_get_parent(struct dentry *child) | 166 | static struct dentry *btrfs_get_parent(struct dentry *child) |
| 140 | { | 167 | { |
| 141 | struct inode *dir = child->d_inode; | 168 | struct inode *dir = child->d_inode; |
| 169 | static struct dentry *dentry; | ||
| 142 | struct btrfs_root *root = BTRFS_I(dir)->root; | 170 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 143 | struct btrfs_key key; | ||
| 144 | struct btrfs_path *path; | 171 | struct btrfs_path *path; |
| 145 | struct extent_buffer *leaf; | 172 | struct extent_buffer *leaf; |
| 146 | int slot; | 173 | struct btrfs_root_ref *ref; |
| 147 | u64 objectid; | 174 | struct btrfs_key key; |
| 175 | struct btrfs_key found_key; | ||
| 148 | int ret; | 176 | int ret; |
| 149 | 177 | ||
| 150 | path = btrfs_alloc_path(); | 178 | path = btrfs_alloc_path(); |
| 151 | 179 | ||
| 152 | key.objectid = dir->i_ino; | 180 | if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) { |
| 153 | btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); | 181 | key.objectid = root->root_key.objectid; |
| 154 | key.offset = (u64)-1; | 182 | key.type = BTRFS_ROOT_BACKREF_KEY; |
| 183 | key.offset = (u64)-1; | ||
| 184 | root = root->fs_info->tree_root; | ||
| 185 | } else { | ||
| 186 | key.objectid = dir->i_ino; | ||
| 187 | key.type = BTRFS_INODE_REF_KEY; | ||
| 188 | key.offset = (u64)-1; | ||
| 189 | } | ||
| 155 | 190 | ||
| 156 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 191 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 157 | if (ret < 0) { | 192 | if (ret < 0) |
| 158 | /* Error */ | 193 | goto fail; |
| 159 | btrfs_free_path(path); | 194 | |
| 160 | return ERR_PTR(ret); | 195 | BUG_ON(ret == 0); |
| 196 | if (path->slots[0] == 0) { | ||
| 197 | ret = -ENOENT; | ||
| 198 | goto fail; | ||
| 161 | } | 199 | } |
| 200 | |||
| 201 | path->slots[0]--; | ||
| 162 | leaf = path->nodes[0]; | 202 | leaf = path->nodes[0]; |
| 163 | slot = path->slots[0]; | 203 | |
| 164 | if (ret) { | 204 | btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); |
| 165 | /* btrfs_search_slot() returns the slot where we'd want to | 205 | if (found_key.objectid != key.objectid || found_key.type != key.type) { |
| 166 | insert a backref for parent inode #0xFFFFFFFFFFFFFFFF. | 206 | ret = -ENOENT; |
| 167 | The _real_ backref, telling us what the parent inode | 207 | goto fail; |
| 168 | _actually_ is, will be in the slot _before_ the one | ||
| 169 | that btrfs_search_slot() returns. */ | ||
| 170 | if (!slot) { | ||
| 171 | /* Unless there is _no_ key in the tree before... */ | ||
| 172 | btrfs_free_path(path); | ||
| 173 | return ERR_PTR(-EIO); | ||
| 174 | } | ||
| 175 | slot--; | ||
| 176 | } | 208 | } |
| 177 | 209 | ||
| 178 | btrfs_item_key_to_cpu(leaf, &key, slot); | 210 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
| 211 | ref = btrfs_item_ptr(leaf, path->slots[0], | ||
| 212 | struct btrfs_root_ref); | ||
| 213 | key.objectid = btrfs_root_ref_dirid(leaf, ref); | ||
| 214 | } else { | ||
| 215 | key.objectid = found_key.offset; | ||
| 216 | } | ||
| 179 | btrfs_free_path(path); | 217 | btrfs_free_path(path); |
| 180 | 218 | ||
| 181 | if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) | 219 | if (found_key.type == BTRFS_ROOT_BACKREF_KEY) { |
| 182 | return ERR_PTR(-EINVAL); | 220 | return btrfs_get_dentry(root->fs_info->sb, key.objectid, |
| 183 | 221 | found_key.offset, 0, 0); | |
| 184 | objectid = key.offset; | 222 | } |
| 185 | |||
| 186 | /* If we are already at the root of a subvol, return the real root */ | ||
| 187 | if (objectid == dir->i_ino) | ||
| 188 | return dget(dir->i_sb->s_root); | ||
| 189 | 223 | ||
| 190 | /* Build a new key for the inode item */ | 224 | key.type = BTRFS_INODE_ITEM_KEY; |
| 191 | key.objectid = objectid; | ||
| 192 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 193 | key.offset = 0; | 225 | key.offset = 0; |
| 194 | 226 | dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | |
| 195 | return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); | 227 | if (!IS_ERR(dentry)) |
| 228 | dentry->d_op = &btrfs_dentry_operations; | ||
| 229 | return dentry; | ||
| 230 | fail: | ||
| 231 | btrfs_free_path(path); | ||
| 232 | return ERR_PTR(ret); | ||
| 196 | } | 233 | } |
| 197 | 234 | ||
| 198 | const struct export_operations btrfs_export_ops = { | 235 | const struct export_operations btrfs_export_ops = { |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 72a2b9c28e9f..d0c4d584efad 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -32,12 +32,12 @@ | |||
| 32 | #include "locking.h" | 32 | #include "locking.h" |
| 33 | #include "free-space-cache.h" | 33 | #include "free-space-cache.h" |
| 34 | 34 | ||
| 35 | static int update_reserved_extents(struct btrfs_root *root, | ||
| 36 | u64 bytenr, u64 num, int reserve); | ||
| 37 | static int update_block_group(struct btrfs_trans_handle *trans, | 35 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 38 | struct btrfs_root *root, | 36 | struct btrfs_root *root, |
| 39 | u64 bytenr, u64 num_bytes, int alloc, | 37 | u64 bytenr, u64 num_bytes, int alloc, |
| 40 | int mark_free); | 38 | int mark_free); |
| 39 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | ||
| 40 | u64 num_bytes, int reserve); | ||
| 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 42 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
| 43 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -57,10 +57,19 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 57 | u64 parent, u64 root_objectid, | 57 | u64 parent, u64 root_objectid, |
| 58 | u64 flags, struct btrfs_disk_key *key, | 58 | u64 flags, struct btrfs_disk_key *key, |
| 59 | int level, struct btrfs_key *ins); | 59 | int level, struct btrfs_key *ins); |
| 60 | |||
| 61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
| 63 | u64 flags, int force); | 62 | u64 flags, int force); |
| 63 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
| 64 | struct btrfs_root *root, | ||
| 65 | struct btrfs_path *path, | ||
| 66 | u64 bytenr, u64 num_bytes, | ||
| 67 | int is_data, int reserved, | ||
| 68 | struct extent_buffer **must_clean); | ||
| 69 | static int find_next_key(struct btrfs_path *path, int level, | ||
| 70 | struct btrfs_key *key); | ||
| 71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | ||
| 72 | int dump_block_groups); | ||
| 64 | 73 | ||
| 65 | static noinline int | 74 | static noinline int |
| 66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 75 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
| @@ -153,34 +162,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
| 153 | return ret; | 162 | return ret; |
| 154 | } | 163 | } |
| 155 | 164 | ||
| 156 | /* | 165 | static int add_excluded_extent(struct btrfs_root *root, |
| 157 | * We always set EXTENT_LOCKED for the super mirror extents so we don't | 166 | u64 start, u64 num_bytes) |
| 158 | * overwrite them, so those bits need to be unset. Also, if we are unmounting | ||
| 159 | * with pinned extents still sitting there because we had a block group caching, | ||
| 160 | * we need to clear those now, since we are done. | ||
| 161 | */ | ||
| 162 | void btrfs_free_pinned_extents(struct btrfs_fs_info *info) | ||
| 163 | { | 167 | { |
| 164 | u64 start, end, last = 0; | 168 | u64 end = start + num_bytes - 1; |
| 165 | int ret; | 169 | set_extent_bits(&root->fs_info->freed_extents[0], |
| 170 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 171 | set_extent_bits(&root->fs_info->freed_extents[1], | ||
| 172 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 173 | return 0; | ||
| 174 | } | ||
| 166 | 175 | ||
| 167 | while (1) { | 176 | static void free_excluded_extents(struct btrfs_root *root, |
| 168 | ret = find_first_extent_bit(&info->pinned_extents, last, | 177 | struct btrfs_block_group_cache *cache) |
| 169 | &start, &end, | 178 | { |
| 170 | EXTENT_LOCKED|EXTENT_DIRTY); | 179 | u64 start, end; |
| 171 | if (ret) | ||
| 172 | break; | ||
| 173 | 180 | ||
| 174 | clear_extent_bits(&info->pinned_extents, start, end, | 181 | start = cache->key.objectid; |
| 175 | EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS); | 182 | end = start + cache->key.offset - 1; |
| 176 | last = end+1; | 183 | |
| 177 | } | 184 | clear_extent_bits(&root->fs_info->freed_extents[0], |
| 185 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 186 | clear_extent_bits(&root->fs_info->freed_extents[1], | ||
| 187 | start, end, EXTENT_UPTODATE, GFP_NOFS); | ||
| 178 | } | 188 | } |
| 179 | 189 | ||
| 180 | static int remove_sb_from_cache(struct btrfs_root *root, | 190 | static int exclude_super_stripes(struct btrfs_root *root, |
| 181 | struct btrfs_block_group_cache *cache) | 191 | struct btrfs_block_group_cache *cache) |
| 182 | { | 192 | { |
| 183 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 184 | u64 bytenr; | 193 | u64 bytenr; |
| 185 | u64 *logical; | 194 | u64 *logical; |
| 186 | int stripe_len; | 195 | int stripe_len; |
| @@ -192,17 +201,42 @@ static int remove_sb_from_cache(struct btrfs_root *root, | |||
| 192 | cache->key.objectid, bytenr, | 201 | cache->key.objectid, bytenr, |
| 193 | 0, &logical, &nr, &stripe_len); | 202 | 0, &logical, &nr, &stripe_len); |
| 194 | BUG_ON(ret); | 203 | BUG_ON(ret); |
| 204 | |||
| 195 | while (nr--) { | 205 | while (nr--) { |
| 196 | try_lock_extent(&fs_info->pinned_extents, | 206 | cache->bytes_super += stripe_len; |
| 197 | logical[nr], | 207 | ret = add_excluded_extent(root, logical[nr], |
| 198 | logical[nr] + stripe_len - 1, GFP_NOFS); | 208 | stripe_len); |
| 209 | BUG_ON(ret); | ||
| 199 | } | 210 | } |
| 211 | |||
| 200 | kfree(logical); | 212 | kfree(logical); |
| 201 | } | 213 | } |
| 202 | |||
| 203 | return 0; | 214 | return 0; |
| 204 | } | 215 | } |
| 205 | 216 | ||
| 217 | static struct btrfs_caching_control * | ||
| 218 | get_caching_control(struct btrfs_block_group_cache *cache) | ||
| 219 | { | ||
| 220 | struct btrfs_caching_control *ctl; | ||
| 221 | |||
| 222 | spin_lock(&cache->lock); | ||
| 223 | if (cache->cached != BTRFS_CACHE_STARTED) { | ||
| 224 | spin_unlock(&cache->lock); | ||
| 225 | return NULL; | ||
| 226 | } | ||
| 227 | |||
| 228 | ctl = cache->caching_ctl; | ||
| 229 | atomic_inc(&ctl->count); | ||
| 230 | spin_unlock(&cache->lock); | ||
| 231 | return ctl; | ||
| 232 | } | ||
| 233 | |||
| 234 | static void put_caching_control(struct btrfs_caching_control *ctl) | ||
| 235 | { | ||
| 236 | if (atomic_dec_and_test(&ctl->count)) | ||
| 237 | kfree(ctl); | ||
| 238 | } | ||
| 239 | |||
| 206 | /* | 240 | /* |
| 207 | * this is only called by cache_block_group, since we could have freed extents | 241 | * this is only called by cache_block_group, since we could have freed extents |
| 208 | * we need to check the pinned_extents for any extents that can't be used yet | 242 | * we need to check the pinned_extents for any extents that can't be used yet |
| @@ -215,9 +249,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
| 215 | int ret; | 249 | int ret; |
| 216 | 250 | ||
| 217 | while (start < end) { | 251 | while (start < end) { |
| 218 | ret = find_first_extent_bit(&info->pinned_extents, start, | 252 | ret = find_first_extent_bit(info->pinned_extents, start, |
| 219 | &extent_start, &extent_end, | 253 | &extent_start, &extent_end, |
| 220 | EXTENT_DIRTY|EXTENT_LOCKED); | 254 | EXTENT_DIRTY | EXTENT_UPTODATE); |
| 221 | if (ret) | 255 | if (ret) |
| 222 | break; | 256 | break; |
| 223 | 257 | ||
| @@ -249,22 +283,27 @@ static int caching_kthread(void *data) | |||
| 249 | { | 283 | { |
| 250 | struct btrfs_block_group_cache *block_group = data; | 284 | struct btrfs_block_group_cache *block_group = data; |
| 251 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 285 | struct btrfs_fs_info *fs_info = block_group->fs_info; |
| 252 | u64 last = 0; | 286 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; |
| 287 | struct btrfs_root *extent_root = fs_info->extent_root; | ||
| 253 | struct btrfs_path *path; | 288 | struct btrfs_path *path; |
| 254 | int ret = 0; | ||
| 255 | struct btrfs_key key; | ||
| 256 | struct extent_buffer *leaf; | 289 | struct extent_buffer *leaf; |
| 257 | int slot; | 290 | struct btrfs_key key; |
| 258 | u64 total_found = 0; | 291 | u64 total_found = 0; |
| 259 | 292 | u64 last = 0; | |
| 260 | BUG_ON(!fs_info); | 293 | u32 nritems; |
| 294 | int ret = 0; | ||
| 261 | 295 | ||
| 262 | path = btrfs_alloc_path(); | 296 | path = btrfs_alloc_path(); |
| 263 | if (!path) | 297 | if (!path) |
| 264 | return -ENOMEM; | 298 | return -ENOMEM; |
| 265 | 299 | ||
| 266 | atomic_inc(&block_group->space_info->caching_threads); | 300 | exclude_super_stripes(extent_root, block_group); |
| 301 | spin_lock(&block_group->space_info->lock); | ||
| 302 | block_group->space_info->bytes_super += block_group->bytes_super; | ||
| 303 | spin_unlock(&block_group->space_info->lock); | ||
| 304 | |||
| 267 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 305 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
| 306 | |||
| 268 | /* | 307 | /* |
| 269 | * We don't want to deadlock with somebody trying to allocate a new | 308 | * We don't want to deadlock with somebody trying to allocate a new |
| 270 | * extent for the extent root while also trying to search the extent | 309 | * extent for the extent root while also trying to search the extent |
| @@ -277,74 +316,64 @@ static int caching_kthread(void *data) | |||
| 277 | 316 | ||
| 278 | key.objectid = last; | 317 | key.objectid = last; |
| 279 | key.offset = 0; | 318 | key.offset = 0; |
| 280 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 319 | key.type = BTRFS_EXTENT_ITEM_KEY; |
| 281 | again: | 320 | again: |
| 321 | mutex_lock(&caching_ctl->mutex); | ||
| 282 | /* need to make sure the commit_root doesn't disappear */ | 322 | /* need to make sure the commit_root doesn't disappear */ |
| 283 | down_read(&fs_info->extent_commit_sem); | 323 | down_read(&fs_info->extent_commit_sem); |
| 284 | 324 | ||
| 285 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); | 325 | ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); |
| 286 | if (ret < 0) | 326 | if (ret < 0) |
| 287 | goto err; | 327 | goto err; |
| 288 | 328 | ||
| 329 | leaf = path->nodes[0]; | ||
| 330 | nritems = btrfs_header_nritems(leaf); | ||
| 331 | |||
| 289 | while (1) { | 332 | while (1) { |
| 290 | smp_mb(); | 333 | smp_mb(); |
| 291 | if (block_group->fs_info->closing > 1) { | 334 | if (fs_info->closing > 1) { |
| 292 | last = (u64)-1; | 335 | last = (u64)-1; |
| 293 | break; | 336 | break; |
| 294 | } | 337 | } |
| 295 | 338 | ||
| 296 | leaf = path->nodes[0]; | 339 | if (path->slots[0] < nritems) { |
| 297 | slot = path->slots[0]; | 340 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); |
| 298 | if (slot >= btrfs_header_nritems(leaf)) { | 341 | } else { |
| 299 | ret = btrfs_next_leaf(fs_info->extent_root, path); | 342 | ret = find_next_key(path, 0, &key); |
| 300 | if (ret < 0) | 343 | if (ret) |
| 301 | goto err; | ||
| 302 | else if (ret) | ||
| 303 | break; | 344 | break; |
| 304 | 345 | ||
| 305 | if (need_resched() || | 346 | caching_ctl->progress = last; |
| 306 | btrfs_transaction_in_commit(fs_info)) { | 347 | btrfs_release_path(extent_root, path); |
| 307 | leaf = path->nodes[0]; | 348 | up_read(&fs_info->extent_commit_sem); |
| 308 | 349 | mutex_unlock(&caching_ctl->mutex); | |
| 309 | /* this shouldn't happen, but if the | 350 | if (btrfs_transaction_in_commit(fs_info)) |
| 310 | * leaf is empty just move on. | ||
| 311 | */ | ||
| 312 | if (btrfs_header_nritems(leaf) == 0) | ||
| 313 | break; | ||
| 314 | /* | ||
| 315 | * we need to copy the key out so that | ||
| 316 | * we are sure the next search advances | ||
| 317 | * us forward in the btree. | ||
| 318 | */ | ||
| 319 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
| 320 | btrfs_release_path(fs_info->extent_root, path); | ||
| 321 | up_read(&fs_info->extent_commit_sem); | ||
| 322 | schedule_timeout(1); | 351 | schedule_timeout(1); |
| 323 | goto again; | 352 | else |
| 324 | } | 353 | cond_resched(); |
| 354 | goto again; | ||
| 355 | } | ||
| 325 | 356 | ||
| 357 | if (key.objectid < block_group->key.objectid) { | ||
| 358 | path->slots[0]++; | ||
| 326 | continue; | 359 | continue; |
| 327 | } | 360 | } |
| 328 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 329 | if (key.objectid < block_group->key.objectid) | ||
| 330 | goto next; | ||
| 331 | 361 | ||
| 332 | if (key.objectid >= block_group->key.objectid + | 362 | if (key.objectid >= block_group->key.objectid + |
| 333 | block_group->key.offset) | 363 | block_group->key.offset) |
| 334 | break; | 364 | break; |
| 335 | 365 | ||
| 336 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 366 | if (key.type == BTRFS_EXTENT_ITEM_KEY) { |
| 337 | total_found += add_new_free_space(block_group, | 367 | total_found += add_new_free_space(block_group, |
| 338 | fs_info, last, | 368 | fs_info, last, |
| 339 | key.objectid); | 369 | key.objectid); |
| 340 | last = key.objectid + key.offset; | 370 | last = key.objectid + key.offset; |
| 341 | } | ||
| 342 | 371 | ||
| 343 | if (total_found > (1024 * 1024 * 2)) { | 372 | if (total_found > (1024 * 1024 * 2)) { |
| 344 | total_found = 0; | 373 | total_found = 0; |
| 345 | wake_up(&block_group->caching_q); | 374 | wake_up(&caching_ctl->wait); |
| 375 | } | ||
| 346 | } | 376 | } |
| 347 | next: | ||
| 348 | path->slots[0]++; | 377 | path->slots[0]++; |
| 349 | } | 378 | } |
| 350 | ret = 0; | 379 | ret = 0; |
| @@ -352,33 +381,65 @@ next: | |||
| 352 | total_found += add_new_free_space(block_group, fs_info, last, | 381 | total_found += add_new_free_space(block_group, fs_info, last, |
| 353 | block_group->key.objectid + | 382 | block_group->key.objectid + |
| 354 | block_group->key.offset); | 383 | block_group->key.offset); |
| 384 | caching_ctl->progress = (u64)-1; | ||
| 355 | 385 | ||
| 356 | spin_lock(&block_group->lock); | 386 | spin_lock(&block_group->lock); |
| 387 | block_group->caching_ctl = NULL; | ||
| 357 | block_group->cached = BTRFS_CACHE_FINISHED; | 388 | block_group->cached = BTRFS_CACHE_FINISHED; |
| 358 | spin_unlock(&block_group->lock); | 389 | spin_unlock(&block_group->lock); |
| 359 | 390 | ||
| 360 | err: | 391 | err: |
| 361 | btrfs_free_path(path); | 392 | btrfs_free_path(path); |
| 362 | up_read(&fs_info->extent_commit_sem); | 393 | up_read(&fs_info->extent_commit_sem); |
| 363 | atomic_dec(&block_group->space_info->caching_threads); | ||
| 364 | wake_up(&block_group->caching_q); | ||
| 365 | 394 | ||
| 395 | free_excluded_extents(extent_root, block_group); | ||
| 396 | |||
| 397 | mutex_unlock(&caching_ctl->mutex); | ||
| 398 | wake_up(&caching_ctl->wait); | ||
| 399 | |||
| 400 | put_caching_control(caching_ctl); | ||
| 401 | atomic_dec(&block_group->space_info->caching_threads); | ||
| 366 | return 0; | 402 | return 0; |
| 367 | } | 403 | } |
| 368 | 404 | ||
| 369 | static int cache_block_group(struct btrfs_block_group_cache *cache) | 405 | static int cache_block_group(struct btrfs_block_group_cache *cache) |
| 370 | { | 406 | { |
| 407 | struct btrfs_fs_info *fs_info = cache->fs_info; | ||
| 408 | struct btrfs_caching_control *caching_ctl; | ||
| 371 | struct task_struct *tsk; | 409 | struct task_struct *tsk; |
| 372 | int ret = 0; | 410 | int ret = 0; |
| 373 | 411 | ||
| 412 | smp_mb(); | ||
| 413 | if (cache->cached != BTRFS_CACHE_NO) | ||
| 414 | return 0; | ||
| 415 | |||
| 416 | caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); | ||
| 417 | BUG_ON(!caching_ctl); | ||
| 418 | |||
| 419 | INIT_LIST_HEAD(&caching_ctl->list); | ||
| 420 | mutex_init(&caching_ctl->mutex); | ||
| 421 | init_waitqueue_head(&caching_ctl->wait); | ||
| 422 | caching_ctl->block_group = cache; | ||
| 423 | caching_ctl->progress = cache->key.objectid; | ||
| 424 | /* one for caching kthread, one for caching block group list */ | ||
| 425 | atomic_set(&caching_ctl->count, 2); | ||
| 426 | |||
| 374 | spin_lock(&cache->lock); | 427 | spin_lock(&cache->lock); |
| 375 | if (cache->cached != BTRFS_CACHE_NO) { | 428 | if (cache->cached != BTRFS_CACHE_NO) { |
| 376 | spin_unlock(&cache->lock); | 429 | spin_unlock(&cache->lock); |
| 377 | return ret; | 430 | kfree(caching_ctl); |
| 431 | return 0; | ||
| 378 | } | 432 | } |
| 433 | cache->caching_ctl = caching_ctl; | ||
| 379 | cache->cached = BTRFS_CACHE_STARTED; | 434 | cache->cached = BTRFS_CACHE_STARTED; |
| 380 | spin_unlock(&cache->lock); | 435 | spin_unlock(&cache->lock); |
| 381 | 436 | ||
| 437 | down_write(&fs_info->extent_commit_sem); | ||
| 438 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | ||
| 439 | up_write(&fs_info->extent_commit_sem); | ||
| 440 | |||
| 441 | atomic_inc(&cache->space_info->caching_threads); | ||
| 442 | |||
| 382 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 443 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", |
| 383 | cache->key.objectid); | 444 | cache->key.objectid); |
| 384 | if (IS_ERR(tsk)) { | 445 | if (IS_ERR(tsk)) { |
| @@ -1511,7 +1572,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
| 1511 | static void btrfs_issue_discard(struct block_device *bdev, | 1572 | static void btrfs_issue_discard(struct block_device *bdev, |
| 1512 | u64 start, u64 len) | 1573 | u64 start, u64 len) |
| 1513 | { | 1574 | { |
| 1514 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); | 1575 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
| 1576 | DISCARD_FL_BARRIER); | ||
| 1515 | } | 1577 | } |
| 1516 | #endif | 1578 | #endif |
| 1517 | 1579 | ||
| @@ -1656,7 +1718,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans, | |||
| 1656 | parent, ref_root, flags, | 1718 | parent, ref_root, flags, |
| 1657 | ref->objectid, ref->offset, | 1719 | ref->objectid, ref->offset, |
| 1658 | &ins, node->ref_mod); | 1720 | &ins, node->ref_mod); |
| 1659 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
| 1660 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1721 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
| 1661 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1722 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
| 1662 | node->num_bytes, parent, | 1723 | node->num_bytes, parent, |
| @@ -1782,7 +1843,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
| 1782 | extent_op->flags_to_set, | 1843 | extent_op->flags_to_set, |
| 1783 | &extent_op->key, | 1844 | &extent_op->key, |
| 1784 | ref->level, &ins); | 1845 | ref->level, &ins); |
| 1785 | update_reserved_extents(root, ins.objectid, ins.offset, 0); | ||
| 1786 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { | 1846 | } else if (node->action == BTRFS_ADD_DELAYED_REF) { |
| 1787 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, | 1847 | ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, |
| 1788 | node->num_bytes, parent, ref_root, | 1848 | node->num_bytes, parent, ref_root, |
| @@ -1817,16 +1877,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
| 1817 | BUG_ON(extent_op); | 1877 | BUG_ON(extent_op); |
| 1818 | head = btrfs_delayed_node_to_head(node); | 1878 | head = btrfs_delayed_node_to_head(node); |
| 1819 | if (insert_reserved) { | 1879 | if (insert_reserved) { |
| 1880 | int mark_free = 0; | ||
| 1881 | struct extent_buffer *must_clean = NULL; | ||
| 1882 | |||
| 1883 | ret = pin_down_bytes(trans, root, NULL, | ||
| 1884 | node->bytenr, node->num_bytes, | ||
| 1885 | head->is_data, 1, &must_clean); | ||
| 1886 | if (ret > 0) | ||
| 1887 | mark_free = 1; | ||
| 1888 | |||
| 1889 | if (must_clean) { | ||
| 1890 | clean_tree_block(NULL, root, must_clean); | ||
| 1891 | btrfs_tree_unlock(must_clean); | ||
| 1892 | free_extent_buffer(must_clean); | ||
| 1893 | } | ||
| 1820 | if (head->is_data) { | 1894 | if (head->is_data) { |
| 1821 | ret = btrfs_del_csums(trans, root, | 1895 | ret = btrfs_del_csums(trans, root, |
| 1822 | node->bytenr, | 1896 | node->bytenr, |
| 1823 | node->num_bytes); | 1897 | node->num_bytes); |
| 1824 | BUG_ON(ret); | 1898 | BUG_ON(ret); |
| 1825 | } | 1899 | } |
| 1826 | btrfs_update_pinned_extents(root, node->bytenr, | 1900 | if (mark_free) { |
| 1827 | node->num_bytes, 1); | 1901 | ret = btrfs_free_reserved_extent(root, |
| 1828 | update_reserved_extents(root, node->bytenr, | 1902 | node->bytenr, |
| 1829 | node->num_bytes, 0); | 1903 | node->num_bytes); |
| 1904 | BUG_ON(ret); | ||
| 1905 | } | ||
| 1830 | } | 1906 | } |
| 1831 | mutex_unlock(&head->mutex); | 1907 | mutex_unlock(&head->mutex); |
| 1832 | return 0; | 1908 | return 0; |
| @@ -2691,60 +2767,448 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | |||
| 2691 | alloc_target); | 2767 | alloc_target); |
| 2692 | } | 2768 | } |
| 2693 | 2769 | ||
| 2770 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
| 2771 | { | ||
| 2772 | u64 num_bytes; | ||
| 2773 | int level; | ||
| 2774 | |||
| 2775 | level = BTRFS_MAX_LEVEL - 2; | ||
| 2776 | /* | ||
| 2777 | * NOTE: these calculations are absolutely the worst possible case. | ||
| 2778 | * This assumes that _every_ item we insert will require a new leaf, and | ||
| 2779 | * that the tree has grown to its maximum level size. | ||
| 2780 | */ | ||
| 2781 | |||
| 2782 | /* | ||
| 2783 | * for every item we insert we could insert both an extent item and an | |
| 2784 | * extent ref item. Then for every item we insert, we will need to cow | |
| 2785 | * both the original leaf, plus the leaf to the left and right of it. | ||
| 2786 | * | ||
| 2787 | * Unless we are talking about the extent root, then we just want the | ||
| 2788 | * number of items * 2, since we just need the extent item plus its ref. | ||
| 2789 | */ | ||
| 2790 | if (root == root->fs_info->extent_root) | ||
| 2791 | num_bytes = num_items * 2; | ||
| 2792 | else | ||
| 2793 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
| 2794 | |||
| 2795 | /* | ||
| 2796 | * num_bytes is total number of leaves we could need times the leaf | ||
| 2797 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
| 2798 | * level, down to the leaf level. | ||
| 2799 | */ | ||
| 2800 | num_bytes = (num_bytes * root->leafsize) + | ||
| 2801 | (num_bytes * (level * 2)) * root->nodesize; | ||
| 2802 | |||
| 2803 | return num_bytes; | ||
| 2804 | } | ||
| 2805 | |||
| 2694 | /* | 2806 | /* |
| 2695 | * for now this just makes sure we have at least 5% of our metadata space free | 2807 | * Unreserve metadata space for delalloc. If we have less reserved credits than |
| 2696 | * for use. | 2808 | * we have extents, this function does nothing. |
| 2697 | */ | 2809 | */ |
| 2698 | int btrfs_check_metadata_free_space(struct btrfs_root *root) | 2810 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, |
| 2811 | struct inode *inode, int num_items) | ||
| 2699 | { | 2812 | { |
| 2700 | struct btrfs_fs_info *info = root->fs_info; | 2813 | struct btrfs_fs_info *info = root->fs_info; |
| 2701 | struct btrfs_space_info *meta_sinfo; | 2814 | struct btrfs_space_info *meta_sinfo; |
| 2702 | u64 alloc_target, thresh; | 2815 | u64 num_bytes; |
| 2703 | int committed = 0, ret; | 2816 | u64 alloc_target; |
| 2817 | bool bug = false; | ||
| 2704 | 2818 | ||
| 2705 | /* get the space info for where the metadata will live */ | 2819 | /* get the space info for where the metadata will live */ |
| 2706 | alloc_target = btrfs_get_alloc_profile(root, 0); | 2820 | alloc_target = btrfs_get_alloc_profile(root, 0); |
| 2707 | meta_sinfo = __find_space_info(info, alloc_target); | 2821 | meta_sinfo = __find_space_info(info, alloc_target); |
| 2708 | 2822 | ||
| 2709 | again: | 2823 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, |
| 2824 | num_items); | ||
| 2825 | |||
| 2710 | spin_lock(&meta_sinfo->lock); | 2826 | spin_lock(&meta_sinfo->lock); |
| 2711 | if (!meta_sinfo->full) | 2827 | spin_lock(&BTRFS_I(inode)->accounting_lock); |
| 2712 | thresh = meta_sinfo->total_bytes * 80; | 2828 | if (BTRFS_I(inode)->reserved_extents <= |
| 2713 | else | 2829 | BTRFS_I(inode)->outstanding_extents) { |
| 2714 | thresh = meta_sinfo->total_bytes * 95; | 2830 | spin_unlock(&BTRFS_I(inode)->accounting_lock); |
| 2831 | spin_unlock(&meta_sinfo->lock); | ||
| 2832 | return 0; | ||
| 2833 | } | ||
| 2834 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 2835 | |||
| 2836 | BTRFS_I(inode)->reserved_extents--; | ||
| 2837 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
| 2838 | |||
| 2839 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
| 2840 | bug = true; | ||
| 2841 | meta_sinfo->bytes_delalloc = 0; | ||
| 2842 | } else { | ||
| 2843 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2844 | } | ||
| 2845 | spin_unlock(&meta_sinfo->lock); | ||
| 2846 | |||
| 2847 | BUG_ON(bug); | ||
| 2848 | |||
| 2849 | return 0; | ||
| 2850 | } | ||
| 2851 | |||
| 2852 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
| 2853 | { | ||
| 2854 | u64 thresh; | ||
| 2855 | |||
| 2856 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 2857 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 2858 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 2859 | meta_sinfo->bytes_may_use; | ||
| 2715 | 2860 | ||
| 2861 | thresh = meta_sinfo->total_bytes - thresh; | ||
| 2862 | thresh *= 80; | ||
| 2716 | do_div(thresh, 100); | 2863 | do_div(thresh, 100); |
| 2864 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
| 2865 | meta_sinfo->force_delalloc = 1; | ||
| 2866 | else | ||
| 2867 | meta_sinfo->force_delalloc = 0; | ||
| 2868 | } | ||
| 2717 | 2869 | ||
| 2718 | if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2870 | struct async_flush { |
| 2719 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { | 2871 | struct btrfs_root *root; |
| 2720 | struct btrfs_trans_handle *trans; | 2872 | struct btrfs_space_info *info; |
| 2721 | if (!meta_sinfo->full) { | 2873 | struct btrfs_work work; |
| 2722 | meta_sinfo->force_alloc = 1; | 2874 | }; |
| 2723 | spin_unlock(&meta_sinfo->lock); | ||
| 2724 | 2875 | ||
| 2725 | trans = btrfs_start_transaction(root, 1); | 2876 | static noinline void flush_delalloc_async(struct btrfs_work *work) |
| 2726 | if (!trans) | 2877 | { |
| 2727 | return -ENOMEM; | 2878 | struct async_flush *async; |
| 2879 | struct btrfs_root *root; | ||
| 2880 | struct btrfs_space_info *info; | ||
| 2728 | 2881 | ||
| 2729 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2882 | async = container_of(work, struct async_flush, work); |
| 2730 | 2 * 1024 * 1024, alloc_target, 0); | 2883 | root = async->root; |
| 2731 | btrfs_end_transaction(trans, root); | 2884 | info = async->info; |
| 2885 | |||
| 2886 | btrfs_start_delalloc_inodes(root); | ||
| 2887 | wake_up(&info->flush_wait); | ||
| 2888 | btrfs_wait_ordered_extents(root, 0); | ||
| 2889 | |||
| 2890 | spin_lock(&info->lock); | ||
| 2891 | info->flushing = 0; | ||
| 2892 | spin_unlock(&info->lock); | ||
| 2893 | wake_up(&info->flush_wait); | ||
| 2894 | |||
| 2895 | kfree(async); | ||
| 2896 | } | ||
| 2897 | |||
| 2898 | static void wait_on_flush(struct btrfs_space_info *info) | ||
| 2899 | { | ||
| 2900 | DEFINE_WAIT(wait); | ||
| 2901 | u64 used; | ||
| 2902 | |||
| 2903 | while (1) { | ||
| 2904 | prepare_to_wait(&info->flush_wait, &wait, | ||
| 2905 | TASK_UNINTERRUPTIBLE); | ||
| 2906 | spin_lock(&info->lock); | ||
| 2907 | if (!info->flushing) { | ||
| 2908 | spin_unlock(&info->lock); | ||
| 2909 | break; | ||
| 2910 | } | ||
| 2911 | |||
| 2912 | used = info->bytes_used + info->bytes_reserved + | ||
| 2913 | info->bytes_pinned + info->bytes_readonly + | ||
| 2914 | info->bytes_super + info->bytes_root + | ||
| 2915 | info->bytes_may_use + info->bytes_delalloc; | ||
| 2916 | if (used < info->total_bytes) { | ||
| 2917 | spin_unlock(&info->lock); | ||
| 2918 | break; | ||
| 2919 | } | ||
| 2920 | spin_unlock(&info->lock); | ||
| 2921 | schedule(); | ||
| 2922 | } | ||
| 2923 | finish_wait(&info->flush_wait, &wait); | ||
| 2924 | } | ||
| 2925 | |||
| 2926 | static void flush_delalloc(struct btrfs_root *root, | ||
| 2927 | struct btrfs_space_info *info) | ||
| 2928 | { | ||
| 2929 | struct async_flush *async; | ||
| 2930 | bool wait = false; | ||
| 2931 | |||
| 2932 | spin_lock(&info->lock); | ||
| 2933 | |||
| 2934 | if (!info->flushing) { | ||
| 2935 | info->flushing = 1; | ||
| 2936 | init_waitqueue_head(&info->flush_wait); | ||
| 2937 | } else { | ||
| 2938 | wait = true; | ||
| 2939 | } | ||
| 2940 | |||
| 2941 | spin_unlock(&info->lock); | ||
| 2942 | |||
| 2943 | if (wait) { | ||
| 2944 | wait_on_flush(info); | ||
| 2945 | return; | ||
| 2946 | } | ||
| 2947 | |||
| 2948 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
| 2949 | if (!async) | ||
| 2950 | goto flush; | ||
| 2951 | |||
| 2952 | async->root = root; | ||
| 2953 | async->info = info; | ||
| 2954 | async->work.func = flush_delalloc_async; | ||
| 2955 | |||
| 2956 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
| 2957 | &async->work); | ||
| 2958 | wait_on_flush(info); | ||
| 2959 | return; | ||
| 2960 | |||
| 2961 | flush: | ||
| 2962 | btrfs_start_delalloc_inodes(root); | ||
| 2963 | btrfs_wait_ordered_extents(root, 0); | ||
| 2964 | |||
| 2965 | spin_lock(&info->lock); | ||
| 2966 | info->flushing = 0; | ||
| 2967 | spin_unlock(&info->lock); | ||
| 2968 | wake_up(&info->flush_wait); | ||
| 2969 | } | ||
| 2970 | |||
| 2971 | static int maybe_allocate_chunk(struct btrfs_root *root, | ||
| 2972 | struct btrfs_space_info *info) | ||
| 2973 | { | ||
| 2974 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
| 2975 | struct btrfs_trans_handle *trans; | ||
| 2976 | bool wait = false; | ||
| 2977 | int ret = 0; | ||
| 2978 | u64 min_metadata; | ||
| 2979 | u64 free_space; | ||
| 2980 | |||
| 2981 | free_space = btrfs_super_total_bytes(disk_super); | ||
| 2982 | /* | ||
| 2983 | * we allow the metadata to grow to a max of either 5gb or 5% of the | ||
| 2984 | * space in the volume. | ||
| 2985 | */ | ||
| 2986 | min_metadata = min((u64)5 * 1024 * 1024 * 1024, | ||
| 2987 | div64_u64(free_space * 5, 100)); | ||
| 2988 | if (info->total_bytes >= min_metadata) { | ||
| 2989 | spin_unlock(&info->lock); | ||
| 2990 | return 0; | ||
| 2991 | } | ||
| 2992 | |||
| 2993 | if (info->full) { | ||
| 2994 | spin_unlock(&info->lock); | ||
| 2995 | return 0; | ||
| 2996 | } | ||
| 2997 | |||
| 2998 | if (!info->allocating_chunk) { | ||
| 2999 | info->force_alloc = 1; | ||
| 3000 | info->allocating_chunk = 1; | ||
| 3001 | init_waitqueue_head(&info->allocate_wait); | ||
| 3002 | } else { | ||
| 3003 | wait = true; | ||
| 3004 | } | ||
| 3005 | |||
| 3006 | spin_unlock(&info->lock); | ||
| 3007 | |||
| 3008 | if (wait) { | ||
| 3009 | wait_event(info->allocate_wait, | ||
| 3010 | !info->allocating_chunk); | ||
| 3011 | return 1; | ||
| 3012 | } | ||
| 3013 | |||
| 3014 | trans = btrfs_start_transaction(root, 1); | ||
| 3015 | if (!trans) { | ||
| 3016 | ret = -ENOMEM; | ||
| 3017 | goto out; | ||
| 3018 | } | ||
| 3019 | |||
| 3020 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3021 | 4096 + 2 * 1024 * 1024, | ||
| 3022 | info->flags, 0); | ||
| 3023 | btrfs_end_transaction(trans, root); | ||
| 3024 | if (ret) | ||
| 3025 | goto out; | ||
| 3026 | out: | ||
| 3027 | spin_lock(&info->lock); | ||
| 3028 | info->allocating_chunk = 0; | ||
| 3029 | spin_unlock(&info->lock); | ||
| 3030 | wake_up(&info->allocate_wait); | ||
| 3031 | |||
| 3032 | if (ret) | ||
| 3033 | return 0; | ||
| 3034 | return 1; | ||
| 3035 | } | ||
| 3036 | |||
| 3037 | /* | ||
| 3038 | * Reserve metadata space for delalloc. | ||
| 3039 | */ | ||
| 3040 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 3041 | struct inode *inode, int num_items) | ||
| 3042 | { | ||
| 3043 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3044 | struct btrfs_space_info *meta_sinfo; | ||
| 3045 | u64 num_bytes; | ||
| 3046 | u64 used; | ||
| 3047 | u64 alloc_target; | ||
| 3048 | int flushed = 0; | ||
| 3049 | int force_delalloc; | ||
| 3050 | |||
| 3051 | /* get the space info for where the metadata will live */ | ||
| 3052 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3053 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3054 | |||
| 3055 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 3056 | num_items); | ||
| 3057 | again: | ||
| 3058 | spin_lock(&meta_sinfo->lock); | ||
| 3059 | |||
| 3060 | force_delalloc = meta_sinfo->force_delalloc; | ||
| 3061 | |||
| 3062 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3063 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3064 | |||
| 3065 | if (!flushed) | ||
| 3066 | meta_sinfo->bytes_delalloc += num_bytes; | ||
| 3067 | |||
| 3068 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3069 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3070 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3071 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3072 | |||
| 3073 | if (used > meta_sinfo->total_bytes) { | ||
| 3074 | flushed++; | ||
| 3075 | |||
| 3076 | if (flushed == 1) { | ||
| 3077 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3078 | goto again; | ||
| 3079 | flushed++; | ||
| 3080 | } else { | ||
| 3081 | spin_unlock(&meta_sinfo->lock); | ||
| 3082 | } | ||
| 3083 | |||
| 3084 | if (flushed == 2) { | ||
| 3085 | filemap_flush(inode->i_mapping); | ||
| 3086 | goto again; | ||
| 3087 | } else if (flushed == 3) { | ||
| 3088 | flush_delalloc(root, meta_sinfo); | ||
| 2732 | goto again; | 3089 | goto again; |
| 2733 | } | 3090 | } |
| 3091 | spin_lock(&meta_sinfo->lock); | ||
| 3092 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2734 | spin_unlock(&meta_sinfo->lock); | 3093 | spin_unlock(&meta_sinfo->lock); |
| 3094 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
| 3095 | BTRFS_I(inode)->outstanding_extents, | ||
| 3096 | BTRFS_I(inode)->reserved_extents); | ||
| 3097 | dump_space_info(meta_sinfo, 0, 0); | ||
| 3098 | return -ENOSPC; | ||
| 3099 | } | ||
| 2735 | 3100 | ||
| 2736 | if (!committed) { | 3101 | BTRFS_I(inode)->reserved_extents++; |
| 2737 | committed = 1; | 3102 | check_force_delalloc(meta_sinfo); |
| 2738 | trans = btrfs_join_transaction(root, 1); | 3103 | spin_unlock(&meta_sinfo->lock); |
| 2739 | if (!trans) | 3104 | |
| 2740 | return -ENOMEM; | 3105 | if (!flushed && force_delalloc) |
| 2741 | ret = btrfs_commit_transaction(trans, root); | 3106 | filemap_flush(inode->i_mapping); |
| 2742 | if (ret) | 3107 | |
| 2743 | return ret; | 3108 | return 0; |
| 3109 | } | ||
| 3110 | |||
| 3111 | /* | ||
| 3112 | * unreserve num_items number of items worth of metadata space. This needs to | ||
| 3113 | * be paired with btrfs_reserve_metadata_space. | ||
| 3114 | * | ||
| 3115 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
| 3116 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
| 3117 | * operations which will result in more used metadata, so we want to make sure we | |
| 3118 | * can do that without issue. | ||
| 3119 | */ | ||
| 3120 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3121 | { | ||
| 3122 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3123 | struct btrfs_space_info *meta_sinfo; | ||
| 3124 | u64 num_bytes; | ||
| 3125 | u64 alloc_target; | ||
| 3126 | bool bug = false; | ||
| 3127 | |||
| 3128 | /* get the space info for where the metadata will live */ | ||
| 3129 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3130 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3131 | |||
| 3132 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3133 | |||
| 3134 | spin_lock(&meta_sinfo->lock); | ||
| 3135 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
| 3136 | bug = true; | ||
| 3137 | meta_sinfo->bytes_may_use = 0; | ||
| 3138 | } else { | ||
| 3139 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3140 | } | ||
| 3141 | spin_unlock(&meta_sinfo->lock); | ||
| 3142 | |||
| 3143 | BUG_ON(bug); | ||
| 3144 | |||
| 3145 | return 0; | ||
| 3146 | } | ||
| 3147 | |||
| 3148 | /* | ||
| 3149 | * Reserve some metadata space for use. We'll calculate the worst case number | |
| 3150 | * of bytes that would be needed to modify num_items number of items. If we | ||
| 3151 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
| 3152 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
| 3153 | * items you reserved, since whatever metadata you needed should have already | ||
| 3154 | * been allocated. | ||
| 3155 | * | ||
| 3156 | * This will commit the transaction to make more space if we don't have enough | ||
| 3157 | * metadata space. The only time we don't do this is if we're reserving space | |
| 3158 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
| 3159 | * caller's responsibility to handle it properly. | |
| 3160 | */ | ||
| 3161 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3162 | { | ||
| 3163 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3164 | struct btrfs_space_info *meta_sinfo; | ||
| 3165 | u64 num_bytes; | ||
| 3166 | u64 used; | ||
| 3167 | u64 alloc_target; | ||
| 3168 | int retries = 0; | ||
| 3169 | |||
| 3170 | /* get the space info for where the metadata will live */ | ||
| 3171 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3172 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3173 | |||
| 3174 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3175 | again: | ||
| 3176 | spin_lock(&meta_sinfo->lock); | ||
| 3177 | |||
| 3178 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3179 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3180 | |||
| 3181 | if (!retries) | ||
| 3182 | meta_sinfo->bytes_may_use += num_bytes; | ||
| 3183 | |||
| 3184 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3185 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3186 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3187 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3188 | |||
| 3189 | if (used > meta_sinfo->total_bytes) { | ||
| 3190 | retries++; | ||
| 3191 | if (retries == 1) { | ||
| 3192 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3193 | goto again; | ||
| 3194 | retries++; | ||
| 3195 | } else { | ||
| 3196 | spin_unlock(&meta_sinfo->lock); | ||
| 3197 | } | ||
| 3198 | |||
| 3199 | if (retries == 2) { | ||
| 3200 | flush_delalloc(root, meta_sinfo); | ||
| 2744 | goto again; | 3201 | goto again; |
| 2745 | } | 3202 | } |
| 3203 | spin_lock(&meta_sinfo->lock); | ||
| 3204 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3205 | spin_unlock(&meta_sinfo->lock); | ||
| 3206 | |||
| 3207 | dump_space_info(meta_sinfo, 0, 0); | ||
| 2746 | return -ENOSPC; | 3208 | return -ENOSPC; |
| 2747 | } | 3209 | } |
| 3210 | |||
| 3211 | check_force_delalloc(meta_sinfo); | ||
| 2748 | spin_unlock(&meta_sinfo->lock); | 3212 | spin_unlock(&meta_sinfo->lock); |
| 2749 | 3213 | ||
| 2750 | return 0; | 3214 | return 0; |
| @@ -2764,13 +3228,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
| 2764 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 3228 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| 2765 | 3229 | ||
| 2766 | data_sinfo = BTRFS_I(inode)->space_info; | 3230 | data_sinfo = BTRFS_I(inode)->space_info; |
| 3231 | if (!data_sinfo) | ||
| 3232 | goto alloc; | ||
| 3233 | |||
| 2767 | again: | 3234 | again: |
| 2768 | /* make sure we have enough space to handle the data first */ | 3235 | /* make sure we have enough space to handle the data first */ |
| 2769 | spin_lock(&data_sinfo->lock); | 3236 | spin_lock(&data_sinfo->lock); |
| 2770 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | 3237 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - |
| 2771 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | 3238 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - |
| 2772 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | 3239 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - |
| 2773 | data_sinfo->bytes_may_use < bytes) { | 3240 | data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { |
| 2774 | struct btrfs_trans_handle *trans; | 3241 | struct btrfs_trans_handle *trans; |
| 2775 | 3242 | ||
| 2776 | /* | 3243 | /* |
| @@ -2782,7 +3249,7 @@ again: | |||
| 2782 | 3249 | ||
| 2783 | data_sinfo->force_alloc = 1; | 3250 | data_sinfo->force_alloc = 1; |
| 2784 | spin_unlock(&data_sinfo->lock); | 3251 | spin_unlock(&data_sinfo->lock); |
| 2785 | 3252 | alloc: | |
| 2786 | alloc_target = btrfs_get_alloc_profile(root, 1); | 3253 | alloc_target = btrfs_get_alloc_profile(root, 1); |
| 2787 | trans = btrfs_start_transaction(root, 1); | 3254 | trans = btrfs_start_transaction(root, 1); |
| 2788 | if (!trans) | 3255 | if (!trans) |
| @@ -2794,12 +3261,17 @@ again: | |||
| 2794 | btrfs_end_transaction(trans, root); | 3261 | btrfs_end_transaction(trans, root); |
| 2795 | if (ret) | 3262 | if (ret) |
| 2796 | return ret; | 3263 | return ret; |
| 3264 | |||
| 3265 | if (!data_sinfo) { | ||
| 3266 | btrfs_set_inode_space_info(root, inode); | ||
| 3267 | data_sinfo = BTRFS_I(inode)->space_info; | ||
| 3268 | } | ||
| 2797 | goto again; | 3269 | goto again; |
| 2798 | } | 3270 | } |
| 2799 | spin_unlock(&data_sinfo->lock); | 3271 | spin_unlock(&data_sinfo->lock); |
| 2800 | 3272 | ||
| 2801 | /* commit the current transaction and try again */ | 3273 | /* commit the current transaction and try again */ |
| 2802 | if (!committed) { | 3274 | if (!committed && !root->fs_info->open_ioctl_trans) { |
| 2803 | committed = 1; | 3275 | committed = 1; |
| 2804 | trans = btrfs_join_transaction(root, 1); | 3276 | trans = btrfs_join_transaction(root, 1); |
| 2805 | if (!trans) | 3277 | if (!trans) |
| @@ -2827,7 +3299,7 @@ again: | |||
| 2827 | BTRFS_I(inode)->reserved_bytes += bytes; | 3299 | BTRFS_I(inode)->reserved_bytes += bytes; |
| 2828 | spin_unlock(&data_sinfo->lock); | 3300 | spin_unlock(&data_sinfo->lock); |
| 2829 | 3301 | ||
| 2830 | return btrfs_check_metadata_free_space(root); | 3302 | return 0; |
| 2831 | } | 3303 | } |
| 2832 | 3304 | ||
| 2833 | /* | 3305 | /* |
| @@ -2926,17 +3398,15 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 2926 | BUG_ON(!space_info); | 3398 | BUG_ON(!space_info); |
| 2927 | 3399 | ||
| 2928 | spin_lock(&space_info->lock); | 3400 | spin_lock(&space_info->lock); |
| 2929 | if (space_info->force_alloc) { | 3401 | if (space_info->force_alloc) |
| 2930 | force = 1; | 3402 | force = 1; |
| 2931 | space_info->force_alloc = 0; | ||
| 2932 | } | ||
| 2933 | if (space_info->full) { | 3403 | if (space_info->full) { |
| 2934 | spin_unlock(&space_info->lock); | 3404 | spin_unlock(&space_info->lock); |
| 2935 | goto out; | 3405 | goto out; |
| 2936 | } | 3406 | } |
| 2937 | 3407 | ||
| 2938 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3408 | thresh = space_info->total_bytes - space_info->bytes_readonly; |
| 2939 | thresh = div_factor(thresh, 6); | 3409 | thresh = div_factor(thresh, 8); |
| 2940 | if (!force && | 3410 | if (!force && |
| 2941 | (space_info->bytes_used + space_info->bytes_pinned + | 3411 | (space_info->bytes_used + space_info->bytes_pinned + |
| 2942 | space_info->bytes_reserved + alloc_bytes) < thresh) { | 3412 | space_info->bytes_reserved + alloc_bytes) < thresh) { |
| @@ -2950,7 +3420,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 2950 | * we keep a reasonable number of metadata chunks allocated in the | 3420 | * we keep a reasonable number of metadata chunks allocated in the |
| 2951 | * FS as well. | 3421 | * FS as well. |
| 2952 | */ | 3422 | */ |
| 2953 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | 3423 | if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { |
| 2954 | fs_info->data_chunk_allocations++; | 3424 | fs_info->data_chunk_allocations++; |
| 2955 | if (!(fs_info->data_chunk_allocations % | 3425 | if (!(fs_info->data_chunk_allocations % |
| 2956 | fs_info->metadata_ratio)) | 3426 | fs_info->metadata_ratio)) |
| @@ -2958,8 +3428,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 2958 | } | 3428 | } |
| 2959 | 3429 | ||
| 2960 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3430 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
| 3431 | spin_lock(&space_info->lock); | ||
| 2961 | if (ret) | 3432 | if (ret) |
| 2962 | space_info->full = 1; | 3433 | space_info->full = 1; |
| 3434 | space_info->force_alloc = 0; | ||
| 3435 | spin_unlock(&space_info->lock); | ||
| 2963 | out: | 3436 | out: |
| 2964 | mutex_unlock(&extent_root->fs_info->chunk_mutex); | 3437 | mutex_unlock(&extent_root->fs_info->chunk_mutex); |
| 2965 | return ret; | 3438 | return ret; |
| @@ -3008,10 +3481,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 3008 | num_bytes = min(total, cache->key.offset - byte_in_group); | 3481 | num_bytes = min(total, cache->key.offset - byte_in_group); |
| 3009 | if (alloc) { | 3482 | if (alloc) { |
| 3010 | old_val += num_bytes; | 3483 | old_val += num_bytes; |
| 3484 | btrfs_set_block_group_used(&cache->item, old_val); | ||
| 3485 | cache->reserved -= num_bytes; | ||
| 3011 | cache->space_info->bytes_used += num_bytes; | 3486 | cache->space_info->bytes_used += num_bytes; |
| 3487 | cache->space_info->bytes_reserved -= num_bytes; | ||
| 3012 | if (cache->ro) | 3488 | if (cache->ro) |
| 3013 | cache->space_info->bytes_readonly -= num_bytes; | 3489 | cache->space_info->bytes_readonly -= num_bytes; |
| 3014 | btrfs_set_block_group_used(&cache->item, old_val); | ||
| 3015 | spin_unlock(&cache->lock); | 3490 | spin_unlock(&cache->lock); |
| 3016 | spin_unlock(&cache->space_info->lock); | 3491 | spin_unlock(&cache->space_info->lock); |
| 3017 | } else { | 3492 | } else { |
| @@ -3056,127 +3531,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
| 3056 | return bytenr; | 3531 | return bytenr; |
| 3057 | } | 3532 | } |
| 3058 | 3533 | ||
| 3059 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 3534 | /* |
| 3060 | u64 bytenr, u64 num, int pin) | 3535 | * this function must be called within transaction |
| 3536 | */ | ||
| 3537 | int btrfs_pin_extent(struct btrfs_root *root, | ||
| 3538 | u64 bytenr, u64 num_bytes, int reserved) | ||
| 3061 | { | 3539 | { |
| 3062 | u64 len; | ||
| 3063 | struct btrfs_block_group_cache *cache; | ||
| 3064 | struct btrfs_fs_info *fs_info = root->fs_info; | 3540 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3541 | struct btrfs_block_group_cache *cache; | ||
| 3065 | 3542 | ||
| 3066 | if (pin) | 3543 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
| 3067 | set_extent_dirty(&fs_info->pinned_extents, | 3544 | BUG_ON(!cache); |
| 3068 | bytenr, bytenr + num - 1, GFP_NOFS); | ||
| 3069 | |||
| 3070 | while (num > 0) { | ||
| 3071 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 3072 | BUG_ON(!cache); | ||
| 3073 | len = min(num, cache->key.offset - | ||
| 3074 | (bytenr - cache->key.objectid)); | ||
| 3075 | if (pin) { | ||
| 3076 | spin_lock(&cache->space_info->lock); | ||
| 3077 | spin_lock(&cache->lock); | ||
| 3078 | cache->pinned += len; | ||
| 3079 | cache->space_info->bytes_pinned += len; | ||
| 3080 | spin_unlock(&cache->lock); | ||
| 3081 | spin_unlock(&cache->space_info->lock); | ||
| 3082 | fs_info->total_pinned += len; | ||
| 3083 | } else { | ||
| 3084 | int unpin = 0; | ||
| 3085 | 3545 | ||
| 3086 | /* | 3546 | spin_lock(&cache->space_info->lock); |
| 3087 | * in order to not race with the block group caching, we | 3547 | spin_lock(&cache->lock); |
| 3088 | * only want to unpin the extent if we are cached. If | 3548 | cache->pinned += num_bytes; |
| 3089 | * we aren't cached, we want to start async caching this | 3549 | cache->space_info->bytes_pinned += num_bytes; |
| 3090 | * block group so we can free the extent the next time | 3550 | if (reserved) { |
| 3091 | * around. | 3551 | cache->reserved -= num_bytes; |
| 3092 | */ | 3552 | cache->space_info->bytes_reserved -= num_bytes; |
| 3093 | spin_lock(&cache->space_info->lock); | 3553 | } |
| 3094 | spin_lock(&cache->lock); | 3554 | spin_unlock(&cache->lock); |
| 3095 | unpin = (cache->cached == BTRFS_CACHE_FINISHED); | 3555 | spin_unlock(&cache->space_info->lock); |
| 3096 | if (likely(unpin)) { | ||
| 3097 | cache->pinned -= len; | ||
| 3098 | cache->space_info->bytes_pinned -= len; | ||
| 3099 | fs_info->total_pinned -= len; | ||
| 3100 | } | ||
| 3101 | spin_unlock(&cache->lock); | ||
| 3102 | spin_unlock(&cache->space_info->lock); | ||
| 3103 | 3556 | ||
| 3104 | if (likely(unpin)) | 3557 | btrfs_put_block_group(cache); |
| 3105 | clear_extent_dirty(&fs_info->pinned_extents, | ||
| 3106 | bytenr, bytenr + len -1, | ||
| 3107 | GFP_NOFS); | ||
| 3108 | else | ||
| 3109 | cache_block_group(cache); | ||
| 3110 | 3558 | ||
| 3111 | if (unpin) | 3559 | set_extent_dirty(fs_info->pinned_extents, |
| 3112 | btrfs_add_free_space(cache, bytenr, len); | 3560 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); |
| 3113 | } | 3561 | return 0; |
| 3114 | btrfs_put_block_group(cache); | 3562 | } |
| 3115 | bytenr += len; | 3563 | |
| 3116 | num -= len; | 3564 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, |
| 3565 | u64 num_bytes, int reserve) | ||
| 3566 | { | ||
| 3567 | spin_lock(&cache->space_info->lock); | ||
| 3568 | spin_lock(&cache->lock); | ||
| 3569 | if (reserve) { | ||
| 3570 | cache->reserved += num_bytes; | ||
| 3571 | cache->space_info->bytes_reserved += num_bytes; | ||
| 3572 | } else { | ||
| 3573 | cache->reserved -= num_bytes; | ||
| 3574 | cache->space_info->bytes_reserved -= num_bytes; | ||
| 3117 | } | 3575 | } |
| 3576 | spin_unlock(&cache->lock); | ||
| 3577 | spin_unlock(&cache->space_info->lock); | ||
| 3118 | return 0; | 3578 | return 0; |
| 3119 | } | 3579 | } |
| 3120 | 3580 | ||
| 3121 | static int update_reserved_extents(struct btrfs_root *root, | 3581 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
| 3122 | u64 bytenr, u64 num, int reserve) | 3582 | struct btrfs_root *root) |
| 3123 | { | 3583 | { |
| 3124 | u64 len; | ||
| 3125 | struct btrfs_block_group_cache *cache; | ||
| 3126 | struct btrfs_fs_info *fs_info = root->fs_info; | 3584 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3585 | struct btrfs_caching_control *next; | ||
| 3586 | struct btrfs_caching_control *caching_ctl; | ||
| 3587 | struct btrfs_block_group_cache *cache; | ||
| 3127 | 3588 | ||
| 3128 | while (num > 0) { | 3589 | down_write(&fs_info->extent_commit_sem); |
| 3129 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 3130 | BUG_ON(!cache); | ||
| 3131 | len = min(num, cache->key.offset - | ||
| 3132 | (bytenr - cache->key.objectid)); | ||
| 3133 | 3590 | ||
| 3134 | spin_lock(&cache->space_info->lock); | 3591 | list_for_each_entry_safe(caching_ctl, next, |
| 3135 | spin_lock(&cache->lock); | 3592 | &fs_info->caching_block_groups, list) { |
| 3136 | if (reserve) { | 3593 | cache = caching_ctl->block_group; |
| 3137 | cache->reserved += len; | 3594 | if (block_group_cache_done(cache)) { |
| 3138 | cache->space_info->bytes_reserved += len; | 3595 | cache->last_byte_to_unpin = (u64)-1; |
| 3596 | list_del_init(&caching_ctl->list); | ||
| 3597 | put_caching_control(caching_ctl); | ||
| 3139 | } else { | 3598 | } else { |
| 3140 | cache->reserved -= len; | 3599 | cache->last_byte_to_unpin = caching_ctl->progress; |
| 3141 | cache->space_info->bytes_reserved -= len; | ||
| 3142 | } | 3600 | } |
| 3143 | spin_unlock(&cache->lock); | ||
| 3144 | spin_unlock(&cache->space_info->lock); | ||
| 3145 | btrfs_put_block_group(cache); | ||
| 3146 | bytenr += len; | ||
| 3147 | num -= len; | ||
| 3148 | } | 3601 | } |
| 3602 | |||
| 3603 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
| 3604 | fs_info->pinned_extents = &fs_info->freed_extents[1]; | ||
| 3605 | else | ||
| 3606 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | ||
| 3607 | |||
| 3608 | up_write(&fs_info->extent_commit_sem); | ||
| 3149 | return 0; | 3609 | return 0; |
| 3150 | } | 3610 | } |
| 3151 | 3611 | ||
| 3152 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | 3612 | static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) |
| 3153 | { | 3613 | { |
| 3154 | u64 last = 0; | 3614 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3155 | u64 start; | 3615 | struct btrfs_block_group_cache *cache = NULL; |
| 3156 | u64 end; | 3616 | u64 len; |
| 3157 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | ||
| 3158 | int ret; | ||
| 3159 | 3617 | ||
| 3160 | while (1) { | 3618 | while (start <= end) { |
| 3161 | ret = find_first_extent_bit(pinned_extents, last, | 3619 | if (!cache || |
| 3162 | &start, &end, EXTENT_DIRTY); | 3620 | start >= cache->key.objectid + cache->key.offset) { |
| 3163 | if (ret) | 3621 | if (cache) |
| 3164 | break; | 3622 | btrfs_put_block_group(cache); |
| 3623 | cache = btrfs_lookup_block_group(fs_info, start); | ||
| 3624 | BUG_ON(!cache); | ||
| 3625 | } | ||
| 3626 | |||
| 3627 | len = cache->key.objectid + cache->key.offset - start; | ||
| 3628 | len = min(len, end + 1 - start); | ||
| 3165 | 3629 | ||
| 3166 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3630 | if (start < cache->last_byte_to_unpin) { |
| 3167 | last = end + 1; | 3631 | len = min(len, cache->last_byte_to_unpin - start); |
| 3632 | btrfs_add_free_space(cache, start, len); | ||
| 3633 | } | ||
| 3634 | |||
| 3635 | spin_lock(&cache->space_info->lock); | ||
| 3636 | spin_lock(&cache->lock); | ||
| 3637 | cache->pinned -= len; | ||
| 3638 | cache->space_info->bytes_pinned -= len; | ||
| 3639 | spin_unlock(&cache->lock); | ||
| 3640 | spin_unlock(&cache->space_info->lock); | ||
| 3641 | |||
| 3642 | start += len; | ||
| 3168 | } | 3643 | } |
| 3644 | |||
| 3645 | if (cache) | ||
| 3646 | btrfs_put_block_group(cache); | ||
| 3169 | return 0; | 3647 | return 0; |
| 3170 | } | 3648 | } |
| 3171 | 3649 | ||
| 3172 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 3650 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
| 3173 | struct btrfs_root *root, | 3651 | struct btrfs_root *root) |
| 3174 | struct extent_io_tree *unpin) | ||
| 3175 | { | 3652 | { |
| 3653 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 3654 | struct extent_io_tree *unpin; | ||
| 3176 | u64 start; | 3655 | u64 start; |
| 3177 | u64 end; | 3656 | u64 end; |
| 3178 | int ret; | 3657 | int ret; |
| 3179 | 3658 | ||
| 3659 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | ||
| 3660 | unpin = &fs_info->freed_extents[1]; | ||
| 3661 | else | ||
| 3662 | unpin = &fs_info->freed_extents[0]; | ||
| 3663 | |||
| 3180 | while (1) { | 3664 | while (1) { |
| 3181 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3665 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
| 3182 | EXTENT_DIRTY); | 3666 | EXTENT_DIRTY); |
| @@ -3185,10 +3669,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3185 | 3669 | ||
| 3186 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 3670 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
| 3187 | 3671 | ||
| 3188 | /* unlocks the pinned mutex */ | ||
| 3189 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | ||
| 3190 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 3672 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
| 3191 | 3673 | unpin_extent_range(root, start, end); | |
| 3192 | cond_resched(); | 3674 | cond_resched(); |
| 3193 | } | 3675 | } |
| 3194 | 3676 | ||
| @@ -3198,7 +3680,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3198 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 3680 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
| 3199 | struct btrfs_root *root, | 3681 | struct btrfs_root *root, |
| 3200 | struct btrfs_path *path, | 3682 | struct btrfs_path *path, |
| 3201 | u64 bytenr, u64 num_bytes, int is_data, | 3683 | u64 bytenr, u64 num_bytes, |
| 3684 | int is_data, int reserved, | ||
| 3202 | struct extent_buffer **must_clean) | 3685 | struct extent_buffer **must_clean) |
| 3203 | { | 3686 | { |
| 3204 | int err = 0; | 3687 | int err = 0; |
| @@ -3230,15 +3713,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
| 3230 | } | 3713 | } |
| 3231 | free_extent_buffer(buf); | 3714 | free_extent_buffer(buf); |
| 3232 | pinit: | 3715 | pinit: |
| 3233 | btrfs_set_path_blocking(path); | 3716 | if (path) |
| 3717 | btrfs_set_path_blocking(path); | ||
| 3234 | /* unlocks the pinned mutex */ | 3718 | /* unlocks the pinned mutex */ |
| 3235 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3719 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); |
| 3236 | 3720 | ||
| 3237 | BUG_ON(err < 0); | 3721 | BUG_ON(err < 0); |
| 3238 | return 0; | 3722 | return 0; |
| 3239 | } | 3723 | } |
| 3240 | 3724 | ||
| 3241 | |||
| 3242 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 3725 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 3243 | struct btrfs_root *root, | 3726 | struct btrfs_root *root, |
| 3244 | u64 bytenr, u64 num_bytes, u64 parent, | 3727 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -3412,7 +3895,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3412 | } | 3895 | } |
| 3413 | 3896 | ||
| 3414 | ret = pin_down_bytes(trans, root, path, bytenr, | 3897 | ret = pin_down_bytes(trans, root, path, bytenr, |
| 3415 | num_bytes, is_data, &must_clean); | 3898 | num_bytes, is_data, 0, &must_clean); |
| 3416 | if (ret > 0) | 3899 | if (ret > 0) |
| 3417 | mark_free = 1; | 3900 | mark_free = 1; |
| 3418 | BUG_ON(ret < 0); | 3901 | BUG_ON(ret < 0); |
| @@ -3543,8 +4026,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3543 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { | 4026 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
| 3544 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 4027 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
| 3545 | /* unlocks the pinned mutex */ | 4028 | /* unlocks the pinned mutex */ |
| 3546 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 4029 | btrfs_pin_extent(root, bytenr, num_bytes, 1); |
| 3547 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
| 3548 | ret = 0; | 4030 | ret = 0; |
| 3549 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 4031 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
| 3550 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, | 4032 | ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, |
| @@ -3584,19 +4066,33 @@ static noinline int | |||
| 3584 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | 4066 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, |
| 3585 | u64 num_bytes) | 4067 | u64 num_bytes) |
| 3586 | { | 4068 | { |
| 4069 | struct btrfs_caching_control *caching_ctl; | ||
| 3587 | DEFINE_WAIT(wait); | 4070 | DEFINE_WAIT(wait); |
| 3588 | 4071 | ||
| 3589 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | 4072 | caching_ctl = get_caching_control(cache); |
| 3590 | 4073 | if (!caching_ctl) | |
| 3591 | if (block_group_cache_done(cache)) { | ||
| 3592 | finish_wait(&cache->caching_q, &wait); | ||
| 3593 | return 0; | 4074 | return 0; |
| 3594 | } | ||
| 3595 | schedule(); | ||
| 3596 | finish_wait(&cache->caching_q, &wait); | ||
| 3597 | 4075 | ||
| 3598 | wait_event(cache->caching_q, block_group_cache_done(cache) || | 4076 | wait_event(caching_ctl->wait, block_group_cache_done(cache) || |
| 3599 | (cache->free_space >= num_bytes)); | 4077 | (cache->free_space >= num_bytes)); |
| 4078 | |||
| 4079 | put_caching_control(caching_ctl); | ||
| 4080 | return 0; | ||
| 4081 | } | ||
| 4082 | |||
| 4083 | static noinline int | ||
| 4084 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
| 4085 | { | ||
| 4086 | struct btrfs_caching_control *caching_ctl; | ||
| 4087 | DEFINE_WAIT(wait); | ||
| 4088 | |||
| 4089 | caching_ctl = get_caching_control(cache); | ||
| 4090 | if (!caching_ctl) | ||
| 4091 | return 0; | ||
| 4092 | |||
| 4093 | wait_event(caching_ctl->wait, block_group_cache_done(cache)); | ||
| 4094 | |||
| 4095 | put_caching_control(caching_ctl); | ||
| 3600 | return 0; | 4096 | return 0; |
| 3601 | } | 4097 | } |
| 3602 | 4098 | ||
| @@ -3634,6 +4130,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 3634 | int last_ptr_loop = 0; | 4130 | int last_ptr_loop = 0; |
| 3635 | int loop = 0; | 4131 | int loop = 0; |
| 3636 | bool found_uncached_bg = false; | 4132 | bool found_uncached_bg = false; |
| 4133 | bool failed_cluster_refill = false; | ||
| 4134 | bool failed_alloc = false; | ||
| 3637 | 4135 | ||
| 3638 | WARN_ON(num_bytes < root->sectorsize); | 4136 | WARN_ON(num_bytes < root->sectorsize); |
| 3639 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 4137 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
| @@ -3731,7 +4229,16 @@ have_block_group: | |||
| 3731 | if (unlikely(block_group->ro)) | 4229 | if (unlikely(block_group->ro)) |
| 3732 | goto loop; | 4230 | goto loop; |
| 3733 | 4231 | ||
| 3734 | if (last_ptr) { | 4232 | /* |
| 4233 | * Ok we want to try and use the cluster allocator, so lets look | ||
| 4234 | * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will | ||
| 4235 | * have tried the cluster allocator plenty of times at this | ||
| 4236 | * point and not have found anything, so we are likely way too | ||
| 4237 | * fragmented for the clustering stuff to find anything, so lets | ||
| 4238 | * just skip it and let the allocator find whatever block it can | ||
| 4239 | * find | ||
| 4240 | */ | ||
| 4241 | if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { | ||
| 3735 | /* | 4242 | /* |
| 3736 | * the refill lock keeps out other | 4243 | * the refill lock keeps out other |
| 3737 | * people trying to start a new cluster | 4244 | * people trying to start a new cluster |
| @@ -3806,9 +4313,11 @@ refill_cluster: | |||
| 3806 | spin_unlock(&last_ptr->refill_lock); | 4313 | spin_unlock(&last_ptr->refill_lock); |
| 3807 | goto checks; | 4314 | goto checks; |
| 3808 | } | 4315 | } |
| 3809 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | 4316 | } else if (!cached && loop > LOOP_CACHING_NOWAIT |
| 4317 | && !failed_cluster_refill) { | ||
| 3810 | spin_unlock(&last_ptr->refill_lock); | 4318 | spin_unlock(&last_ptr->refill_lock); |
| 3811 | 4319 | ||
| 4320 | failed_cluster_refill = true; | ||
| 3812 | wait_block_group_cache_progress(block_group, | 4321 | wait_block_group_cache_progress(block_group, |
| 3813 | num_bytes + empty_cluster + empty_size); | 4322 | num_bytes + empty_cluster + empty_size); |
| 3814 | goto have_block_group; | 4323 | goto have_block_group; |
| @@ -3820,25 +4329,30 @@ refill_cluster: | |||
| 3820 | * cluster. Free the cluster we've been trying | 4329 | * cluster. Free the cluster we've been trying |
| 3821 | * to use, and go to the next block group | 4330 | * to use, and go to the next block group |
| 3822 | */ | 4331 | */ |
| 3823 | if (loop < LOOP_NO_EMPTY_SIZE) { | 4332 | btrfs_return_cluster_to_free_space(NULL, last_ptr); |
| 3824 | btrfs_return_cluster_to_free_space(NULL, | ||
| 3825 | last_ptr); | ||
| 3826 | spin_unlock(&last_ptr->refill_lock); | ||
| 3827 | goto loop; | ||
| 3828 | } | ||
| 3829 | spin_unlock(&last_ptr->refill_lock); | 4333 | spin_unlock(&last_ptr->refill_lock); |
| 4334 | goto loop; | ||
| 3830 | } | 4335 | } |
| 3831 | 4336 | ||
| 3832 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 4337 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
| 3833 | num_bytes, empty_size); | 4338 | num_bytes, empty_size); |
| 3834 | if (!offset && (cached || (!cached && | 4339 | /* |
| 3835 | loop == LOOP_CACHING_NOWAIT))) { | 4340 | * If we didn't find a chunk, and we haven't failed on this |
| 3836 | goto loop; | 4341 | * block group before, and this block group is in the middle of |
| 3837 | } else if (!offset && (!cached && | 4342 | * caching and we are ok with waiting, then go ahead and wait |
| 3838 | loop > LOOP_CACHING_NOWAIT)) { | 4343 | * for progress to be made, and set failed_alloc to true. |
| 4344 | * | ||
| 4345 | * If failed_alloc is true then we've already waited on this | ||
| 4346 | * block group once and should move on to the next block group. | ||
| 4347 | */ | ||
| 4348 | if (!offset && !failed_alloc && !cached && | ||
| 4349 | loop > LOOP_CACHING_NOWAIT) { | ||
| 3839 | wait_block_group_cache_progress(block_group, | 4350 | wait_block_group_cache_progress(block_group, |
| 3840 | num_bytes + empty_size); | 4351 | num_bytes + empty_size); |
| 4352 | failed_alloc = true; | ||
| 3841 | goto have_block_group; | 4353 | goto have_block_group; |
| 4354 | } else if (!offset) { | ||
| 4355 | goto loop; | ||
| 3842 | } | 4356 | } |
| 3843 | checks: | 4357 | checks: |
| 3844 | search_start = stripe_align(root, offset); | 4358 | search_start = stripe_align(root, offset); |
| @@ -3880,9 +4394,13 @@ checks: | |||
| 3880 | search_start - offset); | 4394 | search_start - offset); |
| 3881 | BUG_ON(offset > search_start); | 4395 | BUG_ON(offset > search_start); |
| 3882 | 4396 | ||
| 4397 | update_reserved_extents(block_group, num_bytes, 1); | ||
| 4398 | |||
| 3883 | /* we are all good, lets return */ | 4399 | /* we are all good, lets return */ |
| 3884 | break; | 4400 | break; |
| 3885 | loop: | 4401 | loop: |
| 4402 | failed_cluster_refill = false; | ||
| 4403 | failed_alloc = false; | ||
| 3886 | btrfs_put_block_group(block_group); | 4404 | btrfs_put_block_group(block_group); |
| 3887 | } | 4405 | } |
| 3888 | up_read(&space_info->groups_sem); | 4406 | up_read(&space_info->groups_sem); |
| @@ -3940,21 +4458,32 @@ loop: | |||
| 3940 | return ret; | 4458 | return ret; |
| 3941 | } | 4459 | } |
| 3942 | 4460 | ||
| 3943 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | 4461 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| 4462 | int dump_block_groups) | ||
| 3944 | { | 4463 | { |
| 3945 | struct btrfs_block_group_cache *cache; | 4464 | struct btrfs_block_group_cache *cache; |
| 3946 | 4465 | ||
| 4466 | spin_lock(&info->lock); | ||
| 3947 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4467 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
| 3948 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4468 | (unsigned long long)(info->total_bytes - info->bytes_used - |
| 3949 | info->bytes_pinned - info->bytes_reserved), | 4469 | info->bytes_pinned - info->bytes_reserved - |
| 4470 | info->bytes_super), | ||
| 3950 | (info->full) ? "" : "not "); | 4471 | (info->full) ? "" : "not "); |
| 3951 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4472 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
| 3952 | " may_use=%llu, used=%llu\n", | 4473 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" |
| 4474 | "\n", | ||
| 3953 | (unsigned long long)info->total_bytes, | 4475 | (unsigned long long)info->total_bytes, |
| 3954 | (unsigned long long)info->bytes_pinned, | 4476 | (unsigned long long)info->bytes_pinned, |
| 3955 | (unsigned long long)info->bytes_delalloc, | 4477 | (unsigned long long)info->bytes_delalloc, |
| 3956 | (unsigned long long)info->bytes_may_use, | 4478 | (unsigned long long)info->bytes_may_use, |
| 3957 | (unsigned long long)info->bytes_used); | 4479 | (unsigned long long)info->bytes_used, |
| 4480 | (unsigned long long)info->bytes_root, | ||
| 4481 | (unsigned long long)info->bytes_super, | ||
| 4482 | (unsigned long long)info->bytes_reserved); | ||
| 4483 | spin_unlock(&info->lock); | ||
| 4484 | |||
| 4485 | if (!dump_block_groups) | ||
| 4486 | return; | ||
| 3958 | 4487 | ||
| 3959 | down_read(&info->groups_sem); | 4488 | down_read(&info->groups_sem); |
| 3960 | list_for_each_entry(cache, &info->block_groups, list) { | 4489 | list_for_each_entry(cache, &info->block_groups, list) { |
| @@ -3972,12 +4501,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
| 3972 | up_read(&info->groups_sem); | 4501 | up_read(&info->groups_sem); |
| 3973 | } | 4502 | } |
| 3974 | 4503 | ||
| 3975 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, | 4504 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
| 3976 | struct btrfs_root *root, | 4505 | struct btrfs_root *root, |
| 3977 | u64 num_bytes, u64 min_alloc_size, | 4506 | u64 num_bytes, u64 min_alloc_size, |
| 3978 | u64 empty_size, u64 hint_byte, | 4507 | u64 empty_size, u64 hint_byte, |
| 3979 | u64 search_end, struct btrfs_key *ins, | 4508 | u64 search_end, struct btrfs_key *ins, |
| 3980 | u64 data) | 4509 | u64 data) |
| 3981 | { | 4510 | { |
| 3982 | int ret; | 4511 | int ret; |
| 3983 | u64 search_start = 0; | 4512 | u64 search_start = 0; |
| @@ -4022,7 +4551,7 @@ again: | |||
| 4022 | printk(KERN_ERR "btrfs allocation failed flags %llu, " | 4551 | printk(KERN_ERR "btrfs allocation failed flags %llu, " |
| 4023 | "wanted %llu\n", (unsigned long long)data, | 4552 | "wanted %llu\n", (unsigned long long)data, |
| 4024 | (unsigned long long)num_bytes); | 4553 | (unsigned long long)num_bytes); |
| 4025 | dump_space_info(sinfo, num_bytes); | 4554 | dump_space_info(sinfo, num_bytes, 1); |
| 4026 | } | 4555 | } |
| 4027 | 4556 | ||
| 4028 | return ret; | 4557 | return ret; |
| @@ -4043,25 +4572,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 4043 | ret = btrfs_discard_extent(root, start, len); | 4572 | ret = btrfs_discard_extent(root, start, len); |
| 4044 | 4573 | ||
| 4045 | btrfs_add_free_space(cache, start, len); | 4574 | btrfs_add_free_space(cache, start, len); |
| 4575 | update_reserved_extents(cache, len, 0); | ||
| 4046 | btrfs_put_block_group(cache); | 4576 | btrfs_put_block_group(cache); |
| 4047 | update_reserved_extents(root, start, len, 0); | ||
| 4048 | |||
| 4049 | return ret; | ||
| 4050 | } | ||
| 4051 | |||
| 4052 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
| 4053 | struct btrfs_root *root, | ||
| 4054 | u64 num_bytes, u64 min_alloc_size, | ||
| 4055 | u64 empty_size, u64 hint_byte, | ||
| 4056 | u64 search_end, struct btrfs_key *ins, | ||
| 4057 | u64 data) | ||
| 4058 | { | ||
| 4059 | int ret; | ||
| 4060 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
| 4061 | empty_size, hint_byte, search_end, ins, | ||
| 4062 | data); | ||
| 4063 | if (!ret) | ||
| 4064 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
| 4065 | 4577 | ||
| 4066 | return ret; | 4578 | return ret; |
| 4067 | } | 4579 | } |
| @@ -4222,15 +4734,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 4222 | { | 4734 | { |
| 4223 | int ret; | 4735 | int ret; |
| 4224 | struct btrfs_block_group_cache *block_group; | 4736 | struct btrfs_block_group_cache *block_group; |
| 4737 | struct btrfs_caching_control *caching_ctl; | ||
| 4738 | u64 start = ins->objectid; | ||
| 4739 | u64 num_bytes = ins->offset; | ||
| 4225 | 4740 | ||
| 4226 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4741 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
| 4227 | cache_block_group(block_group); | 4742 | cache_block_group(block_group); |
| 4228 | wait_event(block_group->caching_q, | 4743 | caching_ctl = get_caching_control(block_group); |
| 4229 | block_group_cache_done(block_group)); | ||
| 4230 | 4744 | ||
| 4231 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4745 | if (!caching_ctl) { |
| 4232 | ins->offset); | 4746 | BUG_ON(!block_group_cache_done(block_group)); |
| 4233 | BUG_ON(ret); | 4747 | ret = btrfs_remove_free_space(block_group, start, num_bytes); |
| 4748 | BUG_ON(ret); | ||
| 4749 | } else { | ||
| 4750 | mutex_lock(&caching_ctl->mutex); | ||
| 4751 | |||
| 4752 | if (start >= caching_ctl->progress) { | ||
| 4753 | ret = add_excluded_extent(root, start, num_bytes); | ||
| 4754 | BUG_ON(ret); | ||
| 4755 | } else if (start + num_bytes <= caching_ctl->progress) { | ||
| 4756 | ret = btrfs_remove_free_space(block_group, | ||
| 4757 | start, num_bytes); | ||
| 4758 | BUG_ON(ret); | ||
| 4759 | } else { | ||
| 4760 | num_bytes = caching_ctl->progress - start; | ||
| 4761 | ret = btrfs_remove_free_space(block_group, | ||
| 4762 | start, num_bytes); | ||
| 4763 | BUG_ON(ret); | ||
| 4764 | |||
| 4765 | start = caching_ctl->progress; | ||
| 4766 | num_bytes = ins->objectid + ins->offset - | ||
| 4767 | caching_ctl->progress; | ||
| 4768 | ret = add_excluded_extent(root, start, num_bytes); | ||
| 4769 | BUG_ON(ret); | ||
| 4770 | } | ||
| 4771 | |||
| 4772 | mutex_unlock(&caching_ctl->mutex); | ||
| 4773 | put_caching_control(caching_ctl); | ||
| 4774 | } | ||
| 4775 | |||
| 4776 | update_reserved_extents(block_group, ins->offset, 1); | ||
| 4234 | btrfs_put_block_group(block_group); | 4777 | btrfs_put_block_group(block_group); |
| 4235 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 4778 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
| 4236 | 0, owner, offset, ins, 1); | 4779 | 0, owner, offset, ins, 1); |
| @@ -4254,9 +4797,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
| 4254 | int ret; | 4797 | int ret; |
| 4255 | u64 flags = 0; | 4798 | u64 flags = 0; |
| 4256 | 4799 | ||
| 4257 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4800 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
| 4258 | empty_size, hint_byte, search_end, | 4801 | empty_size, hint_byte, search_end, |
| 4259 | ins, 0); | 4802 | ins, 0); |
| 4260 | if (ret) | 4803 | if (ret) |
| 4261 | return ret; | 4804 | return ret; |
| 4262 | 4805 | ||
| @@ -4267,7 +4810,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
| 4267 | } else | 4810 | } else |
| 4268 | BUG_ON(parent > 0); | 4811 | BUG_ON(parent > 0); |
| 4269 | 4812 | ||
| 4270 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
| 4271 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 4813 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
| 4272 | struct btrfs_delayed_extent_op *extent_op; | 4814 | struct btrfs_delayed_extent_op *extent_op; |
| 4273 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 4815 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); |
| @@ -4346,452 +4888,108 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 4346 | return buf; | 4888 | return buf; |
| 4347 | } | 4889 | } |
| 4348 | 4890 | ||
| 4349 | #if 0 | 4891 | struct walk_control { |
| 4350 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 4892 | u64 refs[BTRFS_MAX_LEVEL]; |
| 4351 | struct btrfs_root *root, struct extent_buffer *leaf) | 4893 | u64 flags[BTRFS_MAX_LEVEL]; |
| 4352 | { | 4894 | struct btrfs_key update_progress; |
| 4353 | u64 disk_bytenr; | 4895 | int stage; |
| 4354 | u64 num_bytes; | 4896 | int level; |
| 4355 | struct btrfs_key key; | 4897 | int shared_level; |
| 4356 | struct btrfs_file_extent_item *fi; | 4898 | int update_ref; |
| 4357 | u32 nritems; | 4899 | int keep_locks; |
| 4358 | int i; | 4900 | int reada_slot; |
| 4359 | int ret; | 4901 | int reada_count; |
| 4360 | 4902 | }; | |
| 4361 | BUG_ON(!btrfs_is_leaf(leaf)); | ||
| 4362 | nritems = btrfs_header_nritems(leaf); | ||
| 4363 | |||
| 4364 | for (i = 0; i < nritems; i++) { | ||
| 4365 | cond_resched(); | ||
| 4366 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
| 4367 | |||
| 4368 | /* only extents have references, skip everything else */ | ||
| 4369 | if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) | ||
| 4370 | continue; | ||
| 4371 | |||
| 4372 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
| 4373 | |||
| 4374 | /* inline extents live in the btree, they don't have refs */ | ||
| 4375 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 4376 | BTRFS_FILE_EXTENT_INLINE) | ||
| 4377 | continue; | ||
| 4378 | |||
| 4379 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 4380 | |||
| 4381 | /* holes don't have refs */ | ||
| 4382 | if (disk_bytenr == 0) | ||
| 4383 | continue; | ||
| 4384 | |||
| 4385 | num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
| 4386 | ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, | ||
| 4387 | leaf->start, 0, key.objectid, 0); | ||
| 4388 | BUG_ON(ret); | ||
| 4389 | } | ||
| 4390 | return 0; | ||
| 4391 | } | ||
| 4392 | |||
| 4393 | static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | ||
| 4394 | struct btrfs_root *root, | ||
| 4395 | struct btrfs_leaf_ref *ref) | ||
| 4396 | { | ||
| 4397 | int i; | ||
| 4398 | int ret; | ||
| 4399 | struct btrfs_extent_info *info; | ||
| 4400 | struct refsort *sorted; | ||
| 4401 | |||
| 4402 | if (ref->nritems == 0) | ||
| 4403 | return 0; | ||
| 4404 | |||
| 4405 | sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); | ||
| 4406 | for (i = 0; i < ref->nritems; i++) { | ||
| 4407 | sorted[i].bytenr = ref->extents[i].bytenr; | ||
| 4408 | sorted[i].slot = i; | ||
| 4409 | } | ||
| 4410 | sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); | ||
| 4411 | |||
| 4412 | /* | ||
| 4413 | * the items in the ref were sorted when the ref was inserted | ||
| 4414 | * into the ref cache, so this is already in order | ||
| 4415 | */ | ||
| 4416 | for (i = 0; i < ref->nritems; i++) { | ||
| 4417 | info = ref->extents + sorted[i].slot; | ||
| 4418 | ret = btrfs_free_extent(trans, root, info->bytenr, | ||
| 4419 | info->num_bytes, ref->bytenr, | ||
| 4420 | ref->owner, ref->generation, | ||
| 4421 | info->objectid, 0); | ||
| 4422 | |||
| 4423 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 4424 | wake_up(&root->fs_info->transaction_throttle); | ||
| 4425 | cond_resched(); | ||
| 4426 | |||
| 4427 | BUG_ON(ret); | ||
| 4428 | info++; | ||
| 4429 | } | ||
| 4430 | |||
| 4431 | kfree(sorted); | ||
| 4432 | return 0; | ||
| 4433 | } | ||
| 4434 | |||
| 4435 | |||
| 4436 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, | ||
| 4437 | struct btrfs_root *root, u64 start, | ||
| 4438 | u64 len, u32 *refs) | ||
| 4439 | { | ||
| 4440 | int ret; | ||
| 4441 | |||
| 4442 | ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); | ||
| 4443 | BUG_ON(ret); | ||
| 4444 | |||
| 4445 | #if 0 /* some debugging code in case we see problems here */ | ||
| 4446 | /* if the refs count is one, it won't get increased again. But | ||
| 4447 | * if the ref count is > 1, someone may be decreasing it at | ||
| 4448 | * the same time we are. | ||
| 4449 | */ | ||
| 4450 | if (*refs != 1) { | ||
| 4451 | struct extent_buffer *eb = NULL; | ||
| 4452 | eb = btrfs_find_create_tree_block(root, start, len); | ||
| 4453 | if (eb) | ||
| 4454 | btrfs_tree_lock(eb); | ||
| 4455 | |||
| 4456 | mutex_lock(&root->fs_info->alloc_mutex); | ||
| 4457 | ret = lookup_extent_ref(NULL, root, start, len, refs); | ||
| 4458 | BUG_ON(ret); | ||
| 4459 | mutex_unlock(&root->fs_info->alloc_mutex); | ||
| 4460 | |||
| 4461 | if (eb) { | ||
| 4462 | btrfs_tree_unlock(eb); | ||
| 4463 | free_extent_buffer(eb); | ||
| 4464 | } | ||
| 4465 | if (*refs == 1) { | ||
| 4466 | printk(KERN_ERR "btrfs block %llu went down to one " | ||
| 4467 | "during drop_snap\n", (unsigned long long)start); | ||
| 4468 | } | ||
| 4469 | |||
| 4470 | } | ||
| 4471 | #endif | ||
| 4472 | |||
| 4473 | cond_resched(); | ||
| 4474 | return ret; | ||
| 4475 | } | ||
| 4476 | 4903 | ||
| 4904 | #define DROP_REFERENCE 1 | ||
| 4905 | #define UPDATE_BACKREF 2 | ||
| 4477 | 4906 | ||
| 4478 | /* | 4907 | static noinline void reada_walk_down(struct btrfs_trans_handle *trans, |
| 4479 | * this is used while deleting old snapshots, and it drops the refs | 4908 | struct btrfs_root *root, |
| 4480 | * on a whole subtree starting from a level 1 node. | 4909 | struct walk_control *wc, |
| 4481 | * | 4910 | struct btrfs_path *path) |
| 4482 | * The idea is to sort all the leaf pointers, and then drop the | ||
| 4483 | * ref on all the leaves in order. Most of the time the leaves | ||
| 4484 | * will have ref cache entries, so no leaf IOs will be required to | ||
| 4485 | * find the extents they have references on. | ||
| 4486 | * | ||
| 4487 | * For each leaf, any references it has are also dropped in order | ||
| 4488 | * | ||
| 4489 | * This ends up dropping the references in something close to optimal | ||
| 4490 | * order for reading and modifying the extent allocation tree. | ||
| 4491 | */ | ||
| 4492 | static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | ||
| 4493 | struct btrfs_root *root, | ||
| 4494 | struct btrfs_path *path) | ||
| 4495 | { | 4911 | { |
| 4496 | u64 bytenr; | 4912 | u64 bytenr; |
| 4497 | u64 root_owner; | 4913 | u64 generation; |
| 4498 | u64 root_gen; | 4914 | u64 refs; |
| 4499 | struct extent_buffer *eb = path->nodes[1]; | 4915 | u64 flags; |
| 4500 | struct extent_buffer *leaf; | 4916 | u64 last = 0; |
| 4501 | struct btrfs_leaf_ref *ref; | 4917 | u32 nritems; |
| 4502 | struct refsort *sorted = NULL; | 4918 | u32 blocksize; |
| 4503 | int nritems = btrfs_header_nritems(eb); | 4919 | struct btrfs_key key; |
| 4920 | struct extent_buffer *eb; | ||
| 4504 | int ret; | 4921 | int ret; |
| 4505 | int i; | 4922 | int slot; |
| 4506 | int refi = 0; | 4923 | int nread = 0; |
| 4507 | int slot = path->slots[1]; | ||
| 4508 | u32 blocksize = btrfs_level_size(root, 0); | ||
| 4509 | u32 refs; | ||
| 4510 | |||
| 4511 | if (nritems == 0) | ||
| 4512 | goto out; | ||
| 4513 | |||
| 4514 | root_owner = btrfs_header_owner(eb); | ||
| 4515 | root_gen = btrfs_header_generation(eb); | ||
| 4516 | sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); | ||
| 4517 | 4924 | ||
| 4518 | /* | 4925 | if (path->slots[wc->level] < wc->reada_slot) { |
| 4519 | * step one, sort all the leaf pointers so we don't scribble | 4926 | wc->reada_count = wc->reada_count * 2 / 3; |
| 4520 | * randomly into the extent allocation tree | 4927 | wc->reada_count = max(wc->reada_count, 2); |
| 4521 | */ | 4928 | } else { |
| 4522 | for (i = slot; i < nritems; i++) { | 4929 | wc->reada_count = wc->reada_count * 3 / 2; |
| 4523 | sorted[refi].bytenr = btrfs_node_blockptr(eb, i); | 4930 | wc->reada_count = min_t(int, wc->reada_count, |
| 4524 | sorted[refi].slot = i; | 4931 | BTRFS_NODEPTRS_PER_BLOCK(root)); |
| 4525 | refi++; | ||
| 4526 | } | 4932 | } |
| 4527 | 4933 | ||
| 4528 | /* | 4934 | eb = path->nodes[wc->level]; |
| 4529 | * nritems won't be zero, but if we're picking up drop_snapshot | 4935 | nritems = btrfs_header_nritems(eb); |
| 4530 | * after a crash, slot might be > 0, so double check things | 4936 | blocksize = btrfs_level_size(root, wc->level - 1); |
| 4531 | * just in case. | ||
| 4532 | */ | ||
| 4533 | if (refi == 0) | ||
| 4534 | goto out; | ||
| 4535 | 4937 | ||
| 4536 | sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); | 4938 | for (slot = path->slots[wc->level]; slot < nritems; slot++) { |
| 4939 | if (nread >= wc->reada_count) | ||
| 4940 | break; | ||
| 4537 | 4941 | ||
| 4538 | /* | 4942 | cond_resched(); |
| 4539 | * the first loop frees everything the leaves point to | 4943 | bytenr = btrfs_node_blockptr(eb, slot); |
| 4540 | */ | 4944 | generation = btrfs_node_ptr_generation(eb, slot); |
| 4541 | for (i = 0; i < refi; i++) { | ||
| 4542 | u64 ptr_gen; | ||
| 4543 | 4945 | ||
| 4544 | bytenr = sorted[i].bytenr; | 4946 | if (slot == path->slots[wc->level]) |
| 4947 | goto reada; | ||
| 4545 | 4948 | ||
| 4546 | /* | 4949 | if (wc->stage == UPDATE_BACKREF && |
| 4547 | * check the reference count on this leaf. If it is > 1 | 4950 | generation <= root->root_key.offset) |
| 4548 | * we just decrement it below and don't update any | ||
| 4549 | * of the refs the leaf points to. | ||
| 4550 | */ | ||
| 4551 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
| 4552 | blocksize, &refs); | ||
| 4553 | BUG_ON(ret); | ||
| 4554 | if (refs != 1) | ||
| 4555 | continue; | 4951 | continue; |
| 4556 | 4952 | ||
| 4557 | ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); | 4953 | /* We don't lock the tree block, it's OK to be racy here */ |
| 4558 | 4954 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | |
| 4559 | /* | 4955 | &refs, &flags); |
| 4560 | * the leaf only had one reference, which means the | ||
| 4561 | * only thing pointing to this leaf is the snapshot | ||
| 4562 | * we're deleting. It isn't possible for the reference | ||
| 4563 | * count to increase again later | ||
| 4564 | * | ||
| 4565 | * The reference cache is checked for the leaf, | ||
| 4566 | * and if found we'll be able to drop any refs held by | ||
| 4567 | * the leaf without needing to read it in. | ||
| 4568 | */ | ||
| 4569 | ref = btrfs_lookup_leaf_ref(root, bytenr); | ||
| 4570 | if (ref && ref->generation != ptr_gen) { | ||
| 4571 | btrfs_free_leaf_ref(root, ref); | ||
| 4572 | ref = NULL; | ||
| 4573 | } | ||
| 4574 | if (ref) { | ||
| 4575 | ret = cache_drop_leaf_ref(trans, root, ref); | ||
| 4576 | BUG_ON(ret); | ||
| 4577 | btrfs_remove_leaf_ref(root, ref); | ||
| 4578 | btrfs_free_leaf_ref(root, ref); | ||
| 4579 | } else { | ||
| 4580 | /* | ||
| 4581 | * the leaf wasn't in the reference cache, so | ||
| 4582 | * we have to read it. | ||
| 4583 | */ | ||
| 4584 | leaf = read_tree_block(root, bytenr, blocksize, | ||
| 4585 | ptr_gen); | ||
| 4586 | ret = btrfs_drop_leaf_ref(trans, root, leaf); | ||
| 4587 | BUG_ON(ret); | ||
| 4588 | free_extent_buffer(leaf); | ||
| 4589 | } | ||
| 4590 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 4591 | wake_up(&root->fs_info->transaction_throttle); | ||
| 4592 | cond_resched(); | ||
| 4593 | } | ||
| 4594 | |||
| 4595 | /* | ||
| 4596 | * run through the loop again to free the refs on the leaves. | ||
| 4597 | * This is faster than doing it in the loop above because | ||
| 4598 | * the leaves are likely to be clustered together. We end up | ||
| 4599 | * working in nice chunks on the extent allocation tree. | ||
| 4600 | */ | ||
| 4601 | for (i = 0; i < refi; i++) { | ||
| 4602 | bytenr = sorted[i].bytenr; | ||
| 4603 | ret = btrfs_free_extent(trans, root, bytenr, | ||
| 4604 | blocksize, eb->start, | ||
| 4605 | root_owner, root_gen, 0, 1); | ||
| 4606 | BUG_ON(ret); | 4956 | BUG_ON(ret); |
| 4957 | BUG_ON(refs == 0); | ||
| 4607 | 4958 | ||
| 4608 | atomic_inc(&root->fs_info->throttle_gen); | 4959 | if (wc->stage == DROP_REFERENCE) { |
| 4609 | wake_up(&root->fs_info->transaction_throttle); | 4960 | if (refs == 1) |
| 4610 | cond_resched(); | 4961 | goto reada; |
| 4611 | } | ||
| 4612 | out: | ||
| 4613 | kfree(sorted); | ||
| 4614 | |||
| 4615 | /* | ||
| 4616 | * update the path to show we've processed the entire level 1 | ||
| 4617 | * node. This will get saved into the root's drop_snapshot_progress | ||
| 4618 | * field so these drops are not repeated again if this transaction | ||
| 4619 | * commits. | ||
| 4620 | */ | ||
| 4621 | path->slots[1] = nritems; | ||
| 4622 | return 0; | ||
| 4623 | } | ||
| 4624 | |||
| 4625 | /* | ||
| 4626 | * helper function for drop_snapshot, this walks down the tree dropping ref | ||
| 4627 | * counts as it goes. | ||
| 4628 | */ | ||
| 4629 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | ||
| 4630 | struct btrfs_root *root, | ||
| 4631 | struct btrfs_path *path, int *level) | ||
| 4632 | { | ||
| 4633 | u64 root_owner; | ||
| 4634 | u64 root_gen; | ||
| 4635 | u64 bytenr; | ||
| 4636 | u64 ptr_gen; | ||
| 4637 | struct extent_buffer *next; | ||
| 4638 | struct extent_buffer *cur; | ||
| 4639 | struct extent_buffer *parent; | ||
| 4640 | u32 blocksize; | ||
| 4641 | int ret; | ||
| 4642 | u32 refs; | ||
| 4643 | |||
| 4644 | WARN_ON(*level < 0); | ||
| 4645 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 4646 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, | ||
| 4647 | path->nodes[*level]->len, &refs); | ||
| 4648 | BUG_ON(ret); | ||
| 4649 | if (refs > 1) | ||
| 4650 | goto out; | ||
| 4651 | |||
| 4652 | /* | ||
| 4653 | * walk down to the last node level and free all the leaves | ||
| 4654 | */ | ||
| 4655 | while (*level >= 0) { | ||
| 4656 | WARN_ON(*level < 0); | ||
| 4657 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 4658 | cur = path->nodes[*level]; | ||
| 4659 | |||
| 4660 | if (btrfs_header_level(cur) != *level) | ||
| 4661 | WARN_ON(1); | ||
| 4662 | |||
| 4663 | if (path->slots[*level] >= | ||
| 4664 | btrfs_header_nritems(cur)) | ||
| 4665 | break; | ||
| 4666 | 4962 | ||
| 4667 | /* the new code goes down to level 1 and does all the | 4963 | if (wc->level == 1 && |
| 4668 | * leaves pointed to that node in bulk. So, this check | 4964 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
| 4669 | * for level 0 will always be false. | 4965 | continue; |
| 4670 | * | 4966 | if (!wc->update_ref || |
| 4671 | * But, the disk format allows the drop_snapshot_progress | 4967 | generation <= root->root_key.offset) |
| 4672 | * field in the root to leave things in a state where | 4968 | continue; |
| 4673 | * a leaf will need cleaning up here. If someone crashes | 4969 | btrfs_node_key_to_cpu(eb, &key, slot); |
| 4674 | * with the old code and then boots with the new code, | 4970 | ret = btrfs_comp_cpu_keys(&key, |
| 4675 | * we might find a leaf here. | 4971 | &wc->update_progress); |
| 4676 | */ | 4972 | if (ret < 0) |
| 4677 | if (*level == 0) { | 4973 | continue; |
| 4678 | ret = btrfs_drop_leaf_ref(trans, root, cur); | 4974 | } else { |
| 4679 | BUG_ON(ret); | 4975 | if (wc->level == 1 && |
| 4680 | break; | 4976 | (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) |
| 4977 | continue; | ||
| 4681 | } | 4978 | } |
| 4682 | 4979 | reada: | |
| 4683 | /* | 4980 | ret = readahead_tree_block(root, bytenr, blocksize, |
| 4684 | * once we get to level one, process the whole node | 4981 | generation); |
| 4685 | * at once, including everything below it. | 4982 | if (ret) |
| 4686 | */ | ||
| 4687 | if (*level == 1) { | ||
| 4688 | ret = drop_level_one_refs(trans, root, path); | ||
| 4689 | BUG_ON(ret); | ||
| 4690 | break; | 4983 | break; |
| 4691 | } | 4984 | last = bytenr + blocksize; |
| 4692 | 4985 | nread++; | |
| 4693 | bytenr = btrfs_node_blockptr(cur, path->slots[*level]); | ||
| 4694 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | ||
| 4695 | blocksize = btrfs_level_size(root, *level - 1); | ||
| 4696 | |||
| 4697 | ret = drop_snap_lookup_refcount(trans, root, bytenr, | ||
| 4698 | blocksize, &refs); | ||
| 4699 | BUG_ON(ret); | ||
| 4700 | |||
| 4701 | /* | ||
| 4702 | * if there is more than one reference, we don't need | ||
| 4703 | * to read that node to drop any references it has. We | ||
| 4704 | * just drop the ref we hold on that node and move on to the | ||
| 4705 | * next slot in this level. | ||
| 4706 | */ | ||
| 4707 | if (refs != 1) { | ||
| 4708 | parent = path->nodes[*level]; | ||
| 4709 | root_owner = btrfs_header_owner(parent); | ||
| 4710 | root_gen = btrfs_header_generation(parent); | ||
| 4711 | path->slots[*level]++; | ||
| 4712 | |||
| 4713 | ret = btrfs_free_extent(trans, root, bytenr, | ||
| 4714 | blocksize, parent->start, | ||
| 4715 | root_owner, root_gen, | ||
| 4716 | *level - 1, 1); | ||
| 4717 | BUG_ON(ret); | ||
| 4718 | |||
| 4719 | atomic_inc(&root->fs_info->throttle_gen); | ||
| 4720 | wake_up(&root->fs_info->transaction_throttle); | ||
| 4721 | cond_resched(); | ||
| 4722 | |||
| 4723 | continue; | ||
| 4724 | } | ||
| 4725 | |||
| 4726 | /* | ||
| 4727 | * we need to keep freeing things in the next level down. | ||
| 4728 | * read the block and loop around to process it | ||
| 4729 | */ | ||
| 4730 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | ||
| 4731 | WARN_ON(*level <= 0); | ||
| 4732 | if (path->nodes[*level-1]) | ||
| 4733 | free_extent_buffer(path->nodes[*level-1]); | ||
| 4734 | path->nodes[*level-1] = next; | ||
| 4735 | *level = btrfs_header_level(next); | ||
| 4736 | path->slots[*level] = 0; | ||
| 4737 | cond_resched(); | ||
| 4738 | } | ||
| 4739 | out: | ||
| 4740 | WARN_ON(*level < 0); | ||
| 4741 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | ||
| 4742 | |||
| 4743 | if (path->nodes[*level] == root->node) { | ||
| 4744 | parent = path->nodes[*level]; | ||
| 4745 | bytenr = path->nodes[*level]->start; | ||
| 4746 | } else { | ||
| 4747 | parent = path->nodes[*level + 1]; | ||
| 4748 | bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); | ||
| 4749 | } | 4986 | } |
| 4750 | 4987 | wc->reada_slot = slot; | |
| 4751 | blocksize = btrfs_level_size(root, *level); | ||
| 4752 | root_owner = btrfs_header_owner(parent); | ||
| 4753 | root_gen = btrfs_header_generation(parent); | ||
| 4754 | |||
| 4755 | /* | ||
| 4756 | * cleanup and free the reference on the last node | ||
| 4757 | * we processed | ||
| 4758 | */ | ||
| 4759 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, | ||
| 4760 | parent->start, root_owner, root_gen, | ||
| 4761 | *level, 1); | ||
| 4762 | free_extent_buffer(path->nodes[*level]); | ||
| 4763 | path->nodes[*level] = NULL; | ||
| 4764 | |||
| 4765 | *level += 1; | ||
| 4766 | BUG_ON(ret); | ||
| 4767 | |||
| 4768 | cond_resched(); | ||
| 4769 | return 0; | ||
| 4770 | } | 4988 | } |
| 4771 | #endif | ||
| 4772 | |||
| 4773 | struct walk_control { | ||
| 4774 | u64 refs[BTRFS_MAX_LEVEL]; | ||
| 4775 | u64 flags[BTRFS_MAX_LEVEL]; | ||
| 4776 | struct btrfs_key update_progress; | ||
| 4777 | int stage; | ||
| 4778 | int level; | ||
| 4779 | int shared_level; | ||
| 4780 | int update_ref; | ||
| 4781 | int keep_locks; | ||
| 4782 | }; | ||
| 4783 | |||
| 4784 | #define DROP_REFERENCE 1 | ||
| 4785 | #define UPDATE_BACKREF 2 | ||
| 4786 | 4989 | ||
| 4787 | /* | 4990 | /* |
| 4788 | * helper to process tree block while walking down the tree. | 4991 | * helper to process tree block while walking down the tree. |
| 4789 | * | 4992 | * |
| 4790 | * when wc->stage == DROP_REFERENCE, this function checks | ||
| 4791 | * reference count of the block. if the block is shared and | ||
| 4792 | * we need update back refs for the subtree rooted at the | ||
| 4793 | * block, this function changes wc->stage to UPDATE_BACKREF | ||
| 4794 | * | ||
| 4795 | * when wc->stage == UPDATE_BACKREF, this function updates | 4993 | * when wc->stage == UPDATE_BACKREF, this function updates |
| 4796 | * back refs for pointers in the block. | 4994 | * back refs for pointers in the block. |
| 4797 | * | 4995 | * |
| @@ -4800,11 +4998,10 @@ struct walk_control { | |||
| 4800 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | 4998 | static noinline int walk_down_proc(struct btrfs_trans_handle *trans, |
| 4801 | struct btrfs_root *root, | 4999 | struct btrfs_root *root, |
| 4802 | struct btrfs_path *path, | 5000 | struct btrfs_path *path, |
| 4803 | struct walk_control *wc) | 5001 | struct walk_control *wc, int lookup_info) |
| 4804 | { | 5002 | { |
| 4805 | int level = wc->level; | 5003 | int level = wc->level; |
| 4806 | struct extent_buffer *eb = path->nodes[level]; | 5004 | struct extent_buffer *eb = path->nodes[level]; |
| 4807 | struct btrfs_key key; | ||
| 4808 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 5005 | u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
| 4809 | int ret; | 5006 | int ret; |
| 4810 | 5007 | ||
| @@ -4816,8 +5013,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4816 | * when reference count of tree block is 1, it won't increase | 5013 | * when reference count of tree block is 1, it won't increase |
| 4817 | * again. once full backref flag is set, we never clear it. | 5014 | * again. once full backref flag is set, we never clear it. |
| 4818 | */ | 5015 | */ |
| 4819 | if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || | 5016 | if (lookup_info && |
| 4820 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { | 5017 | ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || |
| 5018 | (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { | ||
| 4821 | BUG_ON(!path->locks[level]); | 5019 | BUG_ON(!path->locks[level]); |
| 4822 | ret = btrfs_lookup_extent_info(trans, root, | 5020 | ret = btrfs_lookup_extent_info(trans, root, |
| 4823 | eb->start, eb->len, | 5021 | eb->start, eb->len, |
| @@ -4827,21 +5025,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4827 | BUG_ON(wc->refs[level] == 0); | 5025 | BUG_ON(wc->refs[level] == 0); |
| 4828 | } | 5026 | } |
| 4829 | 5027 | ||
| 4830 | if (wc->stage == DROP_REFERENCE && | ||
| 4831 | wc->update_ref && wc->refs[level] > 1) { | ||
| 4832 | BUG_ON(eb == root->node); | ||
| 4833 | BUG_ON(path->slots[level] > 0); | ||
| 4834 | if (level == 0) | ||
| 4835 | btrfs_item_key_to_cpu(eb, &key, path->slots[level]); | ||
| 4836 | else | ||
| 4837 | btrfs_node_key_to_cpu(eb, &key, path->slots[level]); | ||
| 4838 | if (btrfs_header_owner(eb) == root->root_key.objectid && | ||
| 4839 | btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { | ||
| 4840 | wc->stage = UPDATE_BACKREF; | ||
| 4841 | wc->shared_level = level; | ||
| 4842 | } | ||
| 4843 | } | ||
| 4844 | |||
| 4845 | if (wc->stage == DROP_REFERENCE) { | 5028 | if (wc->stage == DROP_REFERENCE) { |
| 4846 | if (wc->refs[level] > 1) | 5029 | if (wc->refs[level] > 1) |
| 4847 | return 1; | 5030 | return 1; |
| @@ -4878,6 +5061,136 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
| 4878 | } | 5061 | } |
| 4879 | 5062 | ||
| 4880 | /* | 5063 | /* |
| 5064 | * helper to process tree block pointer. | ||
| 5065 | * | ||
| 5066 | * when wc->stage == DROP_REFERENCE, this function checks | ||
| 5067 | * reference count of the block pointed to. if the block | ||
| 5068 | * is shared and we need update back refs for the subtree | ||
| 5069 | * rooted at the block, this function changes wc->stage to | ||
| 5070 | * UPDATE_BACKREF. if the block is shared and there is no | ||
| 5071 | * need to update back refs, this function drops the reference | ||
| 5072 | * to the block. | ||
| 5073 | * | ||
| 5074 | * NOTE: return value 1 means we should stop walking down. | ||
| 5075 | */ | ||
| 5076 | static noinline int do_walk_down(struct btrfs_trans_handle *trans, | ||
| 5077 | struct btrfs_root *root, | ||
| 5078 | struct btrfs_path *path, | ||
| 5079 | struct walk_control *wc, int *lookup_info) | ||
| 5080 | { | ||
| 5081 | u64 bytenr; | ||
| 5082 | u64 generation; | ||
| 5083 | u64 parent; | ||
| 5084 | u32 blocksize; | ||
| 5085 | struct btrfs_key key; | ||
| 5086 | struct extent_buffer *next; | ||
| 5087 | int level = wc->level; | ||
| 5088 | int reada = 0; | ||
| 5089 | int ret = 0; | ||
| 5090 | |||
| 5091 | generation = btrfs_node_ptr_generation(path->nodes[level], | ||
| 5092 | path->slots[level]); | ||
| 5093 | /* | ||
| 5094 | * if the lower level block was created before the snapshot | ||
| 5095 | * was created, we know there is no need to update back refs | ||
| 5096 | * for the subtree | ||
| 5097 | */ | ||
| 5098 | if (wc->stage == UPDATE_BACKREF && | ||
| 5099 | generation <= root->root_key.offset) { | ||
| 5100 | *lookup_info = 1; | ||
| 5101 | return 1; | ||
| 5102 | } | ||
| 5103 | |||
| 5104 | bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); | ||
| 5105 | blocksize = btrfs_level_size(root, level - 1); | ||
| 5106 | |||
| 5107 | next = btrfs_find_tree_block(root, bytenr, blocksize); | ||
| 5108 | if (!next) { | ||
| 5109 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
| 5110 | reada = 1; | ||
| 5111 | } | ||
| 5112 | btrfs_tree_lock(next); | ||
| 5113 | btrfs_set_lock_blocking(next); | ||
| 5114 | |||
| 5115 | ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, | ||
| 5116 | &wc->refs[level - 1], | ||
| 5117 | &wc->flags[level - 1]); | ||
| 5118 | BUG_ON(ret); | ||
| 5119 | BUG_ON(wc->refs[level - 1] == 0); | ||
| 5120 | *lookup_info = 0; | ||
| 5121 | |||
| 5122 | if (wc->stage == DROP_REFERENCE) { | ||
| 5123 | if (wc->refs[level - 1] > 1) { | ||
| 5124 | if (level == 1 && | ||
| 5125 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
| 5126 | goto skip; | ||
| 5127 | |||
| 5128 | if (!wc->update_ref || | ||
| 5129 | generation <= root->root_key.offset) | ||
| 5130 | goto skip; | ||
| 5131 | |||
| 5132 | btrfs_node_key_to_cpu(path->nodes[level], &key, | ||
| 5133 | path->slots[level]); | ||
| 5134 | ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); | ||
| 5135 | if (ret < 0) | ||
| 5136 | goto skip; | ||
| 5137 | |||
| 5138 | wc->stage = UPDATE_BACKREF; | ||
| 5139 | wc->shared_level = level - 1; | ||
| 5140 | } | ||
| 5141 | } else { | ||
| 5142 | if (level == 1 && | ||
| 5143 | (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) | ||
| 5144 | goto skip; | ||
| 5145 | } | ||
| 5146 | |||
| 5147 | if (!btrfs_buffer_uptodate(next, generation)) { | ||
| 5148 | btrfs_tree_unlock(next); | ||
| 5149 | free_extent_buffer(next); | ||
| 5150 | next = NULL; | ||
| 5151 | *lookup_info = 1; | ||
| 5152 | } | ||
| 5153 | |||
| 5154 | if (!next) { | ||
| 5155 | if (reada && level == 1) | ||
| 5156 | reada_walk_down(trans, root, wc, path); | ||
| 5157 | next = read_tree_block(root, bytenr, blocksize, generation); | ||
| 5158 | btrfs_tree_lock(next); | ||
| 5159 | btrfs_set_lock_blocking(next); | ||
| 5160 | } | ||
| 5161 | |||
| 5162 | level--; | ||
| 5163 | BUG_ON(level != btrfs_header_level(next)); | ||
| 5164 | path->nodes[level] = next; | ||
| 5165 | path->slots[level] = 0; | ||
| 5166 | path->locks[level] = 1; | ||
| 5167 | wc->level = level; | ||
| 5168 | if (wc->level == 1) | ||
| 5169 | wc->reada_slot = 0; | ||
| 5170 | return 0; | ||
| 5171 | skip: | ||
| 5172 | wc->refs[level - 1] = 0; | ||
| 5173 | wc->flags[level - 1] = 0; | ||
| 5174 | if (wc->stage == DROP_REFERENCE) { | ||
| 5175 | if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { | ||
| 5176 | parent = path->nodes[level]->start; | ||
| 5177 | } else { | ||
| 5178 | BUG_ON(root->root_key.objectid != | ||
| 5179 | btrfs_header_owner(path->nodes[level])); | ||
| 5180 | parent = 0; | ||
| 5181 | } | ||
| 5182 | |||
| 5183 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, | ||
| 5184 | root->root_key.objectid, level - 1, 0); | ||
| 5185 | BUG_ON(ret); | ||
| 5186 | } | ||
| 5187 | btrfs_tree_unlock(next); | ||
| 5188 | free_extent_buffer(next); | ||
| 5189 | *lookup_info = 1; | ||
| 5190 | return 1; | ||
| 5191 | } | ||
| 5192 | |||
| 5193 | /* | ||
| 4881 | * helper to process tree block while walking up the tree. | 5194 | * helper to process tree block while walking up the tree. |
| 4882 | * | 5195 | * |
| 4883 | * when wc->stage == DROP_REFERENCE, this function drops | 5196 | * when wc->stage == DROP_REFERENCE, this function drops |
| @@ -4904,7 +5217,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 4904 | if (level < wc->shared_level) | 5217 | if (level < wc->shared_level) |
| 4905 | goto out; | 5218 | goto out; |
| 4906 | 5219 | ||
| 4907 | BUG_ON(wc->refs[level] <= 1); | ||
| 4908 | ret = find_next_key(path, level + 1, &wc->update_progress); | 5220 | ret = find_next_key(path, level + 1, &wc->update_progress); |
| 4909 | if (ret > 0) | 5221 | if (ret > 0) |
| 4910 | wc->update_ref = 0; | 5222 | wc->update_ref = 0; |
| @@ -4935,8 +5247,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 4935 | path->locks[level] = 0; | 5247 | path->locks[level] = 0; |
| 4936 | return 1; | 5248 | return 1; |
| 4937 | } | 5249 | } |
| 4938 | } else { | ||
| 4939 | BUG_ON(level != 0); | ||
| 4940 | } | 5250 | } |
| 4941 | } | 5251 | } |
| 4942 | 5252 | ||
| @@ -4989,39 +5299,28 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
| 4989 | struct btrfs_path *path, | 5299 | struct btrfs_path *path, |
| 4990 | struct walk_control *wc) | 5300 | struct walk_control *wc) |
| 4991 | { | 5301 | { |
| 4992 | struct extent_buffer *next; | ||
| 4993 | struct extent_buffer *cur; | ||
| 4994 | u64 bytenr; | ||
| 4995 | u64 ptr_gen; | ||
| 4996 | u32 blocksize; | ||
| 4997 | int level = wc->level; | 5302 | int level = wc->level; |
| 5303 | int lookup_info = 1; | ||
| 4998 | int ret; | 5304 | int ret; |
| 4999 | 5305 | ||
| 5000 | while (level >= 0) { | 5306 | while (level >= 0) { |
| 5001 | cur = path->nodes[level]; | 5307 | if (path->slots[level] >= |
| 5002 | BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); | 5308 | btrfs_header_nritems(path->nodes[level])) |
| 5309 | break; | ||
| 5003 | 5310 | ||
| 5004 | ret = walk_down_proc(trans, root, path, wc); | 5311 | ret = walk_down_proc(trans, root, path, wc, lookup_info); |
| 5005 | if (ret > 0) | 5312 | if (ret > 0) |
| 5006 | break; | 5313 | break; |
| 5007 | 5314 | ||
| 5008 | if (level == 0) | 5315 | if (level == 0) |
| 5009 | break; | 5316 | break; |
| 5010 | 5317 | ||
| 5011 | bytenr = btrfs_node_blockptr(cur, path->slots[level]); | 5318 | ret = do_walk_down(trans, root, path, wc, &lookup_info); |
| 5012 | blocksize = btrfs_level_size(root, level - 1); | 5319 | if (ret > 0) { |
| 5013 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); | 5320 | path->slots[level]++; |
| 5014 | 5321 | continue; | |
| 5015 | next = read_tree_block(root, bytenr, blocksize, ptr_gen); | 5322 | } |
| 5016 | btrfs_tree_lock(next); | 5323 | level = wc->level; |
| 5017 | btrfs_set_lock_blocking(next); | ||
| 5018 | |||
| 5019 | level--; | ||
| 5020 | BUG_ON(level != btrfs_header_level(next)); | ||
| 5021 | path->nodes[level] = next; | ||
| 5022 | path->slots[level] = 0; | ||
| 5023 | path->locks[level] = 1; | ||
| 5024 | wc->level = level; | ||
| 5025 | } | 5324 | } |
| 5026 | return 0; | 5325 | return 0; |
| 5027 | } | 5326 | } |
| @@ -5111,9 +5410,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5111 | err = ret; | 5410 | err = ret; |
| 5112 | goto out; | 5411 | goto out; |
| 5113 | } | 5412 | } |
| 5114 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 5413 | WARN_ON(ret > 0); |
| 5115 | path->slots[level]); | ||
| 5116 | WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); | ||
| 5117 | 5414 | ||
| 5118 | /* | 5415 | /* |
| 5119 | * unlock our path, this is safe because only this | 5416 | * unlock our path, this is safe because only this |
| @@ -5148,6 +5445,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5148 | wc->stage = DROP_REFERENCE; | 5445 | wc->stage = DROP_REFERENCE; |
| 5149 | wc->update_ref = update_ref; | 5446 | wc->update_ref = update_ref; |
| 5150 | wc->keep_locks = 0; | 5447 | wc->keep_locks = 0; |
| 5448 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
| 5151 | 5449 | ||
| 5152 | while (1) { | 5450 | while (1) { |
| 5153 | ret = walk_down_tree(trans, root, path, wc); | 5451 | ret = walk_down_tree(trans, root, path, wc); |
| @@ -5200,9 +5498,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5200 | ret = btrfs_del_root(trans, tree_root, &root->root_key); | 5498 | ret = btrfs_del_root(trans, tree_root, &root->root_key); |
| 5201 | BUG_ON(ret); | 5499 | BUG_ON(ret); |
| 5202 | 5500 | ||
| 5203 | free_extent_buffer(root->node); | 5501 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
| 5204 | free_extent_buffer(root->commit_root); | 5502 | ret = btrfs_find_last_root(tree_root, root->root_key.objectid, |
| 5205 | kfree(root); | 5503 | NULL, NULL); |
| 5504 | BUG_ON(ret < 0); | ||
| 5505 | if (ret > 0) { | ||
| 5506 | ret = btrfs_del_orphan_item(trans, tree_root, | ||
| 5507 | root->root_key.objectid); | ||
| 5508 | BUG_ON(ret); | ||
| 5509 | } | ||
| 5510 | } | ||
| 5511 | |||
| 5512 | if (root->in_radix) { | ||
| 5513 | btrfs_free_fs_root(tree_root->fs_info, root); | ||
| 5514 | } else { | ||
| 5515 | free_extent_buffer(root->node); | ||
| 5516 | free_extent_buffer(root->commit_root); | ||
| 5517 | kfree(root); | ||
| 5518 | } | ||
| 5206 | out: | 5519 | out: |
| 5207 | btrfs_end_transaction(trans, tree_root); | 5520 | btrfs_end_transaction(trans, tree_root); |
| 5208 | kfree(wc); | 5521 | kfree(wc); |
| @@ -5254,6 +5567,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
| 5254 | wc->stage = DROP_REFERENCE; | 5567 | wc->stage = DROP_REFERENCE; |
| 5255 | wc->update_ref = 0; | 5568 | wc->update_ref = 0; |
| 5256 | wc->keep_locks = 1; | 5569 | wc->keep_locks = 1; |
| 5570 | wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); | ||
| 5257 | 5571 | ||
| 5258 | while (1) { | 5572 | while (1) { |
| 5259 | wret = walk_down_tree(trans, root, path, wc); | 5573 | wret = walk_down_tree(trans, root, path, wc); |
| @@ -5396,9 +5710,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
| 5396 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | 5710 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
| 5397 | while (1) { | 5711 | while (1) { |
| 5398 | int ret; | 5712 | int ret; |
| 5399 | spin_lock(&em_tree->lock); | 5713 | write_lock(&em_tree->lock); |
| 5400 | ret = add_extent_mapping(em_tree, em); | 5714 | ret = add_extent_mapping(em_tree, em); |
| 5401 | spin_unlock(&em_tree->lock); | 5715 | write_unlock(&em_tree->lock); |
| 5402 | if (ret != -EEXIST) { | 5716 | if (ret != -EEXIST) { |
| 5403 | free_extent_map(em); | 5717 | free_extent_map(em); |
| 5404 | break; | 5718 | break; |
| @@ -6841,287 +7155,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | |||
| 6841 | return 0; | 7155 | return 0; |
| 6842 | } | 7156 | } |
| 6843 | 7157 | ||
| 6844 | #if 0 | 7158 | /* |
| 6845 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 7159 | * checks to see if it's even possible to relocate this block group. |
| 6846 | struct btrfs_root *root, | 7160 | * |
| 6847 | u64 objectid, u64 size) | 7161 | * @return - -1 if it's not a good idea to relocate this block group, 0 if it's |
| 6848 | { | 7162 | * ok to go ahead and try. |
| 6849 | struct btrfs_path *path; | 7163 | */ |
| 6850 | struct btrfs_inode_item *item; | 7164 | int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) |
| 6851 | struct extent_buffer *leaf; | ||
| 6852 | int ret; | ||
| 6853 | |||
| 6854 | path = btrfs_alloc_path(); | ||
| 6855 | if (!path) | ||
| 6856 | return -ENOMEM; | ||
| 6857 | |||
| 6858 | path->leave_spinning = 1; | ||
| 6859 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | ||
| 6860 | if (ret) | ||
| 6861 | goto out; | ||
| 6862 | |||
| 6863 | leaf = path->nodes[0]; | ||
| 6864 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | ||
| 6865 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | ||
| 6866 | btrfs_set_inode_generation(leaf, item, 1); | ||
| 6867 | btrfs_set_inode_size(leaf, item, size); | ||
| 6868 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | ||
| 6869 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | ||
| 6870 | btrfs_mark_buffer_dirty(leaf); | ||
| 6871 | btrfs_release_path(root, path); | ||
| 6872 | out: | ||
| 6873 | btrfs_free_path(path); | ||
| 6874 | return ret; | ||
| 6875 | } | ||
| 6876 | |||
| 6877 | static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | ||
| 6878 | struct btrfs_block_group_cache *group) | ||
| 6879 | { | 7165 | { |
| 6880 | struct inode *inode = NULL; | 7166 | struct btrfs_block_group_cache *block_group; |
| 6881 | struct btrfs_trans_handle *trans; | 7167 | struct btrfs_space_info *space_info; |
| 6882 | struct btrfs_root *root; | 7168 | struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; |
| 6883 | struct btrfs_key root_key; | 7169 | struct btrfs_device *device; |
| 6884 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 7170 | int full = 0; |
| 6885 | int err = 0; | 7171 | int ret = 0; |
| 6886 | 7172 | ||
| 6887 | root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; | 7173 | block_group = btrfs_lookup_block_group(root->fs_info, bytenr); |
| 6888 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 6889 | root_key.offset = (u64)-1; | ||
| 6890 | root = btrfs_read_fs_root_no_name(fs_info, &root_key); | ||
| 6891 | if (IS_ERR(root)) | ||
| 6892 | return ERR_CAST(root); | ||
| 6893 | 7174 | ||
| 6894 | trans = btrfs_start_transaction(root, 1); | 7175 | /* odd, couldn't find the block group, leave it alone */ |
| 6895 | BUG_ON(!trans); | 7176 | if (!block_group) |
| 7177 | return -1; | ||
| 6896 | 7178 | ||
| 6897 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 7179 | /* no bytes used, we're good */ |
| 6898 | if (err) | 7180 | if (!btrfs_block_group_used(&block_group->item)) |
| 6899 | goto out; | 7181 | goto out; |
| 6900 | 7182 | ||
| 6901 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 7183 | space_info = block_group->space_info; |
| 6902 | BUG_ON(err); | 7184 | spin_lock(&space_info->lock); |
| 6903 | |||
| 6904 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
| 6905 | group->key.offset, 0, group->key.offset, | ||
| 6906 | 0, 0, 0); | ||
| 6907 | BUG_ON(err); | ||
| 6908 | |||
| 6909 | inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); | ||
| 6910 | if (inode->i_state & I_NEW) { | ||
| 6911 | BTRFS_I(inode)->root = root; | ||
| 6912 | BTRFS_I(inode)->location.objectid = objectid; | ||
| 6913 | BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; | ||
| 6914 | BTRFS_I(inode)->location.offset = 0; | ||
| 6915 | btrfs_read_locked_inode(inode); | ||
| 6916 | unlock_new_inode(inode); | ||
| 6917 | BUG_ON(is_bad_inode(inode)); | ||
| 6918 | } else { | ||
| 6919 | BUG_ON(1); | ||
| 6920 | } | ||
| 6921 | BTRFS_I(inode)->index_cnt = group->key.objectid; | ||
| 6922 | |||
| 6923 | err = btrfs_orphan_add(trans, inode); | ||
| 6924 | out: | ||
| 6925 | btrfs_end_transaction(trans, root); | ||
| 6926 | if (err) { | ||
| 6927 | if (inode) | ||
| 6928 | iput(inode); | ||
| 6929 | inode = ERR_PTR(err); | ||
| 6930 | } | ||
| 6931 | return inode; | ||
| 6932 | } | ||
| 6933 | |||
| 6934 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | ||
| 6935 | { | ||
| 6936 | |||
| 6937 | struct btrfs_ordered_sum *sums; | ||
| 6938 | struct btrfs_sector_sum *sector_sum; | ||
| 6939 | struct btrfs_ordered_extent *ordered; | ||
| 6940 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 6941 | struct list_head list; | ||
| 6942 | size_t offset; | ||
| 6943 | int ret; | ||
| 6944 | u64 disk_bytenr; | ||
| 6945 | |||
| 6946 | INIT_LIST_HEAD(&list); | ||
| 6947 | |||
| 6948 | ordered = btrfs_lookup_ordered_extent(inode, file_pos); | ||
| 6949 | BUG_ON(ordered->file_offset != file_pos || ordered->len != len); | ||
| 6950 | |||
| 6951 | disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; | ||
| 6952 | ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, | ||
| 6953 | disk_bytenr + len - 1, &list); | ||
| 6954 | |||
| 6955 | while (!list_empty(&list)) { | ||
| 6956 | sums = list_entry(list.next, struct btrfs_ordered_sum, list); | ||
| 6957 | list_del_init(&sums->list); | ||
| 6958 | |||
| 6959 | sector_sum = sums->sums; | ||
| 6960 | sums->bytenr = ordered->start; | ||
| 6961 | 7185 | ||
| 6962 | offset = 0; | 7186 | full = space_info->full; |
| 6963 | while (offset < sums->len) { | ||
| 6964 | sector_sum->bytenr += ordered->start - disk_bytenr; | ||
| 6965 | sector_sum++; | ||
| 6966 | offset += root->sectorsize; | ||
| 6967 | } | ||
| 6968 | 7187 | ||
| 6969 | btrfs_add_ordered_sum(inode, ordered, sums); | 7188 | /* |
| 7189 | * if this is the last block group we have in this space, we can't | ||
| 7190 | * relocate it unless we're able to allocate a new chunk below. | ||
| 7191 | * | ||
| 7192 | * Otherwise, we need to make sure we have room in the space to handle | ||
| 7193 | * all of the extents from this block group. If we can, we're good | ||
| 7194 | */ | ||
| 7195 | if ((space_info->total_bytes != block_group->key.offset) && | ||
| 7196 | (space_info->bytes_used + space_info->bytes_reserved + | ||
| 7197 | space_info->bytes_pinned + space_info->bytes_readonly + | ||
| 7198 | btrfs_block_group_used(&block_group->item) < | ||
| 7199 | space_info->total_bytes)) { | ||
| 7200 | spin_unlock(&space_info->lock); | ||
| 7201 | goto out; | ||
| 6970 | } | 7202 | } |
| 6971 | btrfs_put_ordered_extent(ordered); | 7203 | spin_unlock(&space_info->lock); |
| 6972 | return 0; | ||
| 6973 | } | ||
| 6974 | |||
| 6975 | int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) | ||
| 6976 | { | ||
| 6977 | struct btrfs_trans_handle *trans; | ||
| 6978 | struct btrfs_path *path; | ||
| 6979 | struct btrfs_fs_info *info = root->fs_info; | ||
| 6980 | struct extent_buffer *leaf; | ||
| 6981 | struct inode *reloc_inode; | ||
| 6982 | struct btrfs_block_group_cache *block_group; | ||
| 6983 | struct btrfs_key key; | ||
| 6984 | u64 skipped; | ||
| 6985 | u64 cur_byte; | ||
| 6986 | u64 total_found; | ||
| 6987 | u32 nritems; | ||
| 6988 | int ret; | ||
| 6989 | int progress; | ||
| 6990 | int pass = 0; | ||
| 6991 | |||
| 6992 | root = root->fs_info->extent_root; | ||
| 6993 | |||
| 6994 | block_group = btrfs_lookup_block_group(info, group_start); | ||
| 6995 | BUG_ON(!block_group); | ||
| 6996 | |||
| 6997 | printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", | ||
| 6998 | (unsigned long long)block_group->key.objectid, | ||
| 6999 | (unsigned long long)block_group->flags); | ||
| 7000 | |||
| 7001 | path = btrfs_alloc_path(); | ||
| 7002 | BUG_ON(!path); | ||
| 7003 | |||
| 7004 | reloc_inode = create_reloc_inode(info, block_group); | ||
| 7005 | BUG_ON(IS_ERR(reloc_inode)); | ||
| 7006 | |||
| 7007 | __alloc_chunk_for_shrink(root, block_group, 1); | ||
| 7008 | set_block_group_readonly(block_group); | ||
| 7009 | |||
| 7010 | btrfs_start_delalloc_inodes(info->tree_root); | ||
| 7011 | btrfs_wait_ordered_extents(info->tree_root, 0); | ||
| 7012 | again: | ||
| 7013 | skipped = 0; | ||
| 7014 | total_found = 0; | ||
| 7015 | progress = 0; | ||
| 7016 | key.objectid = block_group->key.objectid; | ||
| 7017 | key.offset = 0; | ||
| 7018 | key.type = 0; | ||
| 7019 | cur_byte = key.objectid; | ||
| 7020 | |||
| 7021 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
| 7022 | btrfs_commit_transaction(trans, info->tree_root); | ||
| 7023 | 7204 | ||
| 7024 | mutex_lock(&root->fs_info->cleaner_mutex); | 7205 | /* |
| 7025 | btrfs_clean_old_snapshots(info->tree_root); | 7206 | * ok we don't have enough space, but maybe we have free space on our |
| 7026 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 7207 | * devices to allocate new chunks for relocation, so loop through our |
| 7027 | mutex_unlock(&root->fs_info->cleaner_mutex); | 7208 | * alloc devices and guess if we have enough space. However, if we |
| 7209 | * were marked as full, then we know there aren't enough chunks, and we | ||
| 7210 | * can just return. | ||
| 7211 | */ | ||
| 7212 | ret = -1; | ||
| 7213 | if (full) | ||
| 7214 | goto out; | ||
| 7028 | 7215 | ||
| 7029 | trans = btrfs_start_transaction(info->tree_root, 1); | 7216 | mutex_lock(&root->fs_info->chunk_mutex); |
| 7030 | btrfs_commit_transaction(trans, info->tree_root); | 7217 | list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { |
| 7218 | u64 min_free = btrfs_block_group_used(&block_group->item); | ||
| 7219 | u64 dev_offset, max_avail; | ||
| 7031 | 7220 | ||
| 7032 | while (1) { | 7221 | /* |
| 7033 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 7222 | * check to make sure we can actually find a chunk with enough |
| 7034 | if (ret < 0) | 7223 | * space to fit our block group in. |
| 7035 | goto out; | 7224 | */ |
| 7036 | next: | 7225 | if (device->total_bytes > device->bytes_used + min_free) { |
| 7037 | leaf = path->nodes[0]; | 7226 | ret = find_free_dev_extent(NULL, device, min_free, |
| 7038 | nritems = btrfs_header_nritems(leaf); | 7227 | &dev_offset, &max_avail); |
| 7039 | if (path->slots[0] >= nritems) { | 7228 | if (!ret) |
| 7040 | ret = btrfs_next_leaf(root, path); | ||
| 7041 | if (ret < 0) | ||
| 7042 | goto out; | ||
| 7043 | if (ret == 1) { | ||
| 7044 | ret = 0; | ||
| 7045 | break; | 7229 | break; |
| 7046 | } | 7230 | ret = -1; |
| 7047 | leaf = path->nodes[0]; | ||
| 7048 | nritems = btrfs_header_nritems(leaf); | ||
| 7049 | } | ||
| 7050 | |||
| 7051 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 7052 | |||
| 7053 | if (key.objectid >= block_group->key.objectid + | ||
| 7054 | block_group->key.offset) | ||
| 7055 | break; | ||
| 7056 | |||
| 7057 | if (progress && need_resched()) { | ||
| 7058 | btrfs_release_path(root, path); | ||
| 7059 | cond_resched(); | ||
| 7060 | progress = 0; | ||
| 7061 | continue; | ||
| 7062 | } | ||
| 7063 | progress = 1; | ||
| 7064 | |||
| 7065 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || | ||
| 7066 | key.objectid + key.offset <= cur_byte) { | ||
| 7067 | path->slots[0]++; | ||
| 7068 | goto next; | ||
| 7069 | } | ||
| 7070 | |||
| 7071 | total_found++; | ||
| 7072 | cur_byte = key.objectid + key.offset; | ||
| 7073 | btrfs_release_path(root, path); | ||
| 7074 | |||
| 7075 | __alloc_chunk_for_shrink(root, block_group, 0); | ||
| 7076 | ret = relocate_one_extent(root, path, &key, block_group, | ||
| 7077 | reloc_inode, pass); | ||
| 7078 | BUG_ON(ret < 0); | ||
| 7079 | if (ret > 0) | ||
| 7080 | skipped++; | ||
| 7081 | |||
| 7082 | key.objectid = cur_byte; | ||
| 7083 | key.type = 0; | ||
| 7084 | key.offset = 0; | ||
| 7085 | } | ||
| 7086 | |||
| 7087 | btrfs_release_path(root, path); | ||
| 7088 | |||
| 7089 | if (pass == 0) { | ||
| 7090 | btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); | ||
| 7091 | invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); | ||
| 7092 | } | ||
| 7093 | |||
| 7094 | if (total_found > 0) { | ||
| 7095 | printk(KERN_INFO "btrfs found %llu extents in pass %d\n", | ||
| 7096 | (unsigned long long)total_found, pass); | ||
| 7097 | pass++; | ||
| 7098 | if (total_found == skipped && pass > 2) { | ||
| 7099 | iput(reloc_inode); | ||
| 7100 | reloc_inode = create_reloc_inode(info, block_group); | ||
| 7101 | pass = 0; | ||
| 7102 | } | 7231 | } |
| 7103 | goto again; | ||
| 7104 | } | 7232 | } |
| 7105 | 7233 | mutex_unlock(&root->fs_info->chunk_mutex); | |
| 7106 | /* delete reloc_inode */ | ||
| 7107 | iput(reloc_inode); | ||
| 7108 | |||
| 7109 | /* unpin extents in this range */ | ||
| 7110 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
| 7111 | btrfs_commit_transaction(trans, info->tree_root); | ||
| 7112 | |||
| 7113 | spin_lock(&block_group->lock); | ||
| 7114 | WARN_ON(block_group->pinned > 0); | ||
| 7115 | WARN_ON(block_group->reserved > 0); | ||
| 7116 | WARN_ON(btrfs_block_group_used(&block_group->item) > 0); | ||
| 7117 | spin_unlock(&block_group->lock); | ||
| 7118 | btrfs_put_block_group(block_group); | ||
| 7119 | ret = 0; | ||
| 7120 | out: | 7234 | out: |
| 7121 | btrfs_free_path(path); | 7235 | btrfs_put_block_group(block_group); |
| 7122 | return ret; | 7236 | return ret; |
| 7123 | } | 7237 | } |
| 7124 | #endif | ||
| 7125 | 7238 | ||
| 7126 | static int find_first_block_group(struct btrfs_root *root, | 7239 | static int find_first_block_group(struct btrfs_root *root, |
| 7127 | struct btrfs_path *path, struct btrfs_key *key) | 7240 | struct btrfs_path *path, struct btrfs_key *key) |
| @@ -7164,8 +7277,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 7164 | { | 7277 | { |
| 7165 | struct btrfs_block_group_cache *block_group; | 7278 | struct btrfs_block_group_cache *block_group; |
| 7166 | struct btrfs_space_info *space_info; | 7279 | struct btrfs_space_info *space_info; |
| 7280 | struct btrfs_caching_control *caching_ctl; | ||
| 7167 | struct rb_node *n; | 7281 | struct rb_node *n; |
| 7168 | 7282 | ||
| 7283 | down_write(&info->extent_commit_sem); | ||
| 7284 | while (!list_empty(&info->caching_block_groups)) { | ||
| 7285 | caching_ctl = list_entry(info->caching_block_groups.next, | ||
| 7286 | struct btrfs_caching_control, list); | ||
| 7287 | list_del(&caching_ctl->list); | ||
| 7288 | put_caching_control(caching_ctl); | ||
| 7289 | } | ||
| 7290 | up_write(&info->extent_commit_sem); | ||
| 7291 | |||
| 7169 | spin_lock(&info->block_group_cache_lock); | 7292 | spin_lock(&info->block_group_cache_lock); |
| 7170 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { | 7293 | while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { |
| 7171 | block_group = rb_entry(n, struct btrfs_block_group_cache, | 7294 | block_group = rb_entry(n, struct btrfs_block_group_cache, |
| @@ -7179,8 +7302,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 7179 | up_write(&block_group->space_info->groups_sem); | 7302 | up_write(&block_group->space_info->groups_sem); |
| 7180 | 7303 | ||
| 7181 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7304 | if (block_group->cached == BTRFS_CACHE_STARTED) |
| 7182 | wait_event(block_group->caching_q, | 7305 | wait_block_group_cache_done(block_group); |
| 7183 | block_group_cache_done(block_group)); | ||
| 7184 | 7306 | ||
| 7185 | btrfs_remove_free_space_cache(block_group); | 7307 | btrfs_remove_free_space_cache(block_group); |
| 7186 | 7308 | ||
| @@ -7250,7 +7372,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7250 | spin_lock_init(&cache->lock); | 7372 | spin_lock_init(&cache->lock); |
| 7251 | spin_lock_init(&cache->tree_lock); | 7373 | spin_lock_init(&cache->tree_lock); |
| 7252 | cache->fs_info = info; | 7374 | cache->fs_info = info; |
| 7253 | init_waitqueue_head(&cache->caching_q); | ||
| 7254 | INIT_LIST_HEAD(&cache->list); | 7375 | INIT_LIST_HEAD(&cache->list); |
| 7255 | INIT_LIST_HEAD(&cache->cluster_list); | 7376 | INIT_LIST_HEAD(&cache->cluster_list); |
| 7256 | 7377 | ||
| @@ -7272,8 +7393,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7272 | cache->flags = btrfs_block_group_flags(&cache->item); | 7393 | cache->flags = btrfs_block_group_flags(&cache->item); |
| 7273 | cache->sectorsize = root->sectorsize; | 7394 | cache->sectorsize = root->sectorsize; |
| 7274 | 7395 | ||
| 7275 | remove_sb_from_cache(root, cache); | ||
| 7276 | |||
| 7277 | /* | 7396 | /* |
| 7278 | * check for two cases, either we are full, and therefore | 7397 | * check for two cases, either we are full, and therefore |
| 7279 | * don't need to bother with the caching work since we won't | 7398 | * don't need to bother with the caching work since we won't |
| @@ -7282,13 +7401,19 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7282 | * time, particularly in the full case. | 7401 | * time, particularly in the full case. |
| 7283 | */ | 7402 | */ |
| 7284 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | 7403 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { |
| 7404 | exclude_super_stripes(root, cache); | ||
| 7405 | cache->last_byte_to_unpin = (u64)-1; | ||
| 7285 | cache->cached = BTRFS_CACHE_FINISHED; | 7406 | cache->cached = BTRFS_CACHE_FINISHED; |
| 7407 | free_excluded_extents(root, cache); | ||
| 7286 | } else if (btrfs_block_group_used(&cache->item) == 0) { | 7408 | } else if (btrfs_block_group_used(&cache->item) == 0) { |
| 7409 | exclude_super_stripes(root, cache); | ||
| 7410 | cache->last_byte_to_unpin = (u64)-1; | ||
| 7287 | cache->cached = BTRFS_CACHE_FINISHED; | 7411 | cache->cached = BTRFS_CACHE_FINISHED; |
| 7288 | add_new_free_space(cache, root->fs_info, | 7412 | add_new_free_space(cache, root->fs_info, |
| 7289 | found_key.objectid, | 7413 | found_key.objectid, |
| 7290 | found_key.objectid + | 7414 | found_key.objectid + |
| 7291 | found_key.offset); | 7415 | found_key.offset); |
| 7416 | free_excluded_extents(root, cache); | ||
| 7292 | } | 7417 | } |
| 7293 | 7418 | ||
| 7294 | ret = update_space_info(info, cache->flags, found_key.offset, | 7419 | ret = update_space_info(info, cache->flags, found_key.offset, |
| @@ -7296,6 +7421,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7296 | &space_info); | 7421 | &space_info); |
| 7297 | BUG_ON(ret); | 7422 | BUG_ON(ret); |
| 7298 | cache->space_info = space_info; | 7423 | cache->space_info = space_info; |
| 7424 | spin_lock(&cache->space_info->lock); | ||
| 7425 | cache->space_info->bytes_super += cache->bytes_super; | ||
| 7426 | spin_unlock(&cache->space_info->lock); | ||
| 7427 | |||
| 7299 | down_write(&space_info->groups_sem); | 7428 | down_write(&space_info->groups_sem); |
| 7300 | list_add_tail(&cache->list, &space_info->block_groups); | 7429 | list_add_tail(&cache->list, &space_info->block_groups); |
| 7301 | up_write(&space_info->groups_sem); | 7430 | up_write(&space_info->groups_sem); |
| @@ -7345,7 +7474,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7345 | atomic_set(&cache->count, 1); | 7474 | atomic_set(&cache->count, 1); |
| 7346 | spin_lock_init(&cache->lock); | 7475 | spin_lock_init(&cache->lock); |
| 7347 | spin_lock_init(&cache->tree_lock); | 7476 | spin_lock_init(&cache->tree_lock); |
| 7348 | init_waitqueue_head(&cache->caching_q); | ||
| 7349 | INIT_LIST_HEAD(&cache->list); | 7477 | INIT_LIST_HEAD(&cache->list); |
| 7350 | INIT_LIST_HEAD(&cache->cluster_list); | 7478 | INIT_LIST_HEAD(&cache->cluster_list); |
| 7351 | 7479 | ||
| @@ -7354,15 +7482,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7354 | cache->flags = type; | 7482 | cache->flags = type; |
| 7355 | btrfs_set_block_group_flags(&cache->item, type); | 7483 | btrfs_set_block_group_flags(&cache->item, type); |
| 7356 | 7484 | ||
| 7485 | cache->last_byte_to_unpin = (u64)-1; | ||
| 7357 | cache->cached = BTRFS_CACHE_FINISHED; | 7486 | cache->cached = BTRFS_CACHE_FINISHED; |
| 7358 | remove_sb_from_cache(root, cache); | 7487 | exclude_super_stripes(root, cache); |
| 7359 | 7488 | ||
| 7360 | add_new_free_space(cache, root->fs_info, chunk_offset, | 7489 | add_new_free_space(cache, root->fs_info, chunk_offset, |
| 7361 | chunk_offset + size); | 7490 | chunk_offset + size); |
| 7362 | 7491 | ||
| 7492 | free_excluded_extents(root, cache); | ||
| 7493 | |||
| 7363 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7494 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
| 7364 | &cache->space_info); | 7495 | &cache->space_info); |
| 7365 | BUG_ON(ret); | 7496 | BUG_ON(ret); |
| 7497 | |||
| 7498 | spin_lock(&cache->space_info->lock); | ||
| 7499 | cache->space_info->bytes_super += cache->bytes_super; | ||
| 7500 | spin_unlock(&cache->space_info->lock); | ||
| 7501 | |||
| 7366 | down_write(&cache->space_info->groups_sem); | 7502 | down_write(&cache->space_info->groups_sem); |
| 7367 | list_add_tail(&cache->list, &cache->space_info->block_groups); | 7503 | list_add_tail(&cache->list, &cache->space_info->block_groups); |
| 7368 | up_write(&cache->space_info->groups_sem); | 7504 | up_write(&cache->space_info->groups_sem); |
| @@ -7428,8 +7564,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
| 7428 | up_write(&block_group->space_info->groups_sem); | 7564 | up_write(&block_group->space_info->groups_sem); |
| 7429 | 7565 | ||
| 7430 | if (block_group->cached == BTRFS_CACHE_STARTED) | 7566 | if (block_group->cached == BTRFS_CACHE_STARTED) |
| 7431 | wait_event(block_group->caching_q, | 7567 | wait_block_group_cache_done(block_group); |
| 7432 | block_group_cache_done(block_group)); | ||
| 7433 | 7568 | ||
| 7434 | btrfs_remove_free_space_cache(block_group); | 7569 | btrfs_remove_free_space_cache(block_group); |
| 7435 | 7570 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 68260180f587..96577e8bf9fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -280,6 +280,14 @@ static struct extent_buffer *buffer_search(struct extent_io_tree *tree, | |||
| 280 | return NULL; | 280 | return NULL; |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, | ||
| 284 | struct extent_state *other) | ||
| 285 | { | ||
| 286 | if (tree->ops && tree->ops->merge_extent_hook) | ||
| 287 | tree->ops->merge_extent_hook(tree->mapping->host, new, | ||
| 288 | other); | ||
| 289 | } | ||
| 290 | |||
| 283 | /* | 291 | /* |
| 284 | * utility function to look for merge candidates inside a given range. | 292 | * utility function to look for merge candidates inside a given range. |
| 285 | * Any extents with matching state are merged together into a single | 293 | * Any extents with matching state are merged together into a single |
| @@ -303,6 +311,7 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 303 | other = rb_entry(other_node, struct extent_state, rb_node); | 311 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 304 | if (other->end == state->start - 1 && | 312 | if (other->end == state->start - 1 && |
| 305 | other->state == state->state) { | 313 | other->state == state->state) { |
| 314 | merge_cb(tree, state, other); | ||
| 306 | state->start = other->start; | 315 | state->start = other->start; |
| 307 | other->tree = NULL; | 316 | other->tree = NULL; |
| 308 | rb_erase(&other->rb_node, &tree->state); | 317 | rb_erase(&other->rb_node, &tree->state); |
| @@ -314,33 +323,37 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 314 | other = rb_entry(other_node, struct extent_state, rb_node); | 323 | other = rb_entry(other_node, struct extent_state, rb_node); |
| 315 | if (other->start == state->end + 1 && | 324 | if (other->start == state->end + 1 && |
| 316 | other->state == state->state) { | 325 | other->state == state->state) { |
| 326 | merge_cb(tree, state, other); | ||
| 317 | other->start = state->start; | 327 | other->start = state->start; |
| 318 | state->tree = NULL; | 328 | state->tree = NULL; |
| 319 | rb_erase(&state->rb_node, &tree->state); | 329 | rb_erase(&state->rb_node, &tree->state); |
| 320 | free_extent_state(state); | 330 | free_extent_state(state); |
| 331 | state = NULL; | ||
| 321 | } | 332 | } |
| 322 | } | 333 | } |
| 334 | |||
| 323 | return 0; | 335 | return 0; |
| 324 | } | 336 | } |
| 325 | 337 | ||
| 326 | static void set_state_cb(struct extent_io_tree *tree, | 338 | static int set_state_cb(struct extent_io_tree *tree, |
| 327 | struct extent_state *state, | 339 | struct extent_state *state, |
| 328 | unsigned long bits) | 340 | unsigned long bits) |
| 329 | { | 341 | { |
| 330 | if (tree->ops && tree->ops->set_bit_hook) { | 342 | if (tree->ops && tree->ops->set_bit_hook) { |
| 331 | tree->ops->set_bit_hook(tree->mapping->host, state->start, | 343 | return tree->ops->set_bit_hook(tree->mapping->host, |
| 332 | state->end, state->state, bits); | 344 | state->start, state->end, |
| 345 | state->state, bits); | ||
| 333 | } | 346 | } |
| 347 | |||
| 348 | return 0; | ||
| 334 | } | 349 | } |
| 335 | 350 | ||
| 336 | static void clear_state_cb(struct extent_io_tree *tree, | 351 | static void clear_state_cb(struct extent_io_tree *tree, |
| 337 | struct extent_state *state, | 352 | struct extent_state *state, |
| 338 | unsigned long bits) | 353 | unsigned long bits) |
| 339 | { | 354 | { |
| 340 | if (tree->ops && tree->ops->clear_bit_hook) { | 355 | if (tree->ops && tree->ops->clear_bit_hook) |
| 341 | tree->ops->clear_bit_hook(tree->mapping->host, state->start, | 356 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
| 342 | state->end, state->state, bits); | ||
| 343 | } | ||
| 344 | } | 357 | } |
| 345 | 358 | ||
| 346 | /* | 359 | /* |
| @@ -358,6 +371,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 358 | int bits) | 371 | int bits) |
| 359 | { | 372 | { |
| 360 | struct rb_node *node; | 373 | struct rb_node *node; |
| 374 | int ret; | ||
| 361 | 375 | ||
| 362 | if (end < start) { | 376 | if (end < start) { |
| 363 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 377 | printk(KERN_ERR "btrfs end < start %llu %llu\n", |
| @@ -365,12 +379,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 365 | (unsigned long long)start); | 379 | (unsigned long long)start); |
| 366 | WARN_ON(1); | 380 | WARN_ON(1); |
| 367 | } | 381 | } |
| 382 | state->start = start; | ||
| 383 | state->end = end; | ||
| 384 | ret = set_state_cb(tree, state, bits); | ||
| 385 | if (ret) | ||
| 386 | return ret; | ||
| 387 | |||
| 368 | if (bits & EXTENT_DIRTY) | 388 | if (bits & EXTENT_DIRTY) |
| 369 | tree->dirty_bytes += end - start + 1; | 389 | tree->dirty_bytes += end - start + 1; |
| 370 | set_state_cb(tree, state, bits); | ||
| 371 | state->state |= bits; | 390 | state->state |= bits; |
| 372 | state->start = start; | ||
| 373 | state->end = end; | ||
| 374 | node = tree_insert(&tree->state, end, &state->rb_node); | 391 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 375 | if (node) { | 392 | if (node) { |
| 376 | struct extent_state *found; | 393 | struct extent_state *found; |
| @@ -387,6 +404,15 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 387 | return 0; | 404 | return 0; |
| 388 | } | 405 | } |
| 389 | 406 | ||
| 407 | static int split_cb(struct extent_io_tree *tree, struct extent_state *orig, | ||
| 408 | u64 split) | ||
| 409 | { | ||
| 410 | if (tree->ops && tree->ops->split_extent_hook) | ||
| 411 | return tree->ops->split_extent_hook(tree->mapping->host, | ||
| 412 | orig, split); | ||
| 413 | return 0; | ||
| 414 | } | ||
| 415 | |||
| 390 | /* | 416 | /* |
| 391 | * split a given extent state struct in two, inserting the preallocated | 417 | * split a given extent state struct in two, inserting the preallocated |
| 392 | * struct 'prealloc' as the newly created second half. 'split' indicates an | 418 | * struct 'prealloc' as the newly created second half. 'split' indicates an |
| @@ -405,6 +431,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 405 | struct extent_state *prealloc, u64 split) | 431 | struct extent_state *prealloc, u64 split) |
| 406 | { | 432 | { |
| 407 | struct rb_node *node; | 433 | struct rb_node *node; |
| 434 | |||
| 435 | split_cb(tree, orig, split); | ||
| 436 | |||
| 408 | prealloc->start = orig->start; | 437 | prealloc->start = orig->start; |
| 409 | prealloc->end = split - 1; | 438 | prealloc->end = split - 1; |
| 410 | prealloc->state = orig->state; | 439 | prealloc->state = orig->state; |
| @@ -431,7 +460,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 431 | struct extent_state *state, int bits, int wake, | 460 | struct extent_state *state, int bits, int wake, |
| 432 | int delete) | 461 | int delete) |
| 433 | { | 462 | { |
| 434 | int ret = state->state & bits; | 463 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; |
| 464 | int ret = state->state & bits_to_clear; | ||
| 435 | 465 | ||
| 436 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 466 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
| 437 | u64 range = state->end - state->start + 1; | 467 | u64 range = state->end - state->start + 1; |
| @@ -439,7 +469,7 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 439 | tree->dirty_bytes -= range; | 469 | tree->dirty_bytes -= range; |
| 440 | } | 470 | } |
| 441 | clear_state_cb(tree, state, bits); | 471 | clear_state_cb(tree, state, bits); |
| 442 | state->state &= ~bits; | 472 | state->state &= ~bits_to_clear; |
| 443 | if (wake) | 473 | if (wake) |
| 444 | wake_up(&state->wq); | 474 | wake_up(&state->wq); |
| 445 | if (delete || state->state == 0) { | 475 | if (delete || state->state == 0) { |
| @@ -471,10 +501,14 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 471 | * bits were already set, or zero if none of the bits were already set. | 501 | * bits were already set, or zero if none of the bits were already set. |
| 472 | */ | 502 | */ |
| 473 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 503 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 474 | int bits, int wake, int delete, gfp_t mask) | 504 | int bits, int wake, int delete, |
| 505 | struct extent_state **cached_state, | ||
| 506 | gfp_t mask) | ||
| 475 | { | 507 | { |
| 476 | struct extent_state *state; | 508 | struct extent_state *state; |
| 509 | struct extent_state *cached; | ||
| 477 | struct extent_state *prealloc = NULL; | 510 | struct extent_state *prealloc = NULL; |
| 511 | struct rb_node *next_node; | ||
| 478 | struct rb_node *node; | 512 | struct rb_node *node; |
| 479 | u64 last_end; | 513 | u64 last_end; |
| 480 | int err; | 514 | int err; |
| @@ -488,6 +522,17 @@ again: | |||
| 488 | } | 522 | } |
| 489 | 523 | ||
| 490 | spin_lock(&tree->lock); | 524 | spin_lock(&tree->lock); |
| 525 | if (cached_state) { | ||
| 526 | cached = *cached_state; | ||
| 527 | *cached_state = NULL; | ||
| 528 | cached_state = NULL; | ||
| 529 | if (cached && cached->tree && cached->start == start) { | ||
| 530 | atomic_dec(&cached->refs); | ||
| 531 | state = cached; | ||
| 532 | goto hit_next; | ||
| 533 | } | ||
| 534 | free_extent_state(cached); | ||
| 535 | } | ||
| 491 | /* | 536 | /* |
| 492 | * this search will find the extents that end after | 537 | * this search will find the extents that end after |
| 493 | * our range starts | 538 | * our range starts |
| @@ -496,6 +541,7 @@ again: | |||
| 496 | if (!node) | 541 | if (!node) |
| 497 | goto out; | 542 | goto out; |
| 498 | state = rb_entry(node, struct extent_state, rb_node); | 543 | state = rb_entry(node, struct extent_state, rb_node); |
| 544 | hit_next: | ||
| 499 | if (state->start > end) | 545 | if (state->start > end) |
| 500 | goto out; | 546 | goto out; |
| 501 | WARN_ON(state->end < start); | 547 | WARN_ON(state->end < start); |
| @@ -526,13 +572,11 @@ again: | |||
| 526 | if (err) | 572 | if (err) |
| 527 | goto out; | 573 | goto out; |
| 528 | if (state->end <= end) { | 574 | if (state->end <= end) { |
| 529 | set |= clear_state_bit(tree, state, bits, | 575 | set |= clear_state_bit(tree, state, bits, wake, |
| 530 | wake, delete); | 576 | delete); |
| 531 | if (last_end == (u64)-1) | 577 | if (last_end == (u64)-1) |
| 532 | goto out; | 578 | goto out; |
| 533 | start = last_end + 1; | 579 | start = last_end + 1; |
| 534 | } else { | ||
| 535 | start = state->start; | ||
| 536 | } | 580 | } |
| 537 | goto search_again; | 581 | goto search_again; |
| 538 | } | 582 | } |
| @@ -547,19 +591,30 @@ again: | |||
| 547 | prealloc = alloc_extent_state(GFP_ATOMIC); | 591 | prealloc = alloc_extent_state(GFP_ATOMIC); |
| 548 | err = split_state(tree, state, prealloc, end + 1); | 592 | err = split_state(tree, state, prealloc, end + 1); |
| 549 | BUG_ON(err == -EEXIST); | 593 | BUG_ON(err == -EEXIST); |
| 550 | |||
| 551 | if (wake) | 594 | if (wake) |
| 552 | wake_up(&state->wq); | 595 | wake_up(&state->wq); |
| 553 | set |= clear_state_bit(tree, prealloc, bits, | 596 | |
| 554 | wake, delete); | 597 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); |
| 598 | |||
| 555 | prealloc = NULL; | 599 | prealloc = NULL; |
| 556 | goto out; | 600 | goto out; |
| 557 | } | 601 | } |
| 558 | 602 | ||
| 603 | if (state->end < end && prealloc && !need_resched()) | ||
| 604 | next_node = rb_next(&state->rb_node); | ||
| 605 | else | ||
| 606 | next_node = NULL; | ||
| 607 | |||
| 559 | set |= clear_state_bit(tree, state, bits, wake, delete); | 608 | set |= clear_state_bit(tree, state, bits, wake, delete); |
| 560 | if (last_end == (u64)-1) | 609 | if (last_end == (u64)-1) |
| 561 | goto out; | 610 | goto out; |
| 562 | start = last_end + 1; | 611 | start = last_end + 1; |
| 612 | if (start <= end && next_node) { | ||
| 613 | state = rb_entry(next_node, struct extent_state, | ||
| 614 | rb_node); | ||
| 615 | if (state->start == start) | ||
| 616 | goto hit_next; | ||
| 617 | } | ||
| 563 | goto search_again; | 618 | goto search_again; |
| 564 | 619 | ||
| 565 | out: | 620 | out: |
| @@ -641,40 +696,59 @@ out: | |||
| 641 | return 0; | 696 | return 0; |
| 642 | } | 697 | } |
| 643 | 698 | ||
| 644 | static void set_state_bits(struct extent_io_tree *tree, | 699 | static int set_state_bits(struct extent_io_tree *tree, |
| 645 | struct extent_state *state, | 700 | struct extent_state *state, |
| 646 | int bits) | 701 | int bits) |
| 647 | { | 702 | { |
| 703 | int ret; | ||
| 704 | |||
| 705 | ret = set_state_cb(tree, state, bits); | ||
| 706 | if (ret) | ||
| 707 | return ret; | ||
| 708 | |||
| 648 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | 709 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { |
| 649 | u64 range = state->end - state->start + 1; | 710 | u64 range = state->end - state->start + 1; |
| 650 | tree->dirty_bytes += range; | 711 | tree->dirty_bytes += range; |
| 651 | } | 712 | } |
| 652 | set_state_cb(tree, state, bits); | ||
| 653 | state->state |= bits; | 713 | state->state |= bits; |
| 714 | |||
| 715 | return 0; | ||
| 716 | } | ||
| 717 | |||
| 718 | static void cache_state(struct extent_state *state, | ||
| 719 | struct extent_state **cached_ptr) | ||
| 720 | { | ||
| 721 | if (cached_ptr && !(*cached_ptr)) { | ||
| 722 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { | ||
| 723 | *cached_ptr = state; | ||
| 724 | atomic_inc(&state->refs); | ||
| 725 | } | ||
| 726 | } | ||
| 654 | } | 727 | } |
| 655 | 728 | ||
| 656 | /* | 729 | /* |
| 657 | * set some bits on a range in the tree. This may require allocations | 730 | * set some bits on a range in the tree. This may require allocations or |
| 658 | * or sleeping, so the gfp mask is used to indicate what is allowed. | 731 | * sleeping, so the gfp mask is used to indicate what is allowed. |
| 659 | * | 732 | * |
| 660 | * If 'exclusive' == 1, this will fail with -EEXIST if some part of the | 733 | * If any of the exclusive bits are set, this will fail with -EEXIST if some |
| 661 | * range already has the desired bits set. The start of the existing | 734 | * part of the range already has the desired bits set. The start of the |
| 662 | * range is returned in failed_start in this case. | 735 | * existing range is returned in failed_start in this case. |
| 663 | * | 736 | * |
| 664 | * [start, end] is inclusive | 737 | * [start, end] is inclusive This takes the tree lock. |
| 665 | * This takes the tree lock. | ||
| 666 | */ | 738 | */ |
| 739 | |||
| 667 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 740 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 668 | int bits, int exclusive, u64 *failed_start, | 741 | int bits, int exclusive_bits, u64 *failed_start, |
| 742 | struct extent_state **cached_state, | ||
| 669 | gfp_t mask) | 743 | gfp_t mask) |
| 670 | { | 744 | { |
| 671 | struct extent_state *state; | 745 | struct extent_state *state; |
| 672 | struct extent_state *prealloc = NULL; | 746 | struct extent_state *prealloc = NULL; |
| 673 | struct rb_node *node; | 747 | struct rb_node *node; |
| 674 | int err = 0; | 748 | int err = 0; |
| 675 | int set; | ||
| 676 | u64 last_start; | 749 | u64 last_start; |
| 677 | u64 last_end; | 750 | u64 last_end; |
| 751 | |||
| 678 | again: | 752 | again: |
| 679 | if (!prealloc && (mask & __GFP_WAIT)) { | 753 | if (!prealloc && (mask & __GFP_WAIT)) { |
| 680 | prealloc = alloc_extent_state(mask); | 754 | prealloc = alloc_extent_state(mask); |
| @@ -683,6 +757,13 @@ again: | |||
| 683 | } | 757 | } |
| 684 | 758 | ||
| 685 | spin_lock(&tree->lock); | 759 | spin_lock(&tree->lock); |
| 760 | if (cached_state && *cached_state) { | ||
| 761 | state = *cached_state; | ||
| 762 | if (state->start == start && state->tree) { | ||
| 763 | node = &state->rb_node; | ||
| 764 | goto hit_next; | ||
| 765 | } | ||
| 766 | } | ||
| 686 | /* | 767 | /* |
| 687 | * this search will find all the extents that end after | 768 | * this search will find all the extents that end after |
| 688 | * our range starts. | 769 | * our range starts. |
| @@ -694,8 +775,8 @@ again: | |||
| 694 | BUG_ON(err == -EEXIST); | 775 | BUG_ON(err == -EEXIST); |
| 695 | goto out; | 776 | goto out; |
| 696 | } | 777 | } |
| 697 | |||
| 698 | state = rb_entry(node, struct extent_state, rb_node); | 778 | state = rb_entry(node, struct extent_state, rb_node); |
| 779 | hit_next: | ||
| 699 | last_start = state->start; | 780 | last_start = state->start; |
| 700 | last_end = state->end; | 781 | last_end = state->end; |
| 701 | 782 | ||
| @@ -706,17 +787,32 @@ again: | |||
| 706 | * Just lock what we found and keep going | 787 | * Just lock what we found and keep going |
| 707 | */ | 788 | */ |
| 708 | if (state->start == start && state->end <= end) { | 789 | if (state->start == start && state->end <= end) { |
| 709 | set = state->state & bits; | 790 | struct rb_node *next_node; |
| 710 | if (set && exclusive) { | 791 | if (state->state & exclusive_bits) { |
| 711 | *failed_start = state->start; | 792 | *failed_start = state->start; |
| 712 | err = -EEXIST; | 793 | err = -EEXIST; |
| 713 | goto out; | 794 | goto out; |
| 714 | } | 795 | } |
| 715 | set_state_bits(tree, state, bits); | 796 | |
| 797 | err = set_state_bits(tree, state, bits); | ||
| 798 | if (err) | ||
| 799 | goto out; | ||
| 800 | |||
| 801 | cache_state(state, cached_state); | ||
| 716 | merge_state(tree, state); | 802 | merge_state(tree, state); |
| 717 | if (last_end == (u64)-1) | 803 | if (last_end == (u64)-1) |
| 718 | goto out; | 804 | goto out; |
| 805 | |||
| 719 | start = last_end + 1; | 806 | start = last_end + 1; |
| 807 | if (start < end && prealloc && !need_resched()) { | ||
| 808 | next_node = rb_next(node); | ||
| 809 | if (next_node) { | ||
| 810 | state = rb_entry(next_node, struct extent_state, | ||
| 811 | rb_node); | ||
| 812 | if (state->start == start) | ||
| 813 | goto hit_next; | ||
| 814 | } | ||
| 815 | } | ||
| 720 | goto search_again; | 816 | goto search_again; |
| 721 | } | 817 | } |
| 722 | 818 | ||
| @@ -737,8 +833,7 @@ again: | |||
| 737 | * desired bit on it. | 833 | * desired bit on it. |
| 738 | */ | 834 | */ |
| 739 | if (state->start < start) { | 835 | if (state->start < start) { |
| 740 | set = state->state & bits; | 836 | if (state->state & exclusive_bits) { |
| 741 | if (exclusive && set) { | ||
| 742 | *failed_start = start; | 837 | *failed_start = start; |
| 743 | err = -EEXIST; | 838 | err = -EEXIST; |
| 744 | goto out; | 839 | goto out; |
| @@ -749,13 +844,14 @@ again: | |||
| 749 | if (err) | 844 | if (err) |
| 750 | goto out; | 845 | goto out; |
| 751 | if (state->end <= end) { | 846 | if (state->end <= end) { |
| 752 | set_state_bits(tree, state, bits); | 847 | err = set_state_bits(tree, state, bits); |
| 848 | if (err) | ||
| 849 | goto out; | ||
| 850 | cache_state(state, cached_state); | ||
| 753 | merge_state(tree, state); | 851 | merge_state(tree, state); |
| 754 | if (last_end == (u64)-1) | 852 | if (last_end == (u64)-1) |
| 755 | goto out; | 853 | goto out; |
| 756 | start = last_end + 1; | 854 | start = last_end + 1; |
| 757 | } else { | ||
| 758 | start = state->start; | ||
| 759 | } | 855 | } |
| 760 | goto search_again; | 856 | goto search_again; |
| 761 | } | 857 | } |
| @@ -774,10 +870,13 @@ again: | |||
| 774 | this_end = last_start - 1; | 870 | this_end = last_start - 1; |
| 775 | err = insert_state(tree, prealloc, start, this_end, | 871 | err = insert_state(tree, prealloc, start, this_end, |
| 776 | bits); | 872 | bits); |
| 777 | prealloc = NULL; | ||
| 778 | BUG_ON(err == -EEXIST); | 873 | BUG_ON(err == -EEXIST); |
| 779 | if (err) | 874 | if (err) { |
| 875 | prealloc = NULL; | ||
| 780 | goto out; | 876 | goto out; |
| 877 | } | ||
| 878 | cache_state(prealloc, cached_state); | ||
| 879 | prealloc = NULL; | ||
| 781 | start = this_end + 1; | 880 | start = this_end + 1; |
| 782 | goto search_again; | 881 | goto search_again; |
| 783 | } | 882 | } |
| @@ -788,8 +887,7 @@ again: | |||
| 788 | * on the first half | 887 | * on the first half |
| 789 | */ | 888 | */ |
| 790 | if (state->start <= end && state->end > end) { | 889 | if (state->start <= end && state->end > end) { |
| 791 | set = state->state & bits; | 890 | if (state->state & exclusive_bits) { |
| 792 | if (exclusive && set) { | ||
| 793 | *failed_start = start; | 891 | *failed_start = start; |
| 794 | err = -EEXIST; | 892 | err = -EEXIST; |
| 795 | goto out; | 893 | goto out; |
| @@ -797,7 +895,12 @@ again: | |||
| 797 | err = split_state(tree, state, prealloc, end + 1); | 895 | err = split_state(tree, state, prealloc, end + 1); |
| 798 | BUG_ON(err == -EEXIST); | 896 | BUG_ON(err == -EEXIST); |
| 799 | 897 | ||
| 800 | set_state_bits(tree, prealloc, bits); | 898 | err = set_state_bits(tree, prealloc, bits); |
| 899 | if (err) { | ||
| 900 | prealloc = NULL; | ||
| 901 | goto out; | ||
| 902 | } | ||
| 903 | cache_state(prealloc, cached_state); | ||
| 801 | merge_state(tree, prealloc); | 904 | merge_state(tree, prealloc); |
| 802 | prealloc = NULL; | 905 | prealloc = NULL; |
| 803 | goto out; | 906 | goto out; |
| @@ -826,86 +929,65 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 826 | gfp_t mask) | 929 | gfp_t mask) |
| 827 | { | 930 | { |
| 828 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, | 931 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, |
| 829 | mask); | 932 | NULL, mask); |
| 830 | } | ||
| 831 | |||
| 832 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 833 | gfp_t mask) | ||
| 834 | { | ||
| 835 | return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); | ||
| 836 | } | 933 | } |
| 837 | 934 | ||
| 838 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 935 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 839 | int bits, gfp_t mask) | 936 | int bits, gfp_t mask) |
| 840 | { | 937 | { |
| 841 | return set_extent_bit(tree, start, end, bits, 0, NULL, | 938 | return set_extent_bit(tree, start, end, bits, 0, NULL, |
| 842 | mask); | 939 | NULL, mask); |
| 843 | } | 940 | } |
| 844 | 941 | ||
| 845 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 942 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 846 | int bits, gfp_t mask) | 943 | int bits, gfp_t mask) |
| 847 | { | 944 | { |
| 848 | return clear_extent_bit(tree, start, end, bits, 0, 0, mask); | 945 | return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); |
| 849 | } | 946 | } |
| 850 | 947 | ||
| 851 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 948 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
| 852 | gfp_t mask) | 949 | gfp_t mask) |
| 853 | { | 950 | { |
| 854 | return set_extent_bit(tree, start, end, | 951 | return set_extent_bit(tree, start, end, |
| 855 | EXTENT_DELALLOC | EXTENT_DIRTY, | 952 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, |
| 856 | 0, NULL, mask); | 953 | 0, NULL, NULL, mask); |
| 857 | } | 954 | } |
| 858 | 955 | ||
| 859 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 956 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 860 | gfp_t mask) | 957 | gfp_t mask) |
| 861 | { | 958 | { |
| 862 | return clear_extent_bit(tree, start, end, | 959 | return clear_extent_bit(tree, start, end, |
| 863 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); | 960 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 864 | } | 961 | EXTENT_DO_ACCOUNTING, 0, 0, |
| 865 | 962 | NULL, mask); | |
| 866 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 867 | gfp_t mask) | ||
| 868 | { | ||
| 869 | return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); | ||
| 870 | } | 963 | } |
| 871 | 964 | ||
| 872 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 965 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 873 | gfp_t mask) | 966 | gfp_t mask) |
| 874 | { | 967 | { |
| 875 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, | 968 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, |
| 876 | mask); | 969 | NULL, mask); |
| 877 | } | 970 | } |
| 878 | 971 | ||
| 879 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 880 | gfp_t mask) | 973 | gfp_t mask) |
| 881 | { | 974 | { |
| 882 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); | 975 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, |
| 976 | NULL, mask); | ||
| 883 | } | 977 | } |
| 884 | 978 | ||
| 885 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 979 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 886 | gfp_t mask) | 980 | gfp_t mask) |
| 887 | { | 981 | { |
| 888 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 982 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, |
| 889 | mask); | 983 | NULL, mask); |
| 890 | } | 984 | } |
| 891 | 985 | ||
| 892 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 986 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
| 893 | u64 end, gfp_t mask) | 987 | u64 end, gfp_t mask) |
| 894 | { | 988 | { |
| 895 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); | 989 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
| 896 | } | 990 | NULL, mask); |
| 897 | |||
| 898 | static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 899 | gfp_t mask) | ||
| 900 | { | ||
| 901 | return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, | ||
| 902 | 0, NULL, mask); | ||
| 903 | } | ||
| 904 | |||
| 905 | static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, | ||
| 906 | u64 end, gfp_t mask) | ||
| 907 | { | ||
| 908 | return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); | ||
| 909 | } | 991 | } |
| 910 | 992 | ||
| 911 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 993 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
| @@ -917,13 +999,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 917 | * either insert or lock state struct between start and end use mask to tell | 999 | * either insert or lock state struct between start and end use mask to tell |
| 918 | * us if waiting is desired. | 1000 | * us if waiting is desired. |
| 919 | */ | 1001 | */ |
| 920 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | 1002 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 1003 | int bits, struct extent_state **cached_state, gfp_t mask) | ||
| 921 | { | 1004 | { |
| 922 | int err; | 1005 | int err; |
| 923 | u64 failed_start; | 1006 | u64 failed_start; |
| 924 | while (1) { | 1007 | while (1) { |
| 925 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 1008 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, |
| 926 | &failed_start, mask); | 1009 | EXTENT_LOCKED, &failed_start, |
| 1010 | cached_state, mask); | ||
| 927 | if (err == -EEXIST && (mask & __GFP_WAIT)) { | 1011 | if (err == -EEXIST && (mask & __GFP_WAIT)) { |
| 928 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); | 1012 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); |
| 929 | start = failed_start; | 1013 | start = failed_start; |
| @@ -935,27 +1019,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | |||
| 935 | return err; | 1019 | return err; |
| 936 | } | 1020 | } |
| 937 | 1021 | ||
| 1022 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | ||
| 1023 | { | ||
| 1024 | return lock_extent_bits(tree, start, end, 0, NULL, mask); | ||
| 1025 | } | ||
| 1026 | |||
| 938 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1027 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 939 | gfp_t mask) | 1028 | gfp_t mask) |
| 940 | { | 1029 | { |
| 941 | int err; | 1030 | int err; |
| 942 | u64 failed_start; | 1031 | u64 failed_start; |
| 943 | 1032 | ||
| 944 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 1033 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, |
| 945 | &failed_start, mask); | 1034 | &failed_start, NULL, mask); |
| 946 | if (err == -EEXIST) { | 1035 | if (err == -EEXIST) { |
| 947 | if (failed_start > start) | 1036 | if (failed_start > start) |
| 948 | clear_extent_bit(tree, start, failed_start - 1, | 1037 | clear_extent_bit(tree, start, failed_start - 1, |
| 949 | EXTENT_LOCKED, 1, 0, mask); | 1038 | EXTENT_LOCKED, 1, 0, NULL, mask); |
| 950 | return 0; | 1039 | return 0; |
| 951 | } | 1040 | } |
| 952 | return 1; | 1041 | return 1; |
| 953 | } | 1042 | } |
| 954 | 1043 | ||
| 1044 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 1045 | struct extent_state **cached, gfp_t mask) | ||
| 1046 | { | ||
| 1047 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, | ||
| 1048 | mask); | ||
| 1049 | } | ||
| 1050 | |||
| 955 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1051 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 956 | gfp_t mask) | 1052 | gfp_t mask) |
| 957 | { | 1053 | { |
| 958 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); | 1054 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
| 1055 | mask); | ||
| 959 | } | 1056 | } |
| 960 | 1057 | ||
| 961 | /* | 1058 | /* |
| @@ -974,7 +1071,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 974 | page_cache_release(page); | 1071 | page_cache_release(page); |
| 975 | index++; | 1072 | index++; |
| 976 | } | 1073 | } |
| 977 | set_extent_dirty(tree, start, end, GFP_NOFS); | ||
| 978 | return 0; | 1074 | return 0; |
| 979 | } | 1075 | } |
| 980 | 1076 | ||
| @@ -994,7 +1090,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 994 | page_cache_release(page); | 1090 | page_cache_release(page); |
| 995 | index++; | 1091 | index++; |
| 996 | } | 1092 | } |
| 997 | set_extent_writeback(tree, start, end, GFP_NOFS); | ||
| 998 | return 0; | 1093 | return 0; |
| 999 | } | 1094 | } |
| 1000 | 1095 | ||
| @@ -1232,6 +1327,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
| 1232 | u64 delalloc_start; | 1327 | u64 delalloc_start; |
| 1233 | u64 delalloc_end; | 1328 | u64 delalloc_end; |
| 1234 | u64 found; | 1329 | u64 found; |
| 1330 | struct extent_state *cached_state = NULL; | ||
| 1235 | int ret; | 1331 | int ret; |
| 1236 | int loops = 0; | 1332 | int loops = 0; |
| 1237 | 1333 | ||
| @@ -1269,6 +1365,7 @@ again: | |||
| 1269 | /* some of the pages are gone, lets avoid looping by | 1365 | /* some of the pages are gone, lets avoid looping by |
| 1270 | * shortening the size of the delalloc range we're searching | 1366 | * shortening the size of the delalloc range we're searching |
| 1271 | */ | 1367 | */ |
| 1368 | free_extent_state(cached_state); | ||
| 1272 | if (!loops) { | 1369 | if (!loops) { |
| 1273 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); | 1370 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); |
| 1274 | max_bytes = PAGE_CACHE_SIZE - offset; | 1371 | max_bytes = PAGE_CACHE_SIZE - offset; |
| @@ -1282,18 +1379,21 @@ again: | |||
| 1282 | BUG_ON(ret); | 1379 | BUG_ON(ret); |
| 1283 | 1380 | ||
| 1284 | /* step three, lock the state bits for the whole range */ | 1381 | /* step three, lock the state bits for the whole range */ |
| 1285 | lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1382 | lock_extent_bits(tree, delalloc_start, delalloc_end, |
| 1383 | 0, &cached_state, GFP_NOFS); | ||
| 1286 | 1384 | ||
| 1287 | /* then test to make sure it is all still delalloc */ | 1385 | /* then test to make sure it is all still delalloc */ |
| 1288 | ret = test_range_bit(tree, delalloc_start, delalloc_end, | 1386 | ret = test_range_bit(tree, delalloc_start, delalloc_end, |
| 1289 | EXTENT_DELALLOC, 1); | 1387 | EXTENT_DELALLOC, 1, cached_state); |
| 1290 | if (!ret) { | 1388 | if (!ret) { |
| 1291 | unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1389 | unlock_extent_cached(tree, delalloc_start, delalloc_end, |
| 1390 | &cached_state, GFP_NOFS); | ||
| 1292 | __unlock_for_delalloc(inode, locked_page, | 1391 | __unlock_for_delalloc(inode, locked_page, |
| 1293 | delalloc_start, delalloc_end); | 1392 | delalloc_start, delalloc_end); |
| 1294 | cond_resched(); | 1393 | cond_resched(); |
| 1295 | goto again; | 1394 | goto again; |
| 1296 | } | 1395 | } |
| 1396 | free_extent_state(cached_state); | ||
| 1297 | *start = delalloc_start; | 1397 | *start = delalloc_start; |
| 1298 | *end = delalloc_end; | 1398 | *end = delalloc_end; |
| 1299 | out_failed: | 1399 | out_failed: |
| @@ -1303,11 +1403,7 @@ out_failed: | |||
| 1303 | int extent_clear_unlock_delalloc(struct inode *inode, | 1403 | int extent_clear_unlock_delalloc(struct inode *inode, |
| 1304 | struct extent_io_tree *tree, | 1404 | struct extent_io_tree *tree, |
| 1305 | u64 start, u64 end, struct page *locked_page, | 1405 | u64 start, u64 end, struct page *locked_page, |
| 1306 | int unlock_pages, | 1406 | unsigned long op) |
| 1307 | int clear_unlock, | ||
| 1308 | int clear_delalloc, int clear_dirty, | ||
| 1309 | int set_writeback, | ||
| 1310 | int end_writeback) | ||
| 1311 | { | 1407 | { |
| 1312 | int ret; | 1408 | int ret; |
| 1313 | struct page *pages[16]; | 1409 | struct page *pages[16]; |
| @@ -1317,16 +1413,21 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1317 | int i; | 1413 | int i; |
| 1318 | int clear_bits = 0; | 1414 | int clear_bits = 0; |
| 1319 | 1415 | ||
| 1320 | if (clear_unlock) | 1416 | if (op & EXTENT_CLEAR_UNLOCK) |
| 1321 | clear_bits |= EXTENT_LOCKED; | 1417 | clear_bits |= EXTENT_LOCKED; |
| 1322 | if (clear_dirty) | 1418 | if (op & EXTENT_CLEAR_DIRTY) |
| 1323 | clear_bits |= EXTENT_DIRTY; | 1419 | clear_bits |= EXTENT_DIRTY; |
| 1324 | 1420 | ||
| 1325 | if (clear_delalloc) | 1421 | if (op & EXTENT_CLEAR_DELALLOC) |
| 1326 | clear_bits |= EXTENT_DELALLOC; | 1422 | clear_bits |= EXTENT_DELALLOC; |
| 1327 | 1423 | ||
| 1328 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); | 1424 | if (op & EXTENT_CLEAR_ACCOUNTING) |
| 1329 | if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) | 1425 | clear_bits |= EXTENT_DO_ACCOUNTING; |
| 1426 | |||
| 1427 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | ||
| 1428 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | ||
| 1429 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | ||
| 1430 | EXTENT_SET_PRIVATE2))) | ||
| 1330 | return 0; | 1431 | return 0; |
| 1331 | 1432 | ||
| 1332 | while (nr_pages > 0) { | 1433 | while (nr_pages > 0) { |
| @@ -1334,17 +1435,21 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1334 | min_t(unsigned long, | 1435 | min_t(unsigned long, |
| 1335 | nr_pages, ARRAY_SIZE(pages)), pages); | 1436 | nr_pages, ARRAY_SIZE(pages)), pages); |
| 1336 | for (i = 0; i < ret; i++) { | 1437 | for (i = 0; i < ret; i++) { |
| 1438 | |||
| 1439 | if (op & EXTENT_SET_PRIVATE2) | ||
| 1440 | SetPagePrivate2(pages[i]); | ||
| 1441 | |||
| 1337 | if (pages[i] == locked_page) { | 1442 | if (pages[i] == locked_page) { |
| 1338 | page_cache_release(pages[i]); | 1443 | page_cache_release(pages[i]); |
| 1339 | continue; | 1444 | continue; |
| 1340 | } | 1445 | } |
| 1341 | if (clear_dirty) | 1446 | if (op & EXTENT_CLEAR_DIRTY) |
| 1342 | clear_page_dirty_for_io(pages[i]); | 1447 | clear_page_dirty_for_io(pages[i]); |
| 1343 | if (set_writeback) | 1448 | if (op & EXTENT_SET_WRITEBACK) |
| 1344 | set_page_writeback(pages[i]); | 1449 | set_page_writeback(pages[i]); |
| 1345 | if (end_writeback) | 1450 | if (op & EXTENT_END_WRITEBACK) |
| 1346 | end_page_writeback(pages[i]); | 1451 | end_page_writeback(pages[i]); |
| 1347 | if (unlock_pages) | 1452 | if (op & EXTENT_CLEAR_UNLOCK_PAGE) |
| 1348 | unlock_page(pages[i]); | 1453 | unlock_page(pages[i]); |
| 1349 | page_cache_release(pages[i]); | 1454 | page_cache_release(pages[i]); |
| 1350 | } | 1455 | } |
| @@ -1476,14 +1581,17 @@ out: | |||
| 1476 | * range is found set. | 1581 | * range is found set. |
| 1477 | */ | 1582 | */ |
| 1478 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1583 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 1479 | int bits, int filled) | 1584 | int bits, int filled, struct extent_state *cached) |
| 1480 | { | 1585 | { |
| 1481 | struct extent_state *state = NULL; | 1586 | struct extent_state *state = NULL; |
| 1482 | struct rb_node *node; | 1587 | struct rb_node *node; |
| 1483 | int bitset = 0; | 1588 | int bitset = 0; |
| 1484 | 1589 | ||
| 1485 | spin_lock(&tree->lock); | 1590 | spin_lock(&tree->lock); |
| 1486 | node = tree_search(tree, start); | 1591 | if (cached && cached->tree && cached->start == start) |
| 1592 | node = &cached->rb_node; | ||
| 1593 | else | ||
| 1594 | node = tree_search(tree, start); | ||
| 1487 | while (node && start <= end) { | 1595 | while (node && start <= end) { |
| 1488 | state = rb_entry(node, struct extent_state, rb_node); | 1596 | state = rb_entry(node, struct extent_state, rb_node); |
| 1489 | 1597 | ||
| @@ -1503,6 +1611,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 1503 | bitset = 0; | 1611 | bitset = 0; |
| 1504 | break; | 1612 | break; |
| 1505 | } | 1613 | } |
| 1614 | |||
| 1615 | if (state->end == (u64)-1) | ||
| 1616 | break; | ||
| 1617 | |||
| 1506 | start = state->end + 1; | 1618 | start = state->end + 1; |
| 1507 | if (start > end) | 1619 | if (start > end) |
| 1508 | break; | 1620 | break; |
| @@ -1526,7 +1638,7 @@ static int check_page_uptodate(struct extent_io_tree *tree, | |||
| 1526 | { | 1638 | { |
| 1527 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1639 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 1528 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1640 | u64 end = start + PAGE_CACHE_SIZE - 1; |
| 1529 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) | 1641 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) |
| 1530 | SetPageUptodate(page); | 1642 | SetPageUptodate(page); |
| 1531 | return 0; | 1643 | return 0; |
| 1532 | } | 1644 | } |
| @@ -1540,7 +1652,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
| 1540 | { | 1652 | { |
| 1541 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1653 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 1542 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1654 | u64 end = start + PAGE_CACHE_SIZE - 1; |
| 1543 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) | 1655 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) |
| 1544 | unlock_page(page); | 1656 | unlock_page(page); |
| 1545 | return 0; | 1657 | return 0; |
| 1546 | } | 1658 | } |
| @@ -1552,10 +1664,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
| 1552 | static int check_page_writeback(struct extent_io_tree *tree, | 1664 | static int check_page_writeback(struct extent_io_tree *tree, |
| 1553 | struct page *page) | 1665 | struct page *page) |
| 1554 | { | 1666 | { |
| 1555 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1667 | end_page_writeback(page); |
| 1556 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 1557 | if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) | ||
| 1558 | end_page_writeback(page); | ||
| 1559 | return 0; | 1668 | return 0; |
| 1560 | } | 1669 | } |
| 1561 | 1670 | ||
| @@ -1613,13 +1722,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
| 1613 | } | 1722 | } |
| 1614 | 1723 | ||
| 1615 | if (!uptodate) { | 1724 | if (!uptodate) { |
| 1616 | clear_extent_uptodate(tree, start, end, GFP_ATOMIC); | 1725 | clear_extent_uptodate(tree, start, end, GFP_NOFS); |
| 1617 | ClearPageUptodate(page); | 1726 | ClearPageUptodate(page); |
| 1618 | SetPageError(page); | 1727 | SetPageError(page); |
| 1619 | } | 1728 | } |
| 1620 | 1729 | ||
| 1621 | clear_extent_writeback(tree, start, end, GFP_ATOMIC); | ||
| 1622 | |||
| 1623 | if (whole_page) | 1730 | if (whole_page) |
| 1624 | end_page_writeback(page); | 1731 | end_page_writeback(page); |
| 1625 | else | 1732 | else |
| @@ -1983,7 +2090,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 1983 | continue; | 2090 | continue; |
| 1984 | } | 2091 | } |
| 1985 | /* the get_extent function already copied into the page */ | 2092 | /* the get_extent function already copied into the page */ |
| 1986 | if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { | 2093 | if (test_range_bit(tree, cur, cur_end, |
| 2094 | EXTENT_UPTODATE, 1, NULL)) { | ||
| 1987 | check_page_uptodate(tree, page); | 2095 | check_page_uptodate(tree, page); |
| 1988 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2096 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); |
| 1989 | cur = cur + iosize; | 2097 | cur = cur + iosize; |
| @@ -2078,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2078 | u64 iosize; | 2186 | u64 iosize; |
| 2079 | u64 unlock_start; | 2187 | u64 unlock_start; |
| 2080 | sector_t sector; | 2188 | sector_t sector; |
| 2189 | struct extent_state *cached_state = NULL; | ||
| 2081 | struct extent_map *em; | 2190 | struct extent_map *em; |
| 2082 | struct block_device *bdev; | 2191 | struct block_device *bdev; |
| 2083 | int ret; | 2192 | int ret; |
| @@ -2124,6 +2233,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2124 | delalloc_end = 0; | 2233 | delalloc_end = 0; |
| 2125 | page_started = 0; | 2234 | page_started = 0; |
| 2126 | if (!epd->extent_locked) { | 2235 | if (!epd->extent_locked) { |
| 2236 | u64 delalloc_to_write = 0; | ||
| 2127 | /* | 2237 | /* |
| 2128 | * make sure the wbc mapping index is at least updated | 2238 | * make sure the wbc mapping index is at least updated |
| 2129 | * to this page. | 2239 | * to this page. |
| @@ -2143,8 +2253,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2143 | tree->ops->fill_delalloc(inode, page, delalloc_start, | 2253 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
| 2144 | delalloc_end, &page_started, | 2254 | delalloc_end, &page_started, |
| 2145 | &nr_written); | 2255 | &nr_written); |
| 2256 | /* | ||
| 2257 | * delalloc_end is already one less than the total | ||
| 2258 | * length, so we don't subtract one from | ||
| 2259 | * PAGE_CACHE_SIZE | ||
| 2260 | */ | ||
| 2261 | delalloc_to_write += (delalloc_end - delalloc_start + | ||
| 2262 | PAGE_CACHE_SIZE) >> | ||
| 2263 | PAGE_CACHE_SHIFT; | ||
| 2146 | delalloc_start = delalloc_end + 1; | 2264 | delalloc_start = delalloc_end + 1; |
| 2147 | } | 2265 | } |
| 2266 | if (wbc->nr_to_write < delalloc_to_write) { | ||
| 2267 | int thresh = 8192; | ||
| 2268 | |||
| 2269 | if (delalloc_to_write < thresh * 2) | ||
| 2270 | thresh = delalloc_to_write; | ||
| 2271 | wbc->nr_to_write = min_t(u64, delalloc_to_write, | ||
| 2272 | thresh); | ||
| 2273 | } | ||
| 2148 | 2274 | ||
| 2149 | /* did the fill delalloc function already unlock and start | 2275 | /* did the fill delalloc function already unlock and start |
| 2150 | * the IO? | 2276 | * the IO? |
| @@ -2160,15 +2286,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2160 | goto done_unlocked; | 2286 | goto done_unlocked; |
| 2161 | } | 2287 | } |
| 2162 | } | 2288 | } |
| 2163 | lock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2164 | |||
| 2165 | unlock_start = start; | ||
| 2166 | |||
| 2167 | if (tree->ops && tree->ops->writepage_start_hook) { | 2289 | if (tree->ops && tree->ops->writepage_start_hook) { |
| 2168 | ret = tree->ops->writepage_start_hook(page, start, | 2290 | ret = tree->ops->writepage_start_hook(page, start, |
| 2169 | page_end); | 2291 | page_end); |
| 2170 | if (ret == -EAGAIN) { | 2292 | if (ret == -EAGAIN) { |
| 2171 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2172 | redirty_page_for_writepage(wbc, page); | 2293 | redirty_page_for_writepage(wbc, page); |
| 2173 | update_nr_written(page, wbc, nr_written); | 2294 | update_nr_written(page, wbc, nr_written); |
| 2174 | unlock_page(page); | 2295 | unlock_page(page); |
| @@ -2184,12 +2305,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2184 | update_nr_written(page, wbc, nr_written + 1); | 2305 | update_nr_written(page, wbc, nr_written + 1); |
| 2185 | 2306 | ||
| 2186 | end = page_end; | 2307 | end = page_end; |
| 2187 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | ||
| 2188 | printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); | ||
| 2189 | |||
| 2190 | if (last_byte <= start) { | 2308 | if (last_byte <= start) { |
| 2191 | clear_extent_dirty(tree, start, page_end, GFP_NOFS); | ||
| 2192 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2193 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2309 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 2194 | tree->ops->writepage_end_io_hook(page, start, | 2310 | tree->ops->writepage_end_io_hook(page, start, |
| 2195 | page_end, NULL, 1); | 2311 | page_end, NULL, 1); |
| @@ -2197,13 +2313,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2197 | goto done; | 2313 | goto done; |
| 2198 | } | 2314 | } |
| 2199 | 2315 | ||
| 2200 | set_extent_uptodate(tree, start, page_end, GFP_NOFS); | ||
| 2201 | blocksize = inode->i_sb->s_blocksize; | 2316 | blocksize = inode->i_sb->s_blocksize; |
| 2202 | 2317 | ||
| 2203 | while (cur <= end) { | 2318 | while (cur <= end) { |
| 2204 | if (cur >= last_byte) { | 2319 | if (cur >= last_byte) { |
| 2205 | clear_extent_dirty(tree, cur, page_end, GFP_NOFS); | ||
| 2206 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 2207 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2320 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 2208 | tree->ops->writepage_end_io_hook(page, cur, | 2321 | tree->ops->writepage_end_io_hook(page, cur, |
| 2209 | page_end, NULL, 1); | 2322 | page_end, NULL, 1); |
| @@ -2235,12 +2348,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2235 | */ | 2348 | */ |
| 2236 | if (compressed || block_start == EXTENT_MAP_HOLE || | 2349 | if (compressed || block_start == EXTENT_MAP_HOLE || |
| 2237 | block_start == EXTENT_MAP_INLINE) { | 2350 | block_start == EXTENT_MAP_INLINE) { |
| 2238 | clear_extent_dirty(tree, cur, | ||
| 2239 | cur + iosize - 1, GFP_NOFS); | ||
| 2240 | |||
| 2241 | unlock_extent(tree, unlock_start, cur + iosize - 1, | ||
| 2242 | GFP_NOFS); | ||
| 2243 | |||
| 2244 | /* | 2351 | /* |
| 2245 | * end_io notification does not happen here for | 2352 | * end_io notification does not happen here for |
| 2246 | * compressed extents | 2353 | * compressed extents |
| @@ -2265,13 +2372,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2265 | } | 2372 | } |
| 2266 | /* leave this out until we have a page_mkwrite call */ | 2373 | /* leave this out until we have a page_mkwrite call */ |
| 2267 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | 2374 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, |
| 2268 | EXTENT_DIRTY, 0)) { | 2375 | EXTENT_DIRTY, 0, NULL)) { |
| 2269 | cur = cur + iosize; | 2376 | cur = cur + iosize; |
| 2270 | pg_offset += iosize; | 2377 | pg_offset += iosize; |
| 2271 | continue; | 2378 | continue; |
| 2272 | } | 2379 | } |
| 2273 | 2380 | ||
| 2274 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 2275 | if (tree->ops && tree->ops->writepage_io_hook) { | 2381 | if (tree->ops && tree->ops->writepage_io_hook) { |
| 2276 | ret = tree->ops->writepage_io_hook(page, cur, | 2382 | ret = tree->ops->writepage_io_hook(page, cur, |
| 2277 | cur + iosize - 1); | 2383 | cur + iosize - 1); |
| @@ -2309,12 +2415,12 @@ done: | |||
| 2309 | set_page_writeback(page); | 2415 | set_page_writeback(page); |
| 2310 | end_page_writeback(page); | 2416 | end_page_writeback(page); |
| 2311 | } | 2417 | } |
| 2312 | if (unlock_start <= page_end) | ||
| 2313 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 2314 | unlock_page(page); | 2418 | unlock_page(page); |
| 2315 | 2419 | ||
| 2316 | done_unlocked: | 2420 | done_unlocked: |
| 2317 | 2421 | ||
| 2422 | /* drop our reference on any cached states */ | ||
| 2423 | free_extent_state(cached_state); | ||
| 2318 | return 0; | 2424 | return 0; |
| 2319 | } | 2425 | } |
| 2320 | 2426 | ||
| @@ -2339,9 +2445,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 2339 | writepage_t writepage, void *data, | 2445 | writepage_t writepage, void *data, |
| 2340 | void (*flush_fn)(void *)) | 2446 | void (*flush_fn)(void *)) |
| 2341 | { | 2447 | { |
| 2342 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
| 2343 | int ret = 0; | 2448 | int ret = 0; |
| 2344 | int done = 0; | 2449 | int done = 0; |
| 2450 | int nr_to_write_done = 0; | ||
| 2345 | struct pagevec pvec; | 2451 | struct pagevec pvec; |
| 2346 | int nr_pages; | 2452 | int nr_pages; |
| 2347 | pgoff_t index; | 2453 | pgoff_t index; |
| @@ -2361,7 +2467,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 2361 | scanned = 1; | 2467 | scanned = 1; |
| 2362 | } | 2468 | } |
| 2363 | retry: | 2469 | retry: |
| 2364 | while (!done && (index <= end) && | 2470 | while (!done && !nr_to_write_done && (index <= end) && |
| 2365 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2471 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
| 2366 | PAGECACHE_TAG_DIRTY, min(end - index, | 2472 | PAGECACHE_TAG_DIRTY, min(end - index, |
| 2367 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 2473 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
| @@ -2412,12 +2518,15 @@ retry: | |||
| 2412 | unlock_page(page); | 2518 | unlock_page(page); |
| 2413 | ret = 0; | 2519 | ret = 0; |
| 2414 | } | 2520 | } |
| 2415 | if (ret || wbc->nr_to_write <= 0) | 2521 | if (ret) |
| 2416 | done = 1; | ||
| 2417 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
| 2418 | wbc->encountered_congestion = 1; | ||
| 2419 | done = 1; | 2522 | done = 1; |
| 2420 | } | 2523 | |
| 2524 | /* | ||
| 2525 | * the filesystem may choose to bump up nr_to_write. | ||
| 2526 | * We have to make sure to honor the new nr_to_write | ||
| 2527 | * at any time | ||
| 2528 | */ | ||
| 2529 | nr_to_write_done = wbc->nr_to_write <= 0; | ||
| 2421 | } | 2530 | } |
| 2422 | pagevec_release(&pvec); | 2531 | pagevec_release(&pvec); |
| 2423 | cond_resched(); | 2532 | cond_resched(); |
| @@ -2604,10 +2713,11 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
| 2604 | return 0; | 2713 | return 0; |
| 2605 | 2714 | ||
| 2606 | lock_extent(tree, start, end, GFP_NOFS); | 2715 | lock_extent(tree, start, end, GFP_NOFS); |
| 2607 | wait_on_extent_writeback(tree, start, end); | 2716 | wait_on_page_writeback(page); |
| 2608 | clear_extent_bit(tree, start, end, | 2717 | clear_extent_bit(tree, start, end, |
| 2609 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, | 2718 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 2610 | 1, 1, GFP_NOFS); | 2719 | EXTENT_DO_ACCOUNTING, |
| 2720 | 1, 1, NULL, GFP_NOFS); | ||
| 2611 | return 0; | 2721 | return 0; |
| 2612 | } | 2722 | } |
| 2613 | 2723 | ||
| @@ -2687,7 +2797,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
| 2687 | !isnew && !PageUptodate(page) && | 2797 | !isnew && !PageUptodate(page) && |
| 2688 | (block_off_end > to || block_off_start < from) && | 2798 | (block_off_end > to || block_off_start < from) && |
| 2689 | !test_range_bit(tree, block_start, cur_end, | 2799 | !test_range_bit(tree, block_start, cur_end, |
| 2690 | EXTENT_UPTODATE, 1)) { | 2800 | EXTENT_UPTODATE, 1, NULL)) { |
| 2691 | u64 sector; | 2801 | u64 sector; |
| 2692 | u64 extent_offset = block_start - em->start; | 2802 | u64 extent_offset = block_start - em->start; |
| 2693 | size_t iosize; | 2803 | size_t iosize; |
| @@ -2701,7 +2811,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
| 2701 | */ | 2811 | */ |
| 2702 | set_extent_bit(tree, block_start, | 2812 | set_extent_bit(tree, block_start, |
| 2703 | block_start + iosize - 1, | 2813 | block_start + iosize - 1, |
| 2704 | EXTENT_LOCKED, 0, NULL, GFP_NOFS); | 2814 | EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); |
| 2705 | ret = submit_extent_page(READ, tree, page, | 2815 | ret = submit_extent_page(READ, tree, page, |
| 2706 | sector, iosize, page_offset, em->bdev, | 2816 | sector, iosize, page_offset, em->bdev, |
| 2707 | NULL, 1, | 2817 | NULL, 1, |
| @@ -2742,13 +2852,18 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
| 2742 | int ret = 1; | 2852 | int ret = 1; |
| 2743 | 2853 | ||
| 2744 | if (test_range_bit(tree, start, end, | 2854 | if (test_range_bit(tree, start, end, |
| 2745 | EXTENT_IOBITS | EXTENT_ORDERED, 0)) | 2855 | EXTENT_IOBITS, 0, NULL)) |
| 2746 | ret = 0; | 2856 | ret = 0; |
| 2747 | else { | 2857 | else { |
| 2748 | if ((mask & GFP_NOFS) == GFP_NOFS) | 2858 | if ((mask & GFP_NOFS) == GFP_NOFS) |
| 2749 | mask = GFP_NOFS; | 2859 | mask = GFP_NOFS; |
| 2750 | clear_extent_bit(tree, start, end, EXTENT_UPTODATE, | 2860 | /* |
| 2751 | 1, 1, mask); | 2861 | * at this point we can safely clear everything except the |
| 2862 | * locked bit and the nodatasum bit | ||
| 2863 | */ | ||
| 2864 | clear_extent_bit(tree, start, end, | ||
| 2865 | ~(EXTENT_LOCKED | EXTENT_NODATASUM), | ||
| 2866 | 0, 0, NULL, mask); | ||
| 2752 | } | 2867 | } |
| 2753 | return ret; | 2868 | return ret; |
| 2754 | } | 2869 | } |
| @@ -2771,29 +2886,28 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
| 2771 | u64 len; | 2886 | u64 len; |
| 2772 | while (start <= end) { | 2887 | while (start <= end) { |
| 2773 | len = end - start + 1; | 2888 | len = end - start + 1; |
| 2774 | spin_lock(&map->lock); | 2889 | write_lock(&map->lock); |
| 2775 | em = lookup_extent_mapping(map, start, len); | 2890 | em = lookup_extent_mapping(map, start, len); |
| 2776 | if (!em || IS_ERR(em)) { | 2891 | if (!em || IS_ERR(em)) { |
| 2777 | spin_unlock(&map->lock); | 2892 | write_unlock(&map->lock); |
| 2778 | break; | 2893 | break; |
| 2779 | } | 2894 | } |
| 2780 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || | 2895 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || |
| 2781 | em->start != start) { | 2896 | em->start != start) { |
| 2782 | spin_unlock(&map->lock); | 2897 | write_unlock(&map->lock); |
| 2783 | free_extent_map(em); | 2898 | free_extent_map(em); |
| 2784 | break; | 2899 | break; |
| 2785 | } | 2900 | } |
| 2786 | if (!test_range_bit(tree, em->start, | 2901 | if (!test_range_bit(tree, em->start, |
| 2787 | extent_map_end(em) - 1, | 2902 | extent_map_end(em) - 1, |
| 2788 | EXTENT_LOCKED | EXTENT_WRITEBACK | | 2903 | EXTENT_LOCKED | EXTENT_WRITEBACK, |
| 2789 | EXTENT_ORDERED, | 2904 | 0, NULL)) { |
| 2790 | 0)) { | ||
| 2791 | remove_extent_mapping(map, em); | 2905 | remove_extent_mapping(map, em); |
| 2792 | /* once for the rb tree */ | 2906 | /* once for the rb tree */ |
| 2793 | free_extent_map(em); | 2907 | free_extent_map(em); |
| 2794 | } | 2908 | } |
| 2795 | start = extent_map_end(em); | 2909 | start = extent_map_end(em); |
| 2796 | spin_unlock(&map->lock); | 2910 | write_unlock(&map->lock); |
| 2797 | 2911 | ||
| 2798 | /* once for us */ | 2912 | /* once for us */ |
| 2799 | free_extent_map(em); | 2913 | free_extent_map(em); |
| @@ -3203,7 +3317,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
| 3203 | int uptodate; | 3317 | int uptodate; |
| 3204 | unsigned long index; | 3318 | unsigned long index; |
| 3205 | 3319 | ||
| 3206 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); | 3320 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); |
| 3207 | if (ret) | 3321 | if (ret) |
| 3208 | return 1; | 3322 | return 1; |
| 3209 | while (start <= end) { | 3323 | while (start <= end) { |
| @@ -3233,7 +3347,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3233 | return 1; | 3347 | return 1; |
| 3234 | 3348 | ||
| 3235 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3349 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3236 | EXTENT_UPTODATE, 1); | 3350 | EXTENT_UPTODATE, 1, NULL); |
| 3237 | if (ret) | 3351 | if (ret) |
| 3238 | return ret; | 3352 | return ret; |
| 3239 | 3353 | ||
| @@ -3269,7 +3383,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3269 | return 0; | 3383 | return 0; |
| 3270 | 3384 | ||
| 3271 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3385 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3272 | EXTENT_UPTODATE, 1)) { | 3386 | EXTENT_UPTODATE, 1, NULL)) { |
| 3273 | return 0; | 3387 | return 0; |
| 3274 | } | 3388 | } |
| 3275 | 3389 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5bc20abf3f3d..36de250a7b2b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -13,10 +13,9 @@ | |||
| 13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1 << 6) |
| 14 | #define EXTENT_DEFRAG_DONE (1 << 7) | 14 | #define EXTENT_DEFRAG_DONE (1 << 7) |
| 15 | #define EXTENT_BUFFER_FILLED (1 << 8) | 15 | #define EXTENT_BUFFER_FILLED (1 << 8) |
| 16 | #define EXTENT_ORDERED (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
| 17 | #define EXTENT_ORDERED_METADATA (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
| 18 | #define EXTENT_BOUNDARY (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
| 19 | #define EXTENT_NODATASUM (1 << 12) | ||
| 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 21 | 20 | ||
| 22 | /* flags for bio submission */ | 21 | /* flags for bio submission */ |
| @@ -27,6 +26,16 @@ | |||
| 27 | #define EXTENT_BUFFER_BLOCKING 1 | 26 | #define EXTENT_BUFFER_BLOCKING 1 |
| 28 | #define EXTENT_BUFFER_DIRTY 2 | 27 | #define EXTENT_BUFFER_DIRTY 2 |
| 29 | 28 | ||
| 29 | /* these are flags for extent_clear_unlock_delalloc */ | ||
| 30 | #define EXTENT_CLEAR_UNLOCK_PAGE 0x1 | ||
| 31 | #define EXTENT_CLEAR_UNLOCK 0x2 | ||
| 32 | #define EXTENT_CLEAR_DELALLOC 0x4 | ||
| 33 | #define EXTENT_CLEAR_DIRTY 0x8 | ||
| 34 | #define EXTENT_SET_WRITEBACK 0x10 | ||
| 35 | #define EXTENT_END_WRITEBACK 0x20 | ||
| 36 | #define EXTENT_SET_PRIVATE2 0x40 | ||
| 37 | #define EXTENT_CLEAR_ACCOUNTING 0x80 | ||
| 38 | |||
| 30 | /* | 39 | /* |
| 31 | * page->private values. Every page that is controlled by the extent | 40 | * page->private values. Every page that is controlled by the extent |
| 32 | * map has page->private set to one. | 41 | * map has page->private set to one. |
| @@ -62,8 +71,13 @@ struct extent_io_ops { | |||
| 62 | struct extent_state *state, int uptodate); | 71 | struct extent_state *state, int uptodate); |
| 63 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, |
| 64 | unsigned long old, unsigned long bits); | 73 | unsigned long old, unsigned long bits); |
| 65 | int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
| 66 | unsigned long old, unsigned long bits); | 75 | unsigned long bits); |
| 76 | int (*merge_extent_hook)(struct inode *inode, | ||
| 77 | struct extent_state *new, | ||
| 78 | struct extent_state *other); | ||
| 79 | int (*split_extent_hook)(struct inode *inode, | ||
| 80 | struct extent_state *orig, u64 split); | ||
| 67 | int (*write_cache_pages_lock_hook)(struct page *page); | 81 | int (*write_cache_pages_lock_hook)(struct page *page); |
| 68 | }; | 82 | }; |
| 69 | 83 | ||
| @@ -81,10 +95,14 @@ struct extent_state { | |||
| 81 | u64 start; | 95 | u64 start; |
| 82 | u64 end; /* inclusive */ | 96 | u64 end; /* inclusive */ |
| 83 | struct rb_node rb_node; | 97 | struct rb_node rb_node; |
| 98 | |||
| 99 | /* ADD NEW ELEMENTS AFTER THIS */ | ||
| 84 | struct extent_io_tree *tree; | 100 | struct extent_io_tree *tree; |
| 85 | wait_queue_head_t wq; | 101 | wait_queue_head_t wq; |
| 86 | atomic_t refs; | 102 | atomic_t refs; |
| 87 | unsigned long state; | 103 | unsigned long state; |
| 104 | u64 split_start; | ||
| 105 | u64 split_end; | ||
| 88 | 106 | ||
| 89 | /* for use by the FS */ | 107 | /* for use by the FS */ |
| 90 | u64 private; | 108 | u64 private; |
| @@ -142,6 +160,8 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
| 142 | struct extent_io_tree *tree, struct page *page, | 160 | struct extent_io_tree *tree, struct page *page, |
| 143 | gfp_t mask); | 161 | gfp_t mask); |
| 144 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 162 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
| 163 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 164 | int bits, struct extent_state **cached, gfp_t mask); | ||
| 145 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 165 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
| 146 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 166 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 147 | gfp_t mask); | 167 | gfp_t mask); |
| @@ -155,11 +175,12 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
| 155 | u64 max_bytes, unsigned long bits); | 175 | u64 max_bytes, unsigned long bits); |
| 156 | 176 | ||
| 157 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 177 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 158 | int bits, int filled); | 178 | int bits, int filled, struct extent_state *cached_state); |
| 159 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 179 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 160 | int bits, gfp_t mask); | 180 | int bits, gfp_t mask); |
| 161 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 181 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 162 | int bits, int wake, int delete, gfp_t mask); | 182 | int bits, int wake, int delete, struct extent_state **cached, |
| 183 | gfp_t mask); | ||
| 163 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 164 | int bits, gfp_t mask); | 185 | int bits, gfp_t mask); |
| 165 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 186 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -278,9 +299,5 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
| 278 | int extent_clear_unlock_delalloc(struct inode *inode, | 299 | int extent_clear_unlock_delalloc(struct inode *inode, |
| 279 | struct extent_io_tree *tree, | 300 | struct extent_io_tree *tree, |
| 280 | u64 start, u64 end, struct page *locked_page, | 301 | u64 start, u64 end, struct page *locked_page, |
| 281 | int unlock_page, | 302 | unsigned long op); |
| 282 | int clear_unlock, | ||
| 283 | int clear_delalloc, int clear_dirty, | ||
| 284 | int set_writeback, | ||
| 285 | int end_writeback); | ||
| 286 | #endif | 303 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 30c9365861e6..2c726b7b9faa 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -36,7 +36,7 @@ void extent_map_exit(void) | |||
| 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) |
| 37 | { | 37 | { |
| 38 | tree->map.rb_node = NULL; | 38 | tree->map.rb_node = NULL; |
| 39 | spin_lock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | /** | 42 | /** |
| @@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
| 198 | return 0; | 198 | return 0; |
| 199 | } | 199 | } |
| 200 | 200 | ||
| 201 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
| 202 | { | ||
| 203 | int ret = 0; | ||
| 204 | struct extent_map *merge = NULL; | ||
| 205 | struct rb_node *rb; | ||
| 206 | struct extent_map *em; | ||
| 207 | |||
| 208 | write_lock(&tree->lock); | ||
| 209 | em = lookup_extent_mapping(tree, start, len); | ||
| 210 | |||
| 211 | WARN_ON(em->start != start || !em); | ||
| 212 | |||
| 213 | if (!em) | ||
| 214 | goto out; | ||
| 215 | |||
| 216 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 217 | |||
| 218 | if (em->start != 0) { | ||
| 219 | rb = rb_prev(&em->rb_node); | ||
| 220 | if (rb) | ||
| 221 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 222 | if (rb && mergable_maps(merge, em)) { | ||
| 223 | em->start = merge->start; | ||
| 224 | em->len += merge->len; | ||
| 225 | em->block_len += merge->block_len; | ||
| 226 | em->block_start = merge->block_start; | ||
| 227 | merge->in_tree = 0; | ||
| 228 | rb_erase(&merge->rb_node, &tree->map); | ||
| 229 | free_extent_map(merge); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | rb = rb_next(&em->rb_node); | ||
| 234 | if (rb) | ||
| 235 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 236 | if (rb && mergable_maps(em, merge)) { | ||
| 237 | em->len += merge->len; | ||
| 238 | em->block_len += merge->len; | ||
| 239 | rb_erase(&merge->rb_node, &tree->map); | ||
| 240 | merge->in_tree = 0; | ||
| 241 | free_extent_map(merge); | ||
| 242 | } | ||
| 243 | |||
| 244 | free_extent_map(em); | ||
| 245 | out: | ||
| 246 | write_unlock(&tree->lock); | ||
| 247 | return ret; | ||
| 248 | |||
| 249 | } | ||
| 250 | |||
| 201 | /** | 251 | /** |
| 202 | * add_extent_mapping - add new extent map to the extent tree | 252 | * add_extent_mapping - add new extent map to the extent tree |
| 203 | * @tree: tree to insert new map in | 253 | * @tree: tree to insert new map in |
| @@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 222 | ret = -EEXIST; | 272 | ret = -EEXIST; |
| 223 | goto out; | 273 | goto out; |
| 224 | } | 274 | } |
| 225 | assert_spin_locked(&tree->lock); | ||
| 226 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 275 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
| 227 | if (rb) { | 276 | if (rb) { |
| 228 | ret = -EEXIST; | 277 | ret = -EEXIST; |
| @@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 285 | struct rb_node *next = NULL; | 334 | struct rb_node *next = NULL; |
| 286 | u64 end = range_end(start, len); | 335 | u64 end = range_end(start, len); |
| 287 | 336 | ||
| 288 | assert_spin_locked(&tree->lock); | ||
| 289 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 337 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
| 290 | if (!rb_node && prev) { | 338 | if (!rb_node && prev) { |
| 291 | em = rb_entry(prev, struct extent_map, rb_node); | 339 | em = rb_entry(prev, struct extent_map, rb_node); |
| @@ -319,6 +367,54 @@ out: | |||
| 319 | } | 367 | } |
| 320 | 368 | ||
| 321 | /** | 369 | /** |
| 370 | * search_extent_mapping - find a nearby extent map | ||
| 371 | * @tree: tree to lookup in | ||
| 372 | * @start: byte offset to start the search | ||
| 373 | * @len: length of the lookup range | ||
| 374 | * | ||
| 375 | * Find and return the first extent_map struct in @tree that intersects the | ||
| 376 | * [start, len] range. | ||
| 377 | * | ||
| 378 | * If one can't be found, any nearby extent may be returned | ||
| 379 | */ | ||
| 380 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
| 381 | u64 start, u64 len) | ||
| 382 | { | ||
| 383 | struct extent_map *em; | ||
| 384 | struct rb_node *rb_node; | ||
| 385 | struct rb_node *prev = NULL; | ||
| 386 | struct rb_node *next = NULL; | ||
| 387 | |||
| 388 | rb_node = __tree_search(&tree->map, start, &prev, &next); | ||
| 389 | if (!rb_node && prev) { | ||
| 390 | em = rb_entry(prev, struct extent_map, rb_node); | ||
| 391 | goto found; | ||
| 392 | } | ||
| 393 | if (!rb_node && next) { | ||
| 394 | em = rb_entry(next, struct extent_map, rb_node); | ||
| 395 | goto found; | ||
| 396 | } | ||
| 397 | if (!rb_node) { | ||
| 398 | em = NULL; | ||
| 399 | goto out; | ||
| 400 | } | ||
| 401 | if (IS_ERR(rb_node)) { | ||
| 402 | em = ERR_PTR(PTR_ERR(rb_node)); | ||
| 403 | goto out; | ||
| 404 | } | ||
| 405 | em = rb_entry(rb_node, struct extent_map, rb_node); | ||
| 406 | goto found; | ||
| 407 | |||
| 408 | em = NULL; | ||
| 409 | goto out; | ||
| 410 | |||
| 411 | found: | ||
| 412 | atomic_inc(&em->refs); | ||
| 413 | out: | ||
| 414 | return em; | ||
| 415 | } | ||
| 416 | |||
| 417 | /** | ||
| 322 | * remove_extent_mapping - removes an extent_map from the extent tree | 418 | * remove_extent_mapping - removes an extent_map from the extent tree |
| 323 | * @tree: extent tree to remove from | 419 | * @tree: extent tree to remove from |
| 324 | * @em: extent map beeing removed | 420 | * @em: extent map beeing removed |
| @@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 331 | int ret = 0; | 427 | int ret = 0; |
| 332 | 428 | ||
| 333 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 429 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
| 334 | assert_spin_locked(&tree->lock); | ||
| 335 | rb_erase(&em->rb_node, &tree->map); | 430 | rb_erase(&em->rb_node, &tree->map); |
| 336 | em->in_tree = 0; | 431 | em->in_tree = 0; |
| 337 | return ret; | 432 | return ret; |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fb6eeef06bb0..ab6d74b6e647 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -31,7 +31,7 @@ struct extent_map { | |||
| 31 | 31 | ||
| 32 | struct extent_map_tree { | 32 | struct extent_map_tree { |
| 33 | struct rb_root map; | 33 | struct rb_root map; |
| 34 | spinlock_t lock; | 34 | rwlock_t lock; |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | static inline u64 extent_map_end(struct extent_map *em) | 37 | static inline u64 extent_map_end(struct extent_map *em) |
| @@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); | |||
| 59 | void free_extent_map(struct extent_map *em); | 59 | void free_extent_map(struct extent_map *em); |
| 60 | int __init extent_map_init(void); | 60 | int __init extent_map_init(void); |
| 61 | void extent_map_exit(void); | 61 | void extent_map_exit(void); |
| 62 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | ||
| 63 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | ||
| 64 | u64 start, u64 len); | ||
| 62 | #endif | 65 | #endif |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4b833972273a..2d623aa0625f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 112 | int err = 0; | 112 | int err = 0; |
| 113 | int i; | 113 | int i; |
| 114 | struct inode *inode = fdentry(file)->d_inode; | 114 | struct inode *inode = fdentry(file)->d_inode; |
| 115 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 116 | u64 hint_byte; | ||
| 117 | u64 num_bytes; | 115 | u64 num_bytes; |
| 118 | u64 start_pos; | 116 | u64 start_pos; |
| 119 | u64 end_of_last_block; | 117 | u64 end_of_last_block; |
| @@ -125,23 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 125 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| 126 | 124 | ||
| 127 | end_of_last_block = start_pos + num_bytes - 1; | 125 | end_of_last_block = start_pos + num_bytes - 1; |
| 126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
| 127 | if (err) | ||
| 128 | return err; | ||
| 128 | 129 | ||
| 129 | lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 130 | trans = btrfs_join_transaction(root, 1); | ||
| 131 | if (!trans) { | ||
| 132 | err = -ENOMEM; | ||
| 133 | goto out_unlock; | ||
| 134 | } | ||
| 135 | btrfs_set_trans_block_group(trans, inode); | ||
| 136 | hint_byte = 0; | ||
| 137 | |||
| 138 | set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 139 | |||
| 140 | /* check for reserved extents on each page, we don't want | ||
| 141 | * to reset the delalloc bit on things that already have | ||
| 142 | * extents reserved. | ||
| 143 | */ | ||
| 144 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | ||
| 145 | for (i = 0; i < num_pages; i++) { | 130 | for (i = 0; i < num_pages; i++) { |
| 146 | struct page *p = pages[i]; | 131 | struct page *p = pages[i]; |
| 147 | SetPageUptodate(p); | 132 | SetPageUptodate(p); |
| @@ -155,9 +140,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 155 | * at this time. | 140 | * at this time. |
| 156 | */ | 141 | */ |
| 157 | } | 142 | } |
| 158 | err = btrfs_end_transaction(trans, root); | ||
| 159 | out_unlock: | ||
| 160 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 161 | return err; | 143 | return err; |
| 162 | } | 144 | } |
| 163 | 145 | ||
| @@ -189,18 +171,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 189 | if (!split2) | 171 | if (!split2) |
| 190 | split2 = alloc_extent_map(GFP_NOFS); | 172 | split2 = alloc_extent_map(GFP_NOFS); |
| 191 | 173 | ||
| 192 | spin_lock(&em_tree->lock); | 174 | write_lock(&em_tree->lock); |
| 193 | em = lookup_extent_mapping(em_tree, start, len); | 175 | em = lookup_extent_mapping(em_tree, start, len); |
| 194 | if (!em) { | 176 | if (!em) { |
| 195 | spin_unlock(&em_tree->lock); | 177 | write_unlock(&em_tree->lock); |
| 196 | break; | 178 | break; |
| 197 | } | 179 | } |
| 198 | flags = em->flags; | 180 | flags = em->flags; |
| 199 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 181 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
| 200 | spin_unlock(&em_tree->lock); | ||
| 201 | if (em->start <= start && | 182 | if (em->start <= start && |
| 202 | (!testend || em->start + em->len >= start + len)) { | 183 | (!testend || em->start + em->len >= start + len)) { |
| 203 | free_extent_map(em); | 184 | free_extent_map(em); |
| 185 | write_unlock(&em_tree->lock); | ||
| 204 | break; | 186 | break; |
| 205 | } | 187 | } |
| 206 | if (start < em->start) { | 188 | if (start < em->start) { |
| @@ -210,6 +192,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 210 | start = em->start + em->len; | 192 | start = em->start + em->len; |
| 211 | } | 193 | } |
| 212 | free_extent_map(em); | 194 | free_extent_map(em); |
| 195 | write_unlock(&em_tree->lock); | ||
| 213 | continue; | 196 | continue; |
| 214 | } | 197 | } |
| 215 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 198 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| @@ -260,7 +243,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 260 | free_extent_map(split); | 243 | free_extent_map(split); |
| 261 | split = NULL; | 244 | split = NULL; |
| 262 | } | 245 | } |
| 263 | spin_unlock(&em_tree->lock); | 246 | write_unlock(&em_tree->lock); |
| 264 | 247 | ||
| 265 | /* once for us */ | 248 | /* once for us */ |
| 266 | free_extent_map(em); | 249 | free_extent_map(em); |
| @@ -289,7 +272,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 289 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 272 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 290 | struct btrfs_root *root, struct inode *inode, | 273 | struct btrfs_root *root, struct inode *inode, |
| 291 | u64 start, u64 end, u64 locked_end, | 274 | u64 start, u64 end, u64 locked_end, |
| 292 | u64 inline_limit, u64 *hint_byte) | 275 | u64 inline_limit, u64 *hint_byte, int drop_cache) |
| 293 | { | 276 | { |
| 294 | u64 extent_end = 0; | 277 | u64 extent_end = 0; |
| 295 | u64 search_start = start; | 278 | u64 search_start = start; |
| @@ -314,7 +297,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 314 | int ret; | 297 | int ret; |
| 315 | 298 | ||
| 316 | inline_limit = 0; | 299 | inline_limit = 0; |
| 317 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 300 | if (drop_cache) |
| 301 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
| 318 | 302 | ||
| 319 | path = btrfs_alloc_path(); | 303 | path = btrfs_alloc_path(); |
| 320 | if (!path) | 304 | if (!path) |
| @@ -894,7 +878,8 @@ again: | |||
| 894 | btrfs_put_ordered_extent(ordered); | 878 | btrfs_put_ordered_extent(ordered); |
| 895 | 879 | ||
| 896 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, | 880 | clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, |
| 897 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, | 881 | last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | |
| 882 | EXTENT_DO_ACCOUNTING, | ||
| 898 | GFP_NOFS); | 883 | GFP_NOFS); |
| 899 | unlock_extent(&BTRFS_I(inode)->io_tree, | 884 | unlock_extent(&BTRFS_I(inode)->io_tree, |
| 900 | start_pos, last_pos - 1, GFP_NOFS); | 885 | start_pos, last_pos - 1, GFP_NOFS); |
| @@ -936,21 +921,35 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 936 | start_pos = pos; | 921 | start_pos = pos; |
| 937 | 922 | ||
| 938 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 923 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
| 924 | |||
| 925 | /* do the reserve before the mutex lock in case we have to do some | ||
| 926 | * flushing. We wouldn't deadlock, but this is more polite. | ||
| 927 | */ | ||
| 928 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 929 | if (err) | ||
| 930 | goto out_nolock; | ||
| 931 | |||
| 932 | mutex_lock(&inode->i_mutex); | ||
| 933 | |||
| 939 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 934 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 940 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 935 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 941 | if (err) | 936 | if (err) |
| 942 | goto out_nolock; | 937 | goto out; |
| 938 | |||
| 943 | if (count == 0) | 939 | if (count == 0) |
| 944 | goto out_nolock; | 940 | goto out; |
| 945 | 941 | ||
| 946 | err = file_remove_suid(file); | 942 | err = file_remove_suid(file); |
| 947 | if (err) | 943 | if (err) |
| 948 | goto out_nolock; | 944 | goto out; |
| 945 | |||
| 949 | file_update_time(file); | 946 | file_update_time(file); |
| 950 | 947 | ||
| 951 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 948 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
| 952 | 949 | ||
| 953 | mutex_lock(&inode->i_mutex); | 950 | /* generic_write_checks can change our pos */ |
| 951 | start_pos = pos; | ||
| 952 | |||
| 954 | BTRFS_I(inode)->sequence++; | 953 | BTRFS_I(inode)->sequence++; |
| 955 | first_index = pos >> PAGE_CACHE_SHIFT; | 954 | first_index = pos >> PAGE_CACHE_SHIFT; |
| 956 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 955 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; |
| @@ -1024,9 +1023,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 1024 | } | 1023 | } |
| 1025 | 1024 | ||
| 1026 | if (will_write) { | 1025 | if (will_write) { |
| 1027 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1026 | filemap_fdatawrite_range(inode->i_mapping, pos, |
| 1028 | pos + write_bytes - 1, | 1027 | pos + write_bytes - 1); |
| 1029 | WB_SYNC_ALL); | ||
| 1030 | } else { | 1028 | } else { |
| 1031 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1029 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
| 1032 | num_pages); | 1030 | num_pages); |
| @@ -1047,6 +1045,7 @@ out: | |||
| 1047 | mutex_unlock(&inode->i_mutex); | 1045 | mutex_unlock(&inode->i_mutex); |
| 1048 | if (ret) | 1046 | if (ret) |
| 1049 | err = ret; | 1047 | err = ret; |
| 1048 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1050 | 1049 | ||
| 1051 | out_nolock: | 1050 | out_nolock: |
| 1052 | kfree(pages); | 1051 | kfree(pages); |
| @@ -1203,7 +1202,7 @@ out: | |||
| 1203 | return ret > 0 ? EIO : ret; | 1202 | return ret > 0 ? EIO : ret; |
| 1204 | } | 1203 | } |
| 1205 | 1204 | ||
| 1206 | static struct vm_operations_struct btrfs_file_vm_ops = { | 1205 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
| 1207 | .fault = filemap_fault, | 1206 | .fault = filemap_fault, |
| 1208 | .page_mkwrite = btrfs_page_mkwrite, | 1207 | .page_mkwrite = btrfs_page_mkwrite, |
| 1209 | }; | 1208 | }; |
| @@ -1215,7 +1214,7 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | |||
| 1215 | return 0; | 1214 | return 0; |
| 1216 | } | 1215 | } |
| 1217 | 1216 | ||
| 1218 | struct file_operations btrfs_file_operations = { | 1217 | const struct file_operations btrfs_file_operations = { |
| 1219 | .llseek = generic_file_llseek, | 1218 | .llseek = generic_file_llseek, |
| 1220 | .read = do_sync_read, | 1219 | .read = do_sync_read, |
| 1221 | .aio_read = generic_file_aio_read, | 1220 | .aio_read = generic_file_aio_read, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 5edcee3a617f..5c2caad76212 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
| 259 | 259 | ||
| 260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | 260 | static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) |
| 261 | { | 261 | { |
| 262 | u64 max_bytes, possible_bytes; | 262 | u64 max_bytes; |
| 263 | u64 bitmap_bytes; | ||
| 264 | u64 extent_bytes; | ||
| 263 | 265 | ||
| 264 | /* | 266 | /* |
| 265 | * The goal is to keep the total amount of memory used per 1gb of space | 267 | * The goal is to keep the total amount of memory used per 1gb of space |
| @@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
| 269 | max_bytes = MAX_CACHE_BYTES_PER_GIG * | 271 | max_bytes = MAX_CACHE_BYTES_PER_GIG * |
| 270 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); | 272 | (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); |
| 271 | 273 | ||
| 272 | possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + | 274 | /* |
| 273 | (sizeof(struct btrfs_free_space) * | 275 | * we want to account for 1 more bitmap than what we have so we can make |
| 274 | block_group->extents_thresh); | 276 | * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as |
| 277 | * we add more bitmaps. | ||
| 278 | */ | ||
| 279 | bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE; | ||
| 275 | 280 | ||
| 276 | if (possible_bytes > max_bytes) { | 281 | if (bitmap_bytes >= max_bytes) { |
| 277 | int extent_bytes = max_bytes - | 282 | block_group->extents_thresh = 0; |
| 278 | (block_group->total_bitmaps * PAGE_CACHE_SIZE); | 283 | return; |
| 284 | } | ||
| 279 | 285 | ||
| 280 | if (extent_bytes <= 0) { | 286 | /* |
| 281 | block_group->extents_thresh = 0; | 287 | * we want the extent entry threshold to always be at most 1/2 the max |
| 282 | return; | 288 | * bytes we can have, or whatever is less than that. |
| 283 | } | 289 | */ |
| 290 | extent_bytes = max_bytes - bitmap_bytes; | ||
| 291 | extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); | ||
| 284 | 292 | ||
| 285 | block_group->extents_thresh = extent_bytes / | 293 | block_group->extents_thresh = |
| 286 | (sizeof(struct btrfs_free_space)); | 294 | div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); |
| 287 | } | ||
| 288 | } | 295 | } |
| 289 | 296 | ||
| 290 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, | 297 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, |
| @@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group, | |||
| 403 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); | 410 | BUG_ON(block_group->total_bitmaps >= max_bitmaps); |
| 404 | 411 | ||
| 405 | info->offset = offset_to_bitmap(block_group, offset); | 412 | info->offset = offset_to_bitmap(block_group, offset); |
| 413 | info->bytes = 0; | ||
| 406 | link_free_space(block_group, info); | 414 | link_free_space(block_group, info); |
| 407 | block_group->total_bitmaps++; | 415 | block_group->total_bitmaps++; |
| 408 | 416 | ||
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6b627c611808..72ce3c173d6a 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
| @@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
| 149 | ptr = (unsigned long)(ref + 1); | 149 | ptr = (unsigned long)(ref + 1); |
| 150 | ret = 0; | 150 | ret = 0; |
| 151 | } else if (ret < 0) { | 151 | } else if (ret < 0) { |
| 152 | if (ret == -EOVERFLOW) | ||
| 153 | ret = -EMLINK; | ||
| 152 | goto out; | 154 | goto out; |
| 153 | } else { | 155 | } else { |
| 154 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], | 156 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| @@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | |||
| 177 | 179 | ||
| 178 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 180 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
| 179 | sizeof(struct btrfs_inode_item)); | 181 | sizeof(struct btrfs_inode_item)); |
| 180 | if (ret == 0 && objectid > root->highest_inode) | ||
| 181 | root->highest_inode = objectid; | ||
| 182 | return ret; | 182 | return ret; |
| 183 | } | 183 | } |
| 184 | 184 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 9abbced1123d..c56eb5909172 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
| @@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) | |||
| 43 | slot = path->slots[0] - 1; | 43 | slot = path->slots[0] - 1; |
| 44 | l = path->nodes[0]; | 44 | l = path->nodes[0]; |
| 45 | btrfs_item_key_to_cpu(l, &found_key, slot); | 45 | btrfs_item_key_to_cpu(l, &found_key, slot); |
| 46 | *objectid = found_key.objectid; | 46 | *objectid = max_t(u64, found_key.objectid, |
| 47 | BTRFS_FIRST_FREE_OBJECTID - 1); | ||
| 47 | } else { | 48 | } else { |
| 48 | *objectid = BTRFS_FIRST_FREE_OBJECTID; | 49 | *objectid = BTRFS_FIRST_FREE_OBJECTID - 1; |
| 49 | } | 50 | } |
| 50 | ret = 0; | 51 | ret = 0; |
| 51 | error: | 52 | error: |
| @@ -53,91 +54,27 @@ error: | |||
| 53 | return ret; | 54 | return ret; |
| 54 | } | 55 | } |
| 55 | 56 | ||
| 56 | /* | ||
| 57 | * walks the btree of allocated inodes and find a hole. | ||
| 58 | */ | ||
| 59 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | 57 | int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, |
| 60 | struct btrfs_root *root, | 58 | struct btrfs_root *root, |
| 61 | u64 dirid, u64 *objectid) | 59 | u64 dirid, u64 *objectid) |
| 62 | { | 60 | { |
| 63 | struct btrfs_path *path; | ||
| 64 | struct btrfs_key key; | ||
| 65 | int ret; | 61 | int ret; |
| 66 | int slot = 0; | ||
| 67 | u64 last_ino = 0; | ||
| 68 | int start_found; | ||
| 69 | struct extent_buffer *l; | ||
| 70 | struct btrfs_key search_key; | ||
| 71 | u64 search_start = dirid; | ||
| 72 | |||
| 73 | mutex_lock(&root->objectid_mutex); | 62 | mutex_lock(&root->objectid_mutex); |
| 74 | if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID && | ||
| 75 | root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) { | ||
| 76 | *objectid = ++root->last_inode_alloc; | ||
| 77 | mutex_unlock(&root->objectid_mutex); | ||
| 78 | return 0; | ||
| 79 | } | ||
| 80 | path = btrfs_alloc_path(); | ||
| 81 | BUG_ON(!path); | ||
| 82 | search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); | ||
| 83 | search_key.objectid = search_start; | ||
| 84 | search_key.type = 0; | ||
| 85 | search_key.offset = 0; | ||
| 86 | |||
| 87 | start_found = 0; | ||
| 88 | ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); | ||
| 89 | if (ret < 0) | ||
| 90 | goto error; | ||
| 91 | 63 | ||
| 92 | while (1) { | 64 | if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { |
| 93 | l = path->nodes[0]; | 65 | ret = btrfs_find_highest_inode(root, &root->highest_objectid); |
| 94 | slot = path->slots[0]; | 66 | if (ret) |
| 95 | if (slot >= btrfs_header_nritems(l)) { | 67 | goto out; |
| 96 | ret = btrfs_next_leaf(root, path); | 68 | } |
| 97 | if (ret == 0) | ||
| 98 | continue; | ||
| 99 | if (ret < 0) | ||
| 100 | goto error; | ||
| 101 | if (!start_found) { | ||
| 102 | *objectid = search_start; | ||
| 103 | start_found = 1; | ||
| 104 | goto found; | ||
| 105 | } | ||
| 106 | *objectid = last_ino > search_start ? | ||
| 107 | last_ino : search_start; | ||
| 108 | goto found; | ||
| 109 | } | ||
| 110 | btrfs_item_key_to_cpu(l, &key, slot); | ||
| 111 | if (key.objectid >= search_start) { | ||
| 112 | if (start_found) { | ||
| 113 | if (last_ino < search_start) | ||
| 114 | last_ino = search_start; | ||
| 115 | if (key.objectid > last_ino) { | ||
| 116 | *objectid = last_ino; | ||
| 117 | goto found; | ||
| 118 | } | ||
| 119 | } else if (key.objectid > search_start) { | ||
| 120 | *objectid = search_start; | ||
| 121 | goto found; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | if (key.objectid >= BTRFS_LAST_FREE_OBJECTID) | ||
| 125 | break; | ||
| 126 | 69 | ||
| 127 | start_found = 1; | 70 | if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { |
| 128 | last_ino = key.objectid + 1; | 71 | ret = -ENOSPC; |
| 129 | path->slots[0]++; | 72 | goto out; |
| 130 | } | 73 | } |
| 131 | BUG_ON(1); | 74 | |
| 132 | found: | 75 | *objectid = ++root->highest_objectid; |
| 133 | btrfs_release_path(root, path); | 76 | ret = 0; |
| 134 | btrfs_free_path(path); | 77 | out: |
| 135 | BUG_ON(*objectid < search_start); | ||
| 136 | mutex_unlock(&root->objectid_mutex); | ||
| 137 | return 0; | ||
| 138 | error: | ||
| 139 | btrfs_release_path(root, path); | ||
| 140 | btrfs_free_path(path); | ||
| 141 | mutex_unlock(&root->objectid_mutex); | 78 | mutex_unlock(&root->objectid_mutex); |
| 142 | return ret; | 79 | return ret; |
| 143 | } | 80 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59cba180fe83..9e138b793dc7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -55,14 +55,14 @@ struct btrfs_iget_args { | |||
| 55 | struct btrfs_root *root; | 55 | struct btrfs_root *root; |
| 56 | }; | 56 | }; |
| 57 | 57 | ||
| 58 | static struct inode_operations btrfs_dir_inode_operations; | 58 | static const struct inode_operations btrfs_dir_inode_operations; |
| 59 | static struct inode_operations btrfs_symlink_inode_operations; | 59 | static const struct inode_operations btrfs_symlink_inode_operations; |
| 60 | static struct inode_operations btrfs_dir_ro_inode_operations; | 60 | static const struct inode_operations btrfs_dir_ro_inode_operations; |
| 61 | static struct inode_operations btrfs_special_inode_operations; | 61 | static const struct inode_operations btrfs_special_inode_operations; |
| 62 | static struct inode_operations btrfs_file_inode_operations; | 62 | static const struct inode_operations btrfs_file_inode_operations; |
| 63 | static struct address_space_operations btrfs_aops; | 63 | static const struct address_space_operations btrfs_aops; |
| 64 | static struct address_space_operations btrfs_symlink_aops; | 64 | static const struct address_space_operations btrfs_symlink_aops; |
| 65 | static struct file_operations btrfs_dir_file_operations; | 65 | static const struct file_operations btrfs_dir_file_operations; |
| 66 | static struct extent_io_ops btrfs_extent_io_ops; | 66 | static struct extent_io_ops btrfs_extent_io_ops; |
| 67 | 67 | ||
| 68 | static struct kmem_cache *btrfs_inode_cachep; | 68 | static struct kmem_cache *btrfs_inode_cachep; |
| @@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 231 | } | 231 | } |
| 232 | 232 | ||
| 233 | ret = btrfs_drop_extents(trans, root, inode, start, | 233 | ret = btrfs_drop_extents(trans, root, inode, start, |
| 234 | aligned_end, aligned_end, start, &hint_byte); | 234 | aligned_end, aligned_end, start, |
| 235 | &hint_byte, 1); | ||
| 235 | BUG_ON(ret); | 236 | BUG_ON(ret); |
| 236 | 237 | ||
| 237 | if (isize > actual_end) | 238 | if (isize > actual_end) |
| @@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 240 | inline_len, compressed_size, | 241 | inline_len, compressed_size, |
| 241 | compressed_pages); | 242 | compressed_pages); |
| 242 | BUG_ON(ret); | 243 | BUG_ON(ret); |
| 243 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | 244 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
| 244 | return 0; | 245 | return 0; |
| 245 | } | 246 | } |
| 246 | 247 | ||
| @@ -423,9 +424,12 @@ again: | |||
| 423 | * and free up our temp pages. | 424 | * and free up our temp pages. |
| 424 | */ | 425 | */ |
| 425 | extent_clear_unlock_delalloc(inode, | 426 | extent_clear_unlock_delalloc(inode, |
| 426 | &BTRFS_I(inode)->io_tree, | 427 | &BTRFS_I(inode)->io_tree, |
| 427 | start, end, NULL, 1, 0, | 428 | start, end, NULL, |
| 428 | 0, 1, 1, 1); | 429 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
| 430 | EXTENT_CLEAR_DELALLOC | | ||
| 431 | EXTENT_CLEAR_ACCOUNTING | | ||
| 432 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | ||
| 429 | ret = 0; | 433 | ret = 0; |
| 430 | goto free_pages_out; | 434 | goto free_pages_out; |
| 431 | } | 435 | } |
| @@ -611,9 +615,9 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 611 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 615 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 612 | 616 | ||
| 613 | while (1) { | 617 | while (1) { |
| 614 | spin_lock(&em_tree->lock); | 618 | write_lock(&em_tree->lock); |
| 615 | ret = add_extent_mapping(em_tree, em); | 619 | ret = add_extent_mapping(em_tree, em); |
| 616 | spin_unlock(&em_tree->lock); | 620 | write_unlock(&em_tree->lock); |
| 617 | if (ret != -EEXIST) { | 621 | if (ret != -EEXIST) { |
| 618 | free_extent_map(em); | 622 | free_extent_map(em); |
| 619 | break; | 623 | break; |
| @@ -636,11 +640,14 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 636 | * clear dirty, set writeback and unlock the pages. | 640 | * clear dirty, set writeback and unlock the pages. |
| 637 | */ | 641 | */ |
| 638 | extent_clear_unlock_delalloc(inode, | 642 | extent_clear_unlock_delalloc(inode, |
| 639 | &BTRFS_I(inode)->io_tree, | 643 | &BTRFS_I(inode)->io_tree, |
| 640 | async_extent->start, | 644 | async_extent->start, |
| 641 | async_extent->start + | 645 | async_extent->start + |
| 642 | async_extent->ram_size - 1, | 646 | async_extent->ram_size - 1, |
| 643 | NULL, 1, 1, 0, 1, 1, 0); | 647 | NULL, EXTENT_CLEAR_UNLOCK_PAGE | |
| 648 | EXTENT_CLEAR_UNLOCK | | ||
| 649 | EXTENT_CLEAR_DELALLOC | | ||
| 650 | EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); | ||
| 644 | 651 | ||
| 645 | ret = btrfs_submit_compressed_write(inode, | 652 | ret = btrfs_submit_compressed_write(inode, |
| 646 | async_extent->start, | 653 | async_extent->start, |
| @@ -711,9 +718,15 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 711 | start, end, 0, NULL); | 718 | start, end, 0, NULL); |
| 712 | if (ret == 0) { | 719 | if (ret == 0) { |
| 713 | extent_clear_unlock_delalloc(inode, | 720 | extent_clear_unlock_delalloc(inode, |
| 714 | &BTRFS_I(inode)->io_tree, | 721 | &BTRFS_I(inode)->io_tree, |
| 715 | start, end, NULL, 1, 1, | 722 | start, end, NULL, |
| 716 | 1, 1, 1, 1); | 723 | EXTENT_CLEAR_UNLOCK_PAGE | |
| 724 | EXTENT_CLEAR_UNLOCK | | ||
| 725 | EXTENT_CLEAR_DELALLOC | | ||
| 726 | EXTENT_CLEAR_ACCOUNTING | | ||
| 727 | EXTENT_CLEAR_DIRTY | | ||
| 728 | EXTENT_SET_WRITEBACK | | ||
| 729 | EXTENT_END_WRITEBACK); | ||
| 717 | *nr_written = *nr_written + | 730 | *nr_written = *nr_written + |
| 718 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; | 731 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; |
| 719 | *page_started = 1; | 732 | *page_started = 1; |
| @@ -725,9 +738,20 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 725 | BUG_ON(disk_num_bytes > | 738 | BUG_ON(disk_num_bytes > |
| 726 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 739 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
| 727 | 740 | ||
| 741 | |||
| 742 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 743 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
| 744 | start, num_bytes); | ||
| 745 | if (em) { | ||
| 746 | alloc_hint = em->block_start; | ||
| 747 | free_extent_map(em); | ||
| 748 | } | ||
| 749 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 728 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 750 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
| 729 | 751 | ||
| 730 | while (disk_num_bytes > 0) { | 752 | while (disk_num_bytes > 0) { |
| 753 | unsigned long op; | ||
| 754 | |||
| 731 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | 755 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); |
| 732 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 756 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
| 733 | root->sectorsize, 0, alloc_hint, | 757 | root->sectorsize, 0, alloc_hint, |
| @@ -737,7 +761,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 737 | em = alloc_extent_map(GFP_NOFS); | 761 | em = alloc_extent_map(GFP_NOFS); |
| 738 | em->start = start; | 762 | em->start = start; |
| 739 | em->orig_start = em->start; | 763 | em->orig_start = em->start; |
| 740 | |||
| 741 | ram_size = ins.offset; | 764 | ram_size = ins.offset; |
| 742 | em->len = ins.offset; | 765 | em->len = ins.offset; |
| 743 | 766 | ||
| @@ -747,9 +770,9 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 747 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 770 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 748 | 771 | ||
| 749 | while (1) { | 772 | while (1) { |
| 750 | spin_lock(&em_tree->lock); | 773 | write_lock(&em_tree->lock); |
| 751 | ret = add_extent_mapping(em_tree, em); | 774 | ret = add_extent_mapping(em_tree, em); |
| 752 | spin_unlock(&em_tree->lock); | 775 | write_unlock(&em_tree->lock); |
| 753 | if (ret != -EEXIST) { | 776 | if (ret != -EEXIST) { |
| 754 | free_extent_map(em); | 777 | free_extent_map(em); |
| 755 | break; | 778 | break; |
| @@ -776,11 +799,17 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 776 | /* we're not doing compressed IO, don't unlock the first | 799 | /* we're not doing compressed IO, don't unlock the first |
| 777 | * page (which the caller expects to stay locked), don't | 800 | * page (which the caller expects to stay locked), don't |
| 778 | * clear any dirty bits and don't set any writeback bits | 801 | * clear any dirty bits and don't set any writeback bits |
| 802 | * | ||
| 803 | * Do set the Private2 bit so we know this page was properly | ||
| 804 | * setup for writepage | ||
| 779 | */ | 805 | */ |
| 806 | op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; | ||
| 807 | op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | | ||
| 808 | EXTENT_SET_PRIVATE2; | ||
| 809 | |||
| 780 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 810 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 781 | start, start + ram_size - 1, | 811 | start, start + ram_size - 1, |
| 782 | locked_page, unlock, 1, | 812 | locked_page, op); |
| 783 | 1, 0, 0, 0); | ||
| 784 | disk_num_bytes -= cur_alloc_size; | 813 | disk_num_bytes -= cur_alloc_size; |
| 785 | num_bytes -= cur_alloc_size; | 814 | num_bytes -= cur_alloc_size; |
| 786 | alloc_hint = ins.objectid + ins.offset; | 815 | alloc_hint = ins.objectid + ins.offset; |
| @@ -852,8 +881,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
| 852 | u64 cur_end; | 881 | u64 cur_end; |
| 853 | int limit = 10 * 1024 * 1042; | 882 | int limit = 10 * 1024 * 1042; |
| 854 | 883 | ||
| 855 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 884 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, |
| 856 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 885 | 1, 0, NULL, GFP_NOFS); |
| 857 | while (start < end) { | 886 | while (start < end) { |
| 858 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); | 887 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
| 859 | async_cow->inode = inode; | 888 | async_cow->inode = inode; |
| @@ -994,6 +1023,7 @@ next_slot: | |||
| 994 | 1023 | ||
| 995 | if (found_key.offset > cur_offset) { | 1024 | if (found_key.offset > cur_offset) { |
| 996 | extent_end = found_key.offset; | 1025 | extent_end = found_key.offset; |
| 1026 | extent_type = 0; | ||
| 997 | goto out_check; | 1027 | goto out_check; |
| 998 | } | 1028 | } |
| 999 | 1029 | ||
| @@ -1080,9 +1110,9 @@ out_check: | |||
| 1080 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1110 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 1081 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1111 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 1082 | while (1) { | 1112 | while (1) { |
| 1083 | spin_lock(&em_tree->lock); | 1113 | write_lock(&em_tree->lock); |
| 1084 | ret = add_extent_mapping(em_tree, em); | 1114 | ret = add_extent_mapping(em_tree, em); |
| 1085 | spin_unlock(&em_tree->lock); | 1115 | write_unlock(&em_tree->lock); |
| 1086 | if (ret != -EEXIST) { | 1116 | if (ret != -EEXIST) { |
| 1087 | free_extent_map(em); | 1117 | free_extent_map(em); |
| 1088 | break; | 1118 | break; |
| @@ -1100,8 +1130,10 @@ out_check: | |||
| 1100 | BUG_ON(ret); | 1130 | BUG_ON(ret); |
| 1101 | 1131 | ||
| 1102 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1132 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 1103 | cur_offset, cur_offset + num_bytes - 1, | 1133 | cur_offset, cur_offset + num_bytes - 1, |
| 1104 | locked_page, 1, 1, 1, 0, 0, 0); | 1134 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
| 1135 | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | | ||
| 1136 | EXTENT_SET_PRIVATE2); | ||
| 1105 | cur_offset = extent_end; | 1137 | cur_offset = extent_end; |
| 1106 | if (cur_offset > end) | 1138 | if (cur_offset > end) |
| 1107 | break; | 1139 | break; |
| @@ -1147,6 +1179,89 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1147 | return ret; | 1179 | return ret; |
| 1148 | } | 1180 | } |
| 1149 | 1181 | ||
| 1182 | static int btrfs_split_extent_hook(struct inode *inode, | ||
| 1183 | struct extent_state *orig, u64 split) | ||
| 1184 | { | ||
| 1185 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1186 | u64 size; | ||
| 1187 | |||
| 1188 | if (!(orig->state & EXTENT_DELALLOC)) | ||
| 1189 | return 0; | ||
| 1190 | |||
| 1191 | size = orig->end - orig->start + 1; | ||
| 1192 | if (size > root->fs_info->max_extent) { | ||
| 1193 | u64 num_extents; | ||
| 1194 | u64 new_size; | ||
| 1195 | |||
| 1196 | new_size = orig->end - split + 1; | ||
| 1197 | num_extents = div64_u64(size + root->fs_info->max_extent - 1, | ||
| 1198 | root->fs_info->max_extent); | ||
| 1199 | |||
| 1200 | /* | ||
| 1201 | * if we break a large extent up then leave outstanding_extents | ||
| 1202 | * be, since we've already accounted for the large extent. | ||
| 1203 | */ | ||
| 1204 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1205 | root->fs_info->max_extent) < num_extents) | ||
| 1206 | return 0; | ||
| 1207 | } | ||
| 1208 | |||
| 1209 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1210 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1211 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1212 | |||
| 1213 | return 0; | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | /* | ||
| 1217 | * extent_io.c merge_extent_hook, used to track merged delayed allocation | ||
| 1218 | * extents so we can keep track of new extents that are just merged onto old | ||
| 1219 | * extents, such as when we are doing sequential writes, so we can properly | ||
| 1220 | * account for the metadata space we'll need. | ||
| 1221 | */ | ||
| 1222 | static int btrfs_merge_extent_hook(struct inode *inode, | ||
| 1223 | struct extent_state *new, | ||
| 1224 | struct extent_state *other) | ||
| 1225 | { | ||
| 1226 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1227 | u64 new_size, old_size; | ||
| 1228 | u64 num_extents; | ||
| 1229 | |||
| 1230 | /* not delalloc, ignore it */ | ||
| 1231 | if (!(other->state & EXTENT_DELALLOC)) | ||
| 1232 | return 0; | ||
| 1233 | |||
| 1234 | old_size = other->end - other->start + 1; | ||
| 1235 | if (new->start < other->start) | ||
| 1236 | new_size = other->end - new->start + 1; | ||
| 1237 | else | ||
| 1238 | new_size = new->end - other->start + 1; | ||
| 1239 | |||
| 1240 | /* we're not bigger than the max, unreserve the space and go */ | ||
| 1241 | if (new_size <= root->fs_info->max_extent) { | ||
| 1242 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1243 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1244 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1245 | return 0; | ||
| 1246 | } | ||
| 1247 | |||
| 1248 | /* | ||
| 1249 | * If we grew by another max_extent, just return, we want to keep that | ||
| 1250 | * reserved amount. | ||
| 1251 | */ | ||
| 1252 | num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, | ||
| 1253 | root->fs_info->max_extent); | ||
| 1254 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1255 | root->fs_info->max_extent) > num_extents) | ||
| 1256 | return 0; | ||
| 1257 | |||
| 1258 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1259 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1260 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1261 | |||
| 1262 | return 0; | ||
| 1263 | } | ||
| 1264 | |||
| 1150 | /* | 1265 | /* |
| 1151 | * extent_io.c set_bit_hook, used to track delayed allocation | 1266 | * extent_io.c set_bit_hook, used to track delayed allocation |
| 1152 | * bytes in this file, and to maintain the list of inodes that | 1267 | * bytes in this file, and to maintain the list of inodes that |
| @@ -1155,6 +1270,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1155 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1270 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, |
| 1156 | unsigned long old, unsigned long bits) | 1271 | unsigned long old, unsigned long bits) |
| 1157 | { | 1272 | { |
| 1273 | |||
| 1158 | /* | 1274 | /* |
| 1159 | * set_bit and clear bit hooks normally require _irqsave/restore | 1275 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1160 | * but in this case, we are only testing for the DELALLOC | 1276 | * but in this case, we are only testing for the DELALLOC |
| @@ -1162,6 +1278,10 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1162 | */ | 1278 | */ |
| 1163 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1279 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1164 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1280 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1281 | |||
| 1282 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1283 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1284 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1165 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1285 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); |
| 1166 | spin_lock(&root->fs_info->delalloc_lock); | 1286 | spin_lock(&root->fs_info->delalloc_lock); |
| 1167 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1287 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; |
| @@ -1178,22 +1298,31 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1178 | /* | 1298 | /* |
| 1179 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1299 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
| 1180 | */ | 1300 | */ |
| 1181 | static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | 1301 | static int btrfs_clear_bit_hook(struct inode *inode, |
| 1182 | unsigned long old, unsigned long bits) | 1302 | struct extent_state *state, unsigned long bits) |
| 1183 | { | 1303 | { |
| 1184 | /* | 1304 | /* |
| 1185 | * set_bit and clear bit hooks normally require _irqsave/restore | 1305 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1186 | * but in this case, we are only testing for the DELALLOC | 1306 | * but in this case, we are only testing for the DELALLOC |
| 1187 | * bit, which is only set or cleared with irqs on | 1307 | * bit, which is only set or cleared with irqs on |
| 1188 | */ | 1308 | */ |
| 1189 | if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1309 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { |
| 1190 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1310 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1191 | 1311 | ||
| 1312 | if (bits & EXTENT_DO_ACCOUNTING) { | ||
| 1313 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1314 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1315 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1316 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 1317 | } | ||
| 1318 | |||
| 1192 | spin_lock(&root->fs_info->delalloc_lock); | 1319 | spin_lock(&root->fs_info->delalloc_lock); |
| 1193 | if (end - start + 1 > root->fs_info->delalloc_bytes) { | 1320 | if (state->end - state->start + 1 > |
| 1321 | root->fs_info->delalloc_bytes) { | ||
| 1194 | printk(KERN_INFO "btrfs warning: delalloc account " | 1322 | printk(KERN_INFO "btrfs warning: delalloc account " |
| 1195 | "%llu %llu\n", | 1323 | "%llu %llu\n", |
| 1196 | (unsigned long long)end - start + 1, | 1324 | (unsigned long long) |
| 1325 | state->end - state->start + 1, | ||
| 1197 | (unsigned long long) | 1326 | (unsigned long long) |
| 1198 | root->fs_info->delalloc_bytes); | 1327 | root->fs_info->delalloc_bytes); |
| 1199 | btrfs_delalloc_free_space(root, inode, (u64)-1); | 1328 | btrfs_delalloc_free_space(root, inode, (u64)-1); |
| @@ -1201,9 +1330,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1201 | BTRFS_I(inode)->delalloc_bytes = 0; | 1330 | BTRFS_I(inode)->delalloc_bytes = 0; |
| 1202 | } else { | 1331 | } else { |
| 1203 | btrfs_delalloc_free_space(root, inode, | 1332 | btrfs_delalloc_free_space(root, inode, |
| 1204 | end - start + 1); | 1333 | state->end - |
| 1205 | root->fs_info->delalloc_bytes -= end - start + 1; | 1334 | state->start + 1); |
| 1206 | BTRFS_I(inode)->delalloc_bytes -= end - start + 1; | 1335 | root->fs_info->delalloc_bytes -= state->end - |
| 1336 | state->start + 1; | ||
| 1337 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
| 1338 | state->start + 1; | ||
| 1207 | } | 1339 | } |
| 1208 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
| 1209 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| @@ -1374,10 +1506,8 @@ again: | |||
| 1374 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 1506 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); |
| 1375 | 1507 | ||
| 1376 | /* already ordered? We're done */ | 1508 | /* already ordered? We're done */ |
| 1377 | if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 1509 | if (PagePrivate2(page)) |
| 1378 | EXTENT_ORDERED, 0)) { | ||
| 1379 | goto out; | 1510 | goto out; |
| 1380 | } | ||
| 1381 | 1511 | ||
| 1382 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1512 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
| 1383 | if (ordered) { | 1513 | if (ordered) { |
| @@ -1413,11 +1543,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
| 1413 | struct inode *inode = page->mapping->host; | 1543 | struct inode *inode = page->mapping->host; |
| 1414 | struct btrfs_writepage_fixup *fixup; | 1544 | struct btrfs_writepage_fixup *fixup; |
| 1415 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1545 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1416 | int ret; | ||
| 1417 | 1546 | ||
| 1418 | ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | 1547 | /* this page is properly in the ordered list */ |
| 1419 | EXTENT_ORDERED, 0); | 1548 | if (TestClearPagePrivate2(page)) |
| 1420 | if (ret) | ||
| 1421 | return 0; | 1549 | return 0; |
| 1422 | 1550 | ||
| 1423 | if (PageChecked(page)) | 1551 | if (PageChecked(page)) |
| @@ -1455,9 +1583,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1455 | BUG_ON(!path); | 1583 | BUG_ON(!path); |
| 1456 | 1584 | ||
| 1457 | path->leave_spinning = 1; | 1585 | path->leave_spinning = 1; |
| 1586 | |||
| 1587 | /* | ||
| 1588 | * we may be replacing one extent in the tree with another. | ||
| 1589 | * The new extent is pinned in the extent map, and we don't want | ||
| 1590 | * to drop it from the cache until it is completely in the btree. | ||
| 1591 | * | ||
| 1592 | * So, tell btrfs_drop_extents to leave this extent in the cache. | ||
| 1593 | * the caller is expected to unpin it and allow it to be merged | ||
| 1594 | * with the others. | ||
| 1595 | */ | ||
| 1458 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1596 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
| 1459 | file_pos + num_bytes, locked_end, | 1597 | file_pos + num_bytes, locked_end, |
| 1460 | file_pos, &hint); | 1598 | file_pos, &hint, 0); |
| 1461 | BUG_ON(ret); | 1599 | BUG_ON(ret); |
| 1462 | 1600 | ||
| 1463 | ins.objectid = inode->i_ino; | 1601 | ins.objectid = inode->i_ino; |
| @@ -1485,7 +1623,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1485 | btrfs_mark_buffer_dirty(leaf); | 1623 | btrfs_mark_buffer_dirty(leaf); |
| 1486 | 1624 | ||
| 1487 | inode_add_bytes(inode, num_bytes); | 1625 | inode_add_bytes(inode, num_bytes); |
| 1488 | btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); | ||
| 1489 | 1626 | ||
| 1490 | ins.objectid = disk_bytenr; | 1627 | ins.objectid = disk_bytenr; |
| 1491 | ins.offset = disk_num_bytes; | 1628 | ins.offset = disk_num_bytes; |
| @@ -1596,6 +1733,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1596 | ordered_extent->len, | 1733 | ordered_extent->len, |
| 1597 | compressed, 0, 0, | 1734 | compressed, 0, 0, |
| 1598 | BTRFS_FILE_EXTENT_REG); | 1735 | BTRFS_FILE_EXTENT_REG); |
| 1736 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 1737 | ordered_extent->file_offset, | ||
| 1738 | ordered_extent->len); | ||
| 1599 | BUG_ON(ret); | 1739 | BUG_ON(ret); |
| 1600 | } | 1740 | } |
| 1601 | unlock_extent(io_tree, ordered_extent->file_offset, | 1741 | unlock_extent(io_tree, ordered_extent->file_offset, |
| @@ -1623,6 +1763,7 @@ nocow: | |||
| 1623 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1763 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
| 1624 | struct extent_state *state, int uptodate) | 1764 | struct extent_state *state, int uptodate) |
| 1625 | { | 1765 | { |
| 1766 | ClearPagePrivate2(page); | ||
| 1626 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1767 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
| 1627 | } | 1768 | } |
| 1628 | 1769 | ||
| @@ -1669,13 +1810,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 1669 | failrec->last_mirror = 0; | 1810 | failrec->last_mirror = 0; |
| 1670 | failrec->bio_flags = 0; | 1811 | failrec->bio_flags = 0; |
| 1671 | 1812 | ||
| 1672 | spin_lock(&em_tree->lock); | 1813 | read_lock(&em_tree->lock); |
| 1673 | em = lookup_extent_mapping(em_tree, start, failrec->len); | 1814 | em = lookup_extent_mapping(em_tree, start, failrec->len); |
| 1674 | if (em->start > start || em->start + em->len < start) { | 1815 | if (em->start > start || em->start + em->len < start) { |
| 1675 | free_extent_map(em); | 1816 | free_extent_map(em); |
| 1676 | em = NULL; | 1817 | em = NULL; |
| 1677 | } | 1818 | } |
| 1678 | spin_unlock(&em_tree->lock); | 1819 | read_unlock(&em_tree->lock); |
| 1679 | 1820 | ||
| 1680 | if (!em || IS_ERR(em)) { | 1821 | if (!em || IS_ERR(em)) { |
| 1681 | kfree(failrec); | 1822 | kfree(failrec); |
| @@ -1794,7 +1935,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 1794 | return 0; | 1935 | return 0; |
| 1795 | 1936 | ||
| 1796 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1937 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
| 1797 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { | 1938 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
| 1798 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, | 1939 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, |
| 1799 | GFP_NOFS); | 1940 | GFP_NOFS); |
| 1800 | return 0; | 1941 | return 0; |
| @@ -2352,6 +2493,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 2352 | return ret; | 2493 | return ret; |
| 2353 | } | 2494 | } |
| 2354 | 2495 | ||
| 2496 | int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, | ||
| 2497 | struct btrfs_root *root, | ||
| 2498 | struct inode *dir, u64 objectid, | ||
| 2499 | const char *name, int name_len) | ||
| 2500 | { | ||
| 2501 | struct btrfs_path *path; | ||
| 2502 | struct extent_buffer *leaf; | ||
| 2503 | struct btrfs_dir_item *di; | ||
| 2504 | struct btrfs_key key; | ||
| 2505 | u64 index; | ||
| 2506 | int ret; | ||
| 2507 | |||
| 2508 | path = btrfs_alloc_path(); | ||
| 2509 | if (!path) | ||
| 2510 | return -ENOMEM; | ||
| 2511 | |||
| 2512 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
| 2513 | name, name_len, -1); | ||
| 2514 | BUG_ON(!di || IS_ERR(di)); | ||
| 2515 | |||
| 2516 | leaf = path->nodes[0]; | ||
| 2517 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
| 2518 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
| 2519 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
| 2520 | BUG_ON(ret); | ||
| 2521 | btrfs_release_path(root, path); | ||
| 2522 | |||
| 2523 | ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, | ||
| 2524 | objectid, root->root_key.objectid, | ||
| 2525 | dir->i_ino, &index, name, name_len); | ||
| 2526 | if (ret < 0) { | ||
| 2527 | BUG_ON(ret != -ENOENT); | ||
| 2528 | di = btrfs_search_dir_index_item(root, path, dir->i_ino, | ||
| 2529 | name, name_len); | ||
| 2530 | BUG_ON(!di || IS_ERR(di)); | ||
| 2531 | |||
| 2532 | leaf = path->nodes[0]; | ||
| 2533 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 2534 | btrfs_release_path(root, path); | ||
| 2535 | index = key.offset; | ||
| 2536 | } | ||
| 2537 | |||
| 2538 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, | ||
| 2539 | index, name, name_len, -1); | ||
| 2540 | BUG_ON(!di || IS_ERR(di)); | ||
| 2541 | |||
| 2542 | leaf = path->nodes[0]; | ||
| 2543 | btrfs_dir_item_key_to_cpu(leaf, di, &key); | ||
| 2544 | WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); | ||
| 2545 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | ||
| 2546 | BUG_ON(ret); | ||
| 2547 | btrfs_release_path(root, path); | ||
| 2548 | |||
| 2549 | btrfs_i_size_write(dir, dir->i_size - name_len * 2); | ||
| 2550 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | ||
| 2551 | ret = btrfs_update_inode(trans, root, dir); | ||
| 2552 | BUG_ON(ret); | ||
| 2553 | dir->i_sb->s_dirt = 1; | ||
| 2554 | |||
| 2555 | btrfs_free_path(path); | ||
| 2556 | return 0; | ||
| 2557 | } | ||
| 2558 | |||
| 2355 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | 2559 | static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) |
| 2356 | { | 2560 | { |
| 2357 | struct inode *inode = dentry->d_inode; | 2561 | struct inode *inode = dentry->d_inode; |
| @@ -2361,29 +2565,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2361 | struct btrfs_trans_handle *trans; | 2565 | struct btrfs_trans_handle *trans; |
| 2362 | unsigned long nr = 0; | 2566 | unsigned long nr = 0; |
| 2363 | 2567 | ||
| 2364 | /* | ||
| 2365 | * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir | ||
| 2366 | * the root of a subvolume or snapshot | ||
| 2367 | */ | ||
| 2368 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || | 2568 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || |
| 2369 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { | 2569 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 2370 | return -ENOTEMPTY; | 2570 | return -ENOTEMPTY; |
| 2371 | } | ||
| 2372 | 2571 | ||
| 2373 | trans = btrfs_start_transaction(root, 1); | 2572 | trans = btrfs_start_transaction(root, 1); |
| 2374 | btrfs_set_trans_block_group(trans, dir); | 2573 | btrfs_set_trans_block_group(trans, dir); |
| 2375 | 2574 | ||
| 2575 | if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { | ||
| 2576 | err = btrfs_unlink_subvol(trans, root, dir, | ||
| 2577 | BTRFS_I(inode)->location.objectid, | ||
| 2578 | dentry->d_name.name, | ||
| 2579 | dentry->d_name.len); | ||
| 2580 | goto out; | ||
| 2581 | } | ||
| 2582 | |||
| 2376 | err = btrfs_orphan_add(trans, inode); | 2583 | err = btrfs_orphan_add(trans, inode); |
| 2377 | if (err) | 2584 | if (err) |
| 2378 | goto fail_trans; | 2585 | goto out; |
| 2379 | 2586 | ||
| 2380 | /* now the directory is empty */ | 2587 | /* now the directory is empty */ |
| 2381 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2588 | err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
| 2382 | dentry->d_name.name, dentry->d_name.len); | 2589 | dentry->d_name.name, dentry->d_name.len); |
| 2383 | if (!err) | 2590 | if (!err) |
| 2384 | btrfs_i_size_write(inode, 0); | 2591 | btrfs_i_size_write(inode, 0); |
| 2385 | 2592 | out: | |
| 2386 | fail_trans: | ||
| 2387 | nr = trans->blocks_used; | 2593 | nr = trans->blocks_used; |
| 2388 | ret = btrfs_end_transaction_throttle(trans, root); | 2594 | ret = btrfs_end_transaction_throttle(trans, root); |
| 2389 | btrfs_btree_balance_dirty(root, nr); | 2595 | btrfs_btree_balance_dirty(root, nr); |
| @@ -2864,7 +3070,12 @@ again: | |||
| 2864 | goto again; | 3070 | goto again; |
| 2865 | } | 3071 | } |
| 2866 | 3072 | ||
| 2867 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 3073 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 3074 | if (ret) { | ||
| 3075 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 3076 | goto out_unlock; | ||
| 3077 | } | ||
| 3078 | |||
| 2868 | ret = 0; | 3079 | ret = 0; |
| 2869 | if (offset != PAGE_CACHE_SIZE) { | 3080 | if (offset != PAGE_CACHE_SIZE) { |
| 2870 | kaddr = kmap(page); | 3081 | kaddr = kmap(page); |
| @@ -2895,15 +3106,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 2895 | u64 last_byte; | 3106 | u64 last_byte; |
| 2896 | u64 cur_offset; | 3107 | u64 cur_offset; |
| 2897 | u64 hole_size; | 3108 | u64 hole_size; |
| 2898 | int err; | 3109 | int err = 0; |
| 2899 | 3110 | ||
| 2900 | if (size <= hole_start) | 3111 | if (size <= hole_start) |
| 2901 | return 0; | 3112 | return 0; |
| 2902 | 3113 | ||
| 2903 | err = btrfs_check_metadata_free_space(root); | ||
| 2904 | if (err) | ||
| 2905 | return err; | ||
| 2906 | |||
| 2907 | btrfs_truncate_page(inode->i_mapping, inode->i_size); | 3114 | btrfs_truncate_page(inode->i_mapping, inode->i_size); |
| 2908 | 3115 | ||
| 2909 | while (1) { | 3116 | while (1) { |
| @@ -2935,15 +3142,21 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 2935 | cur_offset, | 3142 | cur_offset, |
| 2936 | cur_offset + hole_size, | 3143 | cur_offset + hole_size, |
| 2937 | block_end, | 3144 | block_end, |
| 2938 | cur_offset, &hint_byte); | 3145 | cur_offset, &hint_byte, 1); |
| 3146 | if (err) | ||
| 3147 | break; | ||
| 3148 | |||
| 3149 | err = btrfs_reserve_metadata_space(root, 1); | ||
| 2939 | if (err) | 3150 | if (err) |
| 2940 | break; | 3151 | break; |
| 3152 | |||
| 2941 | err = btrfs_insert_file_extent(trans, root, | 3153 | err = btrfs_insert_file_extent(trans, root, |
| 2942 | inode->i_ino, cur_offset, 0, | 3154 | inode->i_ino, cur_offset, 0, |
| 2943 | 0, hole_size, 0, hole_size, | 3155 | 0, hole_size, 0, hole_size, |
| 2944 | 0, 0, 0); | 3156 | 0, 0, 0); |
| 2945 | btrfs_drop_extent_cache(inode, hole_start, | 3157 | btrfs_drop_extent_cache(inode, hole_start, |
| 2946 | last_byte - 1, 0); | 3158 | last_byte - 1, 0); |
| 3159 | btrfs_unreserve_metadata_space(root, 1); | ||
| 2947 | } | 3160 | } |
| 2948 | free_extent_map(em); | 3161 | free_extent_map(em); |
| 2949 | cur_offset = last_byte; | 3162 | cur_offset = last_byte; |
| @@ -3003,6 +3216,11 @@ void btrfs_delete_inode(struct inode *inode) | |||
| 3003 | } | 3216 | } |
| 3004 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 3217 | btrfs_wait_ordered_range(inode, 0, (u64)-1); |
| 3005 | 3218 | ||
| 3219 | if (inode->i_nlink > 0) { | ||
| 3220 | BUG_ON(btrfs_root_refs(&root->root_item) != 0); | ||
| 3221 | goto no_delete; | ||
| 3222 | } | ||
| 3223 | |||
| 3006 | btrfs_i_size_write(inode, 0); | 3224 | btrfs_i_size_write(inode, 0); |
| 3007 | trans = btrfs_join_transaction(root, 1); | 3225 | trans = btrfs_join_transaction(root, 1); |
| 3008 | 3226 | ||
| @@ -3070,29 +3288,67 @@ out_err: | |||
| 3070 | * is kind of like crossing a mount point. | 3288 | * is kind of like crossing a mount point. |
| 3071 | */ | 3289 | */ |
| 3072 | static int fixup_tree_root_location(struct btrfs_root *root, | 3290 | static int fixup_tree_root_location(struct btrfs_root *root, |
| 3073 | struct btrfs_key *location, | 3291 | struct inode *dir, |
| 3074 | struct btrfs_root **sub_root, | 3292 | struct dentry *dentry, |
| 3075 | struct dentry *dentry) | 3293 | struct btrfs_key *location, |
| 3294 | struct btrfs_root **sub_root) | ||
| 3076 | { | 3295 | { |
| 3077 | struct btrfs_root_item *ri; | 3296 | struct btrfs_path *path; |
| 3297 | struct btrfs_root *new_root; | ||
| 3298 | struct btrfs_root_ref *ref; | ||
| 3299 | struct extent_buffer *leaf; | ||
| 3300 | int ret; | ||
| 3301 | int err = 0; | ||
| 3078 | 3302 | ||
| 3079 | if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) | 3303 | path = btrfs_alloc_path(); |
| 3080 | return 0; | 3304 | if (!path) { |
| 3081 | if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) | 3305 | err = -ENOMEM; |
| 3082 | return 0; | 3306 | goto out; |
| 3307 | } | ||
| 3083 | 3308 | ||
| 3084 | *sub_root = btrfs_read_fs_root(root->fs_info, location, | 3309 | err = -ENOENT; |
| 3085 | dentry->d_name.name, | 3310 | ret = btrfs_find_root_ref(root->fs_info->tree_root, path, |
| 3086 | dentry->d_name.len); | 3311 | BTRFS_I(dir)->root->root_key.objectid, |
| 3087 | if (IS_ERR(*sub_root)) | 3312 | location->objectid); |
| 3088 | return PTR_ERR(*sub_root); | 3313 | if (ret) { |
| 3314 | if (ret < 0) | ||
| 3315 | err = ret; | ||
| 3316 | goto out; | ||
| 3317 | } | ||
| 3089 | 3318 | ||
| 3090 | ri = &(*sub_root)->root_item; | 3319 | leaf = path->nodes[0]; |
| 3091 | location->objectid = btrfs_root_dirid(ri); | 3320 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); |
| 3092 | btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); | 3321 | if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || |
| 3093 | location->offset = 0; | 3322 | btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) |
| 3323 | goto out; | ||
| 3094 | 3324 | ||
| 3095 | return 0; | 3325 | ret = memcmp_extent_buffer(leaf, dentry->d_name.name, |
| 3326 | (unsigned long)(ref + 1), | ||
| 3327 | dentry->d_name.len); | ||
| 3328 | if (ret) | ||
| 3329 | goto out; | ||
| 3330 | |||
| 3331 | btrfs_release_path(root->fs_info->tree_root, path); | ||
| 3332 | |||
| 3333 | new_root = btrfs_read_fs_root_no_name(root->fs_info, location); | ||
| 3334 | if (IS_ERR(new_root)) { | ||
| 3335 | err = PTR_ERR(new_root); | ||
| 3336 | goto out; | ||
| 3337 | } | ||
| 3338 | |||
| 3339 | if (btrfs_root_refs(&new_root->root_item) == 0) { | ||
| 3340 | err = -ENOENT; | ||
| 3341 | goto out; | ||
| 3342 | } | ||
| 3343 | |||
| 3344 | *sub_root = new_root; | ||
| 3345 | location->objectid = btrfs_root_dirid(&new_root->root_item); | ||
| 3346 | location->type = BTRFS_INODE_ITEM_KEY; | ||
| 3347 | location->offset = 0; | ||
| 3348 | err = 0; | ||
| 3349 | out: | ||
| 3350 | btrfs_free_path(path); | ||
| 3351 | return err; | ||
| 3096 | } | 3352 | } |
| 3097 | 3353 | ||
| 3098 | static void inode_tree_add(struct inode *inode) | 3354 | static void inode_tree_add(struct inode *inode) |
| @@ -3101,11 +3357,13 @@ static void inode_tree_add(struct inode *inode) | |||
| 3101 | struct btrfs_inode *entry; | 3357 | struct btrfs_inode *entry; |
| 3102 | struct rb_node **p; | 3358 | struct rb_node **p; |
| 3103 | struct rb_node *parent; | 3359 | struct rb_node *parent; |
| 3104 | |||
| 3105 | again: | 3360 | again: |
| 3106 | p = &root->inode_tree.rb_node; | 3361 | p = &root->inode_tree.rb_node; |
| 3107 | parent = NULL; | 3362 | parent = NULL; |
| 3108 | 3363 | ||
| 3364 | if (hlist_unhashed(&inode->i_hash)) | ||
| 3365 | return; | ||
| 3366 | |||
| 3109 | spin_lock(&root->inode_lock); | 3367 | spin_lock(&root->inode_lock); |
| 3110 | while (*p) { | 3368 | while (*p) { |
| 3111 | parent = *p; | 3369 | parent = *p; |
| @@ -3132,13 +3390,87 @@ again: | |||
| 3132 | static void inode_tree_del(struct inode *inode) | 3390 | static void inode_tree_del(struct inode *inode) |
| 3133 | { | 3391 | { |
| 3134 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3392 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3393 | int empty = 0; | ||
| 3135 | 3394 | ||
| 3136 | spin_lock(&root->inode_lock); | 3395 | spin_lock(&root->inode_lock); |
| 3137 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | 3396 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
| 3138 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | 3397 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
| 3139 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 3398 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
| 3399 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
| 3140 | } | 3400 | } |
| 3141 | spin_unlock(&root->inode_lock); | 3401 | spin_unlock(&root->inode_lock); |
| 3402 | |||
| 3403 | if (empty && btrfs_root_refs(&root->root_item) == 0) { | ||
| 3404 | synchronize_srcu(&root->fs_info->subvol_srcu); | ||
| 3405 | spin_lock(&root->inode_lock); | ||
| 3406 | empty = RB_EMPTY_ROOT(&root->inode_tree); | ||
| 3407 | spin_unlock(&root->inode_lock); | ||
| 3408 | if (empty) | ||
| 3409 | btrfs_add_dead_root(root); | ||
| 3410 | } | ||
| 3411 | } | ||
| 3412 | |||
| 3413 | int btrfs_invalidate_inodes(struct btrfs_root *root) | ||
| 3414 | { | ||
| 3415 | struct rb_node *node; | ||
| 3416 | struct rb_node *prev; | ||
| 3417 | struct btrfs_inode *entry; | ||
| 3418 | struct inode *inode; | ||
| 3419 | u64 objectid = 0; | ||
| 3420 | |||
| 3421 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
| 3422 | |||
| 3423 | spin_lock(&root->inode_lock); | ||
| 3424 | again: | ||
| 3425 | node = root->inode_tree.rb_node; | ||
| 3426 | prev = NULL; | ||
| 3427 | while (node) { | ||
| 3428 | prev = node; | ||
| 3429 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
| 3430 | |||
| 3431 | if (objectid < entry->vfs_inode.i_ino) | ||
| 3432 | node = node->rb_left; | ||
| 3433 | else if (objectid > entry->vfs_inode.i_ino) | ||
| 3434 | node = node->rb_right; | ||
| 3435 | else | ||
| 3436 | break; | ||
| 3437 | } | ||
| 3438 | if (!node) { | ||
| 3439 | while (prev) { | ||
| 3440 | entry = rb_entry(prev, struct btrfs_inode, rb_node); | ||
| 3441 | if (objectid <= entry->vfs_inode.i_ino) { | ||
| 3442 | node = prev; | ||
| 3443 | break; | ||
| 3444 | } | ||
| 3445 | prev = rb_next(prev); | ||
| 3446 | } | ||
| 3447 | } | ||
| 3448 | while (node) { | ||
| 3449 | entry = rb_entry(node, struct btrfs_inode, rb_node); | ||
| 3450 | objectid = entry->vfs_inode.i_ino + 1; | ||
| 3451 | inode = igrab(&entry->vfs_inode); | ||
| 3452 | if (inode) { | ||
| 3453 | spin_unlock(&root->inode_lock); | ||
| 3454 | if (atomic_read(&inode->i_count) > 1) | ||
| 3455 | d_prune_aliases(inode); | ||
| 3456 | /* | ||
| 3457 | * btrfs_drop_inode will remove it from | ||
| 3458 | * the inode cache when its usage count | ||
| 3459 | * hits zero. | ||
| 3460 | */ | ||
| 3461 | iput(inode); | ||
| 3462 | cond_resched(); | ||
| 3463 | spin_lock(&root->inode_lock); | ||
| 3464 | goto again; | ||
| 3465 | } | ||
| 3466 | |||
| 3467 | if (cond_resched_lock(&root->inode_lock)) | ||
| 3468 | goto again; | ||
| 3469 | |||
| 3470 | node = rb_next(node); | ||
| 3471 | } | ||
| 3472 | spin_unlock(&root->inode_lock); | ||
| 3473 | return 0; | ||
| 3142 | } | 3474 | } |
| 3143 | 3475 | ||
| 3144 | static noinline void init_btrfs_i(struct inode *inode) | 3476 | static noinline void init_btrfs_i(struct inode *inode) |
| @@ -3225,15 +3557,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, | |||
| 3225 | return inode; | 3557 | return inode; |
| 3226 | } | 3558 | } |
| 3227 | 3559 | ||
| 3560 | static struct inode *new_simple_dir(struct super_block *s, | ||
| 3561 | struct btrfs_key *key, | ||
| 3562 | struct btrfs_root *root) | ||
| 3563 | { | ||
| 3564 | struct inode *inode = new_inode(s); | ||
| 3565 | |||
| 3566 | if (!inode) | ||
| 3567 | return ERR_PTR(-ENOMEM); | ||
| 3568 | |||
| 3569 | init_btrfs_i(inode); | ||
| 3570 | |||
| 3571 | BTRFS_I(inode)->root = root; | ||
| 3572 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | ||
| 3573 | BTRFS_I(inode)->dummy_inode = 1; | ||
| 3574 | |||
| 3575 | inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; | ||
| 3576 | inode->i_op = &simple_dir_inode_operations; | ||
| 3577 | inode->i_fop = &simple_dir_operations; | ||
| 3578 | inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; | ||
| 3579 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
| 3580 | |||
| 3581 | return inode; | ||
| 3582 | } | ||
| 3583 | |||
| 3228 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | 3584 | struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) |
| 3229 | { | 3585 | { |
| 3230 | struct inode *inode; | 3586 | struct inode *inode; |
| 3231 | struct btrfs_inode *bi = BTRFS_I(dir); | 3587 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 3232 | struct btrfs_root *root = bi->root; | ||
| 3233 | struct btrfs_root *sub_root = root; | 3588 | struct btrfs_root *sub_root = root; |
| 3234 | struct btrfs_key location; | 3589 | struct btrfs_key location; |
| 3590 | int index; | ||
| 3235 | int ret; | 3591 | int ret; |
| 3236 | 3592 | ||
| 3593 | dentry->d_op = &btrfs_dentry_operations; | ||
| 3594 | |||
| 3237 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 3595 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
| 3238 | return ERR_PTR(-ENAMETOOLONG); | 3596 | return ERR_PTR(-ENAMETOOLONG); |
| 3239 | 3597 | ||
| @@ -3242,29 +3600,52 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
| 3242 | if (ret < 0) | 3600 | if (ret < 0) |
| 3243 | return ERR_PTR(ret); | 3601 | return ERR_PTR(ret); |
| 3244 | 3602 | ||
| 3245 | inode = NULL; | 3603 | if (location.objectid == 0) |
| 3246 | if (location.objectid) { | 3604 | return NULL; |
| 3247 | ret = fixup_tree_root_location(root, &location, &sub_root, | 3605 | |
| 3248 | dentry); | 3606 | if (location.type == BTRFS_INODE_ITEM_KEY) { |
| 3249 | if (ret < 0) | 3607 | inode = btrfs_iget(dir->i_sb, &location, root); |
| 3250 | return ERR_PTR(ret); | 3608 | return inode; |
| 3251 | if (ret > 0) | 3609 | } |
| 3252 | return ERR_PTR(-ENOENT); | 3610 | |
| 3611 | BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); | ||
| 3612 | |||
| 3613 | index = srcu_read_lock(&root->fs_info->subvol_srcu); | ||
| 3614 | ret = fixup_tree_root_location(root, dir, dentry, | ||
| 3615 | &location, &sub_root); | ||
| 3616 | if (ret < 0) { | ||
| 3617 | if (ret != -ENOENT) | ||
| 3618 | inode = ERR_PTR(ret); | ||
| 3619 | else | ||
| 3620 | inode = new_simple_dir(dir->i_sb, &location, sub_root); | ||
| 3621 | } else { | ||
| 3253 | inode = btrfs_iget(dir->i_sb, &location, sub_root); | 3622 | inode = btrfs_iget(dir->i_sb, &location, sub_root); |
| 3254 | if (IS_ERR(inode)) | ||
| 3255 | return ERR_CAST(inode); | ||
| 3256 | } | 3623 | } |
| 3624 | srcu_read_unlock(&root->fs_info->subvol_srcu, index); | ||
| 3625 | |||
| 3257 | return inode; | 3626 | return inode; |
| 3258 | } | 3627 | } |
| 3259 | 3628 | ||
| 3629 | static int btrfs_dentry_delete(struct dentry *dentry) | ||
| 3630 | { | ||
| 3631 | struct btrfs_root *root; | ||
| 3632 | |||
| 3633 | if (!dentry->d_inode && !IS_ROOT(dentry)) | ||
| 3634 | dentry = dentry->d_parent; | ||
| 3635 | |||
| 3636 | if (dentry->d_inode) { | ||
| 3637 | root = BTRFS_I(dentry->d_inode)->root; | ||
| 3638 | if (btrfs_root_refs(&root->root_item) == 0) | ||
| 3639 | return 1; | ||
| 3640 | } | ||
| 3641 | return 0; | ||
| 3642 | } | ||
| 3643 | |||
| 3260 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | 3644 | static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, |
| 3261 | struct nameidata *nd) | 3645 | struct nameidata *nd) |
| 3262 | { | 3646 | { |
| 3263 | struct inode *inode; | 3647 | struct inode *inode; |
| 3264 | 3648 | ||
| 3265 | if (dentry->d_name.len > BTRFS_NAME_LEN) | ||
| 3266 | return ERR_PTR(-ENAMETOOLONG); | ||
| 3267 | |||
| 3268 | inode = btrfs_lookup_dentry(dir, dentry); | 3649 | inode = btrfs_lookup_dentry(dir, dentry); |
| 3269 | if (IS_ERR(inode)) | 3650 | if (IS_ERR(inode)) |
| 3270 | return ERR_CAST(inode); | 3651 | return ERR_CAST(inode); |
| @@ -3603,9 +3984,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 3603 | if (ret != 0) | 3984 | if (ret != 0) |
| 3604 | goto fail; | 3985 | goto fail; |
| 3605 | 3986 | ||
| 3606 | if (objectid > root->highest_inode) | ||
| 3607 | root->highest_inode = objectid; | ||
| 3608 | |||
| 3609 | inode->i_uid = current_fsuid(); | 3987 | inode->i_uid = current_fsuid(); |
| 3610 | 3988 | ||
| 3611 | if (dir && (dir->i_mode & S_ISGID)) { | 3989 | if (dir && (dir->i_mode & S_ISGID)) { |
| @@ -3673,26 +4051,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
| 3673 | struct inode *parent_inode, struct inode *inode, | 4051 | struct inode *parent_inode, struct inode *inode, |
| 3674 | const char *name, int name_len, int add_backref, u64 index) | 4052 | const char *name, int name_len, int add_backref, u64 index) |
| 3675 | { | 4053 | { |
| 3676 | int ret; | 4054 | int ret = 0; |
| 3677 | struct btrfs_key key; | 4055 | struct btrfs_key key; |
| 3678 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; | 4056 | struct btrfs_root *root = BTRFS_I(parent_inode)->root; |
| 3679 | 4057 | ||
| 3680 | key.objectid = inode->i_ino; | 4058 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
| 3681 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 4059 | memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); |
| 3682 | key.offset = 0; | 4060 | } else { |
| 4061 | key.objectid = inode->i_ino; | ||
| 4062 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 4063 | key.offset = 0; | ||
| 4064 | } | ||
| 4065 | |||
| 4066 | if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
| 4067 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
| 4068 | key.objectid, root->root_key.objectid, | ||
| 4069 | parent_inode->i_ino, | ||
| 4070 | index, name, name_len); | ||
| 4071 | } else if (add_backref) { | ||
| 4072 | ret = btrfs_insert_inode_ref(trans, root, | ||
| 4073 | name, name_len, inode->i_ino, | ||
| 4074 | parent_inode->i_ino, index); | ||
| 4075 | } | ||
| 3683 | 4076 | ||
| 3684 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | ||
| 3685 | parent_inode->i_ino, | ||
| 3686 | &key, btrfs_inode_type(inode), | ||
| 3687 | index); | ||
| 3688 | if (ret == 0) { | 4077 | if (ret == 0) { |
| 3689 | if (add_backref) { | 4078 | ret = btrfs_insert_dir_item(trans, root, name, name_len, |
| 3690 | ret = btrfs_insert_inode_ref(trans, root, | 4079 | parent_inode->i_ino, &key, |
| 3691 | name, name_len, | 4080 | btrfs_inode_type(inode), index); |
| 3692 | inode->i_ino, | 4081 | BUG_ON(ret); |
| 3693 | parent_inode->i_ino, | 4082 | |
| 3694 | index); | ||
| 3695 | } | ||
| 3696 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | 4083 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
| 3697 | name_len * 2); | 4084 | name_len * 2); |
| 3698 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; | 4085 | parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; |
| @@ -3732,11 +4119,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 3732 | if (!new_valid_dev(rdev)) | 4119 | if (!new_valid_dev(rdev)) |
| 3733 | return -EINVAL; | 4120 | return -EINVAL; |
| 3734 | 4121 | ||
| 3735 | err = btrfs_check_metadata_free_space(root); | 4122 | /* |
| 4123 | * 2 for inode item and ref | ||
| 4124 | * 2 for dir items | ||
| 4125 | * 1 for xattr if selinux is on | ||
| 4126 | */ | ||
| 4127 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3736 | if (err) | 4128 | if (err) |
| 3737 | goto fail; | 4129 | return err; |
| 3738 | 4130 | ||
| 3739 | trans = btrfs_start_transaction(root, 1); | 4131 | trans = btrfs_start_transaction(root, 1); |
| 4132 | if (!trans) | ||
| 4133 | goto fail; | ||
| 3740 | btrfs_set_trans_block_group(trans, dir); | 4134 | btrfs_set_trans_block_group(trans, dir); |
| 3741 | 4135 | ||
| 3742 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4136 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -3774,6 +4168,7 @@ out_unlock: | |||
| 3774 | nr = trans->blocks_used; | 4168 | nr = trans->blocks_used; |
| 3775 | btrfs_end_transaction_throttle(trans, root); | 4169 | btrfs_end_transaction_throttle(trans, root); |
| 3776 | fail: | 4170 | fail: |
| 4171 | btrfs_unreserve_metadata_space(root, 5); | ||
| 3777 | if (drop_inode) { | 4172 | if (drop_inode) { |
| 3778 | inode_dec_link_count(inode); | 4173 | inode_dec_link_count(inode); |
| 3779 | iput(inode); | 4174 | iput(inode); |
| @@ -3794,10 +4189,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 3794 | u64 objectid; | 4189 | u64 objectid; |
| 3795 | u64 index = 0; | 4190 | u64 index = 0; |
| 3796 | 4191 | ||
| 3797 | err = btrfs_check_metadata_free_space(root); | 4192 | /* |
| 4193 | * 2 for inode item and ref | ||
| 4194 | * 2 for dir items | ||
| 4195 | * 1 for xattr if selinux is on | ||
| 4196 | */ | ||
| 4197 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3798 | if (err) | 4198 | if (err) |
| 3799 | goto fail; | 4199 | return err; |
| 4200 | |||
| 3800 | trans = btrfs_start_transaction(root, 1); | 4201 | trans = btrfs_start_transaction(root, 1); |
| 4202 | if (!trans) | ||
| 4203 | goto fail; | ||
| 3801 | btrfs_set_trans_block_group(trans, dir); | 4204 | btrfs_set_trans_block_group(trans, dir); |
| 3802 | 4205 | ||
| 3803 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4206 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -3838,6 +4241,7 @@ out_unlock: | |||
| 3838 | nr = trans->blocks_used; | 4241 | nr = trans->blocks_used; |
| 3839 | btrfs_end_transaction_throttle(trans, root); | 4242 | btrfs_end_transaction_throttle(trans, root); |
| 3840 | fail: | 4243 | fail: |
| 4244 | btrfs_unreserve_metadata_space(root, 5); | ||
| 3841 | if (drop_inode) { | 4245 | if (drop_inode) { |
| 3842 | inode_dec_link_count(inode); | 4246 | inode_dec_link_count(inode); |
| 3843 | iput(inode); | 4247 | iput(inode); |
| @@ -3860,10 +4264,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 3860 | if (inode->i_nlink == 0) | 4264 | if (inode->i_nlink == 0) |
| 3861 | return -ENOENT; | 4265 | return -ENOENT; |
| 3862 | 4266 | ||
| 3863 | btrfs_inc_nlink(inode); | 4267 | /* |
| 3864 | err = btrfs_check_metadata_free_space(root); | 4268 | * 1 item for inode ref |
| 4269 | * 2 items for dir items | ||
| 4270 | */ | ||
| 4271 | err = btrfs_reserve_metadata_space(root, 3); | ||
| 3865 | if (err) | 4272 | if (err) |
| 3866 | goto fail; | 4273 | return err; |
| 4274 | |||
| 4275 | btrfs_inc_nlink(inode); | ||
| 4276 | |||
| 3867 | err = btrfs_set_inode_index(dir, &index); | 4277 | err = btrfs_set_inode_index(dir, &index); |
| 3868 | if (err) | 4278 | if (err) |
| 3869 | goto fail; | 4279 | goto fail; |
| @@ -3875,20 +4285,19 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 3875 | 4285 | ||
| 3876 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4286 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); |
| 3877 | 4287 | ||
| 3878 | if (err) | 4288 | if (err) { |
| 3879 | drop_inode = 1; | ||
| 3880 | |||
| 3881 | btrfs_update_inode_block_group(trans, dir); | ||
| 3882 | err = btrfs_update_inode(trans, root, inode); | ||
| 3883 | |||
| 3884 | if (err) | ||
| 3885 | drop_inode = 1; | 4289 | drop_inode = 1; |
| 4290 | } else { | ||
| 4291 | btrfs_update_inode_block_group(trans, dir); | ||
| 4292 | err = btrfs_update_inode(trans, root, inode); | ||
| 4293 | BUG_ON(err); | ||
| 4294 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
| 4295 | } | ||
| 3886 | 4296 | ||
| 3887 | nr = trans->blocks_used; | 4297 | nr = trans->blocks_used; |
| 3888 | |||
| 3889 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
| 3890 | btrfs_end_transaction_throttle(trans, root); | 4298 | btrfs_end_transaction_throttle(trans, root); |
| 3891 | fail: | 4299 | fail: |
| 4300 | btrfs_unreserve_metadata_space(root, 3); | ||
| 3892 | if (drop_inode) { | 4301 | if (drop_inode) { |
| 3893 | inode_dec_link_count(inode); | 4302 | inode_dec_link_count(inode); |
| 3894 | iput(inode); | 4303 | iput(inode); |
| @@ -3908,17 +4317,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 3908 | u64 index = 0; | 4317 | u64 index = 0; |
| 3909 | unsigned long nr = 1; | 4318 | unsigned long nr = 1; |
| 3910 | 4319 | ||
| 3911 | err = btrfs_check_metadata_free_space(root); | 4320 | /* |
| 4321 | * 2 items for inode and ref | ||
| 4322 | * 2 items for dir items | ||
| 4323 | * 1 for xattr if selinux is on | ||
| 4324 | */ | ||
| 4325 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 3912 | if (err) | 4326 | if (err) |
| 3913 | goto out_unlock; | 4327 | return err; |
| 3914 | 4328 | ||
| 3915 | trans = btrfs_start_transaction(root, 1); | 4329 | trans = btrfs_start_transaction(root, 1); |
| 3916 | btrfs_set_trans_block_group(trans, dir); | 4330 | if (!trans) { |
| 3917 | 4331 | err = -ENOMEM; | |
| 3918 | if (IS_ERR(trans)) { | ||
| 3919 | err = PTR_ERR(trans); | ||
| 3920 | goto out_unlock; | 4332 | goto out_unlock; |
| 3921 | } | 4333 | } |
| 4334 | btrfs_set_trans_block_group(trans, dir); | ||
| 3922 | 4335 | ||
| 3923 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 4336 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| 3924 | if (err) { | 4337 | if (err) { |
| @@ -3967,6 +4380,7 @@ out_fail: | |||
| 3967 | btrfs_end_transaction_throttle(trans, root); | 4380 | btrfs_end_transaction_throttle(trans, root); |
| 3968 | 4381 | ||
| 3969 | out_unlock: | 4382 | out_unlock: |
| 4383 | btrfs_unreserve_metadata_space(root, 5); | ||
| 3970 | if (drop_on_err) | 4384 | if (drop_on_err) |
| 3971 | iput(inode); | 4385 | iput(inode); |
| 3972 | btrfs_btree_balance_dirty(root, nr); | 4386 | btrfs_btree_balance_dirty(root, nr); |
| @@ -4064,11 +4478,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 4064 | int compressed; | 4478 | int compressed; |
| 4065 | 4479 | ||
| 4066 | again: | 4480 | again: |
| 4067 | spin_lock(&em_tree->lock); | 4481 | read_lock(&em_tree->lock); |
| 4068 | em = lookup_extent_mapping(em_tree, start, len); | 4482 | em = lookup_extent_mapping(em_tree, start, len); |
| 4069 | if (em) | 4483 | if (em) |
| 4070 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 4484 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 4071 | spin_unlock(&em_tree->lock); | 4485 | read_unlock(&em_tree->lock); |
| 4072 | 4486 | ||
| 4073 | if (em) { | 4487 | if (em) { |
| 4074 | if (em->start > start || em->start + em->len <= start) | 4488 | if (em->start > start || em->start + em->len <= start) |
| @@ -4215,6 +4629,11 @@ again: | |||
| 4215 | map = kmap(page); | 4629 | map = kmap(page); |
| 4216 | read_extent_buffer(leaf, map + pg_offset, ptr, | 4630 | read_extent_buffer(leaf, map + pg_offset, ptr, |
| 4217 | copy_size); | 4631 | copy_size); |
| 4632 | if (pg_offset + copy_size < PAGE_CACHE_SIZE) { | ||
| 4633 | memset(map + pg_offset + copy_size, 0, | ||
| 4634 | PAGE_CACHE_SIZE - pg_offset - | ||
| 4635 | copy_size); | ||
| 4636 | } | ||
| 4218 | kunmap(page); | 4637 | kunmap(page); |
| 4219 | } | 4638 | } |
| 4220 | flush_dcache_page(page); | 4639 | flush_dcache_page(page); |
| @@ -4259,7 +4678,7 @@ insert: | |||
| 4259 | } | 4678 | } |
| 4260 | 4679 | ||
| 4261 | err = 0; | 4680 | err = 0; |
| 4262 | spin_lock(&em_tree->lock); | 4681 | write_lock(&em_tree->lock); |
| 4263 | ret = add_extent_mapping(em_tree, em); | 4682 | ret = add_extent_mapping(em_tree, em); |
| 4264 | /* it is possible that someone inserted the extent into the tree | 4683 | /* it is possible that someone inserted the extent into the tree |
| 4265 | * while we had the lock dropped. It is also possible that | 4684 | * while we had the lock dropped. It is also possible that |
| @@ -4299,7 +4718,7 @@ insert: | |||
| 4299 | err = 0; | 4718 | err = 0; |
| 4300 | } | 4719 | } |
| 4301 | } | 4720 | } |
| 4302 | spin_unlock(&em_tree->lock); | 4721 | write_unlock(&em_tree->lock); |
| 4303 | out: | 4722 | out: |
| 4304 | if (path) | 4723 | if (path) |
| 4305 | btrfs_free_path(path); | 4724 | btrfs_free_path(path); |
| @@ -4398,13 +4817,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 4398 | u64 page_start = page_offset(page); | 4817 | u64 page_start = page_offset(page); |
| 4399 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 4818 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 4400 | 4819 | ||
| 4820 | |||
| 4821 | /* | ||
| 4822 | * we have the page locked, so new writeback can't start, | ||
| 4823 | * and the dirty bit won't be cleared while we are here. | ||
| 4824 | * | ||
| 4825 | * Wait for IO on this page so that we can safely clear | ||
| 4826 | * the PagePrivate2 bit and do ordered accounting | ||
| 4827 | */ | ||
| 4401 | wait_on_page_writeback(page); | 4828 | wait_on_page_writeback(page); |
| 4829 | |||
| 4402 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 4830 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 4403 | if (offset) { | 4831 | if (offset) { |
| 4404 | btrfs_releasepage(page, GFP_NOFS); | 4832 | btrfs_releasepage(page, GFP_NOFS); |
| 4405 | return; | 4833 | return; |
| 4406 | } | 4834 | } |
| 4407 | |||
| 4408 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4835 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
| 4409 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 4836 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, |
| 4410 | page_offset(page)); | 4837 | page_offset(page)); |
| @@ -4415,16 +4842,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 4415 | */ | 4842 | */ |
| 4416 | clear_extent_bit(tree, page_start, page_end, | 4843 | clear_extent_bit(tree, page_start, page_end, |
| 4417 | EXTENT_DIRTY | EXTENT_DELALLOC | | 4844 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 4418 | EXTENT_LOCKED, 1, 0, GFP_NOFS); | 4845 | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, |
| 4419 | btrfs_finish_ordered_io(page->mapping->host, | 4846 | NULL, GFP_NOFS); |
| 4420 | page_start, page_end); | 4847 | /* |
| 4848 | * whoever cleared the private bit is responsible | ||
| 4849 | * for the finish_ordered_io | ||
| 4850 | */ | ||
| 4851 | if (TestClearPagePrivate2(page)) { | ||
| 4852 | btrfs_finish_ordered_io(page->mapping->host, | ||
| 4853 | page_start, page_end); | ||
| 4854 | } | ||
| 4421 | btrfs_put_ordered_extent(ordered); | 4855 | btrfs_put_ordered_extent(ordered); |
| 4422 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4856 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
| 4423 | } | 4857 | } |
| 4424 | clear_extent_bit(tree, page_start, page_end, | 4858 | clear_extent_bit(tree, page_start, page_end, |
| 4425 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 4859 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 4426 | EXTENT_ORDERED, | 4860 | EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); |
| 4427 | 1, 1, GFP_NOFS); | ||
| 4428 | __btrfs_releasepage(page, GFP_NOFS); | 4861 | __btrfs_releasepage(page, GFP_NOFS); |
| 4429 | 4862 | ||
| 4430 | ClearPageChecked(page); | 4863 | ClearPageChecked(page); |
| @@ -4473,6 +4906,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 4473 | goto out; | 4906 | goto out; |
| 4474 | } | 4907 | } |
| 4475 | 4908 | ||
| 4909 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 4910 | if (ret) { | ||
| 4911 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 4912 | ret = VM_FAULT_SIGBUS; | ||
| 4913 | goto out; | ||
| 4914 | } | ||
| 4915 | |||
| 4476 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 4916 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
| 4477 | again: | 4917 | again: |
| 4478 | lock_page(page); | 4918 | lock_page(page); |
| @@ -4504,7 +4944,24 @@ again: | |||
| 4504 | goto again; | 4944 | goto again; |
| 4505 | } | 4945 | } |
| 4506 | 4946 | ||
| 4507 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 4947 | /* |
| 4948 | * XXX - page_mkwrite gets called every time the page is dirtied, even | ||
| 4949 | * if it was already dirty, so for space accounting reasons we need to | ||
| 4950 | * clear any delalloc bits for the range we are fixing to save. There | ||
| 4951 | * is probably a better way to do this, but for now keep consistent with | ||
| 4952 | * prepare_pages in the normal write path. | ||
| 4953 | */ | ||
| 4954 | clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, | ||
| 4955 | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, | ||
| 4956 | GFP_NOFS); | ||
| 4957 | |||
| 4958 | ret = btrfs_set_extent_delalloc(inode, page_start, page_end); | ||
| 4959 | if (ret) { | ||
| 4960 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 4961 | ret = VM_FAULT_SIGBUS; | ||
| 4962 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 4963 | goto out_unlock; | ||
| 4964 | } | ||
| 4508 | ret = 0; | 4965 | ret = 0; |
| 4509 | 4966 | ||
| 4510 | /* page is wholly or partially inside EOF */ | 4967 | /* page is wholly or partially inside EOF */ |
| @@ -4521,11 +4978,15 @@ again: | |||
| 4521 | } | 4978 | } |
| 4522 | ClearPageChecked(page); | 4979 | ClearPageChecked(page); |
| 4523 | set_page_dirty(page); | 4980 | set_page_dirty(page); |
| 4981 | SetPageUptodate(page); | ||
| 4524 | 4982 | ||
| 4525 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 4983 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
| 4526 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4984 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 4527 | 4985 | ||
| 4528 | out_unlock: | 4986 | out_unlock: |
| 4987 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 4988 | if (!ret) | ||
| 4989 | return VM_FAULT_LOCKED; | ||
| 4529 | unlock_page(page); | 4990 | unlock_page(page); |
| 4530 | out: | 4991 | out: |
| 4531 | return ret; | 4992 | return ret; |
| @@ -4594,11 +5055,11 @@ out: | |||
| 4594 | * create a new subvolume directory/inode (helper for the ioctl). | 5055 | * create a new subvolume directory/inode (helper for the ioctl). |
| 4595 | */ | 5056 | */ |
| 4596 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 5057 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
| 4597 | struct btrfs_root *new_root, struct dentry *dentry, | 5058 | struct btrfs_root *new_root, |
| 4598 | u64 new_dirid, u64 alloc_hint) | 5059 | u64 new_dirid, u64 alloc_hint) |
| 4599 | { | 5060 | { |
| 4600 | struct inode *inode; | 5061 | struct inode *inode; |
| 4601 | int error; | 5062 | int err; |
| 4602 | u64 index = 0; | 5063 | u64 index = 0; |
| 4603 | 5064 | ||
| 4604 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, | 5065 | inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, |
| @@ -4611,11 +5072,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
| 4611 | inode->i_nlink = 1; | 5072 | inode->i_nlink = 1; |
| 4612 | btrfs_i_size_write(inode, 0); | 5073 | btrfs_i_size_write(inode, 0); |
| 4613 | 5074 | ||
| 4614 | error = btrfs_update_inode(trans, new_root, inode); | 5075 | err = btrfs_update_inode(trans, new_root, inode); |
| 4615 | if (error) | 5076 | BUG_ON(err); |
| 4616 | return error; | ||
| 4617 | 5077 | ||
| 4618 | d_instantiate(dentry, inode); | 5078 | iput(inode); |
| 4619 | return 0; | 5079 | return 0; |
| 4620 | } | 5080 | } |
| 4621 | 5081 | ||
| @@ -4641,6 +5101,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 4641 | return NULL; | 5101 | return NULL; |
| 4642 | ei->last_trans = 0; | 5102 | ei->last_trans = 0; |
| 4643 | ei->logged_trans = 0; | 5103 | ei->logged_trans = 0; |
| 5104 | ei->outstanding_extents = 0; | ||
| 5105 | ei->reserved_extents = 0; | ||
| 5106 | spin_lock_init(&ei->accounting_lock); | ||
| 4644 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 5107 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| 4645 | INIT_LIST_HEAD(&ei->i_orphan); | 5108 | INIT_LIST_HEAD(&ei->i_orphan); |
| 4646 | INIT_LIST_HEAD(&ei->ordered_operations); | 5109 | INIT_LIST_HEAD(&ei->ordered_operations); |
| @@ -4693,6 +5156,16 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 4693 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); | 5156 | kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); |
| 4694 | } | 5157 | } |
| 4695 | 5158 | ||
| 5159 | void btrfs_drop_inode(struct inode *inode) | ||
| 5160 | { | ||
| 5161 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5162 | |||
| 5163 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) | ||
| 5164 | generic_delete_inode(inode); | ||
| 5165 | else | ||
| 5166 | generic_drop_inode(inode); | ||
| 5167 | } | ||
| 5168 | |||
| 4696 | static void init_once(void *foo) | 5169 | static void init_once(void *foo) |
| 4697 | { | 5170 | { |
| 4698 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; | 5171 | struct btrfs_inode *ei = (struct btrfs_inode *) foo; |
| @@ -4761,31 +5234,37 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4761 | { | 5234 | { |
| 4762 | struct btrfs_trans_handle *trans; | 5235 | struct btrfs_trans_handle *trans; |
| 4763 | struct btrfs_root *root = BTRFS_I(old_dir)->root; | 5236 | struct btrfs_root *root = BTRFS_I(old_dir)->root; |
| 5237 | struct btrfs_root *dest = BTRFS_I(new_dir)->root; | ||
| 4764 | struct inode *new_inode = new_dentry->d_inode; | 5238 | struct inode *new_inode = new_dentry->d_inode; |
| 4765 | struct inode *old_inode = old_dentry->d_inode; | 5239 | struct inode *old_inode = old_dentry->d_inode; |
| 4766 | struct timespec ctime = CURRENT_TIME; | 5240 | struct timespec ctime = CURRENT_TIME; |
| 4767 | u64 index = 0; | 5241 | u64 index = 0; |
| 5242 | u64 root_objectid; | ||
| 4768 | int ret; | 5243 | int ret; |
| 4769 | 5244 | ||
| 4770 | /* we're not allowed to rename between subvolumes */ | 5245 | if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
| 4771 | if (BTRFS_I(old_inode)->root->root_key.objectid != | 5246 | return -EPERM; |
| 4772 | BTRFS_I(new_dir)->root->root_key.objectid) | 5247 | |
| 5248 | /* we only allow rename subvolume link between subvolumes */ | ||
| 5249 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) | ||
| 4773 | return -EXDEV; | 5250 | return -EXDEV; |
| 4774 | 5251 | ||
| 5252 | if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || | ||
| 5253 | (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) | ||
| 5254 | return -ENOTEMPTY; | ||
| 5255 | |||
| 4775 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 5256 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
| 4776 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { | 5257 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 4777 | return -ENOTEMPTY; | 5258 | return -ENOTEMPTY; |
| 4778 | } | ||
| 4779 | 5259 | ||
| 4780 | /* to rename a snapshot or subvolume, we need to juggle the | 5260 | /* |
| 4781 | * backrefs. This isn't coded yet | 5261 | * 2 items for dir items |
| 5262 | * 1 item for orphan entry | ||
| 5263 | * 1 item for ref | ||
| 4782 | */ | 5264 | */ |
| 4783 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 5265 | ret = btrfs_reserve_metadata_space(root, 4); |
| 4784 | return -EXDEV; | ||
| 4785 | |||
| 4786 | ret = btrfs_check_metadata_free_space(root); | ||
| 4787 | if (ret) | 5266 | if (ret) |
| 4788 | goto out_unlock; | 5267 | return ret; |
| 4789 | 5268 | ||
| 4790 | /* | 5269 | /* |
| 4791 | * we're using rename to replace one file with another. | 5270 | * we're using rename to replace one file with another. |
| @@ -4796,8 +5275,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4796 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 5275 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
| 4797 | filemap_flush(old_inode->i_mapping); | 5276 | filemap_flush(old_inode->i_mapping); |
| 4798 | 5277 | ||
| 5278 | /* close the racy window with snapshot create/destroy ioctl */ | ||
| 5279 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
| 5280 | down_read(&root->fs_info->subvol_sem); | ||
| 5281 | |||
| 4799 | trans = btrfs_start_transaction(root, 1); | 5282 | trans = btrfs_start_transaction(root, 1); |
| 5283 | btrfs_set_trans_block_group(trans, new_dir); | ||
| 5284 | |||
| 5285 | if (dest != root) | ||
| 5286 | btrfs_record_root_in_trans(trans, dest); | ||
| 5287 | |||
| 5288 | ret = btrfs_set_inode_index(new_dir, &index); | ||
| 5289 | if (ret) | ||
| 5290 | goto out_fail; | ||
| 4800 | 5291 | ||
| 5292 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { | ||
| 5293 | /* force full log commit if subvolume involved. */ | ||
| 5294 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 5295 | } else { | ||
| 5296 | ret = btrfs_insert_inode_ref(trans, dest, | ||
| 5297 | new_dentry->d_name.name, | ||
| 5298 | new_dentry->d_name.len, | ||
| 5299 | old_inode->i_ino, | ||
| 5300 | new_dir->i_ino, index); | ||
| 5301 | if (ret) | ||
| 5302 | goto out_fail; | ||
| 5303 | /* | ||
| 5304 | * this is an ugly little race, but the rename is required | ||
| 5305 | * to make sure that if we crash, the inode is either at the | ||
| 5306 | * old name or the new one. pinning the log transaction lets | ||
| 5307 | * us make sure we don't allow a log commit to come in after | ||
| 5308 | * we unlink the name but before we add the new name back in. | ||
| 5309 | */ | ||
| 5310 | btrfs_pin_log_trans(root); | ||
| 5311 | } | ||
| 4801 | /* | 5312 | /* |
| 4802 | * make sure the inode gets flushed if it is replacing | 5313 | * make sure the inode gets flushed if it is replacing |
| 4803 | * something. | 5314 | * something. |
| @@ -4807,18 +5318,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4807 | btrfs_add_ordered_operation(trans, root, old_inode); | 5318 | btrfs_add_ordered_operation(trans, root, old_inode); |
| 4808 | } | 5319 | } |
| 4809 | 5320 | ||
| 4810 | /* | ||
| 4811 | * this is an ugly little race, but the rename is required to make | ||
| 4812 | * sure that if we crash, the inode is either at the old name | ||
| 4813 | * or the new one. pinning the log transaction lets us make sure | ||
| 4814 | * we don't allow a log commit to come in after we unlink the | ||
| 4815 | * name but before we add the new name back in. | ||
| 4816 | */ | ||
| 4817 | btrfs_pin_log_trans(root); | ||
| 4818 | |||
| 4819 | btrfs_set_trans_block_group(trans, new_dir); | ||
| 4820 | |||
| 4821 | btrfs_inc_nlink(old_dentry->d_inode); | ||
| 4822 | old_dir->i_ctime = old_dir->i_mtime = ctime; | 5321 | old_dir->i_ctime = old_dir->i_mtime = ctime; |
| 4823 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 5322 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
| 4824 | old_inode->i_ctime = ctime; | 5323 | old_inode->i_ctime = ctime; |
| @@ -4826,47 +5325,60 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 4826 | if (old_dentry->d_parent != new_dentry->d_parent) | 5325 | if (old_dentry->d_parent != new_dentry->d_parent) |
| 4827 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | 5326 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); |
| 4828 | 5327 | ||
| 4829 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | 5328 | if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { |
| 4830 | old_dentry->d_name.name, | 5329 | root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; |
| 4831 | old_dentry->d_name.len); | 5330 | ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, |
| 4832 | if (ret) | 5331 | old_dentry->d_name.name, |
| 4833 | goto out_fail; | 5332 | old_dentry->d_name.len); |
| 5333 | } else { | ||
| 5334 | btrfs_inc_nlink(old_dentry->d_inode); | ||
| 5335 | ret = btrfs_unlink_inode(trans, root, old_dir, | ||
| 5336 | old_dentry->d_inode, | ||
| 5337 | old_dentry->d_name.name, | ||
| 5338 | old_dentry->d_name.len); | ||
| 5339 | } | ||
| 5340 | BUG_ON(ret); | ||
| 4834 | 5341 | ||
| 4835 | if (new_inode) { | 5342 | if (new_inode) { |
| 4836 | new_inode->i_ctime = CURRENT_TIME; | 5343 | new_inode->i_ctime = CURRENT_TIME; |
| 4837 | ret = btrfs_unlink_inode(trans, root, new_dir, | 5344 | if (unlikely(new_inode->i_ino == |
| 4838 | new_dentry->d_inode, | 5345 | BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { |
| 4839 | new_dentry->d_name.name, | 5346 | root_objectid = BTRFS_I(new_inode)->location.objectid; |
| 4840 | new_dentry->d_name.len); | 5347 | ret = btrfs_unlink_subvol(trans, dest, new_dir, |
| 4841 | if (ret) | 5348 | root_objectid, |
| 4842 | goto out_fail; | 5349 | new_dentry->d_name.name, |
| 5350 | new_dentry->d_name.len); | ||
| 5351 | BUG_ON(new_inode->i_nlink == 0); | ||
| 5352 | } else { | ||
| 5353 | ret = btrfs_unlink_inode(trans, dest, new_dir, | ||
| 5354 | new_dentry->d_inode, | ||
| 5355 | new_dentry->d_name.name, | ||
| 5356 | new_dentry->d_name.len); | ||
| 5357 | } | ||
| 5358 | BUG_ON(ret); | ||
| 4843 | if (new_inode->i_nlink == 0) { | 5359 | if (new_inode->i_nlink == 0) { |
| 4844 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); | 5360 | ret = btrfs_orphan_add(trans, new_dentry->d_inode); |
| 4845 | if (ret) | 5361 | BUG_ON(ret); |
| 4846 | goto out_fail; | ||
| 4847 | } | 5362 | } |
| 4848 | |||
| 4849 | } | 5363 | } |
| 4850 | ret = btrfs_set_inode_index(new_dir, &index); | ||
| 4851 | if (ret) | ||
| 4852 | goto out_fail; | ||
| 4853 | 5364 | ||
| 4854 | ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, | 5365 | ret = btrfs_add_link(trans, new_dir, old_inode, |
| 4855 | old_inode, new_dentry->d_name.name, | 5366 | new_dentry->d_name.name, |
| 4856 | new_dentry->d_name.len, 1, index); | 5367 | new_dentry->d_name.len, 0, index); |
| 4857 | if (ret) | 5368 | BUG_ON(ret); |
| 4858 | goto out_fail; | ||
| 4859 | 5369 | ||
| 4860 | btrfs_log_new_name(trans, old_inode, old_dir, | 5370 | if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { |
| 4861 | new_dentry->d_parent); | 5371 | btrfs_log_new_name(trans, old_inode, old_dir, |
| 5372 | new_dentry->d_parent); | ||
| 5373 | btrfs_end_log_trans(root); | ||
| 5374 | } | ||
| 4862 | out_fail: | 5375 | out_fail: |
| 4863 | |||
| 4864 | /* this btrfs_end_log_trans just allows the current | ||
| 4865 | * log-sub transaction to complete | ||
| 4866 | */ | ||
| 4867 | btrfs_end_log_trans(root); | ||
| 4868 | btrfs_end_transaction_throttle(trans, root); | 5376 | btrfs_end_transaction_throttle(trans, root); |
| 4869 | out_unlock: | 5377 | |
| 5378 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | ||
| 5379 | up_read(&root->fs_info->subvol_sem); | ||
| 5380 | |||
| 5381 | btrfs_unreserve_metadata_space(root, 4); | ||
| 4870 | return ret; | 5382 | return ret; |
| 4871 | } | 5383 | } |
| 4872 | 5384 | ||
| @@ -4938,11 +5450,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 4938 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 5450 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| 4939 | return -ENAMETOOLONG; | 5451 | return -ENAMETOOLONG; |
| 4940 | 5452 | ||
| 4941 | err = btrfs_check_metadata_free_space(root); | 5453 | /* |
| 5454 | * 2 items for inode item and ref | ||
| 5455 | * 2 items for dir items | ||
| 5456 | * 1 item for xattr if selinux is on | ||
| 5457 | */ | ||
| 5458 | err = btrfs_reserve_metadata_space(root, 5); | ||
| 4942 | if (err) | 5459 | if (err) |
| 4943 | goto out_fail; | 5460 | return err; |
| 4944 | 5461 | ||
| 4945 | trans = btrfs_start_transaction(root, 1); | 5462 | trans = btrfs_start_transaction(root, 1); |
| 5463 | if (!trans) | ||
| 5464 | goto out_fail; | ||
| 4946 | btrfs_set_trans_block_group(trans, dir); | 5465 | btrfs_set_trans_block_group(trans, dir); |
| 4947 | 5466 | ||
| 4948 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | 5467 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); |
| @@ -5023,6 +5542,7 @@ out_unlock: | |||
| 5023 | nr = trans->blocks_used; | 5542 | nr = trans->blocks_used; |
| 5024 | btrfs_end_transaction_throttle(trans, root); | 5543 | btrfs_end_transaction_throttle(trans, root); |
| 5025 | out_fail: | 5544 | out_fail: |
| 5545 | btrfs_unreserve_metadata_space(root, 5); | ||
| 5026 | if (drop_inode) { | 5546 | if (drop_inode) { |
| 5027 | inode_dec_link_count(inode); | 5547 | inode_dec_link_count(inode); |
| 5028 | iput(inode); | 5548 | iput(inode); |
| @@ -5044,6 +5564,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5044 | 5564 | ||
| 5045 | while (num_bytes > 0) { | 5565 | while (num_bytes > 0) { |
| 5046 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5566 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
| 5567 | |||
| 5568 | ret = btrfs_reserve_metadata_space(root, 1); | ||
| 5569 | if (ret) | ||
| 5570 | goto out; | ||
| 5571 | |||
| 5047 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5572 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
| 5048 | root->sectorsize, 0, alloc_hint, | 5573 | root->sectorsize, 0, alloc_hint, |
| 5049 | (u64)-1, &ins, 1); | 5574 | (u64)-1, &ins, 1); |
| @@ -5058,9 +5583,12 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5058 | 0, 0, 0, | 5583 | 0, 0, 0, |
| 5059 | BTRFS_FILE_EXTENT_PREALLOC); | 5584 | BTRFS_FILE_EXTENT_PREALLOC); |
| 5060 | BUG_ON(ret); | 5585 | BUG_ON(ret); |
| 5586 | btrfs_drop_extent_cache(inode, cur_offset, | ||
| 5587 | cur_offset + ins.offset -1, 0); | ||
| 5061 | num_bytes -= ins.offset; | 5588 | num_bytes -= ins.offset; |
| 5062 | cur_offset += ins.offset; | 5589 | cur_offset += ins.offset; |
| 5063 | alloc_hint = ins.objectid + ins.offset; | 5590 | alloc_hint = ins.objectid + ins.offset; |
| 5591 | btrfs_unreserve_metadata_space(root, 1); | ||
| 5064 | } | 5592 | } |
| 5065 | out: | 5593 | out: |
| 5066 | if (cur_offset > start) { | 5594 | if (cur_offset > start) { |
| @@ -5201,7 +5729,7 @@ static int btrfs_permission(struct inode *inode, int mask) | |||
| 5201 | return generic_permission(inode, mask, btrfs_check_acl); | 5729 | return generic_permission(inode, mask, btrfs_check_acl); |
| 5202 | } | 5730 | } |
| 5203 | 5731 | ||
| 5204 | static struct inode_operations btrfs_dir_inode_operations = { | 5732 | static const struct inode_operations btrfs_dir_inode_operations = { |
| 5205 | .getattr = btrfs_getattr, | 5733 | .getattr = btrfs_getattr, |
| 5206 | .lookup = btrfs_lookup, | 5734 | .lookup = btrfs_lookup, |
| 5207 | .create = btrfs_create, | 5735 | .create = btrfs_create, |
| @@ -5219,11 +5747,12 @@ static struct inode_operations btrfs_dir_inode_operations = { | |||
| 5219 | .removexattr = btrfs_removexattr, | 5747 | .removexattr = btrfs_removexattr, |
| 5220 | .permission = btrfs_permission, | 5748 | .permission = btrfs_permission, |
| 5221 | }; | 5749 | }; |
| 5222 | static struct inode_operations btrfs_dir_ro_inode_operations = { | 5750 | static const struct inode_operations btrfs_dir_ro_inode_operations = { |
| 5223 | .lookup = btrfs_lookup, | 5751 | .lookup = btrfs_lookup, |
| 5224 | .permission = btrfs_permission, | 5752 | .permission = btrfs_permission, |
| 5225 | }; | 5753 | }; |
| 5226 | static struct file_operations btrfs_dir_file_operations = { | 5754 | |
| 5755 | static const struct file_operations btrfs_dir_file_operations = { | ||
| 5227 | .llseek = generic_file_llseek, | 5756 | .llseek = generic_file_llseek, |
| 5228 | .read = generic_read_dir, | 5757 | .read = generic_read_dir, |
| 5229 | .readdir = btrfs_real_readdir, | 5758 | .readdir = btrfs_real_readdir, |
| @@ -5245,6 +5774,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
| 5245 | .readpage_io_failed_hook = btrfs_io_failed_hook, | 5774 | .readpage_io_failed_hook = btrfs_io_failed_hook, |
| 5246 | .set_bit_hook = btrfs_set_bit_hook, | 5775 | .set_bit_hook = btrfs_set_bit_hook, |
| 5247 | .clear_bit_hook = btrfs_clear_bit_hook, | 5776 | .clear_bit_hook = btrfs_clear_bit_hook, |
| 5777 | .merge_extent_hook = btrfs_merge_extent_hook, | ||
| 5778 | .split_extent_hook = btrfs_split_extent_hook, | ||
| 5248 | }; | 5779 | }; |
| 5249 | 5780 | ||
| 5250 | /* | 5781 | /* |
| @@ -5259,7 +5790,7 @@ static struct extent_io_ops btrfs_extent_io_ops = { | |||
| 5259 | * | 5790 | * |
| 5260 | * For now we're avoiding this by dropping bmap. | 5791 | * For now we're avoiding this by dropping bmap. |
| 5261 | */ | 5792 | */ |
| 5262 | static struct address_space_operations btrfs_aops = { | 5793 | static const struct address_space_operations btrfs_aops = { |
| 5263 | .readpage = btrfs_readpage, | 5794 | .readpage = btrfs_readpage, |
| 5264 | .writepage = btrfs_writepage, | 5795 | .writepage = btrfs_writepage, |
| 5265 | .writepages = btrfs_writepages, | 5796 | .writepages = btrfs_writepages, |
| @@ -5269,16 +5800,17 @@ static struct address_space_operations btrfs_aops = { | |||
| 5269 | .invalidatepage = btrfs_invalidatepage, | 5800 | .invalidatepage = btrfs_invalidatepage, |
| 5270 | .releasepage = btrfs_releasepage, | 5801 | .releasepage = btrfs_releasepage, |
| 5271 | .set_page_dirty = btrfs_set_page_dirty, | 5802 | .set_page_dirty = btrfs_set_page_dirty, |
| 5803 | .error_remove_page = generic_error_remove_page, | ||
| 5272 | }; | 5804 | }; |
| 5273 | 5805 | ||
| 5274 | static struct address_space_operations btrfs_symlink_aops = { | 5806 | static const struct address_space_operations btrfs_symlink_aops = { |
| 5275 | .readpage = btrfs_readpage, | 5807 | .readpage = btrfs_readpage, |
| 5276 | .writepage = btrfs_writepage, | 5808 | .writepage = btrfs_writepage, |
| 5277 | .invalidatepage = btrfs_invalidatepage, | 5809 | .invalidatepage = btrfs_invalidatepage, |
| 5278 | .releasepage = btrfs_releasepage, | 5810 | .releasepage = btrfs_releasepage, |
| 5279 | }; | 5811 | }; |
| 5280 | 5812 | ||
| 5281 | static struct inode_operations btrfs_file_inode_operations = { | 5813 | static const struct inode_operations btrfs_file_inode_operations = { |
| 5282 | .truncate = btrfs_truncate, | 5814 | .truncate = btrfs_truncate, |
| 5283 | .getattr = btrfs_getattr, | 5815 | .getattr = btrfs_getattr, |
| 5284 | .setattr = btrfs_setattr, | 5816 | .setattr = btrfs_setattr, |
| @@ -5290,7 +5822,7 @@ static struct inode_operations btrfs_file_inode_operations = { | |||
| 5290 | .fallocate = btrfs_fallocate, | 5822 | .fallocate = btrfs_fallocate, |
| 5291 | .fiemap = btrfs_fiemap, | 5823 | .fiemap = btrfs_fiemap, |
| 5292 | }; | 5824 | }; |
| 5293 | static struct inode_operations btrfs_special_inode_operations = { | 5825 | static const struct inode_operations btrfs_special_inode_operations = { |
| 5294 | .getattr = btrfs_getattr, | 5826 | .getattr = btrfs_getattr, |
| 5295 | .setattr = btrfs_setattr, | 5827 | .setattr = btrfs_setattr, |
| 5296 | .permission = btrfs_permission, | 5828 | .permission = btrfs_permission, |
| @@ -5299,7 +5831,7 @@ static struct inode_operations btrfs_special_inode_operations = { | |||
| 5299 | .listxattr = btrfs_listxattr, | 5831 | .listxattr = btrfs_listxattr, |
| 5300 | .removexattr = btrfs_removexattr, | 5832 | .removexattr = btrfs_removexattr, |
| 5301 | }; | 5833 | }; |
| 5302 | static struct inode_operations btrfs_symlink_inode_operations = { | 5834 | static const struct inode_operations btrfs_symlink_inode_operations = { |
| 5303 | .readlink = generic_readlink, | 5835 | .readlink = generic_readlink, |
| 5304 | .follow_link = page_follow_link_light, | 5836 | .follow_link = page_follow_link_light, |
| 5305 | .put_link = page_put_link, | 5837 | .put_link = page_put_link, |
| @@ -5309,3 +5841,7 @@ static struct inode_operations btrfs_symlink_inode_operations = { | |||
| 5309 | .listxattr = btrfs_listxattr, | 5841 | .listxattr = btrfs_listxattr, |
| 5310 | .removexattr = btrfs_removexattr, | 5842 | .removexattr = btrfs_removexattr, |
| 5311 | }; | 5843 | }; |
| 5844 | |||
| 5845 | const struct dentry_operations btrfs_dentry_operations = { | ||
| 5846 | .d_delete = btrfs_dentry_delete, | ||
| 5847 | }; | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd88f25889f7..cdbb054102b9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 230 | struct btrfs_root_item root_item; | 230 | struct btrfs_root_item root_item; |
| 231 | struct btrfs_inode_item *inode_item; | 231 | struct btrfs_inode_item *inode_item; |
| 232 | struct extent_buffer *leaf; | 232 | struct extent_buffer *leaf; |
| 233 | struct btrfs_root *new_root = root; | 233 | struct btrfs_root *new_root; |
| 234 | struct inode *dir; | 234 | struct inode *dir = dentry->d_parent->d_inode; |
| 235 | int ret; | 235 | int ret; |
| 236 | int err; | 236 | int err; |
| 237 | u64 objectid; | 237 | u64 objectid; |
| @@ -239,9 +239,15 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 239 | u64 index = 0; | 239 | u64 index = 0; |
| 240 | unsigned long nr = 1; | 240 | unsigned long nr = 1; |
| 241 | 241 | ||
| 242 | ret = btrfs_check_metadata_free_space(root); | 242 | /* |
| 243 | * 1 - inode item | ||
| 244 | * 2 - refs | ||
| 245 | * 1 - root item | ||
| 246 | * 2 - dir items | ||
| 247 | */ | ||
| 248 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 243 | if (ret) | 249 | if (ret) |
| 244 | goto fail_commit; | 250 | return ret; |
| 245 | 251 | ||
| 246 | trans = btrfs_start_transaction(root, 1); | 252 | trans = btrfs_start_transaction(root, 1); |
| 247 | BUG_ON(!trans); | 253 | BUG_ON(!trans); |
| @@ -304,11 +310,17 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 304 | if (ret) | 310 | if (ret) |
| 305 | goto fail; | 311 | goto fail; |
| 306 | 312 | ||
| 313 | key.offset = (u64)-1; | ||
| 314 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
| 315 | BUG_ON(IS_ERR(new_root)); | ||
| 316 | |||
| 317 | btrfs_record_root_in_trans(trans, new_root); | ||
| 318 | |||
| 319 | ret = btrfs_create_subvol_root(trans, new_root, new_dirid, | ||
| 320 | BTRFS_I(dir)->block_group); | ||
| 307 | /* | 321 | /* |
| 308 | * insert the directory item | 322 | * insert the directory item |
| 309 | */ | 323 | */ |
| 310 | key.offset = (u64)-1; | ||
| 311 | dir = dentry->d_parent->d_inode; | ||
| 312 | ret = btrfs_set_inode_index(dir, &index); | 324 | ret = btrfs_set_inode_index(dir, &index); |
| 313 | BUG_ON(ret); | 325 | BUG_ON(ret); |
| 314 | 326 | ||
| @@ -322,43 +334,20 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 322 | ret = btrfs_update_inode(trans, root, dir); | 334 | ret = btrfs_update_inode(trans, root, dir); |
| 323 | BUG_ON(ret); | 335 | BUG_ON(ret); |
| 324 | 336 | ||
| 325 | /* add the backref first */ | ||
| 326 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | 337 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, |
| 327 | objectid, BTRFS_ROOT_BACKREF_KEY, | 338 | objectid, root->root_key.objectid, |
| 328 | root->root_key.objectid, | ||
| 329 | dir->i_ino, index, name, namelen); | 339 | dir->i_ino, index, name, namelen); |
| 330 | 340 | ||
| 331 | BUG_ON(ret); | 341 | BUG_ON(ret); |
| 332 | 342 | ||
| 333 | /* now add the forward ref */ | 343 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); |
| 334 | ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, | ||
| 335 | root->root_key.objectid, BTRFS_ROOT_REF_KEY, | ||
| 336 | objectid, | ||
| 337 | dir->i_ino, index, name, namelen); | ||
| 338 | |||
| 339 | BUG_ON(ret); | ||
| 340 | |||
| 341 | ret = btrfs_commit_transaction(trans, root); | ||
| 342 | if (ret) | ||
| 343 | goto fail_commit; | ||
| 344 | |||
| 345 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
| 346 | BUG_ON(!new_root); | ||
| 347 | |||
| 348 | trans = btrfs_start_transaction(new_root, 1); | ||
| 349 | BUG_ON(!trans); | ||
| 350 | |||
| 351 | ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid, | ||
| 352 | BTRFS_I(dir)->block_group); | ||
| 353 | if (ret) | ||
| 354 | goto fail; | ||
| 355 | |||
| 356 | fail: | 344 | fail: |
| 357 | nr = trans->blocks_used; | 345 | nr = trans->blocks_used; |
| 358 | err = btrfs_commit_transaction(trans, new_root); | 346 | err = btrfs_commit_transaction(trans, root); |
| 359 | if (err && !ret) | 347 | if (err && !ret) |
| 360 | ret = err; | 348 | ret = err; |
| 361 | fail_commit: | 349 | |
| 350 | btrfs_unreserve_metadata_space(root, 6); | ||
| 362 | btrfs_btree_balance_dirty(root, nr); | 351 | btrfs_btree_balance_dirty(root, nr); |
| 363 | return ret; | 352 | return ret; |
| 364 | } | 353 | } |
| @@ -375,19 +364,27 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 375 | if (!root->ref_cows) | 364 | if (!root->ref_cows) |
| 376 | return -EINVAL; | 365 | return -EINVAL; |
| 377 | 366 | ||
| 378 | ret = btrfs_check_metadata_free_space(root); | 367 | /* |
| 368 | * 1 - inode item | ||
| 369 | * 2 - refs | ||
| 370 | * 1 - root item | ||
| 371 | * 2 - dir items | ||
| 372 | */ | ||
| 373 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 379 | if (ret) | 374 | if (ret) |
| 380 | goto fail_unlock; | 375 | goto fail_unlock; |
| 381 | 376 | ||
| 382 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 377 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 383 | if (!pending_snapshot) { | 378 | if (!pending_snapshot) { |
| 384 | ret = -ENOMEM; | 379 | ret = -ENOMEM; |
| 380 | btrfs_unreserve_metadata_space(root, 6); | ||
| 385 | goto fail_unlock; | 381 | goto fail_unlock; |
| 386 | } | 382 | } |
| 387 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | 383 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); |
| 388 | if (!pending_snapshot->name) { | 384 | if (!pending_snapshot->name) { |
| 389 | ret = -ENOMEM; | 385 | ret = -ENOMEM; |
| 390 | kfree(pending_snapshot); | 386 | kfree(pending_snapshot); |
| 387 | btrfs_unreserve_metadata_space(root, 6); | ||
| 391 | goto fail_unlock; | 388 | goto fail_unlock; |
| 392 | } | 389 | } |
| 393 | memcpy(pending_snapshot->name, name, namelen); | 390 | memcpy(pending_snapshot->name, name, namelen); |
| @@ -420,14 +417,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child) | |||
| 420 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup | 417 | * sys_mkdirat and vfs_mkdir, but we only do a single component lookup |
| 421 | * inside this filesystem so it's quite a bit simpler. | 418 | * inside this filesystem so it's quite a bit simpler. |
| 422 | */ | 419 | */ |
| 423 | static noinline int btrfs_mksubvol(struct path *parent, char *name, | 420 | static noinline int btrfs_mksubvol(struct path *parent, |
| 424 | int mode, int namelen, | 421 | char *name, int namelen, |
| 425 | struct btrfs_root *snap_src) | 422 | struct btrfs_root *snap_src) |
| 426 | { | 423 | { |
| 424 | struct inode *dir = parent->dentry->d_inode; | ||
| 427 | struct dentry *dentry; | 425 | struct dentry *dentry; |
| 428 | int error; | 426 | int error; |
| 429 | 427 | ||
| 430 | mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT); | 428 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); |
| 431 | 429 | ||
| 432 | dentry = lookup_one_len(name, parent->dentry, namelen); | 430 | dentry = lookup_one_len(name, parent->dentry, namelen); |
| 433 | error = PTR_ERR(dentry); | 431 | error = PTR_ERR(dentry); |
| @@ -438,99 +436,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, | |||
| 438 | if (dentry->d_inode) | 436 | if (dentry->d_inode) |
| 439 | goto out_dput; | 437 | goto out_dput; |
| 440 | 438 | ||
| 441 | if (!IS_POSIXACL(parent->dentry->d_inode)) | ||
| 442 | mode &= ~current_umask(); | ||
| 443 | |||
| 444 | error = mnt_want_write(parent->mnt); | 439 | error = mnt_want_write(parent->mnt); |
| 445 | if (error) | 440 | if (error) |
| 446 | goto out_dput; | 441 | goto out_dput; |
| 447 | 442 | ||
| 448 | error = btrfs_may_create(parent->dentry->d_inode, dentry); | 443 | error = btrfs_may_create(dir, dentry); |
| 449 | if (error) | 444 | if (error) |
| 450 | goto out_drop_write; | 445 | goto out_drop_write; |
| 451 | 446 | ||
| 452 | /* | 447 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
| 453 | * Actually perform the low-level subvolume creation after all | 448 | |
| 454 | * this VFS fuzz. | 449 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) |
| 455 | * | 450 | goto out_up_read; |
| 456 | * Eventually we want to pass in an inode under which we create this | 451 | |
| 457 | * subvolume, but for now all are under the filesystem root. | ||
| 458 | * | ||
| 459 | * Also we should pass on the mode eventually to allow creating new | ||
| 460 | * subvolume with specific mode bits. | ||
| 461 | */ | ||
| 462 | if (snap_src) { | 452 | if (snap_src) { |
| 463 | struct dentry *dir = dentry->d_parent; | 453 | error = create_snapshot(snap_src, dentry, |
| 464 | struct dentry *test = dir->d_parent; | 454 | name, namelen); |
| 465 | struct btrfs_path *path = btrfs_alloc_path(); | ||
| 466 | int ret; | ||
| 467 | u64 test_oid; | ||
| 468 | u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid; | ||
| 469 | |||
| 470 | test_oid = snap_src->root_key.objectid; | ||
| 471 | |||
| 472 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
| 473 | path, parent_oid, test_oid); | ||
| 474 | if (ret == 0) | ||
| 475 | goto create; | ||
| 476 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
| 477 | |||
| 478 | /* we need to make sure we aren't creating a directory loop | ||
| 479 | * by taking a snapshot of something that has our current | ||
| 480 | * subvol in its directory tree. So, this loops through | ||
| 481 | * the dentries and checks the forward refs for each subvolume | ||
| 482 | * to see if is references the subvolume where we are | ||
| 483 | * placing this new snapshot. | ||
| 484 | */ | ||
| 485 | while (1) { | ||
| 486 | if (!test || | ||
| 487 | dir == snap_src->fs_info->sb->s_root || | ||
| 488 | test == snap_src->fs_info->sb->s_root || | ||
| 489 | test->d_inode->i_sb != snap_src->fs_info->sb) { | ||
| 490 | break; | ||
| 491 | } | ||
| 492 | if (S_ISLNK(test->d_inode->i_mode)) { | ||
| 493 | printk(KERN_INFO "Btrfs symlink in snapshot " | ||
| 494 | "path, failed\n"); | ||
| 495 | error = -EMLINK; | ||
| 496 | btrfs_free_path(path); | ||
| 497 | goto out_drop_write; | ||
| 498 | } | ||
| 499 | test_oid = | ||
| 500 | BTRFS_I(test->d_inode)->root->root_key.objectid; | ||
| 501 | ret = btrfs_find_root_ref(snap_src->fs_info->tree_root, | ||
| 502 | path, test_oid, parent_oid); | ||
| 503 | if (ret == 0) { | ||
| 504 | printk(KERN_INFO "Btrfs snapshot creation " | ||
| 505 | "failed, looping\n"); | ||
| 506 | error = -EMLINK; | ||
| 507 | btrfs_free_path(path); | ||
| 508 | goto out_drop_write; | ||
| 509 | } | ||
| 510 | btrfs_release_path(snap_src->fs_info->tree_root, path); | ||
| 511 | test = test->d_parent; | ||
| 512 | } | ||
| 513 | create: | ||
| 514 | btrfs_free_path(path); | ||
| 515 | error = create_snapshot(snap_src, dentry, name, namelen); | ||
| 516 | } else { | 455 | } else { |
| 517 | error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, | 456 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
| 518 | dentry, name, namelen); | 457 | name, namelen); |
| 519 | } | 458 | } |
| 520 | if (error) | 459 | if (!error) |
| 521 | goto out_drop_write; | 460 | fsnotify_mkdir(dir, dentry); |
| 522 | 461 | out_up_read: | |
| 523 | fsnotify_mkdir(parent->dentry->d_inode, dentry); | 462 | up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
| 524 | out_drop_write: | 463 | out_drop_write: |
| 525 | mnt_drop_write(parent->mnt); | 464 | mnt_drop_write(parent->mnt); |
| 526 | out_dput: | 465 | out_dput: |
| 527 | dput(dentry); | 466 | dput(dentry); |
| 528 | out_unlock: | 467 | out_unlock: |
| 529 | mutex_unlock(&parent->dentry->d_inode->i_mutex); | 468 | mutex_unlock(&dir->i_mutex); |
| 530 | return error; | 469 | return error; |
| 531 | } | 470 | } |
| 532 | 471 | ||
| 533 | |||
| 534 | static int btrfs_defrag_file(struct file *file) | 472 | static int btrfs_defrag_file(struct file *file) |
| 535 | { | 473 | { |
| 536 | struct inode *inode = fdentry(file)->d_inode; | 474 | struct inode *inode = fdentry(file)->d_inode; |
| @@ -596,9 +534,8 @@ again: | |||
| 596 | clear_page_dirty_for_io(page); | 534 | clear_page_dirty_for_io(page); |
| 597 | 535 | ||
| 598 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 536 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 599 | |||
| 600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 601 | set_page_dirty(page); | 537 | set_page_dirty(page); |
| 538 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 602 | unlock_page(page); | 539 | unlock_page(page); |
| 603 | page_cache_release(page); | 540 | page_cache_release(page); |
| 604 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 541 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
| @@ -609,7 +546,8 @@ out_unlock: | |||
| 609 | return 0; | 546 | return 0; |
| 610 | } | 547 | } |
| 611 | 548 | ||
| 612 | static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | 549 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, |
| 550 | void __user *arg) | ||
| 613 | { | 551 | { |
| 614 | u64 new_size; | 552 | u64 new_size; |
| 615 | u64 old_size; | 553 | u64 old_size; |
| @@ -718,10 +656,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
| 718 | { | 656 | { |
| 719 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | 657 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
| 720 | struct btrfs_ioctl_vol_args *vol_args; | 658 | struct btrfs_ioctl_vol_args *vol_args; |
| 721 | struct btrfs_dir_item *di; | ||
| 722 | struct btrfs_path *path; | ||
| 723 | struct file *src_file; | 659 | struct file *src_file; |
| 724 | u64 root_dirid; | ||
| 725 | int namelen; | 660 | int namelen; |
| 726 | int ret = 0; | 661 | int ret = 0; |
| 727 | 662 | ||
| @@ -739,32 +674,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
| 739 | goto out; | 674 | goto out; |
| 740 | } | 675 | } |
| 741 | 676 | ||
| 742 | path = btrfs_alloc_path(); | ||
| 743 | if (!path) { | ||
| 744 | ret = -ENOMEM; | ||
| 745 | goto out; | ||
| 746 | } | ||
| 747 | |||
| 748 | root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, | ||
| 749 | di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, | ||
| 750 | path, root_dirid, | ||
| 751 | vol_args->name, namelen, 0); | ||
| 752 | btrfs_free_path(path); | ||
| 753 | |||
| 754 | if (di && !IS_ERR(di)) { | ||
| 755 | ret = -EEXIST; | ||
| 756 | goto out; | ||
| 757 | } | ||
| 758 | |||
| 759 | if (IS_ERR(di)) { | ||
| 760 | ret = PTR_ERR(di); | ||
| 761 | goto out; | ||
| 762 | } | ||
| 763 | |||
| 764 | if (subvol) { | 677 | if (subvol) { |
| 765 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 678 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
| 766 | file->f_path.dentry->d_inode->i_mode, | 679 | NULL); |
| 767 | namelen, NULL); | ||
| 768 | } else { | 680 | } else { |
| 769 | struct inode *src_inode; | 681 | struct inode *src_inode; |
| 770 | src_file = fget(vol_args->fd); | 682 | src_file = fget(vol_args->fd); |
| @@ -781,17 +693,157 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
| 781 | fput(src_file); | 693 | fput(src_file); |
| 782 | goto out; | 694 | goto out; |
| 783 | } | 695 | } |
| 784 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, | 696 | ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, |
| 785 | file->f_path.dentry->d_inode->i_mode, | 697 | BTRFS_I(src_inode)->root); |
| 786 | namelen, BTRFS_I(src_inode)->root); | ||
| 787 | fput(src_file); | 698 | fput(src_file); |
| 788 | } | 699 | } |
| 789 | |||
| 790 | out: | 700 | out: |
| 791 | kfree(vol_args); | 701 | kfree(vol_args); |
| 792 | return ret; | 702 | return ret; |
| 793 | } | 703 | } |
| 794 | 704 | ||
| 705 | /* | ||
| 706 | * helper to check if the subvolume references other subvolumes | ||
| 707 | */ | ||
| 708 | static noinline int may_destroy_subvol(struct btrfs_root *root) | ||
| 709 | { | ||
| 710 | struct btrfs_path *path; | ||
| 711 | struct btrfs_key key; | ||
| 712 | int ret; | ||
| 713 | |||
| 714 | path = btrfs_alloc_path(); | ||
| 715 | if (!path) | ||
| 716 | return -ENOMEM; | ||
| 717 | |||
| 718 | key.objectid = root->root_key.objectid; | ||
| 719 | key.type = BTRFS_ROOT_REF_KEY; | ||
| 720 | key.offset = (u64)-1; | ||
| 721 | |||
| 722 | ret = btrfs_search_slot(NULL, root->fs_info->tree_root, | ||
| 723 | &key, path, 0, 0); | ||
| 724 | if (ret < 0) | ||
| 725 | goto out; | ||
| 726 | BUG_ON(ret == 0); | ||
| 727 | |||
| 728 | ret = 0; | ||
| 729 | if (path->slots[0] > 0) { | ||
| 730 | path->slots[0]--; | ||
| 731 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 732 | if (key.objectid == root->root_key.objectid && | ||
| 733 | key.type == BTRFS_ROOT_REF_KEY) | ||
| 734 | ret = -ENOTEMPTY; | ||
| 735 | } | ||
| 736 | out: | ||
| 737 | btrfs_free_path(path); | ||
| 738 | return ret; | ||
| 739 | } | ||
| 740 | |||
| 741 | static noinline int btrfs_ioctl_snap_destroy(struct file *file, | ||
| 742 | void __user *arg) | ||
| 743 | { | ||
| 744 | struct dentry *parent = fdentry(file); | ||
| 745 | struct dentry *dentry; | ||
| 746 | struct inode *dir = parent->d_inode; | ||
| 747 | struct inode *inode; | ||
| 748 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 749 | struct btrfs_root *dest = NULL; | ||
| 750 | struct btrfs_ioctl_vol_args *vol_args; | ||
| 751 | struct btrfs_trans_handle *trans; | ||
| 752 | int namelen; | ||
| 753 | int ret; | ||
| 754 | int err = 0; | ||
| 755 | |||
| 756 | if (!capable(CAP_SYS_ADMIN)) | ||
| 757 | return -EPERM; | ||
| 758 | |||
| 759 | vol_args = memdup_user(arg, sizeof(*vol_args)); | ||
| 760 | if (IS_ERR(vol_args)) | ||
| 761 | return PTR_ERR(vol_args); | ||
| 762 | |||
| 763 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | ||
| 764 | namelen = strlen(vol_args->name); | ||
| 765 | if (strchr(vol_args->name, '/') || | ||
| 766 | strncmp(vol_args->name, "..", namelen) == 0) { | ||
| 767 | err = -EINVAL; | ||
| 768 | goto out; | ||
| 769 | } | ||
| 770 | |||
| 771 | err = mnt_want_write(file->f_path.mnt); | ||
| 772 | if (err) | ||
| 773 | goto out; | ||
| 774 | |||
| 775 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | ||
| 776 | dentry = lookup_one_len(vol_args->name, parent, namelen); | ||
| 777 | if (IS_ERR(dentry)) { | ||
| 778 | err = PTR_ERR(dentry); | ||
| 779 | goto out_unlock_dir; | ||
| 780 | } | ||
| 781 | |||
| 782 | if (!dentry->d_inode) { | ||
| 783 | err = -ENOENT; | ||
| 784 | goto out_dput; | ||
| 785 | } | ||
| 786 | |||
| 787 | inode = dentry->d_inode; | ||
| 788 | if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { | ||
| 789 | err = -EINVAL; | ||
| 790 | goto out_dput; | ||
| 791 | } | ||
| 792 | |||
| 793 | dest = BTRFS_I(inode)->root; | ||
| 794 | |||
| 795 | mutex_lock(&inode->i_mutex); | ||
| 796 | err = d_invalidate(dentry); | ||
| 797 | if (err) | ||
| 798 | goto out_unlock; | ||
| 799 | |||
| 800 | down_write(&root->fs_info->subvol_sem); | ||
| 801 | |||
| 802 | err = may_destroy_subvol(dest); | ||
| 803 | if (err) | ||
| 804 | goto out_up_write; | ||
| 805 | |||
| 806 | trans = btrfs_start_transaction(root, 1); | ||
| 807 | ret = btrfs_unlink_subvol(trans, root, dir, | ||
| 808 | dest->root_key.objectid, | ||
| 809 | dentry->d_name.name, | ||
| 810 | dentry->d_name.len); | ||
| 811 | BUG_ON(ret); | ||
| 812 | |||
| 813 | btrfs_record_root_in_trans(trans, dest); | ||
| 814 | |||
| 815 | memset(&dest->root_item.drop_progress, 0, | ||
| 816 | sizeof(dest->root_item.drop_progress)); | ||
| 817 | dest->root_item.drop_level = 0; | ||
| 818 | btrfs_set_root_refs(&dest->root_item, 0); | ||
| 819 | |||
| 820 | ret = btrfs_insert_orphan_item(trans, | ||
| 821 | root->fs_info->tree_root, | ||
| 822 | dest->root_key.objectid); | ||
| 823 | BUG_ON(ret); | ||
| 824 | |||
| 825 | ret = btrfs_commit_transaction(trans, root); | ||
| 826 | BUG_ON(ret); | ||
| 827 | inode->i_flags |= S_DEAD; | ||
| 828 | out_up_write: | ||
| 829 | up_write(&root->fs_info->subvol_sem); | ||
| 830 | out_unlock: | ||
| 831 | mutex_unlock(&inode->i_mutex); | ||
| 832 | if (!err) { | ||
| 833 | shrink_dcache_sb(root->fs_info->sb); | ||
| 834 | btrfs_invalidate_inodes(dest); | ||
| 835 | d_delete(dentry); | ||
| 836 | } | ||
| 837 | out_dput: | ||
| 838 | dput(dentry); | ||
| 839 | out_unlock_dir: | ||
| 840 | mutex_unlock(&dir->i_mutex); | ||
| 841 | mnt_drop_write(file->f_path.mnt); | ||
| 842 | out: | ||
| 843 | kfree(vol_args); | ||
| 844 | return err; | ||
| 845 | } | ||
| 846 | |||
| 795 | static int btrfs_ioctl_defrag(struct file *file) | 847 | static int btrfs_ioctl_defrag(struct file *file) |
| 796 | { | 848 | { |
| 797 | struct inode *inode = fdentry(file)->d_inode; | 849 | struct inode *inode = fdentry(file)->d_inode; |
| @@ -865,8 +917,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
| 865 | return ret; | 917 | return ret; |
| 866 | } | 918 | } |
| 867 | 919 | ||
| 868 | static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | 920 | static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, |
| 869 | u64 off, u64 olen, u64 destoff) | 921 | u64 off, u64 olen, u64 destoff) |
| 870 | { | 922 | { |
| 871 | struct inode *inode = fdentry(file)->d_inode; | 923 | struct inode *inode = fdentry(file)->d_inode; |
| 872 | struct btrfs_root *root = BTRFS_I(inode)->root; | 924 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| @@ -976,7 +1028,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 976 | 1028 | ||
| 977 | /* punch hole in destination first */ | 1029 | /* punch hole in destination first */ |
| 978 | btrfs_drop_extents(trans, root, inode, off, off + len, | 1030 | btrfs_drop_extents(trans, root, inode, off, off + len, |
| 979 | off + len, 0, &hint_byte); | 1031 | off + len, 0, &hint_byte, 1); |
| 980 | 1032 | ||
| 981 | /* clone data */ | 1033 | /* clone data */ |
| 982 | key.objectid = src->i_ino; | 1034 | key.objectid = src->i_ino; |
| @@ -1071,9 +1123,10 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1071 | datao += off - key.offset; | 1123 | datao += off - key.offset; |
| 1072 | datal -= off - key.offset; | 1124 | datal -= off - key.offset; |
| 1073 | } | 1125 | } |
| 1074 | if (key.offset + datao + datal + key.offset > | 1126 | |
| 1075 | off + len) | 1127 | if (key.offset + datal > off + len) |
| 1076 | datal = off + len - key.offset - datao; | 1128 | datal = off + len - key.offset; |
| 1129 | |||
| 1077 | /* disko == 0 means it's a hole */ | 1130 | /* disko == 0 means it's a hole */ |
| 1078 | if (!disko) | 1131 | if (!disko) |
| 1079 | datao = 0; | 1132 | datao = 0; |
| @@ -1182,15 +1235,15 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
| 1182 | struct inode *inode = fdentry(file)->d_inode; | 1235 | struct inode *inode = fdentry(file)->d_inode; |
| 1183 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1236 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1184 | struct btrfs_trans_handle *trans; | 1237 | struct btrfs_trans_handle *trans; |
| 1185 | int ret = 0; | 1238 | int ret; |
| 1186 | 1239 | ||
| 1240 | ret = -EPERM; | ||
| 1187 | if (!capable(CAP_SYS_ADMIN)) | 1241 | if (!capable(CAP_SYS_ADMIN)) |
| 1188 | return -EPERM; | 1242 | goto out; |
| 1189 | 1243 | ||
| 1190 | if (file->private_data) { | 1244 | ret = -EINPROGRESS; |
| 1191 | ret = -EINPROGRESS; | 1245 | if (file->private_data) |
| 1192 | goto out; | 1246 | goto out; |
| 1193 | } | ||
| 1194 | 1247 | ||
| 1195 | ret = mnt_want_write(file->f_path.mnt); | 1248 | ret = mnt_want_write(file->f_path.mnt); |
| 1196 | if (ret) | 1249 | if (ret) |
| @@ -1200,12 +1253,19 @@ static long btrfs_ioctl_trans_start(struct file *file) | |||
| 1200 | root->fs_info->open_ioctl_trans++; | 1253 | root->fs_info->open_ioctl_trans++; |
| 1201 | mutex_unlock(&root->fs_info->trans_mutex); | 1254 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1202 | 1255 | ||
| 1256 | ret = -ENOMEM; | ||
| 1203 | trans = btrfs_start_ioctl_transaction(root, 0); | 1257 | trans = btrfs_start_ioctl_transaction(root, 0); |
| 1204 | if (trans) | 1258 | if (!trans) |
| 1205 | file->private_data = trans; | 1259 | goto out_drop; |
| 1206 | else | 1260 | |
| 1207 | ret = -ENOMEM; | 1261 | file->private_data = trans; |
| 1208 | /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ | 1262 | return 0; |
| 1263 | |||
| 1264 | out_drop: | ||
| 1265 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 1266 | root->fs_info->open_ioctl_trans--; | ||
| 1267 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1268 | mnt_drop_write(file->f_path.mnt); | ||
| 1209 | out: | 1269 | out: |
| 1210 | return ret; | 1270 | return ret; |
| 1211 | } | 1271 | } |
| @@ -1221,24 +1281,20 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
| 1221 | struct inode *inode = fdentry(file)->d_inode; | 1281 | struct inode *inode = fdentry(file)->d_inode; |
| 1222 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1282 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1223 | struct btrfs_trans_handle *trans; | 1283 | struct btrfs_trans_handle *trans; |
| 1224 | int ret = 0; | ||
| 1225 | 1284 | ||
| 1226 | trans = file->private_data; | 1285 | trans = file->private_data; |
| 1227 | if (!trans) { | 1286 | if (!trans) |
| 1228 | ret = -EINVAL; | 1287 | return -EINVAL; |
| 1229 | goto out; | ||
| 1230 | } | ||
| 1231 | btrfs_end_transaction(trans, root); | ||
| 1232 | file->private_data = NULL; | 1288 | file->private_data = NULL; |
| 1233 | 1289 | ||
| 1290 | btrfs_end_transaction(trans, root); | ||
| 1291 | |||
| 1234 | mutex_lock(&root->fs_info->trans_mutex); | 1292 | mutex_lock(&root->fs_info->trans_mutex); |
| 1235 | root->fs_info->open_ioctl_trans--; | 1293 | root->fs_info->open_ioctl_trans--; |
| 1236 | mutex_unlock(&root->fs_info->trans_mutex); | 1294 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1237 | 1295 | ||
| 1238 | mnt_drop_write(file->f_path.mnt); | 1296 | mnt_drop_write(file->f_path.mnt); |
| 1239 | 1297 | return 0; | |
| 1240 | out: | ||
| 1241 | return ret; | ||
| 1242 | } | 1298 | } |
| 1243 | 1299 | ||
| 1244 | long btrfs_ioctl(struct file *file, unsigned int | 1300 | long btrfs_ioctl(struct file *file, unsigned int |
| @@ -1258,6 +1314,8 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 1258 | return btrfs_ioctl_snap_create(file, argp, 0); | 1314 | return btrfs_ioctl_snap_create(file, argp, 0); |
| 1259 | case BTRFS_IOC_SUBVOL_CREATE: | 1315 | case BTRFS_IOC_SUBVOL_CREATE: |
| 1260 | return btrfs_ioctl_snap_create(file, argp, 1); | 1316 | return btrfs_ioctl_snap_create(file, argp, 1); |
| 1317 | case BTRFS_IOC_SNAP_DESTROY: | ||
| 1318 | return btrfs_ioctl_snap_destroy(file, argp); | ||
| 1261 | case BTRFS_IOC_DEFRAG: | 1319 | case BTRFS_IOC_DEFRAG: |
| 1262 | return btrfs_ioctl_defrag(file); | 1320 | return btrfs_ioctl_defrag(file); |
| 1263 | case BTRFS_IOC_RESIZE: | 1321 | case BTRFS_IOC_RESIZE: |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b320b103fa13..bc49914475eb 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args { | |||
| 65 | 65 | ||
| 66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ | 66 | #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ |
| 67 | struct btrfs_ioctl_vol_args) | 67 | struct btrfs_ioctl_vol_args) |
| 68 | 68 | #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ | |
| 69 | struct btrfs_ioctl_vol_args) | ||
| 69 | #endif | 70 | #endif |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d6f0806c682f..5799bc46a309 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
| 159 | * | 159 | * |
| 160 | * len is the length of the extent | 160 | * len is the length of the extent |
| 161 | * | 161 | * |
| 162 | * This also sets the EXTENT_ORDERED bit on the range in the inode. | ||
| 163 | * | ||
| 164 | * The tree is given a single reference on the ordered extent that was | 162 | * The tree is given a single reference on the ordered extent that was |
| 165 | * inserted. | 163 | * inserted. |
| 166 | */ | 164 | */ |
| @@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 181 | entry->start = start; | 179 | entry->start = start; |
| 182 | entry->len = len; | 180 | entry->len = len; |
| 183 | entry->disk_len = disk_len; | 181 | entry->disk_len = disk_len; |
| 182 | entry->bytes_left = len; | ||
| 184 | entry->inode = inode; | 183 | entry->inode = inode; |
| 185 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 184 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
| 186 | set_bit(type, &entry->flags); | 185 | set_bit(type, &entry->flags); |
| @@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 195 | &entry->rb_node); | 194 | &entry->rb_node); |
| 196 | BUG_ON(node); | 195 | BUG_ON(node); |
| 197 | 196 | ||
| 198 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | ||
| 199 | entry_end(entry) - 1, GFP_NOFS); | ||
| 200 | |||
| 201 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 197 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
| 202 | list_add_tail(&entry->root_extent_list, | 198 | list_add_tail(&entry->root_extent_list, |
| 203 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 199 | &BTRFS_I(inode)->root->fs_info->ordered_extents); |
| @@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 241 | struct btrfs_ordered_inode_tree *tree; | 237 | struct btrfs_ordered_inode_tree *tree; |
| 242 | struct rb_node *node; | 238 | struct rb_node *node; |
| 243 | struct btrfs_ordered_extent *entry; | 239 | struct btrfs_ordered_extent *entry; |
| 244 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 245 | int ret; | 240 | int ret; |
| 246 | 241 | ||
| 247 | tree = &BTRFS_I(inode)->ordered_tree; | 242 | tree = &BTRFS_I(inode)->ordered_tree; |
| 248 | mutex_lock(&tree->mutex); | 243 | mutex_lock(&tree->mutex); |
| 249 | clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, | ||
| 250 | GFP_NOFS); | ||
| 251 | node = tree_search(tree, file_offset); | 244 | node = tree_search(tree, file_offset); |
| 252 | if (!node) { | 245 | if (!node) { |
| 253 | ret = 1; | 246 | ret = 1; |
| @@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 260 | goto out; | 253 | goto out; |
| 261 | } | 254 | } |
| 262 | 255 | ||
| 263 | ret = test_range_bit(io_tree, entry->file_offset, | 256 | if (io_size > entry->bytes_left) { |
| 264 | entry->file_offset + entry->len - 1, | 257 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", |
| 265 | EXTENT_ORDERED, 0); | 258 | (unsigned long long)entry->bytes_left, |
| 266 | if (ret == 0) | 259 | (unsigned long long)io_size); |
| 260 | } | ||
| 261 | entry->bytes_left -= io_size; | ||
| 262 | if (entry->bytes_left == 0) | ||
| 267 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 263 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 264 | else | ||
| 265 | ret = 1; | ||
| 268 | out: | 266 | out: |
| 269 | mutex_unlock(&tree->mutex); | 267 | mutex_unlock(&tree->mutex); |
| 270 | return ret == 0; | 268 | return ret == 0; |
| @@ -308,6 +306,12 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
| 308 | tree->last = NULL; | 306 | tree->last = NULL; |
| 309 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 307 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
| 310 | 308 | ||
| 309 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 310 | BTRFS_I(inode)->outstanding_extents--; | ||
| 311 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 312 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
| 313 | inode, 1); | ||
| 314 | |||
| 311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 315 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
| 312 | list_del_init(&entry->root_extent_list); | 316 | list_del_init(&entry->root_extent_list); |
| 313 | 317 | ||
| @@ -460,7 +464,7 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 460 | * start IO on any dirty ones so the wait doesn't stall waiting | 464 | * start IO on any dirty ones so the wait doesn't stall waiting |
| 461 | * for pdflush to find them | 465 | * for pdflush to find them |
| 462 | */ | 466 | */ |
| 463 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); | 467 | filemap_fdatawrite_range(inode->i_mapping, start, end); |
| 464 | if (wait) { | 468 | if (wait) { |
| 465 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 469 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
| 466 | &entry->flags)); | 470 | &entry->flags)); |
| @@ -476,6 +480,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 476 | u64 orig_end; | 480 | u64 orig_end; |
| 477 | u64 wait_end; | 481 | u64 wait_end; |
| 478 | struct btrfs_ordered_extent *ordered; | 482 | struct btrfs_ordered_extent *ordered; |
| 483 | int found; | ||
| 479 | 484 | ||
| 480 | if (start + len < start) { | 485 | if (start + len < start) { |
| 481 | orig_end = INT_LIMIT(loff_t); | 486 | orig_end = INT_LIMIT(loff_t); |
| @@ -489,19 +494,18 @@ again: | |||
| 489 | /* start IO across the range first to instantiate any delalloc | 494 | /* start IO across the range first to instantiate any delalloc |
| 490 | * extents | 495 | * extents |
| 491 | */ | 496 | */ |
| 492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 497 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
| 493 | 498 | ||
| 494 | /* The compression code will leave pages locked but return from | 499 | /* The compression code will leave pages locked but return from |
| 495 | * writepage without setting the page writeback. Starting again | 500 | * writepage without setting the page writeback. Starting again |
| 496 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | 501 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. |
| 497 | */ | 502 | */ |
| 498 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 503 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
| 499 | 504 | ||
| 500 | btrfs_wait_on_page_writeback_range(inode->i_mapping, | 505 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
| 501 | start >> PAGE_CACHE_SHIFT, | ||
| 502 | orig_end >> PAGE_CACHE_SHIFT); | ||
| 503 | 506 | ||
| 504 | end = orig_end; | 507 | end = orig_end; |
| 508 | found = 0; | ||
| 505 | while (1) { | 509 | while (1) { |
| 506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 510 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
| 507 | if (!ordered) | 511 | if (!ordered) |
| @@ -514,6 +518,7 @@ again: | |||
| 514 | btrfs_put_ordered_extent(ordered); | 518 | btrfs_put_ordered_extent(ordered); |
| 515 | break; | 519 | break; |
| 516 | } | 520 | } |
| 521 | found++; | ||
| 517 | btrfs_start_ordered_extent(inode, ordered, 1); | 522 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 518 | end = ordered->file_offset; | 523 | end = ordered->file_offset; |
| 519 | btrfs_put_ordered_extent(ordered); | 524 | btrfs_put_ordered_extent(ordered); |
| @@ -521,8 +526,8 @@ again: | |||
| 521 | break; | 526 | break; |
| 522 | end--; | 527 | end--; |
| 523 | } | 528 | } |
| 524 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | 529 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, |
| 525 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | 530 | EXTENT_DELALLOC, 0, NULL)) { |
| 526 | schedule_timeout(1); | 531 | schedule_timeout(1); |
| 527 | goto again; | 532 | goto again; |
| 528 | } | 533 | } |
| @@ -613,7 +618,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
| 613 | */ | 618 | */ |
| 614 | if (test_range_bit(io_tree, disk_i_size, | 619 | if (test_range_bit(io_tree, disk_i_size, |
| 615 | ordered->file_offset + ordered->len - 1, | 620 | ordered->file_offset + ordered->len - 1, |
| 616 | EXTENT_DELALLOC, 0)) { | 621 | EXTENT_DELALLOC, 0, NULL)) { |
| 617 | goto out; | 622 | goto out; |
| 618 | } | 623 | } |
| 619 | /* | 624 | /* |
| @@ -664,7 +669,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
| 664 | */ | 669 | */ |
| 665 | if (i_size_test > entry_end(ordered) && | 670 | if (i_size_test > entry_end(ordered) && |
| 666 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, | 671 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, |
| 667 | EXTENT_DELALLOC, 0)) { | 672 | EXTENT_DELALLOC, 0, NULL)) { |
| 668 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | 673 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); |
| 669 | } | 674 | } |
| 670 | BTRFS_I(inode)->disk_i_size = new_i_size; | 675 | BTRFS_I(inode)->disk_i_size = new_i_size; |
| @@ -715,90 +720,6 @@ out: | |||
| 715 | } | 720 | } |
| 716 | 721 | ||
| 717 | 722 | ||
| 718 | /** | ||
| 719 | * taken from mm/filemap.c because it isn't exported | ||
| 720 | * | ||
| 721 | * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range | ||
| 722 | * @mapping: address space structure to write | ||
| 723 | * @start: offset in bytes where the range starts | ||
| 724 | * @end: offset in bytes where the range ends (inclusive) | ||
| 725 | * @sync_mode: enable synchronous operation | ||
| 726 | * | ||
| 727 | * Start writeback against all of a mapping's dirty pages that lie | ||
| 728 | * within the byte offsets <start, end> inclusive. | ||
| 729 | * | ||
| 730 | * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as | ||
| 731 | * opposed to a regular memory cleansing writeback. The difference between | ||
| 732 | * these two operations is that if a dirty page/buffer is encountered, it must | ||
| 733 | * be waited upon, and not just skipped over. | ||
| 734 | */ | ||
| 735 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 736 | loff_t end, int sync_mode) | ||
| 737 | { | ||
| 738 | struct writeback_control wbc = { | ||
| 739 | .sync_mode = sync_mode, | ||
| 740 | .nr_to_write = mapping->nrpages * 2, | ||
| 741 | .range_start = start, | ||
| 742 | .range_end = end, | ||
| 743 | .for_writepages = 1, | ||
| 744 | }; | ||
| 745 | return btrfs_writepages(mapping, &wbc); | ||
| 746 | } | ||
| 747 | |||
| 748 | /** | ||
| 749 | * taken from mm/filemap.c because it isn't exported | ||
| 750 | * | ||
| 751 | * wait_on_page_writeback_range - wait for writeback to complete | ||
| 752 | * @mapping: target address_space | ||
| 753 | * @start: beginning page index | ||
| 754 | * @end: ending page index | ||
| 755 | * | ||
| 756 | * Wait for writeback to complete against pages indexed by start->end | ||
| 757 | * inclusive | ||
| 758 | */ | ||
| 759 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 760 | pgoff_t start, pgoff_t end) | ||
| 761 | { | ||
| 762 | struct pagevec pvec; | ||
| 763 | int nr_pages; | ||
| 764 | int ret = 0; | ||
| 765 | pgoff_t index; | ||
| 766 | |||
| 767 | if (end < start) | ||
| 768 | return 0; | ||
| 769 | |||
| 770 | pagevec_init(&pvec, 0); | ||
| 771 | index = start; | ||
| 772 | while ((index <= end) && | ||
| 773 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 774 | PAGECACHE_TAG_WRITEBACK, | ||
| 775 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | ||
| 776 | unsigned i; | ||
| 777 | |||
| 778 | for (i = 0; i < nr_pages; i++) { | ||
| 779 | struct page *page = pvec.pages[i]; | ||
| 780 | |||
| 781 | /* until radix tree lookup accepts end_index */ | ||
| 782 | if (page->index > end) | ||
| 783 | continue; | ||
| 784 | |||
| 785 | wait_on_page_writeback(page); | ||
| 786 | if (PageError(page)) | ||
| 787 | ret = -EIO; | ||
| 788 | } | ||
| 789 | pagevec_release(&pvec); | ||
| 790 | cond_resched(); | ||
| 791 | } | ||
| 792 | |||
| 793 | /* Check for outstanding write errors */ | ||
| 794 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | ||
| 795 | ret = -ENOSPC; | ||
| 796 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | ||
| 797 | ret = -EIO; | ||
| 798 | |||
| 799 | return ret; | ||
| 800 | } | ||
| 801 | |||
| 802 | /* | 723 | /* |
| 803 | * add a given inode to the list of inodes that must be fully on | 724 | * add a given inode to the list of inodes that must be fully on |
| 804 | * disk before a transaction commit finishes. | 725 | * disk before a transaction commit finishes. |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3d31c8827b01..f82e87488ca8 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -85,6 +85,9 @@ struct btrfs_ordered_extent { | |||
| 85 | /* extent length on disk */ | 85 | /* extent length on disk */ |
| 86 | u64 disk_len; | 86 | u64 disk_len; |
| 87 | 87 | ||
| 88 | /* number of bytes that still need writing */ | ||
| 89 | u64 bytes_left; | ||
| 90 | |||
| 88 | /* flags (described above) */ | 91 | /* flags (described above) */ |
| 89 | unsigned long flags; | 92 | unsigned long flags; |
| 90 | 93 | ||
| @@ -150,10 +153,6 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | |||
| 150 | int btrfs_ordered_update_i_size(struct inode *inode, | 153 | int btrfs_ordered_update_i_size(struct inode *inode, |
| 151 | struct btrfs_ordered_extent *ordered); | 154 | struct btrfs_ordered_extent *ordered); |
| 152 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 155 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
| 153 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
| 154 | pgoff_t start, pgoff_t end); | ||
| 155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
| 156 | loff_t end, int sync_mode); | ||
| 157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | 156 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); |
| 158 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 157 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); |
| 159 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 158 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c index 3c0d52af4f80..79cba5fbc28e 100644 --- a/fs/btrfs/orphan.c +++ b/fs/btrfs/orphan.c | |||
| @@ -65,3 +65,23 @@ out: | |||
| 65 | btrfs_free_path(path); | 65 | btrfs_free_path(path); |
| 66 | return ret; | 66 | return ret; |
| 67 | } | 67 | } |
| 68 | |||
| 69 | int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset) | ||
| 70 | { | ||
| 71 | struct btrfs_path *path; | ||
| 72 | struct btrfs_key key; | ||
| 73 | int ret; | ||
| 74 | |||
| 75 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
| 76 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
| 77 | key.offset = offset; | ||
| 78 | |||
| 79 | path = btrfs_alloc_path(); | ||
| 80 | if (!path) | ||
| 81 | return -ENOMEM; | ||
| 82 | |||
| 83 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 84 | |||
| 85 | btrfs_free_path(path); | ||
| 86 | return ret; | ||
| 87 | } | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c04f7f212602..cfcc93c93a7b 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -121,6 +121,15 @@ struct inodevec { | |||
| 121 | int nr; | 121 | int nr; |
| 122 | }; | 122 | }; |
| 123 | 123 | ||
| 124 | #define MAX_EXTENTS 128 | ||
| 125 | |||
| 126 | struct file_extent_cluster { | ||
| 127 | u64 start; | ||
| 128 | u64 end; | ||
| 129 | u64 boundary[MAX_EXTENTS]; | ||
| 130 | unsigned int nr; | ||
| 131 | }; | ||
| 132 | |||
| 124 | struct reloc_control { | 133 | struct reloc_control { |
| 125 | /* block group to relocate */ | 134 | /* block group to relocate */ |
| 126 | struct btrfs_block_group_cache *block_group; | 135 | struct btrfs_block_group_cache *block_group; |
| @@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
| 2180 | struct reloc_control *rc) | 2189 | struct reloc_control *rc) |
| 2181 | { | 2190 | { |
| 2182 | if (test_range_bit(&rc->processed_blocks, bytenr, | 2191 | if (test_range_bit(&rc->processed_blocks, bytenr, |
| 2183 | bytenr + blocksize - 1, EXTENT_DIRTY, 1)) | 2192 | bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) |
| 2184 | return 1; | 2193 | return 1; |
| 2185 | return 0; | 2194 | return 0; |
| 2186 | } | 2195 | } |
| @@ -2529,56 +2538,94 @@ out: | |||
| 2529 | } | 2538 | } |
| 2530 | 2539 | ||
| 2531 | static noinline_for_stack | 2540 | static noinline_for_stack |
| 2532 | int relocate_inode_pages(struct inode *inode, u64 start, u64 len) | 2541 | int setup_extent_mapping(struct inode *inode, u64 start, u64 end, |
| 2542 | u64 block_start) | ||
| 2543 | { | ||
| 2544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2545 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 2546 | struct extent_map *em; | ||
| 2547 | int ret = 0; | ||
| 2548 | |||
| 2549 | em = alloc_extent_map(GFP_NOFS); | ||
| 2550 | if (!em) | ||
| 2551 | return -ENOMEM; | ||
| 2552 | |||
| 2553 | em->start = start; | ||
| 2554 | em->len = end + 1 - start; | ||
| 2555 | em->block_len = em->len; | ||
| 2556 | em->block_start = block_start; | ||
| 2557 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 2558 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 2559 | |||
| 2560 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2561 | while (1) { | ||
| 2562 | write_lock(&em_tree->lock); | ||
| 2563 | ret = add_extent_mapping(em_tree, em); | ||
| 2564 | write_unlock(&em_tree->lock); | ||
| 2565 | if (ret != -EEXIST) { | ||
| 2566 | free_extent_map(em); | ||
| 2567 | break; | ||
| 2568 | } | ||
| 2569 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
| 2570 | } | ||
| 2571 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2572 | return ret; | ||
| 2573 | } | ||
| 2574 | |||
| 2575 | static int relocate_file_extent_cluster(struct inode *inode, | ||
| 2576 | struct file_extent_cluster *cluster) | ||
| 2533 | { | 2577 | { |
| 2534 | u64 page_start; | 2578 | u64 page_start; |
| 2535 | u64 page_end; | 2579 | u64 page_end; |
| 2536 | unsigned long i; | 2580 | u64 offset = BTRFS_I(inode)->index_cnt; |
| 2537 | unsigned long first_index; | 2581 | unsigned long index; |
| 2538 | unsigned long last_index; | 2582 | unsigned long last_index; |
| 2539 | unsigned int total_read = 0; | 2583 | unsigned int dirty_page = 0; |
| 2540 | unsigned int total_dirty = 0; | ||
| 2541 | struct page *page; | 2584 | struct page *page; |
| 2542 | struct file_ra_state *ra; | 2585 | struct file_ra_state *ra; |
| 2543 | struct btrfs_ordered_extent *ordered; | 2586 | int nr = 0; |
| 2544 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 2545 | int ret = 0; | 2587 | int ret = 0; |
| 2546 | 2588 | ||
| 2589 | if (!cluster->nr) | ||
| 2590 | return 0; | ||
| 2591 | |||
| 2547 | ra = kzalloc(sizeof(*ra), GFP_NOFS); | 2592 | ra = kzalloc(sizeof(*ra), GFP_NOFS); |
| 2548 | if (!ra) | 2593 | if (!ra) |
| 2549 | return -ENOMEM; | 2594 | return -ENOMEM; |
| 2550 | 2595 | ||
| 2596 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | ||
| 2597 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
| 2598 | |||
| 2551 | mutex_lock(&inode->i_mutex); | 2599 | mutex_lock(&inode->i_mutex); |
| 2552 | first_index = start >> PAGE_CACHE_SHIFT; | ||
| 2553 | last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; | ||
| 2554 | 2600 | ||
| 2555 | /* make sure the dirty trick played by the caller work */ | 2601 | i_size_write(inode, cluster->end + 1 - offset); |
| 2556 | while (1) { | 2602 | ret = setup_extent_mapping(inode, cluster->start - offset, |
| 2557 | ret = invalidate_inode_pages2_range(inode->i_mapping, | 2603 | cluster->end - offset, cluster->start); |
| 2558 | first_index, last_index); | ||
| 2559 | if (ret != -EBUSY) | ||
| 2560 | break; | ||
| 2561 | schedule_timeout(HZ/10); | ||
| 2562 | } | ||
| 2563 | if (ret) | 2604 | if (ret) |
| 2564 | goto out_unlock; | 2605 | goto out_unlock; |
| 2565 | 2606 | ||
| 2566 | file_ra_state_init(ra, inode->i_mapping); | 2607 | file_ra_state_init(ra, inode->i_mapping); |
| 2567 | 2608 | ||
| 2568 | for (i = first_index ; i <= last_index; i++) { | 2609 | WARN_ON(cluster->start != cluster->boundary[0]); |
| 2569 | if (total_read % ra->ra_pages == 0) { | 2610 | while (index <= last_index) { |
| 2570 | btrfs_force_ra(inode->i_mapping, ra, NULL, i, | 2611 | page = find_lock_page(inode->i_mapping, index); |
| 2571 | min(last_index, ra->ra_pages + i - 1)); | ||
| 2572 | } | ||
| 2573 | total_read++; | ||
| 2574 | again: | ||
| 2575 | if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) | ||
| 2576 | BUG_ON(1); | ||
| 2577 | page = grab_cache_page(inode->i_mapping, i); | ||
| 2578 | if (!page) { | 2612 | if (!page) { |
| 2579 | ret = -ENOMEM; | 2613 | page_cache_sync_readahead(inode->i_mapping, |
| 2580 | goto out_unlock; | 2614 | ra, NULL, index, |
| 2615 | last_index + 1 - index); | ||
| 2616 | page = grab_cache_page(inode->i_mapping, index); | ||
| 2617 | if (!page) { | ||
| 2618 | ret = -ENOMEM; | ||
| 2619 | goto out_unlock; | ||
| 2620 | } | ||
| 2621 | } | ||
| 2622 | |||
| 2623 | if (PageReadahead(page)) { | ||
| 2624 | page_cache_async_readahead(inode->i_mapping, | ||
| 2625 | ra, NULL, page, index, | ||
| 2626 | last_index + 1 - index); | ||
| 2581 | } | 2627 | } |
| 2628 | |||
| 2582 | if (!PageUptodate(page)) { | 2629 | if (!PageUptodate(page)) { |
| 2583 | btrfs_readpage(NULL, page); | 2630 | btrfs_readpage(NULL, page); |
| 2584 | lock_page(page); | 2631 | lock_page(page); |
| @@ -2589,75 +2636,79 @@ again: | |||
| 2589 | goto out_unlock; | 2636 | goto out_unlock; |
| 2590 | } | 2637 | } |
| 2591 | } | 2638 | } |
| 2592 | wait_on_page_writeback(page); | ||
| 2593 | 2639 | ||
| 2594 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 2640 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 2595 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 2641 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 2596 | lock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2642 | |
| 2597 | 2643 | lock_extent(&BTRFS_I(inode)->io_tree, | |
| 2598 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 2644 | page_start, page_end, GFP_NOFS); |
| 2599 | if (ordered) { | 2645 | |
| 2600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 2601 | unlock_page(page); | ||
| 2602 | page_cache_release(page); | ||
| 2603 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 2604 | btrfs_put_ordered_extent(ordered); | ||
| 2605 | goto again; | ||
| 2606 | } | ||
| 2607 | set_page_extent_mapped(page); | 2646 | set_page_extent_mapped(page); |
| 2608 | 2647 | ||
| 2609 | if (i == first_index) | 2648 | if (nr < cluster->nr && |
| 2610 | set_extent_bits(io_tree, page_start, page_end, | 2649 | page_start + offset == cluster->boundary[nr]) { |
| 2650 | set_extent_bits(&BTRFS_I(inode)->io_tree, | ||
| 2651 | page_start, page_end, | ||
| 2611 | EXTENT_BOUNDARY, GFP_NOFS); | 2652 | EXTENT_BOUNDARY, GFP_NOFS); |
| 2653 | nr++; | ||
| 2654 | } | ||
| 2612 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 2655 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 2613 | 2656 | ||
| 2614 | set_page_dirty(page); | 2657 | set_page_dirty(page); |
| 2615 | total_dirty++; | 2658 | dirty_page++; |
| 2616 | 2659 | ||
| 2617 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 2660 | unlock_extent(&BTRFS_I(inode)->io_tree, |
| 2661 | page_start, page_end, GFP_NOFS); | ||
| 2618 | unlock_page(page); | 2662 | unlock_page(page); |
| 2619 | page_cache_release(page); | 2663 | page_cache_release(page); |
| 2664 | |||
| 2665 | index++; | ||
| 2666 | if (nr < cluster->nr && | ||
| 2667 | page_end + 1 + offset == cluster->boundary[nr]) { | ||
| 2668 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2669 | dirty_page); | ||
| 2670 | dirty_page = 0; | ||
| 2671 | } | ||
| 2672 | } | ||
| 2673 | if (dirty_page) { | ||
| 2674 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2675 | dirty_page); | ||
| 2620 | } | 2676 | } |
| 2677 | WARN_ON(nr != cluster->nr); | ||
| 2621 | out_unlock: | 2678 | out_unlock: |
| 2622 | mutex_unlock(&inode->i_mutex); | 2679 | mutex_unlock(&inode->i_mutex); |
| 2623 | kfree(ra); | 2680 | kfree(ra); |
| 2624 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); | ||
| 2625 | return ret; | 2681 | return ret; |
| 2626 | } | 2682 | } |
| 2627 | 2683 | ||
| 2628 | static noinline_for_stack | 2684 | static noinline_for_stack |
| 2629 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) | 2685 | int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, |
| 2686 | struct file_extent_cluster *cluster) | ||
| 2630 | { | 2687 | { |
| 2631 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2688 | int ret; |
| 2632 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 2633 | struct extent_map *em; | ||
| 2634 | u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; | ||
| 2635 | u64 end = start + extent_key->offset - 1; | ||
| 2636 | |||
| 2637 | em = alloc_extent_map(GFP_NOFS); | ||
| 2638 | em->start = start; | ||
| 2639 | em->len = extent_key->offset; | ||
| 2640 | em->block_len = extent_key->offset; | ||
| 2641 | em->block_start = extent_key->objectid; | ||
| 2642 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 2643 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 2644 | 2689 | ||
| 2645 | /* setup extent map to cheat btrfs_readpage */ | 2690 | if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { |
| 2646 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | 2691 | ret = relocate_file_extent_cluster(inode, cluster); |
| 2647 | while (1) { | 2692 | if (ret) |
| 2648 | int ret; | 2693 | return ret; |
| 2649 | spin_lock(&em_tree->lock); | 2694 | cluster->nr = 0; |
| 2650 | ret = add_extent_mapping(em_tree, em); | ||
| 2651 | spin_unlock(&em_tree->lock); | ||
| 2652 | if (ret != -EEXIST) { | ||
| 2653 | free_extent_map(em); | ||
| 2654 | break; | ||
| 2655 | } | ||
| 2656 | btrfs_drop_extent_cache(inode, start, end, 0); | ||
| 2657 | } | 2695 | } |
| 2658 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2659 | 2696 | ||
| 2660 | return relocate_inode_pages(inode, start, extent_key->offset); | 2697 | if (!cluster->nr) |
| 2698 | cluster->start = extent_key->objectid; | ||
| 2699 | else | ||
| 2700 | BUG_ON(cluster->nr >= MAX_EXTENTS); | ||
| 2701 | cluster->end = extent_key->objectid + extent_key->offset - 1; | ||
| 2702 | cluster->boundary[cluster->nr] = extent_key->objectid; | ||
| 2703 | cluster->nr++; | ||
| 2704 | |||
| 2705 | if (cluster->nr >= MAX_EXTENTS) { | ||
| 2706 | ret = relocate_file_extent_cluster(inode, cluster); | ||
| 2707 | if (ret) | ||
| 2708 | return ret; | ||
| 2709 | cluster->nr = 0; | ||
| 2710 | } | ||
| 2711 | return 0; | ||
| 2661 | } | 2712 | } |
| 2662 | 2713 | ||
| 2663 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 2714 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
| @@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags) | |||
| 3203 | return 0; | 3254 | return 0; |
| 3204 | } | 3255 | } |
| 3205 | 3256 | ||
| 3257 | |||
| 3206 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3258 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
| 3207 | { | 3259 | { |
| 3208 | struct rb_root blocks = RB_ROOT; | 3260 | struct rb_root blocks = RB_ROOT; |
| 3209 | struct btrfs_key key; | 3261 | struct btrfs_key key; |
| 3262 | struct file_extent_cluster *cluster; | ||
| 3210 | struct btrfs_trans_handle *trans = NULL; | 3263 | struct btrfs_trans_handle *trans = NULL; |
| 3211 | struct btrfs_path *path; | 3264 | struct btrfs_path *path; |
| 3212 | struct btrfs_extent_item *ei; | 3265 | struct btrfs_extent_item *ei; |
| @@ -3216,10 +3269,17 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3216 | int ret; | 3269 | int ret; |
| 3217 | int err = 0; | 3270 | int err = 0; |
| 3218 | 3271 | ||
| 3272 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
| 3273 | if (!cluster) | ||
| 3274 | return -ENOMEM; | ||
| 3275 | |||
| 3219 | path = btrfs_alloc_path(); | 3276 | path = btrfs_alloc_path(); |
| 3220 | if (!path) | 3277 | if (!path) |
| 3221 | return -ENOMEM; | 3278 | return -ENOMEM; |
| 3222 | 3279 | ||
| 3280 | rc->extents_found = 0; | ||
| 3281 | rc->extents_skipped = 0; | ||
| 3282 | |||
| 3223 | rc->search_start = rc->block_group->key.objectid; | 3283 | rc->search_start = rc->block_group->key.objectid; |
| 3224 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | 3284 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, |
| 3225 | GFP_NOFS); | 3285 | GFP_NOFS); |
| @@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3306 | } | 3366 | } |
| 3307 | 3367 | ||
| 3308 | nr = trans->blocks_used; | 3368 | nr = trans->blocks_used; |
| 3309 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3369 | btrfs_end_transaction(trans, rc->extent_root); |
| 3310 | trans = NULL; | 3370 | trans = NULL; |
| 3311 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3371 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
| 3312 | 3372 | ||
| 3313 | if (rc->stage == MOVE_DATA_EXTENTS && | 3373 | if (rc->stage == MOVE_DATA_EXTENTS && |
| 3314 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3374 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
| 3315 | rc->found_file_extent = 1; | 3375 | rc->found_file_extent = 1; |
| 3316 | ret = relocate_data_extent(rc->data_inode, &key); | 3376 | ret = relocate_data_extent(rc->data_inode, |
| 3377 | &key, cluster); | ||
| 3317 | if (ret < 0) { | 3378 | if (ret < 0) { |
| 3318 | err = ret; | 3379 | err = ret; |
| 3319 | break; | 3380 | break; |
| @@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3328 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3389 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
| 3329 | } | 3390 | } |
| 3330 | 3391 | ||
| 3392 | if (!err) { | ||
| 3393 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | ||
| 3394 | if (ret < 0) | ||
| 3395 | err = ret; | ||
| 3396 | } | ||
| 3397 | |||
| 3398 | kfree(cluster); | ||
| 3399 | |||
| 3331 | rc->create_reloc_root = 0; | 3400 | rc->create_reloc_root = 0; |
| 3332 | smp_mb(); | 3401 | smp_mb(); |
| 3333 | 3402 | ||
| @@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3348 | } | 3417 | } |
| 3349 | 3418 | ||
| 3350 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | 3419 | static int __insert_orphan_inode(struct btrfs_trans_handle *trans, |
| 3351 | struct btrfs_root *root, | 3420 | struct btrfs_root *root, u64 objectid) |
| 3352 | u64 objectid, u64 size) | ||
| 3353 | { | 3421 | { |
| 3354 | struct btrfs_path *path; | 3422 | struct btrfs_path *path; |
| 3355 | struct btrfs_inode_item *item; | 3423 | struct btrfs_inode_item *item; |
| @@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
| 3368 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); | 3436 | item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); |
| 3369 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); | 3437 | memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); |
| 3370 | btrfs_set_inode_generation(leaf, item, 1); | 3438 | btrfs_set_inode_generation(leaf, item, 1); |
| 3371 | btrfs_set_inode_size(leaf, item, size); | 3439 | btrfs_set_inode_size(leaf, item, 0); |
| 3372 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3440 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
| 3373 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3441 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); |
| 3374 | btrfs_mark_buffer_dirty(leaf); | 3442 | btrfs_mark_buffer_dirty(leaf); |
| @@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3404 | if (err) | 3472 | if (err) |
| 3405 | goto out; | 3473 | goto out; |
| 3406 | 3474 | ||
| 3407 | err = __insert_orphan_inode(trans, root, objectid, group->key.offset); | 3475 | err = __insert_orphan_inode(trans, root, objectid); |
| 3408 | BUG_ON(err); | ||
| 3409 | |||
| 3410 | err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, | ||
| 3411 | group->key.offset, 0, group->key.offset, | ||
| 3412 | 0, 0, 0); | ||
| 3413 | BUG_ON(err); | 3476 | BUG_ON(err); |
| 3414 | 3477 | ||
| 3415 | key.objectid = objectid; | 3478 | key.objectid = objectid; |
| @@ -3455,7 +3518,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3455 | BUG_ON(!rc->block_group); | 3518 | BUG_ON(!rc->block_group); |
| 3456 | 3519 | ||
| 3457 | btrfs_init_workers(&rc->workers, "relocate", | 3520 | btrfs_init_workers(&rc->workers, "relocate", |
| 3458 | fs_info->thread_pool_size); | 3521 | fs_info->thread_pool_size, NULL); |
| 3459 | 3522 | ||
| 3460 | rc->extent_root = extent_root; | 3523 | rc->extent_root = extent_root; |
| 3461 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3524 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); |
| @@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3475 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 3538 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); |
| 3476 | 3539 | ||
| 3477 | while (1) { | 3540 | while (1) { |
| 3478 | mutex_lock(&fs_info->cleaner_mutex); | ||
| 3479 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
| 3480 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 3481 | |||
| 3482 | rc->extents_found = 0; | 3541 | rc->extents_found = 0; |
| 3483 | rc->extents_skipped = 0; | 3542 | rc->extents_skipped = 0; |
| 3484 | 3543 | ||
| 3544 | mutex_lock(&fs_info->cleaner_mutex); | ||
| 3545 | |||
| 3546 | btrfs_clean_old_snapshots(fs_info->tree_root); | ||
| 3485 | ret = relocate_block_group(rc); | 3547 | ret = relocate_block_group(rc); |
| 3548 | |||
| 3549 | mutex_unlock(&fs_info->cleaner_mutex); | ||
| 3486 | if (ret < 0) { | 3550 | if (ret < 0) { |
| 3487 | err = ret; | 3551 | err = ret; |
| 3488 | break; | 3552 | break; |
| @@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3514 | } | 3578 | } |
| 3515 | } | 3579 | } |
| 3516 | 3580 | ||
| 3517 | filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, | 3581 | filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, |
| 3518 | rc->block_group->key.objectid, | 3582 | rc->block_group->key.objectid, |
| 3519 | rc->block_group->key.objectid + | 3583 | rc->block_group->key.objectid + |
| 3520 | rc->block_group->key.offset - 1); | 3584 | rc->block_group->key.offset - 1); |
| 3521 | 3585 | ||
| 3522 | WARN_ON(rc->block_group->pinned > 0); | 3586 | WARN_ON(rc->block_group->pinned > 0); |
| 3523 | WARN_ON(rc->block_group->reserved > 0); | 3587 | WARN_ON(rc->block_group->reserved > 0); |
| @@ -3530,6 +3594,26 @@ out: | |||
| 3530 | return err; | 3594 | return err; |
| 3531 | } | 3595 | } |
| 3532 | 3596 | ||
| 3597 | static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | ||
| 3598 | { | ||
| 3599 | struct btrfs_trans_handle *trans; | ||
| 3600 | int ret; | ||
| 3601 | |||
| 3602 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | ||
| 3603 | |||
| 3604 | memset(&root->root_item.drop_progress, 0, | ||
| 3605 | sizeof(root->root_item.drop_progress)); | ||
| 3606 | root->root_item.drop_level = 0; | ||
| 3607 | btrfs_set_root_refs(&root->root_item, 0); | ||
| 3608 | ret = btrfs_update_root(trans, root->fs_info->tree_root, | ||
| 3609 | &root->root_key, &root->root_item); | ||
| 3610 | BUG_ON(ret); | ||
| 3611 | |||
| 3612 | ret = btrfs_end_transaction(trans, root->fs_info->tree_root); | ||
| 3613 | BUG_ON(ret); | ||
| 3614 | return 0; | ||
| 3615 | } | ||
| 3616 | |||
| 3533 | /* | 3617 | /* |
| 3534 | * recover relocation interrupted by system crash. | 3618 | * recover relocation interrupted by system crash. |
| 3535 | * | 3619 | * |
| @@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3589 | fs_root = read_fs_root(root->fs_info, | 3673 | fs_root = read_fs_root(root->fs_info, |
| 3590 | reloc_root->root_key.offset); | 3674 | reloc_root->root_key.offset); |
| 3591 | if (IS_ERR(fs_root)) { | 3675 | if (IS_ERR(fs_root)) { |
| 3592 | err = PTR_ERR(fs_root); | 3676 | ret = PTR_ERR(fs_root); |
| 3593 | goto out; | 3677 | if (ret != -ENOENT) { |
| 3678 | err = ret; | ||
| 3679 | goto out; | ||
| 3680 | } | ||
| 3681 | mark_garbage_root(reloc_root); | ||
| 3594 | } | 3682 | } |
| 3595 | } | 3683 | } |
| 3596 | 3684 | ||
| @@ -3613,7 +3701,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3613 | mapping_tree_init(&rc->reloc_root_tree); | 3701 | mapping_tree_init(&rc->reloc_root_tree); |
| 3614 | INIT_LIST_HEAD(&rc->reloc_roots); | 3702 | INIT_LIST_HEAD(&rc->reloc_roots); |
| 3615 | btrfs_init_workers(&rc->workers, "relocate", | 3703 | btrfs_init_workers(&rc->workers, "relocate", |
| 3616 | root->fs_info->thread_pool_size); | 3704 | root->fs_info->thread_pool_size, NULL); |
| 3617 | rc->extent_root = root->fs_info->extent_root; | 3705 | rc->extent_root = root->fs_info->extent_root; |
| 3618 | 3706 | ||
| 3619 | set_reloc_control(rc); | 3707 | set_reloc_control(rc); |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0ddc6d61c55a..9351428f30e2 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, | |||
| 94 | goto out; | 94 | goto out; |
| 95 | 95 | ||
| 96 | BUG_ON(ret == 0); | 96 | BUG_ON(ret == 0); |
| 97 | if (path->slots[0] == 0) { | ||
| 98 | ret = 1; | ||
| 99 | goto out; | ||
| 100 | } | ||
| 97 | l = path->nodes[0]; | 101 | l = path->nodes[0]; |
| 98 | BUG_ON(path->slots[0] == 0); | ||
| 99 | slot = path->slots[0] - 1; | 102 | slot = path->slots[0] - 1; |
| 100 | btrfs_item_key_to_cpu(l, &found_key, slot); | 103 | btrfs_item_key_to_cpu(l, &found_key, slot); |
| 101 | if (found_key.objectid != objectid) { | 104 | if (found_key.objectid != objectid || |
| 105 | found_key.type != BTRFS_ROOT_ITEM_KEY) { | ||
| 102 | ret = 1; | 106 | ret = 1; |
| 103 | goto out; | 107 | goto out; |
| 104 | } | 108 | } |
| 105 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), | 109 | if (item) |
| 106 | sizeof(*item)); | 110 | read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), |
| 107 | memcpy(key, &found_key, sizeof(found_key)); | 111 | sizeof(*item)); |
| 112 | if (key) | ||
| 113 | memcpy(key, &found_key, sizeof(found_key)); | ||
| 108 | ret = 0; | 114 | ret = 0; |
| 109 | out: | 115 | out: |
| 110 | btrfs_free_path(path); | 116 | btrfs_free_path(path); |
| @@ -249,6 +255,59 @@ err: | |||
| 249 | return ret; | 255 | return ret; |
| 250 | } | 256 | } |
| 251 | 257 | ||
| 258 | int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | ||
| 259 | { | ||
| 260 | struct extent_buffer *leaf; | ||
| 261 | struct btrfs_path *path; | ||
| 262 | struct btrfs_key key; | ||
| 263 | int err = 0; | ||
| 264 | int ret; | ||
| 265 | |||
| 266 | path = btrfs_alloc_path(); | ||
| 267 | if (!path) | ||
| 268 | return -ENOMEM; | ||
| 269 | |||
| 270 | key.objectid = BTRFS_ORPHAN_OBJECTID; | ||
| 271 | key.type = BTRFS_ORPHAN_ITEM_KEY; | ||
| 272 | key.offset = 0; | ||
| 273 | |||
| 274 | while (1) { | ||
| 275 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | ||
| 276 | if (ret < 0) { | ||
| 277 | err = ret; | ||
| 278 | break; | ||
| 279 | } | ||
| 280 | |||
| 281 | leaf = path->nodes[0]; | ||
| 282 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
| 283 | ret = btrfs_next_leaf(tree_root, path); | ||
| 284 | if (ret < 0) | ||
| 285 | err = ret; | ||
| 286 | if (ret != 0) | ||
| 287 | break; | ||
| 288 | leaf = path->nodes[0]; | ||
| 289 | } | ||
| 290 | |||
| 291 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 292 | btrfs_release_path(tree_root, path); | ||
| 293 | |||
| 294 | if (key.objectid != BTRFS_ORPHAN_OBJECTID || | ||
| 295 | key.type != BTRFS_ORPHAN_ITEM_KEY) | ||
| 296 | break; | ||
| 297 | |||
| 298 | ret = btrfs_find_dead_roots(tree_root, key.offset); | ||
| 299 | if (ret) { | ||
| 300 | err = ret; | ||
| 301 | break; | ||
| 302 | } | ||
| 303 | |||
| 304 | key.offset++; | ||
| 305 | } | ||
| 306 | |||
| 307 | btrfs_free_path(path); | ||
| 308 | return err; | ||
| 309 | } | ||
| 310 | |||
| 252 | /* drop the root item for 'key' from 'root' */ | 311 | /* drop the root item for 'key' from 'root' */ |
| 253 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 312 | int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 254 | struct btrfs_key *key) | 313 | struct btrfs_key *key) |
| @@ -278,31 +337,57 @@ out: | |||
| 278 | return ret; | 337 | return ret; |
| 279 | } | 338 | } |
| 280 | 339 | ||
| 281 | #if 0 /* this will get used when snapshot deletion is implemented */ | ||
| 282 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, | 340 | int btrfs_del_root_ref(struct btrfs_trans_handle *trans, |
| 283 | struct btrfs_root *tree_root, | 341 | struct btrfs_root *tree_root, |
| 284 | u64 root_id, u8 type, u64 ref_id) | 342 | u64 root_id, u64 ref_id, u64 dirid, u64 *sequence, |
| 343 | const char *name, int name_len) | ||
| 344 | |||
| 285 | { | 345 | { |
| 346 | struct btrfs_path *path; | ||
| 347 | struct btrfs_root_ref *ref; | ||
| 348 | struct extent_buffer *leaf; | ||
| 286 | struct btrfs_key key; | 349 | struct btrfs_key key; |
| 350 | unsigned long ptr; | ||
| 351 | int err = 0; | ||
| 287 | int ret; | 352 | int ret; |
| 288 | struct btrfs_path *path; | ||
| 289 | 353 | ||
| 290 | path = btrfs_alloc_path(); | 354 | path = btrfs_alloc_path(); |
| 355 | if (!path) | ||
| 356 | return -ENOMEM; | ||
| 291 | 357 | ||
| 292 | key.objectid = root_id; | 358 | key.objectid = root_id; |
| 293 | key.type = type; | 359 | key.type = BTRFS_ROOT_BACKREF_KEY; |
| 294 | key.offset = ref_id; | 360 | key.offset = ref_id; |
| 295 | 361 | again: | |
| 296 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); | 362 | ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); |
| 297 | BUG_ON(ret); | 363 | BUG_ON(ret < 0); |
| 298 | 364 | if (ret == 0) { | |
| 299 | ret = btrfs_del_item(trans, tree_root, path); | 365 | leaf = path->nodes[0]; |
| 300 | BUG_ON(ret); | 366 | ref = btrfs_item_ptr(leaf, path->slots[0], |
| 367 | struct btrfs_root_ref); | ||
| 368 | |||
| 369 | WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid); | ||
| 370 | WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len); | ||
| 371 | ptr = (unsigned long)(ref + 1); | ||
| 372 | WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len)); | ||
| 373 | *sequence = btrfs_root_ref_sequence(leaf, ref); | ||
| 374 | |||
| 375 | ret = btrfs_del_item(trans, tree_root, path); | ||
| 376 | BUG_ON(ret); | ||
| 377 | } else | ||
| 378 | err = -ENOENT; | ||
| 379 | |||
| 380 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
| 381 | btrfs_release_path(tree_root, path); | ||
| 382 | key.objectid = ref_id; | ||
| 383 | key.type = BTRFS_ROOT_REF_KEY; | ||
| 384 | key.offset = root_id; | ||
| 385 | goto again; | ||
| 386 | } | ||
| 301 | 387 | ||
| 302 | btrfs_free_path(path); | 388 | btrfs_free_path(path); |
| 303 | return ret; | 389 | return err; |
| 304 | } | 390 | } |
| 305 | #endif | ||
| 306 | 391 | ||
| 307 | int btrfs_find_root_ref(struct btrfs_root *tree_root, | 392 | int btrfs_find_root_ref(struct btrfs_root *tree_root, |
| 308 | struct btrfs_path *path, | 393 | struct btrfs_path *path, |
| @@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
| 319 | return ret; | 404 | return ret; |
| 320 | } | 405 | } |
| 321 | 406 | ||
| 322 | |||
| 323 | /* | 407 | /* |
| 324 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY | 408 | * add a btrfs_root_ref item. type is either BTRFS_ROOT_REF_KEY |
| 325 | * or BTRFS_ROOT_BACKREF_KEY. | 409 | * or BTRFS_ROOT_BACKREF_KEY. |
| @@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root, | |||
| 335 | */ | 419 | */ |
| 336 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | 420 | int btrfs_add_root_ref(struct btrfs_trans_handle *trans, |
| 337 | struct btrfs_root *tree_root, | 421 | struct btrfs_root *tree_root, |
| 338 | u64 root_id, u8 type, u64 ref_id, | 422 | u64 root_id, u64 ref_id, u64 dirid, u64 sequence, |
| 339 | u64 dirid, u64 sequence, | ||
| 340 | const char *name, int name_len) | 423 | const char *name, int name_len) |
| 341 | { | 424 | { |
| 342 | struct btrfs_key key; | 425 | struct btrfs_key key; |
| @@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
| 346 | struct extent_buffer *leaf; | 429 | struct extent_buffer *leaf; |
| 347 | unsigned long ptr; | 430 | unsigned long ptr; |
| 348 | 431 | ||
| 349 | |||
| 350 | path = btrfs_alloc_path(); | 432 | path = btrfs_alloc_path(); |
| 433 | if (!path) | ||
| 434 | return -ENOMEM; | ||
| 351 | 435 | ||
| 352 | key.objectid = root_id; | 436 | key.objectid = root_id; |
| 353 | key.type = type; | 437 | key.type = BTRFS_ROOT_BACKREF_KEY; |
| 354 | key.offset = ref_id; | 438 | key.offset = ref_id; |
| 355 | 439 | again: | |
| 356 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, | 440 | ret = btrfs_insert_empty_item(trans, tree_root, path, &key, |
| 357 | sizeof(*ref) + name_len); | 441 | sizeof(*ref) + name_len); |
| 358 | BUG_ON(ret); | 442 | BUG_ON(ret); |
| @@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, | |||
| 366 | write_extent_buffer(leaf, name, ptr, name_len); | 450 | write_extent_buffer(leaf, name, ptr, name_len); |
| 367 | btrfs_mark_buffer_dirty(leaf); | 451 | btrfs_mark_buffer_dirty(leaf); |
| 368 | 452 | ||
| 453 | if (key.type == BTRFS_ROOT_BACKREF_KEY) { | ||
| 454 | btrfs_release_path(tree_root, path); | ||
| 455 | key.objectid = ref_id; | ||
| 456 | key.type = BTRFS_ROOT_REF_KEY; | ||
| 457 | key.offset = root_id; | ||
| 458 | goto again; | ||
| 459 | } | ||
| 460 | |||
| 369 | btrfs_free_path(path); | 461 | btrfs_free_path(path); |
| 370 | return ret; | 462 | return 0; |
| 371 | } | 463 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6d6d06cb6dfc..9de9b2236419 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -51,7 +51,7 @@ | |||
| 51 | #include "export.h" | 51 | #include "export.h" |
| 52 | #include "compression.h" | 52 | #include "compression.h" |
| 53 | 53 | ||
| 54 | static struct super_operations btrfs_super_ops; | 54 | static const struct super_operations btrfs_super_ops; |
| 55 | 55 | ||
| 56 | static void btrfs_put_super(struct super_block *sb) | 56 | static void btrfs_put_super(struct super_block *sb) |
| 57 | { | 57 | { |
| @@ -344,7 +344,9 @@ static int btrfs_fill_super(struct super_block *sb, | |||
| 344 | sb->s_export_op = &btrfs_export_ops; | 344 | sb->s_export_op = &btrfs_export_ops; |
| 345 | sb->s_xattr = btrfs_xattr_handlers; | 345 | sb->s_xattr = btrfs_xattr_handlers; |
| 346 | sb->s_time_gran = 1; | 346 | sb->s_time_gran = 1; |
| 347 | #ifdef CONFIG_BTRFS_POSIX_ACL | ||
| 347 | sb->s_flags |= MS_POSIXACL; | 348 | sb->s_flags |= MS_POSIXACL; |
| 349 | #endif | ||
| 348 | 350 | ||
| 349 | tree_root = open_ctree(sb, fs_devices, (char *)data); | 351 | tree_root = open_ctree(sb, fs_devices, (char *)data); |
| 350 | 352 | ||
| @@ -675,7 +677,8 @@ static int btrfs_unfreeze(struct super_block *sb) | |||
| 675 | return 0; | 677 | return 0; |
| 676 | } | 678 | } |
| 677 | 679 | ||
| 678 | static struct super_operations btrfs_super_ops = { | 680 | static const struct super_operations btrfs_super_ops = { |
| 681 | .drop_inode = btrfs_drop_inode, | ||
| 679 | .delete_inode = btrfs_delete_inode, | 682 | .delete_inode = btrfs_delete_inode, |
| 680 | .put_super = btrfs_put_super, | 683 | .put_super = btrfs_put_super, |
| 681 | .sync_fs = btrfs_sync_fs, | 684 | .sync_fs = btrfs_sync_fs, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cdbb5022da52..0b8f36d4400a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 104 | { | 104 | { |
| 105 | if (root->ref_cows && root->last_trans < trans->transid) { | 105 | if (root->ref_cows && root->last_trans < trans->transid) { |
| 106 | WARN_ON(root == root->fs_info->extent_root); | 106 | WARN_ON(root == root->fs_info->extent_root); |
| 107 | WARN_ON(root->root_item.refs == 0); | ||
| 108 | WARN_ON(root->commit_root != root->node); | 107 | WARN_ON(root->commit_root != root->node); |
| 109 | 108 | ||
| 110 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, | 109 | radix_tree_tag_set(&root->fs_info->fs_roots_radix, |
| @@ -187,6 +186,9 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 187 | h->alloc_exclude_start = 0; | 186 | h->alloc_exclude_start = 0; |
| 188 | h->delayed_ref_updates = 0; | 187 | h->delayed_ref_updates = 0; |
| 189 | 188 | ||
| 189 | if (!current->journal_info) | ||
| 190 | current->journal_info = h; | ||
| 191 | |||
| 190 | root->fs_info->running_transaction->use_count++; | 192 | root->fs_info->running_transaction->use_count++; |
| 191 | record_root_in_trans(h, root); | 193 | record_root_in_trans(h, root); |
| 192 | mutex_unlock(&root->fs_info->trans_mutex); | 194 | mutex_unlock(&root->fs_info->trans_mutex); |
| @@ -318,6 +320,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 318 | wake_up(&cur_trans->writer_wait); | 320 | wake_up(&cur_trans->writer_wait); |
| 319 | put_transaction(cur_trans); | 321 | put_transaction(cur_trans); |
| 320 | mutex_unlock(&info->trans_mutex); | 322 | mutex_unlock(&info->trans_mutex); |
| 323 | |||
| 324 | if (current->journal_info == trans) | ||
| 325 | current->journal_info = NULL; | ||
| 321 | memset(trans, 0, sizeof(*trans)); | 326 | memset(trans, 0, sizeof(*trans)); |
| 322 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 327 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 323 | 328 | ||
| @@ -720,7 +725,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 720 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 725 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
| 721 | 726 | ||
| 722 | key.objectid = objectid; | 727 | key.objectid = objectid; |
| 723 | key.offset = 0; | 728 | /* record when the snapshot was created in key.offset */ |
| 729 | key.offset = trans->transid; | ||
| 724 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 730 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
| 725 | 731 | ||
| 726 | old = btrfs_lock_root_node(root); | 732 | old = btrfs_lock_root_node(root); |
| @@ -743,6 +749,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 743 | memcpy(&pending->root_key, &key, sizeof(key)); | 749 | memcpy(&pending->root_key, &key, sizeof(key)); |
| 744 | fail: | 750 | fail: |
| 745 | kfree(new_root_item); | 751 | kfree(new_root_item); |
| 752 | btrfs_unreserve_metadata_space(root, 6); | ||
| 746 | return ret; | 753 | return ret; |
| 747 | } | 754 | } |
| 748 | 755 | ||
| @@ -778,24 +785,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, | |||
| 778 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 785 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
| 779 | BUG_ON(ret); | 786 | BUG_ON(ret); |
| 780 | 787 | ||
| 781 | /* add the backref first */ | ||
| 782 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 788 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, |
| 783 | pending->root_key.objectid, | 789 | pending->root_key.objectid, |
| 784 | BTRFS_ROOT_BACKREF_KEY, | ||
| 785 | parent_root->root_key.objectid, | 790 | parent_root->root_key.objectid, |
| 786 | parent_inode->i_ino, index, pending->name, | 791 | parent_inode->i_ino, index, pending->name, |
| 787 | namelen); | 792 | namelen); |
| 788 | 793 | ||
| 789 | BUG_ON(ret); | 794 | BUG_ON(ret); |
| 790 | 795 | ||
| 791 | /* now add the forward ref */ | ||
| 792 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | ||
| 793 | parent_root->root_key.objectid, | ||
| 794 | BTRFS_ROOT_REF_KEY, | ||
| 795 | pending->root_key.objectid, | ||
| 796 | parent_inode->i_ino, index, pending->name, | ||
| 797 | namelen); | ||
| 798 | |||
| 799 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); | 796 | inode = btrfs_lookup_dentry(parent_inode, pending->dentry); |
| 800 | d_instantiate(pending->dentry, inode); | 797 | d_instantiate(pending->dentry, inode); |
| 801 | fail: | 798 | fail: |
| @@ -874,7 +871,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 874 | unsigned long timeout = 1; | 871 | unsigned long timeout = 1; |
| 875 | struct btrfs_transaction *cur_trans; | 872 | struct btrfs_transaction *cur_trans; |
| 876 | struct btrfs_transaction *prev_trans = NULL; | 873 | struct btrfs_transaction *prev_trans = NULL; |
| 877 | struct extent_io_tree *pinned_copy; | ||
| 878 | DEFINE_WAIT(wait); | 874 | DEFINE_WAIT(wait); |
| 879 | int ret; | 875 | int ret; |
| 880 | int should_grow = 0; | 876 | int should_grow = 0; |
| @@ -915,13 +911,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 915 | return 0; | 911 | return 0; |
| 916 | } | 912 | } |
| 917 | 913 | ||
| 918 | pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS); | ||
| 919 | if (!pinned_copy) | ||
| 920 | return -ENOMEM; | ||
| 921 | |||
| 922 | extent_io_tree_init(pinned_copy, | ||
| 923 | root->fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
| 924 | |||
| 925 | trans->transaction->in_commit = 1; | 914 | trans->transaction->in_commit = 1; |
| 926 | trans->transaction->blocked = 1; | 915 | trans->transaction->blocked = 1; |
| 927 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 916 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
| @@ -1019,6 +1008,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1019 | ret = commit_cowonly_roots(trans, root); | 1008 | ret = commit_cowonly_roots(trans, root); |
| 1020 | BUG_ON(ret); | 1009 | BUG_ON(ret); |
| 1021 | 1010 | ||
| 1011 | btrfs_prepare_extent_commit(trans, root); | ||
| 1012 | |||
| 1022 | cur_trans = root->fs_info->running_transaction; | 1013 | cur_trans = root->fs_info->running_transaction; |
| 1023 | spin_lock(&root->fs_info->new_trans_lock); | 1014 | spin_lock(&root->fs_info->new_trans_lock); |
| 1024 | root->fs_info->running_transaction = NULL; | 1015 | root->fs_info->running_transaction = NULL; |
| @@ -1042,8 +1033,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1042 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, | 1033 | memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, |
| 1043 | sizeof(root->fs_info->super_copy)); | 1034 | sizeof(root->fs_info->super_copy)); |
| 1044 | 1035 | ||
| 1045 | btrfs_copy_pinned(root, pinned_copy); | ||
| 1046 | |||
| 1047 | trans->transaction->blocked = 0; | 1036 | trans->transaction->blocked = 0; |
| 1048 | 1037 | ||
| 1049 | wake_up(&root->fs_info->transaction_wait); | 1038 | wake_up(&root->fs_info->transaction_wait); |
| @@ -1059,8 +1048,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1059 | */ | 1048 | */ |
| 1060 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1049 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 1061 | 1050 | ||
| 1062 | btrfs_finish_extent_commit(trans, root, pinned_copy); | 1051 | btrfs_finish_extent_commit(trans, root); |
| 1063 | kfree(pinned_copy); | ||
| 1064 | 1052 | ||
| 1065 | /* do the directory inserts of any pending snapshot creations */ | 1053 | /* do the directory inserts of any pending snapshot creations */ |
| 1066 | finish_pending_snapshots(trans, root->fs_info); | 1054 | finish_pending_snapshots(trans, root->fs_info); |
| @@ -1078,6 +1066,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1078 | 1066 | ||
| 1079 | mutex_unlock(&root->fs_info->trans_mutex); | 1067 | mutex_unlock(&root->fs_info->trans_mutex); |
| 1080 | 1068 | ||
| 1069 | if (current->journal_info == trans) | ||
| 1070 | current->journal_info = NULL; | ||
| 1071 | |||
| 1081 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1072 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1082 | return ret; | 1073 | return ret; |
| 1083 | } | 1074 | } |
| @@ -1096,8 +1087,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
| 1096 | 1087 | ||
| 1097 | while (!list_empty(&list)) { | 1088 | while (!list_empty(&list)) { |
| 1098 | root = list_entry(list.next, struct btrfs_root, root_list); | 1089 | root = list_entry(list.next, struct btrfs_root, root_list); |
| 1099 | list_del_init(&root->root_list); | 1090 | list_del(&root->root_list); |
| 1100 | btrfs_drop_snapshot(root, 0); | 1091 | |
| 1092 | if (btrfs_header_backref_rev(root->node) < | ||
| 1093 | BTRFS_MIXED_BACKREF_REV) | ||
| 1094 | btrfs_drop_snapshot(root, 0); | ||
| 1095 | else | ||
| 1096 | btrfs_drop_snapshot(root, 1); | ||
| 1101 | } | 1097 | } |
| 1102 | return 0; | 1098 | return 0; |
| 1103 | } | 1099 | } |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d91b0de7c502..4edfdc2acc5f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -137,11 +137,20 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 137 | 137 | ||
| 138 | mutex_lock(&root->log_mutex); | 138 | mutex_lock(&root->log_mutex); |
| 139 | if (root->log_root) { | 139 | if (root->log_root) { |
| 140 | if (!root->log_start_pid) { | ||
| 141 | root->log_start_pid = current->pid; | ||
| 142 | root->log_multiple_pids = false; | ||
| 143 | } else if (root->log_start_pid != current->pid) { | ||
| 144 | root->log_multiple_pids = true; | ||
| 145 | } | ||
| 146 | |||
| 140 | root->log_batch++; | 147 | root->log_batch++; |
| 141 | atomic_inc(&root->log_writers); | 148 | atomic_inc(&root->log_writers); |
| 142 | mutex_unlock(&root->log_mutex); | 149 | mutex_unlock(&root->log_mutex); |
| 143 | return 0; | 150 | return 0; |
| 144 | } | 151 | } |
| 152 | root->log_multiple_pids = false; | ||
| 153 | root->log_start_pid = current->pid; | ||
| 145 | mutex_lock(&root->fs_info->tree_log_mutex); | 154 | mutex_lock(&root->fs_info->tree_log_mutex); |
| 146 | if (!root->fs_info->log_root_tree) { | 155 | if (!root->fs_info->log_root_tree) { |
| 147 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 156 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
| @@ -263,8 +272,8 @@ static int process_one_buffer(struct btrfs_root *log, | |||
| 263 | struct walk_control *wc, u64 gen) | 272 | struct walk_control *wc, u64 gen) |
| 264 | { | 273 | { |
| 265 | if (wc->pin) | 274 | if (wc->pin) |
| 266 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 275 | btrfs_pin_extent(log->fs_info->extent_root, |
| 267 | eb->start, eb->len, 1); | 276 | eb->start, eb->len, 0); |
| 268 | 277 | ||
| 269 | if (btrfs_buffer_uptodate(eb, gen)) { | 278 | if (btrfs_buffer_uptodate(eb, gen)) { |
| 270 | if (wc->write) | 279 | if (wc->write) |
| @@ -534,7 +543,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 534 | saved_nbytes = inode_get_bytes(inode); | 543 | saved_nbytes = inode_get_bytes(inode); |
| 535 | /* drop any overlapping extents */ | 544 | /* drop any overlapping extents */ |
| 536 | ret = btrfs_drop_extents(trans, root, inode, | 545 | ret = btrfs_drop_extents(trans, root, inode, |
| 537 | start, extent_end, extent_end, start, &alloc_hint); | 546 | start, extent_end, extent_end, start, &alloc_hint, 1); |
| 538 | BUG_ON(ret); | 547 | BUG_ON(ret); |
| 539 | 548 | ||
| 540 | if (found_type == BTRFS_FILE_EXTENT_REG || | 549 | if (found_type == BTRFS_FILE_EXTENT_REG || |
| @@ -1985,7 +1994,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 1985 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 1994 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
| 1986 | wait_log_commit(trans, root, root->log_transid - 1); | 1995 | wait_log_commit(trans, root, root->log_transid - 1); |
| 1987 | 1996 | ||
| 1988 | while (1) { | 1997 | while (root->log_multiple_pids) { |
| 1989 | unsigned long batch = root->log_batch; | 1998 | unsigned long batch = root->log_batch; |
| 1990 | mutex_unlock(&root->log_mutex); | 1999 | mutex_unlock(&root->log_mutex); |
| 1991 | schedule_timeout_uninterruptible(1); | 2000 | schedule_timeout_uninterruptible(1); |
| @@ -2011,6 +2020,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2011 | root->log_batch = 0; | 2020 | root->log_batch = 0; |
| 2012 | root->log_transid++; | 2021 | root->log_transid++; |
| 2013 | log->log_transid = root->log_transid; | 2022 | log->log_transid = root->log_transid; |
| 2023 | root->log_start_pid = 0; | ||
| 2014 | smp_mb(); | 2024 | smp_mb(); |
| 2015 | /* | 2025 | /* |
| 2016 | * log tree has been flushed to disk, new modifications of | 2026 | * log tree has been flushed to disk, new modifications of |
| @@ -2605,7 +2615,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2605 | extent); | 2615 | extent); |
| 2606 | cs = btrfs_file_extent_offset(src, extent); | 2616 | cs = btrfs_file_extent_offset(src, extent); |
| 2607 | cl = btrfs_file_extent_num_bytes(src, | 2617 | cl = btrfs_file_extent_num_bytes(src, |
| 2608 | extent);; | 2618 | extent); |
| 2609 | if (btrfs_file_extent_compression(src, | 2619 | if (btrfs_file_extent_compression(src, |
| 2610 | extent)) { | 2620 | extent)) { |
| 2611 | cs = 0; | 2621 | cs = 0; |
| @@ -2841,7 +2851,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
| 2841 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 2851 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
| 2842 | break; | 2852 | break; |
| 2843 | 2853 | ||
| 2844 | if (parent == sb->s_root) | 2854 | if (IS_ROOT(parent)) |
| 2845 | break; | 2855 | break; |
| 2846 | 2856 | ||
| 2847 | parent = parent->d_parent; | 2857 | parent = parent->d_parent; |
| @@ -2880,6 +2890,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2880 | goto end_no_trans; | 2890 | goto end_no_trans; |
| 2881 | } | 2891 | } |
| 2882 | 2892 | ||
| 2893 | if (root != BTRFS_I(inode)->root || | ||
| 2894 | btrfs_root_refs(&root->root_item) == 0) { | ||
| 2895 | ret = 1; | ||
| 2896 | goto end_no_trans; | ||
| 2897 | } | ||
| 2898 | |||
| 2883 | ret = check_parent_dirs_for_sync(trans, inode, parent, | 2899 | ret = check_parent_dirs_for_sync(trans, inode, parent, |
| 2884 | sb, last_committed); | 2900 | sb, last_committed); |
| 2885 | if (ret) | 2901 | if (ret) |
| @@ -2907,12 +2923,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2907 | break; | 2923 | break; |
| 2908 | 2924 | ||
| 2909 | inode = parent->d_inode; | 2925 | inode = parent->d_inode; |
| 2926 | if (root != BTRFS_I(inode)->root) | ||
| 2927 | break; | ||
| 2928 | |||
| 2910 | if (BTRFS_I(inode)->generation > | 2929 | if (BTRFS_I(inode)->generation > |
| 2911 | root->fs_info->last_trans_committed) { | 2930 | root->fs_info->last_trans_committed) { |
| 2912 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2931 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 2913 | BUG_ON(ret); | 2932 | BUG_ON(ret); |
| 2914 | } | 2933 | } |
| 2915 | if (parent == sb->s_root) | 2934 | if (IS_ROOT(parent)) |
| 2916 | break; | 2935 | break; |
| 2917 | 2936 | ||
| 2918 | parent = parent->d_parent; | 2937 | parent = parent->d_parent; |
| @@ -2951,7 +2970,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
| 2951 | struct btrfs_key tmp_key; | 2970 | struct btrfs_key tmp_key; |
| 2952 | struct btrfs_root *log; | 2971 | struct btrfs_root *log; |
| 2953 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; | 2972 | struct btrfs_fs_info *fs_info = log_root_tree->fs_info; |
| 2954 | u64 highest_inode; | ||
| 2955 | struct walk_control wc = { | 2973 | struct walk_control wc = { |
| 2956 | .process_func = process_one_buffer, | 2974 | .process_func = process_one_buffer, |
| 2957 | .stage = 0, | 2975 | .stage = 0, |
| @@ -3010,11 +3028,6 @@ again: | |||
| 3010 | path); | 3028 | path); |
| 3011 | BUG_ON(ret); | 3029 | BUG_ON(ret); |
| 3012 | } | 3030 | } |
| 3013 | ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode); | ||
| 3014 | if (ret == 0) { | ||
| 3015 | wc.replay_dest->highest_inode = highest_inode; | ||
| 3016 | wc.replay_dest->last_inode_alloc = highest_inode; | ||
| 3017 | } | ||
| 3018 | 3031 | ||
| 3019 | key.offset = found_key.offset - 1; | 3032 | key.offset = found_key.offset - 1; |
| 3020 | wc.replay_dest->log_root = NULL; | 3033 | wc.replay_dest->log_root = NULL; |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5dbefd11b4af..7eda483d7b5a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -260,7 +260,7 @@ loop_lock: | |||
| 260 | num_run++; | 260 | num_run++; |
| 261 | batch_run++; | 261 | batch_run++; |
| 262 | 262 | ||
| 263 | if (bio_sync(cur)) | 263 | if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) |
| 264 | num_sync_run++; | 264 | num_sync_run++; |
| 265 | 265 | ||
| 266 | if (need_resched()) { | 266 | if (need_resched()) { |
| @@ -276,7 +276,7 @@ loop_lock: | |||
| 276 | * is now congested. Back off and let other work structs | 276 | * is now congested. Back off and let other work structs |
| 277 | * run instead | 277 | * run instead |
| 278 | */ | 278 | */ |
| 279 | if (pending && bdi_write_congested(bdi) && batch_run > 32 && | 279 | if (pending && bdi_write_congested(bdi) && batch_run > 8 && |
| 280 | fs_info->fs_devices->open_devices > 1) { | 280 | fs_info->fs_devices->open_devices > 1) { |
| 281 | struct io_context *ioc; | 281 | struct io_context *ioc; |
| 282 | 282 | ||
| @@ -446,8 +446,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
| 446 | goto error; | 446 | goto error; |
| 447 | 447 | ||
| 448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); | 448 | device->name = kstrdup(orig_dev->name, GFP_NOFS); |
| 449 | if (!device->name) | 449 | if (!device->name) { |
| 450 | kfree(device); | ||
| 450 | goto error; | 451 | goto error; |
| 452 | } | ||
| 451 | 453 | ||
| 452 | device->devid = orig_dev->devid; | 454 | device->devid = orig_dev->devid; |
| 453 | device->work.func = pending_bios_fn; | 455 | device->work.func = pending_bios_fn; |
| @@ -719,10 +721,9 @@ error: | |||
| 719 | * called very infrequently and that a given device has a small number | 721 | * called very infrequently and that a given device has a small number |
| 720 | * of extents | 722 | * of extents |
| 721 | */ | 723 | */ |
| 722 | static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, | 724 | int find_free_dev_extent(struct btrfs_trans_handle *trans, |
| 723 | struct btrfs_device *device, | 725 | struct btrfs_device *device, u64 num_bytes, |
| 724 | u64 num_bytes, u64 *start, | 726 | u64 *start, u64 *max_avail) |
| 725 | u64 *max_avail) | ||
| 726 | { | 727 | { |
| 727 | struct btrfs_key key; | 728 | struct btrfs_key key; |
| 728 | struct btrfs_root *root = device->dev_root; | 729 | struct btrfs_root *root = device->dev_root; |
| @@ -1736,6 +1737,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1736 | extent_root = root->fs_info->extent_root; | 1737 | extent_root = root->fs_info->extent_root; |
| 1737 | em_tree = &root->fs_info->mapping_tree.map_tree; | 1738 | em_tree = &root->fs_info->mapping_tree.map_tree; |
| 1738 | 1739 | ||
| 1740 | ret = btrfs_can_relocate(extent_root, chunk_offset); | ||
| 1741 | if (ret) | ||
| 1742 | return -ENOSPC; | ||
| 1743 | |||
| 1739 | /* step one, relocate all the extents inside this chunk */ | 1744 | /* step one, relocate all the extents inside this chunk */ |
| 1740 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1745 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
| 1741 | BUG_ON(ret); | 1746 | BUG_ON(ret); |
| @@ -1749,9 +1754,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1749 | * step two, delete the device extents and the | 1754 | * step two, delete the device extents and the |
| 1750 | * chunk tree entries | 1755 | * chunk tree entries |
| 1751 | */ | 1756 | */ |
| 1752 | spin_lock(&em_tree->lock); | 1757 | read_lock(&em_tree->lock); |
| 1753 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | 1758 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
| 1754 | spin_unlock(&em_tree->lock); | 1759 | read_unlock(&em_tree->lock); |
| 1755 | 1760 | ||
| 1756 | BUG_ON(em->start > chunk_offset || | 1761 | BUG_ON(em->start > chunk_offset || |
| 1757 | em->start + em->len < chunk_offset); | 1762 | em->start + em->len < chunk_offset); |
| @@ -1780,9 +1785,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1780 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 1785 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); |
| 1781 | BUG_ON(ret); | 1786 | BUG_ON(ret); |
| 1782 | 1787 | ||
| 1783 | spin_lock(&em_tree->lock); | 1788 | write_lock(&em_tree->lock); |
| 1784 | remove_extent_mapping(em_tree, em); | 1789 | remove_extent_mapping(em_tree, em); |
| 1785 | spin_unlock(&em_tree->lock); | 1790 | write_unlock(&em_tree->lock); |
| 1786 | 1791 | ||
| 1787 | kfree(map); | 1792 | kfree(map); |
| 1788 | em->bdev = NULL; | 1793 | em->bdev = NULL; |
| @@ -1807,12 +1812,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
| 1807 | struct btrfs_key found_key; | 1812 | struct btrfs_key found_key; |
| 1808 | u64 chunk_tree = chunk_root->root_key.objectid; | 1813 | u64 chunk_tree = chunk_root->root_key.objectid; |
| 1809 | u64 chunk_type; | 1814 | u64 chunk_type; |
| 1815 | bool retried = false; | ||
| 1816 | int failed = 0; | ||
| 1810 | int ret; | 1817 | int ret; |
| 1811 | 1818 | ||
| 1812 | path = btrfs_alloc_path(); | 1819 | path = btrfs_alloc_path(); |
| 1813 | if (!path) | 1820 | if (!path) |
| 1814 | return -ENOMEM; | 1821 | return -ENOMEM; |
| 1815 | 1822 | ||
| 1823 | again: | ||
| 1816 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; | 1824 | key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; |
| 1817 | key.offset = (u64)-1; | 1825 | key.offset = (u64)-1; |
| 1818 | key.type = BTRFS_CHUNK_ITEM_KEY; | 1826 | key.type = BTRFS_CHUNK_ITEM_KEY; |
| @@ -1842,7 +1850,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
| 1842 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, | 1850 | ret = btrfs_relocate_chunk(chunk_root, chunk_tree, |
| 1843 | found_key.objectid, | 1851 | found_key.objectid, |
| 1844 | found_key.offset); | 1852 | found_key.offset); |
| 1845 | BUG_ON(ret); | 1853 | if (ret == -ENOSPC) |
| 1854 | failed++; | ||
| 1855 | else if (ret) | ||
| 1856 | BUG(); | ||
| 1846 | } | 1857 | } |
| 1847 | 1858 | ||
| 1848 | if (found_key.offset == 0) | 1859 | if (found_key.offset == 0) |
| @@ -1850,6 +1861,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root) | |||
| 1850 | key.offset = found_key.offset - 1; | 1861 | key.offset = found_key.offset - 1; |
| 1851 | } | 1862 | } |
| 1852 | ret = 0; | 1863 | ret = 0; |
| 1864 | if (failed && !retried) { | ||
| 1865 | failed = 0; | ||
| 1866 | retried = true; | ||
| 1867 | goto again; | ||
| 1868 | } else if (failed && retried) { | ||
| 1869 | WARN_ON(1); | ||
| 1870 | ret = -ENOSPC; | ||
| 1871 | } | ||
| 1853 | error: | 1872 | error: |
| 1854 | btrfs_free_path(path); | 1873 | btrfs_free_path(path); |
| 1855 | return ret; | 1874 | return ret; |
| @@ -1894,6 +1913,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1894 | continue; | 1913 | continue; |
| 1895 | 1914 | ||
| 1896 | ret = btrfs_shrink_device(device, old_size - size_to_free); | 1915 | ret = btrfs_shrink_device(device, old_size - size_to_free); |
| 1916 | if (ret == -ENOSPC) | ||
| 1917 | break; | ||
| 1897 | BUG_ON(ret); | 1918 | BUG_ON(ret); |
| 1898 | 1919 | ||
| 1899 | trans = btrfs_start_transaction(dev_root, 1); | 1920 | trans = btrfs_start_transaction(dev_root, 1); |
| @@ -1938,9 +1959,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1938 | chunk = btrfs_item_ptr(path->nodes[0], | 1959 | chunk = btrfs_item_ptr(path->nodes[0], |
| 1939 | path->slots[0], | 1960 | path->slots[0], |
| 1940 | struct btrfs_chunk); | 1961 | struct btrfs_chunk); |
| 1941 | key.offset = found_key.offset; | ||
| 1942 | /* chunk zero is special */ | 1962 | /* chunk zero is special */ |
| 1943 | if (key.offset == 0) | 1963 | if (found_key.offset == 0) |
| 1944 | break; | 1964 | break; |
| 1945 | 1965 | ||
| 1946 | btrfs_release_path(chunk_root, path); | 1966 | btrfs_release_path(chunk_root, path); |
| @@ -1948,7 +1968,8 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1948 | chunk_root->root_key.objectid, | 1968 | chunk_root->root_key.objectid, |
| 1949 | found_key.objectid, | 1969 | found_key.objectid, |
| 1950 | found_key.offset); | 1970 | found_key.offset); |
| 1951 | BUG_ON(ret); | 1971 | BUG_ON(ret && ret != -ENOSPC); |
| 1972 | key.offset = found_key.offset - 1; | ||
| 1952 | } | 1973 | } |
| 1953 | ret = 0; | 1974 | ret = 0; |
| 1954 | error: | 1975 | error: |
| @@ -1974,10 +1995,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 1974 | u64 chunk_offset; | 1995 | u64 chunk_offset; |
| 1975 | int ret; | 1996 | int ret; |
| 1976 | int slot; | 1997 | int slot; |
| 1998 | int failed = 0; | ||
| 1999 | bool retried = false; | ||
| 1977 | struct extent_buffer *l; | 2000 | struct extent_buffer *l; |
| 1978 | struct btrfs_key key; | 2001 | struct btrfs_key key; |
| 1979 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; | 2002 | struct btrfs_super_block *super_copy = &root->fs_info->super_copy; |
| 1980 | u64 old_total = btrfs_super_total_bytes(super_copy); | 2003 | u64 old_total = btrfs_super_total_bytes(super_copy); |
| 2004 | u64 old_size = device->total_bytes; | ||
| 1981 | u64 diff = device->total_bytes - new_size; | 2005 | u64 diff = device->total_bytes - new_size; |
| 1982 | 2006 | ||
| 1983 | if (new_size >= device->total_bytes) | 2007 | if (new_size >= device->total_bytes) |
| @@ -1987,12 +2011,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 1987 | if (!path) | 2011 | if (!path) |
| 1988 | return -ENOMEM; | 2012 | return -ENOMEM; |
| 1989 | 2013 | ||
| 1990 | trans = btrfs_start_transaction(root, 1); | ||
| 1991 | if (!trans) { | ||
| 1992 | ret = -ENOMEM; | ||
| 1993 | goto done; | ||
| 1994 | } | ||
| 1995 | |||
| 1996 | path->reada = 2; | 2014 | path->reada = 2; |
| 1997 | 2015 | ||
| 1998 | lock_chunks(root); | 2016 | lock_chunks(root); |
| @@ -2001,8 +2019,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2001 | if (device->writeable) | 2019 | if (device->writeable) |
| 2002 | device->fs_devices->total_rw_bytes -= diff; | 2020 | device->fs_devices->total_rw_bytes -= diff; |
| 2003 | unlock_chunks(root); | 2021 | unlock_chunks(root); |
| 2004 | btrfs_end_transaction(trans, root); | ||
| 2005 | 2022 | ||
| 2023 | again: | ||
| 2006 | key.objectid = device->devid; | 2024 | key.objectid = device->devid; |
| 2007 | key.offset = (u64)-1; | 2025 | key.offset = (u64)-1; |
| 2008 | key.type = BTRFS_DEV_EXTENT_KEY; | 2026 | key.type = BTRFS_DEV_EXTENT_KEY; |
| @@ -2017,6 +2035,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2017 | goto done; | 2035 | goto done; |
| 2018 | if (ret) { | 2036 | if (ret) { |
| 2019 | ret = 0; | 2037 | ret = 0; |
| 2038 | btrfs_release_path(root, path); | ||
| 2020 | break; | 2039 | break; |
| 2021 | } | 2040 | } |
| 2022 | 2041 | ||
| @@ -2024,14 +2043,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2024 | slot = path->slots[0]; | 2043 | slot = path->slots[0]; |
| 2025 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 2044 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
| 2026 | 2045 | ||
| 2027 | if (key.objectid != device->devid) | 2046 | if (key.objectid != device->devid) { |
| 2047 | btrfs_release_path(root, path); | ||
| 2028 | break; | 2048 | break; |
| 2049 | } | ||
| 2029 | 2050 | ||
| 2030 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | 2051 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); |
| 2031 | length = btrfs_dev_extent_length(l, dev_extent); | 2052 | length = btrfs_dev_extent_length(l, dev_extent); |
| 2032 | 2053 | ||
| 2033 | if (key.offset + length <= new_size) | 2054 | if (key.offset + length <= new_size) { |
| 2055 | btrfs_release_path(root, path); | ||
| 2034 | break; | 2056 | break; |
| 2057 | } | ||
| 2035 | 2058 | ||
| 2036 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 2059 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
| 2037 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 2060 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
| @@ -2040,8 +2063,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 2040 | 2063 | ||
| 2041 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, | 2064 | ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid, |
| 2042 | chunk_offset); | 2065 | chunk_offset); |
| 2043 | if (ret) | 2066 | if (ret && ret != -ENOSPC) |
| 2044 | goto done; | 2067 | goto done; |
| 2068 | if (ret == -ENOSPC) | ||
| 2069 | failed++; | ||
| 2070 | key.offset -= 1; | ||
| 2071 | } | ||
| 2072 | |||
| 2073 | if (failed && !retried) { | ||
| 2074 | failed = 0; | ||
| 2075 | retried = true; | ||
| 2076 | goto again; | ||
| 2077 | } else if (failed && retried) { | ||
| 2078 | ret = -ENOSPC; | ||
| 2079 | lock_chunks(root); | ||
| 2080 | |||
| 2081 | device->total_bytes = old_size; | ||
| 2082 | if (device->writeable) | ||
| 2083 | device->fs_devices->total_rw_bytes += diff; | ||
| 2084 | unlock_chunks(root); | ||
| 2085 | goto done; | ||
| 2045 | } | 2086 | } |
| 2046 | 2087 | ||
| 2047 | /* Shrinking succeeded, else we would be at "done". */ | 2088 | /* Shrinking succeeded, else we would be at "done". */ |
| @@ -2294,9 +2335,9 @@ again: | |||
| 2294 | em->block_len = em->len; | 2335 | em->block_len = em->len; |
| 2295 | 2336 | ||
| 2296 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 2337 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
| 2297 | spin_lock(&em_tree->lock); | 2338 | write_lock(&em_tree->lock); |
| 2298 | ret = add_extent_mapping(em_tree, em); | 2339 | ret = add_extent_mapping(em_tree, em); |
| 2299 | spin_unlock(&em_tree->lock); | 2340 | write_unlock(&em_tree->lock); |
| 2300 | BUG_ON(ret); | 2341 | BUG_ON(ret); |
| 2301 | free_extent_map(em); | 2342 | free_extent_map(em); |
| 2302 | 2343 | ||
| @@ -2491,9 +2532,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
| 2491 | int readonly = 0; | 2532 | int readonly = 0; |
| 2492 | int i; | 2533 | int i; |
| 2493 | 2534 | ||
| 2494 | spin_lock(&map_tree->map_tree.lock); | 2535 | read_lock(&map_tree->map_tree.lock); |
| 2495 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2536 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
| 2496 | spin_unlock(&map_tree->map_tree.lock); | 2537 | read_unlock(&map_tree->map_tree.lock); |
| 2497 | if (!em) | 2538 | if (!em) |
| 2498 | return 1; | 2539 | return 1; |
| 2499 | 2540 | ||
| @@ -2518,11 +2559,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
| 2518 | struct extent_map *em; | 2559 | struct extent_map *em; |
| 2519 | 2560 | ||
| 2520 | while (1) { | 2561 | while (1) { |
| 2521 | spin_lock(&tree->map_tree.lock); | 2562 | write_lock(&tree->map_tree.lock); |
| 2522 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); | 2563 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); |
| 2523 | if (em) | 2564 | if (em) |
| 2524 | remove_extent_mapping(&tree->map_tree, em); | 2565 | remove_extent_mapping(&tree->map_tree, em); |
| 2525 | spin_unlock(&tree->map_tree.lock); | 2566 | write_unlock(&tree->map_tree.lock); |
| 2526 | if (!em) | 2567 | if (!em) |
| 2527 | break; | 2568 | break; |
| 2528 | kfree(em->bdev); | 2569 | kfree(em->bdev); |
| @@ -2540,9 +2581,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | |||
| 2540 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2581 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
| 2541 | int ret; | 2582 | int ret; |
| 2542 | 2583 | ||
| 2543 | spin_lock(&em_tree->lock); | 2584 | read_lock(&em_tree->lock); |
| 2544 | em = lookup_extent_mapping(em_tree, logical, len); | 2585 | em = lookup_extent_mapping(em_tree, logical, len); |
| 2545 | spin_unlock(&em_tree->lock); | 2586 | read_unlock(&em_tree->lock); |
| 2546 | BUG_ON(!em); | 2587 | BUG_ON(!em); |
| 2547 | 2588 | ||
| 2548 | BUG_ON(em->start > logical || em->start + em->len < logical); | 2589 | BUG_ON(em->start > logical || em->start + em->len < logical); |
| @@ -2604,9 +2645,9 @@ again: | |||
| 2604 | atomic_set(&multi->error, 0); | 2645 | atomic_set(&multi->error, 0); |
| 2605 | } | 2646 | } |
| 2606 | 2647 | ||
| 2607 | spin_lock(&em_tree->lock); | 2648 | read_lock(&em_tree->lock); |
| 2608 | em = lookup_extent_mapping(em_tree, logical, *length); | 2649 | em = lookup_extent_mapping(em_tree, logical, *length); |
| 2609 | spin_unlock(&em_tree->lock); | 2650 | read_unlock(&em_tree->lock); |
| 2610 | 2651 | ||
| 2611 | if (!em && unplug_page) | 2652 | if (!em && unplug_page) |
| 2612 | return 0; | 2653 | return 0; |
| @@ -2763,9 +2804,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 2763 | u64 stripe_nr; | 2804 | u64 stripe_nr; |
| 2764 | int i, j, nr = 0; | 2805 | int i, j, nr = 0; |
| 2765 | 2806 | ||
| 2766 | spin_lock(&em_tree->lock); | 2807 | read_lock(&em_tree->lock); |
| 2767 | em = lookup_extent_mapping(em_tree, chunk_start, 1); | 2808 | em = lookup_extent_mapping(em_tree, chunk_start, 1); |
| 2768 | spin_unlock(&em_tree->lock); | 2809 | read_unlock(&em_tree->lock); |
| 2769 | 2810 | ||
| 2770 | BUG_ON(!em || em->start != chunk_start); | 2811 | BUG_ON(!em || em->start != chunk_start); |
| 2771 | map = (struct map_lookup *)em->bdev; | 2812 | map = (struct map_lookup *)em->bdev; |
| @@ -2903,7 +2944,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
| 2903 | bio->bi_rw |= rw; | 2944 | bio->bi_rw |= rw; |
| 2904 | 2945 | ||
| 2905 | spin_lock(&device->io_lock); | 2946 | spin_lock(&device->io_lock); |
| 2906 | if (bio_sync(bio)) | 2947 | if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) |
| 2907 | pending_bios = &device->pending_sync_bios; | 2948 | pending_bios = &device->pending_sync_bios; |
| 2908 | else | 2949 | else |
| 2909 | pending_bios = &device->pending_bios; | 2950 | pending_bios = &device->pending_bios; |
| @@ -3053,9 +3094,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 3053 | logical = key->offset; | 3094 | logical = key->offset; |
| 3054 | length = btrfs_chunk_length(leaf, chunk); | 3095 | length = btrfs_chunk_length(leaf, chunk); |
| 3055 | 3096 | ||
| 3056 | spin_lock(&map_tree->map_tree.lock); | 3097 | read_lock(&map_tree->map_tree.lock); |
| 3057 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); | 3098 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); |
| 3058 | spin_unlock(&map_tree->map_tree.lock); | 3099 | read_unlock(&map_tree->map_tree.lock); |
| 3059 | 3100 | ||
| 3060 | /* already mapped? */ | 3101 | /* already mapped? */ |
| 3061 | if (em && em->start <= logical && em->start + em->len > logical) { | 3102 | if (em && em->start <= logical && em->start + em->len > logical) { |
| @@ -3114,9 +3155,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 3114 | map->stripes[i].dev->in_fs_metadata = 1; | 3155 | map->stripes[i].dev->in_fs_metadata = 1; |
| 3115 | } | 3156 | } |
| 3116 | 3157 | ||
| 3117 | spin_lock(&map_tree->map_tree.lock); | 3158 | write_lock(&map_tree->map_tree.lock); |
| 3118 | ret = add_extent_mapping(&map_tree->map_tree, em); | 3159 | ret = add_extent_mapping(&map_tree->map_tree, em); |
| 3119 | spin_unlock(&map_tree->map_tree.lock); | 3160 | write_unlock(&map_tree->map_tree.lock); |
| 3120 | BUG_ON(ret); | 3161 | BUG_ON(ret); |
| 3121 | free_extent_map(em); | 3162 | free_extent_map(em); |
| 3122 | 3163 | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5139a833f721..31b0fabdd2ea 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root); | |||
| 181 | void btrfs_unlock_volumes(void); | 181 | void btrfs_unlock_volumes(void); |
| 182 | void btrfs_lock_volumes(void); | 182 | void btrfs_lock_volumes(void); |
| 183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); | 183 | int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); |
| 184 | int find_free_dev_extent(struct btrfs_trans_handle *trans, | ||
| 185 | struct btrfs_device *device, u64 num_bytes, | ||
| 186 | u64 *start, u64 *max_avail); | ||
| 184 | #endif | 187 | #endif |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index a9d3bf4d2689..b0fc93f95fd0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -260,7 +260,7 @@ err: | |||
| 260 | * attributes are handled directly. | 260 | * attributes are handled directly. |
| 261 | */ | 261 | */ |
| 262 | struct xattr_handler *btrfs_xattr_handlers[] = { | 262 | struct xattr_handler *btrfs_xattr_handlers[] = { |
| 263 | #ifdef CONFIG_FS_POSIX_ACL | 263 | #ifdef CONFIG_BTRFS_POSIX_ACL |
| 264 | &btrfs_xattr_acl_access_handler, | 264 | &btrfs_xattr_acl_access_handler, |
| 265 | &btrfs_xattr_acl_default_handler, | 265 | &btrfs_xattr_acl_default_handler, |
| 266 | #endif | 266 | #endif |
