-rw-r--r--  fs/btrfs/async-thread.c   | 230
-rw-r--r--  fs/btrfs/async-thread.h   |  12
-rw-r--r--  fs/btrfs/compression.c    |   8
-rw-r--r--  fs/btrfs/ctree.h          |   2
-rw-r--r--  fs/btrfs/disk-io.c        |  36
-rw-r--r--  fs/btrfs/extent-tree.c    |   4
-rw-r--r--  fs/btrfs/extent_io.c      | 293
-rw-r--r--  fs/btrfs/extent_io.h      |  16
-rw-r--r--  fs/btrfs/extent_map.c     |  55
-rw-r--r--  fs/btrfs/extent_map.h     |   3
-rw-r--r--  fs/btrfs/file.c           |  35
-rw-r--r--  fs/btrfs/inode.c          | 112
-rw-r--r--  fs/btrfs/ioctl.c          |   5
-rw-r--r--  fs/btrfs/ordered-data.c   |  33
-rw-r--r--  fs/btrfs/ordered-data.h   |   3
-rw-r--r--  fs/btrfs/relocation.c     |   6
-rw-r--r--  fs/btrfs/tree-log.c       |   2
-rw-r--r--  fs/btrfs/volumes.c        |  42
18 files changed, 580 insertions, 317 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 019e8af449ab..6ea5cd0a595f 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
| @@ -48,6 +48,9 @@ struct btrfs_worker_thread { | |||
| 48 | /* number of things on the pending list */ | 48 | /* number of things on the pending list */ |
| 49 | atomic_t num_pending; | 49 | atomic_t num_pending; |
| 50 | 50 | ||
| 51 | /* reference counter for this struct */ | ||
| 52 | atomic_t refs; | ||
| 53 | |||
| 51 | unsigned long sequence; | 54 | unsigned long sequence; |
| 52 | 55 | ||
| 53 | /* protects the pending list. */ | 56 | /* protects the pending list. */ |
| @@ -93,17 +96,40 @@ static void check_busy_worker(struct btrfs_worker_thread *worker) | |||
| 93 | } | 96 | } |
| 94 | } | 97 | } |
| 95 | 98 | ||
| 96 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | 99 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) |
| 97 | struct btrfs_work *work) | ||
| 98 | { | 100 | { |
| 101 | struct btrfs_workers *workers = worker->workers; | ||
| 99 | unsigned long flags; | 102 | unsigned long flags; |
| 100 | 103 | ||
| 104 | rmb(); | ||
| 105 | if (!workers->atomic_start_pending) | ||
| 106 | return; | ||
| 107 | |||
| 108 | spin_lock_irqsave(&workers->lock, flags); | ||
| 109 | if (!workers->atomic_start_pending) | ||
| 110 | goto out; | ||
| 111 | |||
| 112 | workers->atomic_start_pending = 0; | ||
| 113 | if (workers->num_workers >= workers->max_workers) | ||
| 114 | goto out; | ||
| 115 | |||
| 116 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 117 | btrfs_start_workers(workers, 1); | ||
| 118 | return; | ||
| 119 | |||
| 120 | out: | ||
| 121 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 122 | } | ||
| 123 | |||
| 124 | static noinline int run_ordered_completions(struct btrfs_workers *workers, | ||
| 125 | struct btrfs_work *work) | ||
| 126 | { | ||
| 101 | if (!workers->ordered) | 127 | if (!workers->ordered) |
| 102 | return 0; | 128 | return 0; |
| 103 | 129 | ||
| 104 | set_bit(WORK_DONE_BIT, &work->flags); | 130 | set_bit(WORK_DONE_BIT, &work->flags); |
| 105 | 131 | ||
| 106 | spin_lock_irqsave(&workers->lock, flags); | 132 | spin_lock(&workers->order_lock); |
| 107 | 133 | ||
| 108 | while (1) { | 134 | while (1) { |
| 109 | if (!list_empty(&workers->prio_order_list)) { | 135 | if (!list_empty(&workers->prio_order_list)) { |
| @@ -126,45 +152,117 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
| 126 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 152 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
| 127 | break; | 153 | break; |
| 128 | 154 | ||
| 129 | spin_unlock_irqrestore(&workers->lock, flags); | 155 | spin_unlock(&workers->order_lock); |
| 130 | 156 | ||
| 131 | work->ordered_func(work); | 157 | work->ordered_func(work); |
| 132 | 158 | ||
| 133 | /* now take the lock again and call the freeing code */ | 159 | /* now take the lock again and call the freeing code */ |
| 134 | spin_lock_irqsave(&workers->lock, flags); | 160 | spin_lock(&workers->order_lock); |
| 135 | list_del(&work->order_list); | 161 | list_del(&work->order_list); |
| 136 | work->ordered_free(work); | 162 | work->ordered_free(work); |
| 137 | } | 163 | } |
| 138 | 164 | ||
| 139 | spin_unlock_irqrestore(&workers->lock, flags); | 165 | spin_unlock(&workers->order_lock); |
| 140 | return 0; | 166 | return 0; |
| 141 | } | 167 | } |
| 142 | 168 | ||
| 169 | static void put_worker(struct btrfs_worker_thread *worker) | ||
| 170 | { | ||
| 171 | if (atomic_dec_and_test(&worker->refs)) | ||
| 172 | kfree(worker); | ||
| 173 | } | ||
| 174 | |||
| 175 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
| 176 | { | ||
| 177 | int freeit = 0; | ||
| 178 | |||
| 179 | spin_lock_irq(&worker->lock); | ||
| 180 | spin_lock_irq(&worker->workers->lock); | ||
| 181 | if (worker->workers->num_workers > 1 && | ||
| 182 | worker->idle && | ||
| 183 | !worker->working && | ||
| 184 | !list_empty(&worker->worker_list) && | ||
| 185 | list_empty(&worker->prio_pending) && | ||
| 186 | list_empty(&worker->pending)) { | ||
| 187 | freeit = 1; | ||
| 188 | list_del_init(&worker->worker_list); | ||
| 189 | worker->workers->num_workers--; | ||
| 190 | } | ||
| 191 | spin_unlock_irq(&worker->workers->lock); | ||
| 192 | spin_unlock_irq(&worker->lock); | ||
| 193 | |||
| 194 | if (freeit) | ||
| 195 | put_worker(worker); | ||
| 196 | return freeit; | ||
| 197 | } | ||
| 198 | |||
| 199 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | ||
| 200 | struct list_head *prio_head, | ||
| 201 | struct list_head *head) | ||
| 202 | { | ||
| 203 | struct btrfs_work *work = NULL; | ||
| 204 | struct list_head *cur = NULL; | ||
| 205 | |||
| 206 | if(!list_empty(prio_head)) | ||
| 207 | cur = prio_head->next; | ||
| 208 | |||
| 209 | smp_mb(); | ||
| 210 | if (!list_empty(&worker->prio_pending)) | ||
| 211 | goto refill; | ||
| 212 | |||
| 213 | if (!list_empty(head)) | ||
| 214 | cur = head->next; | ||
| 215 | |||
| 216 | if (cur) | ||
| 217 | goto out; | ||
| 218 | |||
| 219 | refill: | ||
| 220 | spin_lock_irq(&worker->lock); | ||
| 221 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
| 222 | list_splice_tail_init(&worker->pending, head); | ||
| 223 | |||
| 224 | if (!list_empty(prio_head)) | ||
| 225 | cur = prio_head->next; | ||
| 226 | else if (!list_empty(head)) | ||
| 227 | cur = head->next; | ||
| 228 | spin_unlock_irq(&worker->lock); | ||
| 229 | |||
| 230 | if (!cur) | ||
| 231 | goto out_fail; | ||
| 232 | |||
| 233 | out: | ||
| 234 | work = list_entry(cur, struct btrfs_work, list); | ||
| 235 | |||
| 236 | out_fail: | ||
| 237 | return work; | ||
| 238 | } | ||
| 239 | |||
| 143 | /* | 240 | /* |
| 144 | * main loop for servicing work items | 241 | * main loop for servicing work items |
| 145 | */ | 242 | */ |
| 146 | static int worker_loop(void *arg) | 243 | static int worker_loop(void *arg) |
| 147 | { | 244 | { |
| 148 | struct btrfs_worker_thread *worker = arg; | 245 | struct btrfs_worker_thread *worker = arg; |
| 149 | struct list_head *cur; | 246 | struct list_head head; |
| 247 | struct list_head prio_head; | ||
| 150 | struct btrfs_work *work; | 248 | struct btrfs_work *work; |
| 249 | |||
| 250 | INIT_LIST_HEAD(&head); | ||
| 251 | INIT_LIST_HEAD(&prio_head); | ||
| 252 | |||
| 151 | do { | 253 | do { |
| 152 | spin_lock_irq(&worker->lock); | 254 | again: |
| 153 | again_locked: | ||
| 154 | while (1) { | 255 | while (1) { |
| 155 | if (!list_empty(&worker->prio_pending)) | 256 | |
| 156 | cur = worker->prio_pending.next; | 257 | |
| 157 | else if (!list_empty(&worker->pending)) | 258 | work = get_next_work(worker, &prio_head, &head); |
| 158 | cur = worker->pending.next; | 259 | if (!work) |
| 159 | else | ||
| 160 | break; | 260 | break; |
| 161 | 261 | ||
| 162 | work = list_entry(cur, struct btrfs_work, list); | ||
| 163 | list_del(&work->list); | 262 | list_del(&work->list); |
| 164 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 263 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
| 165 | 264 | ||
| 166 | work->worker = worker; | 265 | work->worker = worker; |
| 167 | spin_unlock_irq(&worker->lock); | ||
| 168 | 266 | ||
| 169 | work->func(work); | 267 | work->func(work); |
| 170 | 268 | ||
| @@ -175,9 +273,13 @@ again_locked: | |||
| 175 | */ | 273 | */ |
| 176 | run_ordered_completions(worker->workers, work); | 274 | run_ordered_completions(worker->workers, work); |
| 177 | 275 | ||
| 178 | spin_lock_irq(&worker->lock); | 276 | check_pending_worker_creates(worker); |
| 179 | check_idle_worker(worker); | 277 | |
| 180 | } | 278 | } |
| 279 | |||
| 280 | spin_lock_irq(&worker->lock); | ||
| 281 | check_idle_worker(worker); | ||
| 282 | |||
| 181 | if (freezing(current)) { | 283 | if (freezing(current)) { |
| 182 | worker->working = 0; | 284 | worker->working = 0; |
| 183 | spin_unlock_irq(&worker->lock); | 285 | spin_unlock_irq(&worker->lock); |
| @@ -216,8 +318,10 @@ again_locked: | |||
| 216 | spin_lock_irq(&worker->lock); | 318 | spin_lock_irq(&worker->lock); |
| 217 | set_current_state(TASK_INTERRUPTIBLE); | 319 | set_current_state(TASK_INTERRUPTIBLE); |
| 218 | if (!list_empty(&worker->pending) || | 320 | if (!list_empty(&worker->pending) || |
| 219 | !list_empty(&worker->prio_pending)) | 321 | !list_empty(&worker->prio_pending)) { |
| 220 | goto again_locked; | 322 | spin_unlock_irq(&worker->lock); |
| 323 | goto again; | ||
| 324 | } | ||
| 221 | 325 | ||
| 222 | /* | 326 | /* |
| 223 | * this makes sure we get a wakeup when someone | 327 | * this makes sure we get a wakeup when someone |
| @@ -226,8 +330,13 @@ again_locked: | |||
| 226 | worker->working = 0; | 330 | worker->working = 0; |
| 227 | spin_unlock_irq(&worker->lock); | 331 | spin_unlock_irq(&worker->lock); |
| 228 | 332 | ||
| 229 | if (!kthread_should_stop()) | 333 | if (!kthread_should_stop()) { |
| 230 | schedule(); | 334 | schedule_timeout(HZ * 120); |
| 335 | if (!worker->working && | ||
| 336 | try_worker_shutdown(worker)) { | ||
| 337 | return 0; | ||
| 338 | } | ||
| 339 | } | ||
| 231 | } | 340 | } |
| 232 | __set_current_state(TASK_RUNNING); | 341 | __set_current_state(TASK_RUNNING); |
| 233 | } | 342 | } |
| @@ -242,16 +351,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers) | |||
| 242 | { | 351 | { |
| 243 | struct list_head *cur; | 352 | struct list_head *cur; |
| 244 | struct btrfs_worker_thread *worker; | 353 | struct btrfs_worker_thread *worker; |
| 354 | int can_stop; | ||
| 245 | 355 | ||
| 356 | spin_lock_irq(&workers->lock); | ||
| 246 | list_splice_init(&workers->idle_list, &workers->worker_list); | 357 | list_splice_init(&workers->idle_list, &workers->worker_list); |
| 247 | while (!list_empty(&workers->worker_list)) { | 358 | while (!list_empty(&workers->worker_list)) { |
| 248 | cur = workers->worker_list.next; | 359 | cur = workers->worker_list.next; |
| 249 | worker = list_entry(cur, struct btrfs_worker_thread, | 360 | worker = list_entry(cur, struct btrfs_worker_thread, |
| 250 | worker_list); | 361 | worker_list); |
| 251 | kthread_stop(worker->task); | 362 | |
| 252 | list_del(&worker->worker_list); | 363 | atomic_inc(&worker->refs); |
| 253 | kfree(worker); | 364 | workers->num_workers -= 1; |
| 365 | if (!list_empty(&worker->worker_list)) { | ||
| 366 | list_del_init(&worker->worker_list); | ||
| 367 | put_worker(worker); | ||
| 368 | can_stop = 1; | ||
| 369 | } else | ||
| 370 | can_stop = 0; | ||
| 371 | spin_unlock_irq(&workers->lock); | ||
| 372 | if (can_stop) | ||
| 373 | kthread_stop(worker->task); | ||
| 374 | spin_lock_irq(&workers->lock); | ||
| 375 | put_worker(worker); | ||
| 254 | } | 376 | } |
| 377 | spin_unlock_irq(&workers->lock); | ||
| 255 | return 0; | 378 | return 0; |
| 256 | } | 379 | } |
| 257 | 380 | ||
| @@ -266,10 +389,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | |||
| 266 | INIT_LIST_HEAD(&workers->order_list); | 389 | INIT_LIST_HEAD(&workers->order_list); |
| 267 | INIT_LIST_HEAD(&workers->prio_order_list); | 390 | INIT_LIST_HEAD(&workers->prio_order_list); |
| 268 | spin_lock_init(&workers->lock); | 391 | spin_lock_init(&workers->lock); |
| 392 | spin_lock_init(&workers->order_lock); | ||
| 269 | workers->max_workers = max; | 393 | workers->max_workers = max; |
| 270 | workers->idle_thresh = 32; | 394 | workers->idle_thresh = 32; |
| 271 | workers->name = name; | 395 | workers->name = name; |
| 272 | workers->ordered = 0; | 396 | workers->ordered = 0; |
| 397 | workers->atomic_start_pending = 0; | ||
| 398 | workers->atomic_worker_start = 0; | ||
| 273 | } | 399 | } |
| 274 | 400 | ||
| 275 | /* | 401 | /* |
| @@ -293,7 +419,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
| 293 | INIT_LIST_HEAD(&worker->prio_pending); | 419 | INIT_LIST_HEAD(&worker->prio_pending); |
| 294 | INIT_LIST_HEAD(&worker->worker_list); | 420 | INIT_LIST_HEAD(&worker->worker_list); |
| 295 | spin_lock_init(&worker->lock); | 421 | spin_lock_init(&worker->lock); |
| 422 | |||
| 296 | atomic_set(&worker->num_pending, 0); | 423 | atomic_set(&worker->num_pending, 0); |
| 424 | atomic_set(&worker->refs, 1); | ||
| 297 | worker->workers = workers; | 425 | worker->workers = workers; |
| 298 | worker->task = kthread_run(worker_loop, worker, | 426 | worker->task = kthread_run(worker_loop, worker, |
| 299 | "btrfs-%s-%d", workers->name, | 427 | "btrfs-%s-%d", workers->name, |
| @@ -303,7 +431,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
| 303 | kfree(worker); | 431 | kfree(worker); |
| 304 | goto fail; | 432 | goto fail; |
| 305 | } | 433 | } |
| 306 | |||
| 307 | spin_lock_irq(&workers->lock); | 434 | spin_lock_irq(&workers->lock); |
| 308 | list_add_tail(&worker->worker_list, &workers->idle_list); | 435 | list_add_tail(&worker->worker_list, &workers->idle_list); |
| 309 | worker->idle = 1; | 436 | worker->idle = 1; |
| @@ -367,28 +494,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | |||
| 367 | { | 494 | { |
| 368 | struct btrfs_worker_thread *worker; | 495 | struct btrfs_worker_thread *worker; |
| 369 | unsigned long flags; | 496 | unsigned long flags; |
| 497 | struct list_head *fallback; | ||
| 370 | 498 | ||
| 371 | again: | 499 | again: |
| 372 | spin_lock_irqsave(&workers->lock, flags); | 500 | spin_lock_irqsave(&workers->lock, flags); |
| 373 | worker = next_worker(workers); | 501 | worker = next_worker(workers); |
| 374 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 375 | 502 | ||
| 376 | if (!worker) { | 503 | if (!worker) { |
| 377 | spin_lock_irqsave(&workers->lock, flags); | ||
| 378 | if (workers->num_workers >= workers->max_workers) { | 504 | if (workers->num_workers >= workers->max_workers) { |
| 379 | struct list_head *fallback = NULL; | 505 | goto fallback; |
| 380 | /* | 506 | } else if (workers->atomic_worker_start) { |
| 381 | * we have failed to find any workers, just | 507 | workers->atomic_start_pending = 1; |
| 382 | * return the force one | 508 | goto fallback; |
| 383 | */ | ||
| 384 | if (!list_empty(&workers->worker_list)) | ||
| 385 | fallback = workers->worker_list.next; | ||
| 386 | if (!list_empty(&workers->idle_list)) | ||
| 387 | fallback = workers->idle_list.next; | ||
| 388 | BUG_ON(!fallback); | ||
| 389 | worker = list_entry(fallback, | ||
| 390 | struct btrfs_worker_thread, worker_list); | ||
| 391 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 392 | } else { | 509 | } else { |
| 393 | spin_unlock_irqrestore(&workers->lock, flags); | 510 | spin_unlock_irqrestore(&workers->lock, flags); |
| 394 | /* we're below the limit, start another worker */ | 511 | /* we're below the limit, start another worker */ |
| @@ -396,6 +513,23 @@ again: | |||
| 396 | goto again; | 513 | goto again; |
| 397 | } | 514 | } |
| 398 | } | 515 | } |
| 516 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 517 | return worker; | ||
| 518 | |||
| 519 | fallback: | ||
| 520 | fallback = NULL; | ||
| 521 | /* | ||
| 522 | * we have failed to find any workers, just | ||
| 523 | * return the first one we can find. | ||
| 524 | */ | ||
| 525 | if (!list_empty(&workers->worker_list)) | ||
| 526 | fallback = workers->worker_list.next; | ||
| 527 | if (!list_empty(&workers->idle_list)) | ||
| 528 | fallback = workers->idle_list.next; | ||
| 529 | BUG_ON(!fallback); | ||
| 530 | worker = list_entry(fallback, | ||
| 531 | struct btrfs_worker_thread, worker_list); | ||
| 532 | spin_unlock_irqrestore(&workers->lock, flags); | ||
| 399 | return worker; | 533 | return worker; |
| 400 | } | 534 | } |
| 401 | 535 | ||
| @@ -435,9 +569,9 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
| 435 | worker->working = 1; | 569 | worker->working = 1; |
| 436 | } | 570 | } |
| 437 | 571 | ||
| 438 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 439 | if (wake) | 572 | if (wake) |
| 440 | wake_up_process(worker->task); | 573 | wake_up_process(worker->task); |
| 574 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 441 | out: | 575 | out: |
| 442 | 576 | ||
| 443 | return 0; | 577 | return 0; |
| @@ -463,14 +597,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 463 | 597 | ||
| 464 | worker = find_worker(workers); | 598 | worker = find_worker(workers); |
| 465 | if (workers->ordered) { | 599 | if (workers->ordered) { |
| 466 | spin_lock_irqsave(&workers->lock, flags); | 600 | /* |
| 601 | * you're not allowed to do ordered queues from an | ||
| 602 | * interrupt handler | ||
| 603 | */ | ||
| 604 | spin_lock(&workers->order_lock); | ||
| 467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | 605 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
| 468 | list_add_tail(&work->order_list, | 606 | list_add_tail(&work->order_list, |
| 469 | &workers->prio_order_list); | 607 | &workers->prio_order_list); |
| 470 | } else { | 608 | } else { |
| 471 | list_add_tail(&work->order_list, &workers->order_list); | 609 | list_add_tail(&work->order_list, &workers->order_list); |
| 472 | } | 610 | } |
| 473 | spin_unlock_irqrestore(&workers->lock, flags); | 611 | spin_unlock(&workers->order_lock); |
| 474 | } else { | 612 | } else { |
| 475 | INIT_LIST_HEAD(&work->order_list); | 613 | INIT_LIST_HEAD(&work->order_list); |
| 476 | } | 614 | } |
| @@ -492,10 +630,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
| 492 | wake = 1; | 630 | wake = 1; |
| 493 | worker->working = 1; | 631 | worker->working = 1; |
| 494 | 632 | ||
| 495 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 496 | |||
| 497 | if (wake) | 633 | if (wake) |
| 498 | wake_up_process(worker->task); | 634 | wake_up_process(worker->task); |
| 635 | spin_unlock_irqrestore(&worker->lock, flags); | ||
| 636 | |||
| 499 | out: | 637 | out: |
| 500 | return 0; | 638 | return 0; |
| 501 | } | 639 | } |
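Note: the async-thread changes above give each worker a reference count so that an idle worker which times out and shuts itself down cannot be freed while btrfs_stop_workers() still holds a pointer to it. Below is a minimal user-space sketch of that dec-and-test lifetime pattern; the struct layout and helper names are simplified stand-ins, not the kernel code.

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Simplified stand-in for struct btrfs_worker_thread. */
    struct worker {
            atomic_int refs;        /* one reference held by the creator */
    };

    /* Drop one reference; the last put frees the worker. */
    static void put_worker(struct worker *w)
    {
            if (atomic_fetch_sub(&w->refs, 1) == 1)
                    free(w);
    }

    /* Stopping path: pin the worker across the (possibly sleeping) stop,
     * then drop both the pin and the creation reference. */
    static void stop_worker(struct worker *w)
    {
            atomic_fetch_add(&w->refs, 1);  /* keep it alive while stopping */
            /* ... the kthread_stop() equivalent would run here ... */
            put_worker(w);                  /* drop the pin */
            put_worker(w);                  /* drop the creation reference */
    }

    int main(void)
    {
            struct worker *w = malloc(sizeof(*w));
            atomic_init(&w->refs, 1);
            stop_worker(w);
            puts("worker stopped and freed exactly once");
            return 0;
    }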
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1b511c109db6..fc089b95ec14 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
| @@ -73,6 +73,15 @@ struct btrfs_workers { | |||
| 73 | /* force completions in the order they were queued */ | 73 | /* force completions in the order they were queued */ |
| 74 | int ordered; | 74 | int ordered; |
| 75 | 75 | ||
| 76 | /* more workers required, but in an interrupt handler */ | ||
| 77 | int atomic_start_pending; | ||
| 78 | |||
| 79 | /* | ||
| 80 | * are we allowed to sleep while starting workers or are we required | ||
| 81 | * to start them at a later time? | ||
| 82 | */ | ||
| 83 | int atomic_worker_start; | ||
| 84 | |||
| 76 | /* list with all the work threads. The workers on the idle thread | 85 | /* list with all the work threads. The workers on the idle thread |
| 77 | * may be actively servicing jobs, but they haven't yet hit the | 86 | * may be actively servicing jobs, but they haven't yet hit the |
| 78 | * idle thresh limit above. | 87 | * idle thresh limit above. |
| @@ -90,6 +99,9 @@ struct btrfs_workers { | |||
| 90 | /* lock for finding the next worker thread to queue on */ | 99 | /* lock for finding the next worker thread to queue on */ |
| 91 | spinlock_t lock; | 100 | spinlock_t lock; |
| 92 | 101 | ||
| 102 | /* lock for the ordered lists */ | ||
| 103 | spinlock_t order_lock; | ||
| 104 | |||
| 93 | /* extra name for this worker, used for current->name */ | 105 | /* extra name for this worker, used for current->name */ |
| 94 | char *name; | 106 | char *name; |
| 95 | }; | 107 | }; |
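Note: the two new fields exist because the end_io pools are filled from interrupt context, where starting a kthread (which may sleep) is not allowed. find_worker() therefore only records atomic_start_pending, and a worker running in process context later notices it in check_pending_worker_creates() and does the blocking start. A small hedged sketch of that deferral idea in user-space C; the names below are illustrative, not the kernel API.

    #include <stdatomic.h>
    #include <stdio.h>

    /* Illustrative flag, mirroring struct btrfs_workers. */
    static atomic_int atomic_start_pending;  /* more workers wanted, set from IRQ */

    /* Called where sleeping is forbidden (e.g. an end_io handler):
     * never create a thread here, just remember that one is wanted. */
    static void queue_work_atomic(void)
    {
            atomic_store(&atomic_start_pending, 1);
    }

    /* Called from a worker running in process context, where blocking
     * thread creation is fine. */
    static void check_pending_worker_creates(void)
    {
            if (atomic_exchange(&atomic_start_pending, 0))
                    printf("process context: starting one more worker now\n");
    }

    int main(void)
    {
            queue_work_atomic();             /* "interrupt" side */
            check_pending_worker_creates();  /* worker side */
            return 0;
    }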
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9d8ba4d54a37..a11a32058b50 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
| @@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 506 | */ | 506 | */ |
| 507 | set_page_extent_mapped(page); | 507 | set_page_extent_mapped(page); |
| 508 | lock_extent(tree, last_offset, end, GFP_NOFS); | 508 | lock_extent(tree, last_offset, end, GFP_NOFS); |
| 509 | spin_lock(&em_tree->lock); | 509 | read_lock(&em_tree->lock); |
| 510 | em = lookup_extent_mapping(em_tree, last_offset, | 510 | em = lookup_extent_mapping(em_tree, last_offset, |
| 511 | PAGE_CACHE_SIZE); | 511 | PAGE_CACHE_SIZE); |
| 512 | spin_unlock(&em_tree->lock); | 512 | read_unlock(&em_tree->lock); |
| 513 | 513 | ||
| 514 | if (!em || last_offset < em->start || | 514 | if (!em || last_offset < em->start || |
| 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || | 515 | (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || |
| @@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 593 | em_tree = &BTRFS_I(inode)->extent_tree; | 593 | em_tree = &BTRFS_I(inode)->extent_tree; |
| 594 | 594 | ||
| 595 | /* we need the actual starting offset of this extent in the file */ | 595 | /* we need the actual starting offset of this extent in the file */ |
| 596 | spin_lock(&em_tree->lock); | 596 | read_lock(&em_tree->lock); |
| 597 | em = lookup_extent_mapping(em_tree, | 597 | em = lookup_extent_mapping(em_tree, |
| 598 | page_offset(bio->bi_io_vec->bv_page), | 598 | page_offset(bio->bi_io_vec->bv_page), |
| 599 | PAGE_CACHE_SIZE); | 599 | PAGE_CACHE_SIZE); |
| 600 | spin_unlock(&em_tree->lock); | 600 | read_unlock(&em_tree->lock); |
| 601 | 601 | ||
| 602 | compressed_len = em->block_len; | 602 | compressed_len = em->block_len; |
| 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); | 603 | cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); |
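Note: the locking change in these hunks is part of a wider switch of the extent_map tree from a spinlock to a rwlock (see the extent_map.c churn in the diffstat): lookups that do not modify the tree take the lock shared, while insert/remove paths take it exclusively. A hedged fragment of the two call shapes, using the identifiers that appear in this patch; the surrounding context and error handling are omitted.

    /* reader: lookups leave the tree unchanged, so they can run in parallel */
    read_lock(&em_tree->lock);
    em = lookup_extent_mapping(em_tree, start, len);
    read_unlock(&em_tree->lock);

    /* writer: inserting a new mapping still needs exclusive access */
    write_lock(&em_tree->lock);
    ret = add_extent_mapping(em_tree, em);
    write_unlock(&em_tree->lock);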
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 837435ce84ca..732d5b884aa7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
| @@ -2290,7 +2290,7 @@ extern struct file_operations btrfs_file_operations; | |||
| 2290 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2290 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 2291 | struct btrfs_root *root, struct inode *inode, | 2291 | struct btrfs_root *root, struct inode *inode, |
| 2292 | u64 start, u64 end, u64 locked_end, | 2292 | u64 start, u64 end, u64 locked_end, |
| 2293 | u64 inline_limit, u64 *hint_block); | 2293 | u64 inline_limit, u64 *hint_block, int drop_cache); |
| 2294 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2294 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
| 2295 | struct btrfs_root *root, | 2295 | struct btrfs_root *root, |
| 2296 | struct inode *inode, u64 start, u64 end); | 2296 | struct inode *inode, u64 start, u64 end); |
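Note: btrfs_drop_extents() gains a drop_cache flag, so callers now say explicitly whether the cached extent mapping for the dropped range should be thrown away as well (the file.c callers are not quoted in this excerpt). A hedged example of the new call shape; the surrounding variables are placeholders and error handling is omitted.

    /* hypothetical caller; trans/root/inode/hint_byte are placeholders */
    ret = btrfs_drop_extents(trans, root, inode,
                             start, end, end /* locked_end */,
                             0 /* inline_limit */, &hint_byte,
                             1 /* drop_cache */);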
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e83be2e4602c..253da7e01ab3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
| @@ -123,15 +123,15 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 123 | struct extent_map *em; | 123 | struct extent_map *em; |
| 124 | int ret; | 124 | int ret; |
| 125 | 125 | ||
| 126 | spin_lock(&em_tree->lock); | 126 | read_lock(&em_tree->lock); |
| 127 | em = lookup_extent_mapping(em_tree, start, len); | 127 | em = lookup_extent_mapping(em_tree, start, len); |
| 128 | if (em) { | 128 | if (em) { |
| 129 | em->bdev = | 129 | em->bdev = |
| 130 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 130 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
| 131 | spin_unlock(&em_tree->lock); | 131 | read_unlock(&em_tree->lock); |
| 132 | goto out; | 132 | goto out; |
| 133 | } | 133 | } |
| 134 | spin_unlock(&em_tree->lock); | 134 | read_unlock(&em_tree->lock); |
| 135 | 135 | ||
| 136 | em = alloc_extent_map(GFP_NOFS); | 136 | em = alloc_extent_map(GFP_NOFS); |
| 137 | if (!em) { | 137 | if (!em) { |
| @@ -144,7 +144,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 144 | em->block_start = 0; | 144 | em->block_start = 0; |
| 145 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; | 145 | em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; |
| 146 | 146 | ||
| 147 | spin_lock(&em_tree->lock); | 147 | write_lock(&em_tree->lock); |
| 148 | ret = add_extent_mapping(em_tree, em); | 148 | ret = add_extent_mapping(em_tree, em); |
| 149 | if (ret == -EEXIST) { | 149 | if (ret == -EEXIST) { |
| 150 | u64 failed_start = em->start; | 150 | u64 failed_start = em->start; |
| @@ -163,7 +163,7 @@ static struct extent_map *btree_get_extent(struct inode *inode, | |||
| 163 | free_extent_map(em); | 163 | free_extent_map(em); |
| 164 | em = NULL; | 164 | em = NULL; |
| 165 | } | 165 | } |
| 166 | spin_unlock(&em_tree->lock); | 166 | write_unlock(&em_tree->lock); |
| 167 | 167 | ||
| 168 | if (ret) | 168 | if (ret) |
| 169 | em = ERR_PTR(ret); | 169 | em = ERR_PTR(ret); |
| @@ -1325,9 +1325,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) | |||
| 1325 | offset = page_offset(page); | 1325 | offset = page_offset(page); |
| 1326 | 1326 | ||
| 1327 | em_tree = &BTRFS_I(inode)->extent_tree; | 1327 | em_tree = &BTRFS_I(inode)->extent_tree; |
| 1328 | spin_lock(&em_tree->lock); | 1328 | read_lock(&em_tree->lock); |
| 1329 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); | 1329 | em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); |
| 1330 | spin_unlock(&em_tree->lock); | 1330 | read_unlock(&em_tree->lock); |
| 1331 | if (!em) { | 1331 | if (!em) { |
| 1332 | __unplug_io_fn(bdi, page); | 1332 | __unplug_io_fn(bdi, page); |
| 1333 | return; | 1333 | return; |
| @@ -1698,7 +1698,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1698 | err = -EINVAL; | 1698 | err = -EINVAL; |
| 1699 | goto fail_iput; | 1699 | goto fail_iput; |
| 1700 | } | 1700 | } |
| 1701 | 1701 | printk("thread pool is %d\n", fs_info->thread_pool_size); | |
| 1702 | /* | 1702 | /* |
| 1703 | * we need to start all the end_io workers up front because the | 1703 | * we need to start all the end_io workers up front because the |
| 1704 | * queue work function gets called at interrupt time, and so it | 1704 | * queue work function gets called at interrupt time, and so it |
| @@ -1743,20 +1743,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1743 | fs_info->endio_workers.idle_thresh = 4; | 1743 | fs_info->endio_workers.idle_thresh = 4; |
| 1744 | fs_info->endio_meta_workers.idle_thresh = 4; | 1744 | fs_info->endio_meta_workers.idle_thresh = 4; |
| 1745 | 1745 | ||
| 1746 | fs_info->endio_write_workers.idle_thresh = 64; | 1746 | fs_info->endio_write_workers.idle_thresh = 2; |
| 1747 | fs_info->endio_meta_write_workers.idle_thresh = 64; | 1747 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
| 1748 | |||
| 1749 | fs_info->endio_workers.atomic_worker_start = 1; | ||
| 1750 | fs_info->endio_meta_workers.atomic_worker_start = 1; | ||
| 1751 | fs_info->endio_write_workers.atomic_worker_start = 1; | ||
| 1752 | fs_info->endio_meta_write_workers.atomic_worker_start = 1; | ||
| 1748 | 1753 | ||
| 1749 | btrfs_start_workers(&fs_info->workers, 1); | 1754 | btrfs_start_workers(&fs_info->workers, 1); |
| 1750 | btrfs_start_workers(&fs_info->submit_workers, 1); | 1755 | btrfs_start_workers(&fs_info->submit_workers, 1); |
| 1751 | btrfs_start_workers(&fs_info->delalloc_workers, 1); | 1756 | btrfs_start_workers(&fs_info->delalloc_workers, 1); |
| 1752 | btrfs_start_workers(&fs_info->fixup_workers, 1); | 1757 | btrfs_start_workers(&fs_info->fixup_workers, 1); |
| 1753 | btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); | 1758 | btrfs_start_workers(&fs_info->endio_workers, 1); |
| 1754 | btrfs_start_workers(&fs_info->endio_meta_workers, | 1759 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
| 1755 | fs_info->thread_pool_size); | 1760 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
| 1756 | btrfs_start_workers(&fs_info->endio_meta_write_workers, | 1761 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
| 1757 | fs_info->thread_pool_size); | ||
| 1758 | btrfs_start_workers(&fs_info->endio_write_workers, | ||
| 1759 | fs_info->thread_pool_size); | ||
| 1760 | 1762 | ||
| 1761 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1763 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1762 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1764 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
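Note: with the pools now able to grow on demand, open_ctree() no longer pre-starts thread_pool_size threads for each end_io pool; it marks them atomic_worker_start and starts a single thread apiece. A hedged sketch of the resulting setup sequence for one such pool, using the helpers visible in this patch.

    /* one end_io style pool; fs_info and thread_pool_size come from the caller */
    btrfs_init_workers(&fs_info->endio_workers, "endio",
                       fs_info->thread_pool_size);
    fs_info->endio_workers.idle_thresh = 4;
    fs_info->endio_workers.atomic_worker_start = 1; /* queued from IRQ context */
    btrfs_start_workers(&fs_info->endio_workers, 1); /* grow later as needed */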
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..edd86ae9e149 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
| @@ -5396,9 +5396,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode, | |||
| 5396 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); | 5396 | lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); |
| 5397 | while (1) { | 5397 | while (1) { |
| 5398 | int ret; | 5398 | int ret; |
| 5399 | spin_lock(&em_tree->lock); | 5399 | write_lock(&em_tree->lock); |
| 5400 | ret = add_extent_mapping(em_tree, em); | 5400 | ret = add_extent_mapping(em_tree, em); |
| 5401 | spin_unlock(&em_tree->lock); | 5401 | write_unlock(&em_tree->lock); |
| 5402 | if (ret != -EEXIST) { | 5402 | if (ret != -EEXIST) { |
| 5403 | free_extent_map(em); | 5403 | free_extent_map(em); |
| 5404 | break; | 5404 | break; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68260180f587..a102422cd92e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
| @@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 367 | } | 367 | } |
| 368 | if (bits & EXTENT_DIRTY) | 368 | if (bits & EXTENT_DIRTY) |
| 369 | tree->dirty_bytes += end - start + 1; | 369 | tree->dirty_bytes += end - start + 1; |
| 370 | set_state_cb(tree, state, bits); | ||
| 371 | state->state |= bits; | ||
| 372 | state->start = start; | 370 | state->start = start; |
| 373 | state->end = end; | 371 | state->end = end; |
| 372 | set_state_cb(tree, state, bits); | ||
| 373 | state->state |= bits; | ||
| 374 | node = tree_insert(&tree->state, end, &state->rb_node); | 374 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 375 | if (node) { | 375 | if (node) { |
| 376 | struct extent_state *found; | 376 | struct extent_state *found; |
| @@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 471 | * bits were already set, or zero if none of the bits were already set. | 471 | * bits were already set, or zero if none of the bits were already set. |
| 472 | */ | 472 | */ |
| 473 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 473 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 474 | int bits, int wake, int delete, gfp_t mask) | 474 | int bits, int wake, int delete, |
| 475 | struct extent_state **cached_state, | ||
| 476 | gfp_t mask) | ||
| 475 | { | 477 | { |
| 476 | struct extent_state *state; | 478 | struct extent_state *state; |
| 479 | struct extent_state *cached; | ||
| 477 | struct extent_state *prealloc = NULL; | 480 | struct extent_state *prealloc = NULL; |
| 481 | struct rb_node *next_node; | ||
| 478 | struct rb_node *node; | 482 | struct rb_node *node; |
| 479 | u64 last_end; | 483 | u64 last_end; |
| 480 | int err; | 484 | int err; |
| @@ -488,6 +492,17 @@ again: | |||
| 488 | } | 492 | } |
| 489 | 493 | ||
| 490 | spin_lock(&tree->lock); | 494 | spin_lock(&tree->lock); |
| 495 | if (cached_state) { | ||
| 496 | cached = *cached_state; | ||
| 497 | *cached_state = NULL; | ||
| 498 | if (cached->tree && cached->start == start) { | ||
| 499 | atomic_dec(&cached->refs); | ||
| 500 | state = cached; | ||
| 501 | last_end = state->end; | ||
| 502 | goto found; | ||
| 503 | } | ||
| 504 | free_extent_state(cached); | ||
| 505 | } | ||
| 491 | /* | 506 | /* |
| 492 | * this search will find the extents that end after | 507 | * this search will find the extents that end after |
| 493 | * our range starts | 508 | * our range starts |
| @@ -496,6 +511,7 @@ again: | |||
| 496 | if (!node) | 511 | if (!node) |
| 497 | goto out; | 512 | goto out; |
| 498 | state = rb_entry(node, struct extent_state, rb_node); | 513 | state = rb_entry(node, struct extent_state, rb_node); |
| 514 | hit_next: | ||
| 499 | if (state->start > end) | 515 | if (state->start > end) |
| 500 | goto out; | 516 | goto out; |
| 501 | WARN_ON(state->end < start); | 517 | WARN_ON(state->end < start); |
| @@ -555,11 +571,21 @@ again: | |||
| 555 | prealloc = NULL; | 571 | prealloc = NULL; |
| 556 | goto out; | 572 | goto out; |
| 557 | } | 573 | } |
| 558 | 574 | found: | |
| 575 | if (state->end < end && prealloc && !need_resched()) | ||
| 576 | next_node = rb_next(&state->rb_node); | ||
| 577 | else | ||
| 578 | next_node = NULL; | ||
| 559 | set |= clear_state_bit(tree, state, bits, wake, delete); | 579 | set |= clear_state_bit(tree, state, bits, wake, delete); |
| 560 | if (last_end == (u64)-1) | 580 | if (last_end == (u64)-1) |
| 561 | goto out; | 581 | goto out; |
| 562 | start = last_end + 1; | 582 | start = last_end + 1; |
| 583 | if (start <= end && next_node) { | ||
| 584 | state = rb_entry(next_node, struct extent_state, | ||
| 585 | rb_node); | ||
| 586 | if (state->start == start) | ||
| 587 | goto hit_next; | ||
| 588 | } | ||
| 563 | goto search_again; | 589 | goto search_again; |
| 564 | 590 | ||
| 565 | out: | 591 | out: |
| @@ -653,26 +679,37 @@ static void set_state_bits(struct extent_io_tree *tree, | |||
| 653 | state->state |= bits; | 679 | state->state |= bits; |
| 654 | } | 680 | } |
| 655 | 681 | ||
| 682 | static void cache_state(struct extent_state *state, | ||
| 683 | struct extent_state **cached_ptr) | ||
| 684 | { | ||
| 685 | if (cached_ptr && !(*cached_ptr)) { | ||
| 686 | if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) { | ||
| 687 | *cached_ptr = state; | ||
| 688 | atomic_inc(&state->refs); | ||
| 689 | } | ||
| 690 | } | ||
| 691 | } | ||
| 692 | |||
| 656 | /* | 693 | /* |
| 657 | * set some bits on a range in the tree. This may require allocations | 694 | * set some bits on a range in the tree. This may require allocations or |
| 658 | * or sleeping, so the gfp mask is used to indicate what is allowed. | 695 | * sleeping, so the gfp mask is used to indicate what is allowed. |
| 659 | * | 696 | * |
| 660 | * If 'exclusive' == 1, this will fail with -EEXIST if some part of the | 697 | * If any of the exclusive bits are set, this will fail with -EEXIST if some |
| 661 | * range already has the desired bits set. The start of the existing | 698 | * part of the range already has the desired bits set. The start of the |
| 662 | * range is returned in failed_start in this case. | 699 | * existing range is returned in failed_start in this case. |
| 663 | * | 700 | * |
| 664 | * [start, end] is inclusive | 701 | * [start, end] is inclusive This takes the tree lock. |
| 665 | * This takes the tree lock. | ||
| 666 | */ | 702 | */ |
| 703 | |||
| 667 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 704 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 668 | int bits, int exclusive, u64 *failed_start, | 705 | int bits, int exclusive_bits, u64 *failed_start, |
| 706 | struct extent_state **cached_state, | ||
| 669 | gfp_t mask) | 707 | gfp_t mask) |
| 670 | { | 708 | { |
| 671 | struct extent_state *state; | 709 | struct extent_state *state; |
| 672 | struct extent_state *prealloc = NULL; | 710 | struct extent_state *prealloc = NULL; |
| 673 | struct rb_node *node; | 711 | struct rb_node *node; |
| 674 | int err = 0; | 712 | int err = 0; |
| 675 | int set; | ||
| 676 | u64 last_start; | 713 | u64 last_start; |
| 677 | u64 last_end; | 714 | u64 last_end; |
| 678 | again: | 715 | again: |
| @@ -683,6 +720,13 @@ again: | |||
| 683 | } | 720 | } |
| 684 | 721 | ||
| 685 | spin_lock(&tree->lock); | 722 | spin_lock(&tree->lock); |
| 723 | if (cached_state && *cached_state) { | ||
| 724 | state = *cached_state; | ||
| 725 | if (state->start == start && state->tree) { | ||
| 726 | node = &state->rb_node; | ||
| 727 | goto hit_next; | ||
| 728 | } | ||
| 729 | } | ||
| 686 | /* | 730 | /* |
| 687 | * this search will find all the extents that end after | 731 | * this search will find all the extents that end after |
| 688 | * our range starts. | 732 | * our range starts. |
| @@ -694,8 +738,8 @@ again: | |||
| 694 | BUG_ON(err == -EEXIST); | 738 | BUG_ON(err == -EEXIST); |
| 695 | goto out; | 739 | goto out; |
| 696 | } | 740 | } |
| 697 | |||
| 698 | state = rb_entry(node, struct extent_state, rb_node); | 741 | state = rb_entry(node, struct extent_state, rb_node); |
| 742 | hit_next: | ||
| 699 | last_start = state->start; | 743 | last_start = state->start; |
| 700 | last_end = state->end; | 744 | last_end = state->end; |
| 701 | 745 | ||
| @@ -706,17 +750,28 @@ again: | |||
| 706 | * Just lock what we found and keep going | 750 | * Just lock what we found and keep going |
| 707 | */ | 751 | */ |
| 708 | if (state->start == start && state->end <= end) { | 752 | if (state->start == start && state->end <= end) { |
| 709 | set = state->state & bits; | 753 | struct rb_node *next_node; |
| 710 | if (set && exclusive) { | 754 | if (state->state & exclusive_bits) { |
| 711 | *failed_start = state->start; | 755 | *failed_start = state->start; |
| 712 | err = -EEXIST; | 756 | err = -EEXIST; |
| 713 | goto out; | 757 | goto out; |
| 714 | } | 758 | } |
| 715 | set_state_bits(tree, state, bits); | 759 | set_state_bits(tree, state, bits); |
| 760 | cache_state(state, cached_state); | ||
| 716 | merge_state(tree, state); | 761 | merge_state(tree, state); |
| 717 | if (last_end == (u64)-1) | 762 | if (last_end == (u64)-1) |
| 718 | goto out; | 763 | goto out; |
| 764 | |||
| 719 | start = last_end + 1; | 765 | start = last_end + 1; |
| 766 | if (start < end && prealloc && !need_resched()) { | ||
| 767 | next_node = rb_next(node); | ||
| 768 | if (next_node) { | ||
| 769 | state = rb_entry(next_node, struct extent_state, | ||
| 770 | rb_node); | ||
| 771 | if (state->start == start) | ||
| 772 | goto hit_next; | ||
| 773 | } | ||
| 774 | } | ||
| 720 | goto search_again; | 775 | goto search_again; |
| 721 | } | 776 | } |
| 722 | 777 | ||
| @@ -737,8 +792,7 @@ again: | |||
| 737 | * desired bit on it. | 792 | * desired bit on it. |
| 738 | */ | 793 | */ |
| 739 | if (state->start < start) { | 794 | if (state->start < start) { |
| 740 | set = state->state & bits; | 795 | if (state->state & exclusive_bits) { |
| 741 | if (exclusive && set) { | ||
| 742 | *failed_start = start; | 796 | *failed_start = start; |
| 743 | err = -EEXIST; | 797 | err = -EEXIST; |
| 744 | goto out; | 798 | goto out; |
| @@ -750,6 +804,7 @@ again: | |||
| 750 | goto out; | 804 | goto out; |
| 751 | if (state->end <= end) { | 805 | if (state->end <= end) { |
| 752 | set_state_bits(tree, state, bits); | 806 | set_state_bits(tree, state, bits); |
| 807 | cache_state(state, cached_state); | ||
| 753 | merge_state(tree, state); | 808 | merge_state(tree, state); |
| 754 | if (last_end == (u64)-1) | 809 | if (last_end == (u64)-1) |
| 755 | goto out; | 810 | goto out; |
| @@ -774,6 +829,7 @@ again: | |||
| 774 | this_end = last_start - 1; | 829 | this_end = last_start - 1; |
| 775 | err = insert_state(tree, prealloc, start, this_end, | 830 | err = insert_state(tree, prealloc, start, this_end, |
| 776 | bits); | 831 | bits); |
| 832 | cache_state(prealloc, cached_state); | ||
| 777 | prealloc = NULL; | 833 | prealloc = NULL; |
| 778 | BUG_ON(err == -EEXIST); | 834 | BUG_ON(err == -EEXIST); |
| 779 | if (err) | 835 | if (err) |
| @@ -788,8 +844,7 @@ again: | |||
| 788 | * on the first half | 844 | * on the first half |
| 789 | */ | 845 | */ |
| 790 | if (state->start <= end && state->end > end) { | 846 | if (state->start <= end && state->end > end) { |
| 791 | set = state->state & bits; | 847 | if (state->state & exclusive_bits) { |
| 792 | if (exclusive && set) { | ||
| 793 | *failed_start = start; | 848 | *failed_start = start; |
| 794 | err = -EEXIST; | 849 | err = -EEXIST; |
| 795 | goto out; | 850 | goto out; |
| @@ -798,6 +853,7 @@ again: | |||
| 798 | BUG_ON(err == -EEXIST); | 853 | BUG_ON(err == -EEXIST); |
| 799 | 854 | ||
| 800 | set_state_bits(tree, prealloc, bits); | 855 | set_state_bits(tree, prealloc, bits); |
| 856 | cache_state(prealloc, cached_state); | ||
| 801 | merge_state(tree, prealloc); | 857 | merge_state(tree, prealloc); |
| 802 | prealloc = NULL; | 858 | prealloc = NULL; |
| 803 | goto out; | 859 | goto out; |
| @@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 826 | gfp_t mask) | 882 | gfp_t mask) |
| 827 | { | 883 | { |
| 828 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, | 884 | return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, |
| 829 | mask); | 885 | NULL, mask); |
| 830 | } | ||
| 831 | |||
| 832 | int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 833 | gfp_t mask) | ||
| 834 | { | ||
| 835 | return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask); | ||
| 836 | } | 886 | } |
| 837 | 887 | ||
| 838 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 888 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 839 | int bits, gfp_t mask) | 889 | int bits, gfp_t mask) |
| 840 | { | 890 | { |
| 841 | return set_extent_bit(tree, start, end, bits, 0, NULL, | 891 | return set_extent_bit(tree, start, end, bits, 0, NULL, |
| 842 | mask); | 892 | NULL, mask); |
| 843 | } | 893 | } |
| 844 | 894 | ||
| 845 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 895 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 846 | int bits, gfp_t mask) | 896 | int bits, gfp_t mask) |
| 847 | { | 897 | { |
| 848 | return clear_extent_bit(tree, start, end, bits, 0, 0, mask); | 898 | return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); |
| 849 | } | 899 | } |
| 850 | 900 | ||
| 851 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, | 901 | int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, |
| 852 | gfp_t mask) | 902 | gfp_t mask) |
| 853 | { | 903 | { |
| 854 | return set_extent_bit(tree, start, end, | 904 | return set_extent_bit(tree, start, end, |
| 855 | EXTENT_DELALLOC | EXTENT_DIRTY, | 905 | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, |
| 856 | 0, NULL, mask); | 906 | 0, NULL, NULL, mask); |
| 857 | } | 907 | } |
| 858 | 908 | ||
| 859 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | 909 | int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, |
| 860 | gfp_t mask) | 910 | gfp_t mask) |
| 861 | { | 911 | { |
| 862 | return clear_extent_bit(tree, start, end, | 912 | return clear_extent_bit(tree, start, end, |
| 863 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); | 913 | EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, |
| 864 | } | 914 | NULL, mask); |
| 865 | |||
| 866 | int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 867 | gfp_t mask) | ||
| 868 | { | ||
| 869 | return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask); | ||
| 870 | } | 915 | } |
| 871 | 916 | ||
| 872 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 917 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 873 | gfp_t mask) | 918 | gfp_t mask) |
| 874 | { | 919 | { |
| 875 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, | 920 | return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, |
| 876 | mask); | 921 | NULL, mask); |
| 877 | } | 922 | } |
| 878 | 923 | ||
| 879 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 924 | static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| 880 | gfp_t mask) | 925 | gfp_t mask) |
| 881 | { | 926 | { |
| 882 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); | 927 | return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, |
| 928 | NULL, mask); | ||
| 883 | } | 929 | } |
| 884 | 930 | ||
| 885 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 931 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 886 | gfp_t mask) | 932 | gfp_t mask) |
| 887 | { | 933 | { |
| 888 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, | 934 | return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, |
| 889 | mask); | 935 | NULL, mask); |
| 890 | } | 936 | } |
| 891 | 937 | ||
| 892 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, | 938 | static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, |
| 893 | u64 end, gfp_t mask) | 939 | u64 end, gfp_t mask) |
| 894 | { | 940 | { |
| 895 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); | 941 | return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, |
| 896 | } | 942 | NULL, mask); |
| 897 | |||
| 898 | static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 899 | gfp_t mask) | ||
| 900 | { | ||
| 901 | return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, | ||
| 902 | 0, NULL, mask); | ||
| 903 | } | ||
| 904 | |||
| 905 | static int clear_extent_writeback(struct extent_io_tree *tree, u64 start, | ||
| 906 | u64 end, gfp_t mask) | ||
| 907 | { | ||
| 908 | return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); | ||
| 909 | } | 943 | } |
| 910 | 944 | ||
| 911 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | 945 | int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) |
| @@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 917 | * either insert or lock state struct between start and end use mask to tell | 951 | * either insert or lock state struct between start and end use mask to tell |
| 918 | * us if waiting is desired. | 952 | * us if waiting is desired. |
| 919 | */ | 953 | */ |
| 920 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | 954 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 955 | int bits, struct extent_state **cached_state, gfp_t mask) | ||
| 921 | { | 956 | { |
| 922 | int err; | 957 | int err; |
| 923 | u64 failed_start; | 958 | u64 failed_start; |
| 924 | while (1) { | 959 | while (1) { |
| 925 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 960 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, |
| 926 | &failed_start, mask); | 961 | EXTENT_LOCKED, &failed_start, |
| 962 | cached_state, mask); | ||
| 927 | if (err == -EEXIST && (mask & __GFP_WAIT)) { | 963 | if (err == -EEXIST && (mask & __GFP_WAIT)) { |
| 928 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); | 964 | wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); |
| 929 | start = failed_start; | 965 | start = failed_start; |
| @@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | |||
| 935 | return err; | 971 | return err; |
| 936 | } | 972 | } |
| 937 | 973 | ||
| 974 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) | ||
| 975 | { | ||
| 976 | return lock_extent_bits(tree, start, end, 0, NULL, mask); | ||
| 977 | } | ||
| 978 | |||
| 938 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 979 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 939 | gfp_t mask) | 980 | gfp_t mask) |
| 940 | { | 981 | { |
| 941 | int err; | 982 | int err; |
| 942 | u64 failed_start; | 983 | u64 failed_start; |
| 943 | 984 | ||
| 944 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, | 985 | err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, |
| 945 | &failed_start, mask); | 986 | &failed_start, NULL, mask); |
| 946 | if (err == -EEXIST) { | 987 | if (err == -EEXIST) { |
| 947 | if (failed_start > start) | 988 | if (failed_start > start) |
| 948 | clear_extent_bit(tree, start, failed_start - 1, | 989 | clear_extent_bit(tree, start, failed_start - 1, |
| 949 | EXTENT_LOCKED, 1, 0, mask); | 990 | EXTENT_LOCKED, 1, 0, NULL, mask); |
| 950 | return 0; | 991 | return 0; |
| 951 | } | 992 | } |
| 952 | return 1; | 993 | return 1; |
| 953 | } | 994 | } |
| 954 | 995 | ||
| 996 | int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 997 | struct extent_state **cached, gfp_t mask) | ||
| 998 | { | ||
| 999 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, | ||
| 1000 | mask); | ||
| 1001 | } | ||
| 1002 | |||
| 955 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 1003 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 956 | gfp_t mask) | 1004 | gfp_t mask) |
| 957 | { | 1005 | { |
| 958 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); | 1006 | return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, |
| 1007 | mask); | ||
| 959 | } | 1008 | } |
| 960 | 1009 | ||
| 961 | /* | 1010 | /* |
| @@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 974 | page_cache_release(page); | 1023 | page_cache_release(page); |
| 975 | index++; | 1024 | index++; |
| 976 | } | 1025 | } |
| 977 | set_extent_dirty(tree, start, end, GFP_NOFS); | ||
| 978 | return 0; | 1026 | return 0; |
| 979 | } | 1027 | } |
| 980 | 1028 | ||
| @@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) | |||
| 994 | page_cache_release(page); | 1042 | page_cache_release(page); |
| 995 | index++; | 1043 | index++; |
| 996 | } | 1044 | } |
| 997 | set_extent_writeback(tree, start, end, GFP_NOFS); | ||
| 998 | return 0; | 1045 | return 0; |
| 999 | } | 1046 | } |
| 1000 | 1047 | ||
| @@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode, | |||
| 1232 | u64 delalloc_start; | 1279 | u64 delalloc_start; |
| 1233 | u64 delalloc_end; | 1280 | u64 delalloc_end; |
| 1234 | u64 found; | 1281 | u64 found; |
| 1282 | struct extent_state *cached_state = NULL; | ||
| 1235 | int ret; | 1283 | int ret; |
| 1236 | int loops = 0; | 1284 | int loops = 0; |
| 1237 | 1285 | ||
| @@ -1269,6 +1317,7 @@ again: | |||
| 1269 | /* some of the pages are gone, lets avoid looping by | 1317 | /* some of the pages are gone, lets avoid looping by |
| 1270 | * shortening the size of the delalloc range we're searching | 1318 | * shortening the size of the delalloc range we're searching |
| 1271 | */ | 1319 | */ |
| 1320 | free_extent_state(cached_state); | ||
| 1272 | if (!loops) { | 1321 | if (!loops) { |
| 1273 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); | 1322 | unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); |
| 1274 | max_bytes = PAGE_CACHE_SIZE - offset; | 1323 | max_bytes = PAGE_CACHE_SIZE - offset; |
| @@ -1282,18 +1331,21 @@ again: | |||
| 1282 | BUG_ON(ret); | 1331 | BUG_ON(ret); |
| 1283 | 1332 | ||
| 1284 | /* step three, lock the state bits for the whole range */ | 1333 | /* step three, lock the state bits for the whole range */ |
| 1285 | lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1334 | lock_extent_bits(tree, delalloc_start, delalloc_end, |
| 1335 | 0, &cached_state, GFP_NOFS); | ||
| 1286 | 1336 | ||
| 1287 | /* then test to make sure it is all still delalloc */ | 1337 | /* then test to make sure it is all still delalloc */ |
| 1288 | ret = test_range_bit(tree, delalloc_start, delalloc_end, | 1338 | ret = test_range_bit(tree, delalloc_start, delalloc_end, |
| 1289 | EXTENT_DELALLOC, 1); | 1339 | EXTENT_DELALLOC, 1, cached_state); |
| 1290 | if (!ret) { | 1340 | if (!ret) { |
| 1291 | unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); | 1341 | unlock_extent_cached(tree, delalloc_start, delalloc_end, |
| 1342 | &cached_state, GFP_NOFS); | ||
| 1292 | __unlock_for_delalloc(inode, locked_page, | 1343 | __unlock_for_delalloc(inode, locked_page, |
| 1293 | delalloc_start, delalloc_end); | 1344 | delalloc_start, delalloc_end); |
| 1294 | cond_resched(); | 1345 | cond_resched(); |
| 1295 | goto again; | 1346 | goto again; |
| 1296 | } | 1347 | } |
| 1348 | free_extent_state(cached_state); | ||
| 1297 | *start = delalloc_start; | 1349 | *start = delalloc_start; |
| 1298 | *end = delalloc_end; | 1350 | *end = delalloc_end; |
| 1299 | out_failed: | 1351 | out_failed: |
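Note: find_lock_delalloc_range() is the first user of the new cached-state interface: the extent_state found while locking the range is remembered and handed back to later test/unlock calls so the rbtree does not have to be searched again. The caller pattern from this hunk, reduced to its core (a fragment, not a complete function; error paths omitted).

    struct extent_state *cached_state = NULL;

    /* lock the range and remember the state record that covers its start */
    lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS);

    /* later checks can reuse the cached record instead of searching again */
    if (test_range_bit(tree, start, end, EXTENT_DELALLOC, 1, cached_state)) {
            /* ... keep the range locked and work on it ... */
            free_extent_state(cached_state);
    } else {
            /* unlocking through the cached record also drops the reference */
            unlock_extent_cached(tree, start, end, &cached_state, GFP_NOFS);
    }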
| @@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1307 | int clear_unlock, | 1359 | int clear_unlock, |
| 1308 | int clear_delalloc, int clear_dirty, | 1360 | int clear_delalloc, int clear_dirty, |
| 1309 | int set_writeback, | 1361 | int set_writeback, |
| 1310 | int end_writeback) | 1362 | int end_writeback, |
| 1363 | int set_private2) | ||
| 1311 | { | 1364 | { |
| 1312 | int ret; | 1365 | int ret; |
| 1313 | struct page *pages[16]; | 1366 | struct page *pages[16]; |
| @@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1325 | if (clear_delalloc) | 1378 | if (clear_delalloc) |
| 1326 | clear_bits |= EXTENT_DELALLOC; | 1379 | clear_bits |= EXTENT_DELALLOC; |
| 1327 | 1380 | ||
| 1328 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); | 1381 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
| 1329 | if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) | 1382 | if (!(unlock_pages || clear_dirty || set_writeback || end_writeback || |
| 1383 | set_private2)) | ||
| 1330 | return 0; | 1384 | return 0; |
| 1331 | 1385 | ||
| 1332 | while (nr_pages > 0) { | 1386 | while (nr_pages > 0) { |
| @@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1334 | min_t(unsigned long, | 1388 | min_t(unsigned long, |
| 1335 | nr_pages, ARRAY_SIZE(pages)), pages); | 1389 | nr_pages, ARRAY_SIZE(pages)), pages); |
| 1336 | for (i = 0; i < ret; i++) { | 1390 | for (i = 0; i < ret; i++) { |
| 1391 | |||
| 1392 | if (set_private2) | ||
| 1393 | SetPagePrivate2(pages[i]); | ||
| 1394 | |||
| 1337 | if (pages[i] == locked_page) { | 1395 | if (pages[i] == locked_page) { |
| 1338 | page_cache_release(pages[i]); | 1396 | page_cache_release(pages[i]); |
| 1339 | continue; | 1397 | continue; |
| @@ -1476,14 +1534,17 @@ out: | |||
| 1476 | * range is found set. | 1534 | * range is found set. |
| 1477 | */ | 1535 | */ |
| 1478 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 1536 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 1479 | int bits, int filled) | 1537 | int bits, int filled, struct extent_state *cached) |
| 1480 | { | 1538 | { |
| 1481 | struct extent_state *state = NULL; | 1539 | struct extent_state *state = NULL; |
| 1482 | struct rb_node *node; | 1540 | struct rb_node *node; |
| 1483 | int bitset = 0; | 1541 | int bitset = 0; |
| 1484 | 1542 | ||
| 1485 | spin_lock(&tree->lock); | 1543 | spin_lock(&tree->lock); |
| 1486 | node = tree_search(tree, start); | 1544 | if (cached && cached->tree && cached->start == start) |
| 1545 | node = &cached->rb_node; | ||
| 1546 | else | ||
| 1547 | node = tree_search(tree, start); | ||
| 1487 | while (node && start <= end) { | 1548 | while (node && start <= end) { |
| 1488 | state = rb_entry(node, struct extent_state, rb_node); | 1549 | state = rb_entry(node, struct extent_state, rb_node); |
| 1489 | 1550 | ||
| @@ -1526,7 +1587,7 @@ static int check_page_uptodate(struct extent_io_tree *tree, | |||
| 1526 | { | 1587 | { |
| 1527 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1588 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 1528 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1589 | u64 end = start + PAGE_CACHE_SIZE - 1; |
| 1529 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) | 1590 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) |
| 1530 | SetPageUptodate(page); | 1591 | SetPageUptodate(page); |
| 1531 | return 0; | 1592 | return 0; |
| 1532 | } | 1593 | } |
| @@ -1540,7 +1601,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
| 1540 | { | 1601 | { |
| 1541 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1602 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
| 1542 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1603 | u64 end = start + PAGE_CACHE_SIZE - 1; |
| 1543 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) | 1604 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) |
| 1544 | unlock_page(page); | 1605 | unlock_page(page); |
| 1545 | return 0; | 1606 | return 0; |
| 1546 | } | 1607 | } |
| @@ -1552,10 +1613,7 @@ static int check_page_locked(struct extent_io_tree *tree, | |||
| 1552 | static int check_page_writeback(struct extent_io_tree *tree, | 1613 | static int check_page_writeback(struct extent_io_tree *tree, |
| 1553 | struct page *page) | 1614 | struct page *page) |
| 1554 | { | 1615 | { |
| 1555 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1616 | end_page_writeback(page); |
| 1556 | u64 end = start + PAGE_CACHE_SIZE - 1; | ||
| 1557 | if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) | ||
| 1558 | end_page_writeback(page); | ||
| 1559 | return 0; | 1617 | return 0; |
| 1560 | } | 1618 | } |
| 1561 | 1619 | ||
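With the per-range EXTENT_WRITEBACK test dropped here, partial-page completion simply ends page writeback, so the write end_io path below only chooses between the whole-page and partial-page cases. Roughly, mirroring the calls in this patch:

    if (whole_page)
            end_page_writeback(page);
    else
            check_page_writeback(tree, page);   /* now just ends writeback as well */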
| @@ -1613,13 +1671,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
| 1613 | } | 1671 | } |
| 1614 | 1672 | ||
| 1615 | if (!uptodate) { | 1673 | if (!uptodate) { |
| 1616 | clear_extent_uptodate(tree, start, end, GFP_ATOMIC); | 1674 | clear_extent_uptodate(tree, start, end, GFP_NOFS); |
| 1617 | ClearPageUptodate(page); | 1675 | ClearPageUptodate(page); |
| 1618 | SetPageError(page); | 1676 | SetPageError(page); |
| 1619 | } | 1677 | } |
| 1620 | 1678 | ||
| 1621 | clear_extent_writeback(tree, start, end, GFP_ATOMIC); | ||
| 1622 | |||
| 1623 | if (whole_page) | 1679 | if (whole_page) |
| 1624 | end_page_writeback(page); | 1680 | end_page_writeback(page); |
| 1625 | else | 1681 | else |
| @@ -1983,7 +2039,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 1983 | continue; | 2039 | continue; |
| 1984 | } | 2040 | } |
| 1985 | /* the get_extent function already copied into the page */ | 2041 | /* the get_extent function already copied into the page */ |
| 1986 | if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { | 2042 | if (test_range_bit(tree, cur, cur_end, |
| 2043 | EXTENT_UPTODATE, 1, NULL)) { | ||
| 1987 | check_page_uptodate(tree, page); | 2044 | check_page_uptodate(tree, page); |
| 1988 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); | 2045 | unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); |
| 1989 | cur = cur + iosize; | 2046 | cur = cur + iosize; |
| @@ -2078,6 +2135,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2078 | u64 iosize; | 2135 | u64 iosize; |
| 2079 | u64 unlock_start; | 2136 | u64 unlock_start; |
| 2080 | sector_t sector; | 2137 | sector_t sector; |
| 2138 | struct extent_state *cached_state = NULL; | ||
| 2081 | struct extent_map *em; | 2139 | struct extent_map *em; |
| 2082 | struct block_device *bdev; | 2140 | struct block_device *bdev; |
| 2083 | int ret; | 2141 | int ret; |
| @@ -2124,6 +2182,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2124 | delalloc_end = 0; | 2182 | delalloc_end = 0; |
| 2125 | page_started = 0; | 2183 | page_started = 0; |
| 2126 | if (!epd->extent_locked) { | 2184 | if (!epd->extent_locked) { |
| 2185 | u64 delalloc_to_write; | ||
| 2127 | /* | 2186 | /* |
| 2128 | * make sure the wbc mapping index is at least updated | 2187 | * make sure the wbc mapping index is at least updated |
| 2129 | * to this page. | 2188 | * to this page. |
| @@ -2143,6 +2202,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2143 | tree->ops->fill_delalloc(inode, page, delalloc_start, | 2202 | tree->ops->fill_delalloc(inode, page, delalloc_start, |
| 2144 | delalloc_end, &page_started, | 2203 | delalloc_end, &page_started, |
| 2145 | &nr_written); | 2204 | &nr_written); |
| 2205 | delalloc_to_write = (delalloc_end - | ||
| 2206 | max_t(u64, page_offset(page), | ||
| 2207 | delalloc_start) + 1) >> | ||
| 2208 | PAGE_CACHE_SHIFT; | ||
| 2209 | if (wbc->nr_to_write < delalloc_to_write) { | ||
| 2210 | wbc->nr_to_write = min_t(long, 8192, | ||
| 2211 | delalloc_to_write); | ||
| 2212 | } | ||
| 2146 | delalloc_start = delalloc_end + 1; | 2213 | delalloc_start = delalloc_end + 1; |
| 2147 | } | 2214 | } |
| 2148 | 2215 | ||
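The nr_to_write bump above keeps writeback going when fill_delalloc() covered more pages than the caller budgeted for. A worked example with hypothetical numbers, assuming 4 KiB pages (PAGE_CACHE_SHIFT == 12):

    /*
     * page_offset(page) = 0, delalloc_start = 0, delalloc_end = 1048575
     *   delalloc_to_write = (1048575 - max(0, 0) + 1) >> 12 = 256 pages
     *
     * If the caller passed wbc->nr_to_write = 16, it is raised to
     * min(8192, 256) = 256 so the whole delalloc range can be flushed in
     * this pass; the 8192 cap limits how far one call can raise the budget.
     */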
| @@ -2160,15 +2227,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2160 | goto done_unlocked; | 2227 | goto done_unlocked; |
| 2161 | } | 2228 | } |
| 2162 | } | 2229 | } |
| 2163 | lock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2164 | |||
| 2165 | unlock_start = start; | ||
| 2166 | |||
| 2167 | if (tree->ops && tree->ops->writepage_start_hook) { | 2230 | if (tree->ops && tree->ops->writepage_start_hook) { |
| 2168 | ret = tree->ops->writepage_start_hook(page, start, | 2231 | ret = tree->ops->writepage_start_hook(page, start, |
| 2169 | page_end); | 2232 | page_end); |
| 2170 | if (ret == -EAGAIN) { | 2233 | if (ret == -EAGAIN) { |
| 2171 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2172 | redirty_page_for_writepage(wbc, page); | 2234 | redirty_page_for_writepage(wbc, page); |
| 2173 | update_nr_written(page, wbc, nr_written); | 2235 | update_nr_written(page, wbc, nr_written); |
| 2174 | unlock_page(page); | 2236 | unlock_page(page); |
| @@ -2184,12 +2246,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2184 | update_nr_written(page, wbc, nr_written + 1); | 2246 | update_nr_written(page, wbc, nr_written + 1); |
| 2185 | 2247 | ||
| 2186 | end = page_end; | 2248 | end = page_end; |
| 2187 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | ||
| 2188 | printk(KERN_ERR "btrfs delalloc bits after lock_extent\n"); | ||
| 2189 | |||
| 2190 | if (last_byte <= start) { | 2249 | if (last_byte <= start) { |
| 2191 | clear_extent_dirty(tree, start, page_end, GFP_NOFS); | ||
| 2192 | unlock_extent(tree, start, page_end, GFP_NOFS); | ||
| 2193 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2250 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 2194 | tree->ops->writepage_end_io_hook(page, start, | 2251 | tree->ops->writepage_end_io_hook(page, start, |
| 2195 | page_end, NULL, 1); | 2252 | page_end, NULL, 1); |
| @@ -2197,13 +2254,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2197 | goto done; | 2254 | goto done; |
| 2198 | } | 2255 | } |
| 2199 | 2256 | ||
| 2200 | set_extent_uptodate(tree, start, page_end, GFP_NOFS); | ||
| 2201 | blocksize = inode->i_sb->s_blocksize; | 2257 | blocksize = inode->i_sb->s_blocksize; |
| 2202 | 2258 | ||
| 2203 | while (cur <= end) { | 2259 | while (cur <= end) { |
| 2204 | if (cur >= last_byte) { | 2260 | if (cur >= last_byte) { |
| 2205 | clear_extent_dirty(tree, cur, page_end, GFP_NOFS); | ||
| 2206 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 2207 | if (tree->ops && tree->ops->writepage_end_io_hook) | 2261 | if (tree->ops && tree->ops->writepage_end_io_hook) |
| 2208 | tree->ops->writepage_end_io_hook(page, cur, | 2262 | tree->ops->writepage_end_io_hook(page, cur, |
| 2209 | page_end, NULL, 1); | 2263 | page_end, NULL, 1); |
| @@ -2235,12 +2289,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2235 | */ | 2289 | */ |
| 2236 | if (compressed || block_start == EXTENT_MAP_HOLE || | 2290 | if (compressed || block_start == EXTENT_MAP_HOLE || |
| 2237 | block_start == EXTENT_MAP_INLINE) { | 2291 | block_start == EXTENT_MAP_INLINE) { |
| 2238 | clear_extent_dirty(tree, cur, | ||
| 2239 | cur + iosize - 1, GFP_NOFS); | ||
| 2240 | |||
| 2241 | unlock_extent(tree, unlock_start, cur + iosize - 1, | ||
| 2242 | GFP_NOFS); | ||
| 2243 | |||
| 2244 | /* | 2292 | /* |
| 2245 | * end_io notification does not happen here for | 2293 | * end_io notification does not happen here for |
| 2246 | * compressed extents | 2294 | * compressed extents |
| @@ -2265,13 +2313,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2265 | } | 2313 | } |
| 2266 | /* leave this out until we have a page_mkwrite call */ | 2314 | /* leave this out until we have a page_mkwrite call */ |
| 2267 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, | 2315 | if (0 && !test_range_bit(tree, cur, cur + iosize - 1, |
| 2268 | EXTENT_DIRTY, 0)) { | 2316 | EXTENT_DIRTY, 0, NULL)) { |
| 2269 | cur = cur + iosize; | 2317 | cur = cur + iosize; |
| 2270 | pg_offset += iosize; | 2318 | pg_offset += iosize; |
| 2271 | continue; | 2319 | continue; |
| 2272 | } | 2320 | } |
| 2273 | 2321 | ||
| 2274 | clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); | ||
| 2275 | if (tree->ops && tree->ops->writepage_io_hook) { | 2322 | if (tree->ops && tree->ops->writepage_io_hook) { |
| 2276 | ret = tree->ops->writepage_io_hook(page, cur, | 2323 | ret = tree->ops->writepage_io_hook(page, cur, |
| 2277 | cur + iosize - 1); | 2324 | cur + iosize - 1); |
| @@ -2309,12 +2356,12 @@ done: | |||
| 2309 | set_page_writeback(page); | 2356 | set_page_writeback(page); |
| 2310 | end_page_writeback(page); | 2357 | end_page_writeback(page); |
| 2311 | } | 2358 | } |
| 2312 | if (unlock_start <= page_end) | ||
| 2313 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | ||
| 2314 | unlock_page(page); | 2359 | unlock_page(page); |
| 2315 | 2360 | ||
| 2316 | done_unlocked: | 2361 | done_unlocked: |
| 2317 | 2362 | ||
| 2363 | /* drop our reference on any cached states */ | ||
| 2364 | free_extent_state(cached_state); | ||
| 2318 | return 0; | 2365 | return 0; |
| 2319 | } | 2366 | } |
| 2320 | 2367 | ||
| @@ -2339,7 +2386,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
| 2339 | writepage_t writepage, void *data, | 2386 | writepage_t writepage, void *data, |
| 2340 | void (*flush_fn)(void *)) | 2387 | void (*flush_fn)(void *)) |
| 2341 | { | 2388 | { |
| 2342 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
| 2343 | int ret = 0; | 2389 | int ret = 0; |
| 2344 | int done = 0; | 2390 | int done = 0; |
| 2345 | struct pagevec pvec; | 2391 | struct pagevec pvec; |
| @@ -2414,10 +2460,6 @@ retry: | |||
| 2414 | } | 2460 | } |
| 2415 | if (ret || wbc->nr_to_write <= 0) | 2461 | if (ret || wbc->nr_to_write <= 0) |
| 2416 | done = 1; | 2462 | done = 1; |
| 2417 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
| 2418 | wbc->encountered_congestion = 1; | ||
| 2419 | done = 1; | ||
| 2420 | } | ||
| 2421 | } | 2463 | } |
| 2422 | pagevec_release(&pvec); | 2464 | pagevec_release(&pvec); |
| 2423 | cond_resched(); | 2465 | cond_resched(); |
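After the congestion check is removed, the loop's only exit conditions are a writepage error or an exhausted nr_to_write budget; back-pressure is no longer sampled per iteration here. Sketch of the remaining exit logic, mirroring the lines above:

    if (ret || wbc->nr_to_write <= 0)
            done = 1;
    /* the old wbc->nonblocking / bdi_write_congested() early-out is gone */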
| @@ -2604,10 +2646,10 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
| 2604 | return 0; | 2646 | return 0; |
| 2605 | 2647 | ||
| 2606 | lock_extent(tree, start, end, GFP_NOFS); | 2648 | lock_extent(tree, start, end, GFP_NOFS); |
| 2607 | wait_on_extent_writeback(tree, start, end); | 2649 | wait_on_page_writeback(page); |
| 2608 | clear_extent_bit(tree, start, end, | 2650 | clear_extent_bit(tree, start, end, |
| 2609 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, | 2651 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, |
| 2610 | 1, 1, GFP_NOFS); | 2652 | 1, 1, NULL, GFP_NOFS); |
| 2611 | return 0; | 2653 | return 0; |
| 2612 | } | 2654 | } |
| 2613 | 2655 | ||
| @@ -2687,7 +2729,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
| 2687 | !isnew && !PageUptodate(page) && | 2729 | !isnew && !PageUptodate(page) && |
| 2688 | (block_off_end > to || block_off_start < from) && | 2730 | (block_off_end > to || block_off_start < from) && |
| 2689 | !test_range_bit(tree, block_start, cur_end, | 2731 | !test_range_bit(tree, block_start, cur_end, |
| 2690 | EXTENT_UPTODATE, 1)) { | 2732 | EXTENT_UPTODATE, 1, NULL)) { |
| 2691 | u64 sector; | 2733 | u64 sector; |
| 2692 | u64 extent_offset = block_start - em->start; | 2734 | u64 extent_offset = block_start - em->start; |
| 2693 | size_t iosize; | 2735 | size_t iosize; |
| @@ -2701,7 +2743,7 @@ int extent_prepare_write(struct extent_io_tree *tree, | |||
| 2701 | */ | 2743 | */ |
| 2702 | set_extent_bit(tree, block_start, | 2744 | set_extent_bit(tree, block_start, |
| 2703 | block_start + iosize - 1, | 2745 | block_start + iosize - 1, |
| 2704 | EXTENT_LOCKED, 0, NULL, GFP_NOFS); | 2746 | EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS); |
| 2705 | ret = submit_extent_page(READ, tree, page, | 2747 | ret = submit_extent_page(READ, tree, page, |
| 2706 | sector, iosize, page_offset, em->bdev, | 2748 | sector, iosize, page_offset, em->bdev, |
| 2707 | NULL, 1, | 2749 | NULL, 1, |
| @@ -2742,13 +2784,13 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
| 2742 | int ret = 1; | 2784 | int ret = 1; |
| 2743 | 2785 | ||
| 2744 | if (test_range_bit(tree, start, end, | 2786 | if (test_range_bit(tree, start, end, |
| 2745 | EXTENT_IOBITS | EXTENT_ORDERED, 0)) | 2787 | EXTENT_IOBITS, 0, NULL)) |
| 2746 | ret = 0; | 2788 | ret = 0; |
| 2747 | else { | 2789 | else { |
| 2748 | if ((mask & GFP_NOFS) == GFP_NOFS) | 2790 | if ((mask & GFP_NOFS) == GFP_NOFS) |
| 2749 | mask = GFP_NOFS; | 2791 | mask = GFP_NOFS; |
| 2750 | clear_extent_bit(tree, start, end, EXTENT_UPTODATE, | 2792 | clear_extent_bit(tree, start, end, EXTENT_UPTODATE, |
| 2751 | 1, 1, mask); | 2793 | 1, 1, NULL, mask); |
| 2752 | } | 2794 | } |
| 2753 | return ret; | 2795 | return ret; |
| 2754 | } | 2796 | } |
| @@ -2771,29 +2813,28 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
| 2771 | u64 len; | 2813 | u64 len; |
| 2772 | while (start <= end) { | 2814 | while (start <= end) { |
| 2773 | len = end - start + 1; | 2815 | len = end - start + 1; |
| 2774 | spin_lock(&map->lock); | 2816 | write_lock(&map->lock); |
| 2775 | em = lookup_extent_mapping(map, start, len); | 2817 | em = lookup_extent_mapping(map, start, len); |
| 2776 | if (!em || IS_ERR(em)) { | 2818 | if (!em || IS_ERR(em)) { |
| 2777 | spin_unlock(&map->lock); | 2819 | write_unlock(&map->lock); |
| 2778 | break; | 2820 | break; |
| 2779 | } | 2821 | } |
| 2780 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || | 2822 | if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || |
| 2781 | em->start != start) { | 2823 | em->start != start) { |
| 2782 | spin_unlock(&map->lock); | 2824 | write_unlock(&map->lock); |
| 2783 | free_extent_map(em); | 2825 | free_extent_map(em); |
| 2784 | break; | 2826 | break; |
| 2785 | } | 2827 | } |
| 2786 | if (!test_range_bit(tree, em->start, | 2828 | if (!test_range_bit(tree, em->start, |
| 2787 | extent_map_end(em) - 1, | 2829 | extent_map_end(em) - 1, |
| 2788 | EXTENT_LOCKED | EXTENT_WRITEBACK | | 2830 | EXTENT_LOCKED | EXTENT_WRITEBACK, |
| 2789 | EXTENT_ORDERED, | 2831 | 0, NULL)) { |
| 2790 | 0)) { | ||
| 2791 | remove_extent_mapping(map, em); | 2832 | remove_extent_mapping(map, em); |
| 2792 | /* once for the rb tree */ | 2833 | /* once for the rb tree */ |
| 2793 | free_extent_map(em); | 2834 | free_extent_map(em); |
| 2794 | } | 2835 | } |
| 2795 | start = extent_map_end(em); | 2836 | start = extent_map_end(em); |
| 2796 | spin_unlock(&map->lock); | 2837 | write_unlock(&map->lock); |
| 2797 | 2838 | ||
| 2798 | /* once for us */ | 2839 | /* once for us */ |
| 2799 | free_extent_map(em); | 2840 | free_extent_map(em); |
| @@ -3203,7 +3244,7 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
| 3203 | int uptodate; | 3244 | int uptodate; |
| 3204 | unsigned long index; | 3245 | unsigned long index; |
| 3205 | 3246 | ||
| 3206 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); | 3247 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); |
| 3207 | if (ret) | 3248 | if (ret) |
| 3208 | return 1; | 3249 | return 1; |
| 3209 | while (start <= end) { | 3250 | while (start <= end) { |
| @@ -3233,7 +3274,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
| 3233 | return 1; | 3274 | return 1; |
| 3234 | 3275 | ||
| 3235 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3276 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3236 | EXTENT_UPTODATE, 1); | 3277 | EXTENT_UPTODATE, 1, NULL); |
| 3237 | if (ret) | 3278 | if (ret) |
| 3238 | return ret; | 3279 | return ret; |
| 3239 | 3280 | ||
| @@ -3269,7 +3310,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
| 3269 | return 0; | 3310 | return 0; |
| 3270 | 3311 | ||
| 3271 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3312 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
| 3272 | EXTENT_UPTODATE, 1)) { | 3313 | EXTENT_UPTODATE, 1, NULL)) { |
| 3273 | return 0; | 3314 | return 0; |
| 3274 | } | 3315 | } |
| 3275 | 3316 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 5bc20abf3f3d..14ed16fd862d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -13,10 +13,8 @@ | |||
| 13 | #define EXTENT_DEFRAG (1 << 6) | 13 | #define EXTENT_DEFRAG (1 << 6) |
| 14 | #define EXTENT_DEFRAG_DONE (1 << 7) | 14 | #define EXTENT_DEFRAG_DONE (1 << 7) |
| 15 | #define EXTENT_BUFFER_FILLED (1 << 8) | 15 | #define EXTENT_BUFFER_FILLED (1 << 8) |
| 16 | #define EXTENT_ORDERED (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
| 17 | #define EXTENT_ORDERED_METADATA (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
| 18 | #define EXTENT_BOUNDARY (1 << 11) | ||
| 19 | #define EXTENT_NODATASUM (1 << 12) | ||
| 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 18 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 21 | 19 | ||
| 22 | /* flags for bio submission */ | 20 | /* flags for bio submission */ |
| @@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
| 142 | struct extent_io_tree *tree, struct page *page, | 140 | struct extent_io_tree *tree, struct page *page, |
| 143 | gfp_t mask); | 141 | gfp_t mask); |
| 144 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 142 | int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
| 143 | int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 144 | int bits, struct extent_state **cached, gfp_t mask); | ||
| 145 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); | 145 | int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); |
| 146 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, | 146 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, |
| 147 | gfp_t mask); | 147 | gfp_t mask); |
| @@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
| 155 | u64 max_bytes, unsigned long bits); | 155 | u64 max_bytes, unsigned long bits); |
| 156 | 156 | ||
| 157 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 157 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 158 | int bits, int filled); | 158 | int bits, int filled, struct extent_state *cached_state); |
| 159 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 159 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 160 | int bits, gfp_t mask); | 160 | int bits, gfp_t mask); |
| 161 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 161 | int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 162 | int bits, int wake, int delete, gfp_t mask); | 162 | int bits, int wake, int delete, struct extent_state **cached, |
| 163 | gfp_t mask); | ||
| 163 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 164 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 164 | int bits, gfp_t mask); | 165 | int bits, gfp_t mask); |
| 165 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 166 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 282 | int clear_unlock, | 283 | int clear_unlock, |
| 283 | int clear_delalloc, int clear_dirty, | 284 | int clear_delalloc, int clear_dirty, |
| 284 | int set_writeback, | 285 | int set_writeback, |
| 285 | int end_writeback); | 286 | int end_writeback, |
| 287 | int set_private2); | ||
| 286 | #endif | 288 | #endif |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 30c9365861e6..5bc7a0d325e7 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -36,7 +36,7 @@ void extent_map_exit(void) | |||
| 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | 36 | void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) |
| 37 | { | 37 | { |
| 38 | tree->map.rb_node = NULL; | 38 | tree->map.rb_node = NULL; |
| 39 | spin_lock_init(&tree->lock); | 39 | rwlock_init(&tree->lock); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | /** | 42 | /** |
| @@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
| 198 | return 0; | 198 | return 0; |
| 199 | } | 199 | } |
| 200 | 200 | ||
| 201 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len) | ||
| 202 | { | ||
| 203 | int ret = 0; | ||
| 204 | struct extent_map *merge = NULL; | ||
| 205 | struct rb_node *rb; | ||
| 206 | struct extent_map *em; | ||
| 207 | |||
| 208 | write_lock(&tree->lock); | ||
| 209 | em = lookup_extent_mapping(tree, start, len); | ||
| 210 | |||
| 211 | WARN_ON(em->start != start || !em); | ||
| 212 | |||
| 213 | if (!em) | ||
| 214 | goto out; | ||
| 215 | |||
| 216 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 217 | |||
| 218 | if (em->start != 0) { | ||
| 219 | rb = rb_prev(&em->rb_node); | ||
| 220 | if (rb) | ||
| 221 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 222 | if (rb && mergable_maps(merge, em)) { | ||
| 223 | em->start = merge->start; | ||
| 224 | em->len += merge->len; | ||
| 225 | em->block_len += merge->block_len; | ||
| 226 | em->block_start = merge->block_start; | ||
| 227 | merge->in_tree = 0; | ||
| 228 | rb_erase(&merge->rb_node, &tree->map); | ||
| 229 | free_extent_map(merge); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | rb = rb_next(&em->rb_node); | ||
| 234 | if (rb) | ||
| 235 | merge = rb_entry(rb, struct extent_map, rb_node); | ||
| 236 | if (rb && mergable_maps(em, merge)) { | ||
| 237 | em->len += merge->len; | ||
| 238 | em->block_len += merge->len; | ||
| 239 | rb_erase(&merge->rb_node, &tree->map); | ||
| 240 | merge->in_tree = 0; | ||
| 241 | free_extent_map(merge); | ||
| 242 | } | ||
| 243 | |||
| 244 | free_extent_map(em); | ||
| 245 | out: | ||
| 246 | write_unlock(&tree->lock); | ||
| 247 | return ret; | ||
| 248 | |||
| 249 | } | ||
| 250 | |||
| 201 | /** | 251 | /** |
| 202 | * add_extent_mapping - add new extent map to the extent tree | 252 | * add_extent_mapping - add new extent map to the extent tree |
| 203 | * @tree: tree to insert new map in | 253 | * @tree: tree to insert new map in |
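unpin_extent_cache() is the counterpart to the EXTENT_FLAG_PINNED mappings inserted by the delalloc paths: the mapping stays pinned, and therefore unmergeable, until the ordered extent has made it into the btree, after which the end-io code unpins it so it can merge with its neighbours. A sketch of the life cycle, mirroring the call sites changed in inode.c further down:

    /* allocation time: insert a pinned mapping so readers see the extent */
    set_bit(EXTENT_FLAG_PINNED, &em->flags);
    write_lock(&em_tree->lock);
    ret = add_extent_mapping(em_tree, em);
    write_unlock(&em_tree->lock);

    /* ordered-extent completion: the file extent item is in the btree now,
     * so drop the pin and let the cached mapping merge */
    unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
                       ordered_extent->file_offset, ordered_extent->len);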
| @@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
| 222 | ret = -EEXIST; | 272 | ret = -EEXIST; |
| 223 | goto out; | 273 | goto out; |
| 224 | } | 274 | } |
| 225 | assert_spin_locked(&tree->lock); | ||
| 226 | rb = tree_insert(&tree->map, em->start, &em->rb_node); | 275 | rb = tree_insert(&tree->map, em->start, &em->rb_node); |
| 227 | if (rb) { | 276 | if (rb) { |
| 228 | ret = -EEXIST; | 277 | ret = -EEXIST; |
| @@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
| 285 | struct rb_node *next = NULL; | 334 | struct rb_node *next = NULL; |
| 286 | u64 end = range_end(start, len); | 335 | u64 end = range_end(start, len); |
| 287 | 336 | ||
| 288 | assert_spin_locked(&tree->lock); | ||
| 289 | rb_node = __tree_search(&tree->map, start, &prev, &next); | 337 | rb_node = __tree_search(&tree->map, start, &prev, &next); |
| 290 | if (!rb_node && prev) { | 338 | if (!rb_node && prev) { |
| 291 | em = rb_entry(prev, struct extent_map, rb_node); | 339 | em = rb_entry(prev, struct extent_map, rb_node); |
| @@ -331,7 +379,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
| 331 | int ret = 0; | 379 | int ret = 0; |
| 332 | 380 | ||
| 333 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); | 381 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); |
| 334 | assert_spin_locked(&tree->lock); | ||
| 335 | rb_erase(&em->rb_node, &tree->map); | 382 | rb_erase(&em->rb_node, &tree->map); |
| 336 | em->in_tree = 0; | 383 | em->in_tree = 0; |
| 337 | return ret; | 384 | return ret; |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fb6eeef06bb0..d3d442f4bbbd 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -31,7 +31,7 @@ struct extent_map { | |||
| 31 | 31 | ||
| 32 | struct extent_map_tree { | 32 | struct extent_map_tree { |
| 33 | struct rb_root map; | 33 | struct rb_root map; |
| 34 | spinlock_t lock; | 34 | rwlock_t lock; |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | static inline u64 extent_map_end(struct extent_map *em) | 37 | static inline u64 extent_map_end(struct extent_map *em) |
| @@ -59,4 +59,5 @@ struct extent_map *alloc_extent_map(gfp_t mask); | |||
| 59 | void free_extent_map(struct extent_map *em); | 59 | void free_extent_map(struct extent_map *em); |
| 60 | int __init extent_map_init(void); | 60 | int __init extent_map_init(void); |
| 61 | void extent_map_exit(void); | 61 | void extent_map_exit(void); |
| 62 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len); | ||
| 62 | #endif | 63 | #endif |
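The extent map tree lock changes from a spinlock to an rwlock throughout this patch: lookups take the read side and can run concurrently, while insertion and removal take the write side. The resulting pattern, mirroring the converted call sites:

    /* read side: lookups may run in parallel */
    read_lock(&em_tree->lock);
    em = lookup_extent_mapping(em_tree, start, len);
    read_unlock(&em_tree->lock);

    /* write side: insert/remove still exclude all other users */
    write_lock(&em_tree->lock);
    ret = add_extent_mapping(em_tree, em);
    write_unlock(&em_tree->lock);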
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4b833972273a..571ad3c13b47 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 112 | int err = 0; | 112 | int err = 0; |
| 113 | int i; | 113 | int i; |
| 114 | struct inode *inode = fdentry(file)->d_inode; | 114 | struct inode *inode = fdentry(file)->d_inode; |
| 115 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 116 | u64 hint_byte; | ||
| 117 | u64 num_bytes; | 115 | u64 num_bytes; |
| 118 | u64 start_pos; | 116 | u64 start_pos; |
| 119 | u64 end_of_last_block; | 117 | u64 end_of_last_block; |
| @@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 125 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 123 | root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| 126 | 124 | ||
| 127 | end_of_last_block = start_pos + num_bytes - 1; | 125 | end_of_last_block = start_pos + num_bytes - 1; |
| 128 | |||
| 129 | lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 130 | trans = btrfs_join_transaction(root, 1); | ||
| 131 | if (!trans) { | ||
| 132 | err = -ENOMEM; | ||
| 133 | goto out_unlock; | ||
| 134 | } | ||
| 135 | btrfs_set_trans_block_group(trans, inode); | ||
| 136 | hint_byte = 0; | ||
| 137 | |||
| 138 | set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 139 | |||
| 140 | /* check for reserved extents on each page, we don't want | ||
| 141 | * to reset the delalloc bit on things that already have | ||
| 142 | * extents reserved. | ||
| 143 | */ | ||
| 144 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); | 126 | btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); |
| 145 | for (i = 0; i < num_pages; i++) { | 127 | for (i = 0; i < num_pages; i++) { |
| 146 | struct page *p = pages[i]; | 128 | struct page *p = pages[i]; |
| @@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 155 | * at this time. | 137 | * at this time. |
| 156 | */ | 138 | */ |
| 157 | } | 139 | } |
| 158 | err = btrfs_end_transaction(trans, root); | ||
| 159 | out_unlock: | ||
| 160 | unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); | ||
| 161 | return err; | 140 | return err; |
| 162 | } | 141 | } |
| 163 | 142 | ||
| @@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 189 | if (!split2) | 168 | if (!split2) |
| 190 | split2 = alloc_extent_map(GFP_NOFS); | 169 | split2 = alloc_extent_map(GFP_NOFS); |
| 191 | 170 | ||
| 192 | spin_lock(&em_tree->lock); | 171 | write_lock(&em_tree->lock); |
| 193 | em = lookup_extent_mapping(em_tree, start, len); | 172 | em = lookup_extent_mapping(em_tree, start, len); |
| 194 | if (!em) { | 173 | if (!em) { |
| 195 | spin_unlock(&em_tree->lock); | 174 | write_unlock(&em_tree->lock); |
| 196 | break; | 175 | break; |
| 197 | } | 176 | } |
| 198 | flags = em->flags; | 177 | flags = em->flags; |
| 199 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { | 178 | if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { |
| 200 | spin_unlock(&em_tree->lock); | ||
| 201 | if (em->start <= start && | 179 | if (em->start <= start && |
| 202 | (!testend || em->start + em->len >= start + len)) { | 180 | (!testend || em->start + em->len >= start + len)) { |
| 203 | free_extent_map(em); | 181 | free_extent_map(em); |
| 182 | write_unlock(&em_tree->lock); | ||
| 204 | break; | 183 | break; |
| 205 | } | 184 | } |
| 206 | if (start < em->start) { | 185 | if (start < em->start) { |
| @@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 210 | start = em->start + em->len; | 189 | start = em->start + em->len; |
| 211 | } | 190 | } |
| 212 | free_extent_map(em); | 191 | free_extent_map(em); |
| 192 | write_unlock(&em_tree->lock); | ||
| 213 | continue; | 193 | continue; |
| 214 | } | 194 | } |
| 215 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 195 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| @@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 260 | free_extent_map(split); | 240 | free_extent_map(split); |
| 261 | split = NULL; | 241 | split = NULL; |
| 262 | } | 242 | } |
| 263 | spin_unlock(&em_tree->lock); | 243 | write_unlock(&em_tree->lock); |
| 264 | 244 | ||
| 265 | /* once for us */ | 245 | /* once for us */ |
| 266 | free_extent_map(em); | 246 | free_extent_map(em); |
| @@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 289 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 269 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 290 | struct btrfs_root *root, struct inode *inode, | 270 | struct btrfs_root *root, struct inode *inode, |
| 291 | u64 start, u64 end, u64 locked_end, | 271 | u64 start, u64 end, u64 locked_end, |
| 292 | u64 inline_limit, u64 *hint_byte) | 272 | u64 inline_limit, u64 *hint_byte, int drop_cache) |
| 293 | { | 273 | { |
| 294 | u64 extent_end = 0; | 274 | u64 extent_end = 0; |
| 295 | u64 search_start = start; | 275 | u64 search_start = start; |
| @@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
| 314 | int ret; | 294 | int ret; |
| 315 | 295 | ||
| 316 | inline_limit = 0; | 296 | inline_limit = 0; |
| 317 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 297 | if (drop_cache) |
| 298 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | ||
| 318 | 299 | ||
| 319 | path = btrfs_alloc_path(); | 300 | path = btrfs_alloc_path(); |
| 320 | if (!path) | 301 | if (!path) |
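The new drop_cache argument to btrfs_drop_extents() lets the ordered-extent completion path keep a pinned extent map in the cache while the btree items are replaced, instead of invalidating and re-reading it. Callers that really are throwing the range away keep passing 1. A sketch of the two styles of call, mirroring the sites changed in inode.c and ioctl.c below:

    /* normal case (hole punch, inline conversion, clone): the cache is stale */
    btrfs_drop_extents(trans, root, inode, start, aligned_end, aligned_end,
                       start, &hint_byte, 1);

    /* ordered-extent completion: keep the pinned mapping, unpin it later */
    btrfs_drop_extents(trans, root, inode, file_pos, file_pos + num_bytes,
                       locked_end, file_pos, &hint, 0);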
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59cba180fe83..941f1b71cd22 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 231 | } | 231 | } |
| 232 | 232 | ||
| 233 | ret = btrfs_drop_extents(trans, root, inode, start, | 233 | ret = btrfs_drop_extents(trans, root, inode, start, |
| 234 | aligned_end, aligned_end, start, &hint_byte); | 234 | aligned_end, aligned_end, start, |
| 235 | &hint_byte, 1); | ||
| 235 | BUG_ON(ret); | 236 | BUG_ON(ret); |
| 236 | 237 | ||
| 237 | if (isize > actual_end) | 238 | if (isize > actual_end) |
| @@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 240 | inline_len, compressed_size, | 241 | inline_len, compressed_size, |
| 241 | compressed_pages); | 242 | compressed_pages); |
| 242 | BUG_ON(ret); | 243 | BUG_ON(ret); |
| 243 | btrfs_drop_extent_cache(inode, start, aligned_end, 0); | 244 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
| 244 | return 0; | 245 | return 0; |
| 245 | } | 246 | } |
| 246 | 247 | ||
| @@ -425,7 +426,7 @@ again: | |||
| 425 | extent_clear_unlock_delalloc(inode, | 426 | extent_clear_unlock_delalloc(inode, |
| 426 | &BTRFS_I(inode)->io_tree, | 427 | &BTRFS_I(inode)->io_tree, |
| 427 | start, end, NULL, 1, 0, | 428 | start, end, NULL, 1, 0, |
| 428 | 0, 1, 1, 1); | 429 | 0, 1, 1, 1, 0); |
| 429 | ret = 0; | 430 | ret = 0; |
| 430 | goto free_pages_out; | 431 | goto free_pages_out; |
| 431 | } | 432 | } |
| @@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 611 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 612 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 612 | 613 | ||
| 613 | while (1) { | 614 | while (1) { |
| 614 | spin_lock(&em_tree->lock); | 615 | write_lock(&em_tree->lock); |
| 615 | ret = add_extent_mapping(em_tree, em); | 616 | ret = add_extent_mapping(em_tree, em); |
| 616 | spin_unlock(&em_tree->lock); | 617 | write_unlock(&em_tree->lock); |
| 617 | if (ret != -EEXIST) { | 618 | if (ret != -EEXIST) { |
| 618 | free_extent_map(em); | 619 | free_extent_map(em); |
| 619 | break; | 620 | break; |
| @@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
| 640 | async_extent->start, | 641 | async_extent->start, |
| 641 | async_extent->start + | 642 | async_extent->start + |
| 642 | async_extent->ram_size - 1, | 643 | async_extent->ram_size - 1, |
| 643 | NULL, 1, 1, 0, 1, 1, 0); | 644 | NULL, 1, 1, 0, 1, 1, 0, 0); |
| 644 | 645 | ||
| 645 | ret = btrfs_submit_compressed_write(inode, | 646 | ret = btrfs_submit_compressed_write(inode, |
| 646 | async_extent->start, | 647 | async_extent->start, |
| @@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 713 | extent_clear_unlock_delalloc(inode, | 714 | extent_clear_unlock_delalloc(inode, |
| 714 | &BTRFS_I(inode)->io_tree, | 715 | &BTRFS_I(inode)->io_tree, |
| 715 | start, end, NULL, 1, 1, | 716 | start, end, NULL, 1, 1, |
| 716 | 1, 1, 1, 1); | 717 | 1, 1, 1, 1, 0); |
| 717 | *nr_written = *nr_written + | 718 | *nr_written = *nr_written + |
| 718 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; | 719 | (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; |
| 719 | *page_started = 1; | 720 | *page_started = 1; |
| @@ -747,9 +748,9 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 747 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 748 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 748 | 749 | ||
| 749 | while (1) { | 750 | while (1) { |
| 750 | spin_lock(&em_tree->lock); | 751 | write_lock(&em_tree->lock); |
| 751 | ret = add_extent_mapping(em_tree, em); | 752 | ret = add_extent_mapping(em_tree, em); |
| 752 | spin_unlock(&em_tree->lock); | 753 | write_unlock(&em_tree->lock); |
| 753 | if (ret != -EEXIST) { | 754 | if (ret != -EEXIST) { |
| 754 | free_extent_map(em); | 755 | free_extent_map(em); |
| 755 | break; | 756 | break; |
| @@ -776,11 +777,14 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 776 | /* we're not doing compressed IO, don't unlock the first | 777 | /* we're not doing compressed IO, don't unlock the first |
| 777 | * page (which the caller expects to stay locked), don't | 778 | * page (which the caller expects to stay locked), don't |
| 778 | * clear any dirty bits and don't set any writeback bits | 779 | * clear any dirty bits and don't set any writeback bits |
| 780 | * | ||
| 781 | * Do set the Private2 bit so we know this page was properly | ||
| 782 | * setup for writepage | ||
| 779 | */ | 783 | */ |
| 780 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 784 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 781 | start, start + ram_size - 1, | 785 | start, start + ram_size - 1, |
| 782 | locked_page, unlock, 1, | 786 | locked_page, unlock, 1, |
| 783 | 1, 0, 0, 0); | 787 | 1, 0, 0, 0, 1); |
| 784 | disk_num_bytes -= cur_alloc_size; | 788 | disk_num_bytes -= cur_alloc_size; |
| 785 | num_bytes -= cur_alloc_size; | 789 | num_bytes -= cur_alloc_size; |
| 786 | alloc_hint = ins.objectid + ins.offset; | 790 | alloc_hint = ins.objectid + ins.offset; |
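PagePrivate2 takes over the role of the old EXTENT_ORDERED range bit: the delalloc setup paths tag each page they prepared for writeback, and whichever path later test-and-clears the tag is responsible for the ordered-extent bookkeeping. A rough sketch of the life cycle, following the hunks in this patch:

    /* cow_file_range() / extent_clear_unlock_delalloc() with set_private2 */
    SetPagePrivate2(page);

    /* writepage start hook: a tagged page is already set up, no fixup needed */
    if (TestClearPagePrivate2(page))
            return 0;

    /* invalidatepage: whoever clears the tag must finish the ordered IO */
    if (TestClearPagePrivate2(page))
            btrfs_finish_ordered_io(page->mapping->host, page_start, page_end);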
| @@ -853,7 +857,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
| 853 | int limit = 10 * 1024 * 1042; | 857 | int limit = 10 * 1024 * 1042; |
| 854 | 858 | ||
| 855 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 859 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | |
| 856 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 860 | EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS); |
| 857 | while (start < end) { | 861 | while (start < end) { |
| 858 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); | 862 | async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); |
| 859 | async_cow->inode = inode; | 863 | async_cow->inode = inode; |
| @@ -1080,9 +1084,9 @@ out_check: | |||
| 1080 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1084 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 1081 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1085 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 1082 | while (1) { | 1086 | while (1) { |
| 1083 | spin_lock(&em_tree->lock); | 1087 | write_lock(&em_tree->lock); |
| 1084 | ret = add_extent_mapping(em_tree, em); | 1088 | ret = add_extent_mapping(em_tree, em); |
| 1085 | spin_unlock(&em_tree->lock); | 1089 | write_unlock(&em_tree->lock); |
| 1086 | if (ret != -EEXIST) { | 1090 | if (ret != -EEXIST) { |
| 1087 | free_extent_map(em); | 1091 | free_extent_map(em); |
| 1088 | break; | 1092 | break; |
| @@ -1101,7 +1105,7 @@ out_check: | |||
| 1101 | 1105 | ||
| 1102 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1106 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 1103 | cur_offset, cur_offset + num_bytes - 1, | 1107 | cur_offset, cur_offset + num_bytes - 1, |
| 1104 | locked_page, 1, 1, 1, 0, 0, 0); | 1108 | locked_page, 1, 1, 1, 0, 0, 0, 1); |
| 1105 | cur_offset = extent_end; | 1109 | cur_offset = extent_end; |
| 1106 | if (cur_offset > end) | 1110 | if (cur_offset > end) |
| 1107 | break; | 1111 | break; |
| @@ -1374,10 +1378,8 @@ again: | |||
| 1374 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); | 1378 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); |
| 1375 | 1379 | ||
| 1376 | /* already ordered? We're done */ | 1380 | /* already ordered? We're done */ |
| 1377 | if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, | 1381 | if (PagePrivate2(page)) |
| 1378 | EXTENT_ORDERED, 0)) { | ||
| 1379 | goto out; | 1382 | goto out; |
| 1380 | } | ||
| 1381 | 1383 | ||
| 1382 | ordered = btrfs_lookup_ordered_extent(inode, page_start); | 1384 | ordered = btrfs_lookup_ordered_extent(inode, page_start); |
| 1383 | if (ordered) { | 1385 | if (ordered) { |
| @@ -1413,11 +1415,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
| 1413 | struct inode *inode = page->mapping->host; | 1415 | struct inode *inode = page->mapping->host; |
| 1414 | struct btrfs_writepage_fixup *fixup; | 1416 | struct btrfs_writepage_fixup *fixup; |
| 1415 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1417 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1416 | int ret; | ||
| 1417 | 1418 | ||
| 1418 | ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, | 1419 | /* this page is properly in the ordered list */ |
| 1419 | EXTENT_ORDERED, 0); | 1420 | if (TestClearPagePrivate2(page)) |
| 1420 | if (ret) | ||
| 1421 | return 0; | 1421 | return 0; |
| 1422 | 1422 | ||
| 1423 | if (PageChecked(page)) | 1423 | if (PageChecked(page)) |
| @@ -1455,9 +1455,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1455 | BUG_ON(!path); | 1455 | BUG_ON(!path); |
| 1456 | 1456 | ||
| 1457 | path->leave_spinning = 1; | 1457 | path->leave_spinning = 1; |
| 1458 | |||
| 1459 | /* | ||
| 1460 | * we may be replacing one extent in the tree with another. | ||
| 1461 | * The new extent is pinned in the extent map, and we don't want | ||
| 1462 | * to drop it from the cache until it is completely in the btree. | ||
| 1463 | * | ||
| 1464 | * So, tell btrfs_drop_extents to leave this extent in the cache. | ||
| 1465 | * the caller is expected to unpin it and allow it to be merged | ||
| 1466 | * with the others. | ||
| 1467 | */ | ||
| 1458 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1468 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
| 1459 | file_pos + num_bytes, locked_end, | 1469 | file_pos + num_bytes, locked_end, |
| 1460 | file_pos, &hint); | 1470 | file_pos, &hint, 0); |
| 1461 | BUG_ON(ret); | 1471 | BUG_ON(ret); |
| 1462 | 1472 | ||
| 1463 | ins.objectid = inode->i_ino; | 1473 | ins.objectid = inode->i_ino; |
| @@ -1485,7 +1495,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1485 | btrfs_mark_buffer_dirty(leaf); | 1495 | btrfs_mark_buffer_dirty(leaf); |
| 1486 | 1496 | ||
| 1487 | inode_add_bytes(inode, num_bytes); | 1497 | inode_add_bytes(inode, num_bytes); |
| 1488 | btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); | ||
| 1489 | 1498 | ||
| 1490 | ins.objectid = disk_bytenr; | 1499 | ins.objectid = disk_bytenr; |
| 1491 | ins.offset = disk_num_bytes; | 1500 | ins.offset = disk_num_bytes; |
| @@ -1596,6 +1605,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1596 | ordered_extent->len, | 1605 | ordered_extent->len, |
| 1597 | compressed, 0, 0, | 1606 | compressed, 0, 0, |
| 1598 | BTRFS_FILE_EXTENT_REG); | 1607 | BTRFS_FILE_EXTENT_REG); |
| 1608 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 1609 | ordered_extent->file_offset, | ||
| 1610 | ordered_extent->len); | ||
| 1599 | BUG_ON(ret); | 1611 | BUG_ON(ret); |
| 1600 | } | 1612 | } |
| 1601 | unlock_extent(io_tree, ordered_extent->file_offset, | 1613 | unlock_extent(io_tree, ordered_extent->file_offset, |
| @@ -1623,6 +1635,7 @@ nocow: | |||
| 1623 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | 1635 | static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, |
| 1624 | struct extent_state *state, int uptodate) | 1636 | struct extent_state *state, int uptodate) |
| 1625 | { | 1637 | { |
| 1638 | ClearPagePrivate2(page); | ||
| 1626 | return btrfs_finish_ordered_io(page->mapping->host, start, end); | 1639 | return btrfs_finish_ordered_io(page->mapping->host, start, end); |
| 1627 | } | 1640 | } |
| 1628 | 1641 | ||
| @@ -1669,13 +1682,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 1669 | failrec->last_mirror = 0; | 1682 | failrec->last_mirror = 0; |
| 1670 | failrec->bio_flags = 0; | 1683 | failrec->bio_flags = 0; |
| 1671 | 1684 | ||
| 1672 | spin_lock(&em_tree->lock); | 1685 | read_lock(&em_tree->lock); |
| 1673 | em = lookup_extent_mapping(em_tree, start, failrec->len); | 1686 | em = lookup_extent_mapping(em_tree, start, failrec->len); |
| 1674 | if (em->start > start || em->start + em->len < start) { | 1687 | if (em->start > start || em->start + em->len < start) { |
| 1675 | free_extent_map(em); | 1688 | free_extent_map(em); |
| 1676 | em = NULL; | 1689 | em = NULL; |
| 1677 | } | 1690 | } |
| 1678 | spin_unlock(&em_tree->lock); | 1691 | read_unlock(&em_tree->lock); |
| 1679 | 1692 | ||
| 1680 | if (!em || IS_ERR(em)) { | 1693 | if (!em || IS_ERR(em)) { |
| 1681 | kfree(failrec); | 1694 | kfree(failrec); |
| @@ -1794,7 +1807,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
| 1794 | return 0; | 1807 | return 0; |
| 1795 | 1808 | ||
| 1796 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && | 1809 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && |
| 1797 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { | 1810 | test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { |
| 1798 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, | 1811 | clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, |
| 1799 | GFP_NOFS); | 1812 | GFP_NOFS); |
| 1800 | return 0; | 1813 | return 0; |
| @@ -2935,7 +2948,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 2935 | cur_offset, | 2948 | cur_offset, |
| 2936 | cur_offset + hole_size, | 2949 | cur_offset + hole_size, |
| 2937 | block_end, | 2950 | block_end, |
| 2938 | cur_offset, &hint_byte); | 2951 | cur_offset, &hint_byte, 1); |
| 2939 | if (err) | 2952 | if (err) |
| 2940 | break; | 2953 | break; |
| 2941 | err = btrfs_insert_file_extent(trans, root, | 2954 | err = btrfs_insert_file_extent(trans, root, |
| @@ -4064,11 +4077,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, | |||
| 4064 | int compressed; | 4077 | int compressed; |
| 4065 | 4078 | ||
| 4066 | again: | 4079 | again: |
| 4067 | spin_lock(&em_tree->lock); | 4080 | read_lock(&em_tree->lock); |
| 4068 | em = lookup_extent_mapping(em_tree, start, len); | 4081 | em = lookup_extent_mapping(em_tree, start, len); |
| 4069 | if (em) | 4082 | if (em) |
| 4070 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 4083 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 4071 | spin_unlock(&em_tree->lock); | 4084 | read_unlock(&em_tree->lock); |
| 4072 | 4085 | ||
| 4073 | if (em) { | 4086 | if (em) { |
| 4074 | if (em->start > start || em->start + em->len <= start) | 4087 | if (em->start > start || em->start + em->len <= start) |
| @@ -4215,6 +4228,11 @@ again: | |||
| 4215 | map = kmap(page); | 4228 | map = kmap(page); |
| 4216 | read_extent_buffer(leaf, map + pg_offset, ptr, | 4229 | read_extent_buffer(leaf, map + pg_offset, ptr, |
| 4217 | copy_size); | 4230 | copy_size); |
| 4231 | if (pg_offset + copy_size < PAGE_CACHE_SIZE) { | ||
| 4232 | memset(map + pg_offset + copy_size, 0, | ||
| 4233 | PAGE_CACHE_SIZE - pg_offset - | ||
| 4234 | copy_size); | ||
| 4235 | } | ||
| 4218 | kunmap(page); | 4236 | kunmap(page); |
| 4219 | } | 4237 | } |
| 4220 | flush_dcache_page(page); | 4238 | flush_dcache_page(page); |
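The added memset clears whatever part of the page the inline extent does not cover, so no stale data is exposed past the copied bytes. A worked example with hypothetical sizes:

    /* pg_offset = 0, copy_size = 3000, PAGE_CACHE_SIZE = 4096:
     * bytes 3000..4095 of the kmapped page are zeroed before kunmap. */
    if (pg_offset + copy_size < PAGE_CACHE_SIZE)
            memset(map + pg_offset + copy_size, 0,
                   PAGE_CACHE_SIZE - pg_offset - copy_size);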
| @@ -4259,7 +4277,7 @@ insert: | |||
| 4259 | } | 4277 | } |
| 4260 | 4278 | ||
| 4261 | err = 0; | 4279 | err = 0; |
| 4262 | spin_lock(&em_tree->lock); | 4280 | write_lock(&em_tree->lock); |
| 4263 | ret = add_extent_mapping(em_tree, em); | 4281 | ret = add_extent_mapping(em_tree, em); |
| 4264 | /* it is possible that someone inserted the extent into the tree | 4282 | /* it is possible that someone inserted the extent into the tree |
| 4265 | * while we had the lock dropped. It is also possible that | 4283 | * while we had the lock dropped. It is also possible that |
| @@ -4299,7 +4317,7 @@ insert: | |||
| 4299 | err = 0; | 4317 | err = 0; |
| 4300 | } | 4318 | } |
| 4301 | } | 4319 | } |
| 4302 | spin_unlock(&em_tree->lock); | 4320 | write_unlock(&em_tree->lock); |
| 4303 | out: | 4321 | out: |
| 4304 | if (path) | 4322 | if (path) |
| 4305 | btrfs_free_path(path); | 4323 | btrfs_free_path(path); |
| @@ -4398,13 +4416,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 4398 | u64 page_start = page_offset(page); | 4416 | u64 page_start = page_offset(page); |
| 4399 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; | 4417 | u64 page_end = page_start + PAGE_CACHE_SIZE - 1; |
| 4400 | 4418 | ||
| 4419 | |||
| 4420 | /* | ||
| 4421 | * we have the page locked, so new writeback can't start, | ||
| 4422 | * and the dirty bit won't be cleared while we are here. | ||
| 4423 | * | ||
| 4424 | * Wait for IO on this page so that we can safely clear | ||
| 4425 | * the PagePrivate2 bit and do ordered accounting | ||
| 4426 | */ | ||
| 4401 | wait_on_page_writeback(page); | 4427 | wait_on_page_writeback(page); |
| 4428 | |||
| 4402 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 4429 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
| 4403 | if (offset) { | 4430 | if (offset) { |
| 4404 | btrfs_releasepage(page, GFP_NOFS); | 4431 | btrfs_releasepage(page, GFP_NOFS); |
| 4405 | return; | 4432 | return; |
| 4406 | } | 4433 | } |
| 4407 | |||
| 4408 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4434 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
| 4409 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | 4435 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, |
| 4410 | page_offset(page)); | 4436 | page_offset(page)); |
| @@ -4415,16 +4441,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 4415 | */ | 4441 | */ |
| 4416 | clear_extent_bit(tree, page_start, page_end, | 4442 | clear_extent_bit(tree, page_start, page_end, |
| 4417 | EXTENT_DIRTY | EXTENT_DELALLOC | | 4443 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 4418 | EXTENT_LOCKED, 1, 0, GFP_NOFS); | 4444 | EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); |
| 4419 | btrfs_finish_ordered_io(page->mapping->host, | 4445 | /* |
| 4420 | page_start, page_end); | 4446 | * whoever cleared the private bit is responsible |
| 4447 | * for the finish_ordered_io | ||
| 4448 | */ | ||
| 4449 | if (TestClearPagePrivate2(page)) { | ||
| 4450 | btrfs_finish_ordered_io(page->mapping->host, | ||
| 4451 | page_start, page_end); | ||
| 4452 | } | ||
| 4421 | btrfs_put_ordered_extent(ordered); | 4453 | btrfs_put_ordered_extent(ordered); |
| 4422 | lock_extent(tree, page_start, page_end, GFP_NOFS); | 4454 | lock_extent(tree, page_start, page_end, GFP_NOFS); |
| 4423 | } | 4455 | } |
| 4424 | clear_extent_bit(tree, page_start, page_end, | 4456 | clear_extent_bit(tree, page_start, page_end, |
| 4425 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | | 4457 | EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, |
| 4426 | EXTENT_ORDERED, | 4458 | 1, 1, NULL, GFP_NOFS); |
| 4427 | 1, 1, GFP_NOFS); | ||
| 4428 | __btrfs_releasepage(page, GFP_NOFS); | 4459 | __btrfs_releasepage(page, GFP_NOFS); |
| 4429 | 4460 | ||
| 4430 | ClearPageChecked(page); | 4461 | ClearPageChecked(page); |
| @@ -4521,11 +4552,14 @@ again: | |||
| 4521 | } | 4552 | } |
| 4522 | ClearPageChecked(page); | 4553 | ClearPageChecked(page); |
| 4523 | set_page_dirty(page); | 4554 | set_page_dirty(page); |
| 4555 | SetPageUptodate(page); | ||
| 4524 | 4556 | ||
| 4525 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 4557 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
| 4526 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4558 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
| 4527 | 4559 | ||
| 4528 | out_unlock: | 4560 | out_unlock: |
| 4561 | if (!ret) | ||
| 4562 | return VM_FAULT_LOCKED; | ||
| 4529 | unlock_page(page); | 4563 | unlock_page(page); |
| 4530 | out: | 4564 | out: |
| 4531 | return ret; | 4565 | return ret; |
| @@ -5058,6 +5092,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, | |||
| 5058 | 0, 0, 0, | 5092 | 0, 0, 0, |
| 5059 | BTRFS_FILE_EXTENT_PREALLOC); | 5093 | BTRFS_FILE_EXTENT_PREALLOC); |
| 5060 | BUG_ON(ret); | 5094 | BUG_ON(ret); |
| 5095 | btrfs_drop_extent_cache(inode, cur_offset, | ||
| 5096 | cur_offset + ins.offset -1, 0); | ||
| 5061 | num_bytes -= ins.offset; | 5097 | num_bytes -= ins.offset; |
| 5062 | cur_offset += ins.offset; | 5098 | cur_offset += ins.offset; |
| 5063 | alloc_hint = ins.objectid + ins.offset; | 5099 | alloc_hint = ins.objectid + ins.offset; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd88f25889f7..ef0188fb3cc4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -596,9 +596,8 @@ again: | |||
| 596 | clear_page_dirty_for_io(page); | 596 | clear_page_dirty_for_io(page); |
| 597 | 597 | ||
| 598 | btrfs_set_extent_delalloc(inode, page_start, page_end); | 598 | btrfs_set_extent_delalloc(inode, page_start, page_end); |
| 599 | |||
| 600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 601 | set_page_dirty(page); | 599 | set_page_dirty(page); |
| 600 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | ||
| 602 | unlock_page(page); | 601 | unlock_page(page); |
| 603 | page_cache_release(page); | 602 | page_cache_release(page); |
| 604 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 603 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
| @@ -976,7 +975,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 976 | 975 | ||
| 977 | /* punch hole in destination first */ | 976 | /* punch hole in destination first */ |
| 978 | btrfs_drop_extents(trans, root, inode, off, off + len, | 977 | btrfs_drop_extents(trans, root, inode, off, off + len, |
| 979 | off + len, 0, &hint_byte); | 978 | off + len, 0, &hint_byte, 1); |
| 980 | 979 | ||
| 981 | /* clone data */ | 980 | /* clone data */ |
| 982 | key.objectid = src->i_ino; | 981 | key.objectid = src->i_ino; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d6f0806c682f..4a9c8c4cec25 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
| 159 | * | 159 | * |
| 160 | * len is the length of the extent | 160 | * len is the length of the extent |
| 161 | * | 161 | * |
| 162 | * This also sets the EXTENT_ORDERED bit on the range in the inode. | ||
| 163 | * | ||
| 164 | * The tree is given a single reference on the ordered extent that was | 162 | * The tree is given a single reference on the ordered extent that was |
| 165 | * inserted. | 163 | * inserted. |
| 166 | */ | 164 | */ |
| @@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 181 | entry->start = start; | 179 | entry->start = start; |
| 182 | entry->len = len; | 180 | entry->len = len; |
| 183 | entry->disk_len = disk_len; | 181 | entry->disk_len = disk_len; |
| 182 | entry->bytes_left = len; | ||
| 184 | entry->inode = inode; | 183 | entry->inode = inode; |
| 185 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 184 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
| 186 | set_bit(type, &entry->flags); | 185 | set_bit(type, &entry->flags); |
| @@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 195 | &entry->rb_node); | 194 | &entry->rb_node); |
| 196 | BUG_ON(node); | 195 | BUG_ON(node); |
| 197 | 196 | ||
| 198 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | ||
| 199 | entry_end(entry) - 1, GFP_NOFS); | ||
| 200 | |||
| 201 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 197 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
| 202 | list_add_tail(&entry->root_extent_list, | 198 | list_add_tail(&entry->root_extent_list, |
| 203 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 199 | &BTRFS_I(inode)->root->fs_info->ordered_extents); |
| @@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 241 | struct btrfs_ordered_inode_tree *tree; | 237 | struct btrfs_ordered_inode_tree *tree; |
| 242 | struct rb_node *node; | 238 | struct rb_node *node; |
| 243 | struct btrfs_ordered_extent *entry; | 239 | struct btrfs_ordered_extent *entry; |
| 244 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
| 245 | int ret; | 240 | int ret; |
| 246 | 241 | ||
| 247 | tree = &BTRFS_I(inode)->ordered_tree; | 242 | tree = &BTRFS_I(inode)->ordered_tree; |
| 248 | mutex_lock(&tree->mutex); | 243 | mutex_lock(&tree->mutex); |
| 249 | clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, | ||
| 250 | GFP_NOFS); | ||
| 251 | node = tree_search(tree, file_offset); | 244 | node = tree_search(tree, file_offset); |
| 252 | if (!node) { | 245 | if (!node) { |
| 253 | ret = 1; | 246 | ret = 1; |
| @@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 260 | goto out; | 253 | goto out; |
| 261 | } | 254 | } |
| 262 | 255 | ||
| 263 | ret = test_range_bit(io_tree, entry->file_offset, | 256 | if (io_size > entry->bytes_left) { |
| 264 | entry->file_offset + entry->len - 1, | 257 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", |
| 265 | EXTENT_ORDERED, 0); | 258 | (unsigned long long)entry->bytes_left, |
| 266 | if (ret == 0) | 259 | (unsigned long long)io_size); |
| 260 | } | ||
| 261 | entry->bytes_left -= io_size; | ||
| 262 | if (entry->bytes_left == 0) | ||
| 267 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 263 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
| 264 | else | ||
| 265 | ret = 1; | ||
| 268 | out: | 266 | out: |
| 269 | mutex_unlock(&tree->mutex); | 267 | mutex_unlock(&tree->mutex); |
| 270 | return ret == 0; | 268 | return ret == 0; |
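With the EXTENT_ORDERED bit removed from the inode's io_tree, completion of an ordered extent is now tracked entirely by the new bytes_left counter: every finished chunk of IO subtracts its size, and only the call that drives bytes_left to zero sets BTRFS_ORDERED_IO_DONE, so btrfs_dec_test_ordered_pending() returns non-zero exactly once per ordered extent. A minimal caller-side sketch, where finish_ordered_io() is an illustrative stand-in for the real completion work (that name is not taken from this patch):

/*
 * Sketch: end-of-write handling for one completed range [start, end]
 * of an ordered extent.  The dec-test helper returns true only when
 * this range was the last outstanding piece of the ordered extent.
 */
static void on_range_written(struct inode *inode, u64 start, u64 end)
{
	if (btrfs_dec_test_ordered_pending(inode, start, end - start + 1))
		finish_ordered_io(inode, start, end);	/* illustrative */
}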
| @@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 476 | u64 orig_end; | 474 | u64 orig_end; |
| 477 | u64 wait_end; | 475 | u64 wait_end; |
| 478 | struct btrfs_ordered_extent *ordered; | 476 | struct btrfs_ordered_extent *ordered; |
| 477 | int found; | ||
| 479 | 478 | ||
| 480 | if (start + len < start) { | 479 | if (start + len < start) { |
| 481 | orig_end = INT_LIMIT(loff_t); | 480 | orig_end = INT_LIMIT(loff_t); |
| @@ -502,6 +501,7 @@ again: | |||
| 502 | orig_end >> PAGE_CACHE_SHIFT); | 501 | orig_end >> PAGE_CACHE_SHIFT); |
| 503 | 502 | ||
| 504 | end = orig_end; | 503 | end = orig_end; |
| 504 | found = 0; | ||
| 505 | while (1) { | 505 | while (1) { |
| 506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
| 507 | if (!ordered) | 507 | if (!ordered) |
| @@ -514,6 +514,7 @@ again: | |||
| 514 | btrfs_put_ordered_extent(ordered); | 514 | btrfs_put_ordered_extent(ordered); |
| 515 | break; | 515 | break; |
| 516 | } | 516 | } |
| 517 | found++; | ||
| 517 | btrfs_start_ordered_extent(inode, ordered, 1); | 518 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 518 | end = ordered->file_offset; | 519 | end = ordered->file_offset; |
| 519 | btrfs_put_ordered_extent(ordered); | 520 | btrfs_put_ordered_extent(ordered); |
| @@ -521,8 +522,8 @@ again: | |||
| 521 | break; | 522 | break; |
| 522 | end--; | 523 | end--; |
| 523 | } | 524 | } |
| 524 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | 525 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, |
| 525 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | 526 | EXTENT_DELALLOC, 0, NULL)) { |
| 526 | schedule_timeout(1); | 527 | schedule_timeout(1); |
| 527 | goto again; | 528 | goto again; |
| 528 | } | 529 | } |
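Since the io_tree no longer carries EXTENT_ORDERED, btrfs_wait_ordered_range() cannot re-test that bit to decide whether another pass is needed; the new found counter instead records whether the scan encountered any ordered extents at all. A condensed sketch of the resulting retry logic (summarized from the hunks above, not a verbatim copy):

	found = 0;
	while ((ordered = btrfs_lookup_first_ordered_extent(inode, end))) {
		/* the real loop breaks out once the extent falls outside
		 * [start, orig_end]; those checks are elided here */
		found++;
		btrfs_start_ordered_extent(inode, ordered, 1);	/* wait on it */
		end = ordered->file_offset;
		btrfs_put_ordered_extent(ordered);
		if (end == 0 || end == start)
			break;
		end--;
	}
	/* go around again if we waited on anything or delalloc remains */
	if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
				    EXTENT_DELALLOC, 0, NULL)) {
		schedule_timeout(1);
		goto again;
	}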
| @@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
| 613 | */ | 614 | */ |
| 614 | if (test_range_bit(io_tree, disk_i_size, | 615 | if (test_range_bit(io_tree, disk_i_size, |
| 615 | ordered->file_offset + ordered->len - 1, | 616 | ordered->file_offset + ordered->len - 1, |
| 616 | EXTENT_DELALLOC, 0)) { | 617 | EXTENT_DELALLOC, 0, NULL)) { |
| 617 | goto out; | 618 | goto out; |
| 618 | } | 619 | } |
| 619 | /* | 620 | /* |
| @@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
| 664 | */ | 665 | */ |
| 665 | if (i_size_test > entry_end(ordered) && | 666 | if (i_size_test > entry_end(ordered) && |
| 666 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, | 667 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, |
| 667 | EXTENT_DELALLOC, 0)) { | 668 | EXTENT_DELALLOC, 0, NULL)) { |
| 668 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | 669 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); |
| 669 | } | 670 | } |
| 670 | BTRFS_I(inode)->disk_i_size = new_i_size; | 671 | BTRFS_I(inode)->disk_i_size = new_i_size; |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3d31c8827b01..993a7ea45c70 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -85,6 +85,9 @@ struct btrfs_ordered_extent { | |||
| 85 | /* extent length on disk */ | 85 | /* extent length on disk */ |
| 86 | u64 disk_len; | 86 | u64 disk_len; |
| 87 | 87 | ||
| 88 | /* number of bytes that still need writing */ | ||
| 89 | u64 bytes_left; | ||
| 90 | |||
| 88 | /* flags (described above) */ | 91 | /* flags (described above) */ |
| 89 | unsigned long flags; | 92 | unsigned long flags; |
| 90 | 93 | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index c04f7f212602..3be16ccc7eea 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -2180,7 +2180,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
| 2180 | struct reloc_control *rc) | 2180 | struct reloc_control *rc) |
| 2181 | { | 2181 | { |
| 2182 | if (test_range_bit(&rc->processed_blocks, bytenr, | 2182 | if (test_range_bit(&rc->processed_blocks, bytenr, |
| 2183 | bytenr + blocksize - 1, EXTENT_DIRTY, 1)) | 2183 | bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) |
| 2184 | return 1; | 2184 | return 1; |
| 2185 | return 0; | 2185 | return 0; |
| 2186 | } | 2186 | } |
| @@ -2646,9 +2646,9 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) | |||
| 2646 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | 2646 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); |
| 2647 | while (1) { | 2647 | while (1) { |
| 2648 | int ret; | 2648 | int ret; |
| 2649 | spin_lock(&em_tree->lock); | 2649 | write_lock(&em_tree->lock); |
| 2650 | ret = add_extent_mapping(em_tree, em); | 2650 | ret = add_extent_mapping(em_tree, em); |
| 2651 | spin_unlock(&em_tree->lock); | 2651 | write_unlock(&em_tree->lock); |
| 2652 | if (ret != -EEXIST) { | 2652 | if (ret != -EEXIST) { |
| 2653 | free_extent_map(em); | 2653 | free_extent_map(em); |
| 2654 | break; | 2654 | break; |
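This relocation.c hunk is the first of several in this diff (the volumes.c hunks below repeat the pattern) where the extent_map tree lock changes from a spinlock to a reader/writer lock: plain lookups now take read_lock()/read_unlock(), while add_extent_mapping() and remove_extent_mapping() take write_lock()/write_unlock(). A minimal sketch of the two access patterns, assuming em_tree->lock is now an rwlock_t:

	struct extent_map *em;
	int ret;

	/* readers: concurrent lookups no longer serialize against each other */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	/* writers: inserts and removals still exclude all other users */
	write_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);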
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d91b0de7c502..8661a7381b39 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
| 534 | saved_nbytes = inode_get_bytes(inode); | 534 | saved_nbytes = inode_get_bytes(inode); |
| 535 | /* drop any overlapping extents */ | 535 | /* drop any overlapping extents */ |
| 536 | ret = btrfs_drop_extents(trans, root, inode, | 536 | ret = btrfs_drop_extents(trans, root, inode, |
| 537 | start, extent_end, extent_end, start, &alloc_hint); | 537 | start, extent_end, extent_end, start, &alloc_hint, 1); |
| 538 | BUG_ON(ret); | 538 | BUG_ON(ret); |
| 539 | 539 | ||
| 540 | if (found_type == BTRFS_FILE_EXTENT_REG || | 540 | if (found_type == BTRFS_FILE_EXTENT_REG || |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5dbefd11b4af..d2358c06bbd9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -276,7 +276,7 @@ loop_lock: | |||
| 276 | * is now congested. Back off and let other work structs | 276 | * is now congested. Back off and let other work structs |
| 277 | * run instead | 277 | * run instead |
| 278 | */ | 278 | */ |
| 279 | if (pending && bdi_write_congested(bdi) && batch_run > 32 && | 279 | if (pending && bdi_write_congested(bdi) && batch_run > 8 && |
| 280 | fs_info->fs_devices->open_devices > 1) { | 280 | fs_info->fs_devices->open_devices > 1) { |
| 281 | struct io_context *ioc; | 281 | struct io_context *ioc; |
| 282 | 282 | ||
| @@ -1749,9 +1749,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1749 | * step two, delete the device extents and the | 1749 | * step two, delete the device extents and the |
| 1750 | * chunk tree entries | 1750 | * chunk tree entries |
| 1751 | */ | 1751 | */ |
| 1752 | spin_lock(&em_tree->lock); | 1752 | read_lock(&em_tree->lock); |
| 1753 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); | 1753 | em = lookup_extent_mapping(em_tree, chunk_offset, 1); |
| 1754 | spin_unlock(&em_tree->lock); | 1754 | read_unlock(&em_tree->lock); |
| 1755 | 1755 | ||
| 1756 | BUG_ON(em->start > chunk_offset || | 1756 | BUG_ON(em->start > chunk_offset || |
| 1757 | em->start + em->len < chunk_offset); | 1757 | em->start + em->len < chunk_offset); |
| @@ -1780,9 +1780,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1780 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); | 1780 | ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); |
| 1781 | BUG_ON(ret); | 1781 | BUG_ON(ret); |
| 1782 | 1782 | ||
| 1783 | spin_lock(&em_tree->lock); | 1783 | write_lock(&em_tree->lock); |
| 1784 | remove_extent_mapping(em_tree, em); | 1784 | remove_extent_mapping(em_tree, em); |
| 1785 | spin_unlock(&em_tree->lock); | 1785 | write_unlock(&em_tree->lock); |
| 1786 | 1786 | ||
| 1787 | kfree(map); | 1787 | kfree(map); |
| 1788 | em->bdev = NULL; | 1788 | em->bdev = NULL; |
| @@ -2294,9 +2294,9 @@ again: | |||
| 2294 | em->block_len = em->len; | 2294 | em->block_len = em->len; |
| 2295 | 2295 | ||
| 2296 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; | 2296 | em_tree = &extent_root->fs_info->mapping_tree.map_tree; |
| 2297 | spin_lock(&em_tree->lock); | 2297 | write_lock(&em_tree->lock); |
| 2298 | ret = add_extent_mapping(em_tree, em); | 2298 | ret = add_extent_mapping(em_tree, em); |
| 2299 | spin_unlock(&em_tree->lock); | 2299 | write_unlock(&em_tree->lock); |
| 2300 | BUG_ON(ret); | 2300 | BUG_ON(ret); |
| 2301 | free_extent_map(em); | 2301 | free_extent_map(em); |
| 2302 | 2302 | ||
| @@ -2491,9 +2491,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) | |||
| 2491 | int readonly = 0; | 2491 | int readonly = 0; |
| 2492 | int i; | 2492 | int i; |
| 2493 | 2493 | ||
| 2494 | spin_lock(&map_tree->map_tree.lock); | 2494 | read_lock(&map_tree->map_tree.lock); |
| 2495 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2495 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
| 2496 | spin_unlock(&map_tree->map_tree.lock); | 2496 | read_unlock(&map_tree->map_tree.lock); |
| 2497 | if (!em) | 2497 | if (!em) |
| 2498 | return 1; | 2498 | return 1; |
| 2499 | 2499 | ||
| @@ -2518,11 +2518,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
| 2518 | struct extent_map *em; | 2518 | struct extent_map *em; |
| 2519 | 2519 | ||
| 2520 | while (1) { | 2520 | while (1) { |
| 2521 | spin_lock(&tree->map_tree.lock); | 2521 | write_lock(&tree->map_tree.lock); |
| 2522 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); | 2522 | em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); |
| 2523 | if (em) | 2523 | if (em) |
| 2524 | remove_extent_mapping(&tree->map_tree, em); | 2524 | remove_extent_mapping(&tree->map_tree, em); |
| 2525 | spin_unlock(&tree->map_tree.lock); | 2525 | write_unlock(&tree->map_tree.lock); |
| 2526 | if (!em) | 2526 | if (!em) |
| 2527 | break; | 2527 | break; |
| 2528 | kfree(em->bdev); | 2528 | kfree(em->bdev); |
| @@ -2540,9 +2540,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | |||
| 2540 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 2540 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
| 2541 | int ret; | 2541 | int ret; |
| 2542 | 2542 | ||
| 2543 | spin_lock(&em_tree->lock); | 2543 | read_lock(&em_tree->lock); |
| 2544 | em = lookup_extent_mapping(em_tree, logical, len); | 2544 | em = lookup_extent_mapping(em_tree, logical, len); |
| 2545 | spin_unlock(&em_tree->lock); | 2545 | read_unlock(&em_tree->lock); |
| 2546 | BUG_ON(!em); | 2546 | BUG_ON(!em); |
| 2547 | 2547 | ||
| 2548 | BUG_ON(em->start > logical || em->start + em->len < logical); | 2548 | BUG_ON(em->start > logical || em->start + em->len < logical); |
| @@ -2604,9 +2604,9 @@ again: | |||
| 2604 | atomic_set(&multi->error, 0); | 2604 | atomic_set(&multi->error, 0); |
| 2605 | } | 2605 | } |
| 2606 | 2606 | ||
| 2607 | spin_lock(&em_tree->lock); | 2607 | read_lock(&em_tree->lock); |
| 2608 | em = lookup_extent_mapping(em_tree, logical, *length); | 2608 | em = lookup_extent_mapping(em_tree, logical, *length); |
| 2609 | spin_unlock(&em_tree->lock); | 2609 | read_unlock(&em_tree->lock); |
| 2610 | 2610 | ||
| 2611 | if (!em && unplug_page) | 2611 | if (!em && unplug_page) |
| 2612 | return 0; | 2612 | return 0; |
| @@ -2763,9 +2763,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
| 2763 | u64 stripe_nr; | 2763 | u64 stripe_nr; |
| 2764 | int i, j, nr = 0; | 2764 | int i, j, nr = 0; |
| 2765 | 2765 | ||
| 2766 | spin_lock(&em_tree->lock); | 2766 | read_lock(&em_tree->lock); |
| 2767 | em = lookup_extent_mapping(em_tree, chunk_start, 1); | 2767 | em = lookup_extent_mapping(em_tree, chunk_start, 1); |
| 2768 | spin_unlock(&em_tree->lock); | 2768 | read_unlock(&em_tree->lock); |
| 2769 | 2769 | ||
| 2770 | BUG_ON(!em || em->start != chunk_start); | 2770 | BUG_ON(!em || em->start != chunk_start); |
| 2771 | map = (struct map_lookup *)em->bdev; | 2771 | map = (struct map_lookup *)em->bdev; |
| @@ -3053,9 +3053,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 3053 | logical = key->offset; | 3053 | logical = key->offset; |
| 3054 | length = btrfs_chunk_length(leaf, chunk); | 3054 | length = btrfs_chunk_length(leaf, chunk); |
| 3055 | 3055 | ||
| 3056 | spin_lock(&map_tree->map_tree.lock); | 3056 | read_lock(&map_tree->map_tree.lock); |
| 3057 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); | 3057 | em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); |
| 3058 | spin_unlock(&map_tree->map_tree.lock); | 3058 | read_unlock(&map_tree->map_tree.lock); |
| 3059 | 3059 | ||
| 3060 | /* already mapped? */ | 3060 | /* already mapped? */ |
| 3061 | if (em && em->start <= logical && em->start + em->len > logical) { | 3061 | if (em && em->start <= logical && em->start + em->len > logical) { |
| @@ -3114,9 +3114,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 3114 | map->stripes[i].dev->in_fs_metadata = 1; | 3114 | map->stripes[i].dev->in_fs_metadata = 1; |
| 3115 | } | 3115 | } |
| 3116 | 3116 | ||
| 3117 | spin_lock(&map_tree->map_tree.lock); | 3117 | write_lock(&map_tree->map_tree.lock); |
| 3118 | ret = add_extent_mapping(&map_tree->map_tree, em); | 3118 | ret = add_extent_mapping(&map_tree->map_tree, em); |
| 3119 | spin_unlock(&map_tree->map_tree.lock); | 3119 | write_unlock(&map_tree->map_tree.lock); |
| 3120 | BUG_ON(ret); | 3120 | BUG_ON(ret); |
| 3121 | free_extent_map(em); | 3121 | free_extent_map(em); |
| 3122 | 3122 | ||
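read_one_chunk() shows the check-then-insert variant of the same conversion: the "already mapped?" test runs under the read lock, and only the insertion takes the write lock. The BUG_ON(ret) after add_extent_mapping() suggests no concurrent insert of the same chunk is expected on this path (chunk tree reading at mount); that reading is an inference, not stated in the patch. A sketch of the shape, condensed from the two hunks above:

	read_lock(&map_tree->map_tree.lock);
	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
	read_unlock(&map_tree->map_tree.lock);

	if (em && em->start <= logical && em->start + em->len > logical) {
		free_extent_map(em);		/* chunk already mapped */
		return 0;
	}

	/* ... otherwise build a new extent_map describing this chunk ... */

	write_lock(&map_tree->map_tree.lock);
	ret = add_extent_mapping(&map_tree->map_tree, em);
	write_unlock(&map_tree->map_tree.lock);
	BUG_ON(ret);
	free_extent_map(em);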
