aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2009-09-11 19:07:25 -0400
committer Chris Mason <chris.mason@oracle.com> 2009-09-11 19:07:25 -0400
commit 83ebade34bc1a90d0c3f77b87b940f336d075fda (patch)
tree 99b6366c52e6bec88119ae995399c985fc61e900 /fs
parent 74fca6a42863ffacaf7ba6f1936a9f228950f657 (diff)
parent 93c82d575055f1bd0277acae6f966bebafd80dd5 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/async-thread.c 230
-rw-r--r-- fs/btrfs/async-thread.h 12
-rw-r--r-- fs/btrfs/compression.c 8
-rw-r--r-- fs/btrfs/ctree.h 2
-rw-r--r-- fs/btrfs/disk-io.c 36
-rw-r--r-- fs/btrfs/extent-tree.c 4
-rw-r--r-- fs/btrfs/extent_io.c 293
-rw-r--r-- fs/btrfs/extent_io.h 16
-rw-r--r-- fs/btrfs/extent_map.c 55
-rw-r--r-- fs/btrfs/extent_map.h 3
-rw-r--r-- fs/btrfs/file.c 35
-rw-r--r-- fs/btrfs/inode.c 112
-rw-r--r-- fs/btrfs/ioctl.c 5
-rw-r--r-- fs/btrfs/ordered-data.c 33
-rw-r--r-- fs/btrfs/ordered-data.h 3
-rw-r--r-- fs/btrfs/relocation.c 6
-rw-r--r-- fs/btrfs/tree-log.c 2
-rw-r--r-- fs/btrfs/volumes.c 42
18 files changed, 580 insertions, 317 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 019e8af449ab..6ea5cd0a595f 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -48,6 +48,9 @@ struct btrfs_worker_thread {
48 /* number of things on the pending list */ 48 /* number of things on the pending list */
49 atomic_t num_pending; 49 atomic_t num_pending;
50 50
51 /* reference counter for this struct */
52 atomic_t refs;
53
51 unsigned long sequence; 54 unsigned long sequence;
52 55
53 /* protects the pending list. */ 56 /* protects the pending list. */
@@ -93,17 +96,40 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
93 } 96 }
94} 97}
95 98
96static noinline int run_ordered_completions(struct btrfs_workers *workers, 99static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
97 struct btrfs_work *work)
98{ 100{
101 struct btrfs_workers *workers = worker->workers;
99 unsigned long flags; 102 unsigned long flags;
100 103
104 rmb();
105 if (!workers->atomic_start_pending)
106 return;
107
108 spin_lock_irqsave(&workers->lock, flags);
109 if (!workers->atomic_start_pending)
110 goto out;
111
112 workers->atomic_start_pending = 0;
113 if (workers->num_workers >= workers->max_workers)
114 goto out;
115
116 spin_unlock_irqrestore(&workers->lock, flags);
117 btrfs_start_workers(workers, 1);
118 return;
119
120out:
121 spin_unlock_irqrestore(&workers->lock, flags);
122}
123
124static noinline int run_ordered_completions(struct btrfs_workers *workers,
125 struct btrfs_work *work)
126{
101 if (!workers->ordered) 127 if (!workers->ordered)
102 return 0; 128 return 0;
103 129
104 set_bit(WORK_DONE_BIT, &work->flags); 130 set_bit(WORK_DONE_BIT, &work->flags);
105 131
106 spin_lock_irqsave(&workers->lock, flags); 132 spin_lock(&workers->order_lock);
107 133
108 while (1) { 134 while (1) {
109 if (!list_empty(&workers->prio_order_list)) { 135 if (!list_empty(&workers->prio_order_list)) {
@@ -126,45 +152,117 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
126 if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) 152 if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
127 break; 153 break;
128 154
129 spin_unlock_irqrestore(&workers->lock, flags); 155 spin_unlock(&workers->order_lock);
130 156
131 work->ordered_func(work); 157 work->ordered_func(work);
132 158
133 /* now take the lock again and call the freeing code */ 159 /* now take the lock again and call the freeing code */
134 spin_lock_irqsave(&workers->lock, flags); 160 spin_lock(&workers->order_lock);
135 list_del(&work->order_list); 161 list_del(&work->order_list);
136 work->ordered_free(work); 162 work->ordered_free(work);
137 } 163 }
138 164
139 spin_unlock_irqrestore(&workers->lock, flags); 165 spin_unlock(&workers->order_lock);
140 return 0; 166 return 0;
141} 167}
142 168
169static void put_worker(struct btrfs_worker_thread *worker)
170{
171 if (atomic_dec_and_test(&worker->refs))
172 kfree(worker);
173}
174
175static int try_worker_shutdown(struct btrfs_worker_thread *worker)
176{
177 int freeit = 0;
178
179 spin_lock_irq(&worker->lock);
180 spin_lock_irq(&worker->workers->lock);
181 if (worker->workers->num_workers > 1 &&
182 worker->idle &&
183 !worker->working &&
184 !list_empty(&worker->worker_list) &&
185 list_empty(&worker->prio_pending) &&
186 list_empty(&worker->pending)) {
187 freeit = 1;
188 list_del_init(&worker->worker_list);
189 worker->workers->num_workers--;
190 }
191 spin_unlock_irq(&worker->workers->lock);
192 spin_unlock_irq(&worker->lock);
193
194 if (freeit)
195 put_worker(worker);
196 return freeit;
197}
198
199static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
200 struct list_head *prio_head,
201 struct list_head *head)
202{
203 struct btrfs_work *work = NULL;
204 struct list_head *cur = NULL;
205
206 if(!list_empty(prio_head))
207 cur = prio_head->next;
208
209 smp_mb();
210 if (!list_empty(&worker->prio_pending))
211 goto refill;
212
213 if (!list_empty(head))
214 cur = head->next;
215
216 if (cur)
217 goto out;
218
219refill:
220 spin_lock_irq(&worker->lock);
221 list_splice_tail_init(&worker->prio_pending, prio_head);
222 list_splice_tail_init(&worker->pending, head);
223
224 if (!list_empty(prio_head))
225 cur = prio_head->next;
226 else if (!list_empty(head))
227 cur = head->next;
228 spin_unlock_irq(&worker->lock);
229
230 if (!cur)
231 goto out_fail;
232
233out:
234 work = list_entry(cur, struct btrfs_work, list);
235
236out_fail:
237 return work;
238}
239
143/* 240/*
144 * main loop for servicing work items 241 * main loop for servicing work items
145 */ 242 */
146static int worker_loop(void *arg) 243static int worker_loop(void *arg)
147{ 244{
148 struct btrfs_worker_thread *worker = arg; 245 struct btrfs_worker_thread *worker = arg;
149 struct list_head *cur; 246 struct list_head head;
247 struct list_head prio_head;
150 struct btrfs_work *work; 248 struct btrfs_work *work;
249
250 INIT_LIST_HEAD(&head);
251 INIT_LIST_HEAD(&prio_head);
252
151 do { 253 do {
152 spin_lock_irq(&worker->lock); 254again:
153again_locked:
154 while (1) { 255 while (1) {
155 if (!list_empty(&worker->prio_pending)) 256
156 cur = worker->prio_pending.next; 257
157 else if (!list_empty(&worker->pending)) 258 work = get_next_work(worker, &prio_head, &head);
158 cur = worker->pending.next; 259 if (!work)
159 else
160 break; 260 break;
161 261
162 work = list_entry(cur, struct btrfs_work, list);
163 list_del(&work->list); 262 list_del(&work->list);
164 clear_bit(WORK_QUEUED_BIT, &work->flags); 263 clear_bit(WORK_QUEUED_BIT, &work->flags);
165 264
166 work->worker = worker; 265 work->worker = worker;
167 spin_unlock_irq(&worker->lock);
168 266
169 work->func(work); 267 work->func(work);
170 268
@@ -175,9 +273,13 @@ again_locked:
175 */ 273 */
176 run_ordered_completions(worker->workers, work); 274 run_ordered_completions(worker->workers, work);
177 275
178 spin_lock_irq(&worker->lock); 276 check_pending_worker_creates(worker);
179 check_idle_worker(worker); 277
180 } 278 }
279
280 spin_lock_irq(&worker->lock);
281 check_idle_worker(worker);
282
181 if (freezing(current)) { 283 if (freezing(current)) {
182 worker->working = 0; 284 worker->working = 0;
183 spin_unlock_irq(&worker->lock); 285 spin_unlock_irq(&worker->lock);
@@ -216,8 +318,10 @@ again_locked:
216 spin_lock_irq(&worker->lock); 318 spin_lock_irq(&worker->lock);
217 set_current_state(TASK_INTERRUPTIBLE); 319 set_current_state(TASK_INTERRUPTIBLE);
218 if (!list_empty(&worker->pending) || 320 if (!list_empty(&worker->pending) ||
219 !list_empty(&worker->prio_pending)) 321 !list_empty(&worker->prio_pending)) {
220 goto again_locked; 322 spin_unlock_irq(&worker->lock);
323 goto again;
324 }
221 325
222 /* 326 /*
223 * this makes sure we get a wakeup when someone 327 * this makes sure we get a wakeup when someone
@@ -226,8 +330,13 @@ again_locked:
226 worker->working = 0; 330 worker->working = 0;
227 spin_unlock_irq(&worker->lock); 331 spin_unlock_irq(&worker->lock);
228 332
229 if (!kthread_should_stop()) 333 if (!kthread_should_stop()) {
230 schedule(); 334 schedule_timeout(HZ * 120);
335 if (!worker->working &&
336 try_worker_shutdown(worker)) {
337 return 0;
338 }
339 }
231 } 340 }
232 __set_current_state(TASK_RUNNING); 341 __set_current_state(TASK_RUNNING);
233 } 342 }
@@ -242,16 +351,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
242{ 351{
243 struct list_head *cur; 352 struct list_head *cur;
244 struct btrfs_worker_thread *worker; 353 struct btrfs_worker_thread *worker;
354 int can_stop;
245 355
356 spin_lock_irq(&workers->lock);
246 list_splice_init(&workers->idle_list, &workers->worker_list); 357 list_splice_init(&workers->idle_list, &workers->worker_list);
247 while (!list_empty(&workers->worker_list)) { 358 while (!list_empty(&workers->worker_list)) {
248 cur = workers->worker_list.next; 359 cur = workers->worker_list.next;
249 worker = list_entry(cur, struct btrfs_worker_thread, 360 worker = list_entry(cur, struct btrfs_worker_thread,
250 worker_list); 361 worker_list);
251 kthread_stop(worker->task); 362
252 list_del(&worker->worker_list); 363 atomic_inc(&worker->refs);
253 kfree(worker); 364 workers->num_workers -= 1;
365 if (!list_empty(&worker->worker_list)) {
366 list_del_init(&worker->worker_list);
367 put_worker(worker);
368 can_stop = 1;
369 } else
370 can_stop = 0;
371 spin_unlock_irq(&workers->lock);
372 if (can_stop)
373 kthread_stop(worker->task);
374 spin_lock_irq(&workers->lock);
375 put_worker(worker);
254 } 376 }
377 spin_unlock_irq(&workers->lock);
255 return 0; 378 return 0;
256} 379}
257 380
@@ -266,10 +389,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
266 INIT_LIST_HEAD(&workers->order_list); 389 INIT_LIST_HEAD(&workers->order_list);
267 INIT_LIST_HEAD(&workers->prio_order_list); 390 INIT_LIST_HEAD(&workers->prio_order_list);
268 spin_lock_init(&workers->lock); 391 spin_lock_init(&workers->lock);
392 spin_lock_init(&workers->order_lock);
269 workers->max_workers = max; 393 workers->max_workers = max;
270 workers->idle_thresh = 32; 394 workers->idle_thresh = 32;
271 workers->name = name; 395 workers->name = name;
272 workers->ordered = 0; 396 workers->ordered = 0;
397 workers->atomic_start_pending = 0;
398 workers->atomic_worker_start = 0;
273} 399}
274 400
275/* 401/*
@@ -293,7 +419,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
293 INIT_LIST_HEAD(&worker->prio_pending); 419 INIT_LIST_HEAD(&worker->prio_pending);
294 INIT_LIST_HEAD(&worker->worker_list); 420 INIT_LIST_HEAD(&worker->worker_list);
295 spin_lock_init(&worker->lock); 421 spin_lock_init(&worker->lock);
422
296 atomic_set(&worker->num_pending, 0); 423 atomic_set(&worker->num_pending, 0);
424 atomic_set(&worker->refs, 1);
297 worker->workers = workers; 425 worker->workers = workers;
298 worker->task = kthread_run(worker_loop, worker, 426 worker->task = kthread_run(worker_loop, worker,
299 "btrfs-%s-%d", workers->name, 427 "btrfs-%s-%d", workers->name,
@@ -303,7 +431,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
303 kfree(worker); 431 kfree(worker);
304 goto fail; 432 goto fail;
305 } 433 }
306
307 spin_lock_irq(&workers->lock); 434 spin_lock_irq(&workers->lock);
308 list_add_tail(&worker->worker_list, &workers->idle_list); 435 list_add_tail(&worker->worker_list, &workers->idle_list);
309 worker->idle = 1; 436 worker->idle = 1;
@@ -367,28 +494,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
367{ 494{
368 struct btrfs_worker_thread *worker; 495 struct btrfs_worker_thread *worker;
369 unsigned long flags; 496 unsigned long flags;
497 struct list_head *fallback;
370 498
371again: 499again:
372 spin_lock_irqsave(&workers->lock, flags); 500 spin_lock_irqsave(&workers->lock, flags);
373 worker = next_worker(workers); 501 worker = next_worker(workers);
374 spin_unlock_irqrestore(&workers->lock, flags);
375 502
376 if (!worker) { 503 if (!worker) {
377 spin_lock_irqsave(&workers->lock, flags);
378 if (workers->num_workers >= workers->max_workers) { 504 if (workers->num_workers >= workers->max_workers) {
379 struct list_head *fallback = NULL; 505 goto fallback;
380 /* 506 } else if (workers->atomic_worker_start) {
381 * we have failed to find any workers, just 507 workers->atomic_start_pending = 1;
382 * return the force one 508 goto fallback;
383 */
384 if (!list_empty(&workers->worker_list))
385 fallback = workers->worker_list.next;
386 if (!list_empty(&workers->idle_list))
387 fallback = workers->idle_list.next;
388 BUG_ON(!fallback);
389 worker = list_entry(fallback,
390 struct btrfs_worker_thread, worker_list);
391 spin_unlock_irqrestore(&workers->lock, flags);
392 } else { 509 } else {
393 spin_unlock_irqrestore(&workers->lock, flags); 510 spin_unlock_irqrestore(&workers->lock, flags);
394 /* we're below the limit, start another worker */ 511 /* we're below the limit, start another worker */
@@ -396,6 +513,23 @@ again:
396 goto again; 513 goto again;
397 } 514 }
398 } 515 }
516 spin_unlock_irqrestore(&workers->lock, flags);
517 return worker;
518
519fallback:
520 fallback = NULL;
521 /*
522 * we have failed to find any workers, just
523 * return the first one we can find.
524 */
525 if (!list_empty(&workers->worker_list))
526 fallback = workers->worker_list.next;
527 if (!list_empty(&workers->idle_list))
528 fallback = workers->idle_list.next;
529 BUG_ON(!fallback);
530 worker = list_entry(fallback,
531 struct btrfs_worker_thread, worker_list);
532 spin_unlock_irqrestore(&workers->lock, flags);
399 return worker; 533 return worker;
400} 534}
401 535
@@ -435,9 +569,9 @@ int btrfs_requeue_work(struct btrfs_work *work)
435 worker->working = 1; 569 worker->working = 1;
436 } 570 }
437 571
438 spin_unlock_irqrestore(&worker->lock, flags);
439 if (wake) 572 if (wake)
440 wake_up_process(worker->task); 573 wake_up_process(worker->task);
574 spin_unlock_irqrestore(&worker->lock, flags);
441out: 575out:
442 576
443 return 0; 577 return 0;
@@ -463,14 +597,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
463 597
464 worker = find_worker(workers); 598 worker = find_worker(workers);
465 if (workers->ordered) { 599 if (workers->ordered) {
466 spin_lock_irqsave(&workers->lock, flags); 600 /*
601 * you're not allowed to do ordered queues from an
602 * interrupt handler
603 */
604 spin_lock(&workers->order_lock);
467 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { 605 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
468 list_add_tail(&work->order_list, 606 list_add_tail(&work->order_list,
469 &workers->prio_order_list); 607 &workers->prio_order_list);
470 } else { 608 } else {
471 list_add_tail(&work->order_list, &workers->order_list); 609 list_add_tail(&work->order_list, &workers->order_list);
472 } 610 }
473 spin_unlock_irqrestore(&workers->lock, flags); 611 spin_unlock(&workers->order_lock);
474 } else { 612 } else {
475 INIT_LIST_HEAD(&work->order_list); 613 INIT_LIST_HEAD(&work->order_list);
476 } 614 }
@@ -492,10 +630,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
492 wake = 1; 630 wake = 1;
493 worker->working = 1; 631 worker->working = 1;
494 632
495 spin_unlock_irqrestore(&worker->lock, flags);
496
497 if (wake) 633 if (wake)
498 wake_up_process(worker->task); 634 wake_up_process(worker->task);
635 spin_unlock_irqrestore(&worker->lock, flags);
636
499out: 637out:
500 return 0; 638 return 0;
501} 639}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1b511c109db6..fc089b95ec14 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -73,6 +73,15 @@ struct btrfs_workers {
73 /* force completions in the order they were queued */ 73 /* force completions in the order they were queued */
74 int ordered; 74 int ordered;
75 75
76 /* more workers required, but in an interrupt handler */
77 int atomic_start_pending;
78
79 /*
80 * are we allowed to sleep while starting workers or are we required
81 * to start them at a later time?
82 */
83 int atomic_worker_start;
84
76 /* list with all the work threads. The workers on the idle thread 85 /* list with all the work threads. The workers on the idle thread
77 * may be actively servicing jobs, but they haven't yet hit the 86 * may be actively servicing jobs, but they haven't yet hit the
78 * idle thresh limit above. 87 * idle thresh limit above.
@@ -90,6 +99,9 @@ struct btrfs_workers {
90 /* lock for finding the next worker thread to queue on */ 99 /* lock for finding the next worker thread to queue on */
91 spinlock_t lock; 100 spinlock_t lock;
92 101
102 /* lock for the ordered lists */
103 spinlock_t order_lock;
104
93 /* extra name for this worker, used for current->name */ 105 /* extra name for this worker, used for current->name */
94 char *name; 106 char *name;
95}; 107};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9d8ba4d54a37..a11a32058b50 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
506 */ 506 */
507 set_page_extent_mapped(page); 507 set_page_extent_mapped(page);
508 lock_extent(tree, last_offset, end, GFP_NOFS); 508 lock_extent(tree, last_offset, end, GFP_NOFS);
509 spin_lock(&em_tree->lock); 509 read_lock(&em_tree->lock);
510 em = lookup_extent_mapping(em_tree, last_offset, 510 em = lookup_extent_mapping(em_tree, last_offset,
511 PAGE_CACHE_SIZE); 511 PAGE_CACHE_SIZE);
512 spin_unlock(&em_tree->lock); 512 read_unlock(&em_tree->lock);
513 513
514 if (!em || last_offset < em->start || 514 if (!em || last_offset < em->start ||
515 (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) || 515 (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
593 em_tree = &BTRFS_I(inode)->extent_tree; 593 em_tree = &BTRFS_I(inode)->extent_tree;
594 594
595 /* we need the actual starting offset of this extent in the file */ 595 /* we need the actual starting offset of this extent in the file */
596 spin_lock(&em_tree->lock); 596 read_lock(&em_tree->lock);
597 em = lookup_extent_mapping(em_tree, 597 em = lookup_extent_mapping(em_tree,
598 page_offset(bio->bi_io_vec->bv_page), 598 page_offset(bio->bi_io_vec->bv_page),
599 PAGE_CACHE_SIZE); 599 PAGE_CACHE_SIZE);
600 spin_unlock(&em_tree->lock); 600 read_unlock(&em_tree->lock);
601 601
602 compressed_len = em->block_len; 602 compressed_len = em->block_len;
603 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); 603 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 837435ce84ca..732d5b884aa7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2290,7 +2290,7 @@ extern struct file_operations btrfs_file_operations;
2290int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2290int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2291 struct btrfs_root *root, struct inode *inode, 2291 struct btrfs_root *root, struct inode *inode,
2292 u64 start, u64 end, u64 locked_end, 2292 u64 start, u64 end, u64 locked_end,
2293 u64 inline_limit, u64 *hint_block); 2293 u64 inline_limit, u64 *hint_block, int drop_cache);
2294int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2294int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2295 struct btrfs_root *root, 2295 struct btrfs_root *root,
2296 struct inode *inode, u64 start, u64 end); 2296 struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e83be2e4602c..253da7e01ab3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -123,15 +123,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
123 struct extent_map *em; 123 struct extent_map *em;
124 int ret; 124 int ret;
125 125
126 spin_lock(&em_tree->lock); 126 read_lock(&em_tree->lock);
127 em = lookup_extent_mapping(em_tree, start, len); 127 em = lookup_extent_mapping(em_tree, start, len);
128 if (em) { 128 if (em) {
129 em->bdev = 129 em->bdev =
130 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 130 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
131 spin_unlock(&em_tree->lock); 131 read_unlock(&em_tree->lock);
132 goto out; 132 goto out;
133 } 133 }
134 spin_unlock(&em_tree->lock); 134 read_unlock(&em_tree->lock);
135 135
136 em = alloc_extent_map(GFP_NOFS); 136 em = alloc_extent_map(GFP_NOFS);
137 if (!em) { 137 if (!em) {
@@ -144,7 +144,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
144 em->block_start = 0; 144 em->block_start = 0;
145 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 145 em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
146 146
147 spin_lock(&em_tree->lock); 147 write_lock(&em_tree->lock);
148 ret = add_extent_mapping(em_tree, em); 148 ret = add_extent_mapping(em_tree, em);
149 if (ret == -EEXIST) { 149 if (ret == -EEXIST) {
150 u64 failed_start = em->start; 150 u64 failed_start = em->start;
@@ -163,7 +163,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
163 free_extent_map(em); 163 free_extent_map(em);
164 em = NULL; 164 em = NULL;
165 } 165 }
166 spin_unlock(&em_tree->lock); 166 write_unlock(&em_tree->lock);
167 167
168 if (ret) 168 if (ret)
169 em = ERR_PTR(ret); 169 em = ERR_PTR(ret);
@@ -1325,9 +1325,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1325 offset = page_offset(page); 1325 offset = page_offset(page);
1326 1326
1327 em_tree = &BTRFS_I(inode)->extent_tree; 1327 em_tree = &BTRFS_I(inode)->extent_tree;
1328 spin_lock(&em_tree->lock); 1328 read_lock(&em_tree->lock);
1329 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); 1329 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
1330 spin_unlock(&em_tree->lock); 1330 read_unlock(&em_tree->lock);
1331 if (!em) { 1331 if (!em) {
1332 __unplug_io_fn(bdi, page); 1332 __unplug_io_fn(bdi, page);
1333 return; 1333 return;
@@ -1698,7 +1698,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1698 err = -EINVAL; 1698 err = -EINVAL;
1699 goto fail_iput; 1699 goto fail_iput;
1700 } 1700 }
1701 1701printk("thread pool is %d\n", fs_info->thread_pool_size);
1702 /* 1702 /*
1703 * we need to start all the end_io workers up front because the 1703 * we need to start all the end_io workers up front because the
1704 * queue work function gets called at interrupt time, and so it 1704 * queue work function gets called at interrupt time, and so it
@@ -1743,20 +1743,22 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1743 fs_info->endio_workers.idle_thresh = 4; 1743 fs_info->endio_workers.idle_thresh = 4;
1744 fs_info->endio_meta_workers.idle_thresh = 4; 1744 fs_info->endio_meta_workers.idle_thresh = 4;
1745 1745
1746 fs_info->endio_write_workers.idle_thresh = 64; 1746 fs_info->endio_write_workers.idle_thresh = 2;
1747 fs_info->endio_meta_write_workers.idle_thresh = 64; 1747 fs_info->endio_meta_write_workers.idle_thresh = 2;
1748
1749 fs_info->endio_workers.atomic_worker_start = 1;
1750 fs_info->endio_meta_workers.atomic_worker_start = 1;
1751 fs_info->endio_write_workers.atomic_worker_start = 1;
1752 fs_info->endio_meta_write_workers.atomic_worker_start = 1;
1748 1753
1749 btrfs_start_workers(&fs_info->workers, 1); 1754 btrfs_start_workers(&fs_info->workers, 1);
1750 btrfs_start_workers(&fs_info->submit_workers, 1); 1755 btrfs_start_workers(&fs_info->submit_workers, 1);
1751 btrfs_start_workers(&fs_info->delalloc_workers, 1); 1756 btrfs_start_workers(&fs_info->delalloc_workers, 1);
1752 btrfs_start_workers(&fs_info->fixup_workers, 1); 1757 btrfs_start_workers(&fs_info->fixup_workers, 1);
1753 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); 1758 btrfs_start_workers(&fs_info->endio_workers, 1);
1754 btrfs_start_workers(&fs_info->endio_meta_workers, 1759 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1755 fs_info->thread_pool_size); 1760 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1756 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1761 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1757 fs_info->thread_pool_size);
1758 btrfs_start_workers(&fs_info->endio_write_workers,
1759 fs_info->thread_pool_size);
1760 1762
1761 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1763 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1762 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1764 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..edd86ae9e149 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5396,9 +5396,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
5396 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5396 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
5397 while (1) { 5397 while (1) {
5398 int ret; 5398 int ret;
5399 spin_lock(&em_tree->lock); 5399 write_lock(&em_tree->lock);
5400 ret = add_extent_mapping(em_tree, em); 5400 ret = add_extent_mapping(em_tree, em);
5401 spin_unlock(&em_tree->lock); 5401 write_unlock(&em_tree->lock);
5402 if (ret != -EEXIST) { 5402 if (ret != -EEXIST) {
5403 free_extent_map(em); 5403 free_extent_map(em);
5404 break; 5404 break;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 68260180f587..a102422cd92e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree,
367 } 367 }
368 if (bits & EXTENT_DIRTY) 368 if (bits & EXTENT_DIRTY)
369 tree->dirty_bytes += end - start + 1; 369 tree->dirty_bytes += end - start + 1;
370 set_state_cb(tree, state, bits);
371 state->state |= bits;
372 state->start = start; 370 state->start = start;
373 state->end = end; 371 state->end = end;
372 set_state_cb(tree, state, bits);
373 state->state |= bits;
374 node = tree_insert(&tree->state, end, &state->rb_node); 374 node = tree_insert(&tree->state, end, &state->rb_node);
375 if (node) { 375 if (node) {
376 struct extent_state *found; 376 struct extent_state *found;
@@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree,
471 * bits were already set, or zero if none of the bits were already set. 471 * bits were already set, or zero if none of the bits were already set.
472 */ 472 */
473int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 473int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
474 int bits, int wake, int delete, gfp_t mask) 474 int bits, int wake, int delete,
475 struct extent_state **cached_state,
476 gfp_t mask)
475{ 477{
476 struct extent_state *state; 478 struct extent_state *state;
479 struct extent_state *cached;
477 struct extent_state *prealloc = NULL; 480 struct extent_state *prealloc = NULL;
481 struct rb_node *next_node;
478 struct rb_node *node; 482 struct rb_node *node;
479 u64 last_end; 483 u64 last_end;
480 int err; 484 int err;
@@ -488,6 +492,17 @@ again:
488 } 492 }
489 493
490 spin_lock(&tree->lock); 494 spin_lock(&tree->lock);
495 if (cached_state) {
496 cached = *cached_state;
497 *cached_state = NULL;
498 if (cached->tree && cached->start == start) {
499 atomic_dec(&cached->refs);
500 state = cached;
501 last_end = state->end;
502 goto found;
503 }
504 free_extent_state(cached);
505 }
491 /* 506 /*
492 * this search will find the extents that end after 507 * this search will find the extents that end after
493 * our range starts 508 * our range starts
@@ -496,6 +511,7 @@ again:
496 if (!node) 511 if (!node)
497 goto out; 512 goto out;
498 state = rb_entry(node, struct extent_state, rb_node); 513 state = rb_entry(node, struct extent_state, rb_node);
514hit_next:
499 if (state->start > end) 515 if (state->start > end)
500 goto out; 516 goto out;
501 WARN_ON(state->end < start); 517 WARN_ON(state->end < start);
@@ -555,11 +571,21 @@ again:
555 prealloc = NULL; 571 prealloc = NULL;
556 goto out; 572 goto out;
557 } 573 }
558 574found:
575 if (state->end < end && prealloc && !need_resched())
576 next_node = rb_next(&state->rb_node);
577 else
578 next_node = NULL;
559 set |= clear_state_bit(tree, state, bits, wake, delete); 579 set |= clear_state_bit(tree, state, bits, wake, delete);
560 if (last_end == (u64)-1) 580 if (last_end == (u64)-1)
561 goto out; 581 goto out;
562 start = last_end + 1; 582 start = last_end + 1;
583 if (start <= end && next_node) {
584 state = rb_entry(next_node, struct extent_state,
585 rb_node);
586 if (state->start == start)
587 goto hit_next;
588 }
563 goto search_again; 589 goto search_again;
564 590
565out: 591out:
@@ -653,26 +679,37 @@ static void set_state_bits(struct extent_io_tree *tree,
653 state->state |= bits; 679 state->state |= bits;
654} 680}
655 681
682static void cache_state(struct extent_state *state,
683 struct extent_state **cached_ptr)
684{
685 if (cached_ptr && !(*cached_ptr)) {
686 if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
687 *cached_ptr = state;
688 atomic_inc(&state->refs);
689 }
690 }
691}
692
656/* 693/*
657 * set some bits on a range in the tree. This may require allocations 694 * set some bits on a range in the tree. This may require allocations or
658 * or sleeping, so the gfp mask is used to indicate what is allowed. 695 * sleeping, so the gfp mask is used to indicate what is allowed.
659 * 696 *
660 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the 697 * If any of the exclusive bits are set, this will fail with -EEXIST if some
661 * range already has the desired bits set. The start of the existing 698 * part of the range already has the desired bits set. The start of the
662 * range is returned in failed_start in this case. 699 * existing range is returned in failed_start in this case.
663 * 700 *
664 * [start, end] is inclusive 701 * [start, end] is inclusive This takes the tree lock.
665 * This takes the tree lock.
666 */ 702 */
703
667static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 704static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
668 int bits, int exclusive, u64 *failed_start, 705 int bits, int exclusive_bits, u64 *failed_start,
706 struct extent_state **cached_state,
669 gfp_t mask) 707 gfp_t mask)
670{ 708{
671 struct extent_state *state; 709 struct extent_state *state;
672 struct extent_state *prealloc = NULL; 710 struct extent_state *prealloc = NULL;
673 struct rb_node *node; 711 struct rb_node *node;
674 int err = 0; 712 int err = 0;
675 int set;
676 u64 last_start; 713 u64 last_start;
677 u64 last_end; 714 u64 last_end;
678again: 715again:
@@ -683,6 +720,13 @@ again:
683 } 720 }
684 721
685 spin_lock(&tree->lock); 722 spin_lock(&tree->lock);
723 if (cached_state && *cached_state) {
724 state = *cached_state;
725 if (state->start == start && state->tree) {
726 node = &state->rb_node;
727 goto hit_next;
728 }
729 }
686 /* 730 /*
687 * this search will find all the extents that end after 731 * this search will find all the extents that end after
688 * our range starts. 732 * our range starts.
@@ -694,8 +738,8 @@ again:
694 BUG_ON(err == -EEXIST); 738 BUG_ON(err == -EEXIST);
695 goto out; 739 goto out;
696 } 740 }
697
698 state = rb_entry(node, struct extent_state, rb_node); 741 state = rb_entry(node, struct extent_state, rb_node);
742hit_next:
699 last_start = state->start; 743 last_start = state->start;
700 last_end = state->end; 744 last_end = state->end;
701 745
@@ -706,17 +750,28 @@ again:
706 * Just lock what we found and keep going 750 * Just lock what we found and keep going
707 */ 751 */
708 if (state->start == start && state->end <= end) { 752 if (state->start == start && state->end <= end) {
709 set = state->state & bits; 753 struct rb_node *next_node;
710 if (set && exclusive) { 754 if (state->state & exclusive_bits) {
711 *failed_start = state->start; 755 *failed_start = state->start;
712 err = -EEXIST; 756 err = -EEXIST;
713 goto out; 757 goto out;
714 } 758 }
715 set_state_bits(tree, state, bits); 759 set_state_bits(tree, state, bits);
760 cache_state(state, cached_state);
716 merge_state(tree, state); 761 merge_state(tree, state);
717 if (last_end == (u64)-1) 762 if (last_end == (u64)-1)
718 goto out; 763 goto out;
764
719 start = last_end + 1; 765 start = last_end + 1;
766 if (start < end && prealloc && !need_resched()) {
767 next_node = rb_next(node);
768 if (next_node) {
769 state = rb_entry(next_node, struct extent_state,
770 rb_node);
771 if (state->start == start)
772 goto hit_next;
773 }
774 }
720 goto search_again; 775 goto search_again;
721 } 776 }
722 777
@@ -737,8 +792,7 @@ again:
737 * desired bit on it. 792 * desired bit on it.
738 */ 793 */
739 if (state->start < start) { 794 if (state->start < start) {
740 set = state->state & bits; 795 if (state->state & exclusive_bits) {
741 if (exclusive && set) {
742 *failed_start = start; 796 *failed_start = start;
743 err = -EEXIST; 797 err = -EEXIST;
744 goto out; 798 goto out;
@@ -750,6 +804,7 @@ again:
750 goto out; 804 goto out;
751 if (state->end <= end) { 805 if (state->end <= end) {
752 set_state_bits(tree, state, bits); 806 set_state_bits(tree, state, bits);
807 cache_state(state, cached_state);
753 merge_state(tree, state); 808 merge_state(tree, state);
754 if (last_end == (u64)-1) 809 if (last_end == (u64)-1)
755 goto out; 810 goto out;
@@ -774,6 +829,7 @@ again:
774 this_end = last_start - 1; 829 this_end = last_start - 1;
775 err = insert_state(tree, prealloc, start, this_end, 830 err = insert_state(tree, prealloc, start, this_end,
776 bits); 831 bits);
832 cache_state(prealloc, cached_state);
777 prealloc = NULL; 833 prealloc = NULL;
778 BUG_ON(err == -EEXIST); 834 BUG_ON(err == -EEXIST);
779 if (err) 835 if (err)
@@ -788,8 +844,7 @@ again:
788 * on the first half 844 * on the first half
789 */ 845 */
790 if (state->start <= end && state->end > end) { 846 if (state->start <= end && state->end > end) {
791 set = state->state & bits; 847 if (state->state & exclusive_bits) {
792 if (exclusive && set) {
793 *failed_start = start; 848 *failed_start = start;
794 err = -EEXIST; 849 err = -EEXIST;
795 goto out; 850 goto out;
@@ -798,6 +853,7 @@ again:
798 BUG_ON(err == -EEXIST); 853 BUG_ON(err == -EEXIST);
799 854
800 set_state_bits(tree, prealloc, bits); 855 set_state_bits(tree, prealloc, bits);
856 cache_state(prealloc, cached_state);
801 merge_state(tree, prealloc); 857 merge_state(tree, prealloc);
802 prealloc = NULL; 858 prealloc = NULL;
803 goto out; 859 goto out;
@@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
826 gfp_t mask) 882 gfp_t mask)
827{ 883{
828 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, 884 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
829 mask); 885 NULL, mask);
830}
831
832int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
833 gfp_t mask)
834{
835 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
836} 886}
837 887
838int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 888int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
839 int bits, gfp_t mask) 889 int bits, gfp_t mask)
840{ 890{
841 return set_extent_bit(tree, start, end, bits, 0, NULL, 891 return set_extent_bit(tree, start, end, bits, 0, NULL,
842 mask); 892 NULL, mask);
843} 893}
844 894
845int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 895int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
846 int bits, gfp_t mask) 896 int bits, gfp_t mask)
847{ 897{
848 return clear_extent_bit(tree, start, end, bits, 0, 0, mask); 898 return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
849} 899}
850 900
851int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 901int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
852 gfp_t mask) 902 gfp_t mask)
853{ 903{
854 return set_extent_bit(tree, start, end, 904 return set_extent_bit(tree, start, end,
855 EXTENT_DELALLOC | EXTENT_DIRTY, 905 EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
856 0, NULL, mask); 906 0, NULL, NULL, mask);
857} 907}
858 908
859int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 909int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
860 gfp_t mask) 910 gfp_t mask)
861{ 911{
862 return clear_extent_bit(tree, start, end, 912 return clear_extent_bit(tree, start, end,
863 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); 913 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
864} 914 NULL, mask);
865
866int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
867 gfp_t mask)
868{
869 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
870} 915}
871 916
872int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 917int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
873 gfp_t mask) 918 gfp_t mask)
874{ 919{
875 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, 920 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
876 mask); 921 NULL, mask);
877} 922}
878 923
879static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 924static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
880 gfp_t mask) 925 gfp_t mask)
881{ 926{
882 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); 927 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
928 NULL, mask);
883} 929}
884 930
885int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 931int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
886 gfp_t mask) 932 gfp_t mask)
887{ 933{
888 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 934 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
889 mask); 935 NULL, mask);
890} 936}
891 937
892static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 938static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
893 u64 end, gfp_t mask) 939 u64 end, gfp_t mask)
894{ 940{
895 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); 941 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
896} 942 NULL, mask);
897
898static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
899 gfp_t mask)
900{
901 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
902 0, NULL, mask);
903}
904
905static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
906 u64 end, gfp_t mask)
907{
908 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
909} 943}
910 944
911int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 945int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
917 * either insert or lock state struct between start and end use mask to tell 951 * either insert or lock state struct between start and end use mask to tell
918 * us if waiting is desired. 952 * us if waiting is desired.
919 */ 953 */
920int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) 954int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
955 int bits, struct extent_state **cached_state, gfp_t mask)
921{ 956{
922 int err; 957 int err;
923 u64 failed_start; 958 u64 failed_start;
924 while (1) { 959 while (1) {
925 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 960 err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
926 &failed_start, mask); 961 EXTENT_LOCKED, &failed_start,
962 cached_state, mask);
927 if (err == -EEXIST && (mask & __GFP_WAIT)) { 963 if (err == -EEXIST && (mask & __GFP_WAIT)) {
928 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); 964 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
929 start = failed_start; 965 start = failed_start;
@@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
935 return err; 971 return err;
936} 972}
937 973
974int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
975{
976 return lock_extent_bits(tree, start, end, 0, NULL, mask);
977}
978
938int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 979int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
939 gfp_t mask) 980 gfp_t mask)
940{ 981{
941 int err; 982 int err;
942 u64 failed_start; 983 u64 failed_start;
943 984
944 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 985 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
945 &failed_start, mask); 986 &failed_start, NULL, mask);
946 if (err == -EEXIST) { 987 if (err == -EEXIST) {
947 if (failed_start > start) 988 if (failed_start > start)
948 clear_extent_bit(tree, start, failed_start - 1, 989 clear_extent_bit(tree, start, failed_start - 1,
949 EXTENT_LOCKED, 1, 0, mask); 990 EXTENT_LOCKED, 1, 0, NULL, mask);
950 return 0; 991 return 0;
951 } 992 }
952 return 1; 993 return 1;
953} 994}
954 995
996int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
997 struct extent_state **cached, gfp_t mask)
998{
999 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
1000 mask);
1001}
1002
955int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1003int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
956 gfp_t mask) 1004 gfp_t mask)
957{ 1005{
958 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); 1006 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1007 mask);
959} 1008}
960 1009
961/* 1010/*
@@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
974 page_cache_release(page); 1023 page_cache_release(page);
975 index++; 1024 index++;
976 } 1025 }
977 set_extent_dirty(tree, start, end, GFP_NOFS);
978 return 0; 1026 return 0;
979} 1027}
980 1028
@@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
994 page_cache_release(page); 1042 page_cache_release(page);
995 index++; 1043 index++;
996 } 1044 }
997 set_extent_writeback(tree, start, end, GFP_NOFS);
998 return 0; 1045 return 0;
999} 1046}
1000 1047
@@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode,
1232 u64 delalloc_start; 1279 u64 delalloc_start;
1233 u64 delalloc_end; 1280 u64 delalloc_end;
1234 u64 found; 1281 u64 found;
1282 struct extent_state *cached_state = NULL;
1235 int ret; 1283 int ret;
1236 int loops = 0; 1284 int loops = 0;
1237 1285
@@ -1269,6 +1317,7 @@ again:
1269 /* some of the pages are gone, lets avoid looping by 1317 /* some of the pages are gone, lets avoid looping by
1270 * shortening the size of the delalloc range we're searching 1318 * shortening the size of the delalloc range we're searching
1271 */ 1319 */
1320 free_extent_state(cached_state);
1272 if (!loops) { 1321 if (!loops) {
1273 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1); 1322 unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
1274 max_bytes = PAGE_CACHE_SIZE - offset; 1323 max_bytes = PAGE_CACHE_SIZE - offset;
@@ -1282,18 +1331,21 @@ again:
1282 BUG_ON(ret); 1331 BUG_ON(ret);
1283 1332
1284 /* step three, lock the state bits for the whole range */ 1333 /* step three, lock the state bits for the whole range */
1285 lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1334 lock_extent_bits(tree, delalloc_start, delalloc_end,
1335 0, &cached_state, GFP_NOFS);
1286 1336
1287 /* then test to make sure it is all still delalloc */ 1337 /* then test to make sure it is all still delalloc */
1288 ret = test_range_bit(tree, delalloc_start, delalloc_end, 1338 ret = test_range_bit(tree, delalloc_start, delalloc_end,
1289 EXTENT_DELALLOC, 1); 1339 EXTENT_DELALLOC, 1, cached_state);
1290 if (!ret) { 1340 if (!ret) {
1291 unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS); 1341 unlock_extent_cached(tree, delalloc_start, delalloc_end,
1342 &cached_state, GFP_NOFS);
1292 __unlock_for_delalloc(inode, locked_page, 1343 __unlock_for_delalloc(inode, locked_page,
1293 delalloc_start, delalloc_end); 1344 delalloc_start, delalloc_end);
1294 cond_resched(); 1345 cond_resched();
1295 goto again; 1346 goto again;
1296 } 1347 }
1348 free_extent_state(cached_state);
1297 *start = delalloc_start; 1349 *start = delalloc_start;
1298 *end = delalloc_end; 1350 *end = delalloc_end;
1299out_failed: 1351out_failed:
@@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1307 int clear_unlock, 1359 int clear_unlock,
1308 int clear_delalloc, int clear_dirty, 1360 int clear_delalloc, int clear_dirty,
1309 int set_writeback, 1361 int set_writeback,
1310 int end_writeback) 1362 int end_writeback,
1363 int set_private2)
1311{ 1364{
1312 int ret; 1365 int ret;
1313 struct page *pages[16]; 1366 struct page *pages[16];
@@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1325 if (clear_delalloc) 1378 if (clear_delalloc)
1326 clear_bits |= EXTENT_DELALLOC; 1379 clear_bits |= EXTENT_DELALLOC;
1327 1380
1328 clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS); 1381 clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
1329 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback)) 1382 if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
1383 set_private2))
1330 return 0; 1384 return 0;
1331 1385
1332 while (nr_pages > 0) { 1386 while (nr_pages > 0) {
@@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode,
1334 min_t(unsigned long, 1388 min_t(unsigned long,
1335 nr_pages, ARRAY_SIZE(pages)), pages); 1389 nr_pages, ARRAY_SIZE(pages)), pages);
1336 for (i = 0; i < ret; i++) { 1390 for (i = 0; i < ret; i++) {
1391
1392 if (set_private2)
1393 SetPagePrivate2(pages[i]);
1394
1337 if (pages[i] == locked_page) { 1395 if (pages[i] == locked_page) {
1338 page_cache_release(pages[i]); 1396 page_cache_release(pages[i]);
1339 continue; 1397 continue;
@@ -1476,14 +1534,17 @@ out:
1476 * range is found set. 1534 * range is found set.
1477 */ 1535 */
1478int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 1536int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1479 int bits, int filled) 1537 int bits, int filled, struct extent_state *cached)
1480{ 1538{
1481 struct extent_state *state = NULL; 1539 struct extent_state *state = NULL;
1482 struct rb_node *node; 1540 struct rb_node *node;
1483 int bitset = 0; 1541 int bitset = 0;
1484 1542
1485 spin_lock(&tree->lock); 1543 spin_lock(&tree->lock);
1486 node = tree_search(tree, start); 1544 if (cached && cached->tree && cached->start == start)
1545 node = &cached->rb_node;
1546 else
1547 node = tree_search(tree, start);
1487 while (node && start <= end) { 1548 while (node && start <= end) {
1488 state = rb_entry(node, struct extent_state, rb_node); 1549 state = rb_entry(node, struct extent_state, rb_node);
1489 1550
@@ -1526,7 +1587,7 @@ static int check_page_uptodate(struct extent_io_tree *tree,
1526{ 1587{
1527 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1588 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1528 u64 end = start + PAGE_CACHE_SIZE - 1; 1589 u64 end = start + PAGE_CACHE_SIZE - 1;
1529 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) 1590 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1530 SetPageUptodate(page); 1591 SetPageUptodate(page);
1531 return 0; 1592 return 0;
1532} 1593}
@@ -1540,7 +1601,7 @@ static int check_page_locked(struct extent_io_tree *tree,
1540{ 1601{
1541 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1602 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1542 u64 end = start + PAGE_CACHE_SIZE - 1; 1603 u64 end = start + PAGE_CACHE_SIZE - 1;
1543 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) 1604 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1544 unlock_page(page); 1605 unlock_page(page);
1545 return 0; 1606 return 0;
1546} 1607}
@@ -1552,10 +1613,7 @@ static int check_page_locked(struct extent_io_tree *tree,
1552static int check_page_writeback(struct extent_io_tree *tree, 1613static int check_page_writeback(struct extent_io_tree *tree,
1553 struct page *page) 1614 struct page *page)
1554{ 1615{
1555 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1616 end_page_writeback(page);
1556 u64 end = start + PAGE_CACHE_SIZE - 1;
1557 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1558 end_page_writeback(page);
1559 return 0; 1617 return 0;
1560} 1618}
1561 1619
@@ -1613,13 +1671,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1613 } 1671 }
1614 1672
1615 if (!uptodate) { 1673 if (!uptodate) {
1616 clear_extent_uptodate(tree, start, end, GFP_ATOMIC); 1674 clear_extent_uptodate(tree, start, end, GFP_NOFS);
1617 ClearPageUptodate(page); 1675 ClearPageUptodate(page);
1618 SetPageError(page); 1676 SetPageError(page);
1619 } 1677 }
1620 1678
1621 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1622
1623 if (whole_page) 1679 if (whole_page)
1624 end_page_writeback(page); 1680 end_page_writeback(page);
1625 else 1681 else
@@ -1983,7 +2039,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
1983 continue; 2039 continue;
1984 } 2040 }
1985 /* the get_extent function already copied into the page */ 2041 /* the get_extent function already copied into the page */
1986 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { 2042 if (test_range_bit(tree, cur, cur_end,
2043 EXTENT_UPTODATE, 1, NULL)) {
1987 check_page_uptodate(tree, page); 2044 check_page_uptodate(tree, page);
1988 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2045 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1989 cur = cur + iosize; 2046 cur = cur + iosize;
@@ -2078,6 +2135,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2078 u64 iosize; 2135 u64 iosize;
2079 u64 unlock_start; 2136 u64 unlock_start;
2080 sector_t sector; 2137 sector_t sector;
2138 struct extent_state *cached_state = NULL;
2081 struct extent_map *em; 2139 struct extent_map *em;
2082 struct block_device *bdev; 2140 struct block_device *bdev;
2083 int ret; 2141 int ret;
@@ -2124,6 +2182,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2124 delalloc_end = 0; 2182 delalloc_end = 0;
2125 page_started = 0; 2183 page_started = 0;
2126 if (!epd->extent_locked) { 2184 if (!epd->extent_locked) {
2185 u64 delalloc_to_write;
2127 /* 2186 /*
2128 * make sure the wbc mapping index is at least updated 2187 * make sure the wbc mapping index is at least updated
2129 * to this page. 2188 * to this page.
@@ -2143,6 +2202,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2143 tree->ops->fill_delalloc(inode, page, delalloc_start, 2202 tree->ops->fill_delalloc(inode, page, delalloc_start,
2144 delalloc_end, &page_started, 2203 delalloc_end, &page_started,
2145 &nr_written); 2204 &nr_written);
2205 delalloc_to_write = (delalloc_end -
2206 max_t(u64, page_offset(page),
2207 delalloc_start) + 1) >>
2208 PAGE_CACHE_SHIFT;
2209 if (wbc->nr_to_write < delalloc_to_write) {
2210 wbc->nr_to_write = min_t(long, 8192,
2211 delalloc_to_write);
2212 }
2146 delalloc_start = delalloc_end + 1; 2213 delalloc_start = delalloc_end + 1;
2147 } 2214 }
2148 2215
@@ -2160,15 +2227,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2160 goto done_unlocked; 2227 goto done_unlocked;
2161 } 2228 }
2162 } 2229 }
2163 lock_extent(tree, start, page_end, GFP_NOFS);
2164
2165 unlock_start = start;
2166
2167 if (tree->ops && tree->ops->writepage_start_hook) { 2230 if (tree->ops && tree->ops->writepage_start_hook) {
2168 ret = tree->ops->writepage_start_hook(page, start, 2231 ret = tree->ops->writepage_start_hook(page, start,
2169 page_end); 2232 page_end);
2170 if (ret == -EAGAIN) { 2233 if (ret == -EAGAIN) {
2171 unlock_extent(tree, start, page_end, GFP_NOFS);
2172 redirty_page_for_writepage(wbc, page); 2234 redirty_page_for_writepage(wbc, page);
2173 update_nr_written(page, wbc, nr_written); 2235 update_nr_written(page, wbc, nr_written);
2174 unlock_page(page); 2236 unlock_page(page);
@@ -2184,12 +2246,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2184 update_nr_written(page, wbc, nr_written + 1); 2246 update_nr_written(page, wbc, nr_written + 1);
2185 2247
2186 end = page_end; 2248 end = page_end;
2187 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
2188 printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
2189
2190 if (last_byte <= start) { 2249 if (last_byte <= start) {
2191 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
2192 unlock_extent(tree, start, page_end, GFP_NOFS);
2193 if (tree->ops && tree->ops->writepage_end_io_hook) 2250 if (tree->ops && tree->ops->writepage_end_io_hook)
2194 tree->ops->writepage_end_io_hook(page, start, 2251 tree->ops->writepage_end_io_hook(page, start,
2195 page_end, NULL, 1); 2252 page_end, NULL, 1);
@@ -2197,13 +2254,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2197 goto done; 2254 goto done;
2198 } 2255 }
2199 2256
2200 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2201 blocksize = inode->i_sb->s_blocksize; 2257 blocksize = inode->i_sb->s_blocksize;
2202 2258
2203 while (cur <= end) { 2259 while (cur <= end) {
2204 if (cur >= last_byte) { 2260 if (cur >= last_byte) {
2205 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
2206 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2207 if (tree->ops && tree->ops->writepage_end_io_hook) 2261 if (tree->ops && tree->ops->writepage_end_io_hook)
2208 tree->ops->writepage_end_io_hook(page, cur, 2262 tree->ops->writepage_end_io_hook(page, cur,
2209 page_end, NULL, 1); 2263 page_end, NULL, 1);
@@ -2235,12 +2289,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2235 */ 2289 */
2236 if (compressed || block_start == EXTENT_MAP_HOLE || 2290 if (compressed || block_start == EXTENT_MAP_HOLE ||
2237 block_start == EXTENT_MAP_INLINE) { 2291 block_start == EXTENT_MAP_INLINE) {
2238 clear_extent_dirty(tree, cur,
2239 cur + iosize - 1, GFP_NOFS);
2240
2241 unlock_extent(tree, unlock_start, cur + iosize - 1,
2242 GFP_NOFS);
2243
2244 /* 2292 /*
2245 * end_io notification does not happen here for 2293 * end_io notification does not happen here for
2246 * compressed extents 2294 * compressed extents
@@ -2265,13 +2313,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2265 } 2313 }
2266 /* leave this out until we have a page_mkwrite call */ 2314 /* leave this out until we have a page_mkwrite call */
2267 if (0 && !test_range_bit(tree, cur, cur + iosize - 1, 2315 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2268 EXTENT_DIRTY, 0)) { 2316 EXTENT_DIRTY, 0, NULL)) {
2269 cur = cur + iosize; 2317 cur = cur + iosize;
2270 pg_offset += iosize; 2318 pg_offset += iosize;
2271 continue; 2319 continue;
2272 } 2320 }
2273 2321
2274 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2275 if (tree->ops && tree->ops->writepage_io_hook) { 2322 if (tree->ops && tree->ops->writepage_io_hook) {
2276 ret = tree->ops->writepage_io_hook(page, cur, 2323 ret = tree->ops->writepage_io_hook(page, cur,
2277 cur + iosize - 1); 2324 cur + iosize - 1);
@@ -2309,12 +2356,12 @@ done:
2309 set_page_writeback(page); 2356 set_page_writeback(page);
2310 end_page_writeback(page); 2357 end_page_writeback(page);
2311 } 2358 }
2312 if (unlock_start <= page_end)
2313 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2314 unlock_page(page); 2359 unlock_page(page);
2315 2360
2316done_unlocked: 2361done_unlocked:
2317 2362
2363 /* drop our reference on any cached states */
2364 free_extent_state(cached_state);
2318 return 0; 2365 return 0;
2319} 2366}
2320 2367
@@ -2339,7 +2386,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2339 writepage_t writepage, void *data, 2386 writepage_t writepage, void *data,
2340 void (*flush_fn)(void *)) 2387 void (*flush_fn)(void *))
2341{ 2388{
2342 struct backing_dev_info *bdi = mapping->backing_dev_info;
2343 int ret = 0; 2389 int ret = 0;
2344 int done = 0; 2390 int done = 0;
2345 struct pagevec pvec; 2391 struct pagevec pvec;
@@ -2414,10 +2460,6 @@ retry:
2414 } 2460 }
2415 if (ret || wbc->nr_to_write <= 0) 2461 if (ret || wbc->nr_to_write <= 0)
2416 done = 1; 2462 done = 1;
2417 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2418 wbc->encountered_congestion = 1;
2419 done = 1;
2420 }
2421 } 2463 }
2422 pagevec_release(&pvec); 2464 pagevec_release(&pvec);
2423 cond_resched(); 2465 cond_resched();
@@ -2604,10 +2646,10 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2604 return 0; 2646 return 0;
2605 2647
2606 lock_extent(tree, start, end, GFP_NOFS); 2648 lock_extent(tree, start, end, GFP_NOFS);
2607 wait_on_extent_writeback(tree, start, end); 2649 wait_on_page_writeback(page);
2608 clear_extent_bit(tree, start, end, 2650 clear_extent_bit(tree, start, end,
2609 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, 2651 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2610 1, 1, GFP_NOFS); 2652 1, 1, NULL, GFP_NOFS);
2611 return 0; 2653 return 0;
2612} 2654}
2613 2655
@@ -2687,7 +2729,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
2687 !isnew && !PageUptodate(page) && 2729 !isnew && !PageUptodate(page) &&
2688 (block_off_end > to || block_off_start < from) && 2730 (block_off_end > to || block_off_start < from) &&
2689 !test_range_bit(tree, block_start, cur_end, 2731 !test_range_bit(tree, block_start, cur_end,
2690 EXTENT_UPTODATE, 1)) { 2732 EXTENT_UPTODATE, 1, NULL)) {
2691 u64 sector; 2733 u64 sector;
2692 u64 extent_offset = block_start - em->start; 2734 u64 extent_offset = block_start - em->start;
2693 size_t iosize; 2735 size_t iosize;
@@ -2701,7 +2743,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
2701 */ 2743 */
2702 set_extent_bit(tree, block_start, 2744 set_extent_bit(tree, block_start,
2703 block_start + iosize - 1, 2745 block_start + iosize - 1,
2704 EXTENT_LOCKED, 0, NULL, GFP_NOFS); 2746 EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
2705 ret = submit_extent_page(READ, tree, page, 2747 ret = submit_extent_page(READ, tree, page,
2706 sector, iosize, page_offset, em->bdev, 2748 sector, iosize, page_offset, em->bdev,
2707 NULL, 1, 2749 NULL, 1,
@@ -2742,13 +2784,13 @@ int try_release_extent_state(struct extent_map_tree *map,
2742 int ret = 1; 2784 int ret = 1;
2743 2785
2744 if (test_range_bit(tree, start, end, 2786 if (test_range_bit(tree, start, end,
2745 EXTENT_IOBITS | EXTENT_ORDERED, 0)) 2787 EXTENT_IOBITS, 0, NULL))
2746 ret = 0; 2788 ret = 0;
2747 else { 2789 else {
2748 if ((mask & GFP_NOFS) == GFP_NOFS) 2790 if ((mask & GFP_NOFS) == GFP_NOFS)
2749 mask = GFP_NOFS; 2791 mask = GFP_NOFS;
2750 clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 2792 clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
2751 1, 1, mask); 2793 1, 1, NULL, mask);
2752 } 2794 }
2753 return ret; 2795 return ret;
2754} 2796}
@@ -2771,29 +2813,28 @@ int try_release_extent_mapping(struct extent_map_tree *map,
2771 u64 len; 2813 u64 len;
2772 while (start <= end) { 2814 while (start <= end) {
2773 len = end - start + 1; 2815 len = end - start + 1;
2774 spin_lock(&map->lock); 2816 write_lock(&map->lock);
2775 em = lookup_extent_mapping(map, start, len); 2817 em = lookup_extent_mapping(map, start, len);
2776 if (!em || IS_ERR(em)) { 2818 if (!em || IS_ERR(em)) {
2777 spin_unlock(&map->lock); 2819 write_unlock(&map->lock);
2778 break; 2820 break;
2779 } 2821 }
2780 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) || 2822 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2781 em->start != start) { 2823 em->start != start) {
2782 spin_unlock(&map->lock); 2824 write_unlock(&map->lock);
2783 free_extent_map(em); 2825 free_extent_map(em);
2784 break; 2826 break;
2785 } 2827 }
2786 if (!test_range_bit(tree, em->start, 2828 if (!test_range_bit(tree, em->start,
2787 extent_map_end(em) - 1, 2829 extent_map_end(em) - 1,
2788 EXTENT_LOCKED | EXTENT_WRITEBACK | 2830 EXTENT_LOCKED | EXTENT_WRITEBACK,
2789 EXTENT_ORDERED, 2831 0, NULL)) {
2790 0)) {
2791 remove_extent_mapping(map, em); 2832 remove_extent_mapping(map, em);
2792 /* once for the rb tree */ 2833 /* once for the rb tree */
2793 free_extent_map(em); 2834 free_extent_map(em);
2794 } 2835 }
2795 start = extent_map_end(em); 2836 start = extent_map_end(em);
2796 spin_unlock(&map->lock); 2837 write_unlock(&map->lock);
2797 2838
2798 /* once for us */ 2839 /* once for us */
2799 free_extent_map(em); 2840 free_extent_map(em);
@@ -3203,7 +3244,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3203 int uptodate; 3244 int uptodate;
3204 unsigned long index; 3245 unsigned long index;
3205 3246
3206 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1); 3247 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
3207 if (ret) 3248 if (ret)
3208 return 1; 3249 return 1;
3209 while (start <= end) { 3250 while (start <= end) {
@@ -3233,7 +3274,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
3233 return 1; 3274 return 1;
3234 3275
3235 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3276 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3236 EXTENT_UPTODATE, 1); 3277 EXTENT_UPTODATE, 1, NULL);
3237 if (ret) 3278 if (ret)
3238 return ret; 3279 return ret;
3239 3280
@@ -3269,7 +3310,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
3269 return 0; 3310 return 0;
3270 3311
3271 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3312 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3272 EXTENT_UPTODATE, 1)) { 3313 EXTENT_UPTODATE, 1, NULL)) {
3273 return 0; 3314 return 0;
3274 } 3315 }
3275 3316
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5bc20abf3f3d..14ed16fd862d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -13,10 +13,8 @@
13#define EXTENT_DEFRAG (1 << 6) 13#define EXTENT_DEFRAG (1 << 6)
14#define EXTENT_DEFRAG_DONE (1 << 7) 14#define EXTENT_DEFRAG_DONE (1 << 7)
15#define EXTENT_BUFFER_FILLED (1 << 8) 15#define EXTENT_BUFFER_FILLED (1 << 8)
16#define EXTENT_ORDERED (1 << 9) 16#define EXTENT_BOUNDARY (1 << 9)
17#define EXTENT_ORDERED_METADATA (1 << 10) 17#define EXTENT_NODATASUM (1 << 10)
18#define EXTENT_BOUNDARY (1 << 11)
19#define EXTENT_NODATASUM (1 << 12)
20#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) 18#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
21 19
22/* flags for bio submission */ 20/* flags for bio submission */
@@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map,
142 struct extent_io_tree *tree, struct page *page, 140 struct extent_io_tree *tree, struct page *page,
143 gfp_t mask); 141 gfp_t mask);
144int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 142int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
143int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
144 int bits, struct extent_state **cached, gfp_t mask);
145int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 145int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
146int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 146int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
147 gfp_t mask); 147 gfp_t mask);
@@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree,
155 u64 max_bytes, unsigned long bits); 155 u64 max_bytes, unsigned long bits);
156 156
157int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 157int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
158 int bits, int filled); 158 int bits, int filled, struct extent_state *cached_state);
159int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 159int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
160 int bits, gfp_t mask); 160 int bits, gfp_t mask);
161int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 161int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
162 int bits, int wake, int delete, gfp_t mask); 162 int bits, int wake, int delete, struct extent_state **cached,
163 gfp_t mask);
163int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 164int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
164 int bits, gfp_t mask); 165 int bits, gfp_t mask);
165int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 166int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
282 int clear_unlock, 283 int clear_unlock,
283 int clear_delalloc, int clear_dirty, 284 int clear_delalloc, int clear_dirty,
284 int set_writeback, 285 int set_writeback,
285 int end_writeback); 286 int end_writeback,
287 int set_private2);
286#endif 288#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 30c9365861e6..5bc7a0d325e7 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -36,7 +36,7 @@ void extent_map_exit(void)
36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) 36void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
37{ 37{
38 tree->map.rb_node = NULL; 38 tree->map.rb_node = NULL;
39 spin_lock_init(&tree->lock); 39 rwlock_init(&tree->lock);
40} 40}
41 41
42/** 42/**
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
198 return 0; 198 return 0;
199} 199}
200 200
201int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
202{
203 int ret = 0;
204 struct extent_map *merge = NULL;
205 struct rb_node *rb;
206 struct extent_map *em;
207
208 write_lock(&tree->lock);
209 em = lookup_extent_mapping(tree, start, len);
210
211 WARN_ON(em->start != start || !em);
212
213 if (!em)
214 goto out;
215
216 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
217
218 if (em->start != 0) {
219 rb = rb_prev(&em->rb_node);
220 if (rb)
221 merge = rb_entry(rb, struct extent_map, rb_node);
222 if (rb && mergable_maps(merge, em)) {
223 em->start = merge->start;
224 em->len += merge->len;
225 em->block_len += merge->block_len;
226 em->block_start = merge->block_start;
227 merge->in_tree = 0;
228 rb_erase(&merge->rb_node, &tree->map);
229 free_extent_map(merge);
230 }
231 }
232
233 rb = rb_next(&em->rb_node);
234 if (rb)
235 merge = rb_entry(rb, struct extent_map, rb_node);
236 if (rb && mergable_maps(em, merge)) {
237 em->len += merge->len;
238 em->block_len += merge->len;
239 rb_erase(&merge->rb_node, &tree->map);
240 merge->in_tree = 0;
241 free_extent_map(merge);
242 }
243
244 free_extent_map(em);
245out:
246 write_unlock(&tree->lock);
247 return ret;
248
249}
250
201/** 251/**
202 * add_extent_mapping - add new extent map to the extent tree 252 * add_extent_mapping - add new extent map to the extent tree
203 * @tree: tree to insert new map in 253 * @tree: tree to insert new map in
@@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
222 ret = -EEXIST; 272 ret = -EEXIST;
223 goto out; 273 goto out;
224 } 274 }
225 assert_spin_locked(&tree->lock);
226 rb = tree_insert(&tree->map, em->start, &em->rb_node); 275 rb = tree_insert(&tree->map, em->start, &em->rb_node);
227 if (rb) { 276 if (rb) {
228 ret = -EEXIST; 277 ret = -EEXIST;
@@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
285 struct rb_node *next = NULL; 334 struct rb_node *next = NULL;
286 u64 end = range_end(start, len); 335 u64 end = range_end(start, len);
287 336
288 assert_spin_locked(&tree->lock);
289 rb_node = __tree_search(&tree->map, start, &prev, &next); 337 rb_node = __tree_search(&tree->map, start, &prev, &next);
290 if (!rb_node && prev) { 338 if (!rb_node && prev) {
291 em = rb_entry(prev, struct extent_map, rb_node); 339 em = rb_entry(prev, struct extent_map, rb_node);
@@ -331,7 +379,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
331 int ret = 0; 379 int ret = 0;
332 380
333 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); 381 WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
334 assert_spin_locked(&tree->lock);
335 rb_erase(&em->rb_node, &tree->map); 382 rb_erase(&em->rb_node, &tree->map);
336 em->in_tree = 0; 383 em->in_tree = 0;
337 return ret; 384 return ret;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index fb6eeef06bb0..d3d442f4bbbd 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -31,7 +31,7 @@ struct extent_map {
31 31
32struct extent_map_tree { 32struct extent_map_tree {
33 struct rb_root map; 33 struct rb_root map;
34 spinlock_t lock; 34 rwlock_t lock;
35}; 35};
36 36
37static inline u64 extent_map_end(struct extent_map *em) 37static inline u64 extent_map_end(struct extent_map *em)
@@ -59,4 +59,5 @@ struct extent_map *alloc_extent_map(gfp_t mask);
59void free_extent_map(struct extent_map *em); 59void free_extent_map(struct extent_map *em);
60int __init extent_map_init(void); 60int __init extent_map_init(void);
61void extent_map_exit(void); 61void extent_map_exit(void);
62int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
62#endif 63#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4b833972273a..571ad3c13b47 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
112 int err = 0; 112 int err = 0;
113 int i; 113 int i;
114 struct inode *inode = fdentry(file)->d_inode; 114 struct inode *inode = fdentry(file)->d_inode;
115 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
116 u64 hint_byte;
117 u64 num_bytes; 115 u64 num_bytes;
118 u64 start_pos; 116 u64 start_pos;
119 u64 end_of_last_block; 117 u64 end_of_last_block;
@@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
125 root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
126 124
127 end_of_last_block = start_pos + num_bytes - 1; 125 end_of_last_block = start_pos + num_bytes - 1;
128
129 lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
130 trans = btrfs_join_transaction(root, 1);
131 if (!trans) {
132 err = -ENOMEM;
133 goto out_unlock;
134 }
135 btrfs_set_trans_block_group(trans, inode);
136 hint_byte = 0;
137
138 set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
139
140 /* check for reserved extents on each page, we don't want
141 * to reset the delalloc bit on things that already have
142 * extents reserved.
143 */
144 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); 126 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
145 for (i = 0; i < num_pages; i++) { 127 for (i = 0; i < num_pages; i++) {
146 struct page *p = pages[i]; 128 struct page *p = pages[i];
@@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
155 * at this time. 137 * at this time.
156 */ 138 */
157 } 139 }
158 err = btrfs_end_transaction(trans, root);
159out_unlock:
160 unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
161 return err; 140 return err;
162} 141}
163 142
@@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
189 if (!split2) 168 if (!split2)
190 split2 = alloc_extent_map(GFP_NOFS); 169 split2 = alloc_extent_map(GFP_NOFS);
191 170
192 spin_lock(&em_tree->lock); 171 write_lock(&em_tree->lock);
193 em = lookup_extent_mapping(em_tree, start, len); 172 em = lookup_extent_mapping(em_tree, start, len);
194 if (!em) { 173 if (!em) {
195 spin_unlock(&em_tree->lock); 174 write_unlock(&em_tree->lock);
196 break; 175 break;
197 } 176 }
198 flags = em->flags; 177 flags = em->flags;
199 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 178 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
200 spin_unlock(&em_tree->lock);
201 if (em->start <= start && 179 if (em->start <= start &&
202 (!testend || em->start + em->len >= start + len)) { 180 (!testend || em->start + em->len >= start + len)) {
203 free_extent_map(em); 181 free_extent_map(em);
182 write_unlock(&em_tree->lock);
204 break; 183 break;
205 } 184 }
206 if (start < em->start) { 185 if (start < em->start) {
@@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
210 start = em->start + em->len; 189 start = em->start + em->len;
211 } 190 }
212 free_extent_map(em); 191 free_extent_map(em);
192 write_unlock(&em_tree->lock);
213 continue; 193 continue;
214 } 194 }
215 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 195 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
260 free_extent_map(split); 240 free_extent_map(split);
261 split = NULL; 241 split = NULL;
262 } 242 }
263 spin_unlock(&em_tree->lock); 243 write_unlock(&em_tree->lock);
264 244
265 /* once for us */ 245 /* once for us */
266 free_extent_map(em); 246 free_extent_map(em);
@@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
289noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 269noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
290 struct btrfs_root *root, struct inode *inode, 270 struct btrfs_root *root, struct inode *inode,
291 u64 start, u64 end, u64 locked_end, 271 u64 start, u64 end, u64 locked_end,
292 u64 inline_limit, u64 *hint_byte) 272 u64 inline_limit, u64 *hint_byte, int drop_cache)
293{ 273{
294 u64 extent_end = 0; 274 u64 extent_end = 0;
295 u64 search_start = start; 275 u64 search_start = start;
@@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
314 int ret; 294 int ret;
315 295
316 inline_limit = 0; 296 inline_limit = 0;
317 btrfs_drop_extent_cache(inode, start, end - 1, 0); 297 if (drop_cache)
298 btrfs_drop_extent_cache(inode, start, end - 1, 0);
318 299
319 path = btrfs_alloc_path(); 300 path = btrfs_alloc_path();
320 if (!path) 301 if (!path)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 59cba180fe83..941f1b71cd22 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
231 } 231 }
232 232
233 ret = btrfs_drop_extents(trans, root, inode, start, 233 ret = btrfs_drop_extents(trans, root, inode, start,
234 aligned_end, aligned_end, start, &hint_byte); 234 aligned_end, aligned_end, start,
235 &hint_byte, 1);
235 BUG_ON(ret); 236 BUG_ON(ret);
236 237
237 if (isize > actual_end) 238 if (isize > actual_end)
@@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
240 inline_len, compressed_size, 241 inline_len, compressed_size,
241 compressed_pages); 242 compressed_pages);
242 BUG_ON(ret); 243 BUG_ON(ret);
243 btrfs_drop_extent_cache(inode, start, aligned_end, 0); 244 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
244 return 0; 245 return 0;
245} 246}
246 247
@@ -425,7 +426,7 @@ again:
425 extent_clear_unlock_delalloc(inode, 426 extent_clear_unlock_delalloc(inode,
426 &BTRFS_I(inode)->io_tree, 427 &BTRFS_I(inode)->io_tree,
427 start, end, NULL, 1, 0, 428 start, end, NULL, 1, 0,
428 0, 1, 1, 1); 429 0, 1, 1, 1, 0);
429 ret = 0; 430 ret = 0;
430 goto free_pages_out; 431 goto free_pages_out;
431 } 432 }
@@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
611 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 612 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
612 613
613 while (1) { 614 while (1) {
614 spin_lock(&em_tree->lock); 615 write_lock(&em_tree->lock);
615 ret = add_extent_mapping(em_tree, em); 616 ret = add_extent_mapping(em_tree, em);
616 spin_unlock(&em_tree->lock); 617 write_unlock(&em_tree->lock);
617 if (ret != -EEXIST) { 618 if (ret != -EEXIST) {
618 free_extent_map(em); 619 free_extent_map(em);
619 break; 620 break;
@@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
640 async_extent->start, 641 async_extent->start,
641 async_extent->start + 642 async_extent->start +
642 async_extent->ram_size - 1, 643 async_extent->ram_size - 1,
643 NULL, 1, 1, 0, 1, 1, 0); 644 NULL, 1, 1, 0, 1, 1, 0, 0);
644 645
645 ret = btrfs_submit_compressed_write(inode, 646 ret = btrfs_submit_compressed_write(inode,
646 async_extent->start, 647 async_extent->start,
@@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode,
713 extent_clear_unlock_delalloc(inode, 714 extent_clear_unlock_delalloc(inode,
714 &BTRFS_I(inode)->io_tree, 715 &BTRFS_I(inode)->io_tree,
715 start, end, NULL, 1, 1, 716 start, end, NULL, 1, 1,
716 1, 1, 1, 1); 717 1, 1, 1, 1, 0);
717 *nr_written = *nr_written + 718 *nr_written = *nr_written +
718 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; 719 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
719 *page_started = 1; 720 *page_started = 1;
@@ -747,9 +748,9 @@ static noinline int cow_file_range(struct inode *inode,
747 set_bit(EXTENT_FLAG_PINNED, &em->flags); 748 set_bit(EXTENT_FLAG_PINNED, &em->flags);
748 749
749 while (1) { 750 while (1) {
750 spin_lock(&em_tree->lock); 751 write_lock(&em_tree->lock);
751 ret = add_extent_mapping(em_tree, em); 752 ret = add_extent_mapping(em_tree, em);
752 spin_unlock(&em_tree->lock); 753 write_unlock(&em_tree->lock);
753 if (ret != -EEXIST) { 754 if (ret != -EEXIST) {
754 free_extent_map(em); 755 free_extent_map(em);
755 break; 756 break;
@@ -776,11 +777,14 @@ static noinline int cow_file_range(struct inode *inode,
776 /* we're not doing compressed IO, don't unlock the first 777 /* we're not doing compressed IO, don't unlock the first
777 * page (which the caller expects to stay locked), don't 778 * page (which the caller expects to stay locked), don't
778 * clear any dirty bits and don't set any writeback bits 779 * clear any dirty bits and don't set any writeback bits
780 *
781 * Do set the Private2 bit so we know this page was properly
782 * setup for writepage
779 */ 783 */
780 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 784 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
781 start, start + ram_size - 1, 785 start, start + ram_size - 1,
782 locked_page, unlock, 1, 786 locked_page, unlock, 1,
783 1, 0, 0, 0); 787 1, 0, 0, 0, 1);
784 disk_num_bytes -= cur_alloc_size; 788 disk_num_bytes -= cur_alloc_size;
785 num_bytes -= cur_alloc_size; 789 num_bytes -= cur_alloc_size;
786 alloc_hint = ins.objectid + ins.offset; 790 alloc_hint = ins.objectid + ins.offset;
@@ -853,7 +857,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
853 int limit = 10 * 1024 * 1042; 857 int limit = 10 * 1024 * 1042;
854 858
855 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | 859 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
856 EXTENT_DELALLOC, 1, 0, GFP_NOFS); 860 EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
857 while (start < end) { 861 while (start < end) {
858 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); 862 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
859 async_cow->inode = inode; 863 async_cow->inode = inode;
@@ -1080,9 +1084,9 @@ out_check:
1080 em->bdev = root->fs_info->fs_devices->latest_bdev; 1084 em->bdev = root->fs_info->fs_devices->latest_bdev;
1081 set_bit(EXTENT_FLAG_PINNED, &em->flags); 1085 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1082 while (1) { 1086 while (1) {
1083 spin_lock(&em_tree->lock); 1087 write_lock(&em_tree->lock);
1084 ret = add_extent_mapping(em_tree, em); 1088 ret = add_extent_mapping(em_tree, em);
1085 spin_unlock(&em_tree->lock); 1089 write_unlock(&em_tree->lock);
1086 if (ret != -EEXIST) { 1090 if (ret != -EEXIST) {
1087 free_extent_map(em); 1091 free_extent_map(em);
1088 break; 1092 break;
@@ -1101,7 +1105,7 @@ out_check:
1101 1105
1102 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1106 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
1103 cur_offset, cur_offset + num_bytes - 1, 1107 cur_offset, cur_offset + num_bytes - 1,
1104 locked_page, 1, 1, 1, 0, 0, 0); 1108 locked_page, 1, 1, 1, 0, 0, 0, 1);
1105 cur_offset = extent_end; 1109 cur_offset = extent_end;
1106 if (cur_offset > end) 1110 if (cur_offset > end)
1107 break; 1111 break;
@@ -1374,10 +1378,8 @@ again:
1374 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1378 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
1375 1379
1376 /* already ordered? We're done */ 1380 /* already ordered? We're done */
1377 if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, 1381 if (PagePrivate2(page))
1378 EXTENT_ORDERED, 0)) {
1379 goto out; 1382 goto out;
1380 }
1381 1383
1382 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1384 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1383 if (ordered) { 1385 if (ordered) {
@@ -1413,11 +1415,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1413 struct inode *inode = page->mapping->host; 1415 struct inode *inode = page->mapping->host;
1414 struct btrfs_writepage_fixup *fixup; 1416 struct btrfs_writepage_fixup *fixup;
1415 struct btrfs_root *root = BTRFS_I(inode)->root; 1417 struct btrfs_root *root = BTRFS_I(inode)->root;
1416 int ret;
1417 1418
1418 ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, 1419 /* this page is properly in the ordered list */
1419 EXTENT_ORDERED, 0); 1420 if (TestClearPagePrivate2(page))
1420 if (ret)
1421 return 0; 1421 return 0;
1422 1422
1423 if (PageChecked(page)) 1423 if (PageChecked(page))
@@ -1455,9 +1455,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 BUG_ON(!path); 1455 BUG_ON(!path);
1456 1456
1457 path->leave_spinning = 1; 1457 path->leave_spinning = 1;
1458
1459 /*
1460 * we may be replacing one extent in the tree with another.
1461 * The new extent is pinned in the extent map, and we don't want
1462 * to drop it from the cache until it is completely in the btree.
1463 *
1464 * So, tell btrfs_drop_extents to leave this extent in the cache.
1465 * the caller is expected to unpin it and allow it to be merged
1466 * with the others.
1467 */
1458 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1468 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1459 file_pos + num_bytes, locked_end, 1469 file_pos + num_bytes, locked_end,
1460 file_pos, &hint); 1470 file_pos, &hint, 0);
1461 BUG_ON(ret); 1471 BUG_ON(ret);
1462 1472
1463 ins.objectid = inode->i_ino; 1473 ins.objectid = inode->i_ino;
@@ -1485,7 +1495,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1485 btrfs_mark_buffer_dirty(leaf); 1495 btrfs_mark_buffer_dirty(leaf);
1486 1496
1487 inode_add_bytes(inode, num_bytes); 1497 inode_add_bytes(inode, num_bytes);
1488 btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
1489 1498
1490 ins.objectid = disk_bytenr; 1499 ins.objectid = disk_bytenr;
1491 ins.offset = disk_num_bytes; 1500 ins.offset = disk_num_bytes;
@@ -1596,6 +1605,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1596 ordered_extent->len, 1605 ordered_extent->len,
1597 compressed, 0, 0, 1606 compressed, 0, 0,
1598 BTRFS_FILE_EXTENT_REG); 1607 BTRFS_FILE_EXTENT_REG);
1608 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
1609 ordered_extent->file_offset,
1610 ordered_extent->len);
1599 BUG_ON(ret); 1611 BUG_ON(ret);
1600 } 1612 }
1601 unlock_extent(io_tree, ordered_extent->file_offset, 1613 unlock_extent(io_tree, ordered_extent->file_offset,
@@ -1623,6 +1635,7 @@ nocow:
1623static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, 1635static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
1624 struct extent_state *state, int uptodate) 1636 struct extent_state *state, int uptodate)
1625{ 1637{
1638 ClearPagePrivate2(page);
1626 return btrfs_finish_ordered_io(page->mapping->host, start, end); 1639 return btrfs_finish_ordered_io(page->mapping->host, start, end);
1627} 1640}
1628 1641
@@ -1669,13 +1682,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1669 failrec->last_mirror = 0; 1682 failrec->last_mirror = 0;
1670 failrec->bio_flags = 0; 1683 failrec->bio_flags = 0;
1671 1684
1672 spin_lock(&em_tree->lock); 1685 read_lock(&em_tree->lock);
1673 em = lookup_extent_mapping(em_tree, start, failrec->len); 1686 em = lookup_extent_mapping(em_tree, start, failrec->len);
1674 if (em->start > start || em->start + em->len < start) { 1687 if (em->start > start || em->start + em->len < start) {
1675 free_extent_map(em); 1688 free_extent_map(em);
1676 em = NULL; 1689 em = NULL;
1677 } 1690 }
1678 spin_unlock(&em_tree->lock); 1691 read_unlock(&em_tree->lock);
1679 1692
1680 if (!em || IS_ERR(em)) { 1693 if (!em || IS_ERR(em)) {
1681 kfree(failrec); 1694 kfree(failrec);
@@ -1794,7 +1807,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1794 return 0; 1807 return 0;
1795 1808
1796 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && 1809 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
1797 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { 1810 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
1798 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, 1811 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
1799 GFP_NOFS); 1812 GFP_NOFS);
1800 return 0; 1813 return 0;
@@ -2935,7 +2948,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2935 cur_offset, 2948 cur_offset,
2936 cur_offset + hole_size, 2949 cur_offset + hole_size,
2937 block_end, 2950 block_end,
2938 cur_offset, &hint_byte); 2951 cur_offset, &hint_byte, 1);
2939 if (err) 2952 if (err)
2940 break; 2953 break;
2941 err = btrfs_insert_file_extent(trans, root, 2954 err = btrfs_insert_file_extent(trans, root,
@@ -4064,11 +4077,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
4064 int compressed; 4077 int compressed;
4065 4078
4066again: 4079again:
4067 spin_lock(&em_tree->lock); 4080 read_lock(&em_tree->lock);
4068 em = lookup_extent_mapping(em_tree, start, len); 4081 em = lookup_extent_mapping(em_tree, start, len);
4069 if (em) 4082 if (em)
4070 em->bdev = root->fs_info->fs_devices->latest_bdev; 4083 em->bdev = root->fs_info->fs_devices->latest_bdev;
4071 spin_unlock(&em_tree->lock); 4084 read_unlock(&em_tree->lock);
4072 4085
4073 if (em) { 4086 if (em) {
4074 if (em->start > start || em->start + em->len <= start) 4087 if (em->start > start || em->start + em->len <= start)
@@ -4215,6 +4228,11 @@ again:
4215 map = kmap(page); 4228 map = kmap(page);
4216 read_extent_buffer(leaf, map + pg_offset, ptr, 4229 read_extent_buffer(leaf, map + pg_offset, ptr,
4217 copy_size); 4230 copy_size);
4231 if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
4232 memset(map + pg_offset + copy_size, 0,
4233 PAGE_CACHE_SIZE - pg_offset -
4234 copy_size);
4235 }
4218 kunmap(page); 4236 kunmap(page);
4219 } 4237 }
4220 flush_dcache_page(page); 4238 flush_dcache_page(page);
@@ -4259,7 +4277,7 @@ insert:
4259 } 4277 }
4260 4278
4261 err = 0; 4279 err = 0;
4262 spin_lock(&em_tree->lock); 4280 write_lock(&em_tree->lock);
4263 ret = add_extent_mapping(em_tree, em); 4281 ret = add_extent_mapping(em_tree, em);
4264 /* it is possible that someone inserted the extent into the tree 4282 /* it is possible that someone inserted the extent into the tree
4265 * while we had the lock dropped. It is also possible that 4283 * while we had the lock dropped. It is also possible that
@@ -4299,7 +4317,7 @@ insert:
4299 err = 0; 4317 err = 0;
4300 } 4318 }
4301 } 4319 }
4302 spin_unlock(&em_tree->lock); 4320 write_unlock(&em_tree->lock);
4303out: 4321out:
4304 if (path) 4322 if (path)
4305 btrfs_free_path(path); 4323 btrfs_free_path(path);
@@ -4398,13 +4416,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4398 u64 page_start = page_offset(page); 4416 u64 page_start = page_offset(page);
4399 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 4417 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
4400 4418
4419
4420 /*
4421 * we have the page locked, so new writeback can't start,
4422 * and the dirty bit won't be cleared while we are here.
4423 *
4424 * Wait for IO on this page so that we can safely clear
4425 * the PagePrivate2 bit and do ordered accounting
4426 */
4401 wait_on_page_writeback(page); 4427 wait_on_page_writeback(page);
4428
4402 tree = &BTRFS_I(page->mapping->host)->io_tree; 4429 tree = &BTRFS_I(page->mapping->host)->io_tree;
4403 if (offset) { 4430 if (offset) {
4404 btrfs_releasepage(page, GFP_NOFS); 4431 btrfs_releasepage(page, GFP_NOFS);
4405 return; 4432 return;
4406 } 4433 }
4407
4408 lock_extent(tree, page_start, page_end, GFP_NOFS); 4434 lock_extent(tree, page_start, page_end, GFP_NOFS);
4409 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 4435 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
4410 page_offset(page)); 4436 page_offset(page));
@@ -4415,16 +4441,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4415 */ 4441 */
4416 clear_extent_bit(tree, page_start, page_end, 4442 clear_extent_bit(tree, page_start, page_end,
4417 EXTENT_DIRTY | EXTENT_DELALLOC | 4443 EXTENT_DIRTY | EXTENT_DELALLOC |
4418 EXTENT_LOCKED, 1, 0, GFP_NOFS); 4444 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
4419 btrfs_finish_ordered_io(page->mapping->host, 4445 /*
4420 page_start, page_end); 4446 * whoever cleared the private bit is responsible
4447 * for the finish_ordered_io
4448 */
4449 if (TestClearPagePrivate2(page)) {
4450 btrfs_finish_ordered_io(page->mapping->host,
4451 page_start, page_end);
4452 }
4421 btrfs_put_ordered_extent(ordered); 4453 btrfs_put_ordered_extent(ordered);
4422 lock_extent(tree, page_start, page_end, GFP_NOFS); 4454 lock_extent(tree, page_start, page_end, GFP_NOFS);
4423 } 4455 }
4424 clear_extent_bit(tree, page_start, page_end, 4456 clear_extent_bit(tree, page_start, page_end,
4425 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 4457 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
4426 EXTENT_ORDERED, 4458 1, 1, NULL, GFP_NOFS);
4427 1, 1, GFP_NOFS);
4428 __btrfs_releasepage(page, GFP_NOFS); 4459 __btrfs_releasepage(page, GFP_NOFS);
4429 4460
4430 ClearPageChecked(page); 4461 ClearPageChecked(page);
@@ -4521,11 +4552,14 @@ again:
4521 } 4552 }
4522 ClearPageChecked(page); 4553 ClearPageChecked(page);
4523 set_page_dirty(page); 4554 set_page_dirty(page);
4555 SetPageUptodate(page);
4524 4556
4525 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; 4557 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
4526 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4558 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4527 4559
4528out_unlock: 4560out_unlock:
4561 if (!ret)
4562 return VM_FAULT_LOCKED;
4529 unlock_page(page); 4563 unlock_page(page);
4530out: 4564out:
4531 return ret; 4565 return ret;
@@ -5058,6 +5092,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
5058 0, 0, 0, 5092 0, 0, 0,
5059 BTRFS_FILE_EXTENT_PREALLOC); 5093 BTRFS_FILE_EXTENT_PREALLOC);
5060 BUG_ON(ret); 5094 BUG_ON(ret);
5095 btrfs_drop_extent_cache(inode, cur_offset,
5096 cur_offset + ins.offset -1, 0);
5061 num_bytes -= ins.offset; 5097 num_bytes -= ins.offset;
5062 cur_offset += ins.offset; 5098 cur_offset += ins.offset;
5063 alloc_hint = ins.objectid + ins.offset; 5099 alloc_hint = ins.objectid + ins.offset;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd88f25889f7..ef0188fb3cc4 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -596,9 +596,8 @@ again:
596 clear_page_dirty_for_io(page); 596 clear_page_dirty_for_io(page);
597 597
598 btrfs_set_extent_delalloc(inode, page_start, page_end); 598 btrfs_set_extent_delalloc(inode, page_start, page_end);
599
600 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
601 set_page_dirty(page); 599 set_page_dirty(page);
600 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
602 unlock_page(page); 601 unlock_page(page);
603 page_cache_release(page); 602 page_cache_release(page);
604 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); 603 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@ -976,7 +975,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
976 975
977 /* punch hole in destination first */ 976 /* punch hole in destination first */
978 btrfs_drop_extents(trans, root, inode, off, off + len, 977 btrfs_drop_extents(trans, root, inode, off, off + len,
979 off + len, 0, &hint_byte); 978 off + len, 0, &hint_byte, 1);
980 979
981 /* clone data */ 980 /* clone data */
982 key.objectid = src->i_ino; 981 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index d6f0806c682f..4a9c8c4cec25 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
159 * 159 *
160 * len is the length of the extent 160 * len is the length of the extent
161 * 161 *
162 * This also sets the EXTENT_ORDERED bit on the range in the inode.
163 *
164 * The tree is given a single reference on the ordered extent that was 162 * The tree is given a single reference on the ordered extent that was
165 * inserted. 163 * inserted.
166 */ 164 */
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
181 entry->start = start; 179 entry->start = start;
182 entry->len = len; 180 entry->len = len;
183 entry->disk_len = disk_len; 181 entry->disk_len = disk_len;
182 entry->bytes_left = len;
184 entry->inode = inode; 183 entry->inode = inode;
185 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 184 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
186 set_bit(type, &entry->flags); 185 set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
195 &entry->rb_node); 194 &entry->rb_node);
196 BUG_ON(node); 195 BUG_ON(node);
197 196
198 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
199 entry_end(entry) - 1, GFP_NOFS);
200
201 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
202 list_add_tail(&entry->root_extent_list, 198 list_add_tail(&entry->root_extent_list,
203 &BTRFS_I(inode)->root->fs_info->ordered_extents); 199 &BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
241 struct btrfs_ordered_inode_tree *tree; 237 struct btrfs_ordered_inode_tree *tree;
242 struct rb_node *node; 238 struct rb_node *node;
243 struct btrfs_ordered_extent *entry; 239 struct btrfs_ordered_extent *entry;
244 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
245 int ret; 240 int ret;
246 241
247 tree = &BTRFS_I(inode)->ordered_tree; 242 tree = &BTRFS_I(inode)->ordered_tree;
248 mutex_lock(&tree->mutex); 243 mutex_lock(&tree->mutex);
249 clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
250 GFP_NOFS);
251 node = tree_search(tree, file_offset); 244 node = tree_search(tree, file_offset);
252 if (!node) { 245 if (!node) {
253 ret = 1; 246 ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
260 goto out; 253 goto out;
261 } 254 }
262 255
263 ret = test_range_bit(io_tree, entry->file_offset, 256 if (io_size > entry->bytes_left) {
264 entry->file_offset + entry->len - 1, 257 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
265 EXTENT_ORDERED, 0); 258 (unsigned long long)entry->bytes_left,
266 if (ret == 0) 259 (unsigned long long)io_size);
260 }
261 entry->bytes_left -= io_size;
262 if (entry->bytes_left == 0)
267 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 263 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
264 else
265 ret = 1;
268out: 266out:
269 mutex_unlock(&tree->mutex); 267 mutex_unlock(&tree->mutex);
270 return ret == 0; 268 return ret == 0;
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
476 u64 orig_end; 474 u64 orig_end;
477 u64 wait_end; 475 u64 wait_end;
478 struct btrfs_ordered_extent *ordered; 476 struct btrfs_ordered_extent *ordered;
477 int found;
479 478
480 if (start + len < start) { 479 if (start + len < start) {
481 orig_end = INT_LIMIT(loff_t); 480 orig_end = INT_LIMIT(loff_t);
@@ -502,6 +501,7 @@ again:
502 orig_end >> PAGE_CACHE_SHIFT); 501 orig_end >> PAGE_CACHE_SHIFT);
503 502
504 end = orig_end; 503 end = orig_end;
504 found = 0;
505 while (1) { 505 while (1) {
506 ordered = btrfs_lookup_first_ordered_extent(inode, end); 506 ordered = btrfs_lookup_first_ordered_extent(inode, end);
507 if (!ordered) 507 if (!ordered)
@@ -514,6 +514,7 @@ again:
514 btrfs_put_ordered_extent(ordered); 514 btrfs_put_ordered_extent(ordered);
515 break; 515 break;
516 } 516 }
517 found++;
517 btrfs_start_ordered_extent(inode, ordered, 1); 518 btrfs_start_ordered_extent(inode, ordered, 1);
518 end = ordered->file_offset; 519 end = ordered->file_offset;
519 btrfs_put_ordered_extent(ordered); 520 btrfs_put_ordered_extent(ordered);
@@ -521,8 +522,8 @@ again:
521 break; 522 break;
522 end--; 523 end--;
523 } 524 }
524 if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, 525 if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
525 EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { 526 EXTENT_DELALLOC, 0, NULL)) {
526 schedule_timeout(1); 527 schedule_timeout(1);
527 goto again; 528 goto again;
528 } 529 }
@@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
613 */ 614 */
614 if (test_range_bit(io_tree, disk_i_size, 615 if (test_range_bit(io_tree, disk_i_size,
615 ordered->file_offset + ordered->len - 1, 616 ordered->file_offset + ordered->len - 1,
616 EXTENT_DELALLOC, 0)) { 617 EXTENT_DELALLOC, 0, NULL)) {
617 goto out; 618 goto out;
618 } 619 }
619 /* 620 /*
@@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
664 */ 665 */
665 if (i_size_test > entry_end(ordered) && 666 if (i_size_test > entry_end(ordered) &&
666 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 667 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
667 EXTENT_DELALLOC, 0)) { 668 EXTENT_DELALLOC, 0, NULL)) {
668 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 669 new_i_size = min_t(u64, i_size_test, i_size_read(inode));
669 } 670 }
670 BTRFS_I(inode)->disk_i_size = new_i_size; 671 BTRFS_I(inode)->disk_i_size = new_i_size;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 3d31c8827b01..993a7ea45c70 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -85,6 +85,9 @@ struct btrfs_ordered_extent {
85 /* extent length on disk */ 85 /* extent length on disk */
86 u64 disk_len; 86 u64 disk_len;
87 87
88 /* number of bytes that still need writing */
89 u64 bytes_left;
90
88 /* flags (described above) */ 91 /* flags (described above) */
89 unsigned long flags; 92 unsigned long flags;
90 93
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index c04f7f212602..3be16ccc7eea 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2180,7 +2180,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize,
2180 struct reloc_control *rc) 2180 struct reloc_control *rc)
2181{ 2181{
2182 if (test_range_bit(&rc->processed_blocks, bytenr, 2182 if (test_range_bit(&rc->processed_blocks, bytenr,
2183 bytenr + blocksize - 1, EXTENT_DIRTY, 1)) 2183 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2184 return 1; 2184 return 1;
2185 return 0; 2185 return 0;
2186} 2186}
@@ -2646,9 +2646,9 @@ int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
2646 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2646 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2647 while (1) { 2647 while (1) {
2648 int ret; 2648 int ret;
2649 spin_lock(&em_tree->lock); 2649 write_lock(&em_tree->lock);
2650 ret = add_extent_mapping(em_tree, em); 2650 ret = add_extent_mapping(em_tree, em);
2651 spin_unlock(&em_tree->lock); 2651 write_unlock(&em_tree->lock);
2652 if (ret != -EEXIST) { 2652 if (ret != -EEXIST) {
2653 free_extent_map(em); 2653 free_extent_map(em);
2654 break; 2654 break;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d91b0de7c502..8661a7381b39 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
534 saved_nbytes = inode_get_bytes(inode); 534 saved_nbytes = inode_get_bytes(inode);
535 /* drop any overlapping extents */ 535 /* drop any overlapping extents */
536 ret = btrfs_drop_extents(trans, root, inode, 536 ret = btrfs_drop_extents(trans, root, inode,
537 start, extent_end, extent_end, start, &alloc_hint); 537 start, extent_end, extent_end, start, &alloc_hint, 1);
538 BUG_ON(ret); 538 BUG_ON(ret);
539 539
540 if (found_type == BTRFS_FILE_EXTENT_REG || 540 if (found_type == BTRFS_FILE_EXTENT_REG ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5dbefd11b4af..d2358c06bbd9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -276,7 +276,7 @@ loop_lock:
276 * is now congested. Back off and let other work structs 276 * is now congested. Back off and let other work structs
277 * run instead 277 * run instead
278 */ 278 */
279 if (pending && bdi_write_congested(bdi) && batch_run > 32 && 279 if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
280 fs_info->fs_devices->open_devices > 1) { 280 fs_info->fs_devices->open_devices > 1) {
281 struct io_context *ioc; 281 struct io_context *ioc;
282 282
@@ -1749,9 +1749,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1749 * step two, delete the device extents and the 1749 * step two, delete the device extents and the
1750 * chunk tree entries 1750 * chunk tree entries
1751 */ 1751 */
1752 spin_lock(&em_tree->lock); 1752 read_lock(&em_tree->lock);
1753 em = lookup_extent_mapping(em_tree, chunk_offset, 1); 1753 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1754 spin_unlock(&em_tree->lock); 1754 read_unlock(&em_tree->lock);
1755 1755
1756 BUG_ON(em->start > chunk_offset || 1756 BUG_ON(em->start > chunk_offset ||
1757 em->start + em->len < chunk_offset); 1757 em->start + em->len < chunk_offset);
@@ -1780,9 +1780,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
1780 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset); 1780 ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
1781 BUG_ON(ret); 1781 BUG_ON(ret);
1782 1782
1783 spin_lock(&em_tree->lock); 1783 write_lock(&em_tree->lock);
1784 remove_extent_mapping(em_tree, em); 1784 remove_extent_mapping(em_tree, em);
1785 spin_unlock(&em_tree->lock); 1785 write_unlock(&em_tree->lock);
1786 1786
1787 kfree(map); 1787 kfree(map);
1788 em->bdev = NULL; 1788 em->bdev = NULL;
@@ -2294,9 +2294,9 @@ again:
2294 em->block_len = em->len; 2294 em->block_len = em->len;
2295 2295
2296 em_tree = &extent_root->fs_info->mapping_tree.map_tree; 2296 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
2297 spin_lock(&em_tree->lock); 2297 write_lock(&em_tree->lock);
2298 ret = add_extent_mapping(em_tree, em); 2298 ret = add_extent_mapping(em_tree, em);
2299 spin_unlock(&em_tree->lock); 2299 write_unlock(&em_tree->lock);
2300 BUG_ON(ret); 2300 BUG_ON(ret);
2301 free_extent_map(em); 2301 free_extent_map(em);
2302 2302
@@ -2491,9 +2491,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
2491 int readonly = 0; 2491 int readonly = 0;
2492 int i; 2492 int i;
2493 2493
2494 spin_lock(&map_tree->map_tree.lock); 2494 read_lock(&map_tree->map_tree.lock);
2495 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); 2495 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2496 spin_unlock(&map_tree->map_tree.lock); 2496 read_unlock(&map_tree->map_tree.lock);
2497 if (!em) 2497 if (!em)
2498 return 1; 2498 return 1;
2499 2499
@@ -2518,11 +2518,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
2518 struct extent_map *em; 2518 struct extent_map *em;
2519 2519
2520 while (1) { 2520 while (1) {
2521 spin_lock(&tree->map_tree.lock); 2521 write_lock(&tree->map_tree.lock);
2522 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); 2522 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
2523 if (em) 2523 if (em)
2524 remove_extent_mapping(&tree->map_tree, em); 2524 remove_extent_mapping(&tree->map_tree, em);
2525 spin_unlock(&tree->map_tree.lock); 2525 write_unlock(&tree->map_tree.lock);
2526 if (!em) 2526 if (!em)
2527 break; 2527 break;
2528 kfree(em->bdev); 2528 kfree(em->bdev);
@@ -2540,9 +2540,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
2540 struct extent_map_tree *em_tree = &map_tree->map_tree; 2540 struct extent_map_tree *em_tree = &map_tree->map_tree;
2541 int ret; 2541 int ret;
2542 2542
2543 spin_lock(&em_tree->lock); 2543 read_lock(&em_tree->lock);
2544 em = lookup_extent_mapping(em_tree, logical, len); 2544 em = lookup_extent_mapping(em_tree, logical, len);
2545 spin_unlock(&em_tree->lock); 2545 read_unlock(&em_tree->lock);
2546 BUG_ON(!em); 2546 BUG_ON(!em);
2547 2547
2548 BUG_ON(em->start > logical || em->start + em->len < logical); 2548 BUG_ON(em->start > logical || em->start + em->len < logical);
@@ -2604,9 +2604,9 @@ again:
2604 atomic_set(&multi->error, 0); 2604 atomic_set(&multi->error, 0);
2605 } 2605 }
2606 2606
2607 spin_lock(&em_tree->lock); 2607 read_lock(&em_tree->lock);
2608 em = lookup_extent_mapping(em_tree, logical, *length); 2608 em = lookup_extent_mapping(em_tree, logical, *length);
2609 spin_unlock(&em_tree->lock); 2609 read_unlock(&em_tree->lock);
2610 2610
2611 if (!em && unplug_page) 2611 if (!em && unplug_page)
2612 return 0; 2612 return 0;
@@ -2763,9 +2763,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
2763 u64 stripe_nr; 2763 u64 stripe_nr;
2764 int i, j, nr = 0; 2764 int i, j, nr = 0;
2765 2765
2766 spin_lock(&em_tree->lock); 2766 read_lock(&em_tree->lock);
2767 em = lookup_extent_mapping(em_tree, chunk_start, 1); 2767 em = lookup_extent_mapping(em_tree, chunk_start, 1);
2768 spin_unlock(&em_tree->lock); 2768 read_unlock(&em_tree->lock);
2769 2769
2770 BUG_ON(!em || em->start != chunk_start); 2770 BUG_ON(!em || em->start != chunk_start);
2771 map = (struct map_lookup *)em->bdev; 2771 map = (struct map_lookup *)em->bdev;
@@ -3053,9 +3053,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
3053 logical = key->offset; 3053 logical = key->offset;
3054 length = btrfs_chunk_length(leaf, chunk); 3054 length = btrfs_chunk_length(leaf, chunk);
3055 3055
3056 spin_lock(&map_tree->map_tree.lock); 3056 read_lock(&map_tree->map_tree.lock);
3057 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); 3057 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
3058 spin_unlock(&map_tree->map_tree.lock); 3058 read_unlock(&map_tree->map_tree.lock);
3059 3059
3060 /* already mapped? */ 3060 /* already mapped? */
3061 if (em && em->start <= logical && em->start + em->len > logical) { 3061 if (em && em->start <= logical && em->start + em->len > logical) {
@@ -3114,9 +3114,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
3114 map->stripes[i].dev->in_fs_metadata = 1; 3114 map->stripes[i].dev->in_fs_metadata = 1;
3115 } 3115 }
3116 3116
3117 spin_lock(&map_tree->map_tree.lock); 3117 write_lock(&map_tree->map_tree.lock);
3118 ret = add_extent_mapping(&map_tree->map_tree, em); 3118 ret = add_extent_mapping(&map_tree->map_tree, em);
3119 spin_unlock(&map_tree->map_tree.lock); 3119 write_unlock(&map_tree->map_tree.lock);
3120 BUG_ON(ret); 3120 BUG_ON(ret);
3121 free_extent_map(em); 3121 free_extent_map(em);
3122 3122