aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Makefile19
-rw-r--r--fs/btrfs/acl.c18
-rw-r--r--fs/btrfs/async-thread.c60
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/ctree.c17
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c102
-rw-r--r--fs/btrfs/extent-tree.c49
-rw-r--r--fs/btrfs/extent_io.c167
-rw-r--r--fs/btrfs/extent_map.c17
-rw-r--r--fs/btrfs/file.c95
-rw-r--r--fs/btrfs/free-space-cache.c15
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c183
-rw-r--r--fs/btrfs/ioctl.c58
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/super.c40
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c159
-rw-r--r--fs/btrfs/volumes.h16
21 files changed, 569 insertions, 466 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9adf5e4f7e96..94212844a9bc 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -1,25 +1,10 @@
1ifneq ($(KERNELRELEASE),)
2# kbuild part of makefile
3 1
4obj-$(CONFIG_BTRFS_FS) := btrfs.o 2obj-$(CONFIG_BTRFS_FS) := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 3
4btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 file-item.o inode-item.o inode-map.o disk-io.o \ 5 file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
10 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
11 compression.o delayed-ref.o 10 compression.o delayed-ref.o
12else
13
14# Normal Makefile
15
16KERNELDIR := /lib/modules/`uname -r`/build
17all:
18 $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
19
20modules_install:
21 $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
22clean:
23 $(MAKE) -C $(KERNELDIR) M=`pwd` clean
24
25endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7fdd184a528d..cbba000dccbe 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
60 return ERR_PTR(-EINVAL); 60 return ERR_PTR(-EINVAL);
61 } 61 }
62 62
63 /* Handle the cached NULL acl case without locking */
64 acl = ACCESS_ONCE(*p_acl);
65 if (!acl)
66 return acl;
67
63 spin_lock(&inode->i_lock); 68 spin_lock(&inode->i_lock);
64 if (*p_acl != BTRFS_ACL_NOT_CACHED) 69 acl = *p_acl;
65 acl = posix_acl_dup(*p_acl); 70 if (acl != BTRFS_ACL_NOT_CACHED)
71 acl = posix_acl_dup(acl);
66 spin_unlock(&inode->i_lock); 72 spin_unlock(&inode->i_lock);
67 73
68 if (acl) 74 if (acl != BTRFS_ACL_NOT_CACHED)
69 return acl; 75 return acl;
70 76
71
72 size = __btrfs_getxattr(inode, name, "", 0); 77 size = __btrfs_getxattr(inode, name, "", 0);
73 if (size > 0) { 78 if (size > 0) {
74 value = kzalloc(size, GFP_NOFS); 79 value = kzalloc(size, GFP_NOFS);
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
80 btrfs_update_cached_acl(inode, p_acl, acl); 85 btrfs_update_cached_acl(inode, p_acl, acl);
81 } 86 }
82 kfree(value); 87 kfree(value);
83 } else if (size == -ENOENT) { 88 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
89 /* FIXME, who returns -ENOENT? I think nobody */
84 acl = NULL; 90 acl = NULL;
85 btrfs_update_cached_acl(inode, p_acl, acl); 91 btrfs_update_cached_acl(inode, p_acl, acl);
92 } else {
93 acl = ERR_PTR(-EIO);
86 } 94 }
87 95
88 return acl; 96 return acl;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 51bfdfc8fcda..502c3d61de62 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -25,6 +25,7 @@
25#define WORK_QUEUED_BIT 0 25#define WORK_QUEUED_BIT 0
26#define WORK_DONE_BIT 1 26#define WORK_DONE_BIT 1
27#define WORK_ORDER_DONE_BIT 2 27#define WORK_ORDER_DONE_BIT 2
28#define WORK_HIGH_PRIO_BIT 3
28 29
29/* 30/*
30 * container for the kthread task pointer and the list of pending work 31 * container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {
36 37
37 /* list of struct btrfs_work that are waiting for service */ 38 /* list of struct btrfs_work that are waiting for service */
38 struct list_head pending; 39 struct list_head pending;
40 struct list_head prio_pending;
39 41
40 /* list of worker threads from struct btrfs_workers */ 42 /* list of worker threads from struct btrfs_workers */
41 struct list_head worker_list; 43 struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
103 105
104 spin_lock_irqsave(&workers->lock, flags); 106 spin_lock_irqsave(&workers->lock, flags);
105 107
106 while (!list_empty(&workers->order_list)) { 108 while (1) {
107 work = list_entry(workers->order_list.next, 109 if (!list_empty(&workers->prio_order_list)) {
108 struct btrfs_work, order_list); 110 work = list_entry(workers->prio_order_list.next,
109 111 struct btrfs_work, order_list);
112 } else if (!list_empty(&workers->order_list)) {
113 work = list_entry(workers->order_list.next,
114 struct btrfs_work, order_list);
115 } else {
116 break;
117 }
110 if (!test_bit(WORK_DONE_BIT, &work->flags)) 118 if (!test_bit(WORK_DONE_BIT, &work->flags))
111 break; 119 break;
112 120
@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
143 do { 151 do {
144 spin_lock_irq(&worker->lock); 152 spin_lock_irq(&worker->lock);
145again_locked: 153again_locked:
146 while (!list_empty(&worker->pending)) { 154 while (1) {
147 cur = worker->pending.next; 155 if (!list_empty(&worker->prio_pending))
156 cur = worker->prio_pending.next;
157 else if (!list_empty(&worker->pending))
158 cur = worker->pending.next;
159 else
160 break;
161
148 work = list_entry(cur, struct btrfs_work, list); 162 work = list_entry(cur, struct btrfs_work, list);
149 list_del(&work->list); 163 list_del(&work->list);
150 clear_bit(WORK_QUEUED_BIT, &work->flags); 164 clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ again_locked:
163 177
164 spin_lock_irq(&worker->lock); 178 spin_lock_irq(&worker->lock);
165 check_idle_worker(worker); 179 check_idle_worker(worker);
166
167 } 180 }
168 if (freezing(current)) { 181 if (freezing(current)) {
169 worker->working = 0; 182 worker->working = 0;
@@ -178,7 +191,8 @@ again_locked:
178 * jump_in? 191 * jump_in?
179 */ 192 */
180 smp_mb(); 193 smp_mb();
181 if (!list_empty(&worker->pending)) 194 if (!list_empty(&worker->pending) ||
195 !list_empty(&worker->prio_pending))
182 continue; 196 continue;
183 197
184 /* 198 /*
@@ -191,7 +205,8 @@ again_locked:
191 */ 205 */
192 schedule_timeout(1); 206 schedule_timeout(1);
193 smp_mb(); 207 smp_mb();
194 if (!list_empty(&worker->pending)) 208 if (!list_empty(&worker->pending) ||
209 !list_empty(&worker->prio_pending))
195 continue; 210 continue;
196 211
197 if (kthread_should_stop()) 212 if (kthread_should_stop())
@@ -200,7 +215,8 @@ again_locked:
200 /* still no more work?, sleep for real */ 215 /* still no more work?, sleep for real */
201 spin_lock_irq(&worker->lock); 216 spin_lock_irq(&worker->lock);
202 set_current_state(TASK_INTERRUPTIBLE); 217 set_current_state(TASK_INTERRUPTIBLE);
203 if (!list_empty(&worker->pending)) 218 if (!list_empty(&worker->pending) ||
219 !list_empty(&worker->prio_pending))
204 goto again_locked; 220 goto again_locked;
205 221
206 /* 222 /*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
248 INIT_LIST_HEAD(&workers->worker_list); 264 INIT_LIST_HEAD(&workers->worker_list);
249 INIT_LIST_HEAD(&workers->idle_list); 265 INIT_LIST_HEAD(&workers->idle_list);
250 INIT_LIST_HEAD(&workers->order_list); 266 INIT_LIST_HEAD(&workers->order_list);
267 INIT_LIST_HEAD(&workers->prio_order_list);
251 spin_lock_init(&workers->lock); 268 spin_lock_init(&workers->lock);
252 workers->max_workers = max; 269 workers->max_workers = max;
253 workers->idle_thresh = 32; 270 workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
273 } 290 }
274 291
275 INIT_LIST_HEAD(&worker->pending); 292 INIT_LIST_HEAD(&worker->pending);
293 INIT_LIST_HEAD(&worker->prio_pending);
276 INIT_LIST_HEAD(&worker->worker_list); 294 INIT_LIST_HEAD(&worker->worker_list);
277 spin_lock_init(&worker->lock); 295 spin_lock_init(&worker->lock);
278 atomic_set(&worker->num_pending, 0); 296 atomic_set(&worker->num_pending, 0);
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work)
396 goto out; 414 goto out;
397 415
398 spin_lock_irqsave(&worker->lock, flags); 416 spin_lock_irqsave(&worker->lock, flags);
399 list_add_tail(&work->list, &worker->pending); 417 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
418 list_add_tail(&work->list, &worker->prio_pending);
419 else
420 list_add_tail(&work->list, &worker->pending);
400 atomic_inc(&worker->num_pending); 421 atomic_inc(&worker->num_pending);
401 422
402 /* by definition we're busy, take ourselves off the idle 423 /* by definition we're busy, take ourselves off the idle
@@ -422,6 +443,11 @@ out:
422 return 0; 443 return 0;
423} 444}
424 445
446void btrfs_set_work_high_prio(struct btrfs_work *work)
447{
448 set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
449}
450
425/* 451/*
426 * places a struct btrfs_work into the pending queue of one of the kthreads 452 * places a struct btrfs_work into the pending queue of one of the kthreads
427 */ 453 */
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
438 worker = find_worker(workers); 464 worker = find_worker(workers);
439 if (workers->ordered) { 465 if (workers->ordered) {
440 spin_lock_irqsave(&workers->lock, flags); 466 spin_lock_irqsave(&workers->lock, flags);
441 list_add_tail(&work->order_list, &workers->order_list); 467 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
468 list_add_tail(&work->order_list,
469 &workers->prio_order_list);
470 } else {
471 list_add_tail(&work->order_list, &workers->order_list);
472 }
442 spin_unlock_irqrestore(&workers->lock, flags); 473 spin_unlock_irqrestore(&workers->lock, flags);
443 } else { 474 } else {
444 INIT_LIST_HEAD(&work->order_list); 475 INIT_LIST_HEAD(&work->order_list);
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
446 477
447 spin_lock_irqsave(&worker->lock, flags); 478 spin_lock_irqsave(&worker->lock, flags);
448 479
449 list_add_tail(&work->list, &worker->pending); 480 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
481 list_add_tail(&work->list, &worker->prio_pending);
482 else
483 list_add_tail(&work->list, &worker->pending);
450 atomic_inc(&worker->num_pending); 484 atomic_inc(&worker->num_pending);
451 check_busy_worker(worker); 485 check_busy_worker(worker);
452 486
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 31be4ed8b63e..1b511c109db6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -85,6 +85,7 @@ struct btrfs_workers {
85 * of work items waiting for completion 85 * of work items waiting for completion
86 */ 86 */
87 struct list_head order_list; 87 struct list_head order_list;
88 struct list_head prio_order_list;
88 89
89 /* lock for finding the next worker thread to queue on */ 90 /* lock for finding the next worker thread to queue on */
90 spinlock_t lock; 91 spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
98int btrfs_stop_workers(struct btrfs_workers *workers); 99int btrfs_stop_workers(struct btrfs_workers *workers);
99void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); 100void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
100int btrfs_requeue_work(struct btrfs_work *work); 101int btrfs_requeue_work(struct btrfs_work *work);
102void btrfs_set_work_high_prio(struct btrfs_work *work);
101#endif 103#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e5b2533b691a..a99f1c2a710d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1325 int ret = 0; 1325 int ret = 0;
1326 int blocksize; 1326 int blocksize;
1327 1327
1328 parent = path->nodes[level - 1]; 1328 parent = path->nodes[level + 1];
1329 if (!parent) 1329 if (!parent)
1330 return 0; 1330 return 0;
1331 1331
1332 nritems = btrfs_header_nritems(parent); 1332 nritems = btrfs_header_nritems(parent);
1333 slot = path->slots[level]; 1333 slot = path->slots[level + 1];
1334 blocksize = btrfs_level_size(root, level); 1334 blocksize = btrfs_level_size(root, level);
1335 1335
1336 if (slot > 0) { 1336 if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1341 block1 = 0; 1341 block1 = 0;
1342 free_extent_buffer(eb); 1342 free_extent_buffer(eb);
1343 } 1343 }
1344 if (slot < nritems) { 1344 if (slot + 1 < nritems) {
1345 block2 = btrfs_node_blockptr(parent, slot + 1); 1345 block2 = btrfs_node_blockptr(parent, slot + 1);
1346 gen = btrfs_node_ptr_generation(parent, slot + 1); 1346 gen = btrfs_node_ptr_generation(parent, slot + 1);
1347 eb = btrfs_find_tree_block(root, block2, blocksize); 1347 eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1351 } 1351 }
1352 if (block1 || block2) { 1352 if (block1 || block2) {
1353 ret = -EAGAIN; 1353 ret = -EAGAIN;
1354
1355 /* release the whole path */
1354 btrfs_release_path(root, path); 1356 btrfs_release_path(root, path);
1357
1358 /* read the blocks */
1355 if (block1) 1359 if (block1)
1356 readahead_tree_block(root, block1, blocksize, 0); 1360 readahead_tree_block(root, block1, blocksize, 0);
1357 if (block2) 1361 if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1361 eb = read_tree_block(root, block1, blocksize, 0); 1365 eb = read_tree_block(root, block1, blocksize, 0);
1362 free_extent_buffer(eb); 1366 free_extent_buffer(eb);
1363 } 1367 }
1364 if (block1) { 1368 if (block2) {
1365 eb = read_tree_block(root, block2, blocksize, 0); 1369 eb = read_tree_block(root, block2, blocksize, 0);
1366 free_extent_buffer(eb); 1370 free_extent_buffer(eb);
1367 } 1371 }
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1481 * of the btree by dropping locks before 1485 * of the btree by dropping locks before
1482 * we read. 1486 * we read.
1483 */ 1487 */
1484 btrfs_release_path(NULL, p); 1488 btrfs_unlock_up_safe(p, level + 1);
1489 btrfs_set_path_blocking(p);
1490
1485 if (tmp) 1491 if (tmp)
1486 free_extent_buffer(tmp); 1492 free_extent_buffer(tmp);
1487 if (p->reada) 1493 if (p->reada)
1488 reada_for_search(root, p, level, slot, key->objectid); 1494 reada_for_search(root, p, level, slot, key->objectid);
1489 1495
1496 btrfs_release_path(NULL, p);
1490 tmp = read_tree_block(root, blocknr, blocksize, gen); 1497 tmp = read_tree_block(root, blocknr, blocksize, gen);
1491 if (tmp) 1498 if (tmp)
1492 free_extent_buffer(tmp); 1499 free_extent_buffer(tmp);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad96495dedc5..4414a5d9983a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -881,6 +881,9 @@ struct btrfs_fs_info {
881 u64 metadata_alloc_profile; 881 u64 metadata_alloc_profile;
882 u64 system_alloc_profile; 882 u64 system_alloc_profile;
883 883
884 unsigned data_chunk_allocations;
885 unsigned metadata_ratio;
886
884 void *bdev_holder; 887 void *bdev_holder;
885}; 888};
886 889
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2174extern struct file_operations btrfs_file_operations; 2177extern struct file_operations btrfs_file_operations;
2175int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2178int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2176 struct btrfs_root *root, struct inode *inode, 2179 struct btrfs_root *root, struct inode *inode,
2177 u64 start, u64 end, u64 inline_limit, u64 *hint_block); 2180 u64 start, u64 end, u64 locked_end,
2181 u64 inline_limit, u64 *hint_block);
2178int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2182int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2179 struct btrfs_root *root, 2183 struct btrfs_root *root,
2180 struct inode *inode, u64 start, u64 end); 2184 struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..0ff16d3331da 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
232 memcpy(&found, result, csum_size); 232 memcpy(&found, result, csum_size);
233 233
234 read_extent_buffer(buf, &val, 0, csum_size); 234 read_extent_buffer(buf, &val, 0, csum_size);
235 printk(KERN_INFO "btrfs: %s checksum verify failed " 235 if (printk_ratelimit()) {
236 "on %llu wanted %X found %X level %d\n", 236 printk(KERN_INFO "btrfs: %s checksum verify "
237 root->fs_info->sb->s_id, 237 "failed on %llu wanted %X found %X "
238 buf->start, val, found, btrfs_header_level(buf)); 238 "level %d\n",
239 root->fs_info->sb->s_id,
240 (unsigned long long)buf->start, val, found,
241 btrfs_header_level(buf));
242 }
239 if (result != (char *)&inline_result) 243 if (result != (char *)&inline_result)
240 kfree(result); 244 kfree(result);
241 return 1; 245 return 1;
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
268 ret = 0; 272 ret = 0;
269 goto out; 273 goto out;
270 } 274 }
271 printk("parent transid verify failed on %llu wanted %llu found %llu\n", 275 if (printk_ratelimit()) {
272 (unsigned long long)eb->start, 276 printk("parent transid verify failed on %llu wanted %llu "
273 (unsigned long long)parent_transid, 277 "found %llu\n",
274 (unsigned long long)btrfs_header_generation(eb)); 278 (unsigned long long)eb->start,
279 (unsigned long long)parent_transid,
280 (unsigned long long)btrfs_header_generation(eb));
281 }
275 ret = 1; 282 ret = 1;
276 clear_extent_buffer_uptodate(io_tree, eb); 283 clear_extent_buffer_uptodate(io_tree, eb);
277out: 284out:
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
415 422
416 found_start = btrfs_header_bytenr(eb); 423 found_start = btrfs_header_bytenr(eb);
417 if (found_start != start) { 424 if (found_start != start) {
418 printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", 425 if (printk_ratelimit()) {
419 (unsigned long long)found_start, 426 printk(KERN_INFO "btrfs bad tree block start "
420 (unsigned long long)eb->start); 427 "%llu %llu\n",
428 (unsigned long long)found_start,
429 (unsigned long long)eb->start);
430 }
421 ret = -EIO; 431 ret = -EIO;
422 goto err; 432 goto err;
423 } 433 }
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
429 goto err; 439 goto err;
430 } 440 }
431 if (check_tree_block_fsid(root, eb)) { 441 if (check_tree_block_fsid(root, eb)) {
432 printk(KERN_INFO "btrfs bad fsid on block %llu\n", 442 if (printk_ratelimit()) {
433 (unsigned long long)eb->start); 443 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
444 (unsigned long long)eb->start);
445 }
434 ret = -EIO; 446 ret = -EIO;
435 goto err; 447 goto err;
436 } 448 }
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
579 async->bio_flags = bio_flags; 591 async->bio_flags = bio_flags;
580 592
581 atomic_inc(&fs_info->nr_async_submits); 593 atomic_inc(&fs_info->nr_async_submits);
594
595 if (rw & (1 << BIO_RW_SYNCIO))
596 btrfs_set_work_high_prio(&async->work);
597
582 btrfs_queue_worker(&fs_info->workers, &async->work); 598 btrfs_queue_worker(&fs_info->workers, &async->work);
583#if 0
584 int limit = btrfs_async_submit_limit(fs_info);
585 if (atomic_read(&fs_info->nr_async_submits) > limit) {
586 wait_event_timeout(fs_info->async_submit_wait,
587 (atomic_read(&fs_info->nr_async_submits) < limit),
588 HZ/10);
589 599
590 wait_event_timeout(fs_info->async_submit_wait,
591 (atomic_read(&fs_info->nr_async_bios) < limit),
592 HZ/10);
593 }
594#endif
595 while (atomic_read(&fs_info->async_submit_draining) && 600 while (atomic_read(&fs_info->async_submit_draining) &&
596 atomic_read(&fs_info->nr_async_submits)) { 601 atomic_read(&fs_info->nr_async_submits)) {
597 wait_event(fs_info->async_submit_wait, 602 wait_event(fs_info->async_submit_wait,
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
656 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 661 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
657 mirror_num, 0); 662 mirror_num, 0);
658 } 663 }
664
659 /* 665 /*
660 * kthread helpers are used to submit writes so that checksumming 666 * kthread helpers are used to submit writes so that checksumming
661 * can happen in parallel across all CPUs 667 * can happen in parallel across all CPUs
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
765 } 771 }
766} 772}
767 773
768#if 0
769static int btree_writepage(struct page *page, struct writeback_control *wbc)
770{
771 struct buffer_head *bh;
772 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
773 struct buffer_head *head;
774 if (!page_has_buffers(page)) {
775 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
776 (1 << BH_Dirty)|(1 << BH_Uptodate));
777 }
778 head = page_buffers(page);
779 bh = head;
780 do {
781 if (buffer_dirty(bh))
782 csum_tree_block(root, bh, 0);
783 bh = bh->b_this_page;
784 } while (bh != head);
785 return block_write_full_page(page, btree_get_block, wbc);
786}
787#endif
788
789static struct address_space_operations btree_aops = { 774static struct address_space_operations btree_aops = {
790 .readpage = btree_readpage, 775 .readpage = btree_readpage,
791 .writepage = btree_writepage, 776 .writepage = btree_writepage,
@@ -1273,11 +1258,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1273 int ret = 0; 1258 int ret = 0;
1274 struct btrfs_device *device; 1259 struct btrfs_device *device;
1275 struct backing_dev_info *bdi; 1260 struct backing_dev_info *bdi;
1276#if 0 1261
1277 if ((bdi_bits & (1 << BDI_write_congested)) &&
1278 btrfs_congested_async(info, 0))
1279 return 1;
1280#endif
1281 list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1262 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1282 if (!device->bdev) 1263 if (!device->bdev)
1283 continue; 1264 continue;
@@ -1599,6 +1580,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1599 fs_info->btree_inode = new_inode(sb); 1580 fs_info->btree_inode = new_inode(sb);
1600 fs_info->btree_inode->i_ino = 1; 1581 fs_info->btree_inode->i_ino = 1;
1601 fs_info->btree_inode->i_nlink = 1; 1582 fs_info->btree_inode->i_nlink = 1;
1583 fs_info->metadata_ratio = 8;
1602 1584
1603 fs_info->thread_pool_size = min_t(unsigned long, 1585 fs_info->thread_pool_size = min_t(unsigned long,
1604 num_online_cpus() + 2, 8); 1586 num_online_cpus() + 2, 8);
@@ -1689,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 if (features) { 1671 if (features) {
1690 printk(KERN_ERR "BTRFS: couldn't mount because of " 1672 printk(KERN_ERR "BTRFS: couldn't mount because of "
1691 "unsupported optional features (%Lx).\n", 1673 "unsupported optional features (%Lx).\n",
1692 features); 1674 (unsigned long long)features);
1693 err = -EINVAL; 1675 err = -EINVAL;
1694 goto fail_iput; 1676 goto fail_iput;
1695 } 1677 }
@@ -1699,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1699 if (!(sb->s_flags & MS_RDONLY) && features) { 1681 if (!(sb->s_flags & MS_RDONLY) && features) {
1700 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 1682 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1701 "unsupported option features (%Lx).\n", 1683 "unsupported option features (%Lx).\n",
1702 features); 1684 (unsigned long long)features);
1703 err = -EINVAL; 1685 err = -EINVAL;
1704 goto fail_iput; 1686 goto fail_iput;
1705 } 1687 }
@@ -2095,10 +2077,10 @@ static int write_dev_supers(struct btrfs_device *device,
2095 device->barriers = 0; 2077 device->barriers = 0;
2096 get_bh(bh); 2078 get_bh(bh);
2097 lock_buffer(bh); 2079 lock_buffer(bh);
2098 ret = submit_bh(WRITE, bh); 2080 ret = submit_bh(WRITE_SYNC, bh);
2099 } 2081 }
2100 } else { 2082 } else {
2101 ret = submit_bh(WRITE, bh); 2083 ret = submit_bh(WRITE_SYNC, bh);
2102 } 2084 }
2103 2085
2104 if (!ret && wait) { 2086 if (!ret && wait) {
@@ -2291,7 +2273,7 @@ int close_ctree(struct btrfs_root *root)
2291 2273
2292 if (fs_info->delalloc_bytes) { 2274 if (fs_info->delalloc_bytes) {
2293 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 2275 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
2294 fs_info->delalloc_bytes); 2276 (unsigned long long)fs_info->delalloc_bytes);
2295 } 2277 }
2296 if (fs_info->total_ref_cache_size) { 2278 if (fs_info->total_ref_cache_size) {
2297 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", 2279 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
@@ -2328,16 +2310,6 @@ int close_ctree(struct btrfs_root *root)
2328 btrfs_stop_workers(&fs_info->endio_write_workers); 2310 btrfs_stop_workers(&fs_info->endio_write_workers);
2329 btrfs_stop_workers(&fs_info->submit_workers); 2311 btrfs_stop_workers(&fs_info->submit_workers);
2330 2312
2331#if 0
2332 while (!list_empty(&fs_info->hashers)) {
2333 struct btrfs_hasher *hasher;
2334 hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
2335 hashers);
2336 list_del(&hasher->hashers);
2337 crypto_free_hash(&fs_info->hash_tfm);
2338 kfree(hasher);
2339 }
2340#endif
2341 btrfs_close_devices(fs_info->fs_devices); 2313 btrfs_close_devices(fs_info->fs_devices);
2342 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2314 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2343 2315
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 178df4c67de4..e4966444811b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1844,10 +1844,14 @@ again:
1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
1845 ", %llu bytes_used, %llu bytes_reserved, " 1845 ", %llu bytes_used, %llu bytes_reserved, "
1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
1847 "%llu total\n", bytes, data_sinfo->bytes_delalloc, 1847 "%llu total\n", (unsigned long long)bytes,
1848 data_sinfo->bytes_used, data_sinfo->bytes_reserved, 1848 (unsigned long long)data_sinfo->bytes_delalloc,
1849 data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, 1849 (unsigned long long)data_sinfo->bytes_used,
1850 data_sinfo->bytes_may_use, data_sinfo->total_bytes); 1850 (unsigned long long)data_sinfo->bytes_reserved,
1851 (unsigned long long)data_sinfo->bytes_pinned,
1852 (unsigned long long)data_sinfo->bytes_readonly,
1853 (unsigned long long)data_sinfo->bytes_may_use,
1854 (unsigned long long)data_sinfo->total_bytes);
1851 return -ENOSPC; 1855 return -ENOSPC;
1852 } 1856 }
1853 data_sinfo->bytes_may_use += bytes; 1857 data_sinfo->bytes_may_use += bytes;
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1918 spin_unlock(&info->lock); 1922 spin_unlock(&info->lock);
1919} 1923}
1920 1924
1925static void force_metadata_allocation(struct btrfs_fs_info *info)
1926{
1927 struct list_head *head = &info->space_info;
1928 struct btrfs_space_info *found;
1929
1930 rcu_read_lock();
1931 list_for_each_entry_rcu(found, head, list) {
1932 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
1933 found->force_alloc = 1;
1934 }
1935 rcu_read_unlock();
1936}
1937
1921static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1938static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1922 struct btrfs_root *extent_root, u64 alloc_bytes, 1939 struct btrfs_root *extent_root, u64 alloc_bytes,
1923 u64 flags, int force) 1940 u64 flags, int force)
1924{ 1941{
1925 struct btrfs_space_info *space_info; 1942 struct btrfs_space_info *space_info;
1943 struct btrfs_fs_info *fs_info = extent_root->fs_info;
1926 u64 thresh; 1944 u64 thresh;
1927 int ret = 0; 1945 int ret = 0;
1928 1946
1929 mutex_lock(&extent_root->fs_info->chunk_mutex); 1947 mutex_lock(&fs_info->chunk_mutex);
1930 1948
1931 flags = btrfs_reduce_alloc_profile(extent_root, flags); 1949 flags = btrfs_reduce_alloc_profile(extent_root, flags);
1932 1950
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1958 } 1976 }
1959 spin_unlock(&space_info->lock); 1977 spin_unlock(&space_info->lock);
1960 1978
1979 /*
1980 * if we're doing a data chunk, go ahead and make sure that
1981 * we keep a reasonable number of metadata chunks allocated in the
1982 * FS as well.
1983 */
1984 if (flags & BTRFS_BLOCK_GROUP_DATA) {
1985 fs_info->data_chunk_allocations++;
1986 if (!(fs_info->data_chunk_allocations %
1987 fs_info->metadata_ratio))
1988 force_metadata_allocation(fs_info);
1989 }
1990
1961 ret = btrfs_alloc_chunk(trans, extent_root, flags); 1991 ret = btrfs_alloc_chunk(trans, extent_root, flags);
1962 if (ret) 1992 if (ret)
1963 space_info->full = 1; 1993 space_info->full = 1;
@@ -2798,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2798 info->bytes_pinned - info->bytes_reserved), 2828 info->bytes_pinned - info->bytes_reserved),
2799 (info->full) ? "" : "not "); 2829 (info->full) ? "" : "not ");
2800 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 2830 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
2801 " may_use=%llu, used=%llu\n", info->total_bytes, 2831 " may_use=%llu, used=%llu\n",
2802 info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, 2832 (unsigned long long)info->total_bytes,
2803 info->bytes_used); 2833 (unsigned long long)info->bytes_pinned,
2834 (unsigned long long)info->bytes_delalloc,
2835 (unsigned long long)info->bytes_may_use,
2836 (unsigned long long)info->bytes_used);
2804 2837
2805 down_read(&info->groups_sem); 2838 down_read(&info->groups_sem);
2806 list_for_each_entry(cache, &info->block_groups, list) { 2839 list_for_each_entry(cache, &info->block_groups, list) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..fe9eb990e443 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
17#include "ctree.h" 17#include "ctree.h"
18#include "btrfs_inode.h" 18#include "btrfs_inode.h"
19 19
20/* temporary define until extent_map moves out of btrfs */
21struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
22 unsigned long extra_flags,
23 void (*ctor)(void *, struct kmem_cache *,
24 unsigned long));
25
26static struct kmem_cache *extent_state_cache; 20static struct kmem_cache *extent_state_cache;
27static struct kmem_cache *extent_buffer_cache; 21static struct kmem_cache *extent_buffer_cache;
28 22
@@ -50,20 +44,23 @@ struct extent_page_data {
50 /* tells writepage not to lock the state bits for this range 44 /* tells writepage not to lock the state bits for this range
51 * it still does the unlocking 45 * it still does the unlocking
52 */ 46 */
53 int extent_locked; 47 unsigned int extent_locked:1;
48
49 /* tells the submit_bio code to use a WRITE_SYNC */
50 unsigned int sync_io:1;
54}; 51};
55 52
56int __init extent_io_init(void) 53int __init extent_io_init(void)
57{ 54{
58 extent_state_cache = btrfs_cache_create("extent_state", 55 extent_state_cache = kmem_cache_create("extent_state",
59 sizeof(struct extent_state), 0, 56 sizeof(struct extent_state), 0,
60 NULL); 57 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
61 if (!extent_state_cache) 58 if (!extent_state_cache)
62 return -ENOMEM; 59 return -ENOMEM;
63 60
64 extent_buffer_cache = btrfs_cache_create("extent_buffers", 61 extent_buffer_cache = kmem_cache_create("extent_buffers",
65 sizeof(struct extent_buffer), 0, 62 sizeof(struct extent_buffer), 0,
66 NULL); 63 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
67 if (!extent_buffer_cache) 64 if (!extent_buffer_cache)
68 goto free_state_cache; 65 goto free_state_cache;
69 return 0; 66 return 0;
@@ -1404,69 +1401,6 @@ out:
1404 return total_bytes; 1401 return total_bytes;
1405} 1402}
1406 1403
1407#if 0
1408/*
1409 * helper function to lock both pages and extents in the tree.
1410 * pages must be locked first.
1411 */
1412static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1413{
1414 unsigned long index = start >> PAGE_CACHE_SHIFT;
1415 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1416 struct page *page;
1417 int err;
1418
1419 while (index <= end_index) {
1420 page = grab_cache_page(tree->mapping, index);
1421 if (!page) {
1422 err = -ENOMEM;
1423 goto failed;
1424 }
1425 if (IS_ERR(page)) {
1426 err = PTR_ERR(page);
1427 goto failed;
1428 }
1429 index++;
1430 }
1431 lock_extent(tree, start, end, GFP_NOFS);
1432 return 0;
1433
1434failed:
1435 /*
1436 * we failed above in getting the page at 'index', so we undo here
1437 * up to but not including the page at 'index'
1438 */
1439 end_index = index;
1440 index = start >> PAGE_CACHE_SHIFT;
1441 while (index < end_index) {
1442 page = find_get_page(tree->mapping, index);
1443 unlock_page(page);
1444 page_cache_release(page);
1445 index++;
1446 }
1447 return err;
1448}
1449
1450/*
1451 * helper function to unlock both pages and extents in the tree.
1452 */
1453static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1454{
1455 unsigned long index = start >> PAGE_CACHE_SHIFT;
1456 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1457 struct page *page;
1458
1459 while (index <= end_index) {
1460 page = find_get_page(tree->mapping, index);
1461 unlock_page(page);
1462 page_cache_release(page);
1463 index++;
1464 }
1465 unlock_extent(tree, start, end, GFP_NOFS);
1466 return 0;
1467}
1468#endif
1469
1470/* 1404/*
1471 * set the private field for a given byte offset in the tree. If there isn't 1405 * set the private field for a given byte offset in the tree. If there isn't
1472 * an extent_state there already, this does nothing. 1406 * an extent_state there already, this does nothing.
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2101 return ret; 2035 return ret;
2102} 2036}
2103 2037
2038static noinline void update_nr_written(struct page *page,
2039 struct writeback_control *wbc,
2040 unsigned long nr_written)
2041{
2042 wbc->nr_to_write -= nr_written;
2043 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2044 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2045 page->mapping->writeback_index = page->index + nr_written;
2046}
2047
2104/* 2048/*
2105 * the writepage semantics are similar to regular writepage. extent 2049 * the writepage semantics are similar to regular writepage. extent
2106 * records are inserted to lock ranges in the tree, and as dirty areas 2050 * records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2136 u64 delalloc_end; 2080 u64 delalloc_end;
2137 int page_started; 2081 int page_started;
2138 int compressed; 2082 int compressed;
2083 int write_flags;
2139 unsigned long nr_written = 0; 2084 unsigned long nr_written = 0;
2140 2085
2086 if (wbc->sync_mode == WB_SYNC_ALL)
2087 write_flags = WRITE_SYNC_PLUG;
2088 else
2089 write_flags = WRITE;
2090
2141 WARN_ON(!PageLocked(page)); 2091 WARN_ON(!PageLocked(page));
2142 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2092 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2143 if (page->index > end_index || 2093 if (page->index > end_index ||
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2164 delalloc_end = 0; 2114 delalloc_end = 0;
2165 page_started = 0; 2115 page_started = 0;
2166 if (!epd->extent_locked) { 2116 if (!epd->extent_locked) {
2117 /*
2118 * make sure the wbc mapping index is at least updated
2119 * to this page.
2120 */
2121 update_nr_written(page, wbc, 0);
2122
2167 while (delalloc_end < page_end) { 2123 while (delalloc_end < page_end) {
2168 nr_delalloc = find_lock_delalloc_range(inode, tree, 2124 nr_delalloc = find_lock_delalloc_range(inode, tree,
2169 page, 2125 page,
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2185 */ 2141 */
2186 if (page_started) { 2142 if (page_started) {
2187 ret = 0; 2143 ret = 0;
2188 goto update_nr_written; 2144 /*
2145 * we've unlocked the page, so we can't update
2146 * the mapping's writeback index, just update
2147 * nr_to_write.
2148 */
2149 wbc->nr_to_write -= nr_written;
2150 goto done_unlocked;
2189 } 2151 }
2190 } 2152 }
2191 lock_extent(tree, start, page_end, GFP_NOFS); 2153 lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2198 if (ret == -EAGAIN) { 2160 if (ret == -EAGAIN) {
2199 unlock_extent(tree, start, page_end, GFP_NOFS); 2161 unlock_extent(tree, start, page_end, GFP_NOFS);
2200 redirty_page_for_writepage(wbc, page); 2162 redirty_page_for_writepage(wbc, page);
2163 update_nr_written(page, wbc, nr_written);
2201 unlock_page(page); 2164 unlock_page(page);
2202 ret = 0; 2165 ret = 0;
2203 goto update_nr_written; 2166 goto done_unlocked;
2204 } 2167 }
2205 } 2168 }
2206 2169
2207 nr_written++; 2170 /*
2171 * we don't want to touch the inode after unlocking the page,
2172 * so we update the mapping writeback index now
2173 */
2174 update_nr_written(page, wbc, nr_written + 1);
2208 2175
2209 end = page_end; 2176 end = page_end;
2210 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) 2177 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2314 (unsigned long long)end); 2281 (unsigned long long)end);
2315 } 2282 }
2316 2283
2317 ret = submit_extent_page(WRITE, tree, page, sector, 2284 ret = submit_extent_page(write_flags, tree, page,
2318 iosize, pg_offset, bdev, 2285 sector, iosize, pg_offset,
2319 &epd->bio, max_nr, 2286 bdev, &epd->bio, max_nr,
2320 end_bio_extent_writepage, 2287 end_bio_extent_writepage,
2321 0, 0, 0); 2288 0, 0, 0);
2322 if (ret) 2289 if (ret)
@@ -2336,11 +2303,8 @@ done:
2336 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2303 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2337 unlock_page(page); 2304 unlock_page(page);
2338 2305
2339update_nr_written: 2306done_unlocked:
2340 wbc->nr_to_write -= nr_written; 2307
2341 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2342 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2343 page->mapping->writeback_index = page->index + nr_written;
2344 return 0; 2308 return 0;
2345} 2309}
2346 2310
@@ -2460,15 +2424,23 @@ retry:
2460 return ret; 2424 return ret;
2461} 2425}
2462 2426
2463static noinline void flush_write_bio(void *data) 2427static void flush_epd_write_bio(struct extent_page_data *epd)
2464{ 2428{
2465 struct extent_page_data *epd = data;
2466 if (epd->bio) { 2429 if (epd->bio) {
2467 submit_one_bio(WRITE, epd->bio, 0, 0); 2430 if (epd->sync_io)
2431 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
2432 else
2433 submit_one_bio(WRITE, epd->bio, 0, 0);
2468 epd->bio = NULL; 2434 epd->bio = NULL;
2469 } 2435 }
2470} 2436}
2471 2437
2438static noinline void flush_write_bio(void *data)
2439{
2440 struct extent_page_data *epd = data;
2441 flush_epd_write_bio(epd);
2442}
2443
2472int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2444int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2473 get_extent_t *get_extent, 2445 get_extent_t *get_extent,
2474 struct writeback_control *wbc) 2446 struct writeback_control *wbc)
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2480 .tree = tree, 2452 .tree = tree,
2481 .get_extent = get_extent, 2453 .get_extent = get_extent,
2482 .extent_locked = 0, 2454 .extent_locked = 0,
2455 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2483 }; 2456 };
2484 struct writeback_control wbc_writepages = { 2457 struct writeback_control wbc_writepages = {
2485 .bdi = wbc->bdi, 2458 .bdi = wbc->bdi,
2486 .sync_mode = WB_SYNC_NONE, 2459 .sync_mode = wbc->sync_mode,
2487 .older_than_this = NULL, 2460 .older_than_this = NULL,
2488 .nr_to_write = 64, 2461 .nr_to_write = 64,
2489 .range_start = page_offset(page) + PAGE_CACHE_SIZE, 2462 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2490 .range_end = (loff_t)-1, 2463 .range_end = (loff_t)-1,
2491 }; 2464 };
2492 2465
2493
2494 ret = __extent_writepage(page, wbc, &epd); 2466 ret = __extent_writepage(page, wbc, &epd);
2495 2467
2496 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2468 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2497 __extent_writepage, &epd, flush_write_bio); 2469 __extent_writepage, &epd, flush_write_bio);
2498 if (epd.bio) 2470 flush_epd_write_bio(&epd);
2499 submit_one_bio(WRITE, epd.bio, 0, 0);
2500 return ret; 2471 return ret;
2501} 2472}
2502 2473
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2515 .tree = tree, 2486 .tree = tree,
2516 .get_extent = get_extent, 2487 .get_extent = get_extent,
2517 .extent_locked = 1, 2488 .extent_locked = 1,
2489 .sync_io = mode == WB_SYNC_ALL,
2518 }; 2490 };
2519 struct writeback_control wbc_writepages = { 2491 struct writeback_control wbc_writepages = {
2520 .bdi = inode->i_mapping->backing_dev_info, 2492 .bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2540 start += PAGE_CACHE_SIZE; 2512 start += PAGE_CACHE_SIZE;
2541 } 2513 }
2542 2514
2543 if (epd.bio) 2515 flush_epd_write_bio(&epd);
2544 submit_one_bio(WRITE, epd.bio, 0, 0);
2545 return ret; 2516 return ret;
2546} 2517}
2547 2518
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree,
2556 .tree = tree, 2527 .tree = tree,
2557 .get_extent = get_extent, 2528 .get_extent = get_extent,
2558 .extent_locked = 0, 2529 .extent_locked = 0,
2530 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2559 }; 2531 };
2560 2532
2561 ret = extent_write_cache_pages(tree, mapping, wbc, 2533 ret = extent_write_cache_pages(tree, mapping, wbc,
2562 __extent_writepage, &epd, 2534 __extent_writepage, &epd,
2563 flush_write_bio); 2535 flush_write_bio);
2564 if (epd.bio) 2536 flush_epd_write_bio(&epd);
2565 submit_one_bio(WRITE, epd.bio, 0, 0);
2566 return ret; 2537 return ret;
2567} 2538}
2568 2539
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b187917b36fa..30c9365861e6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -6,19 +6,14 @@
6#include <linux/hardirq.h> 6#include <linux/hardirq.h>
7#include "extent_map.h" 7#include "extent_map.h"
8 8
9/* temporary define until extent_map moves out of btrfs */
10struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
11 unsigned long extra_flags,
12 void (*ctor)(void *, struct kmem_cache *,
13 unsigned long));
14 9
15static struct kmem_cache *extent_map_cache; 10static struct kmem_cache *extent_map_cache;
16 11
17int __init extent_map_init(void) 12int __init extent_map_init(void)
18{ 13{
19 extent_map_cache = btrfs_cache_create("extent_map", 14 extent_map_cache = kmem_cache_create("extent_map",
20 sizeof(struct extent_map), 0, 15 sizeof(struct extent_map), 0,
21 NULL); 16 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
22 if (!extent_map_cache) 17 if (!extent_map_cache)
23 return -ENOMEM; 18 return -ENOMEM;
24 return 0; 19 return 0;
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
43 tree->map.rb_node = NULL; 38 tree->map.rb_node = NULL;
44 spin_lock_init(&tree->lock); 39 spin_lock_init(&tree->lock);
45} 40}
46EXPORT_SYMBOL(extent_map_tree_init);
47 41
48/** 42/**
49 * alloc_extent_map - allocate new extent map structure 43 * alloc_extent_map - allocate new extent map structure
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask)
64 atomic_set(&em->refs, 1); 58 atomic_set(&em->refs, 1);
65 return em; 59 return em;
66} 60}
67EXPORT_SYMBOL(alloc_extent_map);
68 61
69/** 62/**
70 * free_extent_map - drop reference count of an extent_map 63 * free_extent_map - drop reference count of an extent_map
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em)
83 kmem_cache_free(extent_map_cache, em); 76 kmem_cache_free(extent_map_cache, em);
84 } 77 }
85} 78}
86EXPORT_SYMBOL(free_extent_map);
87 79
88static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 80static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
89 struct rb_node *node) 81 struct rb_node *node)
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
264out: 256out:
265 return ret; 257 return ret;
266} 258}
267EXPORT_SYMBOL(add_extent_mapping);
268 259
269/* simple helper to do math around the end of an extent, handling wrap */ 260/* simple helper to do math around the end of an extent, handling wrap */
270static u64 range_end(u64 start, u64 len) 261static u64 range_end(u64 start, u64 len)
@@ -326,7 +317,6 @@ found:
326out: 317out:
327 return em; 318 return em;
328} 319}
329EXPORT_SYMBOL(lookup_extent_mapping);
330 320
331/** 321/**
332 * remove_extent_mapping - removes an extent_map from the extent tree 322 * remove_extent_mapping - removes an extent_map from the extent tree
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
346 em->in_tree = 0; 336 em->in_tree = 0;
347 return ret; 337 return ret;
348} 338}
349EXPORT_SYMBOL(remove_extent_mapping);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c9fb46ccd08..1d51dc38bb49 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
272 return 0; 272 return 0;
273} 273}
274 274
275int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
276{
277 return 0;
278#if 0
279 struct btrfs_path *path;
280 struct btrfs_key found_key;
281 struct extent_buffer *leaf;
282 struct btrfs_file_extent_item *extent;
283 u64 last_offset = 0;
284 int nritems;
285 int slot;
286 int found_type;
287 int ret;
288 int err = 0;
289 u64 extent_end = 0;
290
291 path = btrfs_alloc_path();
292 ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
293 last_offset, 0);
294 while (1) {
295 nritems = btrfs_header_nritems(path->nodes[0]);
296 if (path->slots[0] >= nritems) {
297 ret = btrfs_next_leaf(root, path);
298 if (ret)
299 goto out;
300 nritems = btrfs_header_nritems(path->nodes[0]);
301 }
302 slot = path->slots[0];
303 leaf = path->nodes[0];
304 btrfs_item_key_to_cpu(leaf, &found_key, slot);
305 if (found_key.objectid != inode->i_ino)
306 break;
307 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
308 goto out;
309
310 if (found_key.offset < last_offset) {
311 WARN_ON(1);
312 btrfs_print_leaf(root, leaf);
313 printk(KERN_ERR "inode %lu found offset %llu "
314 "expected %llu\n", inode->i_ino,
315 (unsigned long long)found_key.offset,
316 (unsigned long long)last_offset);
317 err = 1;
318 goto out;
319 }
320 extent = btrfs_item_ptr(leaf, slot,
321 struct btrfs_file_extent_item);
322 found_type = btrfs_file_extent_type(leaf, extent);
323 if (found_type == BTRFS_FILE_EXTENT_REG) {
324 extent_end = found_key.offset +
325 btrfs_file_extent_num_bytes(leaf, extent);
326 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
327 struct btrfs_item *item;
328 item = btrfs_item_nr(leaf, slot);
329 extent_end = found_key.offset +
330 btrfs_file_extent_inline_len(leaf, extent);
331 extent_end = (extent_end + root->sectorsize - 1) &
332 ~((u64)root->sectorsize - 1);
333 }
334 last_offset = extent_end;
335 path->slots[0]++;
336 }
337 if (0 && last_offset < inode->i_size) {
338 WARN_ON(1);
339 btrfs_print_leaf(root, leaf);
340 printk(KERN_ERR "inode %lu found offset %llu size %llu\n",
341 inode->i_ino, (unsigned long long)last_offset,
342 (unsigned long long)inode->i_size);
343 err = 1;
344
345 }
346out:
347 btrfs_free_path(path);
348 return err;
349#endif
350}
351
352/* 275/*
353 * this is very complex, but the basic idea is to drop all extents 276 * this is very complex, but the basic idea is to drop all extents
354 * in the range start - end. hint_block is filled in with a block number 277 * in the range start - end. hint_block is filled in with a block number
@@ -363,15 +286,16 @@ out:
363 */ 286 */
364noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 287noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
365 struct btrfs_root *root, struct inode *inode, 288 struct btrfs_root *root, struct inode *inode,
366 u64 start, u64 end, u64 inline_limit, u64 *hint_byte) 289 u64 start, u64 end, u64 locked_end,
290 u64 inline_limit, u64 *hint_byte)
367{ 291{
368 u64 extent_end = 0; 292 u64 extent_end = 0;
369 u64 locked_end = end;
370 u64 search_start = start; 293 u64 search_start = start;
371 u64 leaf_start; 294 u64 leaf_start;
372 u64 ram_bytes = 0; 295 u64 ram_bytes = 0;
373 u64 orig_parent = 0; 296 u64 orig_parent = 0;
374 u64 disk_bytenr = 0; 297 u64 disk_bytenr = 0;
298 u64 orig_locked_end = locked_end;
375 u8 compression; 299 u8 compression;
376 u8 encryption; 300 u8 encryption;
377 u16 other_encoding = 0; 301 u16 other_encoding = 0;
@@ -684,11 +608,10 @@ next_slot:
684 } 608 }
685out: 609out:
686 btrfs_free_path(path); 610 btrfs_free_path(path);
687 if (locked_end > end) { 611 if (locked_end > orig_locked_end) {
688 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 612 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
689 GFP_NOFS); 613 locked_end - 1, GFP_NOFS);
690 } 614 }
691 btrfs_check_file(root, inode);
692 return ret; 615 return ret;
693} 616}
694 617
@@ -830,7 +753,7 @@ again:
830 753
831 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 754 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
832 BUG_ON(ret); 755 BUG_ON(ret);
833 goto done; 756 goto release;
834 } else if (split == start) { 757 } else if (split == start) {
835 if (locked_end < extent_end) { 758 if (locked_end < extent_end) {
836 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 759 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -926,6 +849,8 @@ again:
926 } 849 }
927done: 850done:
928 btrfs_mark_buffer_dirty(leaf); 851 btrfs_mark_buffer_dirty(leaf);
852
853release:
929 btrfs_release_path(root, path); 854 btrfs_release_path(root, path);
930 if (split_end && split == start) { 855 if (split_end && split == start) {
931 split = end; 856 split = end;
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1131 if (will_write) { 1056 if (will_write) {
1132 btrfs_fdatawrite_range(inode->i_mapping, pos, 1057 btrfs_fdatawrite_range(inode->i_mapping, pos,
1133 pos + write_bytes - 1, 1058 pos + write_bytes - 1,
1134 WB_SYNC_NONE); 1059 WB_SYNC_ALL);
1135 } else { 1060 } else {
1136 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1061 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1137 num_pages); 1062 num_pages);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 768b9523662d..0bc93657b460 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
332 printk(KERN_ERR "couldn't find space %llu to free\n", 332 printk(KERN_ERR "couldn't find space %llu to free\n",
333 (unsigned long long)offset); 333 (unsigned long long)offset);
334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", 334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
335 block_group->cached, block_group->key.objectid, 335 block_group->cached,
336 block_group->key.offset); 336 (unsigned long long)block_group->key.objectid,
337 (unsigned long long)block_group->key.offset);
337 btrfs_dump_free_space(block_group, bytes); 338 btrfs_dump_free_space(block_group, bytes);
338 } else if (info) { 339 } else if (info) {
339 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " 340 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
340 "but wanted offset=%llu bytes=%llu\n", 341 "but wanted offset=%llu bytes=%llu\n",
341 info->offset, info->bytes, offset, bytes); 342 (unsigned long long)info->offset,
343 (unsigned long long)info->bytes,
344 (unsigned long long)offset,
345 (unsigned long long)bytes);
342 } 346 }
343 WARN_ON(1); 347 WARN_ON(1);
344 } 348 }
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
357 info = rb_entry(n, struct btrfs_free_space, offset_index); 361 info = rb_entry(n, struct btrfs_free_space, offset_index);
358 if (info->bytes >= bytes) 362 if (info->bytes >= bytes)
359 count++; 363 count++;
360 printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, 364 printk(KERN_ERR "entry offset %llu, bytes %llu\n",
361 info->bytes); 365 (unsigned long long)info->offset,
366 (unsigned long long)info->bytes);
362 } 367 }
363 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 368 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
364 "\n", count); 369 "\n", count);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc7334d833c9..9abbced1123d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
79 } 79 }
80 path = btrfs_alloc_path(); 80 path = btrfs_alloc_path();
81 BUG_ON(!path); 81 BUG_ON(!path);
82 search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); 82 search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
83 search_key.objectid = search_start; 83 search_key.objectid = search_start;
84 search_key.type = 0; 84 search_key.type = 0;
85 search_key.offset = 0; 85 search_key.offset = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a58..90c23eb28829 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops;
70static struct kmem_cache *btrfs_inode_cachep; 70static struct kmem_cache *btrfs_inode_cachep;
71struct kmem_cache *btrfs_trans_handle_cachep; 71struct kmem_cache *btrfs_trans_handle_cachep;
72struct kmem_cache *btrfs_transaction_cachep; 72struct kmem_cache *btrfs_transaction_cachep;
73struct kmem_cache *btrfs_bit_radix_cachep;
74struct kmem_cache *btrfs_path_cachep; 73struct kmem_cache *btrfs_path_cachep;
75 74
76#define S_SHIFT 12 75#define S_SHIFT 12
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
234 } 233 }
235 234
236 ret = btrfs_drop_extents(trans, root, inode, start, 235 ret = btrfs_drop_extents(trans, root, inode, start,
237 aligned_end, start, &hint_byte); 236 aligned_end, aligned_end, start, &hint_byte);
238 BUG_ON(ret); 237 BUG_ON(ret);
239 238
240 if (isize > actual_end) 239 if (isize > actual_end)
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1439 struct inode *inode, u64 file_pos, 1438 struct inode *inode, u64 file_pos,
1440 u64 disk_bytenr, u64 disk_num_bytes, 1439 u64 disk_bytenr, u64 disk_num_bytes,
1441 u64 num_bytes, u64 ram_bytes, 1440 u64 num_bytes, u64 ram_bytes,
1441 u64 locked_end,
1442 u8 compression, u8 encryption, 1442 u8 compression, u8 encryption,
1443 u16 other_encoding, int extent_type) 1443 u16 other_encoding, int extent_type)
1444{ 1444{
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 1455
1456 path->leave_spinning = 1; 1456 path->leave_spinning = 1;
1457 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1457 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1458 file_pos + num_bytes, file_pos, &hint); 1458 file_pos + num_bytes, locked_end,
1459 file_pos, &hint);
1459 BUG_ON(ret); 1460 BUG_ON(ret);
1460 1461
1461 ins.objectid = inode->i_ino; 1462 ins.objectid = inode->i_ino;
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1590 ordered_extent->disk_len, 1591 ordered_extent->disk_len,
1591 ordered_extent->len, 1592 ordered_extent->len,
1592 ordered_extent->len, 1593 ordered_extent->len,
1594 ordered_extent->file_offset +
1595 ordered_extent->len,
1593 compressed, 0, 0, 1596 compressed, 0, 0,
1594 BTRFS_FILE_EXTENT_REG); 1597 BTRFS_FILE_EXTENT_REG);
1595 BUG_ON(ret); 1598 BUG_ON(ret);
@@ -1819,10 +1822,12 @@ good:
1819 return 0; 1822 return 0;
1820 1823
1821zeroit: 1824zeroit:
1822 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 1825 if (printk_ratelimit()) {
1823 "private %llu\n", page->mapping->host->i_ino, 1826 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
1824 (unsigned long long)start, csum, 1827 "private %llu\n", page->mapping->host->i_ino,
1825 (unsigned long long)private); 1828 (unsigned long long)start, csum,
1829 (unsigned long long)private);
1830 }
1826 memset(kaddr + offset, 1, end - start + 1); 1831 memset(kaddr + offset, 1, end - start + 1);
1827 flush_dcache_page(page); 1832 flush_dcache_page(page);
1828 kunmap_atomic(kaddr, KM_USER0); 1833 kunmap_atomic(kaddr, KM_USER0);
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2011} 2016}
2012 2017
2013/* 2018/*
2019 * very simple check to peek ahead in the leaf looking for xattrs. If we
2020 * don't find any xattrs, we know there can't be any acls.
2021 *
2022 * slot is the slot the inode is in, objectid is the objectid of the inode
2023 */
2024static noinline int acls_after_inode_item(struct extent_buffer *leaf,
2025 int slot, u64 objectid)
2026{
2027 u32 nritems = btrfs_header_nritems(leaf);
2028 struct btrfs_key found_key;
2029 int scanned = 0;
2030
2031 slot++;
2032 while (slot < nritems) {
2033 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2034
2035 /* we found a different objectid, there must not be acls */
2036 if (found_key.objectid != objectid)
2037 return 0;
2038
2039 /* we found an xattr, assume we've got an acl */
2040 if (found_key.type == BTRFS_XATTR_ITEM_KEY)
2041 return 1;
2042
2043 /*
2044 * we found a key greater than an xattr key, there can't
2045 * be any acls later on
2046 */
2047 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
2048 return 0;
2049
2050 slot++;
2051 scanned++;
2052
2053 /*
2054 * it goes inode, inode backrefs, xattrs, extents,
2055 * so if there are a ton of hard links to an inode there can
2056 * be a lot of backrefs. Don't waste time searching too hard,
2057 * this is just an optimization
2058 */
2059 if (scanned >= 8)
2060 break;
2061 }
2062 /* we hit the end of the leaf before we found an xattr or
2063 * something larger than an xattr. We have to assume the inode
2064 * has acls
2065 */
2066 return 1;
2067}
2068
2069/*
2014 * read an inode from the btree into the in-memory inode 2070 * read an inode from the btree into the in-memory inode
2015 */ 2071 */
2016void btrfs_read_locked_inode(struct inode *inode) 2072void btrfs_read_locked_inode(struct inode *inode)
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode)
2021 struct btrfs_timespec *tspec; 2077 struct btrfs_timespec *tspec;
2022 struct btrfs_root *root = BTRFS_I(inode)->root; 2078 struct btrfs_root *root = BTRFS_I(inode)->root;
2023 struct btrfs_key location; 2079 struct btrfs_key location;
2080 int maybe_acls;
2024 u64 alloc_group_block; 2081 u64 alloc_group_block;
2025 u32 rdev; 2082 u32 rdev;
2026 int ret; 2083 int ret;
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode)
2067 2124
2068 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2125 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2069 2126
2127 /*
2128 * try to precache a NULL acl entry for files that don't have
2129 * any xattrs or acls
2130 */
2131 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
2132 if (!maybe_acls) {
2133 BTRFS_I(inode)->i_acl = NULL;
2134 BTRFS_I(inode)->i_default_acl = NULL;
2135 }
2136
2070 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2137 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2071 alloc_group_block, 0); 2138 alloc_group_block, 0);
2072 btrfs_free_path(path); 2139 btrfs_free_path(path);
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2877 err = btrfs_drop_extents(trans, root, inode, 2944 err = btrfs_drop_extents(trans, root, inode,
2878 cur_offset, 2945 cur_offset,
2879 cur_offset + hole_size, 2946 cur_offset + hole_size,
2947 block_end,
2880 cur_offset, &hint_byte); 2948 cur_offset, &hint_byte);
2881 if (err) 2949 if (err)
2882 break; 2950 break;
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode)
3041{ 3109{
3042 struct btrfs_inode *bi = BTRFS_I(inode); 3110 struct btrfs_inode *bi = BTRFS_I(inode);
3043 3111
3044 bi->i_acl = NULL; 3112 bi->i_acl = BTRFS_ACL_NOT_CACHED;
3045 bi->i_default_acl = NULL; 3113 bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
3046 3114
3047 bi->generation = 0; 3115 bi->generation = 0;
3048 bi->sequence = 0; 3116 bi->sequence = 0;
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void)
4634 kmem_cache_destroy(btrfs_trans_handle_cachep); 4702 kmem_cache_destroy(btrfs_trans_handle_cachep);
4635 if (btrfs_transaction_cachep) 4703 if (btrfs_transaction_cachep)
4636 kmem_cache_destroy(btrfs_transaction_cachep); 4704 kmem_cache_destroy(btrfs_transaction_cachep);
4637 if (btrfs_bit_radix_cachep)
4638 kmem_cache_destroy(btrfs_bit_radix_cachep);
4639 if (btrfs_path_cachep) 4705 if (btrfs_path_cachep)
4640 kmem_cache_destroy(btrfs_path_cachep); 4706 kmem_cache_destroy(btrfs_path_cachep);
4641} 4707}
4642 4708
4643struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
4644 unsigned long extra_flags,
4645 void (*ctor)(void *))
4646{
4647 return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
4648 SLAB_MEM_SPREAD | extra_flags), ctor);
4649}
4650
4651int btrfs_init_cachep(void) 4709int btrfs_init_cachep(void)
4652{ 4710{
4653 btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", 4711 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
4654 sizeof(struct btrfs_inode), 4712 sizeof(struct btrfs_inode), 0,
4655 0, init_once); 4713 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
4656 if (!btrfs_inode_cachep) 4714 if (!btrfs_inode_cachep)
4657 goto fail; 4715 goto fail;
4658 btrfs_trans_handle_cachep = 4716
4659 btrfs_cache_create("btrfs_trans_handle_cache", 4717 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
4660 sizeof(struct btrfs_trans_handle), 4718 sizeof(struct btrfs_trans_handle), 0,
4661 0, NULL); 4719 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4662 if (!btrfs_trans_handle_cachep) 4720 if (!btrfs_trans_handle_cachep)
4663 goto fail; 4721 goto fail;
4664 btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", 4722
4665 sizeof(struct btrfs_transaction), 4723 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
4666 0, NULL); 4724 sizeof(struct btrfs_transaction), 0,
4725 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4667 if (!btrfs_transaction_cachep) 4726 if (!btrfs_transaction_cachep)
4668 goto fail; 4727 goto fail;
4669 btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", 4728
4670 sizeof(struct btrfs_path), 4729 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
4671 0, NULL); 4730 sizeof(struct btrfs_path), 0,
4731 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4672 if (!btrfs_path_cachep) 4732 if (!btrfs_path_cachep)
4673 goto fail; 4733 goto fail;
4674 btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, 4734
4675 SLAB_DESTROY_BY_RCU, NULL);
4676 if (!btrfs_bit_radix_cachep)
4677 goto fail;
4678 return 0; 4735 return 0;
4679fail: 4736fail:
4680 btrfs_destroy_cachep(); 4737 btrfs_destroy_cachep();
@@ -4970,10 +5027,10 @@ out_fail:
4970 return err; 5027 return err;
4971} 5028}
4972 5029
4973static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 5030static int prealloc_file_range(struct btrfs_trans_handle *trans,
4974 u64 alloc_hint, int mode) 5031 struct inode *inode, u64 start, u64 end,
5032 u64 locked_end, u64 alloc_hint, int mode)
4975{ 5033{
4976 struct btrfs_trans_handle *trans;
4977 struct btrfs_root *root = BTRFS_I(inode)->root; 5034 struct btrfs_root *root = BTRFS_I(inode)->root;
4978 struct btrfs_key ins; 5035 struct btrfs_key ins;
4979 u64 alloc_size; 5036 u64 alloc_size;
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4981 u64 num_bytes = end - start; 5038 u64 num_bytes = end - start;
4982 int ret = 0; 5039 int ret = 0;
4983 5040
4984 trans = btrfs_join_transaction(root, 1);
4985 BUG_ON(!trans);
4986 btrfs_set_trans_block_group(trans, inode);
4987
4988 while (num_bytes > 0) { 5041 while (num_bytes > 0) {
4989 alloc_size = min(num_bytes, root->fs_info->max_extent); 5042 alloc_size = min(num_bytes, root->fs_info->max_extent);
4990 ret = btrfs_reserve_extent(trans, root, alloc_size, 5043 ret = btrfs_reserve_extent(trans, root, alloc_size,
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4997 ret = insert_reserved_file_extent(trans, inode, 5050 ret = insert_reserved_file_extent(trans, inode,
4998 cur_offset, ins.objectid, 5051 cur_offset, ins.objectid,
4999 ins.offset, ins.offset, 5052 ins.offset, ins.offset,
5000 ins.offset, 0, 0, 0, 5053 ins.offset, locked_end,
5054 0, 0, 0,
5001 BTRFS_FILE_EXTENT_PREALLOC); 5055 BTRFS_FILE_EXTENT_PREALLOC);
5002 BUG_ON(ret); 5056 BUG_ON(ret);
5003 num_bytes -= ins.offset; 5057 num_bytes -= ins.offset;
@@ -5015,7 +5069,6 @@ out:
5015 BUG_ON(ret); 5069 BUG_ON(ret);
5016 } 5070 }
5017 5071
5018 btrfs_end_transaction(trans, root);
5019 return ret; 5072 return ret;
5020} 5073}
5021 5074
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5027 u64 alloc_start; 5080 u64 alloc_start;
5028 u64 alloc_end; 5081 u64 alloc_end;
5029 u64 alloc_hint = 0; 5082 u64 alloc_hint = 0;
5083 u64 locked_end;
5030 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5084 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5031 struct extent_map *em; 5085 struct extent_map *em;
5086 struct btrfs_trans_handle *trans;
5032 int ret; 5087 int ret;
5033 5088
5034 alloc_start = offset & ~mask; 5089 alloc_start = offset & ~mask;
5035 alloc_end = (offset + len + mask) & ~mask; 5090 alloc_end = (offset + len + mask) & ~mask;
5036 5091
5092 /*
5093 * wait for ordered IO before we have any locks. We'll loop again
5094 * below with the locks held.
5095 */
5096 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
5097
5037 mutex_lock(&inode->i_mutex); 5098 mutex_lock(&inode->i_mutex);
5038 if (alloc_start > inode->i_size) { 5099 if (alloc_start > inode->i_size) {
5039 ret = btrfs_cont_expand(inode, alloc_start); 5100 ret = btrfs_cont_expand(inode, alloc_start);
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5041 goto out; 5102 goto out;
5042 } 5103 }
5043 5104
5105 locked_end = alloc_end - 1;
5044 while (1) { 5106 while (1) {
5045 struct btrfs_ordered_extent *ordered; 5107 struct btrfs_ordered_extent *ordered;
5046 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, 5108
5047 alloc_end - 1, GFP_NOFS); 5109 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5110 if (!trans) {
5111 ret = -EIO;
5112 goto out;
5113 }
5114
5115 /* the extent lock is ordered inside the running
5116 * transaction
5117 */
5118 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5119 GFP_NOFS);
5048 ordered = btrfs_lookup_first_ordered_extent(inode, 5120 ordered = btrfs_lookup_first_ordered_extent(inode,
5049 alloc_end - 1); 5121 alloc_end - 1);
5050 if (ordered && 5122 if (ordered &&
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5052 ordered->file_offset < alloc_end) { 5124 ordered->file_offset < alloc_end) {
5053 btrfs_put_ordered_extent(ordered); 5125 btrfs_put_ordered_extent(ordered);
5054 unlock_extent(&BTRFS_I(inode)->io_tree, 5126 unlock_extent(&BTRFS_I(inode)->io_tree,
5055 alloc_start, alloc_end - 1, GFP_NOFS); 5127 alloc_start, locked_end, GFP_NOFS);
5128 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5129
5130 /*
5131 * we can't wait on the range with the transaction
5132 * running or with the extent lock held
5133 */
5056 btrfs_wait_ordered_range(inode, alloc_start, 5134 btrfs_wait_ordered_range(inode, alloc_start,
5057 alloc_end - alloc_start); 5135 alloc_end - alloc_start);
5058 } else { 5136 } else {
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5070 last_byte = min(extent_map_end(em), alloc_end); 5148 last_byte = min(extent_map_end(em), alloc_end);
5071 last_byte = (last_byte + mask) & ~mask; 5149 last_byte = (last_byte + mask) & ~mask;
5072 if (em->block_start == EXTENT_MAP_HOLE) { 5150 if (em->block_start == EXTENT_MAP_HOLE) {
5073 ret = prealloc_file_range(inode, cur_offset, 5151 ret = prealloc_file_range(trans, inode, cur_offset,
5074 last_byte, alloc_hint, mode); 5152 last_byte, locked_end + 1,
5153 alloc_hint, mode);
5075 if (ret < 0) { 5154 if (ret < 0) {
5076 free_extent_map(em); 5155 free_extent_map(em);
5077 break; 5156 break;
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5087 break; 5166 break;
5088 } 5167 }
5089 } 5168 }
5090 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, 5169 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5091 GFP_NOFS); 5170 GFP_NOFS);
5171
5172 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5092out: 5173out:
5093 mutex_unlock(&inode->i_mutex); 5174 mutex_unlock(&inode->i_mutex);
5094 return ret; 5175 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be10..5e94ea6e1cbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
461 if (!capable(CAP_SYS_ADMIN)) 461 if (!capable(CAP_SYS_ADMIN))
462 return -EPERM; 462 return -EPERM;
463 463
464 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 464 vol_args = memdup_user(arg, sizeof(*vol_args));
465 465 if (IS_ERR(vol_args))
466 if (!vol_args) 466 return PTR_ERR(vol_args);
467 return -ENOMEM;
468
469 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
470 ret = -EFAULT;
471 goto out;
472 }
473 467
474 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 468 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
475 namelen = strlen(vol_args->name); 469 namelen = strlen(vol_args->name);
@@ -483,11 +477,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
483 *devstr = '\0'; 477 *devstr = '\0';
484 devstr = vol_args->name; 478 devstr = vol_args->name;
485 devid = simple_strtoull(devstr, &end, 10); 479 devid = simple_strtoull(devstr, &end, 10);
486 printk(KERN_INFO "resizing devid %llu\n", devid); 480 printk(KERN_INFO "resizing devid %llu\n",
481 (unsigned long long)devid);
487 } 482 }
488 device = btrfs_find_device(root, devid, NULL, NULL); 483 device = btrfs_find_device(root, devid, NULL, NULL);
489 if (!device) { 484 if (!device) {
490 printk(KERN_INFO "resizer unable to find device %llu\n", devid); 485 printk(KERN_INFO "resizer unable to find device %llu\n",
486 (unsigned long long)devid);
491 ret = -EINVAL; 487 ret = -EINVAL;
492 goto out_unlock; 488 goto out_unlock;
493 } 489 }
@@ -545,7 +541,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
545 541
546out_unlock: 542out_unlock:
547 mutex_unlock(&root->fs_info->volume_mutex); 543 mutex_unlock(&root->fs_info->volume_mutex);
548out:
549 kfree(vol_args); 544 kfree(vol_args);
550 return ret; 545 return ret;
551} 546}
@@ -565,15 +560,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
565 if (root->fs_info->sb->s_flags & MS_RDONLY) 560 if (root->fs_info->sb->s_flags & MS_RDONLY)
566 return -EROFS; 561 return -EROFS;
567 562
568 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 563 vol_args = memdup_user(arg, sizeof(*vol_args));
569 564 if (IS_ERR(vol_args))
570 if (!vol_args) 565 return PTR_ERR(vol_args);
571 return -ENOMEM;
572
573 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
574 ret = -EFAULT;
575 goto out;
576 }
577 566
578 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 567 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
579 namelen = strlen(vol_args->name); 568 namelen = strlen(vol_args->name);
@@ -675,19 +664,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
675 if (!capable(CAP_SYS_ADMIN)) 664 if (!capable(CAP_SYS_ADMIN))
676 return -EPERM; 665 return -EPERM;
677 666
678 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 667 vol_args = memdup_user(arg, sizeof(*vol_args));
668 if (IS_ERR(vol_args))
669 return PTR_ERR(vol_args);
679 670
680 if (!vol_args)
681 return -ENOMEM;
682
683 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
684 ret = -EFAULT;
685 goto out;
686 }
687 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 671 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
688 ret = btrfs_init_new_device(root, vol_args->name); 672 ret = btrfs_init_new_device(root, vol_args->name);
689 673
690out:
691 kfree(vol_args); 674 kfree(vol_args);
692 return ret; 675 return ret;
693} 676}
@@ -703,19 +686,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
703 if (root->fs_info->sb->s_flags & MS_RDONLY) 686 if (root->fs_info->sb->s_flags & MS_RDONLY)
704 return -EROFS; 687 return -EROFS;
705 688
706 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 689 vol_args = memdup_user(arg, sizeof(*vol_args));
690 if (IS_ERR(vol_args))
691 return PTR_ERR(vol_args);
707 692
708 if (!vol_args)
709 return -ENOMEM;
710
711 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
712 ret = -EFAULT;
713 goto out;
714 }
715 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 693 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
716 ret = btrfs_rm_device(root, vol_args->name); 694 ret = btrfs_rm_device(root, vol_args->name);
717 695
718out:
719 kfree(vol_args); 696 kfree(vol_args);
720 return ret; 697 return ret;
721} 698}
@@ -830,7 +807,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
830 BUG_ON(!trans); 807 BUG_ON(!trans);
831 808
832 /* punch hole in destination first */ 809 /* punch hole in destination first */
833 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); 810 btrfs_drop_extents(trans, root, inode, off, off + len,
811 off + len, 0, &hint_byte);
834 812
835 /* clone data */ 813 /* clone data */
836 key.objectid = src->i_ino; 814 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
489 /* start IO across the range first to instantiate any delalloc 489 /* start IO across the range first to instantiate any delalloc
490 * extents 490 * extents
491 */ 491 */
492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); 492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
493 493
494 /* The compression code will leave pages locked but return from 494 /* The compression code will leave pages locked but return from
495 * writepage without setting the page writeback. Starting again 495 * writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9744af9d71e9..3536bdb2d7cb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@ enum {
68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, 70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog,
71 Opt_flushoncommit, Opt_err, 71 Opt_ratio, Opt_flushoncommit, Opt_err,
72}; 72};
73 73
74static match_table_t tokens = { 74static match_table_t tokens = {
@@ -87,6 +87,7 @@ static match_table_t tokens = {
87 {Opt_noacl, "noacl"}, 87 {Opt_noacl, "noacl"},
88 {Opt_notreelog, "notreelog"}, 88 {Opt_notreelog, "notreelog"},
89 {Opt_flushoncommit, "flushoncommit"}, 89 {Opt_flushoncommit, "flushoncommit"},
90 {Opt_ratio, "metadata_ratio=%d"},
90 {Opt_err, NULL}, 91 {Opt_err, NULL},
91}; 92};
92 93
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
195 info->max_extent = max_t(u64, 196 info->max_extent = max_t(u64,
196 info->max_extent, root->sectorsize); 197 info->max_extent, root->sectorsize);
197 printk(KERN_INFO "btrfs: max_extent at %llu\n", 198 printk(KERN_INFO "btrfs: max_extent at %llu\n",
198 info->max_extent); 199 (unsigned long long)info->max_extent);
199 } 200 }
200 break; 201 break;
201 case Opt_max_inline: 202 case Opt_max_inline:
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
210 root->sectorsize); 211 root->sectorsize);
211 } 212 }
212 printk(KERN_INFO "btrfs: max_inline at %llu\n", 213 printk(KERN_INFO "btrfs: max_inline at %llu\n",
213 info->max_inline); 214 (unsigned long long)info->max_inline);
214 } 215 }
215 break; 216 break;
216 case Opt_alloc_start: 217 case Opt_alloc_start:
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
220 kfree(num); 221 kfree(num);
221 printk(KERN_INFO 222 printk(KERN_INFO
222 "btrfs: allocations start at %llu\n", 223 "btrfs: allocations start at %llu\n",
223 info->alloc_start); 224 (unsigned long long)info->alloc_start);
224 } 225 }
225 break; 226 break;
226 case Opt_noacl: 227 case Opt_noacl:
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
234 printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); 235 printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
235 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 236 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
236 break; 237 break;
238 case Opt_ratio:
239 intarg = 0;
240 match_int(&args[0], &intarg);
241 if (intarg) {
242 info->metadata_ratio = intarg;
243 printk(KERN_INFO "btrfs: metadata ratio %d\n",
244 info->metadata_ratio);
245 }
246 break;
237 default: 247 default:
238 break; 248 break;
239 } 249 }
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
410 if (btrfs_test_opt(root, NOBARRIER)) 420 if (btrfs_test_opt(root, NOBARRIER))
411 seq_puts(seq, ",nobarrier"); 421 seq_puts(seq, ",nobarrier");
412 if (info->max_extent != (u64)-1) 422 if (info->max_extent != (u64)-1)
413 seq_printf(seq, ",max_extent=%llu", info->max_extent); 423 seq_printf(seq, ",max_extent=%llu",
424 (unsigned long long)info->max_extent);
414 if (info->max_inline != 8192 * 1024) 425 if (info->max_inline != 8192 * 1024)
415 seq_printf(seq, ",max_inline=%llu", info->max_inline); 426 seq_printf(seq, ",max_inline=%llu",
427 (unsigned long long)info->max_inline);
416 if (info->alloc_start != 0) 428 if (info->alloc_start != 0)
417 seq_printf(seq, ",alloc_start=%llu", info->alloc_start); 429 seq_printf(seq, ",alloc_start=%llu",
430 (unsigned long long)info->alloc_start);
418 if (info->thread_pool_size != min_t(unsigned long, 431 if (info->thread_pool_size != min_t(unsigned long,
419 num_online_cpus() + 2, 8)) 432 num_online_cpus() + 2, 8))
420 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 433 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -635,14 +648,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
635 if (!capable(CAP_SYS_ADMIN)) 648 if (!capable(CAP_SYS_ADMIN))
636 return -EPERM; 649 return -EPERM;
637 650
638 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 651 vol = memdup_user((void __user *)arg, sizeof(*vol));
639 if (!vol) 652 if (IS_ERR(vol))
640 return -ENOMEM; 653 return PTR_ERR(vol);
641
642 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
643 ret = -EFAULT;
644 goto out;
645 }
646 654
647 switch (cmd) { 655 switch (cmd) {
648 case BTRFS_IOC_SCAN_DEV: 656 case BTRFS_IOC_SCAN_DEV:
@@ -650,7 +658,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
650 &btrfs_fs_type, &fs_devices); 658 &btrfs_fs_type, &fs_devices);
651 break; 659 break;
652 } 660 }
653out: 661
654 kfree(vol); 662 kfree(vol);
655 return ret; 663 return ret;
656} 664}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2869b3361eb6..01b143605ec1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
687 prepare_to_wait(&info->transaction_wait, &wait, 687 prepare_to_wait(&info->transaction_wait, &wait,
688 TASK_UNINTERRUPTIBLE); 688 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 689 mutex_unlock(&info->trans_mutex);
690
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
690 schedule(); 694 schedule();
695
696 atomic_inc(&info->throttles);
691 mutex_lock(&info->trans_mutex); 697 mutex_lock(&info->trans_mutex);
692 finish_wait(&info->transaction_wait, &wait); 698 finish_wait(&info->transaction_wait, &wait);
693 } 699 }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 25f20ea11f27..db5e212e8445 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
536 saved_nbytes = inode_get_bytes(inode); 536 saved_nbytes = inode_get_bytes(inode);
537 /* drop any overlapping extents */ 537 /* drop any overlapping extents */
538 ret = btrfs_drop_extents(trans, root, inode, 538 ret = btrfs_drop_extents(trans, root, inode,
539 start, extent_end, start, &alloc_hint); 539 start, extent_end, extent_end, start, &alloc_hint);
540 BUG_ON(ret); 540 BUG_ON(ret);
541 541
542 if (found_type == BTRFS_FILE_EXTENT_REG || 542 if (found_type == BTRFS_FILE_EXTENT_REG ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..5f01dad4b696 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
125 return NULL; 125 return NULL;
126} 126}
127 127
128static void requeue_list(struct btrfs_pending_bios *pending_bios,
129 struct bio *head, struct bio *tail)
130{
131
132 struct bio *old_head;
133
134 old_head = pending_bios->head;
135 pending_bios->head = head;
136 if (pending_bios->tail)
137 tail->bi_next = old_head;
138 else
139 pending_bios->tail = tail;
140}
141
128/* 142/*
129 * we try to collect pending bios for a device so we don't get a large 143 * we try to collect pending bios for a device so we don't get a large
130 * number of procs sending bios down to the same device. This greatly 144 * number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
141 struct bio *pending; 155 struct bio *pending;
142 struct backing_dev_info *bdi; 156 struct backing_dev_info *bdi;
143 struct btrfs_fs_info *fs_info; 157 struct btrfs_fs_info *fs_info;
158 struct btrfs_pending_bios *pending_bios;
144 struct bio *tail; 159 struct bio *tail;
145 struct bio *cur; 160 struct bio *cur;
146 int again = 0; 161 int again = 0;
147 unsigned long num_run = 0; 162 unsigned long num_run;
163 unsigned long num_sync_run;
148 unsigned long limit; 164 unsigned long limit;
149 unsigned long last_waited = 0; 165 unsigned long last_waited = 0;
150 166
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
153 limit = btrfs_async_submit_limit(fs_info); 169 limit = btrfs_async_submit_limit(fs_info);
154 limit = limit * 2 / 3; 170 limit = limit * 2 / 3;
155 171
172 /* we want to make sure that every time we switch from the sync
173 * list to the normal list, we unplug
174 */
175 num_sync_run = 0;
176
156loop: 177loop:
157 spin_lock(&device->io_lock); 178 spin_lock(&device->io_lock);
179 num_run = 0;
158 180
159loop_lock: 181loop_lock:
182
160 /* take all the bios off the list at once and process them 183 /* take all the bios off the list at once and process them
161 * later on (without the lock held). But, remember the 184 * later on (without the lock held). But, remember the
162 * tail and other pointers so the bios can be properly reinserted 185 * tail and other pointers so the bios can be properly reinserted
163 * into the list if we hit congestion 186 * into the list if we hit congestion
164 */ 187 */
165 pending = device->pending_bios; 188 if (device->pending_sync_bios.head)
166 tail = device->pending_bio_tail; 189 pending_bios = &device->pending_sync_bios;
190 else
191 pending_bios = &device->pending_bios;
192
193 pending = pending_bios->head;
194 tail = pending_bios->tail;
167 WARN_ON(pending && !tail); 195 WARN_ON(pending && !tail);
168 device->pending_bios = NULL;
169 device->pending_bio_tail = NULL;
170 196
171 /* 197 /*
172 * if pending was null this time around, no bios need processing 198 * if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
176 * device->running_pending is used to synchronize with the 202 * device->running_pending is used to synchronize with the
177 * schedule_bio code. 203 * schedule_bio code.
178 */ 204 */
179 if (pending) { 205 if (device->pending_sync_bios.head == NULL &&
180 again = 1; 206 device->pending_bios.head == NULL) {
181 device->running_pending = 1;
182 } else {
183 again = 0; 207 again = 0;
184 device->running_pending = 0; 208 device->running_pending = 0;
209 } else {
210 again = 1;
211 device->running_pending = 1;
185 } 212 }
213
214 pending_bios->head = NULL;
215 pending_bios->tail = NULL;
216
186 spin_unlock(&device->io_lock); 217 spin_unlock(&device->io_lock);
187 218
219 /*
220 * if we're doing the regular priority list, make sure we unplug
221 * for any high prio bios we've sent down
222 */
223 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
224 num_sync_run = 0;
225 blk_run_backing_dev(bdi, NULL);
226 }
227
188 while (pending) { 228 while (pending) {
229
230 rmb();
231 if (pending_bios != &device->pending_sync_bios &&
232 device->pending_sync_bios.head &&
233 num_run > 16) {
234 cond_resched();
235 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail);
237 goto loop_lock;
238 }
239
189 cur = pending; 240 cur = pending;
190 pending = pending->bi_next; 241 pending = pending->bi_next;
191 cur->bi_next = NULL; 242 cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
196 wake_up(&fs_info->async_submit_wait); 247 wake_up(&fs_info->async_submit_wait);
197 248
198 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 249 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
199 bio_get(cur);
200 submit_bio(cur->bi_rw, cur); 250 submit_bio(cur->bi_rw, cur);
201 bio_put(cur);
202 num_run++; 251 num_run++;
252 if (bio_sync(cur))
253 num_sync_run++;
254
255 if (need_resched()) {
256 if (num_sync_run) {
257 blk_run_backing_dev(bdi, NULL);
258 num_sync_run = 0;
259 }
260 cond_resched();
261 }
203 262
204 /* 263 /*
205 * we made progress, there is more work to do and the bdi 264 * we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
208 */ 267 */
209 if (pending && bdi_write_congested(bdi) && num_run > 16 && 268 if (pending && bdi_write_congested(bdi) && num_run > 16 &&
210 fs_info->fs_devices->open_devices > 1) { 269 fs_info->fs_devices->open_devices > 1) {
211 struct bio *old_head;
212 struct io_context *ioc; 270 struct io_context *ioc;
213 271
214 ioc = current->io_context; 272 ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
233 * against it before looping 291 * against it before looping
234 */ 292 */
235 last_waited = ioc->last_waited; 293 last_waited = ioc->last_waited;
294 if (need_resched()) {
295 if (num_sync_run) {
296 blk_run_backing_dev(bdi, NULL);
297 num_sync_run = 0;
298 }
299 cond_resched();
300 }
236 continue; 301 continue;
237 } 302 }
238 spin_lock(&device->io_lock); 303 spin_lock(&device->io_lock);
239 304 requeue_list(pending_bios, pending, tail);
240 old_head = device->pending_bios;
241 device->pending_bios = pending;
242 if (device->pending_bio_tail)
243 tail->bi_next = old_head;
244 else
245 device->pending_bio_tail = tail;
246
247 device->running_pending = 1; 305 device->running_pending = 1;
248 306
249 spin_unlock(&device->io_lock); 307 spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
251 goto done; 309 goto done;
252 } 310 }
253 } 311 }
312
313 if (num_sync_run) {
314 num_sync_run = 0;
315 blk_run_backing_dev(bdi, NULL);
316 }
317
318 cond_resched();
254 if (again) 319 if (again)
255 goto loop; 320 goto loop;
256 321
257 spin_lock(&device->io_lock); 322 spin_lock(&device->io_lock);
258 if (device->pending_bios) 323 if (device->pending_bios.head || device->pending_sync_bios.head)
259 goto loop_lock; 324 goto loop_lock;
260 spin_unlock(&device->io_lock); 325 spin_unlock(&device->io_lock);
261 326
@@ -1478,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
1478 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 1543 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1479 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 1544 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1480 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 1545 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1481 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 1546 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
1482 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 1547 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1483 btrfs_mark_buffer_dirty(leaf); 1548 btrfs_mark_buffer_dirty(leaf);
1484 1549
@@ -1875,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1875 device->total_bytes = new_size; 1940 device->total_bytes = new_size;
1876 if (device->writeable) 1941 if (device->writeable)
1877 device->fs_devices->total_rw_bytes -= diff; 1942 device->fs_devices->total_rw_bytes -= diff;
1878 ret = btrfs_update_device(trans, device);
1879 if (ret) {
1880 unlock_chunks(root);
1881 btrfs_end_transaction(trans, root);
1882 goto done;
1883 }
1884 WARN_ON(diff > old_total);
1885 btrfs_set_super_total_bytes(super_copy, old_total - diff);
1886 unlock_chunks(root); 1943 unlock_chunks(root);
1887 btrfs_end_transaction(trans, root); 1944 btrfs_end_transaction(trans, root);
1888 1945
@@ -1914,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1914 length = btrfs_dev_extent_length(l, dev_extent); 1971 length = btrfs_dev_extent_length(l, dev_extent);
1915 1972
1916 if (key.offset + length <= new_size) 1973 if (key.offset + length <= new_size)
1917 goto done; 1974 break;
1918 1975
1919 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 1976 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1920 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 1977 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1927,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1927 goto done; 1984 goto done;
1928 } 1985 }
1929 1986
1987 /* Shrinking succeeded, else we would be at "done". */
1988 trans = btrfs_start_transaction(root, 1);
1989 if (!trans) {
1990 ret = -ENOMEM;
1991 goto done;
1992 }
1993 lock_chunks(root);
1994
1995 device->disk_total_bytes = new_size;
1996 /* Now btrfs_update_device() will change the on-disk size. */
1997 ret = btrfs_update_device(trans, device);
1998 if (ret) {
1999 unlock_chunks(root);
2000 btrfs_end_transaction(trans, root);
2001 goto done;
2002 }
2003 WARN_ON(diff > old_total);
2004 btrfs_set_super_total_bytes(super_copy, old_total - diff);
2005 unlock_chunks(root);
2006 btrfs_end_transaction(trans, root);
1930done: 2007done:
1931 btrfs_free_path(path); 2008 btrfs_free_path(path);
1932 return ret; 2009 return ret;
@@ -2497,7 +2574,7 @@ again:
2497 max_errors = 1; 2574 max_errors = 1;
2498 } 2575 }
2499 } 2576 }
2500 if (multi_ret && rw == WRITE && 2577 if (multi_ret && (rw & (1 << BIO_RW)) &&
2501 stripes_allocated < stripes_required) { 2578 stripes_allocated < stripes_required) {
2502 stripes_allocated = map->num_stripes; 2579 stripes_allocated = map->num_stripes;
2503 free_extent_map(em); 2580 free_extent_map(em);
@@ -2762,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2762 int rw, struct bio *bio) 2839 int rw, struct bio *bio)
2763{ 2840{
2764 int should_queue = 1; 2841 int should_queue = 1;
2842 struct btrfs_pending_bios *pending_bios;
2765 2843
2766 /* don't bother with additional async steps for reads, right now */ 2844 /* don't bother with additional async steps for reads, right now */
2767 if (!(rw & (1 << BIO_RW))) { 2845 if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
2783 bio->bi_rw |= rw; 2861 bio->bi_rw |= rw;
2784 2862
2785 spin_lock(&device->io_lock); 2863 spin_lock(&device->io_lock);
2864 if (bio_sync(bio))
2865 pending_bios = &device->pending_sync_bios;
2866 else
2867 pending_bios = &device->pending_bios;
2786 2868
2787 if (device->pending_bio_tail) 2869 if (pending_bios->tail)
2788 device->pending_bio_tail->bi_next = bio; 2870 pending_bios->tail->bi_next = bio;
2789 2871
2790 device->pending_bio_tail = bio; 2872 pending_bios->tail = bio;
2791 if (!device->pending_bios) 2873 if (!pending_bios->head)
2792 device->pending_bios = bio; 2874 pending_bios->head = bio;
2793 if (device->running_pending) 2875 if (device->running_pending)
2794 should_queue = 0; 2876 should_queue = 0;
2795 2877
@@ -3006,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
3006 unsigned long ptr; 3088 unsigned long ptr;
3007 3089
3008 device->devid = btrfs_device_id(leaf, dev_item); 3090 device->devid = btrfs_device_id(leaf, dev_item);
3009 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); 3091 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
3092 device->total_bytes = device->disk_total_bytes;
3010 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); 3093 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
3011 device->type = btrfs_device_type(leaf, dev_item); 3094 device->type = btrfs_device_type(leaf, dev_item);
3012 device->io_align = btrfs_device_io_align(leaf, dev_item); 3095 device->io_align = btrfs_device_io_align(leaf, dev_item);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5c3ff6d02fd7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
23#include "async-thread.h" 23#include "async-thread.h"
24 24
25struct buffer_head; 25struct buffer_head;
26struct btrfs_pending_bios {
27 struct bio *head;
28 struct bio *tail;
29};
30
26struct btrfs_device { 31struct btrfs_device {
27 struct list_head dev_list; 32 struct list_head dev_list;
28 struct list_head dev_alloc_list; 33 struct list_head dev_alloc_list;
29 struct btrfs_fs_devices *fs_devices; 34 struct btrfs_fs_devices *fs_devices;
30 struct btrfs_root *dev_root; 35 struct btrfs_root *dev_root;
31 struct bio *pending_bios; 36
32 struct bio *pending_bio_tail; 37 /* regular prio bios */
38 struct btrfs_pending_bios pending_bios;
39 /* WRITE_SYNC bios */
40 struct btrfs_pending_bios pending_sync_bios;
41
33 int running_pending; 42 int running_pending;
34 u64 generation; 43 u64 generation;
35 44
@@ -52,6 +61,9 @@ struct btrfs_device {
52 /* size of the device */ 61 /* size of the device */
53 u64 total_bytes; 62 u64 total_bytes;
54 63
64 /* size of the disk */
65 u64 disk_total_bytes;
66
55 /* bytes used */ 67 /* bytes used */
56 u64 bytes_used; 68 u64 bytes_used;
57 69