Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/Makefile            |  19
-rw-r--r--  fs/btrfs/acl.c               |  18
-rw-r--r--  fs/btrfs/async-thread.c      |  60
-rw-r--r--  fs/btrfs/async-thread.h      |   2
-rw-r--r--  fs/btrfs/ctree.c             |  17
-rw-r--r--  fs/btrfs/ctree.h             |   6
-rw-r--r--  fs/btrfs/disk-io.c           | 102
-rw-r--r--  fs/btrfs/extent-tree.c       |  49
-rw-r--r--  fs/btrfs/extent_io.c         | 167
-rw-r--r--  fs/btrfs/extent_map.c        |  17
-rw-r--r--  fs/btrfs/file.c              |  95
-rw-r--r--  fs/btrfs/free-space-cache.c  |  15
-rw-r--r--  fs/btrfs/inode-map.c         |   2
-rw-r--r--  fs/btrfs/inode.c             | 183
-rw-r--r--  fs/btrfs/ioctl.c             |  58
-rw-r--r--  fs/btrfs/ordered-data.c      |   2
-rw-r--r--  fs/btrfs/super.c             |  40
-rw-r--r--  fs/btrfs/transaction.c       |   6
-rw-r--r--  fs/btrfs/tree-log.c          |   2
-rw-r--r--  fs/btrfs/volumes.c           | 159
-rw-r--r--  fs/btrfs/volumes.h           |  16
21 files changed, 569 insertions, 466 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9adf5e4f7e96..94212844a9bc 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -1,25 +1,10 @@
-ifneq ($(KERNELRELEASE),)
-# kbuild part of makefile
 
 obj-$(CONFIG_BTRFS_FS) := btrfs.o
-btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+
+btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
	   file-item.o inode-item.o inode-map.o disk-io.o \
	   transaction.o inode.o file.o tree-defrag.o \
	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
	   ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
	   compression.o delayed-ref.o
-else
-
-# Normal Makefile
-
-KERNELDIR := /lib/modules/`uname -r`/build
-all:
-	$(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
-
-modules_install:
-	$(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
-clean:
-	$(MAKE) -C $(KERNELDIR) M=`pwd` clean
-
-endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7fdd184a528d..cbba000dccbe 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 		return ERR_PTR(-EINVAL);
 	}
 
+	/* Handle the cached NULL acl case without locking */
+	acl = ACCESS_ONCE(*p_acl);
+	if (!acl)
+		return acl;
+
 	spin_lock(&inode->i_lock);
-	if (*p_acl != BTRFS_ACL_NOT_CACHED)
-		acl = posix_acl_dup(*p_acl);
+	acl = *p_acl;
+	if (acl != BTRFS_ACL_NOT_CACHED)
+		acl = posix_acl_dup(acl);
 	spin_unlock(&inode->i_lock);
 
-	if (acl)
+	if (acl != BTRFS_ACL_NOT_CACHED)
 		return acl;
 
-
 	size = __btrfs_getxattr(inode, name, "", 0);
 	if (size > 0) {
 		value = kzalloc(size, GFP_NOFS);
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 			btrfs_update_cached_acl(inode, p_acl, acl);
 		}
 		kfree(value);
-	} else if (size == -ENOENT) {
+	} else if (size == -ENOENT || size == -ENODATA || size == 0) {
+		/* FIXME, who returns -ENOENT?  I think nobody */
 		acl = NULL;
 		btrfs_update_cached_acl(inode, p_acl, acl);
+	} else {
+		acl = ERR_PTR(-EIO);
 	}
 
 	return acl;
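
The acl.c hunks above add a lock-free fast path for the common case where the cache already holds NULL. A minimal sketch of that double-checked read, pulled out of context (the helper name is hypothetical; the fields and calls are the ones in the patch):

    /* Unlocked peek first; only a possibly-real ACL takes the spinlock so
     * posix_acl_dup() can bump the refcount safely.  Returns the sentinel
     * BTRFS_ACL_NOT_CACHED on a cold cache, telling the caller to go read
     * the xattr from disk. */
    static struct posix_acl *cached_acl_get(struct inode *inode,
                                            struct posix_acl **p_acl)
    {
            struct posix_acl *acl;

            acl = ACCESS_ONCE(*p_acl);      /* cached NULL: done, no lock */
            if (!acl)
                    return NULL;

            spin_lock(&inode->i_lock);
            acl = *p_acl;
            if (acl != BTRFS_ACL_NOT_CACHED)
                    acl = posix_acl_dup(acl);
            spin_unlock(&inode->i_lock);
            return acl;
    }
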
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 51bfdfc8fcda..502c3d61de62 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -25,6 +25,7 @@
 #define WORK_QUEUED_BIT 0
 #define WORK_DONE_BIT 1
 #define WORK_ORDER_DONE_BIT 2
+#define WORK_HIGH_PRIO_BIT 3
 
 /*
  * container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {
 
 	/* list of struct btrfs_work that are waiting for service */
 	struct list_head pending;
+	struct list_head prio_pending;
 
 	/* list of worker threads from struct btrfs_workers */
 	struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
 
 	spin_lock_irqsave(&workers->lock, flags);
 
-	while (!list_empty(&workers->order_list)) {
-		work = list_entry(workers->order_list.next,
-				  struct btrfs_work, order_list);
-
+	while (1) {
+		if (!list_empty(&workers->prio_order_list)) {
+			work = list_entry(workers->prio_order_list.next,
+					  struct btrfs_work, order_list);
+		} else if (!list_empty(&workers->order_list)) {
+			work = list_entry(workers->order_list.next,
+					  struct btrfs_work, order_list);
+		} else {
+			break;
+		}
 		if (!test_bit(WORK_DONE_BIT, &work->flags))
 			break;
 
@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
 	do {
 		spin_lock_irq(&worker->lock);
 again_locked:
-		while (!list_empty(&worker->pending)) {
-			cur = worker->pending.next;
+		while (1) {
+			if (!list_empty(&worker->prio_pending))
+				cur = worker->prio_pending.next;
+			else if (!list_empty(&worker->pending))
+				cur = worker->pending.next;
+			else
+				break;
+
 			work = list_entry(cur, struct btrfs_work, list);
 			list_del(&work->list);
 			clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ again_locked:
 
 			spin_lock_irq(&worker->lock);
 			check_idle_worker(worker);
-
 		}
 		if (freezing(current)) {
 			worker->working = 0;
@@ -178,7 +191,8 @@ again_locked:
 			 * jump_in?
 			 */
 			smp_mb();
-			if (!list_empty(&worker->pending))
+			if (!list_empty(&worker->pending) ||
+			    !list_empty(&worker->prio_pending))
 				continue;
 
 			/*
@@ -191,7 +205,8 @@ again_locked:
 			 */
 			schedule_timeout(1);
 			smp_mb();
-			if (!list_empty(&worker->pending))
+			if (!list_empty(&worker->pending) ||
+			    !list_empty(&worker->prio_pending))
 				continue;
 
 			if (kthread_should_stop())
@@ -200,7 +215,8 @@ again_locked:
 		/* still no more work?, sleep for real */
 		spin_lock_irq(&worker->lock);
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (!list_empty(&worker->pending))
+		if (!list_empty(&worker->pending) ||
+		    !list_empty(&worker->prio_pending))
 			goto again_locked;
 
 		/*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
 	INIT_LIST_HEAD(&workers->worker_list);
 	INIT_LIST_HEAD(&workers->idle_list);
 	INIT_LIST_HEAD(&workers->order_list);
+	INIT_LIST_HEAD(&workers->prio_order_list);
 	spin_lock_init(&workers->lock);
 	workers->max_workers = max;
 	workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
 	}
 
 	INIT_LIST_HEAD(&worker->pending);
+	INIT_LIST_HEAD(&worker->prio_pending);
 	INIT_LIST_HEAD(&worker->worker_list);
 	spin_lock_init(&worker->lock);
 	atomic_set(&worker->num_pending, 0);
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work)
 		goto out;
 
 	spin_lock_irqsave(&worker->lock, flags);
-	list_add_tail(&work->list, &worker->pending);
+	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+		list_add_tail(&work->list, &worker->prio_pending);
+	else
+		list_add_tail(&work->list, &worker->pending);
 	atomic_inc(&worker->num_pending);
 
 	/* by definition we're busy, take ourselves off the idle
@@ -422,6 +443,11 @@ out:
 	return 0;
 }
 
+void btrfs_set_work_high_prio(struct btrfs_work *work)
+{
+	set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
+}
+
 /*
  * places a struct btrfs_work into the pending queue of one of the kthreads
 */
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 	worker = find_worker(workers);
 	if (workers->ordered) {
 		spin_lock_irqsave(&workers->lock, flags);
-		list_add_tail(&work->order_list, &workers->order_list);
+		if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
+			list_add_tail(&work->order_list,
+				      &workers->prio_order_list);
+		} else {
+			list_add_tail(&work->order_list, &workers->order_list);
+		}
 		spin_unlock_irqrestore(&workers->lock, flags);
 	} else {
 		INIT_LIST_HEAD(&work->order_list);
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 
 	spin_lock_irqsave(&worker->lock, flags);
 
-	list_add_tail(&work->list, &worker->pending);
+	if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
+		list_add_tail(&work->list, &worker->prio_pending);
+	else
+		list_add_tail(&work->list, &worker->pending);
 	atomic_inc(&worker->num_pending);
 	check_busy_worker(worker);
 
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 31be4ed8b63e..1b511c109db6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -85,6 +85,7 @@ struct btrfs_workers {
	 * of work items waiting for completion
	 */
 	struct list_head order_list;
+	struct list_head prio_order_list;
 
 	/* lock for finding the next worker thread to queue on */
 	spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
 int btrfs_stop_workers(struct btrfs_workers *workers);
 void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
 int btrfs_requeue_work(struct btrfs_work *work);
+void btrfs_set_work_high_prio(struct btrfs_work *work);
 #endif
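
Taken together, the async-thread changes give every worker a second pending list and every ordered pool a second completion list, with WORK_HIGH_PRIO_BIT selecting between them at queue time. A minimal sketch of how a submitter opts in (the callback name is hypothetical; the two calls are the API added by this patch):

    static void my_work_fn(struct btrfs_work *work)
    {
            /* hypothetical callback: service the work item */
    }

    static void queue_high_prio(struct btrfs_workers *workers,
                                struct btrfs_work *work)
    {
            work->func = my_work_fn;
            btrfs_set_work_high_prio(work);    /* sets WORK_HIGH_PRIO_BIT */
            btrfs_queue_worker(workers, work); /* lands on prio_pending */
    }
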
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e5b2533b691a..a99f1c2a710d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 	int ret = 0;
 	int blocksize;
 
-	parent = path->nodes[level - 1];
+	parent = path->nodes[level + 1];
 	if (!parent)
 		return 0;
 
 	nritems = btrfs_header_nritems(parent);
-	slot = path->slots[level];
+	slot = path->slots[level + 1];
 	blocksize = btrfs_level_size(root, level);
 
 	if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 			block1 = 0;
 		free_extent_buffer(eb);
 	}
-	if (slot < nritems) {
+	if (slot + 1 < nritems) {
 		block2 = btrfs_node_blockptr(parent, slot + 1);
 		gen = btrfs_node_ptr_generation(parent, slot + 1);
 		eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 	}
 	if (block1 || block2) {
 		ret = -EAGAIN;
+
+		/* release the whole path */
 		btrfs_release_path(root, path);
+
+		/* read the blocks */
 		if (block1)
 			readahead_tree_block(root, block1, blocksize, 0);
 		if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
 			eb = read_tree_block(root, block1, blocksize, 0);
 			free_extent_buffer(eb);
 		}
-		if (block1) {
+		if (block2) {
 			eb = read_tree_block(root, block2, blocksize, 0);
 			free_extent_buffer(eb);
 		}
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans,
		 * of the btree by dropping locks before
		 * we read.
		 */
-		btrfs_release_path(NULL, p);
+		btrfs_unlock_up_safe(p, level + 1);
+		btrfs_set_path_blocking(p);
+
 		if (tmp)
 			free_extent_buffer(tmp);
 		if (p->reada)
 			reada_for_search(root, p, level, slot, key->objectid);
 
+		btrfs_release_path(NULL, p);
 		tmp = read_tree_block(root, blocknr, blocksize, gen);
 		if (tmp)
 			free_extent_buffer(tmp);
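
The reada_for_balance() fix hinges on btrfs_path layout: nodes[level] is the block being balanced and nodes[level + 1] is its parent, so the siblings to prefetch are found by indexing the parent's slots. A condensed sketch of the corrected lookup (the block1 read is inferred from context, the rest is as in the hunk above):

    parent = path->nodes[level + 1];    /* parent of the block at 'level' */
    slot = path->slots[level + 1];      /* our position within that parent */
    if (slot > 0)                       /* a left sibling exists */
            block1 = btrfs_node_blockptr(parent, slot - 1);
    if (slot + 1 < btrfs_header_nritems(parent))  /* a right sibling exists */
            block2 = btrfs_node_blockptr(parent, slot + 1);
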
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad96495dedc5..4414a5d9983a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -881,6 +881,9 @@ struct btrfs_fs_info {
 	u64 metadata_alloc_profile;
 	u64 system_alloc_profile;
 
+	unsigned data_chunk_allocations;
+	unsigned metadata_ratio;
+
 	void *bdev_holder;
 };
 
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
 extern struct file_operations btrfs_file_operations;
 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct inode *inode,
-		       u64 start, u64 end, u64 inline_limit, u64 *hint_block);
+		       u64 start, u64 end, u64 locked_end,
+		       u64 inline_limit, u64 *hint_block);
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..0ff16d3331da 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
 		memcpy(&found, result, csum_size);
 
 		read_extent_buffer(buf, &val, 0, csum_size);
-		printk(KERN_INFO "btrfs: %s checksum verify failed "
-		       "on %llu wanted %X found %X level %d\n",
-		       root->fs_info->sb->s_id,
-		       buf->start, val, found, btrfs_header_level(buf));
+		if (printk_ratelimit()) {
+			printk(KERN_INFO "btrfs: %s checksum verify "
+			       "failed on %llu wanted %X found %X "
+			       "level %d\n",
+			       root->fs_info->sb->s_id,
+			       (unsigned long long)buf->start, val, found,
+			       btrfs_header_level(buf));
+		}
 		if (result != (char *)&inline_result)
 			kfree(result);
 		return 1;
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 		ret = 0;
 		goto out;
 	}
-	printk("parent transid verify failed on %llu wanted %llu found %llu\n",
-	       (unsigned long long)eb->start,
-	       (unsigned long long)parent_transid,
-	       (unsigned long long)btrfs_header_generation(eb));
+	if (printk_ratelimit()) {
+		printk("parent transid verify failed on %llu wanted %llu "
+		       "found %llu\n",
+		       (unsigned long long)eb->start,
+		       (unsigned long long)parent_transid,
+		       (unsigned long long)btrfs_header_generation(eb));
+	}
 	ret = 1;
 	clear_extent_buffer_uptodate(io_tree, eb);
 out:
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != start) {
-		printk(KERN_INFO "btrfs bad tree block start %llu %llu\n",
-		       (unsigned long long)found_start,
-		       (unsigned long long)eb->start);
+		if (printk_ratelimit()) {
+			printk(KERN_INFO "btrfs bad tree block start "
+			       "%llu %llu\n",
+			       (unsigned long long)found_start,
+			       (unsigned long long)eb->start);
+		}
 		ret = -EIO;
 		goto err;
 	}
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 		goto err;
 	}
 	if (check_tree_block_fsid(root, eb)) {
-		printk(KERN_INFO "btrfs bad fsid on block %llu\n",
-		       (unsigned long long)eb->start);
+		if (printk_ratelimit()) {
+			printk(KERN_INFO "btrfs bad fsid on block %llu\n",
+			       (unsigned long long)eb->start);
+		}
 		ret = -EIO;
 		goto err;
 	}
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 	async->bio_flags = bio_flags;
 
 	atomic_inc(&fs_info->nr_async_submits);
+
+	if (rw & (1 << BIO_RW_SYNCIO))
+		btrfs_set_work_high_prio(&async->work);
+
 	btrfs_queue_worker(&fs_info->workers, &async->work);
-#if 0
-	int limit = btrfs_async_submit_limit(fs_info);
-	if (atomic_read(&fs_info->nr_async_submits) > limit) {
-		wait_event_timeout(fs_info->async_submit_wait,
-			   (atomic_read(&fs_info->nr_async_submits) < limit),
-			   HZ/10);
 
-		wait_event_timeout(fs_info->async_submit_wait,
-			   (atomic_read(&fs_info->nr_async_bios) < limit),
-			   HZ/10);
-	}
-#endif
 	while (atomic_read(&fs_info->async_submit_draining) &&
	      atomic_read(&fs_info->nr_async_submits)) {
 		wait_event(fs_info->async_submit_wait,
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
			     mirror_num, 0);
 }
+
 /*
  * kthread helpers are used to submit writes so that checksumming
  * can happen in parallel across all CPUs
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
 	}
 }
 
-#if 0
-static int btree_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct buffer_head *bh;
-	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-	struct buffer_head *head;
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, root->fs_info->sb->s_blocksize,
-					(1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
-	head = page_buffers(page);
-	bh = head;
-	do {
-		if (buffer_dirty(bh))
-			csum_tree_block(root, bh, 0);
-		bh = bh->b_this_page;
-	} while (bh != head);
-	return block_write_full_page(page, btree_get_block, wbc);
-}
-#endif
-
 static struct address_space_operations btree_aops = {
 	.readpage	= btree_readpage,
 	.writepage	= btree_writepage,
@@ -1273,11 +1258,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
 	int ret = 0;
 	struct btrfs_device *device;
 	struct backing_dev_info *bdi;
-#if 0
-	if ((bdi_bits & (1 << BDI_write_congested)) &&
-	    btrfs_congested_async(info, 0))
-		return 1;
-#endif
+
 	list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
 		if (!device->bdev)
 			continue;
@@ -1599,6 +1580,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	fs_info->btree_inode = new_inode(sb);
 	fs_info->btree_inode->i_ino = 1;
 	fs_info->btree_inode->i_nlink = 1;
+	fs_info->metadata_ratio = 8;
 
 	fs_info->thread_pool_size = min_t(unsigned long,
					  num_online_cpus() + 2, 8);
@@ -1689,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (features) {
 		printk(KERN_ERR "BTRFS: couldn't mount because of "
 		       "unsupported optional features (%Lx).\n",
-		       features);
+		       (unsigned long long)features);
 		err = -EINVAL;
 		goto fail_iput;
 	}
@@ -1699,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (!(sb->s_flags & MS_RDONLY) && features) {
 		printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
 		       "unsupported option features (%Lx).\n",
-		       features);
+		       (unsigned long long)features);
 		err = -EINVAL;
 		goto fail_iput;
 	}
@@ -2095,10 +2077,10 @@ static int write_dev_supers(struct btrfs_device *device,
 			device->barriers = 0;
 			get_bh(bh);
 			lock_buffer(bh);
-			ret = submit_bh(WRITE, bh);
+			ret = submit_bh(WRITE_SYNC, bh);
 		}
 	} else {
-		ret = submit_bh(WRITE, bh);
+		ret = submit_bh(WRITE_SYNC, bh);
 	}
 
 	if (!ret && wait) {
@@ -2291,7 +2273,7 @@ int close_ctree(struct btrfs_root *root)
 
 	if (fs_info->delalloc_bytes) {
 		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
-		       fs_info->delalloc_bytes);
+		       (unsigned long long)fs_info->delalloc_bytes);
 	}
 	if (fs_info->total_ref_cache_size) {
 		printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
@@ -2328,16 +2310,6 @@ int close_ctree(struct btrfs_root *root)
 	btrfs_stop_workers(&fs_info->endio_write_workers);
 	btrfs_stop_workers(&fs_info->submit_workers);
 
-#if 0
-	while (!list_empty(&fs_info->hashers)) {
-		struct btrfs_hasher *hasher;
-		hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
-				    hashers);
-		list_del(&hasher->hashers);
-		crypto_free_hash(&fs_info->hash_tfm);
-		kfree(hasher);
-	}
-#endif
 	btrfs_close_devices(fs_info->fs_devices);
 	btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 178df4c67de4..e4966444811b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1844,10 +1844,14 @@ again:
 		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
 		       ", %llu bytes_used, %llu bytes_reserved, "
 		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
-		       "%llu total\n", bytes, data_sinfo->bytes_delalloc,
-		       data_sinfo->bytes_used, data_sinfo->bytes_reserved,
-		       data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
-		       data_sinfo->bytes_may_use, data_sinfo->total_bytes);
+		       "%llu total\n", (unsigned long long)bytes,
+		       (unsigned long long)data_sinfo->bytes_delalloc,
+		       (unsigned long long)data_sinfo->bytes_used,
+		       (unsigned long long)data_sinfo->bytes_reserved,
+		       (unsigned long long)data_sinfo->bytes_pinned,
+		       (unsigned long long)data_sinfo->bytes_readonly,
+		       (unsigned long long)data_sinfo->bytes_may_use,
+		       (unsigned long long)data_sinfo->total_bytes);
 		return -ENOSPC;
 	}
 	data_sinfo->bytes_may_use += bytes;
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 	spin_unlock(&info->lock);
 }
 
+static void force_metadata_allocation(struct btrfs_fs_info *info)
+{
+	struct list_head *head = &info->space_info;
+	struct btrfs_space_info *found;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(found, head, list) {
+		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+			found->force_alloc = 1;
+	}
+	rcu_read_unlock();
+}
+
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force)
 {
 	struct btrfs_space_info *space_info;
+	struct btrfs_fs_info *fs_info = extent_root->fs_info;
 	u64 thresh;
 	int ret = 0;
 
-	mutex_lock(&extent_root->fs_info->chunk_mutex);
+	mutex_lock(&fs_info->chunk_mutex);
 
 	flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 	}
 	spin_unlock(&space_info->lock);
 
+	/*
+	 * if we're doing a data chunk, go ahead and make sure that
+	 * we keep a reasonable number of metadata chunks allocated in the
+	 * FS as well.
+	 */
+	if (flags & BTRFS_BLOCK_GROUP_DATA) {
+		fs_info->data_chunk_allocations++;
+		if (!(fs_info->data_chunk_allocations %
+		      fs_info->metadata_ratio))
+			force_metadata_allocation(fs_info);
+	}
+
 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
 	if (ret)
 		space_info->full = 1;
@@ -2798,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
 	       info->bytes_pinned - info->bytes_reserved),
 	       (info->full) ? "" : "not ");
 	printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
-	       " may_use=%llu, used=%llu\n", info->total_bytes,
-	       info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
-	       info->bytes_used);
+	       " may_use=%llu, used=%llu\n",
+	       (unsigned long long)info->total_bytes,
+	       (unsigned long long)info->bytes_pinned,
+	       (unsigned long long)info->bytes_delalloc,
+	       (unsigned long long)info->bytes_may_use,
+	       (unsigned long long)info->bytes_used);
 
 	down_read(&info->groups_sem);
 	list_for_each_entry(cache, &info->block_groups, list) {
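
The new metadata_ratio counter (initialized to 8 in open_ctree() above) makes every 8th data chunk allocation flip force_alloc on all metadata space_infos, so metadata chunk growth keeps pace with data. The arithmetic in isolation, as added to do_chunk_alloc():

    fs_info->data_chunk_allocations++;
    if (!(fs_info->data_chunk_allocations % fs_info->metadata_ratio))
            force_metadata_allocation(fs_info);  /* fires on every Nth data chunk */
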
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..fe9eb990e443 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
 #include "ctree.h"
 #include "btrfs_inode.h"
 
-/* temporary define until extent_map moves out of btrfs */
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
-		       unsigned long extra_flags,
-		       void (*ctor)(void *, struct kmem_cache *,
-				    unsigned long));
-
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 
@@ -50,20 +44,23 @@ struct extent_page_data {
 	/* tells writepage not to lock the state bits for this range
	 * it still does the unlocking
	 */
-	int extent_locked;
+	unsigned int extent_locked:1;
+
+	/* tells the submit_bio code to use a WRITE_SYNC */
+	unsigned int sync_io:1;
 };
 
 int __init extent_io_init(void)
 {
-	extent_state_cache = btrfs_cache_create("extent_state",
-					       sizeof(struct extent_state), 0,
-					       NULL);
+	extent_state_cache = kmem_cache_create("extent_state",
+			sizeof(struct extent_state), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_state_cache)
 		return -ENOMEM;
 
-	extent_buffer_cache = btrfs_cache_create("extent_buffers",
-					       sizeof(struct extent_buffer), 0,
-					       NULL);
+	extent_buffer_cache = kmem_cache_create("extent_buffers",
+			sizeof(struct extent_buffer), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_buffer_cache)
 		goto free_state_cache;
 	return 0;
@@ -1404,69 +1401,6 @@ out:
 	return total_bytes;
 }
 
-#if 0
-/*
- * helper function to lock both pages and extents in the tree.
- * pages must be locked first.
- */
-static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-	struct page *page;
-	int err;
-
-	while (index <= end_index) {
-		page = grab_cache_page(tree->mapping, index);
-		if (!page) {
-			err = -ENOMEM;
-			goto failed;
-		}
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto failed;
-		}
-		index++;
-	}
-	lock_extent(tree, start, end, GFP_NOFS);
-	return 0;
-
-failed:
-	/*
-	 * we failed above in getting the page at 'index', so we undo here
-	 * up to but not including the page at 'index'
-	 */
-	end_index = index;
-	index = start >> PAGE_CACHE_SHIFT;
-	while (index < end_index) {
-		page = find_get_page(tree->mapping, index);
-		unlock_page(page);
-		page_cache_release(page);
-		index++;
-	}
-	return err;
-}
-
-/*
- * helper function to unlock both pages and extents in the tree.
- */
-static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
-{
-	unsigned long index = start >> PAGE_CACHE_SHIFT;
-	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-	struct page *page;
-
-	while (index <= end_index) {
-		page = find_get_page(tree->mapping, index);
-		unlock_page(page);
-		page_cache_release(page);
-		index++;
-	}
-	unlock_extent(tree, start, end, GFP_NOFS);
-	return 0;
-}
-#endif
-
 /*
  * set the private field for a given byte offset in the tree.  If there isn't
  * an extent_state there already, this does nothing.
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 	return ret;
 }
 
+static noinline void update_nr_written(struct page *page,
+				       struct writeback_control *wbc,
+				       unsigned long nr_written)
+{
+	wbc->nr_to_write -= nr_written;
+	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
+	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
+		page->mapping->writeback_index = page->index + nr_written;
+}
+
 /*
  * the writepage semantics are similar to regular writepage.  extent
  * records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 delalloc_end;
 	int page_started;
 	int compressed;
+	int write_flags;
 	unsigned long nr_written = 0;
 
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		write_flags = WRITE_SYNC_PLUG;
+	else
+		write_flags = WRITE;
+
 	WARN_ON(!PageLocked(page));
 	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
 	if (page->index > end_index ||
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	delalloc_end = 0;
 	page_started = 0;
 	if (!epd->extent_locked) {
+		/*
+		 * make sure the wbc mapping index is at least updated
+		 * to this page.
+		 */
+		update_nr_written(page, wbc, 0);
+
 		while (delalloc_end < page_end) {
 			nr_delalloc = find_lock_delalloc_range(inode, tree,
							       page,
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		 */
 		if (page_started) {
 			ret = 0;
-			goto update_nr_written;
+			/*
+			 * we've unlocked the page, so we can't update
+			 * the mapping's writeback index, just update
+			 * nr_to_write.
+			 */
+			wbc->nr_to_write -= nr_written;
+			goto done_unlocked;
 		}
 	}
 	lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		if (ret == -EAGAIN) {
 			unlock_extent(tree, start, page_end, GFP_NOFS);
 			redirty_page_for_writepage(wbc, page);
+			update_nr_written(page, wbc, nr_written);
 			unlock_page(page);
 			ret = 0;
-			goto update_nr_written;
+			goto done_unlocked;
 		}
 	}
 
-	nr_written++;
+	/*
+	 * we don't want to touch the inode after unlocking the page,
+	 * so we update the mapping writeback index now
+	 */
+	update_nr_written(page, wbc, nr_written + 1);
 
 	end = page_end;
 	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			       (unsigned long long)end);
 		}
 
-		ret = submit_extent_page(WRITE, tree, page, sector,
-					 iosize, pg_offset, bdev,
-					 &epd->bio, max_nr,
+		ret = submit_extent_page(write_flags, tree, page,
+					 sector, iosize, pg_offset,
+					 bdev, &epd->bio, max_nr,
					 end_bio_extent_writepage,
					 0, 0, 0);
 		if (ret)
@@ -2336,11 +2303,8 @@ done:
 		unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
 	unlock_page(page);
 
-update_nr_written:
-	wbc->nr_to_write -= nr_written;
-	if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
-	    wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
-		page->mapping->writeback_index = page->index + nr_written;
+done_unlocked:
+
 	return 0;
 }
 
@@ -2460,15 +2424,23 @@ retry:
 	return ret;
 }
 
-static noinline void flush_write_bio(void *data)
+static void flush_epd_write_bio(struct extent_page_data *epd)
 {
-	struct extent_page_data *epd = data;
 	if (epd->bio) {
-		submit_one_bio(WRITE, epd->bio, 0, 0);
+		if (epd->sync_io)
+			submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
+		else
+			submit_one_bio(WRITE, epd->bio, 0, 0);
 		epd->bio = NULL;
 	}
 }
 
+static noinline void flush_write_bio(void *data)
+{
+	struct extent_page_data *epd = data;
+	flush_epd_write_bio(epd);
+}
+
 int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
			   get_extent_t *get_extent,
			   struct writeback_control *wbc)
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi		= wbc->bdi,
-		.sync_mode	= WB_SYNC_NONE,
+		.sync_mode	= wbc->sync_mode,
 		.older_than_this = NULL,
 		.nr_to_write	= 64,
 		.range_start	= page_offset(page) + PAGE_CACHE_SIZE,
 		.range_end	= (loff_t)-1,
 	};
 
-
 	ret = __extent_writepage(page, wbc, &epd);
 
 	extent_write_cache_pages(tree, mapping, &wbc_writepages,
				 __extent_writepage, &epd, flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 1,
+		.sync_io = mode == WB_SYNC_ALL,
 	};
 	struct writeback_control wbc_writepages = {
 		.bdi		= inode->i_mapping->backing_dev_info,
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		start += PAGE_CACHE_SIZE;
 	}
 
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree,
 		.tree = tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
+		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
 
 	ret = extent_write_cache_pages(tree, mapping, wbc,
				       __extent_writepage, &epd,
				       flush_write_bio);
-	if (epd.bio)
-		submit_one_bio(WRITE, epd.bio, 0, 0);
+	flush_epd_write_bio(&epd);
 	return ret;
 }
 
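
The new sync_io bit is how WB_SYNC_ALL intent now travels from the writeback_control down to the block layer: it is captured once when the extent_page_data is built and consumed in flush_epd_write_bio(), which then picks WRITE_SYNC over WRITE. A condensed sketch of the flow, using the fields exactly as defined above:

    struct extent_page_data epd = {
            .bio = NULL,
            .tree = tree,
            .get_extent = get_extent,
            .extent_locked = 0,
            .sync_io = wbc->sync_mode == WB_SYNC_ALL,  /* capture intent once */
    };

    /* ... __extent_writepage() accumulates pages into epd.bio ... */

    flush_epd_write_bio(&epd);  /* WRITE_SYNC if epd.sync_io, else WRITE */
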
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b187917b36fa..30c9365861e6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -6,19 +6,14 @@
 #include <linux/hardirq.h>
 #include "extent_map.h"
 
-/* temporary define until extent_map moves out of btrfs */
-struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
-		       unsigned long extra_flags,
-		       void (*ctor)(void *, struct kmem_cache *,
-				    unsigned long));
 
 static struct kmem_cache *extent_map_cache;
 
 int __init extent_map_init(void)
 {
-	extent_map_cache = btrfs_cache_create("extent_map",
-					      sizeof(struct extent_map), 0,
-					      NULL);
+	extent_map_cache = kmem_cache_create("extent_map",
+			sizeof(struct extent_map), 0,
+			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_map_cache)
 		return -ENOMEM;
 	return 0;
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
 	tree->map.rb_node = NULL;
 	spin_lock_init(&tree->lock);
 }
-EXPORT_SYMBOL(extent_map_tree_init);
 
 /**
  * alloc_extent_map - allocate new extent map structure
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask)
 	atomic_set(&em->refs, 1);
 	return em;
 }
-EXPORT_SYMBOL(alloc_extent_map);
 
 /**
  * free_extent_map - drop reference count of an extent_map
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em)
 		kmem_cache_free(extent_map_cache, em);
 	}
 }
-EXPORT_SYMBOL(free_extent_map);
 
 static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
 out:
 	return ret;
 }
-EXPORT_SYMBOL(add_extent_mapping);
 
 /* simple helper to do math around the end of an extent, handling wrap */
 static u64 range_end(u64 start, u64 len)
@@ -326,7 +317,6 @@ found:
 out:
 	return em;
 }
-EXPORT_SYMBOL(lookup_extent_mapping);
 
 /**
  * remove_extent_mapping - removes an extent_map from the extent tree
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
 	em->in_tree = 0;
 	return ret;
 }
-EXPORT_SYMBOL(remove_extent_mapping);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9c9fb46ccd08..1d51dc38bb49 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
272 | return 0; | 272 | return 0; |
273 | } | 273 | } |
274 | 274 | ||
275 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode) | ||
276 | { | ||
277 | return 0; | ||
278 | #if 0 | ||
279 | struct btrfs_path *path; | ||
280 | struct btrfs_key found_key; | ||
281 | struct extent_buffer *leaf; | ||
282 | struct btrfs_file_extent_item *extent; | ||
283 | u64 last_offset = 0; | ||
284 | int nritems; | ||
285 | int slot; | ||
286 | int found_type; | ||
287 | int ret; | ||
288 | int err = 0; | ||
289 | u64 extent_end = 0; | ||
290 | |||
291 | path = btrfs_alloc_path(); | ||
292 | ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, | ||
293 | last_offset, 0); | ||
294 | while (1) { | ||
295 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
296 | if (path->slots[0] >= nritems) { | ||
297 | ret = btrfs_next_leaf(root, path); | ||
298 | if (ret) | ||
299 | goto out; | ||
300 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
301 | } | ||
302 | slot = path->slots[0]; | ||
303 | leaf = path->nodes[0]; | ||
304 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
305 | if (found_key.objectid != inode->i_ino) | ||
306 | break; | ||
307 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
308 | goto out; | ||
309 | |||
310 | if (found_key.offset < last_offset) { | ||
311 | WARN_ON(1); | ||
312 | btrfs_print_leaf(root, leaf); | ||
313 | printk(KERN_ERR "inode %lu found offset %llu " | ||
314 | "expected %llu\n", inode->i_ino, | ||
315 | (unsigned long long)found_key.offset, | ||
316 | (unsigned long long)last_offset); | ||
317 | err = 1; | ||
318 | goto out; | ||
319 | } | ||
320 | extent = btrfs_item_ptr(leaf, slot, | ||
321 | struct btrfs_file_extent_item); | ||
322 | found_type = btrfs_file_extent_type(leaf, extent); | ||
323 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
324 | extent_end = found_key.offset + | ||
325 | btrfs_file_extent_num_bytes(leaf, extent); | ||
326 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
327 | struct btrfs_item *item; | ||
328 | item = btrfs_item_nr(leaf, slot); | ||
329 | extent_end = found_key.offset + | ||
330 | btrfs_file_extent_inline_len(leaf, extent); | ||
331 | extent_end = (extent_end + root->sectorsize - 1) & | ||
332 | ~((u64)root->sectorsize - 1); | ||
333 | } | ||
334 | last_offset = extent_end; | ||
335 | path->slots[0]++; | ||
336 | } | ||
337 | if (0 && last_offset < inode->i_size) { | ||
338 | WARN_ON(1); | ||
339 | btrfs_print_leaf(root, leaf); | ||
340 | printk(KERN_ERR "inode %lu found offset %llu size %llu\n", | ||
341 | inode->i_ino, (unsigned long long)last_offset, | ||
342 | (unsigned long long)inode->i_size); | ||
343 | err = 1; | ||
344 | |||
345 | } | ||
346 | out: | ||
347 | btrfs_free_path(path); | ||
348 | return err; | ||
349 | #endif | ||
350 | } | ||
351 | |||
352 | /* | 275 | /* |
353 | * this is very complex, but the basic idea is to drop all extents | 276 | * this is very complex, but the basic idea is to drop all extents |
354 | * in the range start - end. hint_block is filled in with a block number | 277 | * in the range start - end. hint_block is filled in with a block number |
@@ -363,15 +286,16 @@ out: | |||
363 | */ | 286 | */ |
364 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 287 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
365 | struct btrfs_root *root, struct inode *inode, | 288 | struct btrfs_root *root, struct inode *inode, |
366 | u64 start, u64 end, u64 inline_limit, u64 *hint_byte) | 289 | u64 start, u64 end, u64 locked_end, |
290 | u64 inline_limit, u64 *hint_byte) | ||
367 | { | 291 | { |
368 | u64 extent_end = 0; | 292 | u64 extent_end = 0; |
369 | u64 locked_end = end; | ||
370 | u64 search_start = start; | 293 | u64 search_start = start; |
371 | u64 leaf_start; | 294 | u64 leaf_start; |
372 | u64 ram_bytes = 0; | 295 | u64 ram_bytes = 0; |
373 | u64 orig_parent = 0; | 296 | u64 orig_parent = 0; |
374 | u64 disk_bytenr = 0; | 297 | u64 disk_bytenr = 0; |
298 | u64 orig_locked_end = locked_end; | ||
375 | u8 compression; | 299 | u8 compression; |
376 | u8 encryption; | 300 | u8 encryption; |
377 | u16 other_encoding = 0; | 301 | u16 other_encoding = 0; |
@@ -684,11 +608,10 @@ next_slot: | |||
684 | } | 608 | } |
685 | out: | 609 | out: |
686 | btrfs_free_path(path); | 610 | btrfs_free_path(path); |
687 | if (locked_end > end) { | 611 | if (locked_end > orig_locked_end) { |
688 | unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, | 612 | unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, |
689 | GFP_NOFS); | 613 | locked_end - 1, GFP_NOFS); |
690 | } | 614 | } |
691 | btrfs_check_file(root, inode); | ||
692 | return ret; | 615 | return ret; |
693 | } | 616 | } |
694 | 617 | ||
@@ -830,7 +753,7 @@ again: | |||
830 | 753 | ||
831 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 754 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
832 | BUG_ON(ret); | 755 | BUG_ON(ret); |
833 | goto done; | 756 | goto release; |
834 | } else if (split == start) { | 757 | } else if (split == start) { |
835 | if (locked_end < extent_end) { | 758 | if (locked_end < extent_end) { |
836 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | 759 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, |
@@ -926,6 +849,8 @@ again: | |||
926 | } | 849 | } |
927 | done: | 850 | done: |
928 | btrfs_mark_buffer_dirty(leaf); | 851 | btrfs_mark_buffer_dirty(leaf); |
852 | |||
853 | release: | ||
929 | btrfs_release_path(root, path); | 854 | btrfs_release_path(root, path); |
930 | if (split_end && split == start) { | 855 | if (split_end && split == start) { |
931 | split = end; | 856 | split = end; |
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1131 | if (will_write) { | 1056 | if (will_write) { |
1132 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1057 | btrfs_fdatawrite_range(inode->i_mapping, pos, |
1133 | pos + write_bytes - 1, | 1058 | pos + write_bytes - 1, |
1134 | WB_SYNC_NONE); | 1059 | WB_SYNC_ALL); |
1135 | } else { | 1060 | } else { |
1136 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1061 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1137 | num_pages); | 1062 | num_pages); |
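
btrfs_drop_extents() now takes the caller's lock boundary as an explicit locked_end argument and remembers it in orig_locked_end, so on exit it releases only the part of the range it extended itself. A minimal standalone toy of that bookkeeping (illustrative C, not the kernel code; lock_range/unlock_range are stand-ins):

#include <stdio.h>

typedef unsigned long long u64;

static void lock_range(u64 s, u64 e)   { printf("  lock   [%llu, %llu)\n", s, e); }
static void unlock_range(u64 s, u64 e) { printf("  unlock [%llu, %llu)\n", s, e); }

/* the caller already holds [start, locked_end); we may extend that */
static void drop_extents(u64 start, u64 end, u64 locked_end)
{
	u64 orig_locked_end = locked_end;

	(void)start;
	/* pretend an extent straddles 'end', forcing us to lock past it */
	if (locked_end < end + 4096) {
		lock_range(locked_end, end + 4096);
		locked_end = end + 4096;
	}

	/* ... drop and split extents in [start, end) ... */

	/* give back only what we took, never the caller's own lock */
	if (locked_end > orig_locked_end)
		unlock_range(orig_locked_end, locked_end);
}

int main(void)
{
	puts("caller holds [0, 8192) and passes locked_end == 8192:");
	drop_extents(0, 8192, 8192);
	return 0;
}

A caller that has locked exactly the drop range simply passes end twice, as the cow_file_range_inline() call in the inode.c hunks further down does with aligned_end.
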
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 768b9523662d..0bc93657b460 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
332 | printk(KERN_ERR "couldn't find space %llu to free\n", | 332 | printk(KERN_ERR "couldn't find space %llu to free\n", |
333 | (unsigned long long)offset); | 333 | (unsigned long long)offset); |
334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", | 334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", |
335 | block_group->cached, block_group->key.objectid, | 335 | block_group->cached, |
336 | block_group->key.offset); | 336 | (unsigned long long)block_group->key.objectid, |
337 | (unsigned long long)block_group->key.offset); | ||
337 | btrfs_dump_free_space(block_group, bytes); | 338 | btrfs_dump_free_space(block_group, bytes); |
338 | } else if (info) { | 339 | } else if (info) { |
339 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " | 340 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " |
340 | "but wanted offset=%llu bytes=%llu\n", | 341 | "but wanted offset=%llu bytes=%llu\n", |
341 | info->offset, info->bytes, offset, bytes); | 342 | (unsigned long long)info->offset, |
343 | (unsigned long long)info->bytes, | ||
344 | (unsigned long long)offset, | ||
345 | (unsigned long long)bytes); | ||
342 | } | 346 | } |
343 | WARN_ON(1); | 347 | WARN_ON(1); |
344 | } | 348 | } |
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
357 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 361 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
358 | if (info->bytes >= bytes) | 362 | if (info->bytes >= bytes) |
359 | count++; | 363 | count++; |
360 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, | 364 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", |
361 | info->bytes); | 365 | (unsigned long long)info->offset, |
366 | (unsigned long long)info->bytes); | ||
362 | } | 367 | } |
363 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" | 368 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" |
364 | "\n", count); | 369 | "\n", count); |
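
The printk churn in this file is all one fix: u64 is not unsigned long long on every architecture, so each %llu argument needs an explicit cast to stay warning-free on both 32-bit and 64-bit builds. A userspace sketch of the mismatch (the typedef is a stand-in for an arch where u64 is unsigned long):

#include <stdio.h>

typedef unsigned long u64;	/* stand-in: some arches define u64 this way */

int main(void)
{
	u64 offset = 123456789;

	/* printf("%llu\n", offset) would warn here: the format expects
	 * unsigned long long but the argument is unsigned long */
	printf("offset %llu\n", (unsigned long long)offset);
	return 0;
}
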
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc7334d833c9..9abbced1123d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | |||
79 | } | 79 | } |
80 | path = btrfs_alloc_path(); | 80 | path = btrfs_alloc_path(); |
81 | BUG_ON(!path); | 81 | BUG_ON(!path); |
82 | search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); | 82 | search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); |
83 | search_key.objectid = search_start; | 83 | search_key.objectid = search_start; |
84 | search_key.type = 0; | 84 | search_key.type = 0; |
85 | search_key.offset = 0; | 85 | search_key.offset = 0; |
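
The (u64) cast isn't cosmetic: the kernel's max() macro compares pointers to its two temporaries purely to make gcc warn when the argument types differ, and BTRFS_FIRST_FREE_OBJECTID is a plain integer constant while search_start is a u64. A standalone reproduction (kernel-style macro, gcc extensions, illustrative values):

#include <stdio.h>

typedef unsigned long long u64;
#define BTRFS_FIRST_FREE_OBJECTID 256

/* kernel-style max(): the (void)(&_max1 == &_max2) comparison exists
 * only to trigger a "comparison of distinct pointer types" warning
 * when x and y don't have the same type */
#define max(x, y) ({			\
	typeof(x) _max1 = (x);		\
	typeof(y) _max2 = (y);		\
	(void) (&_max1 == &_max2);	\
	_max1 > _max2 ? _max1 : _max2; })

int main(void)
{
	u64 search_start = 17;

	/* without the (u64) cast, _max1 is u64 and _max2 is int */
	search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
	printf("search_start = %llu\n", search_start);
	return 0;
}
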
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a58..90c23eb28829 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops; | |||
70 | static struct kmem_cache *btrfs_inode_cachep; | 70 | static struct kmem_cache *btrfs_inode_cachep; |
71 | struct kmem_cache *btrfs_trans_handle_cachep; | 71 | struct kmem_cache *btrfs_trans_handle_cachep; |
72 | struct kmem_cache *btrfs_transaction_cachep; | 72 | struct kmem_cache *btrfs_transaction_cachep; |
73 | struct kmem_cache *btrfs_bit_radix_cachep; | ||
74 | struct kmem_cache *btrfs_path_cachep; | 73 | struct kmem_cache *btrfs_path_cachep; |
75 | 74 | ||
76 | #define S_SHIFT 12 | 75 | #define S_SHIFT 12 |
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
234 | } | 233 | } |
235 | 234 | ||
236 | ret = btrfs_drop_extents(trans, root, inode, start, | 235 | ret = btrfs_drop_extents(trans, root, inode, start, |
237 | aligned_end, start, &hint_byte); | 236 | aligned_end, aligned_end, start, &hint_byte); |
238 | BUG_ON(ret); | 237 | BUG_ON(ret); |
239 | 238 | ||
240 | if (isize > actual_end) | 239 | if (isize > actual_end) |
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1439 | struct inode *inode, u64 file_pos, | 1438 | struct inode *inode, u64 file_pos, |
1440 | u64 disk_bytenr, u64 disk_num_bytes, | 1439 | u64 disk_bytenr, u64 disk_num_bytes, |
1441 | u64 num_bytes, u64 ram_bytes, | 1440 | u64 num_bytes, u64 ram_bytes, |
1441 | u64 locked_end, | ||
1442 | u8 compression, u8 encryption, | 1442 | u8 compression, u8 encryption, |
1443 | u16 other_encoding, int extent_type) | 1443 | u16 other_encoding, int extent_type) |
1444 | { | 1444 | { |
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1455 | 1455 | ||
1456 | path->leave_spinning = 1; | 1456 | path->leave_spinning = 1; |
1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1458 | file_pos + num_bytes, file_pos, &hint); | 1458 | file_pos + num_bytes, locked_end, |
1459 | file_pos, &hint); | ||
1459 | BUG_ON(ret); | 1460 | BUG_ON(ret); |
1460 | 1461 | ||
1461 | ins.objectid = inode->i_ino; | 1462 | ins.objectid = inode->i_ino; |
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1590 | ordered_extent->disk_len, | 1591 | ordered_extent->disk_len, |
1591 | ordered_extent->len, | 1592 | ordered_extent->len, |
1592 | ordered_extent->len, | 1593 | ordered_extent->len, |
1594 | ordered_extent->file_offset + | ||
1595 | ordered_extent->len, | ||
1593 | compressed, 0, 0, | 1596 | compressed, 0, 0, |
1594 | BTRFS_FILE_EXTENT_REG); | 1597 | BTRFS_FILE_EXTENT_REG); |
1595 | BUG_ON(ret); | 1598 | BUG_ON(ret); |
@@ -1819,10 +1822,12 @@ good: | |||
1819 | return 0; | 1822 | return 0; |
1820 | 1823 | ||
1821 | zeroit: | 1824 | zeroit: |
1822 | printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " | 1825 | if (printk_ratelimit()) { |
1823 | "private %llu\n", page->mapping->host->i_ino, | 1826 | printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " |
1824 | (unsigned long long)start, csum, | 1827 | "private %llu\n", page->mapping->host->i_ino, |
1825 | (unsigned long long)private); | 1828 | (unsigned long long)start, csum, |
1829 | (unsigned long long)private); | ||
1830 | } | ||
1826 | memset(kaddr + offset, 1, end - start + 1); | 1831 | memset(kaddr + offset, 1, end - start + 1); |
1827 | flush_dcache_page(page); | 1832 | flush_dcache_page(page); |
1828 | kunmap_atomic(kaddr, KM_USER0); | 1833 | kunmap_atomic(kaddr, KM_USER0); |
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2011 | } | 2016 | } |
2012 | 2017 | ||
2013 | /* | 2018 | /* |
2019 | * very simple check to peek ahead in the leaf looking for xattrs. If we | ||
2020 | * don't find any xattrs, we know there can't be any acls. | ||
2021 | * | ||
2022 | * slot is the slot the inode is in, objectid is the objectid of the inode | ||
2023 | */ | ||
2024 | static noinline int acls_after_inode_item(struct extent_buffer *leaf, | ||
2025 | int slot, u64 objectid) | ||
2026 | { | ||
2027 | u32 nritems = btrfs_header_nritems(leaf); | ||
2028 | struct btrfs_key found_key; | ||
2029 | int scanned = 0; | ||
2030 | |||
2031 | slot++; | ||
2032 | while (slot < nritems) { | ||
2033 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
2034 | |||
2035 | /* we found a different objectid, there must not be acls */ | ||
2036 | if (found_key.objectid != objectid) | ||
2037 | return 0; | ||
2038 | |||
2039 | /* we found an xattr, assume we've got an acl */ | ||
2040 | if (found_key.type == BTRFS_XATTR_ITEM_KEY) | ||
2041 | return 1; | ||
2042 | |||
2043 | /* | ||
2044 | * we found a key greater than an xattr key, there can't | ||
2045 | * be any acls later on | ||
2046 | */ | ||
2047 | if (found_key.type > BTRFS_XATTR_ITEM_KEY) | ||
2048 | return 0; | ||
2049 | |||
2050 | slot++; | ||
2051 | scanned++; | ||
2052 | |||
2053 | /* | ||
2054 | * it goes inode, inode backrefs, xattrs, extents, | ||
2055 | * so if there are a ton of hard links to an inode there can | ||
2056 | * be a lot of backrefs. Don't waste time searching too hard, | ||
2057 | * this is just an optimization | ||
2058 | */ | ||
2059 | if (scanned >= 8) | ||
2060 | break; | ||
2061 | } | ||
2062 | /* we hit the end of the leaf before we found an xattr or | ||
2063 | * something larger than an xattr. We have to assume the inode | ||
2064 | * has acls | ||
2065 | */ | ||
2066 | return 1; | ||
2067 | } | ||
2068 | |||
2069 | /* | ||
2014 | * read an inode from the btree into the in-memory inode | 2070 | * read an inode from the btree into the in-memory inode |
2015 | */ | 2071 | */ |
2016 | void btrfs_read_locked_inode(struct inode *inode) | 2072 | void btrfs_read_locked_inode(struct inode *inode) |
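
acls_after_inode_item() leans on the leaf's sort order: items are ordered by (objectid, type), and xattrs sort between an inode's backrefs and its extents, so a short bounded scan can prove a NULL acl without reading any xattr. A self-contained model of that walk (the key type values are illustrative stand-ins for the on-disk ordering):

#include <stdio.h>

typedef unsigned long long u64;

struct key { u64 objectid; unsigned int type; };

/* illustrative stand-ins: xattrs sort after the inode item and its
 * backrefs, extents sort after xattrs */
#define INODE_ITEM_KEY   1
#define XATTR_ITEM_KEY   24
#define EXTENT_DATA_KEY  108

static int maybe_acls(const struct key *leaf, int nritems,
		      int slot, u64 objectid)
{
	int scanned = 0;

	for (slot++; slot < nritems; slot++) {
		if (leaf[slot].objectid != objectid)
			return 0;	/* next inode started: no xattrs */
		if (leaf[slot].type == XATTR_ITEM_KEY)
			return 1;	/* found an xattr: acls possible */
		if (leaf[slot].type > XATTR_ITEM_KEY)
			return 0;	/* sorted past xattrs: none exist */
		if (++scanned >= 8)
			break;		/* bounded effort on backref runs */
	}
	return 1;			/* inconclusive: assume acls */
}

int main(void)
{
	struct key leaf[] = {
		{ 257, INODE_ITEM_KEY },
		{ 257, EXTENT_DATA_KEY },
		{ 258, INODE_ITEM_KEY },
	};

	/* inode 257 has an extent right after its item: no acls */
	printf("maybe_acls(257) = %d\n", maybe_acls(leaf, 3, 0, 257));
	return 0;
}
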
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2021 | struct btrfs_timespec *tspec; | 2077 | struct btrfs_timespec *tspec; |
2022 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2078 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2023 | struct btrfs_key location; | 2079 | struct btrfs_key location; |
2080 | int maybe_acls; | ||
2024 | u64 alloc_group_block; | 2081 | u64 alloc_group_block; |
2025 | u32 rdev; | 2082 | u32 rdev; |
2026 | int ret; | 2083 | int ret; |
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2067 | 2124 | ||
2068 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | 2125 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); |
2069 | 2126 | ||
2127 | /* | ||
2128 | * try to precache a NULL acl entry for files that don't have | ||
2129 | * any xattrs or acls | ||
2130 | */ | ||
2131 | maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); | ||
2132 | if (!maybe_acls) { | ||
2133 | BTRFS_I(inode)->i_acl = NULL; | ||
2134 | BTRFS_I(inode)->i_default_acl = NULL; | ||
2135 | } | ||
2136 | |||
2070 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, | 2137 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, |
2071 | alloc_group_block, 0); | 2138 | alloc_group_block, 0); |
2072 | btrfs_free_path(path); | 2139 | btrfs_free_path(path); |
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2877 | err = btrfs_drop_extents(trans, root, inode, | 2944 | err = btrfs_drop_extents(trans, root, inode, |
2878 | cur_offset, | 2945 | cur_offset, |
2879 | cur_offset + hole_size, | 2946 | cur_offset + hole_size, |
2947 | block_end, | ||
2880 | cur_offset, &hint_byte); | 2948 | cur_offset, &hint_byte); |
2881 | if (err) | 2949 | if (err) |
2882 | break; | 2950 | break; |
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
3041 | { | 3109 | { |
3042 | struct btrfs_inode *bi = BTRFS_I(inode); | 3110 | struct btrfs_inode *bi = BTRFS_I(inode); |
3043 | 3111 | ||
3044 | bi->i_acl = NULL; | 3112 | bi->i_acl = BTRFS_ACL_NOT_CACHED; |
3045 | bi->i_default_acl = NULL; | 3113 | bi->i_default_acl = BTRFS_ACL_NOT_CACHED; |
3046 | 3114 | ||
3047 | bi->generation = 0; | 3115 | bi->generation = 0; |
3048 | bi->sequence = 0; | 3116 | bi->sequence = 0; |
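
Together with the precache in btrfs_read_locked_inode() above, the sentinel gives the acl cache three states, and only the "unknown" one costs an xattr lookup. A compact illustration (the sentinel value and names here are for the sketch only):

#include <stdio.h>

struct posix_acl { int dummy; };

/* sentinel distinct from NULL and from any real acl pointer */
#define ACL_NOT_CACHED ((struct posix_acl *)-1)

static const char *describe(struct posix_acl *cached)
{
	if (cached == ACL_NOT_CACHED)
		return "unknown: must read xattrs from disk";
	if (!cached)
		return "known empty: answered with no lock, no I/O";
	return "cached: hand back the stored acl";
}

int main(void)
{
	struct posix_acl real = { 0 };

	printf("%s\n", describe(ACL_NOT_CACHED)); /* init_btrfs_i() default */
	printf("%s\n", describe(NULL));           /* precached NULL acl */
	printf("%s\n", describe(&real));          /* populated later */
	return 0;
}
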
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void) | |||
4634 | kmem_cache_destroy(btrfs_trans_handle_cachep); | 4702 | kmem_cache_destroy(btrfs_trans_handle_cachep); |
4635 | if (btrfs_transaction_cachep) | 4703 | if (btrfs_transaction_cachep) |
4636 | kmem_cache_destroy(btrfs_transaction_cachep); | 4704 | kmem_cache_destroy(btrfs_transaction_cachep); |
4637 | if (btrfs_bit_radix_cachep) | ||
4638 | kmem_cache_destroy(btrfs_bit_radix_cachep); | ||
4639 | if (btrfs_path_cachep) | 4705 | if (btrfs_path_cachep) |
4640 | kmem_cache_destroy(btrfs_path_cachep); | 4706 | kmem_cache_destroy(btrfs_path_cachep); |
4641 | } | 4707 | } |
4642 | 4708 | ||
4643 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
4644 | unsigned long extra_flags, | ||
4645 | void (*ctor)(void *)) | ||
4646 | { | ||
4647 | return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | | ||
4648 | SLAB_MEM_SPREAD | extra_flags), ctor); | ||
4649 | } | ||
4650 | |||
4651 | int btrfs_init_cachep(void) | 4709 | int btrfs_init_cachep(void) |
4652 | { | 4710 | { |
4653 | btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", | 4711 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", |
4654 | sizeof(struct btrfs_inode), | 4712 | sizeof(struct btrfs_inode), 0, |
4655 | 0, init_once); | 4713 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); |
4656 | if (!btrfs_inode_cachep) | 4714 | if (!btrfs_inode_cachep) |
4657 | goto fail; | 4715 | goto fail; |
4658 | btrfs_trans_handle_cachep = | 4716 | |
4659 | btrfs_cache_create("btrfs_trans_handle_cache", | 4717 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", |
4660 | sizeof(struct btrfs_trans_handle), | 4718 | sizeof(struct btrfs_trans_handle), 0, |
4661 | 0, NULL); | 4719 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
4662 | if (!btrfs_trans_handle_cachep) | 4720 | if (!btrfs_trans_handle_cachep) |
4663 | goto fail; | 4721 | goto fail; |
4664 | btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", | 4722 | |
4665 | sizeof(struct btrfs_transaction), | 4723 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", |
4666 | 0, NULL); | 4724 | sizeof(struct btrfs_transaction), 0, |
4725 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
4667 | if (!btrfs_transaction_cachep) | 4726 | if (!btrfs_transaction_cachep) |
4668 | goto fail; | 4727 | goto fail; |
4669 | btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", | 4728 | |
4670 | sizeof(struct btrfs_path), | 4729 | btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", |
4671 | 0, NULL); | 4730 | sizeof(struct btrfs_path), 0, |
4731 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
4672 | if (!btrfs_path_cachep) | 4732 | if (!btrfs_path_cachep) |
4673 | goto fail; | 4733 | goto fail; |
4674 | btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, | 4734 | |
4675 | SLAB_DESTROY_BY_RCU, NULL); | ||
4676 | if (!btrfs_bit_radix_cachep) | ||
4677 | goto fail; | ||
4678 | return 0; | 4735 | return 0; |
4679 | fail: | 4736 | fail: |
4680 | btrfs_destroy_cachep(); | 4737 | btrfs_destroy_cachep(); |
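
Dropping the btrfs_cache_create() wrapper keeps the usual kernel init pattern intact: every allocation bails to one label, and the teardown helper is safe to call with a partially built set. A userspace analog of that shape (malloc/free standing in for kmem_cache_create/kmem_cache_destroy):

#include <stdio.h>
#include <stdlib.h>

static void *inode_cache, *trans_cache, *path_cache;

static void destroy_caches(void)
{
	/* mirrors the "if (cachep) kmem_cache_destroy(cachep)" checks:
	 * free(NULL) is likewise a no-op */
	free(inode_cache);  inode_cache = NULL;
	free(trans_cache);  trans_cache = NULL;
	free(path_cache);   path_cache = NULL;
}

static int init_caches(void)
{
	inode_cache = malloc(128);
	if (!inode_cache)
		goto fail;
	trans_cache = malloc(64);
	if (!trans_cache)
		goto fail;
	path_cache = malloc(32);
	if (!path_cache)
		goto fail;
	return 0;
fail:
	destroy_caches();
	return -1;	/* the kernel version returns -ENOMEM */
}

int main(void)
{
	if (init_caches() == 0) {
		puts("caches ready");
		destroy_caches();
	}
	return 0;
}
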
@@ -4970,10 +5027,10 @@ out_fail: | |||
4970 | return err; | 5027 | return err; |
4971 | } | 5028 | } |
4972 | 5029 | ||
4973 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 5030 | static int prealloc_file_range(struct btrfs_trans_handle *trans, |
4974 | u64 alloc_hint, int mode) | 5031 | struct inode *inode, u64 start, u64 end, |
5032 | u64 locked_end, u64 alloc_hint, int mode) | ||
4975 | { | 5033 | { |
4976 | struct btrfs_trans_handle *trans; | ||
4977 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5034 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4978 | struct btrfs_key ins; | 5035 | struct btrfs_key ins; |
4979 | u64 alloc_size; | 5036 | u64 alloc_size; |
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4981 | u64 num_bytes = end - start; | 5038 | u64 num_bytes = end - start; |
4982 | int ret = 0; | 5039 | int ret = 0; |
4983 | 5040 | ||
4984 | trans = btrfs_join_transaction(root, 1); | ||
4985 | BUG_ON(!trans); | ||
4986 | btrfs_set_trans_block_group(trans, inode); | ||
4987 | |||
4988 | while (num_bytes > 0) { | 5041 | while (num_bytes > 0) { |
4989 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5042 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
4990 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5043 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4997 | ret = insert_reserved_file_extent(trans, inode, | 5050 | ret = insert_reserved_file_extent(trans, inode, |
4998 | cur_offset, ins.objectid, | 5051 | cur_offset, ins.objectid, |
4999 | ins.offset, ins.offset, | 5052 | ins.offset, ins.offset, |
5000 | ins.offset, 0, 0, 0, | 5053 | ins.offset, locked_end, |
5054 | 0, 0, 0, | ||
5001 | BTRFS_FILE_EXTENT_PREALLOC); | 5055 | BTRFS_FILE_EXTENT_PREALLOC); |
5002 | BUG_ON(ret); | 5056 | BUG_ON(ret); |
5003 | num_bytes -= ins.offset; | 5057 | num_bytes -= ins.offset; |
@@ -5015,7 +5069,6 @@ out: | |||
5015 | BUG_ON(ret); | 5069 | BUG_ON(ret); |
5016 | } | 5070 | } |
5017 | 5071 | ||
5018 | btrfs_end_transaction(trans, root); | ||
5019 | return ret; | 5072 | return ret; |
5020 | } | 5073 | } |
5021 | 5074 | ||
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5027 | u64 alloc_start; | 5080 | u64 alloc_start; |
5028 | u64 alloc_end; | 5081 | u64 alloc_end; |
5029 | u64 alloc_hint = 0; | 5082 | u64 alloc_hint = 0; |
5083 | u64 locked_end; | ||
5030 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | 5084 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; |
5031 | struct extent_map *em; | 5085 | struct extent_map *em; |
5086 | struct btrfs_trans_handle *trans; | ||
5032 | int ret; | 5087 | int ret; |
5033 | 5088 | ||
5034 | alloc_start = offset & ~mask; | 5089 | alloc_start = offset & ~mask; |
5035 | alloc_end = (offset + len + mask) & ~mask; | 5090 | alloc_end = (offset + len + mask) & ~mask; |
5036 | 5091 | ||
5092 | /* | ||
5093 | * wait for ordered IO before we have any locks. We'll loop again | ||
5094 | * below with the locks held. | ||
5095 | */ | ||
5096 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
5097 | |||
5037 | mutex_lock(&inode->i_mutex); | 5098 | mutex_lock(&inode->i_mutex); |
5038 | if (alloc_start > inode->i_size) { | 5099 | if (alloc_start > inode->i_size) { |
5039 | ret = btrfs_cont_expand(inode, alloc_start); | 5100 | ret = btrfs_cont_expand(inode, alloc_start); |
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5041 | goto out; | 5102 | goto out; |
5042 | } | 5103 | } |
5043 | 5104 | ||
5105 | locked_end = alloc_end - 1; | ||
5044 | while (1) { | 5106 | while (1) { |
5045 | struct btrfs_ordered_extent *ordered; | 5107 | struct btrfs_ordered_extent *ordered; |
5046 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, | 5108 | |
5047 | alloc_end - 1, GFP_NOFS); | 5109 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); |
5110 | if (!trans) { | ||
5111 | ret = -EIO; | ||
5112 | goto out; | ||
5113 | } | ||
5114 | |||
5115 | /* the extent lock is ordered inside the running | ||
5116 | * transaction | ||
5117 | */ | ||
5118 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
5119 | GFP_NOFS); | ||
5048 | ordered = btrfs_lookup_first_ordered_extent(inode, | 5120 | ordered = btrfs_lookup_first_ordered_extent(inode, |
5049 | alloc_end - 1); | 5121 | alloc_end - 1); |
5050 | if (ordered && | 5122 | if (ordered && |
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5052 | ordered->file_offset < alloc_end) { | 5124 | ordered->file_offset < alloc_end) { |
5053 | btrfs_put_ordered_extent(ordered); | 5125 | btrfs_put_ordered_extent(ordered); |
5054 | unlock_extent(&BTRFS_I(inode)->io_tree, | 5126 | unlock_extent(&BTRFS_I(inode)->io_tree, |
5055 | alloc_start, alloc_end - 1, GFP_NOFS); | 5127 | alloc_start, locked_end, GFP_NOFS); |
5128 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5129 | |||
5130 | /* | ||
5131 | * we can't wait on the range with the transaction | ||
5132 | * running or with the extent lock held | ||
5133 | */ | ||
5056 | btrfs_wait_ordered_range(inode, alloc_start, | 5134 | btrfs_wait_ordered_range(inode, alloc_start, |
5057 | alloc_end - alloc_start); | 5135 | alloc_end - alloc_start); |
5058 | } else { | 5136 | } else { |
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5070 | last_byte = min(extent_map_end(em), alloc_end); | 5148 | last_byte = min(extent_map_end(em), alloc_end); |
5071 | last_byte = (last_byte + mask) & ~mask; | 5149 | last_byte = (last_byte + mask) & ~mask; |
5072 | if (em->block_start == EXTENT_MAP_HOLE) { | 5150 | if (em->block_start == EXTENT_MAP_HOLE) { |
5073 | ret = prealloc_file_range(inode, cur_offset, | 5151 | ret = prealloc_file_range(trans, inode, cur_offset, |
5074 | last_byte, alloc_hint, mode); | 5152 | last_byte, locked_end + 1, |
5153 | alloc_hint, mode); | ||
5075 | if (ret < 0) { | 5154 | if (ret < 0) { |
5076 | free_extent_map(em); | 5155 | free_extent_map(em); |
5077 | break; | 5156 | break; |
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5087 | break; | 5166 | break; |
5088 | } | 5167 | } |
5089 | } | 5168 | } |
5090 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, | 5169 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5091 | GFP_NOFS); | 5170 | GFP_NOFS); |
5171 | |||
5172 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5092 | out: | 5173 | out: |
5093 | mutex_unlock(&inode->i_mutex); | 5174 | mutex_unlock(&inode->i_mutex); |
5094 | return ret; | 5175 | return ret; |
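
The comments in the fallocate hunks spell out the new hierarchy: transaction first, extent lock inside it, and never a wait for ordered IO while holding either. A stubbed walkthrough of the loop (the functions just log; only the ordering is the point):

#include <stdio.h>

static void wait_ordered(void) { puts("wait for ordered IO (nothing held)"); }
static void start_trans(void)  { puts("start transaction"); }
static void lock_range(void)   { puts("  lock extent range (inside trans)"); }
static void unlock_range(void) { puts("  unlock extent range"); }
static void end_trans(void)    { puts("end transaction"); }

static int ordered_overlaps(int attempt) { return attempt == 0; }

int main(void)
{
	int attempt = 0;

	wait_ordered();			/* before i_mutex or any lock */
	for (;;) {
		start_trans();
		lock_range();
		if (!ordered_overlaps(attempt++))
			break;
		/* can't wait with the transaction running or the
		 * extent range locked: drop both first */
		unlock_range();
		end_trans();
		wait_ordered();
	}
	puts("  ... preallocate the holes ...");
	unlock_range();
	end_trans();
	return 0;
}
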
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be10..5e94ea6e1cbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
461 | if (!capable(CAP_SYS_ADMIN)) | 461 | if (!capable(CAP_SYS_ADMIN)) |
462 | return -EPERM; | 462 | return -EPERM; |
463 | 463 | ||
464 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 464 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
465 | 465 | if (IS_ERR(vol_args)) | |
466 | if (!vol_args) | 466 | return PTR_ERR(vol_args); |
467 | return -ENOMEM; | ||
468 | |||
469 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
470 | ret = -EFAULT; | ||
471 | goto out; | ||
472 | } | ||
473 | 467 | ||
474 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 468 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
475 | namelen = strlen(vol_args->name); | 469 | namelen = strlen(vol_args->name); |
@@ -483,11 +477,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
483 | *devstr = '\0'; | 477 | *devstr = '\0'; |
484 | devstr = vol_args->name; | 478 | devstr = vol_args->name; |
485 | devid = simple_strtoull(devstr, &end, 10); | 479 | devid = simple_strtoull(devstr, &end, 10); |
486 | printk(KERN_INFO "resizing devid %llu\n", devid); | 480 | printk(KERN_INFO "resizing devid %llu\n", |
481 | (unsigned long long)devid); | ||
487 | } | 482 | } |
488 | device = btrfs_find_device(root, devid, NULL, NULL); | 483 | device = btrfs_find_device(root, devid, NULL, NULL); |
489 | if (!device) { | 484 | if (!device) { |
490 | printk(KERN_INFO "resizer unable to find device %llu\n", devid); | 485 | printk(KERN_INFO "resizer unable to find device %llu\n", |
486 | (unsigned long long)devid); | ||
491 | ret = -EINVAL; | 487 | ret = -EINVAL; |
492 | goto out_unlock; | 488 | goto out_unlock; |
493 | } | 489 | } |
@@ -545,7 +541,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
545 | 541 | ||
546 | out_unlock: | 542 | out_unlock: |
547 | mutex_unlock(&root->fs_info->volume_mutex); | 543 | mutex_unlock(&root->fs_info->volume_mutex); |
548 | out: | ||
549 | kfree(vol_args); | 544 | kfree(vol_args); |
550 | return ret; | 545 | return ret; |
551 | } | 546 | } |
@@ -565,15 +560,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
565 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 560 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
566 | return -EROFS; | 561 | return -EROFS; |
567 | 562 | ||
568 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 563 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
569 | 564 | if (IS_ERR(vol_args)) | |
570 | if (!vol_args) | 565 | return PTR_ERR(vol_args); |
571 | return -ENOMEM; | ||
572 | |||
573 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
574 | ret = -EFAULT; | ||
575 | goto out; | ||
576 | } | ||
577 | 566 | ||
578 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 567 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
579 | namelen = strlen(vol_args->name); | 568 | namelen = strlen(vol_args->name); |
@@ -675,19 +664,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
675 | if (!capable(CAP_SYS_ADMIN)) | 664 | if (!capable(CAP_SYS_ADMIN)) |
676 | return -EPERM; | 665 | return -EPERM; |
677 | 666 | ||
678 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 667 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
668 | if (IS_ERR(vol_args)) | ||
669 | return PTR_ERR(vol_args); | ||
679 | 670 | ||
680 | if (!vol_args) | ||
681 | return -ENOMEM; | ||
682 | |||
683 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
684 | ret = -EFAULT; | ||
685 | goto out; | ||
686 | } | ||
687 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 671 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
688 | ret = btrfs_init_new_device(root, vol_args->name); | 672 | ret = btrfs_init_new_device(root, vol_args->name); |
689 | 673 | ||
690 | out: | ||
691 | kfree(vol_args); | 674 | kfree(vol_args); |
692 | return ret; | 675 | return ret; |
693 | } | 676 | } |
@@ -703,19 +686,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
703 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 686 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
704 | return -EROFS; | 687 | return -EROFS; |
705 | 688 | ||
706 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 689 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
690 | if (IS_ERR(vol_args)) | ||
691 | return PTR_ERR(vol_args); | ||
707 | 692 | ||
708 | if (!vol_args) | ||
709 | return -ENOMEM; | ||
710 | |||
711 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
712 | ret = -EFAULT; | ||
713 | goto out; | ||
714 | } | ||
715 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 693 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
716 | ret = btrfs_rm_device(root, vol_args->name); | 694 | ret = btrfs_rm_device(root, vol_args->name); |
717 | 695 | ||
718 | out: | ||
719 | kfree(vol_args); | 696 | kfree(vol_args); |
720 | return ret; | 697 | return ret; |
721 | } | 698 | } |
@@ -830,7 +807,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
830 | BUG_ON(!trans); | 807 | BUG_ON(!trans); |
831 | 808 | ||
832 | /* punch hole in destination first */ | 809 | /* punch hole in destination first */ |
833 | btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); | 810 | btrfs_drop_extents(trans, root, inode, off, off + len, |
811 | off + len, 0, &hint_byte); | ||
834 | 812 | ||
835 | /* clone data */ | 813 | /* clone data */ |
836 | key.objectid = src->i_ino; | 814 | key.objectid = src->i_ino; |
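
memdup_user() collapses the kmalloc + copy_from_user + error-unwind triple into one call that hands back either the buffer or an ERR_PTR, which is why every "out:" label above could go. A userspace analog of the shape (malloc/memcpy standing in for the user copy; the names are ours):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* analog of memdup_user(): allocate-and-copy in one step; the kernel
 * version returns ERR_PTR(-ENOMEM) or ERR_PTR(-EFAULT) instead of NULL */
static void *memdup(const void *src, size_t len)
{
	void *p = malloc(len);

	if (!p)
		return NULL;
	memcpy(p, src, len);
	return p;
}

struct vol_args { char name[16]; };

int main(void)
{
	struct vol_args user_args = { "sdb1" };
	struct vol_args *vol_args = memdup(&user_args, sizeof(user_args));

	if (!vol_args)
		return ENOMEM;
	vol_args->name[sizeof(vol_args->name) - 1] = '\0'; /* as the ioctls do */
	printf("device: %s\n", vol_args->name);
	free(vol_args);
	return 0;
}
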
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again: | |||
489 | /* start IO across the range first to instantiate any delalloc | 489 | /* start IO across the range first to instantiate any delalloc |
490 | * extents | 490 | * extents |
491 | */ | 491 | */ |
492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); | 492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); |
493 | 493 | ||
494 | /* The compression code will leave pages locked but return from | 494 | /* The compression code will leave pages locked but return from |
495 | * writepage without setting the page writeback. Starting again | 495 | * writepage without setting the page writeback. Starting again |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9744af9d71e9..3536bdb2d7cb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@ enum { | |||
68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, | 70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, |
71 | Opt_flushoncommit, Opt_err, | 71 | Opt_ratio, Opt_flushoncommit, Opt_err, |
72 | }; | 72 | }; |
73 | 73 | ||
74 | static match_table_t tokens = { | 74 | static match_table_t tokens = { |
@@ -87,6 +87,7 @@ static match_table_t tokens = { | |||
87 | {Opt_noacl, "noacl"}, | 87 | {Opt_noacl, "noacl"}, |
88 | {Opt_notreelog, "notreelog"}, | 88 | {Opt_notreelog, "notreelog"}, |
89 | {Opt_flushoncommit, "flushoncommit"}, | 89 | {Opt_flushoncommit, "flushoncommit"}, |
90 | {Opt_ratio, "metadata_ratio=%d"}, | ||
90 | {Opt_err, NULL}, | 91 | {Opt_err, NULL}, |
91 | }; | 92 | }; |
92 | 93 | ||
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
195 | info->max_extent = max_t(u64, | 196 | info->max_extent = max_t(u64, |
196 | info->max_extent, root->sectorsize); | 197 | info->max_extent, root->sectorsize); |
197 | printk(KERN_INFO "btrfs: max_extent at %llu\n", | 198 | printk(KERN_INFO "btrfs: max_extent at %llu\n", |
198 | info->max_extent); | 199 | (unsigned long long)info->max_extent); |
199 | } | 200 | } |
200 | break; | 201 | break; |
201 | case Opt_max_inline: | 202 | case Opt_max_inline: |
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
210 | root->sectorsize); | 211 | root->sectorsize); |
211 | } | 212 | } |
212 | printk(KERN_INFO "btrfs: max_inline at %llu\n", | 213 | printk(KERN_INFO "btrfs: max_inline at %llu\n", |
213 | info->max_inline); | 214 | (unsigned long long)info->max_inline); |
214 | } | 215 | } |
215 | break; | 216 | break; |
216 | case Opt_alloc_start: | 217 | case Opt_alloc_start: |
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
220 | kfree(num); | 221 | kfree(num); |
221 | printk(KERN_INFO | 222 | printk(KERN_INFO |
222 | "btrfs: allocations start at %llu\n", | 223 | "btrfs: allocations start at %llu\n", |
223 | info->alloc_start); | 224 | (unsigned long long)info->alloc_start); |
224 | } | 225 | } |
225 | break; | 226 | break; |
226 | case Opt_noacl: | 227 | case Opt_noacl: |
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
234 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); | 235 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); |
235 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); | 236 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); |
236 | break; | 237 | break; |
238 | case Opt_ratio: | ||
239 | intarg = 0; | ||
240 | match_int(&args[0], &intarg); | ||
241 | if (intarg) { | ||
242 | info->metadata_ratio = intarg; | ||
243 | printk(KERN_INFO "btrfs: metadata ratio %d\n", | ||
244 | info->metadata_ratio); | ||
245 | } | ||
246 | break; | ||
237 | default: | 247 | default: |
238 | break; | 248 | break; |
239 | } | 249 | } |
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
410 | if (btrfs_test_opt(root, NOBARRIER)) | 420 | if (btrfs_test_opt(root, NOBARRIER)) |
411 | seq_puts(seq, ",nobarrier"); | 421 | seq_puts(seq, ",nobarrier"); |
412 | if (info->max_extent != (u64)-1) | 422 | if (info->max_extent != (u64)-1) |
413 | seq_printf(seq, ",max_extent=%llu", info->max_extent); | 423 | seq_printf(seq, ",max_extent=%llu", |
424 | (unsigned long long)info->max_extent); | ||
414 | if (info->max_inline != 8192 * 1024) | 425 | if (info->max_inline != 8192 * 1024) |
415 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | 426 | seq_printf(seq, ",max_inline=%llu", |
427 | (unsigned long long)info->max_inline); | ||
416 | if (info->alloc_start != 0) | 428 | if (info->alloc_start != 0) |
417 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | 429 | seq_printf(seq, ",alloc_start=%llu", |
430 | (unsigned long long)info->alloc_start); | ||
418 | if (info->thread_pool_size != min_t(unsigned long, | 431 | if (info->thread_pool_size != min_t(unsigned long, |
419 | num_online_cpus() + 2, 8)) | 432 | num_online_cpus() + 2, 8)) |
420 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 433 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
@@ -635,14 +648,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
635 | if (!capable(CAP_SYS_ADMIN)) | 648 | if (!capable(CAP_SYS_ADMIN)) |
636 | return -EPERM; | 649 | return -EPERM; |
637 | 650 | ||
638 | vol = kmalloc(sizeof(*vol), GFP_KERNEL); | 651 | vol = memdup_user((void __user *)arg, sizeof(*vol)); |
639 | if (!vol) | 652 | if (IS_ERR(vol)) |
640 | return -ENOMEM; | 653 | return PTR_ERR(vol); |
641 | |||
642 | if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { | ||
643 | ret = -EFAULT; | ||
644 | goto out; | ||
645 | } | ||
646 | 654 | ||
647 | switch (cmd) { | 655 | switch (cmd) { |
648 | case BTRFS_IOC_SCAN_DEV: | 656 | case BTRFS_IOC_SCAN_DEV: |
@@ -650,7 +658,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
650 | &btrfs_fs_type, &fs_devices); | 658 | &btrfs_fs_type, &fs_devices); |
651 | break; | 659 | break; |
652 | } | 660 | } |
653 | out: | 661 | |
654 | kfree(vol); | 662 | kfree(vol); |
655 | return ret; | 663 | return ret; |
656 | } | 664 | } |
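
The new option would be given as, say, mount -o metadata_ratio=4 (assumed usage); match_int() pulls the integer out of the token and a zero leaves the default alone. A userspace sketch of that exact parse-and-guard:

#include <stdio.h>

int main(void)
{
	const char *opt = "metadata_ratio=4";
	int intarg = 0;

	/* sscanf stands in for the match_table_t + match_int() pair */
	if (sscanf(opt, "metadata_ratio=%d", &intarg) == 1 && intarg)
		printf("btrfs: metadata ratio %d\n", intarg);
	else
		puts("zero or unparsable: keep the default ratio");
	return 0;
}
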
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2869b3361eb6..01b143605ec1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | |||
687 | prepare_to_wait(&info->transaction_wait, &wait, | 687 | prepare_to_wait(&info->transaction_wait, &wait, |
688 | TASK_UNINTERRUPTIBLE); | 688 | TASK_UNINTERRUPTIBLE); |
689 | mutex_unlock(&info->trans_mutex); | 689 | mutex_unlock(&info->trans_mutex); |
690 | |||
691 | atomic_dec(&info->throttles); | ||
692 | wake_up(&info->transaction_throttle); | ||
693 | |||
690 | schedule(); | 694 | schedule(); |
695 | |||
696 | atomic_inc(&info->throttles); | ||
691 | mutex_lock(&info->trans_mutex); | 697 | mutex_lock(&info->trans_mutex); |
692 | finish_wait(&info->transaction_wait, &wait); | 698 | finish_wait(&info->transaction_wait, &wait); |
693 | } | 699 | } |
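
The fix is purely about what the sleeper advertises: while parked in schedule(), it must not count as an active throttler, or the commit it is waiting on can end up waiting on it in turn. In stub form:

#include <stdio.h>

static int throttles = 1;	/* models atomic_t info->throttles */

static void schedule_stub(void)
{
	puts("  asleep until the pre-flush finishes");
}

int main(void)
{
	throttles--;	/* atomic_dec: stop counting ourselves */
	puts("wake transaction_throttle waiters");
	schedule_stub();
	throttles++;	/* atomic_inc once we're runnable again */
	printf("throttles = %d\n", throttles);
	return 0;
}
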
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 25f20ea11f27..db5e212e8445 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
536 | saved_nbytes = inode_get_bytes(inode); | 536 | saved_nbytes = inode_get_bytes(inode); |
537 | /* drop any overlapping extents */ | 537 | /* drop any overlapping extents */ |
538 | ret = btrfs_drop_extents(trans, root, inode, | 538 | ret = btrfs_drop_extents(trans, root, inode, |
539 | start, extent_end, start, &alloc_hint); | 539 | start, extent_end, extent_end, start, &alloc_hint); |
540 | BUG_ON(ret); | 540 | BUG_ON(ret); |
541 | 541 | ||
542 | if (found_type == BTRFS_FILE_EXTENT_REG || | 542 | if (found_type == BTRFS_FILE_EXTENT_REG || |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..5f01dad4b696 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) | |||
125 | return NULL; | 125 | return NULL; |
126 | } | 126 | } |
127 | 127 | ||
128 | static void requeue_list(struct btrfs_pending_bios *pending_bios, | ||
129 | struct bio *head, struct bio *tail) | ||
130 | { | ||
131 | |||
132 | struct bio *old_head; | ||
133 | |||
134 | old_head = pending_bios->head; | ||
135 | pending_bios->head = head; | ||
136 | if (pending_bios->tail) | ||
137 | tail->bi_next = old_head; | ||
138 | else | ||
139 | pending_bios->tail = tail; | ||
140 | } | ||
141 | |||
128 | /* | 142 | /* |
129 | * we try to collect pending bios for a device so we don't get a large | 143 | * we try to collect pending bios for a device so we don't get a large |
130 | * number of procs sending bios down to the same device. This greatly | 144 | * number of procs sending bios down to the same device. This greatly |
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
141 | struct bio *pending; | 155 | struct bio *pending; |
142 | struct backing_dev_info *bdi; | 156 | struct backing_dev_info *bdi; |
143 | struct btrfs_fs_info *fs_info; | 157 | struct btrfs_fs_info *fs_info; |
158 | struct btrfs_pending_bios *pending_bios; | ||
144 | struct bio *tail; | 159 | struct bio *tail; |
145 | struct bio *cur; | 160 | struct bio *cur; |
146 | int again = 0; | 161 | int again = 0; |
147 | unsigned long num_run = 0; | 162 | unsigned long num_run; |
163 | unsigned long num_sync_run; | ||
148 | unsigned long limit; | 164 | unsigned long limit; |
149 | unsigned long last_waited = 0; | 165 | unsigned long last_waited = 0; |
150 | 166 | ||
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
153 | limit = btrfs_async_submit_limit(fs_info); | 169 | limit = btrfs_async_submit_limit(fs_info); |
154 | limit = limit * 2 / 3; | 170 | limit = limit * 2 / 3; |
155 | 171 | ||
172 | /* we want to make sure that every time we switch from the sync | ||
173 | * list to the normal list, we unplug | ||
174 | */ | ||
175 | num_sync_run = 0; | ||
176 | |||
156 | loop: | 177 | loop: |
157 | spin_lock(&device->io_lock); | 178 | spin_lock(&device->io_lock); |
179 | num_run = 0; | ||
158 | 180 | ||
159 | loop_lock: | 181 | loop_lock: |
182 | |||
160 | /* take all the bios off the list at once and process them | 183 | /* take all the bios off the list at once and process them |
161 | * later on (without the lock held). But, remember the | 184 | * later on (without the lock held). But, remember the |
162 | * tail and other pointers so the bios can be properly reinserted | 185 | * tail and other pointers so the bios can be properly reinserted |
163 | * into the list if we hit congestion | 186 | * into the list if we hit congestion |
164 | */ | 187 | */ |
165 | pending = device->pending_bios; | 188 | if (device->pending_sync_bios.head) |
166 | tail = device->pending_bio_tail; | 189 | pending_bios = &device->pending_sync_bios; |
190 | else | ||
191 | pending_bios = &device->pending_bios; | ||
192 | |||
193 | pending = pending_bios->head; | ||
194 | tail = pending_bios->tail; | ||
167 | WARN_ON(pending && !tail); | 195 | WARN_ON(pending && !tail); |
168 | device->pending_bios = NULL; | ||
169 | device->pending_bio_tail = NULL; | ||
170 | 196 | ||
171 | /* | 197 | /* |
172 | * if pending was null this time around, no bios need processing | 198 | * if pending was null this time around, no bios need processing |
@@ -176,16 +202,41 @@ loop_lock: | |||
176 | * device->running_pending is used to synchronize with the | 202 | * device->running_pending is used to synchronize with the |
177 | * schedule_bio code. | 203 | * schedule_bio code. |
178 | */ | 204 | */ |
179 | if (pending) { | 205 | if (device->pending_sync_bios.head == NULL && |
180 | again = 1; | 206 | device->pending_bios.head == NULL) { |
181 | device->running_pending = 1; | ||
182 | } else { | ||
183 | again = 0; | 207 | again = 0; |
184 | device->running_pending = 0; | 208 | device->running_pending = 0; |
209 | } else { | ||
210 | again = 1; | ||
211 | device->running_pending = 1; | ||
185 | } | 212 | } |
213 | |||
214 | pending_bios->head = NULL; | ||
215 | pending_bios->tail = NULL; | ||
216 | |||
186 | spin_unlock(&device->io_lock); | 217 | spin_unlock(&device->io_lock); |
187 | 218 | ||
219 | /* | ||
220 | * if we're doing the regular priority list, make sure we unplug | ||
221 | * for any high prio bios we've sent down | ||
222 | */ | ||
223 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
224 | num_sync_run = 0; | ||
225 | blk_run_backing_dev(bdi, NULL); | ||
226 | } | ||
227 | |||
188 | while (pending) { | 228 | while (pending) { |
229 | |||
230 | rmb(); | ||
231 | if (pending_bios != &device->pending_sync_bios && | ||
232 | device->pending_sync_bios.head && | ||
233 | num_run > 16) { | ||
234 | cond_resched(); | ||
235 | spin_lock(&device->io_lock); | ||
236 | requeue_list(pending_bios, pending, tail); | ||
237 | goto loop_lock; | ||
238 | } | ||
239 | |||
189 | cur = pending; | 240 | cur = pending; |
190 | pending = pending->bi_next; | 241 | pending = pending->bi_next; |
191 | cur->bi_next = NULL; | 242 | cur->bi_next = NULL; |
@@ -196,10 +247,18 @@ loop_lock: | |||
196 | wake_up(&fs_info->async_submit_wait); | 247 | wake_up(&fs_info->async_submit_wait); |
197 | 248 | ||
198 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 249 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
199 | bio_get(cur); | ||
200 | submit_bio(cur->bi_rw, cur); | 250 | submit_bio(cur->bi_rw, cur); |
201 | bio_put(cur); | ||
202 | num_run++; | 251 | num_run++; |
252 | if (bio_sync(cur)) | ||
253 | num_sync_run++; | ||
254 | |||
255 | if (need_resched()) { | ||
256 | if (num_sync_run) { | ||
257 | blk_run_backing_dev(bdi, NULL); | ||
258 | num_sync_run = 0; | ||
259 | } | ||
260 | cond_resched(); | ||
261 | } | ||
203 | 262 | ||
204 | /* | 263 | /* |
205 | * we made progress, there is more work to do and the bdi | 264 | * we made progress, there is more work to do and the bdi |
@@ -208,7 +267,6 @@ loop_lock: | |||
208 | */ | 267 | */ |
209 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 268 | if (pending && bdi_write_congested(bdi) && num_run > 16 && |
210 | fs_info->fs_devices->open_devices > 1) { | 269 | fs_info->fs_devices->open_devices > 1) { |
211 | struct bio *old_head; | ||
212 | struct io_context *ioc; | 270 | struct io_context *ioc; |
213 | 271 | ||
214 | ioc = current->io_context; | 272 | ioc = current->io_context; |
@@ -233,17 +291,17 @@ loop_lock: | |||
233 | * against it before looping | 291 | * against it before looping |
234 | */ | 292 | */ |
235 | last_waited = ioc->last_waited; | 293 | last_waited = ioc->last_waited; |
294 | if (need_resched()) { | ||
295 | if (num_sync_run) { | ||
296 | blk_run_backing_dev(bdi, NULL); | ||
297 | num_sync_run = 0; | ||
298 | } | ||
299 | cond_resched(); | ||
300 | } | ||
236 | continue; | 301 | continue; |
237 | } | 302 | } |
238 | spin_lock(&device->io_lock); | 303 | spin_lock(&device->io_lock); |
239 | 304 | requeue_list(pending_bios, pending, tail); | |
240 | old_head = device->pending_bios; | ||
241 | device->pending_bios = pending; | ||
242 | if (device->pending_bio_tail) | ||
243 | tail->bi_next = old_head; | ||
244 | else | ||
245 | device->pending_bio_tail = tail; | ||
246 | |||
247 | device->running_pending = 1; | 305 | device->running_pending = 1; |
248 | 306 | ||
249 | spin_unlock(&device->io_lock); | 307 | spin_unlock(&device->io_lock); |
@@ -251,11 +309,18 @@ loop_lock: | |||
251 | goto done; | 309 | goto done; |
252 | } | 310 | } |
253 | } | 311 | } |
312 | |||
313 | if (num_sync_run) { | ||
314 | num_sync_run = 0; | ||
315 | blk_run_backing_dev(bdi, NULL); | ||
316 | } | ||
317 | |||
318 | cond_resched(); | ||
254 | if (again) | 319 | if (again) |
255 | goto loop; | 320 | goto loop; |
256 | 321 | ||
257 | spin_lock(&device->io_lock); | 322 | spin_lock(&device->io_lock); |
258 | if (device->pending_bios) | 323 | if (device->pending_bios.head || device->pending_sync_bios.head) |
259 | goto loop_lock; | 324 | goto loop_lock; |
260 | spin_unlock(&device->io_lock); | 325 | spin_unlock(&device->io_lock); |
261 | 326 | ||
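
schedule_bio() now files bio_sync() bios on their own list that run_scheduled_bios() drains first, and requeue_list() splices an unfinished batch back onto the front when the device congests or sync work arrives mid-batch. A self-contained model of the list mechanics (plain C, no kernel types):

#include <stdio.h>

struct bio { int id; struct bio *bi_next; };
struct pending { struct bio *head; struct bio *tail; };

static void enqueue(struct pending *q, struct bio *b)
{
	b->bi_next = NULL;
	if (q->tail)
		q->tail->bi_next = b;
	else
		q->head = b;
	q->tail = b;
}

/* splice the batch [head..tail] back onto the front of q,
 * exactly as requeue_list() does */
static void requeue(struct pending *q, struct bio *head, struct bio *tail)
{
	struct bio *old_head = q->head;

	q->head = head;
	if (q->tail)
		tail->bi_next = old_head;
	else
		q->tail = tail;
}

int main(void)
{
	struct pending q = { NULL, NULL };
	struct bio a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };
	struct bio *head, *tail, *cur;

	enqueue(&q, &a);
	enqueue(&q, &b);
	enqueue(&q, &c);

	/* take the whole batch off the queue, submit only 'a', then
	 * hit congestion: put b..c back at the front */
	head = q.head;
	tail = q.tail;
	q.head = q.tail = NULL;
	head = head->bi_next;		/* 'a' was submitted */
	requeue(&q, head, tail);

	for (cur = q.head; cur; cur = cur->bi_next)
		printf("pending bio %d\n", cur->id);
	return 0;
}
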
@@ -1478,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | |||
1478 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | 1543 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); |
1479 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | 1544 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); |
1480 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | 1545 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); |
1481 | btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); | 1546 | btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); |
1482 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | 1547 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); |
1483 | btrfs_mark_buffer_dirty(leaf); | 1548 | btrfs_mark_buffer_dirty(leaf); |
1484 | 1549 | ||
@@ -1875,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1875 | device->total_bytes = new_size; | 1940 | device->total_bytes = new_size; |
1876 | if (device->writeable) | 1941 | if (device->writeable) |
1877 | device->fs_devices->total_rw_bytes -= diff; | 1942 | device->fs_devices->total_rw_bytes -= diff; |
1878 | ret = btrfs_update_device(trans, device); | ||
1879 | if (ret) { | ||
1880 | unlock_chunks(root); | ||
1881 | btrfs_end_transaction(trans, root); | ||
1882 | goto done; | ||
1883 | } | ||
1884 | WARN_ON(diff > old_total); | ||
1885 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
1886 | unlock_chunks(root); | 1943 | unlock_chunks(root); |
1887 | btrfs_end_transaction(trans, root); | 1944 | btrfs_end_transaction(trans, root); |
1888 | 1945 | ||
@@ -1914,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1914 | length = btrfs_dev_extent_length(l, dev_extent); | 1971 | length = btrfs_dev_extent_length(l, dev_extent); |
1915 | 1972 | ||
1916 | if (key.offset + length <= new_size) | 1973 | if (key.offset + length <= new_size) |
1917 | goto done; | 1974 | break; |
1918 | 1975 | ||
1919 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 1976 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
1920 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 1977 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
@@ -1927,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1927 | goto done; | 1984 | goto done; |
1928 | } | 1985 | } |
1929 | 1986 | ||
1987 | /* Shrinking succeeded, else we would be at "done". */ | ||
1988 | trans = btrfs_start_transaction(root, 1); | ||
1989 | if (!trans) { | ||
1990 | ret = -ENOMEM; | ||
1991 | goto done; | ||
1992 | } | ||
1993 | lock_chunks(root); | ||
1994 | |||
1995 | device->disk_total_bytes = new_size; | ||
1996 | /* Now btrfs_update_device() will change the on-disk size. */ | ||
1997 | ret = btrfs_update_device(trans, device); | ||
1998 | if (ret) { | ||
1999 | unlock_chunks(root); | ||
2000 | btrfs_end_transaction(trans, root); | ||
2001 | goto done; | ||
2002 | } | ||
2003 | WARN_ON(diff > old_total); | ||
2004 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
2005 | unlock_chunks(root); | ||
2006 | btrfs_end_transaction(trans, root); | ||
1930 | done: | 2007 | done: |
1931 | btrfs_free_path(path); | 2008 | btrfs_free_path(path); |
1932 | return ret; | 2009 | return ret; |
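
Moving btrfs_update_device() below the relocation loop splits the shrink into two phases: the in-memory size drops first so the allocator stops using the device's tail, and the on-disk device item (plus the superblock total) changes only once every chunk beyond new_size has been moved. A stubbed sequence of that ordering:

#include <stdio.h>

/* stub: only the ordering matters */
static int relocate_chunks_past(unsigned long long new_size)
{
	printf("relocate every chunk past %llu\n", new_size);
	return 0;	/* 0 = all moved */
}

int main(void)
{
	unsigned long long total_bytes = 1 << 30;
	unsigned long long disk_total_bytes = 1 << 30;
	unsigned long long new_size = 1 << 29;

	total_bytes = new_size;		/* in memory first: allocator
					 * stops using the tail now */
	if (relocate_chunks_past(new_size))
		return 1;		/* on-disk size never touched */

	disk_total_bytes = new_size;	/* only now persist the shrink */
	printf("persist: disk_total_bytes = %llu, total_bytes = %llu\n",
	       disk_total_bytes, total_bytes);
	return 0;
}

A crash mid-shrink therefore leaves a device that is merely underused, never one whose recorded size disagrees with chunks still living past it.
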
@@ -2497,7 +2574,7 @@ again: | |||
2497 | max_errors = 1; | 2574 | max_errors = 1; |
2498 | } | 2575 | } |
2499 | } | 2576 | } |
2500 | if (multi_ret && rw == WRITE && | 2577 | if (multi_ret && (rw & (1 << BIO_RW)) && |
2501 | stripes_allocated < stripes_required) { | 2578 | stripes_allocated < stripes_required) { |
2502 | stripes_allocated = map->num_stripes; | 2579 | stripes_allocated = map->num_stripes; |
2503 | free_extent_map(em); | 2580 | free_extent_map(em); |
@@ -2762,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2762 | int rw, struct bio *bio) | 2839 | int rw, struct bio *bio) |
2763 | { | 2840 | { |
2764 | int should_queue = 1; | 2841 | int should_queue = 1; |
2842 | struct btrfs_pending_bios *pending_bios; | ||
2765 | 2843 | ||
2766 | /* don't bother with additional async steps for reads, right now */ | 2844 | /* don't bother with additional async steps for reads, right now */ |
2767 | if (!(rw & (1 << BIO_RW))) { | 2845 | if (!(rw & (1 << BIO_RW))) { |
@@ -2783,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2783 | bio->bi_rw |= rw; | 2861 | bio->bi_rw |= rw; |
2784 | 2862 | ||
2785 | spin_lock(&device->io_lock); | 2863 | spin_lock(&device->io_lock); |
2864 | if (bio_sync(bio)) | ||
2865 | pending_bios = &device->pending_sync_bios; | ||
2866 | else | ||
2867 | pending_bios = &device->pending_bios; | ||
2786 | 2868 | ||
2787 | if (device->pending_bio_tail) | 2869 | if (pending_bios->tail) |
2788 | device->pending_bio_tail->bi_next = bio; | 2870 | pending_bios->tail->bi_next = bio; |
2789 | 2871 | ||
2790 | device->pending_bio_tail = bio; | 2872 | pending_bios->tail = bio; |
2791 | if (!device->pending_bios) | 2873 | if (!pending_bios->head) |
2792 | device->pending_bios = bio; | 2874 | pending_bios->head = bio; |
2793 | if (device->running_pending) | 2875 | if (device->running_pending) |
2794 | should_queue = 0; | 2876 | should_queue = 0; |
2795 | 2877 | ||
@@ -3006,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf, | |||
3006 | unsigned long ptr; | 3088 | unsigned long ptr; |
3007 | 3089 | ||
3008 | device->devid = btrfs_device_id(leaf, dev_item); | 3090 | device->devid = btrfs_device_id(leaf, dev_item); |
3009 | device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); | 3091 | device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); |
3092 | device->total_bytes = device->disk_total_bytes; | ||
3010 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); | 3093 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); |
3011 | device->type = btrfs_device_type(leaf, dev_item); | 3094 | device->type = btrfs_device_type(leaf, dev_item); |
3012 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 3095 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5c3ff6d02fd7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@ | |||
23 | #include "async-thread.h" | 23 | #include "async-thread.h" |
24 | 24 | ||
25 | struct buffer_head; | 25 | struct buffer_head; |
26 | struct btrfs_pending_bios { | ||
27 | struct bio *head; | ||
28 | struct bio *tail; | ||
29 | }; | ||
30 | |||
26 | struct btrfs_device { | 31 | struct btrfs_device { |
27 | struct list_head dev_list; | 32 | struct list_head dev_list; |
28 | struct list_head dev_alloc_list; | 33 | struct list_head dev_alloc_list; |
29 | struct btrfs_fs_devices *fs_devices; | 34 | struct btrfs_fs_devices *fs_devices; |
30 | struct btrfs_root *dev_root; | 35 | struct btrfs_root *dev_root; |
31 | struct bio *pending_bios; | 36 | |
32 | struct bio *pending_bio_tail; | 37 | /* regular prio bios */ |
38 | struct btrfs_pending_bios pending_bios; | ||
39 | /* WRITE_SYNC bios */ | ||
40 | struct btrfs_pending_bios pending_sync_bios; | ||
41 | |||
33 | int running_pending; | 42 | int running_pending; |
34 | u64 generation; | 43 | u64 generation; |
35 | 44 | ||
@@ -52,6 +61,9 @@ struct btrfs_device { | |||
52 | /* size of the device */ | 61 | /* size of the device */ |
53 | u64 total_bytes; | 62 | u64 total_bytes; |
54 | 63 | ||
64 | /* size of the disk */ | ||
65 | u64 disk_total_bytes; | ||
66 | |||
55 | /* bytes used */ | 67 | /* bytes used */ |
56 | u64 bytes_used; | 68 | u64 bytes_used; |
57 | 69 | ||