Diffstat (limited to 'fs/btrfs/file.c')
 fs/btrfs/file.c | 435 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 280 insertions(+), 155 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9ab1bed88116..f76b1fd160d4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -41,6 +41,7 @@
 #include "compat.h"
 #include "volumes.h"
 
+static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
  * when auto defrag is enabled we
  * queue up these defrag structs to remember which
@@ -90,7 +91,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
  * If an existing record is found the defrag item you
  * pass in is freed
  */
-static void __btrfs_add_inode_defrag(struct inode *inode,
+static int __btrfs_add_inode_defrag(struct inode *inode,
 				    struct inode_defrag *defrag)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -118,18 +119,24 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
 				entry->transid = defrag->transid;
 			if (defrag->last_offset > entry->last_offset)
 				entry->last_offset = defrag->last_offset;
-			goto exists;
+			return -EEXIST;
 		}
 	}
 	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	rb_link_node(&defrag->rb_node, parent, p);
 	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
-	return;
+	return 0;
+}
 
-exists:
-	kfree(defrag);
-	return;
+static inline int __need_auto_defrag(struct btrfs_root *root)
+{
+	if (!btrfs_test_opt(root, AUTO_DEFRAG))
+		return 0;
+
+	if (btrfs_fs_closing(root->fs_info))
+		return 0;
 
+	return 1;
 }
 
 /*
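
Note: __btrfs_add_inode_defrag() now returns -EEXIST instead of jumping to an
exists: label that freed the duplicate internally, so allocation and free both
stay in the caller. A minimal sketch of that insert-or-report rbtree pattern,
using a hypothetical example_node type (rb_entry(), rb_link_node() and
rb_insert_color() are the real rbtree helpers):

	struct example_node {
		struct rb_node rb_node;
		u64 key;
	};

	static int example_tree_insert(struct rb_root *root,
				       struct example_node *new)
	{
		struct rb_node **p = &root->rb_node;
		struct rb_node *parent = NULL;
		struct example_node *cur;

		while (*p) {
			parent = *p;
			cur = rb_entry(parent, struct example_node, rb_node);
			if (new->key < cur->key)
				p = &parent->rb_left;
			else if (new->key > cur->key)
				p = &parent->rb_right;
			else
				return -EEXIST; /* duplicate: caller frees 'new' */
		}
		rb_link_node(&new->rb_node, parent, p);
		rb_insert_color(&new->rb_node, root);
		return 0;
	}

Keeping the free in the caller is what lets the new requeue path further down
reuse this insert to merge a requeued record into an existing one.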
@@ -142,11 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct inode_defrag *defrag;
 	u64 transid;
+	int ret;
 
-	if (!btrfs_test_opt(root, AUTO_DEFRAG))
-		return 0;
-
-	if (btrfs_fs_closing(root->fs_info))
+	if (!__need_auto_defrag(root))
 		return 0;
 
 	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
@@ -157,7 +162,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	else
 		transid = BTRFS_I(inode)->root->last_trans;
 
-	defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
+	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
 	if (!defrag)
 		return -ENOMEM;
 
@@ -166,20 +171,56 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	defrag->root = root->root_key.objectid;
 
 	spin_lock(&root->fs_info->defrag_inodes_lock);
-	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
-		__btrfs_add_inode_defrag(inode, defrag);
-	else
-		kfree(defrag);
+	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
+		/*
+		 * If we set IN_DEFRAG flag and evict the inode from memory,
+		 * and then re-read this inode, this new inode doesn't have
+		 * IN_DEFRAG flag. At the case, we may find the existed defrag.
+		 */
+		ret = __btrfs_add_inode_defrag(inode, defrag);
+		if (ret)
+			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
 	spin_unlock(&root->fs_info->defrag_inodes_lock);
 	return 0;
 }
 
 /*
- * must be called with the defrag_inodes lock held
+ * Requeue the defrag object. If there is a defrag object that points to
+ * the same inode in the tree, we will merge them together (by
+ * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
+ */
+void btrfs_requeue_inode_defrag(struct inode *inode,
+				struct inode_defrag *defrag)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	if (!__need_auto_defrag(root))
+		goto out;
+
+	/*
+	 * Here we don't check the IN_DEFRAG flag, because we need merge
+	 * them together.
+	 */
+	spin_lock(&root->fs_info->defrag_inodes_lock);
+	ret = __btrfs_add_inode_defrag(inode, defrag);
+	spin_unlock(&root->fs_info->defrag_inodes_lock);
+	if (ret)
+		goto out;
+	return;
+out:
+	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+}
+
+/*
+ * pick the defragable inode that we want, if it doesn't exist, we will get
+ * the next one.
  */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
-					     u64 root, u64 ino,
-					     struct rb_node **next)
+static struct inode_defrag *
+btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
 {
 	struct inode_defrag *entry = NULL;
 	struct inode_defrag tmp;
@@ -190,7 +231,8 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
 	tmp.ino = ino;
 	tmp.root = root;
 
-	p = info->defrag_inodes.rb_node;
+	spin_lock(&fs_info->defrag_inodes_lock);
+	p = fs_info->defrag_inodes.rb_node;
 	while (p) {
 		parent = p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
@@ -201,52 +243,131 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
 		else if (ret > 0)
 			p = parent->rb_right;
 		else
-			return entry;
+			goto out;
 	}
 
-	if (next) {
-		while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
-			parent = rb_next(parent);
-			entry = rb_entry(parent, struct inode_defrag, rb_node);
-		}
-		*next = parent;
+	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
+		parent = rb_next(parent);
+		if (parent)
+			entry = rb_entry(parent, struct inode_defrag, rb_node);
+		else
+			entry = NULL;
 	}
-	return NULL;
+out:
+	if (entry)
+		rb_erase(parent, &fs_info->defrag_inodes);
+	spin_unlock(&fs_info->defrag_inodes_lock);
+	return entry;
 }
 
-/*
- * run through the list of inodes in the FS that need
- * defragging
- */
-int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
 {
 	struct inode_defrag *defrag;
+	struct rb_node *node;
+
+	spin_lock(&fs_info->defrag_inodes_lock);
+	node = rb_first(&fs_info->defrag_inodes);
+	while (node) {
+		rb_erase(node, &fs_info->defrag_inodes);
+		defrag = rb_entry(node, struct inode_defrag, rb_node);
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+
+		if (need_resched()) {
+			spin_unlock(&fs_info->defrag_inodes_lock);
+			cond_resched();
+			spin_lock(&fs_info->defrag_inodes_lock);
+		}
+
+		node = rb_first(&fs_info->defrag_inodes);
+	}
+	spin_unlock(&fs_info->defrag_inodes_lock);
+}
+
+#define BTRFS_DEFRAG_BATCH	1024
+
+static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
+				    struct inode_defrag *defrag)
+{
 	struct btrfs_root *inode_root;
 	struct inode *inode;
-	struct rb_node *n;
 	struct btrfs_key key;
 	struct btrfs_ioctl_defrag_range_args range;
-	u64 first_ino = 0;
-	u64 root_objectid = 0;
 	int num_defrag;
-	int defrag_batch = 1024;
 
+	/* get the inode */
+	key.objectid = defrag->root;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	key.offset = (u64)-1;
+	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(inode_root)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode_root);
+	}
+
+	key.objectid = defrag->ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+	if (IS_ERR(inode)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode);
+	}
+
+	/* do a chunk of defrag */
+	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	memset(&range, 0, sizeof(range));
 	range.len = (u64)-1;
+	range.start = defrag->last_offset;
+
+	sb_start_write(fs_info->sb);
+	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+				       BTRFS_DEFRAG_BATCH);
+	sb_end_write(fs_info->sb);
+	/*
+	 * if we filled the whole defrag batch, there
+	 * must be more work to do.  Queue this defrag
+	 * again
+	 */
+	if (num_defrag == BTRFS_DEFRAG_BATCH) {
+		defrag->last_offset = range.start;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else if (defrag->last_offset && !defrag->cycled) {
+		/*
+		 * we didn't fill our defrag batch, but
+		 * we didn't start at zero.  Make sure we loop
+		 * around to the start of the file.
+		 */
+		defrag->last_offset = 0;
+		defrag->cycled = 1;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
+
+	iput(inode);
+	return 0;
+}
+
+/*
+ * run through the list of inodes in the FS that need
+ * defragging
+ */
+int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+{
+	struct inode_defrag *defrag;
+	u64 first_ino = 0;
+	u64 root_objectid = 0;
 
 	atomic_inc(&fs_info->defrag_running);
-	spin_lock(&fs_info->defrag_inodes_lock);
 	while(1) {
-		n = NULL;
+		if (!__need_auto_defrag(fs_info->tree_root))
+			break;
 
 		/* find an inode to defrag */
-		defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
-						 first_ino, &n);
+		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
+						 first_ino);
 		if (!defrag) {
-			if (n) {
-				defrag = rb_entry(n, struct inode_defrag,
-						  rb_node);
-			} else if (root_objectid || first_ino) {
+			if (root_objectid || first_ino) {
 				root_objectid = 0;
 				first_ino = 0;
 				continue;
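
Note: btrfs_cleanup_defrag_inodes() above drains the whole tree under
defrag_inodes_lock, dropping the spinlock around cond_resched() so that a
large backlog cannot monopolize the CPU. The same drain-and-yield shape in
isolation (a sketch; the tree, lock and record type are stand-ins):

	static void example_drain(struct rb_root *tree, spinlock_t *lock)
	{
		struct rb_node *node;

		spin_lock(lock);
		while ((node = rb_first(tree))) {
			rb_erase(node, tree);
			/* free the container of 'node' here */

			if (need_resched()) {
				/* never sleep with a spinlock held */
				spin_unlock(lock);
				cond_resched();
				spin_lock(lock);
			}
		}
		spin_unlock(lock);
	}

Re-reading rb_first() on every iteration is deliberate: once the lock has been
dropped, any cached next pointer may be stale.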
@@ -255,70 +376,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 			}
 		}
 
-		/* remove it from the rbtree */
 		first_ino = defrag->ino + 1;
 		root_objectid = defrag->root;
-		rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
-
-		if (btrfs_fs_closing(fs_info))
-			goto next_free;
-
-		spin_unlock(&fs_info->defrag_inodes_lock);
-
-		/* get the inode */
-		key.objectid = defrag->root;
-		btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-		key.offset = (u64)-1;
-		inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
-		if (IS_ERR(inode_root))
-			goto next;
-
-		key.objectid = defrag->ino;
-		btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
-		key.offset = 0;
-
-		inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
-		if (IS_ERR(inode))
-			goto next;
-
-		/* do a chunk of defrag */
-		clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
-		range.start = defrag->last_offset;
-		num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
-					       defrag_batch);
-		/*
-		 * if we filled the whole defrag batch, there
-		 * must be more work to do.  Queue this defrag
-		 * again
-		 */
-		if (num_defrag == defrag_batch) {
-			defrag->last_offset = range.start;
-			__btrfs_add_inode_defrag(inode, defrag);
-			/*
-			 * we don't want to kfree defrag, we added it back to
-			 * the rbtree
-			 */
-			defrag = NULL;
-		} else if (defrag->last_offset && !defrag->cycled) {
-			/*
-			 * we didn't fill our defrag batch, but
-			 * we didn't start at zero.  Make sure we loop
-			 * around to the start of the file.
-			 */
-			defrag->last_offset = 0;
-			defrag->cycled = 1;
-			__btrfs_add_inode_defrag(inode, defrag);
-			defrag = NULL;
-		}
 
-		iput(inode);
-next:
-		spin_lock(&fs_info->defrag_inodes_lock);
-next_free:
-		kfree(defrag);
+		__btrfs_run_defrag_inode(fs_info, defrag);
 	}
-	spin_unlock(&fs_info->defrag_inodes_lock);
-
 	atomic_dec(&fs_info->defrag_running);
 
 	/*
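
Note: because btrfs_pick_defrag_inode() erases the record it returns,
btrfs_run_defrag_inodes() no longer holds defrag_inodes_lock across a defrag
pass. It resumes by cursor instead: after each record it sets the cursor just
past it (first_ino = defrag->ino + 1, root_objectid = defrag->root), and when
the pick comes back empty with a non-zero cursor it rewinds once to (0, 0) to
catch records queued behind the scan.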
@@ -526,6 +588,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			split->block_len = em->block_len;
 		else
 			split->block_len = split->len;
+		split->orig_block_len = max(split->block_len,
+					    em->orig_block_len);
 		split->generation = gen;
 		split->bdev = em->bdev;
 		split->flags = flags;
@@ -547,6 +611,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		split->flags = flags;
 		split->compress_type = em->compress_type;
 		split->generation = gen;
+		split->orig_block_len = max(em->block_len,
+					    em->orig_block_len);
 
 		if (compressed) {
 			split->block_len = em->block_len;
@@ -555,7 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		} else {
 			split->block_len = split->len;
 			split->block_start = em->block_start + diff;
-			split->orig_start = split->start;
+			split->orig_start = em->orig_start;
 		}
 
 		ret = add_extent_mapping(em_tree, split);
@@ -1346,10 +1412,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
 		cond_resched();
 
-		balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-						   dirty_pages);
+		balance_dirty_pages_ratelimited(inode->i_mapping);
 		if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-			btrfs_btree_balance_dirty(root, 1);
+			btrfs_btree_balance_dirty(root);
 
 		pos += copied;
 		num_written += copied;
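
Note: this hunk appears to track two writeback API changes from the same
kernel release: balance_dirty_pages_ratelimited_nr() was removed in favor of
balance_dirty_pages_ratelimited(), which takes only the mapping, and
btrfs_btree_balance_dirty() dropped its page-count hint. The later
balance-dirty hunks in this file are the same mechanical conversion.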
@@ -1398,6 +1463,24 @@ out:
 	return written ? written : err;
 }
 
+static void update_time_for_write(struct inode *inode)
+{
+	struct timespec now;
+
+	if (IS_NOCMTIME(inode))
+		return;
+
+	now = current_fs_time(inode->i_sb);
+	if (!timespec_equal(&inode->i_mtime, &now))
+		inode->i_mtime = now;
+
+	if (!timespec_equal(&inode->i_ctime, &now))
+		inode->i_ctime = now;
+
+	if (IS_I_VERSION(inode))
+		inode_inc_iversion(inode);
+}
+
 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    const struct iovec *iov,
 				    unsigned long nr_segs, loff_t pos)
@@ -1410,6 +1493,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	ssize_t num_written = 0;
 	ssize_t err = 0;
 	size_t count, ocount;
+	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 
 	sb_start_write(inode->i_sb);
 
@@ -1452,11 +1536,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	err = file_update_time(file);
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
-		goto out;
-	}
+	/*
+	 * We reserve space for updating the inode when we reserve space for the
+	 * extent we are going to write, so we will enospc out there.  We don't
+	 * need to start yet another transaction to update the inode as we will
+	 * update the inode when we finish writing whatever data we write.
+	 */
+	update_time_for_write(inode);
 
 	start_pos = round_down(pos, root->sectorsize);
 	if (start_pos > i_size_read(inode)) {
@@ -1467,6 +1553,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		}
 	}
 
+	if (sync)
+		atomic_inc(&BTRFS_I(inode)->sync_writers);
+
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
 						   pos, ppos, count, ocount);
@@ -1493,14 +1582,21 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	 * this will either be one more than the running transaction
 	 * or the generation used for the next transaction if there isn't
 	 * one running right now.
+	 *
+	 * We also have to set last_sub_trans to the current log transid,
+	 * otherwise subsequent syncs to a file that's been synced in this
+	 * transaction will appear to have already occured.
 	 */
 	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+	BTRFS_I(inode)->last_sub_trans = root->log_transid;
 	if (num_written > 0 || num_written == -EIOCBQUEUED) {
 		err = generic_write_sync(file, pos, num_written);
 		if (err < 0 && num_written > 0)
 			num_written = err;
 	}
 out:
+	if (sync)
+		atomic_dec(&BTRFS_I(inode)->sync_writers);
 	sb_end_write(inode->i_sb);
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
@@ -1551,7 +1647,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * out of the ->i_mutex. If so, we can flush the dirty pages by
 	 * multi-task, and make the performance up.
 	 */
+	atomic_inc(&BTRFS_I(inode)->sync_writers);
 	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	atomic_dec(&BTRFS_I(inode)->sync_writers);
 	if (ret)
 		return ret;
 
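
Note: the sync_writers counter incremented here and in the
btrfs_file_aio_write() hunks above lets the write-out path see that a
synchronous writer is waiting. A hedged sketch of the intended consumer
pattern (the helpers are hypothetical; the real reader of this counter lives
outside this file):

	/* submit side: do work inline when a sync writer is waiting on it */
	if (atomic_read(&BTRFS_I(inode)->sync_writers))
		do_work_synchronously();	/* hypothetical */
	else
		queue_to_helper_threads();	/* hypothetical */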
@@ -1562,7 +1660,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * range being left.
 	 */
 	atomic_inc(&root->log_batch);
-	btrfs_wait_ordered_range(inode, start, end);
+	btrfs_wait_ordered_range(inode, start, end - start + 1);
 	atomic_inc(&root->log_batch);
 
 	/*
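
Note: the btrfs_wait_ordered_range() change is unit arithmetic. Its third
argument is a length, while 'end' here is an inclusive offset, so the range
[start, end] spans end - start + 1 bytes. For start = 0 and end = 4095 the old
call waited on only 4095 bytes and could miss the final one; the new call
covers the full 4096.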
@@ -1768,6 +1866,7 @@ out:
 
 		hole_em->block_start = EXTENT_MAP_HOLE;
 		hole_em->block_len = 0;
+		hole_em->orig_block_len = 0;
 		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
 		hole_em->compress_type = BTRFS_COMPRESS_NONE;
 		hole_em->generation = trans->transid;
@@ -1797,48 +1896,51 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *rsv;
 	struct btrfs_trans_handle *trans;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
-	u64 lockstart = (offset + mask) & ~mask;
-	u64 lockend = ((offset + len) & ~mask) - 1;
+	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
+	u64 lockend = round_down(offset + len,
+				 BTRFS_I(inode)->root->sectorsize) - 1;
 	u64 cur_offset = lockstart;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 	u64 drop_end;
-	unsigned long nr;
 	int ret = 0;
 	int err = 0;
-	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
-		((offset + len) >> PAGE_CACHE_SHIFT);
+	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
 
 	btrfs_wait_ordered_range(inode, offset, len);
 
 	mutex_lock(&inode->i_mutex);
-	if (offset >= inode->i_size) {
-		mutex_unlock(&inode->i_mutex);
-		return 0;
-	}
-
+	/*
+	 * We needn't truncate any page which is beyond the end of the file
+	 * because we are sure there is no data there.
+	 */
 	/*
 	 * Only do this if we are in the same page and we aren't doing the
 	 * entire page.
 	 */
 	if (same_page && len < PAGE_CACHE_SIZE) {
-		ret = btrfs_truncate_page(inode, offset, len, 0);
+		if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE))
+			ret = btrfs_truncate_page(inode, offset, len, 0);
 		mutex_unlock(&inode->i_mutex);
 		return ret;
 	}
 
 	/* zero back part of the first page */
-	ret = btrfs_truncate_page(inode, offset, 0, 0);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset, 0, 0);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	/* zero the front end of the last page */
-	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	if (lockend < lockstart) {
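
Note: the open-coded mask arithmetic becomes round_up()/round_down(), which
shrink the locked range inward to sector boundaries so that only whole sectors
are dropped; the unaligned head and tail are zeroed through the page cache via
btrfs_truncate_page(). A worked example, assuming a 4096-byte sectorsize:

	/* punch offset = 5000, len = 12000, i.e. bytes [5000, 17000) */
	lockstart = round_up(5000, 4096);		/* = 8192  */
	lockend = round_down(5000 + 12000, 4096) - 1;	/* = 16383 */
	/* sectors spanning [8192, 16383] are dropped;
	   bytes 5000..8191 and 16384..16999 are zeroed in-page */

The new i_size checks around btrfs_truncate_page() skip the zeroing when the
affected page lies entirely beyond EOF, where there is nothing to zero.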
@@ -1931,9 +2033,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			break;
 		}
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 
 		trans = btrfs_start_transaction(root, 3);
 		if (IS_ERR(trans)) {
@@ -1964,11 +2065,13 @@ out_trans:
 	if (!trans)
 		goto out_free;
 
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
 	trans->block_rsv = &root->fs_info->trans_block_rsv;
 	ret = btrfs_update_inode(trans, root, inode);
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 out_free:
 	btrfs_free_path(path);
 	btrfs_free_block_rsv(root, rsv);
@@ -1992,12 +2095,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 	u64 alloc_end;
 	u64 alloc_hint = 0;
 	u64 locked_end;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	struct extent_map *em;
+	int blocksize = BTRFS_I(inode)->root->sectorsize;
 	int ret;
 
-	alloc_start = offset & ~mask;
-	alloc_end = (offset + len + mask) & ~mask;
+	alloc_start = round_down(offset, blocksize);
+	alloc_end = round_up(offset + len, blocksize);
 
 	/* Make sure we aren't being give some crap mode */
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2010,7 +2113,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 	 * Make sure we have enough space before we do the
 	 * allocation.
 	 */
-	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1);
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
 	if (ret)
 		return ret;
 
@@ -2078,7 +2181,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 		}
 		last_byte = min(extent_map_end(em), alloc_end);
 		actual_end = min_t(u64, extent_map_end(em), offset + len);
-		last_byte = (last_byte + mask) & ~mask;
+		last_byte = ALIGN(last_byte, blocksize);
 
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
@@ -2117,11 +2220,11 @@ static long btrfs_fallocate(struct file *file, int mode,
 out:
 	mutex_unlock(&inode->i_mutex);
 	/* Let go of our reservation. */
-	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1);
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
 	return ret;
 }
 
-static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_map *em;
@@ -2138,6 +2241,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 	if (lockend <= lockstart)
 		lockend = lockstart + root->sectorsize;
 
+	lockend--;
 	len = lockend - lockstart + 1;
 
 	len = max_t(u64, len, root->sectorsize);
@@ -2155,7 +2259,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 	 * before the position we want in case there is outstanding delalloc
 	 * going on here.
 	 */
-	if (origin == SEEK_HOLE && start != 0) {
+	if (whence == SEEK_HOLE && start != 0) {
 		if (start <= root->sectorsize)
 			em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
 						     root->sectorsize, 0);
@@ -2189,13 +2293,13 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 			}
 		}
 
-		if (origin == SEEK_HOLE) {
+		if (whence == SEEK_HOLE) {
 			*offset = start;
 			free_extent_map(em);
 			break;
 		}
 	} else {
-		if (origin == SEEK_DATA) {
+		if (whence == SEEK_DATA) {
 			if (em->block_start == EXTENT_MAP_DELALLOC) {
 				if (start >= inode->i_size) {
 					free_extent_map(em);
@@ -2204,9 +2308,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
 				}
 			}
 
-			*offset = start;
-			free_extent_map(em);
-			break;
+			if (!test_bit(EXTENT_FLAG_PREALLOC,
+				      &em->flags)) {
+				*offset = start;
+				free_extent_map(em);
+				break;
+			}
 		}
 	}
 
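
Note: with the EXTENT_FLAG_PREALLOC test, SEEK_DATA now skips preallocated
(unwritten) extents instead of reporting them as data. A userspace
illustration of the expected behavior (hedged: the exact result depends on
what else the file contains):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("testfile", O_RDWR | O_CREAT, 0644);

		/* 1 MiB of preallocated space, never written */
		posix_fallocate(fd, 0, 1 << 20);

		/* expected with this fix: no data, lseek fails with ENXIO */
		if (lseek(fd, 0, SEEK_DATA) < 0)
			perror("SEEK_DATA");

		close(fd);
		return 0;
	}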
@@ -2232,16 +2339,16 @@ out:
 	return ret;
 }
 
-static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
 {
 	struct inode *inode = file->f_mapping->host;
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
-	switch (origin) {
+	switch (whence) {
 	case SEEK_END:
 	case SEEK_CUR:
-		offset = generic_file_llseek(file, offset, origin);
+		offset = generic_file_llseek(file, offset, whence);
 		goto out;
 	case SEEK_DATA:
 	case SEEK_HOLE:
@@ -2250,7 +2357,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
 			return -ENXIO;
 		}
 
-		ret = find_desired_extent(inode, &offset, origin);
+		ret = find_desired_extent(inode, &offset, whence);
 		if (ret) {
 			mutex_unlock(&inode->i_mutex);
 			return ret;
@@ -2293,3 +2400,21 @@ const struct file_operations btrfs_file_operations = {
 	.compat_ioctl	= btrfs_ioctl,
 #endif
 };
+
+void btrfs_auto_defrag_exit(void)
+{
+	if (btrfs_inode_defrag_cachep)
+		kmem_cache_destroy(btrfs_inode_defrag_cachep);
+}
+
+int btrfs_auto_defrag_init(void)
+{
+	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
+					sizeof(struct inode_defrag), 0,
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					NULL);
+	if (!btrfs_inode_defrag_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
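
Note: btrfs_auto_defrag_init()/btrfs_auto_defrag_exit() follow the usual
module-lifetime pattern for a dedicated slab cache: create it once at module
init, free objects only through kmem_cache_free() (as the hunks above now do),
and destroy the cache on exit. The NULL check before kmem_cache_destroy()
matters on kernels of this vintage, where passing NULL would crash. A sketch
of the expected call order (the wrapper names are assumptions, not part of
this diff):

	static int __init example_module_init(void)
	{
		int err = btrfs_auto_defrag_init();

		if (err)
			return err;
		/* register everything else here, unwinding with
		 * btrfs_auto_defrag_exit() on failure */
		return 0;
	}

	static void __exit example_module_exit(void)
	{
		btrfs_auto_defrag_exit();	/* safe: NULL-checked */
	}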