Diffstat (limited to 'fs/btrfs/file.c')
 fs/btrfs/file.c | 406 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 264 insertions(+), 142 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c6673a9231f..77061bf43edb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -41,6 +41,7 @@
 #include "compat.h"
 #include "volumes.h"
 
+static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
  * when auto defrag is enabled we
  * queue up these defrag structs to remember which
@@ -90,7 +91,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
  * If an existing record is found the defrag item you
  * pass in is freed
  */
-static void __btrfs_add_inode_defrag(struct inode *inode,
+static int __btrfs_add_inode_defrag(struct inode *inode,
 				    struct inode_defrag *defrag)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -118,18 +119,24 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
 			entry->transid = defrag->transid;
 			if (defrag->last_offset > entry->last_offset)
 				entry->last_offset = defrag->last_offset;
-			goto exists;
+			return -EEXIST;
 		}
 	}
 	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	rb_link_node(&defrag->rb_node, parent, p);
 	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
-	return;
+	return 0;
+}
 
-exists:
-	kfree(defrag);
-	return;
+static inline int __need_auto_defrag(struct btrfs_root *root)
+{
+	if (!btrfs_test_opt(root, AUTO_DEFRAG))
+		return 0;
+
+	if (btrfs_fs_closing(root->fs_info))
+		return 0;
 
+	return 1;
 }
 
 /*
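Note: __btrfs_add_inode_defrag() now reports a duplicate with -EEXIST instead of freeing the record itself, so ownership of the allocation stays with the caller (see the kmem_cache_free() calls in the hunks below). A minimal sketch of the contract, with cmp() and struct rec standing in for __compare_inode_defrag() and struct inode_defrag:

static int insert_rec(struct rb_root *tree, struct rec *new)
{
	struct rb_node **p = &tree->rb_node, *parent = NULL;
	struct rec *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct rec, rb_node);
		if (cmp(new, entry) < 0)
			p = &parent->rb_left;
		else if (cmp(new, entry) > 0)
			p = &parent->rb_right;
		else
			return -EEXIST;	/* caller frees 'new' */
	}
	rb_link_node(&new->rb_node, parent, p);
	rb_insert_color(&new->rb_node, tree);
	return 0;
}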
@@ -142,11 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct inode_defrag *defrag;
 	u64 transid;
+	int ret;
 
-	if (!btrfs_test_opt(root, AUTO_DEFRAG))
-		return 0;
-
-	if (btrfs_fs_closing(root->fs_info))
+	if (!__need_auto_defrag(root))
 		return 0;
 
 	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
@@ -157,7 +162,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	else
 		transid = BTRFS_I(inode)->root->last_trans;
 
-	defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
+	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
 	if (!defrag)
 		return -ENOMEM;
 
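Note: defrag records now come from a dedicated slab cache rather than kzalloc()/kfree(); the cache is created by btrfs_auto_defrag_init() at the end of this diff. A sketch of the allocation pairing this implies for every path that drops a record:

	/* zeroed object from the dedicated cache ... */
	struct inode_defrag *d = kmem_cache_zalloc(btrfs_inode_defrag_cachep,
						   GFP_NOFS);
	if (!d)
		return -ENOMEM;
	/* ... must go back to the same cache, never kfree() */
	kmem_cache_free(btrfs_inode_defrag_cachep, d);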
@@ -166,20 +171,56 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	defrag->root = root->root_key.objectid;
 
 	spin_lock(&root->fs_info->defrag_inodes_lock);
-	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
-		__btrfs_add_inode_defrag(inode, defrag);
-	else
-		kfree(defrag);
+	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
+		/*
+		 * If we set IN_DEFRAG flag and evict the inode from memory,
+		 * and then re-read this inode, this new inode doesn't have
+		 * IN_DEFRAG flag. At the case, we may find the existed defrag.
+		 */
+		ret = __btrfs_add_inode_defrag(inode, defrag);
+		if (ret)
+			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
 	spin_unlock(&root->fs_info->defrag_inodes_lock);
 	return 0;
 }
 
 /*
- * must be called with the defrag_inodes lock held
+ * Requeue the defrag object. If there is a defrag object that points to
+ * the same inode in the tree, we will merge them together (by
+ * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
  */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
-					     u64 root, u64 ino,
-					     struct rb_node **next)
+void btrfs_requeue_inode_defrag(struct inode *inode,
+				struct inode_defrag *defrag)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	if (!__need_auto_defrag(root))
+		goto out;
+
+	/*
+	 * Here we don't check the IN_DEFRAG flag, because we need merge
+	 * them together.
+	 */
+	spin_lock(&root->fs_info->defrag_inodes_lock);
+	ret = __btrfs_add_inode_defrag(inode, defrag);
+	spin_unlock(&root->fs_info->defrag_inodes_lock);
+	if (ret)
+		goto out;
+	return;
+out:
+	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+}
+
+/*
+ * pick the defragable inode that we want, if it doesn't exist, we will get
+ * the next one.
+ */
+static struct inode_defrag *
+btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
 {
 	struct inode_defrag *entry = NULL;
 	struct inode_defrag tmp;
@@ -190,7 +231,8 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
 	tmp.ino = ino;
 	tmp.root = root;
 
-	p = info->defrag_inodes.rb_node;
+	spin_lock(&fs_info->defrag_inodes_lock);
+	p = fs_info->defrag_inodes.rb_node;
 	while (p) {
 		parent = p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
@@ -201,52 +243,131 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
 	else if (ret > 0)
 		p = parent->rb_right;
 	else
-		return entry;
+		goto out;
 	}
 
-	if (next) {
-		while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
-			parent = rb_next(parent);
-			entry = rb_entry(parent, struct inode_defrag, rb_node);
-		}
-		*next = parent;
+	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
+		parent = rb_next(parent);
+		if (parent)
+			entry = rb_entry(parent, struct inode_defrag, rb_node);
+		else
+			entry = NULL;
 	}
-	return NULL;
+out:
+	if (entry)
+		rb_erase(parent, &fs_info->defrag_inodes);
+	spin_unlock(&fs_info->defrag_inodes_lock);
+	return entry;
 }
 
-/*
- * run through the list of inodes in the FS that need
- * defragging
- */
-int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
 {
 	struct inode_defrag *defrag;
+	struct rb_node *node;
+
+	spin_lock(&fs_info->defrag_inodes_lock);
+	node = rb_first(&fs_info->defrag_inodes);
+	while (node) {
+		rb_erase(node, &fs_info->defrag_inodes);
+		defrag = rb_entry(node, struct inode_defrag, rb_node);
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+
+		if (need_resched()) {
+			spin_unlock(&fs_info->defrag_inodes_lock);
+			cond_resched();
+			spin_lock(&fs_info->defrag_inodes_lock);
+		}
+
+		node = rb_first(&fs_info->defrag_inodes);
+	}
+	spin_unlock(&fs_info->defrag_inodes_lock);
+}
+
+#define BTRFS_DEFRAG_BATCH	1024
+
+static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
+				    struct inode_defrag *defrag)
+{
 	struct btrfs_root *inode_root;
 	struct inode *inode;
-	struct rb_node *n;
 	struct btrfs_key key;
 	struct btrfs_ioctl_defrag_range_args range;
-	u64 first_ino = 0;
-	u64 root_objectid = 0;
 	int num_defrag;
-	int defrag_batch = 1024;
 
+	/* get the inode */
+	key.objectid = defrag->root;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	key.offset = (u64)-1;
+	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(inode_root)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode_root);
+	}
+
+	key.objectid = defrag->ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+	if (IS_ERR(inode)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode);
+	}
+
+	/* do a chunk of defrag */
+	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	memset(&range, 0, sizeof(range));
 	range.len = (u64)-1;
+	range.start = defrag->last_offset;
+
+	sb_start_write(fs_info->sb);
+	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+				       BTRFS_DEFRAG_BATCH);
+	sb_end_write(fs_info->sb);
+	/*
+	 * if we filled the whole defrag batch, there
+	 * must be more work to do.  Queue this defrag
+	 * again
+	 */
+	if (num_defrag == BTRFS_DEFRAG_BATCH) {
+		defrag->last_offset = range.start;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else if (defrag->last_offset && !defrag->cycled) {
+		/*
+		 * we didn't fill our defrag batch, but
+		 * we didn't start at zero.  Make sure we loop
+		 * around to the start of the file.
+		 */
+		defrag->last_offset = 0;
+		defrag->cycled = 1;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
+
+	iput(inode);
+	return 0;
+}
+
+/*
+ * run through the list of inodes in the FS that need
+ * defragging
+ */
+int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+{
+	struct inode_defrag *defrag;
+	u64 first_ino = 0;
+	u64 root_objectid = 0;
 
 	atomic_inc(&fs_info->defrag_running);
-	spin_lock(&fs_info->defrag_inodes_lock);
 	while(1) {
-		n = NULL;
+		if (!__need_auto_defrag(fs_info->tree_root))
+			break;
 
 		/* find an inode to defrag */
-		defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
-						 first_ino, &n);
+		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
+						 first_ino);
 		if (!defrag) {
-			if (n) {
-				defrag = rb_entry(n, struct inode_defrag,
-						  rb_node);
-			} else if (root_objectid || first_ino) {
+			if (root_objectid || first_ino) {
 				root_objectid = 0;
 				first_ino = 0;
 				continue;
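Note: __btrfs_run_defrag_inode() works in bounded passes: it defrags at most BTRFS_DEFRAG_BATCH pages, requeues the inode if the batch was exhausted, and uses the cycled flag so a record that started mid-file wraps back to offset zero exactly once. The control flow, reduced to a sketch (defrag_some_pages(), requeue() and free_record() are stand-ins for btrfs_defrag_file(), btrfs_requeue_inode_defrag() and kmem_cache_free()):

	num_defrag = defrag_some_pages(inode, &range, BTRFS_DEFRAG_BATCH);
	if (num_defrag == BTRFS_DEFRAG_BATCH) {
		/* batch filled: more work probably remains past range.start */
		defrag->last_offset = range.start;
		requeue(inode, defrag);
	} else if (defrag->last_offset && !defrag->cycled) {
		/* short batch, but we started mid-file: wrap around once */
		defrag->last_offset = 0;
		defrag->cycled = 1;
		requeue(inode, defrag);
	} else {
		free_record(defrag);	/* this inode is done */
	}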
@@ -255,70 +376,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 		}
 	}
 
-		/* remove it from the rbtree */
 		first_ino = defrag->ino + 1;
 		root_objectid = defrag->root;
-		rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
-
-		if (btrfs_fs_closing(fs_info))
-			goto next_free;
-
-		spin_unlock(&fs_info->defrag_inodes_lock);
-
-		/* get the inode */
-		key.objectid = defrag->root;
-		btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-		key.offset = (u64)-1;
-		inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
-		if (IS_ERR(inode_root))
-			goto next;
-
-		key.objectid = defrag->ino;
-		btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
-		key.offset = 0;
-
-		inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
-		if (IS_ERR(inode))
-			goto next;
 
-		/* do a chunk of defrag */
-		clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
-		range.start = defrag->last_offset;
-		num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
-					       defrag_batch);
-		/*
-		 * if we filled the whole defrag batch, there
-		 * must be more work to do.  Queue this defrag
-		 * again
-		 */
-		if (num_defrag == defrag_batch) {
-			defrag->last_offset = range.start;
-			__btrfs_add_inode_defrag(inode, defrag);
-			/*
-			 * we don't want to kfree defrag, we added it back to
-			 * the rbtree
-			 */
-			defrag = NULL;
-		} else if (defrag->last_offset && !defrag->cycled) {
-			/*
-			 * we didn't fill our defrag batch, but
-			 * we didn't start at zero.  Make sure we loop
-			 * around to the start of the file.
-			 */
-			defrag->last_offset = 0;
-			defrag->cycled = 1;
-			__btrfs_add_inode_defrag(inode, defrag);
-			defrag = NULL;
-		}
-
-		iput(inode);
-next:
-		spin_lock(&fs_info->defrag_inodes_lock);
-next_free:
-		kfree(defrag);
+		__btrfs_run_defrag_inode(fs_info, defrag);
 	}
-	spin_unlock(&fs_info->defrag_inodes_lock);
-
 	atomic_dec(&fs_info->defrag_running);
 
 	/*
@@ -526,6 +588,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			split->block_len = em->block_len;
 		else
 			split->block_len = split->len;
+		split->orig_block_len = max(split->block_len,
+					    em->orig_block_len);
 		split->generation = gen;
 		split->bdev = em->bdev;
 		split->flags = flags;
@@ -547,6 +611,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		split->flags = flags;
 		split->compress_type = em->compress_type;
 		split->generation = gen;
+		split->orig_block_len = max(em->block_len,
+					    em->orig_block_len);
 
 		if (compressed) {
 			split->block_len = em->block_len;
@@ -555,7 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		} else {
 			split->block_len = split->len;
 			split->block_start = em->block_start + diff;
-			split->orig_start = split->start;
+			split->orig_start = em->orig_start;
 		}
 
 		ret = add_extent_mapping(em_tree, split);
@@ -1348,7 +1414,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 		if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-			btrfs_btree_balance_dirty(root, 1);
+			btrfs_btree_balance_dirty(root);
 
 		pos += copied;
 		num_written += copied;
@@ -1397,6 +1463,24 @@ out:
 	return written ? written : err;
 }
 
+static void update_time_for_write(struct inode *inode)
+{
+	struct timespec now;
+
+	if (IS_NOCMTIME(inode))
+		return;
+
+	now = current_fs_time(inode->i_sb);
+	if (!timespec_equal(&inode->i_mtime, &now))
+		inode->i_mtime = now;
+
+	if (!timespec_equal(&inode->i_ctime, &now))
+		inode->i_ctime = now;
+
+	if (IS_I_VERSION(inode))
+		inode_inc_iversion(inode);
+}
+
 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    const struct iovec *iov,
 				    unsigned long nr_segs, loff_t pos)
@@ -1409,6 +1493,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	ssize_t num_written = 0;
 	ssize_t err = 0;
 	size_t count, ocount;
+	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 
 	sb_start_write(inode->i_sb);
 
@@ -1451,11 +1536,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	err = file_update_time(file);
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
-		goto out;
-	}
+	/*
+	 * We reserve space for updating the inode when we reserve space for the
+	 * extent we are going to write, so we will enospc out there.  We don't
+	 * need to start yet another transaction to update the inode as we will
+	 * update the inode when we finish writing whatever data we write.
+	 */
+	update_time_for_write(inode);
 
 	start_pos = round_down(pos, root->sectorsize);
 	if (start_pos > i_size_read(inode)) {
@@ -1466,6 +1553,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		}
 	}
 
+	if (sync)
+		atomic_inc(&BTRFS_I(inode)->sync_writers);
+
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
 						   pos, ppos, count, ocount);
@@ -1492,14 +1582,21 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	 * this will either be one more than the running transaction
 	 * or the generation used for the next transaction if there isn't
 	 * one running right now.
+	 *
+	 * We also have to set last_sub_trans to the current log transid,
+	 * otherwise subsequent syncs to a file that's been synced in this
+	 * transaction will appear to have already occured.
 	 */
 	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+	BTRFS_I(inode)->last_sub_trans = root->log_transid;
 	if (num_written > 0 || num_written == -EIOCBQUEUED) {
 		err = generic_write_sync(file, pos, num_written);
 		if (err < 0 && num_written > 0)
 			num_written = err;
 	}
 out:
+	if (sync)
+		atomic_dec(&BTRFS_I(inode)->sync_writers);
 	sb_end_write(inode->i_sb);
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
@@ -1550,7 +1647,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * out of the ->i_mutex. If so, we can flush the dirty pages by
 	 * multi-task, and make the performance up.
 	 */
+	atomic_inc(&BTRFS_I(inode)->sync_writers);
 	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	atomic_dec(&BTRFS_I(inode)->sync_writers);
 	if (ret)
 		return ret;
 
@@ -1561,7 +1660,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * range being left.
 	 */
 	atomic_inc(&root->log_batch);
-	btrfs_wait_ordered_range(inode, start, end);
+	btrfs_wait_ordered_range(inode, start, end - start + 1);
 	atomic_inc(&root->log_batch);
 
 	/*
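Note: the btrfs_wait_ordered_range() fix above passes a length where the old code passed an end offset. For the inclusive byte range [start, end] that btrfs_sync_file() receives, the length is end - start + 1; for example, syncing bytes 0 through 4095 must wait on 4096 bytes of ordered extents, not 4095:

	/* inclusive [start, end]  =>  length = end - start + 1 */
	u64 len = end - start + 1;	/* start = 0, end = 4095 => len = 4096 */
	btrfs_wait_ordered_range(inode, start, len);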
@@ -1767,6 +1866,7 @@ out:
 
 		hole_em->block_start = EXTENT_MAP_HOLE;
 		hole_em->block_len = 0;
+		hole_em->orig_block_len = 0;
 		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
 		hole_em->compress_type = BTRFS_COMPRESS_NONE;
 		hole_em->generation = trans->transid;
@@ -1796,48 +1896,51 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *rsv;
 	struct btrfs_trans_handle *trans;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
-	u64 lockstart = (offset + mask) & ~mask;
-	u64 lockend = ((offset + len) & ~mask) - 1;
+	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
+	u64 lockend = round_down(offset + len,
+				 BTRFS_I(inode)->root->sectorsize) - 1;
 	u64 cur_offset = lockstart;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 	u64 drop_end;
-	unsigned long nr;
 	int ret = 0;
 	int err = 0;
-	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
-		((offset + len) >> PAGE_CACHE_SHIFT);
+	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
 
 	btrfs_wait_ordered_range(inode, offset, len);
 
 	mutex_lock(&inode->i_mutex);
-	if (offset >= inode->i_size) {
-		mutex_unlock(&inode->i_mutex);
-		return 0;
-	}
-
+	/*
+	 * We needn't truncate any page which is beyond the end of the file
+	 * because we are sure there is no data there.
+	 */
 	/*
 	 * Only do this if we are in the same page and we aren't doing the
 	 * entire page.
 	 */
 	if (same_page && len < PAGE_CACHE_SIZE) {
-		ret = btrfs_truncate_page(inode, offset, len, 0);
+		if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE))
+			ret = btrfs_truncate_page(inode, offset, len, 0);
 		mutex_unlock(&inode->i_mutex);
 		return ret;
 	}
 
 	/* zero back part of the first page */
-	ret = btrfs_truncate_page(inode, offset, 0, 0);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset, 0, 0);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	/* zero the front end of the last page */
-	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	if (lockend < lockstart) {
@@ -1930,9 +2033,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			break;
 		}
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 
 		trans = btrfs_start_transaction(root, 3);
 		if (IS_ERR(trans)) {
@@ -1963,11 +2065,13 @@ out_trans:
 	if (!trans)
 		goto out_free;
 
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
 	trans->block_rsv = &root->fs_info->trans_block_rsv;
 	ret = btrfs_update_inode(trans, root, inode);
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 out_free:
 	btrfs_free_path(path);
 	btrfs_free_block_rsv(root, rsv);
@@ -1991,12 +2095,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 	u64 alloc_end;
 	u64 alloc_hint = 0;
 	u64 locked_end;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	struct extent_map *em;
+	int blocksize = BTRFS_I(inode)->root->sectorsize;
 	int ret;
 
-	alloc_start = offset & ~mask;
-	alloc_end = (offset + len + mask) & ~mask;
+	alloc_start = round_down(offset, blocksize);
+	alloc_end = round_up(offset + len, blocksize);
 
 	/* Make sure we aren't being give some crap mode */
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
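Note: round_down()/round_up() are drop-in replacements for the old mask arithmetic whenever the alignment is a power of two, which sectorsize always is. The identities, spelled out as a quick self-check:

	/* With mask = blocksize - 1 and blocksize a power of two:
	 *   offset & ~mask                == round_down(offset, blocksize)
	 *   (offset + len + mask) & ~mask == round_up(offset + len, blocksize)
	 * e.g. blocksize = 4096: round_down(5000, 4096) = 4096,
	 *                        round_up(5000, 4096)   = 8192.
	 * alloc_end is an exclusive bound, which is why the reservation
	 * hunks below drop the old "+ 1". */
	u64 mask = blocksize - 1;

	BUG_ON(round_down(offset, blocksize) != (offset & ~mask));
	BUG_ON(round_up(offset + len, blocksize) !=
	       ((offset + len + mask) & ~mask));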
@@ -2009,7 +2113,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 	 * Make sure we have enough space before we do the
 	 * allocation.
 	 */
-	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1);
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
 	if (ret)
 		return ret;
 
@@ -2077,7 +2181,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 		}
 		last_byte = min(extent_map_end(em), alloc_end);
 		actual_end = min_t(u64, extent_map_end(em), offset + len);
-		last_byte = (last_byte + mask) & ~mask;
+		last_byte = ALIGN(last_byte, blocksize);
 
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
@@ -2116,7 +2220,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 out:
 	mutex_unlock(&inode->i_mutex);
 	/* Let go of our reservation. */
-	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1);
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
 	return ret;
 }
 
@@ -2292,3 +2396,21 @@ const struct file_operations btrfs_file_operations = {
 	.compat_ioctl	= btrfs_ioctl,
 #endif
 };
+
+void btrfs_auto_defrag_exit(void)
+{
+	if (btrfs_inode_defrag_cachep)
+		kmem_cache_destroy(btrfs_inode_defrag_cachep);
+}
+
+int btrfs_auto_defrag_init(void)
+{
+	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
+					sizeof(struct inode_defrag), 0,
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					NULL);
+	if (!btrfs_inode_defrag_cachep)
+		return -ENOMEM;
+
+	return 0;
+}