Diffstat (limited to 'fs/btrfs/file.c')

-rw-r--r--	fs/btrfs/file.c	406
1 file changed, 264 insertions(+), 142 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c6673a9231f..77061bf43edb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -41,6 +41,7 @@
 #include "compat.h"
 #include "volumes.h"
 
+static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
  * when auto defrag is enabled we
  * queue up these defrag structs to remember which
@@ -90,7 +91,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1,
  * If an existing record is found the defrag item you
  * pass in is freed
  */
-static void __btrfs_add_inode_defrag(struct inode *inode,
+static int __btrfs_add_inode_defrag(struct inode *inode,
				     struct inode_defrag *defrag)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -118,18 +119,24 @@ static void __btrfs_add_inode_defrag(struct inode *inode,
 				entry->transid = defrag->transid;
 			if (defrag->last_offset > entry->last_offset)
 				entry->last_offset = defrag->last_offset;
-			goto exists;
+			return -EEXIST;
 		}
 	}
 	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	rb_link_node(&defrag->rb_node, parent, p);
 	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
-	return;
+	return 0;
+}
 
-exists:
-	kfree(defrag);
-	return;
+static inline int __need_auto_defrag(struct btrfs_root *root)
+{
+	if (!btrfs_test_opt(root, AUTO_DEFRAG))
+		return 0;
+
+	if (btrfs_fs_closing(root->fs_info))
+		return 0;
 
+	return 1;
 }
 
 /*
@@ -142,11 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct inode_defrag *defrag;
 	u64 transid;
+	int ret;
 
-	if (!btrfs_test_opt(root, AUTO_DEFRAG))
-		return 0;
-
-	if (btrfs_fs_closing(root->fs_info))
+	if (!__need_auto_defrag(root))
 		return 0;
 
 	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
@@ -157,7 +162,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	else
 		transid = BTRFS_I(inode)->root->last_trans;
 
-	defrag = kzalloc(sizeof(*defrag), GFP_NOFS);
+	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
 	if (!defrag)
 		return -ENOMEM;
 
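Note: the hunk above swaps the generic kzalloc()/kfree() pair for a dedicated slab cache. A minimal sketch of the full cache lifecycle as this patch wires it up (creation and destruction live in btrfs_auto_defrag_init()/btrfs_auto_defrag_exit() at the tail of this patch); error handling trimmed:

	/* created once at module init (see the tail of this patch) */
	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
			sizeof(struct inode_defrag), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);

	/* per-record: zeroed allocation, with a matching typed free */
	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
	if (!defrag)
		return -ENOMEM;
	/* ... */
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);

	/* torn down once at module exit */
	kmem_cache_destroy(btrfs_inode_defrag_cachep);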
@@ -166,20 +171,56 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
 	defrag->root = root->root_key.objectid;
 
 	spin_lock(&root->fs_info->defrag_inodes_lock);
-	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
-		__btrfs_add_inode_defrag(inode, defrag);
-	else
-		kfree(defrag);
+	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
+		/*
+		 * If we set the IN_DEFRAG flag and evict the inode from memory
+		 * and then re-read it, the new in-memory copy won't have the
+		 * flag set. In that case, we may find an existing defrag record.
+		 */
+		ret = __btrfs_add_inode_defrag(inode, defrag);
+		if (ret)
+			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
 	spin_unlock(&root->fs_info->defrag_inodes_lock);
 	return 0;
 }
 
 /*
- * must be called with the defrag_inodes lock held
+ * Requeue the defrag object. If there is a defrag object that points to
+ * the same inode in the tree, we will merge them together (by
+ * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
  */
-struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
-					     u64 root, u64 ino,
-					     struct rb_node **next)
+void btrfs_requeue_inode_defrag(struct inode *inode,
+				struct inode_defrag *defrag)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	if (!__need_auto_defrag(root))
+		goto out;
+
+	/*
+	 * Here we don't check the IN_DEFRAG flag, because we need to merge
+	 * them together.
+	 */
+	spin_lock(&root->fs_info->defrag_inodes_lock);
+	ret = __btrfs_add_inode_defrag(inode, defrag);
+	spin_unlock(&root->fs_info->defrag_inodes_lock);
+	if (ret)
+		goto out;
+	return;
+out:
+	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+}
+
+/*
+ * Pick the defragable inode that we want; if it doesn't exist, we will get
+ * the next one.
+ */
+static struct inode_defrag *
+btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
 {
 	struct inode_defrag *entry = NULL;
 	struct inode_defrag tmp;
@@ -190,7 +231,8 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
 	tmp.ino = ino;
 	tmp.root = root;
 
-	p = info->defrag_inodes.rb_node;
+	spin_lock(&fs_info->defrag_inodes_lock);
+	p = fs_info->defrag_inodes.rb_node;
 	while (p) {
 		parent = p;
 		entry = rb_entry(parent, struct inode_defrag, rb_node);
@@ -201,52 +243,131 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info,
 		else if (ret > 0)
 			p = parent->rb_right;
 		else
-			return entry;
+			goto out;
 	}
 
-	if (next) {
-		while (parent && __compare_inode_defrag(&tmp, entry) > 0) {
-			parent = rb_next(parent);
-			entry = rb_entry(parent, struct inode_defrag, rb_node);
-		}
-		*next = parent;
+	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
+		parent = rb_next(parent);
+		if (parent)
+			entry = rb_entry(parent, struct inode_defrag, rb_node);
+		else
+			entry = NULL;
 	}
-	return NULL;
+out:
+	if (entry)
+		rb_erase(parent, &fs_info->defrag_inodes);
+	spin_unlock(&fs_info->defrag_inodes_lock);
+	return entry;
 }
 
-/*
- * run through the list of inodes in the FS that need
- * defragging
- */
-int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
 {
 	struct inode_defrag *defrag;
+	struct rb_node *node;
+
+	spin_lock(&fs_info->defrag_inodes_lock);
+	node = rb_first(&fs_info->defrag_inodes);
+	while (node) {
+		rb_erase(node, &fs_info->defrag_inodes);
+		defrag = rb_entry(node, struct inode_defrag, rb_node);
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+
+		if (need_resched()) {
+			spin_unlock(&fs_info->defrag_inodes_lock);
+			cond_resched();
+			spin_lock(&fs_info->defrag_inodes_lock);
+		}
+
+		node = rb_first(&fs_info->defrag_inodes);
+	}
+	spin_unlock(&fs_info->defrag_inodes_lock);
+}
+
+#define BTRFS_DEFRAG_BATCH	1024
+
+static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
+				    struct inode_defrag *defrag)
+{
 	struct btrfs_root *inode_root;
 	struct inode *inode;
-	struct rb_node *n;
 	struct btrfs_key key;
 	struct btrfs_ioctl_defrag_range_args range;
-	u64 first_ino = 0;
-	u64 root_objectid = 0;
 	int num_defrag;
-	int defrag_batch = 1024;
 
+	/* get the inode */
+	key.objectid = defrag->root;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	key.offset = (u64)-1;
+	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(inode_root)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode_root);
+	}
+
+	key.objectid = defrag->ino;
+	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+	key.offset = 0;
+	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+	if (IS_ERR(inode)) {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+		return PTR_ERR(inode);
+	}
+
+	/* do a chunk of defrag */
+	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
 	memset(&range, 0, sizeof(range));
 	range.len = (u64)-1;
+	range.start = defrag->last_offset;
+
+	sb_start_write(fs_info->sb);
+	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
+				       BTRFS_DEFRAG_BATCH);
+	sb_end_write(fs_info->sb);
+	/*
+	 * if we filled the whole defrag batch, there
+	 * must be more work to do.  Queue this defrag
+	 * again
+	 */
+	if (num_defrag == BTRFS_DEFRAG_BATCH) {
+		defrag->last_offset = range.start;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else if (defrag->last_offset && !defrag->cycled) {
+		/*
+		 * we didn't fill our defrag batch, but
+		 * we didn't start at zero.  Make sure we loop
+		 * around to the start of the file.
+		 */
+		defrag->last_offset = 0;
+		defrag->cycled = 1;
+		btrfs_requeue_inode_defrag(inode, defrag);
+	} else {
+		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+	}
+
+	iput(inode);
+	return 0;
+}
+
+/*
+ * run through the list of inodes in the FS that need
+ * defragging
+ */
+int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
+{
+	struct inode_defrag *defrag;
+	u64 first_ino = 0;
+	u64 root_objectid = 0;
 
 	atomic_inc(&fs_info->defrag_running);
-	spin_lock(&fs_info->defrag_inodes_lock);
 	while(1) {
-		n = NULL;
+		if (!__need_auto_defrag(fs_info->tree_root))
+			break;
 
 		/* find an inode to defrag */
-		defrag = btrfs_find_defrag_inode(fs_info, root_objectid,
-						 first_ino, &n);
+		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
+						 first_ino);
 		if (!defrag) {
-			if (n) {
-				defrag = rb_entry(n, struct inode_defrag,
-						  rb_node);
-			} else if (root_objectid || first_ino) {
+			if (root_objectid || first_ino) {
 				root_objectid = 0;
 				first_ino = 0;
 				continue;
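Note: the rework above changes the lock discipline around the defrag rbtree. btrfs_pick_defrag_inode() now takes defrag_inodes_lock itself, erases the record it returns while still holding the lock, and the caller processes the record unlocked; the old code held the spinlock across the whole loop in btrfs_run_defrag_inodes(). A sketch of the pattern, simplified to rb_first() where the real code searches by (root, ino):

	while (1) {
		spin_lock(&fs_info->defrag_inodes_lock);
		node = rb_first(&fs_info->defrag_inodes);
		if (!node) {
			spin_unlock(&fs_info->defrag_inodes_lock);
			break;
		}
		rb_erase(node, &fs_info->defrag_inodes);  /* detach under the lock */
		spin_unlock(&fs_info->defrag_inodes_lock);

		defrag = rb_entry(node, struct inode_defrag, rb_node);
		__btrfs_run_defrag_inode(fs_info, defrag);  /* slow work, unlocked */
	}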
@@ -255,70 +376,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
 			}
 		}
 
-		/* remove it from the rbtree */
 		first_ino = defrag->ino + 1;
 		root_objectid = defrag->root;
-		rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
-
-		if (btrfs_fs_closing(fs_info))
-			goto next_free;
-
-		spin_unlock(&fs_info->defrag_inodes_lock);
-
-		/* get the inode */
-		key.objectid = defrag->root;
-		btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-		key.offset = (u64)-1;
-		inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
-		if (IS_ERR(inode_root))
-			goto next;
-
-		key.objectid = defrag->ino;
-		btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
-		key.offset = 0;
-
-		inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
-		if (IS_ERR(inode))
-			goto next;
 
-		/* do a chunk of defrag */
-		clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
-		range.start = defrag->last_offset;
-		num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
-					       defrag_batch);
-		/*
-		 * if we filled the whole defrag batch, there
-		 * must be more work to do.  Queue this defrag
-		 * again
-		 */
-		if (num_defrag == defrag_batch) {
-			defrag->last_offset = range.start;
-			__btrfs_add_inode_defrag(inode, defrag);
-			/*
-			 * we don't want to kfree defrag, we added it back to
-			 * the rbtree
-			 */
-			defrag = NULL;
-		} else if (defrag->last_offset && !defrag->cycled) {
-			/*
-			 * we didn't fill our defrag batch, but
-			 * we didn't start at zero.  Make sure we loop
-			 * around to the start of the file.
-			 */
-			defrag->last_offset = 0;
-			defrag->cycled = 1;
-			__btrfs_add_inode_defrag(inode, defrag);
-			defrag = NULL;
-		}
-
-		iput(inode);
-next:
-		spin_lock(&fs_info->defrag_inodes_lock);
-next_free:
-		kfree(defrag);
+		__btrfs_run_defrag_inode(fs_info, defrag);
 	}
-	spin_unlock(&fs_info->defrag_inodes_lock);
-
 	atomic_dec(&fs_info->defrag_running);
 
 	/*
@@ -526,6 +588,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 			split->block_len = em->block_len;
 		else
 			split->block_len = split->len;
+		split->orig_block_len = max(split->block_len,
+					    em->orig_block_len);
 		split->generation = gen;
 		split->bdev = em->bdev;
 		split->flags = flags;
@@ -547,6 +611,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		split->flags = flags;
 		split->compress_type = em->compress_type;
 		split->generation = gen;
+		split->orig_block_len = max(em->block_len,
+					    em->orig_block_len);
 
 		if (compressed) {
 			split->block_len = em->block_len;
@@ -555,7 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		} else {
 			split->block_len = split->len;
 			split->block_start = em->block_start + diff;
-			split->orig_start = split->start;
+			split->orig_start = em->orig_start;
 		}
 
 		ret = add_extent_mapping(em_tree, split);
@@ -1348,7 +1414,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 
 		balance_dirty_pages_ratelimited(inode->i_mapping);
 		if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-			btrfs_btree_balance_dirty(root, 1);
+			btrfs_btree_balance_dirty(root);
 
 		pos += copied;
 		num_written += copied;
@@ -1397,6 +1463,24 @@ out:
 	return written ? written : err;
 }
 
+static void update_time_for_write(struct inode *inode)
+{
+	struct timespec now;
+
+	if (IS_NOCMTIME(inode))
+		return;
+
+	now = current_fs_time(inode->i_sb);
+	if (!timespec_equal(&inode->i_mtime, &now))
+		inode->i_mtime = now;
+
+	if (!timespec_equal(&inode->i_ctime, &now))
+		inode->i_ctime = now;
+
+	if (IS_I_VERSION(inode))
+		inode_inc_iversion(inode);
+}
+
 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    const struct iovec *iov,
 				    unsigned long nr_segs, loff_t pos)
@@ -1409,6 +1493,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	ssize_t num_written = 0;
 	ssize_t err = 0;
 	size_t count, ocount;
+	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 
 	sb_start_write(inode->i_sb);
 
@@ -1451,11 +1536,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	err = file_update_time(file);
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
-		goto out;
-	}
+	/*
+	 * We reserve space for updating the inode when we reserve space for the
+	 * extent we are going to write, so we will enospc out there. We don't
+	 * need to start yet another transaction to update the inode as we will
+	 * update the inode when we finish writing whatever data we write.
+	 */
+	update_time_for_write(inode);
 
 	start_pos = round_down(pos, root->sectorsize);
 	if (start_pos > i_size_read(inode)) {
@@ -1466,6 +1553,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		}
 	}
 
+	if (sync)
+		atomic_inc(&BTRFS_I(inode)->sync_writers);
+
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
 						   pos, ppos, count, ocount);
@@ -1492,14 +1582,21 @@
 	 * this will either be one more than the running transaction
 	 * or the generation used for the next transaction if there isn't
 	 * one running right now.
+	 *
+	 * We also have to set last_sub_trans to the current log transid,
+	 * otherwise subsequent syncs to a file that's been synced in this
+	 * transaction will appear to have already occurred.
 	 */
 	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
+	BTRFS_I(inode)->last_sub_trans = root->log_transid;
 	if (num_written > 0 || num_written == -EIOCBQUEUED) {
 		err = generic_write_sync(file, pos, num_written);
 		if (err < 0 && num_written > 0)
 			num_written = err;
 	}
 out:
+	if (sync)
+		atomic_dec(&BTRFS_I(inode)->sync_writers);
 	sb_end_write(inode->i_sb);
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
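Note: the sync accounting introduced in the hunks above is symmetric: btrfs_file_aio_write() raises BTRFS_I(inode)->sync_writers before issuing a synchronous (O_DSYNC/IS_SYNC) write and drops it at out:, and btrfs_sync_file() in the next hunk wraps its filemap_write_and_wait_range() call the same way. Condensed, the pattern is:

	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);

	if (sync)
		atomic_inc(&BTRFS_I(inode)->sync_writers);
	/* ... issue the buffered or direct write ... */
	if (sync)
		atomic_dec(&BTRFS_I(inode)->sync_writers);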
@@ -1550,7 +1647,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * out of the ->i_mutex. If so, we can flush the dirty pages by
 	 * multi-task, and make the performance up.
 	 */
+	atomic_inc(&BTRFS_I(inode)->sync_writers);
 	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	atomic_dec(&BTRFS_I(inode)->sync_writers);
 	if (ret)
 		return ret;
 
@@ -1561,7 +1660,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 * range being left.
 	 */
 	atomic_inc(&root->log_batch);
-	btrfs_wait_ordered_range(inode, start, end);
+	btrfs_wait_ordered_range(inode, start, end - start + 1);
 	atomic_inc(&root->log_batch);
 
 	/*
@@ -1767,6 +1866,7 @@ out:
 
 	hole_em->block_start = EXTENT_MAP_HOLE;
 	hole_em->block_len = 0;
+	hole_em->orig_block_len = 0;
 	hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
 	hole_em->compress_type = BTRFS_COMPRESS_NONE;
 	hole_em->generation = trans->transid;
@@ -1796,48 +1896,51 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *rsv;
 	struct btrfs_trans_handle *trans;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
-	u64 lockstart = (offset + mask) & ~mask;
-	u64 lockend = ((offset + len) & ~mask) - 1;
+	u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
+	u64 lockend = round_down(offset + len,
+				 BTRFS_I(inode)->root->sectorsize) - 1;
 	u64 cur_offset = lockstart;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 	u64 drop_end;
-	unsigned long nr;
 	int ret = 0;
 	int err = 0;
-	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
-		((offset + len) >> PAGE_CACHE_SHIFT);
+	bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
+			  ((offset + len - 1) >> PAGE_CACHE_SHIFT));
 
 	btrfs_wait_ordered_range(inode, offset, len);
 
 	mutex_lock(&inode->i_mutex);
-	if (offset >= inode->i_size) {
-		mutex_unlock(&inode->i_mutex);
-		return 0;
-	}
-
+	/*
+	 * We needn't truncate any page which is beyond the end of the file
+	 * because we are sure there is no data there.
+	 */
 	/*
 	 * Only do this if we are in the same page and we aren't doing the
 	 * entire page.
 	 */
 	if (same_page && len < PAGE_CACHE_SIZE) {
-		ret = btrfs_truncate_page(inode, offset, len, 0);
+		if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE))
+			ret = btrfs_truncate_page(inode, offset, len, 0);
 		mutex_unlock(&inode->i_mutex);
 		return ret;
 	}
 
 	/* zero back part of the first page */
-	ret = btrfs_truncate_page(inode, offset, 0, 0);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset, 0, 0);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	/* zero the front end of the last page */
-	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
-	if (ret) {
-		mutex_unlock(&inode->i_mutex);
-		return ret;
+	if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
+		ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+		if (ret) {
+			mutex_unlock(&inode->i_mutex);
+			return ret;
+		}
 	}
 
 	if (lockend < lockstart) {
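Note: the round_up()/round_down() conversions in the hunk above (and the round_down()/round_up()/ALIGN() ones in btrfs_fallocate() further down) don't change behavior: for a power-of-two block size the kernel helpers expand to exactly the mask arithmetic they replace. A quick equivalence check, assuming a power-of-two blocksize:

	u64 mask = blocksize - 1;

	BUG_ON(round_down(x, blocksize) != (x & ~mask));
	BUG_ON(round_up(x, blocksize)   != ((x + mask) & ~mask));
	BUG_ON(ALIGN(x, blocksize)      != ((x + mask) & ~mask));

The "+ 1" removals in the fallocate hunks, by contrast, are a genuine off-by-one fix: alloc_start and alloc_end already form a half-open range, so its length is alloc_end - alloc_start.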
@@ -1930,9 +2033,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 			break;
 		}
 
-		nr = trans->blocks_used;
 		btrfs_end_transaction(trans, root);
-		btrfs_btree_balance_dirty(root, nr);
+		btrfs_btree_balance_dirty(root);
 
 		trans = btrfs_start_transaction(root, 3);
 		if (IS_ERR(trans)) {
@@ -1963,11 +2065,13 @@ out_trans:
 	if (!trans)
 		goto out_free;
 
+	inode_inc_iversion(inode);
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
 	trans->block_rsv = &root->fs_info->trans_block_rsv;
 	ret = btrfs_update_inode(trans, root, inode);
-	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_btree_balance_dirty(root, nr);
+	btrfs_btree_balance_dirty(root);
 out_free:
 	btrfs_free_path(path);
 	btrfs_free_block_rsv(root, rsv);
@@ -1991,12 +2095,12 @@ static long btrfs_fallocate(struct file *file, int mode,
 	u64 alloc_end;
 	u64 alloc_hint = 0;
 	u64 locked_end;
-	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
 	struct extent_map *em;
+	int blocksize = BTRFS_I(inode)->root->sectorsize;
 	int ret;
 
-	alloc_start = offset & ~mask;
-	alloc_end = (offset + len + mask) & ~mask;
+	alloc_start = round_down(offset, blocksize);
+	alloc_end = round_up(offset + len, blocksize);
 
 	/* Make sure we aren't being give some crap mode */
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2009,7 +2113,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 	 * Make sure we have enough space before we do the
 	 * allocation.
 	 */
-	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1);
+	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
 	if (ret)
 		return ret;
 
@@ -2077,7 +2181,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 		}
 		last_byte = min(extent_map_end(em), alloc_end);
 		actual_end = min_t(u64, extent_map_end(em), offset + len);
-		last_byte = (last_byte + mask) & ~mask;
+		last_byte = ALIGN(last_byte, blocksize);
 
 		if (em->block_start == EXTENT_MAP_HOLE ||
 		    (cur_offset >= inode->i_size &&
@@ -2116,7 +2220,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 out:
 	mutex_unlock(&inode->i_mutex);
 	/* Let go of our reservation. */
-	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1);
+	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
 	return ret;
 }
 
@@ -2292,3 +2396,21 @@ const struct file_operations btrfs_file_operations = {
 	.compat_ioctl = btrfs_ioctl,
 #endif
 };
+
+void btrfs_auto_defrag_exit(void)
+{
+	if (btrfs_inode_defrag_cachep)
+		kmem_cache_destroy(btrfs_inode_defrag_cachep);
+}
+
+int btrfs_auto_defrag_init(void)
+{
+	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
+					sizeof(struct inode_defrag), 0,
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					NULL);
+	if (!btrfs_inode_defrag_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
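Note: the new init/exit pair needs call sites outside this file; in this series they are presumably invoked from the module init/exit paths in fs/btrfs/super.c. A sketch of the expected pairing (registration details and error unwinding elided; the function names mirror the mainline module but are not shown in this diff):

	static int __init init_btrfs_fs(void)
	{
		int err;

		err = btrfs_auto_defrag_init();
		if (err)
			return err;

		return register_filesystem(&btrfs_fs_type);
	}

	static void __exit exit_btrfs_fs(void)
	{
		unregister_filesystem(&btrfs_fs_type);
		btrfs_auto_defrag_exit();
	}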