about | summary | refs | log | tree | commit | diff | stats
path: root/fs/f2fs/data.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-04-18 11:17:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-04-18 11:17:20 -0400
commit 06a60deca87dba8e2c186ea7f12ea87d6785188e (patch)
tree 2a6c8de6a7b110d13a1c1e3fc07cdc9065dfd749 /fs/f2fs/data.c
parent d6a24d0640d609138a4e40a4ce9fd9fe7859e24c (diff)
parent 10027551ccf5459cc771c31ac8bc8e5cc8db45f8 (diff)
Merge tag 'for-f2fs-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "New features:
   - in-memory extent_cache
   - fs_shutdown to test power-off-recovery
   - use inline_data to store symlink path
   - show f2fs as a non-misc filesystem

  Major fixes:
   - avoid CPU stalls on sync_dirty_dir_inodes
   - fix some power-off-recovery procedure
   - fix handling of broken symlink correctly
   - fix missing dot and dotdot made by sudden power cuts
   - handle wrong data index during roll-forward recovery
   - preallocate data blocks for direct_io

  ... and a bunch of minor bug fixes and cleanups"

* tag 'for-f2fs-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (71 commits)
  f2fs: pass checkpoint reason on roll-forward recovery
  f2fs: avoid abnormal behavior on broken symlink
  f2fs: flush symlink path to avoid broken symlink after POR
  f2fs: change 0 to false for bool type
  f2fs: do not recover wrong data index
  f2fs: do not increase link count during recovery
  f2fs: assign parent's i_mode for empty dir
  f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries
  f2fs: fix mismatching lock and unlock pages for roll-forward recovery
  f2fs: fix sparse warnings
  f2fs: limit b_size of mapped bh in f2fs_map_bh
  f2fs: persist system.advise into on-disk inode
  f2fs: avoid NULL pointer dereference in f2fs_xattr_advise_get
  f2fs: preallocate fallocated blocks for direct IO
  f2fs: enable inline data by default
  f2fs: preserve extent info for extent cache
  f2fs: initialize extent tree with on-disk extent info of inode
  f2fs: introduce __{find,grab}_extent_tree
  f2fs: split set_data_blkaddr from f2fs_update_extent_cache
  f2fs: enable fast symlink by utilizing inline data
  ...
Diffstat (limited to 'fs/f2fs/data.c')
-rw-r--r-- fs/f2fs/data.c 742
1 files changed, 674 insertions, 68 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 319eda511c4f..b91b0e10678e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -25,6 +25,9 @@
25#include "trace.h" 25#include "trace.h"
26#include <trace/events/f2fs.h> 26#include <trace/events/f2fs.h>
27 27
28static struct kmem_cache *extent_tree_slab;
29static struct kmem_cache *extent_node_slab;
30
28static void f2fs_read_end_io(struct bio *bio, int err) 31static void f2fs_read_end_io(struct bio *bio, int err)
29{ 32{
30 struct bio_vec *bvec; 33 struct bio_vec *bvec;
@@ -197,7 +200,7 @@ alloc_new:
197 * ->node_page 200 * ->node_page
198 * update block addresses in the node page 201 * update block addresses in the node page
199 */ 202 */
200static void __set_data_blkaddr(struct dnode_of_data *dn) 203void set_data_blkaddr(struct dnode_of_data *dn)
201{ 204{
202 struct f2fs_node *rn; 205 struct f2fs_node *rn;
203 __le32 *addr_array; 206 __le32 *addr_array;
@@ -226,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn)
226 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 229 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
227 230
228 dn->data_blkaddr = NEW_ADDR; 231 dn->data_blkaddr = NEW_ADDR;
229 __set_data_blkaddr(dn); 232 set_data_blkaddr(dn);
230 mark_inode_dirty(dn->inode); 233 mark_inode_dirty(dn->inode);
231 sync_inode_page(dn); 234 sync_inode_page(dn);
232 return 0; 235 return 0;
@@ -248,73 +251,62 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
248 return err; 251 return err;
249} 252}
250 253
251static int check_extent_cache(struct inode *inode, pgoff_t pgofs, 254static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs,
252 struct buffer_head *bh_result) 255 struct extent_info *ei, struct buffer_head *bh_result)
256{
257 unsigned int blkbits = sb->s_blocksize_bits;
258 size_t max_size = bh_result->b_size;
259 size_t mapped_size;
260
261 clear_buffer_new(bh_result);
262 map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs);
263 mapped_size = (ei->fofs + ei->len - pgofs) << blkbits;
264 bh_result->b_size = min(max_size, mapped_size);
265}
266
267static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
268 struct extent_info *ei)
253{ 269{
254 struct f2fs_inode_info *fi = F2FS_I(inode); 270 struct f2fs_inode_info *fi = F2FS_I(inode);
255 pgoff_t start_fofs, end_fofs; 271 pgoff_t start_fofs, end_fofs;
256 block_t start_blkaddr; 272 block_t start_blkaddr;
257 273
258 if (is_inode_flag_set(fi, FI_NO_EXTENT)) 274 read_lock(&fi->ext_lock);
259 return 0;
260
261 read_lock(&fi->ext.ext_lock);
262 if (fi->ext.len == 0) { 275 if (fi->ext.len == 0) {
263 read_unlock(&fi->ext.ext_lock); 276 read_unlock(&fi->ext_lock);
264 return 0; 277 return false;
265 } 278 }
266 279
267 stat_inc_total_hit(inode->i_sb); 280 stat_inc_total_hit(inode->i_sb);
268 281
269 start_fofs = fi->ext.fofs; 282 start_fofs = fi->ext.fofs;
270 end_fofs = fi->ext.fofs + fi->ext.len - 1; 283 end_fofs = fi->ext.fofs + fi->ext.len - 1;
271 start_blkaddr = fi->ext.blk_addr; 284 start_blkaddr = fi->ext.blk;
272 285
273 if (pgofs >= start_fofs && pgofs <= end_fofs) { 286 if (pgofs >= start_fofs && pgofs <= end_fofs) {
274 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 287 *ei = fi->ext;
275 size_t count;
276
277 set_buffer_new(bh_result);
278 map_bh(bh_result, inode->i_sb,
279 start_blkaddr + pgofs - start_fofs);
280 count = end_fofs - pgofs + 1;
281 if (count < (UINT_MAX >> blkbits))
282 bh_result->b_size = (count << blkbits);
283 else
284 bh_result->b_size = UINT_MAX;
285
286 stat_inc_read_hit(inode->i_sb); 288 stat_inc_read_hit(inode->i_sb);
287 read_unlock(&fi->ext.ext_lock); 289 read_unlock(&fi->ext_lock);
288 return 1; 290 return true;
289 } 291 }
290 read_unlock(&fi->ext.ext_lock); 292 read_unlock(&fi->ext_lock);
291 return 0; 293 return false;
292} 294}
293 295
294void update_extent_cache(struct dnode_of_data *dn) 296static bool update_extent_info(struct inode *inode, pgoff_t fofs,
297 block_t blkaddr)
295{ 298{
296 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 299 struct f2fs_inode_info *fi = F2FS_I(inode);
297 pgoff_t fofs, start_fofs, end_fofs; 300 pgoff_t start_fofs, end_fofs;
298 block_t start_blkaddr, end_blkaddr; 301 block_t start_blkaddr, end_blkaddr;
299 int need_update = true; 302 int need_update = true;
300 303
301 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); 304 write_lock(&fi->ext_lock);
302
303 /* Update the page address in the parent node */
304 __set_data_blkaddr(dn);
305
306 if (is_inode_flag_set(fi, FI_NO_EXTENT))
307 return;
308
309 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
310 dn->ofs_in_node;
311
312 write_lock(&fi->ext.ext_lock);
313 305
314 start_fofs = fi->ext.fofs; 306 start_fofs = fi->ext.fofs;
315 end_fofs = fi->ext.fofs + fi->ext.len - 1; 307 end_fofs = fi->ext.fofs + fi->ext.len - 1;
316 start_blkaddr = fi->ext.blk_addr; 308 start_blkaddr = fi->ext.blk;
317 end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; 309 end_blkaddr = fi->ext.blk + fi->ext.len - 1;
318 310
319 /* Drop and initialize the matched extent */ 311 /* Drop and initialize the matched extent */
320 if (fi->ext.len == 1 && fofs == start_fofs) 312 if (fi->ext.len == 1 && fofs == start_fofs)
@@ -322,24 +314,24 @@ void update_extent_cache(struct dnode_of_data *dn)
322 314
323 /* Initial extent */ 315 /* Initial extent */
324 if (fi->ext.len == 0) { 316 if (fi->ext.len == 0) {
325 if (dn->data_blkaddr != NULL_ADDR) { 317 if (blkaddr != NULL_ADDR) {
326 fi->ext.fofs = fofs; 318 fi->ext.fofs = fofs;
327 fi->ext.blk_addr = dn->data_blkaddr; 319 fi->ext.blk = blkaddr;
328 fi->ext.len = 1; 320 fi->ext.len = 1;
329 } 321 }
330 goto end_update; 322 goto end_update;
331 } 323 }
332 324
333 /* Front merge */ 325 /* Front merge */
334 if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { 326 if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
335 fi->ext.fofs--; 327 fi->ext.fofs--;
336 fi->ext.blk_addr--; 328 fi->ext.blk--;
337 fi->ext.len++; 329 fi->ext.len++;
338 goto end_update; 330 goto end_update;
339 } 331 }
340 332
341 /* Back merge */ 333 /* Back merge */
342 if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { 334 if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
343 fi->ext.len++; 335 fi->ext.len++;
344 goto end_update; 336 goto end_update;
345 } 337 }
@@ -351,8 +343,7 @@ void update_extent_cache(struct dnode_of_data *dn)
351 fi->ext.len = fofs - start_fofs; 343 fi->ext.len = fofs - start_fofs;
352 } else { 344 } else {
353 fi->ext.fofs = fofs + 1; 345 fi->ext.fofs = fofs + 1;
354 fi->ext.blk_addr = start_blkaddr + 346 fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
355 fofs - start_fofs + 1;
356 fi->ext.len -= fofs - start_fofs + 1; 347 fi->ext.len -= fofs - start_fofs + 1;
357 } 348 }
358 } else { 349 } else {
@@ -366,27 +357,583 @@ void update_extent_cache(struct dnode_of_data *dn)
366 need_update = true; 357 need_update = true;
367 } 358 }
368end_update: 359end_update:
369 write_unlock(&fi->ext.ext_lock); 360 write_unlock(&fi->ext_lock);
370 if (need_update) 361 return need_update;
371 sync_inode_page(dn); 362}
363
364static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
365 struct extent_tree *et, struct extent_info *ei,
366 struct rb_node *parent, struct rb_node **p)
367{
368 struct extent_node *en;
369
370 en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
371 if (!en)
372 return NULL;
373
374 en->ei = *ei;
375 INIT_LIST_HEAD(&en->list);
376
377 rb_link_node(&en->rb_node, parent, p);
378 rb_insert_color(&en->rb_node, &et->root);
379 et->count++;
380 atomic_inc(&sbi->total_ext_node);
381 return en;
382}
383
384static void __detach_extent_node(struct f2fs_sb_info *sbi,
385 struct extent_tree *et, struct extent_node *en)
386{
387 rb_erase(&en->rb_node, &et->root);
388 et->count--;
389 atomic_dec(&sbi->total_ext_node);
390
391 if (et->cached_en == en)
392 et->cached_en = NULL;
393}
394
395static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
396 nid_t ino)
397{
398 struct extent_tree *et;
399
400 down_read(&sbi->extent_tree_lock);
401 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
402 if (!et) {
403 up_read(&sbi->extent_tree_lock);
404 return NULL;
405 }
406 atomic_inc(&et->refcount);
407 up_read(&sbi->extent_tree_lock);
408
409 return et;
410}
411
412static struct extent_tree *__grab_extent_tree(struct inode *inode)
413{
414 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
415 struct extent_tree *et;
416 nid_t ino = inode->i_ino;
417
418 down_write(&sbi->extent_tree_lock);
419 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
420 if (!et) {
421 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
422 f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
423 memset(et, 0, sizeof(struct extent_tree));
424 et->ino = ino;
425 et->root = RB_ROOT;
426 et->cached_en = NULL;
427 rwlock_init(&et->lock);
428 atomic_set(&et->refcount, 0);
429 et->count = 0;
430 sbi->total_ext_tree++;
431 }
432 atomic_inc(&et->refcount);
433 up_write(&sbi->extent_tree_lock);
434
435 return et;
436}
437
438static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
439 unsigned int fofs)
440{
441 struct rb_node *node = et->root.rb_node;
442 struct extent_node *en;
443
444 if (et->cached_en) {
445 struct extent_info *cei = &et->cached_en->ei;
446
447 if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
448 return et->cached_en;
449 }
450
451 while (node) {
452 en = rb_entry(node, struct extent_node, rb_node);
453
454 if (fofs < en->ei.fofs) {
455 node = node->rb_left;
456 } else if (fofs >= en->ei.fofs + en->ei.len) {
457 node = node->rb_right;
458 } else {
459 et->cached_en = en;
460 return en;
461 }
462 }
463 return NULL;
464}
465
466static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
467 struct extent_tree *et, struct extent_node *en)
468{
469 struct extent_node *prev;
470 struct rb_node *node;
471
472 node = rb_prev(&en->rb_node);
473 if (!node)
474 return NULL;
475
476 prev = rb_entry(node, struct extent_node, rb_node);
477 if (__is_back_mergeable(&en->ei, &prev->ei)) {
478 en->ei.fofs = prev->ei.fofs;
479 en->ei.blk = prev->ei.blk;
480 en->ei.len += prev->ei.len;
481 __detach_extent_node(sbi, et, prev);
482 return prev;
483 }
484 return NULL;
485}
486
487static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
488 struct extent_tree *et, struct extent_node *en)
489{
490 struct extent_node *next;
491 struct rb_node *node;
492
493 node = rb_next(&en->rb_node);
494 if (!node)
495 return NULL;
496
497 next = rb_entry(node, struct extent_node, rb_node);
498 if (__is_front_mergeable(&en->ei, &next->ei)) {
499 en->ei.len += next->ei.len;
500 __detach_extent_node(sbi, et, next);
501 return next;
502 }
503 return NULL;
504}
505
506static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
507 struct extent_tree *et, struct extent_info *ei,
508 struct extent_node **den)
509{
510 struct rb_node **p = &et->root.rb_node;
511 struct rb_node *parent = NULL;
512 struct extent_node *en;
513
514 while (*p) {
515 parent = *p;
516 en = rb_entry(parent, struct extent_node, rb_node);
517
518 if (ei->fofs < en->ei.fofs) {
519 if (__is_front_mergeable(ei, &en->ei)) {
520 f2fs_bug_on(sbi, !den);
521 en->ei.fofs = ei->fofs;
522 en->ei.blk = ei->blk;
523 en->ei.len += ei->len;
524 *den = __try_back_merge(sbi, et, en);
525 return en;
526 }
527 p = &(*p)->rb_left;
528 } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
529 if (__is_back_mergeable(ei, &en->ei)) {
530 f2fs_bug_on(sbi, !den);
531 en->ei.len += ei->len;
532 *den = __try_front_merge(sbi, et, en);
533 return en;
534 }
535 p = &(*p)->rb_right;
536 } else {
537 f2fs_bug_on(sbi, 1);
538 }
539 }
540
541 return __attach_extent_node(sbi, et, ei, parent, p);
542}
543
544static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
545 struct extent_tree *et, bool free_all)
546{
547 struct rb_node *node, *next;
548 struct extent_node *en;
549 unsigned int count = et->count;
550
551 node = rb_first(&et->root);
552 while (node) {
553 next = rb_next(node);
554 en = rb_entry(node, struct extent_node, rb_node);
555
556 if (free_all) {
557 spin_lock(&sbi->extent_lock);
558 if (!list_empty(&en->list))
559 list_del_init(&en->list);
560 spin_unlock(&sbi->extent_lock);
561 }
562
563 if (free_all || list_empty(&en->list)) {
564 __detach_extent_node(sbi, et, en);
565 kmem_cache_free(extent_node_slab, en);
566 }
567 node = next;
568 }
569
570 return count - et->count;
571}
572
573static void f2fs_init_extent_tree(struct inode *inode,
574 struct f2fs_extent *i_ext)
575{
576 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
577 struct extent_tree *et;
578 struct extent_node *en;
579 struct extent_info ei;
580
581 if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
582 return;
583
584 et = __grab_extent_tree(inode);
585
586 write_lock(&et->lock);
587 if (et->count)
588 goto out;
589
590 set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
591 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
592
593 en = __insert_extent_tree(sbi, et, &ei, NULL);
594 if (en) {
595 et->cached_en = en;
596
597 spin_lock(&sbi->extent_lock);
598 list_add_tail(&en->list, &sbi->extent_list);
599 spin_unlock(&sbi->extent_lock);
600 }
601out:
602 write_unlock(&et->lock);
603 atomic_dec(&et->refcount);
604}
605
606static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
607 struct extent_info *ei)
608{
609 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
610 struct extent_tree *et;
611 struct extent_node *en;
612
613 trace_f2fs_lookup_extent_tree_start(inode, pgofs);
614
615 et = __find_extent_tree(sbi, inode->i_ino);
616 if (!et)
617 return false;
618
619 read_lock(&et->lock);
620 en = __lookup_extent_tree(et, pgofs);
621 if (en) {
622 *ei = en->ei;
623 spin_lock(&sbi->extent_lock);
624 if (!list_empty(&en->list))
625 list_move_tail(&en->list, &sbi->extent_list);
626 spin_unlock(&sbi->extent_lock);
627 stat_inc_read_hit(sbi->sb);
628 }
629 stat_inc_total_hit(sbi->sb);
630 read_unlock(&et->lock);
631
632 trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
633
634 atomic_dec(&et->refcount);
635 return en ? true : false;
636}
637
638static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
639 block_t blkaddr)
640{
641 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
642 struct extent_tree *et;
643 struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
644 struct extent_node *den = NULL;
645 struct extent_info ei, dei;
646 unsigned int endofs;
647
648 trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
649
650 et = __grab_extent_tree(inode);
651
652 write_lock(&et->lock);
653
654 /* 1. lookup and remove existing extent info in cache */
655 en = __lookup_extent_tree(et, fofs);
656 if (!en)
657 goto update_extent;
658
659 dei = en->ei;
660 __detach_extent_node(sbi, et, en);
661
662 /* 2. if extent can be split more, split and insert the left part */
663 if (dei.len > 1) {
664 /* insert left part of split extent into cache */
665 if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
666 set_extent_info(&ei, dei.fofs, dei.blk,
667 fofs - dei.fofs);
668 en1 = __insert_extent_tree(sbi, et, &ei, NULL);
669 }
670
671 /* insert right part of split extent into cache */
672 endofs = dei.fofs + dei.len - 1;
673 if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
674 set_extent_info(&ei, fofs + 1,
675 fofs - dei.fofs + dei.blk, endofs - fofs);
676 en2 = __insert_extent_tree(sbi, et, &ei, NULL);
677 }
678 }
679
680update_extent:
681 /* 3. update extent in extent cache */
682 if (blkaddr) {
683 set_extent_info(&ei, fofs, blkaddr, 1);
684 en3 = __insert_extent_tree(sbi, et, &ei, &den);
685 }
686
687 /* 4. update in global extent list */
688 spin_lock(&sbi->extent_lock);
689 if (en && !list_empty(&en->list))
690 list_del(&en->list);
691 /*
692 * en1 and en2 split from en, they will become more and more smaller
693 * fragments after splitting several times. So if the length is smaller
694 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
695 */
696 if (en1)
697 list_add_tail(&en1->list, &sbi->extent_list);
698 if (en2)
699 list_add_tail(&en2->list, &sbi->extent_list);
700 if (en3) {
701 if (list_empty(&en3->list))
702 list_add_tail(&en3->list, &sbi->extent_list);
703 else
704 list_move_tail(&en3->list, &sbi->extent_list);
705 }
706 if (den && !list_empty(&den->list))
707 list_del(&den->list);
708 spin_unlock(&sbi->extent_lock);
709
710 /* 5. release extent node */
711 if (en)
712 kmem_cache_free(extent_node_slab, en);
713 if (den)
714 kmem_cache_free(extent_node_slab, den);
715
716 write_unlock(&et->lock);
717 atomic_dec(&et->refcount);
718}
719
720void f2fs_preserve_extent_tree(struct inode *inode)
721{
722 struct extent_tree *et;
723 struct extent_info *ext = &F2FS_I(inode)->ext;
724 bool sync = false;
725
726 if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
727 return;
728
729 et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
730 if (!et) {
731 if (ext->len) {
732 ext->len = 0;
733 update_inode_page(inode);
734 }
735 return;
736 }
737
738 read_lock(&et->lock);
739 if (et->count) {
740 struct extent_node *en;
741
742 if (et->cached_en) {
743 en = et->cached_en;
744 } else {
745 struct rb_node *node = rb_first(&et->root);
746
747 if (!node)
748 node = rb_last(&et->root);
749 en = rb_entry(node, struct extent_node, rb_node);
750 }
751
752 if (__is_extent_same(ext, &en->ei))
753 goto out;
754
755 *ext = en->ei;
756 sync = true;
757 } else if (ext->len) {
758 ext->len = 0;
759 sync = true;
760 }
761out:
762 read_unlock(&et->lock);
763 atomic_dec(&et->refcount);
764
765 if (sync)
766 update_inode_page(inode);
767}
768
769void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
770{
771 struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
772 struct extent_node *en, *tmp;
773 unsigned long ino = F2FS_ROOT_INO(sbi);
774 struct radix_tree_iter iter;
775 void **slot;
776 unsigned int found;
777 unsigned int node_cnt = 0, tree_cnt = 0;
778
779 if (!test_opt(sbi, EXTENT_CACHE))
780 return;
781
782 if (available_free_memory(sbi, EXTENT_CACHE))
783 return;
784
785 spin_lock(&sbi->extent_lock);
786 list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
787 if (!nr_shrink--)
788 break;
789 list_del_init(&en->list);
790 }
791 spin_unlock(&sbi->extent_lock);
792
793 down_read(&sbi->extent_tree_lock);
794 while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
795 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
796 unsigned i;
797
798 ino = treevec[found - 1]->ino + 1;
799 for (i = 0; i < found; i++) {
800 struct extent_tree *et = treevec[i];
801
802 atomic_inc(&et->refcount);
803 write_lock(&et->lock);
804 node_cnt += __free_extent_tree(sbi, et, false);
805 write_unlock(&et->lock);
806 atomic_dec(&et->refcount);
807 }
808 }
809 up_read(&sbi->extent_tree_lock);
810
811 down_write(&sbi->extent_tree_lock);
812 radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
813 F2FS_ROOT_INO(sbi)) {
814 struct extent_tree *et = (struct extent_tree *)*slot;
815
816 if (!atomic_read(&et->refcount) && !et->count) {
817 radix_tree_delete(&sbi->extent_tree_root, et->ino);
818 kmem_cache_free(extent_tree_slab, et);
819 sbi->total_ext_tree--;
820 tree_cnt++;
821 }
822 }
823 up_write(&sbi->extent_tree_lock);
824
825 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
826}
827
828void f2fs_destroy_extent_tree(struct inode *inode)
829{
830 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
831 struct extent_tree *et;
832 unsigned int node_cnt = 0;
833
834 if (!test_opt(sbi, EXTENT_CACHE))
835 return;
836
837 et = __find_extent_tree(sbi, inode->i_ino);
838 if (!et)
839 goto out;
840
841 /* free all extent info belong to this extent tree */
842 write_lock(&et->lock);
843 node_cnt = __free_extent_tree(sbi, et, true);
844 write_unlock(&et->lock);
845
846 atomic_dec(&et->refcount);
847
848 /* try to find and delete extent tree entry in radix tree */
849 down_write(&sbi->extent_tree_lock);
850 et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
851 if (!et) {
852 up_write(&sbi->extent_tree_lock);
853 goto out;
854 }
855 f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
856 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
857 kmem_cache_free(extent_tree_slab, et);
858 sbi->total_ext_tree--;
859 up_write(&sbi->extent_tree_lock);
860out:
861 trace_f2fs_destroy_extent_tree(inode, node_cnt);
372 return; 862 return;
373} 863}
374 864
865void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
866{
867 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
868 f2fs_init_extent_tree(inode, i_ext);
869
870 write_lock(&F2FS_I(inode)->ext_lock);
871 get_extent_info(&F2FS_I(inode)->ext, *i_ext);
872 write_unlock(&F2FS_I(inode)->ext_lock);
873}
874
875static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
876 struct extent_info *ei)
877{
878 if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
879 return false;
880
881 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
882 return f2fs_lookup_extent_tree(inode, pgofs, ei);
883
884 return lookup_extent_info(inode, pgofs, ei);
885}
886
887void f2fs_update_extent_cache(struct dnode_of_data *dn)
888{
889 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
890 pgoff_t fofs;
891
892 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
893
894 if (is_inode_flag_set(fi, FI_NO_EXTENT))
895 return;
896
897 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
898 dn->ofs_in_node;
899
900 if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
901 return f2fs_update_extent_tree(dn->inode, fofs,
902 dn->data_blkaddr);
903
904 if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
905 sync_inode_page(dn);
906}
907
375struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 908struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
376{ 909{
377 struct address_space *mapping = inode->i_mapping; 910 struct address_space *mapping = inode->i_mapping;
378 struct dnode_of_data dn; 911 struct dnode_of_data dn;
379 struct page *page; 912 struct page *page;
913 struct extent_info ei;
380 int err; 914 int err;
381 struct f2fs_io_info fio = { 915 struct f2fs_io_info fio = {
382 .type = DATA, 916 .type = DATA,
383 .rw = sync ? READ_SYNC : READA, 917 .rw = sync ? READ_SYNC : READA,
384 }; 918 };
385 919
920 /*
921 * If sync is false, it needs to check its block allocation.
922 * This is need and triggered by two flows:
923 * gc and truncate_partial_data_page.
924 */
925 if (!sync)
926 goto search;
927
386 page = find_get_page(mapping, index); 928 page = find_get_page(mapping, index);
387 if (page && PageUptodate(page)) 929 if (page && PageUptodate(page))
388 return page; 930 return page;
389 f2fs_put_page(page, 0); 931 f2fs_put_page(page, 0);
932search:
933 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
934 dn.data_blkaddr = ei.blk + index - ei.fofs;
935 goto got_it;
936 }
390 937
391 set_new_dnode(&dn, inode, NULL, NULL, 0); 938 set_new_dnode(&dn, inode, NULL, NULL, 0);
392 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 939 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
@@ -401,6 +948,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
401 if (unlikely(dn.data_blkaddr == NEW_ADDR)) 948 if (unlikely(dn.data_blkaddr == NEW_ADDR))
402 return ERR_PTR(-EINVAL); 949 return ERR_PTR(-EINVAL);
403 950
951got_it:
404 page = grab_cache_page(mapping, index); 952 page = grab_cache_page(mapping, index);
405 if (!page) 953 if (!page)
406 return ERR_PTR(-ENOMEM); 954 return ERR_PTR(-ENOMEM);
@@ -435,6 +983,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
435 struct address_space *mapping = inode->i_mapping; 983 struct address_space *mapping = inode->i_mapping;
436 struct dnode_of_data dn; 984 struct dnode_of_data dn;
437 struct page *page; 985 struct page *page;
986 struct extent_info ei;
438 int err; 987 int err;
439 struct f2fs_io_info fio = { 988 struct f2fs_io_info fio = {
440 .type = DATA, 989 .type = DATA,
@@ -445,6 +994,11 @@ repeat:
445 if (!page) 994 if (!page)
446 return ERR_PTR(-ENOMEM); 995 return ERR_PTR(-ENOMEM);
447 996
997 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
998 dn.data_blkaddr = ei.blk + index - ei.fofs;
999 goto got_it;
1000 }
1001
448 set_new_dnode(&dn, inode, NULL, NULL, 0); 1002 set_new_dnode(&dn, inode, NULL, NULL, 0);
449 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1003 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
450 if (err) { 1004 if (err) {
@@ -458,6 +1012,7 @@ repeat:
458 return ERR_PTR(-ENOENT); 1012 return ERR_PTR(-ENOENT);
459 } 1013 }
460 1014
1015got_it:
461 if (PageUptodate(page)) 1016 if (PageUptodate(page))
462 return page; 1017 return page;
463 1018
@@ -569,19 +1124,26 @@ static int __allocate_data_block(struct dnode_of_data *dn)
569 1124
570 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1125 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
571 return -EPERM; 1126 return -EPERM;
1127
1128 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
1129 if (dn->data_blkaddr == NEW_ADDR)
1130 goto alloc;
1131
572 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 1132 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
573 return -ENOSPC; 1133 return -ENOSPC;
574 1134
1135alloc:
575 get_node_info(sbi, dn->nid, &ni); 1136 get_node_info(sbi, dn->nid, &ni);
576 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1137 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
577 1138
578 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) 1139 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
579 seg = CURSEG_DIRECT_IO; 1140 seg = CURSEG_DIRECT_IO;
580 1141
581 allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); 1142 allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
1143 &sum, seg);
582 1144
583 /* direct IO doesn't use extent cache to maximize the performance */ 1145 /* direct IO doesn't use extent cache to maximize the performance */
584 __set_data_blkaddr(dn); 1146 set_data_blkaddr(dn);
585 1147
586 /* update i_size */ 1148 /* update i_size */
587 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 1149 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -615,7 +1177,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
615 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 1177 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
616 1178
617 while (dn.ofs_in_node < end_offset && len) { 1179 while (dn.ofs_in_node < end_offset && len) {
618 if (dn.data_blkaddr == NULL_ADDR) { 1180 block_t blkaddr;
1181
1182 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
1183 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
619 if (__allocate_data_block(&dn)) 1184 if (__allocate_data_block(&dn))
620 goto sync_out; 1185 goto sync_out;
621 allocated = true; 1186 allocated = true;
@@ -659,13 +1224,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
659 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; 1224 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
660 pgoff_t pgofs, end_offset; 1225 pgoff_t pgofs, end_offset;
661 int err = 0, ofs = 1; 1226 int err = 0, ofs = 1;
1227 struct extent_info ei;
662 bool allocated = false; 1228 bool allocated = false;
663 1229
664 /* Get the page offset from the block offset(iblock) */ 1230 /* Get the page offset from the block offset(iblock) */
665 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 1231 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
666 1232
667 if (check_extent_cache(inode, pgofs, bh_result)) 1233 if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1234 f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result);
668 goto out; 1235 goto out;
1236 }
669 1237
670 if (create) 1238 if (create)
671 f2fs_lock_op(F2FS_I_SB(inode)); 1239 f2fs_lock_op(F2FS_I_SB(inode));
@@ -682,7 +1250,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
682 goto put_out; 1250 goto put_out;
683 1251
684 if (dn.data_blkaddr != NULL_ADDR) { 1252 if (dn.data_blkaddr != NULL_ADDR) {
685 set_buffer_new(bh_result); 1253 clear_buffer_new(bh_result);
686 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 1254 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
687 } else if (create) { 1255 } else if (create) {
688 err = __allocate_data_block(&dn); 1256 err = __allocate_data_block(&dn);
@@ -727,6 +1295,7 @@ get_next:
727 if (err) 1295 if (err)
728 goto sync_out; 1296 goto sync_out;
729 allocated = true; 1297 allocated = true;
1298 set_buffer_new(bh_result);
730 blkaddr = dn.data_blkaddr; 1299 blkaddr = dn.data_blkaddr;
731 } 1300 }
732 /* Give more consecutive addresses for the readahead */ 1301 /* Give more consecutive addresses for the readahead */
@@ -813,8 +1382,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
813 fio->blk_addr = dn.data_blkaddr; 1382 fio->blk_addr = dn.data_blkaddr;
814 1383
815 /* This page is already truncated */ 1384 /* This page is already truncated */
816 if (fio->blk_addr == NULL_ADDR) 1385 if (fio->blk_addr == NULL_ADDR) {
1386 ClearPageUptodate(page);
817 goto out_writepage; 1387 goto out_writepage;
1388 }
818 1389
819 set_page_writeback(page); 1390 set_page_writeback(page);
820 1391
@@ -827,10 +1398,15 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
827 need_inplace_update(inode))) { 1398 need_inplace_update(inode))) {
828 rewrite_data_page(page, fio); 1399 rewrite_data_page(page, fio);
829 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1400 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1401 trace_f2fs_do_write_data_page(page, IPU);
830 } else { 1402 } else {
831 write_data_page(page, &dn, fio); 1403 write_data_page(page, &dn, fio);
832 update_extent_cache(&dn); 1404 set_data_blkaddr(&dn);
1405 f2fs_update_extent_cache(&dn);
1406 trace_f2fs_do_write_data_page(page, OPU);
833 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 1407 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1408 if (page->index == 0)
1409 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
834 } 1410 }
835out_writepage: 1411out_writepage:
836 f2fs_put_dnode(&dn); 1412 f2fs_put_dnode(&dn);
@@ -909,6 +1485,8 @@ done:
909 clear_cold_data(page); 1485 clear_cold_data(page);
910out: 1486out:
911 inode_dec_dirty_pages(inode); 1487 inode_dec_dirty_pages(inode);
1488 if (err)
1489 ClearPageUptodate(page);
912 unlock_page(page); 1490 unlock_page(page);
913 if (need_balance_fs) 1491 if (need_balance_fs)
914 f2fs_balance_fs(sbi); 1492 f2fs_balance_fs(sbi);
@@ -935,7 +1513,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
935{ 1513{
936 struct inode *inode = mapping->host; 1514 struct inode *inode = mapping->host;
937 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
938 bool locked = false;
939 int ret; 1516 int ret;
940 long diff; 1517 long diff;
941 1518
@@ -950,15 +1527,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
950 available_free_memory(sbi, DIRTY_DENTS)) 1527 available_free_memory(sbi, DIRTY_DENTS))
951 goto skip_write; 1528 goto skip_write;
952 1529
1530 /* during POR, we don't need to trigger writepage at all. */
1531 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1532 goto skip_write;
1533
953 diff = nr_pages_to_write(sbi, DATA, wbc); 1534 diff = nr_pages_to_write(sbi, DATA, wbc);
954 1535
955 if (!S_ISDIR(inode->i_mode)) {
956 mutex_lock(&sbi->writepages);
957 locked = true;
958 }
959 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1536 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
960 if (locked)
961 mutex_unlock(&sbi->writepages);
962 1537
963 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1538 f2fs_submit_merged_bio(sbi, DATA, WRITE);
964 1539
@@ -1236,6 +1811,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1236 return generic_block_bmap(mapping, block, get_data_block); 1811 return generic_block_bmap(mapping, block, get_data_block);
1237} 1812}
1238 1813
1814void init_extent_cache_info(struct f2fs_sb_info *sbi)
1815{
1816 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
1817 init_rwsem(&sbi->extent_tree_lock);
1818 INIT_LIST_HEAD(&sbi->extent_list);
1819 spin_lock_init(&sbi->extent_lock);
1820 sbi->total_ext_tree = 0;
1821 atomic_set(&sbi->total_ext_node, 0);
1822}
1823
1824int __init create_extent_cache(void)
1825{
1826 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1827 sizeof(struct extent_tree));
1828 if (!extent_tree_slab)
1829 return -ENOMEM;
1830 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1831 sizeof(struct extent_node));
1832 if (!extent_node_slab) {
1833 kmem_cache_destroy(extent_tree_slab);
1834 return -ENOMEM;
1835 }
1836 return 0;
1837}
1838
1839void destroy_extent_cache(void)
1840{
1841 kmem_cache_destroy(extent_node_slab);
1842 kmem_cache_destroy(extent_tree_slab);
1843}
1844
1239const struct address_space_operations f2fs_dblock_aops = { 1845const struct address_space_operations f2fs_dblock_aops = {
1240 .readpage = f2fs_read_data_page, 1846 .readpage = f2fs_read_data_page,
1241 .readpages = f2fs_read_data_pages, 1847 .readpages = f2fs_read_data_pages,