author		Linus Torvalds <torvalds@linux-foundation.org>	2015-09-03 16:10:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-09-03 16:10:22 -0400
commit		4c12ab7e5e2e892fa94df500f96001837918a281
tree		c0772bf31167593212f9adc53152f44c010f438b
parent		9cbf22b37ae0592dea809cb8d424990774c21786
parent		01a5ad827a36e36f45e1fdb96903ea115f759865
Merge tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "The major work includes fixing and enhancing the existing extent_cache
  feature, which has settled down well so far and accordingly now
  becomes a default mount option.

  Also, this version registers a new f2fs memory shrinker to reclaim
  several objects consumed by a couple of data structures, in order to
  avoid memory pressure.

  Another new feature is ioctl(F2FS_GARBAGE_COLLECT), which lets users
  trigger a cleaning job explicitly.

  Most of the other patches fix bugs that occur in corner cases across
  the whole code area"
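A minimal userspace sketch of driving the new GC ioctl follows. The request encoding (magic 0xf5, number 6) and the __u32 sync argument are assumptions taken from this series' fs/f2fs/f2fs.h and fs/f2fs/file.c; verify them against your own kernel headers before use. The mount point is a placeholder.

/*
 * Sketch: ask f2fs to run a cleaning (GC) pass on a mounted filesystem.
 * The ioctl encoding below is assumed from this series' f2fs.h; check
 * your headers before relying on it. /mnt/f2fs is a placeholder path.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

#define F2FS_IOCTL_MAGIC		0xf5
#define F2FS_IOC_GARBAGE_COLLECT	_IO(F2FS_IOCTL_MAGIC, 6)

int main(void)
{
	__u32 sync = 1;	/* 1: wait for the GC pass to finish */
	int fd = open("/mnt/f2fs", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* the kernel side reads the sync flag through the pointer argument */
	if (ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &sync) < 0)
		perror("ioctl(F2FS_IOC_GARBAGE_COLLECT)");
	close(fd);
	return 0;
}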
* tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (85 commits)
f2fs: upset segment_info repair
f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent
f2fs: update extent tree in batches
f2fs: fix to release inode correctly
f2fs: handle f2fs_truncate error correctly
f2fs: avoid unneeded initializing when converting inline dentry
f2fs: atomically set inode->i_flags
f2fs: fix wrong pointer access during try_to_free_nids
f2fs: use __GFP_NOFAIL to avoid infinite loop
f2fs: lookup neighbor extent nodes for merging later
f2fs: split __insert_extent_tree_ret for readability
f2fs: kill dead code in __insert_extent_tree
f2fs: adjust showing of extent cache stat
f2fs: add largest/cached stat in extent cache
f2fs: fix incorrect mapping for bmap
f2fs: add annotation for space utilization of regular/inline dentry
f2fs: fix to update cached_en of extent tree properly
f2fs: fix typo
f2fs: check the node block address of newly allocated nid
f2fs: go out for insert_inode_locked failure
...
 Documentation/filesystems/f2fs.txt |   4
 MAINTAINERS                        |   2
 fs/f2fs/Kconfig                    |   2
 fs/f2fs/Makefile                   |   1
 fs/f2fs/checkpoint.c               |  93
 fs/f2fs/crypto_key.c               |   3
 fs/f2fs/data.c                     | 953
 fs/f2fs/debug.c                    |  30
 fs/f2fs/dir.c                      |   4
 fs/f2fs/extent_cache.c             | 791
 fs/f2fs/f2fs.h                     | 134
 fs/f2fs/file.c                     | 185
 fs/f2fs/gc.c                       |  81
 fs/f2fs/gc.h                       |   6
 fs/f2fs/inline.c                   |  23
 fs/f2fs/inode.c                    |  97
 fs/f2fs/namei.c                    |  21
 fs/f2fs/node.c                     |  86
 fs/f2fs/recovery.c                 |  43
 fs/f2fs/segment.c                  |  78
 fs/f2fs/segment.h                  |  55
 fs/f2fs/shrinker.c                 | 139
 fs/f2fs/super.c                    |  65
 fs/f2fs/xattr.c                    |   5
 include/linux/f2fs_fs.h            |  16
 include/trace/events/f2fs.h        |  12
 26 files changed, 1903 insertions, 1026 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index e9e750e59efc..e2d5105b7214 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -143,7 +143,9 @@ fastboot This option is used when a system wants to reduce mount
 extent_cache           Enable an extent cache based on rb-tree, it can cache
                        as many as extent which map between contiguous logical
                        address and physical address per inode, resulting in
-                       increasing the cache hit ratio.
+                       increasing the cache hit ratio. Set by default.
+noextent_cache         Disable an extent cache based on rb-tree explicitly, see
+                       the above extent_cache mount option.
 noinline_data          Disable the inline data feature, inline data feature is
                        enabled by default.
 
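Since extent_cache is now on by default, the option users will actually reach for is noextent_cache. Below is a small sketch of passing it via mount(2); device and mount point are placeholders, and `mount -o noextent_cache -t f2fs /dev/sdb1 /mnt/f2fs` is the shell equivalent.

/*
 * Sketch: mounting f2fs with the extent cache explicitly disabled.
 * /dev/sdb1 and /mnt/f2fs are placeholders. With this series, omitting
 * the option (or passing "extent_cache") gives the default behavior.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0, "noextent_cache") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}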
diff --git a/MAINTAINERS b/MAINTAINERS
index 73db93cc55fd..205cd5d687e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4416,6 +4416,7 @@ F: include/linux/fscache*.h
 F2FS FILE SYSTEM
 M:	Jaegeuk Kim <jaegeuk@kernel.org>
 M:	Changman Lee <cm224.lee@samsung.com>
+R:	Chao Yu <chao2.yu@samsung.com>
 L:	linux-f2fs-devel@lists.sourceforge.net
 W:	http://en.wikipedia.org/wiki/F2FS
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
@@ -4424,6 +4425,7 @@ F: Documentation/filesystems/f2fs.txt
 F:	Documentation/ABI/testing/sysfs-fs-f2fs
 F:	fs/f2fs/
 F:	include/linux/f2fs_fs.h
+F:	include/trace/events/f2fs.h
 
 FUJITSU FR-V (FRV) PORT
 M:	David Howells <dhowells@redhat.com>
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index c629762005bc..b0a9dc929f88 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -45,7 +45,7 @@ config F2FS_FS_POSIX_ACL
 	default y
 	help
 	  Posix Access Control Lists (ACLs) support permissions for users and
-	  gourps beyond the owner/group/world scheme.
+	  groups beyond the owner/group/world scheme.
 
 	  To learn more about Access Control Lists, visit the POSIX ACLs for
 	  Linux website <http://acl.bestbits.at/>.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 396be1a39e55..08e101ed914c 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o
 
 f2fs-y		:= dir.o file.o inode.o namei.o hash.o super.o inline.o
 f2fs-y		+= checkpoint.o gc.o data.o node.o segment.o recovery.o
+f2fs-y		+= shrinker.o extent_cache.o
 f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
 f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
 f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index b70bbe1a6a8c..c5a38e352a80 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -69,14 +69,24 @@ repeat:
 
 	fio.page = page;
 
-	if (f2fs_submit_page_bio(&fio))
+	if (f2fs_submit_page_bio(&fio)) {
+		f2fs_put_page(page, 1);
 		goto repeat;
+	}
 
 	lock_page(page);
 	if (unlikely(page->mapping != mapping)) {
 		f2fs_put_page(page, 1);
 		goto repeat;
 	}
+
+	/*
+	 * if there is any IO error when accessing device, make our filesystem
+	 * readonly and make sure do not write checkpoint with non-uptodate
+	 * meta page.
+	 */
+	if (unlikely(!PageUptodate(page)))
+		f2fs_stop_checkpoint(sbi);
 out:
 	return page;
 }
@@ -326,26 +336,18 @@ const struct address_space_operations f2fs_meta_aops = {
 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
 {
 	struct inode_management *im = &sbi->im[type];
-	struct ino_entry *e;
+	struct ino_entry *e, *tmp;
+
+	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
 retry:
-	if (radix_tree_preload(GFP_NOFS)) {
-		cond_resched();
-		goto retry;
-	}
+	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
 
 	spin_lock(&im->ino_lock);
-
 	e = radix_tree_lookup(&im->ino_root, ino);
 	if (!e) {
-		e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
-		if (!e) {
-			spin_unlock(&im->ino_lock);
-			radix_tree_preload_end();
-			goto retry;
-		}
+		e = tmp;
 		if (radix_tree_insert(&im->ino_root, ino, e)) {
 			spin_unlock(&im->ino_lock);
-			kmem_cache_free(ino_entry_slab, e);
 			radix_tree_preload_end();
 			goto retry;
 		}
@@ -358,6 +360,9 @@ retry:
 	}
 	spin_unlock(&im->ino_lock);
 	radix_tree_preload_end();
+
+	if (e != tmp)
+		kmem_cache_free(ino_entry_slab, tmp);
 }
 
 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -458,24 +463,34 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 	__remove_ino_entry(sbi, ino, ORPHAN_INO);
 }
 
-static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
-	struct inode *inode = f2fs_iget(sbi->sb, ino);
-	f2fs_bug_on(sbi, IS_ERR(inode));
+	struct inode *inode;
+
+	inode = f2fs_iget(sbi->sb, ino);
+	if (IS_ERR(inode)) {
+		/*
+		 * there should be a bug that we can't find the entry
+		 * to orphan inode.
+		 */
+		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
+		return PTR_ERR(inode);
+	}
+
 	clear_nlink(inode);
 
 	/* truncate all the data during iput */
 	iput(inode);
+	return 0;
 }
 
-void recover_orphan_inodes(struct f2fs_sb_info *sbi)
+int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 {
 	block_t start_blk, orphan_blocks, i, j;
+	int err;
 
 	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
-		return;
-
-	set_sbi_flag(sbi, SBI_POR_DOING);
+		return 0;
 
 	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
 	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
@@ -489,14 +504,17 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
 		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
 			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
-			recover_orphan_inode(sbi, ino);
+			err = recover_orphan_inode(sbi, ino);
+			if (err) {
+				f2fs_put_page(page, 1);
+				return err;
+			}
 		}
 		f2fs_put_page(page, 1);
 	}
 	/* clear Orphan Flag */
 	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
-	clear_sbi_flag(sbi, SBI_POR_DOING);
-	return;
+	return 0;
 }
 
 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
@@ -504,7 +522,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 	struct list_head *head;
 	struct f2fs_orphan_block *orphan_blk = NULL;
 	unsigned int nentries = 0;
-	unsigned short index;
+	unsigned short index = 1;
 	unsigned short orphan_blocks;
 	struct page *page = NULL;
 	struct ino_entry *orphan = NULL;
@@ -512,11 +530,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 
 	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
 
-	for (index = 0; index < orphan_blocks; index++)
-		grab_meta_page(sbi, start_blk + index);
-
-	index = 1;
-
 	/*
 	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
 	 * orphan inode operations are covered under f2fs_lock_op().
@@ -527,12 +540,10 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 	/* loop for each orphan inode entry and write them in Jornal block */
 	list_for_each_entry(orphan, head, list) {
 		if (!page) {
-			page = find_get_page(META_MAPPING(sbi), start_blk++);
-			f2fs_bug_on(sbi, !page);
+			page = grab_meta_page(sbi, start_blk++);
 			orphan_blk =
 				(struct f2fs_orphan_block *)page_address(page);
 			memset(orphan_blk, 0, sizeof(*orphan_blk));
-			f2fs_put_page(page, 0);
 		}
 
 		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
@@ -704,7 +715,8 @@ void update_dirty_page(struct inode *inode, struct page *page)
 	struct inode_entry *new;
 	int ret = 0;
 
-	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
+	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
+			!S_ISLNK(inode->i_mode))
 		return;
 
 	if (!S_ISDIR(inode->i_mode)) {
@@ -892,12 +904,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	__u32 crc32 = 0;
 	int i;
 	int cp_payload_blks = __cp_payload(sbi);
+	block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
+	bool invalidate = false;
 
 	/*
 	 * This avoids to conduct wrong roll-forward operations and uses
 	 * metapages, so should be called prior to sync_meta_pages below.
 	 */
-	discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
+	if (discard_next_dnode(sbi, discard_blk))
+		invalidate = true;
 
 	/* Flush all the NAT/SIT pages */
 	while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -1026,6 +1041,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	/* wait for previous submitted meta pages writeback */
 	wait_on_all_pages_writeback(sbi);
 
+	/*
+	 * invalidate meta page which is used temporarily for zeroing out
+	 * block at the end of warm node chain.
+	 */
+	if (invalidate)
+		invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
+								discard_blk);
+
 	release_dirty_inode(sbi);
 
 	if (unlikely(f2fs_cp_error(sbi)))
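The __add_ino_entry() hunk above is an instance of a common locking pattern: the old code allocated with GFP_ATOMIC under a spinlock and looped on failure, while the new code preallocates the entry where sleeping is allowed and takes a __GFP_NOFAIL radix-tree preload, so nothing inside the locked section can fail. A reduced sketch of the pattern, assuming illustrative names (struct entry, alloc_entry, entry_slab are not the f2fs ones):

/*
 * Sketch of the preallocate-outside-lock pattern used by the
 * __add_ino_entry() rework. All names here are illustrative.
 */
struct entry *add_entry(struct tree *t, unsigned long key)
{
	struct entry *e, *tmp;

	tmp = alloc_entry(GFP_NOFS);	/* may sleep, outside the lock */
retry:
	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);	/* cannot fail */

	spin_lock(&t->lock);
	e = radix_tree_lookup(&t->root, key);
	if (!e) {
		e = tmp;
		if (radix_tree_insert(&t->root, key, e)) {
			spin_unlock(&t->lock);
			radix_tree_preload_end();
			goto retry;	/* raced with another inserter */
		}
	}
	spin_unlock(&t->lock);
	radix_tree_preload_end();

	if (e != tmp)	/* entry already existed; drop the preallocation */
		kmem_cache_free(entry_slab, tmp);
	return e;
}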
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
index 95b8f936f00b..9f77de2ef317 100644
--- a/fs/f2fs/crypto_key.c
+++ b/fs/f2fs/crypto_key.c
@@ -92,8 +92,7 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
 	if (!ci)
 		return;
 
-	if (ci->ci_keyring_key)
-		key_put(ci->ci_keyring_key);
+	key_put(ci->ci_keyring_key);
 	crypto_free_ablkcipher(ci->ci_ctfm);
 	kmem_cache_free(f2fs_crypt_info_cachep, ci);
 }
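The crypto_key.c cleanup works because key_put(), like kfree(), is documented to do nothing when passed NULL, so guarding the call is redundant. A reduced illustration (struct ctx is a made-up type):

/* Sketch: NULL-tolerant cleanup needs no guards. struct ctx is illustrative. */
static void free_ctx(struct ctx *c)
{
	if (!c)
		return;

	key_put(c->keyring_key);	/* NULL-safe, no if () needed */
	kfree(c->buf);			/* likewise */
	kfree(c);
}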
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c414d49aa2de..a82abe921b89 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -14,6 +14,7 @@
 #include <linux/mpage.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
+#include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/prefetch.h>
@@ -26,9 +27,6 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
-static struct kmem_cache *extent_tree_slab;
-static struct kmem_cache *extent_node_slab;
-
 static void f2fs_read_end_io(struct bio *bio)
 {
 	struct bio_vec *bvec;
@@ -92,8 +90,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 {
 	struct bio *bio;
 
-	/* No failure on bio allocation */
-	bio = bio_alloc(GFP_NOIO, npages);
+	bio = f2fs_bio_alloc(npages);
 
 	bio->bi_bdev = sbi->sb->s_bdev;
 	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
@@ -158,7 +155,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 
 	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
 		bio_put(bio);
-		f2fs_put_page(page, 1);
 		return -EFAULT;
 	}
 
@@ -266,645 +262,17 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 	return err;
 }
 
-static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
-					struct extent_info *ei)
-{
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	pgoff_t start_fofs, end_fofs;
-	block_t start_blkaddr;
-
-	read_lock(&fi->ext_lock);
-	if (fi->ext.len == 0) {
-		read_unlock(&fi->ext_lock);
-		return false;
-	}
-
-	stat_inc_total_hit(inode->i_sb);
-
-	start_fofs = fi->ext.fofs;
-	end_fofs = fi->ext.fofs + fi->ext.len - 1;
-	start_blkaddr = fi->ext.blk;
-
-	if (pgofs >= start_fofs && pgofs <= end_fofs) {
-		*ei = fi->ext;
-		stat_inc_read_hit(inode->i_sb);
-		read_unlock(&fi->ext_lock);
-		return true;
-	}
-	read_unlock(&fi->ext_lock);
-	return false;
-}
-
-static bool update_extent_info(struct inode *inode, pgoff_t fofs,
-								block_t blkaddr)
-{
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	pgoff_t start_fofs, end_fofs;
-	block_t start_blkaddr, end_blkaddr;
-	int need_update = true;
-
-	write_lock(&fi->ext_lock);
-
-	start_fofs = fi->ext.fofs;
-	end_fofs = fi->ext.fofs + fi->ext.len - 1;
-	start_blkaddr = fi->ext.blk;
-	end_blkaddr = fi->ext.blk + fi->ext.len - 1;
-
-	/* Drop and initialize the matched extent */
-	if (fi->ext.len == 1 && fofs == start_fofs)
-		fi->ext.len = 0;
-
-	/* Initial extent */
-	if (fi->ext.len == 0) {
-		if (blkaddr != NULL_ADDR) {
-			fi->ext.fofs = fofs;
-			fi->ext.blk = blkaddr;
-			fi->ext.len = 1;
-		}
-		goto end_update;
-	}
-
-	/* Front merge */
-	if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
-		fi->ext.fofs--;
-		fi->ext.blk--;
-		fi->ext.len++;
-		goto end_update;
-	}
-
-	/* Back merge */
-	if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
-		fi->ext.len++;
-		goto end_update;
-	}
-
-	/* Split the existing extent */
-	if (fi->ext.len > 1 &&
-			fofs >= start_fofs && fofs <= end_fofs) {
-		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
-			fi->ext.len = fofs - start_fofs;
-		} else {
-			fi->ext.fofs = fofs + 1;
-			fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
-			fi->ext.len -= fofs - start_fofs + 1;
-		}
-	} else {
-		need_update = false;
-	}
-
-	/* Finally, if the extent is very fragmented, let's drop the cache. */
-	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
-		fi->ext.len = 0;
-		set_inode_flag(fi, FI_NO_EXTENT);
-		need_update = true;
-	}
-end_update:
-	write_unlock(&fi->ext_lock);
-	return need_update;
-}
-
-static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_info *ei,
-				struct rb_node *parent, struct rb_node **p)
-{
-	struct extent_node *en;
-
-	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
-	if (!en)
-		return NULL;
-
-	en->ei = *ei;
-	INIT_LIST_HEAD(&en->list);
-
-	rb_link_node(&en->rb_node, parent, p);
-	rb_insert_color(&en->rb_node, &et->root);
-	et->count++;
-	atomic_inc(&sbi->total_ext_node);
-	return en;
-}
-
-static void __detach_extent_node(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_node *en)
-{
-	rb_erase(&en->rb_node, &et->root);
-	et->count--;
-	atomic_dec(&sbi->total_ext_node);
-
-	if (et->cached_en == en)
-		et->cached_en = NULL;
-}
-
-static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
-							nid_t ino)
-{
-	struct extent_tree *et;
-
-	down_read(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
-	if (!et) {
-		up_read(&sbi->extent_tree_lock);
-		return NULL;
-	}
-	atomic_inc(&et->refcount);
-	up_read(&sbi->extent_tree_lock);
-
-	return et;
-}
-
-static struct extent_tree *__grab_extent_tree(struct inode *inode)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	nid_t ino = inode->i_ino;
-
-	down_write(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
-	if (!et) {
-		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
-		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
-		memset(et, 0, sizeof(struct extent_tree));
-		et->ino = ino;
-		et->root = RB_ROOT;
-		et->cached_en = NULL;
-		rwlock_init(&et->lock);
-		atomic_set(&et->refcount, 0);
-		et->count = 0;
-		sbi->total_ext_tree++;
-	}
-	atomic_inc(&et->refcount);
-	up_write(&sbi->extent_tree_lock);
-
-	return et;
-}
-
-static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
-							unsigned int fofs)
-{
-	struct rb_node *node = et->root.rb_node;
-	struct extent_node *en;
-
-	if (et->cached_en) {
-		struct extent_info *cei = &et->cached_en->ei;
-
-		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
-			return et->cached_en;
-	}
-
-	while (node) {
-		en = rb_entry(node, struct extent_node, rb_node);
-
-		if (fofs < en->ei.fofs) {
-			node = node->rb_left;
-		} else if (fofs >= en->ei.fofs + en->ei.len) {
-			node = node->rb_right;
-		} else {
-			et->cached_en = en;
-			return en;
-		}
-	}
-	return NULL;
-}
-
-static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_node *en)
-{
-	struct extent_node *prev;
-	struct rb_node *node;
-
-	node = rb_prev(&en->rb_node);
-	if (!node)
-		return NULL;
-
-	prev = rb_entry(node, struct extent_node, rb_node);
-	if (__is_back_mergeable(&en->ei, &prev->ei)) {
-		en->ei.fofs = prev->ei.fofs;
-		en->ei.blk = prev->ei.blk;
-		en->ei.len += prev->ei.len;
-		__detach_extent_node(sbi, et, prev);
-		return prev;
-	}
-	return NULL;
-}
-
-static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_node *en)
-{
-	struct extent_node *next;
-	struct rb_node *node;
-
-	node = rb_next(&en->rb_node);
-	if (!node)
-		return NULL;
-
-	next = rb_entry(node, struct extent_node, rb_node);
-	if (__is_front_mergeable(&en->ei, &next->ei)) {
-		en->ei.len += next->ei.len;
-		__detach_extent_node(sbi, et, next);
-		return next;
-	}
-	return NULL;
-}
-
-static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_info *ei,
-				struct extent_node **den)
-{
-	struct rb_node **p = &et->root.rb_node;
-	struct rb_node *parent = NULL;
-	struct extent_node *en;
-
-	while (*p) {
-		parent = *p;
-		en = rb_entry(parent, struct extent_node, rb_node);
-
-		if (ei->fofs < en->ei.fofs) {
-			if (__is_front_mergeable(ei, &en->ei)) {
-				f2fs_bug_on(sbi, !den);
-				en->ei.fofs = ei->fofs;
-				en->ei.blk = ei->blk;
-				en->ei.len += ei->len;
-				*den = __try_back_merge(sbi, et, en);
-				return en;
-			}
-			p = &(*p)->rb_left;
-		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
-			if (__is_back_mergeable(ei, &en->ei)) {
-				f2fs_bug_on(sbi, !den);
-				en->ei.len += ei->len;
-				*den = __try_front_merge(sbi, et, en);
-				return en;
-			}
-			p = &(*p)->rb_right;
-		} else {
-			f2fs_bug_on(sbi, 1);
-		}
-	}
-
-	return __attach_extent_node(sbi, et, ei, parent, p);
-}
-
-static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
-					struct extent_tree *et, bool free_all)
-{
-	struct rb_node *node, *next;
-	struct extent_node *en;
-	unsigned int count = et->count;
-
-	node = rb_first(&et->root);
-	while (node) {
-		next = rb_next(node);
-		en = rb_entry(node, struct extent_node, rb_node);
-
-		if (free_all) {
-			spin_lock(&sbi->extent_lock);
-			if (!list_empty(&en->list))
-				list_del_init(&en->list);
-			spin_unlock(&sbi->extent_lock);
-		}
-
-		if (free_all || list_empty(&en->list)) {
-			__detach_extent_node(sbi, et, en);
-			kmem_cache_free(extent_node_slab, en);
-		}
-		node = next;
-	}
-
-	return count - et->count;
-}
-
-static void f2fs_init_extent_tree(struct inode *inode,
-						struct f2fs_extent *i_ext)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	struct extent_node *en;
-	struct extent_info ei;
-
-	if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
-		return;
-
-	et = __grab_extent_tree(inode);
-
-	write_lock(&et->lock);
-	if (et->count)
-		goto out;
-
-	set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
-		le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
-
-	en = __insert_extent_tree(sbi, et, &ei, NULL);
-	if (en) {
-		et->cached_en = en;
-
-		spin_lock(&sbi->extent_lock);
-		list_add_tail(&en->list, &sbi->extent_list);
-		spin_unlock(&sbi->extent_lock);
-	}
-out:
-	write_unlock(&et->lock);
-	atomic_dec(&et->refcount);
-}
-
-static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
-							struct extent_info *ei)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	struct extent_node *en;
-
-	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
-
-	et = __find_extent_tree(sbi, inode->i_ino);
-	if (!et)
-		return false;
-
-	read_lock(&et->lock);
-	en = __lookup_extent_tree(et, pgofs);
-	if (en) {
-		*ei = en->ei;
-		spin_lock(&sbi->extent_lock);
-		if (!list_empty(&en->list))
-			list_move_tail(&en->list, &sbi->extent_list);
-		spin_unlock(&sbi->extent_lock);
-		stat_inc_read_hit(sbi->sb);
-	}
-	stat_inc_total_hit(sbi->sb);
-	read_unlock(&et->lock);
-
-	trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
-
-	atomic_dec(&et->refcount);
-	return en ? true : false;
-}
-
-static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
-							block_t blkaddr)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
-	struct extent_node *den = NULL;
-	struct extent_info ei, dei;
-	unsigned int endofs;
-
-	trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
-
-	et = __grab_extent_tree(inode);
-
-	write_lock(&et->lock);
-
-	/* 1. lookup and remove existing extent info in cache */
-	en = __lookup_extent_tree(et, fofs);
-	if (!en)
-		goto update_extent;
-
-	dei = en->ei;
-	__detach_extent_node(sbi, et, en);
-
-	/* 2. if extent can be split more, split and insert the left part */
-	if (dei.len > 1) {
-		/* insert left part of split extent into cache */
-		if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
-			set_extent_info(&ei, dei.fofs, dei.blk,
-							fofs - dei.fofs);
-			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
-		}
-
-		/* insert right part of split extent into cache */
-		endofs = dei.fofs + dei.len - 1;
-		if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
-			set_extent_info(&ei, fofs + 1,
-				fofs - dei.fofs + dei.blk, endofs - fofs);
-			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
-		}
-	}
-
-update_extent:
-	/* 3. update extent in extent cache */
-	if (blkaddr) {
-		set_extent_info(&ei, fofs, blkaddr, 1);
-		en3 = __insert_extent_tree(sbi, et, &ei, &den);
-	}
-
-	/* 4. update in global extent list */
-	spin_lock(&sbi->extent_lock);
-	if (en && !list_empty(&en->list))
-		list_del(&en->list);
-	/*
-	 * en1 and en2 split from en, they will become more and more smaller
-	 * fragments after splitting several times. So if the length is smaller
-	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
-	 */
-	if (en1)
-		list_add_tail(&en1->list, &sbi->extent_list);
-	if (en2)
-		list_add_tail(&en2->list, &sbi->extent_list);
-	if (en3) {
-		if (list_empty(&en3->list))
-			list_add_tail(&en3->list, &sbi->extent_list);
-		else
-			list_move_tail(&en3->list, &sbi->extent_list);
-	}
-	if (den && !list_empty(&den->list))
-		list_del(&den->list);
-	spin_unlock(&sbi->extent_lock);
-
-	/* 5. release extent node */
-	if (en)
-		kmem_cache_free(extent_node_slab, en);
-	if (den)
-		kmem_cache_free(extent_node_slab, den);
-
-	write_unlock(&et->lock);
-	atomic_dec(&et->refcount);
-}
-
-void f2fs_preserve_extent_tree(struct inode *inode)
-{
-	struct extent_tree *et;
-	struct extent_info *ext = &F2FS_I(inode)->ext;
-	bool sync = false;
-
-	if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
-		return;
-
-	et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
-	if (!et) {
-		if (ext->len) {
-			ext->len = 0;
-			update_inode_page(inode);
-		}
-		return;
-	}
-
-	read_lock(&et->lock);
-	if (et->count) {
-		struct extent_node *en;
-
-		if (et->cached_en) {
-			en = et->cached_en;
-		} else {
-			struct rb_node *node = rb_first(&et->root);
-
-			if (!node)
-				node = rb_last(&et->root);
-			en = rb_entry(node, struct extent_node, rb_node);
-		}
-
-		if (__is_extent_same(ext, &en->ei))
-			goto out;
-
-		*ext = en->ei;
-		sync = true;
-	} else if (ext->len) {
-		ext->len = 0;
-		sync = true;
-	}
-out:
-	read_unlock(&et->lock);
-	atomic_dec(&et->refcount);
-
-	if (sync)
-		update_inode_page(inode);
-}
-
-void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
-{
-	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
-	struct extent_node *en, *tmp;
-	unsigned long ino = F2FS_ROOT_INO(sbi);
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned int found;
-	unsigned int node_cnt = 0, tree_cnt = 0;
-
-	if (!test_opt(sbi, EXTENT_CACHE))
-		return;
-
-	if (available_free_memory(sbi, EXTENT_CACHE))
-		return;
-
-	spin_lock(&sbi->extent_lock);
-	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
-		if (!nr_shrink--)
-			break;
-		list_del_init(&en->list);
-	}
-	spin_unlock(&sbi->extent_lock);
-
-	down_read(&sbi->extent_tree_lock);
-	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
-				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
-		unsigned i;
-
-		ino = treevec[found - 1]->ino + 1;
-		for (i = 0; i < found; i++) {
-			struct extent_tree *et = treevec[i];
-
-			atomic_inc(&et->refcount);
-			write_lock(&et->lock);
-			node_cnt += __free_extent_tree(sbi, et, false);
-			write_unlock(&et->lock);
-			atomic_dec(&et->refcount);
-		}
-	}
-	up_read(&sbi->extent_tree_lock);
-
-	down_write(&sbi->extent_tree_lock);
-	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
-							F2FS_ROOT_INO(sbi)) {
-		struct extent_tree *et = (struct extent_tree *)*slot;
-
-		if (!atomic_read(&et->refcount) && !et->count) {
-			radix_tree_delete(&sbi->extent_tree_root, et->ino);
-			kmem_cache_free(extent_tree_slab, et);
-			sbi->total_ext_tree--;
-			tree_cnt++;
-		}
-	}
-	up_write(&sbi->extent_tree_lock);
-
-	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
-}
-
-void f2fs_destroy_extent_tree(struct inode *inode)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	unsigned int node_cnt = 0;
-
-	if (!test_opt(sbi, EXTENT_CACHE))
-		return;
-
-	et = __find_extent_tree(sbi, inode->i_ino);
-	if (!et)
-		goto out;
-
-	/* free all extent info belong to this extent tree */
-	write_lock(&et->lock);
-	node_cnt = __free_extent_tree(sbi, et, true);
-	write_unlock(&et->lock);
-
-	atomic_dec(&et->refcount);
-
-	/* try to find and delete extent tree entry in radix tree */
-	down_write(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
-	if (!et) {
-		up_write(&sbi->extent_tree_lock);
-		goto out;
-	}
-	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
-	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
-	kmem_cache_free(extent_tree_slab, et);
-	sbi->total_ext_tree--;
-	up_write(&sbi->extent_tree_lock);
-out:
-	trace_f2fs_destroy_extent_tree(inode, node_cnt);
-	return;
-}
-
-void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
-{
-	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
-		f2fs_init_extent_tree(inode, i_ext);
-
-	write_lock(&F2FS_I(inode)->ext_lock);
-	get_extent_info(&F2FS_I(inode)->ext, *i_ext);
-	write_unlock(&F2FS_I(inode)->ext_lock);
-}
-
-static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
-							struct extent_info *ei)
-{
-	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
-		return false;
-
-	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
-		return f2fs_lookup_extent_tree(inode, pgofs, ei);
-
-	return lookup_extent_info(inode, pgofs, ei);
-}
-
-void f2fs_update_extent_cache(struct dnode_of_data *dn)
-{
-	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
-	pgoff_t fofs;
-
-	f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
-
-	if (is_inode_flag_set(fi, FI_NO_EXTENT))
-		return;
-
-	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
-							dn->ofs_in_node;
-
-	if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
-		return f2fs_update_extent_tree(dn->inode, fofs,
-							dn->data_blkaddr);
-
-	if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
-		sync_inode_page(dn);
-}
+int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
+{
+	struct extent_info ei;
+	struct inode *inode = dn->inode;
+
+	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+		dn->data_blkaddr = ei.blk + index - ei.fofs;
+		return 0;
+	}
+
+	return f2fs_reserve_block(dn, index);
+}
 
 struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
@@ -935,15 +303,13 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
 
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
-	if (err) {
-		f2fs_put_page(page, 1);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto put_err;
 	f2fs_put_dnode(&dn);
 
 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
-		f2fs_put_page(page, 1);
-		return ERR_PTR(-ENOENT);
+		err = -ENOENT;
+		goto put_err;
 	}
 got_it:
 	if (PageUptodate(page)) {
@@ -968,8 +334,12 @@ got_it:
 	fio.page = page;
 	err = f2fs_submit_page_bio(&fio);
 	if (err)
-		return ERR_PTR(err);
+		goto put_err;
 	return page;
+
+put_err:
+	f2fs_put_page(page, 1);
+	return ERR_PTR(err);
 }
 
 struct page *find_data_page(struct inode *inode, pgoff_t index)
@@ -1030,7 +400,8 @@ repeat:
  *
  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
  * f2fs_unlock_op().
- * Note that, ipage is set only by make_empty_dir.
+ * Note that, ipage is set only by make_empty_dir, and if any error occur,
+ * ipage should be released by this function.
  */
 struct page *get_new_data_page(struct inode *inode,
 			struct page *ipage, pgoff_t index, bool new_i_size)
@@ -1041,8 +412,14 @@ struct page *get_new_data_page(struct inode *inode,
 	int err;
 repeat:
 	page = grab_cache_page(mapping, index);
-	if (!page)
+	if (!page) {
+		/*
+		 * before exiting, we should make sure ipage will be released
+		 * if any error occur.
+		 */
+		f2fs_put_page(ipage, 1);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	set_new_dnode(&dn, inode, ipage, NULL, 0);
 	err = f2fs_reserve_block(&dn, index);
@@ -1107,8 +484,6 @@ alloc:
 
 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
 							&sum, seg);
-
-	/* direct IO doesn't use extent cache to maximize the performance */
 	set_data_blkaddr(dn);
 
 	/* update i_size */
@@ -1117,6 +492,9 @@ alloc:
 	if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
 		i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
 
+	/* direct IO doesn't use extent cache to maximize the performance */
+	f2fs_drop_largest_extent(dn->inode, fofs);
+
 	return 0;
 }
 
@@ -1183,7 +561,7 @@ out:
  * c. give the block addresses to blockdev
  */
 static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
-						int create, bool fiemap)
+						int create, int flag)
 {
 	unsigned int maxblocks = map->m_len;
 	struct dnode_of_data dn;
@@ -1217,8 +595,19 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 			err = 0;
 			goto unlock_out;
 		}
-		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
-			goto put_out;
+		if (dn.data_blkaddr == NEW_ADDR) {
+			if (flag == F2FS_GET_BLOCK_BMAP) {
+				err = -ENOENT;
+				goto put_out;
+			} else if (flag == F2FS_GET_BLOCK_READ ||
+					flag == F2FS_GET_BLOCK_DIO) {
+				goto put_out;
+			}
+			/*
+			 * if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP),
+			 * mark it as mapped and unwritten block.
+			 */
+		}
 
 	if (dn.data_blkaddr != NULL_ADDR) {
 		map->m_flags = F2FS_MAP_MAPPED;
@@ -1233,6 +622,8 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 		map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
 		map->m_pblk = dn.data_blkaddr;
 	} else {
+		if (flag == F2FS_GET_BLOCK_BMAP)
+			err = -ENOENT;
 		goto put_out;
 	}
 
@@ -1255,7 +646,9 @@ get_next:
 			err = 0;
 			goto unlock_out;
 		}
-		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+
+		if (dn.data_blkaddr == NEW_ADDR &&
+				flag != F2FS_GET_BLOCK_FIEMAP)
 			goto put_out;
 
 		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -1297,7 +690,7 @@ out:
 }
 
 static int __get_data_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh, int create, bool fiemap)
+			struct buffer_head *bh, int create, int flag)
 {
 	struct f2fs_map_blocks map;
 	int ret;
@@ -1305,7 +698,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 	map.m_lblk = iblock;
 	map.m_len = bh->b_size >> inode->i_blkbits;
 
-	ret = f2fs_map_blocks(inode, &map, create, fiemap);
+	ret = f2fs_map_blocks(inode, &map, create, flag);
 	if (!ret) {
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
@@ -1315,15 +708,23 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 }
 
 static int get_data_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create, int flag)
+{
+	return __get_data_block(inode, iblock, bh_result, create, flag);
+}
+
+static int get_data_block_dio(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
-	return __get_data_block(inode, iblock, bh_result, create, false);
+	return __get_data_block(inode, iblock, bh_result, create,
+						F2FS_GET_BLOCK_DIO);
 }
 
-static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+static int get_data_block_bmap(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
-	return __get_data_block(inode, iblock, bh_result, create, true);
+	return __get_data_block(inode, iblock, bh_result, create,
+						F2FS_GET_BLOCK_BMAP);
 }
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1367,7 +768,8 @@ next:
 	memset(&map_bh, 0, sizeof(struct buffer_head));
 	map_bh.b_size = len;
 
-	ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0);
+	ret = get_data_block(inode, start_blk, &map_bh, 0,
+					F2FS_GET_BLOCK_FIEMAP);
 	if (ret)
 		goto out;
 
@@ -1770,6 +1172,137 @@ static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
 	return ret;
 }
 
+/*
+ * This function was copied from write_cache_pages from mm/page-writeback.c.
+ * The major change is making write step of cold data page separately from
+ * warm/hot data page.
+ */
+static int f2fs_write_cache_pages(struct address_space *mapping,
+			struct writeback_control *wbc, writepage_t writepage,
+			void *data)
+{
+	int ret = 0;
+	int done = 0;
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t uninitialized_var(writeback_index);
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
+	pgoff_t done_index;
+	int cycled;
+	int range_whole = 0;
+	int tag;
+	int step = 0;
+
+	pagevec_init(&pvec, 0);
+next:
+	if (wbc->range_cyclic) {
+		writeback_index = mapping->writeback_index; /* prev offset */
+		index = writeback_index;
+		if (index == 0)
+			cycled = 1;
+		else
+			cycled = 0;
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		cycled = 1; /* ignore range_cyclic tests */
+	}
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
+retry:
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag_pages_for_writeback(mapping, index, end);
+	done_index = index;
+	while (!done && (index <= end)) {
+		int i;
+
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+			      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			if (page->index > end) {
+				done = 1;
+				break;
+			}
+
+			done_index = page->index;
+
+			lock_page(page);
+
+			if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+				unlock_page(page);
+				continue;
+			}
+
+			if (!PageDirty(page)) {
+				/* someone wrote it for us */
+				goto continue_unlock;
+			}
+
+			if (step == is_cold_data(page))
+				goto continue_unlock;
+
+			if (PageWriteback(page)) {
+				if (wbc->sync_mode != WB_SYNC_NONE)
+					f2fs_wait_on_page_writeback(page, DATA);
+				else
+					goto continue_unlock;
+			}
+
+			BUG_ON(PageWriteback(page));
+			if (!clear_page_dirty_for_io(page))
+				goto continue_unlock;
+
+			ret = (*writepage)(page, wbc, data);
+			if (unlikely(ret)) {
+				if (ret == AOP_WRITEPAGE_ACTIVATE) {
+					unlock_page(page);
+					ret = 0;
+				} else {
+					done_index = page->index + 1;
+					done = 1;
+					break;
+				}
+			}
+
+			if (--wbc->nr_to_write <= 0 &&
+			    wbc->sync_mode == WB_SYNC_NONE) {
+				done = 1;
+				break;
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	if (step < 1) {
+		step++;
+		goto next;
+	}
+
+	if (!cycled && !done) {
+		cycled = 1;
+		index = 0;
+		end = writeback_index - 1;
+		goto retry;
+	}
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = done_index;
+
+	return ret;
+}
+
 static int f2fs_write_data_pages(struct address_space *mapping,
 		struct writeback_control *wbc)
 {
@@ -1785,6 +1318,10 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
1785 | if (!mapping->a_ops->writepage) | 1318 | if (!mapping->a_ops->writepage) |
1786 | return 0; | 1319 | return 0; |
1787 | 1320 | ||
1321 | /* skip writing if there is no dirty page in this inode */ | ||
1322 | if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) | ||
1323 | return 0; | ||
1324 | |||
1788 | if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && | 1325 | if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && |
1789 | get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && | 1326 | get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && |
1790 | available_free_memory(sbi, DIRTY_DENTS)) | 1327 | available_free_memory(sbi, DIRTY_DENTS)) |
@@ -1800,12 +1337,11 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
1800 | mutex_lock(&sbi->writepages); | 1337 | mutex_lock(&sbi->writepages); |
1801 | locked = true; | 1338 | locked = true; |
1802 | } | 1339 | } |
1803 | ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); | 1340 | ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); |
1341 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
1804 | if (locked) | 1342 | if (locked) |
1805 | mutex_unlock(&sbi->writepages); | 1343 | mutex_unlock(&sbi->writepages); |
1806 | 1344 | ||
1807 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
1808 | |||
1809 | remove_dirty_dir_inode(inode); | 1345 | remove_dirty_dir_inode(inode); |
1810 | 1346 | ||
1811 | wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); | 1347 | wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); |
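Two behavioral changes land in this hunk: a WB_SYNC_NONE flusher now bails out before taking any locks when the inode provably has nothing dirty, and the merged-bio flush moves inside the sbi->writepages critical section, so the pages this thread queued are submitted before another flusher can interleave its own pages into the shared per-type bio. A condensed sketch of the reordered region (the real code takes the mutex only conditionally):

    mutex_lock(&sbi->writepages);
    ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
    f2fs_submit_merged_bio(sbi, DATA, WRITE);   /* flush while still serialized */
    mutex_unlock(&sbi->writepages);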
@@ -1832,7 +1368,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, | |||
1832 | { | 1368 | { |
1833 | struct inode *inode = mapping->host; | 1369 | struct inode *inode = mapping->host; |
1834 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 1370 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
1835 | struct page *page, *ipage; | 1371 | struct page *page = NULL; |
1372 | struct page *ipage; | ||
1836 | pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; | 1373 | pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; |
1837 | struct dnode_of_data dn; | 1374 | struct dnode_of_data dn; |
1838 | int err = 0; | 1375 | int err = 0; |
@@ -1882,25 +1419,28 @@ repeat: | |||
1882 | if (err) | 1419 | if (err) |
1883 | goto put_fail; | 1420 | goto put_fail; |
1884 | } | 1421 | } |
1885 | err = f2fs_reserve_block(&dn, index); | 1422 | |
1423 | err = f2fs_get_block(&dn, index); | ||
1886 | if (err) | 1424 | if (err) |
1887 | goto put_fail; | 1425 | goto put_fail; |
1888 | put_next: | 1426 | put_next: |
1889 | f2fs_put_dnode(&dn); | 1427 | f2fs_put_dnode(&dn); |
1890 | f2fs_unlock_op(sbi); | 1428 | f2fs_unlock_op(sbi); |
1891 | 1429 | ||
1892 | if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) | ||
1893 | return 0; | ||
1894 | |||
1895 | f2fs_wait_on_page_writeback(page, DATA); | 1430 | f2fs_wait_on_page_writeback(page, DATA); |
1896 | 1431 | ||
1432 | if (len == PAGE_CACHE_SIZE) | ||
1433 | goto out_update; | ||
1434 | if (PageUptodate(page)) | ||
1435 | goto out_clear; | ||
1436 | |||
1897 | if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { | 1437 | if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { |
1898 | unsigned start = pos & (PAGE_CACHE_SIZE - 1); | 1438 | unsigned start = pos & (PAGE_CACHE_SIZE - 1); |
1899 | unsigned end = start + len; | 1439 | unsigned end = start + len; |
1900 | 1440 | ||
1901 | /* Reading beyond i_size is simple: memset to zero */ | 1441 | /* Reading beyond i_size is simple: memset to zero */ |
1902 | zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); | 1442 | zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); |
1903 | goto out; | 1443 | goto out_update; |
1904 | } | 1444 | } |
1905 | 1445 | ||
1906 | if (dn.data_blkaddr == NEW_ADDR) { | 1446 | if (dn.data_blkaddr == NEW_ADDR) { |
@@ -1920,7 +1460,6 @@ put_next: | |||
1920 | 1460 | ||
1921 | lock_page(page); | 1461 | lock_page(page); |
1922 | if (unlikely(!PageUptodate(page))) { | 1462 | if (unlikely(!PageUptodate(page))) { |
1923 | f2fs_put_page(page, 1); | ||
1924 | err = -EIO; | 1463 | err = -EIO; |
1925 | goto fail; | 1464 | goto fail; |
1926 | } | 1465 | } |
@@ -1932,14 +1471,13 @@ put_next: | |||
1932 | /* avoid symlink page */ | 1471 | /* avoid symlink page */ |
1933 | if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { | 1472 | if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { |
1934 | err = f2fs_decrypt_one(inode, page); | 1473 | err = f2fs_decrypt_one(inode, page); |
1935 | if (err) { | 1474 | if (err) |
1936 | f2fs_put_page(page, 1); | ||
1937 | goto fail; | 1475 | goto fail; |
1938 | } | ||
1939 | } | 1476 | } |
1940 | } | 1477 | } |
1941 | out: | 1478 | out_update: |
1942 | SetPageUptodate(page); | 1479 | SetPageUptodate(page); |
1480 | out_clear: | ||
1943 | clear_cold_data(page); | 1481 | clear_cold_data(page); |
1944 | return 0; | 1482 | return 0; |
1945 | 1483 | ||
@@ -1947,8 +1485,8 @@ put_fail: | |||
1947 | f2fs_put_dnode(&dn); | 1485 | f2fs_put_dnode(&dn); |
1948 | unlock_fail: | 1486 | unlock_fail: |
1949 | f2fs_unlock_op(sbi); | 1487 | f2fs_unlock_op(sbi); |
1950 | f2fs_put_page(page, 1); | ||
1951 | fail: | 1488 | fail: |
1489 | f2fs_put_page(page, 1); | ||
1952 | f2fs_write_failed(mapping, pos + len); | 1490 | f2fs_write_failed(mapping, pos + len); |
1953 | return err; | 1491 | return err; |
1954 | } | 1492 | } |
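The restructuring above does two things. First, every failure path now funnels through the single fail: label, which drops the page reference exactly once; the old code had per-site f2fs_put_page() calls that were easy to miss or duplicate. Second, the tail of the function splits into out_update (the page content is now complete, so mark it uptodate) versus out_clear (it already was uptodate, so only clear the cold flag). A sketch of the resulting three-way decision, with illustrative names only:

    #include <stdbool.h>

    enum prepare { SET_UPTODATE_ONLY, ALREADY_UPTODATE, FILL_THEN_UPTODATE };

    /* Illustrative classification of the cases in the hunk above. */
    static enum prepare classify_write_begin(unsigned int len, bool uptodate,
                                             unsigned int page_size)
    {
            if (len == page_size)           /* full overwrite: nothing to read */
                    return SET_UPTODATE_ONLY;
            if (uptodate)                   /* cached contents already valid */
                    return ALREADY_UPTODATE;
            return FILL_THEN_UPTODATE;      /* partial write: zero or read rest */
    }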
@@ -1979,9 +1517,6 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, | |||
1979 | { | 1517 | { |
1980 | unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; | 1518 | unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; |
1981 | 1519 | ||
1982 | if (iov_iter_rw(iter) == READ) | ||
1983 | return 0; | ||
1984 | |||
1985 | if (offset & blocksize_mask) | 1520 | if (offset & blocksize_mask) |
1986 | return -EINVAL; | 1521 | return -EINVAL; |
1987 | 1522 | ||
@@ -2010,15 +1545,16 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | |||
2010 | if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) | 1545 | if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) |
2011 | return 0; | 1546 | return 0; |
2012 | 1547 | ||
2013 | if (check_direct_IO(inode, iter, offset)) | 1548 | err = check_direct_IO(inode, iter, offset); |
2014 | return 0; | 1549 | if (err) |
1550 | return err; | ||
2015 | 1551 | ||
2016 | trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); | 1552 | trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); |
2017 | 1553 | ||
2018 | if (iov_iter_rw(iter) == WRITE) | 1554 | if (iov_iter_rw(iter) == WRITE) |
2019 | __allocate_data_blocks(inode, offset, count); | 1555 | __allocate_data_blocks(inode, offset, count); |
2020 | 1556 | ||
2021 | err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); | 1557 | err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); |
2022 | if (err < 0 && iov_iter_rw(iter) == WRITE) | 1558 | if (err < 0 && iov_iter_rw(iter) == WRITE) |
2023 | f2fs_write_failed(mapping, offset + count); | 1559 | f2fs_write_failed(mapping, offset + count); |
2024 | 1560 | ||
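check_direct_IO() now validates alignment for reads as well as writes (the READ early-return is gone), and f2fs_direct_IO() propagates its result instead of returning 0, which quietly degraded a misaligned request to buffered I/O. The direct path also gets its own block-mapping callback, get_data_block_dio, matching the F2FS_GET_BLOCK_DIO flag introduced in the f2fs.h hunk below. The propagated-error shape, in isolation:

    err = check_direct_IO(inode, iter, offset);
    if (err)
            return err;     /* typically -EINVAL for a misaligned offset */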
@@ -2045,6 +1581,11 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, | |||
2045 | else | 1581 | else |
2046 | inode_dec_dirty_pages(inode); | 1582 | inode_dec_dirty_pages(inode); |
2047 | } | 1583 | } |
1584 | |||
1585 | /* This is atomic written page, keep Private */ | ||
1586 | if (IS_ATOMIC_WRITTEN_PAGE(page)) | ||
1587 | return; | ||
1588 | |||
2048 | ClearPagePrivate(page); | 1589 | ClearPagePrivate(page); |
2049 | } | 1590 | } |
2050 | 1591 | ||
@@ -2054,6 +1595,10 @@ int f2fs_release_page(struct page *page, gfp_t wait) | |||
2054 | if (PageDirty(page)) | 1595 | if (PageDirty(page)) |
2055 | return 0; | 1596 | return 0; |
2056 | 1597 | ||
1598 | /* This is atomic written page, keep Private */ | ||
1599 | if (IS_ATOMIC_WRITTEN_PAGE(page)) | ||
1600 | return 0; | ||
1601 | |||
2057 | ClearPagePrivate(page); | 1602 | ClearPagePrivate(page); |
2058 | return 1; | 1603 | return 1; |
2059 | } | 1604 | } |
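Both f2fs_invalidate_page() and f2fs_release_page() now refuse to strip page->private from a page registered for an atomic write: the in-memory commit machinery keys off that field, and clearing it would orphan the page's inmem entry. IS_ATOMIC_WRITTEN_PAGE() itself is defined elsewhere in this series (fs/f2fs/segment.h, per the diffstat); presumably it compares page_private() against a sentinel stored by register_inmem_page(). An illustrative shape only, not the actual macro:

    /* hypothetical sentinel; the real definition lives in segment.h */
    #define DEMO_ATOMIC_WRITTEN_PAGE        ((unsigned long)-1)
    #define DEMO_IS_ATOMIC_WRITTEN_PAGE(page) \
            (page_private(page) == DEMO_ATOMIC_WRITTEN_PAGE)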
@@ -2068,8 +1613,15 @@ static int f2fs_set_data_page_dirty(struct page *page) | |||
2068 | SetPageUptodate(page); | 1613 | SetPageUptodate(page); |
2069 | 1614 | ||
2070 | if (f2fs_is_atomic_file(inode)) { | 1615 | if (f2fs_is_atomic_file(inode)) { |
2071 | register_inmem_page(inode, page); | 1616 | if (!IS_ATOMIC_WRITTEN_PAGE(page)) { |
2072 | return 1; | 1617 | register_inmem_page(inode, page); |
1618 | return 1; | ||
1619 | } | ||
1620 | /* | ||
1621 | * This page has already been registered; we just | ||
1622 | * return here. | ||
1623 | */ | ||
1624 | return 0; | ||
2073 | } | 1625 | } |
2074 | 1626 | ||
2075 | if (!PageDirty(page)) { | 1627 | if (!PageDirty(page)) { |

@@ -2090,38 +1642,7 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) | |||
2090 | if (err) | 1642 | if (err) |
2091 | return err; | 1643 | return err; |
2092 | } | 1644 | } |
2093 | return generic_block_bmap(mapping, block, get_data_block); | 1645 | return generic_block_bmap(mapping, block, get_data_block_bmap); |
2094 | } | ||
2095 | |||
2096 | void init_extent_cache_info(struct f2fs_sb_info *sbi) | ||
2097 | { | ||
2098 | INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); | ||
2099 | init_rwsem(&sbi->extent_tree_lock); | ||
2100 | INIT_LIST_HEAD(&sbi->extent_list); | ||
2101 | spin_lock_init(&sbi->extent_lock); | ||
2102 | sbi->total_ext_tree = 0; | ||
2103 | atomic_set(&sbi->total_ext_node, 0); | ||
2104 | } | ||
2105 | |||
2106 | int __init create_extent_cache(void) | ||
2107 | { | ||
2108 | extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", | ||
2109 | sizeof(struct extent_tree)); | ||
2110 | if (!extent_tree_slab) | ||
2111 | return -ENOMEM; | ||
2112 | extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", | ||
2113 | sizeof(struct extent_node)); | ||
2114 | if (!extent_node_slab) { | ||
2115 | kmem_cache_destroy(extent_tree_slab); | ||
2116 | return -ENOMEM; | ||
2117 | } | ||
2118 | return 0; | ||
2119 | } | ||
2120 | |||
2121 | void destroy_extent_cache(void) | ||
2122 | { | ||
2123 | kmem_cache_destroy(extent_node_slab); | ||
2124 | kmem_cache_destroy(extent_tree_slab); | ||
2125 | } | 1646 | } |
2126 | 1647 | ||
2127 | const struct address_space_operations f2fs_dblock_aops = { | 1648 | const struct address_space_operations f2fs_dblock_aops = { |
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 75176e0dd6c8..d013d8479753 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -33,8 +33,11 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
33 | int i; | 33 | int i; |
34 | 34 | ||
35 | /* validation check of the segment numbers */ | 35 | /* validation check of the segment numbers */ |
36 | si->hit_ext = sbi->read_hit_ext; | 36 | si->hit_largest = atomic_read(&sbi->read_hit_largest); |
37 | si->total_ext = sbi->total_hit_ext; | 37 | si->hit_cached = atomic_read(&sbi->read_hit_cached); |
38 | si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree); | ||
39 | si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree; | ||
40 | si->total_ext = atomic_read(&sbi->total_hit_ext); | ||
38 | si->ext_tree = sbi->total_ext_tree; | 41 | si->ext_tree = sbi->total_ext_tree; |
39 | si->ext_node = atomic_read(&sbi->total_ext_node); | 42 | si->ext_node = atomic_read(&sbi->total_ext_node); |
40 | si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); | 43 | si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); |
@@ -49,6 +52,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) | |||
49 | si->valid_count = valid_user_blocks(sbi); | 52 | si->valid_count = valid_user_blocks(sbi); |
50 | si->valid_node_count = valid_node_count(sbi); | 53 | si->valid_node_count = valid_node_count(sbi); |
51 | si->valid_inode_count = valid_inode_count(sbi); | 54 | si->valid_inode_count = valid_inode_count(sbi); |
55 | si->inline_xattr = atomic_read(&sbi->inline_xattr); | ||
52 | si->inline_inode = atomic_read(&sbi->inline_inode); | 56 | si->inline_inode = atomic_read(&sbi->inline_inode); |
53 | si->inline_dir = atomic_read(&sbi->inline_dir); | 57 | si->inline_dir = atomic_read(&sbi->inline_dir); |
54 | si->utilization = utilization(sbi); | 58 | si->utilization = utilization(sbi); |
@@ -226,6 +230,8 @@ static int stat_show(struct seq_file *s, void *v) | |||
226 | seq_printf(s, "Other: %u)\n - Data: %u\n", | 230 | seq_printf(s, "Other: %u)\n - Data: %u\n", |
227 | si->valid_node_count - si->valid_inode_count, | 231 | si->valid_node_count - si->valid_inode_count, |
228 | si->valid_count - si->valid_node_count); | 232 | si->valid_count - si->valid_node_count); |
233 | seq_printf(s, " - Inline_xattr Inode: %u\n", | ||
234 | si->inline_xattr); | ||
229 | seq_printf(s, " - Inline_data Inode: %u\n", | 235 | seq_printf(s, " - Inline_data Inode: %u\n", |
230 | si->inline_inode); | 236 | si->inline_inode); |
231 | seq_printf(s, " - Inline_dentry Inode: %u\n", | 237 | seq_printf(s, " - Inline_dentry Inode: %u\n", |
@@ -276,10 +282,16 @@ static int stat_show(struct seq_file *s, void *v) | |||
276 | si->bg_data_blks); | 282 | si->bg_data_blks); |
277 | seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, | 283 | seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, |
278 | si->bg_node_blks); | 284 | si->bg_node_blks); |
279 | seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", | 285 | seq_puts(s, "\nExtent Cache:\n"); |
280 | si->hit_ext, si->total_ext); | 286 | seq_printf(s, " - Hit Count: L1-1:%d L1-2:%d L2:%d\n", |
281 | seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); | 287 | si->hit_largest, si->hit_cached, |
282 | seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); | 288 | si->hit_rbtree); |
289 | seq_printf(s, " - Hit Ratio: %d%% (%d / %d)\n", | ||
290 | !si->total_ext ? 0 : | ||
291 | (si->hit_total * 100) / si->total_ext, | ||
292 | si->hit_total, si->total_ext); | ||
293 | seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n", | ||
294 | si->ext_tree, si->ext_node); | ||
283 | seq_puts(s, "\nBalancing F2FS Async:\n"); | 295 | seq_puts(s, "\nBalancing F2FS Async:\n"); |
284 | seq_printf(s, " - inmem: %4d, wb: %4d\n", | 296 | seq_printf(s, " - inmem: %4d, wb: %4d\n", |
285 | si->inmem_pages, si->wb_pages); | 297 | si->inmem_pages, si->wb_pages); |
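The single read_hit_ext counter becomes three per-level counters (largest, cached, rbtree), and the printed ratio is their sum over the total lookup count, guarded against dividing by zero. The arithmetic in isolation:

    int hit_total = hit_largest + hit_cached + hit_rbtree;
    int ratio = total_ext ? (hit_total * 100) / total_ext : 0;  /* integer percent */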
@@ -366,6 +378,12 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) | |||
366 | si->sbi = sbi; | 378 | si->sbi = sbi; |
367 | sbi->stat_info = si; | 379 | sbi->stat_info = si; |
368 | 380 | ||
381 | atomic_set(&sbi->total_hit_ext, 0); | ||
382 | atomic_set(&sbi->read_hit_rbtree, 0); | ||
383 | atomic_set(&sbi->read_hit_largest, 0); | ||
384 | atomic_set(&sbi->read_hit_cached, 0); | ||
385 | |||
386 | atomic_set(&sbi->inline_xattr, 0); | ||
369 | atomic_set(&sbi->inline_inode, 0); | 387 | atomic_set(&sbi->inline_inode, 0); |
370 | atomic_set(&sbi->inline_dir, 0); | 388 | atomic_set(&sbi->inline_dir, 0); |
371 | atomic_set(&sbi->inplace_count, 0); | 389 | atomic_set(&sbi->inplace_count, 0); |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a34ebd8312ab..8f15fc134040 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -718,8 +718,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
718 | if (inode) | 718 | if (inode) |
719 | f2fs_drop_nlink(dir, inode, NULL); | 719 | f2fs_drop_nlink(dir, inode, NULL); |
720 | 720 | ||
721 | if (bit_pos == NR_DENTRY_IN_BLOCK) { | 721 | if (bit_pos == NR_DENTRY_IN_BLOCK && |
722 | truncate_hole(dir, page->index, page->index + 1); | 722 | !truncate_hole(dir, page->index, page->index + 1)) { |
723 | clear_page_dirty_for_io(page); | 723 | clear_page_dirty_for_io(page); |
724 | ClearPagePrivate(page); | 724 | ClearPagePrivate(page); |
725 | ClearPageUptodate(page); | 725 | ClearPageUptodate(page); |
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c new file mode 100644 index 000000000000..997ac86f2a1d --- /dev/null +++ b/fs/f2fs/extent_cache.c | |||
@@ -0,0 +1,791 @@ | |||
1 | /* | ||
2 | * f2fs extent cache support | ||
3 | * | ||
4 | * Copyright (c) 2015 Motorola Mobility | ||
5 | * Copyright (c) 2015 Samsung Electronics | ||
6 | * Authors: Jaegeuk Kim <jaegeuk@kernel.org> | ||
7 | * Chao Yu <chao2.yu@samsung.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 as | ||
11 | * published by the Free Software Foundation. | ||
12 | */ | ||
13 | |||
14 | #include <linux/fs.h> | ||
15 | #include <linux/f2fs_fs.h> | ||
16 | |||
17 | #include "f2fs.h" | ||
18 | #include "node.h" | ||
19 | #include <trace/events/f2fs.h> | ||
20 | |||
21 | static struct kmem_cache *extent_tree_slab; | ||
22 | static struct kmem_cache *extent_node_slab; | ||
23 | |||
24 | static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, | ||
25 | struct extent_tree *et, struct extent_info *ei, | ||
26 | struct rb_node *parent, struct rb_node **p) | ||
27 | { | ||
28 | struct extent_node *en; | ||
29 | |||
30 | en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); | ||
31 | if (!en) | ||
32 | return NULL; | ||
33 | |||
34 | en->ei = *ei; | ||
35 | INIT_LIST_HEAD(&en->list); | ||
36 | |||
37 | rb_link_node(&en->rb_node, parent, p); | ||
38 | rb_insert_color(&en->rb_node, &et->root); | ||
39 | et->count++; | ||
40 | atomic_inc(&sbi->total_ext_node); | ||
41 | return en; | ||
42 | } | ||
43 | |||
44 | static void __detach_extent_node(struct f2fs_sb_info *sbi, | ||
45 | struct extent_tree *et, struct extent_node *en) | ||
46 | { | ||
47 | rb_erase(&en->rb_node, &et->root); | ||
48 | et->count--; | ||
49 | atomic_dec(&sbi->total_ext_node); | ||
50 | |||
51 | if (et->cached_en == en) | ||
52 | et->cached_en = NULL; | ||
53 | } | ||
54 | |||
55 | static struct extent_tree *__grab_extent_tree(struct inode *inode) | ||
56 | { | ||
57 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
58 | struct extent_tree *et; | ||
59 | nid_t ino = inode->i_ino; | ||
60 | |||
61 | down_write(&sbi->extent_tree_lock); | ||
62 | et = radix_tree_lookup(&sbi->extent_tree_root, ino); | ||
63 | if (!et) { | ||
64 | et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); | ||
65 | f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); | ||
66 | memset(et, 0, sizeof(struct extent_tree)); | ||
67 | et->ino = ino; | ||
68 | et->root = RB_ROOT; | ||
69 | et->cached_en = NULL; | ||
70 | rwlock_init(&et->lock); | ||
71 | atomic_set(&et->refcount, 0); | ||
72 | et->count = 0; | ||
73 | sbi->total_ext_tree++; | ||
74 | } | ||
75 | atomic_inc(&et->refcount); | ||
76 | up_write(&sbi->extent_tree_lock); | ||
77 | |||
78 | /* never dies until evict_inode */ | ||
79 | F2FS_I(inode)->extent_tree = et; | ||
80 | |||
81 | return et; | ||
82 | } | ||
83 | |||
84 | static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi, | ||
85 | struct extent_tree *et, unsigned int fofs) | ||
86 | { | ||
87 | struct rb_node *node = et->root.rb_node; | ||
88 | struct extent_node *en = et->cached_en; | ||
89 | |||
90 | if (en) { | ||
91 | struct extent_info *cei = &en->ei; | ||
92 | |||
93 | if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) { | ||
94 | stat_inc_cached_node_hit(sbi); | ||
95 | return en; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | while (node) { | ||
100 | en = rb_entry(node, struct extent_node, rb_node); | ||
101 | |||
102 | if (fofs < en->ei.fofs) { | ||
103 | node = node->rb_left; | ||
104 | } else if (fofs >= en->ei.fofs + en->ei.len) { | ||
105 | node = node->rb_right; | ||
106 | } else { | ||
107 | stat_inc_rbtree_node_hit(sbi); | ||
108 | return en; | ||
109 | } | ||
110 | } | ||
111 | return NULL; | ||
112 | } | ||
113 | |||
114 | static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, | ||
115 | struct extent_tree *et, struct extent_info *ei) | ||
116 | { | ||
117 | struct rb_node **p = &et->root.rb_node; | ||
118 | struct extent_node *en; | ||
119 | |||
120 | en = __attach_extent_node(sbi, et, ei, NULL, p); | ||
121 | if (!en) | ||
122 | return NULL; | ||
123 | |||
124 | et->largest = en->ei; | ||
125 | et->cached_en = en; | ||
126 | return en; | ||
127 | } | ||
128 | |||
129 | static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, | ||
130 | struct extent_tree *et, bool free_all) | ||
131 | { | ||
132 | struct rb_node *node, *next; | ||
133 | struct extent_node *en; | ||
134 | unsigned int count = et->count; | ||
135 | |||
136 | node = rb_first(&et->root); | ||
137 | while (node) { | ||
138 | next = rb_next(node); | ||
139 | en = rb_entry(node, struct extent_node, rb_node); | ||
140 | |||
141 | if (free_all) { | ||
142 | spin_lock(&sbi->extent_lock); | ||
143 | if (!list_empty(&en->list)) | ||
144 | list_del_init(&en->list); | ||
145 | spin_unlock(&sbi->extent_lock); | ||
146 | } | ||
147 | |||
148 | if (free_all || list_empty(&en->list)) { | ||
149 | __detach_extent_node(sbi, et, en); | ||
150 | kmem_cache_free(extent_node_slab, en); | ||
151 | } | ||
152 | node = next; | ||
153 | } | ||
154 | |||
155 | return count - et->count; | ||
156 | } | ||
157 | |||
158 | static void __drop_largest_extent(struct inode *inode, pgoff_t fofs) | ||
159 | { | ||
160 | struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; | ||
161 | |||
162 | if (largest->fofs <= fofs && largest->fofs + largest->len > fofs) | ||
163 | largest->len = 0; | ||
164 | } | ||
165 | |||
166 | void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs) | ||
167 | { | ||
168 | if (!f2fs_may_extent_tree(inode)) | ||
169 | return; | ||
170 | |||
171 | __drop_largest_extent(inode, fofs); | ||
172 | } | ||
173 | |||
174 | void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) | ||
175 | { | ||
176 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
177 | struct extent_tree *et; | ||
178 | struct extent_node *en; | ||
179 | struct extent_info ei; | ||
180 | |||
181 | if (!f2fs_may_extent_tree(inode)) | ||
182 | return; | ||
183 | |||
184 | et = __grab_extent_tree(inode); | ||
185 | |||
186 | if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) | ||
187 | return; | ||
188 | |||
189 | set_extent_info(&ei, le32_to_cpu(i_ext->fofs), | ||
190 | le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); | ||
191 | |||
192 | write_lock(&et->lock); | ||
193 | if (et->count) | ||
194 | goto out; | ||
195 | |||
196 | en = __init_extent_tree(sbi, et, &ei); | ||
197 | if (en) { | ||
198 | spin_lock(&sbi->extent_lock); | ||
199 | list_add_tail(&en->list, &sbi->extent_list); | ||
200 | spin_unlock(&sbi->extent_lock); | ||
201 | } | ||
202 | out: | ||
203 | write_unlock(&et->lock); | ||
204 | } | ||
205 | |||
206 | static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, | ||
207 | struct extent_info *ei) | ||
208 | { | ||
209 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
210 | struct extent_tree *et = F2FS_I(inode)->extent_tree; | ||
211 | struct extent_node *en; | ||
212 | bool ret = false; | ||
213 | |||
214 | f2fs_bug_on(sbi, !et); | ||
215 | |||
216 | trace_f2fs_lookup_extent_tree_start(inode, pgofs); | ||
217 | |||
218 | read_lock(&et->lock); | ||
219 | |||
220 | if (et->largest.fofs <= pgofs && | ||
221 | et->largest.fofs + et->largest.len > pgofs) { | ||
222 | *ei = et->largest; | ||
223 | ret = true; | ||
224 | stat_inc_largest_node_hit(sbi); | ||
225 | goto out; | ||
226 | } | ||
227 | |||
228 | en = __lookup_extent_tree(sbi, et, pgofs); | ||
229 | if (en) { | ||
230 | *ei = en->ei; | ||
231 | spin_lock(&sbi->extent_lock); | ||
232 | if (!list_empty(&en->list)) | ||
233 | list_move_tail(&en->list, &sbi->extent_list); | ||
234 | et->cached_en = en; | ||
235 | spin_unlock(&sbi->extent_lock); | ||
236 | ret = true; | ||
237 | } | ||
238 | out: | ||
239 | stat_inc_total_hit(sbi); | ||
240 | read_unlock(&et->lock); | ||
241 | |||
242 | trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei); | ||
243 | return ret; | ||
244 | } | ||
245 | |||
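A lookup therefore tries three levels in order: the per-tree largest extent (the L1-1 counter in the debug.c changes earlier in this patch), the last-hit node cached_en (L1-2), and finally the full rb-tree walk (L2). A user-space sketch of that ordering, with a sorted array standing in for the rb-tree and all locking and statistics omitted:

    #include <stdbool.h>
    #include <stddef.h>

    struct demo_extent { unsigned int fofs, len; };

    struct demo_tree {
            struct demo_extent largest;             /* L1-1 */
            const struct demo_extent *cached;       /* L1-2: last hit */
            const struct demo_extent *nodes;        /* L2: sorted by fofs */
            size_t nr;
    };

    static bool inside(const struct demo_extent *ex, unsigned int fofs)
    {
            return ex->len && ex->fofs <= fofs && fofs < ex->fofs + ex->len;
    }

    static const struct demo_extent *demo_lookup(struct demo_tree *t,
                                                 unsigned int fofs)
    {
            if (inside(&t->largest, fofs))
                    return &t->largest;             /* L1-1 hit */
            if (t->cached && inside(t->cached, fofs))
                    return t->cached;               /* L1-2 hit */
            for (size_t i = 0; i < t->nr; i++) {    /* L2: the tree walk */
                    if (inside(&t->nodes[i], fofs)) {
                            t->cached = &t->nodes[i];   /* refresh cached_en */
                            return t->cached;
                    }
            }
            return NULL;
    }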
246 | |||
247 | /* | ||
248 | * Look up the extent at @fofs; if hit, return the extent. | ||
249 | * If not, return NULL and set: | ||
250 | * @prev_ex: extent before fofs | ||
251 | * @next_ex: extent after fofs | ||
252 | * @insert_p: insert point for a new extent at fofs, | ||
253 | * in order to simplify the insertion afterwards. | ||
254 | * The tree must stay unchanged between lookup and insertion. | ||
255 | */ | ||
256 | static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et, | ||
257 | unsigned int fofs, | ||
258 | struct extent_node **prev_ex, | ||
259 | struct extent_node **next_ex, | ||
260 | struct rb_node ***insert_p, | ||
261 | struct rb_node **insert_parent) | ||
262 | { | ||
263 | struct rb_node **pnode = &et->root.rb_node; | ||
264 | struct rb_node *parent = NULL, *tmp_node; | ||
265 | struct extent_node *en = et->cached_en; | ||
266 | |||
267 | *insert_p = NULL; | ||
268 | *insert_parent = NULL; | ||
269 | *prev_ex = NULL; | ||
270 | *next_ex = NULL; | ||
271 | |||
272 | if (RB_EMPTY_ROOT(&et->root)) | ||
273 | return NULL; | ||
274 | |||
275 | if (en) { | ||
276 | struct extent_info *cei = &en->ei; | ||
277 | |||
278 | if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) | ||
279 | goto lookup_neighbors; | ||
280 | } | ||
281 | |||
282 | while (*pnode) { | ||
283 | parent = *pnode; | ||
284 | en = rb_entry(*pnode, struct extent_node, rb_node); | ||
285 | |||
286 | if (fofs < en->ei.fofs) | ||
287 | pnode = &(*pnode)->rb_left; | ||
288 | else if (fofs >= en->ei.fofs + en->ei.len) | ||
289 | pnode = &(*pnode)->rb_right; | ||
290 | else | ||
291 | goto lookup_neighbors; | ||
292 | } | ||
293 | |||
294 | *insert_p = pnode; | ||
295 | *insert_parent = parent; | ||
296 | |||
297 | en = rb_entry(parent, struct extent_node, rb_node); | ||
298 | tmp_node = parent; | ||
299 | if (parent && fofs > en->ei.fofs) | ||
300 | tmp_node = rb_next(parent); | ||
301 | *next_ex = tmp_node ? | ||
302 | rb_entry(tmp_node, struct extent_node, rb_node) : NULL; | ||
303 | |||
304 | tmp_node = parent; | ||
305 | if (parent && fofs < en->ei.fofs) | ||
306 | tmp_node = rb_prev(parent); | ||
307 | *prev_ex = tmp_node ? | ||
308 | rb_entry(tmp_node, struct extent_node, rb_node) : NULL; | ||
309 | return NULL; | ||
310 | |||
311 | lookup_neighbors: | ||
312 | if (fofs == en->ei.fofs) { | ||
313 | /* lookup prev node for merging backward later */ | ||
314 | tmp_node = rb_prev(&en->rb_node); | ||
315 | *prev_ex = tmp_node ? | ||
316 | rb_entry(tmp_node, struct extent_node, rb_node) : NULL; | ||
317 | } | ||
318 | if (fofs == en->ei.fofs + en->ei.len - 1) { | ||
319 | /* lookup next node for merging frontward later */ | ||
320 | tmp_node = rb_next(&en->rb_node); | ||
321 | *next_ex = tmp_node ? | ||
322 | rb_entry(tmp_node, struct extent_node, rb_node) : NULL; | ||
323 | } | ||
324 | return en; | ||
325 | } | ||
326 | |||
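On a miss, the function hands back the exact rb-tree link where the descent stopped (insert_p/insert_parent), so a following __insert_extent_tree() can link the new node without a second O(log n) walk. That is only sound while the tree stays untouched under the same write lock, which is why the update loop further down nulls both pointers whenever it detaches a node. The remembered-link pattern in rbtree idiom (a sketch; the demo_* names are illustrative):

    #include <linux/rbtree.h>

    struct demo_node {
            struct rb_node rb;
            unsigned int key;
    };

    static struct demo_node *demo_lookup_ret(struct rb_root *root,
                    unsigned int key, struct rb_node ***insert_p,
                    struct rb_node **insert_parent)
    {
            struct rb_node **p = &root->rb_node, *parent = NULL;

            while (*p) {
                    struct demo_node *n = rb_entry(*p, struct demo_node, rb);

                    parent = *p;
                    if (key < n->key)
                            p = &(*p)->rb_left;
                    else if (key > n->key)
                            p = &(*p)->rb_right;
                    else
                            return n;               /* hit */
            }
            *insert_p = p;                          /* where the walk stopped */
            *insert_parent = parent;
            return NULL;
    }

    /* valid only if the tree was not modified since demo_lookup_ret() */
    static void demo_insert_at(struct rb_root *root, struct demo_node *new,
                    struct rb_node **insert_p, struct rb_node *insert_parent)
    {
            rb_link_node(&new->rb, insert_parent, insert_p);
            rb_insert_color(&new->rb, root);
    }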
327 | static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, | ||
328 | struct extent_tree *et, struct extent_info *ei, | ||
329 | struct extent_node **den, | ||
330 | struct extent_node *prev_ex, | ||
331 | struct extent_node *next_ex) | ||
332 | { | ||
333 | struct extent_node *en = NULL; | ||
334 | |||
335 | if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { | ||
336 | prev_ex->ei.len += ei->len; | ||
337 | ei = &prev_ex->ei; | ||
338 | en = prev_ex; | ||
339 | } | ||
340 | |||
341 | if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) { | ||
342 | if (en) { | ||
343 | __detach_extent_node(sbi, et, prev_ex); | ||
344 | *den = prev_ex; | ||
345 | } | ||
346 | next_ex->ei.fofs = ei->fofs; | ||
347 | next_ex->ei.blk = ei->blk; | ||
348 | next_ex->ei.len += ei->len; | ||
349 | en = next_ex; | ||
350 | } | ||
351 | |||
352 | if (en) { | ||
353 | if (en->ei.len > et->largest.len) | ||
354 | et->largest = en->ei; | ||
355 | et->cached_en = en; | ||
356 | } | ||
357 | return en; | ||
358 | } | ||
359 | |||
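The back-merge extends prev_ex in place; if the front-merge then also fires, prev_ex is detached and handed back through *den so the caller can free it outside the list lock, and next_ex absorbs the whole run. The __is_back_mergeable()/__is_front_mergeable() predicates are defined in f2fs.h rather than in this hunk; presumably two extents merge when both their file offsets and their on-disk block addresses are contiguous:

    #include <stdbool.h>

    struct demo_extent { unsigned int fofs, blk, len; };

    /* assumed shape of the mergeability test: contiguous in file and on disk */
    static bool demo_contiguous(const struct demo_extent *front,
                                const struct demo_extent *back)
    {
            return front->fofs + front->len == back->fofs &&
                   front->blk + front->len == back->blk;
    }

Worked case where both merges fire: prev = [fofs 0, blk 100, len 4], new = [4, 104, 2], next = [6, 106, 3]. The back-merge grows prev to len 6; the front-merge then detaches prev (returned via *den) and folds everything into next, leaving the single extent [0, 100, 9].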
360 | static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, | ||
361 | struct extent_tree *et, struct extent_info *ei, | ||
362 | struct rb_node **insert_p, | ||
363 | struct rb_node *insert_parent) | ||
364 | { | ||
365 | struct rb_node **p = &et->root.rb_node; | ||
366 | struct rb_node *parent = NULL; | ||
367 | struct extent_node *en = NULL; | ||
368 | |||
369 | if (insert_p && insert_parent) { | ||
370 | parent = insert_parent; | ||
371 | p = insert_p; | ||
372 | goto do_insert; | ||
373 | } | ||
374 | |||
375 | while (*p) { | ||
376 | parent = *p; | ||
377 | en = rb_entry(parent, struct extent_node, rb_node); | ||
378 | |||
379 | if (ei->fofs < en->ei.fofs) | ||
380 | p = &(*p)->rb_left; | ||
381 | else if (ei->fofs >= en->ei.fofs + en->ei.len) | ||
382 | p = &(*p)->rb_right; | ||
383 | else | ||
384 | f2fs_bug_on(sbi, 1); | ||
385 | } | ||
386 | do_insert: | ||
387 | en = __attach_extent_node(sbi, et, ei, parent, p); | ||
388 | if (!en) | ||
389 | return NULL; | ||
390 | |||
391 | if (en->ei.len > et->largest.len) | ||
392 | et->largest = en->ei; | ||
393 | et->cached_en = en; | ||
394 | return en; | ||
395 | } | ||
396 | |||
397 | unsigned int f2fs_update_extent_tree_range(struct inode *inode, | ||
398 | pgoff_t fofs, block_t blkaddr, unsigned int len) | ||
399 | { | ||
400 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
401 | struct extent_tree *et = F2FS_I(inode)->extent_tree; | ||
402 | struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; | ||
403 | struct extent_node *prev_en = NULL, *next_en = NULL; | ||
404 | struct extent_info ei, dei, prev; | ||
405 | struct rb_node **insert_p = NULL, *insert_parent = NULL; | ||
406 | unsigned int end = fofs + len; | ||
407 | unsigned int pos = (unsigned int)fofs; | ||
408 | |||
409 | if (!et) | ||
410 | return false; | ||
411 | |||
412 | write_lock(&et->lock); | ||
413 | |||
414 | if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) { | ||
415 | write_unlock(&et->lock); | ||
416 | return false; | ||
417 | } | ||
418 | |||
419 | prev = et->largest; | ||
420 | dei.len = 0; | ||
421 | |||
422 | /* we do not guarantee that the largest extent is cached all the time */ | ||
423 | __drop_largest_extent(inode, fofs); | ||
424 | |||
425 | /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ | ||
426 | en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en, | ||
427 | &insert_p, &insert_parent); | ||
428 | if (!en) { | ||
429 | if (next_en) { | ||
430 | en = next_en; | ||
431 | f2fs_bug_on(sbi, en->ei.fofs <= pos); | ||
432 | pos = en->ei.fofs; | ||
433 | } else { | ||
434 | /* | ||
435 | * skip searching in the tree since there is no | ||
436 | * larger extent node in the cache. | ||
437 | */ | ||
438 | goto update_extent; | ||
439 | } | ||
440 | } | ||
441 | |||
442 | /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */ | ||
443 | while (en) { | ||
444 | struct rb_node *node; | ||
445 | |||
446 | if (pos >= end) | ||
447 | break; | ||
448 | |||
449 | dei = en->ei; | ||
450 | en1 = en2 = NULL; | ||
451 | |||
452 | node = rb_next(&en->rb_node); | ||
453 | |||
454 | /* | ||
455 | * 2.1 there are four cases when we invalidate blkaddr in extent | ||
456 | * node, |V: valid address, X: will be invalidated| | ||
457 | */ | ||
458 | /* case#1, invalidate right part of extent node |VVVVVXXXXX| */ | ||
459 | if (pos > dei.fofs && end >= dei.fofs + dei.len) { | ||
460 | en->ei.len = pos - dei.fofs; | ||
461 | |||
462 | if (en->ei.len < F2FS_MIN_EXTENT_LEN) { | ||
463 | __detach_extent_node(sbi, et, en); | ||
464 | insert_p = NULL; | ||
465 | insert_parent = NULL; | ||
466 | goto update; | ||
467 | } | ||
468 | |||
469 | if (__is_extent_same(&dei, &et->largest)) | ||
470 | et->largest = en->ei; | ||
471 | goto next; | ||
472 | } | ||
473 | |||
474 | /* case#2, invalidate left part of extent node |XXXXXVVVVV| */ | ||
475 | if (pos <= dei.fofs && end < dei.fofs + dei.len) { | ||
476 | en->ei.fofs = end; | ||
477 | en->ei.blk += end - dei.fofs; | ||
478 | en->ei.len -= end - dei.fofs; | ||
479 | |||
480 | if (en->ei.len < F2FS_MIN_EXTENT_LEN) { | ||
481 | __detach_extent_node(sbi, et, en); | ||
482 | insert_p = NULL; | ||
483 | insert_parent = NULL; | ||
484 | goto update; | ||
485 | } | ||
486 | |||
487 | if (__is_extent_same(&dei, &et->largest)) | ||
488 | et->largest = en->ei; | ||
489 | goto next; | ||
490 | } | ||
491 | |||
492 | __detach_extent_node(sbi, et, en); | ||
493 | |||
494 | /* | ||
495 | * if we remove node in rb-tree, our parent node pointer may | ||
496 | * point the wrong place, discard them. | ||
497 | */ | ||
498 | insert_p = NULL; | ||
499 | insert_parent = NULL; | ||
500 | |||
501 | /* case#3, invalidate entire extent node |XXXXXXXXXX| */ | ||
502 | if (pos <= dei.fofs && end >= dei.fofs + dei.len) { | ||
503 | if (__is_extent_same(&dei, &et->largest)) | ||
504 | et->largest.len = 0; | ||
505 | goto update; | ||
506 | } | ||
507 | |||
508 | /* | ||
509 | * case#4, invalidate data in the middle of extent node | ||
510 | * |VVVXXXXVVV| | ||
511 | */ | ||
512 | if (dei.len > F2FS_MIN_EXTENT_LEN) { | ||
513 | unsigned int endofs; | ||
514 | |||
515 | /* insert left part of split extent into cache */ | ||
516 | if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) { | ||
517 | set_extent_info(&ei, dei.fofs, dei.blk, | ||
518 | pos - dei.fofs); | ||
519 | en1 = __insert_extent_tree(sbi, et, &ei, | ||
520 | NULL, NULL); | ||
521 | } | ||
522 | |||
523 | /* insert right part of split extent into cache */ | ||
524 | endofs = dei.fofs + dei.len; | ||
525 | if (endofs - end >= F2FS_MIN_EXTENT_LEN) { | ||
526 | set_extent_info(&ei, end, | ||
527 | end - dei.fofs + dei.blk, | ||
528 | endofs - end); | ||
529 | en2 = __insert_extent_tree(sbi, et, &ei, | ||
530 | NULL, NULL); | ||
531 | } | ||
532 | } | ||
533 | update: | ||
534 | /* 2.2 update in global extent list */ | ||
535 | spin_lock(&sbi->extent_lock); | ||
536 | if (en && !list_empty(&en->list)) | ||
537 | list_del(&en->list); | ||
538 | if (en1) | ||
539 | list_add_tail(&en1->list, &sbi->extent_list); | ||
540 | if (en2) | ||
541 | list_add_tail(&en2->list, &sbi->extent_list); | ||
542 | spin_unlock(&sbi->extent_lock); | ||
543 | |||
544 | /* 2.3 release extent node */ | ||
545 | if (en) | ||
546 | kmem_cache_free(extent_node_slab, en); | ||
547 | next: | ||
548 | en = node ? rb_entry(node, struct extent_node, rb_node) : NULL; | ||
549 | next_en = en; | ||
550 | if (en) | ||
551 | pos = en->ei.fofs; | ||
552 | } | ||
553 | |||
554 | update_extent: | ||
555 | /* 3. update extent in extent cache */ | ||
556 | if (blkaddr) { | ||
557 | struct extent_node *den = NULL; | ||
558 | |||
559 | set_extent_info(&ei, fofs, blkaddr, len); | ||
560 | en3 = __try_merge_extent_node(sbi, et, &ei, &den, | ||
561 | prev_en, next_en); | ||
562 | if (!en3) | ||
563 | en3 = __insert_extent_tree(sbi, et, &ei, | ||
564 | insert_p, insert_parent); | ||
565 | |||
566 | /* give up extent_cache if split and small updates happen */ | ||
567 | if (dei.len >= 1 && | ||
568 | prev.len < F2FS_MIN_EXTENT_LEN && | ||
569 | et->largest.len < F2FS_MIN_EXTENT_LEN) { | ||
570 | et->largest.len = 0; | ||
571 | set_inode_flag(F2FS_I(inode), FI_NO_EXTENT); | ||
572 | } | ||
573 | |||
574 | spin_lock(&sbi->extent_lock); | ||
575 | if (en3) { | ||
576 | if (list_empty(&en3->list)) | ||
577 | list_add_tail(&en3->list, &sbi->extent_list); | ||
578 | else | ||
579 | list_move_tail(&en3->list, &sbi->extent_list); | ||
580 | } | ||
581 | if (den && !list_empty(&den->list)) | ||
582 | list_del(&den->list); | ||
583 | spin_unlock(&sbi->extent_lock); | ||
584 | |||
585 | if (den) | ||
586 | kmem_cache_free(extent_node_slab, den); | ||
587 | } | ||
588 | |||
589 | if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) | ||
590 | __free_extent_tree(sbi, et, true); | ||
591 | |||
592 | write_unlock(&et->lock); | ||
593 | |||
594 | return !__is_extent_same(&prev, &et->largest); | ||
595 | } | ||
596 | |||
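The update is punch-then-reinsert: the loop removes [fofs, fofs + len) from every overlapping node according to the four cases in the comment, and only afterwards (at update_extent:) inserts the new mapping when blkaddr is non-zero, so a zero blkaddr acts as pure invalidation. A user-space sketch of the overlap arithmetic; the caller is assumed to pass a range that actually overlaps the extent, and pieces shorter than min_len are dropped, mirroring the F2FS_MIN_EXTENT_LEN checks above:

    struct demo_extent { unsigned int fofs, blk, len; };

    /* returns 0 pieces (case #3), 1 (case #1 or #2), or 2 (case #4) */
    static int demo_invalidate(const struct demo_extent *ex,
                               unsigned int pos, unsigned int end,
                               unsigned int min_len, struct demo_extent out[2])
    {
            int n = 0;

            /* left piece survives: |VVVVVXXXXX| or |VVVXXXXVVV| */
            if (pos > ex->fofs && pos - ex->fofs >= min_len)
                    out[n++] = (struct demo_extent){
                            ex->fofs, ex->blk, pos - ex->fofs };

            /* right piece survives: |XXXXXVVVVV| or |VVVXXXXVVV| */
            if (end < ex->fofs + ex->len &&
                ex->fofs + ex->len - end >= min_len)
                    out[n++] = (struct demo_extent){
                            end, ex->blk + (end - ex->fofs),
                            ex->fofs + ex->len - end };

            return n;
    }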
597 | unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) | ||
598 | { | ||
599 | struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; | ||
600 | struct extent_node *en, *tmp; | ||
601 | unsigned long ino = F2FS_ROOT_INO(sbi); | ||
602 | struct radix_tree_root *root = &sbi->extent_tree_root; | ||
603 | unsigned int found; | ||
604 | unsigned int node_cnt = 0, tree_cnt = 0; | ||
605 | int remained; | ||
606 | |||
607 | if (!test_opt(sbi, EXTENT_CACHE)) | ||
608 | return 0; | ||
609 | |||
610 | if (!down_write_trylock(&sbi->extent_tree_lock)) | ||
611 | goto out; | ||
612 | |||
613 | /* 1. remove unreferenced extent tree */ | ||
614 | while ((found = radix_tree_gang_lookup(root, | ||
615 | (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { | ||
616 | unsigned i; | ||
617 | |||
618 | ino = treevec[found - 1]->ino + 1; | ||
619 | for (i = 0; i < found; i++) { | ||
620 | struct extent_tree *et = treevec[i]; | ||
621 | |||
622 | if (!atomic_read(&et->refcount)) { | ||
623 | write_lock(&et->lock); | ||
624 | node_cnt += __free_extent_tree(sbi, et, true); | ||
625 | write_unlock(&et->lock); | ||
626 | |||
627 | radix_tree_delete(root, et->ino); | ||
628 | kmem_cache_free(extent_tree_slab, et); | ||
629 | sbi->total_ext_tree--; | ||
630 | tree_cnt++; | ||
631 | |||
632 | if (node_cnt + tree_cnt >= nr_shrink) | ||
633 | goto unlock_out; | ||
634 | } | ||
635 | } | ||
636 | } | ||
637 | up_write(&sbi->extent_tree_lock); | ||
638 | |||
639 | /* 2. remove LRU extent entries */ | ||
640 | if (!down_write_trylock(&sbi->extent_tree_lock)) | ||
641 | goto out; | ||
642 | |||
643 | remained = nr_shrink - (node_cnt + tree_cnt); | ||
644 | |||
645 | spin_lock(&sbi->extent_lock); | ||
646 | list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { | ||
647 | if (!remained--) | ||
648 | break; | ||
649 | list_del_init(&en->list); | ||
650 | } | ||
651 | spin_unlock(&sbi->extent_lock); | ||
652 | |||
653 | while ((found = radix_tree_gang_lookup(root, | ||
654 | (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { | ||
655 | unsigned i; | ||
656 | |||
657 | ino = treevec[found - 1]->ino + 1; | ||
658 | for (i = 0; i < found; i++) { | ||
659 | struct extent_tree *et = treevec[i]; | ||
660 | |||
661 | write_lock(&et->lock); | ||
662 | node_cnt += __free_extent_tree(sbi, et, false); | ||
663 | write_unlock(&et->lock); | ||
664 | |||
665 | if (node_cnt + tree_cnt >= nr_shrink) | ||
666 | break; | ||
667 | } | ||
668 | } | ||
669 | unlock_out: | ||
670 | up_write(&sbi->extent_tree_lock); | ||
671 | out: | ||
672 | trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); | ||
673 | |||
674 | return node_cnt + tree_cnt; | ||
675 | } | ||
676 | |||
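Reclaim runs in two phases: first, whole trees whose refcount dropped to zero (their inodes are already evicted) are freed; then up to the remaining budget of nodes is unhooked from the global LRU list, after which __free_extent_tree(..., false) frees exactly the nodes no longer on any list. The glue that calls this lives in the new fs/f2fs/shrinker.c (see the diffstat above), which is not part of this hunk; a plausible, purely illustrative shape for such a scan callback, with names and bookkeeping assumed:

    /* demo_sbi_from_shrinker() is a hypothetical lookup, not f2fs code */
    static unsigned long demo_shrink_scan(struct shrinker *shrink,
                                          struct shrink_control *sc)
    {
            struct f2fs_sb_info *sbi = demo_sbi_from_shrinker(shrink);

            /* reclaim up to sc->nr_to_scan extent nodes and trees */
            return f2fs_shrink_extent_tree(sbi, sc->nr_to_scan);
    }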
677 | unsigned int f2fs_destroy_extent_node(struct inode *inode) | ||
678 | { | ||
679 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
680 | struct extent_tree *et = F2FS_I(inode)->extent_tree; | ||
681 | unsigned int node_cnt = 0; | ||
682 | |||
683 | if (!et) | ||
684 | return 0; | ||
685 | |||
686 | write_lock(&et->lock); | ||
687 | node_cnt = __free_extent_tree(sbi, et, true); | ||
688 | write_unlock(&et->lock); | ||
689 | |||
690 | return node_cnt; | ||
691 | } | ||
692 | |||
693 | void f2fs_destroy_extent_tree(struct inode *inode) | ||
694 | { | ||
695 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
696 | struct extent_tree *et = F2FS_I(inode)->extent_tree; | ||
697 | unsigned int node_cnt = 0; | ||
698 | |||
699 | if (!et) | ||
700 | return; | ||
701 | |||
702 | if (inode->i_nlink && !is_bad_inode(inode) && et->count) { | ||
703 | atomic_dec(&et->refcount); | ||
704 | return; | ||
705 | } | ||
706 | |||
707 | /* free all extent info belonging to this extent tree */ | ||
708 | node_cnt = f2fs_destroy_extent_node(inode); | ||
709 | |||
710 | /* delete extent tree entry in radix tree */ | ||
711 | down_write(&sbi->extent_tree_lock); | ||
712 | atomic_dec(&et->refcount); | ||
713 | f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); | ||
714 | radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); | ||
715 | kmem_cache_free(extent_tree_slab, et); | ||
716 | sbi->total_ext_tree--; | ||
717 | up_write(&sbi->extent_tree_lock); | ||
718 | |||
719 | F2FS_I(inode)->extent_tree = NULL; | ||
720 | |||
721 | trace_f2fs_destroy_extent_tree(inode, node_cnt); | ||
722 | } | ||
723 | |||
724 | bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, | ||
725 | struct extent_info *ei) | ||
726 | { | ||
727 | if (!f2fs_may_extent_tree(inode)) | ||
728 | return false; | ||
729 | |||
730 | return f2fs_lookup_extent_tree(inode, pgofs, ei); | ||
731 | } | ||
732 | |||
733 | void f2fs_update_extent_cache(struct dnode_of_data *dn) | ||
734 | { | ||
735 | struct f2fs_inode_info *fi = F2FS_I(dn->inode); | ||
736 | pgoff_t fofs; | ||
737 | |||
738 | if (!f2fs_may_extent_tree(dn->inode)) | ||
739 | return; | ||
740 | |||
741 | f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); | ||
742 | |||
743 | |||
744 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + | ||
745 | dn->ofs_in_node; | ||
746 | |||
747 | if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1)) | ||
748 | sync_inode_page(dn); | ||
749 | } | ||
750 | |||
751 | void f2fs_update_extent_cache_range(struct dnode_of_data *dn, | ||
752 | pgoff_t fofs, block_t blkaddr, unsigned int len) | ||
753 | |||
754 | { | ||
755 | if (!f2fs_may_extent_tree(dn->inode)) | ||
756 | return; | ||
757 | |||
758 | if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len)) | ||
759 | sync_inode_page(dn); | ||
760 | } | ||
761 | |||
762 | void init_extent_cache_info(struct f2fs_sb_info *sbi) | ||
763 | { | ||
764 | INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); | ||
765 | init_rwsem(&sbi->extent_tree_lock); | ||
766 | INIT_LIST_HEAD(&sbi->extent_list); | ||
767 | spin_lock_init(&sbi->extent_lock); | ||
768 | sbi->total_ext_tree = 0; | ||
769 | atomic_set(&sbi->total_ext_node, 0); | ||
770 | } | ||
771 | |||
772 | int __init create_extent_cache(void) | ||
773 | { | ||
774 | extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", | ||
775 | sizeof(struct extent_tree)); | ||
776 | if (!extent_tree_slab) | ||
777 | return -ENOMEM; | ||
778 | extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", | ||
779 | sizeof(struct extent_node)); | ||
780 | if (!extent_node_slab) { | ||
781 | kmem_cache_destroy(extent_tree_slab); | ||
782 | return -ENOMEM; | ||
783 | } | ||
784 | return 0; | ||
785 | } | ||
786 | |||
787 | void destroy_extent_cache(void) | ||
788 | { | ||
789 | kmem_cache_destroy(extent_node_slab); | ||
790 | kmem_cache_destroy(extent_tree_slab); | ||
791 | } | ||
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a8327ed73898..f1a90ffd7cad 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/magic.h> | 19 | #include <linux/magic.h> |
20 | #include <linux/kobject.h> | 20 | #include <linux/kobject.h> |
21 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
22 | #include <linux/bio.h> | ||
22 | 23 | ||
23 | #ifdef CONFIG_F2FS_CHECK_FS | 24 | #ifdef CONFIG_F2FS_CHECK_FS |
24 | #define f2fs_bug_on(sbi, condition) BUG_ON(condition) | 25 | #define f2fs_bug_on(sbi, condition) BUG_ON(condition) |
@@ -228,6 +229,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, | |||
228 | #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) | 229 | #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) |
229 | #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) | 230 | #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) |
230 | #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) | 231 | #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) |
232 | #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) | ||
231 | 233 | ||
232 | #define F2FS_IOC_SET_ENCRYPTION_POLICY \ | 234 | #define F2FS_IOC_SET_ENCRYPTION_POLICY \ |
233 | _IOR('f', 19, struct f2fs_encryption_policy) | 235 | _IOR('f', 19, struct f2fs_encryption_policy) |
@@ -320,7 +322,7 @@ enum { | |||
320 | */ | 322 | */ |
321 | }; | 323 | }; |
322 | 324 | ||
323 | #define F2FS_LINK_MAX 32000 /* maximum link count per file */ | 325 | #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ |
324 | 326 | ||
325 | #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ | 327 | #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ |
326 | 328 | ||
@@ -349,6 +351,7 @@ struct extent_tree { | |||
349 | nid_t ino; /* inode number */ | 351 | nid_t ino; /* inode number */ |
350 | struct rb_root root; /* root of extent info rb-tree */ | 352 | struct rb_root root; /* root of extent info rb-tree */ |
351 | struct extent_node *cached_en; /* recently accessed extent node */ | 353 | struct extent_node *cached_en; /* recently accessed extent node */ |
354 | struct extent_info largest; /* largest extent info */ | ||
352 | rwlock_t lock; /* protect extent info rb-tree */ | 355 | rwlock_t lock; /* protect extent info rb-tree */ |
353 | atomic_t refcount; /* reference count of rb-tree */ | 356 | atomic_t refcount; /* reference count of rb-tree */ |
354 | unsigned int count; /* # of extent node in rb-tree*/ | 357 | unsigned int count; /* # of extent node in rb-tree*/ |
@@ -372,6 +375,12 @@ struct f2fs_map_blocks { | |||
372 | unsigned int m_flags; | 375 | unsigned int m_flags; |
373 | }; | 376 | }; |
374 | 377 | ||
378 | /* for flag in get_data_block */ | ||
379 | #define F2FS_GET_BLOCK_READ 0 | ||
380 | #define F2FS_GET_BLOCK_DIO 1 | ||
381 | #define F2FS_GET_BLOCK_FIEMAP 2 | ||
382 | #define F2FS_GET_BLOCK_BMAP 3 | ||
383 | |||
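These flags let a single block-mapping implementation serve every caller; the data.c changes above already switch direct I/O to get_data_block_dio and bmap to get_data_block_bmap. The hunk defining those wrappers is not shown here, but their plausible shape is a thin per-flag forwarder (illustrative only; __get_data_block is an assumed shared helper):

    static int get_data_block_dio(struct inode *inode, sector_t iblock,
                    struct buffer_head *bh_result, int create)
    {
            /* forward to the shared routine with the DIO policy flag */
            return __get_data_block(inode, iblock, bh_result, create,
                                    F2FS_GET_BLOCK_DIO);
    }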
375 | /* | 384 | /* |
376 | * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. | 385 | * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. |
377 | */ | 386 | */ |
@@ -420,14 +429,13 @@ struct f2fs_inode_info { | |||
420 | unsigned int clevel; /* maximum level of given file name */ | 429 | unsigned int clevel; /* maximum level of given file name */ |
421 | nid_t i_xattr_nid; /* node id that contains xattrs */ | 430 | nid_t i_xattr_nid; /* node id that contains xattrs */ |
422 | unsigned long long xattr_ver; /* cp version of xattr modification */ | 431 | unsigned long long xattr_ver; /* cp version of xattr modification */ |
423 | struct extent_info ext; /* in-memory extent cache entry */ | ||
424 | rwlock_t ext_lock; /* rwlock for single extent cache */ | ||
425 | struct inode_entry *dirty_dir; /* the pointer of dirty dir */ | 432 | struct inode_entry *dirty_dir; /* the pointer of dirty dir */ |
426 | 433 | ||
427 | struct radix_tree_root inmem_root; /* radix tree for inmem pages */ | ||
428 | struct list_head inmem_pages; /* inmemory pages managed by f2fs */ | 434 | struct list_head inmem_pages; /* inmemory pages managed by f2fs */ |
429 | struct mutex inmem_lock; /* lock for inmemory pages */ | 435 | struct mutex inmem_lock; /* lock for inmemory pages */ |
430 | 436 | ||
437 | struct extent_tree *extent_tree; /* cached extent_tree entry */ | ||
438 | |||
431 | #ifdef CONFIG_F2FS_FS_ENCRYPTION | 439 | #ifdef CONFIG_F2FS_FS_ENCRYPTION |
432 | /* Encryption params */ | 440 | /* Encryption params */ |
433 | struct f2fs_crypt_info *i_crypt_info; | 441 | struct f2fs_crypt_info *i_crypt_info; |
@@ -779,7 +787,11 @@ struct f2fs_sb_info { | |||
779 | unsigned int segment_count[2]; /* # of allocated segments */ | 787 | unsigned int segment_count[2]; /* # of allocated segments */ |
780 | unsigned int block_count[2]; /* # of allocated blocks */ | 788 | unsigned int block_count[2]; /* # of allocated blocks */ |
781 | atomic_t inplace_count; /* # of inplace update */ | 789 | atomic_t inplace_count; /* # of inplace update */ |
782 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ | 790 | atomic_t total_hit_ext; /* # of extent cache lookups */ |
791 | atomic_t read_hit_rbtree; /* # of hit rbtree extent node */ | ||
792 | atomic_t read_hit_largest; /* # of hit largest extent node */ | ||
793 | atomic_t read_hit_cached; /* # of hit cached extent node */ | ||
794 | atomic_t inline_xattr; /* # of inline_xattr inodes */ | ||
783 | atomic_t inline_inode; /* # of inline_data inodes */ | 795 | atomic_t inline_inode; /* # of inline_data inodes */ |
784 | atomic_t inline_dir; /* # of inline_dentry inodes */ | 796 | atomic_t inline_dir; /* # of inline_dentry inodes */ |
785 | int bg_gc; /* background gc calls */ | 797 | int bg_gc; /* background gc calls */ |
@@ -791,6 +803,11 @@ struct f2fs_sb_info { | |||
791 | /* For sysfs support */ | 803 | /* For sysfs support */ |
792 | struct kobject s_kobj; | 804 | struct kobject s_kobj; |
793 | struct completion s_kobj_unregister; | 805 | struct completion s_kobj_unregister; |
806 | |||
807 | /* For shrinker support */ | ||
808 | struct list_head s_list; | ||
809 | struct mutex umount_mutex; | ||
810 | unsigned int shrinker_run_no; | ||
794 | }; | 811 | }; |
795 | 812 | ||
796 | /* | 813 | /* |
@@ -1039,7 +1056,8 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) | |||
1039 | 1056 | ||
1040 | static inline void inode_dec_dirty_pages(struct inode *inode) | 1057 | static inline void inode_dec_dirty_pages(struct inode *inode) |
1041 | { | 1058 | { |
1042 | if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) | 1059 | if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && |
1060 | !S_ISLNK(inode->i_mode)) | ||
1043 | return; | 1061 | return; |
1044 | 1062 | ||
1045 | atomic_dec(&F2FS_I(inode)->dirty_pages); | 1063 | atomic_dec(&F2FS_I(inode)->dirty_pages); |
@@ -1234,16 +1252,24 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, | |||
1234 | gfp_t flags) | 1252 | gfp_t flags) |
1235 | { | 1253 | { |
1236 | void *entry; | 1254 | void *entry; |
1237 | retry: | ||
1238 | entry = kmem_cache_alloc(cachep, flags); | ||
1239 | if (!entry) { | ||
1240 | cond_resched(); | ||
1241 | goto retry; | ||
1242 | } | ||
1243 | 1255 | ||
1256 | entry = kmem_cache_alloc(cachep, flags); | ||
1257 | if (!entry) | ||
1258 | entry = kmem_cache_alloc(cachep, flags | __GFP_NOFAIL); | ||
1244 | return entry; | 1259 | return entry; |
1245 | } | 1260 | } |
1246 | 1261 | ||
1262 | static inline struct bio *f2fs_bio_alloc(int npages) | ||
1263 | { | ||
1264 | struct bio *bio; | ||
1265 | |||
1266 | /* No failure on bio allocation */ | ||
1267 | bio = bio_alloc(GFP_NOIO, npages); | ||
1268 | if (!bio) | ||
1269 | bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); | ||
1270 | return bio; | ||
1271 | } | ||
1272 | |||
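Both helpers replace the old open-coded retry loop (kmem_cache_alloc, cond_resched, goto) with a single fallback attempt carrying __GFP_NOFAIL: the busy-wait moves into the page allocator, which can block and reclaim properly, and the "must not fail" requirement is stated to the MM subsystem instead of being hidden in a loop. The general pattern, in isolation:

    void *obj = kmem_cache_alloc(cachep, flags);
    if (!obj)                       /* cheap attempt failed: now insist */
            obj = kmem_cache_alloc(cachep, flags | __GFP_NOFAIL);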
1247 | static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, | 1273 | static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, |
1248 | unsigned long index, void *item) | 1274 | unsigned long index, void *item) |
1249 | { | 1275 | { |
@@ -1342,6 +1368,7 @@ enum { | |||
1342 | FI_INC_LINK, /* need to increment i_nlink */ | 1368 | FI_INC_LINK, /* need to increment i_nlink */ |
1343 | FI_ACL_MODE, /* indicate acl mode */ | 1369 | FI_ACL_MODE, /* indicate acl mode */ |
1344 | FI_NO_ALLOC, /* should not allocate any blocks */ | 1370 | FI_NO_ALLOC, /* should not allocate any blocks */ |
1371 | FI_FREE_NID, /* free allocated nid */ | ||
1345 | FI_UPDATE_DIR, /* should update inode block for consistency */ | 1372 | FI_UPDATE_DIR, /* should update inode block for consistency */ |
1346 | FI_DELAY_IPUT, /* used for the recovery */ | 1373 | FI_DELAY_IPUT, /* used for the recovery */ |
1347 | FI_NO_EXTENT, /* not to use the extent cache */ | 1374 | FI_NO_EXTENT, /* not to use the extent cache */ |
@@ -1541,6 +1568,17 @@ static inline bool is_dot_dotdot(const struct qstr *str) | |||
1541 | return false; | 1568 | return false; |
1542 | } | 1569 | } |
1543 | 1570 | ||
1571 | static inline bool f2fs_may_extent_tree(struct inode *inode) | ||
1572 | { | ||
1573 | mode_t mode = inode->i_mode; | ||
1574 | |||
1575 | if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || | ||
1576 | is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) | ||
1577 | return false; | ||
1578 | |||
1579 | return S_ISREG(mode); | ||
1580 | } | ||
1581 | |||
1544 | #define get_inode_mode(i) \ | 1582 | #define get_inode_mode(i) \ |
1545 | ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ | 1583 | ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ |
1546 | (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) | 1584 | (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) |
@@ -1557,7 +1595,7 @@ static inline bool is_dot_dotdot(const struct qstr *str) | |||
1557 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); | 1595 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); |
1558 | void truncate_data_blocks(struct dnode_of_data *); | 1596 | void truncate_data_blocks(struct dnode_of_data *); |
1559 | int truncate_blocks(struct inode *, u64, bool); | 1597 | int truncate_blocks(struct inode *, u64, bool); |
1560 | void f2fs_truncate(struct inode *); | 1598 | int f2fs_truncate(struct inode *, bool); |
1561 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | 1599 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); |
1562 | int f2fs_setattr(struct dentry *, struct iattr *); | 1600 | int f2fs_setattr(struct dentry *, struct iattr *); |
1563 | int truncate_hole(struct inode *, pgoff_t, pgoff_t); | 1601 | int truncate_hole(struct inode *, pgoff_t, pgoff_t); |
@@ -1649,7 +1687,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | |||
1649 | int truncate_inode_blocks(struct inode *, pgoff_t); | 1687 | int truncate_inode_blocks(struct inode *, pgoff_t); |
1650 | int truncate_xattr_node(struct inode *, struct page *); | 1688 | int truncate_xattr_node(struct inode *, struct page *); |
1651 | int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); | 1689 | int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); |
1652 | void remove_inode_page(struct inode *); | 1690 | int remove_inode_page(struct inode *); |
1653 | struct page *new_inode_page(struct inode *); | 1691 | struct page *new_inode_page(struct inode *); |
1654 | struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); | 1692 | struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); |
1655 | void ra_node_page(struct f2fs_sb_info *, nid_t); | 1693 | void ra_node_page(struct f2fs_sb_info *, nid_t); |
@@ -1660,6 +1698,7 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); | |||
1660 | bool alloc_nid(struct f2fs_sb_info *, nid_t *); | 1698 | bool alloc_nid(struct f2fs_sb_info *, nid_t *); |
1661 | void alloc_nid_done(struct f2fs_sb_info *, nid_t); | 1699 | void alloc_nid_done(struct f2fs_sb_info *, nid_t); |
1662 | void alloc_nid_failed(struct f2fs_sb_info *, nid_t); | 1700 | void alloc_nid_failed(struct f2fs_sb_info *, nid_t); |
1701 | int try_to_free_nids(struct f2fs_sb_info *, int); | ||
1663 | void recover_inline_xattr(struct inode *, struct page *); | 1702 | void recover_inline_xattr(struct inode *, struct page *); |
1664 | void recover_xattr_data(struct inode *, struct page *, block_t); | 1703 | void recover_xattr_data(struct inode *, struct page *, block_t); |
1665 | int recover_inode_page(struct f2fs_sb_info *, struct page *); | 1704 | int recover_inode_page(struct f2fs_sb_info *, struct page *); |
@@ -1675,7 +1714,7 @@ void destroy_node_manager_caches(void); | |||
1675 | * segment.c | 1714 | * segment.c |
1676 | */ | 1715 | */ |
1677 | void register_inmem_page(struct inode *, struct page *); | 1716 | void register_inmem_page(struct inode *, struct page *); |
1678 | void commit_inmem_pages(struct inode *, bool); | 1717 | int commit_inmem_pages(struct inode *, bool); |
1679 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1718 | void f2fs_balance_fs(struct f2fs_sb_info *); |
1680 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); | 1719 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); |
1681 | int f2fs_issue_flush(struct f2fs_sb_info *); | 1720 | int f2fs_issue_flush(struct f2fs_sb_info *); |
@@ -1685,7 +1724,7 @@ void invalidate_blocks(struct f2fs_sb_info *, block_t); | |||
1685 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | 1724 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); |
1686 | void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); | 1725 | void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); |
1687 | void release_discard_addrs(struct f2fs_sb_info *); | 1726 | void release_discard_addrs(struct f2fs_sb_info *); |
1688 | void discard_next_dnode(struct f2fs_sb_info *, block_t); | 1727 | bool discard_next_dnode(struct f2fs_sb_info *, block_t); |
1689 | int npages_for_summary_flush(struct f2fs_sb_info *, bool); | 1728 | int npages_for_summary_flush(struct f2fs_sb_info *, bool); |
1690 | void allocate_new_segments(struct f2fs_sb_info *); | 1729 | void allocate_new_segments(struct f2fs_sb_info *); |
1691 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); | 1730 | int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); |
@@ -1727,7 +1766,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *); | |||
1727 | void release_orphan_inode(struct f2fs_sb_info *); | 1766 | void release_orphan_inode(struct f2fs_sb_info *); |
1728 | void add_orphan_inode(struct f2fs_sb_info *, nid_t); | 1767 | void add_orphan_inode(struct f2fs_sb_info *, nid_t); |
1729 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); | 1768 | void remove_orphan_inode(struct f2fs_sb_info *, nid_t); |
1730 | void recover_orphan_inodes(struct f2fs_sb_info *); | 1769 | int recover_orphan_inodes(struct f2fs_sb_info *); |
1731 | int get_valid_checkpoint(struct f2fs_sb_info *); | 1770 | int get_valid_checkpoint(struct f2fs_sb_info *); |
1732 | void update_dirty_page(struct inode *, struct page *); | 1771 | void update_dirty_page(struct inode *, struct page *); |
1733 | void add_dirty_dir_inode(struct inode *); | 1772 | void add_dirty_dir_inode(struct inode *); |
@@ -1746,21 +1785,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *); | |||
1746 | void f2fs_submit_page_mbio(struct f2fs_io_info *); | 1785 | void f2fs_submit_page_mbio(struct f2fs_io_info *); |
1747 | void set_data_blkaddr(struct dnode_of_data *); | 1786 | void set_data_blkaddr(struct dnode_of_data *); |
1748 | int reserve_new_block(struct dnode_of_data *); | 1787 | int reserve_new_block(struct dnode_of_data *); |
1788 | int f2fs_get_block(struct dnode_of_data *, pgoff_t); | ||
1749 | int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); | 1789 | int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); |
1750 | void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); | ||
1751 | void f2fs_destroy_extent_tree(struct inode *); | ||
1752 | void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *); | ||
1753 | void f2fs_update_extent_cache(struct dnode_of_data *); | ||
1754 | void f2fs_preserve_extent_tree(struct inode *); | ||
1755 | struct page *get_read_data_page(struct inode *, pgoff_t, int); | 1790 | struct page *get_read_data_page(struct inode *, pgoff_t, int); |
1756 | struct page *find_data_page(struct inode *, pgoff_t); | 1791 | struct page *find_data_page(struct inode *, pgoff_t); |
1757 | struct page *get_lock_data_page(struct inode *, pgoff_t); | 1792 | struct page *get_lock_data_page(struct inode *, pgoff_t); |
1758 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); | 1793 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); |
1759 | int do_write_data_page(struct f2fs_io_info *); | 1794 | int do_write_data_page(struct f2fs_io_info *); |
1760 | int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); | 1795 | int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); |
1761 | void init_extent_cache_info(struct f2fs_sb_info *); | ||
1762 | int __init create_extent_cache(void); | ||
1763 | void destroy_extent_cache(void); | ||
1764 | void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); | 1796 | void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); |
1765 | int f2fs_release_page(struct page *, gfp_t); | 1797 | int f2fs_release_page(struct page *, gfp_t); |
1766 | 1798 | ||
@@ -1788,11 +1820,13 @@ struct f2fs_stat_info { | |||
1788 | struct f2fs_sb_info *sbi; | 1820 | struct f2fs_sb_info *sbi; |
1789 | int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; | 1821 | int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; |
1790 | int main_area_segs, main_area_sections, main_area_zones; | 1822 | int main_area_segs, main_area_sections, main_area_zones; |
1791 | int hit_ext, total_ext, ext_tree, ext_node; | 1823 | int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext; |
1824 | int ext_tree, ext_node; | ||
1792 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; | 1825 | int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; |
1793 | int nats, dirty_nats, sits, dirty_sits, fnids; | 1826 | int nats, dirty_nats, sits, dirty_sits, fnids; |
1794 | int total_count, utilization; | 1827 | int total_count, utilization; |
1795 | int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; | 1828 | int bg_gc, inmem_pages, wb_pages; |
1829 | int inline_xattr, inline_inode, inline_dir; | ||
1796 | unsigned int valid_count, valid_node_count, valid_inode_count; | 1830 | unsigned int valid_count, valid_node_count, valid_inode_count; |
1797 | unsigned int bimodal, avg_vblocks; | 1831 | unsigned int bimodal, avg_vblocks; |
1798 | int util_free, util_valid, util_invalid; | 1832 | int util_free, util_valid, util_invalid; |
@@ -1823,8 +1857,20 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |||
1823 | #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) | 1857 | #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) |
1824 | #define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) | 1858 | #define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) |
1825 | #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) | 1859 | #define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) |
1826 | #define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) | 1860 | #define stat_inc_total_hit(sbi) (atomic_inc(&(sbi)->total_hit_ext)) |
1827 | #define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) | 1861 | #define stat_inc_rbtree_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_rbtree)) |
1862 | #define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest)) | ||
1863 | #define stat_inc_cached_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_cached)) | ||
1864 | #define stat_inc_inline_xattr(inode) \ | ||
1865 | do { \ | ||
1866 | if (f2fs_has_inline_xattr(inode)) \ | ||
1867 | (atomic_inc(&F2FS_I_SB(inode)->inline_xattr)); \ | ||
1868 | } while (0) | ||
1869 | #define stat_dec_inline_xattr(inode) \ | ||
1870 | do { \ | ||
1871 | if (f2fs_has_inline_xattr(inode)) \ | ||
1872 | (atomic_dec(&F2FS_I_SB(inode)->inline_xattr)); \ | ||
1873 | } while (0) | ||
1828 | #define stat_inc_inline_inode(inode) \ | 1874 | #define stat_inc_inline_inode(inode) \ |
1829 | do { \ | 1875 | do { \ |
1830 | if (f2fs_has_inline_data(inode)) \ | 1876 | if (f2fs_has_inline_data(inode)) \ |
@@ -1894,7 +1940,11 @@ void f2fs_destroy_root_stats(void); | |||
1894 | #define stat_inc_dirty_dir(sbi) | 1940 | #define stat_inc_dirty_dir(sbi) |
1895 | #define stat_dec_dirty_dir(sbi) | 1941 | #define stat_dec_dirty_dir(sbi) |
1896 | #define stat_inc_total_hit(sb) | 1942 | #define stat_inc_total_hit(sb) |
1897 | #define stat_inc_read_hit(sb) | 1943 | #define stat_inc_rbtree_node_hit(sb) |
1944 | #define stat_inc_largest_node_hit(sbi) | ||
1945 | #define stat_inc_cached_node_hit(sbi) | ||
1946 | #define stat_inc_inline_xattr(inode) | ||
1947 | #define stat_dec_inline_xattr(inode) | ||
1898 | #define stat_inc_inline_inode(inode) | 1948 | #define stat_inc_inline_inode(inode) |
1899 | #define stat_dec_inline_inode(inode) | 1949 | #define stat_dec_inline_inode(inode) |
1900 | #define stat_inc_inline_dir(inode) | 1950 | #define stat_inc_inline_dir(inode) |
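The inline_xattr counters introduced above follow the standard kernel stat-macro pattern: an atomic implementation under CONFIG_F2FS_STAT_FS, and the empty stubs in this hunk otherwise, with each conditional body wrapped in do { ... } while (0) so the macro expands to a single statement even inside an unbraced if/else. A minimal sketch of the same pattern, with illustrative names that are not taken from f2fs:

    #ifdef CONFIG_EXAMPLE_STATS
    #define stat_inc_example(obj)                            \
            do {                                             \
                    if (obj_has_feature(obj))                \
                            atomic_inc(&obj_stats(obj));     \
            } while (0)
    #else
    #define stat_inc_example(obj)    /* compiles away entirely */
    #endif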
@@ -1950,6 +2000,30 @@ int f2fs_read_inline_dir(struct file *, struct dir_context *, | |||
1950 | struct f2fs_str *); | 2000 | struct f2fs_str *); |
1951 | 2001 | ||
1952 | /* | 2002 | /* |
2003 | * shrinker.c | ||
2004 | */ | ||
2005 | unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *); | ||
2006 | unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *); | ||
2007 | void f2fs_join_shrinker(struct f2fs_sb_info *); | ||
2008 | void f2fs_leave_shrinker(struct f2fs_sb_info *); | ||
2009 | |||
2010 | /* | ||
2011 | * extent_cache.c | ||
2012 | */ | ||
2013 | unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); | ||
2014 | void f2fs_drop_largest_extent(struct inode *, pgoff_t); | ||
2015 | void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); | ||
2016 | unsigned int f2fs_destroy_extent_node(struct inode *); | ||
2017 | void f2fs_destroy_extent_tree(struct inode *); | ||
2018 | bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); | ||
2019 | void f2fs_update_extent_cache(struct dnode_of_data *); | ||
2020 | void f2fs_update_extent_cache_range(struct dnode_of_data *dn, | ||
2021 | pgoff_t, block_t, unsigned int); | ||
2022 | void init_extent_cache_info(struct f2fs_sb_info *); | ||
2023 | int __init create_extent_cache(void); | ||
2024 | void destroy_extent_cache(void); | ||
2025 | |||
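With the extent-cache API now collected in extent_cache.c, a read path can consult the cache first and walk the node tree only on a miss. A rough sketch of that lookup-first flow, assuming the extent_info fields (fofs, blk, len) used by f2fs in this era:

    static block_t lookup_mapped_block(struct inode *inode, pgoff_t index)
    {
            struct extent_info ei;

            /* fast path: the per-inode extent tree already maps this index */
            if (f2fs_lookup_extent_cache(inode, index, &ei))
                    return ei.blk + (index - ei.fofs);

            /* slow path: a real caller would resolve the address through
             * get_dnode_of_data() and then refresh the cache via
             * f2fs_update_extent_cache() */
            return NULL_ADDR;
    }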
2026 | /* | ||
1953 | * crypto support | 2027 | * crypto support |
1954 | */ | 2028 | */ |
1955 | static inline int f2fs_encrypted_inode(struct inode *inode) | 2029 | static inline int f2fs_encrypted_inode(struct inode *inode) |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b0f38c3b37f4..8120f8685141 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "segment.h" | 27 | #include "segment.h" |
28 | #include "xattr.h" | 28 | #include "xattr.h" |
29 | #include "acl.h" | 29 | #include "acl.h" |
30 | #include "gc.h" | ||
30 | #include "trace.h" | 31 | #include "trace.h" |
31 | #include <trace/events/f2fs.h> | 32 | #include <trace/events/f2fs.h> |
32 | 33 | ||
@@ -85,6 +86,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
85 | mapped: | 86 | mapped: |
86 | /* fill the page */ | 87 | /* fill the page */ |
87 | f2fs_wait_on_page_writeback(page, DATA); | 88 | f2fs_wait_on_page_writeback(page, DATA); |
89 | /* if a GCed page is attached, don't write to a cold segment */ | ||
90 | clear_cold_data(page); | ||
88 | out: | 91 | out: |
89 | sb_end_pagefault(inode->i_sb); | 92 | sb_end_pagefault(inode->i_sb); |
90 | return block_page_mkwrite_return(err); | 93 | return block_page_mkwrite_return(err); |
@@ -203,8 +206,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
203 | } | 206 | } |
204 | 207 | ||
205 | /* if the inode is dirty, let's recover all the time */ | 208 | /* if the inode is dirty, let's recover all the time */ |
206 | if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { | 209 | if (!datasync) { |
207 | update_inode_page(inode); | 210 | f2fs_write_inode(inode, NULL); |
208 | goto go_write; | 211 | goto go_write; |
209 | } | 212 | } |
210 | 213 | ||
@@ -442,9 +445,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) | |||
442 | 445 | ||
443 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | 446 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) |
444 | { | 447 | { |
445 | int nr_free = 0, ofs = dn->ofs_in_node; | ||
446 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); | 448 | struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); |
447 | struct f2fs_node *raw_node; | 449 | struct f2fs_node *raw_node; |
450 | int nr_free = 0, ofs = dn->ofs_in_node, len = count; | ||
448 | __le32 *addr; | 451 | __le32 *addr; |
449 | 452 | ||
450 | raw_node = F2FS_NODE(dn->node_page); | 453 | raw_node = F2FS_NODE(dn->node_page); |
@@ -457,14 +460,22 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |||
457 | 460 | ||
458 | dn->data_blkaddr = NULL_ADDR; | 461 | dn->data_blkaddr = NULL_ADDR; |
459 | set_data_blkaddr(dn); | 462 | set_data_blkaddr(dn); |
460 | f2fs_update_extent_cache(dn); | ||
461 | invalidate_blocks(sbi, blkaddr); | 463 | invalidate_blocks(sbi, blkaddr); |
462 | if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) | 464 | if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) |
463 | clear_inode_flag(F2FS_I(dn->inode), | 465 | clear_inode_flag(F2FS_I(dn->inode), |
464 | FI_FIRST_BLOCK_WRITTEN); | 466 | FI_FIRST_BLOCK_WRITTEN); |
465 | nr_free++; | 467 | nr_free++; |
466 | } | 468 | } |
469 | |||
467 | if (nr_free) { | 470 | if (nr_free) { |
471 | pgoff_t fofs; | ||
472 | /* | ||
473 | * once we invalidate valid blkaddr in range [ofs, ofs + count], | ||
474 | * we will invalidate all blkaddr in the whole range. | ||
475 | */ | ||
476 | fofs = start_bidx_of_node(ofs_of_node(dn->node_page), | ||
477 | F2FS_I(dn->inode)) + ofs; | ||
478 | f2fs_update_extent_cache_range(dn, fofs, 0, len); | ||
468 | dec_valid_block_count(sbi, dn->inode, nr_free); | 479 | dec_valid_block_count(sbi, dn->inode, nr_free); |
469 | set_page_dirty(dn->node_page); | 480 | set_page_dirty(dn->node_page); |
470 | sync_inode_page(dn); | 481 | sync_inode_page(dn); |
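The interesting part of this hunk is the index conversion: ofs is relative to the dnode page, so start_bidx_of_node() translates it into a file-logical block index before the extent cache is told the whole run is gone. A small worked sketch of the same call with made-up numbers:

    /* illustration: this dnode's first block maps file index 1024, and we
     * just invalidated len = 3 blocks starting at ofs = 5 within it */
    pgoff_t fofs = 1024 + 5;

    /* blkaddr 0 (NULL_ADDR) marks file blocks [1029, 1032) as unmapped,
     * shrinking or splitting any cached extent covering them */
    f2fs_update_extent_cache_range(dn, fofs, 0, 3);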
@@ -576,24 +587,30 @@ out: | |||
576 | return err; | 587 | return err; |
577 | } | 588 | } |
578 | 589 | ||
579 | void f2fs_truncate(struct inode *inode) | 590 | int f2fs_truncate(struct inode *inode, bool lock) |
580 | { | 591 | { |
592 | int err; | ||
593 | |||
581 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 594 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
582 | S_ISLNK(inode->i_mode))) | 595 | S_ISLNK(inode->i_mode))) |
583 | return; | 596 | return 0; |
584 | 597 | ||
585 | trace_f2fs_truncate(inode); | 598 | trace_f2fs_truncate(inode); |
586 | 599 | ||
587 | /* we should check inline_data size */ | 600 | /* we should check inline_data size */ |
588 | if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { | 601 | if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { |
589 | if (f2fs_convert_inline_inode(inode)) | 602 | err = f2fs_convert_inline_inode(inode); |
590 | return; | 603 | if (err) |
604 | return err; | ||
591 | } | 605 | } |
592 | 606 | ||
593 | if (!truncate_blocks(inode, i_size_read(inode), true)) { | 607 | err = truncate_blocks(inode, i_size_read(inode), lock); |
594 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 608 | if (err) |
595 | mark_inode_dirty(inode); | 609 | return err; |
596 | } | 610 | |
611 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
612 | mark_inode_dirty(inode); | ||
613 | return 0; | ||
597 | } | 614 | } |
598 | 615 | ||
599 | int f2fs_getattr(struct vfsmount *mnt, | 616 | int f2fs_getattr(struct vfsmount *mnt, |
@@ -653,7 +670,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) | |||
653 | 670 | ||
654 | if (attr->ia_size <= i_size_read(inode)) { | 671 | if (attr->ia_size <= i_size_read(inode)) { |
655 | truncate_setsize(inode, attr->ia_size); | 672 | truncate_setsize(inode, attr->ia_size); |
656 | f2fs_truncate(inode); | 673 | err = f2fs_truncate(inode, true); |
674 | if (err) | ||
675 | return err; | ||
657 | f2fs_balance_fs(F2FS_I_SB(inode)); | 676 | f2fs_balance_fs(F2FS_I_SB(inode)); |
658 | } else { | 677 | } else { |
659 | /* | 678 | /* |
@@ -692,14 +711,14 @@ const struct inode_operations f2fs_file_inode_operations = { | |||
692 | .fiemap = f2fs_fiemap, | 711 | .fiemap = f2fs_fiemap, |
693 | }; | 712 | }; |
694 | 713 | ||
695 | static void fill_zero(struct inode *inode, pgoff_t index, | 714 | static int fill_zero(struct inode *inode, pgoff_t index, |
696 | loff_t start, loff_t len) | 715 | loff_t start, loff_t len) |
697 | { | 716 | { |
698 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 717 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
699 | struct page *page; | 718 | struct page *page; |
700 | 719 | ||
701 | if (!len) | 720 | if (!len) |
702 | return; | 721 | return 0; |
703 | 722 | ||
704 | f2fs_balance_fs(sbi); | 723 | f2fs_balance_fs(sbi); |
705 | 724 | ||
@@ -707,12 +726,14 @@ static void fill_zero(struct inode *inode, pgoff_t index, | |||
707 | page = get_new_data_page(inode, NULL, index, false); | 726 | page = get_new_data_page(inode, NULL, index, false); |
708 | f2fs_unlock_op(sbi); | 727 | f2fs_unlock_op(sbi); |
709 | 728 | ||
710 | if (!IS_ERR(page)) { | 729 | if (IS_ERR(page)) |
711 | f2fs_wait_on_page_writeback(page, DATA); | 730 | return PTR_ERR(page); |
712 | zero_user(page, start, len); | 731 | |
713 | set_page_dirty(page); | 732 | f2fs_wait_on_page_writeback(page, DATA); |
714 | f2fs_put_page(page, 1); | 733 | zero_user(page, start, len); |
715 | } | 734 | set_page_dirty(page); |
735 | f2fs_put_page(page, 1); | ||
736 | return 0; | ||
716 | } | 737 | } |
717 | 738 | ||
718 | int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) | 739 | int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) |
@@ -760,14 +781,22 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
760 | off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); | 781 | off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); |
761 | 782 | ||
762 | if (pg_start == pg_end) { | 783 | if (pg_start == pg_end) { |
763 | fill_zero(inode, pg_start, off_start, | 784 | ret = fill_zero(inode, pg_start, off_start, |
764 | off_end - off_start); | 785 | off_end - off_start); |
786 | if (ret) | ||
787 | return ret; | ||
765 | } else { | 788 | } else { |
766 | if (off_start) | 789 | if (off_start) { |
767 | fill_zero(inode, pg_start++, off_start, | 790 | ret = fill_zero(inode, pg_start++, off_start, |
768 | PAGE_CACHE_SIZE - off_start); | 791 | PAGE_CACHE_SIZE - off_start); |
769 | if (off_end) | 792 | if (ret) |
770 | fill_zero(inode, pg_end, 0, off_end); | 793 | return ret; |
794 | } | ||
795 | if (off_end) { | ||
796 | ret = fill_zero(inode, pg_end, 0, off_end); | ||
797 | if (ret) | ||
798 | return ret; | ||
799 | } | ||
771 | 800 | ||
772 | if (pg_start < pg_end) { | 801 | if (pg_start < pg_end) { |
773 | struct address_space *mapping = inode->i_mapping; | 802 | struct address_space *mapping = inode->i_mapping; |
@@ -797,11 +826,11 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) | |||
797 | pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; | 826 | pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; |
798 | int ret = 0; | 827 | int ret = 0; |
799 | 828 | ||
800 | f2fs_lock_op(sbi); | ||
801 | |||
802 | for (; end < nrpages; start++, end++) { | 829 | for (; end < nrpages; start++, end++) { |
803 | block_t new_addr, old_addr; | 830 | block_t new_addr, old_addr; |
804 | 831 | ||
832 | f2fs_lock_op(sbi); | ||
833 | |||
805 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 834 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
806 | ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); | 835 | ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); |
807 | if (ret && ret != -ENOENT) { | 836 | if (ret && ret != -ENOENT) { |
@@ -817,13 +846,16 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) | |||
817 | if (new_addr == NULL_ADDR) { | 846 | if (new_addr == NULL_ADDR) { |
818 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 847 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
819 | ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); | 848 | ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); |
820 | if (ret && ret != -ENOENT) | 849 | if (ret && ret != -ENOENT) { |
821 | goto out; | 850 | goto out; |
822 | else if (ret == -ENOENT) | 851 | } else if (ret == -ENOENT) { |
852 | f2fs_unlock_op(sbi); | ||
823 | continue; | 853 | continue; |
854 | } | ||
824 | 855 | ||
825 | if (dn.data_blkaddr == NULL_ADDR) { | 856 | if (dn.data_blkaddr == NULL_ADDR) { |
826 | f2fs_put_dnode(&dn); | 857 | f2fs_put_dnode(&dn); |
858 | f2fs_unlock_op(sbi); | ||
827 | continue; | 859 | continue; |
828 | } else { | 860 | } else { |
829 | truncate_data_blocks_range(&dn, 1); | 861 | truncate_data_blocks_range(&dn, 1); |
@@ -862,8 +894,9 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) | |||
862 | 894 | ||
863 | f2fs_put_dnode(&dn); | 895 | f2fs_put_dnode(&dn); |
864 | } | 896 | } |
897 | f2fs_unlock_op(sbi); | ||
865 | } | 898 | } |
866 | ret = 0; | 899 | return 0; |
867 | out: | 900 | out: |
868 | f2fs_unlock_op(sbi); | 901 | f2fs_unlock_op(sbi); |
869 | return ret; | 902 | return ret; |
@@ -885,6 +918,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) | |||
885 | if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) | 918 | if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) |
886 | return -EINVAL; | 919 | return -EINVAL; |
887 | 920 | ||
921 | f2fs_balance_fs(F2FS_I_SB(inode)); | ||
922 | |||
923 | if (f2fs_has_inline_data(inode)) { | ||
924 | ret = f2fs_convert_inline_inode(inode); | ||
925 | if (ret) | ||
926 | return ret; | ||
927 | } | ||
928 | |||
888 | pg_start = offset >> PAGE_CACHE_SHIFT; | 929 | pg_start = offset >> PAGE_CACHE_SHIFT; |
889 | pg_end = (offset + len) >> PAGE_CACHE_SHIFT; | 930 | pg_end = (offset + len) >> PAGE_CACHE_SHIFT; |
890 | 931 | ||
@@ -946,14 +987,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, | |||
946 | off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); | 987 | off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); |
947 | 988 | ||
948 | if (pg_start == pg_end) { | 989 | if (pg_start == pg_end) { |
949 | fill_zero(inode, pg_start, off_start, off_end - off_start); | 990 | ret = fill_zero(inode, pg_start, off_start, |
991 | off_end - off_start); | ||
992 | if (ret) | ||
993 | return ret; | ||
994 | |||
950 | if (offset + len > new_size) | 995 | if (offset + len > new_size) |
951 | new_size = offset + len; | 996 | new_size = offset + len; |
952 | new_size = max_t(loff_t, new_size, offset + len); | 997 | new_size = max_t(loff_t, new_size, offset + len); |
953 | } else { | 998 | } else { |
954 | if (off_start) { | 999 | if (off_start) { |
955 | fill_zero(inode, pg_start++, off_start, | 1000 | ret = fill_zero(inode, pg_start++, off_start, |
956 | PAGE_CACHE_SIZE - off_start); | 1001 | PAGE_CACHE_SIZE - off_start); |
1002 | if (ret) | ||
1003 | return ret; | ||
1004 | |||
957 | new_size = max_t(loff_t, new_size, | 1005 | new_size = max_t(loff_t, new_size, |
958 | pg_start << PAGE_CACHE_SHIFT); | 1006 | pg_start << PAGE_CACHE_SHIFT); |
959 | } | 1007 | } |
@@ -995,7 +1043,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, | |||
995 | } | 1043 | } |
996 | 1044 | ||
997 | if (off_end) { | 1045 | if (off_end) { |
998 | fill_zero(inode, pg_end, 0, off_end); | 1046 | ret = fill_zero(inode, pg_end, 0, off_end); |
1047 | if (ret) | ||
1048 | goto out; | ||
1049 | |||
999 | new_size = max_t(loff_t, new_size, offset + len); | 1050 | new_size = max_t(loff_t, new_size, offset + len); |
1000 | } | 1051 | } |
1001 | } | 1052 | } |
@@ -1033,6 +1084,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) | |||
1033 | 1084 | ||
1034 | f2fs_balance_fs(sbi); | 1085 | f2fs_balance_fs(sbi); |
1035 | 1086 | ||
1087 | if (f2fs_has_inline_data(inode)) { | ||
1088 | ret = f2fs_convert_inline_inode(inode); | ||
1089 | if (ret) | ||
1090 | return ret; | ||
1091 | } | ||
1092 | |||
1036 | ret = truncate_blocks(inode, i_size_read(inode), true); | 1093 | ret = truncate_blocks(inode, i_size_read(inode), true); |
1037 | if (ret) | 1094 | if (ret) |
1038 | return ret; | 1095 | return ret; |
@@ -1302,6 +1359,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg) | |||
1302 | static int f2fs_ioc_start_atomic_write(struct file *filp) | 1359 | static int f2fs_ioc_start_atomic_write(struct file *filp) |
1303 | { | 1360 | { |
1304 | struct inode *inode = file_inode(filp); | 1361 | struct inode *inode = file_inode(filp); |
1362 | int ret; | ||
1305 | 1363 | ||
1306 | if (!inode_owner_or_capable(inode)) | 1364 | if (!inode_owner_or_capable(inode)) |
1307 | return -EACCES; | 1365 | return -EACCES; |
@@ -1311,9 +1369,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) | |||
1311 | if (f2fs_is_atomic_file(inode)) | 1369 | if (f2fs_is_atomic_file(inode)) |
1312 | return 0; | 1370 | return 0; |
1313 | 1371 | ||
1314 | set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | 1372 | ret = f2fs_convert_inline_inode(inode); |
1373 | if (ret) | ||
1374 | return ret; | ||
1315 | 1375 | ||
1316 | return f2fs_convert_inline_inode(inode); | 1376 | set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); |
1377 | return 0; | ||
1317 | } | 1378 | } |
1318 | 1379 | ||
1319 | static int f2fs_ioc_commit_atomic_write(struct file *filp) | 1380 | static int f2fs_ioc_commit_atomic_write(struct file *filp) |
@@ -1333,10 +1394,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) | |||
1333 | 1394 | ||
1334 | if (f2fs_is_atomic_file(inode)) { | 1395 | if (f2fs_is_atomic_file(inode)) { |
1335 | clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | 1396 | clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); |
1336 | commit_inmem_pages(inode, false); | 1397 | ret = commit_inmem_pages(inode, false); |
1398 | if (ret) | ||
1399 | goto err_out; | ||
1337 | } | 1400 | } |
1338 | 1401 | ||
1339 | ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); | 1402 | ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); |
1403 | err_out: | ||
1340 | mnt_drop_write_file(filp); | 1404 | mnt_drop_write_file(filp); |
1341 | return ret; | 1405 | return ret; |
1342 | } | 1406 | } |
@@ -1344,6 +1408,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) | |||
1344 | static int f2fs_ioc_start_volatile_write(struct file *filp) | 1408 | static int f2fs_ioc_start_volatile_write(struct file *filp) |
1345 | { | 1409 | { |
1346 | struct inode *inode = file_inode(filp); | 1410 | struct inode *inode = file_inode(filp); |
1411 | int ret; | ||
1347 | 1412 | ||
1348 | if (!inode_owner_or_capable(inode)) | 1413 | if (!inode_owner_or_capable(inode)) |
1349 | return -EACCES; | 1414 | return -EACCES; |
@@ -1351,9 +1416,12 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) | |||
1351 | if (f2fs_is_volatile_file(inode)) | 1416 | if (f2fs_is_volatile_file(inode)) |
1352 | return 0; | 1417 | return 0; |
1353 | 1418 | ||
1354 | set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); | 1419 | ret = f2fs_convert_inline_inode(inode); |
1420 | if (ret) | ||
1421 | return ret; | ||
1355 | 1422 | ||
1356 | return f2fs_convert_inline_inode(inode); | 1423 | set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); |
1424 | return 0; | ||
1357 | } | 1425 | } |
1358 | 1426 | ||
1359 | static int f2fs_ioc_release_volatile_write(struct file *filp) | 1427 | static int f2fs_ioc_release_volatile_write(struct file *filp) |
@@ -1389,7 +1457,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) | |||
1389 | 1457 | ||
1390 | if (f2fs_is_atomic_file(inode)) { | 1458 | if (f2fs_is_atomic_file(inode)) { |
1391 | clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | 1459 | clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); |
1392 | commit_inmem_pages(inode, false); | 1460 | commit_inmem_pages(inode, true); |
1393 | } | 1461 | } |
1394 | 1462 | ||
1395 | if (f2fs_is_volatile_file(inode)) | 1463 | if (f2fs_is_volatile_file(inode)) |
@@ -1544,6 +1612,35 @@ got_it: | |||
1544 | return 0; | 1612 | return 0; |
1545 | } | 1613 | } |
1546 | 1614 | ||
1615 | static int f2fs_ioc_gc(struct file *filp, unsigned long arg) | ||
1616 | { | ||
1617 | struct inode *inode = file_inode(filp); | ||
1618 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
1619 | __u32 i, count; | ||
1620 | |||
1621 | if (!capable(CAP_SYS_ADMIN)) | ||
1622 | return -EPERM; | ||
1623 | |||
1624 | if (get_user(count, (__u32 __user *)arg)) | ||
1625 | return -EFAULT; | ||
1626 | |||
1627 | if (!count || count > F2FS_BATCH_GC_MAX_NUM) | ||
1628 | return -EINVAL; | ||
1629 | |||
1630 | for (i = 0; i < count; i++) { | ||
1631 | if (!mutex_trylock(&sbi->gc_mutex)) | ||
1632 | break; | ||
1633 | |||
1634 | if (f2fs_gc(sbi)) | ||
1635 | break; | ||
1636 | } | ||
1637 | |||
1638 | if (put_user(i, (__u32 __user *)arg)) | ||
1639 | return -EFAULT; | ||
1640 | |||
1641 | return 0; | ||
1642 | } | ||
1643 | |||
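The argument is a single __u32 used in both directions: the requested number of GC rounds goes in, and the number of rounds actually completed (the loop stops early on trylock failure or when f2fs_gc() reports no victim) comes back out. A userspace sketch of driving it; the ioctl request definition below is an assumption for illustration and is not part of this diff:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>

    #define F2FS_IOCTL_MAGIC         0xf5                      /* assumed */
    #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)  /* assumed */

    int main(void)
    {
            __u32 count = 8;  /* 1..F2FS_BATCH_GC_MAX_NUM (16) */
            int fd = open("/mnt/f2fs", O_RDONLY);

            if (fd < 0)
                    return 1;
            if (ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &count) == 0)
                    printf("ran %u gc round(s)\n", count);
            return 0;
    }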
1547 | long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 1644 | long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
1548 | { | 1645 | { |
1549 | switch (cmd) { | 1646 | switch (cmd) { |
@@ -1573,6 +1670,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
1573 | return f2fs_ioc_get_encryption_policy(filp, arg); | 1670 | return f2fs_ioc_get_encryption_policy(filp, arg); |
1574 | case F2FS_IOC_GET_ENCRYPTION_PWSALT: | 1671 | case F2FS_IOC_GET_ENCRYPTION_PWSALT: |
1575 | return f2fs_ioc_get_encryption_pwsalt(filp, arg); | 1672 | return f2fs_ioc_get_encryption_pwsalt(filp, arg); |
1673 | case F2FS_IOC_GARBAGE_COLLECT: | ||
1674 | return f2fs_ioc_gc(filp, arg); | ||
1576 | default: | 1675 | default: |
1577 | return -ENOTTY; | 1676 | return -ENOTTY; |
1578 | } | 1677 | } |
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 22fb5ef37966..782b8e72c094 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -391,23 +391,27 @@ static int check_valid_map(struct f2fs_sb_info *sbi, | |||
391 | * If the node is valid, copy it with cold status; otherwise (an invalid | 391 | * If the node is valid, copy it with cold status; otherwise (an invalid |
392 | * node), ignore it. | 392 | * node), ignore it. |
393 | */ | 393 | */ |
394 | static void gc_node_segment(struct f2fs_sb_info *sbi, | 394 | static int gc_node_segment(struct f2fs_sb_info *sbi, |
395 | struct f2fs_summary *sum, unsigned int segno, int gc_type) | 395 | struct f2fs_summary *sum, unsigned int segno, int gc_type) |
396 | { | 396 | { |
397 | bool initial = true; | 397 | bool initial = true; |
398 | struct f2fs_summary *entry; | 398 | struct f2fs_summary *entry; |
399 | block_t start_addr; | ||
399 | int off; | 400 | int off; |
400 | 401 | ||
402 | start_addr = START_BLOCK(sbi, segno); | ||
403 | |||
401 | next_step: | 404 | next_step: |
402 | entry = sum; | 405 | entry = sum; |
403 | 406 | ||
404 | for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { | 407 | for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { |
405 | nid_t nid = le32_to_cpu(entry->nid); | 408 | nid_t nid = le32_to_cpu(entry->nid); |
406 | struct page *node_page; | 409 | struct page *node_page; |
410 | struct node_info ni; | ||
407 | 411 | ||
408 | /* stop BG_GC if there is not enough free sections. */ | 412 | /* stop BG_GC if there is not enough free sections. */ |
409 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) | 413 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) |
410 | return; | 414 | return 0; |
411 | 415 | ||
412 | if (check_valid_map(sbi, segno, off) == 0) | 416 | if (check_valid_map(sbi, segno, off) == 0) |
413 | continue; | 417 | continue; |
@@ -426,6 +430,12 @@ next_step: | |||
426 | continue; | 430 | continue; |
427 | } | 431 | } |
428 | 432 | ||
433 | get_node_info(sbi, nid, &ni); | ||
434 | if (ni.blk_addr != start_addr + off) { | ||
435 | f2fs_put_page(node_page, 1); | ||
436 | continue; | ||
437 | } | ||
438 | |||
429 | /* set page dirty and write it */ | 439 | /* set page dirty and write it */ |
430 | if (gc_type == FG_GC) { | 440 | if (gc_type == FG_GC) { |
431 | f2fs_wait_on_page_writeback(node_page, NODE); | 441 | f2fs_wait_on_page_writeback(node_page, NODE); |
@@ -451,13 +461,11 @@ next_step: | |||
451 | }; | 461 | }; |
452 | sync_node_pages(sbi, 0, &wbc); | 462 | sync_node_pages(sbi, 0, &wbc); |
453 | 463 | ||
454 | /* | 464 | /* return 1 only if FG_GC successfully reclaimed one */ |
455 | * In the case of FG_GC, it'd be better to reclaim this victim | 465 | if (get_valid_blocks(sbi, segno, 1) == 0) |
456 | * completely. | 466 | return 1; |
457 | */ | ||
458 | if (get_valid_blocks(sbi, segno, 1) != 0) | ||
459 | goto next_step; | ||
460 | } | 467 | } |
468 | return 0; | ||
461 | } | 469 | } |
462 | 470 | ||
463 | /* | 471 | /* |
@@ -487,7 +495,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) | |||
487 | return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); | 495 | return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); |
488 | } | 496 | } |
489 | 497 | ||
490 | static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, | 498 | static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, |
491 | struct node_info *dni, block_t blkaddr, unsigned int *nofs) | 499 | struct node_info *dni, block_t blkaddr, unsigned int *nofs) |
492 | { | 500 | { |
493 | struct page *node_page; | 501 | struct page *node_page; |
@@ -500,13 +508,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, | |||
500 | 508 | ||
501 | node_page = get_node_page(sbi, nid); | 509 | node_page = get_node_page(sbi, nid); |
502 | if (IS_ERR(node_page)) | 510 | if (IS_ERR(node_page)) |
503 | return 0; | 511 | return false; |
504 | 512 | ||
505 | get_node_info(sbi, nid, dni); | 513 | get_node_info(sbi, nid, dni); |
506 | 514 | ||
507 | if (sum->version != dni->version) { | 515 | if (sum->version != dni->version) { |
508 | f2fs_put_page(node_page, 1); | 516 | f2fs_put_page(node_page, 1); |
509 | return 0; | 517 | return false; |
510 | } | 518 | } |
511 | 519 | ||
512 | *nofs = ofs_of_node(node_page); | 520 | *nofs = ofs_of_node(node_page); |
@@ -514,8 +522,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, | |||
514 | f2fs_put_page(node_page, 1); | 522 | f2fs_put_page(node_page, 1); |
515 | 523 | ||
516 | if (source_blkaddr != blkaddr) | 524 | if (source_blkaddr != blkaddr) |
517 | return 0; | 525 | return false; |
518 | return 1; | 526 | return true; |
519 | } | 527 | } |
520 | 528 | ||
521 | static void move_encrypted_block(struct inode *inode, block_t bidx) | 529 | static void move_encrypted_block(struct inode *inode, block_t bidx) |
@@ -552,7 +560,10 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) | |||
552 | fio.page = page; | 560 | fio.page = page; |
553 | fio.blk_addr = dn.data_blkaddr; | 561 | fio.blk_addr = dn.data_blkaddr; |
554 | 562 | ||
555 | fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr); | 563 | fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), |
564 | fio.blk_addr, | ||
565 | FGP_LOCK|FGP_CREAT, | ||
566 | GFP_NOFS); | ||
556 | if (!fio.encrypted_page) | 567 | if (!fio.encrypted_page) |
557 | goto put_out; | 568 | goto put_out; |
558 | 569 | ||
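grab_cache_page() allocates with the mapping's default gfp mask; spelling the call out as pagecache_get_page() lets GC force GFP_NOFS, so the allocation can never recurse back into the filesystem while GC state is held. Roughly, the replaced helper relates to the new call like this (a sketch of the relationship, not the exact mainline definition):

    static inline struct page *grab_cache_page_sketch(
                    struct address_space *mapping, pgoff_t index)
    {
            /* same FGP_LOCK | FGP_CREAT behaviour as the new call site, but
             * the gfp mask comes from the mapping and may allow __GFP_FS,
             * hence filesystem re-entry under memory pressure */
            return pagecache_get_page(mapping, index, FGP_LOCK | FGP_CREAT,
                                      mapping_gfp_mask(mapping));
    }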
@@ -636,7 +647,7 @@ out: | |||
636 | * If the parent node is not valid or the data block address is different, | 647 | * If the parent node is not valid or the data block address is different, |
637 | * the victim data block is ignored. | 648 | * the victim data block is ignored. |
638 | */ | 649 | */ |
639 | static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, | 650 | static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, |
640 | struct gc_inode_list *gc_list, unsigned int segno, int gc_type) | 651 | struct gc_inode_list *gc_list, unsigned int segno, int gc_type) |
641 | { | 652 | { |
642 | struct super_block *sb = sbi->sb; | 653 | struct super_block *sb = sbi->sb; |
@@ -659,7 +670,7 @@ next_step: | |||
659 | 670 | ||
660 | /* stop BG_GC if there is not enough free sections. */ | 671 | /* stop BG_GC if there is not enough free sections. */ |
661 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) | 672 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) |
662 | return; | 673 | return 0; |
663 | 674 | ||
664 | if (check_valid_map(sbi, segno, off) == 0) | 675 | if (check_valid_map(sbi, segno, off) == 0) |
665 | continue; | 676 | continue; |
@@ -670,7 +681,7 @@ next_step: | |||
670 | } | 681 | } |
671 | 682 | ||
672 | /* Get an inode by ino with checking validity */ | 683 | /* Get an inode by ino with checking validity */ |
673 | if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) | 684 | if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs)) |
674 | continue; | 685 | continue; |
675 | 686 | ||
676 | if (phase == 1) { | 687 | if (phase == 1) { |
@@ -724,15 +735,11 @@ next_step: | |||
724 | if (gc_type == FG_GC) { | 735 | if (gc_type == FG_GC) { |
725 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | 736 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
726 | 737 | ||
727 | /* | 738 | /* return 1 only if FG_GC successfully reclaimed one */ |
728 | * In the case of FG_GC, it'd be better to reclaim this victim | 739 | if (get_valid_blocks(sbi, segno, 1) == 0) |
729 | * completely. | 740 | return 1; |
730 | */ | ||
731 | if (get_valid_blocks(sbi, segno, 1) != 0) { | ||
732 | phase = 2; | ||
733 | goto next_step; | ||
734 | } | ||
735 | } | 741 | } |
742 | return 0; | ||
736 | } | 743 | } |
737 | 744 | ||
738 | static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, | 745 | static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, |
@@ -748,12 +755,13 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, | |||
748 | return ret; | 755 | return ret; |
749 | } | 756 | } |
750 | 757 | ||
751 | static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, | 758 | static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, |
752 | struct gc_inode_list *gc_list, int gc_type) | 759 | struct gc_inode_list *gc_list, int gc_type) |
753 | { | 760 | { |
754 | struct page *sum_page; | 761 | struct page *sum_page; |
755 | struct f2fs_summary_block *sum; | 762 | struct f2fs_summary_block *sum; |
756 | struct blk_plug plug; | 763 | struct blk_plug plug; |
764 | int nfree = 0; | ||
757 | 765 | ||
758 | /* read segment summary of victim */ | 766 | /* read segment summary of victim */ |
759 | sum_page = get_sum_page(sbi, segno); | 767 | sum_page = get_sum_page(sbi, segno); |
@@ -773,10 +781,11 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, | |||
773 | 781 | ||
774 | switch (GET_SUM_TYPE((&sum->footer))) { | 782 | switch (GET_SUM_TYPE((&sum->footer))) { |
775 | case SUM_TYPE_NODE: | 783 | case SUM_TYPE_NODE: |
776 | gc_node_segment(sbi, sum->entries, segno, gc_type); | 784 | nfree = gc_node_segment(sbi, sum->entries, segno, gc_type); |
777 | break; | 785 | break; |
778 | case SUM_TYPE_DATA: | 786 | case SUM_TYPE_DATA: |
779 | gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); | 787 | nfree = gc_data_segment(sbi, sum->entries, gc_list, |
788 | segno, gc_type); | ||
780 | break; | 789 | break; |
781 | } | 790 | } |
782 | blk_finish_plug(&plug); | 791 | blk_finish_plug(&plug); |
@@ -785,11 +794,13 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, | |||
785 | stat_inc_call_count(sbi->stat_info); | 794 | stat_inc_call_count(sbi->stat_info); |
786 | 795 | ||
787 | f2fs_put_page(sum_page, 0); | 796 | f2fs_put_page(sum_page, 0); |
797 | return nfree; | ||
788 | } | 798 | } |
789 | 799 | ||
790 | int f2fs_gc(struct f2fs_sb_info *sbi) | 800 | int f2fs_gc(struct f2fs_sb_info *sbi) |
791 | { | 801 | { |
792 | unsigned int segno, i; | 802 | unsigned int segno = NULL_SEGNO; |
803 | unsigned int i; | ||
793 | int gc_type = BG_GC; | 804 | int gc_type = BG_GC; |
794 | int nfree = 0; | 805 | int nfree = 0; |
795 | int ret = -1; | 806 | int ret = -1; |
@@ -808,10 +819,11 @@ gc_more: | |||
808 | 819 | ||
809 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { | 820 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { |
810 | gc_type = FG_GC; | 821 | gc_type = FG_GC; |
811 | write_checkpoint(sbi, &cpc); | 822 | if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi)) |
823 | write_checkpoint(sbi, &cpc); | ||
812 | } | 824 | } |
813 | 825 | ||
814 | if (!__get_victim(sbi, &segno, gc_type)) | 826 | if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) |
815 | goto stop; | 827 | goto stop; |
816 | ret = 0; | 828 | ret = 0; |
817 | 829 | ||
@@ -821,13 +833,10 @@ gc_more: | |||
821 | META_SSA); | 833 | META_SSA); |
822 | 834 | ||
823 | for (i = 0; i < sbi->segs_per_sec; i++) | 835 | for (i = 0; i < sbi->segs_per_sec; i++) |
824 | do_garbage_collect(sbi, segno + i, &gc_list, gc_type); | 836 | nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type); |
825 | 837 | ||
826 | if (gc_type == FG_GC) { | 838 | if (gc_type == FG_GC) |
827 | sbi->cur_victim_sec = NULL_SEGNO; | 839 | sbi->cur_victim_sec = NULL_SEGNO; |
828 | nfree++; | ||
829 | WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec)); | ||
830 | } | ||
831 | 840 | ||
832 | if (has_not_enough_free_secs(sbi, nfree)) | 841 | if (has_not_enough_free_secs(sbi, nfree)) |
833 | goto gc_more; | 842 | goto gc_more; |
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index b4a65be9f7d3..c5a055b3376e 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h | |||
@@ -19,6 +19,12 @@ | |||
19 | #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ | 19 | #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ |
20 | #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ | 20 | #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ |
21 | 21 | ||
22 | /* | ||
23 | * with this macro, we can control the maximum number of GC rounds we do | ||
24 | * when the user triggers batch-mode GC by ioctl. | ||
25 | */ | ||
26 | #define F2FS_BATCH_GC_MAX_NUM 16 | ||
27 | |||
22 | /* Search max. number of dirty segments to select a victim segment */ | 28 | /* Search max. number of dirty segments to select a victim segment */ |
23 | #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ | 29 | #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ |
24 | 30 | ||
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a13ffcc32992..3d143be42895 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c | |||
@@ -360,6 +360,10 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, | |||
360 | return 0; | 360 | return 0; |
361 | } | 361 | } |
362 | 362 | ||
363 | /* | ||
364 | * NOTE: ipage is grabbed by the caller, but if any error occurs, we should | ||
365 | * release ipage in this function. | ||
366 | */ | ||
363 | static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, | 367 | static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, |
364 | struct f2fs_inline_dentry *inline_dentry) | 368 | struct f2fs_inline_dentry *inline_dentry) |
365 | { | 369 | { |
@@ -369,8 +373,10 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, | |||
369 | int err; | 373 | int err; |
370 | 374 | ||
371 | page = grab_cache_page(dir->i_mapping, 0); | 375 | page = grab_cache_page(dir->i_mapping, 0); |
372 | if (!page) | 376 | if (!page) { |
377 | f2fs_put_page(ipage, 1); | ||
373 | return -ENOMEM; | 378 | return -ENOMEM; |
379 | } | ||
374 | 380 | ||
375 | set_new_dnode(&dn, dir, ipage, NULL, 0); | 381 | set_new_dnode(&dn, dir, ipage, NULL, 0); |
376 | err = f2fs_reserve_block(&dn, 0); | 382 | err = f2fs_reserve_block(&dn, 0); |
@@ -378,13 +384,21 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, | |||
378 | goto out; | 384 | goto out; |
379 | 385 | ||
380 | f2fs_wait_on_page_writeback(page, DATA); | 386 | f2fs_wait_on_page_writeback(page, DATA); |
381 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | 387 | zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); |
382 | 388 | ||
383 | dentry_blk = kmap_atomic(page); | 389 | dentry_blk = kmap_atomic(page); |
384 | 390 | ||
385 | /* copy data from inline dentry block to new dentry block */ | 391 | /* copy data from inline dentry block to new dentry block */ |
386 | memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, | 392 | memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, |
387 | INLINE_DENTRY_BITMAP_SIZE); | 393 | INLINE_DENTRY_BITMAP_SIZE); |
394 | memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0, | ||
395 | SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE); | ||
396 | /* | ||
397 | * we do not need to zero out the remainder of the dentry and filename | ||
398 | * fields, since we use the bitmap to mark their usage status; | ||
399 | * besides, we can also skip copying/zeroing the reserved space of | ||
400 | * the dentry block, because it hasn't been used so far. | ||
401 | */ | ||
388 | memcpy(dentry_blk->dentry, inline_dentry->dentry, | 402 | memcpy(dentry_blk->dentry, inline_dentry->dentry, |
389 | sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); | 403 | sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); |
390 | memcpy(dentry_blk->filename, inline_dentry->filename, | 404 | memcpy(dentry_blk->filename, inline_dentry->filename, |
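Compared with wiping the whole page up front, the conversion now touches only bytes a reader can actually reach; everything else is guarded by the dentry bitmap. A rough map of the resulting dentry block (offsets are the macros used in this hunk):

    /* [0, INLINE_DENTRY_BITMAP_SIZE)          copied from the inline bitmap
     * [INLINE_DENTRY_BITMAP_SIZE,
     *  SIZE_OF_DENTRY_BITMAP)                 memset to 0 (bitmap is larger)
     * first NR_INLINE_DENTRY dentry/filename  copied from the inline dir
     * remaining slots below MAX_INLINE_DATA   left stale, masked by bitmap
     * [MAX_INLINE_DATA, PAGE_CACHE_SIZE)      zeroed by zero_user_segment() */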
@@ -434,8 +448,9 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, | |||
434 | slots, NR_INLINE_DENTRY); | 448 | slots, NR_INLINE_DENTRY); |
435 | if (bit_pos >= NR_INLINE_DENTRY) { | 449 | if (bit_pos >= NR_INLINE_DENTRY) { |
436 | err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); | 450 | err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); |
437 | if (!err) | 451 | if (err) |
438 | err = -EAGAIN; | 452 | return err; |
453 | err = -EAGAIN; | ||
439 | goto out; | 454 | goto out; |
440 | } | 455 | } |
441 | 456 | ||
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2550868dc651..35aae65b3e5d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/f2fs_fs.h> | 12 | #include <linux/f2fs_fs.h> |
13 | #include <linux/buffer_head.h> | 13 | #include <linux/buffer_head.h> |
14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
15 | #include <linux/bitops.h> | ||
16 | 15 | ||
17 | #include "f2fs.h" | 16 | #include "f2fs.h" |
18 | #include "node.h" | 17 | #include "node.h" |
@@ -34,8 +33,8 @@ void f2fs_set_inode_flags(struct inode *inode) | |||
34 | new_fl |= S_NOATIME; | 33 | new_fl |= S_NOATIME; |
35 | if (flags & FS_DIRSYNC_FL) | 34 | if (flags & FS_DIRSYNC_FL) |
36 | new_fl |= S_DIRSYNC; | 35 | new_fl |= S_DIRSYNC; |
37 | set_mask_bits(&inode->i_flags, | 36 | inode_set_flags(inode, new_fl, |
38 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); | 37 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); |
39 | } | 38 | } |
40 | 39 | ||
41 | static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) | 40 | static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) |
@@ -139,7 +138,7 @@ static int do_read_inode(struct inode *inode) | |||
139 | fi->i_pino = le32_to_cpu(ri->i_pino); | 138 | fi->i_pino = le32_to_cpu(ri->i_pino); |
140 | fi->i_dir_level = ri->i_dir_level; | 139 | fi->i_dir_level = ri->i_dir_level; |
141 | 140 | ||
142 | f2fs_init_extent_cache(inode, &ri->i_ext); | 141 | f2fs_init_extent_tree(inode, &ri->i_ext); |
143 | 142 | ||
144 | get_inline_info(fi, ri); | 143 | get_inline_info(fi, ri); |
145 | 144 | ||
@@ -155,6 +154,7 @@ static int do_read_inode(struct inode *inode) | |||
155 | 154 | ||
156 | f2fs_put_page(node_page, 1); | 155 | f2fs_put_page(node_page, 1); |
157 | 156 | ||
157 | stat_inc_inline_xattr(inode); | ||
158 | stat_inc_inline_inode(inode); | 158 | stat_inc_inline_inode(inode); |
159 | stat_inc_inline_dir(inode); | 159 | stat_inc_inline_dir(inode); |
160 | 160 | ||
@@ -237,10 +237,11 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
237 | ri->i_size = cpu_to_le64(i_size_read(inode)); | 237 | ri->i_size = cpu_to_le64(i_size_read(inode)); |
238 | ri->i_blocks = cpu_to_le64(inode->i_blocks); | 238 | ri->i_blocks = cpu_to_le64(inode->i_blocks); |
239 | 239 | ||
240 | read_lock(&F2FS_I(inode)->ext_lock); | 240 | if (F2FS_I(inode)->extent_tree) |
241 | set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); | 241 | set_raw_extent(&F2FS_I(inode)->extent_tree->largest, |
242 | read_unlock(&F2FS_I(inode)->ext_lock); | 242 | &ri->i_ext); |
243 | 243 | else | |
244 | memset(&ri->i_ext, 0, sizeof(ri->i_ext)); | ||
244 | set_raw_inline(F2FS_I(inode), ri); | 245 | set_raw_inline(F2FS_I(inode), ri); |
245 | 246 | ||
246 | ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); | 247 | ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); |
@@ -314,7 +315,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
314 | void f2fs_evict_inode(struct inode *inode) | 315 | void f2fs_evict_inode(struct inode *inode) |
315 | { | 316 | { |
316 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 317 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
317 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; | 318 | struct f2fs_inode_info *fi = F2FS_I(inode); |
319 | nid_t xnid = fi->i_xattr_nid; | ||
320 | int err = 0; | ||
318 | 321 | ||
319 | /* some remaining atomic pages should be discarded */ | 322 | /* some remaining atomic pages should be discarded */ |
320 | if (f2fs_is_atomic_file(inode)) | 323 | if (f2fs_is_atomic_file(inode)) |
@@ -330,41 +333,62 @@ void f2fs_evict_inode(struct inode *inode) | |||
330 | f2fs_bug_on(sbi, get_dirty_pages(inode)); | 333 | f2fs_bug_on(sbi, get_dirty_pages(inode)); |
331 | remove_dirty_dir_inode(inode); | 334 | remove_dirty_dir_inode(inode); |
332 | 335 | ||
336 | f2fs_destroy_extent_tree(inode); | ||
337 | |||
333 | if (inode->i_nlink || is_bad_inode(inode)) | 338 | if (inode->i_nlink || is_bad_inode(inode)) |
334 | goto no_delete; | 339 | goto no_delete; |
335 | 340 | ||
336 | sb_start_intwrite(inode->i_sb); | 341 | sb_start_intwrite(inode->i_sb); |
337 | set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); | 342 | set_inode_flag(fi, FI_NO_ALLOC); |
338 | i_size_write(inode, 0); | 343 | i_size_write(inode, 0); |
339 | 344 | ||
340 | if (F2FS_HAS_BLOCKS(inode)) | 345 | if (F2FS_HAS_BLOCKS(inode)) |
341 | f2fs_truncate(inode); | 346 | err = f2fs_truncate(inode, true); |
342 | 347 | ||
343 | f2fs_lock_op(sbi); | 348 | if (!err) { |
344 | remove_inode_page(inode); | 349 | f2fs_lock_op(sbi); |
345 | f2fs_unlock_op(sbi); | 350 | err = remove_inode_page(inode); |
351 | f2fs_unlock_op(sbi); | ||
352 | } | ||
346 | 353 | ||
347 | sb_end_intwrite(inode->i_sb); | 354 | sb_end_intwrite(inode->i_sb); |
348 | no_delete: | 355 | no_delete: |
356 | stat_dec_inline_xattr(inode); | ||
349 | stat_dec_inline_dir(inode); | 357 | stat_dec_inline_dir(inode); |
350 | stat_dec_inline_inode(inode); | 358 | stat_dec_inline_inode(inode); |
351 | 359 | ||
352 | /* update extent info in inode */ | ||
353 | if (inode->i_nlink) | ||
354 | f2fs_preserve_extent_tree(inode); | ||
355 | f2fs_destroy_extent_tree(inode); | ||
356 | |||
357 | invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); | 360 | invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); |
358 | if (xnid) | 361 | if (xnid) |
359 | invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); | 362 | invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); |
360 | if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) | 363 | if (is_inode_flag_set(fi, FI_APPEND_WRITE)) |
361 | add_dirty_inode(sbi, inode->i_ino, APPEND_INO); | 364 | add_dirty_inode(sbi, inode->i_ino, APPEND_INO); |
362 | if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) | 365 | if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) |
363 | add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); | 366 | add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); |
367 | if (is_inode_flag_set(fi, FI_FREE_NID)) { | ||
368 | if (err && err != -ENOENT) | ||
369 | alloc_nid_done(sbi, inode->i_ino); | ||
370 | else | ||
371 | alloc_nid_failed(sbi, inode->i_ino); | ||
372 | clear_inode_flag(fi, FI_FREE_NID); | ||
373 | } | ||
374 | |||
375 | if (err && err != -ENOENT) { | ||
376 | if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) { | ||
377 | /* | ||
378 | * we get here because we failed to release the inode's | ||
379 | * resources previously; remind the user to run fsck | ||
380 | * to fix this issue. | ||
381 | */ | ||
382 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
383 | f2fs_msg(sbi->sb, KERN_WARNING, | ||
384 | "inode (ino:%lu) resource leak, run fsck " | ||
385 | "to fix this issue!", inode->i_ino); | ||
386 | } | ||
387 | } | ||
364 | out_clear: | 388 | out_clear: |
365 | #ifdef CONFIG_F2FS_FS_ENCRYPTION | 389 | #ifdef CONFIG_F2FS_FS_ENCRYPTION |
366 | if (F2FS_I(inode)->i_crypt_info) | 390 | if (fi->i_crypt_info) |
367 | f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info); | 391 | f2fs_free_encryption_info(inode, fi->i_crypt_info); |
368 | #endif | 392 | #endif |
369 | clear_inode(inode); | 393 | clear_inode(inode); |
370 | } | 394 | } |
@@ -373,6 +397,7 @@ out_clear: | |||
373 | void handle_failed_inode(struct inode *inode) | 397 | void handle_failed_inode(struct inode *inode) |
374 | { | 398 | { |
375 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 399 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
400 | int err = 0; | ||
376 | 401 | ||
377 | clear_nlink(inode); | 402 | clear_nlink(inode); |
378 | make_bad_inode(inode); | 403 | make_bad_inode(inode); |
@@ -380,13 +405,29 @@ void handle_failed_inode(struct inode *inode) | |||
380 | 405 | ||
381 | i_size_write(inode, 0); | 406 | i_size_write(inode, 0); |
382 | if (F2FS_HAS_BLOCKS(inode)) | 407 | if (F2FS_HAS_BLOCKS(inode)) |
383 | f2fs_truncate(inode); | 408 | err = f2fs_truncate(inode, false); |
384 | 409 | ||
385 | remove_inode_page(inode); | 410 | if (!err) |
411 | err = remove_inode_page(inode); | ||
412 | |||
413 | /* | ||
414 | * if we skipped truncate_node in remove_inode_page because we failed | ||
415 | * before, it's better to find another way to release the resources of | ||
416 | * this inode (e.g. valid block count, node block or nid). Here we | ||
417 | * choose to add this inode to the orphan list, so that we can call | ||
418 | * iput to release it in the orphan recovery flow. | ||
419 | * | ||
420 | * Note: we should add the inode to the orphan list before | ||
421 | * f2fs_unlock_op(), so we can prevent losing this orphan when | ||
422 | * encountering a checkpoint and a subsequent sudden power-off. | ||
423 | */ | ||
424 | if (err && err != -ENOENT) { | ||
425 | err = acquire_orphan_inode(sbi); | ||
426 | if (!err) | ||
427 | add_orphan_inode(sbi, inode->i_ino); | ||
428 | } | ||
386 | 429 | ||
387 | clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); | 430 | set_inode_flag(F2FS_I(inode), FI_FREE_NID); |
388 | clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); | ||
389 | alloc_nid_failed(sbi, inode->i_ino); | ||
390 | f2fs_unlock_op(sbi); | 431 | f2fs_unlock_op(sbi); |
391 | 432 | ||
392 | /* iput will drop the inode object */ | 433 | /* iput will drop the inode object */ |
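The fallback leans on the orphan machinery whose prototypes changed earlier in this diff: reserve an orphan slot, record the ino, and let a later recover_orphan_inodes() pass iput() the inode to finish the release. The pairing in isolation:

    /* sketch: park a half-released inode on the orphan list */
    err = acquire_orphan_inode(sbi);   /* may fail if orphan slots are full */
    if (!err)
            add_orphan_inode(sbi, inode->i_ino);
    /* on success, orphan recovery will iput() this ino after a crash */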
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index fdbae21ee8fb..a680bf38e4f0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -53,7 +53,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) | |||
53 | if (err) { | 53 | if (err) { |
54 | err = -EINVAL; | 54 | err = -EINVAL; |
55 | nid_free = true; | 55 | nid_free = true; |
56 | goto out; | 56 | goto fail; |
57 | } | 57 | } |
58 | 58 | ||
59 | /* If the directory is encrypted, then we should encrypt the inode. */ | 59 | /* If the directory is encrypted, then we should encrypt the inode. */ |
@@ -65,6 +65,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) | |||
65 | if (f2fs_may_inline_dentry(inode)) | 65 | if (f2fs_may_inline_dentry(inode)) |
66 | set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); | 66 | set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); |
67 | 67 | ||
68 | f2fs_init_extent_tree(inode, NULL); | ||
69 | |||
70 | stat_inc_inline_xattr(inode); | ||
68 | stat_inc_inline_inode(inode); | 71 | stat_inc_inline_inode(inode); |
69 | stat_inc_inline_dir(inode); | 72 | stat_inc_inline_dir(inode); |
70 | 73 | ||
@@ -72,15 +75,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) | |||
72 | mark_inode_dirty(inode); | 75 | mark_inode_dirty(inode); |
73 | return inode; | 76 | return inode; |
74 | 77 | ||
75 | out: | ||
76 | clear_nlink(inode); | ||
77 | unlock_new_inode(inode); | ||
78 | fail: | 78 | fail: |
79 | trace_f2fs_new_inode(inode, err); | 79 | trace_f2fs_new_inode(inode, err); |
80 | make_bad_inode(inode); | 80 | make_bad_inode(inode); |
81 | iput(inode); | ||
82 | if (nid_free) | 81 | if (nid_free) |
83 | alloc_nid_failed(sbi, ino); | 82 | set_inode_flag(F2FS_I(inode), FI_FREE_NID); |
83 | iput(inode); | ||
84 | return ERR_PTR(err); | 84 | return ERR_PTR(err); |
85 | } | 85 | } |
86 | 86 | ||
@@ -89,7 +89,14 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) | |||
89 | size_t slen = strlen(s); | 89 | size_t slen = strlen(s); |
90 | size_t sublen = strlen(sub); | 90 | size_t sublen = strlen(sub); |
91 | 91 | ||
92 | if (sublen > slen) | 92 | /* |
93 | * the filename format of a multimedia file should be defined as: | ||
94 | * "filename + '.' + extension". | ||
95 | */ | ||
96 | if (slen < sublen + 2) | ||
97 | return 0; | ||
98 | |||
99 | if (s[slen - sublen - 1] != '.') | ||
93 | return 0; | 100 | return 0; |
94 | 101 | ||
95 | return !strncasecmp(s + slen - sublen, sub, sublen); | 102 | return !strncasecmp(s + slen - sublen, sub, sublen); |
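The two new checks encode the minimal legal shape "name.ext": the string must be long enough for at least one name character plus the dot plus the extension, and the byte just before the extension must be '.'. Worked cases for is_multimedia_file(s, "mp3") under the new rules (illustrative strings only):

    /* "a.mp3" -> slen=5, sublen=3: 5 >= 3+2 and s[1] == '.'   -> match
     * ".mp3"  -> slen=4 < sublen + 2 = 5 (matched before!)    -> no match
     * "amp3"  -> also fails the length check                  -> no match
     * "a-mp3" -> s[1] is '-', not '.'                         -> no match */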
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7dd63b794bfb..27d1a74dd6f3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -159,7 +159,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, | |||
159 | 159 | ||
160 | head = radix_tree_lookup(&nm_i->nat_set_root, set); | 160 | head = radix_tree_lookup(&nm_i->nat_set_root, set); |
161 | if (!head) { | 161 | if (!head) { |
162 | head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); | 162 | head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS); |
163 | 163 | ||
164 | INIT_LIST_HEAD(&head->entry_list); | 164 | INIT_LIST_HEAD(&head->entry_list); |
165 | INIT_LIST_HEAD(&head->set_list); | 165 | INIT_LIST_HEAD(&head->set_list); |
@@ -246,7 +246,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | |||
246 | { | 246 | { |
247 | struct nat_entry *new; | 247 | struct nat_entry *new; |
248 | 248 | ||
249 | new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); | 249 | new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); |
250 | f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); | 250 | f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); |
251 | memset(new, 0, sizeof(struct nat_entry)); | 251 | memset(new, 0, sizeof(struct nat_entry)); |
252 | nat_set_nid(new, nid); | 252 | nat_set_nid(new, nid); |
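Both NAT-cache allocations move from GFP_ATOMIC to GFP_NOFS: these call sites are allowed to sleep, and GFP_NOFS blocks for reclaim without re-entering the filesystem, whereas GFP_ATOMIC fails readily under memory pressure. The body of f2fs_kmem_cache_alloc() is not part of this hunk; given the "use __GFP_NOFAIL to avoid infinite loop" patch in this series, a plausible sketch is:

/* hypothetical sketch of the allocation wrapper (not shown in this diff):
 * __GFP_NOFAIL makes the allocator retry internally instead of the
 * caller spinning in its own retry loop */
static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
						gfp_t flags)
{
	return kmem_cache_alloc(cachep, flags | __GFP_NOFAIL);
}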
@@ -306,6 +306,10 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |||
306 | if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { | 306 | if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { |
307 | unsigned char version = nat_get_version(e); | 307 | unsigned char version = nat_get_version(e); |
308 | nat_set_version(e, inc_node_version(version)); | 308 | nat_set_version(e, inc_node_version(version)); |
309 | |||
310 | /* in order to reuse the nid */ | ||
311 | if (nm_i->next_scan_nid > ni->nid) | ||
312 | nm_i->next_scan_nid = ni->nid; | ||
309 | } | 313 | } |
310 | 314 | ||
311 | /* change address */ | 315 | /* change address */ |
@@ -328,11 +332,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | |||
328 | int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | 332 | int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) |
329 | { | 333 | { |
330 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 334 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
335 | int nr = nr_shrink; | ||
331 | 336 | ||
332 | if (available_free_memory(sbi, NAT_ENTRIES)) | 337 | if (!down_write_trylock(&nm_i->nat_tree_lock)) |
333 | return 0; | 338 | return 0; |
334 | 339 | ||
335 | down_write(&nm_i->nat_tree_lock); | ||
336 | while (nr_shrink && !list_empty(&nm_i->nat_entries)) { | 340 | while (nr_shrink && !list_empty(&nm_i->nat_entries)) { |
337 | struct nat_entry *ne; | 341 | struct nat_entry *ne; |
338 | ne = list_first_entry(&nm_i->nat_entries, | 342 | ne = list_first_entry(&nm_i->nat_entries, |
@@ -341,7 +345,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | |||
341 | nr_shrink--; | 345 | nr_shrink--; |
342 | } | 346 | } |
343 | up_write(&nm_i->nat_tree_lock); | 347 | up_write(&nm_i->nat_tree_lock); |
344 | return nr_shrink; | 348 | return nr - nr_shrink; |
345 | } | 349 | } |
346 | 350 | ||
347 | /* | 351 | /* |
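try_to_free_nats() now backs off with down_write_trylock() instead of blocking, and returns the number of entries actually freed (nr - nr_shrink) so callers can account for progress. The same trylock-and-count pattern in isolation (a generic sketch, not f2fs code):

#include <linux/list.h>
#include <linux/rwsem.h>

/* generic shrink pattern: never block reclaim on a contended lock,
 * and report how many objects were actually freed */
static int shrink_list(struct rw_semaphore *lock, struct list_head *head,
			int nr_shrink, void (*free_one)(struct list_head *))
{
	int nr = nr_shrink;

	if (!down_write_trylock(lock))
		return 0;
	while (nr_shrink && !list_empty(head)) {
		struct list_head *p = head->next;

		list_del(p);
		free_one(p);
		nr_shrink--;
	}
	up_write(lock);
	return nr - nr_shrink;
}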
@@ -898,17 +902,20 @@ int truncate_xattr_node(struct inode *inode, struct page *page) | |||
898 | * Caller should grab and release a rwsem by calling f2fs_lock_op() and | 902 | * Caller should grab and release a rwsem by calling f2fs_lock_op() and |
899 | * f2fs_unlock_op(). | 903 | * f2fs_unlock_op(). |
900 | */ | 904 | */ |
901 | void remove_inode_page(struct inode *inode) | 905 | int remove_inode_page(struct inode *inode) |
902 | { | 906 | { |
903 | struct dnode_of_data dn; | 907 | struct dnode_of_data dn; |
908 | int err; | ||
904 | 909 | ||
905 | set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); | 910 | set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); |
906 | if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) | 911 | err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); |
907 | return; | 912 | if (err) |
913 | return err; | ||
908 | 914 | ||
909 | if (truncate_xattr_node(inode, dn.inode_page)) { | 915 | err = truncate_xattr_node(inode, dn.inode_page); |
916 | if (err) { | ||
910 | f2fs_put_dnode(&dn); | 917 | f2fs_put_dnode(&dn); |
911 | return; | 918 | return err; |
912 | } | 919 | } |
913 | 920 | ||
914 | /* remove potential inline_data blocks */ | 921 | /* remove potential inline_data blocks */ |
@@ -922,6 +929,7 @@ void remove_inode_page(struct inode *inode) | |||
922 | 929 | ||
923 | /* will put inode & node pages */ | 930 | /* will put inode & node pages */ |
924 | truncate_node(&dn); | 931 | truncate_node(&dn); |
932 | return 0; | ||
925 | } | 933 | } |
926 | 934 | ||
927 | struct page *new_inode_page(struct inode *inode) | 935 | struct page *new_inode_page(struct inode *inode) |
@@ -991,8 +999,7 @@ fail: | |||
991 | /* | 999 | /* |
992 | * Caller should release the page according to the following return values. | 1000 | * Caller should release the page according to the following return values. |
993 | * 0: f2fs_put_page(page, 0) | 1001 | * 0: f2fs_put_page(page, 0) |
994 | * LOCKED_PAGE: f2fs_put_page(page, 1) | 1002 | * LOCKED_PAGE or error: f2fs_put_page(page, 1) |
995 | * error: nothing | ||
996 | */ | 1003 | */ |
997 | static int read_node_page(struct page *page, int rw) | 1004 | static int read_node_page(struct page *page, int rw) |
998 | { | 1005 | { |
@@ -1010,7 +1017,6 @@ static int read_node_page(struct page *page, int rw) | |||
1010 | 1017 | ||
1011 | if (unlikely(ni.blk_addr == NULL_ADDR)) { | 1018 | if (unlikely(ni.blk_addr == NULL_ADDR)) { |
1012 | ClearPageUptodate(page); | 1019 | ClearPageUptodate(page); |
1013 | f2fs_put_page(page, 1); | ||
1014 | return -ENOENT; | 1020 | return -ENOENT; |
1015 | } | 1021 | } |
1016 | 1022 | ||
@@ -1041,10 +1047,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) | |||
1041 | return; | 1047 | return; |
1042 | 1048 | ||
1043 | err = read_node_page(apage, READA); | 1049 | err = read_node_page(apage, READA); |
1044 | if (err == 0) | 1050 | f2fs_put_page(apage, err ? 1 : 0); |
1045 | f2fs_put_page(apage, 0); | ||
1046 | else if (err == LOCKED_PAGE) | ||
1047 | f2fs_put_page(apage, 1); | ||
1048 | } | 1051 | } |
1049 | 1052 | ||
1050 | struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) | 1053 | struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) |
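Under the new read_node_page() contract an error leaves the page locked, so every outcome maps onto a single put: unlock (put with 1) whenever the caller still owns the lock, i.e. on LOCKED_PAGE or error. Spelled out as a sketch (mirrors the simplified ra_node_page() above; illustrative only):

/* new contract:
 *   0           -> I/O submitted, page unlocked: put with 0
 *   LOCKED_PAGE -> page up to date, still locked: put with 1
 *   < 0         -> error, page still locked:      put with 1 */
static void ra_node_page_sketch(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *apage = grab_cache_page(NODE_MAPPING(sbi), nid);

	if (apage) {
		int err = read_node_page(apage, READA);
		/* one put now covers all three outcomes */
		f2fs_put_page(apage, err ? 1 : 0);
	}
}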
@@ -1057,10 +1060,12 @@ repeat: | |||
1057 | return ERR_PTR(-ENOMEM); | 1060 | return ERR_PTR(-ENOMEM); |
1058 | 1061 | ||
1059 | err = read_node_page(page, READ_SYNC); | 1062 | err = read_node_page(page, READ_SYNC); |
1060 | if (err < 0) | 1063 | if (err < 0) { |
1064 | f2fs_put_page(page, 1); | ||
1061 | return ERR_PTR(err); | 1065 | return ERR_PTR(err); |
1062 | else if (err != LOCKED_PAGE) | 1066 | } else if (err != LOCKED_PAGE) { |
1063 | lock_page(page); | 1067 | lock_page(page); |
1068 | } | ||
1064 | 1069 | ||
1065 | if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { | 1070 | if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { |
1066 | ClearPageUptodate(page); | 1071 | ClearPageUptodate(page); |
@@ -1096,10 +1101,12 @@ repeat: | |||
1096 | return ERR_PTR(-ENOMEM); | 1101 | return ERR_PTR(-ENOMEM); |
1097 | 1102 | ||
1098 | err = read_node_page(page, READ_SYNC); | 1103 | err = read_node_page(page, READ_SYNC); |
1099 | if (err < 0) | 1104 | if (err < 0) { |
1105 | f2fs_put_page(page, 1); | ||
1100 | return ERR_PTR(err); | 1106 | return ERR_PTR(err); |
1101 | else if (err == LOCKED_PAGE) | 1107 | } else if (err == LOCKED_PAGE) { |
1102 | goto page_hit; | 1108 | goto page_hit; |
1109 | } | ||
1103 | 1110 | ||
1104 | blk_start_plug(&plug); | 1111 | blk_start_plug(&plug); |
1105 | 1112 | ||
@@ -1533,7 +1540,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) | |||
1533 | if (unlikely(nid >= nm_i->max_nid)) | 1540 | if (unlikely(nid >= nm_i->max_nid)) |
1534 | nid = 0; | 1541 | nid = 0; |
1535 | 1542 | ||
1536 | if (i++ == FREE_NID_PAGES) | 1543 | if (++i >= FREE_NID_PAGES) |
1537 | break; | 1544 | break; |
1538 | } | 1545 | } |
1539 | 1546 | ||
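The free-nid scan used a post-increment test, so it read one NAT page more than FREE_NID_PAGES intended; the pre-increment test stops exactly at the limit. A small standalone demonstration of the difference (plain C, with a limit of 4 standing in for FREE_NID_PAGES):

#include <stdio.h>

int main(void)
{
	int limit = 4, i, pages;

	for (i = 0, pages = 0; ; ) {
		pages++;			/* scan one NAT page */
		if (i++ == limit)		/* old test */
			break;
	}
	printf("old: %d pages\n", pages);	/* 5 */

	for (i = 0, pages = 0; ; ) {
		pages++;
		if (++i >= limit)		/* fixed test */
			break;
	}
	printf("new: %d pages\n", pages);	/* 4 */
	return 0;
}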
@@ -1570,6 +1577,8 @@ retry: | |||
1570 | 1577 | ||
1571 | /* We should not use stale free nids created by build_free_nids */ | 1578 | /* We should not use stale free nids created by build_free_nids */ |
1572 | if (nm_i->fcnt && !on_build_free_nids(nm_i)) { | 1579 | if (nm_i->fcnt && !on_build_free_nids(nm_i)) { |
1580 | struct node_info ni; | ||
1581 | |||
1573 | f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); | 1582 | f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); |
1574 | list_for_each_entry(i, &nm_i->free_nid_list, list) | 1583 | list_for_each_entry(i, &nm_i->free_nid_list, list) |
1575 | if (i->state == NID_NEW) | 1584 | if (i->state == NID_NEW) |
@@ -1580,6 +1589,13 @@ retry: | |||
1580 | i->state = NID_ALLOC; | 1589 | i->state = NID_ALLOC; |
1581 | nm_i->fcnt--; | 1590 | nm_i->fcnt--; |
1582 | spin_unlock(&nm_i->free_nid_list_lock); | 1591 | spin_unlock(&nm_i->free_nid_list_lock); |
1592 | |||
1593 | /* check whether the nid is already allocated */ ||
1594 | get_node_info(sbi, *nid, &ni); | ||
1595 | if (ni.blk_addr != NULL_ADDR) { | ||
1596 | alloc_nid_done(sbi, *nid); | ||
1597 | goto retry; | ||
1598 | } | ||
1583 | return true; | 1599 | return true; |
1584 | } | 1600 | } |
1585 | spin_unlock(&nm_i->free_nid_list_lock); | 1601 | spin_unlock(&nm_i->free_nid_list_lock); |
@@ -1636,6 +1652,32 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |||
1636 | kmem_cache_free(free_nid_slab, i); | 1652 | kmem_cache_free(free_nid_slab, i); |
1637 | } | 1653 | } |
1638 | 1654 | ||
1655 | int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) | ||
1656 | { | ||
1657 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1658 | struct free_nid *i, *next; | ||
1659 | int nr = nr_shrink; | ||
1660 | |||
1661 | if (!mutex_trylock(&nm_i->build_lock)) | ||
1662 | return 0; | ||
1663 | |||
1664 | spin_lock(&nm_i->free_nid_list_lock); | ||
1665 | list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { | ||
1666 | if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK) | ||
1667 | break; | ||
1668 | if (i->state == NID_ALLOC) | ||
1669 | continue; | ||
1670 | __del_from_free_nid_list(nm_i, i); | ||
1671 | kmem_cache_free(free_nid_slab, i); | ||
1672 | nm_i->fcnt--; | ||
1673 | nr_shrink--; | ||
1674 | } | ||
1675 | spin_unlock(&nm_i->free_nid_list_lock); | ||
1676 | mutex_unlock(&nm_i->build_lock); | ||
1677 | |||
1678 | return nr - nr_shrink; | ||
1679 | } | ||
1680 | |||
1639 | void recover_inline_xattr(struct inode *inode, struct page *page) | 1681 | void recover_inline_xattr(struct inode *inode, struct page *page) |
1640 | { | 1682 | { |
1641 | void *src_addr, *dst_addr; | 1683 | void *src_addr, *dst_addr; |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 24a8c1d4f45f..faec2ca004b9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -399,14 +399,35 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
399 | f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); | 399 | f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); |
400 | f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); | 400 | f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); |
401 | 401 | ||
402 | for (; start < end; start++) { | 402 | for (; start < end; start++, dn.ofs_in_node++) { |
403 | block_t src, dest; | 403 | block_t src, dest; |
404 | 404 | ||
405 | src = datablock_addr(dn.node_page, dn.ofs_in_node); | 405 | src = datablock_addr(dn.node_page, dn.ofs_in_node); |
406 | dest = datablock_addr(page, dn.ofs_in_node); | 406 | dest = datablock_addr(page, dn.ofs_in_node); |
407 | 407 | ||
408 | if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && | 408 | /* skip recovering if dest is the same as src */ |
409 | is_valid_blkaddr(sbi, dest, META_POR)) { | 409 | if (src == dest) |
410 | continue; | ||
411 | |||
412 | /* dest is invalid, just invalidate src block */ | ||
413 | if (dest == NULL_ADDR) { | ||
414 | truncate_data_blocks_range(&dn, 1); | ||
415 | continue; | ||
416 | } | ||
417 | |||
418 | /* | ||
419 | * dest is reserved block, invalidate src block | ||
420 | * and then reserve one new block in dnode page. | ||
421 | */ | ||
422 | if (dest == NEW_ADDR) { | ||
423 | truncate_data_blocks_range(&dn, 1); | ||
424 | err = reserve_new_block(&dn); | ||
425 | f2fs_bug_on(sbi, err); | ||
426 | continue; | ||
427 | } | ||
428 | |||
429 | /* dest is valid block, try to recover from src to dest */ | ||
430 | if (is_valid_blkaddr(sbi, dest, META_POR)) { | ||
410 | 431 | ||
411 | if (src == NULL_ADDR) { | 432 | if (src == NULL_ADDR) { |
412 | err = reserve_new_block(&dn); | 433 | err = reserve_new_block(&dn); |
@@ -424,7 +445,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
424 | ni.version, false); | 445 | ni.version, false); |
425 | recovered++; | 446 | recovered++; |
426 | } | 447 | } |
427 | dn.ofs_in_node++; | ||
428 | } | 448 | } |
429 | 449 | ||
430 | if (IS_INODE(dn.node_page)) | 450 | if (IS_INODE(dn.node_page)) |
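The rewritten loop dispatches on the (src, dest) pair explicitly instead of one compound condition; restated compactly (no new logic, just the cases from the hunks above):

/* (src, dest) handling in the reworked do_recover_data() loop:
 *
 *   src == dest                -> nothing to recover, skip
 *   dest == NULL_ADDR          -> invalidate src (truncate one block)
 *   dest == NEW_ADDR           -> invalidate src, reserve a new block
 *   dest valid (META_POR ok)   -> recover data from src to dest
 *
 * dn.ofs_in_node now advances in the for-statement itself, so every
 * continue path stays in step with the block index.
 */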
@@ -525,14 +545,12 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
525 | 545 | ||
526 | INIT_LIST_HEAD(&inode_list); | 546 | INIT_LIST_HEAD(&inode_list); |
527 | 547 | ||
528 | /* step #1: find fsynced inode numbers */ | ||
529 | set_sbi_flag(sbi, SBI_POR_DOING); | ||
530 | |||
531 | /* prevent checkpoint */ | 548 | /* prevent checkpoint */ |
532 | mutex_lock(&sbi->cp_mutex); | 549 | mutex_lock(&sbi->cp_mutex); |
533 | 550 | ||
534 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 551 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
535 | 552 | ||
553 | /* step #1: find fsynced inode numbers */ | ||
536 | err = find_fsync_dnodes(sbi, &inode_list); | 554 | err = find_fsync_dnodes(sbi, &inode_list); |
537 | if (err) | 555 | if (err) |
538 | goto out; | 556 | goto out; |
@@ -561,11 +579,20 @@ out: | |||
561 | 579 | ||
562 | clear_sbi_flag(sbi, SBI_POR_DOING); | 580 | clear_sbi_flag(sbi, SBI_POR_DOING); |
563 | if (err) { | 581 | if (err) { |
564 | discard_next_dnode(sbi, blkaddr); | 582 | bool invalidate = false; |
583 | |||
584 | if (discard_next_dnode(sbi, blkaddr)) | ||
585 | invalidate = true; | ||
565 | 586 | ||
566 | /* Flush all the NAT/SIT pages */ | 587 | /* Flush all the NAT/SIT pages */ |
567 | while (get_pages(sbi, F2FS_DIRTY_META)) | 588 | while (get_pages(sbi, F2FS_DIRTY_META)) |
568 | sync_meta_pages(sbi, META, LONG_MAX); | 589 | sync_meta_pages(sbi, META, LONG_MAX); |
590 | |||
591 | /* invalidate temporary meta page */ | ||
592 | if (invalidate) | ||
593 | invalidate_mapping_pages(META_MAPPING(sbi), | ||
594 | blkaddr, blkaddr); | ||
595 | |||
569 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | 596 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); |
570 | mutex_unlock(&sbi->cp_mutex); | 597 | mutex_unlock(&sbi->cp_mutex); |
571 | } else if (need_writecp) { | 598 | } else if (need_writecp) { |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 61b97f9cb9f6..78e6d0696847 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -197,28 +197,20 @@ void register_inmem_page(struct inode *inode, struct page *page) | |||
197 | { | 197 | { |
198 | struct f2fs_inode_info *fi = F2FS_I(inode); | 198 | struct f2fs_inode_info *fi = F2FS_I(inode); |
199 | struct inmem_pages *new; | 199 | struct inmem_pages *new; |
200 | int err; | ||
201 | 200 | ||
202 | SetPagePrivate(page); | ||
203 | f2fs_trace_pid(page); | 201 | f2fs_trace_pid(page); |
204 | 202 | ||
203 | set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE); | ||
204 | SetPagePrivate(page); | ||
205 | |||
205 | new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); | 206 | new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); |
206 | 207 | ||
207 | /* add atomic page indices to the list */ | 208 | /* add atomic page indices to the list */ |
208 | new->page = page; | 209 | new->page = page; |
209 | INIT_LIST_HEAD(&new->list); | 210 | INIT_LIST_HEAD(&new->list); |
210 | retry: | 211 | |
211 | /* increase reference count with clean state */ | 212 | /* increase reference count with clean state */ |
212 | mutex_lock(&fi->inmem_lock); | 213 | mutex_lock(&fi->inmem_lock); |
213 | err = radix_tree_insert(&fi->inmem_root, page->index, new); | ||
214 | if (err == -EEXIST) { | ||
215 | mutex_unlock(&fi->inmem_lock); | ||
216 | kmem_cache_free(inmem_entry_slab, new); | ||
217 | return; | ||
218 | } else if (err) { | ||
219 | mutex_unlock(&fi->inmem_lock); | ||
220 | goto retry; | ||
221 | } | ||
222 | get_page(page); | 214 | get_page(page); |
223 | list_add_tail(&new->list, &fi->inmem_pages); | 215 | list_add_tail(&new->list, &fi->inmem_pages); |
224 | inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); | 216 | inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); |
@@ -227,7 +219,7 @@ retry: | |||
227 | trace_f2fs_register_inmem_page(page, INMEM); | 219 | trace_f2fs_register_inmem_page(page, INMEM); |
228 | } | 220 | } |
229 | 221 | ||
230 | void commit_inmem_pages(struct inode *inode, bool abort) | 222 | int commit_inmem_pages(struct inode *inode, bool abort) |
231 | { | 223 | { |
232 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 224 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
233 | struct f2fs_inode_info *fi = F2FS_I(inode); | 225 | struct f2fs_inode_info *fi = F2FS_I(inode); |
@@ -239,6 +231,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) | |||
239 | .rw = WRITE_SYNC | REQ_PRIO, | 231 | .rw = WRITE_SYNC | REQ_PRIO, |
240 | .encrypted_page = NULL, | 232 | .encrypted_page = NULL, |
241 | }; | 233 | }; |
234 | int err = 0; | ||
242 | 235 | ||
243 | /* | 236 | /* |
244 | * The abort is true only when f2fs_evict_inode is called. | 237 | * The abort is true only when f2fs_evict_inode is called. |
@@ -254,8 +247,8 @@ void commit_inmem_pages(struct inode *inode, bool abort) | |||
254 | 247 | ||
255 | mutex_lock(&fi->inmem_lock); | 248 | mutex_lock(&fi->inmem_lock); |
256 | list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { | 249 | list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { |
250 | lock_page(cur->page); | ||
257 | if (!abort) { | 251 | if (!abort) { |
258 | lock_page(cur->page); | ||
259 | if (cur->page->mapping == inode->i_mapping) { | 252 | if (cur->page->mapping == inode->i_mapping) { |
260 | set_page_dirty(cur->page); | 253 | set_page_dirty(cur->page); |
261 | f2fs_wait_on_page_writeback(cur->page, DATA); | 254 | f2fs_wait_on_page_writeback(cur->page, DATA); |
@@ -263,15 +256,20 @@ void commit_inmem_pages(struct inode *inode, bool abort) | |||
263 | inode_dec_dirty_pages(inode); | 256 | inode_dec_dirty_pages(inode); |
264 | trace_f2fs_commit_inmem_page(cur->page, INMEM); | 257 | trace_f2fs_commit_inmem_page(cur->page, INMEM); |
265 | fio.page = cur->page; | 258 | fio.page = cur->page; |
266 | do_write_data_page(&fio); | 259 | err = do_write_data_page(&fio); |
267 | submit_bio = true; | 260 | submit_bio = true; |
261 | if (err) { | ||
262 | unlock_page(cur->page); | ||
263 | break; | ||
264 | } | ||
268 | } | 265 | } |
269 | f2fs_put_page(cur->page, 1); | ||
270 | } else { | 266 | } else { |
271 | trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); | 267 | trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); |
272 | put_page(cur->page); | ||
273 | } | 268 | } |
274 | radix_tree_delete(&fi->inmem_root, cur->page->index); | 269 | set_page_private(cur->page, 0); |
270 | ClearPagePrivate(cur->page); | ||
271 | f2fs_put_page(cur->page, 1); | ||
272 | |||
275 | list_del(&cur->list); | 273 | list_del(&cur->list); |
276 | kmem_cache_free(inmem_entry_slab, cur); | 274 | kmem_cache_free(inmem_entry_slab, cur); |
277 | dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); | 275 | dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); |
@@ -283,6 +281,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) | |||
283 | if (submit_bio) | 281 | if (submit_bio) |
284 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | 282 | f2fs_submit_merged_bio(sbi, DATA, WRITE); |
285 | } | 283 | } |
284 | return err; | ||
286 | } | 285 | } |
287 | 286 | ||
288 | /* | 287 | /* |
@@ -304,10 +303,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) | |||
304 | void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) | 303 | void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) |
305 | { | 304 | { |
306 | /* try to shrink extent cache when there is no enough memory */ | 305 | /* try to shrink extent cache when there is no enough memory */ |
307 | f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); | 306 | if (!available_free_memory(sbi, EXTENT_CACHE)) |
307 | f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); | ||
308 | |||
309 | /* check the # of cached NAT entries */ | ||
310 | if (!available_free_memory(sbi, NAT_ENTRIES)) | ||
311 | try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); | ||
312 | |||
313 | if (!available_free_memory(sbi, FREE_NIDS)) | ||
314 | try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); | ||
308 | 315 | ||
309 | /* check the # of cached NAT entries and prefree segments */ | 316 | /* checkpoint is the only way to shrink partial cached entries */ |
310 | if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || | 317 | if (!available_free_memory(sbi, NAT_ENTRIES) || |
311 | excess_prefree_segs(sbi) || | 318 | excess_prefree_segs(sbi) || |
312 | !available_free_memory(sbi, INO_ENTRIES)) | 319 | !available_free_memory(sbi, INO_ENTRIES)) |
313 | f2fs_sync_fs(sbi->sb, true); | 320 | f2fs_sync_fs(sbi->sb, true); |
@@ -323,10 +330,12 @@ repeat: | |||
323 | return 0; | 330 | return 0; |
324 | 331 | ||
325 | if (!llist_empty(&fcc->issue_list)) { | 332 | if (!llist_empty(&fcc->issue_list)) { |
326 | struct bio *bio = bio_alloc(GFP_NOIO, 0); | 333 | struct bio *bio; |
327 | struct flush_cmd *cmd, *next; | 334 | struct flush_cmd *cmd, *next; |
328 | int ret; | 335 | int ret; |
329 | 336 | ||
337 | bio = f2fs_bio_alloc(0); | ||
338 | |||
330 | fcc->dispatch_list = llist_del_all(&fcc->issue_list); | 339 | fcc->dispatch_list = llist_del_all(&fcc->issue_list); |
331 | fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); | 340 | fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); |
332 | 341 | ||
@@ -358,8 +367,15 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) | |||
358 | if (test_opt(sbi, NOBARRIER)) | 367 | if (test_opt(sbi, NOBARRIER)) |
359 | return 0; | 368 | return 0; |
360 | 369 | ||
361 | if (!test_opt(sbi, FLUSH_MERGE)) | 370 | if (!test_opt(sbi, FLUSH_MERGE)) { |
362 | return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); | 371 | struct bio *bio = f2fs_bio_alloc(0); |
372 | int ret; | ||
373 | |||
374 | bio->bi_bdev = sbi->sb->s_bdev; | ||
375 | ret = submit_bio_wait(WRITE_FLUSH, bio); | ||
376 | bio_put(bio); | ||
377 | return ret; | ||
378 | } | ||
363 | 379 | ||
364 | init_completion(&cmd.wait); | 380 | init_completion(&cmd.wait); |
365 | 381 | ||
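Both flush paths now allocate through f2fs_bio_alloc() and perform no NULL check. Its body is not part of this diff; a plausible sketch, consistent with the "__GFP_NOFAIL" patch in the series, is:

/* hypothetical sketch of f2fs_bio_alloc() (not shown in this hunk):
 * a never-failing bio allocation, which is why the flush paths above
 * can dereference the result unconditionally */
static inline struct bio *f2fs_bio_alloc(int npages)
{
	return bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
}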
@@ -503,7 +519,7 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, | |||
503 | return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); | 519 | return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); |
504 | } | 520 | } |
505 | 521 | ||
506 | void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) | 522 | bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) |
507 | { | 523 | { |
508 | int err = -ENOTSUPP; | 524 | int err = -ENOTSUPP; |
509 | 525 | ||
@@ -513,13 +529,16 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) | |||
513 | unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); | 529 | unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); |
514 | 530 | ||
515 | if (f2fs_test_bit(offset, se->discard_map)) | 531 | if (f2fs_test_bit(offset, se->discard_map)) |
516 | return; | 532 | return false; |
517 | 533 | ||
518 | err = f2fs_issue_discard(sbi, blkaddr, 1); | 534 | err = f2fs_issue_discard(sbi, blkaddr, 1); |
519 | } | 535 | } |
520 | 536 | ||
521 | if (err) | 537 | if (err) { |
522 | update_meta_page(sbi, NULL, blkaddr); | 538 | update_meta_page(sbi, NULL, blkaddr); |
539 | return true; | ||
540 | } | ||
541 | return false; | ||
523 | } | 542 | } |
524 | 543 | ||
525 | static void __add_discard_entry(struct f2fs_sb_info *sbi, | 544 | static void __add_discard_entry(struct f2fs_sb_info *sbi, |
@@ -1218,7 +1237,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, | |||
1218 | mutex_lock(&sit_i->sentry_lock); | 1237 | mutex_lock(&sit_i->sentry_lock); |
1219 | 1238 | ||
1220 | /* direct_io'ed data is aligned to the segment for better performance */ | 1239 | /* direct_io'ed data is aligned to the segment for better performance */ |
1221 | if (direct_io && curseg->next_blkoff) | 1240 | if (direct_io && curseg->next_blkoff && |
1241 | !has_not_enough_free_secs(sbi, 0)) | ||
1222 | __allocate_new_segments(sbi, type); | 1242 | __allocate_new_segments(sbi, type); |
1223 | 1243 | ||
1224 | *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); | 1244 | *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
@@ -1733,7 +1753,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, | |||
1733 | static struct sit_entry_set *grab_sit_entry_set(void) | 1753 | static struct sit_entry_set *grab_sit_entry_set(void) |
1734 | { | 1754 | { |
1735 | struct sit_entry_set *ses = | 1755 | struct sit_entry_set *ses = |
1736 | f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); | 1756 | f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS); |
1737 | 1757 | ||
1738 | ses->entry_cnt = 0; | 1758 | ses->entry_cnt = 0; |
1739 | INIT_LIST_HEAD(&ses->set_list); | 1759 | INIT_LIST_HEAD(&ses->set_list); |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 79e7b879a753..b6e4ed15c698 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -177,6 +177,15 @@ struct segment_allocation { | |||
177 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); | 177 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); |
178 | }; | 178 | }; |
179 | 179 | ||
180 | /* | ||
181 | * this value is set in a page's private data to indicate that the ||
182 | * page is atomically written and is on the inmem_pages list. ||
183 | */ | ||
184 | #define ATOMIC_WRITTEN_PAGE 0x0000ffff | ||
185 | |||
186 | #define IS_ATOMIC_WRITTEN_PAGE(page) \ | ||
187 | (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) | ||
188 | |||
180 | struct inmem_pages { | 189 | struct inmem_pages { |
181 | struct list_head list; | 190 | struct list_head list; |
182 | struct page *page; | 191 | struct page *page; |
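Tagging the page's private word means any writeback path can recognize an in-flight atomic page from the page alone, without walking inmem_pages. A hedged sketch of the kind of guard a write path could apply (hypothetical function name, illustrative only):

/* pages tagged ATOMIC_WRITTEN_PAGE must be written by the atomic
 * commit path, not by ordinary writeback */
static bool skip_inmem_page(struct page *page)
{
	return PagePrivate(page) && IS_ATOMIC_WRITTEN_PAGE(page);
}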
@@ -555,16 +564,15 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) | |||
555 | return curseg->next_blkoff; | 564 | return curseg->next_blkoff; |
556 | } | 565 | } |
557 | 566 | ||
558 | #ifdef CONFIG_F2FS_CHECK_FS | ||
559 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) | 567 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) |
560 | { | 568 | { |
561 | BUG_ON(segno > TOTAL_SEGS(sbi) - 1); | 569 | f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1); |
562 | } | 570 | } |
563 | 571 | ||
564 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | 572 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) |
565 | { | 573 | { |
566 | BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); | 574 | f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi) |
567 | BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); | 575 | || blk_addr >= MAX_BLKADDR(sbi)); |
568 | } | 576 | } |
569 | 577 | ||
570 | /* | 578 | /* |
@@ -573,16 +581,11 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | |||
573 | static inline void check_block_count(struct f2fs_sb_info *sbi, | 581 | static inline void check_block_count(struct f2fs_sb_info *sbi, |
574 | int segno, struct f2fs_sit_entry *raw_sit) | 582 | int segno, struct f2fs_sit_entry *raw_sit) |
575 | { | 583 | { |
584 | #ifdef CONFIG_F2FS_CHECK_FS | ||
576 | bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; | 585 | bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; |
577 | int valid_blocks = 0; | 586 | int valid_blocks = 0; |
578 | int cur_pos = 0, next_pos; | 587 | int cur_pos = 0, next_pos; |
579 | 588 | ||
580 | /* check segment usage */ | ||
581 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); | ||
582 | |||
583 | /* check boundary of a given segment number */ | ||
584 | BUG_ON(segno > TOTAL_SEGS(sbi) - 1); | ||
585 | |||
586 | /* check bitmap with valid block count */ | 589 | /* check bitmap with valid block count */ |
587 | do { | 590 | do { |
588 | if (is_valid) { | 591 | if (is_valid) { |
@@ -598,35 +601,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi, | |||
598 | is_valid = !is_valid; | 601 | is_valid = !is_valid; |
599 | } while (cur_pos < sbi->blocks_per_seg); | 602 | } while (cur_pos < sbi->blocks_per_seg); |
600 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); | 603 | BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); |
601 | } | ||
602 | #else | ||
603 | static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) | ||
604 | { | ||
605 | if (segno > TOTAL_SEGS(sbi) - 1) | ||
606 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
607 | } | ||
608 | |||
609 | static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) | ||
610 | { | ||
611 | if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) | ||
612 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
613 | } | ||
614 | |||
615 | /* | ||
616 | * Summary block is always treated as an invalid block | ||
617 | */ | ||
618 | static inline void check_block_count(struct f2fs_sb_info *sbi, | ||
619 | int segno, struct f2fs_sit_entry *raw_sit) | ||
620 | { | ||
621 | /* check segment usage */ | ||
622 | if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) | ||
623 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
624 | |||
625 | /* check boundary of a given segment number */ | ||
626 | if (segno > TOTAL_SEGS(sbi) - 1) | ||
627 | set_sbi_flag(sbi, SBI_NEED_FSCK); | ||
628 | } | ||
629 | #endif | 604 | #endif |
605 | /* check segment usage, and check boundary of a given segment number */ | ||
606 | f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg | ||
607 | || segno > TOTAL_SEGS(sbi) - 1); | ||
608 | } | ||
630 | 609 | ||
631 | static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, | 610 | static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, |
632 | unsigned int start) | 611 | unsigned int start) |
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c new file mode 100644 index 000000000000..da0d8e0b55a5 --- /dev/null +++ b/fs/f2fs/shrinker.c | |||
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * f2fs shrinker support | ||
3 | * the basic infra was copied from fs/ubifs/shrinker.c | ||
4 | * | ||
5 | * Copyright (c) 2015 Motorola Mobility | ||
6 | * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/f2fs_fs.h> | ||
14 | |||
15 | #include "f2fs.h" | ||
16 | |||
17 | static LIST_HEAD(f2fs_list); | ||
18 | static DEFINE_SPINLOCK(f2fs_list_lock); | ||
19 | static unsigned int shrinker_run_no; | ||
20 | |||
21 | static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) | ||
22 | { | ||
23 | return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt; | ||
24 | } | ||
25 | |||
26 | static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) | ||
27 | { | ||
28 | if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK) | ||
29 | return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK; | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi) | ||
34 | { | ||
35 | return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node); | ||
36 | } | ||
37 | |||
38 | unsigned long f2fs_shrink_count(struct shrinker *shrink, | ||
39 | struct shrink_control *sc) | ||
40 | { | ||
41 | struct f2fs_sb_info *sbi; | ||
42 | struct list_head *p; | ||
43 | unsigned long count = 0; | ||
44 | |||
45 | spin_lock(&f2fs_list_lock); | ||
46 | p = f2fs_list.next; | ||
47 | while (p != &f2fs_list) { | ||
48 | sbi = list_entry(p, struct f2fs_sb_info, s_list); | ||
49 | |||
50 | /* stop f2fs_put_super */ | ||
51 | if (!mutex_trylock(&sbi->umount_mutex)) { | ||
52 | p = p->next; | ||
53 | continue; | ||
54 | } | ||
55 | spin_unlock(&f2fs_list_lock); | ||
56 | |||
57 | /* count extent cache entries */ | ||
58 | count += __count_extent_cache(sbi); | ||
59 | |||
60 | /* shrink clean nat cache entries */ | ||
61 | count += __count_nat_entries(sbi); | ||
62 | |||
63 | /* count free nids cache entries */ | ||
64 | count += __count_free_nids(sbi); | ||
65 | |||
66 | spin_lock(&f2fs_list_lock); | ||
67 | p = p->next; | ||
68 | mutex_unlock(&sbi->umount_mutex); | ||
69 | } | ||
70 | spin_unlock(&f2fs_list_lock); | ||
71 | return count; | ||
72 | } | ||
73 | |||
74 | unsigned long f2fs_shrink_scan(struct shrinker *shrink, | ||
75 | struct shrink_control *sc) | ||
76 | { | ||
77 | unsigned long nr = sc->nr_to_scan; | ||
78 | struct f2fs_sb_info *sbi; | ||
79 | struct list_head *p; | ||
80 | unsigned int run_no; | ||
81 | unsigned long freed = 0; | ||
82 | |||
83 | spin_lock(&f2fs_list_lock); | ||
84 | do { | ||
85 | run_no = ++shrinker_run_no; | ||
86 | } while (run_no == 0); | ||
87 | p = f2fs_list.next; | ||
88 | while (p != &f2fs_list) { | ||
89 | sbi = list_entry(p, struct f2fs_sb_info, s_list); | ||
90 | |||
91 | if (sbi->shrinker_run_no == run_no) | ||
92 | break; | ||
93 | |||
94 | /* stop f2fs_put_super */ | ||
95 | if (!mutex_trylock(&sbi->umount_mutex)) { | ||
96 | p = p->next; | ||
97 | continue; | ||
98 | } | ||
99 | spin_unlock(&f2fs_list_lock); | ||
100 | |||
101 | sbi->shrinker_run_no = run_no; | ||
102 | |||
103 | /* shrink extent cache entries */ | ||
104 | freed += f2fs_shrink_extent_tree(sbi, nr >> 1); | ||
105 | |||
106 | /* shrink clean nat cache entries */ | ||
107 | if (freed < nr) | ||
108 | freed += try_to_free_nats(sbi, nr - freed); | ||
109 | |||
110 | /* shrink free nids cache entries */ | ||
111 | if (freed < nr) | ||
112 | freed += try_to_free_nids(sbi, nr - freed); | ||
113 | |||
114 | spin_lock(&f2fs_list_lock); | ||
115 | p = p->next; | ||
116 | list_move_tail(&sbi->s_list, &f2fs_list); | ||
117 | mutex_unlock(&sbi->umount_mutex); | ||
118 | if (freed >= nr) | ||
119 | break; | ||
120 | } | ||
121 | spin_unlock(&f2fs_list_lock); | ||
122 | return freed; | ||
123 | } | ||
124 | |||
125 | void f2fs_join_shrinker(struct f2fs_sb_info *sbi) | ||
126 | { | ||
127 | spin_lock(&f2fs_list_lock); | ||
128 | list_add_tail(&sbi->s_list, &f2fs_list); | ||
129 | spin_unlock(&f2fs_list_lock); | ||
130 | } | ||
131 | |||
132 | void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) | ||
133 | { | ||
134 | f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi)); | ||
135 | |||
136 | spin_lock(&f2fs_list_lock); | ||
137 | list_del(&sbi->s_list); | ||
138 | spin_unlock(&f2fs_list_lock); | ||
139 | } | ||
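Two design points are worth noting in the new shrinker: umount_mutex is only trylocked, so a concurrent f2fs_put_super() is never blocked by reclaim, and list_move_tail() rotates each scanned superblock to the end of f2fs_list while shrinker_run_no prevents visiting the same sb twice in one pass, spreading reclaim pressure round-robin across mounted f2fs instances.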
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a06b0b46fe69..f79478115d37 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -39,6 +39,13 @@ static struct proc_dir_entry *f2fs_proc_root; | |||
39 | static struct kmem_cache *f2fs_inode_cachep; | 39 | static struct kmem_cache *f2fs_inode_cachep; |
40 | static struct kset *f2fs_kset; | 40 | static struct kset *f2fs_kset; |
41 | 41 | ||
42 | /* f2fs-wide shrinker description */ | ||
43 | static struct shrinker f2fs_shrinker_info = { | ||
44 | .scan_objects = f2fs_shrink_scan, | ||
45 | .count_objects = f2fs_shrink_count, | ||
46 | .seeks = DEFAULT_SEEKS, | ||
47 | }; | ||
48 | |||
42 | enum { | 49 | enum { |
43 | Opt_gc_background, | 50 | Opt_gc_background, |
44 | Opt_disable_roll_forward, | 51 | Opt_disable_roll_forward, |
@@ -58,6 +65,7 @@ enum { | |||
58 | Opt_nobarrier, | 65 | Opt_nobarrier, |
59 | Opt_fastboot, | 66 | Opt_fastboot, |
60 | Opt_extent_cache, | 67 | Opt_extent_cache, |
68 | Opt_noextent_cache, | ||
61 | Opt_noinline_data, | 69 | Opt_noinline_data, |
62 | Opt_err, | 70 | Opt_err, |
63 | }; | 71 | }; |
@@ -81,6 +89,7 @@ static match_table_t f2fs_tokens = { | |||
81 | {Opt_nobarrier, "nobarrier"}, | 89 | {Opt_nobarrier, "nobarrier"}, |
82 | {Opt_fastboot, "fastboot"}, | 90 | {Opt_fastboot, "fastboot"}, |
83 | {Opt_extent_cache, "extent_cache"}, | 91 | {Opt_extent_cache, "extent_cache"}, |
92 | {Opt_noextent_cache, "noextent_cache"}, | ||
84 | {Opt_noinline_data, "noinline_data"}, | 93 | {Opt_noinline_data, "noinline_data"}, |
85 | {Opt_err, NULL}, | 94 | {Opt_err, NULL}, |
86 | }; | 95 | }; |
@@ -382,6 +391,9 @@ static int parse_options(struct super_block *sb, char *options) | |||
382 | case Opt_extent_cache: | 391 | case Opt_extent_cache: |
383 | set_opt(sbi, EXTENT_CACHE); | 392 | set_opt(sbi, EXTENT_CACHE); |
384 | break; | 393 | break; |
394 | case Opt_noextent_cache: | ||
395 | clear_opt(sbi, EXTENT_CACHE); | ||
396 | break; | ||
385 | case Opt_noinline_data: | 397 | case Opt_noinline_data: |
386 | clear_opt(sbi, INLINE_DATA); | 398 | clear_opt(sbi, INLINE_DATA); |
387 | break; | 399 | break; |
@@ -410,9 +422,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) | |||
410 | atomic_set(&fi->dirty_pages, 0); | 422 | atomic_set(&fi->dirty_pages, 0); |
411 | fi->i_current_depth = 1; | 423 | fi->i_current_depth = 1; |
412 | fi->i_advise = 0; | 424 | fi->i_advise = 0; |
413 | rwlock_init(&fi->ext_lock); | ||
414 | init_rwsem(&fi->i_sem); | 425 | init_rwsem(&fi->i_sem); |
415 | INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); | ||
416 | INIT_LIST_HEAD(&fi->inmem_pages); | 426 | INIT_LIST_HEAD(&fi->inmem_pages); |
417 | mutex_init(&fi->inmem_lock); | 427 | mutex_init(&fi->inmem_lock); |
418 | 428 | ||
@@ -441,17 +451,22 @@ static int f2fs_drop_inode(struct inode *inode) | |||
441 | */ | 451 | */ |
442 | if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { | 452 | if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { |
443 | if (!inode->i_nlink && !is_bad_inode(inode)) { | 453 | if (!inode->i_nlink && !is_bad_inode(inode)) { |
454 | /* to avoid evict_inode call simultaneously */ | ||
455 | atomic_inc(&inode->i_count); | ||
444 | spin_unlock(&inode->i_lock); | 456 | spin_unlock(&inode->i_lock); |
445 | 457 | ||
446 | /* some remaining atomic pages should be discarded */ | 458 | /* some remaining atomic pages should be discarded */ |
447 | if (f2fs_is_atomic_file(inode)) | 459 | if (f2fs_is_atomic_file(inode)) |
448 | commit_inmem_pages(inode, true); | 460 | commit_inmem_pages(inode, true); |
449 | 461 | ||
462 | /* should remain fi->extent_tree for writepage */ | ||
463 | f2fs_destroy_extent_node(inode); | ||
464 | |||
450 | sb_start_intwrite(inode->i_sb); | 465 | sb_start_intwrite(inode->i_sb); |
451 | i_size_write(inode, 0); | 466 | i_size_write(inode, 0); |
452 | 467 | ||
453 | if (F2FS_HAS_BLOCKS(inode)) | 468 | if (F2FS_HAS_BLOCKS(inode)) |
454 | f2fs_truncate(inode); | 469 | f2fs_truncate(inode, true); |
455 | 470 | ||
456 | sb_end_intwrite(inode->i_sb); | 471 | sb_end_intwrite(inode->i_sb); |
457 | 472 | ||
@@ -461,6 +476,7 @@ static int f2fs_drop_inode(struct inode *inode) | |||
461 | F2FS_I(inode)->i_crypt_info); | 476 | F2FS_I(inode)->i_crypt_info); |
462 | #endif | 477 | #endif |
463 | spin_lock(&inode->i_lock); | 478 | spin_lock(&inode->i_lock); |
479 | atomic_dec(&inode->i_count); | ||
464 | } | 480 | } |
465 | return 0; | 481 | return 0; |
466 | } | 482 | } |
@@ -498,9 +514,11 @@ static void f2fs_put_super(struct super_block *sb) | |||
498 | } | 514 | } |
499 | kobject_del(&sbi->s_kobj); | 515 | kobject_del(&sbi->s_kobj); |
500 | 516 | ||
501 | f2fs_destroy_stats(sbi); | ||
502 | stop_gc_thread(sbi); | 517 | stop_gc_thread(sbi); |
503 | 518 | ||
519 | /* prevent remaining shrinker jobs */ | ||
520 | mutex_lock(&sbi->umount_mutex); | ||
521 | |||
504 | /* | 522 | /* |
505 | * We don't need to do checkpoint when superblock is clean. | 523 | * We don't need to do checkpoint when superblock is clean. |
506 | * But, the previous checkpoint was not done by umount, it needs to do | 524 | * But, the previous checkpoint was not done by umount, it needs to do |
@@ -514,6 +532,9 @@ static void f2fs_put_super(struct super_block *sb) | |||
514 | write_checkpoint(sbi, &cpc); | 532 | write_checkpoint(sbi, &cpc); |
515 | } | 533 | } |
516 | 534 | ||
535 | /* write_checkpoint can update stat information */ ||
536 | f2fs_destroy_stats(sbi); | ||
537 | |||
517 | /* | 538 | /* |
518 | * normally superblock is clean, so we need to release this. | 539 | * normally superblock is clean, so we need to release this. |
519 | * In addition, EIO will skip do checkpoint, we need this as well. | 540 | * In addition, EIO will skip do checkpoint, we need this as well. |
@@ -521,6 +542,9 @@ static void f2fs_put_super(struct super_block *sb) | |||
521 | release_dirty_inode(sbi); | 542 | release_dirty_inode(sbi); |
522 | release_discard_addrs(sbi); | 543 | release_discard_addrs(sbi); |
523 | 544 | ||
545 | f2fs_leave_shrinker(sbi); | ||
546 | mutex_unlock(&sbi->umount_mutex); | ||
547 | |||
524 | iput(sbi->node_inode); | 548 | iput(sbi->node_inode); |
525 | iput(sbi->meta_inode); | 549 | iput(sbi->meta_inode); |
526 | 550 | ||
@@ -647,6 +671,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
647 | seq_puts(seq, ",fastboot"); | 671 | seq_puts(seq, ",fastboot"); |
648 | if (test_opt(sbi, EXTENT_CACHE)) | 672 | if (test_opt(sbi, EXTENT_CACHE)) |
649 | seq_puts(seq, ",extent_cache"); | 673 | seq_puts(seq, ",extent_cache"); |
674 | else | ||
675 | seq_puts(seq, ",noextent_cache"); | ||
650 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); | 676 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); |
651 | 677 | ||
652 | return 0; | 678 | return 0; |
@@ -667,7 +693,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) | |||
667 | struct seg_entry *se = get_seg_entry(sbi, i); | 693 | struct seg_entry *se = get_seg_entry(sbi, i); |
668 | 694 | ||
669 | if ((i % 10) == 0) | 695 | if ((i % 10) == 0) |
670 | seq_printf(seq, "%-5d", i); | 696 | seq_printf(seq, "%-10d", i); |
671 | seq_printf(seq, "%d|%-3u", se->type, | 697 | seq_printf(seq, "%d|%-3u", se->type, |
672 | get_valid_blocks(sbi, i, 1)); | 698 | get_valid_blocks(sbi, i, 1)); |
673 | if ((i % 10) == 9 || i == (total_segs - 1)) | 699 | if ((i % 10) == 9 || i == (total_segs - 1)) |
@@ -699,6 +725,7 @@ static void default_options(struct f2fs_sb_info *sbi) | |||
699 | 725 | ||
700 | set_opt(sbi, BG_GC); | 726 | set_opt(sbi, BG_GC); |
701 | set_opt(sbi, INLINE_DATA); | 727 | set_opt(sbi, INLINE_DATA); |
728 | set_opt(sbi, EXTENT_CACHE); | ||
702 | 729 | ||
703 | #ifdef CONFIG_F2FS_FS_XATTR | 730 | #ifdef CONFIG_F2FS_FS_XATTR |
704 | set_opt(sbi, XATTR_USER); | 731 | set_opt(sbi, XATTR_USER); |
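With default_options() now setting EXTENT_CACHE unconditionally, the extent cache is on by default and the new noextent_cache token is the way to opt out; f2fs_show_options() above prints whichever state is active, so /proc/mounts always reflects the effective setting.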
@@ -970,6 +997,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) | |||
970 | 997 | ||
971 | sbi->dir_level = DEF_DIR_LEVEL; | 998 | sbi->dir_level = DEF_DIR_LEVEL; |
972 | clear_sbi_flag(sbi, SBI_NEED_FSCK); | 999 | clear_sbi_flag(sbi, SBI_NEED_FSCK); |
1000 | |||
1001 | INIT_LIST_HEAD(&sbi->s_list); | ||
1002 | mutex_init(&sbi->umount_mutex); | ||
973 | } | 1003 | } |
974 | 1004 | ||
975 | /* | 1005 | /* |
@@ -1135,7 +1165,9 @@ try_onemore: | |||
1135 | mutex_init(&sbi->writepages); | 1165 | mutex_init(&sbi->writepages); |
1136 | mutex_init(&sbi->cp_mutex); | 1166 | mutex_init(&sbi->cp_mutex); |
1137 | init_rwsem(&sbi->node_write); | 1167 | init_rwsem(&sbi->node_write); |
1138 | clear_sbi_flag(sbi, SBI_POR_DOING); | 1168 | |
1169 | /* disallow all the data/node/meta page writes */ | ||
1170 | set_sbi_flag(sbi, SBI_POR_DOING); | ||
1139 | spin_lock_init(&sbi->stat_lock); | 1171 | spin_lock_init(&sbi->stat_lock); |
1140 | 1172 | ||
1141 | init_rwsem(&sbi->read_io.io_rwsem); | 1173 | init_rwsem(&sbi->read_io.io_rwsem); |
@@ -1212,8 +1244,12 @@ try_onemore: | |||
1212 | goto free_nm; | 1244 | goto free_nm; |
1213 | } | 1245 | } |
1214 | 1246 | ||
1247 | f2fs_join_shrinker(sbi); | ||
1248 | |||
1215 | /* if there are any orphan nodes, free them */ | 1249 | /* if there are any orphan nodes, free them */ |
1216 | recover_orphan_inodes(sbi); | 1250 | err = recover_orphan_inodes(sbi); |
1251 | if (err) | ||
1252 | goto free_node_inode; | ||
1217 | 1253 | ||
1218 | /* read root inode and dentry */ | 1254 | /* read root inode and dentry */ |
1219 | root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); | 1255 | root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); |
@@ -1275,6 +1311,8 @@ try_onemore: | |||
1275 | goto free_kobj; | 1311 | goto free_kobj; |
1276 | } | 1312 | } |
1277 | } | 1313 | } |
1314 | /* recover_fsync_data() cleared this already */ | ||
1315 | clear_sbi_flag(sbi, SBI_POR_DOING); | ||
1278 | 1316 | ||
1279 | /* | 1317 | /* |
1280 | * If filesystem is not mounted as read-only then | 1318 | * If filesystem is not mounted as read-only then |
@@ -1308,7 +1346,10 @@ free_root_inode: | |||
1308 | dput(sb->s_root); | 1346 | dput(sb->s_root); |
1309 | sb->s_root = NULL; | 1347 | sb->s_root = NULL; |
1310 | free_node_inode: | 1348 | free_node_inode: |
1349 | mutex_lock(&sbi->umount_mutex); | ||
1350 | f2fs_leave_shrinker(sbi); | ||
1311 | iput(sbi->node_inode); | 1351 | iput(sbi->node_inode); |
1352 | mutex_unlock(&sbi->umount_mutex); | ||
1312 | free_nm: | 1353 | free_nm: |
1313 | destroy_node_manager(sbi); | 1354 | destroy_node_manager(sbi); |
1314 | free_sm: | 1355 | free_sm: |
@@ -1404,13 +1445,20 @@ static int __init init_f2fs_fs(void) | |||
1404 | err = f2fs_init_crypto(); | 1445 | err = f2fs_init_crypto(); |
1405 | if (err) | 1446 | if (err) |
1406 | goto free_kset; | 1447 | goto free_kset; |
1407 | err = register_filesystem(&f2fs_fs_type); | 1448 | |
1449 | err = register_shrinker(&f2fs_shrinker_info); | ||
1408 | if (err) | 1450 | if (err) |
1409 | goto free_crypto; | 1451 | goto free_crypto; |
1452 | |||
1453 | err = register_filesystem(&f2fs_fs_type); | ||
1454 | if (err) | ||
1455 | goto free_shrinker; | ||
1410 | f2fs_create_root_stats(); | 1456 | f2fs_create_root_stats(); |
1411 | f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); | 1457 | f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); |
1412 | return 0; | 1458 | return 0; |
1413 | 1459 | ||
1460 | free_shrinker: | ||
1461 | unregister_shrinker(&f2fs_shrinker_info); | ||
1414 | free_crypto: | 1462 | free_crypto: |
1415 | f2fs_exit_crypto(); | 1463 | f2fs_exit_crypto(); |
1416 | free_kset: | 1464 | free_kset: |
@@ -1433,6 +1481,7 @@ static void __exit exit_f2fs_fs(void) | |||
1433 | { | 1481 | { |
1434 | remove_proc_entry("fs/f2fs", NULL); | 1482 | remove_proc_entry("fs/f2fs", NULL); |
1435 | f2fs_destroy_root_stats(); | 1483 | f2fs_destroy_root_stats(); |
1484 | unregister_shrinker(&f2fs_shrinker_info); | ||
1436 | unregister_filesystem(&f2fs_fs_type); | 1485 | unregister_filesystem(&f2fs_fs_type); |
1437 | f2fs_exit_crypto(); | 1486 | f2fs_exit_crypto(); |
1438 | destroy_extent_cache(); | 1487 | destroy_extent_cache(); |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 07449b980acb..4de2286c0e4d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
@@ -499,9 +499,12 @@ static int __f2fs_setxattr(struct inode *inode, int index, | |||
499 | 499 | ||
500 | len = strlen(name); | 500 | len = strlen(name); |
501 | 501 | ||
502 | if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode)) | 502 | if (len > F2FS_NAME_LEN) |
503 | return -ERANGE; | 503 | return -ERANGE; |
504 | 504 | ||
505 | if (size > MAX_VALUE_LEN(inode)) | ||
506 | return -E2BIG; | ||
507 | |||
505 | base_addr = read_all_xattrs(inode, ipage); | 508 | base_addr = read_all_xattrs(inode, ipage); |
506 | if (!base_addr) | 509 | if (!base_addr) |
507 | goto exit; | 510 | goto exit; |
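Splitting the check lets callers tell which limit was exceeded: a too-long attribute name still returns -ERANGE, while a value larger than the inode can hold now returns -E2BIG. A userspace probe of the new behavior (hypothetical mount point and file; illustrative only):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	char big[65536];

	memset(big, 'x', sizeof(big));
	/* a value this large should now fail with E2BIG, not ERANGE */
	if (setxattr("/mnt/f2fs/file", "user.demo", big, sizeof(big), 0) < 0)
		printf("setxattr: %s\n", strerror(errno));
	return 0;
}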
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 920408a21ffd..25c6324a0dd0 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h | |||
@@ -417,15 +417,25 @@ typedef __le32 f2fs_hash_t; | |||
417 | 417 | ||
418 | #define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) | 418 | #define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) |
419 | 419 | ||
420 | /* the number of dentry in a block */ | ||
421 | #define NR_DENTRY_IN_BLOCK 214 | ||
422 | |||
423 | /* MAX level for dir lookup */ | 420 | /* MAX level for dir lookup */ |
424 | #define MAX_DIR_HASH_DEPTH 63 | 421 | #define MAX_DIR_HASH_DEPTH 63 |
425 | 422 | ||
426 | /* MAX buckets in one level of dir */ | 423 | /* MAX buckets in one level of dir */ |
427 | #define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) | 424 | #define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) |
428 | 425 | ||
426 | /* | ||
427 | * space utilization of regular dentry and inline dentry | ||
428 | * regular dentry inline dentry | ||
429 | * bitmap 1 * 27 = 27 1 * 23 = 23 | ||
430 | * reserved 1 * 3 = 3 1 * 7 = 7 | ||
431 | * dentry 11 * 214 = 2354 11 * 182 = 2002 | ||
432 | * filename 8 * 214 = 1712 8 * 182 = 1456 | ||
433 | * total 4096 3488 | ||
434 | * | ||
435 | * Note: there is more reserved space in an inline dentry than in a ||
436 | * regular dentry; when converting an inline dentry, handle this carefully. ||
437 | */ | ||
438 | #define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */ | ||
429 | #define SIZE_OF_DIR_ENTRY 11 /* by byte */ | 439 | #define SIZE_OF_DIR_ENTRY 11 /* by byte */ |
430 | #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ | 440 | #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ |
431 | BITS_PER_BYTE) | 441 | BITS_PER_BYTE) |
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 04856a2d8c82..a01946514b5a 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h | |||
@@ -1099,11 +1099,11 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start, | |||
1099 | TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, | 1099 | TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, |
1100 | 1100 | ||
1101 | TP_PROTO(struct inode *inode, unsigned int pgofs, | 1101 | TP_PROTO(struct inode *inode, unsigned int pgofs, |
1102 | struct extent_node *en), | 1102 | struct extent_info *ei), |
1103 | 1103 | ||
1104 | TP_ARGS(inode, pgofs, en), | 1104 | TP_ARGS(inode, pgofs, ei), |
1105 | 1105 | ||
1106 | TP_CONDITION(en), | 1106 | TP_CONDITION(ei), |
1107 | 1107 | ||
1108 | TP_STRUCT__entry( | 1108 | TP_STRUCT__entry( |
1109 | __field(dev_t, dev) | 1109 | __field(dev_t, dev) |
@@ -1118,9 +1118,9 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, | |||
1118 | __entry->dev = inode->i_sb->s_dev; | 1118 | __entry->dev = inode->i_sb->s_dev; |
1119 | __entry->ino = inode->i_ino; | 1119 | __entry->ino = inode->i_ino; |
1120 | __entry->pgofs = pgofs; | 1120 | __entry->pgofs = pgofs; |
1121 | __entry->fofs = en->ei.fofs; | 1121 | __entry->fofs = ei->fofs; |
1122 | __entry->blk = en->ei.blk; | 1122 | __entry->blk = ei->blk; |
1123 | __entry->len = en->ei.len; | 1123 | __entry->len = ei->len; |
1124 | ), | 1124 | ), |
1125 | 1125 | ||
1126 | TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " | 1126 | TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " |