author		Linus Torvalds <torvalds@linux-foundation.org>	2015-09-03 16:10:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-09-03 16:10:22 -0400
commit		4c12ab7e5e2e892fa94df500f96001837918a281 (patch)
tree		c0772bf31167593212f9adc53152f44c010f438b
parent		9cbf22b37ae0592dea809cb8d424990774c21786 (diff)
parent		01a5ad827a36e36f45e1fdb96903ea115f759865 (diff)
Merge tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "The major work includes fixing and enhancing the existing extent_cache
  feature, which has been settling down well and accordingly now becomes
  a default mount option. Also, this version newly registers an f2fs
  memory shrinker to reclaim several objects consumed by a couple of
  data structures, in order to avoid memory pressure. Another new
  feature is ioctl(F2FS_GARBAGE_COLLECT), which triggers a cleaning job
  explicitly at the user's request. Most of the other patches fix bugs
  occurring in corner cases across the whole code area"

* tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (85 commits)
  f2fs: upset segment_info repair
  f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent
  f2fs: update extent tree in batches
  f2fs: fix to release inode correctly
  f2fs: handle f2fs_truncate error correctly
  f2fs: avoid unneeded initializing when converting inline dentry
  f2fs: atomically set inode->i_flags
  f2fs: fix wrong pointer access during try_to_free_nids
  f2fs: use __GFP_NOFAIL to avoid infinite loop
  f2fs: lookup neighbor extent nodes for merging later
  f2fs: split __insert_extent_tree_ret for readability
  f2fs: kill dead code in __insert_extent_tree
  f2fs: adjust showing of extent cache stat
  f2fs: add largest/cached stat in extent cache
  f2fs: fix incorrect mapping for bmap
  f2fs: add annotation for space utilization of regular/inline dentry
  f2fs: fix to update cached_en of extent tree properly
  f2fs: fix typo
  f2fs: check the node block address of newly allocated nid
  f2fs: go out for insert_inode_locked failure
  ...
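As an illustration of the new ioctl(F2FS_GARBAGE_COLLECT) interface described above, a minimal userspace sketch follows. It is hedged: the request macro is assumed here to be F2FS_IOC_GARBAGE_COLLECT built from the usual F2FS_IOCTL_MAGIC (0xf5), and the __u32 sync argument is an assumption of this sketch; the authoritative definitions live in fs/f2fs/f2fs.h in this tree.

/* f2fs_gc_now.c - hedged sketch; not part of this merge */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>

/* assumed spellings; see fs/f2fs/f2fs.h for the real ones */
#define F2FS_IOCTL_MAGIC		0xf5
#define F2FS_IOC_GARBAGE_COLLECT	_IO(F2FS_IOCTL_MAGIC, 6)

int main(int argc, char **argv)
{
	__u32 sync = 1;	/* assumed argument: ask for a synchronous cleaning pass */
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file-on-f2fs>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* needs CAP_SYS_ADMIN; asks the filesystem to run one cleaning job now */
	if (ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &sync) < 0)
		perror("ioctl(F2FS_IOC_GARBAGE_COLLECT)");
	close(fd);
	return 0;
}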
-rw-r--r--	Documentation/filesystems/f2fs.txt	4
-rw-r--r--	MAINTAINERS				2
-rw-r--r--	fs/f2fs/Kconfig				2
-rw-r--r--	fs/f2fs/Makefile			1
-rw-r--r--	fs/f2fs/checkpoint.c			93
-rw-r--r--	fs/f2fs/crypto_key.c			3
-rw-r--r--	fs/f2fs/data.c				953
-rw-r--r--	fs/f2fs/debug.c				30
-rw-r--r--	fs/f2fs/dir.c				4
-rw-r--r--	fs/f2fs/extent_cache.c			791
-rw-r--r--	fs/f2fs/f2fs.h				134
-rw-r--r--	fs/f2fs/file.c				185
-rw-r--r--	fs/f2fs/gc.c				81
-rw-r--r--	fs/f2fs/gc.h				6
-rw-r--r--	fs/f2fs/inline.c			23
-rw-r--r--	fs/f2fs/inode.c				97
-rw-r--r--	fs/f2fs/namei.c				21
-rw-r--r--	fs/f2fs/node.c				86
-rw-r--r--	fs/f2fs/recovery.c			43
-rw-r--r--	fs/f2fs/segment.c			78
-rw-r--r--	fs/f2fs/segment.h			55
-rw-r--r--	fs/f2fs/shrinker.c			139
-rw-r--r--	fs/f2fs/super.c				65
-rw-r--r--	fs/f2fs/xattr.c				5
-rw-r--r--	include/linux/f2fs_fs.h			16
-rw-r--r--	include/trace/events/f2fs.h		12
26 files changed, 1903 insertions(+), 1026 deletions(-)
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index e9e750e59efc..e2d5105b7214 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -143,7 +143,9 @@ fastboot This option is used when a system wants to reduce mount
 extent_cache           Enable an extent cache based on rb-tree, it can cache
                        as many as extent which map between contiguous logical
                        address and physical address per inode, resulting in
-                       increasing the cache hit ratio.
+                       increasing the cache hit ratio. Set by default.
+noextent_cache         Disable an extent cache based on rb-tree explicitly, see
+                       the above extent_cache mount option.
 noinline_data          Disable the inline data feature, inline data feature is
                        enabled by default.
 
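Usage note for the two options documented above: since extent_cache is now on by default, only the opt-out normally needs to be spelled out. A minimal sketch using mount(2); the device and mount point below are placeholders, not anything from this merge:

/* mount_noextent.c - illustrative sketch; paths are placeholders */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* equivalent to: mount -t f2fs -o noextent_cache /dev/sdb1 /mnt/f2fs */
	if (mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0, "noextent_cache") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}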
diff --git a/MAINTAINERS b/MAINTAINERS
index 73db93cc55fd..205cd5d687e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4416,6 +4416,7 @@ F: include/linux/fscache*.h
 F2FS FILE SYSTEM
 M:	Jaegeuk Kim <jaegeuk@kernel.org>
 M:	Changman Lee <cm224.lee@samsung.com>
+R:	Chao Yu <chao2.yu@samsung.com>
 L:	linux-f2fs-devel@lists.sourceforge.net
 W:	http://en.wikipedia.org/wiki/F2FS
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
@@ -4424,6 +4425,7 @@ F: Documentation/filesystems/f2fs.txt
 F:	Documentation/ABI/testing/sysfs-fs-f2fs
 F:	fs/f2fs/
 F:	include/linux/f2fs_fs.h
+F:	include/trace/events/f2fs.h
 
 FUJITSU FR-V (FRV) PORT
 M:	David Howells <dhowells@redhat.com>
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index c629762005bc..b0a9dc929f88 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -45,7 +45,7 @@ config F2FS_FS_POSIX_ACL
 	default y
 	help
 	  Posix Access Control Lists (ACLs) support permissions for users and
-	  gourps beyond the owner/group/world scheme.
+	  groups beyond the owner/group/world scheme.
 
 	  To learn more about Access Control Lists, visit the POSIX ACLs for
 	  Linux website <http://acl.bestbits.at/>.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 396be1a39e55..08e101ed914c 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o
 
 f2fs-y		:= dir.o file.o inode.o namei.o hash.o super.o inline.o
 f2fs-y		+= checkpoint.o gc.o data.o node.o segment.o recovery.o
+f2fs-y		+= shrinker.o extent_cache.o
 f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
 f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
 f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index b70bbe1a6a8c..c5a38e352a80 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -69,14 +69,24 @@ repeat:
 
 	fio.page = page;
 
-	if (f2fs_submit_page_bio(&fio))
+	if (f2fs_submit_page_bio(&fio)) {
+		f2fs_put_page(page, 1);
 		goto repeat;
+	}
 
 	lock_page(page);
 	if (unlikely(page->mapping != mapping)) {
 		f2fs_put_page(page, 1);
 		goto repeat;
 	}
+
+	/*
+	 * if there is any IO error when accessing device, make our filesystem
+	 * readonly and make sure do not write checkpoint with non-uptodate
+	 * meta page.
+	 */
+	if (unlikely(!PageUptodate(page)))
+		f2fs_stop_checkpoint(sbi);
 out:
 	return page;
 }
@@ -326,26 +336,18 @@ const struct address_space_operations f2fs_meta_aops = {
 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
 {
 	struct inode_management *im = &sbi->im[type];
-	struct ino_entry *e;
+	struct ino_entry *e, *tmp;
+
+	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
 retry:
-	if (radix_tree_preload(GFP_NOFS)) {
-		cond_resched();
-		goto retry;
-	}
+	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
 
 	spin_lock(&im->ino_lock);
-
 	e = radix_tree_lookup(&im->ino_root, ino);
 	if (!e) {
-		e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
-		if (!e) {
-			spin_unlock(&im->ino_lock);
-			radix_tree_preload_end();
-			goto retry;
-		}
+		e = tmp;
 		if (radix_tree_insert(&im->ino_root, ino, e)) {
 			spin_unlock(&im->ino_lock);
-			kmem_cache_free(ino_entry_slab, e);
 			radix_tree_preload_end();
 			goto retry;
 		}
@@ -358,6 +360,9 @@ retry:
 	}
 	spin_unlock(&im->ino_lock);
 	radix_tree_preload_end();
+
+	if (e != tmp)
+		kmem_cache_free(ino_entry_slab, tmp);
 }
 
 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
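The two __add_ino_entry hunks above trade a retry loop around a GFP_ATOMIC allocation for a simpler discipline: preallocate the entry in sleepable context, publish it under the lock only if the key is still absent, and free it afterwards if the preallocation went unused (the kernel side additionally relies on __GFP_NOFAIL so neither the allocation nor the radix-tree preload can fail). A minimal userspace analog of that pattern, with a pthread mutex standing in for the spinlock and malloc for the slab cache:

/* prealloc_publish.c - hedged analog of the pattern, not kernel code */
#include <pthread.h>
#include <stdlib.h>

struct entry { int key; struct entry *next; };

static struct entry *table;		/* toy lookup structure */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static struct entry *lookup(int key)
{
	struct entry *e;

	for (e = table; e; e = e->next)
		if (e->key == key)
			return e;
	return NULL;
}

/* add key if absent: allocate before taking the lock, free if unused */
void add_entry(int key)
{
	struct entry *e, *tmp = malloc(sizeof(*tmp));	/* sleepable context */

	if (!tmp)
		return;		/* the kernel side avoids this with __GFP_NOFAIL */
	tmp->key = key;

	pthread_mutex_lock(&lock);
	e = lookup(key);
	if (!e) {		/* we won the race: publish the preallocated entry */
		tmp->next = table;
		table = tmp;
		e = tmp;
	}
	pthread_mutex_unlock(&lock);

	if (e != tmp)		/* entry already existed: discard our copy */
		free(tmp);
}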
@@ -458,24 +463,34 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 	__remove_ino_entry(sbi, ino, ORPHAN_INO);
 }
 
-static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
+static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
-	struct inode *inode = f2fs_iget(sbi->sb, ino);
-	f2fs_bug_on(sbi, IS_ERR(inode));
+	struct inode *inode;
+
+	inode = f2fs_iget(sbi->sb, ino);
+	if (IS_ERR(inode)) {
+		/*
+		 * there should be a bug that we can't find the entry
+		 * to orphan inode.
+		 */
+		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
+		return PTR_ERR(inode);
+	}
+
 	clear_nlink(inode);
 
 	/* truncate all the data during iput */
 	iput(inode);
+	return 0;
 }
 
-void recover_orphan_inodes(struct f2fs_sb_info *sbi)
+int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 {
 	block_t start_blk, orphan_blocks, i, j;
+	int err;
 
 	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
-		return;
-
-	set_sbi_flag(sbi, SBI_POR_DOING);
+		return 0;
 
 	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
 	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
@@ -489,14 +504,17 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
 		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
 			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
-			recover_orphan_inode(sbi, ino);
+			err = recover_orphan_inode(sbi, ino);
+			if (err) {
+				f2fs_put_page(page, 1);
+				return err;
+			}
 		}
 		f2fs_put_page(page, 1);
 	}
 	/* clear Orphan Flag */
 	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
-	clear_sbi_flag(sbi, SBI_POR_DOING);
-	return;
+	return 0;
 }
 
 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
@@ -504,7 +522,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 	struct list_head *head;
 	struct f2fs_orphan_block *orphan_blk = NULL;
 	unsigned int nentries = 0;
-	unsigned short index;
+	unsigned short index = 1;
 	unsigned short orphan_blocks;
 	struct page *page = NULL;
 	struct ino_entry *orphan = NULL;
@@ -512,11 +530,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 
 	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
 
-	for (index = 0; index < orphan_blocks; index++)
-		grab_meta_page(sbi, start_blk + index);
-
-	index = 1;
-
 	/*
 	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
 	 * orphan inode operations are covered under f2fs_lock_op().
@@ -527,12 +540,10 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 	/* loop for each orphan inode entry and write them in Jornal block */
 	list_for_each_entry(orphan, head, list) {
 		if (!page) {
-			page = find_get_page(META_MAPPING(sbi), start_blk++);
-			f2fs_bug_on(sbi, !page);
+			page = grab_meta_page(sbi, start_blk++);
 			orphan_blk =
 				(struct f2fs_orphan_block *)page_address(page);
 			memset(orphan_blk, 0, sizeof(*orphan_blk));
-			f2fs_put_page(page, 0);
 		}
 
 		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
@@ -704,7 +715,8 @@ void update_dirty_page(struct inode *inode, struct page *page)
 	struct inode_entry *new;
 	int ret = 0;
 
-	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
+	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
+			!S_ISLNK(inode->i_mode))
 		return;
 
 	if (!S_ISDIR(inode->i_mode)) {
@@ -892,12 +904,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	__u32 crc32 = 0;
 	int i;
 	int cp_payload_blks = __cp_payload(sbi);
+	block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
+	bool invalidate = false;
 
 	/*
 	 * This avoids to conduct wrong roll-forward operations and uses
 	 * metapages, so should be called prior to sync_meta_pages below.
 	 */
-	discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
+	if (discard_next_dnode(sbi, discard_blk))
+		invalidate = true;
 
 	/* Flush all the NAT/SIT pages */
 	while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -1026,6 +1041,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	/* wait for previous submitted meta pages writeback */
 	wait_on_all_pages_writeback(sbi);
 
+	/*
+	 * invalidate meta page which is used temporarily for zeroing out
+	 * block at the end of warm node chain.
+	 */
+	if (invalidate)
+		invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
+								discard_blk);
+
 	release_dirty_inode(sbi);
 
 	if (unlikely(f2fs_cp_error(sbi)))
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
index 95b8f936f00b..9f77de2ef317 100644
--- a/fs/f2fs/crypto_key.c
+++ b/fs/f2fs/crypto_key.c
@@ -92,8 +92,7 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
 	if (!ci)
 		return;
 
-	if (ci->ci_keyring_key)
-		key_put(ci->ci_keyring_key);
+	key_put(ci->ci_keyring_key);
 	crypto_free_ablkcipher(ci->ci_ctfm);
 	kmem_cache_free(f2fs_crypt_info_cachep, ci);
 }
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c414d49aa2de..a82abe921b89 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -14,6 +14,7 @@
 #include <linux/mpage.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
+#include <linux/pagevec.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/prefetch.h>
@@ -26,9 +27,6 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
-static struct kmem_cache *extent_tree_slab;
-static struct kmem_cache *extent_node_slab;
-
 static void f2fs_read_end_io(struct bio *bio)
 {
 	struct bio_vec *bvec;
@@ -92,8 +90,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 {
 	struct bio *bio;
 
-	/* No failure on bio allocation */
-	bio = bio_alloc(GFP_NOIO, npages);
+	bio = f2fs_bio_alloc(npages);
 
 	bio->bi_bdev = sbi->sb->s_bdev;
 	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
@@ -158,7 +155,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 
 	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
 		bio_put(bio);
-		f2fs_put_page(page, 1);
 		return -EFAULT;
 	}
 
@@ -266,645 +262,17 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 	return err;
 }
 
-static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
-			struct extent_info *ei)
-{
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	pgoff_t start_fofs, end_fofs;
-	block_t start_blkaddr;
-
-	read_lock(&fi->ext_lock);
-	if (fi->ext.len == 0) {
-		read_unlock(&fi->ext_lock);
-		return false;
-	}
-
-	stat_inc_total_hit(inode->i_sb);
-
-	start_fofs = fi->ext.fofs;
-	end_fofs = fi->ext.fofs + fi->ext.len - 1;
-	start_blkaddr = fi->ext.blk;
-
-	if (pgofs >= start_fofs && pgofs <= end_fofs) {
-		*ei = fi->ext;
-		stat_inc_read_hit(inode->i_sb);
-		read_unlock(&fi->ext_lock);
-		return true;
-	}
-	read_unlock(&fi->ext_lock);
-	return false;
-}
-
-static bool update_extent_info(struct inode *inode, pgoff_t fofs,
-			block_t blkaddr)
+int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 {
-	struct f2fs_inode_info *fi = F2FS_I(inode);
-	pgoff_t start_fofs, end_fofs;
-	block_t start_blkaddr, end_blkaddr;
-	int need_update = true;
-
-	write_lock(&fi->ext_lock);
-
-	start_fofs = fi->ext.fofs;
-	end_fofs = fi->ext.fofs + fi->ext.len - 1;
-	start_blkaddr = fi->ext.blk;
-	end_blkaddr = fi->ext.blk + fi->ext.len - 1;
-
-	/* Drop and initialize the matched extent */
-	if (fi->ext.len == 1 && fofs == start_fofs)
-		fi->ext.len = 0;
-
-	/* Initial extent */
-	if (fi->ext.len == 0) {
-		if (blkaddr != NULL_ADDR) {
-			fi->ext.fofs = fofs;
-			fi->ext.blk = blkaddr;
-			fi->ext.len = 1;
-		}
-		goto end_update;
-	}
-
-	/* Front merge */
-	if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
-		fi->ext.fofs--;
-		fi->ext.blk--;
-		fi->ext.len++;
-		goto end_update;
-	}
-
-	/* Back merge */
-	if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
-		fi->ext.len++;
-		goto end_update;
-	}
-
-	/* Split the existing extent */
-	if (fi->ext.len > 1 &&
-			fofs >= start_fofs && fofs <= end_fofs) {
-		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
-			fi->ext.len = fofs - start_fofs;
-		} else {
-			fi->ext.fofs = fofs + 1;
-			fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
-			fi->ext.len -= fofs - start_fofs + 1;
-		}
-	} else {
-		need_update = false;
-	}
-
-	/* Finally, if the extent is very fragmented, let's drop the cache. */
-	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
-		fi->ext.len = 0;
-		set_inode_flag(fi, FI_NO_EXTENT);
-		need_update = true;
-	}
-end_update:
-	write_unlock(&fi->ext_lock);
-	return need_update;
-}
-
-static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_info *ei,
-				struct rb_node *parent, struct rb_node **p)
-{
-	struct extent_node *en;
-
-	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
-	if (!en)
-		return NULL;
-
-	en->ei = *ei;
-	INIT_LIST_HEAD(&en->list);
-
-	rb_link_node(&en->rb_node, parent, p);
-	rb_insert_color(&en->rb_node, &et->root);
-	et->count++;
-	atomic_inc(&sbi->total_ext_node);
-	return en;
-}
-
-static void __detach_extent_node(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_node *en)
-{
-	rb_erase(&en->rb_node, &et->root);
-	et->count--;
-	atomic_dec(&sbi->total_ext_node);
-
-	if (et->cached_en == en)
-		et->cached_en = NULL;
-}
-
-static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
-							nid_t ino)
-{
-	struct extent_tree *et;
-
-	down_read(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
-	if (!et) {
-		up_read(&sbi->extent_tree_lock);
-		return NULL;
-	}
-	atomic_inc(&et->refcount);
-	up_read(&sbi->extent_tree_lock);
-
-	return et;
-}
-
-static struct extent_tree *__grab_extent_tree(struct inode *inode)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	nid_t ino = inode->i_ino;
-
-	down_write(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
-	if (!et) {
-		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
-		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
-		memset(et, 0, sizeof(struct extent_tree));
-		et->ino = ino;
-		et->root = RB_ROOT;
-		et->cached_en = NULL;
-		rwlock_init(&et->lock);
-		atomic_set(&et->refcount, 0);
-		et->count = 0;
-		sbi->total_ext_tree++;
-	}
-	atomic_inc(&et->refcount);
-	up_write(&sbi->extent_tree_lock);
-
-	return et;
-}
-
-static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
-							unsigned int fofs)
-{
-	struct rb_node *node = et->root.rb_node;
-	struct extent_node *en;
-
-	if (et->cached_en) {
-		struct extent_info *cei = &et->cached_en->ei;
-
-		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
-			return et->cached_en;
-	}
-
-	while (node) {
-		en = rb_entry(node, struct extent_node, rb_node);
-
-		if (fofs < en->ei.fofs) {
-			node = node->rb_left;
-		} else if (fofs >= en->ei.fofs + en->ei.len) {
-			node = node->rb_right;
-		} else {
-			et->cached_en = en;
-			return en;
-		}
-	}
-	return NULL;
-}
-
-static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_node *en)
-{
-	struct extent_node *prev;
-	struct rb_node *node;
-
-	node = rb_prev(&en->rb_node);
-	if (!node)
-		return NULL;
-
-	prev = rb_entry(node, struct extent_node, rb_node);
-	if (__is_back_mergeable(&en->ei, &prev->ei)) {
-		en->ei.fofs = prev->ei.fofs;
-		en->ei.blk = prev->ei.blk;
-		en->ei.len += prev->ei.len;
-		__detach_extent_node(sbi, et, prev);
-		return prev;
-	}
-	return NULL;
-}
-
-static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_node *en)
-{
-	struct extent_node *next;
-	struct rb_node *node;
-
-	node = rb_next(&en->rb_node);
-	if (!node)
-		return NULL;
-
-	next = rb_entry(node, struct extent_node, rb_node);
-	if (__is_front_mergeable(&en->ei, &next->ei)) {
-		en->ei.len += next->ei.len;
-		__detach_extent_node(sbi, et, next);
-		return next;
-	}
-	return NULL;
-}
-
-static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_info *ei,
-				struct extent_node **den)
-{
-	struct rb_node **p = &et->root.rb_node;
-	struct rb_node *parent = NULL;
-	struct extent_node *en;
-
-	while (*p) {
-		parent = *p;
-		en = rb_entry(parent, struct extent_node, rb_node);
-
-		if (ei->fofs < en->ei.fofs) {
-			if (__is_front_mergeable(ei, &en->ei)) {
-				f2fs_bug_on(sbi, !den);
-				en->ei.fofs = ei->fofs;
-				en->ei.blk = ei->blk;
-				en->ei.len += ei->len;
-				*den = __try_back_merge(sbi, et, en);
-				return en;
-			}
-			p = &(*p)->rb_left;
-		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
-			if (__is_back_mergeable(ei, &en->ei)) {
-				f2fs_bug_on(sbi, !den);
-				en->ei.len += ei->len;
-				*den = __try_front_merge(sbi, et, en);
-				return en;
-			}
-			p = &(*p)->rb_right;
-		} else {
-			f2fs_bug_on(sbi, 1);
-		}
-	}
-
-	return __attach_extent_node(sbi, et, ei, parent, p);
-}
-
-static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, bool free_all)
-{
-	struct rb_node *node, *next;
-	struct extent_node *en;
-	unsigned int count = et->count;
-
-	node = rb_first(&et->root);
-	while (node) {
-		next = rb_next(node);
-		en = rb_entry(node, struct extent_node, rb_node);
-
-		if (free_all) {
-			spin_lock(&sbi->extent_lock);
-			if (!list_empty(&en->list))
-				list_del_init(&en->list);
-			spin_unlock(&sbi->extent_lock);
-		}
-
-		if (free_all || list_empty(&en->list)) {
-			__detach_extent_node(sbi, et, en);
-			kmem_cache_free(extent_node_slab, en);
-		}
-		node = next;
-	}
-
-	return count - et->count;
-}
-
-static void f2fs_init_extent_tree(struct inode *inode,
-						struct f2fs_extent *i_ext)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	struct extent_node *en;
 	struct extent_info ei;
+	struct inode *inode = dn->inode;
 
-	if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
-		return;
-
-	et = __grab_extent_tree(inode);
-
-	write_lock(&et->lock);
-	if (et->count)
-		goto out;
-
-	set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
-			le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
-
-	en = __insert_extent_tree(sbi, et, &ei, NULL);
-	if (en) {
-		et->cached_en = en;
-
-		spin_lock(&sbi->extent_lock);
-		list_add_tail(&en->list, &sbi->extent_list);
-		spin_unlock(&sbi->extent_lock);
-	}
-out:
-	write_unlock(&et->lock);
-	atomic_dec(&et->refcount);
-}
-
-static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
-							struct extent_info *ei)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	struct extent_node *en;
-
-	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
-
-	et = __find_extent_tree(sbi, inode->i_ino);
-	if (!et)
-		return false;
-
-	read_lock(&et->lock);
-	en = __lookup_extent_tree(et, pgofs);
-	if (en) {
-		*ei = en->ei;
-		spin_lock(&sbi->extent_lock);
-		if (!list_empty(&en->list))
-			list_move_tail(&en->list, &sbi->extent_list);
-		spin_unlock(&sbi->extent_lock);
-		stat_inc_read_hit(sbi->sb);
-	}
-	stat_inc_total_hit(sbi->sb);
-	read_unlock(&et->lock);
-
-	trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
-
-	atomic_dec(&et->refcount);
-	return en ? true : false;
-}
-
-static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
-							block_t blkaddr)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
-	struct extent_node *den = NULL;
-	struct extent_info ei, dei;
-	unsigned int endofs;
-
-	trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
-
-	et = __grab_extent_tree(inode);
-
-	write_lock(&et->lock);
-
-	/* 1. lookup and remove existing extent info in cache */
-	en = __lookup_extent_tree(et, fofs);
-	if (!en)
-		goto update_extent;
-
-	dei = en->ei;
-	__detach_extent_node(sbi, et, en);
-
-	/* 2. if extent can be split more, split and insert the left part */
-	if (dei.len > 1) {
-		/* insert left part of split extent into cache */
-		if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
-			set_extent_info(&ei, dei.fofs, dei.blk,
-							fofs - dei.fofs);
-			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
-		}
-
-		/* insert right part of split extent into cache */
-		endofs = dei.fofs + dei.len - 1;
-		if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
-			set_extent_info(&ei, fofs + 1,
-					fofs - dei.fofs + dei.blk, endofs - fofs);
-			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
-		}
-	}
-
-update_extent:
-	/* 3. update extent in extent cache */
-	if (blkaddr) {
-		set_extent_info(&ei, fofs, blkaddr, 1);
-		en3 = __insert_extent_tree(sbi, et, &ei, &den);
-	}
-
-	/* 4. update in global extent list */
-	spin_lock(&sbi->extent_lock);
-	if (en && !list_empty(&en->list))
-		list_del(&en->list);
-	/*
-	 * en1 and en2 split from en, they will become more and more smaller
-	 * fragments after splitting several times. So if the length is smaller
-	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
-	 */
-	if (en1)
-		list_add_tail(&en1->list, &sbi->extent_list);
-	if (en2)
-		list_add_tail(&en2->list, &sbi->extent_list);
-	if (en3) {
-		if (list_empty(&en3->list))
-			list_add_tail(&en3->list, &sbi->extent_list);
-		else
-			list_move_tail(&en3->list, &sbi->extent_list);
-	}
-	if (den && !list_empty(&den->list))
-		list_del(&den->list);
-	spin_unlock(&sbi->extent_lock);
-
-	/* 5. release extent node */
-	if (en)
-		kmem_cache_free(extent_node_slab, en);
-	if (den)
-		kmem_cache_free(extent_node_slab, den);
-
-	write_unlock(&et->lock);
-	atomic_dec(&et->refcount);
-}
-
-void f2fs_preserve_extent_tree(struct inode *inode)
-{
-	struct extent_tree *et;
-	struct extent_info *ext = &F2FS_I(inode)->ext;
-	bool sync = false;
-
-	if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
-		return;
-
-	et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
-	if (!et) {
-		if (ext->len) {
-			ext->len = 0;
-			update_inode_page(inode);
-		}
-		return;
-	}
-
-	read_lock(&et->lock);
-	if (et->count) {
-		struct extent_node *en;
-
-		if (et->cached_en) {
-			en = et->cached_en;
-		} else {
-			struct rb_node *node = rb_first(&et->root);
-
-			if (!node)
-				node = rb_last(&et->root);
-			en = rb_entry(node, struct extent_node, rb_node);
-		}
-
-		if (__is_extent_same(ext, &en->ei))
-			goto out;
-
-		*ext = en->ei;
-		sync = true;
-	} else if (ext->len) {
-		ext->len = 0;
-		sync = true;
-	}
-out:
-	read_unlock(&et->lock);
-	atomic_dec(&et->refcount);
-
-	if (sync)
-		update_inode_page(inode);
-}
-
-void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
-{
-	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
-	struct extent_node *en, *tmp;
-	unsigned long ino = F2FS_ROOT_INO(sbi);
-	struct radix_tree_iter iter;
-	void **slot;
-	unsigned int found;
-	unsigned int node_cnt = 0, tree_cnt = 0;
-
-	if (!test_opt(sbi, EXTENT_CACHE))
-		return;
-
-	if (available_free_memory(sbi, EXTENT_CACHE))
-		return;
-
-	spin_lock(&sbi->extent_lock);
-	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
-		if (!nr_shrink--)
-			break;
-		list_del_init(&en->list);
-	}
-	spin_unlock(&sbi->extent_lock);
-
-	down_read(&sbi->extent_tree_lock);
-	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
-				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
-		unsigned i;
-
-		ino = treevec[found - 1]->ino + 1;
-		for (i = 0; i < found; i++) {
-			struct extent_tree *et = treevec[i];
-
-			atomic_inc(&et->refcount);
-			write_lock(&et->lock);
-			node_cnt += __free_extent_tree(sbi, et, false);
-			write_unlock(&et->lock);
-			atomic_dec(&et->refcount);
-		}
-	}
-	up_read(&sbi->extent_tree_lock);
-
-	down_write(&sbi->extent_tree_lock);
-	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
-							F2FS_ROOT_INO(sbi)) {
-		struct extent_tree *et = (struct extent_tree *)*slot;
-
-		if (!atomic_read(&et->refcount) && !et->count) {
-			radix_tree_delete(&sbi->extent_tree_root, et->ino);
-			kmem_cache_free(extent_tree_slab, et);
-			sbi->total_ext_tree--;
-			tree_cnt++;
-		}
-	}
-	up_write(&sbi->extent_tree_lock);
-
-	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
-}
-
-void f2fs_destroy_extent_tree(struct inode *inode)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et;
-	unsigned int node_cnt = 0;
-
-	if (!test_opt(sbi, EXTENT_CACHE))
-		return;
-
-	et = __find_extent_tree(sbi, inode->i_ino);
-	if (!et)
-		goto out;
-
-	/* free all extent info belong to this extent tree */
-	write_lock(&et->lock);
-	node_cnt = __free_extent_tree(sbi, et, true);
-	write_unlock(&et->lock);
-
-	atomic_dec(&et->refcount);
-
-	/* try to find and delete extent tree entry in radix tree */
-	down_write(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
-	if (!et) {
-		up_write(&sbi->extent_tree_lock);
-		goto out;
+	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+		dn->data_blkaddr = ei.blk + index - ei.fofs;
+		return 0;
 	}
-	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
-	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
-	kmem_cache_free(extent_tree_slab, et);
-	sbi->total_ext_tree--;
-	up_write(&sbi->extent_tree_lock);
-out:
-	trace_f2fs_destroy_extent_tree(inode, node_cnt);
-	return;
-}
-
-void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
-{
-	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
-		f2fs_init_extent_tree(inode, i_ext);
-
-	write_lock(&F2FS_I(inode)->ext_lock);
-	get_extent_info(&F2FS_I(inode)->ext, *i_ext);
-	write_unlock(&F2FS_I(inode)->ext_lock);
-}
 
-static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
-							struct extent_info *ei)
-{
-	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
-		return false;
-
-	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
-		return f2fs_lookup_extent_tree(inode, pgofs, ei);
-
-	return lookup_extent_info(inode, pgofs, ei);
-}
-
-void f2fs_update_extent_cache(struct dnode_of_data *dn)
-{
-	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
-	pgoff_t fofs;
-
-	f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
-
-	if (is_inode_flag_set(fi, FI_NO_EXTENT))
-		return;
-
-	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
-							dn->ofs_in_node;
-
-	if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
-		return f2fs_update_extent_tree(dn->inode, fofs,
-						dn->data_blkaddr);
-
-	if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
-		sync_inode_page(dn);
+	return f2fs_reserve_block(dn, index);
 }
 
 struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
@@ -935,15 +303,13 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
 
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
-	if (err) {
-		f2fs_put_page(page, 1);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto put_err;
 	f2fs_put_dnode(&dn);
 
 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
-		f2fs_put_page(page, 1);
-		return ERR_PTR(-ENOENT);
+		err = -ENOENT;
+		goto put_err;
 	}
 got_it:
 	if (PageUptodate(page)) {
@@ -968,8 +334,12 @@ got_it:
 	fio.page = page;
 	err = f2fs_submit_page_bio(&fio);
 	if (err)
-		return ERR_PTR(err);
+		goto put_err;
 	return page;
+
+put_err:
+	f2fs_put_page(page, 1);
+	return ERR_PTR(err);
 }
 
 struct page *find_data_page(struct inode *inode, pgoff_t index)
@@ -1030,7 +400,8 @@ repeat:
  *
  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
  * f2fs_unlock_op().
- * Note that, ipage is set only by make_empty_dir.
+ * Note that, ipage is set only by make_empty_dir, and if any error occur,
+ * ipage should be released by this function.
  */
 struct page *get_new_data_page(struct inode *inode,
 				struct page *ipage, pgoff_t index, bool new_i_size)
@@ -1041,8 +412,14 @@ struct page *get_new_data_page(struct inode *inode,
 	int err;
 repeat:
 	page = grab_cache_page(mapping, index);
-	if (!page)
+	if (!page) {
+		/*
+		 * before exiting, we should make sure ipage will be released
+		 * if any error occur.
+		 */
+		f2fs_put_page(ipage, 1);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	set_new_dnode(&dn, inode, ipage, NULL, 0);
 	err = f2fs_reserve_block(&dn, index);
@@ -1107,8 +484,6 @@ alloc:
 
 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
 								&sum, seg);
-
-	/* direct IO doesn't use extent cache to maximize the performance */
 	set_data_blkaddr(dn);
 
 	/* update i_size */
@@ -1117,6 +492,9 @@ alloc:
 	if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
 		i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
 
+	/* direct IO doesn't use extent cache to maximize the performance */
+	f2fs_drop_largest_extent(dn->inode, fofs);
+
 	return 0;
 }
 
@@ -1183,7 +561,7 @@ out:
  * c. give the block addresses to blockdev
  */
 static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
-						int create, bool fiemap)
+						int create, int flag)
 {
 	unsigned int maxblocks = map->m_len;
 	struct dnode_of_data dn;
@@ -1217,8 +595,19 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 			err = 0;
 		goto unlock_out;
 	}
-	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
-		goto put_out;
+	if (dn.data_blkaddr == NEW_ADDR) {
+		if (flag == F2FS_GET_BLOCK_BMAP) {
+			err = -ENOENT;
+			goto put_out;
+		} else if (flag == F2FS_GET_BLOCK_READ ||
+				flag == F2FS_GET_BLOCK_DIO) {
+			goto put_out;
+		}
+		/*
+		 * if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP),
+		 * mark it as mapped and unwritten block.
+		 */
+	}
 
 	if (dn.data_blkaddr != NULL_ADDR) {
 		map->m_flags = F2FS_MAP_MAPPED;
@@ -1233,6 +622,8 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 		map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
 		map->m_pblk = dn.data_blkaddr;
 	} else {
+		if (flag == F2FS_GET_BLOCK_BMAP)
+			err = -ENOENT;
 		goto put_out;
 	}
 
@@ -1255,7 +646,9 @@ get_next:
 			err = 0;
 		goto unlock_out;
 	}
-	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
+
+	if (dn.data_blkaddr == NEW_ADDR &&
+			flag != F2FS_GET_BLOCK_FIEMAP)
 		goto put_out;
 
 	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -1297,7 +690,7 @@ out:
 }
 
 static int __get_data_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh, int create, bool fiemap)
+			struct buffer_head *bh, int create, int flag)
 {
 	struct f2fs_map_blocks map;
 	int ret;
@@ -1305,7 +698,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 	map.m_lblk = iblock;
 	map.m_len = bh->b_size >> inode->i_blkbits;
 
-	ret = f2fs_map_blocks(inode, &map, create, fiemap);
+	ret = f2fs_map_blocks(inode, &map, create, flag);
 	if (!ret) {
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
@@ -1315,15 +708,23 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 }
 
 static int get_data_block(struct inode *inode, sector_t iblock,
+			struct buffer_head *bh_result, int create, int flag)
+{
+	return __get_data_block(inode, iblock, bh_result, create, flag);
+}
+
+static int get_data_block_dio(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
-	return __get_data_block(inode, iblock, bh_result, create, false);
+	return __get_data_block(inode, iblock, bh_result, create,
+						F2FS_GET_BLOCK_DIO);
 }
 
-static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
+static int get_data_block_bmap(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
-	return __get_data_block(inode, iblock, bh_result, create, true);
+	return __get_data_block(inode, iblock, bh_result, create,
+						F2FS_GET_BLOCK_BMAP);
 }
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -1367,7 +768,8 @@ next:
 	memset(&map_bh, 0, sizeof(struct buffer_head));
 	map_bh.b_size = len;
 
-	ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0);
+	ret = get_data_block(inode, start_blk, &map_bh, 0,
+					F2FS_GET_BLOCK_FIEMAP);
 	if (ret)
 		goto out;
 
@@ -1770,6 +1172,137 @@ static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
 	return ret;
 }
 
+/*
+ * This function was copied from write_cache_pages from mm/page-writeback.c.
+ * The major change is making write step of cold data page separately from
+ * warm/hot data page.
+ */
+static int f2fs_write_cache_pages(struct address_space *mapping,
+			struct writeback_control *wbc, writepage_t writepage,
+			void *data)
+{
+	int ret = 0;
+	int done = 0;
+	struct pagevec pvec;
+	int nr_pages;
+	pgoff_t uninitialized_var(writeback_index);
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
+	pgoff_t done_index;
+	int cycled;
+	int range_whole = 0;
+	int tag;
+	int step = 0;
+
+	pagevec_init(&pvec, 0);
+next:
+	if (wbc->range_cyclic) {
+		writeback_index = mapping->writeback_index; /* prev offset */
+		index = writeback_index;
+		if (index == 0)
+			cycled = 1;
+		else
+			cycled = 0;
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = 1;
+		cycled = 1; /* ignore range_cyclic tests */
+	}
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
+retry:
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag_pages_for_writeback(mapping, index, end);
+	done_index = index;
+	while (!done && (index <= end)) {
+		int i;
+
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			if (page->index > end) {
+				done = 1;
+				break;
+			}
+
+			done_index = page->index;
+
+			lock_page(page);
+
+			if (unlikely(page->mapping != mapping)) {
+continue_unlock:
+				unlock_page(page);
+				continue;
+			}
+
+			if (!PageDirty(page)) {
+				/* someone wrote it for us */
+				goto continue_unlock;
+			}
+
+			if (step == is_cold_data(page))
+				goto continue_unlock;
+
+			if (PageWriteback(page)) {
+				if (wbc->sync_mode != WB_SYNC_NONE)
+					f2fs_wait_on_page_writeback(page, DATA);
+				else
+					goto continue_unlock;
+			}
+
+			BUG_ON(PageWriteback(page));
+			if (!clear_page_dirty_for_io(page))
+				goto continue_unlock;
+
+			ret = (*writepage)(page, wbc, data);
+			if (unlikely(ret)) {
+				if (ret == AOP_WRITEPAGE_ACTIVATE) {
+					unlock_page(page);
+					ret = 0;
+				} else {
+					done_index = page->index + 1;
+					done = 1;
+					break;
+				}
+			}
+
+			if (--wbc->nr_to_write <= 0 &&
+					wbc->sync_mode == WB_SYNC_NONE) {
+				done = 1;
+				break;
+			}
+		}
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+
+	if (step < 1) {
+		step++;
+		goto next;
+	}
+
+	if (!cycled && !done) {
+		cycled = 1;
+		index = 0;
+		end = writeback_index - 1;
+		goto retry;
+	}
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = done_index;
+
+	return ret;
+}
+
 static int f2fs_write_data_pages(struct address_space *mapping,
 				struct writeback_control *wbc)
 {
@@ -1785,6 +1318,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	if (!mapping->a_ops->writepage)
 		return 0;
 
+	/* skip writing if there is no dirty page in this inode */
+	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
+		return 0;
+
 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
 			available_free_memory(sbi, DIRTY_DENTS))
@@ -1800,12 +1337,11 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 		mutex_lock(&sbi->writepages);
 		locked = true;
 	}
-	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
+	ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
+	f2fs_submit_merged_bio(sbi, DATA, WRITE);
 	if (locked)
 		mutex_unlock(&sbi->writepages);
 
-	f2fs_submit_merged_bio(sbi, DATA, WRITE);
-
 	remove_dirty_dir_inode(inode);
 
 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
@@ -1832,7 +1368,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct page *page, *ipage;
+	struct page *page = NULL;
+	struct page *ipage;
 	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
 	struct dnode_of_data dn;
 	int err = 0;
@@ -1882,25 +1419,28 @@ repeat:
 		if (err)
 			goto put_fail;
 	}
-	err = f2fs_reserve_block(&dn, index);
+
+	err = f2fs_get_block(&dn, index);
 	if (err)
 		goto put_fail;
 put_next:
 	f2fs_put_dnode(&dn);
 	f2fs_unlock_op(sbi);
 
-	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
-		return 0;
-
 	f2fs_wait_on_page_writeback(page, DATA);
 
+	if (len == PAGE_CACHE_SIZE)
+		goto out_update;
+	if (PageUptodate(page))
+		goto out_clear;
+
 	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
 		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
 		unsigned end = start + len;
 
 		/* Reading beyond i_size is simple: memset to zero */
 		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
-		goto out;
+		goto out_update;
 	}
 
 	if (dn.data_blkaddr == NEW_ADDR) {
@@ -1920,7 +1460,6 @@ put_next:
 
 	lock_page(page);
 	if (unlikely(!PageUptodate(page))) {
-		f2fs_put_page(page, 1);
 		err = -EIO;
 		goto fail;
 	}
@@ -1932,14 +1471,13 @@ put_next:
 		/* avoid symlink page */
 		if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
 			err = f2fs_decrypt_one(inode, page);
-			if (err) {
-				f2fs_put_page(page, 1);
+			if (err)
 				goto fail;
-			}
 		}
 	}
-out:
+out_update:
 	SetPageUptodate(page);
+out_clear:
 	clear_cold_data(page);
 	return 0;
 
@@ -1947,8 +1485,8 @@ put_fail:
 	f2fs_put_dnode(&dn);
 unlock_fail:
 	f2fs_unlock_op(sbi);
-	f2fs_put_page(page, 1);
 fail:
+	f2fs_put_page(page, 1);
 	f2fs_write_failed(mapping, pos + len);
 	return err;
 }
@@ -1979,9 +1517,6 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
 {
 	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
 
-	if (iov_iter_rw(iter) == READ)
-		return 0;
-
 	if (offset & blocksize_mask)
 		return -EINVAL;
 
@@ -2010,15 +1545,16 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
 		return 0;
 
-	if (check_direct_IO(inode, iter, offset))
-		return 0;
+	err = check_direct_IO(inode, iter, offset);
+	if (err)
+		return err;
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 
 	if (iov_iter_rw(iter) == WRITE)
 		__allocate_data_blocks(inode, offset, count);
 
-	err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
+	err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
 	if (err < 0 && iov_iter_rw(iter) == WRITE)
 		f2fs_write_failed(mapping, offset + count);
 
@@ -2045,6 +1581,11 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
 		else
 			inode_dec_dirty_pages(inode);
 	}
+
+	/* This is atomic written page, keep Private */
+	if (IS_ATOMIC_WRITTEN_PAGE(page))
+		return;
+
 	ClearPagePrivate(page);
 }
 
@@ -2054,6 +1595,10 @@ int f2fs_release_page(struct page *page, gfp_t wait)
 	if (PageDirty(page))
 		return 0;
 
+	/* This is atomic written page, keep Private */
+	if (IS_ATOMIC_WRITTEN_PAGE(page))
+		return 0;
+
 	ClearPagePrivate(page);
 	return 1;
 }
@@ -2068,8 +1613,15 @@ static int f2fs_set_data_page_dirty(struct page *page)
 	SetPageUptodate(page);
 
 	if (f2fs_is_atomic_file(inode)) {
-		register_inmem_page(inode, page);
-		return 1;
+		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
+			register_inmem_page(inode, page);
+			return 1;
+		}
+		/*
+		 * Previously, this page has been registered, we just
+		 * return here.
+		 */
+		return 0;
 	}
 
 	if (!PageDirty(page)) {
@@ -2090,38 +1642,7 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 		if (err)
 			return err;
 	}
-	return generic_block_bmap(mapping, block, get_data_block);
-}
-
-void init_extent_cache_info(struct f2fs_sb_info *sbi)
-{
-	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
-	init_rwsem(&sbi->extent_tree_lock);
-	INIT_LIST_HEAD(&sbi->extent_list);
-	spin_lock_init(&sbi->extent_lock);
-	sbi->total_ext_tree = 0;
-	atomic_set(&sbi->total_ext_node, 0);
-}
-
-int __init create_extent_cache(void)
-{
-	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
-			sizeof(struct extent_tree));
-	if (!extent_tree_slab)
-		return -ENOMEM;
-	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
-			sizeof(struct extent_node));
-	if (!extent_node_slab) {
-		kmem_cache_destroy(extent_tree_slab);
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-void destroy_extent_cache(void)
-{
-	kmem_cache_destroy(extent_node_slab);
-	kmem_cache_destroy(extent_tree_slab);
-}
+	return generic_block_bmap(mapping, block, get_data_block_bmap);
 }
 
 const struct address_space_operations f2fs_dblock_aops = {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 75176e0dd6c8..d013d8479753 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -33,8 +33,11 @@ static void update_general_status(struct f2fs_sb_info *sbi)
33 int i; 33 int i;
34 34
35 /* validation check of the segment numbers */ 35 /* validation check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext; 36 si->hit_largest = atomic_read(&sbi->read_hit_largest);
37 si->total_ext = sbi->total_hit_ext; 37 si->hit_cached = atomic_read(&sbi->read_hit_cached);
38 si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree);
39 si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
40 si->total_ext = atomic_read(&sbi->total_hit_ext);
38 si->ext_tree = sbi->total_ext_tree; 41 si->ext_tree = sbi->total_ext_tree;
39 si->ext_node = atomic_read(&sbi->total_ext_node); 42 si->ext_node = atomic_read(&sbi->total_ext_node);
40 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); 43 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -49,6 +52,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
49 si->valid_count = valid_user_blocks(sbi); 52 si->valid_count = valid_user_blocks(sbi);
50 si->valid_node_count = valid_node_count(sbi); 53 si->valid_node_count = valid_node_count(sbi);
51 si->valid_inode_count = valid_inode_count(sbi); 54 si->valid_inode_count = valid_inode_count(sbi);
55 si->inline_xattr = atomic_read(&sbi->inline_xattr);
52 si->inline_inode = atomic_read(&sbi->inline_inode); 56 si->inline_inode = atomic_read(&sbi->inline_inode);
53 si->inline_dir = atomic_read(&sbi->inline_dir); 57 si->inline_dir = atomic_read(&sbi->inline_dir);
54 si->utilization = utilization(sbi); 58 si->utilization = utilization(sbi);
@@ -226,6 +230,8 @@ static int stat_show(struct seq_file *s, void *v)
226 seq_printf(s, "Other: %u)\n - Data: %u\n", 230 seq_printf(s, "Other: %u)\n - Data: %u\n",
227 si->valid_node_count - si->valid_inode_count, 231 si->valid_node_count - si->valid_inode_count,
228 si->valid_count - si->valid_node_count); 232 si->valid_count - si->valid_node_count);
233 seq_printf(s, " - Inline_xattr Inode: %u\n",
234 si->inline_xattr);
229 seq_printf(s, " - Inline_data Inode: %u\n", 235 seq_printf(s, " - Inline_data Inode: %u\n",
230 si->inline_inode); 236 si->inline_inode);
231 seq_printf(s, " - Inline_dentry Inode: %u\n", 237 seq_printf(s, " - Inline_dentry Inode: %u\n",
@@ -276,10 +282,16 @@ static int stat_show(struct seq_file *s, void *v)
276 si->bg_data_blks); 282 si->bg_data_blks);
277 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks, 283 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
278 si->bg_node_blks); 284 si->bg_node_blks);
279 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", 285 seq_puts(s, "\nExtent Cache:\n");
280 si->hit_ext, si->total_ext); 286 seq_printf(s, " - Hit Count: L1-1:%d L1-2:%d L2:%d\n",
281 seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree); 287 si->hit_largest, si->hit_cached,
282 seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node); 288 si->hit_rbtree);
289 seq_printf(s, " - Hit Ratio: %d%% (%d / %d)\n",
290 !si->total_ext ? 0 :
291 (si->hit_total * 100) / si->total_ext,
292 si->hit_total, si->total_ext);
293 seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
294 si->ext_tree, si->ext_node);
283 seq_puts(s, "\nBalancing F2FS Async:\n"); 295 seq_puts(s, "\nBalancing F2FS Async:\n");
284 seq_printf(s, " - inmem: %4d, wb: %4d\n", 296 seq_printf(s, " - inmem: %4d, wb: %4d\n",
285 si->inmem_pages, si->wb_pages); 297 si->inmem_pages, si->wb_pages);
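
The new hit-ratio line guards the division with the `!si->total_ext ? 0 :` test so an idle cache reports 0% instead of dividing by zero. The same computation in isolation:

#include <stdio.h>

static int hit_ratio_pct(int hits, int total)
{
	return !total ? 0 : (hits * 100) / total;   /* guarded integer percentage */
}

int main(void)
{
	printf("%d%%\n", hit_ratio_pct(0, 0));    /* 0%, no division by zero */
	printf("%d%%\n", hit_ratio_pct(42, 60));  /* 70% */
	return 0;
}
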
@@ -366,6 +378,12 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
366 si->sbi = sbi; 378 si->sbi = sbi;
367 sbi->stat_info = si; 379 sbi->stat_info = si;
368 380
381 atomic_set(&sbi->total_hit_ext, 0);
382 atomic_set(&sbi->read_hit_rbtree, 0);
383 atomic_set(&sbi->read_hit_largest, 0);
384 atomic_set(&sbi->read_hit_cached, 0);
385
386 atomic_set(&sbi->inline_xattr, 0);
369 atomic_set(&sbi->inline_inode, 0); 387 atomic_set(&sbi->inline_inode, 0);
370 atomic_set(&sbi->inline_dir, 0); 388 atomic_set(&sbi->inline_dir, 0);
371 atomic_set(&sbi->inplace_count, 0); 389 atomic_set(&sbi->inplace_count, 0);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a34ebd8312ab..8f15fc134040 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -718,8 +718,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
718 if (inode) 718 if (inode)
719 f2fs_drop_nlink(dir, inode, NULL); 719 f2fs_drop_nlink(dir, inode, NULL);
720 720
721 if (bit_pos == NR_DENTRY_IN_BLOCK) { 721 if (bit_pos == NR_DENTRY_IN_BLOCK &&
722 truncate_hole(dir, page->index, page->index + 1); 722 !truncate_hole(dir, page->index, page->index + 1)) {
723 clear_page_dirty_for_io(page); 723 clear_page_dirty_for_io(page);
724 ClearPagePrivate(page); 724 ClearPagePrivate(page);
725 ClearPageUptodate(page); 725 ClearPageUptodate(page);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
new file mode 100644
index 000000000000..997ac86f2a1d
--- /dev/null
+++ b/fs/f2fs/extent_cache.c
@@ -0,0 +1,791 @@
1/*
2 * f2fs extent cache support
3 *
4 * Copyright (c) 2015 Motorola Mobility
5 * Copyright (c) 2015 Samsung Electronics
6 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
7 * Chao Yu <chao2.yu@samsung.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/fs.h>
15#include <linux/f2fs_fs.h>
16
17#include "f2fs.h"
18#include "node.h"
19#include <trace/events/f2fs.h>
20
21static struct kmem_cache *extent_tree_slab;
22static struct kmem_cache *extent_node_slab;
23
24static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
25 struct extent_tree *et, struct extent_info *ei,
26 struct rb_node *parent, struct rb_node **p)
27{
28 struct extent_node *en;
29
30 en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
31 if (!en)
32 return NULL;
33
34 en->ei = *ei;
35 INIT_LIST_HEAD(&en->list);
36
37 rb_link_node(&en->rb_node, parent, p);
38 rb_insert_color(&en->rb_node, &et->root);
39 et->count++;
40 atomic_inc(&sbi->total_ext_node);
41 return en;
42}
43
44static void __detach_extent_node(struct f2fs_sb_info *sbi,
45 struct extent_tree *et, struct extent_node *en)
46{
47 rb_erase(&en->rb_node, &et->root);
48 et->count--;
49 atomic_dec(&sbi->total_ext_node);
50
51 if (et->cached_en == en)
52 et->cached_en = NULL;
53}
54
55static struct extent_tree *__grab_extent_tree(struct inode *inode)
56{
57 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
58 struct extent_tree *et;
59 nid_t ino = inode->i_ino;
60
61 down_write(&sbi->extent_tree_lock);
62 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
63 if (!et) {
64 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
65 f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
66 memset(et, 0, sizeof(struct extent_tree));
67 et->ino = ino;
68 et->root = RB_ROOT;
69 et->cached_en = NULL;
70 rwlock_init(&et->lock);
71 atomic_set(&et->refcount, 0);
72 et->count = 0;
73 sbi->total_ext_tree++;
74 }
75 atomic_inc(&et->refcount);
76 up_write(&sbi->extent_tree_lock);
77
 78 /* never dies until evict_inode */
79 F2FS_I(inode)->extent_tree = et;
80
81 return et;
82}
83
84static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
85 struct extent_tree *et, unsigned int fofs)
86{
87 struct rb_node *node = et->root.rb_node;
88 struct extent_node *en = et->cached_en;
89
90 if (en) {
91 struct extent_info *cei = &en->ei;
92
93 if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
94 stat_inc_cached_node_hit(sbi);
95 return en;
96 }
97 }
98
99 while (node) {
100 en = rb_entry(node, struct extent_node, rb_node);
101
102 if (fofs < en->ei.fofs) {
103 node = node->rb_left;
104 } else if (fofs >= en->ei.fofs + en->ei.len) {
105 node = node->rb_right;
106 } else {
107 stat_inc_rbtree_node_hit(sbi);
108 return en;
109 }
110 }
111 return NULL;
112}
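
__lookup_extent_tree() above tries the one-entry cached_en shortcut before descending the rb-tree. A userspace sketch of the same strategy, assuming a sorted array of disjoint extents in place of the rb-tree:

#include <stddef.h>
#include <stdio.h>

struct ext { unsigned fofs, len; };

static struct ext *cached;                          /* plays the role of cached_en */

static struct ext *lookup(struct ext *v, size_t n, unsigned fofs)
{
	size_t lo = 0, hi = n;

	if (cached && cached->fofs <= fofs && fofs < cached->fofs + cached->len)
		return cached;                      /* fast path: cache hit */

	while (lo < hi) {                           /* ordered search, like the */
		size_t mid = (lo + hi) / 2;         /* rb-tree descent above    */
		if (fofs < v[mid].fofs)
			hi = mid;
		else if (fofs >= v[mid].fofs + v[mid].len)
			lo = mid + 1;
		else
			return cached = &v[mid];    /* remember for next time */
	}
	return NULL;
}

int main(void)
{
	struct ext v[] = { {0, 4}, {8, 2}, {16, 8} };
	struct ext *e = lookup(v, 3, 17);

	if (e)
		printf("hit: [%u, %u)\n", e->fofs, e->fofs + e->len);
	return 0;
}
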
113
114static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
115 struct extent_tree *et, struct extent_info *ei)
116{
117 struct rb_node **p = &et->root.rb_node;
118 struct extent_node *en;
119
120 en = __attach_extent_node(sbi, et, ei, NULL, p);
121 if (!en)
122 return NULL;
123
124 et->largest = en->ei;
125 et->cached_en = en;
126 return en;
127}
128
129static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
130 struct extent_tree *et, bool free_all)
131{
132 struct rb_node *node, *next;
133 struct extent_node *en;
134 unsigned int count = et->count;
135
136 node = rb_first(&et->root);
137 while (node) {
138 next = rb_next(node);
139 en = rb_entry(node, struct extent_node, rb_node);
140
141 if (free_all) {
142 spin_lock(&sbi->extent_lock);
143 if (!list_empty(&en->list))
144 list_del_init(&en->list);
145 spin_unlock(&sbi->extent_lock);
146 }
147
148 if (free_all || list_empty(&en->list)) {
149 __detach_extent_node(sbi, et, en);
150 kmem_cache_free(extent_node_slab, en);
151 }
152 node = next;
153 }
154
155 return count - et->count;
156}
157
158static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
159{
160 struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
161
162 if (largest->fofs <= fofs && largest->fofs + largest->len > fofs)
163 largest->len = 0;
164}
165
166void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
167{
168 if (!f2fs_may_extent_tree(inode))
169 return;
170
171 __drop_largest_extent(inode, fofs);
172}
173
174void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
175{
176 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
177 struct extent_tree *et;
178 struct extent_node *en;
179 struct extent_info ei;
180
181 if (!f2fs_may_extent_tree(inode))
182 return;
183
184 et = __grab_extent_tree(inode);
185
186 if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
187 return;
188
189 set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
190 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
191
192 write_lock(&et->lock);
193 if (et->count)
194 goto out;
195
196 en = __init_extent_tree(sbi, et, &ei);
197 if (en) {
198 spin_lock(&sbi->extent_lock);
199 list_add_tail(&en->list, &sbi->extent_list);
200 spin_unlock(&sbi->extent_lock);
201 }
202out:
203 write_unlock(&et->lock);
204}
205
206static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
207 struct extent_info *ei)
208{
209 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
210 struct extent_tree *et = F2FS_I(inode)->extent_tree;
211 struct extent_node *en;
212 bool ret = false;
213
214 f2fs_bug_on(sbi, !et);
215
216 trace_f2fs_lookup_extent_tree_start(inode, pgofs);
217
218 read_lock(&et->lock);
219
220 if (et->largest.fofs <= pgofs &&
221 et->largest.fofs + et->largest.len > pgofs) {
222 *ei = et->largest;
223 ret = true;
224 stat_inc_largest_node_hit(sbi);
225 goto out;
226 }
227
228 en = __lookup_extent_tree(sbi, et, pgofs);
229 if (en) {
230 *ei = en->ei;
231 spin_lock(&sbi->extent_lock);
232 if (!list_empty(&en->list))
233 list_move_tail(&en->list, &sbi->extent_list);
234 et->cached_en = en;
235 spin_unlock(&sbi->extent_lock);
236 ret = true;
237 }
238out:
239 stat_inc_total_hit(sbi);
240 read_unlock(&et->lock);
241
242 trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
243 return ret;
244}
245
246
247/*
 248 * lookup the extent at @fofs; if hit, return the extent;
 249 * if not, return NULL and set
 250 * @prev_ex: extent before fofs
 251 * @next_ex: extent after fofs
 252 * @insert_p: insert point for the new extent at fofs
 253 * in order to simplify the insertion afterward.
 254 * The tree must stay unchanged between lookup and insertion.
255 */
256static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
257 unsigned int fofs,
258 struct extent_node **prev_ex,
259 struct extent_node **next_ex,
260 struct rb_node ***insert_p,
261 struct rb_node **insert_parent)
262{
263 struct rb_node **pnode = &et->root.rb_node;
264 struct rb_node *parent = NULL, *tmp_node;
265 struct extent_node *en = et->cached_en;
266
267 *insert_p = NULL;
268 *insert_parent = NULL;
269 *prev_ex = NULL;
270 *next_ex = NULL;
271
272 if (RB_EMPTY_ROOT(&et->root))
273 return NULL;
274
275 if (en) {
276 struct extent_info *cei = &en->ei;
277
278 if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
279 goto lookup_neighbors;
280 }
281
282 while (*pnode) {
283 parent = *pnode;
284 en = rb_entry(*pnode, struct extent_node, rb_node);
285
286 if (fofs < en->ei.fofs)
287 pnode = &(*pnode)->rb_left;
288 else if (fofs >= en->ei.fofs + en->ei.len)
289 pnode = &(*pnode)->rb_right;
290 else
291 goto lookup_neighbors;
292 }
293
294 *insert_p = pnode;
295 *insert_parent = parent;
296
297 en = rb_entry(parent, struct extent_node, rb_node);
298 tmp_node = parent;
299 if (parent && fofs > en->ei.fofs)
300 tmp_node = rb_next(parent);
301 *next_ex = tmp_node ?
302 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
303
304 tmp_node = parent;
305 if (parent && fofs < en->ei.fofs)
306 tmp_node = rb_prev(parent);
307 *prev_ex = tmp_node ?
308 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
309 return NULL;
310
311lookup_neighbors:
312 if (fofs == en->ei.fofs) {
313 /* lookup prev node for merging backward later */
314 tmp_node = rb_prev(&en->rb_node);
315 *prev_ex = tmp_node ?
316 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
317 }
318 if (fofs == en->ei.fofs + en->ei.len - 1) {
319 /* lookup next node for merging frontward later */
320 tmp_node = rb_next(&en->rb_node);
321 *next_ex = tmp_node ?
322 rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
323 }
324 return en;
325}
326
327static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
328 struct extent_tree *et, struct extent_info *ei,
329 struct extent_node **den,
330 struct extent_node *prev_ex,
331 struct extent_node *next_ex)
332{
333 struct extent_node *en = NULL;
334
335 if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
336 prev_ex->ei.len += ei->len;
337 ei = &prev_ex->ei;
338 en = prev_ex;
339 }
340
341 if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
342 if (en) {
343 __detach_extent_node(sbi, et, prev_ex);
344 *den = prev_ex;
345 }
346 next_ex->ei.fofs = ei->fofs;
347 next_ex->ei.blk = ei->blk;
348 next_ex->ei.len += ei->len;
349 en = next_ex;
350 }
351
352 if (en) {
353 if (en->ei.len > et->largest.len)
354 et->largest = en->ei;
355 et->cached_en = en;
356 }
357 return en;
358}
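
The merge conditions this function relies on amount to adjacency in both spaces: two extents fuse only when their file offsets and their block addresses are each contiguous. A standalone sketch of the backward-merge check (my reading of what __is_back_mergeable() tests, not the kernel's definition verbatim):

#include <stdio.h>

struct ext { unsigned fofs, blk, len; };

/* cur can be absorbed into prev iff both mappings are contiguous */
static int back_mergeable(const struct ext *cur, const struct ext *prev)
{
	return prev->fofs + prev->len == cur->fofs &&
	       prev->blk + prev->len == cur->blk;
}

int main(void)
{
	struct ext prev = { 0, 100, 4 }, cur = { 4, 104, 2 };

	if (back_mergeable(&cur, &prev)) {
		prev.len += cur.len;             /* what the merge does */
		printf("merged: [%u,+%u)@%u\n", prev.fofs, prev.len, prev.blk);
	}
	return 0;
}
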
359
360static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
361 struct extent_tree *et, struct extent_info *ei,
362 struct rb_node **insert_p,
363 struct rb_node *insert_parent)
364{
365 struct rb_node **p = &et->root.rb_node;
366 struct rb_node *parent = NULL;
367 struct extent_node *en = NULL;
368
369 if (insert_p && insert_parent) {
370 parent = insert_parent;
371 p = insert_p;
372 goto do_insert;
373 }
374
375 while (*p) {
376 parent = *p;
377 en = rb_entry(parent, struct extent_node, rb_node);
378
379 if (ei->fofs < en->ei.fofs)
380 p = &(*p)->rb_left;
381 else if (ei->fofs >= en->ei.fofs + en->ei.len)
382 p = &(*p)->rb_right;
383 else
384 f2fs_bug_on(sbi, 1);
385 }
386do_insert:
387 en = __attach_extent_node(sbi, et, ei, parent, p);
388 if (!en)
389 return NULL;
390
391 if (en->ei.len > et->largest.len)
392 et->largest = en->ei;
393 et->cached_en = en;
394 return en;
395}
396
397unsigned int f2fs_update_extent_tree_range(struct inode *inode,
398 pgoff_t fofs, block_t blkaddr, unsigned int len)
399{
400 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
401 struct extent_tree *et = F2FS_I(inode)->extent_tree;
402 struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
403 struct extent_node *prev_en = NULL, *next_en = NULL;
404 struct extent_info ei, dei, prev;
405 struct rb_node **insert_p = NULL, *insert_parent = NULL;
406 unsigned int end = fofs + len;
407 unsigned int pos = (unsigned int)fofs;
408
409 if (!et)
410 return false;
411
412 write_lock(&et->lock);
413
414 if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
415 write_unlock(&et->lock);
416 return false;
417 }
418
419 prev = et->largest;
420 dei.len = 0;
421
422 /* we do not guarantee that the largest extent is cached all the time */
423 __drop_largest_extent(inode, fofs);
424
425 /* 1. lookup first extent node in range [fofs, fofs + len - 1] */
426 en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
427 &insert_p, &insert_parent);
428 if (!en) {
429 if (next_en) {
430 en = next_en;
431 f2fs_bug_on(sbi, en->ei.fofs <= pos);
432 pos = en->ei.fofs;
433 } else {
434 /*
435 * skip searching in the tree since there is no
436 * larger extent node in the cache.
437 */
438 goto update_extent;
439 }
440 }
441
 442 /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
443 while (en) {
444 struct rb_node *node;
445
446 if (pos >= end)
447 break;
448
449 dei = en->ei;
450 en1 = en2 = NULL;
451
452 node = rb_next(&en->rb_node);
453
454 /*
 455 * 2.1 there are four cases when we invalidate blkaddrs in an extent
456 * node, |V: valid address, X: will be invalidated|
457 */
458 /* case#1, invalidate right part of extent node |VVVVVXXXXX| */
459 if (pos > dei.fofs && end >= dei.fofs + dei.len) {
460 en->ei.len = pos - dei.fofs;
461
462 if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
463 __detach_extent_node(sbi, et, en);
464 insert_p = NULL;
465 insert_parent = NULL;
466 goto update;
467 }
468
469 if (__is_extent_same(&dei, &et->largest))
470 et->largest = en->ei;
471 goto next;
472 }
473
474 /* case#2, invalidate left part of extent node |XXXXXVVVVV| */
475 if (pos <= dei.fofs && end < dei.fofs + dei.len) {
476 en->ei.fofs = end;
477 en->ei.blk += end - dei.fofs;
478 en->ei.len -= end - dei.fofs;
479
480 if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
481 __detach_extent_node(sbi, et, en);
482 insert_p = NULL;
483 insert_parent = NULL;
484 goto update;
485 }
486
487 if (__is_extent_same(&dei, &et->largest))
488 et->largest = en->ei;
489 goto next;
490 }
491
492 __detach_extent_node(sbi, et, en);
493
494 /*
 495 * if we remove a node from the rb-tree, our parent node pointers may
 496 * point to the wrong place, so discard them.
497 */
498 insert_p = NULL;
499 insert_parent = NULL;
500
501 /* case#3, invalidate entire extent node |XXXXXXXXXX| */
502 if (pos <= dei.fofs && end >= dei.fofs + dei.len) {
503 if (__is_extent_same(&dei, &et->largest))
504 et->largest.len = 0;
505 goto update;
506 }
507
508 /*
509 * case#4, invalidate data in the middle of extent node
510 * |VVVXXXXVVV|
511 */
512 if (dei.len > F2FS_MIN_EXTENT_LEN) {
513 unsigned int endofs;
514
515 /* insert left part of split extent into cache */
516 if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
517 set_extent_info(&ei, dei.fofs, dei.blk,
518 pos - dei.fofs);
519 en1 = __insert_extent_tree(sbi, et, &ei,
520 NULL, NULL);
521 }
522
523 /* insert right part of split extent into cache */
524 endofs = dei.fofs + dei.len;
525 if (endofs - end >= F2FS_MIN_EXTENT_LEN) {
526 set_extent_info(&ei, end,
527 end - dei.fofs + dei.blk,
528 endofs - end);
529 en2 = __insert_extent_tree(sbi, et, &ei,
530 NULL, NULL);
531 }
532 }
533update:
534 /* 2.2 update in global extent list */
535 spin_lock(&sbi->extent_lock);
536 if (en && !list_empty(&en->list))
537 list_del(&en->list);
538 if (en1)
539 list_add_tail(&en1->list, &sbi->extent_list);
540 if (en2)
541 list_add_tail(&en2->list, &sbi->extent_list);
542 spin_unlock(&sbi->extent_lock);
543
544 /* 2.3 release extent node */
545 if (en)
546 kmem_cache_free(extent_node_slab, en);
547next:
548 en = node ? rb_entry(node, struct extent_node, rb_node) : NULL;
549 next_en = en;
550 if (en)
551 pos = en->ei.fofs;
552 }
553
554update_extent:
555 /* 3. update extent in extent cache */
556 if (blkaddr) {
557 struct extent_node *den = NULL;
558
559 set_extent_info(&ei, fofs, blkaddr, len);
560 en3 = __try_merge_extent_node(sbi, et, &ei, &den,
561 prev_en, next_en);
562 if (!en3)
563 en3 = __insert_extent_tree(sbi, et, &ei,
564 insert_p, insert_parent);
565
 566 /* give up the extent cache if splits and small updates happen */
567 if (dei.len >= 1 &&
568 prev.len < F2FS_MIN_EXTENT_LEN &&
569 et->largest.len < F2FS_MIN_EXTENT_LEN) {
570 et->largest.len = 0;
571 set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
572 }
573
574 spin_lock(&sbi->extent_lock);
575 if (en3) {
576 if (list_empty(&en3->list))
577 list_add_tail(&en3->list, &sbi->extent_list);
578 else
579 list_move_tail(&en3->list, &sbi->extent_list);
580 }
581 if (den && !list_empty(&den->list))
582 list_del(&den->list);
583 spin_unlock(&sbi->extent_lock);
584
585 if (den)
586 kmem_cache_free(extent_node_slab, den);
587 }
588
589 if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
590 __free_extent_tree(sbi, et, true);
591
592 write_unlock(&et->lock);
593
594 return !__is_extent_same(&prev, &et->largest);
595}
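
Cases #1 through #4 above reduce to one interval operation: punch [pos, end) out of an existing extent and keep whatever survives on each side. A self-contained sketch of that case analysis, leaving out the F2FS_MIN_EXTENT_LEN pruning that the kernel code additionally applies:

#include <stdio.h>

struct ext { unsigned fofs, blk, len; };

/* Returns how many pieces survive (0, 1, or 2) and writes them to out[]. */
static int punch(struct ext e, unsigned pos, unsigned end, struct ext *out)
{
	unsigned eend = e.fofs + e.len;
	int n = 0;

	if (end <= e.fofs || pos >= eend) {      /* no overlap: keep as-is */
		out[n++] = e;
		return n;
	}
	if (pos > e.fofs)                        /* left remainder  |VVVXX...| */
		out[n++] = (struct ext){ e.fofs, e.blk, pos - e.fofs };
	if (end < eend)                          /* right remainder |...XXVVV| */
		out[n++] = (struct ext){ end, e.blk + (end - e.fofs), eend - end };
	return n;                                /* 0 pieces == case #3 */
}

int main(void)
{
	struct ext out[2];
	/* case #4: punch [13, 17) out of [10, 20) mapped at block 100 */
	int n = punch((struct ext){ 10, 100, 10 }, 13, 17, out);

	for (int i = 0; i < n; i++)              /* [10,+3)@100 and [17,+3)@107 */
		printf("[%u,+%u)@%u\n", out[i].fofs, out[i].len, out[i].blk);
	return 0;
}
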
596
597unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
598{
599 struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
600 struct extent_node *en, *tmp;
601 unsigned long ino = F2FS_ROOT_INO(sbi);
602 struct radix_tree_root *root = &sbi->extent_tree_root;
603 unsigned int found;
604 unsigned int node_cnt = 0, tree_cnt = 0;
605 int remained;
606
607 if (!test_opt(sbi, EXTENT_CACHE))
608 return 0;
609
610 if (!down_write_trylock(&sbi->extent_tree_lock))
611 goto out;
612
613 /* 1. remove unreferenced extent tree */
614 while ((found = radix_tree_gang_lookup(root,
615 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
616 unsigned i;
617
618 ino = treevec[found - 1]->ino + 1;
619 for (i = 0; i < found; i++) {
620 struct extent_tree *et = treevec[i];
621
622 if (!atomic_read(&et->refcount)) {
623 write_lock(&et->lock);
624 node_cnt += __free_extent_tree(sbi, et, true);
625 write_unlock(&et->lock);
626
627 radix_tree_delete(root, et->ino);
628 kmem_cache_free(extent_tree_slab, et);
629 sbi->total_ext_tree--;
630 tree_cnt++;
631
632 if (node_cnt + tree_cnt >= nr_shrink)
633 goto unlock_out;
634 }
635 }
636 }
637 up_write(&sbi->extent_tree_lock);
638
639 /* 2. remove LRU extent entries */
640 if (!down_write_trylock(&sbi->extent_tree_lock))
641 goto out;
642
643 remained = nr_shrink - (node_cnt + tree_cnt);
644
645 spin_lock(&sbi->extent_lock);
646 list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
647 if (!remained--)
648 break;
649 list_del_init(&en->list);
650 }
651 spin_unlock(&sbi->extent_lock);
652
653 while ((found = radix_tree_gang_lookup(root,
654 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
655 unsigned i;
656
657 ino = treevec[found - 1]->ino + 1;
658 for (i = 0; i < found; i++) {
659 struct extent_tree *et = treevec[i];
660
661 write_lock(&et->lock);
662 node_cnt += __free_extent_tree(sbi, et, false);
663 write_unlock(&et->lock);
664
665 if (node_cnt + tree_cnt >= nr_shrink)
666 break;
667 }
668 }
669unlock_out:
670 up_write(&sbi->extent_tree_lock);
671out:
672 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
673
674 return node_cnt + tree_cnt;
675}
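
The shrinker works in two passes: drop whole trees that no inode references, then walk the global LRU list and evict the oldest extent nodes until the quota is met. A toy version of the LRU half, with a plain singly linked list standing in for sbi->extent_list:

#include <stdio.h>
#include <stdlib.h>

struct node { int id; struct node *next; };

/* Pop at most nr_shrink nodes from the head (oldest end) of the list. */
static unsigned shrink(struct node **head, int nr_shrink)
{
	unsigned freed = 0;

	while (*head && nr_shrink-- > 0) {
		struct node *victim = *head;

		*head = victim->next;
		free(victim);
		freed++;
	}
	return freed;
}

int main(void)
{
	struct node *head = NULL;

	for (int i = 3; i >= 1; i--) {           /* build 1 -> 2 -> 3 */
		struct node *n = malloc(sizeof(*n));
		if (!n)
			return 1;
		n->id = i;
		n->next = head;
		head = n;
	}
	printf("freed %u\n", shrink(&head, 2));  /* frees the two oldest */
	return 0;
}
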
676
677unsigned int f2fs_destroy_extent_node(struct inode *inode)
678{
679 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
680 struct extent_tree *et = F2FS_I(inode)->extent_tree;
681 unsigned int node_cnt = 0;
682
683 if (!et)
684 return 0;
685
686 write_lock(&et->lock);
687 node_cnt = __free_extent_tree(sbi, et, true);
688 write_unlock(&et->lock);
689
690 return node_cnt;
691}
692
693void f2fs_destroy_extent_tree(struct inode *inode)
694{
695 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
696 struct extent_tree *et = F2FS_I(inode)->extent_tree;
697 unsigned int node_cnt = 0;
698
699 if (!et)
700 return;
701
702 if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
703 atomic_dec(&et->refcount);
704 return;
705 }
706
 707 /* free all extent info belonging to this extent tree */
708 node_cnt = f2fs_destroy_extent_node(inode);
709
710 /* delete extent tree entry in radix tree */
711 down_write(&sbi->extent_tree_lock);
712 atomic_dec(&et->refcount);
713 f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
714 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
715 kmem_cache_free(extent_tree_slab, et);
716 sbi->total_ext_tree--;
717 up_write(&sbi->extent_tree_lock);
718
719 F2FS_I(inode)->extent_tree = NULL;
720
721 trace_f2fs_destroy_extent_tree(inode, node_cnt);
722}
723
724bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
725 struct extent_info *ei)
726{
727 if (!f2fs_may_extent_tree(inode))
728 return false;
729
730 return f2fs_lookup_extent_tree(inode, pgofs, ei);
731}
732
733void f2fs_update_extent_cache(struct dnode_of_data *dn)
734{
735 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
736 pgoff_t fofs;
737
738 if (!f2fs_may_extent_tree(dn->inode))
739 return;
740
741 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
742
743
744 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
745 dn->ofs_in_node;
746
747 if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
748 sync_inode_page(dn);
749}
750
751void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
752 pgoff_t fofs, block_t blkaddr, unsigned int len)
753
754{
755 if (!f2fs_may_extent_tree(dn->inode))
756 return;
757
758 if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len))
759 sync_inode_page(dn);
760}
761
762void init_extent_cache_info(struct f2fs_sb_info *sbi)
763{
764 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
765 init_rwsem(&sbi->extent_tree_lock);
766 INIT_LIST_HEAD(&sbi->extent_list);
767 spin_lock_init(&sbi->extent_lock);
768 sbi->total_ext_tree = 0;
769 atomic_set(&sbi->total_ext_node, 0);
770}
771
772int __init create_extent_cache(void)
773{
774 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
775 sizeof(struct extent_tree));
776 if (!extent_tree_slab)
777 return -ENOMEM;
778 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
779 sizeof(struct extent_node));
780 if (!extent_node_slab) {
781 kmem_cache_destroy(extent_tree_slab);
782 return -ENOMEM;
783 }
784 return 0;
785}
786
787void destroy_extent_cache(void)
788{
789 kmem_cache_destroy(extent_node_slab);
790 kmem_cache_destroy(extent_tree_slab);
791}
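
create_extent_cache() and destroy_extent_cache() follow the usual paired-initialization pattern: allocate in order, unwind whatever already succeeded on failure, and tear down in reverse order. The same shape with plain allocations standing in for kmem caches:

#include <stdlib.h>

static void *cache_a, *cache_b;

static int create(void)
{
	cache_a = malloc(64);
	if (!cache_a)
		return -1;                       /* -ENOMEM */
	cache_b = malloc(64);
	if (!cache_b) {
		free(cache_a);                   /* unwind the partial init */
		return -1;
	}
	return 0;
}

static void destroy(void)
{
	free(cache_b);                           /* reverse creation order */
	free(cache_a);
}

int main(void)
{
	if (create())
		return 1;
	destroy();
	return 0;
}
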
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a8327ed73898..f1a90ffd7cad 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -19,6 +19,7 @@
19#include <linux/magic.h> 19#include <linux/magic.h>
20#include <linux/kobject.h> 20#include <linux/kobject.h>
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/bio.h>
22 23
23#ifdef CONFIG_F2FS_CHECK_FS 24#ifdef CONFIG_F2FS_CHECK_FS
24#define f2fs_bug_on(sbi, condition) BUG_ON(condition) 25#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
@@ -228,6 +229,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
228#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) 229#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
229#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) 230#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
230#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 231#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
232#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
231 233
232#define F2FS_IOC_SET_ENCRYPTION_POLICY \ 234#define F2FS_IOC_SET_ENCRYPTION_POLICY \
233 _IOR('f', 19, struct f2fs_encryption_policy) 235 _IOR('f', 19, struct f2fs_encryption_policy)
@@ -320,7 +322,7 @@ enum {
320 */ 322 */
321}; 323};
322 324
323#define F2FS_LINK_MAX 32000 /* maximum link count per file */ 325#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
324 326
325#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ 327#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
326 328
@@ -349,6 +351,7 @@ struct extent_tree {
349 nid_t ino; /* inode number */ 351 nid_t ino; /* inode number */
350 struct rb_root root; /* root of extent info rb-tree */ 352 struct rb_root root; /* root of extent info rb-tree */
351 struct extent_node *cached_en; /* recently accessed extent node */ 353 struct extent_node *cached_en; /* recently accessed extent node */
 354 struct extent_info largest; /* largest extent info */
352 rwlock_t lock; /* protect extent info rb-tree */ 355 rwlock_t lock; /* protect extent info rb-tree */
353 atomic_t refcount; /* reference count of rb-tree */ 356 atomic_t refcount; /* reference count of rb-tree */
354 unsigned int count; /* # of extent node in rb-tree*/ 357 unsigned int count; /* # of extent node in rb-tree*/
@@ -372,6 +375,12 @@ struct f2fs_map_blocks {
372 unsigned int m_flags; 375 unsigned int m_flags;
373}; 376};
374 377
378/* for flag in get_data_block */
379#define F2FS_GET_BLOCK_READ 0
380#define F2FS_GET_BLOCK_DIO 1
381#define F2FS_GET_BLOCK_FIEMAP 2
382#define F2FS_GET_BLOCK_BMAP 3
383
375/* 384/*
376 * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. 385 * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
377 */ 386 */
@@ -420,14 +429,13 @@ struct f2fs_inode_info {
420 unsigned int clevel; /* maximum level of given file name */ 429 unsigned int clevel; /* maximum level of given file name */
421 nid_t i_xattr_nid; /* node id that contains xattrs */ 430 nid_t i_xattr_nid; /* node id that contains xattrs */
422 unsigned long long xattr_ver; /* cp version of xattr modification */ 431 unsigned long long xattr_ver; /* cp version of xattr modification */
423 struct extent_info ext; /* in-memory extent cache entry */
424 rwlock_t ext_lock; /* rwlock for single extent cache */
425 struct inode_entry *dirty_dir; /* the pointer of dirty dir */ 432 struct inode_entry *dirty_dir; /* the pointer of dirty dir */
426 433
427 struct radix_tree_root inmem_root; /* radix tree for inmem pages */
428 struct list_head inmem_pages; /* inmemory pages managed by f2fs */ 434 struct list_head inmem_pages; /* inmemory pages managed by f2fs */
429 struct mutex inmem_lock; /* lock for inmemory pages */ 435 struct mutex inmem_lock; /* lock for inmemory pages */
430 436
437 struct extent_tree *extent_tree; /* cached extent_tree entry */
438
431#ifdef CONFIG_F2FS_FS_ENCRYPTION 439#ifdef CONFIG_F2FS_FS_ENCRYPTION
432 /* Encryption params */ 440 /* Encryption params */
433 struct f2fs_crypt_info *i_crypt_info; 441 struct f2fs_crypt_info *i_crypt_info;
@@ -779,7 +787,11 @@ struct f2fs_sb_info {
779 unsigned int segment_count[2]; /* # of allocated segments */ 787 unsigned int segment_count[2]; /* # of allocated segments */
780 unsigned int block_count[2]; /* # of allocated blocks */ 788 unsigned int block_count[2]; /* # of allocated blocks */
781 atomic_t inplace_count; /* # of inplace update */ 789 atomic_t inplace_count; /* # of inplace update */
782 int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ 790 atomic_t total_hit_ext; /* # of lookup extent cache */
791 atomic_t read_hit_rbtree; /* # of hit rbtree extent node */
792 atomic_t read_hit_largest; /* # of hit largest extent node */
793 atomic_t read_hit_cached; /* # of hit cached extent node */
794 atomic_t inline_xattr; /* # of inline_xattr inodes */
783 atomic_t inline_inode; /* # of inline_data inodes */ 795 atomic_t inline_inode; /* # of inline_data inodes */
784 atomic_t inline_dir; /* # of inline_dentry inodes */ 796 atomic_t inline_dir; /* # of inline_dentry inodes */
785 int bg_gc; /* background gc calls */ 797 int bg_gc; /* background gc calls */
@@ -791,6 +803,11 @@ struct f2fs_sb_info {
 791 /* For sysfs support */ 803 /* For sysfs support */
792 struct kobject s_kobj; 804 struct kobject s_kobj;
793 struct completion s_kobj_unregister; 805 struct completion s_kobj_unregister;
806
807 /* For shrinker support */
808 struct list_head s_list;
809 struct mutex umount_mutex;
810 unsigned int shrinker_run_no;
794}; 811};
795 812
796/* 813/*
@@ -1039,7 +1056,8 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
1039 1056
1040static inline void inode_dec_dirty_pages(struct inode *inode) 1057static inline void inode_dec_dirty_pages(struct inode *inode)
1041{ 1058{
1042 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) 1059 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
1060 !S_ISLNK(inode->i_mode))
1043 return; 1061 return;
1044 1062
1045 atomic_dec(&F2FS_I(inode)->dirty_pages); 1063 atomic_dec(&F2FS_I(inode)->dirty_pages);
@@ -1234,16 +1252,24 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
1234 gfp_t flags) 1252 gfp_t flags)
1235{ 1253{
1236 void *entry; 1254 void *entry;
1237retry:
1238 entry = kmem_cache_alloc(cachep, flags);
1239 if (!entry) {
1240 cond_resched();
1241 goto retry;
1242 }
1243 1255
1256 entry = kmem_cache_alloc(cachep, flags);
1257 if (!entry)
1258 entry = kmem_cache_alloc(cachep, flags | __GFP_NOFAIL);
1244 return entry; 1259 return entry;
1245} 1260}
1246 1261
1262static inline struct bio *f2fs_bio_alloc(int npages)
1263{
1264 struct bio *bio;
1265
1266 /* No failure on bio allocation */
1267 bio = bio_alloc(GFP_NOIO, npages);
1268 if (!bio)
1269 bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
1270 return bio;
1271}
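
Both helpers above replace the old open-coded retry loop with a two-step idiom: try a normal allocation first, then repeat with __GFP_NOFAIL, which cannot return NULL. A userspace analogue of that fast-path-then-must-succeed split (the spinning fallback merely imitates what the flag guarantees internally):

#include <stdlib.h>

static void *must_alloc(size_t n)
{
	void *p;

	while (!(p = malloc(n)))    /* keep trying; __GFP_NOFAIL does this */
		;                   /* inside the allocator, with reclaim */
	return p;
}

static void *alloc_entry(size_t n)
{
	void *p = malloc(n);        /* fast path: may fail */

	return p ? p : must_alloc(n);
}

int main(void)
{
	char *p = alloc_entry(32);

	free(p);
	return 0;
}
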
1272
1247static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, 1273static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
1248 unsigned long index, void *item) 1274 unsigned long index, void *item)
1249{ 1275{
@@ -1342,6 +1368,7 @@ enum {
1342 FI_INC_LINK, /* need to increment i_nlink */ 1368 FI_INC_LINK, /* need to increment i_nlink */
1343 FI_ACL_MODE, /* indicate acl mode */ 1369 FI_ACL_MODE, /* indicate acl mode */
1344 FI_NO_ALLOC, /* should not allocate any blocks */ 1370 FI_NO_ALLOC, /* should not allocate any blocks */
 1371 FI_FREE_NID, /* free allocated nid */
1345 FI_UPDATE_DIR, /* should update inode block for consistency */ 1372 FI_UPDATE_DIR, /* should update inode block for consistency */
1346 FI_DELAY_IPUT, /* used for the recovery */ 1373 FI_DELAY_IPUT, /* used for the recovery */
1347 FI_NO_EXTENT, /* not to use the extent cache */ 1374 FI_NO_EXTENT, /* not to use the extent cache */
@@ -1541,6 +1568,17 @@ static inline bool is_dot_dotdot(const struct qstr *str)
1541 return false; 1568 return false;
1542} 1569}
1543 1570
1571static inline bool f2fs_may_extent_tree(struct inode *inode)
1572{
1573 mode_t mode = inode->i_mode;
1574
1575 if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
1576 is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
1577 return false;
1578
1579 return S_ISREG(mode);
1580}
1581
1544#define get_inode_mode(i) \ 1582#define get_inode_mode(i) \
1545 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ 1583 ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
1546 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) 1584 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1557,7 +1595,7 @@ static inline bool is_dot_dotdot(const struct qstr *str)
1557int f2fs_sync_file(struct file *, loff_t, loff_t, int); 1595int f2fs_sync_file(struct file *, loff_t, loff_t, int);
1558void truncate_data_blocks(struct dnode_of_data *); 1596void truncate_data_blocks(struct dnode_of_data *);
1559int truncate_blocks(struct inode *, u64, bool); 1597int truncate_blocks(struct inode *, u64, bool);
1560void f2fs_truncate(struct inode *); 1598int f2fs_truncate(struct inode *, bool);
1561int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 1599int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
1562int f2fs_setattr(struct dentry *, struct iattr *); 1600int f2fs_setattr(struct dentry *, struct iattr *);
1563int truncate_hole(struct inode *, pgoff_t, pgoff_t); 1601int truncate_hole(struct inode *, pgoff_t, pgoff_t);
@@ -1649,7 +1687,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1649int truncate_inode_blocks(struct inode *, pgoff_t); 1687int truncate_inode_blocks(struct inode *, pgoff_t);
1650int truncate_xattr_node(struct inode *, struct page *); 1688int truncate_xattr_node(struct inode *, struct page *);
1651int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1689int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
1652void remove_inode_page(struct inode *); 1690int remove_inode_page(struct inode *);
1653struct page *new_inode_page(struct inode *); 1691struct page *new_inode_page(struct inode *);
1654struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1692struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
1655void ra_node_page(struct f2fs_sb_info *, nid_t); 1693void ra_node_page(struct f2fs_sb_info *, nid_t);
@@ -1660,6 +1698,7 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
1660bool alloc_nid(struct f2fs_sb_info *, nid_t *); 1698bool alloc_nid(struct f2fs_sb_info *, nid_t *);
1661void alloc_nid_done(struct f2fs_sb_info *, nid_t); 1699void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1662void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1700void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1701int try_to_free_nids(struct f2fs_sb_info *, int);
1663void recover_inline_xattr(struct inode *, struct page *); 1702void recover_inline_xattr(struct inode *, struct page *);
1664void recover_xattr_data(struct inode *, struct page *, block_t); 1703void recover_xattr_data(struct inode *, struct page *, block_t);
1665int recover_inode_page(struct f2fs_sb_info *, struct page *); 1704int recover_inode_page(struct f2fs_sb_info *, struct page *);
@@ -1675,7 +1714,7 @@ void destroy_node_manager_caches(void);
1675 * segment.c 1714 * segment.c
1676 */ 1715 */
1677void register_inmem_page(struct inode *, struct page *); 1716void register_inmem_page(struct inode *, struct page *);
1678void commit_inmem_pages(struct inode *, bool); 1717int commit_inmem_pages(struct inode *, bool);
1679void f2fs_balance_fs(struct f2fs_sb_info *); 1718void f2fs_balance_fs(struct f2fs_sb_info *);
1680void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1719void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1681int f2fs_issue_flush(struct f2fs_sb_info *); 1720int f2fs_issue_flush(struct f2fs_sb_info *);
@@ -1685,7 +1724,7 @@ void invalidate_blocks(struct f2fs_sb_info *, block_t);
1685void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1724void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1686void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); 1725void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
1687void release_discard_addrs(struct f2fs_sb_info *); 1726void release_discard_addrs(struct f2fs_sb_info *);
1688void discard_next_dnode(struct f2fs_sb_info *, block_t); 1727bool discard_next_dnode(struct f2fs_sb_info *, block_t);
1689int npages_for_summary_flush(struct f2fs_sb_info *, bool); 1728int npages_for_summary_flush(struct f2fs_sb_info *, bool);
1690void allocate_new_segments(struct f2fs_sb_info *); 1729void allocate_new_segments(struct f2fs_sb_info *);
1691int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); 1730int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
@@ -1727,7 +1766,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *);
1727void release_orphan_inode(struct f2fs_sb_info *); 1766void release_orphan_inode(struct f2fs_sb_info *);
1728void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1767void add_orphan_inode(struct f2fs_sb_info *, nid_t);
1729void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 1768void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
1730void recover_orphan_inodes(struct f2fs_sb_info *); 1769int recover_orphan_inodes(struct f2fs_sb_info *);
1731int get_valid_checkpoint(struct f2fs_sb_info *); 1770int get_valid_checkpoint(struct f2fs_sb_info *);
1732void update_dirty_page(struct inode *, struct page *); 1771void update_dirty_page(struct inode *, struct page *);
1733void add_dirty_dir_inode(struct inode *); 1772void add_dirty_dir_inode(struct inode *);
@@ -1746,21 +1785,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *);
1746void f2fs_submit_page_mbio(struct f2fs_io_info *); 1785void f2fs_submit_page_mbio(struct f2fs_io_info *);
1747void set_data_blkaddr(struct dnode_of_data *); 1786void set_data_blkaddr(struct dnode_of_data *);
1748int reserve_new_block(struct dnode_of_data *); 1787int reserve_new_block(struct dnode_of_data *);
1788int f2fs_get_block(struct dnode_of_data *, pgoff_t);
1749int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1789int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
1750void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
1751void f2fs_destroy_extent_tree(struct inode *);
1752void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
1753void f2fs_update_extent_cache(struct dnode_of_data *);
1754void f2fs_preserve_extent_tree(struct inode *);
1755struct page *get_read_data_page(struct inode *, pgoff_t, int); 1790struct page *get_read_data_page(struct inode *, pgoff_t, int);
1756struct page *find_data_page(struct inode *, pgoff_t); 1791struct page *find_data_page(struct inode *, pgoff_t);
1757struct page *get_lock_data_page(struct inode *, pgoff_t); 1792struct page *get_lock_data_page(struct inode *, pgoff_t);
1758struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1793struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1759int do_write_data_page(struct f2fs_io_info *); 1794int do_write_data_page(struct f2fs_io_info *);
1760int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 1795int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
1761void init_extent_cache_info(struct f2fs_sb_info *);
1762int __init create_extent_cache(void);
1763void destroy_extent_cache(void);
1764void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 1796void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
1765int f2fs_release_page(struct page *, gfp_t); 1797int f2fs_release_page(struct page *, gfp_t);
1766 1798
@@ -1788,11 +1820,13 @@ struct f2fs_stat_info {
1788 struct f2fs_sb_info *sbi; 1820 struct f2fs_sb_info *sbi;
1789 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1821 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1790 int main_area_segs, main_area_sections, main_area_zones; 1822 int main_area_segs, main_area_sections, main_area_zones;
1791 int hit_ext, total_ext, ext_tree, ext_node; 1823 int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext;
1824 int ext_tree, ext_node;
1792 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1825 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1793 int nats, dirty_nats, sits, dirty_sits, fnids; 1826 int nats, dirty_nats, sits, dirty_sits, fnids;
1794 int total_count, utilization; 1827 int total_count, utilization;
1795 int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages; 1828 int bg_gc, inmem_pages, wb_pages;
1829 int inline_xattr, inline_inode, inline_dir;
1796 unsigned int valid_count, valid_node_count, valid_inode_count; 1830 unsigned int valid_count, valid_node_count, valid_inode_count;
1797 unsigned int bimodal, avg_vblocks; 1831 unsigned int bimodal, avg_vblocks;
1798 int util_free, util_valid, util_invalid; 1832 int util_free, util_valid, util_invalid;
@@ -1823,8 +1857,20 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1823#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) 1857#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
1824#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) 1858#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
1825#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) 1859#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
1826#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) 1860#define stat_inc_total_hit(sbi) (atomic_inc(&(sbi)->total_hit_ext))
1827#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) 1861#define stat_inc_rbtree_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_rbtree))
1862#define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest))
1863#define stat_inc_cached_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_cached))
1864#define stat_inc_inline_xattr(inode) \
1865 do { \
1866 if (f2fs_has_inline_xattr(inode)) \
1867 (atomic_inc(&F2FS_I_SB(inode)->inline_xattr)); \
1868 } while (0)
1869#define stat_dec_inline_xattr(inode) \
1870 do { \
1871 if (f2fs_has_inline_xattr(inode)) \
1872 (atomic_dec(&F2FS_I_SB(inode)->inline_xattr)); \
1873 } while (0)
1828#define stat_inc_inline_inode(inode) \ 1874#define stat_inc_inline_inode(inode) \
1829 do { \ 1875 do { \
1830 if (f2fs_has_inline_data(inode)) \ 1876 if (f2fs_has_inline_data(inode)) \
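
The new stat macros wrap their bodies in do { ... } while (0), the standard idiom that makes a multi-statement macro parse as a single statement. A minimal demonstration of why that matters inside an if/else:

#include <stdio.h>

#define COUNT_IF(cond, ctr) \
	do { \
		if (cond) \
			(ctr)++; \
	} while (0)

int main(void)
{
	int n = 0, x = 1;

	if (x)
		COUNT_IF(1, n);     /* expands to a single statement, */
	else                        /* so the else still binds correctly */
		COUNT_IF(1, n);
	printf("%d\n", n);          /* 1 */
	return 0;
}
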
@@ -1894,7 +1940,11 @@ void f2fs_destroy_root_stats(void);
1894#define stat_inc_dirty_dir(sbi) 1940#define stat_inc_dirty_dir(sbi)
1895#define stat_dec_dirty_dir(sbi) 1941#define stat_dec_dirty_dir(sbi)
1896#define stat_inc_total_hit(sb) 1942#define stat_inc_total_hit(sb)
1897#define stat_inc_read_hit(sb) 1943#define stat_inc_rbtree_node_hit(sb)
1944#define stat_inc_largest_node_hit(sbi)
1945#define stat_inc_cached_node_hit(sbi)
1946#define stat_inc_inline_xattr(inode)
1947#define stat_dec_inline_xattr(inode)
1898#define stat_inc_inline_inode(inode) 1948#define stat_inc_inline_inode(inode)
1899#define stat_dec_inline_inode(inode) 1949#define stat_dec_inline_inode(inode)
1900#define stat_inc_inline_dir(inode) 1950#define stat_inc_inline_dir(inode)
@@ -1950,6 +2000,30 @@ int f2fs_read_inline_dir(struct file *, struct dir_context *,
1950 struct f2fs_str *); 2000 struct f2fs_str *);
1951 2001
1952/* 2002/*
2003 * shrinker.c
2004 */
2005unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *);
2006unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *);
2007void f2fs_join_shrinker(struct f2fs_sb_info *);
2008void f2fs_leave_shrinker(struct f2fs_sb_info *);
2009
2010/*
2011 * extent_cache.c
2012 */
2013unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
2014void f2fs_drop_largest_extent(struct inode *, pgoff_t);
2015void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
2016unsigned int f2fs_destroy_extent_node(struct inode *);
2017void f2fs_destroy_extent_tree(struct inode *);
2018bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
2019void f2fs_update_extent_cache(struct dnode_of_data *);
2020void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
2021 pgoff_t, block_t, unsigned int);
2022void init_extent_cache_info(struct f2fs_sb_info *);
2023int __init create_extent_cache(void);
2024void destroy_extent_cache(void);
2025
2026/*
1953 * crypto support 2027 * crypto support
1954 */ 2028 */
1955static inline int f2fs_encrypted_inode(struct inode *inode) 2029static inline int f2fs_encrypted_inode(struct inode *inode)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index b0f38c3b37f4..8120f8685141 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -27,6 +27,7 @@
27#include "segment.h" 27#include "segment.h"
28#include "xattr.h" 28#include "xattr.h"
29#include "acl.h" 29#include "acl.h"
30#include "gc.h"
30#include "trace.h" 31#include "trace.h"
31#include <trace/events/f2fs.h> 32#include <trace/events/f2fs.h>
32 33
@@ -85,6 +86,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
85mapped: 86mapped:
86 /* fill the page */ 87 /* fill the page */
87 f2fs_wait_on_page_writeback(page, DATA); 88 f2fs_wait_on_page_writeback(page, DATA);
 89 /* if a GCed page is attached, don't write it to a cold segment */
90 clear_cold_data(page);
88out: 91out:
89 sb_end_pagefault(inode->i_sb); 92 sb_end_pagefault(inode->i_sb);
90 return block_page_mkwrite_return(err); 93 return block_page_mkwrite_return(err);
@@ -203,8 +206,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
203 } 206 }
204 207
205 /* if the inode is dirty, let's recover all the time */ 208 /* if the inode is dirty, let's recover all the time */
206 if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { 209 if (!datasync) {
207 update_inode_page(inode); 210 f2fs_write_inode(inode, NULL);
208 goto go_write; 211 goto go_write;
209 } 212 }
210 213
@@ -442,9 +445,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
442 445
443int truncate_data_blocks_range(struct dnode_of_data *dn, int count) 446int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
444{ 447{
445 int nr_free = 0, ofs = dn->ofs_in_node;
446 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 448 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
447 struct f2fs_node *raw_node; 449 struct f2fs_node *raw_node;
450 int nr_free = 0, ofs = dn->ofs_in_node, len = count;
448 __le32 *addr; 451 __le32 *addr;
449 452
450 raw_node = F2FS_NODE(dn->node_page); 453 raw_node = F2FS_NODE(dn->node_page);
@@ -457,14 +460,22 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
457 460
458 dn->data_blkaddr = NULL_ADDR; 461 dn->data_blkaddr = NULL_ADDR;
459 set_data_blkaddr(dn); 462 set_data_blkaddr(dn);
460 f2fs_update_extent_cache(dn);
461 invalidate_blocks(sbi, blkaddr); 463 invalidate_blocks(sbi, blkaddr);
462 if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) 464 if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
463 clear_inode_flag(F2FS_I(dn->inode), 465 clear_inode_flag(F2FS_I(dn->inode),
464 FI_FIRST_BLOCK_WRITTEN); 466 FI_FIRST_BLOCK_WRITTEN);
465 nr_free++; 467 nr_free++;
466 } 468 }
469
467 if (nr_free) { 470 if (nr_free) {
471 pgoff_t fofs;
472 /*
 473 * once we invalidate a valid blkaddr in the range [ofs, ofs + count],
 474 * we invalidate all blkaddrs in the whole range.
475 */
476 fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
477 F2FS_I(dn->inode)) + ofs;
478 f2fs_update_extent_cache_range(dn, fofs, 0, len);
468 dec_valid_block_count(sbi, dn->inode, nr_free); 479 dec_valid_block_count(sbi, dn->inode, nr_free);
469 set_page_dirty(dn->node_page); 480 set_page_dirty(dn->node_page);
470 sync_inode_page(dn); 481 sync_inode_page(dn);
@@ -576,24 +587,30 @@ out:
576 return err; 587 return err;
577} 588}
578 589
579void f2fs_truncate(struct inode *inode) 590int f2fs_truncate(struct inode *inode, bool lock)
580{ 591{
592 int err;
593
581 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 594 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
582 S_ISLNK(inode->i_mode))) 595 S_ISLNK(inode->i_mode)))
583 return; 596 return 0;
584 597
585 trace_f2fs_truncate(inode); 598 trace_f2fs_truncate(inode);
586 599
587 /* we should check inline_data size */ 600 /* we should check inline_data size */
588 if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) { 601 if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
589 if (f2fs_convert_inline_inode(inode)) 602 err = f2fs_convert_inline_inode(inode);
590 return; 603 if (err)
604 return err;
591 } 605 }
592 606
593 if (!truncate_blocks(inode, i_size_read(inode), true)) { 607 err = truncate_blocks(inode, i_size_read(inode), lock);
594 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 608 if (err)
595 mark_inode_dirty(inode); 609 return err;
596 } 610
611 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
612 mark_inode_dirty(inode);
613 return 0;
597} 614}
598 615
599int f2fs_getattr(struct vfsmount *mnt, 616int f2fs_getattr(struct vfsmount *mnt,
@@ -653,7 +670,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
653 670
654 if (attr->ia_size <= i_size_read(inode)) { 671 if (attr->ia_size <= i_size_read(inode)) {
655 truncate_setsize(inode, attr->ia_size); 672 truncate_setsize(inode, attr->ia_size);
656 f2fs_truncate(inode); 673 err = f2fs_truncate(inode, true);
674 if (err)
675 return err;
657 f2fs_balance_fs(F2FS_I_SB(inode)); 676 f2fs_balance_fs(F2FS_I_SB(inode));
658 } else { 677 } else {
659 /* 678 /*
@@ -692,14 +711,14 @@ const struct inode_operations f2fs_file_inode_operations = {
692 .fiemap = f2fs_fiemap, 711 .fiemap = f2fs_fiemap,
693}; 712};
694 713
695static void fill_zero(struct inode *inode, pgoff_t index, 714static int fill_zero(struct inode *inode, pgoff_t index,
696 loff_t start, loff_t len) 715 loff_t start, loff_t len)
697{ 716{
698 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 717 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
699 struct page *page; 718 struct page *page;
700 719
701 if (!len) 720 if (!len)
702 return; 721 return 0;
703 722
704 f2fs_balance_fs(sbi); 723 f2fs_balance_fs(sbi);
705 724
@@ -707,12 +726,14 @@ static void fill_zero(struct inode *inode, pgoff_t index,
707 page = get_new_data_page(inode, NULL, index, false); 726 page = get_new_data_page(inode, NULL, index, false);
708 f2fs_unlock_op(sbi); 727 f2fs_unlock_op(sbi);
709 728
710 if (!IS_ERR(page)) { 729 if (IS_ERR(page))
711 f2fs_wait_on_page_writeback(page, DATA); 730 return PTR_ERR(page);
712 zero_user(page, start, len); 731
713 set_page_dirty(page); 732 f2fs_wait_on_page_writeback(page, DATA);
714 f2fs_put_page(page, 1); 733 zero_user(page, start, len);
715 } 734 set_page_dirty(page);
735 f2fs_put_page(page, 1);
736 return 0;
716} 737}
717 738
718int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) 739int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
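
fill_zero() changes from void to int here, and every caller in the following hunks now propagates the error instead of silently ignoring a failed page allocation. The refactor in miniature, with stub error values:

#include <stdio.h>

static int fill_zero_stub(int fail)
{
	return fail ? -12 /* -ENOMEM */ : 0;
}

static int punch_hole(int fail_first)
{
	int ret;

	ret = fill_zero_stub(fail_first);
	if (ret)
		return ret;              /* propagate, don't swallow */
	return fill_zero_stub(0);
}

int main(void)
{
	printf("%d %d\n", punch_hole(1), punch_hole(0));  /* -12 0 */
	return 0;
}
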
@@ -760,14 +781,22 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
760 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); 781 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
761 782
762 if (pg_start == pg_end) { 783 if (pg_start == pg_end) {
763 fill_zero(inode, pg_start, off_start, 784 ret = fill_zero(inode, pg_start, off_start,
764 off_end - off_start); 785 off_end - off_start);
786 if (ret)
787 return ret;
765 } else { 788 } else {
766 if (off_start) 789 if (off_start) {
767 fill_zero(inode, pg_start++, off_start, 790 ret = fill_zero(inode, pg_start++, off_start,
768 PAGE_CACHE_SIZE - off_start); 791 PAGE_CACHE_SIZE - off_start);
769 if (off_end) 792 if (ret)
770 fill_zero(inode, pg_end, 0, off_end); 793 return ret;
794 }
795 if (off_end) {
796 ret = fill_zero(inode, pg_end, 0, off_end);
797 if (ret)
798 return ret;
799 }
771 800
772 if (pg_start < pg_end) { 801 if (pg_start < pg_end) {
773 struct address_space *mapping = inode->i_mapping; 802 struct address_space *mapping = inode->i_mapping;
@@ -797,11 +826,11 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
797 pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; 826 pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
798 int ret = 0; 827 int ret = 0;
799 828
800 f2fs_lock_op(sbi);
801
802 for (; end < nrpages; start++, end++) { 829 for (; end < nrpages; start++, end++) {
803 block_t new_addr, old_addr; 830 block_t new_addr, old_addr;
804 831
832 f2fs_lock_op(sbi);
833
805 set_new_dnode(&dn, inode, NULL, NULL, 0); 834 set_new_dnode(&dn, inode, NULL, NULL, 0);
806 ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA); 835 ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA);
807 if (ret && ret != -ENOENT) { 836 if (ret && ret != -ENOENT) {
@@ -817,13 +846,16 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
817 if (new_addr == NULL_ADDR) { 846 if (new_addr == NULL_ADDR) {
818 set_new_dnode(&dn, inode, NULL, NULL, 0); 847 set_new_dnode(&dn, inode, NULL, NULL, 0);
819 ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA); 848 ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA);
820 if (ret && ret != -ENOENT) 849 if (ret && ret != -ENOENT) {
821 goto out; 850 goto out;
822 else if (ret == -ENOENT) 851 } else if (ret == -ENOENT) {
852 f2fs_unlock_op(sbi);
823 continue; 853 continue;
854 }
824 855
825 if (dn.data_blkaddr == NULL_ADDR) { 856 if (dn.data_blkaddr == NULL_ADDR) {
826 f2fs_put_dnode(&dn); 857 f2fs_put_dnode(&dn);
858 f2fs_unlock_op(sbi);
827 continue; 859 continue;
828 } else { 860 } else {
829 truncate_data_blocks_range(&dn, 1); 861 truncate_data_blocks_range(&dn, 1);
@@ -862,8 +894,9 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
862 894
863 f2fs_put_dnode(&dn); 895 f2fs_put_dnode(&dn);
864 } 896 }
897 f2fs_unlock_op(sbi);
865 } 898 }
866 ret = 0; 899 return 0;
867out: 900out:
868 f2fs_unlock_op(sbi); 901 f2fs_unlock_op(sbi);
869 return ret; 902 return ret;
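
The collapse loop now takes f2fs_lock_op() per iteration instead of across the whole walk, which means every early `continue` has to release the lock first. The same discipline sketched with a pthread mutex standing in for the op lock (link with -lpthread):

#include <pthread.h>

static pthread_mutex_t op_lock = PTHREAD_MUTEX_INITIALIZER;

static int skip_entry(int i)
{
	return i % 2;                            /* pretend odd entries are holes */
}

static void collapse(int n)
{
	for (int i = 0; i < n; i++) {
		pthread_mutex_lock(&op_lock);
		if (skip_entry(i)) {
			pthread_mutex_unlock(&op_lock);  /* every skip path */
			continue;                        /* must unlock     */
		}
		/* ... move one block under the lock ... */
		pthread_mutex_unlock(&op_lock);
	}
}

int main(void)
{
	collapse(4);
	return 0;
}
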
@@ -885,6 +918,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
885 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1)) 918 if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
886 return -EINVAL; 919 return -EINVAL;
887 920
921 f2fs_balance_fs(F2FS_I_SB(inode));
922
923 if (f2fs_has_inline_data(inode)) {
924 ret = f2fs_convert_inline_inode(inode);
925 if (ret)
926 return ret;
927 }
928
888 pg_start = offset >> PAGE_CACHE_SHIFT; 929 pg_start = offset >> PAGE_CACHE_SHIFT;
889 pg_end = (offset + len) >> PAGE_CACHE_SHIFT; 930 pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
890 931
@@ -946,14 +987,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
946 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1); 987 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
947 988
948 if (pg_start == pg_end) { 989 if (pg_start == pg_end) {
949 fill_zero(inode, pg_start, off_start, off_end - off_start); 990 ret = fill_zero(inode, pg_start, off_start,
991 off_end - off_start);
992 if (ret)
993 return ret;
994
950 if (offset + len > new_size) 995 if (offset + len > new_size)
951 new_size = offset + len; 996 new_size = offset + len;
952 new_size = max_t(loff_t, new_size, offset + len); 997 new_size = max_t(loff_t, new_size, offset + len);
953 } else { 998 } else {
954 if (off_start) { 999 if (off_start) {
955 fill_zero(inode, pg_start++, off_start, 1000 ret = fill_zero(inode, pg_start++, off_start,
956 PAGE_CACHE_SIZE - off_start); 1001 PAGE_CACHE_SIZE - off_start);
1002 if (ret)
1003 return ret;
1004
957 new_size = max_t(loff_t, new_size, 1005 new_size = max_t(loff_t, new_size,
958 pg_start << PAGE_CACHE_SHIFT); 1006 pg_start << PAGE_CACHE_SHIFT);
959 } 1007 }
@@ -995,7 +1043,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
995 } 1043 }
996 1044
997 if (off_end) { 1045 if (off_end) {
998 fill_zero(inode, pg_end, 0, off_end); 1046 ret = fill_zero(inode, pg_end, 0, off_end);
1047 if (ret)
1048 goto out;
1049
999 new_size = max_t(loff_t, new_size, offset + len); 1050 new_size = max_t(loff_t, new_size, offset + len);
1000 } 1051 }
1001 } 1052 }
@@ -1033,6 +1084,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1033 1084
1034 f2fs_balance_fs(sbi); 1085 f2fs_balance_fs(sbi);
1035 1086
1087 if (f2fs_has_inline_data(inode)) {
1088 ret = f2fs_convert_inline_inode(inode);
1089 if (ret)
1090 return ret;
1091 }
1092
1036 ret = truncate_blocks(inode, i_size_read(inode), true); 1093 ret = truncate_blocks(inode, i_size_read(inode), true);
1037 if (ret) 1094 if (ret)
1038 return ret; 1095 return ret;
@@ -1302,6 +1359,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
1302static int f2fs_ioc_start_atomic_write(struct file *filp) 1359static int f2fs_ioc_start_atomic_write(struct file *filp)
1303{ 1360{
1304 struct inode *inode = file_inode(filp); 1361 struct inode *inode = file_inode(filp);
1362 int ret;
1305 1363
1306 if (!inode_owner_or_capable(inode)) 1364 if (!inode_owner_or_capable(inode))
1307 return -EACCES; 1365 return -EACCES;
@@ -1311,9 +1369,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
1311 if (f2fs_is_atomic_file(inode)) 1369 if (f2fs_is_atomic_file(inode))
1312 return 0; 1370 return 0;
1313 1371
1314 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1372 ret = f2fs_convert_inline_inode(inode);
1373 if (ret)
1374 return ret;
1315 1375
1316 return f2fs_convert_inline_inode(inode); 1376 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1377 return 0;
1317} 1378}
1318 1379
1319static int f2fs_ioc_commit_atomic_write(struct file *filp) 1380static int f2fs_ioc_commit_atomic_write(struct file *filp)
@@ -1333,10 +1394,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
1333 1394
1334 if (f2fs_is_atomic_file(inode)) { 1395 if (f2fs_is_atomic_file(inode)) {
1335 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1396 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1336 commit_inmem_pages(inode, false); 1397 ret = commit_inmem_pages(inode, false);
1398 if (ret)
1399 goto err_out;
1337 } 1400 }
1338 1401
1339 ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); 1402 ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
1403err_out:
1340 mnt_drop_write_file(filp); 1404 mnt_drop_write_file(filp);
1341 return ret; 1405 return ret;
1342} 1406}
@@ -1344,6 +1408,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
1344static int f2fs_ioc_start_volatile_write(struct file *filp) 1408static int f2fs_ioc_start_volatile_write(struct file *filp)
1345{ 1409{
1346 struct inode *inode = file_inode(filp); 1410 struct inode *inode = file_inode(filp);
1411 int ret;
1347 1412
1348 if (!inode_owner_or_capable(inode)) 1413 if (!inode_owner_or_capable(inode))
1349 return -EACCES; 1414 return -EACCES;
@@ -1351,9 +1416,12 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
1351 if (f2fs_is_volatile_file(inode)) 1416 if (f2fs_is_volatile_file(inode))
1352 return 0; 1417 return 0;
1353 1418
1354 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1419 ret = f2fs_convert_inline_inode(inode);
1420 if (ret)
1421 return ret;
1355 1422
1356 return f2fs_convert_inline_inode(inode); 1423 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1424 return 0;
1357} 1425}
1358 1426
1359static int f2fs_ioc_release_volatile_write(struct file *filp) 1427static int f2fs_ioc_release_volatile_write(struct file *filp)
@@ -1389,7 +1457,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
1389 1457
1390 if (f2fs_is_atomic_file(inode)) { 1458 if (f2fs_is_atomic_file(inode)) {
1391 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1459 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1392 commit_inmem_pages(inode, false); 1460 commit_inmem_pages(inode, true);
1393 } 1461 }
1394 1462
1395 if (f2fs_is_volatile_file(inode)) 1463 if (f2fs_is_volatile_file(inode))
@@ -1544,6 +1612,35 @@ got_it:
1544 return 0; 1612 return 0;
1545} 1613}
1546 1614
1615static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
1616{
1617 struct inode *inode = file_inode(filp);
1618 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1619 __u32 i, count;
1620
1621 if (!capable(CAP_SYS_ADMIN))
1622 return -EPERM;
1623
1624 if (get_user(count, (__u32 __user *)arg))
1625 return -EFAULT;
1626
1627 if (!count || count > F2FS_BATCH_GC_MAX_NUM)
1628 return -EINVAL;
1629
1630 for (i = 0; i < count; i++) {
1631 if (!mutex_trylock(&sbi->gc_mutex))
1632 break;
1633
1634 if (f2fs_gc(sbi))
1635 break;
1636 }
1637
1638 if (put_user(i, (__u32 __user *)arg))
1639 return -EFAULT;
1640
1641 return 0;
1642}
1643
1547long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 1644long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1548{ 1645{
1549 switch (cmd) { 1646 switch (cmd) {
@@ -1573,6 +1670,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1573 return f2fs_ioc_get_encryption_policy(filp, arg); 1670 return f2fs_ioc_get_encryption_policy(filp, arg);
1574 case F2FS_IOC_GET_ENCRYPTION_PWSALT: 1671 case F2FS_IOC_GET_ENCRYPTION_PWSALT:
1575 return f2fs_ioc_get_encryption_pwsalt(filp, arg); 1672 return f2fs_ioc_get_encryption_pwsalt(filp, arg);
1673 case F2FS_IOC_GARBAGE_COLLECT:
1674 return f2fs_ioc_gc(filp, arg);
1576 default: 1675 default:
1577 return -ENOTTY; 1676 return -ENOTTY;
1578 } 1677 }
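
The new F2FS_IOC_GARBAGE_COLLECT handler above takes a batch count from
userspace, runs up to that many cleaning passes (capped at
F2FS_BATCH_GC_MAX_NUM, defined in gc.h below), and writes back how many
passes actually completed; it requires CAP_SYS_ADMIN and backs off via
mutex_trylock if GC is already running. A minimal userspace sketch of a
caller follows; the magic number and command index are assumptions meant to
match the F2FS_IOC_GARBAGE_COLLECT definition in fs/f2fs/f2fs.h, which is
not shown in this hunk:

	#include <fcntl.h>
	#include <linux/types.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>

	#define F2FS_IOCTL_MAGIC	0xf5	/* assumed, see fs/f2fs/f2fs.h */
	#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)	/* assumed */

	int main(int argc, char **argv)
	{
		__u32 count = 8;	/* requested passes: 1..F2FS_BATCH_GC_MAX_NUM */
		int fd;

		if (argc != 2)
			return 1;
		fd = open(argv[1], O_RDONLY);
		if (fd < 0 || ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &count) < 0) {
			perror("F2FS_IOC_GARBAGE_COLLECT");
			return 1;
		}
		printf("%u gc passes completed\n", count);	/* kernel wrote i back */
		close(fd);
		return 0;
	}
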
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 22fb5ef37966..782b8e72c094 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -391,23 +391,27 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
391 * On validity, copy that node with cold status, otherwise (invalid node) 391 * On validity, copy that node with cold status, otherwise (invalid node)
392 * ignore that. 392 * ignore that.
393 */ 393 */
394static void gc_node_segment(struct f2fs_sb_info *sbi, 394static int gc_node_segment(struct f2fs_sb_info *sbi,
395 struct f2fs_summary *sum, unsigned int segno, int gc_type) 395 struct f2fs_summary *sum, unsigned int segno, int gc_type)
396{ 396{
397 bool initial = true; 397 bool initial = true;
398 struct f2fs_summary *entry; 398 struct f2fs_summary *entry;
399 block_t start_addr;
399 int off; 400 int off;
400 401
402 start_addr = START_BLOCK(sbi, segno);
403
401next_step: 404next_step:
402 entry = sum; 405 entry = sum;
403 406
404 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { 407 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
405 nid_t nid = le32_to_cpu(entry->nid); 408 nid_t nid = le32_to_cpu(entry->nid);
406 struct page *node_page; 409 struct page *node_page;
410 struct node_info ni;
407 411
408 /* stop BG_GC if there is not enough free sections. */ 412 /* stop BG_GC if there is not enough free sections. */
409 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) 413 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
410 return; 414 return 0;
411 415
412 if (check_valid_map(sbi, segno, off) == 0) 416 if (check_valid_map(sbi, segno, off) == 0)
413 continue; 417 continue;
@@ -426,6 +430,12 @@ next_step:
426 continue; 430 continue;
427 } 431 }
428 432
433 get_node_info(sbi, nid, &ni);
434 if (ni.blk_addr != start_addr + off) {
435 f2fs_put_page(node_page, 1);
436 continue;
437 }
438
429 /* set page dirty and write it */ 439 /* set page dirty and write it */
430 if (gc_type == FG_GC) { 440 if (gc_type == FG_GC) {
431 f2fs_wait_on_page_writeback(node_page, NODE); 441 f2fs_wait_on_page_writeback(node_page, NODE);
@@ -451,13 +461,11 @@ next_step:
451 }; 461 };
452 sync_node_pages(sbi, 0, &wbc); 462 sync_node_pages(sbi, 0, &wbc);
453 463
454 /* 464 /* return 1 only if FG_GC successfully reclaimed one */
455 * In the case of FG_GC, it'd be better to reclaim this victim 465 if (get_valid_blocks(sbi, segno, 1) == 0)
456 * completely. 466 return 1;
457 */
458 if (get_valid_blocks(sbi, segno, 1) != 0)
459 goto next_step;
460 } 467 }
468 return 0;
461} 469}
462 470
463/* 471/*
@@ -487,7 +495,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
487 return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); 495 return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
488} 496}
489 497
490static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 498static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
491 struct node_info *dni, block_t blkaddr, unsigned int *nofs) 499 struct node_info *dni, block_t blkaddr, unsigned int *nofs)
492{ 500{
493 struct page *node_page; 501 struct page *node_page;
@@ -500,13 +508,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
500 508
501 node_page = get_node_page(sbi, nid); 509 node_page = get_node_page(sbi, nid);
502 if (IS_ERR(node_page)) 510 if (IS_ERR(node_page))
503 return 0; 511 return false;
504 512
505 get_node_info(sbi, nid, dni); 513 get_node_info(sbi, nid, dni);
506 514
507 if (sum->version != dni->version) { 515 if (sum->version != dni->version) {
508 f2fs_put_page(node_page, 1); 516 f2fs_put_page(node_page, 1);
509 return 0; 517 return false;
510 } 518 }
511 519
512 *nofs = ofs_of_node(node_page); 520 *nofs = ofs_of_node(node_page);
@@ -514,8 +522,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
514 f2fs_put_page(node_page, 1); 522 f2fs_put_page(node_page, 1);
515 523
516 if (source_blkaddr != blkaddr) 524 if (source_blkaddr != blkaddr)
517 return 0; 525 return false;
518 return 1; 526 return true;
519} 527}
520 528
521static void move_encrypted_block(struct inode *inode, block_t bidx) 529static void move_encrypted_block(struct inode *inode, block_t bidx)
@@ -552,7 +560,10 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
552 fio.page = page; 560 fio.page = page;
553 fio.blk_addr = dn.data_blkaddr; 561 fio.blk_addr = dn.data_blkaddr;
554 562
555 fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr); 563 fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi),
564 fio.blk_addr,
565 FGP_LOCK|FGP_CREAT,
566 GFP_NOFS);
556 if (!fio.encrypted_page) 567 if (!fio.encrypted_page)
557 goto put_out; 568 goto put_out;
558 569
@@ -636,7 +647,7 @@ out:
636 * If the parent node is not valid or the data block address is different, 647 * If the parent node is not valid or the data block address is different,
637 * the victim data block is ignored. 648 * the victim data block is ignored.
638 */ 649 */
639static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, 650static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
640 struct gc_inode_list *gc_list, unsigned int segno, int gc_type) 651 struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
641{ 652{
642 struct super_block *sb = sbi->sb; 653 struct super_block *sb = sbi->sb;
@@ -659,7 +670,7 @@ next_step:
659 670
660 /* stop BG_GC if there is not enough free sections. */ 671 /* stop BG_GC if there is not enough free sections. */
661 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) 672 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
662 return; 673 return 0;
663 674
664 if (check_valid_map(sbi, segno, off) == 0) 675 if (check_valid_map(sbi, segno, off) == 0)
665 continue; 676 continue;
@@ -670,7 +681,7 @@ next_step:
670 } 681 }
671 682
672 /* Get an inode by ino with checking validity */ 683 /* Get an inode by ino with checking validity */
673 if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) 684 if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
674 continue; 685 continue;
675 686
676 if (phase == 1) { 687 if (phase == 1) {
@@ -724,15 +735,11 @@ next_step:
724 if (gc_type == FG_GC) { 735 if (gc_type == FG_GC) {
725 f2fs_submit_merged_bio(sbi, DATA, WRITE); 736 f2fs_submit_merged_bio(sbi, DATA, WRITE);
726 737
727 /* 738 /* return 1 only if FG_GC successfully reclaimed one */
728 * In the case of FG_GC, it'd be better to reclaim this victim 739 if (get_valid_blocks(sbi, segno, 1) == 0)
729 * completely. 740 return 1;
730 */
731 if (get_valid_blocks(sbi, segno, 1) != 0) {
732 phase = 2;
733 goto next_step;
734 }
735 } 741 }
742 return 0;
736} 743}
737 744
738static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, 745static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -748,12 +755,13 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
748 return ret; 755 return ret;
749} 756}
750 757
751static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, 758static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
752 struct gc_inode_list *gc_list, int gc_type) 759 struct gc_inode_list *gc_list, int gc_type)
753{ 760{
754 struct page *sum_page; 761 struct page *sum_page;
755 struct f2fs_summary_block *sum; 762 struct f2fs_summary_block *sum;
756 struct blk_plug plug; 763 struct blk_plug plug;
764 int nfree = 0;
757 765
758 /* read segment summary of victim */ 766 /* read segment summary of victim */
759 sum_page = get_sum_page(sbi, segno); 767 sum_page = get_sum_page(sbi, segno);
@@ -773,10 +781,11 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
773 781
774 switch (GET_SUM_TYPE((&sum->footer))) { 782 switch (GET_SUM_TYPE((&sum->footer))) {
775 case SUM_TYPE_NODE: 783 case SUM_TYPE_NODE:
776 gc_node_segment(sbi, sum->entries, segno, gc_type); 784 nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
777 break; 785 break;
778 case SUM_TYPE_DATA: 786 case SUM_TYPE_DATA:
779 gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); 787 nfree = gc_data_segment(sbi, sum->entries, gc_list,
788 segno, gc_type);
780 break; 789 break;
781 } 790 }
782 blk_finish_plug(&plug); 791 blk_finish_plug(&plug);
@@ -785,11 +794,13 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
785 stat_inc_call_count(sbi->stat_info); 794 stat_inc_call_count(sbi->stat_info);
786 795
787 f2fs_put_page(sum_page, 0); 796 f2fs_put_page(sum_page, 0);
797 return nfree;
788} 798}
789 799
790int f2fs_gc(struct f2fs_sb_info *sbi) 800int f2fs_gc(struct f2fs_sb_info *sbi)
791{ 801{
792 unsigned int segno, i; 802 unsigned int segno = NULL_SEGNO;
803 unsigned int i;
793 int gc_type = BG_GC; 804 int gc_type = BG_GC;
794 int nfree = 0; 805 int nfree = 0;
795 int ret = -1; 806 int ret = -1;
@@ -808,10 +819,11 @@ gc_more:
808 819
809 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 820 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
810 gc_type = FG_GC; 821 gc_type = FG_GC;
811 write_checkpoint(sbi, &cpc); 822 if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
823 write_checkpoint(sbi, &cpc);
812 } 824 }
813 825
814 if (!__get_victim(sbi, &segno, gc_type)) 826 if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
815 goto stop; 827 goto stop;
816 ret = 0; 828 ret = 0;
817 829
@@ -821,13 +833,10 @@ gc_more:
821 META_SSA); 833 META_SSA);
822 834
823 for (i = 0; i < sbi->segs_per_sec; i++) 835 for (i = 0; i < sbi->segs_per_sec; i++)
824 do_garbage_collect(sbi, segno + i, &gc_list, gc_type); 836 nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
825 837
826 if (gc_type == FG_GC) { 838 if (gc_type == FG_GC)
827 sbi->cur_victim_sec = NULL_SEGNO; 839 sbi->cur_victim_sec = NULL_SEGNO;
828 nfree++;
829 WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec));
830 }
831 840
832 if (has_not_enough_free_secs(sbi, nfree)) 841 if (has_not_enough_free_secs(sbi, nfree))
833 goto gc_more; 842 goto gc_more;
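
Taken together, the gc.c hunks change the FG_GC contract: gc_node_segment()
and gc_data_segment() no longer loop in place until the victim is empty;
each returns 1 only when the segment ends up with zero valid blocks,
do_garbage_collect() forwards that count as nfree, and f2fs_gc() decides
whether another round is needed. A condensed sketch of the resulting loop,
pieced together from the hunks above (not compilable on its own):

	gc_more:
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
			gc_type = FG_GC;
			/* checkpoint only if a victim or prefree segments exist */
			if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
				write_checkpoint(sbi, &cpc);
		}
		if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
			goto stop;
		for (i = 0; i < sbi->segs_per_sec; i++)
			nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
		if (has_not_enough_free_secs(sbi, nfree))
			goto gc_more;	/* retry with the updated reclaim tally */
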
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index b4a65be9f7d3..c5a055b3376e 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -19,6 +19,12 @@
19#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ 19#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
20#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ 20#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
21 21
22/*
23 * with this macro, we can control the maximum number of garbage
24 * collection passes when the user triggers batch mode gc by ioctl.
25 */
26#define F2FS_BATCH_GC_MAX_NUM 16
27
22/* Search max. number of dirty segments to select a victim segment */ 28/* Search max. number of dirty segments to select a victim segment */
23#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ 29#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
24 30
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index a13ffcc32992..3d143be42895 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -360,6 +360,10 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
360 return 0; 360 return 0;
361} 361}
362 362
363/*
364 * NOTE: ipage is grabbed by the caller, but if any error occurs, we should
365 * release ipage in this function.
366 */
363static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, 367static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
364 struct f2fs_inline_dentry *inline_dentry) 368 struct f2fs_inline_dentry *inline_dentry)
365{ 369{
@@ -369,8 +373,10 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
369 int err; 373 int err;
370 374
371 page = grab_cache_page(dir->i_mapping, 0); 375 page = grab_cache_page(dir->i_mapping, 0);
372 if (!page) 376 if (!page) {
377 f2fs_put_page(ipage, 1);
373 return -ENOMEM; 378 return -ENOMEM;
379 }
374 380
375 set_new_dnode(&dn, dir, ipage, NULL, 0); 381 set_new_dnode(&dn, dir, ipage, NULL, 0);
376 err = f2fs_reserve_block(&dn, 0); 382 err = f2fs_reserve_block(&dn, 0);
@@ -378,13 +384,21 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
378 goto out; 384 goto out;
379 385
380 f2fs_wait_on_page_writeback(page, DATA); 386 f2fs_wait_on_page_writeback(page, DATA);
381 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 387 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
382 388
383 dentry_blk = kmap_atomic(page); 389 dentry_blk = kmap_atomic(page);
384 390
385 /* copy data from inline dentry block to new dentry block */ 391 /* copy data from inline dentry block to new dentry block */
386 memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, 392 memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
387 INLINE_DENTRY_BITMAP_SIZE); 393 INLINE_DENTRY_BITMAP_SIZE);
394 memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0,
395 SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE);
396 /*
397 * we do not need to zero out the remainder of the dentry and filename
398 * fields, since the bitmap already marks their usage status; we can
399 * also skip copying/zeroing the reserved space of the dentry block,
400 * because it has not been used so far.
401 */
388 memcpy(dentry_blk->dentry, inline_dentry->dentry, 402 memcpy(dentry_blk->dentry, inline_dentry->dentry,
389 sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); 403 sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
390 memcpy(dentry_blk->filename, inline_dentry->filename, 404 memcpy(dentry_blk->filename, inline_dentry->filename,
@@ -434,8 +448,9 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
434 slots, NR_INLINE_DENTRY); 448 slots, NR_INLINE_DENTRY);
435 if (bit_pos >= NR_INLINE_DENTRY) { 449 if (bit_pos >= NR_INLINE_DENTRY) {
436 err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); 450 err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
437 if (!err) 451 if (err)
438 err = -EAGAIN; 452 return err;
453 err = -EAGAIN;
439 goto out; 454 goto out;
440 } 455 }
441 456
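
The zero_user_segment() change above narrows the zeroing to the tail of the
page, and the new memset() covers the part of the block-sized dentry bitmap
that has no counterpart in the inline form; dentry and filename slots can
stay uninitialized because lookups consult the bitmap first. A standalone
model of that copy, with toy sizes standing in for INLINE_DENTRY_BITMAP_SIZE
and SIZE_OF_DENTRY_BITMAP (the real values are not shown in this hunk):

	#include <stdio.h>
	#include <string.h>

	#define INLINE_BITMAP_BYTES 4	/* toy stand-in for INLINE_DENTRY_BITMAP_SIZE */
	#define BLOCK_BITMAP_BYTES  8	/* toy stand-in for SIZE_OF_DENTRY_BITMAP */

	int main(void)
	{
		unsigned char inline_bitmap[INLINE_BITMAP_BYTES] = { 0x03 };	/* two live slots */
		unsigned char block_bitmap[BLOCK_BITMAP_BYTES];

		/* copy the inline bitmap, then zero the tail that never existed inline */
		memcpy(block_bitmap, inline_bitmap, INLINE_BITMAP_BYTES);
		memset(block_bitmap + INLINE_BITMAP_BYTES, 0,
			BLOCK_BITMAP_BYTES - INLINE_BITMAP_BYTES);

		for (int i = 0; i < BLOCK_BITMAP_BYTES; i++)
			printf("%02x ", block_bitmap[i]);
		printf("\n");
		return 0;
	}
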
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2550868dc651..35aae65b3e5d 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -12,7 +12,6 @@
12#include <linux/f2fs_fs.h> 12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/bitops.h>
16 15
17#include "f2fs.h" 16#include "f2fs.h"
18#include "node.h" 17#include "node.h"
@@ -34,8 +33,8 @@ void f2fs_set_inode_flags(struct inode *inode)
34 new_fl |= S_NOATIME; 33 new_fl |= S_NOATIME;
35 if (flags & FS_DIRSYNC_FL) 34 if (flags & FS_DIRSYNC_FL)
36 new_fl |= S_DIRSYNC; 35 new_fl |= S_DIRSYNC;
37 set_mask_bits(&inode->i_flags, 36 inode_set_flags(inode, new_fl,
38 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); 37 S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
39} 38}
40 39
41static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) 40static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -139,7 +138,7 @@ static int do_read_inode(struct inode *inode)
139 fi->i_pino = le32_to_cpu(ri->i_pino); 138 fi->i_pino = le32_to_cpu(ri->i_pino);
140 fi->i_dir_level = ri->i_dir_level; 139 fi->i_dir_level = ri->i_dir_level;
141 140
142 f2fs_init_extent_cache(inode, &ri->i_ext); 141 f2fs_init_extent_tree(inode, &ri->i_ext);
143 142
144 get_inline_info(fi, ri); 143 get_inline_info(fi, ri);
145 144
@@ -155,6 +154,7 @@ static int do_read_inode(struct inode *inode)
155 154
156 f2fs_put_page(node_page, 1); 155 f2fs_put_page(node_page, 1);
157 156
157 stat_inc_inline_xattr(inode);
158 stat_inc_inline_inode(inode); 158 stat_inc_inline_inode(inode);
159 stat_inc_inline_dir(inode); 159 stat_inc_inline_dir(inode);
160 160
@@ -237,10 +237,11 @@ void update_inode(struct inode *inode, struct page *node_page)
237 ri->i_size = cpu_to_le64(i_size_read(inode)); 237 ri->i_size = cpu_to_le64(i_size_read(inode));
238 ri->i_blocks = cpu_to_le64(inode->i_blocks); 238 ri->i_blocks = cpu_to_le64(inode->i_blocks);
239 239
240 read_lock(&F2FS_I(inode)->ext_lock); 240 if (F2FS_I(inode)->extent_tree)
241 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); 241 set_raw_extent(&F2FS_I(inode)->extent_tree->largest,
242 read_unlock(&F2FS_I(inode)->ext_lock); 242 &ri->i_ext);
243 243 else
244 memset(&ri->i_ext, 0, sizeof(ri->i_ext));
244 set_raw_inline(F2FS_I(inode), ri); 245 set_raw_inline(F2FS_I(inode), ri);
245 246
246 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 247 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -314,7 +315,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
314void f2fs_evict_inode(struct inode *inode) 315void f2fs_evict_inode(struct inode *inode)
315{ 316{
316 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 317 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
317 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 318 struct f2fs_inode_info *fi = F2FS_I(inode);
319 nid_t xnid = fi->i_xattr_nid;
320 int err = 0;
318 321
319 /* some remained atomic pages should discarded */ 322 /* some remained atomic pages should discarded */
320 if (f2fs_is_atomic_file(inode)) 323 if (f2fs_is_atomic_file(inode))
@@ -330,41 +333,62 @@ void f2fs_evict_inode(struct inode *inode)
330 f2fs_bug_on(sbi, get_dirty_pages(inode)); 333 f2fs_bug_on(sbi, get_dirty_pages(inode));
331 remove_dirty_dir_inode(inode); 334 remove_dirty_dir_inode(inode);
332 335
336 f2fs_destroy_extent_tree(inode);
337
333 if (inode->i_nlink || is_bad_inode(inode)) 338 if (inode->i_nlink || is_bad_inode(inode))
334 goto no_delete; 339 goto no_delete;
335 340
336 sb_start_intwrite(inode->i_sb); 341 sb_start_intwrite(inode->i_sb);
337 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC); 342 set_inode_flag(fi, FI_NO_ALLOC);
338 i_size_write(inode, 0); 343 i_size_write(inode, 0);
339 344
340 if (F2FS_HAS_BLOCKS(inode)) 345 if (F2FS_HAS_BLOCKS(inode))
341 f2fs_truncate(inode); 346 err = f2fs_truncate(inode, true);
342 347
343 f2fs_lock_op(sbi); 348 if (!err) {
344 remove_inode_page(inode); 349 f2fs_lock_op(sbi);
345 f2fs_unlock_op(sbi); 350 err = remove_inode_page(inode);
351 f2fs_unlock_op(sbi);
352 }
346 353
347 sb_end_intwrite(inode->i_sb); 354 sb_end_intwrite(inode->i_sb);
348no_delete: 355no_delete:
356 stat_dec_inline_xattr(inode);
349 stat_dec_inline_dir(inode); 357 stat_dec_inline_dir(inode);
350 stat_dec_inline_inode(inode); 358 stat_dec_inline_inode(inode);
351 359
352 /* update extent info in inode */
353 if (inode->i_nlink)
354 f2fs_preserve_extent_tree(inode);
355 f2fs_destroy_extent_tree(inode);
356
357 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 360 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
358 if (xnid) 361 if (xnid)
359 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 362 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
360 if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) 363 if (is_inode_flag_set(fi, FI_APPEND_WRITE))
361 add_dirty_inode(sbi, inode->i_ino, APPEND_INO); 364 add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
362 if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) 365 if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
363 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); 366 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
367 if (is_inode_flag_set(fi, FI_FREE_NID)) {
368 if (err && err != -ENOENT)
369 alloc_nid_done(sbi, inode->i_ino);
370 else
371 alloc_nid_failed(sbi, inode->i_ino);
372 clear_inode_flag(fi, FI_FREE_NID);
373 }
374
375 if (err && err != -ENOENT) {
376 if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) {
377 /*
378 * we get here because we previously failed to release
379 * this inode's resources; remind the user to run fsck
380 * to fix the issue.
381 */
382 set_sbi_flag(sbi, SBI_NEED_FSCK);
383 f2fs_msg(sbi->sb, KERN_WARNING,
384 "inode (ino:%lu) resource leak, run fsck "
385 "to fix this issue!", inode->i_ino);
386 }
387 }
364out_clear: 388out_clear:
365#ifdef CONFIG_F2FS_FS_ENCRYPTION 389#ifdef CONFIG_F2FS_FS_ENCRYPTION
366 if (F2FS_I(inode)->i_crypt_info) 390 if (fi->i_crypt_info)
367 f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info); 391 f2fs_free_encryption_info(inode, fi->i_crypt_info);
368#endif 392#endif
369 clear_inode(inode); 393 clear_inode(inode);
370} 394}
@@ -373,6 +397,7 @@ out_clear:
373void handle_failed_inode(struct inode *inode) 397void handle_failed_inode(struct inode *inode)
374{ 398{
375 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 399 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
400 int err = 0;
376 401
377 clear_nlink(inode); 402 clear_nlink(inode);
378 make_bad_inode(inode); 403 make_bad_inode(inode);
@@ -380,13 +405,29 @@ void handle_failed_inode(struct inode *inode)
380 405
381 i_size_write(inode, 0); 406 i_size_write(inode, 0);
382 if (F2FS_HAS_BLOCKS(inode)) 407 if (F2FS_HAS_BLOCKS(inode))
383 f2fs_truncate(inode); 408 err = f2fs_truncate(inode, false);
384 409
385 remove_inode_page(inode); 410 if (!err)
411 err = remove_inode_page(inode);
412
413 /*
414 * if we skipped truncate_node in remove_inode_page because we failed
415 * earlier, we need another way to release the resources of this
416 * inode (e.g. valid block count, node block or nid). Here we choose
417 * to add this inode to the orphan list, so that iput can release it
418 * in the orphan recovery flow.
419 *
420 * Note: we should add the inode to the orphan list before
421 * f2fs_unlock_op(), so that we cannot lose this orphan if a
422 * checkpoint is followed by a sudden power-off.
423 */
424 if (err && err != -ENOENT) {
425 err = acquire_orphan_inode(sbi);
426 if (!err)
427 add_orphan_inode(sbi, inode->i_ino);
428 }
386 429
387 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); 430 set_inode_flag(F2FS_I(inode), FI_FREE_NID);
388 clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
389 alloc_nid_failed(sbi, inode->i_ino);
390 f2fs_unlock_op(sbi); 431 f2fs_unlock_op(sbi);
391 432
392 /* iput will drop the inode object */ 433 /* iput will drop the inode object */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index fdbae21ee8fb..a680bf38e4f0 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -53,7 +53,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
53 if (err) { 53 if (err) {
54 err = -EINVAL; 54 err = -EINVAL;
55 nid_free = true; 55 nid_free = true;
56 goto out; 56 goto fail;
57 } 57 }
58 58
59 /* If the directory encrypted, then we should encrypt the inode. */ 59 /* If the directory encrypted, then we should encrypt the inode. */
@@ -65,6 +65,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
65 if (f2fs_may_inline_dentry(inode)) 65 if (f2fs_may_inline_dentry(inode))
66 set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); 66 set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
67 67
68 f2fs_init_extent_tree(inode, NULL);
69
70 stat_inc_inline_xattr(inode);
68 stat_inc_inline_inode(inode); 71 stat_inc_inline_inode(inode);
69 stat_inc_inline_dir(inode); 72 stat_inc_inline_dir(inode);
70 73
@@ -72,15 +75,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
72 mark_inode_dirty(inode); 75 mark_inode_dirty(inode);
73 return inode; 76 return inode;
74 77
75out:
76 clear_nlink(inode);
77 unlock_new_inode(inode);
78fail: 78fail:
79 trace_f2fs_new_inode(inode, err); 79 trace_f2fs_new_inode(inode, err);
80 make_bad_inode(inode); 80 make_bad_inode(inode);
81 iput(inode);
82 if (nid_free) 81 if (nid_free)
83 alloc_nid_failed(sbi, ino); 82 set_inode_flag(F2FS_I(inode), FI_FREE_NID);
83 iput(inode);
84 return ERR_PTR(err); 84 return ERR_PTR(err);
85} 85}
86 86
@@ -89,7 +89,14 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
89 size_t slen = strlen(s); 89 size_t slen = strlen(s);
90 size_t sublen = strlen(sub); 90 size_t sublen = strlen(sub);
91 91
92 if (sublen > slen) 92 /*
93 * the filename format of a multimedia file should be defined as:
94 * "filename + '.' + extension".
95 */
96 if (slen < sublen + 2)
97 return 0;
98
99 if (s[slen - sublen - 1] != '.')
93 return 0; 100 return 0;
94 101
95 return !strncasecmp(s + slen - sublen, sub, sublen); 102 return !strncasecmp(s + slen - sublen, sub, sublen);
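
The old check only required sublen <= slen, so a bare extension such as
"mp3" or a name ending in "mp3" with no dot was misclassified as a
multimedia file. The patched version demands at least one name character
plus the separating dot. A standalone model of the fixed predicate:

	#include <stdio.h>
	#include <string.h>
	#include <strings.h>

	/* mirrors the patched is_multimedia_file(): "name + '.' + extension" */
	static int is_multimedia_file(const char *s, const char *sub)
	{
		size_t slen = strlen(s);
		size_t sublen = strlen(sub);

		if (slen < sublen + 2)
			return 0;
		if (s[slen - sublen - 1] != '.')
			return 0;
		return !strncasecmp(s + slen - sublen, sub, sublen);
	}

	int main(void)
	{
		printf("%d\n", is_multimedia_file("song.mp3", "mp3"));	/* 1 */
		printf("%d\n", is_multimedia_file("mp3", "mp3"));	/* 0: no name, no dot */
		printf("%d\n", is_multimedia_file(".mp3", "mp3"));	/* 0: empty name */
		return 0;
	}
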
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 7dd63b794bfb..27d1a74dd6f3 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -159,7 +159,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
159 159
160 head = radix_tree_lookup(&nm_i->nat_set_root, set); 160 head = radix_tree_lookup(&nm_i->nat_set_root, set);
161 if (!head) { 161 if (!head) {
162 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); 162 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
163 163
164 INIT_LIST_HEAD(&head->entry_list); 164 INIT_LIST_HEAD(&head->entry_list);
165 INIT_LIST_HEAD(&head->set_list); 165 INIT_LIST_HEAD(&head->set_list);
@@ -246,7 +246,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
246{ 246{
247 struct nat_entry *new; 247 struct nat_entry *new;
248 248
249 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); 249 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
250 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); 250 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
251 memset(new, 0, sizeof(struct nat_entry)); 251 memset(new, 0, sizeof(struct nat_entry));
252 nat_set_nid(new, nid); 252 nat_set_nid(new, nid);
@@ -306,6 +306,10 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
306 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { 306 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
307 unsigned char version = nat_get_version(e); 307 unsigned char version = nat_get_version(e);
308 nat_set_version(e, inc_node_version(version)); 308 nat_set_version(e, inc_node_version(version));
309
310 /* in order to reuse the nid */
311 if (nm_i->next_scan_nid > ni->nid)
312 nm_i->next_scan_nid = ni->nid;
309 } 313 }
310 314
311 /* change address */ 315 /* change address */
@@ -328,11 +332,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
328int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) 332int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
329{ 333{
330 struct f2fs_nm_info *nm_i = NM_I(sbi); 334 struct f2fs_nm_info *nm_i = NM_I(sbi);
335 int nr = nr_shrink;
331 336
332 if (available_free_memory(sbi, NAT_ENTRIES)) 337 if (!down_write_trylock(&nm_i->nat_tree_lock))
333 return 0; 338 return 0;
334 339
335 down_write(&nm_i->nat_tree_lock);
336 while (nr_shrink && !list_empty(&nm_i->nat_entries)) { 340 while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
337 struct nat_entry *ne; 341 struct nat_entry *ne;
338 ne = list_first_entry(&nm_i->nat_entries, 342 ne = list_first_entry(&nm_i->nat_entries,
@@ -341,7 +345,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
341 nr_shrink--; 345 nr_shrink--;
342 } 346 }
343 up_write(&nm_i->nat_tree_lock); 347 up_write(&nm_i->nat_tree_lock);
344 return nr_shrink; 348 return nr - nr_shrink;
345} 349}
346 350
347/* 351/*
@@ -898,17 +902,20 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
898 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 902 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
899 * f2fs_unlock_op(). 903 * f2fs_unlock_op().
900 */ 904 */
901void remove_inode_page(struct inode *inode) 905int remove_inode_page(struct inode *inode)
902{ 906{
903 struct dnode_of_data dn; 907 struct dnode_of_data dn;
908 int err;
904 909
905 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); 910 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
906 if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) 911 err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
907 return; 912 if (err)
913 return err;
908 914
909 if (truncate_xattr_node(inode, dn.inode_page)) { 915 err = truncate_xattr_node(inode, dn.inode_page);
916 if (err) {
910 f2fs_put_dnode(&dn); 917 f2fs_put_dnode(&dn);
911 return; 918 return err;
912 } 919 }
913 920
914 /* remove potential inline_data blocks */ 921 /* remove potential inline_data blocks */
@@ -922,6 +929,7 @@ void remove_inode_page(struct inode *inode)
922 929
923 /* will put inode & node pages */ 930 /* will put inode & node pages */
924 truncate_node(&dn); 931 truncate_node(&dn);
932 return 0;
925} 933}
926 934
927struct page *new_inode_page(struct inode *inode) 935struct page *new_inode_page(struct inode *inode)
@@ -991,8 +999,7 @@ fail:
991/* 999/*
992 * Caller should do after getting the following values. 1000 * Caller should do after getting the following values.
993 * 0: f2fs_put_page(page, 0) 1001 * 0: f2fs_put_page(page, 0)
994 * LOCKED_PAGE: f2fs_put_page(page, 1) 1002 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
995 * error: nothing
996 */ 1003 */
997static int read_node_page(struct page *page, int rw) 1004static int read_node_page(struct page *page, int rw)
998{ 1005{
@@ -1010,7 +1017,6 @@ static int read_node_page(struct page *page, int rw)
1010 1017
1011 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1018 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1012 ClearPageUptodate(page); 1019 ClearPageUptodate(page);
1013 f2fs_put_page(page, 1);
1014 return -ENOENT; 1020 return -ENOENT;
1015 } 1021 }
1016 1022
@@ -1041,10 +1047,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1041 return; 1047 return;
1042 1048
1043 err = read_node_page(apage, READA); 1049 err = read_node_page(apage, READA);
1044 if (err == 0) 1050 f2fs_put_page(apage, err ? 1 : 0);
1045 f2fs_put_page(apage, 0);
1046 else if (err == LOCKED_PAGE)
1047 f2fs_put_page(apage, 1);
1048} 1051}
1049 1052
1050struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 1053struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
@@ -1057,10 +1060,12 @@ repeat:
1057 return ERR_PTR(-ENOMEM); 1060 return ERR_PTR(-ENOMEM);
1058 1061
1059 err = read_node_page(page, READ_SYNC); 1062 err = read_node_page(page, READ_SYNC);
1060 if (err < 0) 1063 if (err < 0) {
1064 f2fs_put_page(page, 1);
1061 return ERR_PTR(err); 1065 return ERR_PTR(err);
1062 else if (err != LOCKED_PAGE) 1066 } else if (err != LOCKED_PAGE) {
1063 lock_page(page); 1067 lock_page(page);
1068 }
1064 1069
1065 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { 1070 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
1066 ClearPageUptodate(page); 1071 ClearPageUptodate(page);
@@ -1096,10 +1101,12 @@ repeat:
1096 return ERR_PTR(-ENOMEM); 1101 return ERR_PTR(-ENOMEM);
1097 1102
1098 err = read_node_page(page, READ_SYNC); 1103 err = read_node_page(page, READ_SYNC);
1099 if (err < 0) 1104 if (err < 0) {
1105 f2fs_put_page(page, 1);
1100 return ERR_PTR(err); 1106 return ERR_PTR(err);
1101 else if (err == LOCKED_PAGE) 1107 } else if (err == LOCKED_PAGE) {
1102 goto page_hit; 1108 goto page_hit;
1109 }
1103 1110
1104 blk_start_plug(&plug); 1111 blk_start_plug(&plug);
1105 1112
@@ -1533,7 +1540,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1533 if (unlikely(nid >= nm_i->max_nid)) 1540 if (unlikely(nid >= nm_i->max_nid))
1534 nid = 0; 1541 nid = 0;
1535 1542
1536 if (i++ == FREE_NID_PAGES) 1543 if (++i >= FREE_NID_PAGES)
1537 break; 1544 break;
1538 } 1545 }
1539 1546
@@ -1570,6 +1577,8 @@ retry:
1570 1577
1571 /* We should not use stale free nids created by build_free_nids */ 1578 /* We should not use stale free nids created by build_free_nids */
1572 if (nm_i->fcnt && !on_build_free_nids(nm_i)) { 1579 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
1580 struct node_info ni;
1581
1573 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); 1582 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
1574 list_for_each_entry(i, &nm_i->free_nid_list, list) 1583 list_for_each_entry(i, &nm_i->free_nid_list, list)
1575 if (i->state == NID_NEW) 1584 if (i->state == NID_NEW)
@@ -1580,6 +1589,13 @@ retry:
1580 i->state = NID_ALLOC; 1589 i->state = NID_ALLOC;
1581 nm_i->fcnt--; 1590 nm_i->fcnt--;
1582 spin_unlock(&nm_i->free_nid_list_lock); 1591 spin_unlock(&nm_i->free_nid_list_lock);
1592
1593 /* check nid is allocated already */
1594 get_node_info(sbi, *nid, &ni);
1595 if (ni.blk_addr != NULL_ADDR) {
1596 alloc_nid_done(sbi, *nid);
1597 goto retry;
1598 }
1583 return true; 1599 return true;
1584 } 1600 }
1585 spin_unlock(&nm_i->free_nid_list_lock); 1601 spin_unlock(&nm_i->free_nid_list_lock);
@@ -1636,6 +1652,32 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1636 kmem_cache_free(free_nid_slab, i); 1652 kmem_cache_free(free_nid_slab, i);
1637} 1653}
1638 1654
1655int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
1656{
1657 struct f2fs_nm_info *nm_i = NM_I(sbi);
1658 struct free_nid *i, *next;
1659 int nr = nr_shrink;
1660
1661 if (!mutex_trylock(&nm_i->build_lock))
1662 return 0;
1663
1664 spin_lock(&nm_i->free_nid_list_lock);
1665 list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
1666 if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK)
1667 break;
1668 if (i->state == NID_ALLOC)
1669 continue;
1670 __del_from_free_nid_list(nm_i, i);
1671 kmem_cache_free(free_nid_slab, i);
1672 nm_i->fcnt--;
1673 nr_shrink--;
1674 }
1675 spin_unlock(&nm_i->free_nid_list_lock);
1676 mutex_unlock(&nm_i->build_lock);
1677
1678 return nr - nr_shrink;
1679}
1680
1639void recover_inline_xattr(struct inode *inode, struct page *page) 1681void recover_inline_xattr(struct inode *inode, struct page *page)
1640{ 1682{
1641 void *src_addr, *dst_addr; 1683 void *src_addr, *dst_addr;
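
Both shrink helpers now share one return convention: the caller passes a
budget (nr_shrink) and gets back the number of entries actually freed
(nr - nr_shrink), which f2fs_shrink_scan() in the new shrinker.c below
accumulates into its freed total. A standalone model of that convention:

	#include <stdio.h>

	/* model of try_to_free_nats()/try_to_free_nids(): spend up to
	 * `budget` entries, report how many were actually freed.
	 */
	static int shrink_list(int *entries, int budget)
	{
		int nr = budget;

		while (budget > 0 && *entries > 0) {
			(*entries)--;
			budget--;
		}
		return nr - budget;	/* number actually freed */
	}

	int main(void)
	{
		int cached = 3;

		printf("freed %d\n", shrink_list(&cached, 10));	/* freed 3 */
		printf("freed %d\n", shrink_list(&cached, 10));	/* freed 0 */
		return 0;
	}
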
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 24a8c1d4f45f..faec2ca004b9 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -399,14 +399,35 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
399 f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); 399 f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
400 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page)); 400 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
401 401
402 for (; start < end; start++) { 402 for (; start < end; start++, dn.ofs_in_node++) {
403 block_t src, dest; 403 block_t src, dest;
404 404
405 src = datablock_addr(dn.node_page, dn.ofs_in_node); 405 src = datablock_addr(dn.node_page, dn.ofs_in_node);
406 dest = datablock_addr(page, dn.ofs_in_node); 406 dest = datablock_addr(page, dn.ofs_in_node);
407 407
408 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR && 408 /* skip recovering if dest is the same as src */
409 is_valid_blkaddr(sbi, dest, META_POR)) { 409 if (src == dest)
410 continue;
411
412 /* dest is invalid, just invalidate src block */
413 if (dest == NULL_ADDR) {
414 truncate_data_blocks_range(&dn, 1);
415 continue;
416 }
417
418 /*
419 * dest is a reserved block, invalidate the src block
420 * and then reserve one new block in the dnode page.
421 */
422 if (dest == NEW_ADDR) {
423 truncate_data_blocks_range(&dn, 1);
424 err = reserve_new_block(&dn);
425 f2fs_bug_on(sbi, err);
426 continue;
427 }
428
429 /* dest is valid block, try to recover from src to dest */
430 if (is_valid_blkaddr(sbi, dest, META_POR)) {
410 431
411 if (src == NULL_ADDR) { 432 if (src == NULL_ADDR) {
412 err = reserve_new_block(&dn); 433 err = reserve_new_block(&dn);
@@ -424,7 +445,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
424 ni.version, false); 445 ni.version, false);
425 recovered++; 446 recovered++;
426 } 447 }
427 dn.ofs_in_node++;
428 } 448 }
429 449
430 if (IS_INODE(dn.node_page)) 450 if (IS_INODE(dn.node_page))
@@ -525,14 +545,12 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
525 545
526 INIT_LIST_HEAD(&inode_list); 546 INIT_LIST_HEAD(&inode_list);
527 547
528 /* step #1: find fsynced inode numbers */
529 set_sbi_flag(sbi, SBI_POR_DOING);
530
531 /* prevent checkpoint */ 548 /* prevent checkpoint */
532 mutex_lock(&sbi->cp_mutex); 549 mutex_lock(&sbi->cp_mutex);
533 550
534 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 551 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
535 552
553 /* step #1: find fsynced inode numbers */
536 err = find_fsync_dnodes(sbi, &inode_list); 554 err = find_fsync_dnodes(sbi, &inode_list);
537 if (err) 555 if (err)
538 goto out; 556 goto out;
@@ -561,11 +579,20 @@ out:
561 579
562 clear_sbi_flag(sbi, SBI_POR_DOING); 580 clear_sbi_flag(sbi, SBI_POR_DOING);
563 if (err) { 581 if (err) {
564 discard_next_dnode(sbi, blkaddr); 582 bool invalidate = false;
583
584 if (discard_next_dnode(sbi, blkaddr))
585 invalidate = true;
565 586
566 /* Flush all the NAT/SIT pages */ 587 /* Flush all the NAT/SIT pages */
567 while (get_pages(sbi, F2FS_DIRTY_META)) 588 while (get_pages(sbi, F2FS_DIRTY_META))
568 sync_meta_pages(sbi, META, LONG_MAX); 589 sync_meta_pages(sbi, META, LONG_MAX);
590
591 /* invalidate temporary meta page */
592 if (invalidate)
593 invalidate_mapping_pages(META_MAPPING(sbi),
594 blkaddr, blkaddr);
595
569 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 596 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
570 mutex_unlock(&sbi->cp_mutex); 597 mutex_unlock(&sbi->cp_mutex);
571 } else if (need_writecp) { 598 } else if (need_writecp) {
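
The restructured loop in do_recover_data() turns the old compound condition
into an explicit four-way decision per block. A standalone model of that
ladder (the address constants mirror f2fs's NULL_ADDR/NEW_ADDR convention
but are placeholders here):

	#include <stdio.h>

	#define NULL_ADDR 0u		/* placeholder for f2fs's NULL_ADDR */
	#define NEW_ADDR  ~0u		/* placeholder for f2fs's NEW_ADDR */

	static const char *recover_action(unsigned src, unsigned dest,
					  int dest_valid)
	{
		if (src == dest)
			return "skip: already identical";
		if (dest == NULL_ADDR)
			return "invalidate src block";
		if (dest == NEW_ADDR)
			return "invalidate src, reserve a new block";
		if (dest_valid)
			return "recover src -> dest";
		return "ignore: dest is not a valid block address";
	}

	int main(void)
	{
		printf("%s\n", recover_action(5, 5, 1));
		printf("%s\n", recover_action(5, NULL_ADDR, 0));
		printf("%s\n", recover_action(5, NEW_ADDR, 0));
		printf("%s\n", recover_action(NULL_ADDR, 7, 1));
		return 0;
	}
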
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 61b97f9cb9f6..78e6d0696847 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -197,28 +197,20 @@ void register_inmem_page(struct inode *inode, struct page *page)
197{ 197{
198 struct f2fs_inode_info *fi = F2FS_I(inode); 198 struct f2fs_inode_info *fi = F2FS_I(inode);
199 struct inmem_pages *new; 199 struct inmem_pages *new;
200 int err;
201 200
202 SetPagePrivate(page);
203 f2fs_trace_pid(page); 201 f2fs_trace_pid(page);
204 202
203 set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
204 SetPagePrivate(page);
205
205 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); 206 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
206 207
207 /* add atomic page indices to the list */ 208 /* add atomic page indices to the list */
208 new->page = page; 209 new->page = page;
209 INIT_LIST_HEAD(&new->list); 210 INIT_LIST_HEAD(&new->list);
210retry: 211
211 /* increase reference count with clean state */ 212 /* increase reference count with clean state */
212 mutex_lock(&fi->inmem_lock); 213 mutex_lock(&fi->inmem_lock);
213 err = radix_tree_insert(&fi->inmem_root, page->index, new);
214 if (err == -EEXIST) {
215 mutex_unlock(&fi->inmem_lock);
216 kmem_cache_free(inmem_entry_slab, new);
217 return;
218 } else if (err) {
219 mutex_unlock(&fi->inmem_lock);
220 goto retry;
221 }
222 get_page(page); 214 get_page(page);
223 list_add_tail(&new->list, &fi->inmem_pages); 215 list_add_tail(&new->list, &fi->inmem_pages);
224 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 216 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
@@ -227,7 +219,7 @@ retry:
227 trace_f2fs_register_inmem_page(page, INMEM); 219 trace_f2fs_register_inmem_page(page, INMEM);
228} 220}
229 221
230void commit_inmem_pages(struct inode *inode, bool abort) 222int commit_inmem_pages(struct inode *inode, bool abort)
231{ 223{
232 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 224 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
233 struct f2fs_inode_info *fi = F2FS_I(inode); 225 struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -239,6 +231,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
239 .rw = WRITE_SYNC | REQ_PRIO, 231 .rw = WRITE_SYNC | REQ_PRIO,
240 .encrypted_page = NULL, 232 .encrypted_page = NULL,
241 }; 233 };
234 int err = 0;
242 235
243 /* 236 /*
244 * The abort is true only when f2fs_evict_inode is called. 237 * The abort is true only when f2fs_evict_inode is called.
@@ -254,8 +247,8 @@ void commit_inmem_pages(struct inode *inode, bool abort)
254 247
255 mutex_lock(&fi->inmem_lock); 248 mutex_lock(&fi->inmem_lock);
256 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { 249 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
250 lock_page(cur->page);
257 if (!abort) { 251 if (!abort) {
258 lock_page(cur->page);
259 if (cur->page->mapping == inode->i_mapping) { 252 if (cur->page->mapping == inode->i_mapping) {
260 set_page_dirty(cur->page); 253 set_page_dirty(cur->page);
261 f2fs_wait_on_page_writeback(cur->page, DATA); 254 f2fs_wait_on_page_writeback(cur->page, DATA);
@@ -263,15 +256,20 @@ void commit_inmem_pages(struct inode *inode, bool abort)
263 inode_dec_dirty_pages(inode); 256 inode_dec_dirty_pages(inode);
264 trace_f2fs_commit_inmem_page(cur->page, INMEM); 257 trace_f2fs_commit_inmem_page(cur->page, INMEM);
265 fio.page = cur->page; 258 fio.page = cur->page;
266 do_write_data_page(&fio); 259 err = do_write_data_page(&fio);
267 submit_bio = true; 260 submit_bio = true;
261 if (err) {
262 unlock_page(cur->page);
263 break;
264 }
268 } 265 }
269 f2fs_put_page(cur->page, 1);
270 } else { 266 } else {
271 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP); 267 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
272 put_page(cur->page);
273 } 268 }
274 radix_tree_delete(&fi->inmem_root, cur->page->index); 269 set_page_private(cur->page, 0);
270 ClearPagePrivate(cur->page);
271 f2fs_put_page(cur->page, 1);
272
275 list_del(&cur->list); 273 list_del(&cur->list);
276 kmem_cache_free(inmem_entry_slab, cur); 274 kmem_cache_free(inmem_entry_slab, cur);
277 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 275 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
@@ -283,6 +281,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
283 if (submit_bio) 281 if (submit_bio)
284 f2fs_submit_merged_bio(sbi, DATA, WRITE); 282 f2fs_submit_merged_bio(sbi, DATA, WRITE);
285 } 283 }
284 return err;
286} 285}
287 286
288/* 287/*
@@ -304,10 +303,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
304void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) 303void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
305{ 304{
306 /* try to shrink extent cache when there is no enough memory */ 305 /* try to shrink extent cache when there is no enough memory */
307 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER); 306 if (!available_free_memory(sbi, EXTENT_CACHE))
307 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
308
309 /* check the # of cached NAT entries */
310 if (!available_free_memory(sbi, NAT_ENTRIES))
311 try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
312
313 if (!available_free_memory(sbi, FREE_NIDS))
314 try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES);
308 315
309 /* check the # of cached NAT entries and prefree segments */ 316 /* checkpoint is the only way to shrink partial cached entries */
310 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || 317 if (!available_free_memory(sbi, NAT_ENTRIES) ||
311 excess_prefree_segs(sbi) || 318 excess_prefree_segs(sbi) ||
312 !available_free_memory(sbi, INO_ENTRIES)) 319 !available_free_memory(sbi, INO_ENTRIES))
313 f2fs_sync_fs(sbi->sb, true); 320 f2fs_sync_fs(sbi->sb, true);
@@ -323,10 +330,12 @@ repeat:
323 return 0; 330 return 0;
324 331
325 if (!llist_empty(&fcc->issue_list)) { 332 if (!llist_empty(&fcc->issue_list)) {
326 struct bio *bio = bio_alloc(GFP_NOIO, 0); 333 struct bio *bio;
327 struct flush_cmd *cmd, *next; 334 struct flush_cmd *cmd, *next;
328 int ret; 335 int ret;
329 336
337 bio = f2fs_bio_alloc(0);
338
330 fcc->dispatch_list = llist_del_all(&fcc->issue_list); 339 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
331 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); 340 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
332 341
@@ -358,8 +367,15 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
358 if (test_opt(sbi, NOBARRIER)) 367 if (test_opt(sbi, NOBARRIER))
359 return 0; 368 return 0;
360 369
361 if (!test_opt(sbi, FLUSH_MERGE)) 370 if (!test_opt(sbi, FLUSH_MERGE)) {
362 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 371 struct bio *bio = f2fs_bio_alloc(0);
372 int ret;
373
374 bio->bi_bdev = sbi->sb->s_bdev;
375 ret = submit_bio_wait(WRITE_FLUSH, bio);
376 bio_put(bio);
377 return ret;
378 }
363 379
364 init_completion(&cmd.wait); 380 init_completion(&cmd.wait);
365 381
@@ -503,7 +519,7 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
503 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 519 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
504} 520}
505 521
506void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) 522bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
507{ 523{
508 int err = -ENOTSUPP; 524 int err = -ENOTSUPP;
509 525
@@ -513,13 +529,16 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
513 unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); 529 unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
514 530
515 if (f2fs_test_bit(offset, se->discard_map)) 531 if (f2fs_test_bit(offset, se->discard_map))
516 return; 532 return false;
517 533
518 err = f2fs_issue_discard(sbi, blkaddr, 1); 534 err = f2fs_issue_discard(sbi, blkaddr, 1);
519 } 535 }
520 536
521 if (err) 537 if (err) {
522 update_meta_page(sbi, NULL, blkaddr); 538 update_meta_page(sbi, NULL, blkaddr);
539 return true;
540 }
541 return false;
523} 542}
524 543
525static void __add_discard_entry(struct f2fs_sb_info *sbi, 544static void __add_discard_entry(struct f2fs_sb_info *sbi,
@@ -1218,7 +1237,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1218 mutex_lock(&sit_i->sentry_lock); 1237 mutex_lock(&sit_i->sentry_lock);
1219 1238
1220 /* direct_io'ed data is aligned to the segment for better performance */ 1239 /* direct_io'ed data is aligned to the segment for better performance */
1221 if (direct_io && curseg->next_blkoff) 1240 if (direct_io && curseg->next_blkoff &&
1241 !has_not_enough_free_secs(sbi, 0))
1222 __allocate_new_segments(sbi, type); 1242 __allocate_new_segments(sbi, type);
1223 1243
1224 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 1244 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
@@ -1733,7 +1753,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1733static struct sit_entry_set *grab_sit_entry_set(void) 1753static struct sit_entry_set *grab_sit_entry_set(void)
1734{ 1754{
1735 struct sit_entry_set *ses = 1755 struct sit_entry_set *ses =
1736 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC); 1756 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
1737 1757
1738 ses->entry_cnt = 0; 1758 ses->entry_cnt = 0;
1739 INIT_LIST_HEAD(&ses->set_list); 1759 INIT_LIST_HEAD(&ses->set_list);
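
Both flush paths above now allocate their bio through f2fs_bio_alloc()
instead of bio_alloc()/blkdev_issue_flush(). That helper lives outside this
hunk; per the "use __GFP_NOFAIL to avoid infinite loop" patch in this
series, it is presumably a wrapper of roughly this shape:

	/* assumed shape of f2fs_bio_alloc(), defined elsewhere in f2fs */
	static struct bio *f2fs_bio_alloc(int npages)
	{
		/* __GFP_NOFAIL: the allocation never returns NULL, so the
		 * flush paths cannot crash on a failed bio_alloc().
		 */
		return bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
	}
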
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 79e7b879a753..b6e4ed15c698 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -177,6 +177,15 @@ struct segment_allocation {
177 void (*allocate_segment)(struct f2fs_sb_info *, int, bool); 177 void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
178}; 178};
179 179
180/*
181 * this value is set in a page's private data to indicate that
182 * the page was written atomically and is on the inmem_pages list.
183 */
184#define ATOMIC_WRITTEN_PAGE 0x0000ffff
185
186#define IS_ATOMIC_WRITTEN_PAGE(page) \
187 (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
188
180struct inmem_pages { 189struct inmem_pages {
181 struct list_head list; 190 struct list_head list;
182 struct page *page; 191 struct page *page;
@@ -555,16 +564,15 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
555 return curseg->next_blkoff; 564 return curseg->next_blkoff;
556} 565}
557 566
558#ifdef CONFIG_F2FS_CHECK_FS
559static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 567static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
560{ 568{
561 BUG_ON(segno > TOTAL_SEGS(sbi) - 1); 569 f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
562} 570}
563 571
564static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 572static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
565{ 573{
566 BUG_ON(blk_addr < SEG0_BLKADDR(sbi)); 574 f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi)
567 BUG_ON(blk_addr >= MAX_BLKADDR(sbi)); 575 || blk_addr >= MAX_BLKADDR(sbi));
568} 576}
569 577
570/* 578/*
@@ -573,16 +581,11 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
573static inline void check_block_count(struct f2fs_sb_info *sbi, 581static inline void check_block_count(struct f2fs_sb_info *sbi,
574 int segno, struct f2fs_sit_entry *raw_sit) 582 int segno, struct f2fs_sit_entry *raw_sit)
575{ 583{
584#ifdef CONFIG_F2FS_CHECK_FS
576 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false; 585 bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
577 int valid_blocks = 0; 586 int valid_blocks = 0;
578 int cur_pos = 0, next_pos; 587 int cur_pos = 0, next_pos;
579 588
580 /* check segment usage */
581 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
582
583 /* check boundary of a given segment number */
584 BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
585
586 /* check bitmap with valid block count */ 589 /* check bitmap with valid block count */
587 do { 590 do {
588 if (is_valid) { 591 if (is_valid) {
@@ -598,35 +601,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
598 is_valid = !is_valid; 601 is_valid = !is_valid;
599 } while (cur_pos < sbi->blocks_per_seg); 602 } while (cur_pos < sbi->blocks_per_seg);
600 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); 603 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
601}
602#else
603static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
604{
605 if (segno > TOTAL_SEGS(sbi) - 1)
606 set_sbi_flag(sbi, SBI_NEED_FSCK);
607}
608
609static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
610{
611 if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
612 set_sbi_flag(sbi, SBI_NEED_FSCK);
613}
614
615/*
616 * Summary block is always treated as an invalid block
617 */
618static inline void check_block_count(struct f2fs_sb_info *sbi,
619 int segno, struct f2fs_sit_entry *raw_sit)
620{
621 /* check segment usage */
622 if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
623 set_sbi_flag(sbi, SBI_NEED_FSCK);
624
625 /* check boundary of a given segment number */
626 if (segno > TOTAL_SEGS(sbi) - 1)
627 set_sbi_flag(sbi, SBI_NEED_FSCK);
628}
629#endif 604#endif
605 /* check segment usage, and check boundary of a given segment number */
606 f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
607 || segno > TOTAL_SEGS(sbi) - 1);
608}
630 609
631static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, 610static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
632 unsigned int start) 611 unsigned int start)
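
The ATOMIC_WRITTEN_PAGE tag replaces the per-inode radix tree that
register_inmem_page() used to maintain: any code holding a page can now
recognize an in-memory atomic page from the page itself. An illustrative
helper (not part of this patch):

	/* sketch: detect an atomic page without consulting any per-inode index */
	static inline bool f2fs_page_is_inmem(struct page *page)
	{
		return PagePrivate(page) && IS_ATOMIC_WRITTEN_PAGE(page);
	}
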
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
new file mode 100644
index 000000000000..da0d8e0b55a5
--- /dev/null
+++ b/fs/f2fs/shrinker.c
@@ -0,0 +1,139 @@
1/*
2 * f2fs shrinker support
3 * the basic infra was copied from fs/ubifs/shrinker.c
4 *
5 * Copyright (c) 2015 Motorola Mobility
6 * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12#include <linux/fs.h>
13#include <linux/f2fs_fs.h>
14
15#include "f2fs.h"
16
17static LIST_HEAD(f2fs_list);
18static DEFINE_SPINLOCK(f2fs_list_lock);
19static unsigned int shrinker_run_no;
20
21static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
22{
23 return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
24}
25
26static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
27{
28 if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK)
29 return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK;
30 return 0;
31}
32
33static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
34{
35 return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
36}
37
38unsigned long f2fs_shrink_count(struct shrinker *shrink,
39 struct shrink_control *sc)
40{
41 struct f2fs_sb_info *sbi;
42 struct list_head *p;
43 unsigned long count = 0;
44
45 spin_lock(&f2fs_list_lock);
46 p = f2fs_list.next;
47 while (p != &f2fs_list) {
48 sbi = list_entry(p, struct f2fs_sb_info, s_list);
49
50 /* stop f2fs_put_super */
51 if (!mutex_trylock(&sbi->umount_mutex)) {
52 p = p->next;
53 continue;
54 }
55 spin_unlock(&f2fs_list_lock);
56
57 /* count extent cache entries */
58 count += __count_extent_cache(sbi);
59
 60		/* count clean nat cache entries */
61 count += __count_nat_entries(sbi);
62
63 /* count free nids cache entries */
64 count += __count_free_nids(sbi);
65
66 spin_lock(&f2fs_list_lock);
67 p = p->next;
68 mutex_unlock(&sbi->umount_mutex);
69 }
70 spin_unlock(&f2fs_list_lock);
71 return count;
72}
73
74unsigned long f2fs_shrink_scan(struct shrinker *shrink,
75 struct shrink_control *sc)
76{
77 unsigned long nr = sc->nr_to_scan;
78 struct f2fs_sb_info *sbi;
79 struct list_head *p;
80 unsigned int run_no;
81 unsigned long freed = 0;
82
83 spin_lock(&f2fs_list_lock);
84 do {
85 run_no = ++shrinker_run_no;
86 } while (run_no == 0);
87 p = f2fs_list.next;
88 while (p != &f2fs_list) {
89 sbi = list_entry(p, struct f2fs_sb_info, s_list);
90
91 if (sbi->shrinker_run_no == run_no)
92 break;
93
94 /* stop f2fs_put_super */
95 if (!mutex_trylock(&sbi->umount_mutex)) {
96 p = p->next;
97 continue;
98 }
99 spin_unlock(&f2fs_list_lock);
100
101 sbi->shrinker_run_no = run_no;
102
103 /* shrink extent cache entries */
104 freed += f2fs_shrink_extent_tree(sbi, nr >> 1);
105
106 /* shrink clean nat cache entries */
107 if (freed < nr)
108 freed += try_to_free_nats(sbi, nr - freed);
109
110 /* shrink free nids cache entries */
111 if (freed < nr)
112 freed += try_to_free_nids(sbi, nr - freed);
113
114 spin_lock(&f2fs_list_lock);
115 p = p->next;
116 list_move_tail(&sbi->s_list, &f2fs_list);
117 mutex_unlock(&sbi->umount_mutex);
118 if (freed >= nr)
119 break;
120 }
121 spin_unlock(&f2fs_list_lock);
122 return freed;
123}
124
125void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
126{
127 spin_lock(&f2fs_list_lock);
128 list_add_tail(&sbi->s_list, &f2fs_list);
129 spin_unlock(&f2fs_list_lock);
130}
131
132void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
133{
134 f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
135
136 spin_lock(&f2fs_list_lock);
137 list_del(&sbi->s_list);
138 spin_unlock(&f2fs_list_lock);
139}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a06b0b46fe69..f79478115d37 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -39,6 +39,13 @@ static struct proc_dir_entry *f2fs_proc_root;
39static struct kmem_cache *f2fs_inode_cachep; 39static struct kmem_cache *f2fs_inode_cachep;
40static struct kset *f2fs_kset; 40static struct kset *f2fs_kset;
41 41
42/* f2fs-wide shrinker description */
43static struct shrinker f2fs_shrinker_info = {
44 .scan_objects = f2fs_shrink_scan,
45 .count_objects = f2fs_shrink_count,
46 .seeks = DEFAULT_SEEKS,
47};
48
42enum { 49enum {
43 Opt_gc_background, 50 Opt_gc_background,
44 Opt_disable_roll_forward, 51 Opt_disable_roll_forward,
@@ -58,6 +65,7 @@ enum {
58 Opt_nobarrier, 65 Opt_nobarrier,
59 Opt_fastboot, 66 Opt_fastboot,
60 Opt_extent_cache, 67 Opt_extent_cache,
68 Opt_noextent_cache,
61 Opt_noinline_data, 69 Opt_noinline_data,
62 Opt_err, 70 Opt_err,
63}; 71};
@@ -81,6 +89,7 @@ static match_table_t f2fs_tokens = {
81 {Opt_nobarrier, "nobarrier"}, 89 {Opt_nobarrier, "nobarrier"},
82 {Opt_fastboot, "fastboot"}, 90 {Opt_fastboot, "fastboot"},
83 {Opt_extent_cache, "extent_cache"}, 91 {Opt_extent_cache, "extent_cache"},
92 {Opt_noextent_cache, "noextent_cache"},
84 {Opt_noinline_data, "noinline_data"}, 93 {Opt_noinline_data, "noinline_data"},
85 {Opt_err, NULL}, 94 {Opt_err, NULL},
86}; 95};
@@ -382,6 +391,9 @@ static int parse_options(struct super_block *sb, char *options)
382 case Opt_extent_cache: 391 case Opt_extent_cache:
383 set_opt(sbi, EXTENT_CACHE); 392 set_opt(sbi, EXTENT_CACHE);
384 break; 393 break;
394 case Opt_noextent_cache:
395 clear_opt(sbi, EXTENT_CACHE);
396 break;
385 case Opt_noinline_data: 397 case Opt_noinline_data:
386 clear_opt(sbi, INLINE_DATA); 398 clear_opt(sbi, INLINE_DATA);
387 break; 399 break;
@@ -410,9 +422,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
410 atomic_set(&fi->dirty_pages, 0); 422 atomic_set(&fi->dirty_pages, 0);
411 fi->i_current_depth = 1; 423 fi->i_current_depth = 1;
412 fi->i_advise = 0; 424 fi->i_advise = 0;
413 rwlock_init(&fi->ext_lock);
414 init_rwsem(&fi->i_sem); 425 init_rwsem(&fi->i_sem);
415 INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
416 INIT_LIST_HEAD(&fi->inmem_pages); 426 INIT_LIST_HEAD(&fi->inmem_pages);
417 mutex_init(&fi->inmem_lock); 427 mutex_init(&fi->inmem_lock);
418 428
@@ -441,17 +451,22 @@ static int f2fs_drop_inode(struct inode *inode)
441 */ 451 */
442 if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { 452 if (!inode_unhashed(inode) && inode->i_state & I_SYNC) {
443 if (!inode->i_nlink && !is_bad_inode(inode)) { 453 if (!inode->i_nlink && !is_bad_inode(inode)) {
 454			/* avoid a simultaneous evict_inode call */
455 atomic_inc(&inode->i_count);
444 spin_unlock(&inode->i_lock); 456 spin_unlock(&inode->i_lock);
445 457
 446			/* some remaining atomic pages should be discarded */	458			/* some remaining atomic pages should be discarded */
447 if (f2fs_is_atomic_file(inode)) 459 if (f2fs_is_atomic_file(inode))
448 commit_inmem_pages(inode, true); 460 commit_inmem_pages(inode, true);
449 461
 462			/* fi->extent_tree should remain for writepage */
463 f2fs_destroy_extent_node(inode);
464
450 sb_start_intwrite(inode->i_sb); 465 sb_start_intwrite(inode->i_sb);
451 i_size_write(inode, 0); 466 i_size_write(inode, 0);
452 467
453 if (F2FS_HAS_BLOCKS(inode)) 468 if (F2FS_HAS_BLOCKS(inode))
454 f2fs_truncate(inode); 469 f2fs_truncate(inode, true);
455 470
456 sb_end_intwrite(inode->i_sb); 471 sb_end_intwrite(inode->i_sb);
457 472
@@ -461,6 +476,7 @@ static int f2fs_drop_inode(struct inode *inode)
461 F2FS_I(inode)->i_crypt_info); 476 F2FS_I(inode)->i_crypt_info);
462#endif 477#endif
463 spin_lock(&inode->i_lock); 478 spin_lock(&inode->i_lock);
479 atomic_dec(&inode->i_count);
464 } 480 }
465 return 0; 481 return 0;
466 } 482 }
@@ -498,9 +514,11 @@ static void f2fs_put_super(struct super_block *sb)
498 } 514 }
499 kobject_del(&sbi->s_kobj); 515 kobject_del(&sbi->s_kobj);
500 516
501 f2fs_destroy_stats(sbi);
502 stop_gc_thread(sbi); 517 stop_gc_thread(sbi);
503 518
519 /* prevent remaining shrinker jobs */
520 mutex_lock(&sbi->umount_mutex);
521
504 /* 522 /*
505 * We don't need to do checkpoint when superblock is clean. 523 * We don't need to do checkpoint when superblock is clean.
506 * But, the previous checkpoint was not done by umount, it needs to do 524 * But, the previous checkpoint was not done by umount, it needs to do
@@ -514,6 +532,9 @@ static void f2fs_put_super(struct super_block *sb)
514 write_checkpoint(sbi, &cpc); 532 write_checkpoint(sbi, &cpc);
515 } 533 }
516 534
 535	/* write_checkpoint can update stat information */
536 f2fs_destroy_stats(sbi);
537
517 /* 538 /*
518 * normally superblock is clean, so we need to release this. 539 * normally superblock is clean, so we need to release this.
519 * In addition, EIO will skip do checkpoint, we need this as well. 540 * In addition, EIO will skip do checkpoint, we need this as well.
@@ -521,6 +542,9 @@ static void f2fs_put_super(struct super_block *sb)
521 release_dirty_inode(sbi); 542 release_dirty_inode(sbi);
522 release_discard_addrs(sbi); 543 release_discard_addrs(sbi);
523 544
545 f2fs_leave_shrinker(sbi);
546 mutex_unlock(&sbi->umount_mutex);
547
524 iput(sbi->node_inode); 548 iput(sbi->node_inode);
525 iput(sbi->meta_inode); 549 iput(sbi->meta_inode);
526 550
@@ -647,6 +671,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
647 seq_puts(seq, ",fastboot"); 671 seq_puts(seq, ",fastboot");
648 if (test_opt(sbi, EXTENT_CACHE)) 672 if (test_opt(sbi, EXTENT_CACHE))
649 seq_puts(seq, ",extent_cache"); 673 seq_puts(seq, ",extent_cache");
674 else
675 seq_puts(seq, ",noextent_cache");
650 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 676 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
651 677
652 return 0; 678 return 0;
@@ -667,7 +693,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
667 struct seg_entry *se = get_seg_entry(sbi, i); 693 struct seg_entry *se = get_seg_entry(sbi, i);
668 694
669 if ((i % 10) == 0) 695 if ((i % 10) == 0)
670 seq_printf(seq, "%-5d", i); 696 seq_printf(seq, "%-10d", i);
671 seq_printf(seq, "%d|%-3u", se->type, 697 seq_printf(seq, "%d|%-3u", se->type,
672 get_valid_blocks(sbi, i, 1)); 698 get_valid_blocks(sbi, i, 1));
673 if ((i % 10) == 9 || i == (total_segs - 1)) 699 if ((i % 10) == 9 || i == (total_segs - 1))
@@ -699,6 +725,7 @@ static void default_options(struct f2fs_sb_info *sbi)
699 725
700 set_opt(sbi, BG_GC); 726 set_opt(sbi, BG_GC);
701 set_opt(sbi, INLINE_DATA); 727 set_opt(sbi, INLINE_DATA);
728 set_opt(sbi, EXTENT_CACHE);
702 729
703#ifdef CONFIG_F2FS_FS_XATTR 730#ifdef CONFIG_F2FS_FS_XATTR
704 set_opt(sbi, XATTR_USER); 731 set_opt(sbi, XATTR_USER);
@@ -970,6 +997,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
970 997
971 sbi->dir_level = DEF_DIR_LEVEL; 998 sbi->dir_level = DEF_DIR_LEVEL;
972 clear_sbi_flag(sbi, SBI_NEED_FSCK); 999 clear_sbi_flag(sbi, SBI_NEED_FSCK);
1000
1001 INIT_LIST_HEAD(&sbi->s_list);
1002 mutex_init(&sbi->umount_mutex);
973} 1003}
974 1004
975/* 1005/*
@@ -1135,7 +1165,9 @@ try_onemore:
1135 mutex_init(&sbi->writepages); 1165 mutex_init(&sbi->writepages);
1136 mutex_init(&sbi->cp_mutex); 1166 mutex_init(&sbi->cp_mutex);
1137 init_rwsem(&sbi->node_write); 1167 init_rwsem(&sbi->node_write);
1138 clear_sbi_flag(sbi, SBI_POR_DOING); 1168
1169 /* disallow all the data/node/meta page writes */
1170 set_sbi_flag(sbi, SBI_POR_DOING);
1139 spin_lock_init(&sbi->stat_lock); 1171 spin_lock_init(&sbi->stat_lock);
1140 1172
1141 init_rwsem(&sbi->read_io.io_rwsem); 1173 init_rwsem(&sbi->read_io.io_rwsem);
@@ -1212,8 +1244,12 @@ try_onemore:
1212 goto free_nm; 1244 goto free_nm;
1213 } 1245 }
1214 1246
1247 f2fs_join_shrinker(sbi);
1248
 1215	/* if there are any orphan nodes, free them */	1249	/* if there are any orphan nodes, free them */
1216 recover_orphan_inodes(sbi); 1250 err = recover_orphan_inodes(sbi);
1251 if (err)
1252 goto free_node_inode;
1217 1253
1218 /* read root inode and dentry */ 1254 /* read root inode and dentry */
1219 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); 1255 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
@@ -1275,6 +1311,8 @@ try_onemore:
1275 goto free_kobj; 1311 goto free_kobj;
1276 } 1312 }
1277 } 1313 }
1314 /* recover_fsync_data() cleared this already */
1315 clear_sbi_flag(sbi, SBI_POR_DOING);
1278 1316
1279 /* 1317 /*
1280 * If filesystem is not mounted as read-only then 1318 * If filesystem is not mounted as read-only then
@@ -1308,7 +1346,10 @@ free_root_inode:
1308 dput(sb->s_root); 1346 dput(sb->s_root);
1309 sb->s_root = NULL; 1347 sb->s_root = NULL;
1310free_node_inode: 1348free_node_inode:
1349 mutex_lock(&sbi->umount_mutex);
1350 f2fs_leave_shrinker(sbi);
1311 iput(sbi->node_inode); 1351 iput(sbi->node_inode);
1352 mutex_unlock(&sbi->umount_mutex);
1312free_nm: 1353free_nm:
1313 destroy_node_manager(sbi); 1354 destroy_node_manager(sbi);
1314free_sm: 1355free_sm:
@@ -1404,13 +1445,20 @@ static int __init init_f2fs_fs(void)
1404 err = f2fs_init_crypto(); 1445 err = f2fs_init_crypto();
1405 if (err) 1446 if (err)
1406 goto free_kset; 1447 goto free_kset;
1407 err = register_filesystem(&f2fs_fs_type); 1448
1449 err = register_shrinker(&f2fs_shrinker_info);
1408 if (err) 1450 if (err)
1409 goto free_crypto; 1451 goto free_crypto;
1452
1453 err = register_filesystem(&f2fs_fs_type);
1454 if (err)
1455 goto free_shrinker;
1410 f2fs_create_root_stats(); 1456 f2fs_create_root_stats();
1411 f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); 1457 f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
1412 return 0; 1458 return 0;
1413 1459
1460free_shrinker:
1461 unregister_shrinker(&f2fs_shrinker_info);
1414free_crypto: 1462free_crypto:
1415 f2fs_exit_crypto(); 1463 f2fs_exit_crypto();
1416free_kset: 1464free_kset:
@@ -1433,6 +1481,7 @@ static void __exit exit_f2fs_fs(void)
1433{ 1481{
1434 remove_proc_entry("fs/f2fs", NULL); 1482 remove_proc_entry("fs/f2fs", NULL);
1435 f2fs_destroy_root_stats(); 1483 f2fs_destroy_root_stats();
1484 unregister_shrinker(&f2fs_shrinker_info);
1436 unregister_filesystem(&f2fs_fs_type); 1485 unregister_filesystem(&f2fs_fs_type);
1437 f2fs_exit_crypto(); 1486 f2fs_exit_crypto();
1438 destroy_extent_cache(); 1487 destroy_extent_cache();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 07449b980acb..4de2286c0e4d 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -499,9 +499,12 @@ static int __f2fs_setxattr(struct inode *inode, int index,
499 499
500 len = strlen(name); 500 len = strlen(name);
501 501
502 if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode)) 502 if (len > F2FS_NAME_LEN)
503 return -ERANGE; 503 return -ERANGE;
504 504
505 if (size > MAX_VALUE_LEN(inode))
506 return -E2BIG;
507
505 base_addr = read_all_xattrs(inode, ipage); 508 base_addr = read_all_xattrs(inode, ipage);
506 if (!base_addr) 509 if (!base_addr)
507 goto exit; 510 goto exit;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 920408a21ffd..25c6324a0dd0 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -417,15 +417,25 @@ typedef __le32 f2fs_hash_t;
417 417
418#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) 418#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
419 419
420/* the number of dentry in a block */
421#define NR_DENTRY_IN_BLOCK 214
422
423/* MAX level for dir lookup */ 420/* MAX level for dir lookup */
424#define MAX_DIR_HASH_DEPTH 63 421#define MAX_DIR_HASH_DEPTH 63
425 422
426/* MAX buckets in one level of dir */ 423/* MAX buckets in one level of dir */
427#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) 424#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
428 425
426/*
427 * space utilization of regular dentry and inline dentry
428 * regular dentry inline dentry
429 * bitmap 1 * 27 = 27 1 * 23 = 23
430 * reserved 1 * 3 = 3 1 * 7 = 7
431 * dentry 11 * 214 = 2354 11 * 182 = 2002
432 * filename 8 * 214 = 1712 8 * 182 = 1456
433 * total 4096 3488
434 *
 435 * Note: there is more reserved space in an inline dentry than in a regular
 436 * dentry; when converting an inline dentry, handle this carefully.
437 */
 438#define NR_DENTRY_IN_BLOCK	214	/* the number of dentries in a block */
429#define SIZE_OF_DIR_ENTRY 11 /* by byte */ 439#define SIZE_OF_DIR_ENTRY 11 /* by byte */
430#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ 440#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
431 BITS_PER_BYTE) 441 BITS_PER_BYTE)
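The arithmetic in the table can be verified mechanically. A small user-space check (NR_INLINE_DENTRY = 182 is assumed from the inline column; F2FS_SLOT_LEN = 8 is the per-slot filename length):

#include <assert.h>
#include <stdio.h>

#define SIZE_OF_DIR_ENTRY	11
#define NR_DENTRY_IN_BLOCK	214
#define NR_INLINE_DENTRY	182	/* assumed from the comment above */
#define F2FS_SLOT_LEN		8

int main(void)
{
	/* regular dentry block: bitmap + reserved + dentries + filenames */
	assert(27 + 3 + (SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN)
				* NR_DENTRY_IN_BLOCK == 4096);
	/* inline dentry area inside the inode block */
	assert(23 + 7 + (SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN)
				* NR_INLINE_DENTRY == 3488);
	puts("dentry layout totals check out");
	return 0;
}

(11 + 8) * 214 = 4066 bytes of entries plus 30 bytes of bitmap and reserved space fill a 4096-byte block exactly; the inline variant fits 182 slots in 3488 bytes, which is why the conversion note above warns about the differing reserved space.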
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 04856a2d8c82..a01946514b5a 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1099,11 +1099,11 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start,
1099TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end, 1099TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
1100 1100
1101 TP_PROTO(struct inode *inode, unsigned int pgofs, 1101 TP_PROTO(struct inode *inode, unsigned int pgofs,
1102 struct extent_node *en), 1102 struct extent_info *ei),
1103 1103
1104 TP_ARGS(inode, pgofs, en), 1104 TP_ARGS(inode, pgofs, ei),
1105 1105
1106 TP_CONDITION(en), 1106 TP_CONDITION(ei),
1107 1107
1108 TP_STRUCT__entry( 1108 TP_STRUCT__entry(
1109 __field(dev_t, dev) 1109 __field(dev_t, dev)
@@ -1118,9 +1118,9 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
1118 __entry->dev = inode->i_sb->s_dev; 1118 __entry->dev = inode->i_sb->s_dev;
1119 __entry->ino = inode->i_ino; 1119 __entry->ino = inode->i_ino;
1120 __entry->pgofs = pgofs; 1120 __entry->pgofs = pgofs;
1121 __entry->fofs = en->ei.fofs; 1121 __entry->fofs = ei->fofs;
1122 __entry->blk = en->ei.blk; 1122 __entry->blk = ei->blk;
1123 __entry->len = en->ei.len; 1123 __entry->len = ei->len;
1124 ), 1124 ),
1125 1125
1126 TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, " 1126 TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "