aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-05-21 21:25:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-05-21 21:25:28 -0400
commit f6c658df63856db3bf8f467024b1dbee37b5399c (patch)
tree eefa48fd1e03354a284ad3228e55bab0a236667f /fs
parent 07be1337b9e8bfcd855c6e9175b5066a30ac609b (diff)
parent 0f3311a8c266b9f4fae4e5cdfcd9a86970e2b9bd (diff)
Merge tag 'for-f2fs-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "In this round, as Ted pointed out, fscrypto allows one more key prefix
  given by filesystem to resolve backward compatibility issues. Other
  than that, we've fixed several error handling cases by introducing a
  fault injection facility. We've also achieved performance improvement
  in some workloads as well as a bunch of bug fixes.

  Summary:

  Enhancements:
   - fs-specific prefix for fscrypto
   - fault injection facility
   - expose validity bitmaps for user to be aware of fragmentation
   - fallocate/rm/preallocation speed up
   - use percpu counters

  Bug fixes:
   - some inline_dentry/inline_data bugs
   - error handling for atomic/volatile/orphan inodes
   - recover broken superblock"

* tag 'for-f2fs-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (73 commits)
  f2fs: fix to update dirty page count correctly
  f2fs: flush pending bios right away when error occurs
  f2fs: avoid ENOSPC fault in the recovery process
  f2fs: make exit_f2fs_fs more clear
  f2fs: use percpu_counter for total_valid_inode_count
  f2fs: use percpu_counter for alloc_valid_block_count
  f2fs: use percpu_counter for # of dirty pages in inode
  f2fs: use percpu_counter for page counters
  f2fs: use bio count instead of F2FS_WRITEBACK page count
  f2fs: manipulate dirty file inodes when DATA_FLUSH is set
  f2fs: add fault injection to sysfs
  f2fs: no need inc dirty pages under inode lock
  f2fs: fix incorrect error path handling in f2fs_move_rehashed_dirents
  f2fs: fix i_current_depth during inline dentry conversion
  f2fs: correct return value type of f2fs_fill_super
  f2fs: fix deadlock when flush inline data
  f2fs: avoid f2fs_bug_on during recovery
  f2fs: show # of orphan inodes
  f2fs: support in batch fzero in dnode page
  f2fs: support in batch multi blocks preallocation
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/crypto/keyinfo.c 120
-rw-r--r-- fs/f2fs/Kconfig 8
-rw-r--r-- fs/f2fs/acl.c 4
-rw-r--r-- fs/f2fs/checkpoint.c 67
-rw-r--r-- fs/f2fs/data.c 197
-rw-r--r-- fs/f2fs/debug.c 25
-rw-r--r-- fs/f2fs/dir.c 128
-rw-r--r-- fs/f2fs/extent_cache.c 3
-rw-r--r-- fs/f2fs/f2fs.h 197
-rw-r--r-- fs/f2fs/file.c 309
-rw-r--r-- fs/f2fs/gc.c 27
-rw-r--r-- fs/f2fs/inline.c 111
-rw-r--r-- fs/f2fs/inode.c 66
-rw-r--r-- fs/f2fs/node.c 316
-rw-r--r-- fs/f2fs/recovery.c 149
-rw-r--r-- fs/f2fs/segment.c 8
-rw-r--r-- fs/f2fs/segment.h 9
-rw-r--r-- fs/f2fs/super.c 288
-rw-r--r-- fs/f2fs/xattr.c 3
19 files changed, 1431 insertions, 604 deletions
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 06f5aa478bf2..1ac263eddc4e 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -78,6 +78,67 @@ out:
78 return res; 78 return res;
79} 79}
80 80
81static int validate_user_key(struct fscrypt_info *crypt_info,
82 struct fscrypt_context *ctx, u8 *raw_key,
83 u8 *prefix, int prefix_size)
84{
85 u8 *full_key_descriptor;
86 struct key *keyring_key;
87 struct fscrypt_key *master_key;
88 const struct user_key_payload *ukp;
89 int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1;
90 int res;
91
92 full_key_descriptor = kmalloc(full_key_len, GFP_NOFS);
93 if (!full_key_descriptor)
94 return -ENOMEM;
95
96 memcpy(full_key_descriptor, prefix, prefix_size);
97 sprintf(full_key_descriptor + prefix_size,
98 "%*phN", FS_KEY_DESCRIPTOR_SIZE,
99 ctx->master_key_descriptor);
100 full_key_descriptor[full_key_len - 1] = '\0';
101 keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
102 kfree(full_key_descriptor);
103 if (IS_ERR(keyring_key))
104 return PTR_ERR(keyring_key);
105
106 if (keyring_key->type != &key_type_logon) {
107 printk_once(KERN_WARNING
108 "%s: key type must be logon\n", __func__);
109 res = -ENOKEY;
110 goto out;
111 }
112 down_read(&keyring_key->sem);
113 ukp = user_key_payload(keyring_key);
114 if (ukp->datalen != sizeof(struct fscrypt_key)) {
115 res = -EINVAL;
116 up_read(&keyring_key->sem);
117 goto out;
118 }
119 master_key = (struct fscrypt_key *)ukp->data;
120 BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
121
122 if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
123 printk_once(KERN_WARNING
124 "%s: key size incorrect: %d\n",
125 __func__, master_key->size);
126 res = -ENOKEY;
127 up_read(&keyring_key->sem);
128 goto out;
129 }
130 res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
131 up_read(&keyring_key->sem);
132 if (res)
133 goto out;
134
135 crypt_info->ci_keyring_key = keyring_key;
136 return 0;
137out:
138 key_put(keyring_key);
139 return res;
140}
141
81static void put_crypt_info(struct fscrypt_info *ci) 142static void put_crypt_info(struct fscrypt_info *ci)
82{ 143{
83 if (!ci) 144 if (!ci)
@@ -91,12 +152,7 @@ static void put_crypt_info(struct fscrypt_info *ci)
91int get_crypt_info(struct inode *inode) 152int get_crypt_info(struct inode *inode)
92{ 153{
93 struct fscrypt_info *crypt_info; 154 struct fscrypt_info *crypt_info;
94 u8 full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE +
95 (FS_KEY_DESCRIPTOR_SIZE * 2) + 1];
96 struct key *keyring_key = NULL;
97 struct fscrypt_key *master_key;
98 struct fscrypt_context ctx; 155 struct fscrypt_context ctx;
99 const struct user_key_payload *ukp;
100 struct crypto_skcipher *ctfm; 156 struct crypto_skcipher *ctfm;
101 const char *cipher_str; 157 const char *cipher_str;
102 u8 raw_key[FS_MAX_KEY_SIZE]; 158 u8 raw_key[FS_MAX_KEY_SIZE];
@@ -167,48 +223,24 @@ retry:
167 memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); 223 memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
168 goto got_key; 224 goto got_key;
169 } 225 }
170 memcpy(full_key_descriptor, FS_KEY_DESC_PREFIX,
171 FS_KEY_DESC_PREFIX_SIZE);
172 sprintf(full_key_descriptor + FS_KEY_DESC_PREFIX_SIZE,
173 "%*phN", FS_KEY_DESCRIPTOR_SIZE,
174 ctx.master_key_descriptor);
175 full_key_descriptor[FS_KEY_DESC_PREFIX_SIZE +
176 (2 * FS_KEY_DESCRIPTOR_SIZE)] = '\0';
177 keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
178 if (IS_ERR(keyring_key)) {
179 res = PTR_ERR(keyring_key);
180 keyring_key = NULL;
181 goto out;
182 }
183 crypt_info->ci_keyring_key = keyring_key;
184 if (keyring_key->type != &key_type_logon) {
185 printk_once(KERN_WARNING
186 "%s: key type must be logon\n", __func__);
187 res = -ENOKEY;
188 goto out;
189 }
190 down_read(&keyring_key->sem);
191 ukp = user_key_payload(keyring_key);
192 if (ukp->datalen != sizeof(struct fscrypt_key)) {
193 res = -EINVAL;
194 up_read(&keyring_key->sem);
195 goto out;
196 }
197 master_key = (struct fscrypt_key *)ukp->data;
198 BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
199 226
200 if (master_key->size != FS_AES_256_XTS_KEY_SIZE) { 227 res = validate_user_key(crypt_info, &ctx, raw_key,
201 printk_once(KERN_WARNING 228 FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE);
202 "%s: key size incorrect: %d\n", 229 if (res && inode->i_sb->s_cop->key_prefix) {
203 __func__, master_key->size); 230 u8 *prefix = NULL;
204 res = -ENOKEY; 231 int prefix_size, res2;
205 up_read(&keyring_key->sem); 232
233 prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix);
234 res2 = validate_user_key(crypt_info, &ctx, raw_key,
235 prefix, prefix_size);
236 if (res2) {
237 if (res2 == -ENOKEY)
238 res = -ENOKEY;
239 goto out;
240 }
241 } else if (res) {
206 goto out; 242 goto out;
207 } 243 }
208 res = derive_key_aes(ctx.nonce, master_key->raw, raw_key);
209 up_read(&keyring_key->sem);
210 if (res)
211 goto out;
212got_key: 244got_key:
213 ctfm = crypto_alloc_skcipher(cipher_str, 0, 0); 245 ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
214 if (!ctfm || IS_ERR(ctfm)) { 246 if (!ctfm || IS_ERR(ctfm)) {
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 1f8982a957f1..378c221d68a9 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -94,3 +94,11 @@ config F2FS_IO_TRACE
94 information and block IO patterns in the filesystem level. 94 information and block IO patterns in the filesystem level.
95 95
96 If unsure, say N. 96 If unsure, say N.
97
98config F2FS_FAULT_INJECTION
99 bool "F2FS fault injection facility"
100 depends on F2FS_FS
101 help
102 Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on.
103
104 If unsure, say N.
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 6f1fdda977b3..a31c7e859af6 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -115,7 +115,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
115 struct f2fs_acl_entry *entry; 115 struct f2fs_acl_entry *entry;
116 int i; 116 int i;
117 117
118 f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * 118 f2fs_acl = f2fs_kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
119 sizeof(struct f2fs_acl_entry), GFP_NOFS); 119 sizeof(struct f2fs_acl_entry), GFP_NOFS);
120 if (!f2fs_acl) 120 if (!f2fs_acl)
121 return ERR_PTR(-ENOMEM); 121 return ERR_PTR(-ENOMEM);
@@ -175,7 +175,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
175 175
176 retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); 176 retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
177 if (retval > 0) { 177 if (retval > 0) {
178 value = kmalloc(retval, GFP_F2FS_ZERO); 178 value = f2fs_kmalloc(retval, GFP_F2FS_ZERO);
179 if (!value) 179 if (!value)
180 return ERR_PTR(-ENOMEM); 180 return ERR_PTR(-ENOMEM);
181 retval = f2fs_getxattr(inode, name_index, "", value, 181 retval = f2fs_getxattr(inode, name_index, "", value,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0955312e5ca0..389160049993 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -26,6 +26,14 @@
26static struct kmem_cache *ino_entry_slab; 26static struct kmem_cache *ino_entry_slab;
27struct kmem_cache *inode_entry_slab; 27struct kmem_cache *inode_entry_slab;
28 28
29void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
30{
31 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
32 sbi->sb->s_flags |= MS_RDONLY;
33 if (!end_io)
34 f2fs_flush_merged_bios(sbi);
35}
36
29/* 37/*
30 * We guarantee no failure on the returned page. 38 * We guarantee no failure on the returned page.
31 */ 39 */
@@ -34,7 +42,7 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
34 struct address_space *mapping = META_MAPPING(sbi); 42 struct address_space *mapping = META_MAPPING(sbi);
35 struct page *page = NULL; 43 struct page *page = NULL;
36repeat: 44repeat:
37 page = grab_cache_page(mapping, index); 45 page = f2fs_grab_cache_page(mapping, index, false);
38 if (!page) { 46 if (!page) {
39 cond_resched(); 47 cond_resched();
40 goto repeat; 48 goto repeat;
@@ -64,7 +72,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
64 if (unlikely(!is_meta)) 72 if (unlikely(!is_meta))
65 fio.rw &= ~REQ_META; 73 fio.rw &= ~REQ_META;
66repeat: 74repeat:
67 page = grab_cache_page(mapping, index); 75 page = f2fs_grab_cache_page(mapping, index, false);
68 if (!page) { 76 if (!page) {
69 cond_resched(); 77 cond_resched();
70 goto repeat; 78 goto repeat;
@@ -91,7 +99,7 @@ repeat:
91 * meta page. 99 * meta page.
92 */ 100 */
93 if (unlikely(!PageUptodate(page))) 101 if (unlikely(!PageUptodate(page)))
94 f2fs_stop_checkpoint(sbi); 102 f2fs_stop_checkpoint(sbi, false);
95out: 103out:
96 return page; 104 return page;
97} 105}
@@ -186,7 +194,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
186 BUG(); 194 BUG();
187 } 195 }
188 196
189 page = grab_cache_page(META_MAPPING(sbi), fio.new_blkaddr); 197 page = f2fs_grab_cache_page(META_MAPPING(sbi),
198 fio.new_blkaddr, false);
190 if (!page) 199 if (!page)
191 continue; 200 continue;
192 if (PageUptodate(page)) { 201 if (PageUptodate(page)) {
@@ -211,7 +220,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
211 bool readahead = false; 220 bool readahead = false;
212 221
213 page = find_get_page(META_MAPPING(sbi), index); 222 page = find_get_page(META_MAPPING(sbi), index);
214 if (!page || (page && !PageUptodate(page))) 223 if (!page || !PageUptodate(page))
215 readahead = true; 224 readahead = true;
216 f2fs_put_page(page, 0); 225 f2fs_put_page(page, 0);
217 226
@@ -448,12 +457,12 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
448 return e ? true : false; 457 return e ? true : false;
449} 458}
450 459
451void release_ino_entry(struct f2fs_sb_info *sbi) 460void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
452{ 461{
453 struct ino_entry *e, *tmp; 462 struct ino_entry *e, *tmp;
454 int i; 463 int i;
455 464
456 for (i = APPEND_INO; i <= UPDATE_INO; i++) { 465 for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
457 struct inode_management *im = &sbi->im[i]; 466 struct inode_management *im = &sbi->im[i];
458 467
459 spin_lock(&im->ino_lock); 468 spin_lock(&im->ino_lock);
@@ -473,6 +482,13 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
473 int err = 0; 482 int err = 0;
474 483
475 spin_lock(&im->ino_lock); 484 spin_lock(&im->ino_lock);
485
486#ifdef CONFIG_F2FS_FAULT_INJECTION
487 if (time_to_inject(FAULT_ORPHAN)) {
488 spin_unlock(&im->ino_lock);
489 return -ENOSPC;
490 }
491#endif
476 if (unlikely(im->ino_num >= sbi->max_orphans)) 492 if (unlikely(im->ino_num >= sbi->max_orphans))
477 err = -ENOSPC; 493 err = -ENOSPC;
478 else 494 else
@@ -777,43 +793,32 @@ void update_dirty_page(struct inode *inode, struct page *page)
777 !S_ISLNK(inode->i_mode)) 793 !S_ISLNK(inode->i_mode))
778 return; 794 return;
779 795
780 spin_lock(&sbi->inode_lock[type]); 796 if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) {
781 __add_dirty_inode(inode, type); 797 spin_lock(&sbi->inode_lock[type]);
782 inode_inc_dirty_pages(inode); 798 __add_dirty_inode(inode, type);
783 spin_unlock(&sbi->inode_lock[type]); 799 spin_unlock(&sbi->inode_lock[type]);
800 }
784 801
802 inode_inc_dirty_pages(inode);
785 SetPagePrivate(page); 803 SetPagePrivate(page);
786 f2fs_trace_pid(page); 804 f2fs_trace_pid(page);
787} 805}
788 806
789void add_dirty_dir_inode(struct inode *inode)
790{
791 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
792
793 spin_lock(&sbi->inode_lock[DIR_INODE]);
794 __add_dirty_inode(inode, DIR_INODE);
795 spin_unlock(&sbi->inode_lock[DIR_INODE]);
796}
797
798void remove_dirty_inode(struct inode *inode) 807void remove_dirty_inode(struct inode *inode)
799{ 808{
800 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 809 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
801 struct f2fs_inode_info *fi = F2FS_I(inode);
802 enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE; 810 enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
803 811
804 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) && 812 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
805 !S_ISLNK(inode->i_mode)) 813 !S_ISLNK(inode->i_mode))
806 return; 814 return;
807 815
816 if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
817 return;
818
808 spin_lock(&sbi->inode_lock[type]); 819 spin_lock(&sbi->inode_lock[type]);
809 __remove_dirty_inode(inode, type); 820 __remove_dirty_inode(inode, type);
810 spin_unlock(&sbi->inode_lock[type]); 821 spin_unlock(&sbi->inode_lock[type]);
811
812 /* Only from the recovery routine */
813 if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
814 clear_inode_flag(fi, FI_DELAY_IPUT);
815 iput(inode);
816 }
817} 822}
818 823
819int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) 824int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
@@ -892,7 +897,7 @@ retry_flush_nodes:
892 897
893 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 898 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
894 up_write(&sbi->node_write); 899 up_write(&sbi->node_write);
895 err = sync_node_pages(sbi, 0, &wbc); 900 err = sync_node_pages(sbi, &wbc);
896 if (err) { 901 if (err) {
897 f2fs_unlock_all(sbi); 902 f2fs_unlock_all(sbi);
898 goto out; 903 goto out;
@@ -917,7 +922,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
917 for (;;) { 922 for (;;) {
918 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE); 923 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
919 924
920 if (!get_pages(sbi, F2FS_WRITEBACK)) 925 if (!atomic_read(&sbi->nr_wb_bios))
921 break; 926 break;
922 927
923 io_schedule_timeout(5*HZ); 928 io_schedule_timeout(5*HZ);
@@ -1082,7 +1087,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1082 1087
1083 /* update user_block_counts */ 1088 /* update user_block_counts */
1084 sbi->last_valid_block_count = sbi->total_valid_block_count; 1089 sbi->last_valid_block_count = sbi->total_valid_block_count;
1085 sbi->alloc_valid_block_count = 0; 1090 percpu_counter_set(&sbi->alloc_valid_block_count, 0);
1086 1091
1087 /* Here, we only have one bio having CP pack */ 1092 /* Here, we only have one bio having CP pack */
1088 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 1093 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
@@ -1098,7 +1103,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1098 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, 1103 invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
1099 discard_blk); 1104 discard_blk);
1100 1105
1101 release_ino_entry(sbi); 1106 release_ino_entry(sbi, false);
1102 1107
1103 if (unlikely(f2fs_cp_error(sbi))) 1108 if (unlikely(f2fs_cp_error(sbi)))
1104 return -EIO; 1109 return -EIO;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index bb376c3bca62..9a8bbc1fb1fa 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -68,13 +68,12 @@ static void f2fs_write_end_io(struct bio *bio)
68 68
69 if (unlikely(bio->bi_error)) { 69 if (unlikely(bio->bi_error)) {
70 set_bit(AS_EIO, &page->mapping->flags); 70 set_bit(AS_EIO, &page->mapping->flags);
71 f2fs_stop_checkpoint(sbi); 71 f2fs_stop_checkpoint(sbi, true);
72 } 72 }
73 end_page_writeback(page); 73 end_page_writeback(page);
74 dec_page_count(sbi, F2FS_WRITEBACK);
75 } 74 }
76 75 if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
77 if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait)) 76 wq_has_sleeper(&sbi->cp_wait))
78 wake_up(&sbi->cp_wait); 77 wake_up(&sbi->cp_wait);
79 78
80 bio_put(bio); 79 bio_put(bio);
@@ -98,6 +97,14 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
98 return bio; 97 return bio;
99} 98}
100 99
100static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw,
101 struct bio *bio)
102{
103 if (!is_read_io(rw))
104 atomic_inc(&sbi->nr_wb_bios);
105 submit_bio(rw, bio);
106}
107
101static void __submit_merged_bio(struct f2fs_bio_info *io) 108static void __submit_merged_bio(struct f2fs_bio_info *io)
102{ 109{
103 struct f2fs_io_info *fio = &io->fio; 110 struct f2fs_io_info *fio = &io->fio;
@@ -110,7 +117,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
110 else 117 else
111 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio); 118 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
112 119
113 submit_bio(fio->rw, io->bio); 120 __submit_bio(io->sbi, fio->rw, io->bio);
114 io->bio = NULL; 121 io->bio = NULL;
115} 122}
116 123
@@ -228,7 +235,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
228 return -EFAULT; 235 return -EFAULT;
229 } 236 }
230 237
231 submit_bio(fio->rw, bio); 238 __submit_bio(fio->sbi, fio->rw, bio);
232 return 0; 239 return 0;
233} 240}
234 241
@@ -248,9 +255,6 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
248 255
249 down_write(&io->io_rwsem); 256 down_write(&io->io_rwsem);
250 257
251 if (!is_read)
252 inc_page_count(sbi, F2FS_WRITEBACK);
253
254 if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || 258 if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
255 io->fio.rw != fio->rw)) 259 io->fio.rw != fio->rw))
256 __submit_merged_bio(io); 260 __submit_merged_bio(io);
@@ -278,6 +282,16 @@ alloc_new:
278 trace_f2fs_submit_page_mbio(fio->page, fio); 282 trace_f2fs_submit_page_mbio(fio->page, fio);
279} 283}
280 284
285static void __set_data_blkaddr(struct dnode_of_data *dn)
286{
287 struct f2fs_node *rn = F2FS_NODE(dn->node_page);
288 __le32 *addr_array;
289
290 /* Get physical address of data block */
291 addr_array = blkaddr_in_node(rn);
292 addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
293}
294
281/* 295/*
282 * Lock ordering for the change of data block address: 296 * Lock ordering for the change of data block address:
283 * ->data_page 297 * ->data_page
@@ -286,19 +300,9 @@ alloc_new:
286 */ 300 */
287void set_data_blkaddr(struct dnode_of_data *dn) 301void set_data_blkaddr(struct dnode_of_data *dn)
288{ 302{
289 struct f2fs_node *rn; 303 f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
290 __le32 *addr_array; 304 __set_data_blkaddr(dn);
291 struct page *node_page = dn->node_page; 305 if (set_page_dirty(dn->node_page))
292 unsigned int ofs_in_node = dn->ofs_in_node;
293
294 f2fs_wait_on_page_writeback(node_page, NODE, true);
295
296 rn = F2FS_NODE(node_page);
297
298 /* Get physical address of data block */
299 addr_array = blkaddr_in_node(rn);
300 addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
301 if (set_page_dirty(node_page))
302 dn->node_changed = true; 306 dn->node_changed = true;
303} 307}
304 308
@@ -309,24 +313,53 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
309 f2fs_update_extent_cache(dn); 313 f2fs_update_extent_cache(dn);
310} 314}
311 315
312int reserve_new_block(struct dnode_of_data *dn) 316/* dn->ofs_in_node will be returned with up-to-date last block pointer */
317int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
313{ 318{
314 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 319 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
315 320
321 if (!count)
322 return 0;
323
316 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 324 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
317 return -EPERM; 325 return -EPERM;
318 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 326 if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
319 return -ENOSPC; 327 return -ENOSPC;
320 328
321 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 329 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
330 dn->ofs_in_node, count);
331
332 f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
333
334 for (; count > 0; dn->ofs_in_node++) {
335 block_t blkaddr =
336 datablock_addr(dn->node_page, dn->ofs_in_node);
337 if (blkaddr == NULL_ADDR) {
338 dn->data_blkaddr = NEW_ADDR;
339 __set_data_blkaddr(dn);
340 count--;
341 }
342 }
343
344 if (set_page_dirty(dn->node_page))
345 dn->node_changed = true;
322 346
323 dn->data_blkaddr = NEW_ADDR;
324 set_data_blkaddr(dn);
325 mark_inode_dirty(dn->inode); 347 mark_inode_dirty(dn->inode);
326 sync_inode_page(dn); 348 sync_inode_page(dn);
327 return 0; 349 return 0;
328} 350}
329 351
352/* Should keep dn->ofs_in_node unchanged */
353int reserve_new_block(struct dnode_of_data *dn)
354{
355 unsigned int ofs_in_node = dn->ofs_in_node;
356 int ret;
357
358 ret = reserve_new_blocks(dn, 1);
359 dn->ofs_in_node = ofs_in_node;
360 return ret;
361}
362
330int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) 363int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
331{ 364{
332 bool need_put = dn->inode_page ? false : true; 365 bool need_put = dn->inode_page ? false : true;
@@ -545,6 +578,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
545 struct node_info ni; 578 struct node_info ni;
546 int seg = CURSEG_WARM_DATA; 579 int seg = CURSEG_WARM_DATA;
547 pgoff_t fofs; 580 pgoff_t fofs;
581 blkcnt_t count = 1;
548 582
549 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 583 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
550 return -EPERM; 584 return -EPERM;
@@ -553,7 +587,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
553 if (dn->data_blkaddr == NEW_ADDR) 587 if (dn->data_blkaddr == NEW_ADDR)
554 goto alloc; 588 goto alloc;
555 589
556 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 590 if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
557 return -ENOSPC; 591 return -ENOSPC;
558 592
559alloc: 593alloc:
@@ -582,8 +616,8 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
582 struct f2fs_map_blocks map; 616 struct f2fs_map_blocks map;
583 ssize_t ret = 0; 617 ssize_t ret = 0;
584 618
585 map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos); 619 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
586 map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from)); 620 map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from));
587 map.m_next_pgofs = NULL; 621 map.m_next_pgofs = NULL;
588 622
589 if (f2fs_encrypted_inode(inode)) 623 if (f2fs_encrypted_inode(inode))
@@ -621,8 +655,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
621 struct dnode_of_data dn; 655 struct dnode_of_data dn;
622 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 656 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
623 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; 657 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
624 pgoff_t pgofs, end_offset; 658 pgoff_t pgofs, end_offset, end;
625 int err = 0, ofs = 1; 659 int err = 0, ofs = 1;
660 unsigned int ofs_in_node, last_ofs_in_node;
661 blkcnt_t prealloc;
626 struct extent_info ei; 662 struct extent_info ei;
627 bool allocated = false; 663 bool allocated = false;
628 block_t blkaddr; 664 block_t blkaddr;
@@ -632,6 +668,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
632 668
633 /* it only supports block size == page size */ 669 /* it only supports block size == page size */
634 pgofs = (pgoff_t)map->m_lblk; 670 pgofs = (pgoff_t)map->m_lblk;
671 end = pgofs + maxblocks;
635 672
636 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { 673 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
637 map->m_pblk = ei.blk + pgofs - ei.fofs; 674 map->m_pblk = ei.blk + pgofs - ei.fofs;
@@ -648,6 +685,8 @@ next_dnode:
648 set_new_dnode(&dn, inode, NULL, NULL, 0); 685 set_new_dnode(&dn, inode, NULL, NULL, 0);
649 err = get_dnode_of_data(&dn, pgofs, mode); 686 err = get_dnode_of_data(&dn, pgofs, mode);
650 if (err) { 687 if (err) {
688 if (flag == F2FS_GET_BLOCK_BMAP)
689 map->m_pblk = 0;
651 if (err == -ENOENT) { 690 if (err == -ENOENT) {
652 err = 0; 691 err = 0;
653 if (map->m_next_pgofs) 692 if (map->m_next_pgofs)
@@ -657,6 +696,8 @@ next_dnode:
657 goto unlock_out; 696 goto unlock_out;
658 } 697 }
659 698
699 prealloc = 0;
700 ofs_in_node = dn.ofs_in_node;
660 end_offset = ADDRS_PER_PAGE(dn.node_page, inode); 701 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
661 702
662next_block: 703next_block:
@@ -669,31 +710,41 @@ next_block:
669 goto sync_out; 710 goto sync_out;
670 } 711 }
671 if (flag == F2FS_GET_BLOCK_PRE_AIO) { 712 if (flag == F2FS_GET_BLOCK_PRE_AIO) {
672 if (blkaddr == NULL_ADDR) 713 if (blkaddr == NULL_ADDR) {
673 err = reserve_new_block(&dn); 714 prealloc++;
715 last_ofs_in_node = dn.ofs_in_node;
716 }
674 } else { 717 } else {
675 err = __allocate_data_block(&dn); 718 err = __allocate_data_block(&dn);
719 if (!err) {
720 set_inode_flag(F2FS_I(inode),
721 FI_APPEND_WRITE);
722 allocated = true;
723 }
676 } 724 }
677 if (err) 725 if (err)
678 goto sync_out; 726 goto sync_out;
679 allocated = true;
680 map->m_flags = F2FS_MAP_NEW; 727 map->m_flags = F2FS_MAP_NEW;
681 blkaddr = dn.data_blkaddr; 728 blkaddr = dn.data_blkaddr;
682 } else { 729 } else {
730 if (flag == F2FS_GET_BLOCK_BMAP) {
731 map->m_pblk = 0;
732 goto sync_out;
733 }
683 if (flag == F2FS_GET_BLOCK_FIEMAP && 734 if (flag == F2FS_GET_BLOCK_FIEMAP &&
684 blkaddr == NULL_ADDR) { 735 blkaddr == NULL_ADDR) {
685 if (map->m_next_pgofs) 736 if (map->m_next_pgofs)
686 *map->m_next_pgofs = pgofs + 1; 737 *map->m_next_pgofs = pgofs + 1;
687 } 738 }
688 if (flag != F2FS_GET_BLOCK_FIEMAP || 739 if (flag != F2FS_GET_BLOCK_FIEMAP ||
689 blkaddr != NEW_ADDR) { 740 blkaddr != NEW_ADDR)
690 if (flag == F2FS_GET_BLOCK_BMAP)
691 err = -ENOENT;
692 goto sync_out; 741 goto sync_out;
693 }
694 } 742 }
695 } 743 }
696 744
745 if (flag == F2FS_GET_BLOCK_PRE_AIO)
746 goto skip;
747
697 if (map->m_len == 0) { 748 if (map->m_len == 0) {
698 /* preallocated unwritten block should be mapped for fiemap. */ 749 /* preallocated unwritten block should be mapped for fiemap. */
699 if (blkaddr == NEW_ADDR) 750 if (blkaddr == NEW_ADDR)
@@ -705,32 +756,49 @@ next_block:
705 } else if ((map->m_pblk != NEW_ADDR && 756 } else if ((map->m_pblk != NEW_ADDR &&
706 blkaddr == (map->m_pblk + ofs)) || 757 blkaddr == (map->m_pblk + ofs)) ||
707 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || 758 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
708 flag == F2FS_GET_BLOCK_PRE_DIO || 759 flag == F2FS_GET_BLOCK_PRE_DIO) {
709 flag == F2FS_GET_BLOCK_PRE_AIO) {
710 ofs++; 760 ofs++;
711 map->m_len++; 761 map->m_len++;
712 } else { 762 } else {
713 goto sync_out; 763 goto sync_out;
714 } 764 }
715 765
766skip:
716 dn.ofs_in_node++; 767 dn.ofs_in_node++;
717 pgofs++; 768 pgofs++;
718 769
719 if (map->m_len < maxblocks) { 770 /* preallocate blocks in batch for one dnode page */
720 if (dn.ofs_in_node < end_offset) 771 if (flag == F2FS_GET_BLOCK_PRE_AIO &&
721 goto next_block; 772 (pgofs == end || dn.ofs_in_node == end_offset)) {
722 773
723 if (allocated) 774 dn.ofs_in_node = ofs_in_node;
724 sync_inode_page(&dn); 775 err = reserve_new_blocks(&dn, prealloc);
725 f2fs_put_dnode(&dn); 776 if (err)
777 goto sync_out;
726 778
727 if (create) { 779 map->m_len += dn.ofs_in_node - ofs_in_node;
728 f2fs_unlock_op(sbi); 780 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
729 f2fs_balance_fs(sbi, allocated); 781 err = -ENOSPC;
782 goto sync_out;
730 } 783 }
731 allocated = false; 784 dn.ofs_in_node = end_offset;
732 goto next_dnode; 785 }
786
787 if (pgofs >= end)
788 goto sync_out;
789 else if (dn.ofs_in_node < end_offset)
790 goto next_block;
791
792 if (allocated)
793 sync_inode_page(&dn);
794 f2fs_put_dnode(&dn);
795
796 if (create) {
797 f2fs_unlock_op(sbi);
798 f2fs_balance_fs(sbi, allocated);
733 } 799 }
800 allocated = false;
801 goto next_dnode;
734 802
735sync_out: 803sync_out:
736 if (allocated) 804 if (allocated)
@@ -983,7 +1051,7 @@ got_it:
983 */ 1051 */
984 if (bio && (last_block_in_bio != block_nr - 1)) { 1052 if (bio && (last_block_in_bio != block_nr - 1)) {
985submit_and_realloc: 1053submit_and_realloc:
986 submit_bio(READ, bio); 1054 __submit_bio(F2FS_I_SB(inode), READ, bio);
987 bio = NULL; 1055 bio = NULL;
988 } 1056 }
989 if (bio == NULL) { 1057 if (bio == NULL) {
@@ -1026,7 +1094,7 @@ set_error_page:
1026 goto next_page; 1094 goto next_page;
1027confused: 1095confused:
1028 if (bio) { 1096 if (bio) {
1029 submit_bio(READ, bio); 1097 __submit_bio(F2FS_I_SB(inode), READ, bio);
1030 bio = NULL; 1098 bio = NULL;
1031 } 1099 }
1032 unlock_page(page); 1100 unlock_page(page);
@@ -1036,7 +1104,7 @@ next_page:
1036 } 1104 }
1037 BUG_ON(pages && !list_empty(pages)); 1105 BUG_ON(pages && !list_empty(pages));
1038 if (bio) 1106 if (bio)
1039 submit_bio(READ, bio); 1107 __submit_bio(F2FS_I_SB(inode), READ, bio);
1040 return 0; 1108 return 0;
1041} 1109}
1042 1110
@@ -1177,8 +1245,10 @@ write:
1177 goto redirty_out; 1245 goto redirty_out;
1178 if (f2fs_is_drop_cache(inode)) 1246 if (f2fs_is_drop_cache(inode))
1179 goto out; 1247 goto out;
1180 if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim && 1248 /* we should not write 0'th page having journal header */
1181 available_free_memory(sbi, BASE_CHECK)) 1249 if (f2fs_is_volatile_file(inode) && (!page->index ||
1250 (!wbc->for_reclaim &&
1251 available_free_memory(sbi, BASE_CHECK))))
1182 goto redirty_out; 1252 goto redirty_out;
1183 1253
1184 /* Dentry blocks are controlled by checkpoint */ 1254 /* Dentry blocks are controlled by checkpoint */
@@ -1480,7 +1550,8 @@ restart:
1480 if (pos + len <= MAX_INLINE_DATA) { 1550 if (pos + len <= MAX_INLINE_DATA) {
1481 read_inline_data(page, ipage); 1551 read_inline_data(page, ipage);
1482 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); 1552 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
1483 set_inline_node(ipage); 1553 if (inode->i_nlink)
1554 set_inline_node(ipage);
1484 } else { 1555 } else {
1485 err = f2fs_convert_inline_page(&dn, page); 1556 err = f2fs_convert_inline_page(&dn, page);
1486 if (err) 1557 if (err)
@@ -1496,7 +1567,7 @@ restart:
1496 } else { 1567 } else {
1497 /* hole case */ 1568 /* hole case */
1498 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1569 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
1499 if (err || (!err && dn.data_blkaddr == NULL_ADDR)) { 1570 if (err || dn.data_blkaddr == NULL_ADDR) {
1500 f2fs_put_dnode(&dn); 1571 f2fs_put_dnode(&dn);
1501 f2fs_lock_op(sbi); 1572 f2fs_lock_op(sbi);
1502 locked = true; 1573 locked = true;
@@ -1683,8 +1754,12 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1683 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); 1754 trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
1684 1755
1685 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); 1756 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
1686 if (err < 0 && iov_iter_rw(iter) == WRITE) 1757 if (iov_iter_rw(iter) == WRITE) {
1687 f2fs_write_failed(mapping, offset + count); 1758 if (err > 0)
1759 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1760 else if (err < 0)
1761 f2fs_write_failed(mapping, offset + count);
1762 }
1688 1763
1689 trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); 1764 trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
1690 1765
@@ -1714,6 +1789,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
1714 if (IS_ATOMIC_WRITTEN_PAGE(page)) 1789 if (IS_ATOMIC_WRITTEN_PAGE(page))
1715 return; 1790 return;
1716 1791
1792 set_page_private(page, 0);
1717 ClearPagePrivate(page); 1793 ClearPagePrivate(page);
1718} 1794}
1719 1795
@@ -1727,6 +1803,7 @@ int f2fs_release_page(struct page *page, gfp_t wait)
1727 if (IS_ATOMIC_WRITTEN_PAGE(page)) 1803 if (IS_ATOMIC_WRITTEN_PAGE(page))
1728 return 0; 1804 return 0;
1729 1805
1806 set_page_private(page, 0);
1730 ClearPagePrivate(page); 1807 ClearPagePrivate(page);
1731 return 1; 1808 return 1;
1732} 1809}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index f4a61a5ff79f..d89a425055d0 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -48,7 +48,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
48 si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; 48 si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
49 si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; 49 si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
50 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 50 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
51 si->wb_pages = get_pages(sbi, F2FS_WRITEBACK); 51 si->wb_bios = atomic_read(&sbi->nr_wb_bios);
52 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; 52 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
53 si->rsvd_segs = reserved_segments(sbi); 53 si->rsvd_segs = reserved_segments(sbi);
54 si->overp_segs = overprovision_segments(sbi); 54 si->overp_segs = overprovision_segments(sbi);
@@ -58,6 +58,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
58 si->inline_xattr = atomic_read(&sbi->inline_xattr); 58 si->inline_xattr = atomic_read(&sbi->inline_xattr);
59 si->inline_inode = atomic_read(&sbi->inline_inode); 59 si->inline_inode = atomic_read(&sbi->inline_inode);
60 si->inline_dir = atomic_read(&sbi->inline_dir); 60 si->inline_dir = atomic_read(&sbi->inline_dir);
61 si->orphans = sbi->im[ORPHAN_INO].ino_num;
61 si->utilization = utilization(sbi); 62 si->utilization = utilization(sbi);
62 63
63 si->free_segs = free_segments(sbi); 64 si->free_segs = free_segments(sbi);
@@ -143,6 +144,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
143 si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; 144 si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
144 si->base_mem += 2 * sizeof(struct f2fs_inode_info); 145 si->base_mem += 2 * sizeof(struct f2fs_inode_info);
145 si->base_mem += sizeof(*sbi->ckpt); 146 si->base_mem += sizeof(*sbi->ckpt);
147 si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
146 148
147 /* build sm */ 149 /* build sm */
148 si->base_mem += sizeof(struct f2fs_sm_info); 150 si->base_mem += sizeof(struct f2fs_sm_info);
@@ -192,7 +194,7 @@ get_cache:
192 si->cache_mem += NM_I(sbi)->dirty_nat_cnt * 194 si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
193 sizeof(struct nat_entry_set); 195 sizeof(struct nat_entry_set);
194 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); 196 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
195 for (i = 0; i <= UPDATE_INO; i++) 197 for (i = 0; i <= ORPHAN_INO; i++)
196 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 198 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
197 si->cache_mem += atomic_read(&sbi->total_ext_tree) * 199 si->cache_mem += atomic_read(&sbi->total_ext_tree) *
198 sizeof(struct extent_tree); 200 sizeof(struct extent_tree);
@@ -216,8 +218,9 @@ static int stat_show(struct seq_file *s, void *v)
216 list_for_each_entry(si, &f2fs_stat_list, stat_list) { 218 list_for_each_entry(si, &f2fs_stat_list, stat_list) {
217 update_general_status(si->sbi); 219 update_general_status(si->sbi);
218 220
219 seq_printf(s, "\n=====[ partition info(%pg). #%d ]=====\n", 221 seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n",
220 si->sbi->sb->s_bdev, i++); 222 si->sbi->sb->s_bdev, i++,
223 f2fs_readonly(si->sbi->sb) ? "RO": "RW");
221 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", 224 seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
222 si->sit_area_segs, si->nat_area_segs); 225 si->sit_area_segs, si->nat_area_segs);
223 seq_printf(s, "[SSA: %d] [MAIN: %d", 226 seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -237,6 +240,8 @@ static int stat_show(struct seq_file *s, void *v)
237 si->inline_inode); 240 si->inline_inode);
238 seq_printf(s, " - Inline_dentry Inode: %u\n", 241 seq_printf(s, " - Inline_dentry Inode: %u\n",
239 si->inline_dir); 242 si->inline_dir);
243 seq_printf(s, " - Orphan Inode: %u\n",
244 si->orphans);
240 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", 245 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
241 si->main_area_segs, si->main_area_sections, 246 si->main_area_segs, si->main_area_sections,
242 si->main_area_zones); 247 si->main_area_zones);
@@ -295,15 +300,15 @@ static int stat_show(struct seq_file *s, void *v)
295 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", 300 seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
296 si->ext_tree, si->zombie_tree, si->ext_node); 301 si->ext_tree, si->zombie_tree, si->ext_node);
297 seq_puts(s, "\nBalancing F2FS Async:\n"); 302 seq_puts(s, "\nBalancing F2FS Async:\n");
298 seq_printf(s, " - inmem: %4d, wb: %4d\n", 303 seq_printf(s, " - inmem: %4lld, wb_bios: %4d\n",
299 si->inmem_pages, si->wb_pages); 304 si->inmem_pages, si->wb_bios);
300 seq_printf(s, " - nodes: %4d in %4d\n", 305 seq_printf(s, " - nodes: %4lld in %4d\n",
301 si->ndirty_node, si->node_pages); 306 si->ndirty_node, si->node_pages);
302 seq_printf(s, " - dents: %4d in dirs:%4d\n", 307 seq_printf(s, " - dents: %4lld in dirs:%4d\n",
303 si->ndirty_dent, si->ndirty_dirs); 308 si->ndirty_dent, si->ndirty_dirs);
304 seq_printf(s, " - datas: %4d in files:%4d\n", 309 seq_printf(s, " - datas: %4lld in files:%4d\n",
305 si->ndirty_data, si->ndirty_files); 310 si->ndirty_data, si->ndirty_files);
306 seq_printf(s, " - meta: %4d in %4d\n", 311 seq_printf(s, " - meta: %4lld in %4d\n",
307 si->ndirty_meta, si->meta_pages); 312 si->ndirty_meta, si->meta_pages);
308 seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", 313 seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
309 si->dirty_nats, si->nats, si->dirty_sits, si->sits); 314 si->dirty_nats, si->nats, si->dirty_sits, si->sits);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 9e4615146d13..f9313f684540 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -48,7 +48,6 @@ unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
48 [F2FS_FT_SYMLINK] = DT_LNK, 48 [F2FS_FT_SYMLINK] = DT_LNK,
49}; 49};
50 50
51#define S_SHIFT 12
52static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { 51static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
53 [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE, 52 [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
54 [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR, 53 [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
@@ -64,6 +63,13 @@ void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 63 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
65} 64}
66 65
66unsigned char get_de_type(struct f2fs_dir_entry *de)
67{
68 if (de->file_type < F2FS_FT_MAX)
69 return f2fs_filetype_table[de->file_type];
70 return DT_UNKNOWN;
71}
72
67static unsigned long dir_block_index(unsigned int level, 73static unsigned long dir_block_index(unsigned int level,
68 int dir_level, unsigned int idx) 74 int dir_level, unsigned int idx)
69{ 75{
@@ -95,11 +101,6 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
95 else 101 else
96 kunmap(dentry_page); 102 kunmap(dentry_page);
97 103
98 /*
99 * For the most part, it should be a bug when name_len is zero.
100 * We stop here for figuring out where the bugs has occurred.
101 */
102 f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0);
103 return de; 104 return de;
104} 105}
105 106
@@ -124,6 +125,11 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
124 125
125 de = &d->dentry[bit_pos]; 126 de = &d->dentry[bit_pos];
126 127
128 if (unlikely(!de->name_len)) {
129 bit_pos++;
130 continue;
131 }
132
127 /* encrypted case */ 133 /* encrypted case */
128 de_name.name = d->filename[bit_pos]; 134 de_name.name = d->filename[bit_pos];
129 de_name.len = le16_to_cpu(de->name_len); 135 de_name.len = le16_to_cpu(de->name_len);
@@ -141,10 +147,6 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
141 *max_slots = max_len; 147 *max_slots = max_len;
142 max_len = 0; 148 max_len = 0;
143 149
144 /* remain bug on condition */
145 if (unlikely(!de->name_len))
146 d->max = -1;
147
148 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 150 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
149 } 151 }
150 152
@@ -389,9 +391,14 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
389 return page; 391 return page;
390 392
391 if (S_ISDIR(inode->i_mode)) { 393 if (S_ISDIR(inode->i_mode)) {
394 /* in order to handle error case */
395 get_page(page);
392 err = make_empty_dir(inode, dir, page); 396 err = make_empty_dir(inode, dir, page);
393 if (err) 397 if (err) {
394 goto error; 398 lock_page(page);
399 goto put_error;
400 }
401 put_page(page);
395 } 402 }
396 403
397 err = f2fs_init_acl(inode, dir, page, dpage); 404 err = f2fs_init_acl(inode, dir, page, dpage);
@@ -435,13 +442,12 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
435 return page; 442 return page;
436 443
437put_error: 444put_error:
438 f2fs_put_page(page, 1); 445 /* truncate empty dir pages */
439error:
440 /* once the failed inode becomes a bad inode, i_mode is S_IFREG */
441 truncate_inode_pages(&inode->i_data, 0); 446 truncate_inode_pages(&inode->i_data, 0);
442 truncate_blocks(inode, 0, false); 447
443 remove_dirty_inode(inode); 448 clear_nlink(inode);
444 remove_inode_page(inode); 449 update_inode(inode, page);
450 f2fs_put_page(page, 1);
445 return ERR_PTR(err); 451 return ERR_PTR(err);
446} 452}
447 453
@@ -509,11 +515,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
509 } 515 }
510} 516}
511 517
512/* 518int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
513 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
514 * f2fs_unlock_op().
515 */
516int __f2fs_add_link(struct inode *dir, const struct qstr *name,
517 struct inode *inode, nid_t ino, umode_t mode) 519 struct inode *inode, nid_t ino, umode_t mode)
518{ 520{
519 unsigned int bit_pos; 521 unsigned int bit_pos;
@@ -526,28 +528,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
526 struct f2fs_dentry_block *dentry_blk = NULL; 528 struct f2fs_dentry_block *dentry_blk = NULL;
527 struct f2fs_dentry_ptr d; 529 struct f2fs_dentry_ptr d;
528 struct page *page = NULL; 530 struct page *page = NULL;
529 struct fscrypt_name fname; 531 int slots, err = 0;
530 struct qstr new_name;
531 int slots, err;
532
533 err = fscrypt_setup_filename(dir, name, 0, &fname);
534 if (err)
535 return err;
536
537 new_name.name = fname_name(&fname);
538 new_name.len = fname_len(&fname);
539
540 if (f2fs_has_inline_dentry(dir)) {
541 err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode);
542 if (!err || err != -EAGAIN)
543 goto out;
544 else
545 err = 0;
546 }
547 532
548 level = 0; 533 level = 0;
549 slots = GET_DENTRY_SLOTS(new_name.len); 534 slots = GET_DENTRY_SLOTS(new_name->len);
550 dentry_hash = f2fs_dentry_hash(&new_name); 535 dentry_hash = f2fs_dentry_hash(new_name);
551 536
552 current_depth = F2FS_I(dir)->i_current_depth; 537 current_depth = F2FS_I(dir)->i_current_depth;
553 if (F2FS_I(dir)->chash == dentry_hash) { 538 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -556,10 +541,12 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
556 } 541 }
557 542
558start: 543start:
559 if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) { 544#ifdef CONFIG_F2FS_FAULT_INJECTION
560 err = -ENOSPC; 545 if (time_to_inject(FAULT_DIR_DEPTH))
561 goto out; 546 return -ENOSPC;
562 } 547#endif
548 if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
549 return -ENOSPC;
563 550
564 /* Increase the depth, if required */ 551 /* Increase the depth, if required */
565 if (level == current_depth) 552 if (level == current_depth)
@@ -573,10 +560,8 @@ start:
573 560
574 for (block = bidx; block <= (bidx + nblock - 1); block++) { 561 for (block = bidx; block <= (bidx + nblock - 1); block++) {
575 dentry_page = get_new_data_page(dir, NULL, block, true); 562 dentry_page = get_new_data_page(dir, NULL, block, true);
576 if (IS_ERR(dentry_page)) { 563 if (IS_ERR(dentry_page))
577 err = PTR_ERR(dentry_page); 564 return PTR_ERR(dentry_page);
578 goto out;
579 }
580 565
581 dentry_blk = kmap(dentry_page); 566 dentry_blk = kmap(dentry_page);
582 bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, 567 bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
@@ -596,7 +581,7 @@ add_dentry:
596 581
597 if (inode) { 582 if (inode) {
598 down_write(&F2FS_I(inode)->i_sem); 583 down_write(&F2FS_I(inode)->i_sem);
599 page = init_inode_metadata(inode, dir, &new_name, NULL); 584 page = init_inode_metadata(inode, dir, new_name, NULL);
600 if (IS_ERR(page)) { 585 if (IS_ERR(page)) {
601 err = PTR_ERR(page); 586 err = PTR_ERR(page);
602 goto fail; 587 goto fail;
@@ -606,7 +591,7 @@ add_dentry:
606 } 591 }
607 592
608 make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); 593 make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
609 f2fs_update_dentry(ino, mode, &d, &new_name, dentry_hash, bit_pos); 594 f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
610 595
611 set_page_dirty(dentry_page); 596 set_page_dirty(dentry_page);
612 597
@@ -628,7 +613,34 @@ fail:
628 } 613 }
629 kunmap(dentry_page); 614 kunmap(dentry_page);
630 f2fs_put_page(dentry_page, 1); 615 f2fs_put_page(dentry_page, 1);
631out: 616
617 return err;
618}
619
620/*
621 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
622 * f2fs_unlock_op().
623 */
624int __f2fs_add_link(struct inode *dir, const struct qstr *name,
625 struct inode *inode, nid_t ino, umode_t mode)
626{
627 struct fscrypt_name fname;
628 struct qstr new_name;
629 int err;
630
631 err = fscrypt_setup_filename(dir, name, 0, &fname);
632 if (err)
633 return err;
634
635 new_name.name = fname_name(&fname);
636 new_name.len = fname_len(&fname);
637
638 err = -EAGAIN;
639 if (f2fs_has_inline_dentry(dir))
640 err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode);
641 if (err == -EAGAIN)
642 err = f2fs_add_regular_entry(dir, &new_name, inode, ino, mode);
643
632 fscrypt_free_filename(&fname); 644 fscrypt_free_filename(&fname);
633 f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); 645 f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
634 return err; 646 return err;
@@ -792,10 +804,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
792 continue; 804 continue;
793 } 805 }
794 806
795 if (de->file_type < F2FS_FT_MAX) 807 d_type = get_de_type(de);
796 d_type = f2fs_filetype_table[de->file_type];
797 else
798 d_type = DT_UNKNOWN;
799 808
800 de_name.name = d->filename[bit_pos]; 809 de_name.name = d->filename[bit_pos];
801 de_name.len = le16_to_cpu(de->name_len); 810 de_name.len = le16_to_cpu(de->name_len);
@@ -804,7 +813,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
804 int save_len = fstr->len; 813 int save_len = fstr->len;
805 int ret; 814 int ret;
806 815
807 de_name.name = kmalloc(de_name.len, GFP_NOFS); 816 de_name.name = f2fs_kmalloc(de_name.len, GFP_NOFS);
808 if (!de_name.name) 817 if (!de_name.name)
809 return false; 818 return false;
810 819
@@ -887,6 +896,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
887 kunmap(dentry_page); 896 kunmap(dentry_page);
888 f2fs_put_page(dentry_page, 1); 897 f2fs_put_page(dentry_page, 1);
889 } 898 }
899 err = 0;
890out: 900out:
891 fscrypt_fname_free_buffer(&fstr); 901 fscrypt_fname_free_buffer(&fstr);
892 return err; 902 return err;
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index c859bb044728..5bfcdb9b69f2 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -196,8 +196,7 @@ bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
196 if (!i_ext || !i_ext->len) 196 if (!i_ext || !i_ext->len)
197 return false; 197 return false;
198 198
199 set_extent_info(&ei, le32_to_cpu(i_ext->fofs), 199 get_extent_info(&ei, i_ext);
200 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
201 200
202 write_lock(&et->lock); 201 write_lock(&et->lock);
203 if (atomic_read(&et->node_cnt)) 202 if (atomic_read(&et->node_cnt))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7a4558d17f36..916e7c238e3d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -37,6 +37,57 @@
37 } while (0) 37 } while (0)
38#endif 38#endif
39 39
40#ifdef CONFIG_F2FS_FAULT_INJECTION
41enum {
42 FAULT_KMALLOC,
43 FAULT_PAGE_ALLOC,
44 FAULT_ALLOC_NID,
45 FAULT_ORPHAN,
46 FAULT_BLOCK,
47 FAULT_DIR_DEPTH,
48 FAULT_MAX,
49};
50
51struct f2fs_fault_info {
52 atomic_t inject_ops;
53 unsigned int inject_rate;
54 unsigned int inject_type;
55};
56
57extern struct f2fs_fault_info f2fs_fault;
58extern char *fault_name[FAULT_MAX];
59#define IS_FAULT_SET(type) (f2fs_fault.inject_type & (1 << (type)))
60
61static inline bool time_to_inject(int type)
62{
63 if (!f2fs_fault.inject_rate)
64 return false;
65 if (type == FAULT_KMALLOC && !IS_FAULT_SET(type))
66 return false;
67 else if (type == FAULT_PAGE_ALLOC && !IS_FAULT_SET(type))
68 return false;
69 else if (type == FAULT_ALLOC_NID && !IS_FAULT_SET(type))
70 return false;
71 else if (type == FAULT_ORPHAN && !IS_FAULT_SET(type))
72 return false;
73 else if (type == FAULT_BLOCK && !IS_FAULT_SET(type))
74 return false;
75 else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type))
76 return false;
77
78 atomic_inc(&f2fs_fault.inject_ops);
79 if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) {
80 atomic_set(&f2fs_fault.inject_ops, 0);
81 printk("%sF2FS-fs : inject %s in %pF\n",
82 KERN_INFO,
83 fault_name[type],
84 __builtin_return_address(0));
85 return true;
86 }
87 return false;
88}
89#endif
90
40/* 91/*
41 * For mount options 92 * For mount options
42 */ 93 */
@@ -56,6 +107,7 @@
56#define F2FS_MOUNT_EXTENT_CACHE 0x00002000 107#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
57#define F2FS_MOUNT_FORCE_FG_GC 0x00004000 108#define F2FS_MOUNT_FORCE_FG_GC 0x00004000
58#define F2FS_MOUNT_DATA_FLUSH 0x00008000 109#define F2FS_MOUNT_DATA_FLUSH 0x00008000
110#define F2FS_MOUNT_FAULT_INJECTION 0x00010000
59 111
60#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 112#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
61#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 113#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -159,7 +211,6 @@ struct fsync_inode_entry {
159 struct inode *inode; /* vfs inode pointer */ 211 struct inode *inode; /* vfs inode pointer */
160 block_t blkaddr; /* block address locating the last fsync */ 212 block_t blkaddr; /* block address locating the last fsync */
161 block_t last_dentry; /* block address locating the last dentry */ 213 block_t last_dentry; /* block address locating the last dentry */
162 block_t last_inode; /* block address locating the last inode */
163}; 214};
164 215
165#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) 216#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats))
@@ -385,7 +436,7 @@ struct f2fs_inode_info {
385 /* Use below internally in f2fs*/ 436 /* Use below internally in f2fs*/
386 unsigned long flags; /* use to pass per-file flags */ 437 unsigned long flags; /* use to pass per-file flags */
387 struct rw_semaphore i_sem; /* protect fi info */ 438 struct rw_semaphore i_sem; /* protect fi info */
388 atomic_t dirty_pages; /* # of dirty pages */ 439 struct percpu_counter dirty_pages; /* # of dirty pages */
389 f2fs_hash_t chash; /* hash value of given file name */ 440 f2fs_hash_t chash; /* hash value of given file name */
390 unsigned int clevel; /* maximum level of given file name */ 441 unsigned int clevel; /* maximum level of given file name */
391 nid_t i_xattr_nid; /* node id that contains xattrs */ 442 nid_t i_xattr_nid; /* node id that contains xattrs */
@@ -398,11 +449,11 @@ struct f2fs_inode_info {
398}; 449};
399 450
400static inline void get_extent_info(struct extent_info *ext, 451static inline void get_extent_info(struct extent_info *ext,
401 struct f2fs_extent i_ext) 452 struct f2fs_extent *i_ext)
402{ 453{
403 ext->fofs = le32_to_cpu(i_ext.fofs); 454 ext->fofs = le32_to_cpu(i_ext->fofs);
404 ext->blk = le32_to_cpu(i_ext.blk); 455 ext->blk = le32_to_cpu(i_ext->blk);
405 ext->len = le32_to_cpu(i_ext.len); 456 ext->len = le32_to_cpu(i_ext->len);
406} 457}
407 458
408static inline void set_raw_extent(struct extent_info *ext, 459static inline void set_raw_extent(struct extent_info *ext,
@@ -599,7 +650,6 @@ struct f2fs_sm_info {
599 * dirty dentry blocks, dirty node blocks, and dirty meta blocks. 650 * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
600 */ 651 */
601enum count_type { 652enum count_type {
602 F2FS_WRITEBACK,
603 F2FS_DIRTY_DENTS, 653 F2FS_DIRTY_DENTS,
604 F2FS_DIRTY_DATA, 654 F2FS_DIRTY_DATA,
605 F2FS_DIRTY_NODES, 655 F2FS_DIRTY_NODES,
@@ -672,6 +722,7 @@ enum {
672 SBI_IS_CLOSE, /* specify unmounting */ 722 SBI_IS_CLOSE, /* specify unmounting */
673 SBI_NEED_FSCK, /* need fsck.f2fs to fix */ 723 SBI_NEED_FSCK, /* need fsck.f2fs to fix */
674 SBI_POR_DOING, /* recovery is doing or not */ 724 SBI_POR_DOING, /* recovery is doing or not */
725 SBI_NEED_SB_WRITE, /* need to recover superblock */
675}; 726};
676 727
677enum { 728enum {
@@ -680,6 +731,10 @@ enum {
680 MAX_TIME, 731 MAX_TIME,
681}; 732};
682 733
734#ifdef CONFIG_F2FS_FS_ENCRYPTION
735#define F2FS_KEY_DESC_PREFIX "f2fs:"
736#define F2FS_KEY_DESC_PREFIX_SIZE 5
737#endif
683struct f2fs_sb_info { 738struct f2fs_sb_info {
684 struct super_block *sb; /* pointer to VFS super block */ 739 struct super_block *sb; /* pointer to VFS super block */
685 struct proc_dir_entry *s_proc; /* proc entry */ 740 struct proc_dir_entry *s_proc; /* proc entry */
@@ -687,6 +742,10 @@ struct f2fs_sb_info {
687 int valid_super_block; /* valid super block no */ 742 int valid_super_block; /* valid super block no */
688 int s_flag; /* flags for sbi */ 743 int s_flag; /* flags for sbi */
689 744
745#ifdef CONFIG_F2FS_FS_ENCRYPTION
746 u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
747 u8 key_prefix_size;
748#endif
690 /* for node-related operations */ 749 /* for node-related operations */
691 struct f2fs_nm_info *nm_info; /* node manager */ 750 struct f2fs_nm_info *nm_info; /* node manager */
692 struct inode *node_inode; /* cache node blocks */ 751 struct inode *node_inode; /* cache node blocks */
@@ -742,18 +801,24 @@ struct f2fs_sb_info {
742 unsigned int total_sections; /* total section count */ 801 unsigned int total_sections; /* total section count */
743 unsigned int total_node_count; /* total node block count */ 802 unsigned int total_node_count; /* total node block count */
744 unsigned int total_valid_node_count; /* valid node block count */ 803 unsigned int total_valid_node_count; /* valid node block count */
745 unsigned int total_valid_inode_count; /* valid inode count */
746 loff_t max_file_blocks; /* max block index of file */ 804 loff_t max_file_blocks; /* max block index of file */
747 int active_logs; /* # of active logs */ 805 int active_logs; /* # of active logs */
748 int dir_level; /* directory level */ 806 int dir_level; /* directory level */
749 807
750 block_t user_block_count; /* # of user blocks */ 808 block_t user_block_count; /* # of user blocks */
751 block_t total_valid_block_count; /* # of valid blocks */ 809 block_t total_valid_block_count; /* # of valid blocks */
752 block_t alloc_valid_block_count; /* # of allocated blocks */
753 block_t discard_blks; /* discard command candidats */ 810 block_t discard_blks; /* discard command candidats */
754 block_t last_valid_block_count; /* for recovery */ 811 block_t last_valid_block_count; /* for recovery */
755 u32 s_next_generation; /* for NFS support */ 812 u32 s_next_generation; /* for NFS support */
756 atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ 813 atomic_t nr_wb_bios; /* # of writeback bios */
814
815 /* # of pages, see count_type */
816 struct percpu_counter nr_pages[NR_COUNT_TYPE];
817 /* # of allocated blocks */
818 struct percpu_counter alloc_valid_block_count;
819
820 /* valid inode count */
821 struct percpu_counter total_valid_inode_count;
757 822
758 struct f2fs_mount_info mount_opt; /* mount options */ 823 struct f2fs_mount_info mount_opt; /* mount options */
759 824
@@ -1055,21 +1120,33 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
1055} 1120}
1056 1121
1057static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 1122static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
1058 struct inode *inode, blkcnt_t count) 1123 struct inode *inode, blkcnt_t *count)
1059{ 1124{
1060 block_t valid_block_count; 1125 block_t valid_block_count;
1061 1126
1062 spin_lock(&sbi->stat_lock); 1127 spin_lock(&sbi->stat_lock);
1063 valid_block_count = 1128#ifdef CONFIG_F2FS_FAULT_INJECTION
1064 sbi->total_valid_block_count + (block_t)count; 1129 if (time_to_inject(FAULT_BLOCK)) {
1065 if (unlikely(valid_block_count > sbi->user_block_count)) {
1066 spin_unlock(&sbi->stat_lock); 1130 spin_unlock(&sbi->stat_lock);
1067 return false; 1131 return false;
1068 } 1132 }
1069 inode->i_blocks += count; 1133#endif
1070 sbi->total_valid_block_count = valid_block_count; 1134 valid_block_count =
1071 sbi->alloc_valid_block_count += (block_t)count; 1135 sbi->total_valid_block_count + (block_t)(*count);
1136 if (unlikely(valid_block_count > sbi->user_block_count)) {
1137 *count = sbi->user_block_count - sbi->total_valid_block_count;
1138 if (!*count) {
1139 spin_unlock(&sbi->stat_lock);
1140 return false;
1141 }
1142 }
1143 /* *count can be recalculated */
1144 inode->i_blocks += *count;
1145 sbi->total_valid_block_count =
1146 sbi->total_valid_block_count + (block_t)(*count);
1072 spin_unlock(&sbi->stat_lock); 1147 spin_unlock(&sbi->stat_lock);
1148
1149 percpu_counter_add(&sbi->alloc_valid_block_count, (*count));
1073 return true; 1150 return true;
1074} 1151}
1075 1152
@@ -1087,20 +1164,20 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
1087 1164
1088static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) 1165static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
1089{ 1166{
1090 atomic_inc(&sbi->nr_pages[count_type]); 1167 percpu_counter_inc(&sbi->nr_pages[count_type]);
1091 set_sbi_flag(sbi, SBI_IS_DIRTY); 1168 set_sbi_flag(sbi, SBI_IS_DIRTY);
1092} 1169}
1093 1170
1094static inline void inode_inc_dirty_pages(struct inode *inode) 1171static inline void inode_inc_dirty_pages(struct inode *inode)
1095{ 1172{
1096 atomic_inc(&F2FS_I(inode)->dirty_pages); 1173 percpu_counter_inc(&F2FS_I(inode)->dirty_pages);
1097 inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1174 inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
1098 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1175 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
1099} 1176}
1100 1177
1101static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) 1178static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
1102{ 1179{
1103 atomic_dec(&sbi->nr_pages[count_type]); 1180 percpu_counter_dec(&sbi->nr_pages[count_type]);
1104} 1181}
1105 1182
1106static inline void inode_dec_dirty_pages(struct inode *inode) 1183static inline void inode_dec_dirty_pages(struct inode *inode)
@@ -1109,26 +1186,28 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
1109 !S_ISLNK(inode->i_mode)) 1186 !S_ISLNK(inode->i_mode))
1110 return; 1187 return;
1111 1188
1112 atomic_dec(&F2FS_I(inode)->dirty_pages); 1189 percpu_counter_dec(&F2FS_I(inode)->dirty_pages);
1113 dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? 1190 dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
1114 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); 1191 F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
1115} 1192}
1116 1193
1117static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) 1194static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
1118{ 1195{
1119 return atomic_read(&sbi->nr_pages[count_type]); 1196 return percpu_counter_sum_positive(&sbi->nr_pages[count_type]);
1120} 1197}
1121 1198
1122static inline int get_dirty_pages(struct inode *inode) 1199static inline s64 get_dirty_pages(struct inode *inode)
1123{ 1200{
1124 return atomic_read(&F2FS_I(inode)->dirty_pages); 1201 return percpu_counter_sum_positive(&F2FS_I(inode)->dirty_pages);
1125} 1202}
1126 1203
1127static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) 1204static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
1128{ 1205{
1129 unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; 1206 unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
1130 return ((get_pages(sbi, block_type) + pages_per_sec - 1) 1207 unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >>
1131 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec; 1208 sbi->log_blocks_per_seg;
1209
1210 return segs / sbi->segs_per_sec;
1132} 1211}
1133 1212
1134static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) 1213static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
@@ -1217,11 +1296,11 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
1217 if (inode) 1296 if (inode)
1218 inode->i_blocks++; 1297 inode->i_blocks++;
1219 1298
1220 sbi->alloc_valid_block_count++;
1221 sbi->total_valid_node_count++; 1299 sbi->total_valid_node_count++;
1222 sbi->total_valid_block_count++; 1300 sbi->total_valid_block_count++;
1223 spin_unlock(&sbi->stat_lock); 1301 spin_unlock(&sbi->stat_lock);
1224 1302
1303 percpu_counter_inc(&sbi->alloc_valid_block_count);
1225 return true; 1304 return true;
1226} 1305}
1227 1306
@@ -1248,28 +1327,30 @@ static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
1248 1327
1249static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) 1328static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
1250{ 1329{
1251 spin_lock(&sbi->stat_lock); 1330 percpu_counter_inc(&sbi->total_valid_inode_count);
1252 f2fs_bug_on(sbi, sbi->total_valid_inode_count == sbi->total_node_count);
1253 sbi->total_valid_inode_count++;
1254 spin_unlock(&sbi->stat_lock);
1255} 1331}
1256 1332
1257static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) 1333static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
1258{ 1334{
1259 spin_lock(&sbi->stat_lock); 1335 percpu_counter_dec(&sbi->total_valid_inode_count);
1260 f2fs_bug_on(sbi, !sbi->total_valid_inode_count);
1261 sbi->total_valid_inode_count--;
1262 spin_unlock(&sbi->stat_lock);
1263} 1336}
1264 1337
1265static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 1338static inline s64 valid_inode_count(struct f2fs_sb_info *sbi)
1266{ 1339{
1267 return sbi->total_valid_inode_count; 1340 return percpu_counter_sum_positive(&sbi->total_valid_inode_count);
1268} 1341}
1269 1342
1270static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, 1343static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
1271 pgoff_t index, bool for_write) 1344 pgoff_t index, bool for_write)
1272{ 1345{
1346#ifdef CONFIG_F2FS_FAULT_INJECTION
1347 struct page *page = find_lock_page(mapping, index);
1348 if (page)
1349 return page;
1350
1351 if (time_to_inject(FAULT_PAGE_ALLOC))
1352 return NULL;
1353#endif
1273 if (!for_write) 1354 if (!for_write)
1274 return grab_cache_page(mapping, index); 1355 return grab_cache_page(mapping, index);
1275 return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); 1356 return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
@@ -1435,7 +1516,6 @@ enum {
1435 FI_NO_ALLOC, /* should not allocate any blocks */ 1516 FI_NO_ALLOC, /* should not allocate any blocks */
1436 FI_FREE_NID, /* free allocated nide */ 1517 FI_FREE_NID, /* free allocated nide */
1437 FI_UPDATE_DIR, /* should update inode block for consistency */ 1518 FI_UPDATE_DIR, /* should update inode block for consistency */
1438 FI_DELAY_IPUT, /* used for the recovery */
1439 FI_NO_EXTENT, /* not to use the extent cache */ 1519 FI_NO_EXTENT, /* not to use the extent cache */
1440 FI_INLINE_XATTR, /* used for inline xattr */ 1520 FI_INLINE_XATTR, /* used for inline xattr */
1441 FI_INLINE_DATA, /* used for inline data*/ 1521 FI_INLINE_DATA, /* used for inline data*/
@@ -1618,12 +1698,6 @@ static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi)
1618 return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); 1698 return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1619} 1699}
1620 1700
1621static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi)
1622{
1623 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
1624 sbi->sb->s_flags |= MS_RDONLY;
1625}
1626
1627static inline bool is_dot_dotdot(const struct qstr *str) 1701static inline bool is_dot_dotdot(const struct qstr *str)
1628{ 1702{
1629 if (str->len == 1 && str->name[0] == '.') 1703 if (str->len == 1 && str->name[0] == '.')
@@ -1644,6 +1718,15 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
1644 return S_ISREG(inode->i_mode); 1718 return S_ISREG(inode->i_mode);
1645} 1719}
1646 1720
1721static inline void *f2fs_kmalloc(size_t size, gfp_t flags)
1722{
1723#ifdef CONFIG_F2FS_FAULT_INJECTION
1724 if (time_to_inject(FAULT_KMALLOC))
1725 return NULL;
1726#endif
1727 return kmalloc(size, flags);
1728}
1729
1647static inline void *f2fs_kvmalloc(size_t size, gfp_t flags) 1730static inline void *f2fs_kvmalloc(size_t size, gfp_t flags)
1648{ 1731{
1649 void *ret; 1732 void *ret;
@@ -1710,7 +1793,7 @@ struct dentry *f2fs_get_parent(struct dentry *child);
1710 */ 1793 */
1711extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; 1794extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
1712void set_de_type(struct f2fs_dir_entry *, umode_t); 1795void set_de_type(struct f2fs_dir_entry *, umode_t);
1713 1796unsigned char get_de_type(struct f2fs_dir_entry *);
1714struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, 1797struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
1715 f2fs_hash_t, int *, struct f2fs_dentry_ptr *); 1798 f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
1716bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, 1799bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
@@ -1731,6 +1814,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1731int update_dent_inode(struct inode *, struct inode *, const struct qstr *); 1814int update_dent_inode(struct inode *, struct inode *, const struct qstr *);
1732void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, 1815void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
1733 const struct qstr *, f2fs_hash_t , unsigned int); 1816 const struct qstr *, f2fs_hash_t , unsigned int);
1817int f2fs_add_regular_entry(struct inode *, const struct qstr *,
1818 struct inode *, nid_t, umode_t);
1734int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, 1819int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
1735 umode_t); 1820 umode_t);
1736void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, 1821void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
@@ -1781,7 +1866,10 @@ void ra_node_page(struct f2fs_sb_info *, nid_t);
1781struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 1866struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
1782struct page *get_node_page_ra(struct page *, int); 1867struct page *get_node_page_ra(struct page *, int);
1783void sync_inode_page(struct dnode_of_data *); 1868void sync_inode_page(struct dnode_of_data *);
1784int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); 1869void move_node_page(struct page *, int);
1870int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *,
1871 bool);
1872int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
1785bool alloc_nid(struct f2fs_sb_info *, nid_t *); 1873bool alloc_nid(struct f2fs_sb_info *, nid_t *);
1786void alloc_nid_done(struct f2fs_sb_info *, nid_t); 1874void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1787void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1875void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
@@ -1843,6 +1931,7 @@ void destroy_segment_manager_caches(void);
1843/* 1931/*
1844 * checkpoint.c 1932 * checkpoint.c
1845 */ 1933 */
1934void f2fs_stop_checkpoint(struct f2fs_sb_info *, bool);
1846struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); 1935struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1847struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1936struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1848struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t); 1937struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
@@ -1852,7 +1941,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
1852long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1941long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1853void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); 1942void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
1854void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); 1943void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
1855void release_ino_entry(struct f2fs_sb_info *); 1944void release_ino_entry(struct f2fs_sb_info *, bool);
1856bool exist_written_data(struct f2fs_sb_info *, nid_t, int); 1945bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1857int acquire_orphan_inode(struct f2fs_sb_info *); 1946int acquire_orphan_inode(struct f2fs_sb_info *);
1858void release_orphan_inode(struct f2fs_sb_info *); 1947void release_orphan_inode(struct f2fs_sb_info *);
@@ -1861,7 +1950,6 @@ void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
1861int recover_orphan_inodes(struct f2fs_sb_info *); 1950int recover_orphan_inodes(struct f2fs_sb_info *);
1862int get_valid_checkpoint(struct f2fs_sb_info *); 1951int get_valid_checkpoint(struct f2fs_sb_info *);
1863void update_dirty_page(struct inode *, struct page *); 1952void update_dirty_page(struct inode *, struct page *);
1864void add_dirty_dir_inode(struct inode *);
1865void remove_dirty_inode(struct inode *); 1953void remove_dirty_inode(struct inode *);
1866int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type); 1954int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
1867int write_checkpoint(struct f2fs_sb_info *, struct cp_control *); 1955int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
@@ -1880,6 +1968,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *);
1880void f2fs_submit_page_mbio(struct f2fs_io_info *); 1968void f2fs_submit_page_mbio(struct f2fs_io_info *);
1881void set_data_blkaddr(struct dnode_of_data *); 1969void set_data_blkaddr(struct dnode_of_data *);
1882void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); 1970void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
1971int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
1883int reserve_new_block(struct dnode_of_data *); 1972int reserve_new_block(struct dnode_of_data *);
1884int f2fs_get_block(struct dnode_of_data *, pgoff_t); 1973int f2fs_get_block(struct dnode_of_data *, pgoff_t);
1885ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); 1974ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
@@ -1906,7 +1995,7 @@ void build_gc_manager(struct f2fs_sb_info *);
1906/* 1995/*
1907 * recovery.c 1996 * recovery.c
1908 */ 1997 */
1909int recover_fsync_data(struct f2fs_sb_info *); 1998int recover_fsync_data(struct f2fs_sb_info *, bool);
1910bool space_for_roll_forward(struct f2fs_sb_info *); 1999bool space_for_roll_forward(struct f2fs_sb_info *);
1911 2000
1912/* 2001/*
@@ -1921,12 +2010,12 @@ struct f2fs_stat_info {
1921 unsigned long long hit_largest, hit_cached, hit_rbtree; 2010 unsigned long long hit_largest, hit_cached, hit_rbtree;
1922 unsigned long long hit_total, total_ext; 2011 unsigned long long hit_total, total_ext;
1923 int ext_tree, zombie_tree, ext_node; 2012 int ext_tree, zombie_tree, ext_node;
1924 int ndirty_node, ndirty_meta; 2013 s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages;
1925 int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files; 2014 unsigned int ndirty_dirs, ndirty_files;
1926 int nats, dirty_nats, sits, dirty_sits, fnids; 2015 int nats, dirty_nats, sits, dirty_sits, fnids;
1927 int total_count, utilization; 2016 int total_count, utilization;
1928 int bg_gc, inmem_pages, wb_pages; 2017 int bg_gc, wb_bios;
1929 int inline_xattr, inline_inode, inline_dir; 2018 int inline_xattr, inline_inode, inline_dir, orphans;
1930 unsigned int valid_count, valid_node_count, valid_inode_count; 2019 unsigned int valid_count, valid_node_count, valid_inode_count;
1931 unsigned int bimodal, avg_vblocks; 2020 unsigned int bimodal, avg_vblocks;
1932 int util_free, util_valid, util_invalid; 2021 int util_free, util_valid, util_invalid;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c6b14951bef3..f4c0086655c4 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -182,7 +182,8 @@ static void try_to_fix_pino(struct inode *inode)
182 } 182 }
183} 183}
184 184
185int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) 185static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
186 int datasync, bool atomic)
186{ 187{
187 struct inode *inode = file->f_mapping->host; 188 struct inode *inode = file->f_mapping->host;
188 struct f2fs_inode_info *fi = F2FS_I(inode); 189 struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -256,7 +257,9 @@ go_write:
256 goto out; 257 goto out;
257 } 258 }
258sync_nodes: 259sync_nodes:
259 sync_node_pages(sbi, ino, &wbc); 260 ret = fsync_node_pages(sbi, ino, &wbc, atomic);
261 if (ret)
262 goto out;
260 263
261 /* if cp_error was enabled, we should avoid infinite loop */ 264 /* if cp_error was enabled, we should avoid infinite loop */
262 if (unlikely(f2fs_cp_error(sbi))) { 265 if (unlikely(f2fs_cp_error(sbi))) {
@@ -288,6 +291,11 @@ out:
288 return ret; 291 return ret;
289} 292}
290 293
294int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
295{
296 return f2fs_do_sync_file(file, start, end, datasync, false);
297}
298
291static pgoff_t __get_first_dirty_index(struct address_space *mapping, 299static pgoff_t __get_first_dirty_index(struct address_space *mapping,
292 pgoff_t pgofs, int whence) 300 pgoff_t pgofs, int whence)
293{ 301{
@@ -555,6 +563,9 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
555 563
556 free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1); 564 free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);
557 565
566 if (free_from >= sbi->max_file_blocks)
567 goto free_partial;
568
558 if (lock) 569 if (lock)
559 f2fs_lock_op(sbi); 570 f2fs_lock_op(sbi);
560 571
@@ -573,7 +584,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
573 } 584 }
574 585
575 set_new_dnode(&dn, inode, ipage, NULL, 0); 586 set_new_dnode(&dn, inode, ipage, NULL, 0);
576 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 587 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
577 if (err) { 588 if (err) {
578 if (err == -ENOENT) 589 if (err == -ENOENT)
579 goto free_next; 590 goto free_next;
@@ -596,7 +607,7 @@ free_next:
596out: 607out:
597 if (lock) 608 if (lock)
598 f2fs_unlock_op(sbi); 609 f2fs_unlock_op(sbi);
599 610free_partial:
600 /* lastly zero out the first data page */ 611 /* lastly zero out the first data page */
601 if (!err) 612 if (!err)
602 err = truncate_partial_data_page(inode, from, truncate_page); 613 err = truncate_partial_data_page(inode, from, truncate_page);
@@ -986,6 +997,49 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
986 return ret; 997 return ret;
987} 998}
988 999
1000static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
1001 pgoff_t end)
1002{
1003 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1004 pgoff_t index = start;
1005 unsigned int ofs_in_node = dn->ofs_in_node;
1006 blkcnt_t count = 0;
1007 int ret;
1008
1009 for (; index < end; index++, dn->ofs_in_node++) {
1010 if (datablock_addr(dn->node_page, dn->ofs_in_node) == NULL_ADDR)
1011 count++;
1012 }
1013
1014 dn->ofs_in_node = ofs_in_node;
1015 ret = reserve_new_blocks(dn, count);
1016 if (ret)
1017 return ret;
1018
1019 dn->ofs_in_node = ofs_in_node;
1020 for (index = start; index < end; index++, dn->ofs_in_node++) {
1021 dn->data_blkaddr =
1022 datablock_addr(dn->node_page, dn->ofs_in_node);
1023 /*
1024 * reserve_new_blocks will not guarantee entire block
1025 * allocation.
1026 */
1027 if (dn->data_blkaddr == NULL_ADDR) {
1028 ret = -ENOSPC;
1029 break;
1030 }
1031 if (dn->data_blkaddr != NEW_ADDR) {
1032 invalidate_blocks(sbi, dn->data_blkaddr);
1033 dn->data_blkaddr = NEW_ADDR;
1034 set_data_blkaddr(dn);
1035 }
1036 }
1037
1038 f2fs_update_extent_cache_range(dn, start, 0, index - start);
1039
1040 return ret;
1041}
1042
989static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, 1043static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
990 int mode) 1044 int mode)
991{ 1045{
@@ -1036,35 +1090,32 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
1036 (loff_t)pg_start << PAGE_SHIFT); 1090 (loff_t)pg_start << PAGE_SHIFT);
1037 } 1091 }
1038 1092
1039 for (index = pg_start; index < pg_end; index++) { 1093 for (index = pg_start; index < pg_end;) {
1040 struct dnode_of_data dn; 1094 struct dnode_of_data dn;
1041 struct page *ipage; 1095 unsigned int end_offset;
1096 pgoff_t end;
1042 1097
1043 f2fs_lock_op(sbi); 1098 f2fs_lock_op(sbi);
1044 1099
1045 ipage = get_node_page(sbi, inode->i_ino); 1100 set_new_dnode(&dn, inode, NULL, NULL, 0);
1046 if (IS_ERR(ipage)) { 1101 ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
1047 ret = PTR_ERR(ipage);
1048 f2fs_unlock_op(sbi);
1049 goto out;
1050 }
1051
1052 set_new_dnode(&dn, inode, ipage, NULL, 0);
1053 ret = f2fs_reserve_block(&dn, index);
1054 if (ret) { 1102 if (ret) {
1055 f2fs_unlock_op(sbi); 1103 f2fs_unlock_op(sbi);
1056 goto out; 1104 goto out;
1057 } 1105 }
1058 1106
1059 if (dn.data_blkaddr != NEW_ADDR) { 1107 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1060 invalidate_blocks(sbi, dn.data_blkaddr); 1108 end = min(pg_end, end_offset - dn.ofs_in_node + index);
1061 f2fs_update_data_blkaddr(&dn, NEW_ADDR); 1109
1062 } 1110 ret = f2fs_do_zero_range(&dn, index, end);
1063 f2fs_put_dnode(&dn); 1111 f2fs_put_dnode(&dn);
1064 f2fs_unlock_op(sbi); 1112 f2fs_unlock_op(sbi);
1113 if (ret)
1114 goto out;
1065 1115
1116 index = end;
1066 new_size = max_t(loff_t, new_size, 1117 new_size = max_t(loff_t, new_size,
1067 (loff_t)(index + 1) << PAGE_SHIFT); 1118 (loff_t)index << PAGE_SHIFT);
1068 } 1119 }
1069 1120
1070 if (off_end) { 1121 if (off_end) {
@@ -1147,10 +1198,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
1147 loff_t len, int mode) 1198 loff_t len, int mode)
1148{ 1199{
1149 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1200 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1150 pgoff_t index, pg_start, pg_end; 1201 struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
1202 pgoff_t pg_end;
1151 loff_t new_size = i_size_read(inode); 1203 loff_t new_size = i_size_read(inode);
1152 loff_t off_start, off_end; 1204 loff_t off_end;
1153 int ret = 0; 1205 int ret;
1154 1206
1155 ret = inode_newsize_ok(inode, (len + offset)); 1207 ret = inode_newsize_ok(inode, (len + offset));
1156 if (ret) 1208 if (ret)
@@ -1162,43 +1214,35 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
1162 1214
1163 f2fs_balance_fs(sbi, true); 1215 f2fs_balance_fs(sbi, true);
1164 1216
1165 pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; 1217 pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
1166 pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1167
1168 off_start = offset & (PAGE_SIZE - 1);
1169 off_end = (offset + len) & (PAGE_SIZE - 1); 1218 off_end = (offset + len) & (PAGE_SIZE - 1);
1170 1219
1171 f2fs_lock_op(sbi); 1220 map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT;
1221 map.m_len = pg_end - map.m_lblk;
1222 if (off_end)
1223 map.m_len++;
1172 1224
1173 for (index = pg_start; index <= pg_end; index++) { 1225 ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
1174 struct dnode_of_data dn; 1226 if (ret) {
1227 pgoff_t last_off;
1175 1228
1176 if (index == pg_end && !off_end) 1229 if (!map.m_len)
1177 goto noalloc; 1230 return ret;
1178 1231
1179 set_new_dnode(&dn, inode, NULL, NULL, 0); 1232 last_off = map.m_lblk + map.m_len - 1;
1180 ret = f2fs_reserve_block(&dn, index); 1233
1181 if (ret) 1234 /* update new size to the failed position */
1182 break; 1235 new_size = (last_off == pg_end) ? offset + len:
1183noalloc: 1236 (loff_t)(last_off + 1) << PAGE_SHIFT;
1184 if (pg_start == pg_end) 1237 } else {
1185 new_size = offset + len; 1238 new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
1186 else if (index == pg_start && off_start)
1187 new_size = (loff_t)(index + 1) << PAGE_SHIFT;
1188 else if (index == pg_end)
1189 new_size = ((loff_t)index << PAGE_SHIFT) +
1190 off_end;
1191 else
1192 new_size += PAGE_SIZE;
1193 } 1239 }
1194 1240
1195 if (!(mode & FALLOC_FL_KEEP_SIZE) && 1241 if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) {
1196 i_size_read(inode) < new_size) {
1197 i_size_write(inode, new_size); 1242 i_size_write(inode, new_size);
1198 mark_inode_dirty(inode); 1243 mark_inode_dirty(inode);
1199 update_inode_page(inode); 1244 update_inode_page(inode);
1200 } 1245 }
1201 f2fs_unlock_op(sbi);
1202 1246
1203 return ret; 1247 return ret;
1204} 1248}
@@ -1254,10 +1298,19 @@ out:
1254 1298
1255static int f2fs_release_file(struct inode *inode, struct file *filp) 1299static int f2fs_release_file(struct inode *inode, struct file *filp)
1256{ 1300{
1301 /*
1302 * f2fs_relase_file is called at every close calls. So we should
1303 * not drop any inmemory pages by close called by other process.
1304 */
1305 if (!(filp->f_mode & FMODE_WRITE) ||
1306 atomic_read(&inode->i_writecount) != 1)
1307 return 0;
1308
1257 /* some remained atomic pages should discarded */ 1309 /* some remained atomic pages should discarded */
1258 if (f2fs_is_atomic_file(inode)) 1310 if (f2fs_is_atomic_file(inode))
1259 drop_inmem_pages(inode); 1311 drop_inmem_pages(inode);
1260 if (f2fs_is_volatile_file(inode)) { 1312 if (f2fs_is_volatile_file(inode)) {
1313 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1261 set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); 1314 set_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
1262 filemap_fdatawrite(inode->i_mapping); 1315 filemap_fdatawrite(inode->i_mapping);
1263 clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); 1316 clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
@@ -1294,20 +1347,16 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
1294 unsigned int oldflags; 1347 unsigned int oldflags;
1295 int ret; 1348 int ret;
1296 1349
1350 if (!inode_owner_or_capable(inode))
1351 return -EACCES;
1352
1353 if (get_user(flags, (int __user *)arg))
1354 return -EFAULT;
1355
1297 ret = mnt_want_write_file(filp); 1356 ret = mnt_want_write_file(filp);
1298 if (ret) 1357 if (ret)
1299 return ret; 1358 return ret;
1300 1359
1301 if (!inode_owner_or_capable(inode)) {
1302 ret = -EACCES;
1303 goto out;
1304 }
1305
1306 if (get_user(flags, (int __user *)arg)) {
1307 ret = -EFAULT;
1308 goto out;
1309 }
1310
1311 flags = f2fs_mask_flags(inode->i_mode, flags); 1360 flags = f2fs_mask_flags(inode->i_mode, flags);
1312 1361
1313 inode_lock(inode); 1362 inode_lock(inode);
@@ -1350,17 +1399,35 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
1350 if (!inode_owner_or_capable(inode)) 1399 if (!inode_owner_or_capable(inode))
1351 return -EACCES; 1400 return -EACCES;
1352 1401
1402 ret = mnt_want_write_file(filp);
1403 if (ret)
1404 return ret;
1405
1406 inode_lock(inode);
1407
1353 if (f2fs_is_atomic_file(inode)) 1408 if (f2fs_is_atomic_file(inode))
1354 return 0; 1409 goto out;
1355 1410
1356 ret = f2fs_convert_inline_inode(inode); 1411 ret = f2fs_convert_inline_inode(inode);
1357 if (ret) 1412 if (ret)
1358 return ret; 1413 goto out;
1359 1414
1360 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1415 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1361 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1416 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1362 1417
1363 return 0; 1418 if (!get_dirty_pages(inode))
1419 goto out;
1420
1421 f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
1422 "Unexpected flush for atomic writes: ino=%lu, npages=%lld",
1423 inode->i_ino, get_dirty_pages(inode));
1424 ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
1425 if (ret)
1426 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1427out:
1428 inode_unlock(inode);
1429 mnt_drop_write_file(filp);
1430 return ret;
1364} 1431}
1365 1432
1366static int f2fs_ioc_commit_atomic_write(struct file *filp) 1433static int f2fs_ioc_commit_atomic_write(struct file *filp)
@@ -1371,13 +1438,15 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
1371 if (!inode_owner_or_capable(inode)) 1438 if (!inode_owner_or_capable(inode))
1372 return -EACCES; 1439 return -EACCES;
1373 1440
1374 if (f2fs_is_volatile_file(inode))
1375 return 0;
1376
1377 ret = mnt_want_write_file(filp); 1441 ret = mnt_want_write_file(filp);
1378 if (ret) 1442 if (ret)
1379 return ret; 1443 return ret;
1380 1444
1445 inode_lock(inode);
1446
1447 if (f2fs_is_volatile_file(inode))
1448 goto err_out;
1449
1381 if (f2fs_is_atomic_file(inode)) { 1450 if (f2fs_is_atomic_file(inode)) {
1382 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1451 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1383 ret = commit_inmem_pages(inode); 1452 ret = commit_inmem_pages(inode);
@@ -1387,8 +1456,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
1387 } 1456 }
1388 } 1457 }
1389 1458
1390 ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1459 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1391err_out: 1460err_out:
1461 inode_unlock(inode);
1392 mnt_drop_write_file(filp); 1462 mnt_drop_write_file(filp);
1393 return ret; 1463 return ret;
1394} 1464}
@@ -1401,32 +1471,54 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
1401 if (!inode_owner_or_capable(inode)) 1471 if (!inode_owner_or_capable(inode))
1402 return -EACCES; 1472 return -EACCES;
1403 1473
1474 ret = mnt_want_write_file(filp);
1475 if (ret)
1476 return ret;
1477
1478 inode_lock(inode);
1479
1404 if (f2fs_is_volatile_file(inode)) 1480 if (f2fs_is_volatile_file(inode))
1405 return 0; 1481 goto out;
1406 1482
1407 ret = f2fs_convert_inline_inode(inode); 1483 ret = f2fs_convert_inline_inode(inode);
1408 if (ret) 1484 if (ret)
1409 return ret; 1485 goto out;
1410 1486
1411 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1487 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1412 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1488 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1413 return 0; 1489out:
1490 inode_unlock(inode);
1491 mnt_drop_write_file(filp);
1492 return ret;
1414} 1493}
1415 1494
1416static int f2fs_ioc_release_volatile_write(struct file *filp) 1495static int f2fs_ioc_release_volatile_write(struct file *filp)
1417{ 1496{
1418 struct inode *inode = file_inode(filp); 1497 struct inode *inode = file_inode(filp);
1498 int ret;
1419 1499
1420 if (!inode_owner_or_capable(inode)) 1500 if (!inode_owner_or_capable(inode))
1421 return -EACCES; 1501 return -EACCES;
1422 1502
1503 ret = mnt_want_write_file(filp);
1504 if (ret)
1505 return ret;
1506
1507 inode_lock(inode);
1508
1423 if (!f2fs_is_volatile_file(inode)) 1509 if (!f2fs_is_volatile_file(inode))
1424 return 0; 1510 goto out;
1425 1511
1426 if (!f2fs_is_first_block_written(inode)) 1512 if (!f2fs_is_first_block_written(inode)) {
1427 return truncate_partial_data_page(inode, 0, true); 1513 ret = truncate_partial_data_page(inode, 0, true);
1514 goto out;
1515 }
1428 1516
1429 return punch_hole(inode, 0, F2FS_BLKSIZE); 1517 ret = punch_hole(inode, 0, F2FS_BLKSIZE);
1518out:
1519 inode_unlock(inode);
1520 mnt_drop_write_file(filp);
1521 return ret;
1430} 1522}
1431 1523
1432static int f2fs_ioc_abort_volatile_write(struct file *filp) 1524static int f2fs_ioc_abort_volatile_write(struct file *filp)
@@ -1441,15 +1533,17 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
1441 if (ret) 1533 if (ret)
1442 return ret; 1534 return ret;
1443 1535
1444 if (f2fs_is_atomic_file(inode)) { 1536 inode_lock(inode);
1445 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 1537
1538 if (f2fs_is_atomic_file(inode))
1446 drop_inmem_pages(inode); 1539 drop_inmem_pages(inode);
1447 }
1448 if (f2fs_is_volatile_file(inode)) { 1540 if (f2fs_is_volatile_file(inode)) {
1449 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 1541 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1450 ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); 1542 ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1451 } 1543 }
1452 1544
1545 inode_unlock(inode);
1546
1453 mnt_drop_write_file(filp); 1547 mnt_drop_write_file(filp);
1454 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1548 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1455 return ret; 1549 return ret;
@@ -1461,6 +1555,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1461 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1555 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1462 struct super_block *sb = sbi->sb; 1556 struct super_block *sb = sbi->sb;
1463 __u32 in; 1557 __u32 in;
1558 int ret;
1464 1559
1465 if (!capable(CAP_SYS_ADMIN)) 1560 if (!capable(CAP_SYS_ADMIN))
1466 return -EPERM; 1561 return -EPERM;
@@ -1468,31 +1563,38 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1468 if (get_user(in, (__u32 __user *)arg)) 1563 if (get_user(in, (__u32 __user *)arg))
1469 return -EFAULT; 1564 return -EFAULT;
1470 1565
1566 ret = mnt_want_write_file(filp);
1567 if (ret)
1568 return ret;
1569
1471 switch (in) { 1570 switch (in) {
1472 case F2FS_GOING_DOWN_FULLSYNC: 1571 case F2FS_GOING_DOWN_FULLSYNC:
1473 sb = freeze_bdev(sb->s_bdev); 1572 sb = freeze_bdev(sb->s_bdev);
1474 if (sb && !IS_ERR(sb)) { 1573 if (sb && !IS_ERR(sb)) {
1475 f2fs_stop_checkpoint(sbi); 1574 f2fs_stop_checkpoint(sbi, false);
1476 thaw_bdev(sb->s_bdev, sb); 1575 thaw_bdev(sb->s_bdev, sb);
1477 } 1576 }
1478 break; 1577 break;
1479 case F2FS_GOING_DOWN_METASYNC: 1578 case F2FS_GOING_DOWN_METASYNC:
1480 /* do checkpoint only */ 1579 /* do checkpoint only */
1481 f2fs_sync_fs(sb, 1); 1580 f2fs_sync_fs(sb, 1);
1482 f2fs_stop_checkpoint(sbi); 1581 f2fs_stop_checkpoint(sbi, false);
1483 break; 1582 break;
1484 case F2FS_GOING_DOWN_NOSYNC: 1583 case F2FS_GOING_DOWN_NOSYNC:
1485 f2fs_stop_checkpoint(sbi); 1584 f2fs_stop_checkpoint(sbi, false);
1486 break; 1585 break;
1487 case F2FS_GOING_DOWN_METAFLUSH: 1586 case F2FS_GOING_DOWN_METAFLUSH:
1488 sync_meta_pages(sbi, META, LONG_MAX); 1587 sync_meta_pages(sbi, META, LONG_MAX);
1489 f2fs_stop_checkpoint(sbi); 1588 f2fs_stop_checkpoint(sbi, false);
1490 break; 1589 break;
1491 default: 1590 default:
1492 return -EINVAL; 1591 ret = -EINVAL;
1592 goto out;
1493 } 1593 }
1494 f2fs_update_time(sbi, REQ_TIME); 1594 f2fs_update_time(sbi, REQ_TIME);
1495 return 0; 1595out:
1596 mnt_drop_write_file(filp);
1597 return ret;
1496} 1598}
1497 1599
1498static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 1600static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
@@ -1513,9 +1615,14 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
1513 sizeof(range))) 1615 sizeof(range)))
1514 return -EFAULT; 1616 return -EFAULT;
1515 1617
1618 ret = mnt_want_write_file(filp);
1619 if (ret)
1620 return ret;
1621
1516 range.minlen = max((unsigned int)range.minlen, 1622 range.minlen = max((unsigned int)range.minlen,
1517 q->limits.discard_granularity); 1623 q->limits.discard_granularity);
1518 ret = f2fs_trim_fs(F2FS_SB(sb), &range); 1624 ret = f2fs_trim_fs(F2FS_SB(sb), &range);
1625 mnt_drop_write_file(filp);
1519 if (ret < 0) 1626 if (ret < 0)
1520 return ret; 1627 return ret;
1521 1628
@@ -1540,13 +1647,21 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
1540{ 1647{
1541 struct fscrypt_policy policy; 1648 struct fscrypt_policy policy;
1542 struct inode *inode = file_inode(filp); 1649 struct inode *inode = file_inode(filp);
1650 int ret;
1543 1651
1544 if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg, 1652 if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
1545 sizeof(policy))) 1653 sizeof(policy)))
1546 return -EFAULT; 1654 return -EFAULT;
1547 1655
1656 ret = mnt_want_write_file(filp);
1657 if (ret)
1658 return ret;
1659
1548 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 1660 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1549 return fscrypt_process_policy(inode, &policy); 1661 ret = fscrypt_process_policy(inode, &policy);
1662
1663 mnt_drop_write_file(filp);
1664 return ret;
1550} 1665}
1551 1666
1552static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) 1667static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
@@ -1603,6 +1718,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
1603 struct inode *inode = file_inode(filp); 1718 struct inode *inode = file_inode(filp);
1604 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1719 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1605 __u32 sync; 1720 __u32 sync;
1721 int ret;
1606 1722
1607 if (!capable(CAP_SYS_ADMIN)) 1723 if (!capable(CAP_SYS_ADMIN))
1608 return -EPERM; 1724 return -EPERM;
@@ -1613,20 +1729,30 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
1613 if (f2fs_readonly(sbi->sb)) 1729 if (f2fs_readonly(sbi->sb))
1614 return -EROFS; 1730 return -EROFS;
1615 1731
1732 ret = mnt_want_write_file(filp);
1733 if (ret)
1734 return ret;
1735
1616 if (!sync) { 1736 if (!sync) {
1617 if (!mutex_trylock(&sbi->gc_mutex)) 1737 if (!mutex_trylock(&sbi->gc_mutex)) {
1618 return -EBUSY; 1738 ret = -EBUSY;
1739 goto out;
1740 }
1619 } else { 1741 } else {
1620 mutex_lock(&sbi->gc_mutex); 1742 mutex_lock(&sbi->gc_mutex);
1621 } 1743 }
1622 1744
1623 return f2fs_gc(sbi, sync); 1745 ret = f2fs_gc(sbi, sync);
1746out:
1747 mnt_drop_write_file(filp);
1748 return ret;
1624} 1749}
1625 1750
1626static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) 1751static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
1627{ 1752{
1628 struct inode *inode = file_inode(filp); 1753 struct inode *inode = file_inode(filp);
1629 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1754 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1755 int ret;
1630 1756
1631 if (!capable(CAP_SYS_ADMIN)) 1757 if (!capable(CAP_SYS_ADMIN))
1632 return -EPERM; 1758 return -EPERM;
@@ -1634,7 +1760,14 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
1634 if (f2fs_readonly(sbi->sb)) 1760 if (f2fs_readonly(sbi->sb))
1635 return -EROFS; 1761 return -EROFS;
1636 1762
1637 return f2fs_sync_fs(sbi->sb, 1); 1763 ret = mnt_want_write_file(filp);
1764 if (ret)
1765 return ret;
1766
1767 ret = f2fs_sync_fs(sbi->sb, 1);
1768
1769 mnt_drop_write_file(filp);
1770 return ret;
1638} 1771}
1639 1772
1640static int f2fs_defragment_range(struct f2fs_sb_info *sbi, 1773static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b0051a97824c..38d56f678912 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -96,7 +96,7 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
96 dev_t dev = sbi->sb->s_bdev->bd_dev; 96 dev_t dev = sbi->sb->s_bdev->bd_dev;
97 int err = 0; 97 int err = 0;
98 98
99 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); 99 gc_th = f2fs_kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
100 if (!gc_th) { 100 if (!gc_th) {
101 err = -ENOMEM; 101 err = -ENOMEM;
102 goto out; 102 goto out;
@@ -465,15 +465,7 @@ next_step:
465 continue; 465 continue;
466 } 466 }
467 467
468 /* set page dirty and write it */ 468 move_node_page(node_page, gc_type);
469 if (gc_type == FG_GC) {
470 f2fs_wait_on_page_writeback(node_page, NODE, true);
471 set_page_dirty(node_page);
472 } else {
473 if (!PageWriteback(node_page))
474 set_page_dirty(node_page);
475 }
476 f2fs_put_page(node_page, 1);
477 stat_inc_node_blk_count(sbi, 1, gc_type); 469 stat_inc_node_blk_count(sbi, 1, gc_type);
478 } 470 }
479 471
@@ -834,18 +826,9 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
834 f2fs_put_page(sum_page, 0); 826 f2fs_put_page(sum_page, 0);
835 } 827 }
836 828
837 if (gc_type == FG_GC) { 829 if (gc_type == FG_GC)
838 if (type == SUM_TYPE_NODE) { 830 f2fs_submit_merged_bio(sbi,
839 struct writeback_control wbc = { 831 (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE);
840 .sync_mode = WB_SYNC_ALL,
841 .nr_to_write = LONG_MAX,
842 .for_reclaim = 0,
843 };
844 sync_node_pages(sbi, 0, &wbc);
845 } else {
846 f2fs_submit_merged_bio(sbi, DATA, WRITE);
847 }
848 }
849 832
850 blk_finish_plug(&plug); 833 blk_finish_plug(&plug);
851 834
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index a2fbe6f427d3..a4bb155dd00a 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -161,7 +161,7 @@ int f2fs_convert_inline_inode(struct inode *inode)
161 if (!f2fs_has_inline_data(inode)) 161 if (!f2fs_has_inline_data(inode))
162 return 0; 162 return 0;
163 163
164 page = grab_cache_page(inode->i_mapping, 0); 164 page = f2fs_grab_cache_page(inode->i_mapping, 0, false);
165 if (!page) 165 if (!page)
166 return -ENOMEM; 166 return -ENOMEM;
167 167
@@ -303,11 +303,6 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
303 else 303 else
304 f2fs_put_page(ipage, 0); 304 f2fs_put_page(ipage, 0);
305 305
306 /*
307 * For the most part, it should be a bug when name_len is zero.
308 * We stop here for figuring out where the bugs has occurred.
309 */
310 f2fs_bug_on(sbi, d.max < 0);
311 return de; 306 return de;
312} 307}
313 308
@@ -355,7 +350,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
355 * NOTE: ipage is grabbed by caller, but if any error occurs, we should 350 * NOTE: ipage is grabbed by caller, but if any error occurs, we should
356 * release ipage in this function. 351 * release ipage in this function.
357 */ 352 */
358static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, 353static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
359 struct f2fs_inline_dentry *inline_dentry) 354 struct f2fs_inline_dentry *inline_dentry)
360{ 355{
361 struct page *page; 356 struct page *page;
@@ -363,7 +358,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
363 struct f2fs_dentry_block *dentry_blk; 358 struct f2fs_dentry_block *dentry_blk;
364 int err; 359 int err;
365 360
366 page = grab_cache_page(dir->i_mapping, 0); 361 page = f2fs_grab_cache_page(dir->i_mapping, 0, false);
367 if (!page) { 362 if (!page) {
368 f2fs_put_page(ipage, 1); 363 f2fs_put_page(ipage, 1);
369 return -ENOMEM; 364 return -ENOMEM;
@@ -405,6 +400,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
405 stat_dec_inline_dir(dir); 400 stat_dec_inline_dir(dir);
406 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); 401 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
407 402
403 F2FS_I(dir)->i_current_depth = 1;
408 if (i_size_read(dir) < PAGE_SIZE) { 404 if (i_size_read(dir) < PAGE_SIZE) {
409 i_size_write(dir, PAGE_SIZE); 405 i_size_write(dir, PAGE_SIZE);
410 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 406 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
@@ -416,6 +412,105 @@ out:
416 return err; 412 return err;
417} 413}
418 414
415static int f2fs_add_inline_entries(struct inode *dir,
416 struct f2fs_inline_dentry *inline_dentry)
417{
418 struct f2fs_dentry_ptr d;
419 unsigned long bit_pos = 0;
420 int err = 0;
421
422 make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
423
424 while (bit_pos < d.max) {
425 struct f2fs_dir_entry *de;
426 struct qstr new_name;
427 nid_t ino;
428 umode_t fake_mode;
429
430 if (!test_bit_le(bit_pos, d.bitmap)) {
431 bit_pos++;
432 continue;
433 }
434
435 de = &d.dentry[bit_pos];
436
437 if (unlikely(!de->name_len)) {
438 bit_pos++;
439 continue;
440 }
441
442 new_name.name = d.filename[bit_pos];
443 new_name.len = de->name_len;
444
445 ino = le32_to_cpu(de->ino);
446 fake_mode = get_de_type(de) << S_SHIFT;
447
448 err = f2fs_add_regular_entry(dir, &new_name, NULL,
449 ino, fake_mode);
450 if (err)
451 goto punch_dentry_pages;
452
453 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
454 }
455 return 0;
456punch_dentry_pages:
457 truncate_inode_pages(&dir->i_data, 0);
458 truncate_blocks(dir, 0, false);
459 remove_dirty_inode(dir);
460 return err;
461}
462
463static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
464 struct f2fs_inline_dentry *inline_dentry)
465{
466 struct f2fs_inline_dentry *backup_dentry;
467 struct f2fs_inode_info *fi = F2FS_I(dir);
468 int err;
469
470 backup_dentry = f2fs_kmalloc(sizeof(struct f2fs_inline_dentry),
471 GFP_F2FS_ZERO);
472 if (!backup_dentry) {
473 f2fs_put_page(ipage, 1);
474 return -ENOMEM;
475 }
476
477 memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA);
478 truncate_inline_inode(ipage, 0);
479
480 unlock_page(ipage);
481
482 err = f2fs_add_inline_entries(dir, backup_dentry);
483 if (err)
484 goto recover;
485
486 lock_page(ipage);
487
488 stat_dec_inline_dir(dir);
489 clear_inode_flag(fi, FI_INLINE_DENTRY);
490 update_inode(dir, ipage);
491 kfree(backup_dentry);
492 return 0;
493recover:
494 lock_page(ipage);
495 memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA);
496 fi->i_current_depth = 0;
497 i_size_write(dir, MAX_INLINE_DATA);
498 update_inode(dir, ipage);
499 f2fs_put_page(ipage, 1);
500
501 kfree(backup_dentry);
502 return err;
503}
504
505static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
506 struct f2fs_inline_dentry *inline_dentry)
507{
508 if (!F2FS_I(dir)->i_dir_level)
509 return f2fs_move_inline_dirents(dir, ipage, inline_dentry);
510 else
511 return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry);
512}
513
419int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, 514int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
420 struct inode *inode, nid_t ino, umode_t mode) 515 struct inode *inode, nid_t ino, umode_t mode)
421{ 516{
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index cb269c46ac25..2e68adab0d64 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -283,7 +283,7 @@ retry:
283 cond_resched(); 283 cond_resched();
284 goto retry; 284 goto retry;
285 } else if (err != -ENOENT) { 285 } else if (err != -ENOENT) {
286 f2fs_stop_checkpoint(sbi); 286 f2fs_stop_checkpoint(sbi, false);
287 } 287 }
288 return 0; 288 return 0;
289 } 289 }
@@ -344,7 +344,7 @@ void f2fs_evict_inode(struct inode *inode)
344 sb_start_intwrite(inode->i_sb); 344 sb_start_intwrite(inode->i_sb);
345 set_inode_flag(fi, FI_NO_ALLOC); 345 set_inode_flag(fi, FI_NO_ALLOC);
346 i_size_write(inode, 0); 346 i_size_write(inode, 0);
347 347retry:
348 if (F2FS_HAS_BLOCKS(inode)) 348 if (F2FS_HAS_BLOCKS(inode))
349 err = f2fs_truncate(inode, true); 349 err = f2fs_truncate(inode, true);
350 350
@@ -354,6 +354,12 @@ void f2fs_evict_inode(struct inode *inode)
354 f2fs_unlock_op(sbi); 354 f2fs_unlock_op(sbi);
355 } 355 }
356 356
357 /* give more chances, if ENOMEM case */
358 if (err == -ENOMEM) {
359 err = 0;
360 goto retry;
361 }
362
357 sb_end_intwrite(inode->i_sb); 363 sb_end_intwrite(inode->i_sb);
358no_delete: 364no_delete:
359 stat_dec_inline_xattr(inode); 365 stat_dec_inline_xattr(inode);
@@ -368,26 +374,11 @@ no_delete:
368 if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) 374 if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
369 add_ino_entry(sbi, inode->i_ino, UPDATE_INO); 375 add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
370 if (is_inode_flag_set(fi, FI_FREE_NID)) { 376 if (is_inode_flag_set(fi, FI_FREE_NID)) {
371 if (err && err != -ENOENT) 377 alloc_nid_failed(sbi, inode->i_ino);
372 alloc_nid_done(sbi, inode->i_ino);
373 else
374 alloc_nid_failed(sbi, inode->i_ino);
375 clear_inode_flag(fi, FI_FREE_NID); 378 clear_inode_flag(fi, FI_FREE_NID);
376 } 379 }
377 380 f2fs_bug_on(sbi, err &&
378 if (err && err != -ENOENT) { 381 !exist_written_data(sbi, inode->i_ino, ORPHAN_INO));
379 if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) {
380 /*
381 * get here because we failed to release resource
382 * of inode previously, reminder our user to run fsck
383 * for fixing.
384 */
385 set_sbi_flag(sbi, SBI_NEED_FSCK);
386 f2fs_msg(sbi->sb, KERN_WARNING,
387 "inode (ino:%lu) resource leak, run fsck "
388 "to fix this issue!", inode->i_ino);
389 }
390 }
391out_clear: 382out_clear:
392 fscrypt_put_encryption_info(inode, NULL); 383 fscrypt_put_encryption_info(inode, NULL);
393 clear_inode(inode); 384 clear_inode(inode);
@@ -397,37 +388,32 @@ out_clear:
397void handle_failed_inode(struct inode *inode) 388void handle_failed_inode(struct inode *inode)
398{ 389{
399 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 390 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
400 int err = 0; 391 struct node_info ni;
401 392
402 clear_nlink(inode); 393 /* don't make bad inode, since it becomes a regular file. */
403 make_bad_inode(inode);
404 unlock_new_inode(inode); 394 unlock_new_inode(inode);
405 395
406 i_size_write(inode, 0);
407 if (F2FS_HAS_BLOCKS(inode))
408 err = f2fs_truncate(inode, false);
409
410 if (!err)
411 err = remove_inode_page(inode);
412
413 /* 396 /*
414 * if we skip truncate_node in remove_inode_page bacause we failed
415 * before, it's better to find another way to release resource of
416 * this inode (e.g. valid block count, node block or nid). Here we
417 * choose to add this inode to orphan list, so that we can call iput
418 * for releasing in orphan recovery flow.
419 *
420 * Note: we should add inode to orphan list before f2fs_unlock_op() 397 * Note: we should add inode to orphan list before f2fs_unlock_op()
421 * so we can prevent losing this orphan when encoutering checkpoint 398 * so we can prevent losing this orphan when encoutering checkpoint
422 * and following suddenly power-off. 399 * and following suddenly power-off.
423 */ 400 */
424 if (err && err != -ENOENT) { 401 get_node_info(sbi, inode->i_ino, &ni);
425 err = acquire_orphan_inode(sbi); 402
426 if (!err) 403 if (ni.blk_addr != NULL_ADDR) {
404 int err = acquire_orphan_inode(sbi);
405 if (err) {
406 set_sbi_flag(sbi, SBI_NEED_FSCK);
407 f2fs_msg(sbi->sb, KERN_WARNING,
408 "Too many orphan inodes, run fsck to fix.");
409 } else {
427 add_orphan_inode(sbi, inode->i_ino); 410 add_orphan_inode(sbi, inode->i_ino);
411 }
412 alloc_nid_done(sbi, inode->i_ino);
413 } else {
414 set_inode_flag(F2FS_I(inode), FI_FREE_NID);
428 } 415 }
429 416
430 set_inode_flag(F2FS_I(inode), FI_FREE_NID);
431 f2fs_unlock_op(sbi); 417 f2fs_unlock_op(sbi);
432 418
433 /* iput will drop the inode object */ 419 /* iput will drop the inode object */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1a33de9d84b1..1f21aae80c40 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -407,6 +407,29 @@ cache:
407 up_write(&nm_i->nat_tree_lock); 407 up_write(&nm_i->nat_tree_lock);
408} 408}
409 409
410/*
411 * readahead MAX_RA_NODE number of node pages.
412 */
413static void ra_node_pages(struct page *parent, int start, int n)
414{
415 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
416 struct blk_plug plug;
417 int i, end;
418 nid_t nid;
419
420 blk_start_plug(&plug);
421
422 /* Then, try readahead for siblings of the desired node */
423 end = start + n;
424 end = min(end, NIDS_PER_BLOCK);
425 for (i = start; i < end; i++) {
426 nid = get_nid(parent, i, false);
427 ra_node_page(sbi, nid);
428 }
429
430 blk_finish_plug(&plug);
431}
432
410pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) 433pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
411{ 434{
412 const long direct_index = ADDRS_PER_INODE(dn->inode); 435 const long direct_index = ADDRS_PER_INODE(dn->inode);
@@ -707,6 +730,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
707 return PTR_ERR(page); 730 return PTR_ERR(page);
708 } 731 }
709 732
733 ra_node_pages(page, ofs, NIDS_PER_BLOCK);
734
710 rn = F2FS_NODE(page); 735 rn = F2FS_NODE(page);
711 if (depth < 3) { 736 if (depth < 3) {
712 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { 737 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
@@ -784,6 +809,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
784 nid[i + 1] = get_nid(pages[i], offset[i + 1], false); 809 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
785 } 810 }
786 811
812 ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
813
787 /* free direct nodes linked to a partial indirect node */ 814 /* free direct nodes linked to a partial indirect node */
788 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { 815 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
789 child_nid = get_nid(pages[idx], i, false); 816 child_nid = get_nid(pages[idx], i, false);
@@ -832,7 +859,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
832 trace_f2fs_truncate_inode_blocks_enter(inode, from); 859 trace_f2fs_truncate_inode_blocks_enter(inode, from);
833 860
834 level = get_node_path(inode, from, offset, noffset); 861 level = get_node_path(inode, from, offset, noffset);
835restart: 862
836 page = get_node_page(sbi, inode->i_ino); 863 page = get_node_page(sbi, inode->i_ino);
837 if (IS_ERR(page)) { 864 if (IS_ERR(page)) {
838 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); 865 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
@@ -896,10 +923,7 @@ skip_partial:
896 if (offset[1] == 0 && 923 if (offset[1] == 0 &&
897 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { 924 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
898 lock_page(page); 925 lock_page(page);
899 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 926 BUG_ON(page->mapping != NODE_MAPPING(sbi));
900 f2fs_put_page(page, 1);
901 goto restart;
902 }
903 f2fs_wait_on_page_writeback(page, NODE, true); 927 f2fs_wait_on_page_writeback(page, NODE, true);
904 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 928 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
905 set_page_dirty(page); 929 set_page_dirty(page);
@@ -998,7 +1022,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
998 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1022 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
999 return ERR_PTR(-EPERM); 1023 return ERR_PTR(-EPERM);
1000 1024
1001 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); 1025 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
1002 if (!page) 1026 if (!page)
1003 return ERR_PTR(-ENOMEM); 1027 return ERR_PTR(-ENOMEM);
1004 1028
@@ -1090,7 +1114,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1090 if (apage) 1114 if (apage)
1091 return; 1115 return;
1092 1116
1093 apage = grab_cache_page(NODE_MAPPING(sbi), nid); 1117 apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
1094 if (!apage) 1118 if (!apage)
1095 return; 1119 return;
1096 1120
@@ -1098,29 +1122,6 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1098 f2fs_put_page(apage, err ? 1 : 0); 1122 f2fs_put_page(apage, err ? 1 : 0);
1099} 1123}
1100 1124
1101/*
1102 * readahead MAX_RA_NODE number of node pages.
1103 */
1104static void ra_node_pages(struct page *parent, int start)
1105{
1106 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1107 struct blk_plug plug;
1108 int i, end;
1109 nid_t nid;
1110
1111 blk_start_plug(&plug);
1112
1113 /* Then, try readahead for siblings of the desired node */
1114 end = start + MAX_RA_NODE;
1115 end = min(end, NIDS_PER_BLOCK);
1116 for (i = start; i < end; i++) {
1117 nid = get_nid(parent, i, false);
1118 ra_node_page(sbi, nid);
1119 }
1120
1121 blk_finish_plug(&plug);
1122}
1123
1124static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, 1125static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
1125 struct page *parent, int start) 1126 struct page *parent, int start)
1126{ 1127{
@@ -1131,7 +1132,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
1131 return ERR_PTR(-ENOENT); 1132 return ERR_PTR(-ENOENT);
1132 f2fs_bug_on(sbi, check_nid_range(sbi, nid)); 1133 f2fs_bug_on(sbi, check_nid_range(sbi, nid));
1133repeat: 1134repeat:
1134 page = grab_cache_page(NODE_MAPPING(sbi), nid); 1135 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
1135 if (!page) 1136 if (!page)
1136 return ERR_PTR(-ENOMEM); 1137 return ERR_PTR(-ENOMEM);
1137 1138
@@ -1144,7 +1145,7 @@ repeat:
1144 } 1145 }
1145 1146
1146 if (parent) 1147 if (parent)
1147 ra_node_pages(parent, start + 1); 1148 ra_node_pages(parent, start + 1, MAX_RA_NODE);
1148 1149
1149 lock_page(page); 1150 lock_page(page);
1150 1151
@@ -1196,19 +1197,17 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1196{ 1197{
1197 struct inode *inode; 1198 struct inode *inode;
1198 struct page *page; 1199 struct page *page;
1200 int ret;
1199 1201
1200 /* should flush inline_data before evict_inode */ 1202 /* should flush inline_data before evict_inode */
1201 inode = ilookup(sbi->sb, ino); 1203 inode = ilookup(sbi->sb, ino);
1202 if (!inode) 1204 if (!inode)
1203 return; 1205 return;
1204 1206
1205 page = pagecache_get_page(inode->i_mapping, 0, FGP_NOWAIT, 0); 1207 page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
1206 if (!page) 1208 if (!page)
1207 goto iput_out; 1209 goto iput_out;
1208 1210
1209 if (!trylock_page(page))
1210 goto release_out;
1211
1212 if (!PageUptodate(page)) 1211 if (!PageUptodate(page))
1213 goto page_out; 1212 goto page_out;
1214 1213
@@ -1218,24 +1217,214 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1218 if (!clear_page_dirty_for_io(page)) 1217 if (!clear_page_dirty_for_io(page))
1219 goto page_out; 1218 goto page_out;
1220 1219
1221 if (!f2fs_write_inline_data(inode, page)) 1220 ret = f2fs_write_inline_data(inode, page);
1222 inode_dec_dirty_pages(inode); 1221 inode_dec_dirty_pages(inode);
1223 else 1222 if (ret)
1224 set_page_dirty(page); 1223 set_page_dirty(page);
1225page_out: 1224page_out:
1226 unlock_page(page); 1225 f2fs_put_page(page, 1);
1227release_out:
1228 f2fs_put_page(page, 0);
1229iput_out: 1226iput_out:
1230 iput(inode); 1227 iput(inode);
1231} 1228}
1232 1229
1233int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, 1230void move_node_page(struct page *node_page, int gc_type)
1234 struct writeback_control *wbc) 1231{
1232 if (gc_type == FG_GC) {
1233 struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
1234 struct writeback_control wbc = {
1235 .sync_mode = WB_SYNC_ALL,
1236 .nr_to_write = 1,
1237 .for_reclaim = 0,
1238 };
1239
1240 set_page_dirty(node_page);
1241 f2fs_wait_on_page_writeback(node_page, NODE, true);
1242
1243 f2fs_bug_on(sbi, PageWriteback(node_page));
1244 if (!clear_page_dirty_for_io(node_page))
1245 goto out_page;
1246
1247 if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
1248 unlock_page(node_page);
1249 goto release_page;
1250 } else {
1251 /* set page dirty and write it */
1252 if (!PageWriteback(node_page))
1253 set_page_dirty(node_page);
1254 }
1255out_page:
1256 unlock_page(node_page);
1257release_page:
1258 f2fs_put_page(node_page, 0);
1259}
1260
1261static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
1235{ 1262{
1236 pgoff_t index, end; 1263 pgoff_t index, end;
1237 struct pagevec pvec; 1264 struct pagevec pvec;
1238 int step = ino ? 2 : 0; 1265 struct page *last_page = NULL;
1266
1267 pagevec_init(&pvec, 0);
1268 index = 0;
1269 end = ULONG_MAX;
1270
1271 while (index <= end) {
1272 int i, nr_pages;
1273 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1274 PAGECACHE_TAG_DIRTY,
1275 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1276 if (nr_pages == 0)
1277 break;
1278
1279 for (i = 0; i < nr_pages; i++) {
1280 struct page *page = pvec.pages[i];
1281
1282 if (unlikely(f2fs_cp_error(sbi))) {
1283 f2fs_put_page(last_page, 0);
1284 pagevec_release(&pvec);
1285 return ERR_PTR(-EIO);
1286 }
1287
1288 if (!IS_DNODE(page) || !is_cold_node(page))
1289 continue;
1290 if (ino_of_node(page) != ino)
1291 continue;
1292
1293 lock_page(page);
1294
1295 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1296continue_unlock:
1297 unlock_page(page);
1298 continue;
1299 }
1300 if (ino_of_node(page) != ino)
1301 goto continue_unlock;
1302
1303 if (!PageDirty(page)) {
1304 /* someone wrote it for us */
1305 goto continue_unlock;
1306 }
1307
1308 if (last_page)
1309 f2fs_put_page(last_page, 0);
1310
1311 get_page(page);
1312 last_page = page;
1313 unlock_page(page);
1314 }
1315 pagevec_release(&pvec);
1316 cond_resched();
1317 }
1318 return last_page;
1319}
1320
1321int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
1322 struct writeback_control *wbc, bool atomic)
1323{
1324 pgoff_t index, end;
1325 struct pagevec pvec;
1326 int ret = 0;
1327 struct page *last_page = NULL;
1328 bool marked = false;
1329
1330 if (atomic) {
1331 last_page = last_fsync_dnode(sbi, ino);
1332 if (IS_ERR_OR_NULL(last_page))
1333 return PTR_ERR_OR_ZERO(last_page);
1334 }
1335retry:
1336 pagevec_init(&pvec, 0);
1337 index = 0;
1338 end = ULONG_MAX;
1339
1340 while (index <= end) {
1341 int i, nr_pages;
1342 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1343 PAGECACHE_TAG_DIRTY,
1344 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1345 if (nr_pages == 0)
1346 break;
1347
1348 for (i = 0; i < nr_pages; i++) {
1349 struct page *page = pvec.pages[i];
1350
1351 if (unlikely(f2fs_cp_error(sbi))) {
1352 f2fs_put_page(last_page, 0);
1353 pagevec_release(&pvec);
1354 return -EIO;
1355 }
1356
1357 if (!IS_DNODE(page) || !is_cold_node(page))
1358 continue;
1359 if (ino_of_node(page) != ino)
1360 continue;
1361
1362 lock_page(page);
1363
1364 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1365continue_unlock:
1366 unlock_page(page);
1367 continue;
1368 }
1369 if (ino_of_node(page) != ino)
1370 goto continue_unlock;
1371
1372 if (!PageDirty(page) && page != last_page) {
1373 /* someone wrote it for us */
1374 goto continue_unlock;
1375 }
1376
1377 f2fs_wait_on_page_writeback(page, NODE, true);
1378 BUG_ON(PageWriteback(page));
1379
1380 if (!atomic || page == last_page) {
1381 set_fsync_mark(page, 1);
1382 if (IS_INODE(page))
1383 set_dentry_mark(page,
1384 need_dentry_mark(sbi, ino));
1385 /* may be written by other thread */
1386 if (!PageDirty(page))
1387 set_page_dirty(page);
1388 }
1389
1390 if (!clear_page_dirty_for_io(page))
1391 goto continue_unlock;
1392
1393 ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
1394 if (ret) {
1395 unlock_page(page);
1396 f2fs_put_page(last_page, 0);
1397 break;
1398 }
1399 if (page == last_page) {
1400 f2fs_put_page(page, 0);
1401 marked = true;
1402 break;
1403 }
1404 }
1405 pagevec_release(&pvec);
1406 cond_resched();
1407
1408 if (ret || marked)
1409 break;
1410 }
1411 if (!ret && atomic && !marked) {
1412 f2fs_msg(sbi->sb, KERN_DEBUG,
1413 "Retry to write fsync mark: ino=%u, idx=%lx",
1414 ino, last_page->index);
1415 lock_page(last_page);
1416 set_page_dirty(last_page);
1417 unlock_page(last_page);
1418 goto retry;
1419 }
1420 return ret ? -EIO: 0;
1421}
1422
1423int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
1424{
1425 pgoff_t index, end;
1426 struct pagevec pvec;
1427 int step = 0;
1239 int nwritten = 0; 1428 int nwritten = 0;
1240 1429
1241 pagevec_init(&pvec, 0); 1430 pagevec_init(&pvec, 0);
@@ -1274,15 +1463,8 @@ next_step:
1274 if (step == 2 && (!IS_DNODE(page) || 1463 if (step == 2 && (!IS_DNODE(page) ||
1275 !is_cold_node(page))) 1464 !is_cold_node(page)))
1276 continue; 1465 continue;
1277
1278 /*
1279 * If an fsync mode,
1280 * we should not skip writing node pages.
1281 */
1282lock_node: 1466lock_node:
1283 if (ino && ino_of_node(page) == ino) 1467 if (!trylock_page(page))
1284 lock_page(page);
1285 else if (!trylock_page(page))
1286 continue; 1468 continue;
1287 1469
1288 if (unlikely(page->mapping != NODE_MAPPING(sbi))) { 1470 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
@@ -1290,8 +1472,6 @@ continue_unlock:
1290 unlock_page(page); 1472 unlock_page(page);
1291 continue; 1473 continue;
1292 } 1474 }
1293 if (ino && ino_of_node(page) != ino)
1294 goto continue_unlock;
1295 1475
1296 if (!PageDirty(page)) { 1476 if (!PageDirty(page)) {
1297 /* someone wrote it for us */ 1477 /* someone wrote it for us */
@@ -1299,7 +1479,7 @@ continue_unlock:
1299 } 1479 }
1300 1480
1301 /* flush inline_data */ 1481 /* flush inline_data */
1302 if (!ino && is_inline_node(page)) { 1482 if (is_inline_node(page)) {
1303 clear_inline_node(page); 1483 clear_inline_node(page);
1304 unlock_page(page); 1484 unlock_page(page);
1305 flush_inline_data(sbi, ino_of_node(page)); 1485 flush_inline_data(sbi, ino_of_node(page));
@@ -1312,17 +1492,8 @@ continue_unlock:
1312 if (!clear_page_dirty_for_io(page)) 1492 if (!clear_page_dirty_for_io(page))
1313 goto continue_unlock; 1493 goto continue_unlock;
1314 1494
1315 /* called by fsync() */ 1495 set_fsync_mark(page, 0);
1316 if (ino && IS_DNODE(page)) { 1496 set_dentry_mark(page, 0);
1317 set_fsync_mark(page, 1);
1318 if (IS_INODE(page))
1319 set_dentry_mark(page,
1320 need_dentry_mark(sbi, ino));
1321 nwritten++;
1322 } else {
1323 set_fsync_mark(page, 0);
1324 set_dentry_mark(page, 0);
1325 }
1326 1497
1327 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) 1498 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
1328 unlock_page(page); 1499 unlock_page(page);
@@ -1470,7 +1641,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1470 1641
1471 diff = nr_pages_to_write(sbi, NODE, wbc); 1642 diff = nr_pages_to_write(sbi, NODE, wbc);
1472 wbc->sync_mode = WB_SYNC_NONE; 1643 wbc->sync_mode = WB_SYNC_NONE;
1473 sync_node_pages(sbi, 0, wbc); 1644 sync_node_pages(sbi, wbc);
1474 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); 1645 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1475 return 0; 1646 return 0;
1476 1647
@@ -1524,7 +1695,6 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1524 struct f2fs_nm_info *nm_i = NM_I(sbi); 1695 struct f2fs_nm_info *nm_i = NM_I(sbi);
1525 struct free_nid *i; 1696 struct free_nid *i;
1526 struct nat_entry *ne; 1697 struct nat_entry *ne;
1527 bool allocated = false;
1528 1698
1529 if (!available_free_memory(sbi, FREE_NIDS)) 1699 if (!available_free_memory(sbi, FREE_NIDS))
1530 return -1; 1700 return -1;
@@ -1538,8 +1708,6 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1538 ne = __lookup_nat_cache(nm_i, nid); 1708 ne = __lookup_nat_cache(nm_i, nid);
1539 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || 1709 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
1540 nat_get_blkaddr(ne) != NULL_ADDR)) 1710 nat_get_blkaddr(ne) != NULL_ADDR))
1541 allocated = true;
1542 if (allocated)
1543 return 0; 1711 return 0;
1544 } 1712 }
1545 1713
@@ -1672,6 +1840,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1672 struct f2fs_nm_info *nm_i = NM_I(sbi); 1840 struct f2fs_nm_info *nm_i = NM_I(sbi);
1673 struct free_nid *i = NULL; 1841 struct free_nid *i = NULL;
1674retry: 1842retry:
1843#ifdef CONFIG_F2FS_FAULT_INJECTION
1844 if (time_to_inject(FAULT_ALLOC_NID))
1845 return false;
1846#endif
1675 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) 1847 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
1676 return false; 1848 return false;
1677 1849
@@ -1846,7 +2018,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1846 if (unlikely(old_ni.blk_addr != NULL_ADDR)) 2018 if (unlikely(old_ni.blk_addr != NULL_ADDR))
1847 return -EINVAL; 2019 return -EINVAL;
1848 2020
1849 ipage = grab_cache_page(NODE_MAPPING(sbi), ino); 2021 ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
1850 if (!ipage) 2022 if (!ipage)
1851 return -ENOMEM; 2023 return -ENOMEM;
1852 2024
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 011942f94d64..3d7216d7a288 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -49,8 +49,9 @@ static struct kmem_cache *fsync_entry_slab;
49 49
50bool space_for_roll_forward(struct f2fs_sb_info *sbi) 50bool space_for_roll_forward(struct f2fs_sb_info *sbi)
51{ 51{
52 if (sbi->last_valid_block_count + sbi->alloc_valid_block_count 52 s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
53 > sbi->user_block_count) 53
54 if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
54 return false; 55 return false;
55 return true; 56 return true;
56} 57}
@@ -67,7 +68,30 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
67 return NULL; 68 return NULL;
68} 69}
69 70
70static int recover_dentry(struct inode *inode, struct page *ipage) 71static struct fsync_inode_entry *add_fsync_inode(struct list_head *head,
72 struct inode *inode)
73{
74 struct fsync_inode_entry *entry;
75
76 entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
77 if (!entry)
78 return NULL;
79
80 entry->inode = inode;
81 list_add_tail(&entry->list, head);
82
83 return entry;
84}
85
86static void del_fsync_inode(struct fsync_inode_entry *entry)
87{
88 iput(entry->inode);
89 list_del(&entry->list);
90 kmem_cache_free(fsync_entry_slab, entry);
91}
92
93static int recover_dentry(struct inode *inode, struct page *ipage,
94 struct list_head *dir_list)
71{ 95{
72 struct f2fs_inode *raw_inode = F2FS_INODE(ipage); 96 struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73 nid_t pino = le32_to_cpu(raw_inode->i_pino); 97 nid_t pino = le32_to_cpu(raw_inode->i_pino);
@@ -75,18 +99,29 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
75 struct qstr name; 99 struct qstr name;
76 struct page *page; 100 struct page *page;
77 struct inode *dir, *einode; 101 struct inode *dir, *einode;
102 struct fsync_inode_entry *entry;
78 int err = 0; 103 int err = 0;
79 104
80 dir = f2fs_iget(inode->i_sb, pino); 105 entry = get_fsync_inode(dir_list, pino);
81 if (IS_ERR(dir)) { 106 if (!entry) {
82 err = PTR_ERR(dir); 107 dir = f2fs_iget(inode->i_sb, pino);
83 goto out; 108 if (IS_ERR(dir)) {
109 err = PTR_ERR(dir);
110 goto out;
111 }
112
113 entry = add_fsync_inode(dir_list, dir);
114 if (!entry) {
115 err = -ENOMEM;
116 iput(dir);
117 goto out;
118 }
84 } 119 }
85 120
86 if (file_enc_name(inode)) { 121 dir = entry->inode;
87 iput(dir); 122
123 if (file_enc_name(inode))
88 return 0; 124 return 0;
89 }
90 125
91 name.len = le32_to_cpu(raw_inode->i_namelen); 126 name.len = le32_to_cpu(raw_inode->i_namelen);
92 name.name = raw_inode->i_name; 127 name.name = raw_inode->i_name;
@@ -94,7 +129,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
94 if (unlikely(name.len > F2FS_NAME_LEN)) { 129 if (unlikely(name.len > F2FS_NAME_LEN)) {
95 WARN_ON(1); 130 WARN_ON(1);
96 err = -ENAMETOOLONG; 131 err = -ENAMETOOLONG;
97 goto out_err; 132 goto out;
98 } 133 }
99retry: 134retry:
100 de = f2fs_find_entry(dir, &name, &page); 135 de = f2fs_find_entry(dir, &name, &page);
@@ -120,23 +155,12 @@ retry:
120 goto retry; 155 goto retry;
121 } 156 }
122 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); 157 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
123 if (err)
124 goto out_err;
125
126 if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
127 iput(dir);
128 } else {
129 add_dirty_dir_inode(dir);
130 set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
131 }
132 158
133 goto out; 159 goto out;
134 160
135out_unmap_put: 161out_unmap_put:
136 f2fs_dentry_kunmap(dir, page); 162 f2fs_dentry_kunmap(dir, page);
137 f2fs_put_page(page, 0); 163 f2fs_put_page(page, 0);
138out_err:
139 iput(dir);
140out: 164out:
141 f2fs_msg(inode->i_sb, KERN_NOTICE, 165 f2fs_msg(inode->i_sb, KERN_NOTICE,
142 "%s: ino = %x, name = %s, dir = %lx, err = %d", 166 "%s: ino = %x, name = %s, dir = %lx, err = %d",
@@ -198,6 +222,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
198{ 222{
199 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 223 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
200 struct curseg_info *curseg; 224 struct curseg_info *curseg;
225 struct inode *inode;
201 struct page *page = NULL; 226 struct page *page = NULL;
202 block_t blkaddr; 227 block_t blkaddr;
203 int err = 0; 228 int err = 0;
@@ -206,8 +231,6 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
206 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 231 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
207 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 232 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
208 233
209 ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
210
211 while (1) { 234 while (1) {
212 struct fsync_inode_entry *entry; 235 struct fsync_inode_entry *entry;
213 236
@@ -233,35 +256,32 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
233 break; 256 break;
234 } 257 }
235 258
236 /* add this fsync inode to the list */
237 entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
238 if (!entry) {
239 err = -ENOMEM;
240 break;
241 }
242 /* 259 /*
243 * CP | dnode(F) | inode(DF) 260 * CP | dnode(F) | inode(DF)
244 * For this case, we should not give up now. 261 * For this case, we should not give up now.
245 */ 262 */
246 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); 263 inode = f2fs_iget(sbi->sb, ino_of_node(page));
247 if (IS_ERR(entry->inode)) { 264 if (IS_ERR(inode)) {
248 err = PTR_ERR(entry->inode); 265 err = PTR_ERR(inode);
249 kmem_cache_free(fsync_entry_slab, entry);
250 if (err == -ENOENT) { 266 if (err == -ENOENT) {
251 err = 0; 267 err = 0;
252 goto next; 268 goto next;
253 } 269 }
254 break; 270 break;
255 } 271 }
256 list_add_tail(&entry->list, head); 272
273 /* add this fsync inode to the list */
274 entry = add_fsync_inode(head, inode);
275 if (!entry) {
276 err = -ENOMEM;
277 iput(inode);
278 break;
279 }
257 } 280 }
258 entry->blkaddr = blkaddr; 281 entry->blkaddr = blkaddr;
259 282
260 if (IS_INODE(page)) { 283 if (IS_INODE(page) && is_dent_dnode(page))
261 entry->last_inode = blkaddr; 284 entry->last_dentry = blkaddr;
262 if (is_dent_dnode(page))
263 entry->last_dentry = blkaddr;
264 }
265next: 285next:
266 /* check next segment */ 286 /* check next segment */
267 blkaddr = next_blkaddr_of_node(page); 287 blkaddr = next_blkaddr_of_node(page);
@@ -277,11 +297,8 @@ static void destroy_fsync_dnodes(struct list_head *head)
277{ 297{
278 struct fsync_inode_entry *entry, *tmp; 298 struct fsync_inode_entry *entry, *tmp;
279 299
280 list_for_each_entry_safe(entry, tmp, head, list) { 300 list_for_each_entry_safe(entry, tmp, head, list)
281 iput(entry->inode); 301 del_fsync_inode(entry);
282 list_del(&entry->list);
283 kmem_cache_free(fsync_entry_slab, entry);
284 }
285} 302}
286 303
287static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, 304static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
@@ -444,8 +461,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
444 */ 461 */
445 if (dest == NEW_ADDR) { 462 if (dest == NEW_ADDR) {
446 truncate_data_blocks_range(&dn, 1); 463 truncate_data_blocks_range(&dn, 1);
447 err = reserve_new_block(&dn); 464 reserve_new_block(&dn);
448 f2fs_bug_on(sbi, err);
449 continue; 465 continue;
450 } 466 }
451 467
@@ -454,6 +470,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
454 470
455 if (src == NULL_ADDR) { 471 if (src == NULL_ADDR) {
456 err = reserve_new_block(&dn); 472 err = reserve_new_block(&dn);
473#ifdef CONFIG_F2FS_FAULT_INJECTION
474 while (err)
475 err = reserve_new_block(&dn);
476#endif
457 /* We should not get -ENOSPC */ 477 /* We should not get -ENOSPC */
458 f2fs_bug_on(sbi, err); 478 f2fs_bug_on(sbi, err);
459 } 479 }
@@ -486,7 +506,8 @@ out:
486 return err; 506 return err;
487} 507}
488 508
489static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head) 509static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
510 struct list_head *dir_list)
490{ 511{
491 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); 512 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
492 struct curseg_info *curseg; 513 struct curseg_info *curseg;
@@ -513,7 +534,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
513 break; 534 break;
514 } 535 }
515 536
516 entry = get_fsync_inode(head, ino_of_node(page)); 537 entry = get_fsync_inode(inode_list, ino_of_node(page));
517 if (!entry) 538 if (!entry)
518 goto next; 539 goto next;
519 /* 540 /*
@@ -521,10 +542,10 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
521 * In this case, we can lose the latest inode(x). 542 * In this case, we can lose the latest inode(x).
522 * So, call recover_inode for the inode update. 543 * So, call recover_inode for the inode update.
523 */ 544 */
524 if (entry->last_inode == blkaddr) 545 if (IS_INODE(page))
525 recover_inode(entry->inode, page); 546 recover_inode(entry->inode, page);
526 if (entry->last_dentry == blkaddr) { 547 if (entry->last_dentry == blkaddr) {
527 err = recover_dentry(entry->inode, page); 548 err = recover_dentry(entry->inode, page, dir_list);
528 if (err) { 549 if (err) {
529 f2fs_put_page(page, 1); 550 f2fs_put_page(page, 1);
530 break; 551 break;
@@ -536,11 +557,8 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
536 break; 557 break;
537 } 558 }
538 559
539 if (entry->blkaddr == blkaddr) { 560 if (entry->blkaddr == blkaddr)
540 iput(entry->inode); 561 del_fsync_inode(entry);
541 list_del(&entry->list);
542 kmem_cache_free(fsync_entry_slab, entry);
543 }
544next: 562next:
545 /* check next segment */ 563 /* check next segment */
546 blkaddr = next_blkaddr_of_node(page); 564 blkaddr = next_blkaddr_of_node(page);
@@ -551,12 +569,14 @@ next:
551 return err; 569 return err;
552} 570}
553 571
554int recover_fsync_data(struct f2fs_sb_info *sbi) 572int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
555{ 573{
556 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); 574 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
557 struct list_head inode_list; 575 struct list_head inode_list;
576 struct list_head dir_list;
558 block_t blkaddr; 577 block_t blkaddr;
559 int err; 578 int err;
579 int ret = 0;
560 bool need_writecp = false; 580 bool need_writecp = false;
561 581
562 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 582 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@@ -565,6 +585,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
565 return -ENOMEM; 585 return -ENOMEM;
566 586
567 INIT_LIST_HEAD(&inode_list); 587 INIT_LIST_HEAD(&inode_list);
588 INIT_LIST_HEAD(&dir_list);
568 589
569 /* prevent checkpoint */ 590 /* prevent checkpoint */
570 mutex_lock(&sbi->cp_mutex); 591 mutex_lock(&sbi->cp_mutex);
@@ -573,21 +594,22 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
573 594
574 /* step #1: find fsynced inode numbers */ 595 /* step #1: find fsynced inode numbers */
575 err = find_fsync_dnodes(sbi, &inode_list); 596 err = find_fsync_dnodes(sbi, &inode_list);
576 if (err) 597 if (err || list_empty(&inode_list))
577 goto out; 598 goto out;
578 599
579 if (list_empty(&inode_list)) 600 if (check_only) {
601 ret = 1;
580 goto out; 602 goto out;
603 }
581 604
582 need_writecp = true; 605 need_writecp = true;
583 606
584 /* step #2: recover data */ 607 /* step #2: recover data */
585 err = recover_data(sbi, &inode_list); 608 err = recover_data(sbi, &inode_list, &dir_list);
586 if (!err) 609 if (!err)
587 f2fs_bug_on(sbi, !list_empty(&inode_list)); 610 f2fs_bug_on(sbi, !list_empty(&inode_list));
588out: 611out:
589 destroy_fsync_dnodes(&inode_list); 612 destroy_fsync_dnodes(&inode_list);
590 kmem_cache_destroy(fsync_entry_slab);
591 613
592 /* truncate meta pages to be used by the recovery */ 614 /* truncate meta pages to be used by the recovery */
593 truncate_inode_pages_range(META_MAPPING(sbi), 615 truncate_inode_pages_range(META_MAPPING(sbi),
@@ -625,5 +647,8 @@ out:
625 } else { 647 } else {
626 mutex_unlock(&sbi->cp_mutex); 648 mutex_unlock(&sbi->cp_mutex);
627 } 649 }
628 return err; 650
651 destroy_fsync_dnodes(&dir_list);
652 kmem_cache_destroy(fsync_entry_slab);
653 return ret ? ret: err;
629} 654}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 540669d6978e..2e6f537a0e7d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -223,9 +223,11 @@ static int __revoke_inmem_pages(struct inode *inode,
223 f2fs_put_dnode(&dn); 223 f2fs_put_dnode(&dn);
224 } 224 }
225next: 225next:
226 ClearPageUptodate(page); 226 /* we don't need to invalidate this in the successful status */
227 if (drop || recover)
228 ClearPageUptodate(page);
227 set_page_private(page, 0); 229 set_page_private(page, 0);
228 ClearPageUptodate(page); 230 ClearPagePrivate(page);
229 f2fs_put_page(page, 1); 231 f2fs_put_page(page, 1);
230 232
231 list_del(&cur->list); 233 list_del(&cur->list);
@@ -239,6 +241,8 @@ void drop_inmem_pages(struct inode *inode)
239{ 241{
240 struct f2fs_inode_info *fi = F2FS_I(inode); 242 struct f2fs_inode_info *fi = F2FS_I(inode);
241 243
244 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
245
242 mutex_lock(&fi->inmem_lock); 246 mutex_lock(&fi->inmem_lock);
243 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); 247 __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
244 mutex_unlock(&fi->inmem_lock); 248 mutex_unlock(&fi->inmem_lock);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 975c33df65c7..7a756ff5a36d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -158,16 +158,17 @@ struct victim_sel_policy {
158}; 158};
159 159
160struct seg_entry { 160struct seg_entry {
161 unsigned short valid_blocks; /* # of valid blocks */ 161 unsigned int type:6; /* segment type like CURSEG_XXX_TYPE */
162 unsigned int valid_blocks:10; /* # of valid blocks */
163 unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
164 unsigned int padding:6; /* padding */
162 unsigned char *cur_valid_map; /* validity bitmap of blocks */ 165 unsigned char *cur_valid_map; /* validity bitmap of blocks */
163 /* 166 /*
164 * # of valid blocks and the validity bitmap stored in the last 167 * # of valid blocks and the validity bitmap stored in the last
165 * checkpoint pack. This information is used by the SSR mode. 168 * checkpoint pack. This information is used by the SSR mode.
166 */ 169 */
167 unsigned short ckpt_valid_blocks; 170 unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */
168 unsigned char *ckpt_valid_map;
169 unsigned char *discard_map; 171 unsigned char *discard_map;
170 unsigned char type; /* segment type like CURSEG_XXX_TYPE */
171 unsigned long long mtime; /* modification time of the segment */ 172 unsigned long long mtime; /* modification time of the segment */
172}; 173};
173 174
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 006f87d69921..74cc8520b8b1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -39,6 +39,30 @@ static struct proc_dir_entry *f2fs_proc_root;
39static struct kmem_cache *f2fs_inode_cachep; 39static struct kmem_cache *f2fs_inode_cachep;
40static struct kset *f2fs_kset; 40static struct kset *f2fs_kset;
41 41
42#ifdef CONFIG_F2FS_FAULT_INJECTION
43struct f2fs_fault_info f2fs_fault;
44
45char *fault_name[FAULT_MAX] = {
46 [FAULT_KMALLOC] = "kmalloc",
47 [FAULT_PAGE_ALLOC] = "page alloc",
48 [FAULT_ALLOC_NID] = "alloc nid",
49 [FAULT_ORPHAN] = "orphan",
50 [FAULT_BLOCK] = "no more block",
51 [FAULT_DIR_DEPTH] = "too big dir depth",
52};
53
54static void f2fs_build_fault_attr(unsigned int rate)
55{
56 if (rate) {
57 atomic_set(&f2fs_fault.inject_ops, 0);
58 f2fs_fault.inject_rate = rate;
59 f2fs_fault.inject_type = (1 << FAULT_MAX) - 1;
60 } else {
61 memset(&f2fs_fault, 0, sizeof(struct f2fs_fault_info));
62 }
63}
64#endif
65
42/* f2fs-wide shrinker description */ 66/* f2fs-wide shrinker description */
43static struct shrinker f2fs_shrinker_info = { 67static struct shrinker f2fs_shrinker_info = {
44 .scan_objects = f2fs_shrink_scan, 68 .scan_objects = f2fs_shrink_scan,
@@ -68,6 +92,7 @@ enum {
68 Opt_noextent_cache, 92 Opt_noextent_cache,
69 Opt_noinline_data, 93 Opt_noinline_data,
70 Opt_data_flush, 94 Opt_data_flush,
95 Opt_fault_injection,
71 Opt_err, 96 Opt_err,
72}; 97};
73 98
@@ -93,6 +118,7 @@ static match_table_t f2fs_tokens = {
93 {Opt_noextent_cache, "noextent_cache"}, 118 {Opt_noextent_cache, "noextent_cache"},
94 {Opt_noinline_data, "noinline_data"}, 119 {Opt_noinline_data, "noinline_data"},
95 {Opt_data_flush, "data_flush"}, 120 {Opt_data_flush, "data_flush"},
121 {Opt_fault_injection, "fault_injection=%u"},
96 {Opt_err, NULL}, 122 {Opt_err, NULL},
97}; 123};
98 124
@@ -102,6 +128,10 @@ enum {
102 SM_INFO, /* struct f2fs_sm_info */ 128 SM_INFO, /* struct f2fs_sm_info */
103 NM_INFO, /* struct f2fs_nm_info */ 129 NM_INFO, /* struct f2fs_nm_info */
104 F2FS_SBI, /* struct f2fs_sb_info */ 130 F2FS_SBI, /* struct f2fs_sb_info */
131#ifdef CONFIG_F2FS_FAULT_INJECTION
132 FAULT_INFO_RATE, /* struct f2fs_fault_info */
133 FAULT_INFO_TYPE, /* struct f2fs_fault_info */
134#endif
105}; 135};
106 136
107struct f2fs_attr { 137struct f2fs_attr {
@@ -123,6 +153,11 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
123 return (unsigned char *)NM_I(sbi); 153 return (unsigned char *)NM_I(sbi);
124 else if (struct_type == F2FS_SBI) 154 else if (struct_type == F2FS_SBI)
125 return (unsigned char *)sbi; 155 return (unsigned char *)sbi;
156#ifdef CONFIG_F2FS_FAULT_INJECTION
157 else if (struct_type == FAULT_INFO_RATE ||
158 struct_type == FAULT_INFO_TYPE)
159 return (unsigned char *)&f2fs_fault;
160#endif
126 return NULL; 161 return NULL;
127} 162}
128 163
@@ -172,6 +207,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
172 ret = kstrtoul(skip_spaces(buf), 0, &t); 207 ret = kstrtoul(skip_spaces(buf), 0, &t);
173 if (ret < 0) 208 if (ret < 0)
174 return ret; 209 return ret;
210#ifdef CONFIG_F2FS_FAULT_INJECTION
211 if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
212 return -EINVAL;
213#endif
175 *ui = t; 214 *ui = t;
176 return count; 215 return count;
177} 216}
@@ -237,6 +276,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
237F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); 276F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
238F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); 277F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
239F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); 278F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
279#ifdef CONFIG_F2FS_FAULT_INJECTION
280F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
281F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
282#endif
240F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); 283F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
241 284
242#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 285#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
@@ -273,6 +316,22 @@ static struct kobj_type f2fs_ktype = {
273 .release = f2fs_sb_release, 316 .release = f2fs_sb_release,
274}; 317};
275 318
319#ifdef CONFIG_F2FS_FAULT_INJECTION
320/* sysfs for f2fs fault injection */
321static struct kobject f2fs_fault_inject;
322
323static struct attribute *f2fs_fault_attrs[] = {
324 ATTR_LIST(inject_rate),
325 ATTR_LIST(inject_type),
326 NULL
327};
328
329static struct kobj_type f2fs_fault_ktype = {
330 .default_attrs = f2fs_fault_attrs,
331 .sysfs_ops = &f2fs_attr_ops,
332};
333#endif
334
276void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) 335void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
277{ 336{
278 struct va_format vaf; 337 struct va_format vaf;
@@ -300,6 +359,10 @@ static int parse_options(struct super_block *sb, char *options)
300 char *p, *name; 359 char *p, *name;
301 int arg = 0; 360 int arg = 0;
302 361
362#ifdef CONFIG_F2FS_FAULT_INJECTION
363 f2fs_build_fault_attr(0);
364#endif
365
303 if (!options) 366 if (!options)
304 return 0; 367 return 0;
305 368
@@ -433,6 +496,16 @@ static int parse_options(struct super_block *sb, char *options)
433 case Opt_data_flush: 496 case Opt_data_flush:
434 set_opt(sbi, DATA_FLUSH); 497 set_opt(sbi, DATA_FLUSH);
435 break; 498 break;
499 case Opt_fault_injection:
500 if (args->from && match_int(args, &arg))
501 return -EINVAL;
502#ifdef CONFIG_F2FS_FAULT_INJECTION
503 f2fs_build_fault_attr(arg);
504#else
505 f2fs_msg(sb, KERN_INFO,
506 "FAULT_INJECTION was not selected");
507#endif
508 break;
436 default: 509 default:
437 f2fs_msg(sb, KERN_ERR, 510 f2fs_msg(sb, KERN_ERR,
438 "Unrecognized mount option \"%s\" or missing value", 511 "Unrecognized mount option \"%s\" or missing value",
@@ -453,9 +526,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
453 526
454 init_once((void *) fi); 527 init_once((void *) fi);
455 528
529 if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) {
530 kmem_cache_free(f2fs_inode_cachep, fi);
531 return NULL;
532 }
533
456 /* Initialize f2fs-specific inode info */ 534 /* Initialize f2fs-specific inode info */
457 fi->vfs_inode.i_version = 1; 535 fi->vfs_inode.i_version = 1;
458 atomic_set(&fi->dirty_pages, 0);
459 fi->i_current_depth = 1; 536 fi->i_current_depth = 1;
460 fi->i_advise = 0; 537 fi->i_advise = 0;
461 init_rwsem(&fi->i_sem); 538 init_rwsem(&fi->i_sem);
@@ -530,15 +607,27 @@ static void f2fs_i_callback(struct rcu_head *head)
530 607
531static void f2fs_destroy_inode(struct inode *inode) 608static void f2fs_destroy_inode(struct inode *inode)
532{ 609{
610 percpu_counter_destroy(&F2FS_I(inode)->dirty_pages);
533 call_rcu(&inode->i_rcu, f2fs_i_callback); 611 call_rcu(&inode->i_rcu, f2fs_i_callback);
534} 612}
535 613
614static void destroy_percpu_info(struct f2fs_sb_info *sbi)
615{
616 int i;
617
618 for (i = 0; i < NR_COUNT_TYPE; i++)
619 percpu_counter_destroy(&sbi->nr_pages[i]);
620 percpu_counter_destroy(&sbi->alloc_valid_block_count);
621 percpu_counter_destroy(&sbi->total_valid_inode_count);
622}
623
536static void f2fs_put_super(struct super_block *sb) 624static void f2fs_put_super(struct super_block *sb)
537{ 625{
538 struct f2fs_sb_info *sbi = F2FS_SB(sb); 626 struct f2fs_sb_info *sbi = F2FS_SB(sb);
539 627
540 if (sbi->s_proc) { 628 if (sbi->s_proc) {
541 remove_proc_entry("segment_info", sbi->s_proc); 629 remove_proc_entry("segment_info", sbi->s_proc);
630 remove_proc_entry("segment_bits", sbi->s_proc);
542 remove_proc_entry(sb->s_id, f2fs_proc_root); 631 remove_proc_entry(sb->s_id, f2fs_proc_root);
543 } 632 }
544 kobject_del(&sbi->s_kobj); 633 kobject_del(&sbi->s_kobj);
@@ -568,15 +657,14 @@ static void f2fs_put_super(struct super_block *sb)
568 * normally superblock is clean, so we need to release this. 657 * normally superblock is clean, so we need to release this.
569 * In addition, EIO will skip do checkpoint, we need this as well. 658 * In addition, EIO will skip do checkpoint, we need this as well.
570 */ 659 */
571 release_ino_entry(sbi); 660 release_ino_entry(sbi, true);
572 release_discard_addrs(sbi); 661 release_discard_addrs(sbi);
573 662
574 f2fs_leave_shrinker(sbi); 663 f2fs_leave_shrinker(sbi);
575 mutex_unlock(&sbi->umount_mutex); 664 mutex_unlock(&sbi->umount_mutex);
576 665
577 /* our cp_error case, we can wait for any writeback page */ 666 /* our cp_error case, we can wait for any writeback page */
578 if (get_pages(sbi, F2FS_WRITEBACK)) 667 f2fs_flush_merged_bios(sbi);
579 f2fs_flush_merged_bios(sbi);
580 668
581 iput(sbi->node_inode); 669 iput(sbi->node_inode);
582 iput(sbi->meta_inode); 670 iput(sbi->meta_inode);
@@ -593,6 +681,8 @@ static void f2fs_put_super(struct super_block *sb)
593 if (sbi->s_chksum_driver) 681 if (sbi->s_chksum_driver)
594 crypto_free_shash(sbi->s_chksum_driver); 682 crypto_free_shash(sbi->s_chksum_driver);
595 kfree(sbi->raw_super); 683 kfree(sbi->raw_super);
684
685 destroy_percpu_info(sbi);
596 kfree(sbi); 686 kfree(sbi);
597} 687}
598 688
@@ -745,19 +835,47 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
745 return 0; 835 return 0;
746} 836}
747 837
748static int segment_info_open_fs(struct inode *inode, struct file *file) 838static int segment_bits_seq_show(struct seq_file *seq, void *offset)
749{ 839{
750 return single_open(file, segment_info_seq_show, PDE_DATA(inode)); 840 struct super_block *sb = seq->private;
841 struct f2fs_sb_info *sbi = F2FS_SB(sb);
842 unsigned int total_segs =
843 le32_to_cpu(sbi->raw_super->segment_count_main);
844 int i, j;
845
846 seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n"
847 "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n");
848
849 for (i = 0; i < total_segs; i++) {
850 struct seg_entry *se = get_seg_entry(sbi, i);
851
852 seq_printf(seq, "%-10d", i);
853 seq_printf(seq, "%d|%-3u|", se->type,
854 get_valid_blocks(sbi, i, 1));
855 for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
856 seq_printf(seq, "%x ", se->cur_valid_map[j]);
857 seq_putc(seq, '\n');
858 }
859 return 0;
751} 860}
752 861
753static const struct file_operations f2fs_seq_segment_info_fops = { 862#define F2FS_PROC_FILE_DEF(_name) \
754 .owner = THIS_MODULE, 863static int _name##_open_fs(struct inode *inode, struct file *file) \
755 .open = segment_info_open_fs, 864{ \
756 .read = seq_read, 865 return single_open(file, _name##_seq_show, PDE_DATA(inode)); \
757 .llseek = seq_lseek, 866} \
758 .release = single_release, 867 \
868static const struct file_operations f2fs_seq_##_name##_fops = { \
869 .owner = THIS_MODULE, \
870 .open = _name##_open_fs, \
871 .read = seq_read, \
872 .llseek = seq_lseek, \
873 .release = single_release, \
759}; 874};
760 875
876F2FS_PROC_FILE_DEF(segment_info);
877F2FS_PROC_FILE_DEF(segment_bits);
878
761static void default_options(struct f2fs_sb_info *sbi) 879static void default_options(struct f2fs_sb_info *sbi)
762{ 880{
763 /* init some FS parameters */ 881 /* init some FS parameters */
@@ -791,13 +909,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
791 org_mount_opt = sbi->mount_opt; 909 org_mount_opt = sbi->mount_opt;
792 active_logs = sbi->active_logs; 910 active_logs = sbi->active_logs;
793 911
794 if (*flags & MS_RDONLY) { 912 /* recover superblocks we couldn't write due to previous RO mount */
795 set_opt(sbi, FASTBOOT); 913 if (!(*flags & MS_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
796 set_sbi_flag(sbi, SBI_IS_DIRTY); 914 err = f2fs_commit_super(sbi, false);
915 f2fs_msg(sb, KERN_INFO,
916 "Try to recover all the superblocks, ret: %d", err);
917 if (!err)
918 clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
797 } 919 }
798 920
799 sync_filesystem(sb);
800
801 sbi->mount_opt.opt = 0; 921 sbi->mount_opt.opt = 0;
802 default_options(sbi); 922 default_options(sbi);
803 923
@@ -829,7 +949,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
829 if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { 949 if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) {
830 if (sbi->gc_thread) { 950 if (sbi->gc_thread) {
831 stop_gc_thread(sbi); 951 stop_gc_thread(sbi);
832 f2fs_sync_fs(sb, 1);
833 need_restart_gc = true; 952 need_restart_gc = true;
834 } 953 }
835 } else if (!sbi->gc_thread) { 954 } else if (!sbi->gc_thread) {
@@ -839,6 +958,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
839 need_stop_gc = true; 958 need_stop_gc = true;
840 } 959 }
841 960
961 if (*flags & MS_RDONLY) {
962 writeback_inodes_sb(sb, WB_REASON_SYNC);
963 sync_inodes_sb(sb);
964
965 set_sbi_flag(sbi, SBI_IS_DIRTY);
966 set_sbi_flag(sbi, SBI_IS_CLOSE);
967 f2fs_sync_fs(sb, 1);
968 clear_sbi_flag(sbi, SBI_IS_CLOSE);
969 }
970
842 /* 971 /*
843 * We stop issue flush thread if FS is mounted as RO 972 * We stop issue flush thread if FS is mounted as RO
844 * or if flush_merge is not passed in mount option. 973 * or if flush_merge is not passed in mount option.
@@ -852,8 +981,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
852 } 981 }
853skip: 982skip:
854 /* Update the POSIXACL Flag */ 983 /* Update the POSIXACL Flag */
855 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 984 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
856 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); 985 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
986
857 return 0; 987 return 0;
858restore_gc: 988restore_gc:
859 if (need_restart_gc) { 989 if (need_restart_gc) {
@@ -893,6 +1023,12 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
893 ctx, len, NULL); 1023 ctx, len, NULL);
894} 1024}
895 1025
1026static int f2fs_key_prefix(struct inode *inode, u8 **key)
1027{
1028 *key = F2FS_I_SB(inode)->key_prefix;
1029 return F2FS_I_SB(inode)->key_prefix_size;
1030}
1031
896static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len, 1032static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
897 void *fs_data) 1033 void *fs_data)
898{ 1034{
@@ -909,6 +1045,7 @@ static unsigned f2fs_max_namelen(struct inode *inode)
909 1045
910static struct fscrypt_operations f2fs_cryptops = { 1046static struct fscrypt_operations f2fs_cryptops = {
911 .get_context = f2fs_get_context, 1047 .get_context = f2fs_get_context,
1048 .key_prefix = f2fs_key_prefix,
912 .set_context = f2fs_set_context, 1049 .set_context = f2fs_set_context,
913 .is_encrypted = f2fs_encrypted_inode, 1050 .is_encrypted = f2fs_encrypted_inode,
914 .empty_dir = f2fs_empty_dir, 1051 .empty_dir = f2fs_empty_dir,
@@ -998,11 +1135,12 @@ static int __f2fs_commit_super(struct buffer_head *bh,
998 return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA); 1135 return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
999} 1136}
1000 1137
1001static inline bool sanity_check_area_boundary(struct super_block *sb, 1138static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
1002 struct buffer_head *bh) 1139 struct buffer_head *bh)
1003{ 1140{
1004 struct f2fs_super_block *raw_super = (struct f2fs_super_block *) 1141 struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1005 (bh->b_data + F2FS_SUPER_OFFSET); 1142 (bh->b_data + F2FS_SUPER_OFFSET);
1143 struct super_block *sb = sbi->sb;
1006 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); 1144 u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1007 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); 1145 u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
1008 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr); 1146 u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
@@ -1081,6 +1219,7 @@ static inline bool sanity_check_area_boundary(struct super_block *sb,
1081 segment0_blkaddr) >> log_blocks_per_seg); 1219 segment0_blkaddr) >> log_blocks_per_seg);
1082 1220
1083 if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) { 1221 if (f2fs_readonly(sb) || bdev_read_only(sb->s_bdev)) {
1222 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1084 res = "internally"; 1223 res = "internally";
1085 } else { 1224 } else {
1086 err = __f2fs_commit_super(bh, NULL); 1225 err = __f2fs_commit_super(bh, NULL);
@@ -1098,11 +1237,12 @@ static inline bool sanity_check_area_boundary(struct super_block *sb,
1098 return false; 1237 return false;
1099} 1238}
1100 1239
1101static int sanity_check_raw_super(struct super_block *sb, 1240static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
1102 struct buffer_head *bh) 1241 struct buffer_head *bh)
1103{ 1242{
1104 struct f2fs_super_block *raw_super = (struct f2fs_super_block *) 1243 struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
1105 (bh->b_data + F2FS_SUPER_OFFSET); 1244 (bh->b_data + F2FS_SUPER_OFFSET);
1245 struct super_block *sb = sbi->sb;
1106 unsigned int blocksize; 1246 unsigned int blocksize;
1107 1247
1108 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { 1248 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
@@ -1169,7 +1309,7 @@ static int sanity_check_raw_super(struct super_block *sb,
1169 } 1309 }
1170 1310
1171 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ 1311 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
1172 if (sanity_check_area_boundary(sb, bh)) 1312 if (sanity_check_area_boundary(sbi, bh))
1173 return 1; 1313 return 1;
1174 1314
1175 return 0; 1315 return 0;
@@ -1201,7 +1341,6 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
1201static void init_sb_info(struct f2fs_sb_info *sbi) 1341static void init_sb_info(struct f2fs_sb_info *sbi)
1202{ 1342{
1203 struct f2fs_super_block *raw_super = sbi->raw_super; 1343 struct f2fs_super_block *raw_super = sbi->raw_super;
1204 int i;
1205 1344
1206 sbi->log_sectors_per_block = 1345 sbi->log_sectors_per_block =
1207 le32_to_cpu(raw_super->log_sectors_per_block); 1346 le32_to_cpu(raw_super->log_sectors_per_block);
@@ -1221,9 +1360,6 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
1221 sbi->cur_victim_sec = NULL_SECNO; 1360 sbi->cur_victim_sec = NULL_SECNO;
1222 sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; 1361 sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
1223 1362
1224 for (i = 0; i < NR_COUNT_TYPE; i++)
1225 atomic_set(&sbi->nr_pages[i], 0);
1226
1227 sbi->dir_level = DEF_DIR_LEVEL; 1363 sbi->dir_level = DEF_DIR_LEVEL;
1228 sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; 1364 sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
1229 sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; 1365 sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
@@ -1231,6 +1367,30 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
1231 1367
1232 INIT_LIST_HEAD(&sbi->s_list); 1368 INIT_LIST_HEAD(&sbi->s_list);
1233 mutex_init(&sbi->umount_mutex); 1369 mutex_init(&sbi->umount_mutex);
1370
1371#ifdef CONFIG_F2FS_FS_ENCRYPTION
1372 memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX,
1373 F2FS_KEY_DESC_PREFIX_SIZE);
1374 sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE;
1375#endif
1376}
1377
1378static int init_percpu_info(struct f2fs_sb_info *sbi)
1379{
1380 int i, err;
1381
1382 for (i = 0; i < NR_COUNT_TYPE; i++) {
1383 err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
1384 if (err)
1385 return err;
1386 }
1387
1388 err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
1389 if (err)
1390 return err;
1391
1392 return percpu_counter_init(&sbi->total_valid_inode_count, 0,
1393 GFP_KERNEL);
1234} 1394}
1235 1395
1236/* 1396/*
@@ -1239,10 +1399,11 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
1239 * to get the first valid one. If any one of them is broken, we pass 1399 * to get the first valid one. If any one of them is broken, we pass
1240 * them recovery flag back to the caller. 1400 * them recovery flag back to the caller.
1241 */ 1401 */
1242static int read_raw_super_block(struct super_block *sb, 1402static int read_raw_super_block(struct f2fs_sb_info *sbi,
1243 struct f2fs_super_block **raw_super, 1403 struct f2fs_super_block **raw_super,
1244 int *valid_super_block, int *recovery) 1404 int *valid_super_block, int *recovery)
1245{ 1405{
1406 struct super_block *sb = sbi->sb;
1246 int block; 1407 int block;
1247 struct buffer_head *bh; 1408 struct buffer_head *bh;
1248 struct f2fs_super_block *super; 1409 struct f2fs_super_block *super;
@@ -1262,7 +1423,7 @@ static int read_raw_super_block(struct super_block *sb,
1262 } 1423 }
1263 1424
1264 /* sanity checking of raw super */ 1425 /* sanity checking of raw super */
1265 if (sanity_check_raw_super(sb, bh)) { 1426 if (sanity_check_raw_super(sbi, bh)) {
1266 f2fs_msg(sb, KERN_ERR, 1427 f2fs_msg(sb, KERN_ERR,
1267 "Can't find valid F2FS filesystem in %dth superblock", 1428 "Can't find valid F2FS filesystem in %dth superblock",
1268 block + 1); 1429 block + 1);
@@ -1298,6 +1459,12 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
1298 struct buffer_head *bh; 1459 struct buffer_head *bh;
1299 int err; 1460 int err;
1300 1461
1462 if ((recover && f2fs_readonly(sbi->sb)) ||
1463 bdev_read_only(sbi->sb->s_bdev)) {
1464 set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
1465 return -EROFS;
1466 }
1467
1301 /* write back-up superblock first */ 1468 /* write back-up superblock first */
1302 bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1); 1469 bh = sb_getblk(sbi->sb, sbi->valid_super_block ? 0: 1);
1303 if (!bh) 1470 if (!bh)
@@ -1323,7 +1490,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1323 struct f2fs_sb_info *sbi; 1490 struct f2fs_sb_info *sbi;
1324 struct f2fs_super_block *raw_super; 1491 struct f2fs_super_block *raw_super;
1325 struct inode *root; 1492 struct inode *root;
1326 long err; 1493 int err;
1327 bool retry = true, need_fsck = false; 1494 bool retry = true, need_fsck = false;
1328 char *options = NULL; 1495 char *options = NULL;
1329 int recovery, i, valid_super_block; 1496 int recovery, i, valid_super_block;
@@ -1340,6 +1507,8 @@ try_onemore:
1340 if (!sbi) 1507 if (!sbi)
1341 return -ENOMEM; 1508 return -ENOMEM;
1342 1509
1510 sbi->sb = sb;
1511
1343 /* Load the checksum driver */ 1512 /* Load the checksum driver */
1344 sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); 1513 sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
1345 if (IS_ERR(sbi->s_chksum_driver)) { 1514 if (IS_ERR(sbi->s_chksum_driver)) {
@@ -1355,7 +1524,7 @@ try_onemore:
1355 goto free_sbi; 1524 goto free_sbi;
1356 } 1525 }
1357 1526
1358 err = read_raw_super_block(sb, &raw_super, &valid_super_block, 1527 err = read_raw_super_block(sbi, &raw_super, &valid_super_block,
1359 &recovery); 1528 &recovery);
1360 if (err) 1529 if (err)
1361 goto free_sbi; 1530 goto free_sbi;
@@ -1390,7 +1559,6 @@ try_onemore:
1390 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); 1559 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
1391 1560
1392 /* init f2fs-specific super block info */ 1561 /* init f2fs-specific super block info */
1393 sbi->sb = sb;
1394 sbi->raw_super = raw_super; 1562 sbi->raw_super = raw_super;
1395 sbi->valid_super_block = valid_super_block; 1563 sbi->valid_super_block = valid_super_block;
1396 mutex_init(&sbi->gc_mutex); 1564 mutex_init(&sbi->gc_mutex);
@@ -1415,6 +1583,10 @@ try_onemore:
1415 init_waitqueue_head(&sbi->cp_wait); 1583 init_waitqueue_head(&sbi->cp_wait);
1416 init_sb_info(sbi); 1584 init_sb_info(sbi);
1417 1585
1586 err = init_percpu_info(sbi);
1587 if (err)
1588 goto free_options;
1589
1418 /* get an inode for meta space */ 1590 /* get an inode for meta space */
1419 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); 1591 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
1420 if (IS_ERR(sbi->meta_inode)) { 1592 if (IS_ERR(sbi->meta_inode)) {
@@ -1431,13 +1603,13 @@ try_onemore:
1431 1603
1432 sbi->total_valid_node_count = 1604 sbi->total_valid_node_count =
1433 le32_to_cpu(sbi->ckpt->valid_node_count); 1605 le32_to_cpu(sbi->ckpt->valid_node_count);
1434 sbi->total_valid_inode_count = 1606 percpu_counter_set(&sbi->total_valid_inode_count,
1435 le32_to_cpu(sbi->ckpt->valid_inode_count); 1607 le32_to_cpu(sbi->ckpt->valid_inode_count));
1436 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count); 1608 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
1437 sbi->total_valid_block_count = 1609 sbi->total_valid_block_count =
1438 le64_to_cpu(sbi->ckpt->valid_block_count); 1610 le64_to_cpu(sbi->ckpt->valid_block_count);
1439 sbi->last_valid_block_count = sbi->total_valid_block_count; 1611 sbi->last_valid_block_count = sbi->total_valid_block_count;
1440 sbi->alloc_valid_block_count = 0; 1612
1441 for (i = 0; i < NR_INODE_TYPE; i++) { 1613 for (i = 0; i < NR_INODE_TYPE; i++) {
1442 INIT_LIST_HEAD(&sbi->inode_list[i]); 1614 INIT_LIST_HEAD(&sbi->inode_list[i]);
1443 spin_lock_init(&sbi->inode_lock[i]); 1615 spin_lock_init(&sbi->inode_lock[i]);
@@ -1515,9 +1687,12 @@ try_onemore:
1515 if (f2fs_proc_root) 1687 if (f2fs_proc_root)
1516 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); 1688 sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
1517 1689
1518 if (sbi->s_proc) 1690 if (sbi->s_proc) {
1519 proc_create_data("segment_info", S_IRUGO, sbi->s_proc, 1691 proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
1520 &f2fs_seq_segment_info_fops, sb); 1692 &f2fs_seq_segment_info_fops, sb);
1693 proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
1694 &f2fs_seq_segment_bits_fops, sb);
1695 }
1521 1696
1522 sbi->s_kobj.kset = f2fs_kset; 1697 sbi->s_kobj.kset = f2fs_kset;
1523 init_completion(&sbi->s_kobj_unregister); 1698 init_completion(&sbi->s_kobj_unregister);
@@ -1541,14 +1716,24 @@ try_onemore:
1541 if (need_fsck) 1716 if (need_fsck)
1542 set_sbi_flag(sbi, SBI_NEED_FSCK); 1717 set_sbi_flag(sbi, SBI_NEED_FSCK);
1543 1718
1544 err = recover_fsync_data(sbi); 1719 err = recover_fsync_data(sbi, false);
1545 if (err) { 1720 if (err < 0) {
1546 need_fsck = true; 1721 need_fsck = true;
1547 f2fs_msg(sb, KERN_ERR, 1722 f2fs_msg(sb, KERN_ERR,
1548 "Cannot recover all fsync data errno=%ld", err); 1723 "Cannot recover all fsync data errno=%d", err);
1724 goto free_kobj;
1725 }
1726 } else {
1727 err = recover_fsync_data(sbi, true);
1728
1729 if (!f2fs_readonly(sb) && err > 0) {
1730 err = -EINVAL;
1731 f2fs_msg(sb, KERN_ERR,
1732 "Need to recover fsync data");
1549 goto free_kobj; 1733 goto free_kobj;
1550 } 1734 }
1551 } 1735 }
1736
1552 /* recover_fsync_data() cleared this already */ 1737 /* recover_fsync_data() cleared this already */
1553 clear_sbi_flag(sbi, SBI_POR_DOING); 1738 clear_sbi_flag(sbi, SBI_POR_DOING);
1554 1739
@@ -1565,10 +1750,10 @@ try_onemore:
1565 kfree(options); 1750 kfree(options);
1566 1751
1567 /* recover broken superblock */ 1752 /* recover broken superblock */
1568 if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) { 1753 if (recovery) {
1569 err = f2fs_commit_super(sbi, true); 1754 err = f2fs_commit_super(sbi, true);
1570 f2fs_msg(sb, KERN_INFO, 1755 f2fs_msg(sb, KERN_INFO,
1571 "Try to recover %dth superblock, ret: %ld", 1756 "Try to recover %dth superblock, ret: %d",
1572 sbi->valid_super_block ? 1 : 2, err); 1757 sbi->valid_super_block ? 1 : 2, err);
1573 } 1758 }
1574 1759
@@ -1583,6 +1768,7 @@ free_kobj:
1583free_proc: 1768free_proc:
1584 if (sbi->s_proc) { 1769 if (sbi->s_proc) {
1585 remove_proc_entry("segment_info", sbi->s_proc); 1770 remove_proc_entry("segment_info", sbi->s_proc);
1771 remove_proc_entry("segment_bits", sbi->s_proc);
1586 remove_proc_entry(sb->s_id, f2fs_proc_root); 1772 remove_proc_entry(sb->s_id, f2fs_proc_root);
1587 } 1773 }
1588 f2fs_destroy_stats(sbi); 1774 f2fs_destroy_stats(sbi);
@@ -1603,6 +1789,7 @@ free_meta_inode:
1603 make_bad_inode(sbi->meta_inode); 1789 make_bad_inode(sbi->meta_inode);
1604 iput(sbi->meta_inode); 1790 iput(sbi->meta_inode);
1605free_options: 1791free_options:
1792 destroy_percpu_info(sbi);
1606 kfree(options); 1793 kfree(options);
1607free_sb_buf: 1794free_sb_buf:
1608 kfree(raw_super); 1795 kfree(raw_super);
@@ -1688,6 +1875,16 @@ static int __init init_f2fs_fs(void)
1688 err = -ENOMEM; 1875 err = -ENOMEM;
1689 goto free_extent_cache; 1876 goto free_extent_cache;
1690 } 1877 }
1878#ifdef CONFIG_F2FS_FAULT_INJECTION
1879 f2fs_fault_inject.kset = f2fs_kset;
1880 f2fs_build_fault_attr(0);
1881 err = kobject_init_and_add(&f2fs_fault_inject, &f2fs_fault_ktype,
1882 NULL, "fault_injection");
1883 if (err) {
1884 f2fs_fault_inject.kset = NULL;
1885 goto free_kset;
1886 }
1887#endif
1691 err = register_shrinker(&f2fs_shrinker_info); 1888 err = register_shrinker(&f2fs_shrinker_info);
1692 if (err) 1889 if (err)
1693 goto free_kset; 1890 goto free_kset;
@@ -1706,6 +1903,10 @@ free_filesystem:
1706free_shrinker: 1903free_shrinker:
1707 unregister_shrinker(&f2fs_shrinker_info); 1904 unregister_shrinker(&f2fs_shrinker_info);
1708free_kset: 1905free_kset:
1906#ifdef CONFIG_F2FS_FAULT_INJECTION
1907 if (f2fs_fault_inject.kset)
1908 kobject_put(&f2fs_fault_inject);
1909#endif
1709 kset_unregister(f2fs_kset); 1910 kset_unregister(f2fs_kset);
1710free_extent_cache: 1911free_extent_cache:
1711 destroy_extent_cache(); 1912 destroy_extent_cache();
@@ -1725,14 +1926,17 @@ static void __exit exit_f2fs_fs(void)
1725{ 1926{
1726 remove_proc_entry("fs/f2fs", NULL); 1927 remove_proc_entry("fs/f2fs", NULL);
1727 f2fs_destroy_root_stats(); 1928 f2fs_destroy_root_stats();
1728 unregister_shrinker(&f2fs_shrinker_info);
1729 unregister_filesystem(&f2fs_fs_type); 1929 unregister_filesystem(&f2fs_fs_type);
1930 unregister_shrinker(&f2fs_shrinker_info);
1931#ifdef CONFIG_F2FS_FAULT_INJECTION
1932 kobject_put(&f2fs_fault_inject);
1933#endif
1934 kset_unregister(f2fs_kset);
1730 destroy_extent_cache(); 1935 destroy_extent_cache();
1731 destroy_checkpoint_caches(); 1936 destroy_checkpoint_caches();
1732 destroy_segment_manager_caches(); 1937 destroy_segment_manager_caches();
1733 destroy_node_manager_caches(); 1938 destroy_node_manager_caches();
1734 destroy_inodecache(); 1939 destroy_inodecache();
1735 kset_unregister(f2fs_kset);
1736 f2fs_destroy_trace_ios(); 1940 f2fs_destroy_trace_ios();
1737} 1941}
1738 1942
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 17fd2b1a6848..00ea56797258 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -498,7 +498,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
498 free = free + ENTRY_SIZE(here); 498 free = free + ENTRY_SIZE(here);
499 499
500 if (unlikely(free < newsize)) { 500 if (unlikely(free < newsize)) {
501 error = -ENOSPC; 501 error = -E2BIG;
502 goto exit; 502 goto exit;
503 } 503 }
504 } 504 }
@@ -526,7 +526,6 @@ static int __f2fs_setxattr(struct inode *inode, int index,
526 * Before we come here, old entry is removed. 526 * Before we come here, old entry is removed.
527 * We just write new entry. 527 * We just write new entry.
528 */ 528 */
529 memset(last, 0, newsize);
530 last->e_name_index = index; 529 last->e_name_index = index;
531 last->e_name_len = len; 530 last->e_name_len = len;
532 memcpy(last->e_name, name, len); 531 memcpy(last->e_name, name, len);