 Documentation/ABI/testing/sysfs-fs-f2fs |  31
 Documentation/filesystems/f2fs.txt      |  24
 MAINTAINERS                             |   1
 fs/f2fs/Makefile                        |   2
 fs/f2fs/checkpoint.c                    | 195
 fs/f2fs/data.c                          | 621
 fs/f2fs/debug.c                         |  53
 fs/f2fs/dir.c                           |  47
 fs/f2fs/f2fs.h                          | 195
 fs/f2fs/file.c                          |  84
 fs/f2fs/gc.c                            |  22
 fs/f2fs/gc.h                            |   2
 fs/f2fs/inline.c                        | 222
 fs/f2fs/inode.c                         |  23
 fs/f2fs/namei.c                         |   5
 fs/f2fs/node.c                          | 272
 fs/f2fs/node.h                          |   8
 fs/f2fs/recovery.c                      |  49
 fs/f2fs/segment.c                       | 584
 fs/f2fs/segment.h                       |  81
 fs/f2fs/super.c                         |  72
 fs/f2fs/xattr.c                         |   2
 include/linux/f2fs_fs.h                 |   7
 include/trace/events/f2fs.h             | 107
 24 files changed, 1884 insertions(+), 825 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 31942efcaf0e..32b0809203dd 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -24,3 +24,34 @@ Date:		July 2013
 Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
 Description:
	 Controls the victim selection policy for garbage collection.
+
+What:		/sys/fs/f2fs/<disk>/reclaim_segments
+Date:		October 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+		 Controls the issue rate of segment discard commands.
+
+What:		/sys/fs/f2fs/<disk>/ipu_policy
+Date:		November 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+		 Controls the in-place-update policy.
+
+What:		/sys/fs/f2fs/<disk>/min_ipu_util
+Date:		November 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+		 Controls the FS utilization condition for the in-place-update
+		 policies.
+
+What:		/sys/fs/f2fs/<disk>/max_small_discards
+Date:		November 2013
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+		 Controls the issue rate of small discard commands.
+
+What:		/sys/fs/f2fs/<disk>/max_victim_search
+Date:		January 2014
+Contact:	"Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+		 Controls the number of trials to find a victim segment.
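These attributes are ordinary sysfs files and can be tuned at runtime once an f2fs volume is mounted. A minimal userspace sketch in C follows; the device name ("sda1") and the value written are illustrative assumptions, not part of this patch:

/* Sketch: write one of the new f2fs sysfs tunables from userspace. */
#include <stdio.h>

int main(void)
{
	/* assumed volume: /sys/fs/f2fs/<disk> with <disk> = sda1 */
	FILE *f = fopen("/sys/fs/f2fs/sda1/max_small_discards", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%d\n", 32);	/* cache up to 32 small discards per checkpoint */
	fclose(f);
	return 0;
}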
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index a3fe811bbdbc..b8d284975f0f 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -120,6 +120,8 @@ active_logs=%u         Support configuring the number of active logs. In the
 disable_ext_identify   Disable the extension list configured by mkfs, so f2fs
                        is not aware of cold files such as media files.
 inline_xattr           Enable the inline xattrs feature.
+inline_data            Enable the inline data feature: newly created small
+                       (<~3.4KB) files can be written into the inode block.
 
 ================================================================================
 DEBUGFS ENTRIES
@@ -171,6 +173,28 @@ Files in /sys/fs/f2fs/<devname>
                               conduct checkpoint to reclaim the prefree segments
                               to free segments. By default, 100 segments, 200MB.
 
+ max_small_discards           This parameter controls the number of discard
+                              commands that consist of small blocks less than
+                              2MB. The candidates to be discarded are cached
+                              until checkpoint is triggered, and issued during
+                              the checkpoint. By default, it is disabled (0).
+
+ ipu_policy                   This parameter controls the policy of in-place
+                              updates in f2fs. There are five policies:
+                              0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR,
+                              2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL,
+                              4: F2FS_IPU_DISABLE.
+
+ min_ipu_util                 This parameter controls the threshold to trigger
+                              in-place-updates. The number indicates the
+                              percentage of filesystem utilization, and is used
+                              by the F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL
+                              policies.
+
+ max_victim_search            This parameter controls the number of trials to
+                              find a victim segment when conducting SSR and
+                              cleaning operations. The default value is 4096,
+                              which covers an 8GB block address range.
+
 ================================================================================
 USAGE
 ================================================================================
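The five ipu_policy values and min_ipu_util interact as described above; the decision can be modeled with a short C sketch. This is an illustrative reconstruction from the documentation text, not the kernel's implementation, and all names in it are assumptions:

enum ipu_policy { IPU_FORCE, IPU_SSR, IPU_UTIL, IPU_SSR_UTIL, IPU_DISABLE };

/* Sketch: would this write be done in place under the given policy? */
static int want_inplace_update(enum ipu_policy policy, int need_ssr,
				int util_percent, int min_ipu_util)
{
	switch (policy) {
	case IPU_FORCE:		/* 0: always update in place */
		return 1;
	case IPU_SSR:		/* 1: only when SSR allocation is in use */
		return need_ssr;
	case IPU_UTIL:		/* 2: only above the min_ipu_util threshold */
		return util_percent > min_ipu_util;
	case IPU_SSR_UTIL:	/* 3: both conditions must hold */
		return need_ssr && util_percent > min_ipu_util;
	default:		/* 4: IPU_DISABLE, never update in place */
		return 0;
	}
}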
diff --git a/MAINTAINERS b/MAINTAINERS
index 0207c30906ad..671047620dbb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3634,6 +3634,7 @@ W: http://en.wikipedia.org/wiki/F2FS
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
 S:	Maintained
 F:	Documentation/filesystems/f2fs.txt
+F:	Documentation/ABI/testing/sysfs-fs-f2fs
 F:	fs/f2fs/
 F:	include/linux/f2fs_fs.h
 
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 27a0820340b9..2e35da12d292 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -1,6 +1,6 @@
 obj-$(CONFIG_F2FS_FS) += f2fs.o
 
-f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
+f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
 f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
 f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
 f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 5716e5eb4e8e..293d0486a40f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -30,7 +30,7 @@ static struct kmem_cache *inode_entry_slab;
  */
 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 {
-	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct address_space *mapping = META_MAPPING(sbi);
 	struct page *page = NULL;
 repeat:
 	page = grab_cache_page(mapping, index);
@@ -50,7 +50,7 @@ repeat:
  */
 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 {
-	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct address_space *mapping = META_MAPPING(sbi);
 	struct page *page;
 repeat:
 	page = grab_cache_page(mapping, index);
@@ -61,11 +61,12 @@ repeat:
 	if (PageUptodate(page))
 		goto out;
 
-	if (f2fs_readpage(sbi, page, index, READ_SYNC))
+	if (f2fs_submit_page_bio(sbi, page, index,
+				READ_SYNC | REQ_META | REQ_PRIO))
 		goto repeat;
 
 	lock_page(page);
-	if (page->mapping != mapping) {
+	if (unlikely(page->mapping != mapping)) {
 		f2fs_put_page(page, 1);
 		goto repeat;
 	}
@@ -81,13 +82,12 @@ static int f2fs_write_meta_page(struct page *page,
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 
 	/* Should not write any meta pages, if any IO error was occurred */
-	if (wbc->for_reclaim || sbi->por_doing ||
-			is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)) {
-		dec_page_count(sbi, F2FS_DIRTY_META);
-		wbc->pages_skipped++;
-		set_page_dirty(page);
-		return AOP_WRITEPAGE_ACTIVATE;
-	}
+	if (unlikely(sbi->por_doing ||
+			is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
+		goto redirty_out;
+
+	if (wbc->for_reclaim)
+		goto redirty_out;
 
 	wait_on_page_writeback(page);
 
@@ -95,24 +95,31 @@ static int f2fs_write_meta_page(struct page *page,
 	dec_page_count(sbi, F2FS_DIRTY_META);
 	unlock_page(page);
 	return 0;
+
+redirty_out:
+	dec_page_count(sbi, F2FS_DIRTY_META);
+	wbc->pages_skipped++;
+	set_page_dirty(page);
+	return AOP_WRITEPAGE_ACTIVATE;
 }
 
 static int f2fs_write_meta_pages(struct address_space *mapping,
 				struct writeback_control *wbc)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
-	struct block_device *bdev = sbi->sb->s_bdev;
+	int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
 	long written;
 
 	if (wbc->for_kupdate)
 		return 0;
 
-	if (get_pages(sbi, F2FS_DIRTY_META) == 0)
+	/* collect a number of dirty meta pages and write together */
+	if (get_pages(sbi, F2FS_DIRTY_META) < nrpages)
 		return 0;
 
 	/* if mounting is failed, skip writing node pages */
 	mutex_lock(&sbi->cp_mutex);
-	written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
+	written = sync_meta_pages(sbi, META, nrpages);
 	mutex_unlock(&sbi->cp_mutex);
 	wbc->nr_to_write -= written;
 	return 0;
@@ -121,7 +128,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 						long nr_to_write)
 {
-	struct address_space *mapping = sbi->meta_inode->i_mapping;
+	struct address_space *mapping = META_MAPPING(sbi);
 	pgoff_t index = 0, end = LONG_MAX;
 	struct pagevec pvec;
 	long nwritten = 0;
@@ -136,7 +143,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 				PAGECACHE_TAG_DIRTY,
 				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
-		if (nr_pages == 0)
+		if (unlikely(nr_pages == 0))
 			break;
 
 		for (i = 0; i < nr_pages; i++) {
@@ -149,7 +156,8 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 				unlock_page(page);
 				break;
 			}
-			if (nwritten++ >= nr_to_write)
+			nwritten++;
+			if (unlikely(nwritten >= nr_to_write))
 				break;
 		}
 		pagevec_release(&pvec);
@@ -157,7 +165,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
 	}
 
 	if (nwritten)
-		f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
+		f2fs_submit_merged_bio(sbi, type, WRITE);
 
 	return nwritten;
 }
@@ -186,31 +194,24 @@ const struct address_space_operations f2fs_meta_aops = {
 
 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
 {
-	unsigned int max_orphans;
 	int err = 0;
 
-	/*
-	 * considering 512 blocks in a segment 5 blocks are needed for cp
-	 * and log segment summaries. Remaining blocks are used to keep
-	 * orphan entries with the limitation one reserved segment
-	 * for cp pack we can have max 1020*507 orphan entries
-	 */
-	max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
-	mutex_lock(&sbi->orphan_inode_mutex);
-	if (sbi->n_orphans >= max_orphans)
+	spin_lock(&sbi->orphan_inode_lock);
+	if (unlikely(sbi->n_orphans >= sbi->max_orphans))
 		err = -ENOSPC;
 	else
 		sbi->n_orphans++;
-	mutex_unlock(&sbi->orphan_inode_mutex);
+	spin_unlock(&sbi->orphan_inode_lock);
+
 	return err;
 }
 
 void release_orphan_inode(struct f2fs_sb_info *sbi)
 {
-	mutex_lock(&sbi->orphan_inode_mutex);
+	spin_lock(&sbi->orphan_inode_lock);
 	f2fs_bug_on(sbi->n_orphans == 0);
 	sbi->n_orphans--;
-	mutex_unlock(&sbi->orphan_inode_mutex);
+	spin_unlock(&sbi->orphan_inode_lock);
 }
 
 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -218,27 +219,30 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 	struct list_head *head, *this;
 	struct orphan_inode_entry *new = NULL, *orphan = NULL;
 
-	mutex_lock(&sbi->orphan_inode_mutex);
+	new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+	new->ino = ino;
+
+	spin_lock(&sbi->orphan_inode_lock);
 	head = &sbi->orphan_inode_list;
 	list_for_each(this, head) {
 		orphan = list_entry(this, struct orphan_inode_entry, list);
-		if (orphan->ino == ino)
-			goto out;
+		if (orphan->ino == ino) {
+			spin_unlock(&sbi->orphan_inode_lock);
+			kmem_cache_free(orphan_entry_slab, new);
+			return;
+		}
+
 		if (orphan->ino > ino)
 			break;
 		orphan = NULL;
 	}
 
-	new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
-	new->ino = ino;
-
 	/* add new_oentry into list which is sorted by inode number */
 	if (orphan)
 		list_add(&new->list, this->prev);
 	else
 		list_add_tail(&new->list, head);
-out:
-	mutex_unlock(&sbi->orphan_inode_mutex);
+	spin_unlock(&sbi->orphan_inode_lock);
 }
 
 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -246,7 +250,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 	struct list_head *head;
 	struct orphan_inode_entry *orphan;
 
-	mutex_lock(&sbi->orphan_inode_mutex);
+	spin_lock(&sbi->orphan_inode_lock);
 	head = &sbi->orphan_inode_list;
 	list_for_each_entry(orphan, head, list) {
 		if (orphan->ino == ino) {
@@ -257,7 +261,7 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 			break;
 		}
 	}
-	mutex_unlock(&sbi->orphan_inode_mutex);
+	spin_unlock(&sbi->orphan_inode_lock);
 }
 
 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -270,12 +274,12 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 	iput(inode);
 }
 
-int recover_orphan_inodes(struct f2fs_sb_info *sbi)
+void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 {
 	block_t start_blk, orphan_blkaddr, i, j;
 
 	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
-		return 0;
+		return;
 
 	sbi->por_doing = true;
 	start_blk = __start_cp_addr(sbi) + 1;
@@ -295,29 +299,39 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
 	/* clear Orphan Flag */
 	clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
 	sbi->por_doing = false;
-	return 0;
+	return;
 }
 
 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 {
-	struct list_head *head, *this, *next;
+	struct list_head *head;
 	struct f2fs_orphan_block *orphan_blk = NULL;
-	struct page *page = NULL;
 	unsigned int nentries = 0;
-	unsigned short index = 1;
-	unsigned short orphan_blocks;
-
-	orphan_blocks = (unsigned short)((sbi->n_orphans +
+	unsigned short index;
+	unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
 		(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
+	struct page *page = NULL;
+	struct orphan_inode_entry *orphan = NULL;
+
+	for (index = 0; index < orphan_blocks; index++)
+		grab_meta_page(sbi, start_blk + index);
 
-	mutex_lock(&sbi->orphan_inode_mutex);
+	index = 1;
+	spin_lock(&sbi->orphan_inode_lock);
 	head = &sbi->orphan_inode_list;
 
 	/* loop for each orphan inode entry and write them in Jornal block */
-	list_for_each_safe(this, next, head) {
-		struct orphan_inode_entry *orphan;
+	list_for_each_entry(orphan, head, list) {
+		if (!page) {
+			page = find_get_page(META_MAPPING(sbi), start_blk++);
+			f2fs_bug_on(!page);
+			orphan_blk =
+				(struct f2fs_orphan_block *)page_address(page);
+			memset(orphan_blk, 0, sizeof(*orphan_blk));
+			f2fs_put_page(page, 0);
+		}
 
-		orphan = list_entry(this, struct orphan_inode_entry, list);
+		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
 
 		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
 			/*
@@ -331,29 +345,20 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 			set_page_dirty(page);
 			f2fs_put_page(page, 1);
 			index++;
-			start_blk++;
 			nentries = 0;
 			page = NULL;
 		}
-		if (page)
-			goto page_exist;
+	}
 
-		page = grab_meta_page(sbi, start_blk);
-		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
-		memset(orphan_blk, 0, sizeof(*orphan_blk));
-page_exist:
-		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
+	if (page) {
+		orphan_blk->blk_addr = cpu_to_le16(index);
+		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
+		orphan_blk->entry_count = cpu_to_le32(nentries);
+		set_page_dirty(page);
+		f2fs_put_page(page, 1);
 	}
-	if (!page)
-		goto end;
 
-	orphan_blk->blk_addr = cpu_to_le16(index);
-	orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
-	orphan_blk->entry_count = cpu_to_le32(nentries);
-	set_page_dirty(page);
-	f2fs_put_page(page, 1);
-end:
-	mutex_unlock(&sbi->orphan_inode_mutex);
+	spin_unlock(&sbi->orphan_inode_lock);
 }
 
 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -428,7 +433,8 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
 
 	/* The second checkpoint pack should start at the next segment */
-	cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+	cp_start_blk_no += ((unsigned long long)1) <<
+				le32_to_cpu(fsb->log_blocks_per_seg);
 	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
 
 	if (cp1 && cp2) {
@@ -465,7 +471,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
 	list_for_each(this, head) {
 		struct dir_inode_entry *entry;
 		entry = list_entry(this, struct dir_inode_entry, list);
-		if (entry->inode == inode)
+		if (unlikely(entry->inode == inode))
 			return -EEXIST;
 	}
 	list_add_tail(&new->list, head);
@@ -513,8 +519,8 @@ void add_dirty_dir_inode(struct inode *inode)
 void remove_dirty_dir_inode(struct inode *inode)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-	struct list_head *head = &sbi->dir_inode_list;
-	struct list_head *this;
+
+	struct list_head *this, *head;
 
 	if (!S_ISDIR(inode->i_mode))
 		return;
@@ -525,6 +531,7 @@ void remove_dirty_dir_inode(struct inode *inode)
 		return;
 	}
 
+	head = &sbi->dir_inode_list;
 	list_for_each(this, head) {
 		struct dir_inode_entry *entry;
 		entry = list_entry(this, struct dir_inode_entry, list);
@@ -546,11 +553,13 @@ void remove_dirty_dir_inode(struct inode *inode)
 
 struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
 {
-	struct list_head *head = &sbi->dir_inode_list;
-	struct list_head *this;
+
+	struct list_head *this, *head;
 	struct inode *inode = NULL;
 
 	spin_lock(&sbi->dir_inode_lock);
+
+	head = &sbi->dir_inode_list;
 	list_for_each(this, head) {
 		struct dir_inode_entry *entry;
 		entry = list_entry(this, struct dir_inode_entry, list);
@@ -565,11 +574,13 @@ struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino)
 
 void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
 {
-	struct list_head *head = &sbi->dir_inode_list;
+	struct list_head *head;
 	struct dir_inode_entry *entry;
 	struct inode *inode;
 retry:
 	spin_lock(&sbi->dir_inode_lock);
+
+	head = &sbi->dir_inode_list;
 	if (list_empty(head)) {
 		spin_unlock(&sbi->dir_inode_lock);
 		return;
@@ -585,7 +596,7 @@ retry:
 		 * We should submit bio, since it exists several
 		 * wribacking dentry pages in the freeing inode.
 		 */
-		f2fs_submit_bio(sbi, DATA, true);
+		f2fs_submit_merged_bio(sbi, DATA, WRITE);
 	}
 	goto retry;
 }
@@ -760,8 +771,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	/* wait for previous submitted node/meta pages writeback */
 	wait_on_all_pages_writeback(sbi);
 
-	filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
-	filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
+	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
+	filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
 
 	/* update user_block_counts */
 	sbi->last_valid_block_count = sbi->total_valid_block_count;
@@ -770,7 +781,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	/* Here, we only have one bio having CP pack */
 	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
 
-	if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
+	if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
 		clear_prefree_segments(sbi);
 		F2FS_RESET_SB_DIRT(sbi);
 	}
@@ -791,9 +802,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 	trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
 
-	f2fs_submit_bio(sbi, DATA, true);
-	f2fs_submit_bio(sbi, NODE, true);
-	f2fs_submit_bio(sbi, META, true);
+	f2fs_submit_merged_bio(sbi, DATA, WRITE);
+	f2fs_submit_merged_bio(sbi, NODE, WRITE);
+	f2fs_submit_merged_bio(sbi, META, WRITE);
 
 	/*
 	 * update checkpoint pack index
@@ -818,20 +829,28 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 void init_orphan_info(struct f2fs_sb_info *sbi)
 {
-	mutex_init(&sbi->orphan_inode_mutex);
+	spin_lock_init(&sbi->orphan_inode_lock);
 	INIT_LIST_HEAD(&sbi->orphan_inode_list);
 	sbi->n_orphans = 0;
+	/*
+	 * considering 512 blocks in a segment 8 blocks are needed for cp
+	 * and log segment summaries. Remaining blocks are used to keep
+	 * orphan entries with the limitation one reserved segment
+	 * for cp pack we can have max 1020*504 orphan entries
+	 */
+	sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
+				* F2FS_ORPHANS_PER_BLOCK;
 }
 
 int __init create_checkpoint_caches(void)
 {
 	orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
 			sizeof(struct orphan_inode_entry), NULL);
-	if (unlikely(!orphan_entry_slab))
+	if (!orphan_entry_slab)
 		return -ENOMEM;
 	inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
 			sizeof(struct dir_inode_entry), NULL);
-	if (unlikely(!inode_entry_slab)) {
+	if (!inode_entry_slab) {
 		kmem_cache_destroy(orphan_entry_slab);
 		return -ENOMEM;
 	}
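The new comment in init_orphan_info() compresses a small capacity calculation; here is a worked sketch of the arithmetic, assuming the figures the comment itself names (512 blocks per segment, NR_CURSEG_TYPE == 6, 1020 orphan entries per block):

#include <stdio.h>

int main(void)
{
	/* assumptions taken from the comment above, not computed here */
	unsigned int blocks_per_seg = 512;
	unsigned int nr_curseg_type = 6;	/* NR_CURSEG_TYPE */
	unsigned int orphans_per_block = 1020;	/* F2FS_ORPHANS_PER_BLOCK */

	/* 2 blocks hold the cp pack header/footer, 6 hold log summaries */
	unsigned int orphan_blocks = blocks_per_seg - 2 - nr_curseg_type;

	/* prints 504 blocks and 514080 entries, i.e. the "1020*504" above */
	printf("%u orphan blocks, max %u orphan entries\n",
			orphan_blocks, orphan_blocks * orphans_per_block);
	return 0;
}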
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index aa3438c571fa..0ae558723506 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -24,6 +24,195 @@
24#include "segment.h" 24#include "segment.h"
25#include <trace/events/f2fs.h> 25#include <trace/events/f2fs.h>
26 26
27static void f2fs_read_end_io(struct bio *bio, int err)
28{
29 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
30 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
31
32 do {
33 struct page *page = bvec->bv_page;
34
35 if (--bvec >= bio->bi_io_vec)
36 prefetchw(&bvec->bv_page->flags);
37
38 if (unlikely(!uptodate)) {
39 ClearPageUptodate(page);
40 SetPageError(page);
41 } else {
42 SetPageUptodate(page);
43 }
44 unlock_page(page);
45 } while (bvec >= bio->bi_io_vec);
46
47 bio_put(bio);
48}
49
50static void f2fs_write_end_io(struct bio *bio, int err)
51{
52 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
53 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
54 struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb);
55
56 do {
57 struct page *page = bvec->bv_page;
58
59 if (--bvec >= bio->bi_io_vec)
60 prefetchw(&bvec->bv_page->flags);
61
62 if (unlikely(!uptodate)) {
63 SetPageError(page);
64 set_bit(AS_EIO, &page->mapping->flags);
65 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
66 sbi->sb->s_flags |= MS_RDONLY;
67 }
68 end_page_writeback(page);
69 dec_page_count(sbi, F2FS_WRITEBACK);
70 } while (bvec >= bio->bi_io_vec);
71
72 if (bio->bi_private)
73 complete(bio->bi_private);
74
75 if (!get_pages(sbi, F2FS_WRITEBACK) &&
76 !list_empty(&sbi->cp_wait.task_list))
77 wake_up(&sbi->cp_wait);
78
79 bio_put(bio);
80}
81
82/*
83 * Low-level block read/write IO operations.
84 */
85static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
86 int npages, bool is_read)
87{
88 struct bio *bio;
89
90 /* No failure on bio allocation */
91 bio = bio_alloc(GFP_NOIO, npages);
92
93 bio->bi_bdev = sbi->sb->s_bdev;
94 bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
95 bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
96
97 return bio;
98}
99
100static void __submit_merged_bio(struct f2fs_bio_info *io)
101{
102 struct f2fs_io_info *fio = &io->fio;
103 int rw;
104
105 if (!io->bio)
106 return;
107
108 rw = fio->rw;
109
110 if (is_read_io(rw)) {
111 trace_f2fs_submit_read_bio(io->sbi->sb, rw,
112 fio->type, io->bio);
113 submit_bio(rw, io->bio);
114 } else {
115 trace_f2fs_submit_write_bio(io->sbi->sb, rw,
116 fio->type, io->bio);
117 /*
118 * META_FLUSH is only from the checkpoint procedure, and we
119 * should wait this metadata bio for FS consistency.
120 */
121 if (fio->type == META_FLUSH) {
122 DECLARE_COMPLETION_ONSTACK(wait);
123 io->bio->bi_private = &wait;
124 submit_bio(rw, io->bio);
125 wait_for_completion(&wait);
126 } else {
127 submit_bio(rw, io->bio);
128 }
129 }
130
131 io->bio = NULL;
132}
133
134void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
135 enum page_type type, int rw)
136{
137 enum page_type btype = PAGE_TYPE_OF_BIO(type);
138 struct f2fs_bio_info *io;
139
140 io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
141
142 mutex_lock(&io->io_mutex);
143
144 /* change META to META_FLUSH in the checkpoint procedure */
145 if (type >= META_FLUSH) {
146 io->fio.type = META_FLUSH;
147 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
148 }
149 __submit_merged_bio(io);
150 mutex_unlock(&io->io_mutex);
151}
152
153/*
154 * Fill the locked page with data located in the block address.
155 * Return unlocked page.
156 */
157int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
158 block_t blk_addr, int rw)
159{
160 struct bio *bio;
161
162 trace_f2fs_submit_page_bio(page, blk_addr, rw);
163
164 /* Allocate a new bio */
165 bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
166
167 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
168 bio_put(bio);
169 f2fs_put_page(page, 1);
170 return -EFAULT;
171 }
172
173 submit_bio(rw, bio);
174 return 0;
175}
176
177void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
178 block_t blk_addr, struct f2fs_io_info *fio)
179{
180 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
181 struct f2fs_bio_info *io;
182 bool is_read = is_read_io(fio->rw);
183
184 io = is_read ? &sbi->read_io : &sbi->write_io[btype];
185
186 verify_block_addr(sbi, blk_addr);
187
188 mutex_lock(&io->io_mutex);
189
190 if (!is_read)
191 inc_page_count(sbi, F2FS_WRITEBACK);
192
193 if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
194 io->fio.rw != fio->rw))
195 __submit_merged_bio(io);
196alloc_new:
197 if (io->bio == NULL) {
198 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
199
200 io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
201 io->fio = *fio;
202 }
203
204 if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
205 PAGE_CACHE_SIZE) {
206 __submit_merged_bio(io);
207 goto alloc_new;
208 }
209
210 io->last_block_in_bio = blk_addr;
211
212 mutex_unlock(&io->io_mutex);
213 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
214}
215
27/* 216/*
28 * Lock ordering for the change of data block address: 217 * Lock ordering for the change of data block address:
29 * ->data_page 218 * ->data_page
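The hunk above introduces the per-type merged-bio machinery; a caller-side sketch of the single-page path follows, modeled on the get_meta_page() hunk in checkpoint.c earlier in this patch. The wrapper name read_one_block_sync and its shape are illustrative, not part of the patch:

/* Sketch: synchronously read one block through f2fs_submit_page_bio(). */
static int read_one_block_sync(struct f2fs_sb_info *sbi,
				struct page *page, block_t blk_addr)
{
	/* on setup failure the helper releases the page and returns nonzero */
	if (f2fs_submit_page_bio(sbi, page, blk_addr, READ_SYNC))
		return -EIO;

	/* f2fs_read_end_io() unlocks the page when the read completes */
	lock_page(page);
	return PageUptodate(page) ? 0 : -EIO;
}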
@@ -37,7 +226,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
 	struct page *node_page = dn->node_page;
 	unsigned int ofs_in_node = dn->ofs_in_node;
 
-	f2fs_wait_on_page_writeback(node_page, NODE, false);
+	f2fs_wait_on_page_writeback(node_page, NODE);
 
 	rn = F2FS_NODE(node_page);
 
@@ -51,19 +240,39 @@ int reserve_new_block(struct dnode_of_data *dn)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
 
-	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
+	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
 		return -EPERM;
-	if (!inc_valid_block_count(sbi, dn->inode, 1))
+	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
 		return -ENOSPC;
 
 	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
 
 	__set_data_blkaddr(dn, NEW_ADDR);
 	dn->data_blkaddr = NEW_ADDR;
+	mark_inode_dirty(dn->inode);
 	sync_inode_page(dn);
 	return 0;
 }
 
+int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
+{
+	bool need_put = dn->inode_page ? false : true;
+	int err;
+
+	/* if inode_page exists, index should be zero */
+	f2fs_bug_on(!need_put && index);
+
+	err = get_dnode_of_data(dn, index, ALLOC_NODE);
+	if (err)
+		return err;
+
+	if (dn->data_blkaddr == NULL_ADDR)
+		err = reserve_new_block(dn);
+	if (err || need_put)
+		f2fs_put_dnode(dn);
+	return err;
+}
+
 static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
 					struct buffer_head *bh_result)
 {
@@ -71,6 +280,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
 	pgoff_t start_fofs, end_fofs;
 	block_t start_blkaddr;
 
+	if (is_inode_flag_set(fi, FI_NO_EXTENT))
+		return 0;
+
 	read_lock(&fi->ext.ext_lock);
 	if (fi->ext.len == 0) {
 		read_unlock(&fi->ext.ext_lock);
@@ -109,6 +321,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
 	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
 	pgoff_t fofs, start_fofs, end_fofs;
 	block_t start_blkaddr, end_blkaddr;
+	int need_update = true;
 
 	f2fs_bug_on(blk_addr == NEW_ADDR);
 	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -117,6 +330,9 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
 	/* Update the page address in the parent node */
 	__set_data_blkaddr(dn, blk_addr);
 
+	if (is_inode_flag_set(fi, FI_NO_EXTENT))
+		return;
+
 	write_lock(&fi->ext.ext_lock);
 
 	start_fofs = fi->ext.fofs;
@@ -163,14 +379,21 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
 					fofs - start_fofs + 1;
 			fi->ext.len -= fofs - start_fofs + 1;
 		}
-		goto end_update;
+	} else {
+		need_update = false;
 	}
-	write_unlock(&fi->ext.ext_lock);
-	return;
 
+	/* Finally, if the extent is very fragmented, let's drop the cache. */
+	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
+		fi->ext.len = 0;
+		set_inode_flag(fi, FI_NO_EXTENT);
+		need_update = true;
+	}
 end_update:
 	write_unlock(&fi->ext.ext_lock);
-	sync_inode_page(dn);
+	if (need_update)
+		sync_inode_page(dn);
+	return;
 }
 
 struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
@@ -196,7 +419,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
 		return ERR_PTR(-ENOENT);
 
 	/* By fallocate(), there is no cached page, but with NEW_ADDR */
-	if (dn.data_blkaddr == NEW_ADDR)
+	if (unlikely(dn.data_blkaddr == NEW_ADDR))
 		return ERR_PTR(-EINVAL);
 
 	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
@@ -208,11 +431,14 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
 		return page;
 	}
 
-	err = f2fs_readpage(sbi, page, dn.data_blkaddr,
+	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
 					sync ? READ_SYNC : READA);
+	if (err)
+		return ERR_PTR(err);
+
 	if (sync) {
 		wait_on_page_locked(page);
-		if (!PageUptodate(page)) {
+		if (unlikely(!PageUptodate(page))) {
 			f2fs_put_page(page, 0);
 			return ERR_PTR(-EIO);
 		}
@@ -246,7 +472,7 @@ repeat:
 	}
 	f2fs_put_dnode(&dn);
 
-	if (dn.data_blkaddr == NULL_ADDR) {
+	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
 		f2fs_put_page(page, 1);
 		return ERR_PTR(-ENOENT);
 	}
@@ -266,16 +492,16 @@ repeat:
 		return page;
 	}
 
-	err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
 	if (err)
 		return ERR_PTR(err);
 
 	lock_page(page);
-	if (!PageUptodate(page)) {
+	if (unlikely(!PageUptodate(page))) {
 		f2fs_put_page(page, 1);
 		return ERR_PTR(-EIO);
 	}
-	if (page->mapping != mapping) {
+	if (unlikely(page->mapping != mapping)) {
 		f2fs_put_page(page, 1);
 		goto repeat;
 	}
@@ -286,12 +512,12 @@ repeat:
  * Caller ensures that this data page is never allocated.
  * A new zero-filled data page is allocated in the page cache.
  *
- * Also, caller should grab and release a mutex by calling mutex_lock_op() and
- * mutex_unlock_op().
- * Note that, npage is set only by make_empty_dir.
+ * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
+ * f2fs_unlock_op().
+ * Note that, ipage is set only by make_empty_dir.
  */
 struct page *get_new_data_page(struct inode *inode,
-		struct page *npage, pgoff_t index, bool new_i_size)
+		struct page *ipage, pgoff_t index, bool new_i_size)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct address_space *mapping = inode->i_mapping;
@@ -299,24 +525,16 @@ struct page *get_new_data_page(struct inode *inode,
 	struct dnode_of_data dn;
 	int err;
 
-	set_new_dnode(&dn, inode, npage, npage, 0);
-	err = get_dnode_of_data(&dn, index, ALLOC_NODE);
+	set_new_dnode(&dn, inode, ipage, NULL, 0);
+	err = f2fs_reserve_block(&dn, index);
 	if (err)
 		return ERR_PTR(err);
-
-	if (dn.data_blkaddr == NULL_ADDR) {
-		if (reserve_new_block(&dn)) {
-			if (!npage)
-				f2fs_put_dnode(&dn);
-			return ERR_PTR(-ENOSPC);
-		}
-	}
-	if (!npage)
-		f2fs_put_dnode(&dn);
 repeat:
 	page = grab_cache_page(mapping, index);
-	if (!page)
-		return ERR_PTR(-ENOMEM);
+	if (!page) {
+		err = -ENOMEM;
+		goto put_err;
+	}
 
 	if (PageUptodate(page))
 		return page;
@@ -325,15 +543,18 @@ repeat:
 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
 		SetPageUptodate(page);
 	} else {
-		err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
+		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+								READ_SYNC);
 		if (err)
-			return ERR_PTR(err);
+			goto put_err;
+
 		lock_page(page);
-		if (!PageUptodate(page)) {
+		if (unlikely(!PageUptodate(page))) {
 			f2fs_put_page(page, 1);
-			return ERR_PTR(-EIO);
+			err = -EIO;
+			goto put_err;
 		}
-		if (page->mapping != mapping) {
+		if (unlikely(page->mapping != mapping)) {
 			f2fs_put_page(page, 1);
 			goto repeat;
 		}
@@ -344,140 +565,187 @@ repeat:
 		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
 		/* Only the directory inode sets new_i_size */
 		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
-		mark_inode_dirty_sync(inode);
 	}
 	return page;
-}
-
-static void read_end_io(struct bio *bio, int err)
-{
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
-	do {
-		struct page *page = bvec->bv_page;
-
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-
-		if (uptodate) {
-			SetPageUptodate(page);
-		} else {
-			ClearPageUptodate(page);
-			SetPageError(page);
-		}
-		unlock_page(page);
-	} while (bvec >= bio->bi_io_vec);
-	bio_put(bio);
+put_err:
+	f2fs_put_dnode(&dn);
+	return ERR_PTR(err);
 }
 
-/*
- * Fill the locked page with data located in the block address.
- * Return unlocked page.
- */
-int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
-			block_t blk_addr, int type)
+static int __allocate_data_block(struct dnode_of_data *dn)
 {
-	struct block_device *bdev = sbi->sb->s_bdev;
-	struct bio *bio;
+	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
+	struct f2fs_summary sum;
+	block_t new_blkaddr;
+	struct node_info ni;
+	int type;
 
-	trace_f2fs_readpage(page, blk_addr, type);
+	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+		return -EPERM;
+	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
+		return -ENOSPC;
 
-	down_read(&sbi->bio_sem);
+	__set_data_blkaddr(dn, NEW_ADDR);
+	dn->data_blkaddr = NEW_ADDR;
 
-	/* Allocate a new bio */
-	bio = f2fs_bio_alloc(bdev, 1);
+	get_node_info(sbi, dn->nid, &ni);
+	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
-	/* Initialize the bio */
-	bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
-	bio->bi_end_io = read_end_io;
+	type = CURSEG_WARM_DATA;
 
-	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
-		bio_put(bio);
-		up_read(&sbi->bio_sem);
-		f2fs_put_page(page, 1);
-		return -EFAULT;
-	}
+	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
 
-	submit_bio(type, bio);
-	up_read(&sbi->bio_sem);
+	/* direct IO doesn't use extent cache to maximize the performance */
+	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+	update_extent_cache(new_blkaddr, dn);
+	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
+
+	dn->data_blkaddr = new_blkaddr;
 	return 0;
 }
 
 /*
- * This function should be used by the data read flow only where it
- * does not check the "create" flag that indicates block allocation.
- * The reason for this special functionality is to exploit VFS readahead
- * mechanism.
+ * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
+ * If original data blocks are allocated, then give them to blockdev.
+ * Otherwise,
+ *     a. preallocate requested block addresses
+ *     b. do not use extent cache for better performance
+ *     c. give the block addresses to blockdev
  */
-static int get_data_block_ro(struct inode *inode, sector_t iblock,
+static int get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
 	unsigned maxblocks = bh_result->b_size >> blkbits;
 	struct dnode_of_data dn;
-	pgoff_t pgofs;
-	int err;
+	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
+	pgoff_t pgofs, end_offset;
+	int err = 0, ofs = 1;
+	bool allocated = false;
 
 	/* Get the page offset from the block offset(iblock) */
 	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
 
-	if (check_extent_cache(inode, pgofs, bh_result)) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-		return 0;
-	}
+	if (check_extent_cache(inode, pgofs, bh_result))
+		goto out;
+
+	if (create)
+		f2fs_lock_op(sbi);
 
 	/* When reading holes, we need its node page */
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
+	err = get_dnode_of_data(&dn, pgofs, mode);
 	if (err) {
-		trace_f2fs_get_data_block(inode, iblock, bh_result, err);
-		return (err == -ENOENT) ? 0 : err;
+		if (err == -ENOENT)
+			err = 0;
+		goto unlock_out;
+	}
+	if (dn.data_blkaddr == NEW_ADDR)
+		goto put_out;
+
+	if (dn.data_blkaddr != NULL_ADDR) {
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else if (create) {
+		err = __allocate_data_block(&dn);
+		if (err)
+			goto put_out;
+		allocated = true;
+		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+	} else {
+		goto put_out;
 	}
 
-	/* It does not support data allocation */
-	f2fs_bug_on(create);
+	end_offset = IS_INODE(dn.node_page) ?
+			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	bh_result->b_size = (((size_t)1) << blkbits);
+	dn.ofs_in_node++;
+	pgofs++;
+
+get_next:
+	if (dn.ofs_in_node >= end_offset) {
+		if (allocated)
+			sync_inode_page(&dn);
+		allocated = false;
+		f2fs_put_dnode(&dn);
 
-	if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
-		int i;
-		unsigned int end_offset;
+		set_new_dnode(&dn, inode, NULL, NULL, 0);
+		err = get_dnode_of_data(&dn, pgofs, mode);
+		if (err) {
+			if (err == -ENOENT)
+				err = 0;
+			goto unlock_out;
+		}
+		if (dn.data_blkaddr == NEW_ADDR)
+			goto put_out;
 
 		end_offset = IS_INODE(dn.node_page) ?
-					ADDRS_PER_INODE(F2FS_I(inode)) :
-					ADDRS_PER_BLOCK;
-
-		clear_buffer_new(bh_result);
+				ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	}
 
+	if (maxblocks > (bh_result->b_size >> blkbits)) {
+		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+		if (blkaddr == NULL_ADDR && create) {
+			err = __allocate_data_block(&dn);
+			if (err)
+				goto sync_out;
+			allocated = true;
+			blkaddr = dn.data_blkaddr;
+		}
 		/* Give more consecutive addresses for the read ahead */
-		for (i = 0; i < end_offset - dn.ofs_in_node; i++)
-			if (((datablock_addr(dn.node_page,
-							dn.ofs_in_node + i))
-				!= (dn.data_blkaddr + i)) || maxblocks == i)
-				break;
-		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
-		bh_result->b_size = (i << blkbits);
+		if (blkaddr == (bh_result->b_blocknr + ofs)) {
+			ofs++;
+			dn.ofs_in_node++;
+			pgofs++;
+			bh_result->b_size += (((size_t)1) << blkbits);
+			goto get_next;
+		}
 	}
+sync_out:
+	if (allocated)
+		sync_inode_page(&dn);
+put_out:
 	f2fs_put_dnode(&dn);
-	trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
-	return 0;
+unlock_out:
+	if (create)
+		f2fs_unlock_op(sbi);
+out:
+	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+	return err;
 }
 
 static int f2fs_read_data_page(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, get_data_block_ro);
+	struct inode *inode = page->mapping->host;
+	int ret;
+
+	/* If the file has inline data, try to read it directlly */
+	if (f2fs_has_inline_data(inode))
+		ret = f2fs_read_inline_data(inode, page);
+	else
+		ret = mpage_readpage(page, get_data_block);
+
+	return ret;
 }
 
 static int f2fs_read_data_pages(struct file *file,
 			struct address_space *mapping,
 			struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
+	struct inode *inode = file->f_mapping->host;
+
+	/* If the file has inline data, skip readpages */
+	if (f2fs_has_inline_data(inode))
+		return 0;
+
+	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
 }
 
-int do_write_data_page(struct page *page)
+int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
 {
 	struct inode *inode = page->mapping->host;
-	block_t old_blk_addr, new_blk_addr;
+	block_t old_blkaddr, new_blkaddr;
 	struct dnode_of_data dn;
 	int err = 0;
 
@@ -486,10 +754,10 @@ int do_write_data_page(struct page *page)
 	if (err)
 		return err;
 
-	old_blk_addr = dn.data_blkaddr;
+	old_blkaddr = dn.data_blkaddr;
 
 	/* This page is already truncated */
-	if (old_blk_addr == NULL_ADDR)
+	if (old_blkaddr == NULL_ADDR)
 		goto out_writepage;
 
 	set_page_writeback(page);
@@ -498,15 +766,13 @@ int do_write_data_page(struct page *page)
 	 * If current allocation needs SSR,
 	 * it had better in-place writes for updated data.
 	 */
-	if (unlikely(old_blk_addr != NEW_ADDR &&
+	if (unlikely(old_blkaddr != NEW_ADDR &&
 			!is_cold_data(page) &&
 			need_inplace_update(inode))) {
-		rewrite_data_page(F2FS_SB(inode->i_sb), page,
-						old_blk_addr);
+		rewrite_data_page(page, old_blkaddr, fio);
 	} else {
-		write_data_page(inode, page, &dn,
-				old_blk_addr, &new_blk_addr);
-		update_extent_cache(new_blk_addr, &dn);
+		write_data_page(page, &dn, &new_blkaddr, fio);
+		update_extent_cache(new_blkaddr, &dn);
 	}
 out_writepage:
 	f2fs_put_dnode(&dn);
@@ -521,9 +787,13 @@ static int f2fs_write_data_page(struct page *page,
 	loff_t i_size = i_size_read(inode);
 	const pgoff_t end_index = ((unsigned long long) i_size)
 							>> PAGE_CACHE_SHIFT;
-	unsigned offset;
+	unsigned offset = 0;
 	bool need_balance_fs = false;
 	int err = 0;
+	struct f2fs_io_info fio = {
+		.type = DATA,
+		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+	};
 
 	if (page->index < end_index)
 		goto write;
@@ -543,7 +813,7 @@ static int f2fs_write_data_page(struct page *page,
 
 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
 write:
-	if (sbi->por_doing) {
+	if (unlikely(sbi->por_doing)) {
 		err = AOP_WRITEPAGE_ACTIVATE;
 		goto redirty_out;
 	}
@@ -552,10 +822,18 @@ write:
 	if (S_ISDIR(inode->i_mode)) {
 		dec_page_count(sbi, F2FS_DIRTY_DENTS);
 		inode_dec_dirty_dents(inode);
-		err = do_write_data_page(page);
+		err = do_write_data_page(page, &fio);
 	} else {
 		f2fs_lock_op(sbi);
-		err = do_write_data_page(page);
+
+		if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
+			err = f2fs_write_inline_data(inode, page, offset);
+			f2fs_unlock_op(sbi);
+			goto out;
+		} else {
+			err = do_write_data_page(page, &fio);
+		}
+
 		f2fs_unlock_op(sbi);
 		need_balance_fs = true;
 	}
@@ -564,8 +842,10 @@ write:
 	else if (err)
 		goto redirty_out;
 
-	if (wbc->for_reclaim)
-		f2fs_submit_bio(sbi, DATA, true);
+	if (wbc->for_reclaim) {
+		f2fs_submit_merged_bio(sbi, DATA, WRITE);
+		need_balance_fs = false;
+	}
 
 	clear_cold_data(page);
 out:
@@ -617,7 +897,8 @@ static int f2fs_write_data_pages(struct address_space *mapping,
 	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
 	if (locked)
 		mutex_unlock(&sbi->writepages);
-	f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
+
+	f2fs_submit_merged_bio(sbi, DATA, WRITE);
 
 	remove_dirty_dir_inode(inode);
 
@@ -638,27 +919,28 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
 	f2fs_balance_fs(sbi);
 repeat:
+	err = f2fs_convert_inline_data(inode, pos + len);
+	if (err)
+		return err;
+
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 	*pagep = page;
 
-	f2fs_lock_op(sbi);
+	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
+		goto inline_data;
 
+	f2fs_lock_op(sbi);
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	err = get_dnode_of_data(&dn, index, ALLOC_NODE);
-	if (err)
-		goto err;
-
-	if (dn.data_blkaddr == NULL_ADDR)
-		err = reserve_new_block(&dn);
-
-	f2fs_put_dnode(&dn);
-	if (err)
-		goto err;
-
+	err = f2fs_reserve_block(&dn, index);
 	f2fs_unlock_op(sbi);
 
+	if (err) {
+		f2fs_put_page(page, 1);
+		return err;
+	}
+inline_data:
 	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
 		return 0;
 
@@ -674,15 +956,19 @@ repeat:
674 if (dn.data_blkaddr == NEW_ADDR) { 956 if (dn.data_blkaddr == NEW_ADDR) {
675 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 957 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
676 } else { 958 } else {
677 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); 959 if (f2fs_has_inline_data(inode))
960 err = f2fs_read_inline_data(inode, page);
961 else
962 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
963 READ_SYNC);
678 if (err) 964 if (err)
679 return err; 965 return err;
680 lock_page(page); 966 lock_page(page);
681 if (!PageUptodate(page)) { 967 if (unlikely(!PageUptodate(page))) {
682 f2fs_put_page(page, 1); 968 f2fs_put_page(page, 1);
683 return -EIO; 969 return -EIO;
684 } 970 }
685 if (page->mapping != mapping) { 971 if (unlikely(page->mapping != mapping)) {
686 f2fs_put_page(page, 1); 972 f2fs_put_page(page, 1);
687 goto repeat; 973 goto repeat;
688 } 974 }
@@ -691,11 +977,6 @@ out:
691 SetPageUptodate(page); 977 SetPageUptodate(page);
692 clear_cold_data(page); 978 clear_cold_data(page);
693 return 0; 979 return 0;
694
695err:
696 f2fs_unlock_op(sbi);
697 f2fs_put_page(page, 1);
698 return err;
699} 980}
700 981
701static int f2fs_write_end(struct file *file, 982static int f2fs_write_end(struct file *file,
@@ -714,23 +995,43 @@ static int f2fs_write_end(struct file *file,
714 update_inode_page(inode); 995 update_inode_page(inode);
715 } 996 }
716 997
717 unlock_page(page); 998 f2fs_put_page(page, 1);
718 page_cache_release(page);
719 return copied; 999 return copied;
720} 1000}
721 1001
1002static int check_direct_IO(struct inode *inode, int rw,
1003 const struct iovec *iov, loff_t offset, unsigned long nr_segs)
1004{
1005 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
1006 int i;
1007
1008 if (rw == READ)
1009 return 0;
1010
1011 if (offset & blocksize_mask)
1012 return -EINVAL;
1013
1014 for (i = 0; i < nr_segs; i++)
1015 if (iov[i].iov_len & blocksize_mask)
1016 return -EINVAL;
1017 return 0;
1018}
1019
722static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, 1020static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
723 const struct iovec *iov, loff_t offset, unsigned long nr_segs) 1021 const struct iovec *iov, loff_t offset, unsigned long nr_segs)
724{ 1022{
725 struct file *file = iocb->ki_filp; 1023 struct file *file = iocb->ki_filp;
726 struct inode *inode = file->f_mapping->host; 1024 struct inode *inode = file->f_mapping->host;
727 1025
728 if (rw == WRITE) 1026 /* Let buffer I/O handle the inline data case. */
1027 if (f2fs_has_inline_data(inode))
1028 return 0;
1029
1030 if (check_direct_IO(inode, rw, iov, offset, nr_segs))
729 return 0; 1031 return 0;
730 1032
731 /* Needs synchronization with the cleaner */
732 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, 1033 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
733 get_data_block_ro); 1034 get_data_block);
734} 1035}
735 1036
736static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 1037static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
@@ -759,6 +1060,8 @@ static int f2fs_set_data_page_dirty(struct page *page)
759 trace_f2fs_set_page_dirty(page, DATA); 1060 trace_f2fs_set_page_dirty(page, DATA);
760 1061
761 SetPageUptodate(page); 1062 SetPageUptodate(page);
1063 mark_inode_dirty(inode);
1064
762 if (!PageDirty(page)) { 1065 if (!PageDirty(page)) {
763 __set_page_dirty_nobuffers(page); 1066 __set_page_dirty_nobuffers(page);
764 set_dirty_dir_page(inode, page); 1067 set_dirty_dir_page(inode, page);
@@ -769,7 +1072,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
769 1072
770static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) 1073static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
771{ 1074{
772 return generic_block_bmap(mapping, block, get_data_block_ro); 1075 return generic_block_bmap(mapping, block, get_data_block);
773} 1076}
774 1077
775const struct address_space_operations f2fs_dblock_aops = { 1078const struct address_space_operations f2fs_dblock_aops = {
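
The new check_direct_IO() above gates blockdev_direct_IO(): reads pass straight through, but a write is rejected unless the starting offset and every iovec length are block-aligned, and f2fs_direct_IO() treats any non-zero return as "fall back to buffered I/O" rather than as a hard error. A minimal userspace sketch of the same mask test, assuming a 4096-byte block size (the real code derives the mask from inode->i_sb->s_blocksize):

#include <stdio.h>
#include <sys/uio.h>

#define BLOCK_SIZE 4096UL

/* Return 0 when offset and every segment length are block-aligned,
 * -1 otherwise -- the same mask test check_direct_IO() applies. */
static int check_alignment(long long offset, const struct iovec *iov,
			   unsigned long nr_segs)
{
	unsigned long mask = BLOCK_SIZE - 1;
	unsigned long i;

	if (offset & mask)
		return -1;
	for (i = 0; i < nr_segs; i++)
		if (iov[i].iov_len & mask)
			return -1;
	return 0;
}

int main(void)
{
	struct iovec ok[2] = { { 0, 4096 }, { 0, 8192 } };
	struct iovec bad[1] = { { 0, 512 } };

	printf("aligned:   %d\n", check_alignment(4096, ok, 2));  /* 0 */
	printf("unaligned: %d\n", check_alignment(4096, bad, 1)); /* -1 */
	return 0;
}
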
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a84b0a8e6854..3de9d20d0c14 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -24,7 +24,7 @@
24#include "gc.h" 24#include "gc.h"
25 25
26static LIST_HEAD(f2fs_stat_list); 26static LIST_HEAD(f2fs_stat_list);
27static struct dentry *debugfs_root; 27static struct dentry *f2fs_debugfs_root;
28static DEFINE_MUTEX(f2fs_stat_mutex); 28static DEFINE_MUTEX(f2fs_stat_mutex);
29 29
30static void update_general_status(struct f2fs_sb_info *sbi) 30static void update_general_status(struct f2fs_sb_info *sbi)
@@ -45,14 +45,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
45 si->valid_count = valid_user_blocks(sbi); 45 si->valid_count = valid_user_blocks(sbi);
46 si->valid_node_count = valid_node_count(sbi); 46 si->valid_node_count = valid_node_count(sbi);
47 si->valid_inode_count = valid_inode_count(sbi); 47 si->valid_inode_count = valid_inode_count(sbi);
48 si->inline_inode = sbi->inline_inode;
48 si->utilization = utilization(sbi); 49 si->utilization = utilization(sbi);
49 50
50 si->free_segs = free_segments(sbi); 51 si->free_segs = free_segments(sbi);
51 si->free_secs = free_sections(sbi); 52 si->free_secs = free_sections(sbi);
52 si->prefree_count = prefree_segments(sbi); 53 si->prefree_count = prefree_segments(sbi);
53 si->dirty_count = dirty_segments(sbi); 54 si->dirty_count = dirty_segments(sbi);
54 si->node_pages = sbi->node_inode->i_mapping->nrpages; 55 si->node_pages = NODE_MAPPING(sbi)->nrpages;
55 si->meta_pages = sbi->meta_inode->i_mapping->nrpages; 56 si->meta_pages = META_MAPPING(sbi)->nrpages;
56 si->nats = NM_I(sbi)->nat_cnt; 57 si->nats = NM_I(sbi)->nat_cnt;
57 si->sits = SIT_I(sbi)->dirty_sentries; 58 si->sits = SIT_I(sbi)->dirty_sentries;
58 si->fnids = NM_I(sbi)->fcnt; 59 si->fnids = NM_I(sbi)->fcnt;
@@ -165,9 +166,9 @@ get_cache:
165 /* free nids */ 166 /* free nids */
166 si->cache_mem = NM_I(sbi)->fcnt; 167 si->cache_mem = NM_I(sbi)->fcnt;
167 si->cache_mem += NM_I(sbi)->nat_cnt; 168 si->cache_mem += NM_I(sbi)->nat_cnt;
168 npages = sbi->node_inode->i_mapping->nrpages; 169 npages = NODE_MAPPING(sbi)->nrpages;
169 si->cache_mem += npages << PAGE_CACHE_SHIFT; 170 si->cache_mem += npages << PAGE_CACHE_SHIFT;
170 npages = sbi->meta_inode->i_mapping->nrpages; 171 npages = META_MAPPING(sbi)->nrpages;
171 si->cache_mem += npages << PAGE_CACHE_SHIFT; 172 si->cache_mem += npages << PAGE_CACHE_SHIFT;
172 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); 173 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
173 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); 174 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
@@ -200,6 +201,8 @@ static int stat_show(struct seq_file *s, void *v)
200 seq_printf(s, "Other: %u)\n - Data: %u\n", 201 seq_printf(s, "Other: %u)\n - Data: %u\n",
201 si->valid_node_count - si->valid_inode_count, 202 si->valid_node_count - si->valid_inode_count,
202 si->valid_count - si->valid_node_count); 203 si->valid_count - si->valid_node_count);
204 seq_printf(s, " - Inline_data Inode: %u\n",
205 si->inline_inode);
203 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", 206 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
204 si->main_area_segs, si->main_area_sections, 207 si->main_area_segs, si->main_area_sections,
205 si->main_area_zones); 208 si->main_area_zones);
@@ -242,14 +245,14 @@ static int stat_show(struct seq_file *s, void *v)
242 seq_printf(s, " - node blocks : %d\n", si->node_blks); 245 seq_printf(s, " - node blocks : %d\n", si->node_blks);
243 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", 246 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
244 si->hit_ext, si->total_ext); 247 si->hit_ext, si->total_ext);
245 seq_printf(s, "\nBalancing F2FS Async:\n"); 248 seq_puts(s, "\nBalancing F2FS Async:\n");
246 seq_printf(s, " - nodes %4d in %4d\n", 249 seq_printf(s, " - nodes: %4d in %4d\n",
247 si->ndirty_node, si->node_pages); 250 si->ndirty_node, si->node_pages);
248 seq_printf(s, " - dents %4d in dirs:%4d\n", 251 seq_printf(s, " - dents: %4d in dirs:%4d\n",
249 si->ndirty_dent, si->ndirty_dirs); 252 si->ndirty_dent, si->ndirty_dirs);
250 seq_printf(s, " - meta %4d in %4d\n", 253 seq_printf(s, " - meta: %4d in %4d\n",
251 si->ndirty_meta, si->meta_pages); 254 si->ndirty_meta, si->meta_pages);
252 seq_printf(s, " - NATs %5d > %lu\n", 255 seq_printf(s, " - NATs: %5d > %lu\n",
253 si->nats, NM_WOUT_THRESHOLD); 256 si->nats, NM_WOUT_THRESHOLD);
254 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", 257 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
255 si->sits, si->fnids); 258 si->sits, si->fnids);
@@ -340,14 +343,32 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
340 343
341void __init f2fs_create_root_stats(void) 344void __init f2fs_create_root_stats(void)
342{ 345{
343 debugfs_root = debugfs_create_dir("f2fs", NULL); 346 struct dentry *file;
344 if (debugfs_root) 347
345 debugfs_create_file("status", S_IRUGO, debugfs_root, 348 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
346 NULL, &stat_fops); 349 if (!f2fs_debugfs_root)
350 goto bail;
351
352 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
353 NULL, &stat_fops);
354 if (!file)
355 goto free_debugfs_dir;
356
357 return;
358
359free_debugfs_dir:
360 debugfs_remove(f2fs_debugfs_root);
361
362bail:
363 f2fs_debugfs_root = NULL;
364 return;
347} 365}
348 366
349void f2fs_destroy_root_stats(void) 367void f2fs_destroy_root_stats(void)
350{ 368{
351 debugfs_remove_recursive(debugfs_root); 369 if (!f2fs_debugfs_root)
352 debugfs_root = NULL; 370 return;
371
372 debugfs_remove_recursive(f2fs_debugfs_root);
373 f2fs_debugfs_root = NULL;
353} 374}
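
The reworked f2fs_create_root_stats() above adopts the usual staged-unwind shape: each step that fails jumps to a label that releases only what was already set up, and the global pointer is NULLed so the destroy path can bail out early. The same shape in a self-contained userspace sketch; malloc/fopen stand in for the debugfs objects purely for illustration:

#include <stdio.h>
#include <stdlib.h>

static char *root;   /* stands in for f2fs_debugfs_root */

static void setup(void)
{
	FILE *f;

	root = malloc(64);
	if (!root)
		goto bail;

	f = fopen("/tmp/example-status", "w"); /* stands in for the "status" file */
	if (!f)
		goto free_root;

	fclose(f);
	return;

free_root:
	free(root);
bail:
	root = NULL;
}

static void teardown(void)
{
	if (!root)   /* mirrors the early return in f2fs_destroy_root_stats() */
		return;
	free(root);
	root = NULL;
}

int main(void)
{
	setup();
	teardown();
	return 0;
}
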
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 594fc1bb64ef..2b7c255bcbdf 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -190,9 +190,6 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
190 unsigned int max_depth; 190 unsigned int max_depth;
191 unsigned int level; 191 unsigned int level;
192 192
193 if (namelen > F2FS_NAME_LEN)
194 return NULL;
195
196 if (npages == 0) 193 if (npages == 0)
197 return NULL; 194 return NULL;
198 195
@@ -259,20 +256,17 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
259 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 256 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
260 mark_inode_dirty(dir); 257 mark_inode_dirty(dir);
261 258
262 /* update parent inode number before releasing dentry page */
263 F2FS_I(inode)->i_pino = dir->i_ino;
264
265 f2fs_put_page(page, 1); 259 f2fs_put_page(page, 1);
266} 260}
267 261
268static void init_dent_inode(const struct qstr *name, struct page *ipage) 262static void init_dent_inode(const struct qstr *name, struct page *ipage)
269{ 263{
270 struct f2fs_node *rn; 264 struct f2fs_inode *ri;
271 265
272 /* copy name info. to this inode page */ 266 /* copy name info. to this inode page */
273 rn = F2FS_NODE(ipage); 267 ri = F2FS_INODE(ipage);
274 rn->i.i_namelen = cpu_to_le32(name->len); 268 ri->i_namelen = cpu_to_le32(name->len);
275 memcpy(rn->i.i_name, name->name, name->len); 269 memcpy(ri->i_name, name->name, name->len);
276 set_page_dirty(ipage); 270 set_page_dirty(ipage);
277} 271}
278 272
@@ -348,11 +342,11 @@ static struct page *init_inode_metadata(struct inode *inode,
348 342
349 err = f2fs_init_acl(inode, dir, page); 343 err = f2fs_init_acl(inode, dir, page);
350 if (err) 344 if (err)
351 goto error; 345 goto put_error;
352 346
353 err = f2fs_init_security(inode, dir, name, page); 347 err = f2fs_init_security(inode, dir, name, page);
354 if (err) 348 if (err)
355 goto error; 349 goto put_error;
356 350
357 wait_on_page_writeback(page); 351 wait_on_page_writeback(page);
358 } else { 352 } else {
@@ -376,8 +370,9 @@ static struct page *init_inode_metadata(struct inode *inode,
376 } 370 }
377 return page; 371 return page;
378 372
379error: 373put_error:
380 f2fs_put_page(page, 1); 374 f2fs_put_page(page, 1);
375error:
381 remove_inode_page(inode); 376 remove_inode_page(inode);
382 return ERR_PTR(err); 377 return ERR_PTR(err);
383} 378}
@@ -393,6 +388,8 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
393 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); 388 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
394 } 389 }
395 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 390 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
391 mark_inode_dirty(dir);
392
396 if (F2FS_I(dir)->i_current_depth != current_depth) { 393 if (F2FS_I(dir)->i_current_depth != current_depth) {
397 F2FS_I(dir)->i_current_depth = current_depth; 394 F2FS_I(dir)->i_current_depth = current_depth;
398 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 395 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
@@ -400,8 +397,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
400 397
401 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) 398 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR))
402 update_inode_page(dir); 399 update_inode_page(dir);
403 else
404 mark_inode_dirty(dir);
405 400
406 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 401 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
407 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 402 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -432,10 +427,11 @@ next:
432} 427}
433 428
434/* 429/*
435 * Caller should grab and release a mutex by calling mutex_lock_op() and 430 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
436 * mutex_unlock_op(). 431 * f2fs_unlock_op().
437 */ 432 */
438int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode) 433int __f2fs_add_link(struct inode *dir, const struct qstr *name,
434 struct inode *inode)
439{ 435{
440 unsigned int bit_pos; 436 unsigned int bit_pos;
441 unsigned int level; 437 unsigned int level;
@@ -461,7 +457,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in
461 } 457 }
462 458
463start: 459start:
464 if (current_depth == MAX_DIR_HASH_DEPTH) 460 if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
465 return -ENOSPC; 461 return -ENOSPC;
466 462
467 /* Increase the depth, if required */ 463 /* Increase the depth, if required */
@@ -554,14 +550,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
554 550
555 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 551 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
556 552
557 if (inode && S_ISDIR(inode->i_mode)) {
558 drop_nlink(dir);
559 update_inode_page(dir);
560 } else {
561 mark_inode_dirty(dir);
562 }
563
564 if (inode) { 553 if (inode) {
554 if (S_ISDIR(inode->i_mode)) {
555 drop_nlink(dir);
556 update_inode_page(dir);
557 }
565 inode->i_ctime = CURRENT_TIME; 558 inode->i_ctime = CURRENT_TIME;
566 drop_nlink(inode); 559 drop_nlink(inode);
567 if (S_ISDIR(inode->i_mode)) { 560 if (S_ISDIR(inode->i_mode)) {
@@ -636,7 +629,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
636 629
637 bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); 630 bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
638 631
639 for ( ; n < npages; n++) { 632 for (; n < npages; n++) {
640 dentry_page = get_lock_data_page(inode, n); 633 dentry_page = get_lock_data_page(inode, n);
641 if (IS_ERR(dentry_page)) 634 if (IS_ERR(dentry_page))
642 continue; 635 continue;
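
init_dent_inode() now reaches the raw inode through the new F2FS_INODE() helper and copies the name in as a little-endian length plus raw bytes. A userspace sketch of that on-disk shape; the 255-byte field size is an assumption standing in for F2FS_NAME_LEN, whose value is not stated in this diff:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NAME_LEN 255   /* assumed stand-in for F2FS_NAME_LEN */

struct disk_inode {            /* toy stand-in for struct f2fs_inode */
	uint32_t i_namelen;    /* stored little-endian on disk */
	uint8_t  i_name[NAME_LEN];
};

static void init_dent_inode(const char *name, struct disk_inode *ri)
{
	size_t len = strnlen(name, NAME_LEN);

	ri->i_namelen = htole32((uint32_t)len);  /* cpu_to_le32() equivalent */
	memcpy(ri->i_name, name, len);
}

int main(void)
{
	struct disk_inode ri = { 0 };

	init_dent_inode("movie.avi", &ri);
	printf("namelen=%u name=%.*s\n", le32toh(ri.i_namelen),
	       (int)le32toh(ri.i_namelen), (const char *)ri.i_name);
	return 0;
}
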
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 89dc7508faf2..af51a0bd2dee 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,8 +22,10 @@
22 22
23#ifdef CONFIG_F2FS_CHECK_FS 23#ifdef CONFIG_F2FS_CHECK_FS
24#define f2fs_bug_on(condition) BUG_ON(condition) 24#define f2fs_bug_on(condition) BUG_ON(condition)
25#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
25#else 26#else
26#define f2fs_bug_on(condition) 27#define f2fs_bug_on(condition)
28#define f2fs_down_write(x, y) down_write(x)
27#endif 29#endif
28 30
29/* 31/*
@@ -37,6 +39,7 @@
37#define F2FS_MOUNT_POSIX_ACL 0x00000020 39#define F2FS_MOUNT_POSIX_ACL 0x00000020
38#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 40#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
39#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100
40 43
41#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 44#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
42#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 45#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -97,6 +100,13 @@ struct dir_inode_entry {
97 struct inode *inode; /* vfs inode pointer */ 100 struct inode *inode; /* vfs inode pointer */
98}; 101};
99 102
103/* for the list of blockaddresses to be discarded */
104struct discard_entry {
105 struct list_head list; /* list head */
106 block_t blkaddr; /* block address to be discarded */
107 int len; /* # of consecutive blocks of the discard */
108};
109
100/* for the list of fsync inodes, used only during recovery */ 110/* for the list of fsync inodes, used only during recovery */
101struct fsync_inode_entry { 111struct fsync_inode_entry {
102 struct list_head list; /* list head */ 112 struct list_head list; /* list head */
@@ -155,13 +165,15 @@ enum {
155 LOOKUP_NODE, /* look up a node without readahead */ 165 LOOKUP_NODE, /* look up a node without readahead */
156 LOOKUP_NODE_RA, /* 166 LOOKUP_NODE_RA, /*
157 * look up a node with readahead called 167 * look up a node with readahead called
158 * by get_datablock_ro. 168 * by get_data_block.
159 */ 169 */
160}; 170};
161 171
162#define F2FS_LINK_MAX 32000 /* maximum link count per file */ 172#define F2FS_LINK_MAX 32000 /* maximum link count per file */
163 173
164/* for in-memory extent cache entry */ 174/* for in-memory extent cache entry */
175#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
176
165struct extent_info { 177struct extent_info {
166 rwlock_t ext_lock; /* rwlock for consistency */ 178 rwlock_t ext_lock; /* rwlock for consistency */
167 unsigned int fofs; /* start offset in a file */ 179 unsigned int fofs; /* start offset in a file */
@@ -308,6 +320,14 @@ struct f2fs_sm_info {
308 320
309 /* a threshold to reclaim prefree segments */ 321 /* a threshold to reclaim prefree segments */
310 unsigned int rec_prefree_segments; 322 unsigned int rec_prefree_segments;
323
324 /* for small discard management */
325 struct list_head discard_list; /* 4KB discard list */
326 int nr_discards; /* # of discards in the list */
327 int max_discards; /* max. discards to be issued */
328
329 unsigned int ipu_policy; /* in-place-update policy */
330 unsigned int min_ipu_util; /* in-place-update threshold */
311}; 331};
312 332
313/* 333/*
@@ -338,6 +358,7 @@ enum count_type {
338 * with waiting the bio's completion 358 * with waiting the bio's completion
339 * ... Only can be used with META. 359 * ... Only can be used with META.
340 */ 360 */
361#define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
341enum page_type { 362enum page_type {
342 DATA, 363 DATA,
343 NODE, 364 NODE,
@@ -346,6 +367,20 @@ enum page_type {
346 META_FLUSH, 367 META_FLUSH,
347}; 368};
348 369
370struct f2fs_io_info {
371 enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
372 int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
373};
374
375#define is_read_io(rw) (((rw) & 1) == READ)
376struct f2fs_bio_info {
377 struct f2fs_sb_info *sbi; /* f2fs superblock */
378 struct bio *bio; /* bios to merge */
379 sector_t last_block_in_bio; /* last block number */
380 struct f2fs_io_info fio; /* store buffered io info. */
381 struct mutex io_mutex; /* mutex for bio */
382};
383
349struct f2fs_sb_info { 384struct f2fs_sb_info {
350 struct super_block *sb; /* pointer to VFS super block */ 385 struct super_block *sb; /* pointer to VFS super block */
351 struct proc_dir_entry *s_proc; /* proc entry */ 386 struct proc_dir_entry *s_proc; /* proc entry */
@@ -359,9 +394,10 @@ struct f2fs_sb_info {
359 394
360 /* for segment-related operations */ 395 /* for segment-related operations */
361 struct f2fs_sm_info *sm_info; /* segment manager */ 396 struct f2fs_sm_info *sm_info; /* segment manager */
362 struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ 397
363 sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ 398 /* for bio operations */
364 struct rw_semaphore bio_sem; /* IO semaphore */ 399 struct f2fs_bio_info read_io; /* for read bios */
400 struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */
365 401
366 /* for checkpoint */ 402 /* for checkpoint */
367 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ 403 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
@@ -376,8 +412,9 @@ struct f2fs_sb_info {
376 412
377 /* for orphan inode management */ 413 /* for orphan inode management */
378 struct list_head orphan_inode_list; /* orphan inode list */ 414 struct list_head orphan_inode_list; /* orphan inode list */
379 struct mutex orphan_inode_mutex; /* for orphan inode list */ 415 spinlock_t orphan_inode_lock; /* for orphan inode list */
380 unsigned int n_orphans; /* # of orphan inodes */ 416 unsigned int n_orphans; /* # of orphan inodes */
417 unsigned int max_orphans; /* max orphan inodes */
381 418
382 /* for directory inode management */ 419 /* for directory inode management */
383 struct list_head dir_inode_list; /* dir inode list */ 420 struct list_head dir_inode_list; /* dir inode list */
@@ -414,6 +451,9 @@ struct f2fs_sb_info {
414 struct f2fs_gc_kthread *gc_thread; /* GC thread */ 451 struct f2fs_gc_kthread *gc_thread; /* GC thread */
415 unsigned int cur_victim_sec; /* current victim section num */ 452 unsigned int cur_victim_sec; /* current victim section num */
416 453
454 /* maximum # of trials to find a victim segment for SSR and GC */
455 unsigned int max_victim_search;
456
417 /* 457 /*
418 * for stat information. 458 * for stat information.
419 * one is for the LFS mode, and the other is for the SSR mode. 459 * one is for the LFS mode, and the other is for the SSR mode.
@@ -423,6 +463,7 @@ struct f2fs_sb_info {
423 unsigned int segment_count[2]; /* # of allocated segments */ 463 unsigned int segment_count[2]; /* # of allocated segments */
424 unsigned int block_count[2]; /* # of allocated blocks */ 464 unsigned int block_count[2]; /* # of allocated blocks */
425 int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ 465 int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
466 int inline_inode; /* # of inline_data inodes */
426 int bg_gc; /* background gc calls */ 467 int bg_gc; /* background gc calls */
427 unsigned int n_dirty_dirs; /* # of dir inodes */ 468 unsigned int n_dirty_dirs; /* # of dir inodes */
428#endif 469#endif
@@ -462,6 +503,11 @@ static inline struct f2fs_node *F2FS_NODE(struct page *page)
462 return (struct f2fs_node *)page_address(page); 503 return (struct f2fs_node *)page_address(page);
463} 504}
464 505
506static inline struct f2fs_inode *F2FS_INODE(struct page *page)
507{
508 return &((struct f2fs_node *)page_address(page))->i;
509}
510
465static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) 511static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
466{ 512{
467 return (struct f2fs_nm_info *)(sbi->nm_info); 513 return (struct f2fs_nm_info *)(sbi->nm_info);
@@ -487,6 +533,16 @@ static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
487 return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); 533 return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
488} 534}
489 535
536static inline struct address_space *META_MAPPING(struct f2fs_sb_info *sbi)
537{
538 return sbi->meta_inode->i_mapping;
539}
540
541static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
542{
543 return sbi->node_inode->i_mapping;
544}
545
490static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) 546static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
491{ 547{
492 sbi->s_dirty = 1; 548 sbi->s_dirty = 1;
@@ -534,7 +590,7 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
534 590
535static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) 591static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
536{ 592{
537 down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex); 593 f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
538} 594}
539 595
540static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) 596static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
@@ -548,7 +604,7 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
548static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) 604static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
549{ 605{
550 WARN_ON((nid >= NM_I(sbi)->max_nid)); 606 WARN_ON((nid >= NM_I(sbi)->max_nid));
551 if (nid >= NM_I(sbi)->max_nid) 607 if (unlikely(nid >= NM_I(sbi)->max_nid))
552 return -EINVAL; 608 return -EINVAL;
553 return 0; 609 return 0;
554} 610}
@@ -561,9 +617,9 @@ static inline int check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
561static inline int F2FS_HAS_BLOCKS(struct inode *inode) 617static inline int F2FS_HAS_BLOCKS(struct inode *inode)
562{ 618{
563 if (F2FS_I(inode)->i_xattr_nid) 619 if (F2FS_I(inode)->i_xattr_nid)
564 return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); 620 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1;
565 else 621 else
566 return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); 622 return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS;
567} 623}
568 624
569static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, 625static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
@@ -574,7 +630,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
574 spin_lock(&sbi->stat_lock); 630 spin_lock(&sbi->stat_lock);
575 valid_block_count = 631 valid_block_count =
576 sbi->total_valid_block_count + (block_t)count; 632 sbi->total_valid_block_count + (block_t)count;
577 if (valid_block_count > sbi->user_block_count) { 633 if (unlikely(valid_block_count > sbi->user_block_count)) {
578 spin_unlock(&sbi->stat_lock); 634 spin_unlock(&sbi->stat_lock);
579 return false; 635 return false;
580 } 636 }
@@ -585,7 +641,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
585 return true; 641 return true;
586} 642}
587 643
588static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, 644static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
589 struct inode *inode, 645 struct inode *inode,
590 blkcnt_t count) 646 blkcnt_t count)
591{ 647{
@@ -595,7 +651,6 @@ static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
595 inode->i_blocks -= count; 651 inode->i_blocks -= count;
596 sbi->total_valid_block_count -= (block_t)count; 652 sbi->total_valid_block_count -= (block_t)count;
597 spin_unlock(&sbi->stat_lock); 653 spin_unlock(&sbi->stat_lock);
598 return 0;
599} 654}
600 655
601static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) 656static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -686,50 +741,48 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
686} 741}
687 742
688static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, 743static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
689 struct inode *inode, 744 struct inode *inode)
690 unsigned int count)
691{ 745{
692 block_t valid_block_count; 746 block_t valid_block_count;
693 unsigned int valid_node_count; 747 unsigned int valid_node_count;
694 748
695 spin_lock(&sbi->stat_lock); 749 spin_lock(&sbi->stat_lock);
696 750
697 valid_block_count = sbi->total_valid_block_count + (block_t)count; 751 valid_block_count = sbi->total_valid_block_count + 1;
698 sbi->alloc_valid_block_count += (block_t)count; 752 if (unlikely(valid_block_count > sbi->user_block_count)) {
699 valid_node_count = sbi->total_valid_node_count + count;
700
701 if (valid_block_count > sbi->user_block_count) {
702 spin_unlock(&sbi->stat_lock); 753 spin_unlock(&sbi->stat_lock);
703 return false; 754 return false;
704 } 755 }
705 756
706 if (valid_node_count > sbi->total_node_count) { 757 valid_node_count = sbi->total_valid_node_count + 1;
758 if (unlikely(valid_node_count > sbi->total_node_count)) {
707 spin_unlock(&sbi->stat_lock); 759 spin_unlock(&sbi->stat_lock);
708 return false; 760 return false;
709 } 761 }
710 762
711 if (inode) 763 if (inode)
712 inode->i_blocks += count; 764 inode->i_blocks++;
713 sbi->total_valid_node_count = valid_node_count; 765
714 sbi->total_valid_block_count = valid_block_count; 766 sbi->alloc_valid_block_count++;
767 sbi->total_valid_node_count++;
768 sbi->total_valid_block_count++;
715 spin_unlock(&sbi->stat_lock); 769 spin_unlock(&sbi->stat_lock);
716 770
717 return true; 771 return true;
718} 772}
719 773
720static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, 774static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
721 struct inode *inode, 775 struct inode *inode)
722 unsigned int count)
723{ 776{
724 spin_lock(&sbi->stat_lock); 777 spin_lock(&sbi->stat_lock);
725 778
726 f2fs_bug_on(sbi->total_valid_block_count < count); 779 f2fs_bug_on(!sbi->total_valid_block_count);
727 f2fs_bug_on(sbi->total_valid_node_count < count); 780 f2fs_bug_on(!sbi->total_valid_node_count);
728 f2fs_bug_on(inode->i_blocks < count); 781 f2fs_bug_on(!inode->i_blocks);
729 782
730 inode->i_blocks -= count; 783 inode->i_blocks--;
731 sbi->total_valid_node_count -= count; 784 sbi->total_valid_node_count--;
732 sbi->total_valid_block_count -= (block_t)count; 785 sbi->total_valid_block_count--;
733 786
734 spin_unlock(&sbi->stat_lock); 787 spin_unlock(&sbi->stat_lock);
735} 788}
@@ -751,13 +804,12 @@ static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
751 spin_unlock(&sbi->stat_lock); 804 spin_unlock(&sbi->stat_lock);
752} 805}
753 806
754static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) 807static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi)
755{ 808{
756 spin_lock(&sbi->stat_lock); 809 spin_lock(&sbi->stat_lock);
757 f2fs_bug_on(!sbi->total_valid_inode_count); 810 f2fs_bug_on(!sbi->total_valid_inode_count);
758 sbi->total_valid_inode_count--; 811 sbi->total_valid_inode_count--;
759 spin_unlock(&sbi->stat_lock); 812 spin_unlock(&sbi->stat_lock);
760 return 0;
761} 813}
762 814
763static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) 815static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
@@ -771,7 +823,7 @@ static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
771 823
772static inline void f2fs_put_page(struct page *page, int unlock) 824static inline void f2fs_put_page(struct page *page, int unlock)
773{ 825{
774 if (!page || IS_ERR(page)) 826 if (!page)
775 return; 827 return;
776 828
777 if (unlock) { 829 if (unlock) {
@@ -876,7 +928,9 @@ enum {
876 FI_NO_ALLOC, /* should not allocate any blocks */ 928 FI_NO_ALLOC, /* should not allocate any blocks */
877 FI_UPDATE_DIR, /* should update inode block for consistency */ 929 FI_UPDATE_DIR, /* should update inode block for consistency */
878 FI_DELAY_IPUT, /* used for the recovery */ 930 FI_DELAY_IPUT, /* used for the recovery */
931 FI_NO_EXTENT, /* not to use the extent cache */
879 FI_INLINE_XATTR, /* used for inline xattr */ 932 FI_INLINE_XATTR, /* used for inline xattr */
933 FI_INLINE_DATA, /* used for inline data*/
880}; 934};
881 935
882static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 936static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -914,6 +968,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
914{ 968{
915 if (ri->i_inline & F2FS_INLINE_XATTR) 969 if (ri->i_inline & F2FS_INLINE_XATTR)
916 set_inode_flag(fi, FI_INLINE_XATTR); 970 set_inode_flag(fi, FI_INLINE_XATTR);
971 if (ri->i_inline & F2FS_INLINE_DATA)
972 set_inode_flag(fi, FI_INLINE_DATA);
917} 973}
918 974
919static inline void set_raw_inline(struct f2fs_inode_info *fi, 975static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -923,6 +979,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
923 979
924 if (is_inode_flag_set(fi, FI_INLINE_XATTR)) 980 if (is_inode_flag_set(fi, FI_INLINE_XATTR))
925 ri->i_inline |= F2FS_INLINE_XATTR; 981 ri->i_inline |= F2FS_INLINE_XATTR;
982 if (is_inode_flag_set(fi, FI_INLINE_DATA))
983 ri->i_inline |= F2FS_INLINE_DATA;
926} 984}
927 985
928static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) 986static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
@@ -948,6 +1006,18 @@ static inline int inline_xattr_size(struct inode *inode)
948 return 0; 1006 return 0;
949} 1007}
950 1008
1009static inline int f2fs_has_inline_data(struct inode *inode)
1010{
1011 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
1012}
1013
1014static inline void *inline_data_addr(struct page *page)
1015{
1016 struct f2fs_inode *ri;
1017 ri = (struct f2fs_inode *)page_address(page);
1018 return (void *)&(ri->i_addr[1]);
1019}
1020
951static inline int f2fs_readonly(struct super_block *sb) 1021static inline int f2fs_readonly(struct super_block *sb)
952{ 1022{
953 return sb->s_flags & MS_RDONLY; 1023 return sb->s_flags & MS_RDONLY;
@@ -958,6 +1028,7 @@ static inline int f2fs_readonly(struct super_block *sb)
958 */ 1028 */
959int f2fs_sync_file(struct file *, loff_t, loff_t, int); 1029int f2fs_sync_file(struct file *, loff_t, loff_t, int);
960void truncate_data_blocks(struct dnode_of_data *); 1030void truncate_data_blocks(struct dnode_of_data *);
1031int truncate_blocks(struct inode *, u64);
961void f2fs_truncate(struct inode *); 1032void f2fs_truncate(struct inode *);
962int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 1033int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
963int f2fs_setattr(struct dentry *, struct iattr *); 1034int f2fs_setattr(struct dentry *, struct iattr *);
@@ -1027,7 +1098,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
1027int truncate_inode_blocks(struct inode *, pgoff_t); 1098int truncate_inode_blocks(struct inode *, pgoff_t);
1028int truncate_xattr_node(struct inode *, struct page *); 1099int truncate_xattr_node(struct inode *, struct page *);
1029int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1100int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
1030int remove_inode_page(struct inode *); 1101void remove_inode_page(struct inode *);
1031struct page *new_inode_page(struct inode *, const struct qstr *); 1102struct page *new_inode_page(struct inode *, const struct qstr *);
1032struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1103struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
1033void ra_node_page(struct f2fs_sb_info *, nid_t); 1104void ra_node_page(struct f2fs_sb_info *, nid_t);
@@ -1059,19 +1130,19 @@ void clear_prefree_segments(struct f2fs_sb_info *);
1059int npages_for_summary_flush(struct f2fs_sb_info *); 1130int npages_for_summary_flush(struct f2fs_sb_info *);
1060void allocate_new_segments(struct f2fs_sb_info *); 1131void allocate_new_segments(struct f2fs_sb_info *);
1061struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1132struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
1062struct bio *f2fs_bio_alloc(struct block_device *, int);
1063void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool);
1064void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
1065void write_meta_page(struct f2fs_sb_info *, struct page *); 1133void write_meta_page(struct f2fs_sb_info *, struct page *);
1066void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, 1134void write_node_page(struct f2fs_sb_info *, struct page *,
1067 block_t, block_t *); 1135 struct f2fs_io_info *, unsigned int, block_t, block_t *);
1068void write_data_page(struct inode *, struct page *, struct dnode_of_data*, 1136void write_data_page(struct page *, struct dnode_of_data *, block_t *,
1069 block_t, block_t *); 1137 struct f2fs_io_info *);
1070void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); 1138void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *);
1071void recover_data_page(struct f2fs_sb_info *, struct page *, 1139void recover_data_page(struct f2fs_sb_info *, struct page *,
1072 struct f2fs_summary *, block_t, block_t); 1140 struct f2fs_summary *, block_t, block_t);
1073void rewrite_node_page(struct f2fs_sb_info *, struct page *, 1141void rewrite_node_page(struct f2fs_sb_info *, struct page *,
1074 struct f2fs_summary *, block_t, block_t); 1142 struct f2fs_summary *, block_t, block_t);
1143void allocate_data_block(struct f2fs_sb_info *, struct page *,
1144 block_t, block_t *, struct f2fs_summary *, int);
1145void f2fs_wait_on_page_writeback(struct page *, enum page_type);
1075void write_data_summaries(struct f2fs_sb_info *, block_t); 1146void write_data_summaries(struct f2fs_sb_info *, block_t);
1076void write_node_summaries(struct f2fs_sb_info *, block_t); 1147void write_node_summaries(struct f2fs_sb_info *, block_t);
1077int lookup_journal_in_cursum(struct f2fs_summary_block *, 1148int lookup_journal_in_cursum(struct f2fs_summary_block *,
@@ -1079,6 +1150,8 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *,
1079void flush_sit_entries(struct f2fs_sb_info *); 1150void flush_sit_entries(struct f2fs_sb_info *);
1080int build_segment_manager(struct f2fs_sb_info *); 1151int build_segment_manager(struct f2fs_sb_info *);
1081void destroy_segment_manager(struct f2fs_sb_info *); 1152void destroy_segment_manager(struct f2fs_sb_info *);
1153int __init create_segment_manager_caches(void);
1154void destroy_segment_manager_caches(void);
1082 1155
1083/* 1156/*
1084 * checkpoint.c 1157 * checkpoint.c
@@ -1090,7 +1163,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *);
1090void release_orphan_inode(struct f2fs_sb_info *); 1163void release_orphan_inode(struct f2fs_sb_info *);
1091void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1164void add_orphan_inode(struct f2fs_sb_info *, nid_t);
1092void remove_orphan_inode(struct f2fs_sb_info *, nid_t); 1165void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
1093int recover_orphan_inodes(struct f2fs_sb_info *); 1166void recover_orphan_inodes(struct f2fs_sb_info *);
1094int get_valid_checkpoint(struct f2fs_sb_info *); 1167int get_valid_checkpoint(struct f2fs_sb_info *);
1095void set_dirty_dir_page(struct inode *, struct page *); 1168void set_dirty_dir_page(struct inode *, struct page *);
1096void add_dirty_dir_inode(struct inode *); 1169void add_dirty_dir_inode(struct inode *);
@@ -1105,13 +1178,17 @@ void destroy_checkpoint_caches(void);
1105/* 1178/*
1106 * data.c 1179 * data.c
1107 */ 1180 */
1181void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
1182int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int);
1183void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t,
1184 struct f2fs_io_info *);
1108int reserve_new_block(struct dnode_of_data *); 1185int reserve_new_block(struct dnode_of_data *);
1186int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
1109void update_extent_cache(block_t, struct dnode_of_data *); 1187void update_extent_cache(block_t, struct dnode_of_data *);
1110struct page *find_data_page(struct inode *, pgoff_t, bool); 1188struct page *find_data_page(struct inode *, pgoff_t, bool);
1111struct page *get_lock_data_page(struct inode *, pgoff_t); 1189struct page *get_lock_data_page(struct inode *, pgoff_t);
1112struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1190struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1113int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); 1191int do_write_data_page(struct page *, struct f2fs_io_info *);
1114int do_write_data_page(struct page *);
1115 1192
1116/* 1193/*
1117 * gc.c 1194 * gc.c
@@ -1144,7 +1221,7 @@ struct f2fs_stat_info {
1144 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1221 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1145 int nats, sits, fnids; 1222 int nats, sits, fnids;
1146 int total_count, utilization; 1223 int total_count, utilization;
1147 int bg_gc; 1224 int bg_gc, inline_inode;
1148 unsigned int valid_count, valid_node_count, valid_inode_count; 1225 unsigned int valid_count, valid_node_count, valid_inode_count;
1149 unsigned int bimodal, avg_vblocks; 1226 unsigned int bimodal, avg_vblocks;
1150 int util_free, util_valid, util_invalid; 1227 int util_free, util_valid, util_invalid;
@@ -1164,7 +1241,7 @@ struct f2fs_stat_info {
1164 1241
1165static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) 1242static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1166{ 1243{
1167 return (struct f2fs_stat_info*)sbi->stat_info; 1244 return (struct f2fs_stat_info *)sbi->stat_info;
1168} 1245}
1169 1246
1170#define stat_inc_call_count(si) ((si)->call_count++) 1247#define stat_inc_call_count(si) ((si)->call_count++)
@@ -1173,6 +1250,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1173#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--) 1250#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
1174#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++) 1251#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
1175#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++) 1252#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
1253#define stat_inc_inline_inode(inode) \
1254 do { \
1255 if (f2fs_has_inline_data(inode)) \
1256 ((F2FS_SB(inode->i_sb))->inline_inode++); \
1257 } while (0)
1258#define stat_dec_inline_inode(inode) \
1259 do { \
1260 if (f2fs_has_inline_data(inode)) \
1261 ((F2FS_SB(inode->i_sb))->inline_inode--); \
1262 } while (0)
1263
1176#define stat_inc_seg_type(sbi, curseg) \ 1264#define stat_inc_seg_type(sbi, curseg) \
1177 ((sbi)->segment_count[(curseg)->alloc_type]++) 1265 ((sbi)->segment_count[(curseg)->alloc_type]++)
1178#define stat_inc_block_count(sbi, curseg) \ 1266#define stat_inc_block_count(sbi, curseg) \
@@ -1216,6 +1304,8 @@ void f2fs_destroy_root_stats(void);
1216#define stat_dec_dirty_dir(sbi) 1304#define stat_dec_dirty_dir(sbi)
1217#define stat_inc_total_hit(sb) 1305#define stat_inc_total_hit(sb)
1218#define stat_inc_read_hit(sb) 1306#define stat_inc_read_hit(sb)
1307#define stat_inc_inline_inode(inode)
1308#define stat_dec_inline_inode(inode)
1219#define stat_inc_seg_type(sbi, curseg) 1309#define stat_inc_seg_type(sbi, curseg)
1220#define stat_inc_block_count(sbi, curseg) 1310#define stat_inc_block_count(sbi, curseg)
1221#define stat_inc_seg_count(si, type) 1311#define stat_inc_seg_count(si, type)
@@ -1238,4 +1328,13 @@ extern const struct address_space_operations f2fs_meta_aops;
1238extern const struct inode_operations f2fs_dir_inode_operations; 1328extern const struct inode_operations f2fs_dir_inode_operations;
1239extern const struct inode_operations f2fs_symlink_inode_operations; 1329extern const struct inode_operations f2fs_symlink_inode_operations;
1240extern const struct inode_operations f2fs_special_inode_operations; 1330extern const struct inode_operations f2fs_special_inode_operations;
1331
1332/*
1333 * inline.c
1334 */
1335bool f2fs_may_inline(struct inode *);
1336int f2fs_read_inline_data(struct inode *, struct page *);
1337int f2fs_convert_inline_data(struct inode *, pgoff_t);
1338int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
1339int recover_inline_data(struct inode *, struct page *);
1241#endif 1340#endif
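
inline_data_addr() above returns &ri->i_addr[1]: inline file contents live in the inode's block-address array starting at slot 1, with slot 0 left unused, which is what lets __f2fs_convert_inline_data() later reserve a real block at index 0 without clobbering the data. A back-of-envelope check of how much fits, assuming 923 address slots per inode and 50 slots reserved for inline xattrs -- both values are assumptions taken from include/linux/f2fs_fs.h of the era, not from this diff:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned addrs_per_inode = 923;   /* assumed DEF_ADDRS_PER_INODE */
	unsigned inline_xattr_addrs = 50; /* assumed F2FS_INLINE_XATTR_ADDRS */

	/* slot 0 is skipped; xattr slots are carved off the tail */
	unsigned slots = addrs_per_inode - inline_xattr_addrs - 1;
	unsigned bytes = slots * (unsigned)sizeof(uint32_t);

	printf("inline data capacity: %u bytes (~%.1f KB)\n",
	       bytes, bytes / 1024.0);  /* 3488 bytes, ~3.4 KB */
	return 0;
}
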
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d714f4972d5..85e91ca88d57 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -33,7 +33,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
33 struct page *page = vmf->page; 33 struct page *page = vmf->page;
34 struct inode *inode = file_inode(vma->vm_file); 34 struct inode *inode = file_inode(vma->vm_file);
35 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 35 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
36 block_t old_blk_addr;
37 struct dnode_of_data dn; 36 struct dnode_of_data dn;
38 int err; 37 int err;
39 38
@@ -44,30 +43,16 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
44 /* block allocation */ 43 /* block allocation */
45 f2fs_lock_op(sbi); 44 f2fs_lock_op(sbi);
46 set_new_dnode(&dn, inode, NULL, NULL, 0); 45 set_new_dnode(&dn, inode, NULL, NULL, 0);
47 err = get_dnode_of_data(&dn, page->index, ALLOC_NODE); 46 err = f2fs_reserve_block(&dn, page->index);
48 if (err) {
49 f2fs_unlock_op(sbi);
50 goto out;
51 }
52
53 old_blk_addr = dn.data_blkaddr;
54
55 if (old_blk_addr == NULL_ADDR) {
56 err = reserve_new_block(&dn);
57 if (err) {
58 f2fs_put_dnode(&dn);
59 f2fs_unlock_op(sbi);
60 goto out;
61 }
62 }
63 f2fs_put_dnode(&dn);
64 f2fs_unlock_op(sbi); 47 f2fs_unlock_op(sbi);
48 if (err)
49 goto out;
65 50
66 file_update_time(vma->vm_file); 51 file_update_time(vma->vm_file);
67 lock_page(page); 52 lock_page(page);
68 if (page->mapping != inode->i_mapping || 53 if (unlikely(page->mapping != inode->i_mapping ||
69 page_offset(page) > i_size_read(inode) || 54 page_offset(page) > i_size_read(inode) ||
70 !PageUptodate(page)) { 55 !PageUptodate(page))) {
71 unlock_page(page); 56 unlock_page(page);
72 err = -EFAULT; 57 err = -EFAULT;
73 goto out; 58 goto out;
@@ -130,12 +115,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
130 int ret = 0; 115 int ret = 0;
131 bool need_cp = false; 116 bool need_cp = false;
132 struct writeback_control wbc = { 117 struct writeback_control wbc = {
133 .sync_mode = WB_SYNC_ALL, 118 .sync_mode = WB_SYNC_NONE,
134 .nr_to_write = LONG_MAX, 119 .nr_to_write = LONG_MAX,
135 .for_reclaim = 0, 120 .for_reclaim = 0,
136 }; 121 };
137 122
138 if (f2fs_readonly(inode->i_sb)) 123 if (unlikely(f2fs_readonly(inode->i_sb)))
139 return 0; 124 return 0;
140 125
141 trace_f2fs_sync_file_enter(inode); 126 trace_f2fs_sync_file_enter(inode);
@@ -217,7 +202,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
217 raw_node = F2FS_NODE(dn->node_page); 202 raw_node = F2FS_NODE(dn->node_page);
218 addr = blkaddr_in_node(raw_node) + ofs; 203 addr = blkaddr_in_node(raw_node) + ofs;
219 204
220 for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { 205 for (; count > 0; count--, addr++, dn->ofs_in_node++) {
221 block_t blkaddr = le32_to_cpu(*addr); 206 block_t blkaddr = le32_to_cpu(*addr);
222 if (blkaddr == NULL_ADDR) 207 if (blkaddr == NULL_ADDR)
223 continue; 208 continue;
@@ -256,7 +241,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
256 return; 241 return;
257 242
258 lock_page(page); 243 lock_page(page);
259 if (page->mapping != inode->i_mapping) { 244 if (unlikely(page->mapping != inode->i_mapping)) {
260 f2fs_put_page(page, 1); 245 f2fs_put_page(page, 1);
261 return; 246 return;
262 } 247 }
@@ -266,21 +251,24 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
266 f2fs_put_page(page, 1); 251 f2fs_put_page(page, 1);
267} 252}
268 253
269static int truncate_blocks(struct inode *inode, u64 from) 254int truncate_blocks(struct inode *inode, u64 from)
270{ 255{
271 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 256 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
272 unsigned int blocksize = inode->i_sb->s_blocksize; 257 unsigned int blocksize = inode->i_sb->s_blocksize;
273 struct dnode_of_data dn; 258 struct dnode_of_data dn;
274 pgoff_t free_from; 259 pgoff_t free_from;
275 int count = 0; 260 int count = 0, err = 0;
276 int err;
277 261
278 trace_f2fs_truncate_blocks_enter(inode, from); 262 trace_f2fs_truncate_blocks_enter(inode, from);
279 263
264 if (f2fs_has_inline_data(inode))
265 goto done;
266
280 free_from = (pgoff_t) 267 free_from = (pgoff_t)
281 ((from + blocksize - 1) >> (sbi->log_blocksize)); 268 ((from + blocksize - 1) >> (sbi->log_blocksize));
282 269
283 f2fs_lock_op(sbi); 270 f2fs_lock_op(sbi);
271
284 set_new_dnode(&dn, inode, NULL, NULL, 0); 272 set_new_dnode(&dn, inode, NULL, NULL, 0);
285 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); 273 err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
286 if (err) { 274 if (err) {
@@ -308,7 +296,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
308free_next: 296free_next:
309 err = truncate_inode_blocks(inode, free_from); 297 err = truncate_inode_blocks(inode, free_from);
310 f2fs_unlock_op(sbi); 298 f2fs_unlock_op(sbi);
311 299done:
312 /* lastly zero out the first data page */ 300 /* lastly zero out the first data page */
313 truncate_partial_data_page(inode, from); 301 truncate_partial_data_page(inode, from);
314 302
@@ -382,6 +370,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
382 370
383 if ((attr->ia_valid & ATTR_SIZE) && 371 if ((attr->ia_valid & ATTR_SIZE) &&
384 attr->ia_size != i_size_read(inode)) { 372 attr->ia_size != i_size_read(inode)) {
373 err = f2fs_convert_inline_data(inode, attr->ia_size);
374 if (err)
375 return err;
376
385 truncate_setsize(inode, attr->ia_size); 377 truncate_setsize(inode, attr->ia_size);
386 f2fs_truncate(inode); 378 f2fs_truncate(inode);
387 f2fs_balance_fs(F2FS_SB(inode->i_sb)); 379 f2fs_balance_fs(F2FS_SB(inode->i_sb));
@@ -459,12 +451,16 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
459 return 0; 451 return 0;
460} 452}
461 453
462static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode) 454static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
463{ 455{
464 pgoff_t pg_start, pg_end; 456 pgoff_t pg_start, pg_end;
465 loff_t off_start, off_end; 457 loff_t off_start, off_end;
466 int ret = 0; 458 int ret = 0;
467 459
460 ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1);
461 if (ret)
462 return ret;
463
468 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; 464 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
469 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; 465 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
470 466
@@ -499,12 +495,6 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
499 } 495 }
500 } 496 }
501 497
502 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
503 i_size_read(inode) <= (offset + len)) {
504 i_size_write(inode, offset);
505 mark_inode_dirty(inode);
506 }
507
508 return ret; 498 return ret;
509} 499}
510 500
@@ -521,6 +511,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
521 if (ret) 511 if (ret)
522 return ret; 512 return ret;
523 513
514 ret = f2fs_convert_inline_data(inode, offset + len);
515 if (ret)
516 return ret;
517
524 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; 518 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
525 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; 519 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
526 520
@@ -532,22 +526,10 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
532 526
533 f2fs_lock_op(sbi); 527 f2fs_lock_op(sbi);
534 set_new_dnode(&dn, inode, NULL, NULL, 0); 528 set_new_dnode(&dn, inode, NULL, NULL, 0);
535 ret = get_dnode_of_data(&dn, index, ALLOC_NODE); 529 ret = f2fs_reserve_block(&dn, index);
536 if (ret) {
537 f2fs_unlock_op(sbi);
538 break;
539 }
540
541 if (dn.data_blkaddr == NULL_ADDR) {
542 ret = reserve_new_block(&dn);
543 if (ret) {
544 f2fs_put_dnode(&dn);
545 f2fs_unlock_op(sbi);
546 break;
547 }
548 }
549 f2fs_put_dnode(&dn);
550 f2fs_unlock_op(sbi); 530 f2fs_unlock_op(sbi);
531 if (ret)
532 break;
551 533
552 if (pg_start == pg_end) 534 if (pg_start == pg_end)
553 new_size = offset + len; 535 new_size = offset + len;
@@ -578,7 +560,7 @@ static long f2fs_fallocate(struct file *file, int mode,
578 return -EOPNOTSUPP; 560 return -EOPNOTSUPP;
579 561
580 if (mode & FALLOC_FL_PUNCH_HOLE) 562 if (mode & FALLOC_FL_PUNCH_HOLE)
581 ret = punch_hole(inode, offset, len, mode); 563 ret = punch_hole(inode, offset, len);
582 else 564 else
583 ret = expand_inode_data(inode, offset, len, mode); 565 ret = expand_inode_data(inode, offset, len, mode);
584 566
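
Several call sites in this patch (page_mkwrite and expand_inode_data here, write_begin in data.c) drop the same open-coded sequence -- get_dnode_of_data(), reserve_new_block() when the slot is NULL_ADDR, then f2fs_put_dnode() -- in favour of the new f2fs_reserve_block(). A toy model of the lookup-or-reserve pattern being factored out, with a plain array standing in for the dnode address table:

#include <stdio.h>

#define NULL_ADDR   0u           /* unallocated, as in f2fs */
#define TABLE_SLOTS 8

static unsigned addr[TABLE_SLOTS];   /* toy dnode block-address table */
static unsigned next_blk = 100;      /* toy allocator cursor */

/* Mirrors the shape of f2fs_reserve_block(): look the slot up and
 * allocate a new block only when nothing is mapped there yet. */
static int reserve_block(unsigned index)
{
	if (index >= TABLE_SLOTS)
		return -1;                /* get_dnode_of_data() failure */
	if (addr[index] == NULL_ADDR)
		addr[index] = next_blk++; /* reserve_new_block() */
	return 0;
}

int main(void)
{
	reserve_block(3);
	reserve_block(3);  /* second call is a no-op: already mapped */
	printf("slot 3 -> block %u\n", addr[3]);  /* block 100 */
	return 0;
}
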
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b7ad1ec7e4cc..ea0371e854b4 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -119,7 +119,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
119 kfree(gc_th); 119 kfree(gc_th);
120 sbi->gc_thread = NULL; 120 sbi->gc_thread = NULL;
121 } 121 }
122
123out: 122out:
124 return err; 123 return err;
125} 124}
@@ -164,8 +163,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
164 p->ofs_unit = sbi->segs_per_sec; 163 p->ofs_unit = sbi->segs_per_sec;
165 } 164 }
166 165
167 if (p->max_search > MAX_VICTIM_SEARCH) 166 if (p->max_search > sbi->max_victim_search)
168 p->max_search = MAX_VICTIM_SEARCH; 167 p->max_search = sbi->max_victim_search;
169 168
170 p->offset = sbi->last_victim[p->gc_mode]; 169 p->offset = sbi->last_victim[p->gc_mode];
171} 170}
@@ -429,7 +428,7 @@ next_step:
429 428
430 /* set page dirty and write it */ 429 /* set page dirty and write it */
431 if (gc_type == FG_GC) { 430 if (gc_type == FG_GC) {
432 f2fs_wait_on_page_writeback(node_page, NODE, true); 431 f2fs_wait_on_page_writeback(node_page, NODE);
433 set_page_dirty(node_page); 432 set_page_dirty(node_page);
434 } else { 433 } else {
435 if (!PageWriteback(node_page)) 434 if (!PageWriteback(node_page))
@@ -521,6 +520,11 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
521 520
522static void move_data_page(struct inode *inode, struct page *page, int gc_type) 521static void move_data_page(struct inode *inode, struct page *page, int gc_type)
523{ 522{
523 struct f2fs_io_info fio = {
524 .type = DATA,
525 .rw = WRITE_SYNC,
526 };
527
524 if (gc_type == BG_GC) { 528 if (gc_type == BG_GC) {
525 if (PageWriteback(page)) 529 if (PageWriteback(page))
526 goto out; 530 goto out;
@@ -529,7 +533,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
529 } else { 533 } else {
530 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 534 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
531 535
532 f2fs_wait_on_page_writeback(page, DATA, true); 536 f2fs_wait_on_page_writeback(page, DATA);
533 537
534 if (clear_page_dirty_for_io(page) && 538 if (clear_page_dirty_for_io(page) &&
535 S_ISDIR(inode->i_mode)) { 539 S_ISDIR(inode->i_mode)) {
@@ -537,7 +541,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
537 inode_dec_dirty_dents(inode); 541 inode_dec_dirty_dents(inode);
538 } 542 }
539 set_cold_data(page); 543 set_cold_data(page);
540 do_write_data_page(page); 544 do_write_data_page(page, &fio);
541 clear_cold_data(page); 545 clear_cold_data(page);
542 } 546 }
543out: 547out:
@@ -631,7 +635,7 @@ next_iput:
631 goto next_step; 635 goto next_step;
632 636
633 if (gc_type == FG_GC) { 637 if (gc_type == FG_GC) {
634 f2fs_submit_bio(sbi, DATA, true); 638 f2fs_submit_merged_bio(sbi, DATA, WRITE);
635 639
636 /* 640 /*
637 * In the case of FG_GC, it'd be better to reclaim this victim 641 * In the case of FG_GC, it'd be better to reclaim this victim
@@ -664,8 +668,6 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
664 668
665 /* read segment summary of victim */ 669 /* read segment summary of victim */
666 sum_page = get_sum_page(sbi, segno); 670 sum_page = get_sum_page(sbi, segno);
667 if (IS_ERR(sum_page))
668 return;
669 671
670 blk_start_plug(&plug); 672 blk_start_plug(&plug);
671 673
@@ -697,7 +699,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
697 699
698 INIT_LIST_HEAD(&ilist); 700 INIT_LIST_HEAD(&ilist);
699gc_more: 701gc_more:
700 if (!(sbi->sb->s_flags & MS_ACTIVE)) 702 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
701 goto stop; 703 goto stop;
702 704
703 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { 705 if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
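
move_data_page() above, like f2fs_write_data_page() earlier, now builds a small f2fs_io_info { type, rw } on the stack and hands it down, so a single descriptor replaces the separate type and sync arguments the old f2fs_submit_bio() took. The idiom in miniature, with a placeholder value standing in for the kernel's WRITE_SYNC flag:

#include <stdio.h>

enum page_type { DATA, NODE, META };

struct io_info {                 /* toy counterpart of struct f2fs_io_info */
	enum page_type type;
	int rw;                  /* e.g. WRITE vs WRITE_SYNC in the kernel */
};

static void submit(const struct io_info *io)
{
	printf("submit type=%d rw=%d\n", io->type, io->rw);
}

int main(void)
{
	struct io_info fio = { .type = DATA, .rw = 1 /* stand-in for WRITE_SYNC */ };

	submit(&fio);   /* call sites pass one descriptor instead of two flags */
	return 0;
}
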
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 507056d22205..5d5eb6047bf4 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -20,7 +20,7 @@
20#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ 20#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
21 21
22/* Search max. number of dirty segments to select a victim segment */ 22/* Search max. number of dirty segments to select a victim segment */
23#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */ 23#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
24 24
25struct f2fs_gc_kthread { 25struct f2fs_gc_kthread {
26 struct task_struct *f2fs_gc_task; 26 struct task_struct *f2fs_gc_task;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
new file mode 100644
index 000000000000..31ee5b164ff9
--- /dev/null
+++ b/fs/f2fs/inline.c
@@ -0,0 +1,222 @@
1/*
2 * fs/f2fs/inline.c
3 * Copyright (c) 2013, Intel Corporation
4 * Authors: Huajun Li <huajun.li@intel.com>
5 * Haicheng Li <haicheng.li@intel.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13
14#include "f2fs.h"
15
16bool f2fs_may_inline(struct inode *inode)
17{
18 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
19 block_t nr_blocks;
20 loff_t i_size;
21
22 if (!test_opt(sbi, INLINE_DATA))
23 return false;
24
25 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
26 if (inode->i_blocks > nr_blocks)
27 return false;
28
29 i_size = i_size_read(inode);
30 if (i_size > MAX_INLINE_DATA)
31 return false;
32
33 return true;
34}
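
Restated stand-alone, the eligibility test above: an inode may keep inline data only while it owns no real data blocks (just its inode block, plus an optional xattr node block, hence the 2-or-3 bound) and its size fits the inline area. The byte capacity below is an assumption for this on-disk layout; the authoritative value is MAX_INLINE_DATA in f2fs.h:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INLINE_CAPACITY 3488	/* assumed MAX_INLINE_DATA, roughly 3.4KB */

static bool may_inline(uint64_t i_blocks, bool has_xattr_node,
		       uint64_t i_size)
{
	/* inode block (+ xattr node) only -- no allocated data blocks */
	uint64_t nr_blocks = has_xattr_node ? 3 : 2;

	return i_blocks <= nr_blocks && i_size <= INLINE_CAPACITY;
}

int main(void)
{
	printf("%d %d\n", may_inline(2, false, 1000),	/* 1: eligible */
			  may_inline(4, false, 1000));	/* 0: has blocks */
	return 0;
}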
35
36int f2fs_read_inline_data(struct inode *inode, struct page *page)
37{
38 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
39 struct page *ipage;
40 void *src_addr, *dst_addr;
41
42 if (page->index) {
43 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
44 goto out;
45 }
46
47 ipage = get_node_page(sbi, inode->i_ino);
48 if (IS_ERR(ipage))
49 return PTR_ERR(ipage);
50
51 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
52
53 /* Copy the whole inline data block */
54 src_addr = inline_data_addr(ipage);
55 dst_addr = kmap(page);
56 memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
57 kunmap(page);
58 f2fs_put_page(ipage, 1);
59
60out:
61 SetPageUptodate(page);
62 unlock_page(page);
63
64 return 0;
65}
66
67static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
68{
69 int err;
70 struct page *ipage;
71 struct dnode_of_data dn;
72 void *src_addr, *dst_addr;
73 block_t new_blk_addr;
74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
75 struct f2fs_io_info fio = {
76 .type = DATA,
77 .rw = WRITE_SYNC | REQ_PRIO,
78 };
79
80 f2fs_lock_op(sbi);
81 ipage = get_node_page(sbi, inode->i_ino);
82 if (IS_ERR(ipage))
83 return PTR_ERR(ipage);
84
85 /*
86 * i_addr[0] is not used for inline data,
87 * so reserving a new block will not destroy the inline data
88 */
89 set_new_dnode(&dn, inode, ipage, NULL, 0);
90 err = f2fs_reserve_block(&dn, 0);
91 if (err) {
92 f2fs_unlock_op(sbi);
93 return err;
94 }
95
96 zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
97
98 /* Copy the whole inline data block */
99 src_addr = inline_data_addr(ipage);
100 dst_addr = kmap(page);
101 memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
102 kunmap(page);
103 SetPageUptodate(page);
104
105 /* write data page to try to make data consistent */
106 set_page_writeback(page);
107 write_data_page(page, &dn, &new_blk_addr, &fio);
108 update_extent_cache(new_blk_addr, &dn);
109 f2fs_wait_on_page_writeback(page, DATA);
110
111 /* clear inline data and flag after data writeback */
112 zero_user_segment(ipage, INLINE_DATA_OFFSET,
113 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
114 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
115 stat_dec_inline_inode(inode);
116
117 sync_inode_page(&dn);
118 f2fs_put_dnode(&dn);
119 f2fs_unlock_op(sbi);
120 return err;
121}
122
123int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size)
124{
125 struct page *page;
126 int err;
127
128 if (!f2fs_has_inline_data(inode))
129 return 0;
130 else if (to_size <= MAX_INLINE_DATA)
131 return 0;
132
133 page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS);
134 if (!page)
135 return -ENOMEM;
136
137 err = __f2fs_convert_inline_data(inode, page);
138 f2fs_put_page(page, 1);
139 return err;
140}
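
f2fs_convert_inline_data() is the growth path: it is a no-op until a write would push the file past MAX_INLINE_DATA, at which point __f2fs_convert_inline_data() flushes the inline bytes into a freshly reserved block. A runnable restatement of just the early-return decision (INLINE_CAPACITY is the same assumed constant as above):

#include <stdbool.h>
#include <stdio.h>

#define INLINE_CAPACITY 3488	/* assumed MAX_INLINE_DATA */

/* mirrors the two early returns of f2fs_convert_inline_data() */
static bool needs_convert(bool has_inline_data, unsigned long to_size)
{
	if (!has_inline_data)
		return false;		/* already block-mapped */
	if (to_size <= INLINE_CAPACITY)
		return false;		/* still fits inside the inode */
	return true;			/* flush inline bytes to a block */
}

int main(void)
{
	printf("%d %d\n", needs_convert(true, 4096),	/* 1: convert */
			  needs_convert(true, 2048));	/* 0: keep inline */
	return 0;
}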
141
142int f2fs_write_inline_data(struct inode *inode,
143 struct page *page, unsigned size)
144{
145 void *src_addr, *dst_addr;
146 struct page *ipage;
147 struct dnode_of_data dn;
148 int err;
149
150 set_new_dnode(&dn, inode, NULL, NULL, 0);
151 err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
152 if (err)
153 return err;
154 ipage = dn.inode_page;
155
156 zero_user_segment(ipage, INLINE_DATA_OFFSET,
157 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
158 src_addr = kmap(page);
159 dst_addr = inline_data_addr(ipage);
160 memcpy(dst_addr, src_addr, size);
161 kunmap(page);
162
163 /* Release the first data block if it is allocated */
164 if (!f2fs_has_inline_data(inode)) {
165 truncate_data_blocks_range(&dn, 1);
166 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
167 stat_inc_inline_inode(inode);
168 }
169
170 sync_inode_page(&dn);
171 f2fs_put_dnode(&dn);
172
173 return 0;
174}
175
176int recover_inline_data(struct inode *inode, struct page *npage)
177{
178 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
179 struct f2fs_inode *ri = NULL;
180 void *src_addr, *dst_addr;
181 struct page *ipage;
182
183 /*
184 * The inline_data recovery policy is as follows.
185 * [prev.] [next] of inline_data flag
186 * o o -> recover inline_data
187 * o x -> remove inline_data, and then recover data blocks
188 * x o -> remove data blocks, and then recover inline_data
189 * x x -> recover data blocks
190 */
191 if (IS_INODE(npage))
192 ri = F2FS_INODE(npage);
193
194 if (f2fs_has_inline_data(inode) &&
195 ri && ri->i_inline & F2FS_INLINE_DATA) {
196process_inline:
197 ipage = get_node_page(sbi, inode->i_ino);
198 f2fs_bug_on(IS_ERR(ipage));
199
200 src_addr = inline_data_addr(npage);
201 dst_addr = inline_data_addr(ipage);
202 memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
203 update_inode(inode, ipage);
204 f2fs_put_page(ipage, 1);
205 return -1;
206 }
207
208 if (f2fs_has_inline_data(inode)) {
209 ipage = get_node_page(sbi, inode->i_ino);
210 f2fs_bug_on(IS_ERR(ipage));
211 zero_user_segment(ipage, INLINE_DATA_OFFSET,
212 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
213 clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
214 update_inode(inode, ipage);
215 f2fs_put_page(ipage, 1);
216 } else if (ri && ri->i_inline & F2FS_INLINE_DATA) {
217 truncate_blocks(inode, 0);
218 set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
219 goto process_inline;
220 }
221 return 0;
222}
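
The four-way policy in recover_inline_data() can be read as a decision table on the inline flag before (in-memory inode) and after (fsynced node page). Only the two branches that end in inline recovery return -1, which the recovery path uses to skip block recovery. A runnable restatement; the enum names are illustrative, the kernel function just returns -1 or 0:

#include <stdbool.h>
#include <stdio.h>

enum action {
	RECOVER_INLINE,			/* o o: copy inline bytes, skip blocks */
	DROP_INLINE_RECOVER_BLOCKS,	/* o x: clear flag, recover blocks */
	DROP_BLOCKS_RECOVER_INLINE,	/* x o: truncate, then copy inline */
	RECOVER_BLOCKS,			/* x x: plain block recovery */
};

static enum action pick(bool prev_inline, bool next_inline)
{
	if (prev_inline && next_inline)
		return RECOVER_INLINE;
	if (prev_inline)
		return DROP_INLINE_RECOVER_BLOCKS;
	if (next_inline)
		return DROP_BLOCKS_RECOVER_INLINE;
	return RECOVER_BLOCKS;
}

int main(void)
{
	/* the two *_RECOVER_INLINE outcomes map to the -1 returns above */
	printf("%d %d\n", pick(true, true), pick(false, true));	/* 0 2 */
	return 0;
}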
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index d0eaa9faeca0..4d67ed736dca 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -42,9 +42,11 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
42 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 42 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
43 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 43 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
44 if (ri->i_addr[0]) 44 if (ri->i_addr[0])
45 inode->i_rdev = old_decode_dev(le32_to_cpu(ri->i_addr[0])); 45 inode->i_rdev =
46 old_decode_dev(le32_to_cpu(ri->i_addr[0]));
46 else 47 else
47 inode->i_rdev = new_decode_dev(le32_to_cpu(ri->i_addr[1])); 48 inode->i_rdev =
49 new_decode_dev(le32_to_cpu(ri->i_addr[1]));
48 } 50 }
49} 51}
50 52
@@ -52,11 +54,13 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
52{ 54{
53 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 55 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
54 if (old_valid_dev(inode->i_rdev)) { 56 if (old_valid_dev(inode->i_rdev)) {
55 ri->i_addr[0] = cpu_to_le32(old_encode_dev(inode->i_rdev)); 57 ri->i_addr[0] =
58 cpu_to_le32(old_encode_dev(inode->i_rdev));
56 ri->i_addr[1] = 0; 59 ri->i_addr[1] = 0;
57 } else { 60 } else {
58 ri->i_addr[0] = 0; 61 ri->i_addr[0] = 0;
59 ri->i_addr[1] = cpu_to_le32(new_encode_dev(inode->i_rdev)); 62 ri->i_addr[1] =
63 cpu_to_le32(new_encode_dev(inode->i_rdev));
60 ri->i_addr[2] = 0; 64 ri->i_addr[2] = 0;
61 } 65 }
62 } 66 }
@@ -67,7 +71,6 @@ static int do_read_inode(struct inode *inode)
67 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 71 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
68 struct f2fs_inode_info *fi = F2FS_I(inode); 72 struct f2fs_inode_info *fi = F2FS_I(inode);
69 struct page *node_page; 73 struct page *node_page;
70 struct f2fs_node *rn;
71 struct f2fs_inode *ri; 74 struct f2fs_inode *ri;
72 75
73 /* Check if ino is within scope */ 76 /* Check if ino is within scope */
@@ -81,8 +84,7 @@ static int do_read_inode(struct inode *inode)
81 if (IS_ERR(node_page)) 84 if (IS_ERR(node_page))
82 return PTR_ERR(node_page); 85 return PTR_ERR(node_page);
83 86
84 rn = F2FS_NODE(node_page); 87 ri = F2FS_INODE(node_page);
85 ri = &(rn->i);
86 88
87 inode->i_mode = le16_to_cpu(ri->i_mode); 89 inode->i_mode = le16_to_cpu(ri->i_mode);
88 i_uid_write(inode, le32_to_cpu(ri->i_uid)); 90 i_uid_write(inode, le32_to_cpu(ri->i_uid));
@@ -175,13 +177,11 @@ bad_inode:
175 177
176void update_inode(struct inode *inode, struct page *node_page) 178void update_inode(struct inode *inode, struct page *node_page)
177{ 179{
178 struct f2fs_node *rn;
179 struct f2fs_inode *ri; 180 struct f2fs_inode *ri;
180 181
181 f2fs_wait_on_page_writeback(node_page, NODE, false); 182 f2fs_wait_on_page_writeback(node_page, NODE);
182 183
183 rn = F2FS_NODE(node_page); 184 ri = F2FS_INODE(node_page);
184 ri = &(rn->i);
185 185
186 ri->i_mode = cpu_to_le16(inode->i_mode); 186 ri->i_mode = cpu_to_le16(inode->i_mode);
187 ri->i_advise = F2FS_I(inode)->i_advise; 187 ri->i_advise = F2FS_I(inode)->i_advise;
@@ -281,6 +281,7 @@ void f2fs_evict_inode(struct inode *inode)
281 281
282 f2fs_lock_op(sbi); 282 f2fs_lock_op(sbi);
283 remove_inode_page(inode); 283 remove_inode_page(inode);
284 stat_dec_inline_inode(inode);
284 f2fs_unlock_op(sbi); 285 f2fs_unlock_op(sbi);
285 286
286 sb_end_intwrite(inode->i_sb); 287 sb_end_intwrite(inode->i_sb);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 575adac17f8b..3d32f2969c5e 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -424,11 +424,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
424 } 424 }
425 425
426 f2fs_set_link(new_dir, new_entry, new_page, old_inode); 426 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
427 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
427 428
428 new_inode->i_ctime = CURRENT_TIME; 429 new_inode->i_ctime = CURRENT_TIME;
429 if (old_dir_entry) 430 if (old_dir_entry)
430 drop_nlink(new_inode); 431 drop_nlink(new_inode);
431 drop_nlink(new_inode); 432 drop_nlink(new_inode);
433 mark_inode_dirty(new_inode);
432 434
433 if (!new_inode->i_nlink) 435 if (!new_inode->i_nlink)
434 add_orphan_inode(sbi, new_inode->i_ino); 436 add_orphan_inode(sbi, new_inode->i_ino);
@@ -457,11 +459,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
457 if (old_dir != new_dir) { 459 if (old_dir != new_dir) {
458 f2fs_set_link(old_inode, old_dir_entry, 460 f2fs_set_link(old_inode, old_dir_entry,
459 old_dir_page, new_dir); 461 old_dir_page, new_dir);
462 F2FS_I(old_inode)->i_pino = new_dir->i_ino;
463 update_inode_page(old_inode);
460 } else { 464 } else {
461 kunmap(old_dir_page); 465 kunmap(old_dir_page);
462 f2fs_put_page(old_dir_page, 0); 466 f2fs_put_page(old_dir_page, 0);
463 } 467 }
464 drop_nlink(old_dir); 468 drop_nlink(old_dir);
469 mark_inode_dirty(old_dir);
465 update_inode_page(old_dir); 470 update_inode_page(old_dir);
466 } 471 }
467 472
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4ac4150d421d..b0649b76eb4f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -87,17 +87,19 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
87 */ 87 */
88static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) 88static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
89{ 89{
90 struct address_space *mapping = sbi->meta_inode->i_mapping; 90 struct address_space *mapping = META_MAPPING(sbi);
91 struct f2fs_nm_info *nm_i = NM_I(sbi); 91 struct f2fs_nm_info *nm_i = NM_I(sbi);
92 struct blk_plug plug;
93 struct page *page; 92 struct page *page;
94 pgoff_t index; 93 pgoff_t index;
95 int i; 94 int i;
95 struct f2fs_io_info fio = {
96 .type = META,
97 .rw = READ_SYNC | REQ_META | REQ_PRIO
98 };
96 99
97 blk_start_plug(&plug);
98 100
99 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { 101 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
100 if (nid >= nm_i->max_nid) 102 if (unlikely(nid >= nm_i->max_nid))
101 nid = 0; 103 nid = 0;
102 index = current_nat_addr(sbi, nid); 104 index = current_nat_addr(sbi, nid);
103 105
@@ -105,15 +107,15 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
105 if (!page) 107 if (!page)
106 continue; 108 continue;
107 if (PageUptodate(page)) { 109 if (PageUptodate(page)) {
110 mark_page_accessed(page);
108 f2fs_put_page(page, 1); 111 f2fs_put_page(page, 1);
109 continue; 112 continue;
110 } 113 }
111 if (f2fs_readpage(sbi, page, index, READ)) 114 f2fs_submit_page_mbio(sbi, page, index, &fio);
112 continue; 115 mark_page_accessed(page);
113
114 f2fs_put_page(page, 0); 116 f2fs_put_page(page, 0);
115 } 117 }
116 blk_finish_plug(&plug); 118 f2fs_submit_merged_bio(sbi, META, READ);
117} 119}
118 120
119static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) 121static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
@@ -391,8 +393,8 @@ got:
391 393
392/* 394/*
393 * Caller should call f2fs_put_dnode(dn). 395 * Caller should call f2fs_put_dnode(dn).
394 * Also, it should grab and release a mutex by calling mutex_lock_op() and 396 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
395 * mutex_unlock_op() only if ro is not set RDONLY_NODE. 397 * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
396 * In the case of RDONLY_NODE, we don't need to care about mutex. 398 * In the case of RDONLY_NODE, we don't need to care about mutex.
397 */ 399 */
398int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) 400int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
@@ -502,7 +504,7 @@ static void truncate_node(struct dnode_of_data *dn)
502 504
503 /* Deallocate node address */ 505 /* Deallocate node address */
504 invalidate_blocks(sbi, ni.blk_addr); 506 invalidate_blocks(sbi, ni.blk_addr);
505 dec_valid_node_count(sbi, dn->inode, 1); 507 dec_valid_node_count(sbi, dn->inode);
506 set_node_addr(sbi, &ni, NULL_ADDR); 508 set_node_addr(sbi, &ni, NULL_ADDR);
507 509
508 if (dn->nid == dn->inode->i_ino) { 510 if (dn->nid == dn->inode->i_ino) {
@@ -516,6 +518,10 @@ invalidate:
516 F2FS_SET_SB_DIRT(sbi); 518 F2FS_SET_SB_DIRT(sbi);
517 519
518 f2fs_put_page(dn->node_page, 1); 520 f2fs_put_page(dn->node_page, 1);
521
522 invalidate_mapping_pages(NODE_MAPPING(sbi),
523 dn->node_page->index, dn->node_page->index);
524
519 dn->node_page = NULL; 525 dn->node_page = NULL;
520 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); 526 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
521} 527}
@@ -631,19 +637,19 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
631 return 0; 637 return 0;
632 638
633 /* get indirect nodes in the path */ 639 /* get indirect nodes in the path */
634 for (i = 0; i < depth - 1; i++) { 640 for (i = 0; i < idx + 1; i++) {
635 /* reference count will be increased */ 641 /* reference count will be increased */
636 pages[i] = get_node_page(sbi, nid[i]); 642 pages[i] = get_node_page(sbi, nid[i]);
637 if (IS_ERR(pages[i])) { 643 if (IS_ERR(pages[i])) {
638 depth = i + 1;
639 err = PTR_ERR(pages[i]); 644 err = PTR_ERR(pages[i]);
645 idx = i - 1;
640 goto fail; 646 goto fail;
641 } 647 }
642 nid[i + 1] = get_nid(pages[i], offset[i + 1], false); 648 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
643 } 649 }
644 650
645 /* free direct nodes linked to a partial indirect node */ 651 /* free direct nodes linked to a partial indirect node */
646 for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) { 652 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
647 child_nid = get_nid(pages[idx], i, false); 653 child_nid = get_nid(pages[idx], i, false);
648 if (!child_nid) 654 if (!child_nid)
649 continue; 655 continue;
@@ -654,7 +660,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
654 set_nid(pages[idx], i, 0, false); 660 set_nid(pages[idx], i, 0, false);
655 } 661 }
656 662
657 if (offset[depth - 1] == 0) { 663 if (offset[idx + 1] == 0) {
658 dn->node_page = pages[idx]; 664 dn->node_page = pages[idx];
659 dn->nid = nid[idx]; 665 dn->nid = nid[idx];
660 truncate_node(dn); 666 truncate_node(dn);
@@ -662,9 +668,10 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
662 f2fs_put_page(pages[idx], 1); 668 f2fs_put_page(pages[idx], 1);
663 } 669 }
664 offset[idx]++; 670 offset[idx]++;
665 offset[depth - 1] = 0; 671 offset[idx + 1] = 0;
672 idx--;
666fail: 673fail:
667 for (i = depth - 3; i >= 0; i--) 674 for (i = idx; i >= 0; i--)
668 f2fs_put_page(pages[i], 1); 675 f2fs_put_page(pages[i], 1);
669 676
670 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); 677 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
@@ -678,11 +685,10 @@ fail:
678int truncate_inode_blocks(struct inode *inode, pgoff_t from) 685int truncate_inode_blocks(struct inode *inode, pgoff_t from)
679{ 686{
680 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 687 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
681 struct address_space *node_mapping = sbi->node_inode->i_mapping;
682 int err = 0, cont = 1; 688 int err = 0, cont = 1;
683 int level, offset[4], noffset[4]; 689 int level, offset[4], noffset[4];
684 unsigned int nofs = 0; 690 unsigned int nofs = 0;
685 struct f2fs_node *rn; 691 struct f2fs_inode *ri;
686 struct dnode_of_data dn; 692 struct dnode_of_data dn;
687 struct page *page; 693 struct page *page;
688 694
@@ -699,7 +705,7 @@ restart:
699 set_new_dnode(&dn, inode, page, NULL, 0); 705 set_new_dnode(&dn, inode, page, NULL, 0);
700 unlock_page(page); 706 unlock_page(page);
701 707
702 rn = F2FS_NODE(page); 708 ri = F2FS_INODE(page);
703 switch (level) { 709 switch (level) {
704 case 0: 710 case 0:
705 case 1: 711 case 1:
@@ -709,7 +715,7 @@ restart:
709 nofs = noffset[1]; 715 nofs = noffset[1];
710 if (!offset[level - 1]) 716 if (!offset[level - 1])
711 goto skip_partial; 717 goto skip_partial;
712 err = truncate_partial_nodes(&dn, &rn->i, offset, level); 718 err = truncate_partial_nodes(&dn, ri, offset, level);
713 if (err < 0 && err != -ENOENT) 719 if (err < 0 && err != -ENOENT)
714 goto fail; 720 goto fail;
715 nofs += 1 + NIDS_PER_BLOCK; 721 nofs += 1 + NIDS_PER_BLOCK;
@@ -718,7 +724,7 @@ restart:
718 nofs = 5 + 2 * NIDS_PER_BLOCK; 724 nofs = 5 + 2 * NIDS_PER_BLOCK;
719 if (!offset[level - 1]) 725 if (!offset[level - 1])
720 goto skip_partial; 726 goto skip_partial;
721 err = truncate_partial_nodes(&dn, &rn->i, offset, level); 727 err = truncate_partial_nodes(&dn, ri, offset, level);
722 if (err < 0 && err != -ENOENT) 728 if (err < 0 && err != -ENOENT)
723 goto fail; 729 goto fail;
724 break; 730 break;
@@ -728,7 +734,7 @@ restart:
728 734
729skip_partial: 735skip_partial:
730 while (cont) { 736 while (cont) {
731 dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]); 737 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
732 switch (offset[0]) { 738 switch (offset[0]) {
733 case NODE_DIR1_BLOCK: 739 case NODE_DIR1_BLOCK:
734 case NODE_DIR2_BLOCK: 740 case NODE_DIR2_BLOCK:
@@ -751,14 +757,14 @@ skip_partial:
751 if (err < 0 && err != -ENOENT) 757 if (err < 0 && err != -ENOENT)
752 goto fail; 758 goto fail;
753 if (offset[1] == 0 && 759 if (offset[1] == 0 &&
754 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { 760 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
755 lock_page(page); 761 lock_page(page);
756 if (page->mapping != node_mapping) { 762 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
757 f2fs_put_page(page, 1); 763 f2fs_put_page(page, 1);
758 goto restart; 764 goto restart;
759 } 765 }
760 wait_on_page_writeback(page); 766 wait_on_page_writeback(page);
761 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; 767 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
762 set_page_dirty(page); 768 set_page_dirty(page);
763 unlock_page(page); 769 unlock_page(page);
764 } 770 }
@@ -794,38 +800,34 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
794 set_new_dnode(&dn, inode, page, npage, nid); 800 set_new_dnode(&dn, inode, page, npage, nid);
795 801
796 if (page) 802 if (page)
797 dn.inode_page_locked = 1; 803 dn.inode_page_locked = true;
798 truncate_node(&dn); 804 truncate_node(&dn);
799 return 0; 805 return 0;
800} 806}
801 807
802/* 808/*
803 * Caller should grab and release a mutex by calling mutex_lock_op() and 809 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
804 * mutex_unlock_op(). 810 * f2fs_unlock_op().
805 */ 811 */
806int remove_inode_page(struct inode *inode) 812void remove_inode_page(struct inode *inode)
807{ 813{
808 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 814 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
809 struct page *page; 815 struct page *page;
810 nid_t ino = inode->i_ino; 816 nid_t ino = inode->i_ino;
811 struct dnode_of_data dn; 817 struct dnode_of_data dn;
812 int err;
813 818
814 page = get_node_page(sbi, ino); 819 page = get_node_page(sbi, ino);
815 if (IS_ERR(page)) 820 if (IS_ERR(page))
816 return PTR_ERR(page); 821 return;
817 822
818 err = truncate_xattr_node(inode, page); 823 if (truncate_xattr_node(inode, page)) {
819 if (err) {
820 f2fs_put_page(page, 1); 824 f2fs_put_page(page, 1);
821 return err; 825 return;
822 } 826 }
823
824 /* 0 is possible after f2fs_new_inode() fails */ 827 /* 0 is possible after f2fs_new_inode() fails */
825 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); 828 f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1);
826 set_new_dnode(&dn, inode, page, page, ino); 829 set_new_dnode(&dn, inode, page, page, ino);
827 truncate_node(&dn); 830 truncate_node(&dn);
828 return 0;
829} 831}
830 832
831struct page *new_inode_page(struct inode *inode, const struct qstr *name) 833struct page *new_inode_page(struct inode *inode, const struct qstr *name)
@@ -843,19 +845,18 @@ struct page *new_node_page(struct dnode_of_data *dn,
843 unsigned int ofs, struct page *ipage) 845 unsigned int ofs, struct page *ipage)
844{ 846{
845 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); 847 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
846 struct address_space *mapping = sbi->node_inode->i_mapping;
847 struct node_info old_ni, new_ni; 848 struct node_info old_ni, new_ni;
848 struct page *page; 849 struct page *page;
849 int err; 850 int err;
850 851
851 if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)) 852 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
852 return ERR_PTR(-EPERM); 853 return ERR_PTR(-EPERM);
853 854
854 page = grab_cache_page(mapping, dn->nid); 855 page = grab_cache_page(NODE_MAPPING(sbi), dn->nid);
855 if (!page) 856 if (!page)
856 return ERR_PTR(-ENOMEM); 857 return ERR_PTR(-ENOMEM);
857 858
858 if (!inc_valid_node_count(sbi, dn->inode, 1)) { 859 if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
859 err = -ENOSPC; 860 err = -ENOSPC;
860 goto fail; 861 goto fail;
861 } 862 }
@@ -898,14 +899,14 @@ fail:
898 * LOCKED_PAGE: f2fs_put_page(page, 1) 899 * LOCKED_PAGE: f2fs_put_page(page, 1)
899 * error: nothing 900 * error: nothing
900 */ 901 */
901static int read_node_page(struct page *page, int type) 902static int read_node_page(struct page *page, int rw)
902{ 903{
903 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); 904 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
904 struct node_info ni; 905 struct node_info ni;
905 906
906 get_node_info(sbi, page->index, &ni); 907 get_node_info(sbi, page->index, &ni);
907 908
908 if (ni.blk_addr == NULL_ADDR) { 909 if (unlikely(ni.blk_addr == NULL_ADDR)) {
909 f2fs_put_page(page, 1); 910 f2fs_put_page(page, 1);
910 return -ENOENT; 911 return -ENOENT;
911 } 912 }
@@ -913,7 +914,7 @@ static int read_node_page(struct page *page, int type)
913 if (PageUptodate(page)) 914 if (PageUptodate(page))
914 return LOCKED_PAGE; 915 return LOCKED_PAGE;
915 916
916 return f2fs_readpage(sbi, page, ni.blk_addr, type); 917 return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw);
917} 918}
918 919
919/* 920/*
@@ -921,18 +922,17 @@ static int read_node_page(struct page *page, int type)
921 */ 922 */
922void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) 923void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
923{ 924{
924 struct address_space *mapping = sbi->node_inode->i_mapping;
925 struct page *apage; 925 struct page *apage;
926 int err; 926 int err;
927 927
928 apage = find_get_page(mapping, nid); 928 apage = find_get_page(NODE_MAPPING(sbi), nid);
929 if (apage && PageUptodate(apage)) { 929 if (apage && PageUptodate(apage)) {
930 f2fs_put_page(apage, 0); 930 f2fs_put_page(apage, 0);
931 return; 931 return;
932 } 932 }
933 f2fs_put_page(apage, 0); 933 f2fs_put_page(apage, 0);
934 934
935 apage = grab_cache_page(mapping, nid); 935 apage = grab_cache_page(NODE_MAPPING(sbi), nid);
936 if (!apage) 936 if (!apage)
937 return; 937 return;
938 938
@@ -945,11 +945,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
945 945
946struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) 946struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
947{ 947{
948 struct address_space *mapping = sbi->node_inode->i_mapping;
949 struct page *page; 948 struct page *page;
950 int err; 949 int err;
951repeat: 950repeat:
952 page = grab_cache_page(mapping, nid); 951 page = grab_cache_page(NODE_MAPPING(sbi), nid);
953 if (!page) 952 if (!page)
954 return ERR_PTR(-ENOMEM); 953 return ERR_PTR(-ENOMEM);
955 954
@@ -960,11 +959,11 @@ repeat:
960 goto got_it; 959 goto got_it;
961 960
962 lock_page(page); 961 lock_page(page);
963 if (!PageUptodate(page)) { 962 if (unlikely(!PageUptodate(page))) {
964 f2fs_put_page(page, 1); 963 f2fs_put_page(page, 1);
965 return ERR_PTR(-EIO); 964 return ERR_PTR(-EIO);
966 } 965 }
967 if (page->mapping != mapping) { 966 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
968 f2fs_put_page(page, 1); 967 f2fs_put_page(page, 1);
969 goto repeat; 968 goto repeat;
970 } 969 }
@@ -981,7 +980,6 @@ got_it:
981struct page *get_node_page_ra(struct page *parent, int start) 980struct page *get_node_page_ra(struct page *parent, int start)
982{ 981{
983 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); 982 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
984 struct address_space *mapping = sbi->node_inode->i_mapping;
985 struct blk_plug plug; 983 struct blk_plug plug;
986 struct page *page; 984 struct page *page;
987 int err, i, end; 985 int err, i, end;
@@ -992,7 +990,7 @@ struct page *get_node_page_ra(struct page *parent, int start)
992 if (!nid) 990 if (!nid)
993 return ERR_PTR(-ENOENT); 991 return ERR_PTR(-ENOENT);
994repeat: 992repeat:
995 page = grab_cache_page(mapping, nid); 993 page = grab_cache_page(NODE_MAPPING(sbi), nid);
996 if (!page) 994 if (!page)
997 return ERR_PTR(-ENOMEM); 995 return ERR_PTR(-ENOMEM);
998 996
@@ -1017,12 +1015,12 @@ repeat:
1017 blk_finish_plug(&plug); 1015 blk_finish_plug(&plug);
1018 1016
1019 lock_page(page); 1017 lock_page(page);
1020 if (page->mapping != mapping) { 1018 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1021 f2fs_put_page(page, 1); 1019 f2fs_put_page(page, 1);
1022 goto repeat; 1020 goto repeat;
1023 } 1021 }
1024page_hit: 1022page_hit:
1025 if (!PageUptodate(page)) { 1023 if (unlikely(!PageUptodate(page))) {
1026 f2fs_put_page(page, 1); 1024 f2fs_put_page(page, 1);
1027 return ERR_PTR(-EIO); 1025 return ERR_PTR(-EIO);
1028 } 1026 }
@@ -1048,7 +1046,6 @@ void sync_inode_page(struct dnode_of_data *dn)
1048int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, 1046int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
1049 struct writeback_control *wbc) 1047 struct writeback_control *wbc)
1050{ 1048{
1051 struct address_space *mapping = sbi->node_inode->i_mapping;
1052 pgoff_t index, end; 1049 pgoff_t index, end;
1053 struct pagevec pvec; 1050 struct pagevec pvec;
1054 int step = ino ? 2 : 0; 1051 int step = ino ? 2 : 0;
@@ -1062,7 +1059,7 @@ next_step:
1062 1059
1063 while (index <= end) { 1060 while (index <= end) {
1064 int i, nr_pages; 1061 int i, nr_pages;
1065 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 1062 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1066 PAGECACHE_TAG_DIRTY, 1063 PAGECACHE_TAG_DIRTY,
1067 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); 1064 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1068 if (nr_pages == 0) 1065 if (nr_pages == 0)
@@ -1095,7 +1092,7 @@ next_step:
1095 else if (!trylock_page(page)) 1092 else if (!trylock_page(page))
1096 continue; 1093 continue;
1097 1094
1098 if (unlikely(page->mapping != mapping)) { 1095 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1099continue_unlock: 1096continue_unlock:
1100 unlock_page(page); 1097 unlock_page(page);
1101 continue; 1098 continue;
@@ -1122,7 +1119,7 @@ continue_unlock:
1122 set_fsync_mark(page, 0); 1119 set_fsync_mark(page, 0);
1123 set_dentry_mark(page, 0); 1120 set_dentry_mark(page, 0);
1124 } 1121 }
1125 mapping->a_ops->writepage(page, wbc); 1122 NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
1126 wrote++; 1123 wrote++;
1127 1124
1128 if (--wbc->nr_to_write == 0) 1125 if (--wbc->nr_to_write == 0)
@@ -1143,31 +1140,31 @@ continue_unlock:
1143 } 1140 }
1144 1141
1145 if (wrote) 1142 if (wrote)
1146 f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL); 1143 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1147
1148 return nwritten; 1144 return nwritten;
1149} 1145}
1150 1146
1151int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) 1147int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1152{ 1148{
1153 struct address_space *mapping = sbi->node_inode->i_mapping;
1154 pgoff_t index = 0, end = LONG_MAX; 1149 pgoff_t index = 0, end = LONG_MAX;
1155 struct pagevec pvec; 1150 struct pagevec pvec;
1156 int nr_pages;
1157 int ret2 = 0, ret = 0; 1151 int ret2 = 0, ret = 0;
1158 1152
1159 pagevec_init(&pvec, 0); 1153 pagevec_init(&pvec, 0);
1160 while ((index <= end) && 1154
1161 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 1155 while (index <= end) {
1162 PAGECACHE_TAG_WRITEBACK, 1156 int i, nr_pages;
1163 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { 1157 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1164 unsigned i; 1158 PAGECACHE_TAG_WRITEBACK,
1159 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1160 if (nr_pages == 0)
1161 break;
1165 1162
1166 for (i = 0; i < nr_pages; i++) { 1163 for (i = 0; i < nr_pages; i++) {
1167 struct page *page = pvec.pages[i]; 1164 struct page *page = pvec.pages[i];
1168 1165
1169 /* until radix tree lookup accepts end_index */ 1166 /* until radix tree lookup accepts end_index */
1170 if (page->index > end) 1167 if (unlikely(page->index > end))
1171 continue; 1168 continue;
1172 1169
1173 if (ino && ino_of_node(page) == ino) { 1170 if (ino && ino_of_node(page) == ino) {
@@ -1180,9 +1177,9 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1180 cond_resched(); 1177 cond_resched();
1181 } 1178 }
1182 1179
1183 if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) 1180 if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
1184 ret2 = -ENOSPC; 1181 ret2 = -ENOSPC;
1185 if (test_and_clear_bit(AS_EIO, &mapping->flags)) 1182 if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
1186 ret2 = -EIO; 1183 ret2 = -EIO;
1187 if (!ret) 1184 if (!ret)
1188 ret = ret2; 1185 ret = ret2;
@@ -1196,8 +1193,12 @@ static int f2fs_write_node_page(struct page *page,
1196 nid_t nid; 1193 nid_t nid;
1197 block_t new_addr; 1194 block_t new_addr;
1198 struct node_info ni; 1195 struct node_info ni;
1196 struct f2fs_io_info fio = {
1197 .type = NODE,
1198 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1199 };
1199 1200
1200 if (sbi->por_doing) 1201 if (unlikely(sbi->por_doing))
1201 goto redirty_out; 1202 goto redirty_out;
1202 1203
1203 wait_on_page_writeback(page); 1204 wait_on_page_writeback(page);
@@ -1209,7 +1210,7 @@ static int f2fs_write_node_page(struct page *page,
1209 get_node_info(sbi, nid, &ni); 1210 get_node_info(sbi, nid, &ni);
1210 1211
1211 /* This page is already truncated */ 1212 /* This page is already truncated */
1212 if (ni.blk_addr == NULL_ADDR) { 1213 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1213 dec_page_count(sbi, F2FS_DIRTY_NODES); 1214 dec_page_count(sbi, F2FS_DIRTY_NODES);
1214 unlock_page(page); 1215 unlock_page(page);
1215 return 0; 1216 return 0;
@@ -1220,7 +1221,7 @@ static int f2fs_write_node_page(struct page *page,
1220 1221
1221 mutex_lock(&sbi->node_write); 1222 mutex_lock(&sbi->node_write);
1222 set_page_writeback(page); 1223 set_page_writeback(page);
1223 write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); 1224 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1224 set_node_addr(sbi, &ni, new_addr); 1225 set_node_addr(sbi, &ni, new_addr);
1225 dec_page_count(sbi, F2FS_DIRTY_NODES); 1226 dec_page_count(sbi, F2FS_DIRTY_NODES);
1226 mutex_unlock(&sbi->node_write); 1227 mutex_unlock(&sbi->node_write);
@@ -1255,6 +1256,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
1255 1256
1256 /* if mounting is failed, skip writing node pages */ 1257 /* if mounting is failed, skip writing node pages */
1257 wbc->nr_to_write = 3 * max_hw_blocks(sbi); 1258 wbc->nr_to_write = 3 * max_hw_blocks(sbi);
1259 wbc->sync_mode = WB_SYNC_NONE;
1258 sync_node_pages(sbi, 0, wbc); 1260 sync_node_pages(sbi, 0, wbc);
1259 wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - 1261 wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) -
1260 wbc->nr_to_write); 1262 wbc->nr_to_write);
@@ -1333,7 +1335,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build)
1333 return -1; 1335 return -1;
1334 1336
1335 /* 0 nid should not be used */ 1337 /* 0 nid should not be used */
1336 if (nid == 0) 1338 if (unlikely(nid == 0))
1337 return 0; 1339 return 0;
1338 1340
1339 if (build) { 1341 if (build) {
@@ -1386,7 +1388,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i,
1386 1388
1387 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { 1389 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1388 1390
1389 if (start_nid >= nm_i->max_nid) 1391 if (unlikely(start_nid >= nm_i->max_nid))
1390 break; 1392 break;
1391 1393
1392 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); 1394 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
@@ -1420,7 +1422,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
1420 f2fs_put_page(page, 1); 1422 f2fs_put_page(page, 1);
1421 1423
1422 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); 1424 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1423 if (nid >= nm_i->max_nid) 1425 if (unlikely(nid >= nm_i->max_nid))
1424 nid = 0; 1426 nid = 0;
1425 1427
1426 if (i++ == FREE_NID_PAGES) 1428 if (i++ == FREE_NID_PAGES)
@@ -1454,7 +1456,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1454 struct free_nid *i = NULL; 1456 struct free_nid *i = NULL;
1455 struct list_head *this; 1457 struct list_head *this;
1456retry: 1458retry:
1457 if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) 1459 if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid))
1458 return false; 1460 return false;
1459 1461
1460 spin_lock(&nm_i->free_nid_list_lock); 1462 spin_lock(&nm_i->free_nid_list_lock);
@@ -1535,13 +1537,12 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1535 1537
1536int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) 1538int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1537{ 1539{
1538 struct address_space *mapping = sbi->node_inode->i_mapping; 1540 struct f2fs_inode *src, *dst;
1539 struct f2fs_node *src, *dst;
1540 nid_t ino = ino_of_node(page); 1541 nid_t ino = ino_of_node(page);
1541 struct node_info old_ni, new_ni; 1542 struct node_info old_ni, new_ni;
1542 struct page *ipage; 1543 struct page *ipage;
1543 1544
1544 ipage = grab_cache_page(mapping, ino); 1545 ipage = grab_cache_page(NODE_MAPPING(sbi), ino);
1545 if (!ipage) 1546 if (!ipage)
1546 return -ENOMEM; 1547 return -ENOMEM;
1547 1548
@@ -1552,19 +1553,19 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1552 SetPageUptodate(ipage); 1553 SetPageUptodate(ipage);
1553 fill_node_footer(ipage, ino, ino, 0, true); 1554 fill_node_footer(ipage, ino, ino, 0, true);
1554 1555
1555 src = F2FS_NODE(page); 1556 src = F2FS_INODE(page);
1556 dst = F2FS_NODE(ipage); 1557 dst = F2FS_INODE(ipage);
1557 1558
1558 memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); 1559 memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
1559 dst->i.i_size = 0; 1560 dst->i_size = 0;
1560 dst->i.i_blocks = cpu_to_le64(1); 1561 dst->i_blocks = cpu_to_le64(1);
1561 dst->i.i_links = cpu_to_le32(1); 1562 dst->i_links = cpu_to_le32(1);
1562 dst->i.i_xattr_nid = 0; 1563 dst->i_xattr_nid = 0;
1563 1564
1564 new_ni = old_ni; 1565 new_ni = old_ni;
1565 new_ni.ino = ino; 1566 new_ni.ino = ino;
1566 1567
1567 if (!inc_valid_node_count(sbi, NULL, 1)) 1568 if (unlikely(!inc_valid_node_count(sbi, NULL)))
1568 WARN_ON(1); 1569 WARN_ON(1);
1569 set_node_addr(sbi, &new_ni, NEW_ADDR); 1570 set_node_addr(sbi, &new_ni, NEW_ADDR);
1570 inc_valid_inode_count(sbi); 1571 inc_valid_inode_count(sbi);
@@ -1572,47 +1573,88 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1572 return 0; 1573 return 0;
1573} 1574}
1574 1575
1576/*
1577 * ra_sum_pages() merges contiguous pages into one bio and submits it.
1578 * These pre-read pages are linked in the pages list.
1579 */
1580static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages,
1581 int start, int nrpages)
1582{
1583 struct page *page;
1584 int page_idx = start;
1585 struct f2fs_io_info fio = {
1586 .type = META,
1587 .rw = READ_SYNC | REQ_META | REQ_PRIO
1588 };
1589
1590 for (; page_idx < start + nrpages; page_idx++) {
1591 /* allocate a temporary page to read the node summary info */
1592 page = alloc_page(GFP_F2FS_ZERO);
1593 if (!page) {
1594 struct page *tmp;
1595 list_for_each_entry_safe(page, tmp, pages, lru) {
1596 list_del(&page->lru);
1597 unlock_page(page);
1598 __free_pages(page, 0);
1599 }
1600 return -ENOMEM;
1601 }
1602
1603 lock_page(page);
1604 page->index = page_idx;
1605 list_add_tail(&page->lru, pages);
1606 }
1607
1608 list_for_each_entry(page, pages, lru)
1609 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
1610
1611 f2fs_submit_merged_bio(sbi, META, READ);
1612 return 0;
1613}
1614
1575int restore_node_summary(struct f2fs_sb_info *sbi, 1615int restore_node_summary(struct f2fs_sb_info *sbi,
1576 unsigned int segno, struct f2fs_summary_block *sum) 1616 unsigned int segno, struct f2fs_summary_block *sum)
1577{ 1617{
1578 struct f2fs_node *rn; 1618 struct f2fs_node *rn;
1579 struct f2fs_summary *sum_entry; 1619 struct f2fs_summary *sum_entry;
1580 struct page *page; 1620 struct page *page, *tmp;
1581 block_t addr; 1621 block_t addr;
1582 int i, last_offset; 1622 int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1583 1623 int i, last_offset, nrpages, err = 0;
1584 /* alloc temporal page for read node */ 1624 LIST_HEAD(page_list);
1585 page = alloc_page(GFP_NOFS | __GFP_ZERO);
1586 if (!page)
1587 return -ENOMEM;
1588 lock_page(page);
1589 1625
1590 /* scan the node segment */ 1626 /* scan the node segment */
1591 last_offset = sbi->blocks_per_seg; 1627 last_offset = sbi->blocks_per_seg;
1592 addr = START_BLOCK(sbi, segno); 1628 addr = START_BLOCK(sbi, segno);
1593 sum_entry = &sum->entries[0]; 1629 sum_entry = &sum->entries[0];
1594 1630
1595 for (i = 0; i < last_offset; i++, sum_entry++) { 1631 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
1596 /* 1632 nrpages = min(last_offset - i, bio_blocks);
1597 * In order to read next node page,
1598 * we must clear PageUptodate flag.
1599 */
1600 ClearPageUptodate(page);
1601 1633
1602 if (f2fs_readpage(sbi, page, addr, READ_SYNC)) 1634 /* read ahead node pages */
1603 goto out; 1635 err = ra_sum_pages(sbi, &page_list, addr, nrpages);
1636 if (err)
1637 return err;
1604 1638
1605 lock_page(page); 1639 list_for_each_entry_safe(page, tmp, &page_list, lru) {
1606 rn = F2FS_NODE(page); 1640
1607 sum_entry->nid = rn->footer.nid; 1641 lock_page(page);
1608 sum_entry->version = 0; 1642 if (unlikely(!PageUptodate(page))) {
1609 sum_entry->ofs_in_node = 0; 1643 err = -EIO;
1610 addr++; 1644 } else {
1645 rn = F2FS_NODE(page);
1646 sum_entry->nid = rn->footer.nid;
1647 sum_entry->version = 0;
1648 sum_entry->ofs_in_node = 0;
1649 sum_entry++;
1650 }
1651
1652 list_del(&page->lru);
1653 unlock_page(page);
1654 __free_pages(page, 0);
1655 }
1611 } 1656 }
1612 unlock_page(page); 1657 return err;
1613out:
1614 __free_pages(page, 0);
1615 return 0;
1616} 1658}
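
With ra_sum_pages() in place, restore_node_summary() walks the node segment in readahead batches instead of issuing one synchronous page read per block. A runnable sketch of the chunking; both sizes are assumptions standing in for sbi->blocks_per_seg and MAX_BIO_BLOCKS(max_hw_blocks(sbi)):

#include <stdio.h>

int main(void)
{
	int last_offset = 512;	/* blocks per segment, assumed */
	int bio_blocks = 64;	/* per-bio block budget, assumed */
	int i, nrpages;

	for (i = 0; i < last_offset; i += nrpages) {
		nrpages = last_offset - i < bio_blocks ?
			  last_offset - i : bio_blocks;
		/* ra_sum_pages() submits each batch as one merged bio */
		printf("readahead blocks [%d, %d)\n", i, i + nrpages);
	}
	return 0;
}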
1617 1659
1618static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) 1660static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 3496bb3e15dc..c4c79885c993 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -224,7 +224,13 @@ static inline block_t next_blkaddr_of_node(struct page *node_page)
224 * | `- direct node (5 + N => 5 + 2N - 1) 224 * | `- direct node (5 + N => 5 + 2N - 1)
225 * `- double indirect node (5 + 2N) 225 * `- double indirect node (5 + 2N)
226 * `- indirect node (6 + 2N) 226 * `- indirect node (6 + 2N)
227 * `- direct node (x(N + 1)) 227 * `- direct node
228 * ......
229 * `- indirect node ((6 + 2N) + x(N + 1))
230 * `- direct node
231 * ......
232 * `- indirect node ((6 + 2N) + (N - 1)(N + 1))
233 * `- direct node
228 */ 234 */
229static inline bool IS_DNODE(struct page *node_page) 235static inline bool IS_DNODE(struct page *node_page)
230{ 236{
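
The expanded comment pins down where each indirect node under the double indirect node begins: (6 + 2N) + x(N + 1) for x = 0 .. N - 1, where N = NIDS_PER_BLOCK. A quick runnable check of that arithmetic; N = 1018 is assumed from the 4KB node block layout of this era:

#include <stdio.h>

#define N 1018UL	/* assumed NIDS_PER_BLOCK for a 4KB node block */

int main(void)
{
	unsigned long dind = 5 + 2 * N;			/* double indirect */
	unsigned long first = (6 + 2 * N);		/* x = 0 */
	unsigned long last = (6 + 2 * N) + (N - 1) * (N + 1);

	printf("double indirect at %lu\n", dind);
	printf("indirect children span [%lu, %lu]\n", first, last);
	return 0;
}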
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index fdc81161f254..976a7a934db5 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -40,8 +40,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
40 40
41static int recover_dentry(struct page *ipage, struct inode *inode) 41static int recover_dentry(struct page *ipage, struct inode *inode)
42{ 42{
43 struct f2fs_node *raw_node = F2FS_NODE(ipage); 43 struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
44 struct f2fs_inode *raw_inode = &(raw_node->i);
45 nid_t pino = le32_to_cpu(raw_inode->i_pino); 44 nid_t pino = le32_to_cpu(raw_inode->i_pino);
46 struct f2fs_dir_entry *de; 45 struct f2fs_dir_entry *de;
47 struct qstr name; 46 struct qstr name;
@@ -62,6 +61,12 @@ static int recover_dentry(struct page *ipage, struct inode *inode)
62 61
63 name.len = le32_to_cpu(raw_inode->i_namelen); 62 name.len = le32_to_cpu(raw_inode->i_namelen);
64 name.name = raw_inode->i_name; 63 name.name = raw_inode->i_name;
64
65 if (unlikely(name.len > F2FS_NAME_LEN)) {
66 WARN_ON(1);
67 err = -ENAMETOOLONG;
68 goto out;
69 }
65retry: 70retry:
66 de = f2fs_find_entry(dir, &name, &page); 71 de = f2fs_find_entry(dir, &name, &page);
67 if (de && inode->i_ino == le32_to_cpu(de->ino)) 72 if (de && inode->i_ino == le32_to_cpu(de->ino))
@@ -90,17 +95,16 @@ out_unmap_put:
90 kunmap(page); 95 kunmap(page);
91 f2fs_put_page(page, 0); 96 f2fs_put_page(page, 0);
92out: 97out:
93 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " 98 f2fs_msg(inode->i_sb, KERN_NOTICE,
94 "ino = %x, name = %s, dir = %lx, err = %d", 99 "%s: ino = %x, name = %s, dir = %lx, err = %d",
95 ino_of_node(ipage), raw_inode->i_name, 100 __func__, ino_of_node(ipage), raw_inode->i_name,
96 IS_ERR(dir) ? 0 : dir->i_ino, err); 101 IS_ERR(dir) ? 0 : dir->i_ino, err);
97 return err; 102 return err;
98} 103}
99 104
100static int recover_inode(struct inode *inode, struct page *node_page) 105static int recover_inode(struct inode *inode, struct page *node_page)
101{ 106{
102 struct f2fs_node *raw_node = F2FS_NODE(node_page); 107 struct f2fs_inode *raw_inode = F2FS_INODE(node_page);
103 struct f2fs_inode *raw_inode = &(raw_node->i);
104 108
105 if (!IS_INODE(node_page)) 109 if (!IS_INODE(node_page))
106 return 0; 110 return 0;
@@ -143,9 +147,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
143 while (1) { 147 while (1) {
144 struct fsync_inode_entry *entry; 148 struct fsync_inode_entry *entry;
145 149
146 err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); 150 err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
147 if (err) 151 if (err)
148 goto out; 152 return err;
149 153
150 lock_page(page); 154 lock_page(page);
151 155
@@ -191,9 +195,10 @@ next:
191 /* check next segment */ 195 /* check next segment */
192 blkaddr = next_blkaddr_of_node(page); 196 blkaddr = next_blkaddr_of_node(page);
193 } 197 }
198
194 unlock_page(page); 199 unlock_page(page);
195out:
196 __free_pages(page, 0); 200 __free_pages(page, 0);
201
197 return err; 202 return err;
198} 203}
199 204
@@ -293,6 +298,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
293 struct node_info ni; 298 struct node_info ni;
294 int err = 0, recovered = 0; 299 int err = 0, recovered = 0;
295 300
301 if (recover_inline_data(inode, page))
302 goto out;
303
296 start = start_bidx_of_node(ofs_of_node(page), fi); 304 start = start_bidx_of_node(ofs_of_node(page), fi);
297 if (IS_INODE(page)) 305 if (IS_INODE(page))
298 end = start + ADDRS_PER_INODE(fi); 306 end = start + ADDRS_PER_INODE(fi);
@@ -300,12 +308,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
300 end = start + ADDRS_PER_BLOCK; 308 end = start + ADDRS_PER_BLOCK;
301 309
302 f2fs_lock_op(sbi); 310 f2fs_lock_op(sbi);
311
303 set_new_dnode(&dn, inode, NULL, NULL, 0); 312 set_new_dnode(&dn, inode, NULL, NULL, 0);
304 313
305 err = get_dnode_of_data(&dn, start, ALLOC_NODE); 314 err = get_dnode_of_data(&dn, start, ALLOC_NODE);
306 if (err) { 315 if (err) {
307 f2fs_unlock_op(sbi); 316 f2fs_unlock_op(sbi);
308 return err; 317 goto out;
309 } 318 }
310 319
311 wait_on_page_writeback(dn.node_page); 320 wait_on_page_writeback(dn.node_page);
@@ -356,10 +365,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
356err: 365err:
357 f2fs_put_dnode(&dn); 366 f2fs_put_dnode(&dn);
358 f2fs_unlock_op(sbi); 367 f2fs_unlock_op(sbi);
359 368out:
360 f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " 369 f2fs_msg(sbi->sb, KERN_NOTICE,
361 "recovered_data = %d blocks, err = %d", 370 "recover_data: ino = %lx, recovered = %d blocks, err = %d",
362 inode->i_ino, recovered, err); 371 inode->i_ino, recovered, err);
363 return err; 372 return err;
364} 373}
365 374
@@ -377,7 +386,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
377 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 386 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
378 387
379 /* read node page */ 388 /* read node page */
380 page = alloc_page(GFP_NOFS | __GFP_ZERO); 389 page = alloc_page(GFP_F2FS_ZERO);
381 if (!page) 390 if (!page)
382 return -ENOMEM; 391 return -ENOMEM;
383 392
@@ -386,9 +395,9 @@ static int recover_data(struct f2fs_sb_info *sbi,
386 while (1) { 395 while (1) {
387 struct fsync_inode_entry *entry; 396 struct fsync_inode_entry *entry;
388 397
389 err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC); 398 err = f2fs_submit_page_bio(sbi, page, blkaddr, READ_SYNC);
390 if (err) 399 if (err)
391 goto out; 400 return err;
392 401
393 lock_page(page); 402 lock_page(page);
394 403
@@ -412,8 +421,8 @@ next:
412 /* check next segment */ 421 /* check next segment */
413 blkaddr = next_blkaddr_of_node(page); 422 blkaddr = next_blkaddr_of_node(page);
414 } 423 }
424
415 unlock_page(page); 425 unlock_page(page);
416out:
417 __free_pages(page, 0); 426 __free_pages(page, 0);
418 427
419 if (!err) 428 if (!err)
@@ -429,7 +438,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
429 438
430 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", 439 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
431 sizeof(struct fsync_inode_entry), NULL); 440 sizeof(struct fsync_inode_entry), NULL);
432 if (unlikely(!fsync_entry_slab)) 441 if (!fsync_entry_slab)
433 return -ENOMEM; 442 return -ENOMEM;
434 443
435 INIT_LIST_HEAD(&inode_list); 444 INIT_LIST_HEAD(&inode_list);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index fa284d397199..7caac5f2ca9e 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -14,12 +14,163 @@
14#include <linux/blkdev.h> 14#include <linux/blkdev.h>
15#include <linux/prefetch.h> 15#include <linux/prefetch.h>
16#include <linux/vmalloc.h> 16#include <linux/vmalloc.h>
17#include <linux/swap.h>
17 18
18#include "f2fs.h" 19#include "f2fs.h"
19#include "segment.h" 20#include "segment.h"
20#include "node.h" 21#include "node.h"
21#include <trace/events/f2fs.h> 22#include <trace/events/f2fs.h>
22 23
24#define __reverse_ffz(x) __reverse_ffs(~(x))
25
26static struct kmem_cache *discard_entry_slab;
27
28/*
29 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
30 * MSB and LSB are reversed in a byte by f2fs_set_bit.
31 */
32static inline unsigned long __reverse_ffs(unsigned long word)
33{
34 int num = 0;
35
36#if BITS_PER_LONG == 64
37 if ((word & 0xffffffff) == 0) {
38 num += 32;
39 word >>= 32;
40 }
41#endif
42 if ((word & 0xffff) == 0) {
43 num += 16;
44 word >>= 16;
45 }
46 if ((word & 0xff) == 0) {
47 num += 8;
48 word >>= 8;
49 }
50 if ((word & 0xf0) == 0)
51 num += 4;
52 else
53 word >>= 4;
54 if ((word & 0xc) == 0)
55 num += 2;
56 else
57 word >>= 2;
58 if ((word & 0x2) == 0)
59 num += 1;
60 return num;
61}
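
What makes the stock helpers unusable is the bit numbering, illustrated below: f2fs_set_bit() is assumed (matching the comment that follows) to number bits MSB-first within each byte, so bit 0 of byte 0 is mask 0x80. A runnable demonstration with a naive reference scan in that order:

#include <stdio.h>

/* assumed shape of f2fs_set_bit(): MSB-first numbering within a byte */
static void f2fs_style_set_bit(unsigned int nr, unsigned char *addr)
{
	addr[nr >> 3] |= 1u << (7 - (nr & 7));
}

/* naive reference scan in the same MSB-first per-byte order */
static int find_first_set(const unsigned char *addr, int nbits)
{
	int nr;

	for (nr = 0; nr < nbits; nr++)
		if (addr[nr >> 3] & (1u << (7 - (nr & 7))))
			return nr;
	return nbits;
}

int main(void)
{
	unsigned char map[8] = { 0 };

	f2fs_style_set_bit(10, map);
	printf("byte 1 = 0x%02x, first set bit = %d\n",
	       map[1], find_first_set(map, 64));	/* 0x20, 10 */
	return 0;
}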
62
63/*
64 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
65 * f2fs_set_bit makes MSB and LSB reversed in a byte.
66 * Example:
67 * LSB <--> MSB
68 * f2fs_set_bit(0, bitmap) => 0000 0001
69 * f2fs_set_bit(7, bitmap) => 1000 0000
70 */
71static unsigned long __find_rev_next_bit(const unsigned long *addr,
72 unsigned long size, unsigned long offset)
73{
74 const unsigned long *p = addr + BIT_WORD(offset);
75 unsigned long result = offset & ~(BITS_PER_LONG - 1);
76 unsigned long tmp;
77 unsigned long mask, submask;
78 unsigned long quot, rest;
79
80 if (offset >= size)
81 return size;
82
83 size -= result;
84 offset %= BITS_PER_LONG;
85 if (!offset)
86 goto aligned;
87
88 tmp = *(p++);
89 quot = (offset >> 3) << 3;
90 rest = offset & 0x7;
91 mask = ~0UL << quot;
92 submask = (unsigned char)(0xff << rest) >> rest;
93 submask <<= quot;
94 mask &= submask;
95 tmp &= mask;
96 if (size < BITS_PER_LONG)
97 goto found_first;
98 if (tmp)
99 goto found_middle;
100
101 size -= BITS_PER_LONG;
102 result += BITS_PER_LONG;
103aligned:
104 while (size & ~(BITS_PER_LONG-1)) {
105 tmp = *(p++);
106 if (tmp)
107 goto found_middle;
108 result += BITS_PER_LONG;
109 size -= BITS_PER_LONG;
110 }
111 if (!size)
112 return result;
113 tmp = *p;
114found_first:
115 tmp &= (~0UL >> (BITS_PER_LONG - size));
116 if (tmp == 0UL) /* Are any bits set? */
117 return result + size; /* Nope. */
118found_middle:
119 return result + __reverse_ffs(tmp);
120}
121
122static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
123 unsigned long size, unsigned long offset)
124{
125 const unsigned long *p = addr + BIT_WORD(offset);
126 unsigned long result = offset & ~(BITS_PER_LONG - 1);
127 unsigned long tmp;
128 unsigned long mask, submask;
129 unsigned long quot, rest;
130
131 if (offset >= size)
132 return size;
133
134 size -= result;
135 offset %= BITS_PER_LONG;
136 if (!offset)
137 goto aligned;
138
139 tmp = *(p++);
140 quot = (offset >> 3) << 3;
141 rest = offset & 0x7;
142 mask = ~(~0UL << quot);
143 submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
144 submask <<= quot;
145 mask += submask;
146 tmp |= mask;
147 if (size < BITS_PER_LONG)
148 goto found_first;
149 if (~tmp)
150 goto found_middle;
151
152 size -= BITS_PER_LONG;
153 result += BITS_PER_LONG;
154aligned:
155 while (size & ~(BITS_PER_LONG - 1)) {
156 tmp = *(p++);
157 if (~tmp)
158 goto found_middle;
159 result += BITS_PER_LONG;
160 size -= BITS_PER_LONG;
161 }
162 if (!size)
163 return result;
164 tmp = *p;
165
166found_first:
167 tmp |= ~0UL << size;
168 if (tmp == ~0UL) /* Are any bits zero? */
169 return result + size; /* Nope. */
170found_middle:
171 return result + __reverse_ffz(tmp);
172}
173
23/* 174/*
24 * This function balances dirty node and dentry pages. 175 * This function balances dirty node and dentry pages.
25 * In addition, it controls garbage collection. 176 * In addition, it controls garbage collection.
@@ -116,6 +267,56 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
116 mutex_unlock(&dirty_i->seglist_lock); 267 mutex_unlock(&dirty_i->seglist_lock);
117} 268}
118 269
270static void f2fs_issue_discard(struct f2fs_sb_info *sbi,
271 block_t blkstart, block_t blklen)
272{
273 sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart);
274 sector_t len = SECTOR_FROM_BLOCK(sbi, blklen);
275 blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
276 trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
277}
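
The helper converts filesystem blocks into 512-byte device sectors before handing the range to blkdev_issue_discard(). Assuming 4KB blocks, SECTOR_FROM_BLOCK() reduces to a left shift by log_sectors_per_block = 3; a quick runnable check:

#include <stdio.h>

int main(void)
{
	unsigned int log_sectors_per_block = 3;	/* 4096 / 512, assumed */
	unsigned long blkstart = 100, blklen = 16;
	unsigned long start = blkstart << log_sectors_per_block;
	unsigned long len = blklen << log_sectors_per_block;

	printf("discard sectors [%lu, +%lu)\n", start, len);	/* 800, 128 */
	return 0;
}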
278
279static void add_discard_addrs(struct f2fs_sb_info *sbi,
280 unsigned int segno, struct seg_entry *se)
281{
282 struct list_head *head = &SM_I(sbi)->discard_list;
283 struct discard_entry *new;
284 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
285 int max_blocks = sbi->blocks_per_seg;
286 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
287 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
288 unsigned long dmap[entries];
289 unsigned int start = 0, end = -1;
290 int i;
291
292 if (!test_opt(sbi, DISCARD))
293 return;
294
295 /* zero block will be discarded through the prefree list */
296 if (!se->valid_blocks || se->valid_blocks == max_blocks)
297 return;
298
299 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
300 for (i = 0; i < entries; i++)
301 dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
302
303 while (SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
304 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
305 if (start >= max_blocks)
306 break;
307
308 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
309
310 new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
311 INIT_LIST_HEAD(&new->list);
312 new->blkaddr = START_BLOCK(sbi, segno) + start;
313 new->len = end - start;
314
315 list_add_tail(&new->list, head);
316 SM_I(sbi)->nr_discards += end - start;
317 }
318}
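
The discard map built above, dmap = (cur ^ ckpt) & ckpt, keeps exactly the bits that were valid at the last checkpoint but have been invalidated since (it is equivalent to ckpt & ~cur), i.e. the blocks that are now safe to discard. A one-byte runnable restatement:

#include <stdio.h>

int main(void)
{
	unsigned long ckpt = 0xF0;	/* valid at the last checkpoint */
	unsigned long cur  = 0x3C;	/* valid right now */
	unsigned long dmap = (cur ^ ckpt) & ckpt;

	/* 0xF0 ^ 0x3C = 0xCC; 0xCC & 0xF0 = 0xC0: freed since checkpoint */
	printf("dmap = 0x%02lx (ckpt & ~cur = 0x%02lx)\n",
	       dmap, ckpt & ~cur);
	return 0;
}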
319
119/* 320/*
120 * Should call clear_prefree_segments after checkpoint is done. 321 * Should call clear_prefree_segments after checkpoint is done.
121 */ 322 */
@@ -138,6 +339,9 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
138 339
139void clear_prefree_segments(struct f2fs_sb_info *sbi) 340void clear_prefree_segments(struct f2fs_sb_info *sbi)
140{ 341{
342 struct list_head *head = &(SM_I(sbi)->discard_list);
343 struct list_head *this, *next;
344 struct discard_entry *entry;
141 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 345 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
142 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; 346 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
143 unsigned int total_segs = TOTAL_SEGS(sbi); 347 unsigned int total_segs = TOTAL_SEGS(sbi);
@@ -160,14 +364,19 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
160 if (!test_opt(sbi, DISCARD)) 364 if (!test_opt(sbi, DISCARD))
161 continue; 365 continue;
162 366
163 blkdev_issue_discard(sbi->sb->s_bdev, 367 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
164 START_BLOCK(sbi, start) << 368 (end - start) << sbi->log_blocks_per_seg);
165 sbi->log_sectors_per_block,
166 (1 << (sbi->log_sectors_per_block +
167 sbi->log_blocks_per_seg)) * (end - start),
168 GFP_NOFS, 0);
169 } 369 }
170 mutex_unlock(&dirty_i->seglist_lock); 370 mutex_unlock(&dirty_i->seglist_lock);
371
372 /* send small discards */
373 list_for_each_safe(this, next, head) {
374 entry = list_entry(this, struct discard_entry, list);
375 f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
376 list_del(&entry->list);
377 SM_I(sbi)->nr_discards -= entry->len;
378 kmem_cache_free(discard_entry_slab, entry);
379 }
171} 380}
172 381
173static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 382static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
@@ -459,13 +668,18 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
459 struct curseg_info *seg, block_t start) 668 struct curseg_info *seg, block_t start)
460{ 669{
461 struct seg_entry *se = get_seg_entry(sbi, seg->segno); 670 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
462 block_t ofs; 671 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
463 for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) { 672 unsigned long target_map[entries];
464 if (!f2fs_test_bit(ofs, se->ckpt_valid_map) 673 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
465 && !f2fs_test_bit(ofs, se->cur_valid_map)) 674 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
466 break; 675 int i, pos;
467 } 676
468 seg->next_blkoff = ofs; 677 for (i = 0; i < entries; i++)
678 target_map[i] = ckpt_map[i] | cur_map[i];
679
680 pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
681
682 seg->next_blkoff = pos;
469} 683}
470 684
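The rewrite of __next_free_blkoff trades a per-block f2fs_test_bit() loop for one bitwise pass: OR the checkpoint and current validity maps into target_map, then a single zero-bit search yields the next offset that is free in both. A standalone sketch of the word-wise version, with a naive scanner standing in for __find_rev_next_zero_bit():

    #include <stdio.h>

    #define WORDS 2
    #define BITS_PER_WORD (8 * (int)sizeof(unsigned long))

    static int find_next_zero_bit(const unsigned long *map, int size, int off)
    {
        for (; off < size; off++)
            if (!(map[off / BITS_PER_WORD] &
                  (1UL << (off % BITS_PER_WORD))))
                return off;
        return size;
    }

    int main(void)
    {
        /* a block offset is usable only if it is free in BOTH maps */
        unsigned long ckpt_map[WORDS] = { 0xffffUL, 0 };
        unsigned long cur_map[WORDS]  = { 0xff00ffUL, 0 };
        unsigned long target[WORDS];
        int i, blocks = 2 * BITS_PER_WORD;

        for (i = 0; i < WORDS; i++)
            target[i] = ckpt_map[i] | cur_map[i];

        printf("next free blkoff: %d\n",
               find_next_zero_bit(target, blocks, 0));
        return 0;
    }

With these sample maps the union covers bits 0..23, so the sketch prints 24, the first offset free in both bitmaps.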
471/* 685/*
@@ -573,148 +787,6 @@ static const struct segment_allocation default_salloc_ops = {
573 .allocate_segment = allocate_segment_by_default, 787 .allocate_segment = allocate_segment_by_default,
574}; 788};
575 789
576static void f2fs_end_io_write(struct bio *bio, int err)
577{
578 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
579 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
580 struct bio_private *p = bio->bi_private;
581
582 do {
583 struct page *page = bvec->bv_page;
584
585 if (--bvec >= bio->bi_io_vec)
586 prefetchw(&bvec->bv_page->flags);
587 if (!uptodate) {
588 SetPageError(page);
589 if (page->mapping)
590 set_bit(AS_EIO, &page->mapping->flags);
591 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
592 p->sbi->sb->s_flags |= MS_RDONLY;
593 }
594 end_page_writeback(page);
595 dec_page_count(p->sbi, F2FS_WRITEBACK);
596 } while (bvec >= bio->bi_io_vec);
597
598 if (p->is_sync)
599 complete(p->wait);
600
601 if (!get_pages(p->sbi, F2FS_WRITEBACK) &&
602 !list_empty(&p->sbi->cp_wait.task_list))
603 wake_up(&p->sbi->cp_wait);
604
605 kfree(p);
606 bio_put(bio);
607}
608
609struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
610{
611 struct bio *bio;
612
613 /* No failure on bio allocation */
614 bio = bio_alloc(GFP_NOIO, npages);
615 bio->bi_bdev = bdev;
616 bio->bi_private = NULL;
617
618 return bio;
619}
620
621static void do_submit_bio(struct f2fs_sb_info *sbi,
622 enum page_type type, bool sync)
623{
624 int rw = sync ? WRITE_SYNC : WRITE;
625 enum page_type btype = type > META ? META : type;
626
627 if (type >= META_FLUSH)
628 rw = WRITE_FLUSH_FUA;
629
630 if (btype == META)
631 rw |= REQ_META;
632
633 if (sbi->bio[btype]) {
634 struct bio_private *p = sbi->bio[btype]->bi_private;
635 p->sbi = sbi;
636 sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
637
638 trace_f2fs_do_submit_bio(sbi->sb, btype, sync, sbi->bio[btype]);
639
640 if (type == META_FLUSH) {
641 DECLARE_COMPLETION_ONSTACK(wait);
642 p->is_sync = true;
643 p->wait = &wait;
644 submit_bio(rw, sbi->bio[btype]);
645 wait_for_completion(&wait);
646 } else {
647 p->is_sync = false;
648 submit_bio(rw, sbi->bio[btype]);
649 }
650 sbi->bio[btype] = NULL;
651 }
652}
653
654void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
655{
656 down_write(&sbi->bio_sem);
657 do_submit_bio(sbi, type, sync);
658 up_write(&sbi->bio_sem);
659}
660
661static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
662 block_t blk_addr, enum page_type type)
663{
664 struct block_device *bdev = sbi->sb->s_bdev;
665 int bio_blocks;
666
667 verify_block_addr(sbi, blk_addr);
668
669 down_write(&sbi->bio_sem);
670
671 inc_page_count(sbi, F2FS_WRITEBACK);
672
673 if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
674 do_submit_bio(sbi, type, false);
675alloc_new:
676 if (sbi->bio[type] == NULL) {
677 struct bio_private *priv;
678retry:
679 priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
680 if (!priv) {
681 cond_resched();
682 goto retry;
683 }
684
685 bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
686 sbi->bio[type] = f2fs_bio_alloc(bdev, bio_blocks);
687 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
688 sbi->bio[type]->bi_private = priv;
689 /*
690 * The end_io will be assigned at the submission phase.
691 * Until then, let bio_add_page() merge consecutive IOs as much
692 * as possible.
693 */
694 }
695
696 if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
697 PAGE_CACHE_SIZE) {
698 do_submit_bio(sbi, type, false);
699 goto alloc_new;
700 }
701
702 sbi->last_block_in_bio[type] = blk_addr;
703
704 up_write(&sbi->bio_sem);
705 trace_f2fs_submit_write_page(page, blk_addr, type);
706}
707
708void f2fs_wait_on_page_writeback(struct page *page,
709 enum page_type type, bool sync)
710{
711 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
712 if (PageWriteback(page)) {
713 f2fs_submit_bio(sbi, type, sync);
714 wait_on_page_writeback(page);
715 }
716}
717
718static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) 790static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
719{ 791{
720 struct curseg_info *curseg = CURSEG_I(sbi, type); 792 struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -782,16 +854,14 @@ static int __get_segment_type(struct page *page, enum page_type p_type)
782 return __get_segment_type_6(page, p_type); 854 return __get_segment_type_6(page, p_type);
783} 855}
784 856
785static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, 857void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
786 block_t old_blkaddr, block_t *new_blkaddr, 858 block_t old_blkaddr, block_t *new_blkaddr,
787 struct f2fs_summary *sum, enum page_type p_type) 859 struct f2fs_summary *sum, int type)
788{ 860{
789 struct sit_info *sit_i = SIT_I(sbi); 861 struct sit_info *sit_i = SIT_I(sbi);
790 struct curseg_info *curseg; 862 struct curseg_info *curseg;
791 unsigned int old_cursegno; 863 unsigned int old_cursegno;
792 int type;
793 864
794 type = __get_segment_type(page, p_type);
795 curseg = CURSEG_I(sbi, type); 865 curseg = CURSEG_I(sbi, type);
796 866
797 mutex_lock(&curseg->curseg_mutex); 867 mutex_lock(&curseg->curseg_mutex);
@@ -824,49 +894,64 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
824 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); 894 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
825 mutex_unlock(&sit_i->sentry_lock); 895 mutex_unlock(&sit_i->sentry_lock);
826 896
827 if (p_type == NODE) 897 if (page && IS_NODESEG(type))
828 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); 898 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
829 899
830 /* writeout dirty page into bdev */
831 submit_write_page(sbi, page, *new_blkaddr, p_type);
832
833 mutex_unlock(&curseg->curseg_mutex); 900 mutex_unlock(&curseg->curseg_mutex);
834} 901}
835 902
903static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
904 block_t old_blkaddr, block_t *new_blkaddr,
905 struct f2fs_summary *sum, struct f2fs_io_info *fio)
906{
907 int type = __get_segment_type(page, fio->type);
908
909 allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
910
911 /* writeout dirty page into bdev */
912 f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
913}
914
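After this split, do_write_page() only picks the on-disk target address here and defers the actual I/O to f2fs_submit_page_mbio(), which (in data.c, outside this hunk) keeps one open bio per page type and extends it while block addresses stay consecutive. A minimal sketch of that merging discipline, with a hypothetical flush() standing in for bio submission:

    #include <stdio.h>

    /* a pending "bio": a run of consecutive blocks not yet submitted */
    struct batch {
        unsigned long long start;
        unsigned int count; /* 0 means empty */
    };

    static void flush(struct batch *b)
    {
        if (b->count)
            printf("submit: blk %llu, %u blocks\n", b->start, b->count);
        b->count = 0;
    }

    /* queue one block write; extend the batch while addresses stay
     * consecutive, otherwise submit what we have and start a new run */
    static void queue_write(struct batch *b, unsigned long long blk)
    {
        if (b->count && blk != b->start + b->count)
            flush(b);
        if (!b->count)
            b->start = blk;
        b->count++;
    }

    int main(void)
    {
        unsigned long long blks[] = { 100, 101, 102, 200, 201, 300 };
        struct batch b = { 0, 0 };

        for (int i = 0; i < 6; i++)
            queue_write(&b, blks[i]);
        flush(&b); /* submit the trailing run */
        return 0;
    }

Six single-page writes collapse into three submissions (100 x3, 200 x2, 300 x1), which is the point of routing everything through the merged-bio path.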
836void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) 915void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
837{ 916{
917 struct f2fs_io_info fio = {
918 .type = META,
919 .rw = WRITE_SYNC | REQ_META | REQ_PRIO
920 };
921
838 set_page_writeback(page); 922 set_page_writeback(page);
839 submit_write_page(sbi, page, page->index, META); 923 f2fs_submit_page_mbio(sbi, page, page->index, &fio);
840} 924}
841 925
842void write_node_page(struct f2fs_sb_info *sbi, struct page *page, 926void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
927 struct f2fs_io_info *fio,
843 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr) 928 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
844{ 929{
845 struct f2fs_summary sum; 930 struct f2fs_summary sum;
846 set_summary(&sum, nid, 0, 0); 931 set_summary(&sum, nid, 0, 0);
847 do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE); 932 do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
848} 933}
849 934
850void write_data_page(struct inode *inode, struct page *page, 935void write_data_page(struct page *page, struct dnode_of_data *dn,
851 struct dnode_of_data *dn, block_t old_blkaddr, 936 block_t *new_blkaddr, struct f2fs_io_info *fio)
852 block_t *new_blkaddr)
853{ 937{
854 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 938 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
855 struct f2fs_summary sum; 939 struct f2fs_summary sum;
856 struct node_info ni; 940 struct node_info ni;
857 941
858 f2fs_bug_on(old_blkaddr == NULL_ADDR); 942 f2fs_bug_on(dn->data_blkaddr == NULL_ADDR);
859 get_node_info(sbi, dn->nid, &ni); 943 get_node_info(sbi, dn->nid, &ni);
860 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 944 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
861 945
862 do_write_page(sbi, page, old_blkaddr, 946 do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
863 new_blkaddr, &sum, DATA);
864} 947}
865 948
866void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page, 949void rewrite_data_page(struct page *page, block_t old_blkaddr,
867 block_t old_blk_addr) 950 struct f2fs_io_info *fio)
868{ 951{
869 submit_write_page(sbi, page, old_blk_addr, DATA); 952 struct inode *inode = page->mapping->host;
953 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
954 f2fs_submit_page_mbio(sbi, page, old_blkaddr, fio);
870} 955}
871 956
872void recover_data_page(struct f2fs_sb_info *sbi, 957void recover_data_page(struct f2fs_sb_info *sbi,
@@ -925,6 +1010,10 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
925 unsigned int segno, old_cursegno; 1010 unsigned int segno, old_cursegno;
926 block_t next_blkaddr = next_blkaddr_of_node(page); 1011 block_t next_blkaddr = next_blkaddr_of_node(page);
927 unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); 1012 unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
1013 struct f2fs_io_info fio = {
1014 .type = NODE,
1015 .rw = WRITE_SYNC,
1016 };
928 1017
929 curseg = CURSEG_I(sbi, type); 1018 curseg = CURSEG_I(sbi, type);
930 1019
@@ -953,8 +1042,8 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
953 1042
954 /* rewrite node page */ 1043 /* rewrite node page */
955 set_page_writeback(page); 1044 set_page_writeback(page);
956 submit_write_page(sbi, page, new_blkaddr, NODE); 1045 f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio);
957 f2fs_submit_bio(sbi, NODE, true); 1046 f2fs_submit_merged_bio(sbi, NODE, WRITE);
958 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); 1047 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
959 1048
960 locate_dirty_segment(sbi, old_cursegno); 1049 locate_dirty_segment(sbi, old_cursegno);
@@ -964,6 +1053,16 @@ void rewrite_node_page(struct f2fs_sb_info *sbi,
964 mutex_unlock(&curseg->curseg_mutex); 1053 mutex_unlock(&curseg->curseg_mutex);
965} 1054}
966 1055
1056void f2fs_wait_on_page_writeback(struct page *page,
1057 enum page_type type)
1058{
1059 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1060 if (PageWriteback(page)) {
1061 f2fs_submit_merged_bio(sbi, type, WRITE);
1062 wait_on_page_writeback(page);
1063 }
1064}
1065
967static int read_compacted_summaries(struct f2fs_sb_info *sbi) 1066static int read_compacted_summaries(struct f2fs_sb_info *sbi)
968{ 1067{
969 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1068 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1314,6 +1413,10 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1314 1413
1315 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 1414 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1316 1415
1416 /* add discard candidates */
1417 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
1418 add_discard_addrs(sbi, segno, se);
1419
1317 if (flushed) 1420 if (flushed)
1318 goto to_sit_page; 1421 goto to_sit_page;
1319 1422
@@ -1480,41 +1583,94 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1480 return restore_curseg_summaries(sbi); 1583 return restore_curseg_summaries(sbi);
1481} 1584}
1482 1585
1586static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages)
1587{
1588 struct address_space *mapping = META_MAPPING(sbi);
1589 struct page *page;
1590 block_t blk_addr, prev_blk_addr = 0;
1591 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1592 int blkno = start;
1593 struct f2fs_io_info fio = {
1594 .type = META,
1595 .rw = READ_SYNC | REQ_META | REQ_PRIO
1596 };
1597
1598 for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) {
1599
1600 blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK);
1601
1602 if (blkno != start && prev_blk_addr + 1 != blk_addr)
1603 break;
1604 prev_blk_addr = blk_addr;
1605repeat:
1606 page = grab_cache_page(mapping, blk_addr);
1607 if (!page) {
1608 cond_resched();
1609 goto repeat;
1610 }
1611 if (PageUptodate(page)) {
1612 mark_page_accessed(page);
1613 f2fs_put_page(page, 1);
1614 continue;
1615 }
1616
1617 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
1618
1619 mark_page_accessed(page);
1620 f2fs_put_page(page, 0);
1621 }
1622
1623 f2fs_submit_merged_bio(sbi, META, READ);
1624 return blkno - start;
1625}
1626
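ra_sit_pages() batches SIT block reads but cuts the batch at the first discontinuity: because SIT blocks alternate between two on-disk copies, consecutive entries need not be physically adjacent, and only a contiguous run can merge into one bio. A sketch of the same cut-at-gap loop, with a precomputed address array standing in for current_sit_addr():

    #include <stdio.h>

    /* read ahead up to nr addresses starting at index start, stopping at
     * the first non-consecutive block address; returns blocks consumed */
    static int ra_pages(const unsigned long long *addr, int total,
                        int start, int nr)
    {
        unsigned long long prev = 0;
        int i;

        for (i = start; i < start + nr && i < total; i++) {
            if (i != start && prev + 1 != addr[i])
                break; /* gap: this read would need a second bio */
            prev = addr[i];
            printf("readahead blk %llu\n", addr[i]);
        }
        return i - start;
    }

    int main(void)
    {
        /* two contiguous runs, as when SIT blocks alternate copies */
        unsigned long long addr[] = { 50, 51, 52, 90, 91 };
        int total = 5, pos = 0;

        while (pos < total)
            pos += ra_pages(addr, total, pos, 4);
        return 0;
    }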
1483static void build_sit_entries(struct f2fs_sb_info *sbi) 1627static void build_sit_entries(struct f2fs_sb_info *sbi)
1484{ 1628{
1485 struct sit_info *sit_i = SIT_I(sbi); 1629 struct sit_info *sit_i = SIT_I(sbi);
1486 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1630 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1487 struct f2fs_summary_block *sum = curseg->sum_blk; 1631 struct f2fs_summary_block *sum = curseg->sum_blk;
1488 unsigned int start; 1632 int sit_blk_cnt = SIT_BLK_CNT(sbi);
1633 unsigned int i, start, end;
1634 unsigned int readed, start_blk = 0;
1635 int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
1489 1636
1490 for (start = 0; start < TOTAL_SEGS(sbi); start++) { 1637 do {
1491 struct seg_entry *se = &sit_i->sentries[start]; 1638 readed = ra_sit_pages(sbi, start_blk, nrpages);
1492 struct f2fs_sit_block *sit_blk; 1639
1493 struct f2fs_sit_entry sit; 1640 start = start_blk * sit_i->sents_per_block;
1494 struct page *page; 1641 end = (start_blk + readed) * sit_i->sents_per_block;
1495 int i; 1642
1496 1643 for (; start < end && start < TOTAL_SEGS(sbi); start++) {
1497 mutex_lock(&curseg->curseg_mutex); 1644 struct seg_entry *se = &sit_i->sentries[start];
1498 for (i = 0; i < sits_in_cursum(sum); i++) { 1645 struct f2fs_sit_block *sit_blk;
1499 if (le32_to_cpu(segno_in_journal(sum, i)) == start) { 1646 struct f2fs_sit_entry sit;
1500 sit = sit_in_journal(sum, i); 1647 struct page *page;
1501 mutex_unlock(&curseg->curseg_mutex); 1648
1502 goto got_it; 1649 mutex_lock(&curseg->curseg_mutex);
1650 for (i = 0; i < sits_in_cursum(sum); i++) {
1651 if (le32_to_cpu(segno_in_journal(sum, i))
1652 == start) {
1653 sit = sit_in_journal(sum, i);
1654 mutex_unlock(&curseg->curseg_mutex);
1655 goto got_it;
1656 }
1503 } 1657 }
1504 } 1658 mutex_unlock(&curseg->curseg_mutex);
1505 mutex_unlock(&curseg->curseg_mutex); 1659
1506 page = get_current_sit_page(sbi, start); 1660 page = get_current_sit_page(sbi, start);
1507 sit_blk = (struct f2fs_sit_block *)page_address(page); 1661 sit_blk = (struct f2fs_sit_block *)page_address(page);
1508 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; 1662 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1509 f2fs_put_page(page, 1); 1663 f2fs_put_page(page, 1);
1510got_it: 1664got_it:
1511 check_block_count(sbi, start, &sit); 1665 check_block_count(sbi, start, &sit);
1512 seg_info_from_raw_sit(se, &sit); 1666 seg_info_from_raw_sit(se, &sit);
1513 if (sbi->segs_per_sec > 1) { 1667 if (sbi->segs_per_sec > 1) {
1514 struct sec_entry *e = get_sec_entry(sbi, start); 1668 struct sec_entry *e = get_sec_entry(sbi, start);
1515 e->valid_blocks += se->valid_blocks; 1669 e->valid_blocks += se->valid_blocks;
1670 }
1516 } 1671 }
1517 } 1672 start_blk += readed;
1673 } while (start_blk < sit_blk_cnt);
1518} 1674}
1519 1675
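Note that build_sit_entries() keeps the journal-first lookup from the old loop: a segment's freshest SIT entry may still live in the cold-data curseg journal rather than in any SIT block, so the journal is searched before the on-disk page is consulted. A compact sketch of that two-level lookup, with arrays standing in for the journal and the SIT area:

    #include <stdio.h>

    struct sit_entry { unsigned int segno; unsigned short vblocks; };

    /* journal of not-yet-flushed entries, searched first */
    static const struct sit_entry journal[] = { { 7, 12 }, { 42, 300 } };
    #define NJOURNAL 2

    /* "on-disk" copy, possibly stale for journaled segments; a dummy
     * stand-in for get_current_sit_page() */
    static unsigned short disk_vblocks(unsigned int segno)
    {
        return (unsigned short)segno;
    }

    static unsigned short lookup(unsigned int segno)
    {
        for (int i = 0; i < NJOURNAL; i++)
            if (journal[i].segno == segno)
                return journal[i].vblocks; /* newest copy wins */
        return disk_vblocks(segno);
    }

    int main(void)
    {
        printf("seg 7: %u valid blocks (from journal)\n", lookup(7));
        printf("seg 9: %u valid blocks (from disk)\n", lookup(9));
        return 0;
    }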
1520static void init_free_segmap(struct f2fs_sb_info *sbi) 1676static void init_free_segmap(struct f2fs_sb_info *sbi)
@@ -1644,6 +1800,12 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1644 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); 1800 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1645 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); 1801 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1646 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; 1802 sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS;
1803 sm_info->ipu_policy = F2FS_IPU_DISABLE;
1804 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
1805
1806 INIT_LIST_HEAD(&sm_info->discard_list);
1807 sm_info->nr_discards = 0;
1808 sm_info->max_discards = 0;
1647 1809
1648 err = build_sit_info(sbi); 1810 err = build_sit_info(sbi);
1649 if (err) 1811 if (err)
@@ -1760,3 +1922,17 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi)
1760 sbi->sm_info = NULL; 1922 sbi->sm_info = NULL;
1761 kfree(sm_info); 1923 kfree(sm_info);
1762} 1924}
1925
1926int __init create_segment_manager_caches(void)
1927{
1928 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1929 sizeof(struct discard_entry), NULL);
1930 if (!discard_entry_slab)
1931 return -ENOMEM;
1932 return 0;
1933}
1934
1935void destroy_segment_manager_caches(void)
1936{
1937 kmem_cache_destroy(discard_entry_slab);
1938}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 269f690b4e24..5731682d7516 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -20,13 +20,8 @@
20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) 20#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
21#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) 21#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
22 22
23#define IS_DATASEG(t) \ 23#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
24 ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ 24#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
25 (t == CURSEG_WARM_DATA))
26
27#define IS_NODESEG(t) \
28 ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \
29 (t == CURSEG_WARM_NODE))
30 25
31#define IS_CURSEG(sbi, seg) \ 26#define IS_CURSEG(sbi, seg) \
32 ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ 27 ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
@@ -83,25 +78,20 @@
83 (segno / SIT_ENTRY_PER_BLOCK) 78 (segno / SIT_ENTRY_PER_BLOCK)
84#define START_SEGNO(sit_i, segno) \ 79#define START_SEGNO(sit_i, segno) \
85 (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) 80 (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK)
81#define SIT_BLK_CNT(sbi) \
82 ((TOTAL_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
86#define f2fs_bitmap_size(nr) \ 83#define f2fs_bitmap_size(nr) \
87 (BITS_TO_LONGS(nr) * sizeof(unsigned long)) 84 (BITS_TO_LONGS(nr) * sizeof(unsigned long))
88#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) 85#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
89#define TOTAL_SECS(sbi) (sbi->total_sections) 86#define TOTAL_SECS(sbi) (sbi->total_sections)
90 87
91#define SECTOR_FROM_BLOCK(sbi, blk_addr) \ 88#define SECTOR_FROM_BLOCK(sbi, blk_addr) \
92 (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) 89 (((sector_t)blk_addr) << (sbi)->log_sectors_per_block)
93#define SECTOR_TO_BLOCK(sbi, sectors) \ 90#define SECTOR_TO_BLOCK(sbi, sectors) \
94 (sectors >> ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE)) 91 (sectors >> (sbi)->log_sectors_per_block)
95#define MAX_BIO_BLOCKS(max_hw_blocks) \ 92#define MAX_BIO_BLOCKS(max_hw_blocks) \
96 (min((int)max_hw_blocks, BIO_MAX_PAGES)) 93 (min((int)max_hw_blocks, BIO_MAX_PAGES))
97 94
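The SECTOR_FROM_BLOCK change is a width fix as much as a cleanup: block_t is a 32-bit type in f2fs, so the old form shifted in 32-bit arithmetic and wraps for block addresses at or above 2^29 with 4KB blocks and 512-byte sectors, i.e. on volumes past 2TB; casting to sector_t first promotes the shift to 64 bits. A small standalone demonstration, assuming log_sectors_per_block = 3:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint32_t block_t;
    typedef uint64_t sector_t;

    int main(void)
    {
        block_t blk = 0x30000000;       /* ~3TB into a 4KB-block volume */
        int log_sectors_per_block = 3;  /* 4096 / 512 */

        /* shift happens in 32 bits, then widens: high bits already lost */
        sector_t bad  = blk << log_sectors_per_block;
        /* widen first, then shift in 64 bits */
        sector_t good = ((sector_t)blk) << log_sectors_per_block;

        printf("without cast: %llu\n", (unsigned long long)bad);
        printf("with cast:    %llu\n", (unsigned long long)good);
        return 0;
    }

The first print yields 2147483648 instead of the correct 6442450944, which is the class of bug the (sector_t) cast removes.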
98/* during checkpoint, bio_private is used to synchronize the last bio */
99struct bio_private {
100 struct f2fs_sb_info *sbi;
101 bool is_sync;
102 void *wait;
103};
104
105/* 95/*
106 * indicate a block allocation direction: RIGHT and LEFT. 96 * indicate a block allocation direction: RIGHT and LEFT.
107 * RIGHT means allocating new sections towards the end of volume. 97 * RIGHT means allocating new sections towards the end of volume.
@@ -458,8 +448,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
458 448
459static inline bool need_SSR(struct f2fs_sb_info *sbi) 449static inline bool need_SSR(struct f2fs_sb_info *sbi)
460{ 450{
461 return ((prefree_segments(sbi) / sbi->segs_per_sec) 451 return (prefree_segments(sbi) / sbi->segs_per_sec)
462 + free_sections(sbi) < overprovision_sections(sbi)); 452 + free_sections(sbi) < overprovision_sections(sbi);
463} 453}
464 454
465static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) 455static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
@@ -467,38 +457,71 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
467 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); 457 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
468 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); 458 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
469 459
470 if (sbi->por_doing) 460 if (unlikely(sbi->por_doing))
471 return false; 461 return false;
472 462
473 return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + 463 return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
474 reserved_sections(sbi))); 464 reserved_sections(sbi));
475} 465}
476 466
477static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) 467static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
478{ 468{
479 return (prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments); 469 return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
480} 470}
481 471
482static inline int utilization(struct f2fs_sb_info *sbi) 472static inline int utilization(struct f2fs_sb_info *sbi)
483{ 473{
484 return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); 474 return div_u64((u64)valid_user_blocks(sbi) * 100,
475 sbi->user_block_count);
485} 476}
486 477
487/* 478/*
488 * Sometimes f2fs may be better to drop out-of-place update policy. 479 * Sometimes f2fs may be better to drop out-of-place update policy.
489 * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write 480 * And, users can control the policy through sysfs entries.
490 * data in the original place likewise other traditional file systems. 481 * There are five policies with triggering conditions as follows.
491 * But, currently set 100 in percentage, which means it is disabled. 482 * F2FS_IPU_FORCE - all the time,
492 * See below need_inplace_update(). 483 * F2FS_IPU_SSR - if SSR mode is activated,
 484 * F2FS_IPU_UTIL - if FS utilization is over threshold,
 485 * F2FS_IPU_SSR_UTIL - if SSR mode is activated and FS utilization is over
 486 * threshold,
 487 * F2FS_IPU_DISABLE - disable IPU. (default option)
493 */ 488 */
494#define MIN_IPU_UTIL 100 489#define DEF_MIN_IPU_UTIL 70
490
491enum {
492 F2FS_IPU_FORCE,
493 F2FS_IPU_SSR,
494 F2FS_IPU_UTIL,
495 F2FS_IPU_SSR_UTIL,
496 F2FS_IPU_DISABLE,
497};
498
495static inline bool need_inplace_update(struct inode *inode) 499static inline bool need_inplace_update(struct inode *inode)
496{ 500{
497 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 501 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
502
503 /* IPU can be done only for the user data */
498 if (S_ISDIR(inode->i_mode)) 504 if (S_ISDIR(inode->i_mode))
499 return false; 505 return false;
500 if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) 506
507 switch (SM_I(sbi)->ipu_policy) {
508 case F2FS_IPU_FORCE:
501 return true; 509 return true;
510 case F2FS_IPU_SSR:
511 if (need_SSR(sbi))
512 return true;
513 break;
514 case F2FS_IPU_UTIL:
515 if (utilization(sbi) > SM_I(sbi)->min_ipu_util)
516 return true;
517 break;
518 case F2FS_IPU_SSR_UTIL:
519 if (need_SSR(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
520 return true;
521 break;
522 case F2FS_IPU_DISABLE:
523 break;
524 }
502 return false; 525 return false;
503} 526}
504 527
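After this hunk, need_inplace_update() is a pure function of the ipu_policy and min_ipu_util tunables, so the whole policy table can be exercised in isolation. A userspace rendering of the same decision logic, with need_SSR() and utilization() reduced to plain parameters (enum names shortened from the ones added above):

    #include <stdio.h>
    #include <stdbool.h>

    enum { IPU_FORCE, IPU_SSR, IPU_UTIL, IPU_SSR_UTIL, IPU_DISABLE };

    static bool need_ipu(int policy, bool ssr, int util, int min_util)
    {
        switch (policy) {
        case IPU_FORCE:
            return true;
        case IPU_SSR:
            return ssr;
        case IPU_UTIL:
            return util > min_util;
        case IPU_SSR_UTIL:
            return ssr && util > min_util;
        }
        return false; /* IPU_DISABLE */
    }

    int main(void)
    {
        /* with the default added above: min_ipu_util = 70 */
        printf("UTIL policy, 65%% full: %d\n",
               need_ipu(IPU_UTIL, false, 65, 70));    /* 0: stay out-of-place */
        printf("UTIL policy, 85%% full: %d\n",
               need_ipu(IPU_UTIL, false, 85, 70));    /* 1: write in place */
        printf("SSR_UTIL, SSR on, 85%%: %d\n",
               need_ipu(IPU_SSR_UTIL, true, 85, 70)); /* 1 */
        return 0;
    }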
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bafff72de8e8..1a85f83abd53 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -50,6 +50,7 @@ enum {
50 Opt_active_logs, 50 Opt_active_logs,
51 Opt_disable_ext_identify, 51 Opt_disable_ext_identify,
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data,
53 Opt_err, 54 Opt_err,
54}; 55};
55 56
@@ -65,6 +66,7 @@ static match_table_t f2fs_tokens = {
65 {Opt_active_logs, "active_logs=%u"}, 66 {Opt_active_logs, "active_logs=%u"},
66 {Opt_disable_ext_identify, "disable_ext_identify"}, 67 {Opt_disable_ext_identify, "disable_ext_identify"},
67 {Opt_inline_xattr, "inline_xattr"}, 68 {Opt_inline_xattr, "inline_xattr"},
69 {Opt_inline_data, "inline_data"},
68 {Opt_err, NULL}, 70 {Opt_err, NULL},
69}; 71};
70 72
@@ -72,6 +74,7 @@ static match_table_t f2fs_tokens = {
72enum { 74enum {
73 GC_THREAD, /* struct f2fs_gc_thread */ 75 GC_THREAD, /* struct f2fs_gc_thread */
74 SM_INFO, /* struct f2fs_sm_info */ 76 SM_INFO, /* struct f2fs_sm_info */
77 F2FS_SBI, /* struct f2fs_sb_info */
75}; 78};
76 79
77struct f2fs_attr { 80struct f2fs_attr {
@@ -89,6 +92,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
89 return (unsigned char *)sbi->gc_thread; 92 return (unsigned char *)sbi->gc_thread;
90 else if (struct_type == SM_INFO) 93 else if (struct_type == SM_INFO)
91 return (unsigned char *)SM_I(sbi); 94 return (unsigned char *)SM_I(sbi);
95 else if (struct_type == F2FS_SBI)
96 return (unsigned char *)sbi;
92 return NULL; 97 return NULL;
93} 98}
94 99
@@ -175,6 +180,10 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
175F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); 180F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
176F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); 181F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
177F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); 182F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
183F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
184F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
185F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
186F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
178 187
179#define ATTR_LIST(name) (&f2fs_attr_##name.attr) 188#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
180static struct attribute *f2fs_attrs[] = { 189static struct attribute *f2fs_attrs[] = {
@@ -183,6 +192,10 @@ static struct attribute *f2fs_attrs[] = {
183 ATTR_LIST(gc_no_gc_sleep_time), 192 ATTR_LIST(gc_no_gc_sleep_time),
184 ATTR_LIST(gc_idle), 193 ATTR_LIST(gc_idle),
185 ATTR_LIST(reclaim_segments), 194 ATTR_LIST(reclaim_segments),
195 ATTR_LIST(max_small_discards),
196 ATTR_LIST(ipu_policy),
197 ATTR_LIST(min_ipu_util),
198 ATTR_LIST(max_victim_search),
186 NULL, 199 NULL,
187}; 200};
188 201
@@ -311,6 +324,9 @@ static int parse_options(struct super_block *sb, char *options)
311 case Opt_disable_ext_identify: 324 case Opt_disable_ext_identify:
312 set_opt(sbi, DISABLE_EXT_IDENTIFY); 325 set_opt(sbi, DISABLE_EXT_IDENTIFY);
313 break; 326 break;
327 case Opt_inline_data:
328 set_opt(sbi, INLINE_DATA);
329 break;
314 default: 330 default:
315 f2fs_msg(sb, KERN_ERR, 331 f2fs_msg(sb, KERN_ERR,
316 "Unrecognized mount option \"%s\" or missing value", 332 "Unrecognized mount option \"%s\" or missing value",
@@ -325,7 +341,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
325{ 341{
326 struct f2fs_inode_info *fi; 342 struct f2fs_inode_info *fi;
327 343
328 fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO); 344 fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_F2FS_ZERO);
329 if (!fi) 345 if (!fi)
330 return NULL; 346 return NULL;
331 347
@@ -508,7 +524,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
508#endif 524#endif
509 if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) 525 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
510 seq_puts(seq, ",disable_ext_identify"); 526 seq_puts(seq, ",disable_ext_identify");
511 527 if (test_opt(sbi, INLINE_DATA))
528 seq_puts(seq, ",inline_data");
512 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 529 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
513 530
514 return 0; 531 return 0;
@@ -518,7 +535,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
518{ 535{
519 struct super_block *sb = seq->private; 536 struct super_block *sb = seq->private;
520 struct f2fs_sb_info *sbi = F2FS_SB(sb); 537 struct f2fs_sb_info *sbi = F2FS_SB(sb);
521 unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); 538 unsigned int total_segs =
539 le32_to_cpu(sbi->raw_super->segment_count_main);
522 int i; 540 int i;
523 541
524 for (i = 0; i < total_segs; i++) { 542 for (i = 0; i < total_segs; i++) {
@@ -618,7 +636,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
618 struct f2fs_sb_info *sbi = F2FS_SB(sb); 636 struct f2fs_sb_info *sbi = F2FS_SB(sb);
619 struct inode *inode; 637 struct inode *inode;
620 638
621 if (ino < F2FS_ROOT_INO(sbi)) 639 if (unlikely(ino < F2FS_ROOT_INO(sbi)))
622 return ERR_PTR(-ESTALE); 640 return ERR_PTR(-ESTALE);
623 641
624 /* 642 /*
@@ -629,7 +647,7 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
629 inode = f2fs_iget(sb, ino); 647 inode = f2fs_iget(sb, ino);
630 if (IS_ERR(inode)) 648 if (IS_ERR(inode))
631 return ERR_CAST(inode); 649 return ERR_CAST(inode);
632 if (generation && inode->i_generation != generation) { 650 if (unlikely(generation && inode->i_generation != generation)) {
633 /* we didn't find the right inode.. */ 651 /* we didn't find the right inode.. */
634 iput(inode); 652 iput(inode);
635 return ERR_PTR(-ESTALE); 653 return ERR_PTR(-ESTALE);
@@ -732,10 +750,10 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
732 fsmeta += le32_to_cpu(ckpt->rsvd_segment_count); 750 fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
733 fsmeta += le32_to_cpu(raw_super->segment_count_ssa); 751 fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
734 752
735 if (fsmeta >= total) 753 if (unlikely(fsmeta >= total))
736 return 1; 754 return 1;
737 755
738 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { 756 if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
739 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); 757 f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
740 return 1; 758 return 1;
741 } 759 }
@@ -763,6 +781,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
763 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino); 781 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
764 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino); 782 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
765 sbi->cur_victim_sec = NULL_SECNO; 783 sbi->cur_victim_sec = NULL_SECNO;
784 sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
766 785
767 for (i = 0; i < NR_COUNT_TYPE; i++) 786 for (i = 0; i < NR_COUNT_TYPE; i++)
768 atomic_set(&sbi->nr_pages[i], 0); 787 atomic_set(&sbi->nr_pages[i], 0);
@@ -798,9 +817,10 @@ retry:
798 /* sanity checking of raw super */ 817 /* sanity checking of raw super */
799 if (sanity_check_raw_super(sb, *raw_super)) { 818 if (sanity_check_raw_super(sb, *raw_super)) {
800 brelse(*raw_super_buf); 819 brelse(*raw_super_buf);
801 f2fs_msg(sb, KERN_ERR, "Can't find a valid F2FS filesystem " 820 f2fs_msg(sb, KERN_ERR,
802 "in %dth superblock", block + 1); 821 "Can't find valid F2FS filesystem in %dth superblock",
803 if(block == 0) { 822 block + 1);
823 if (block == 0) {
804 block++; 824 block++;
805 goto retry; 825 goto retry;
806 } else { 826 } else {
@@ -818,6 +838,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
818 struct buffer_head *raw_super_buf; 838 struct buffer_head *raw_super_buf;
819 struct inode *root; 839 struct inode *root;
820 long err = -EINVAL; 840 long err = -EINVAL;
841 int i;
821 842
822 /* allocate memory for f2fs-specific super block info */ 843 /* allocate memory for f2fs-specific super block info */
823 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); 844 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
@@ -825,7 +846,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
825 return -ENOMEM; 846 return -ENOMEM;
826 847
827 /* set a block size */ 848 /* set a block size */
828 if (!sb_set_blocksize(sb, F2FS_BLKSIZE)) { 849 if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
829 f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); 850 f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
830 goto free_sbi; 851 goto free_sbi;
831 } 852 }
@@ -874,7 +895,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
874 mutex_init(&sbi->node_write); 895 mutex_init(&sbi->node_write);
875 sbi->por_doing = false; 896 sbi->por_doing = false;
876 spin_lock_init(&sbi->stat_lock); 897 spin_lock_init(&sbi->stat_lock);
877 init_rwsem(&sbi->bio_sem); 898
899 mutex_init(&sbi->read_io.io_mutex);
900 sbi->read_io.sbi = sbi;
901 sbi->read_io.bio = NULL;
902 for (i = 0; i < NR_PAGE_TYPE; i++) {
903 mutex_init(&sbi->write_io[i].io_mutex);
904 sbi->write_io[i].sbi = sbi;
905 sbi->write_io[i].bio = NULL;
906 }
907
878 init_rwsem(&sbi->cp_rwsem); 908 init_rwsem(&sbi->cp_rwsem);
879 init_waitqueue_head(&sbi->cp_wait); 909 init_waitqueue_head(&sbi->cp_wait);
880 init_sb_info(sbi); 910 init_sb_info(sbi);
@@ -939,9 +969,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
939 } 969 }
940 970
 941 /* if there are any orphan nodes, free them */ 971 recover_orphan_inodes(sbi);
942 err = -EINVAL; 972 recover_orphan_inodes(sbi);
943 if (recover_orphan_inodes(sbi))
944 goto free_node_inode;
945 973
946 /* read root inode and dentry */ 974 /* read root inode and dentry */
947 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); 975 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
@@ -950,8 +978,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
950 err = PTR_ERR(root); 978 err = PTR_ERR(root);
951 goto free_node_inode; 979 goto free_node_inode;
952 } 980 }
953 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) 981 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
982 err = -EINVAL;
954 goto free_root_inode; 983 goto free_root_inode;
984 }
955 985
956 sb->s_root = d_make_root(root); /* allocate root dentry */ 986 sb->s_root = d_make_root(root); /* allocate root dentry */
957 if (!sb->s_root) { 987 if (!sb->s_root) {
@@ -1053,7 +1083,7 @@ static int __init init_inodecache(void)
1053{ 1083{
1054 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", 1084 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
1055 sizeof(struct f2fs_inode_info), NULL); 1085 sizeof(struct f2fs_inode_info), NULL);
1056 if (f2fs_inode_cachep == NULL) 1086 if (!f2fs_inode_cachep)
1057 return -ENOMEM; 1087 return -ENOMEM;
1058 return 0; 1088 return 0;
1059} 1089}
@@ -1078,9 +1108,12 @@ static int __init init_f2fs_fs(void)
1078 err = create_node_manager_caches(); 1108 err = create_node_manager_caches();
1079 if (err) 1109 if (err)
1080 goto free_inodecache; 1110 goto free_inodecache;
1081 err = create_gc_caches(); 1111 err = create_segment_manager_caches();
1082 if (err) 1112 if (err)
1083 goto free_node_manager_caches; 1113 goto free_node_manager_caches;
1114 err = create_gc_caches();
1115 if (err)
1116 goto free_segment_manager_caches;
1084 err = create_checkpoint_caches(); 1117 err = create_checkpoint_caches();
1085 if (err) 1118 if (err)
1086 goto free_gc_caches; 1119 goto free_gc_caches;
@@ -1102,6 +1135,8 @@ free_checkpoint_caches:
1102 destroy_checkpoint_caches(); 1135 destroy_checkpoint_caches();
1103free_gc_caches: 1136free_gc_caches:
1104 destroy_gc_caches(); 1137 destroy_gc_caches();
1138free_segment_manager_caches:
1139 destroy_segment_manager_caches();
1105free_node_manager_caches: 1140free_node_manager_caches:
1106 destroy_node_manager_caches(); 1141 destroy_node_manager_caches();
1107free_inodecache: 1142free_inodecache:
@@ -1117,6 +1152,7 @@ static void __exit exit_f2fs_fs(void)
1117 unregister_filesystem(&f2fs_fs_type); 1152 unregister_filesystem(&f2fs_fs_type);
1118 destroy_checkpoint_caches(); 1153 destroy_checkpoint_caches();
1119 destroy_gc_caches(); 1154 destroy_gc_caches();
1155 destroy_segment_manager_caches();
1120 destroy_node_manager_caches(); 1156 destroy_node_manager_caches();
1121 destroy_inodecache(); 1157 destroy_inodecache();
1122 kset_unregister(f2fs_kset); 1158 kset_unregister(f2fs_kset);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index aa7a3f139fe5..b0fb8a27f3da 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -522,7 +522,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index,
522 if (found) 522 if (found)
523 free = free + ENTRY_SIZE(here); 523 free = free + ENTRY_SIZE(here);
524 524
525 if (free < newsize) { 525 if (unlikely(free < newsize)) {
526 error = -ENOSPC; 526 error = -ENOSPC;
527 goto exit; 527 goto exit;
528 } 528 }
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index bb942f6d5702..da74d878dc4f 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -153,6 +153,13 @@ struct f2fs_extent {
153#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) 153#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5)
154 154
155#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ 155#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */
156#define F2FS_INLINE_DATA 0x02 /* file inline data flag */
157
158#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \
159 F2FS_INLINE_XATTR_ADDRS - 1))
160
161#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) \
162 - sizeof(__le32) * (DEF_ADDRS_PER_INODE + 5 - 1))
156 163
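These two macros pin down how much file data fits inside the inode block. Assuming the usual on-disk defaults (DEF_ADDRS_PER_INODE = 923 block pointers, F2FS_INLINE_XATTR_ADDRS = 50 slots reserved for inline xattrs, and one further address slot held back), MAX_INLINE_DATA comes to sizeof(__le32) * (923 - 50 - 1) = 3488 bytes, roughly 3.4KB. A quick check of the arithmetic:

    #include <stdio.h>

    #define DEF_ADDRS_PER_INODE     923 /* assumed on-disk default */
    #define F2FS_INLINE_XATTR_ADDRS  50 /* assumed on-disk default */

    int main(void)
    {
        /* sizeof(unsigned int) stands in for sizeof(__le32), 4 bytes */
        int max_inline = sizeof(unsigned int) *
            (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS - 1);

        printf("MAX_INLINE_DATA = %d bytes (~%.1fKB)\n",
               max_inline, max_inline / 1024.0);
        return 0;
    }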
157struct f2fs_inode { 164struct f2fs_inode {
158 __le16 i_mode; /* file mode */ 165 __le16 i_mode; /* file mode */
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index e0dc355fa317..3b9f28dfc849 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -16,15 +16,28 @@
16 { META, "META" }, \ 16 { META, "META" }, \
17 { META_FLUSH, "META_FLUSH" }) 17 { META_FLUSH, "META_FLUSH" })
18 18
19#define show_bio_type(type) \ 19#define F2FS_BIO_MASK(t) (t & (READA | WRITE_FLUSH_FUA))
20 __print_symbolic(type, \ 20#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO))
21 { READ, "READ" }, \ 21
22 { READA, "READAHEAD" }, \ 22#define show_bio_type(type) show_bio_base(type), show_bio_extra(type)
23 { READ_SYNC, "READ_SYNC" }, \ 23
24 { WRITE, "WRITE" }, \ 24#define show_bio_base(type) \
25 { WRITE_SYNC, "WRITE_SYNC" }, \ 25 __print_symbolic(F2FS_BIO_MASK(type), \
26 { WRITE_FLUSH, "WRITE_FLUSH" }, \ 26 { READ, "READ" }, \
27 { WRITE_FUA, "WRITE_FUA" }) 27 { READA, "READAHEAD" }, \
28 { READ_SYNC, "READ_SYNC" }, \
29 { WRITE, "WRITE" }, \
30 { WRITE_SYNC, "WRITE_SYNC" }, \
31 { WRITE_FLUSH, "WRITE_FLUSH" }, \
32 { WRITE_FUA, "WRITE_FUA" }, \
33 { WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" })
34
35#define show_bio_extra(type) \
36 __print_symbolic(F2FS_BIO_EXTRA_MASK(type), \
37 { REQ_META, "(M)" }, \
38 { REQ_PRIO, "(P)" }, \
39 { REQ_META | REQ_PRIO, "(MP)" }, \
40 { 0, " \b" })
28 41
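The tracepoint rework prints one rw word as two symbols: F2FS_BIO_MASK isolates the base operation bits and F2FS_BIO_EXTRA_MASK the REQ_META/REQ_PRIO decorations, so a single flags value renders as, say, WRITE_SYNC(MP). A sketch of the same two-mask decode; the flag values below are invented for illustration, not the real block-layer REQ_* constants:

    #include <stdio.h>

    /* illustrative flag values only */
    #define OP_WRITE  0x01
    #define OP_SYNC   0x02
    #define REQ_META  0x10
    #define REQ_PRIO  0x20

    #define BASE_MASK(t)  ((t) & (OP_WRITE | OP_SYNC))
    #define EXTRA_MASK(t) ((t) & (REQ_META | REQ_PRIO))

    static const char *base_str(int t)
    {
        switch (BASE_MASK(t)) {
        case OP_WRITE | OP_SYNC: return "WRITE_SYNC";
        case OP_WRITE:           return "WRITE";
        default:                 return "READ";
        }
    }

    static const char *extra_str(int t)
    {
        switch (EXTRA_MASK(t)) {
        case REQ_META | REQ_PRIO: return "(MP)";
        case REQ_META:            return "(M)";
        case REQ_PRIO:            return "(P)";
        default:                  return ""; /* the kernel prints " \b" here */
        }
    }

    int main(void)
    {
        int rw = OP_WRITE | OP_SYNC | REQ_META | REQ_PRIO;
        printf("bio_type = %s%s\n", base_str(rw), extra_str(rw));
        return 0;
    }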
29#define show_data_type(type) \ 42#define show_data_type(type) \
30 __print_symbolic(type, \ 43 __print_symbolic(type, \
@@ -421,7 +434,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,
421 __entry->err) 434 __entry->err)
422); 435);
423 436
424TRACE_EVENT_CONDITION(f2fs_readpage, 437TRACE_EVENT_CONDITION(f2fs_submit_page_bio,
425 438
426 TP_PROTO(struct page *page, sector_t blkaddr, int type), 439 TP_PROTO(struct page *page, sector_t blkaddr, int type),
427 440
@@ -446,7 +459,7 @@ TRACE_EVENT_CONDITION(f2fs_readpage,
446 ), 459 ),
447 460
448 TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " 461 TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
449 "blkaddr = 0x%llx, bio_type = %s", 462 "blkaddr = 0x%llx, bio_type = %s%s",
450 show_dev_ino(__entry), 463 show_dev_ino(__entry),
451 (unsigned long)__entry->index, 464 (unsigned long)__entry->index,
452 (unsigned long long)__entry->blkaddr, 465 (unsigned long long)__entry->blkaddr,
@@ -598,36 +611,54 @@ TRACE_EVENT(f2fs_reserve_new_block,
598 __entry->ofs_in_node) 611 __entry->ofs_in_node)
599); 612);
600 613
601TRACE_EVENT(f2fs_do_submit_bio, 614DECLARE_EVENT_CLASS(f2fs__submit_bio,
602 615
603 TP_PROTO(struct super_block *sb, int btype, bool sync, struct bio *bio), 616 TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
604 617
605 TP_ARGS(sb, btype, sync, bio), 618 TP_ARGS(sb, rw, type, bio),
606 619
607 TP_STRUCT__entry( 620 TP_STRUCT__entry(
608 __field(dev_t, dev) 621 __field(dev_t, dev)
609 __field(int, btype) 622 __field(int, rw)
610 __field(bool, sync) 623 __field(int, type)
611 __field(sector_t, sector) 624 __field(sector_t, sector)
612 __field(unsigned int, size) 625 __field(unsigned int, size)
613 ), 626 ),
614 627
615 TP_fast_assign( 628 TP_fast_assign(
616 __entry->dev = sb->s_dev; 629 __entry->dev = sb->s_dev;
617 __entry->btype = btype; 630 __entry->rw = rw;
618 __entry->sync = sync; 631 __entry->type = type;
619 __entry->sector = bio->bi_sector; 632 __entry->sector = bio->bi_sector;
620 __entry->size = bio->bi_size; 633 __entry->size = bio->bi_size;
621 ), 634 ),
622 635
623 TP_printk("dev = (%d,%d), type = %s, io = %s, sector = %lld, size = %u", 636 TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u",
624 show_dev(__entry), 637 show_dev(__entry),
625 show_block_type(__entry->btype), 638 show_bio_type(__entry->rw),
626 __entry->sync ? "sync" : "no sync", 639 show_block_type(__entry->type),
627 (unsigned long long)__entry->sector, 640 (unsigned long long)__entry->sector,
628 __entry->size) 641 __entry->size)
629); 642);
630 643
644DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio,
645
646 TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
647
648 TP_ARGS(sb, rw, type, bio),
649
650 TP_CONDITION(bio)
651);
652
653DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
654
655 TP_PROTO(struct super_block *sb, int rw, int type, struct bio *bio),
656
657 TP_ARGS(sb, rw, type, bio),
658
659 TP_CONDITION(bio)
660);
661
631DECLARE_EVENT_CLASS(f2fs__page, 662DECLARE_EVENT_CLASS(f2fs__page,
632 663
633 TP_PROTO(struct page *page, int type), 664 TP_PROTO(struct page *page, int type),
@@ -674,15 +705,16 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
674 TP_ARGS(page, type) 705 TP_ARGS(page, type)
675); 706);
676 707
677TRACE_EVENT(f2fs_submit_write_page, 708TRACE_EVENT(f2fs_submit_page_mbio,
678 709
679 TP_PROTO(struct page *page, block_t blk_addr, int type), 710 TP_PROTO(struct page *page, int rw, int type, block_t blk_addr),
680 711
681 TP_ARGS(page, blk_addr, type), 712 TP_ARGS(page, rw, type, blk_addr),
682 713
683 TP_STRUCT__entry( 714 TP_STRUCT__entry(
684 __field(dev_t, dev) 715 __field(dev_t, dev)
685 __field(ino_t, ino) 716 __field(ino_t, ino)
717 __field(int, rw)
686 __field(int, type) 718 __field(int, type)
687 __field(pgoff_t, index) 719 __field(pgoff_t, index)
688 __field(block_t, block) 720 __field(block_t, block)
@@ -691,13 +723,15 @@ TRACE_EVENT(f2fs_submit_write_page,
691 TP_fast_assign( 723 TP_fast_assign(
692 __entry->dev = page->mapping->host->i_sb->s_dev; 724 __entry->dev = page->mapping->host->i_sb->s_dev;
693 __entry->ino = page->mapping->host->i_ino; 725 __entry->ino = page->mapping->host->i_ino;
726 __entry->rw = rw;
694 __entry->type = type; 727 __entry->type = type;
695 __entry->index = page->index; 728 __entry->index = page->index;
696 __entry->block = blk_addr; 729 __entry->block = blk_addr;
697 ), 730 ),
698 731
699 TP_printk("dev = (%d,%d), ino = %lu, %s, index = %lu, blkaddr = 0x%llx", 732 TP_printk("dev = (%d,%d), ino = %lu, %s%s, %s, index = %lu, blkaddr = 0x%llx",
700 show_dev_ino(__entry), 733 show_dev_ino(__entry),
734 show_bio_type(__entry->rw),
701 show_block_type(__entry->type), 735 show_block_type(__entry->type),
702 (unsigned long)__entry->index, 736 (unsigned long)__entry->index,
703 (unsigned long long)__entry->block) 737 (unsigned long long)__entry->block)
@@ -727,6 +761,29 @@ TRACE_EVENT(f2fs_write_checkpoint,
727 __entry->msg) 761 __entry->msg)
728); 762);
729 763
764TRACE_EVENT(f2fs_issue_discard,
765
766 TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen),
767
768 TP_ARGS(sb, blkstart, blklen),
769
770 TP_STRUCT__entry(
771 __field(dev_t, dev)
772 __field(block_t, blkstart)
773 __field(block_t, blklen)
774 ),
775
776 TP_fast_assign(
777 __entry->dev = sb->s_dev;
778 __entry->blkstart = blkstart;
779 __entry->blklen = blklen;
780 ),
781
782 TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx",
783 show_dev(__entry),
784 (unsigned long long)__entry->blkstart,
785 (unsigned long long)__entry->blklen)
786);
730#endif /* _TRACE_F2FS_H */ 787#endif /* _TRACE_F2FS_H */
731 788
732 /* This part must be outside protection */ 789 /* This part must be outside protection */