about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-04-18 11:17:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-04-18 11:17:20 -0400
commit 06a60deca87dba8e2c186ea7f12ea87d6785188e (patch)
tree 2a6c8de6a7b110d13a1c1e3fc07cdc9065dfd749 /fs
parent d6a24d0640d609138a4e40a4ce9fd9fe7859e24c (diff)
parent 10027551ccf5459cc771c31ac8bc8e5cc8db45f8 (diff)
Merge tag 'for-f2fs-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "New features:
   - in-memory extent_cache
   - fs_shutdown to test power-off-recovery
   - use inline_data to store symlink path
   - show f2fs as a non-misc filesystem

  Major fixes:
   - avoid CPU stalls on sync_dirty_dir_inodes
   - fix some power-off-recovery procedure
   - fix handling of broken symlink correctly
   - fix missing dot and dotdot made by sudden power cuts
   - handle wrong data index during roll-forward recovery
   - preallocate data blocks for direct_io

  ... and a bunch of minor bug fixes and cleanups"

* tag 'for-f2fs-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (71 commits)
  f2fs: pass checkpoint reason on roll-forward recovery
  f2fs: avoid abnormal behavior on broken symlink
  f2fs: flush symlink path to avoid broken symlink after POR
  f2fs: change 0 to false for bool type
  f2fs: do not recover wrong data index
  f2fs: do not increase link count during recovery
  f2fs: assign parent's i_mode for empty dir
  f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries
  f2fs: fix mismatching lock and unlock pages for roll-forward recovery
  f2fs: fix sparse warnings
  f2fs: limit b_size of mapped bh in f2fs_map_bh
  f2fs: persist system.advise into on-disk inode
  f2fs: avoid NULL pointer dereference in f2fs_xattr_advise_get
  f2fs: preallocate fallocated blocks for direct IO
  f2fs: enable inline data by default
  f2fs: preserve extent info for extent cache
  f2fs: initialize extent tree with on-disk extent info of inode
  f2fs: introduce __{find,grab}_extent_tree
  f2fs: split set_data_blkaddr from f2fs_update_extent_cache
  f2fs: enable fast symlink by utilizing inline data
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 2
-rw-r--r-- fs/f2fs/Kconfig 2
-rw-r--r-- fs/f2fs/acl.c 14
-rw-r--r-- fs/f2fs/checkpoint.c 38
-rw-r--r-- fs/f2fs/data.c 742
-rw-r--r-- fs/f2fs/debug.c 22
-rw-r--r-- fs/f2fs/dir.c 93
-rw-r--r-- fs/f2fs/f2fs.h 174
-rw-r--r-- fs/f2fs/file.c 64
-rw-r--r-- fs/f2fs/gc.c 6
-rw-r--r-- fs/f2fs/inline.c 69
-rw-r--r-- fs/f2fs/inode.c 25
-rw-r--r-- fs/f2fs/namei.c 81
-rw-r--r-- fs/f2fs/node.c 18
-rw-r--r-- fs/f2fs/node.h 1
-rw-r--r-- fs/f2fs/recovery.c 76
-rw-r--r-- fs/f2fs/segment.c 17
-rw-r--r-- fs/f2fs/segment.h 3
-rw-r--r-- fs/f2fs/super.c 40
-rw-r--r-- fs/f2fs/xattr.c 4
20 files changed, 1230 insertions, 261 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index ec35851e5b71..011f43365d7b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -32,6 +32,7 @@ source "fs/gfs2/Kconfig"
32source "fs/ocfs2/Kconfig" 32source "fs/ocfs2/Kconfig"
33source "fs/btrfs/Kconfig" 33source "fs/btrfs/Kconfig"
34source "fs/nilfs2/Kconfig" 34source "fs/nilfs2/Kconfig"
35source "fs/f2fs/Kconfig"
35 36
36config FS_DAX 37config FS_DAX
37 bool "Direct Access (DAX) support" 38 bool "Direct Access (DAX) support"
@@ -217,7 +218,6 @@ source "fs/pstore/Kconfig"
217source "fs/sysv/Kconfig" 218source "fs/sysv/Kconfig"
218source "fs/ufs/Kconfig" 219source "fs/ufs/Kconfig"
219source "fs/exofs/Kconfig" 220source "fs/exofs/Kconfig"
220source "fs/f2fs/Kconfig"
221 221
222endif # MISC_FILESYSTEMS 222endif # MISC_FILESYSTEMS
223 223
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 94e2d2ffabe1..05f0f663f14c 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -1,5 +1,5 @@
1config F2FS_FS 1config F2FS_FS
2 tristate "F2FS filesystem support (EXPERIMENTAL)" 2 tristate "F2FS filesystem support"
3 depends on BLOCK 3 depends on BLOCK
4 help 4 help
5 F2FS is based on Log-structured File System (LFS), which supports 5 F2FS is based on Log-structured File System (LFS), which supports
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 742202779bd5..4320ffab3495 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -351,13 +351,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
351 351
352 *acl = f2fs_acl_clone(p, GFP_NOFS); 352 *acl = f2fs_acl_clone(p, GFP_NOFS);
353 if (!*acl) 353 if (!*acl)
354 return -ENOMEM; 354 goto no_mem;
355 355
356 ret = f2fs_acl_create_masq(*acl, mode); 356 ret = f2fs_acl_create_masq(*acl, mode);
357 if (ret < 0) { 357 if (ret < 0)
358 posix_acl_release(*acl); 358 goto no_mem_clone;
359 return -ENOMEM;
360 }
361 359
362 if (ret == 0) { 360 if (ret == 0) {
363 posix_acl_release(*acl); 361 posix_acl_release(*acl);
@@ -378,6 +376,12 @@ no_acl:
378 *default_acl = NULL; 376 *default_acl = NULL;
379 *acl = NULL; 377 *acl = NULL;
380 return 0; 378 return 0;
379
380no_mem_clone:
381 posix_acl_release(*acl);
382no_mem:
383 posix_acl_release(p);
384 return -ENOMEM;
381} 385}
382 386
383int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, 387int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 7f794b72b3b7..a5e17a2a0781 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -276,7 +276,7 @@ continue_unlock:
276 if (!clear_page_dirty_for_io(page)) 276 if (!clear_page_dirty_for_io(page))
277 goto continue_unlock; 277 goto continue_unlock;
278 278
279 if (f2fs_write_meta_page(page, &wbc)) { 279 if (mapping->a_ops->writepage(page, &wbc)) {
280 unlock_page(page); 280 unlock_page(page);
281 break; 281 break;
282 } 282 }
@@ -464,20 +464,19 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
464 464
465void recover_orphan_inodes(struct f2fs_sb_info *sbi) 465void recover_orphan_inodes(struct f2fs_sb_info *sbi)
466{ 466{
467 block_t start_blk, orphan_blkaddr, i, j; 467 block_t start_blk, orphan_blocks, i, j;
468 468
469 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) 469 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
470 return; 470 return;
471 471
472 set_sbi_flag(sbi, SBI_POR_DOING); 472 set_sbi_flag(sbi, SBI_POR_DOING);
473 473
474 start_blk = __start_cp_addr(sbi) + 1 + 474 start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
475 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 475 orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
476 orphan_blkaddr = __start_sum_addr(sbi) - 1;
477 476
478 ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); 477 ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
479 478
480 for (i = 0; i < orphan_blkaddr; i++) { 479 for (i = 0; i < orphan_blocks; i++) {
481 struct page *page = get_meta_page(sbi, start_blk + i); 480 struct page *page = get_meta_page(sbi, start_blk + i);
482 struct f2fs_orphan_block *orphan_blk; 481 struct f2fs_orphan_block *orphan_blk;
483 482
@@ -615,7 +614,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
615 unsigned long blk_size = sbi->blocksize; 614 unsigned long blk_size = sbi->blocksize;
616 unsigned long long cp1_version = 0, cp2_version = 0; 615 unsigned long long cp1_version = 0, cp2_version = 0;
617 unsigned long long cp_start_blk_no; 616 unsigned long long cp_start_blk_no;
618 unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 617 unsigned int cp_blks = 1 + __cp_payload(sbi);
619 block_t cp_blk_no; 618 block_t cp_blk_no;
620 int i; 619 int i;
621 620
@@ -796,6 +795,7 @@ retry:
796 * wribacking dentry pages in the freeing inode. 795 * wribacking dentry pages in the freeing inode.
797 */ 796 */
798 f2fs_submit_merged_bio(sbi, DATA, WRITE); 797 f2fs_submit_merged_bio(sbi, DATA, WRITE);
798 cond_resched();
799 } 799 }
800 goto retry; 800 goto retry;
801} 801}
@@ -884,7 +884,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
884 __u32 crc32 = 0; 884 __u32 crc32 = 0;
885 void *kaddr; 885 void *kaddr;
886 int i; 886 int i;
887 int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 887 int cp_payload_blks = __cp_payload(sbi);
888 888
889 /* 889 /*
890 * This avoids to conduct wrong roll-forward operations and uses 890 * This avoids to conduct wrong roll-forward operations and uses
@@ -1048,17 +1048,18 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1048 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1048 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1049 unsigned long long ckpt_ver; 1049 unsigned long long ckpt_ver;
1050 1050
1051 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1052
1053 mutex_lock(&sbi->cp_mutex); 1051 mutex_lock(&sbi->cp_mutex);
1054 1052
1055 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && 1053 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1056 cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT) 1054 (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC))
1057 goto out; 1055 goto out;
1058 if (unlikely(f2fs_cp_error(sbi))) 1056 if (unlikely(f2fs_cp_error(sbi)))
1059 goto out; 1057 goto out;
1060 if (f2fs_readonly(sbi->sb)) 1058 if (f2fs_readonly(sbi->sb))
1061 goto out; 1059 goto out;
1060
1061 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
1062
1062 if (block_operations(sbi)) 1063 if (block_operations(sbi))
1063 goto out; 1064 goto out;
1064 1065
@@ -1085,6 +1086,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1085 1086
1086 unblock_operations(sbi); 1087 unblock_operations(sbi);
1087 stat_inc_cp_count(sbi->stat_info); 1088 stat_inc_cp_count(sbi->stat_info);
1089
1090 if (cpc->reason == CP_RECOVERY)
1091 f2fs_msg(sbi->sb, KERN_NOTICE,
1092 "checkpoint: version = %llx", ckpt_ver);
1088out: 1093out:
1089 mutex_unlock(&sbi->cp_mutex); 1094 mutex_unlock(&sbi->cp_mutex);
1090 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); 1095 trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
@@ -1103,14 +1108,9 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
1103 im->ino_num = 0; 1108 im->ino_num = 0;
1104 } 1109 }
1105 1110
1106 /*
1107 * considering 512 blocks in a segment 8 blocks are needed for cp
1108 * and log segment summaries. Remaining blocks are used to keep
1109 * orphan entries with the limitation one reserved segment
1110 * for cp pack we can have max 1020*504 orphan entries
1111 */
1112 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - 1111 sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1113 NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; 1112 NR_CURSEG_TYPE - __cp_payload(sbi)) *
1113 F2FS_ORPHANS_PER_BLOCK;
1114} 1114}
1115 1115
1116int __init create_checkpoint_caches(void) 1116int __init create_checkpoint_caches(void)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 319eda511c4f..b91b0e10678e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -25,6 +25,9 @@
25#include "trace.h" 25#include "trace.h"
26#include <trace/events/f2fs.h> 26#include <trace/events/f2fs.h>
27 27
28static struct kmem_cache *extent_tree_slab;
29static struct kmem_cache *extent_node_slab;
30
28static void f2fs_read_end_io(struct bio *bio, int err) 31static void f2fs_read_end_io(struct bio *bio, int err)
29{ 32{
30 struct bio_vec *bvec; 33 struct bio_vec *bvec;
@@ -197,7 +200,7 @@ alloc_new:
197 * ->node_page 200 * ->node_page
198 * update block addresses in the node page 201 * update block addresses in the node page
199 */ 202 */
200static void __set_data_blkaddr(struct dnode_of_data *dn) 203void set_data_blkaddr(struct dnode_of_data *dn)
201{ 204{
202 struct f2fs_node *rn; 205 struct f2fs_node *rn;
203 __le32 *addr_array; 206 __le32 *addr_array;
@@ -226,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn)
226 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 229 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
227 230
228 dn->data_blkaddr = NEW_ADDR; 231 dn->data_blkaddr = NEW_ADDR;
229 __set_data_blkaddr(dn); 232 set_data_blkaddr(dn);
230 mark_inode_dirty(dn->inode); 233 mark_inode_dirty(dn->inode);
231 sync_inode_page(dn); 234 sync_inode_page(dn);
232 return 0; 235 return 0;
@@ -248,73 +251,62 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
248 return err; 251 return err;
249} 252}
250 253
251static int check_extent_cache(struct inode *inode, pgoff_t pgofs, 254static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs,
252 struct buffer_head *bh_result) 255 struct extent_info *ei, struct buffer_head *bh_result)
256{
257 unsigned int blkbits = sb->s_blocksize_bits;
258 size_t max_size = bh_result->b_size;
259 size_t mapped_size;
260
261 clear_buffer_new(bh_result);
262 map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs);
263 mapped_size = (ei->fofs + ei->len - pgofs) << blkbits;
264 bh_result->b_size = min(max_size, mapped_size);
265}
266
267static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
268 struct extent_info *ei)
253{ 269{
254 struct f2fs_inode_info *fi = F2FS_I(inode); 270 struct f2fs_inode_info *fi = F2FS_I(inode);
255 pgoff_t start_fofs, end_fofs; 271 pgoff_t start_fofs, end_fofs;
256 block_t start_blkaddr; 272 block_t start_blkaddr;
257 273
258 if (is_inode_flag_set(fi, FI_NO_EXTENT)) 274 read_lock(&fi->ext_lock);
259 return 0;
260
261 read_lock(&fi->ext.ext_lock);
262 if (fi->ext.len == 0) { 275 if (fi->ext.len == 0) {
263 read_unlock(&fi->ext.ext_lock); 276 read_unlock(&fi->ext_lock);
264 return 0; 277 return false;
265 } 278 }
266 279
267 stat_inc_total_hit(inode->i_sb); 280 stat_inc_total_hit(inode->i_sb);
268 281
269 start_fofs = fi->ext.fofs; 282 start_fofs = fi->ext.fofs;
270 end_fofs = fi->ext.fofs + fi->ext.len - 1; 283 end_fofs = fi->ext.fofs + fi->ext.len - 1;
271 start_blkaddr = fi->ext.blk_addr; 284 start_blkaddr = fi->ext.blk;
272 285
273 if (pgofs >= start_fofs && pgofs <= end_fofs) { 286 if (pgofs >= start_fofs && pgofs <= end_fofs) {
274 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 287 *ei = fi->ext;
275 size_t count;
276
277 set_buffer_new(bh_result);
278 map_bh(bh_result, inode->i_sb,
279 start_blkaddr + pgofs - start_fofs);
280 count = end_fofs - pgofs + 1;
281 if (count < (UINT_MAX >> blkbits))
282 bh_result->b_size = (count << blkbits);
283 else
284 bh_result->b_size = UINT_MAX;
285
286 stat_inc_read_hit(inode->i_sb); 288 stat_inc_read_hit(inode->i_sb);
287 read_unlock(&fi->ext.ext_lock); 289 read_unlock(&fi->ext_lock);
288 return 1; 290 return true;
289 } 291 }
290 read_unlock(&fi->ext.ext_lock); 292 read_unlock(&fi->ext_lock);
291 return 0; 293 return false;
292} 294}
293 295
294void update_extent_cache(struct dnode_of_data *dn) 296static bool update_extent_info(struct inode *inode, pgoff_t fofs,
297 block_t blkaddr)
295{ 298{
296 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 299 struct f2fs_inode_info *fi = F2FS_I(inode);
297 pgoff_t fofs, start_fofs, end_fofs; 300 pgoff_t start_fofs, end_fofs;
298 block_t start_blkaddr, end_blkaddr; 301 block_t start_blkaddr, end_blkaddr;
299 int need_update = true; 302 int need_update = true;
300 303
301 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); 304 write_lock(&fi->ext_lock);
302
303 /* Update the page address in the parent node */
304 __set_data_blkaddr(dn);
305
306 if (is_inode_flag_set(fi, FI_NO_EXTENT))
307 return;
308
309 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
310 dn->ofs_in_node;
311
312 write_lock(&fi->ext.ext_lock);
313 305
314 start_fofs = fi->ext.fofs; 306 start_fofs = fi->ext.fofs;
315 end_fofs = fi->ext.fofs + fi->ext.len - 1; 307 end_fofs = fi->ext.fofs + fi->ext.len - 1;
316 start_blkaddr = fi->ext.blk_addr; 308 start_blkaddr = fi->ext.blk;
317 end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1; 309 end_blkaddr = fi->ext.blk + fi->ext.len - 1;
318 310
319 /* Drop and initialize the matched extent */ 311 /* Drop and initialize the matched extent */
320 if (fi->ext.len == 1 && fofs == start_fofs) 312 if (fi->ext.len == 1 && fofs == start_fofs)
@@ -322,24 +314,24 @@ void update_extent_cache(struct dnode_of_data *dn)
322 314
323 /* Initial extent */ 315 /* Initial extent */
324 if (fi->ext.len == 0) { 316 if (fi->ext.len == 0) {
325 if (dn->data_blkaddr != NULL_ADDR) { 317 if (blkaddr != NULL_ADDR) {
326 fi->ext.fofs = fofs; 318 fi->ext.fofs = fofs;
327 fi->ext.blk_addr = dn->data_blkaddr; 319 fi->ext.blk = blkaddr;
328 fi->ext.len = 1; 320 fi->ext.len = 1;
329 } 321 }
330 goto end_update; 322 goto end_update;
331 } 323 }
332 324
333 /* Front merge */ 325 /* Front merge */
334 if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) { 326 if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
335 fi->ext.fofs--; 327 fi->ext.fofs--;
336 fi->ext.blk_addr--; 328 fi->ext.blk--;
337 fi->ext.len++; 329 fi->ext.len++;
338 goto end_update; 330 goto end_update;
339 } 331 }
340 332
341 /* Back merge */ 333 /* Back merge */
342 if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) { 334 if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
343 fi->ext.len++; 335 fi->ext.len++;
344 goto end_update; 336 goto end_update;
345 } 337 }
@@ -351,8 +343,7 @@ void update_extent_cache(struct dnode_of_data *dn)
351 fi->ext.len = fofs - start_fofs; 343 fi->ext.len = fofs - start_fofs;
352 } else { 344 } else {
353 fi->ext.fofs = fofs + 1; 345 fi->ext.fofs = fofs + 1;
354 fi->ext.blk_addr = start_blkaddr + 346 fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
355 fofs - start_fofs + 1;
356 fi->ext.len -= fofs - start_fofs + 1; 347 fi->ext.len -= fofs - start_fofs + 1;
357 } 348 }
358 } else { 349 } else {
@@ -366,27 +357,583 @@ void update_extent_cache(struct dnode_of_data *dn)
366 need_update = true; 357 need_update = true;
367 } 358 }
368end_update: 359end_update:
369 write_unlock(&fi->ext.ext_lock); 360 write_unlock(&fi->ext_lock);
370 if (need_update) 361 return need_update;
371 sync_inode_page(dn); 362}
363
364static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
365 struct extent_tree *et, struct extent_info *ei,
366 struct rb_node *parent, struct rb_node **p)
367{
368 struct extent_node *en;
369
370 en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
371 if (!en)
372 return NULL;
373
374 en->ei = *ei;
375 INIT_LIST_HEAD(&en->list);
376
377 rb_link_node(&en->rb_node, parent, p);
378 rb_insert_color(&en->rb_node, &et->root);
379 et->count++;
380 atomic_inc(&sbi->total_ext_node);
381 return en;
382}
383
384static void __detach_extent_node(struct f2fs_sb_info *sbi,
385 struct extent_tree *et, struct extent_node *en)
386{
387 rb_erase(&en->rb_node, &et->root);
388 et->count--;
389 atomic_dec(&sbi->total_ext_node);
390
391 if (et->cached_en == en)
392 et->cached_en = NULL;
393}
394
395static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
396 nid_t ino)
397{
398 struct extent_tree *et;
399
400 down_read(&sbi->extent_tree_lock);
401 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
402 if (!et) {
403 up_read(&sbi->extent_tree_lock);
404 return NULL;
405 }
406 atomic_inc(&et->refcount);
407 up_read(&sbi->extent_tree_lock);
408
409 return et;
410}
411
412static struct extent_tree *__grab_extent_tree(struct inode *inode)
413{
414 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
415 struct extent_tree *et;
416 nid_t ino = inode->i_ino;
417
418 down_write(&sbi->extent_tree_lock);
419 et = radix_tree_lookup(&sbi->extent_tree_root, ino);
420 if (!et) {
421 et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
422 f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
423 memset(et, 0, sizeof(struct extent_tree));
424 et->ino = ino;
425 et->root = RB_ROOT;
426 et->cached_en = NULL;
427 rwlock_init(&et->lock);
428 atomic_set(&et->refcount, 0);
429 et->count = 0;
430 sbi->total_ext_tree++;
431 }
432 atomic_inc(&et->refcount);
433 up_write(&sbi->extent_tree_lock);
434
435 return et;
436}
437
438static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
439 unsigned int fofs)
440{
441 struct rb_node *node = et->root.rb_node;
442 struct extent_node *en;
443
444 if (et->cached_en) {
445 struct extent_info *cei = &et->cached_en->ei;
446
447 if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
448 return et->cached_en;
449 }
450
451 while (node) {
452 en = rb_entry(node, struct extent_node, rb_node);
453
454 if (fofs < en->ei.fofs) {
455 node = node->rb_left;
456 } else if (fofs >= en->ei.fofs + en->ei.len) {
457 node = node->rb_right;
458 } else {
459 et->cached_en = en;
460 return en;
461 }
462 }
463 return NULL;
464}
465
466static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
467 struct extent_tree *et, struct extent_node *en)
468{
469 struct extent_node *prev;
470 struct rb_node *node;
471
472 node = rb_prev(&en->rb_node);
473 if (!node)
474 return NULL;
475
476 prev = rb_entry(node, struct extent_node, rb_node);
477 if (__is_back_mergeable(&en->ei, &prev->ei)) {
478 en->ei.fofs = prev->ei.fofs;
479 en->ei.blk = prev->ei.blk;
480 en->ei.len += prev->ei.len;
481 __detach_extent_node(sbi, et, prev);
482 return prev;
483 }
484 return NULL;
485}
486
487static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
488 struct extent_tree *et, struct extent_node *en)
489{
490 struct extent_node *next;
491 struct rb_node *node;
492
493 node = rb_next(&en->rb_node);
494 if (!node)
495 return NULL;
496
497 next = rb_entry(node, struct extent_node, rb_node);
498 if (__is_front_mergeable(&en->ei, &next->ei)) {
499 en->ei.len += next->ei.len;
500 __detach_extent_node(sbi, et, next);
501 return next;
502 }
503 return NULL;
504}
505
506static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
507 struct extent_tree *et, struct extent_info *ei,
508 struct extent_node **den)
509{
510 struct rb_node **p = &et->root.rb_node;
511 struct rb_node *parent = NULL;
512 struct extent_node *en;
513
514 while (*p) {
515 parent = *p;
516 en = rb_entry(parent, struct extent_node, rb_node);
517
518 if (ei->fofs < en->ei.fofs) {
519 if (__is_front_mergeable(ei, &en->ei)) {
520 f2fs_bug_on(sbi, !den);
521 en->ei.fofs = ei->fofs;
522 en->ei.blk = ei->blk;
523 en->ei.len += ei->len;
524 *den = __try_back_merge(sbi, et, en);
525 return en;
526 }
527 p = &(*p)->rb_left;
528 } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
529 if (__is_back_mergeable(ei, &en->ei)) {
530 f2fs_bug_on(sbi, !den);
531 en->ei.len += ei->len;
532 *den = __try_front_merge(sbi, et, en);
533 return en;
534 }
535 p = &(*p)->rb_right;
536 } else {
537 f2fs_bug_on(sbi, 1);
538 }
539 }
540
541 return __attach_extent_node(sbi, et, ei, parent, p);
542}
543
544static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
545 struct extent_tree *et, bool free_all)
546{
547 struct rb_node *node, *next;
548 struct extent_node *en;
549 unsigned int count = et->count;
550
551 node = rb_first(&et->root);
552 while (node) {
553 next = rb_next(node);
554 en = rb_entry(node, struct extent_node, rb_node);
555
556 if (free_all) {
557 spin_lock(&sbi->extent_lock);
558 if (!list_empty(&en->list))
559 list_del_init(&en->list);
560 spin_unlock(&sbi->extent_lock);
561 }
562
563 if (free_all || list_empty(&en->list)) {
564 __detach_extent_node(sbi, et, en);
565 kmem_cache_free(extent_node_slab, en);
566 }
567 node = next;
568 }
569
570 return count - et->count;
571}
572
573static void f2fs_init_extent_tree(struct inode *inode,
574 struct f2fs_extent *i_ext)
575{
576 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
577 struct extent_tree *et;
578 struct extent_node *en;
579 struct extent_info ei;
580
581 if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
582 return;
583
584 et = __grab_extent_tree(inode);
585
586 write_lock(&et->lock);
587 if (et->count)
588 goto out;
589
590 set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
591 le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
592
593 en = __insert_extent_tree(sbi, et, &ei, NULL);
594 if (en) {
595 et->cached_en = en;
596
597 spin_lock(&sbi->extent_lock);
598 list_add_tail(&en->list, &sbi->extent_list);
599 spin_unlock(&sbi->extent_lock);
600 }
601out:
602 write_unlock(&et->lock);
603 atomic_dec(&et->refcount);
604}
605
606static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
607 struct extent_info *ei)
608{
609 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
610 struct extent_tree *et;
611 struct extent_node *en;
612
613 trace_f2fs_lookup_extent_tree_start(inode, pgofs);
614
615 et = __find_extent_tree(sbi, inode->i_ino);
616 if (!et)
617 return false;
618
619 read_lock(&et->lock);
620 en = __lookup_extent_tree(et, pgofs);
621 if (en) {
622 *ei = en->ei;
623 spin_lock(&sbi->extent_lock);
624 if (!list_empty(&en->list))
625 list_move_tail(&en->list, &sbi->extent_list);
626 spin_unlock(&sbi->extent_lock);
627 stat_inc_read_hit(sbi->sb);
628 }
629 stat_inc_total_hit(sbi->sb);
630 read_unlock(&et->lock);
631
632 trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
633
634 atomic_dec(&et->refcount);
635 return en ? true : false;
636}
637
638static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
639 block_t blkaddr)
640{
641 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
642 struct extent_tree *et;
643 struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
644 struct extent_node *den = NULL;
645 struct extent_info ei, dei;
646 unsigned int endofs;
647
648 trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
649
650 et = __grab_extent_tree(inode);
651
652 write_lock(&et->lock);
653
654 /* 1. lookup and remove existing extent info in cache */
655 en = __lookup_extent_tree(et, fofs);
656 if (!en)
657 goto update_extent;
658
659 dei = en->ei;
660 __detach_extent_node(sbi, et, en);
661
662 /* 2. if extent can be split more, split and insert the left part */
663 if (dei.len > 1) {
664 /* insert left part of split extent into cache */
665 if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
666 set_extent_info(&ei, dei.fofs, dei.blk,
667 fofs - dei.fofs);
668 en1 = __insert_extent_tree(sbi, et, &ei, NULL);
669 }
670
671 /* insert right part of split extent into cache */
672 endofs = dei.fofs + dei.len - 1;
673 if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
674 set_extent_info(&ei, fofs + 1,
675 fofs - dei.fofs + dei.blk, endofs - fofs);
676 en2 = __insert_extent_tree(sbi, et, &ei, NULL);
677 }
678 }
679
680update_extent:
681 /* 3. update extent in extent cache */
682 if (blkaddr) {
683 set_extent_info(&ei, fofs, blkaddr, 1);
684 en3 = __insert_extent_tree(sbi, et, &ei, &den);
685 }
686
687 /* 4. update in global extent list */
688 spin_lock(&sbi->extent_lock);
689 if (en && !list_empty(&en->list))
690 list_del(&en->list);
691 /*
692 * en1 and en2 split from en, they will become more and more smaller
693 * fragments after splitting several times. So if the length is smaller
694 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
695 */
696 if (en1)
697 list_add_tail(&en1->list, &sbi->extent_list);
698 if (en2)
699 list_add_tail(&en2->list, &sbi->extent_list);
700 if (en3) {
701 if (list_empty(&en3->list))
702 list_add_tail(&en3->list, &sbi->extent_list);
703 else
704 list_move_tail(&en3->list, &sbi->extent_list);
705 }
706 if (den && !list_empty(&den->list))
707 list_del(&den->list);
708 spin_unlock(&sbi->extent_lock);
709
710 /* 5. release extent node */
711 if (en)
712 kmem_cache_free(extent_node_slab, en);
713 if (den)
714 kmem_cache_free(extent_node_slab, den);
715
716 write_unlock(&et->lock);
717 atomic_dec(&et->refcount);
718}
719
720void f2fs_preserve_extent_tree(struct inode *inode)
721{
722 struct extent_tree *et;
723 struct extent_info *ext = &F2FS_I(inode)->ext;
724 bool sync = false;
725
726 if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
727 return;
728
729 et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
730 if (!et) {
731 if (ext->len) {
732 ext->len = 0;
733 update_inode_page(inode);
734 }
735 return;
736 }
737
738 read_lock(&et->lock);
739 if (et->count) {
740 struct extent_node *en;
741
742 if (et->cached_en) {
743 en = et->cached_en;
744 } else {
745 struct rb_node *node = rb_first(&et->root);
746
747 if (!node)
748 node = rb_last(&et->root);
749 en = rb_entry(node, struct extent_node, rb_node);
750 }
751
752 if (__is_extent_same(ext, &en->ei))
753 goto out;
754
755 *ext = en->ei;
756 sync = true;
757 } else if (ext->len) {
758 ext->len = 0;
759 sync = true;
760 }
761out:
762 read_unlock(&et->lock);
763 atomic_dec(&et->refcount);
764
765 if (sync)
766 update_inode_page(inode);
767}
768
769void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
770{
771 struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
772 struct extent_node *en, *tmp;
773 unsigned long ino = F2FS_ROOT_INO(sbi);
774 struct radix_tree_iter iter;
775 void **slot;
776 unsigned int found;
777 unsigned int node_cnt = 0, tree_cnt = 0;
778
779 if (!test_opt(sbi, EXTENT_CACHE))
780 return;
781
782 if (available_free_memory(sbi, EXTENT_CACHE))
783 return;
784
785 spin_lock(&sbi->extent_lock);
786 list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
787 if (!nr_shrink--)
788 break;
789 list_del_init(&en->list);
790 }
791 spin_unlock(&sbi->extent_lock);
792
793 down_read(&sbi->extent_tree_lock);
794 while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
795 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
796 unsigned i;
797
798 ino = treevec[found - 1]->ino + 1;
799 for (i = 0; i < found; i++) {
800 struct extent_tree *et = treevec[i];
801
802 atomic_inc(&et->refcount);
803 write_lock(&et->lock);
804 node_cnt += __free_extent_tree(sbi, et, false);
805 write_unlock(&et->lock);
806 atomic_dec(&et->refcount);
807 }
808 }
809 up_read(&sbi->extent_tree_lock);
810
811 down_write(&sbi->extent_tree_lock);
812 radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
813 F2FS_ROOT_INO(sbi)) {
814 struct extent_tree *et = (struct extent_tree *)*slot;
815
816 if (!atomic_read(&et->refcount) && !et->count) {
817 radix_tree_delete(&sbi->extent_tree_root, et->ino);
818 kmem_cache_free(extent_tree_slab, et);
819 sbi->total_ext_tree--;
820 tree_cnt++;
821 }
822 }
823 up_write(&sbi->extent_tree_lock);
824
825 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
826}
827
828void f2fs_destroy_extent_tree(struct inode *inode)
829{
830 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
831 struct extent_tree *et;
832 unsigned int node_cnt = 0;
833
834 if (!test_opt(sbi, EXTENT_CACHE))
835 return;
836
837 et = __find_extent_tree(sbi, inode->i_ino);
838 if (!et)
839 goto out;
840
841 /* free all extent info belong to this extent tree */
842 write_lock(&et->lock);
843 node_cnt = __free_extent_tree(sbi, et, true);
844 write_unlock(&et->lock);
845
846 atomic_dec(&et->refcount);
847
848 /* try to find and delete extent tree entry in radix tree */
849 down_write(&sbi->extent_tree_lock);
850 et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
851 if (!et) {
852 up_write(&sbi->extent_tree_lock);
853 goto out;
854 }
855 f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
856 radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
857 kmem_cache_free(extent_tree_slab, et);
858 sbi->total_ext_tree--;
859 up_write(&sbi->extent_tree_lock);
860out:
861 trace_f2fs_destroy_extent_tree(inode, node_cnt);
372 return; 862 return;
373} 863}
374 864
865void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
866{
867 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
868 f2fs_init_extent_tree(inode, i_ext);
869
870 write_lock(&F2FS_I(inode)->ext_lock);
871 get_extent_info(&F2FS_I(inode)->ext, *i_ext);
872 write_unlock(&F2FS_I(inode)->ext_lock);
873}
874
875static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
876 struct extent_info *ei)
877{
878 if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
879 return false;
880
881 if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
882 return f2fs_lookup_extent_tree(inode, pgofs, ei);
883
884 return lookup_extent_info(inode, pgofs, ei);
885}
886
887void f2fs_update_extent_cache(struct dnode_of_data *dn)
888{
889 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
890 pgoff_t fofs;
891
892 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
893
894 if (is_inode_flag_set(fi, FI_NO_EXTENT))
895 return;
896
897 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
898 dn->ofs_in_node;
899
900 if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
901 return f2fs_update_extent_tree(dn->inode, fofs,
902 dn->data_blkaddr);
903
904 if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
905 sync_inode_page(dn);
906}
907
375struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) 908struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
376{ 909{
377 struct address_space *mapping = inode->i_mapping; 910 struct address_space *mapping = inode->i_mapping;
378 struct dnode_of_data dn; 911 struct dnode_of_data dn;
379 struct page *page; 912 struct page *page;
913 struct extent_info ei;
380 int err; 914 int err;
381 struct f2fs_io_info fio = { 915 struct f2fs_io_info fio = {
382 .type = DATA, 916 .type = DATA,
383 .rw = sync ? READ_SYNC : READA, 917 .rw = sync ? READ_SYNC : READA,
384 }; 918 };
385 919
920 /*
921 * If sync is false, it needs to check its block allocation.
922 * This is need and triggered by two flows:
923 * gc and truncate_partial_data_page.
924 */
925 if (!sync)
926 goto search;
927
386 page = find_get_page(mapping, index); 928 page = find_get_page(mapping, index);
387 if (page && PageUptodate(page)) 929 if (page && PageUptodate(page))
388 return page; 930 return page;
389 f2fs_put_page(page, 0); 931 f2fs_put_page(page, 0);
932search:
933 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
934 dn.data_blkaddr = ei.blk + index - ei.fofs;
935 goto got_it;
936 }
390 937
391 set_new_dnode(&dn, inode, NULL, NULL, 0); 938 set_new_dnode(&dn, inode, NULL, NULL, 0);
392 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 939 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
@@ -401,6 +948,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
401 if (unlikely(dn.data_blkaddr == NEW_ADDR)) 948 if (unlikely(dn.data_blkaddr == NEW_ADDR))
402 return ERR_PTR(-EINVAL); 949 return ERR_PTR(-EINVAL);
403 950
951got_it:
404 page = grab_cache_page(mapping, index); 952 page = grab_cache_page(mapping, index);
405 if (!page) 953 if (!page)
406 return ERR_PTR(-ENOMEM); 954 return ERR_PTR(-ENOMEM);
@@ -435,6 +983,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
435 struct address_space *mapping = inode->i_mapping; 983 struct address_space *mapping = inode->i_mapping;
436 struct dnode_of_data dn; 984 struct dnode_of_data dn;
437 struct page *page; 985 struct page *page;
986 struct extent_info ei;
438 int err; 987 int err;
439 struct f2fs_io_info fio = { 988 struct f2fs_io_info fio = {
440 .type = DATA, 989 .type = DATA,
@@ -445,6 +994,11 @@ repeat:
445 if (!page) 994 if (!page)
446 return ERR_PTR(-ENOMEM); 995 return ERR_PTR(-ENOMEM);
447 996
997 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
998 dn.data_blkaddr = ei.blk + index - ei.fofs;
999 goto got_it;
1000 }
1001
448 set_new_dnode(&dn, inode, NULL, NULL, 0); 1002 set_new_dnode(&dn, inode, NULL, NULL, 0);
449 err = get_dnode_of_data(&dn, index, LOOKUP_NODE); 1003 err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
450 if (err) { 1004 if (err) {
@@ -458,6 +1012,7 @@ repeat:
458 return ERR_PTR(-ENOENT); 1012 return ERR_PTR(-ENOENT);
459 } 1013 }
460 1014
1015got_it:
461 if (PageUptodate(page)) 1016 if (PageUptodate(page))
462 return page; 1017 return page;
463 1018
@@ -569,19 +1124,26 @@ static int __allocate_data_block(struct dnode_of_data *dn)
569 1124
570 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 1125 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
571 return -EPERM; 1126 return -EPERM;
1127
1128 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
1129 if (dn->data_blkaddr == NEW_ADDR)
1130 goto alloc;
1131
572 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 1132 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
573 return -ENOSPC; 1133 return -ENOSPC;
574 1134
1135alloc:
575 get_node_info(sbi, dn->nid, &ni); 1136 get_node_info(sbi, dn->nid, &ni);
576 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1137 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
577 1138
578 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page) 1139 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
579 seg = CURSEG_DIRECT_IO; 1140 seg = CURSEG_DIRECT_IO;
580 1141
581 allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg); 1142 allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
1143 &sum, seg);
582 1144
583 /* direct IO doesn't use extent cache to maximize the performance */ 1145 /* direct IO doesn't use extent cache to maximize the performance */
584 __set_data_blkaddr(dn); 1146 set_data_blkaddr(dn);
585 1147
586 /* update i_size */ 1148 /* update i_size */
587 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 1149 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -615,7 +1177,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
615 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); 1177 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
616 1178
617 while (dn.ofs_in_node < end_offset && len) { 1179 while (dn.ofs_in_node < end_offset && len) {
618 if (dn.data_blkaddr == NULL_ADDR) { 1180 block_t blkaddr;
1181
1182 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
1183 if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
619 if (__allocate_data_block(&dn)) 1184 if (__allocate_data_block(&dn))
620 goto sync_out; 1185 goto sync_out;
621 allocated = true; 1186 allocated = true;
@@ -659,13 +1224,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
659 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; 1224 int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
660 pgoff_t pgofs, end_offset; 1225 pgoff_t pgofs, end_offset;
661 int err = 0, ofs = 1; 1226 int err = 0, ofs = 1;
1227 struct extent_info ei;
662 bool allocated = false; 1228 bool allocated = false;
663 1229
664 /* Get the page offset from the block offset(iblock) */ 1230 /* Get the page offset from the block offset(iblock) */
665 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits)); 1231 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
666 1232
667 if (check_extent_cache(inode, pgofs, bh_result)) 1233 if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1234 f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result);
668 goto out; 1235 goto out;
1236 }
669 1237
670 if (create) 1238 if (create)
671 f2fs_lock_op(F2FS_I_SB(inode)); 1239 f2fs_lock_op(F2FS_I_SB(inode));
@@ -682,7 +1250,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
682 goto put_out; 1250 goto put_out;
683 1251
684 if (dn.data_blkaddr != NULL_ADDR) { 1252 if (dn.data_blkaddr != NULL_ADDR) {
685 set_buffer_new(bh_result); 1253 clear_buffer_new(bh_result);
686 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 1254 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
687 } else if (create) { 1255 } else if (create) {
688 err = __allocate_data_block(&dn); 1256 err = __allocate_data_block(&dn);
@@ -727,6 +1295,7 @@ get_next:
727 if (err) 1295 if (err)
728 goto sync_out; 1296 goto sync_out;
729 allocated = true; 1297 allocated = true;
1298 set_buffer_new(bh_result);
730 blkaddr = dn.data_blkaddr; 1299 blkaddr = dn.data_blkaddr;
731 } 1300 }
732 /* Give more consecutive addresses for the readahead */ 1301 /* Give more consecutive addresses for the readahead */
@@ -813,8 +1382,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
813 fio->blk_addr = dn.data_blkaddr; 1382 fio->blk_addr = dn.data_blkaddr;
814 1383
815 /* This page is already truncated */ 1384 /* This page is already truncated */
816 if (fio->blk_addr == NULL_ADDR) 1385 if (fio->blk_addr == NULL_ADDR) {
1386 ClearPageUptodate(page);
817 goto out_writepage; 1387 goto out_writepage;
1388 }
818 1389
819 set_page_writeback(page); 1390 set_page_writeback(page);
820 1391
@@ -827,10 +1398,15 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
827 need_inplace_update(inode))) { 1398 need_inplace_update(inode))) {
828 rewrite_data_page(page, fio); 1399 rewrite_data_page(page, fio);
829 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 1400 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1401 trace_f2fs_do_write_data_page(page, IPU);
830 } else { 1402 } else {
831 write_data_page(page, &dn, fio); 1403 write_data_page(page, &dn, fio);
832 update_extent_cache(&dn); 1404 set_data_blkaddr(&dn);
1405 f2fs_update_extent_cache(&dn);
1406 trace_f2fs_do_write_data_page(page, OPU);
833 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 1407 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1408 if (page->index == 0)
1409 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
834 } 1410 }
835out_writepage: 1411out_writepage:
836 f2fs_put_dnode(&dn); 1412 f2fs_put_dnode(&dn);
@@ -909,6 +1485,8 @@ done:
909 clear_cold_data(page); 1485 clear_cold_data(page);
910out: 1486out:
911 inode_dec_dirty_pages(inode); 1487 inode_dec_dirty_pages(inode);
1488 if (err)
1489 ClearPageUptodate(page);
912 unlock_page(page); 1490 unlock_page(page);
913 if (need_balance_fs) 1491 if (need_balance_fs)
914 f2fs_balance_fs(sbi); 1492 f2fs_balance_fs(sbi);
@@ -935,7 +1513,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
935{ 1513{
936 struct inode *inode = mapping->host; 1514 struct inode *inode = mapping->host;
937 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1515 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
938 bool locked = false;
939 int ret; 1516 int ret;
940 long diff; 1517 long diff;
941 1518
@@ -950,15 +1527,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
950 available_free_memory(sbi, DIRTY_DENTS)) 1527 available_free_memory(sbi, DIRTY_DENTS))
951 goto skip_write; 1528 goto skip_write;
952 1529
1530 /* during POR, we don't need to trigger writepage at all. */
1531 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1532 goto skip_write;
1533
953 diff = nr_pages_to_write(sbi, DATA, wbc); 1534 diff = nr_pages_to_write(sbi, DATA, wbc);
954 1535
955 if (!S_ISDIR(inode->i_mode)) {
956 mutex_lock(&sbi->writepages);
957 locked = true;
958 }
959 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); 1536 ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
960 if (locked)
961 mutex_unlock(&sbi->writepages);
962 1537
963 f2fs_submit_merged_bio(sbi, DATA, WRITE); 1538 f2fs_submit_merged_bio(sbi, DATA, WRITE);
964 1539
@@ -1236,6 +1811,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1236 return generic_block_bmap(mapping, block, get_data_block); 1811 return generic_block_bmap(mapping, block, get_data_block);
1237} 1812}
1238 1813
1814void init_extent_cache_info(struct f2fs_sb_info *sbi)
1815{
1816 INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
1817 init_rwsem(&sbi->extent_tree_lock);
1818 INIT_LIST_HEAD(&sbi->extent_list);
1819 spin_lock_init(&sbi->extent_lock);
1820 sbi->total_ext_tree = 0;
1821 atomic_set(&sbi->total_ext_node, 0);
1822}
1823
1824int __init create_extent_cache(void)
1825{
1826 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1827 sizeof(struct extent_tree));
1828 if (!extent_tree_slab)
1829 return -ENOMEM;
1830 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1831 sizeof(struct extent_node));
1832 if (!extent_node_slab) {
1833 kmem_cache_destroy(extent_tree_slab);
1834 return -ENOMEM;
1835 }
1836 return 0;
1837}
1838
1839void destroy_extent_cache(void)
1840{
1841 kmem_cache_destroy(extent_node_slab);
1842 kmem_cache_destroy(extent_tree_slab);
1843}
1844
1239const struct address_space_operations f2fs_dblock_aops = { 1845const struct address_space_operations f2fs_dblock_aops = {
1240 .readpage = f2fs_read_data_page, 1846 .readpage = f2fs_read_data_page,
1241 .readpages = f2fs_read_data_pages, 1847 .readpages = f2fs_read_data_pages,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index e671373cc8ab..f5388f37217e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -35,6 +35,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
35 /* validation check of the segment numbers */ 35 /* validation check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext; 36 si->hit_ext = sbi->read_hit_ext;
37 si->total_ext = sbi->total_hit_ext; 37 si->total_ext = sbi->total_hit_ext;
38 si->ext_tree = sbi->total_ext_tree;
39 si->ext_node = atomic_read(&sbi->total_ext_node);
38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); 40 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
39 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); 41 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
40 si->ndirty_dirs = sbi->n_dirty_dirs; 42 si->ndirty_dirs = sbi->n_dirty_dirs;
@@ -185,6 +187,9 @@ get_cache:
185 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry); 187 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
186 for (i = 0; i <= UPDATE_INO; i++) 188 for (i = 0; i <= UPDATE_INO; i++)
187 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 189 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
190 si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
191 si->cache_mem += atomic_read(&sbi->total_ext_node) *
192 sizeof(struct extent_node);
188 193
189 si->page_mem = 0; 194 si->page_mem = 0;
190 npages = NODE_MAPPING(sbi)->nrpages; 195 npages = NODE_MAPPING(sbi)->nrpages;
@@ -260,13 +265,20 @@ static int stat_show(struct seq_file *s, void *v)
260 seq_printf(s, "CP calls: %d\n", si->cp_count); 265 seq_printf(s, "CP calls: %d\n", si->cp_count);
261 seq_printf(s, "GC calls: %d (BG: %d)\n", 266 seq_printf(s, "GC calls: %d (BG: %d)\n",
262 si->call_count, si->bg_gc); 267 si->call_count, si->bg_gc);
263 seq_printf(s, " - data segments : %d\n", si->data_segs); 268 seq_printf(s, " - data segments : %d (%d)\n",
264 seq_printf(s, " - node segments : %d\n", si->node_segs); 269 si->data_segs, si->bg_data_segs);
265 seq_printf(s, "Try to move %d blocks\n", si->tot_blks); 270 seq_printf(s, " - node segments : %d (%d)\n",
266 seq_printf(s, " - data blocks : %d\n", si->data_blks); 271 si->node_segs, si->bg_node_segs);
267 seq_printf(s, " - node blocks : %d\n", si->node_blks); 272 seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
273 si->bg_data_blks + si->bg_node_blks);
274 seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks,
275 si->bg_data_blks);
276 seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
277 si->bg_node_blks);
268 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", 278 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
269 si->hit_ext, si->total_ext); 279 si->hit_ext, si->total_ext);
280 seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree);
281 seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node);
270 seq_puts(s, "\nBalancing F2FS Async:\n"); 282 seq_puts(s, "\nBalancing F2FS Async:\n");
271 seq_printf(s, " - inmem: %4d, wb: %4d\n", 283 seq_printf(s, " - inmem: %4d, wb: %4d\n",
272 si->inmem_pages, si->wb_pages); 284 si->inmem_pages, si->wb_pages);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b74097a7f6d9..3a3302ab7871 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -59,9 +59,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
59 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, 59 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
60}; 60};
61 61
62void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) 62void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
63{ 63{
64 umode_t mode = inode->i_mode;
65 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
66} 65}
67 66
@@ -127,22 +126,19 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
127 *max_slots = 0; 126 *max_slots = 0;
128 while (bit_pos < d->max) { 127 while (bit_pos < d->max) {
129 if (!test_bit_le(bit_pos, d->bitmap)) { 128 if (!test_bit_le(bit_pos, d->bitmap)) {
130 if (bit_pos == 0)
131 max_len = 1;
132 else if (!test_bit_le(bit_pos - 1, d->bitmap))
133 max_len++;
134 bit_pos++; 129 bit_pos++;
130 max_len++;
135 continue; 131 continue;
136 } 132 }
133
137 de = &d->dentry[bit_pos]; 134 de = &d->dentry[bit_pos];
138 if (early_match_name(name->len, namehash, de) && 135 if (early_match_name(name->len, namehash, de) &&
139 !memcmp(d->filename[bit_pos], name->name, name->len)) 136 !memcmp(d->filename[bit_pos], name->name, name->len))
140 goto found; 137 goto found;
141 138
142 if (max_slots && *max_slots >= 0 && max_len > *max_slots) { 139 if (max_slots && max_len > *max_slots)
143 *max_slots = max_len; 140 *max_slots = max_len;
144 max_len = 0; 141 max_len = 0;
145 }
146 142
147 /* remain bug on condition */ 143 /* remain bug on condition */
148 if (unlikely(!de->name_len)) 144 if (unlikely(!de->name_len))
@@ -219,14 +215,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
219 unsigned int max_depth; 215 unsigned int max_depth;
220 unsigned int level; 216 unsigned int level;
221 217
218 *res_page = NULL;
219
222 if (f2fs_has_inline_dentry(dir)) 220 if (f2fs_has_inline_dentry(dir))
223 return find_in_inline_dir(dir, child, res_page); 221 return find_in_inline_dir(dir, child, res_page);
224 222
225 if (npages == 0) 223 if (npages == 0)
226 return NULL; 224 return NULL;
227 225
228 *res_page = NULL;
229
230 name_hash = f2fs_dentry_hash(child); 226 name_hash = f2fs_dentry_hash(child);
231 max_depth = F2FS_I(dir)->i_current_depth; 227 max_depth = F2FS_I(dir)->i_current_depth;
232 228
@@ -285,7 +281,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
285 lock_page(page); 281 lock_page(page);
286 f2fs_wait_on_page_writeback(page, type); 282 f2fs_wait_on_page_writeback(page, type);
287 de->ino = cpu_to_le32(inode->i_ino); 283 de->ino = cpu_to_le32(inode->i_ino);
288 set_de_type(de, inode); 284 set_de_type(de, inode->i_mode);
289 f2fs_dentry_kunmap(dir, page); 285 f2fs_dentry_kunmap(dir, page);
290 set_page_dirty(page); 286 set_page_dirty(page);
291 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 287 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
@@ -331,14 +327,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent,
331 de->hash_code = 0; 327 de->hash_code = 0;
332 de->ino = cpu_to_le32(inode->i_ino); 328 de->ino = cpu_to_le32(inode->i_ino);
333 memcpy(d->filename[0], ".", 1); 329 memcpy(d->filename[0], ".", 1);
334 set_de_type(de, inode); 330 set_de_type(de, inode->i_mode);
335 331
336 de = &d->dentry[1]; 332 de = &d->dentry[1];
337 de->hash_code = 0; 333 de->hash_code = 0;
338 de->name_len = cpu_to_le16(2); 334 de->name_len = cpu_to_le16(2);
339 de->ino = cpu_to_le32(parent->i_ino); 335 de->ino = cpu_to_le32(parent->i_ino);
340 memcpy(d->filename[1], "..", 2); 336 memcpy(d->filename[1], "..", 2);
341 set_de_type(de, inode); 337 set_de_type(de, parent->i_mode);
342 338
343 test_and_set_bit_le(0, (void *)d->bitmap); 339 test_and_set_bit_le(0, (void *)d->bitmap);
344 test_and_set_bit_le(1, (void *)d->bitmap); 340 test_and_set_bit_le(1, (void *)d->bitmap);
@@ -435,7 +431,7 @@ error:
435void update_parent_metadata(struct inode *dir, struct inode *inode, 431void update_parent_metadata(struct inode *dir, struct inode *inode,
436 unsigned int current_depth) 432 unsigned int current_depth)
437{ 433{
438 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 434 if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
439 if (S_ISDIR(inode->i_mode)) { 435 if (S_ISDIR(inode->i_mode)) {
440 inc_nlink(dir); 436 inc_nlink(dir);
441 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 437 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
@@ -450,7 +446,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
450 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); 446 set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
451 } 447 }
452 448
453 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) 449 if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
454 clear_inode_flag(F2FS_I(inode), FI_INC_LINK); 450 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
455} 451}
456 452
@@ -474,30 +470,47 @@ next:
474 goto next; 470 goto next;
475} 471}
476 472
473void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
474 const struct qstr *name, f2fs_hash_t name_hash,
475 unsigned int bit_pos)
476{
477 struct f2fs_dir_entry *de;
478 int slots = GET_DENTRY_SLOTS(name->len);
479 int i;
480
481 de = &d->dentry[bit_pos];
482 de->hash_code = name_hash;
483 de->name_len = cpu_to_le16(name->len);
484 memcpy(d->filename[bit_pos], name->name, name->len);
485 de->ino = cpu_to_le32(ino);
486 set_de_type(de, mode);
487 for (i = 0; i < slots; i++)
488 test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
489}
490
477/* 491/*
478 * Caller should grab and release a rwsem by calling f2fs_lock_op() and 492 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
479 * f2fs_unlock_op(). 493 * f2fs_unlock_op().
480 */ 494 */
481int __f2fs_add_link(struct inode *dir, const struct qstr *name, 495int __f2fs_add_link(struct inode *dir, const struct qstr *name,
482 struct inode *inode) 496 struct inode *inode, nid_t ino, umode_t mode)
483{ 497{
484 unsigned int bit_pos; 498 unsigned int bit_pos;
485 unsigned int level; 499 unsigned int level;
486 unsigned int current_depth; 500 unsigned int current_depth;
487 unsigned long bidx, block; 501 unsigned long bidx, block;
488 f2fs_hash_t dentry_hash; 502 f2fs_hash_t dentry_hash;
489 struct f2fs_dir_entry *de;
490 unsigned int nbucket, nblock; 503 unsigned int nbucket, nblock;
491 size_t namelen = name->len; 504 size_t namelen = name->len;
492 struct page *dentry_page = NULL; 505 struct page *dentry_page = NULL;
493 struct f2fs_dentry_block *dentry_blk = NULL; 506 struct f2fs_dentry_block *dentry_blk = NULL;
507 struct f2fs_dentry_ptr d;
494 int slots = GET_DENTRY_SLOTS(namelen); 508 int slots = GET_DENTRY_SLOTS(namelen);
495 struct page *page; 509 struct page *page = NULL;
496 int err = 0; 510 int err = 0;
497 int i;
498 511
499 if (f2fs_has_inline_dentry(dir)) { 512 if (f2fs_has_inline_dentry(dir)) {
500 err = f2fs_add_inline_entry(dir, name, inode); 513 err = f2fs_add_inline_entry(dir, name, inode, ino, mode);
501 if (!err || err != -EAGAIN) 514 if (!err || err != -EAGAIN)
502 return err; 515 return err;
503 else 516 else
@@ -547,30 +560,31 @@ start:
547add_dentry: 560add_dentry:
548 f2fs_wait_on_page_writeback(dentry_page, DATA); 561 f2fs_wait_on_page_writeback(dentry_page, DATA);
549 562
550 down_write(&F2FS_I(inode)->i_sem); 563 if (inode) {
551 page = init_inode_metadata(inode, dir, name, NULL); 564 down_write(&F2FS_I(inode)->i_sem);
552 if (IS_ERR(page)) { 565 page = init_inode_metadata(inode, dir, name, NULL);
553 err = PTR_ERR(page); 566 if (IS_ERR(page)) {
554 goto fail; 567 err = PTR_ERR(page);
568 goto fail;
569 }
555 } 570 }
556 de = &dentry_blk->dentry[bit_pos]; 571
557 de->hash_code = dentry_hash; 572 make_dentry_ptr(&d, (void *)dentry_blk, 1);
558 de->name_len = cpu_to_le16(namelen); 573 f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos);
559 memcpy(dentry_blk->filename[bit_pos], name->name, name->len); 574
560 de->ino = cpu_to_le32(inode->i_ino);
561 set_de_type(de, inode);
562 for (i = 0; i < slots; i++)
563 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
564 set_page_dirty(dentry_page); 575 set_page_dirty(dentry_page);
565 576
566 /* we don't need to mark_inode_dirty now */ 577 if (inode) {
567 F2FS_I(inode)->i_pino = dir->i_ino; 578 /* we don't need to mark_inode_dirty now */
568 update_inode(inode, page); 579 F2FS_I(inode)->i_pino = dir->i_ino;
569 f2fs_put_page(page, 1); 580 update_inode(inode, page);
581 f2fs_put_page(page, 1);
582 }
570 583
571 update_parent_metadata(dir, inode, current_depth); 584 update_parent_metadata(dir, inode, current_depth);
572fail: 585fail:
573 up_write(&F2FS_I(inode)->i_sem); 586 if (inode)
587 up_write(&F2FS_I(inode)->i_sem);
574 588
575 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { 589 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
576 update_inode_page(dir); 590 update_inode_page(dir);
@@ -669,6 +683,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
669 if (bit_pos == NR_DENTRY_IN_BLOCK) { 683 if (bit_pos == NR_DENTRY_IN_BLOCK) {
670 truncate_hole(dir, page->index, page->index + 1); 684 truncate_hole(dir, page->index, page->index + 1);
671 clear_page_dirty_for_io(page); 685 clear_page_dirty_for_io(page);
686 ClearPagePrivate(page);
672 ClearPageUptodate(page); 687 ClearPageUptodate(page);
673 inode_dec_dirty_pages(dir); 688 inode_dec_dirty_pages(dir);
674 } 689 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7fa3313ab0e2..c06a25e5cec3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -50,6 +50,7 @@
50#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 50#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
51#define F2FS_MOUNT_NOBARRIER 0x00000800 51#define F2FS_MOUNT_NOBARRIER 0x00000800
52#define F2FS_MOUNT_FASTBOOT 0x00001000 52#define F2FS_MOUNT_FASTBOOT 0x00001000
53#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
53 54
54#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 55#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
55#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 56#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -102,6 +103,7 @@ enum {
102 CP_UMOUNT, 103 CP_UMOUNT,
103 CP_FASTBOOT, 104 CP_FASTBOOT,
104 CP_SYNC, 105 CP_SYNC,
106 CP_RECOVERY,
105 CP_DISCARD, 107 CP_DISCARD,
106}; 108};
107 109
@@ -216,6 +218,15 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
216#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) 218#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
217#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) 219#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
218 220
221/*
222 * should be same as XFS_IOC_GOINGDOWN.
223 * Flags for going down operation used by FS_IOC_GOINGDOWN
224 */
225#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */
226#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */
227#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
228#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
229
219#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 230#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
220/* 231/*
221 * ioctl commands in 32 bit emulation 232 * ioctl commands in 32 bit emulation
@@ -273,14 +284,34 @@ enum {
273 284
274#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ 285#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
275 286
287/* vector size for gang look-up from extent cache that consists of radix tree */
288#define EXT_TREE_VEC_SIZE 64
289
276/* for in-memory extent cache entry */ 290/* for in-memory extent cache entry */
277#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ 291#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
292
293/* number of extent info in extent cache we try to shrink */
294#define EXTENT_CACHE_SHRINK_NUMBER 128
278 295
279struct extent_info { 296struct extent_info {
280 rwlock_t ext_lock; /* rwlock for consistency */ 297 unsigned int fofs; /* start offset in a file */
281 unsigned int fofs; /* start offset in a file */ 298 u32 blk; /* start block address of the extent */
282 u32 blk_addr; /* start block address of the extent */ 299 unsigned int len; /* length of the extent */
283 unsigned int len; /* length of the extent */ 300};
301
302struct extent_node {
303 struct rb_node rb_node; /* rb node located in rb-tree */
304 struct list_head list; /* node in global extent list of sbi */
305 struct extent_info ei; /* extent info */
306};
307
308struct extent_tree {
309 nid_t ino; /* inode number */
310 struct rb_root root; /* root of extent info rb-tree */
311 struct extent_node *cached_en; /* recently accessed extent node */
312 rwlock_t lock; /* protect extent info rb-tree */
313 atomic_t refcount; /* reference count of rb-tree */
314 unsigned int count; /* # of extent node in rb-tree*/
284}; 315};
285 316
286/* 317/*
@@ -309,6 +340,7 @@ struct f2fs_inode_info {
309 nid_t i_xattr_nid; /* node id that contains xattrs */ 340 nid_t i_xattr_nid; /* node id that contains xattrs */
310 unsigned long long xattr_ver; /* cp version of xattr modification */ 341 unsigned long long xattr_ver; /* cp version of xattr modification */
311 struct extent_info ext; /* in-memory extent cache entry */ 342 struct extent_info ext; /* in-memory extent cache entry */
343 rwlock_t ext_lock; /* rwlock for single extent cache */
312 struct inode_entry *dirty_dir; /* the pointer of dirty dir */ 344 struct inode_entry *dirty_dir; /* the pointer of dirty dir */
313 345
314 struct radix_tree_root inmem_root; /* radix tree for inmem pages */ 346 struct radix_tree_root inmem_root; /* radix tree for inmem pages */
@@ -319,21 +351,51 @@ struct f2fs_inode_info {
319static inline void get_extent_info(struct extent_info *ext, 351static inline void get_extent_info(struct extent_info *ext,
320 struct f2fs_extent i_ext) 352 struct f2fs_extent i_ext)
321{ 353{
322 write_lock(&ext->ext_lock);
323 ext->fofs = le32_to_cpu(i_ext.fofs); 354 ext->fofs = le32_to_cpu(i_ext.fofs);
324 ext->blk_addr = le32_to_cpu(i_ext.blk_addr); 355 ext->blk = le32_to_cpu(i_ext.blk);
325 ext->len = le32_to_cpu(i_ext.len); 356 ext->len = le32_to_cpu(i_ext.len);
326 write_unlock(&ext->ext_lock);
327} 357}
328 358
329static inline void set_raw_extent(struct extent_info *ext, 359static inline void set_raw_extent(struct extent_info *ext,
330 struct f2fs_extent *i_ext) 360 struct f2fs_extent *i_ext)
331{ 361{
332 read_lock(&ext->ext_lock);
333 i_ext->fofs = cpu_to_le32(ext->fofs); 362 i_ext->fofs = cpu_to_le32(ext->fofs);
334 i_ext->blk_addr = cpu_to_le32(ext->blk_addr); 363 i_ext->blk = cpu_to_le32(ext->blk);
335 i_ext->len = cpu_to_le32(ext->len); 364 i_ext->len = cpu_to_le32(ext->len);
336 read_unlock(&ext->ext_lock); 365}
366
367static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
368 u32 blk, unsigned int len)
369{
370 ei->fofs = fofs;
371 ei->blk = blk;
372 ei->len = len;
373}
374
375static inline bool __is_extent_same(struct extent_info *ei1,
376 struct extent_info *ei2)
377{
378 return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk &&
379 ei1->len == ei2->len);
380}
381
382static inline bool __is_extent_mergeable(struct extent_info *back,
383 struct extent_info *front)
384{
385 return (back->fofs + back->len == front->fofs &&
386 back->blk + back->len == front->blk);
387}
388
/*
 * Return true when @cur directly follows @back, i.e. when
 * __is_extent_mergeable() holds with @back as the leading extent.
 */
static inline bool __is_back_mergeable(struct extent_info *cur,
						struct extent_info *back)
{
	bool adjacent = __is_extent_mergeable(back, cur);

	return adjacent;
}
394
395static inline bool __is_front_mergeable(struct extent_info *cur,
396 struct extent_info *front)
397{
398 return __is_extent_mergeable(cur, front);
337} 399}
338 400
339struct f2fs_nm_info { 401struct f2fs_nm_info {
@@ -502,6 +564,10 @@ enum page_type {
502 META, 564 META,
503 NR_PAGE_TYPE, 565 NR_PAGE_TYPE,
504 META_FLUSH, 566 META_FLUSH,
567 INMEM, /* the below types are used by tracepoints only. */
568 INMEM_DROP,
569 IPU,
570 OPU,
505}; 571};
506 572
507struct f2fs_io_info { 573struct f2fs_io_info {
@@ -559,7 +625,6 @@ struct f2fs_sb_info {
559 struct mutex cp_mutex; /* checkpoint procedure lock */ 625 struct mutex cp_mutex; /* checkpoint procedure lock */
560 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 626 struct rw_semaphore cp_rwsem; /* blocking FS operations */
561 struct rw_semaphore node_write; /* locking node writes */ 627 struct rw_semaphore node_write; /* locking node writes */
562 struct mutex writepages; /* mutex for writepages() */
563 wait_queue_head_t cp_wait; 628 wait_queue_head_t cp_wait;
564 629
565 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 630 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@@ -571,6 +636,14 @@ struct f2fs_sb_info {
571 struct list_head dir_inode_list; /* dir inode list */ 636 struct list_head dir_inode_list; /* dir inode list */
572 spinlock_t dir_inode_lock; /* for dir inode list lock */ 637 spinlock_t dir_inode_lock; /* for dir inode list lock */
573 638
639 /* for extent tree cache */
640 struct radix_tree_root extent_tree_root;/* cache extent cache entries */
641 struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
642 struct list_head extent_list; /* lru list for shrinker */
643 spinlock_t extent_lock; /* locking extent lru list */
644 int total_ext_tree; /* extent tree count */
645 atomic_t total_ext_node; /* extent info count */
646
574 /* basic filesystem units */ 647 /* basic filesystem units */
575 unsigned int log_sectors_per_block; /* log2 sectors per block */ 648 unsigned int log_sectors_per_block; /* log2 sectors per block */
576 unsigned int log_blocksize; /* log2 block size */ 649 unsigned int log_blocksize; /* log2 block size */
@@ -920,12 +993,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
920 return 0; 993 return 0;
921} 994}
922 995
996static inline block_t __cp_payload(struct f2fs_sb_info *sbi)
997{
998 return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
999}
1000
923static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) 1001static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
924{ 1002{
925 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 1003 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
926 int offset; 1004 int offset;
927 1005
928 if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { 1006 if (__cp_payload(sbi) > 0) {
929 if (flag == NAT_BITMAP) 1007 if (flag == NAT_BITMAP)
930 return &ckpt->sit_nat_version_bitmap; 1008 return &ckpt->sit_nat_version_bitmap;
931 else 1009 else
@@ -1166,8 +1244,10 @@ enum {
1166 FI_NEED_IPU, /* used for ipu per file */ 1244 FI_NEED_IPU, /* used for ipu per file */
1167 FI_ATOMIC_FILE, /* indicate atomic file */ 1245 FI_ATOMIC_FILE, /* indicate atomic file */
1168 FI_VOLATILE_FILE, /* indicate volatile file */ 1246 FI_VOLATILE_FILE, /* indicate volatile file */
1247 FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
1169 FI_DROP_CACHE, /* drop dirty page cache */ 1248 FI_DROP_CACHE, /* drop dirty page cache */
1170 FI_DATA_EXIST, /* indicate data exists */ 1249 FI_DATA_EXIST, /* indicate data exists */
1250 FI_INLINE_DOTS, /* indicate inline dot dentries */
1171}; 1251};
1172 1252
1173static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1253static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1204,6 +1284,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
1204 set_inode_flag(fi, FI_INLINE_DENTRY); 1284 set_inode_flag(fi, FI_INLINE_DENTRY);
1205 if (ri->i_inline & F2FS_DATA_EXIST) 1285 if (ri->i_inline & F2FS_DATA_EXIST)
1206 set_inode_flag(fi, FI_DATA_EXIST); 1286 set_inode_flag(fi, FI_DATA_EXIST);
1287 if (ri->i_inline & F2FS_INLINE_DOTS)
1288 set_inode_flag(fi, FI_INLINE_DOTS);
1207} 1289}
1208 1290
1209static inline void set_raw_inline(struct f2fs_inode_info *fi, 1291static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -1219,6 +1301,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
1219 ri->i_inline |= F2FS_INLINE_DENTRY; 1301 ri->i_inline |= F2FS_INLINE_DENTRY;
1220 if (is_inode_flag_set(fi, FI_DATA_EXIST)) 1302 if (is_inode_flag_set(fi, FI_DATA_EXIST))
1221 ri->i_inline |= F2FS_DATA_EXIST; 1303 ri->i_inline |= F2FS_DATA_EXIST;
1304 if (is_inode_flag_set(fi, FI_INLINE_DOTS))
1305 ri->i_inline |= F2FS_INLINE_DOTS;
1222} 1306}
1223 1307
1224static inline int f2fs_has_inline_xattr(struct inode *inode) 1308static inline int f2fs_has_inline_xattr(struct inode *inode)
@@ -1264,6 +1348,11 @@ static inline int f2fs_exist_data(struct inode *inode)
1264 return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); 1348 return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
1265} 1349}
1266 1350
1351static inline int f2fs_has_inline_dots(struct inode *inode)
1352{
1353 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS);
1354}
1355
1267static inline bool f2fs_is_atomic_file(struct inode *inode) 1356static inline bool f2fs_is_atomic_file(struct inode *inode)
1268{ 1357{
1269 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); 1358 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
@@ -1274,6 +1363,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode)
1274 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); 1363 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
1275} 1364}
1276 1365
1366static inline bool f2fs_is_first_block_written(struct inode *inode)
1367{
1368 return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
1369}
1370
1277static inline bool f2fs_is_drop_cache(struct inode *inode) 1371static inline bool f2fs_is_drop_cache(struct inode *inode)
1278{ 1372{
1279 return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); 1373 return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE);
@@ -1290,12 +1384,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode)
1290 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); 1384 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
1291} 1385}
1292 1386
1293static inline void *inline_dentry_addr(struct page *page)
1294{
1295 struct f2fs_inode *ri = F2FS_INODE(page);
1296 return (void *)&(ri->i_addr[1]);
1297}
1298
1299static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) 1387static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
1300{ 1388{
1301 if (!f2fs_has_inline_dentry(dir)) 1389 if (!f2fs_has_inline_dentry(dir))
@@ -1363,7 +1451,7 @@ struct dentry *f2fs_get_parent(struct dentry *child);
1363 * dir.c 1451 * dir.c
1364 */ 1452 */
1365extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; 1453extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
1366void set_de_type(struct f2fs_dir_entry *, struct inode *); 1454void set_de_type(struct f2fs_dir_entry *, umode_t);
1367struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, 1455struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
1368 struct f2fs_dentry_ptr *); 1456 struct f2fs_dentry_ptr *);
1369bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, 1457bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
@@ -1382,7 +1470,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
1382void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, 1470void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1383 struct page *, struct inode *); 1471 struct page *, struct inode *);
1384int update_dent_inode(struct inode *, const struct qstr *); 1472int update_dent_inode(struct inode *, const struct qstr *);
1385int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1473void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
1474 const struct qstr *, f2fs_hash_t , unsigned int);
1475int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
1476 umode_t);
1386void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, 1477void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
1387 struct inode *); 1478 struct inode *);
1388int f2fs_do_tmpfile(struct inode *, struct inode *); 1479int f2fs_do_tmpfile(struct inode *, struct inode *);
@@ -1392,7 +1483,7 @@ bool f2fs_empty_dir(struct inode *);
1392static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) 1483static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
1393{ 1484{
1394 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name, 1485 return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
1395 inode); 1486 inode, inode->i_ino, inode->i_mode);
1396} 1487}
1397 1488
1398/* 1489/*
@@ -1519,14 +1610,22 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
1519 struct f2fs_io_info *); 1610 struct f2fs_io_info *);
1520void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, 1611void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
1521 struct f2fs_io_info *); 1612 struct f2fs_io_info *);
1613void set_data_blkaddr(struct dnode_of_data *);
1522int reserve_new_block(struct dnode_of_data *); 1614int reserve_new_block(struct dnode_of_data *);
1523int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1615int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
1524void update_extent_cache(struct dnode_of_data *); 1616void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
1617void f2fs_destroy_extent_tree(struct inode *);
1618void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
1619void f2fs_update_extent_cache(struct dnode_of_data *);
1620void f2fs_preserve_extent_tree(struct inode *);
1525struct page *find_data_page(struct inode *, pgoff_t, bool); 1621struct page *find_data_page(struct inode *, pgoff_t, bool);
1526struct page *get_lock_data_page(struct inode *, pgoff_t); 1622struct page *get_lock_data_page(struct inode *, pgoff_t);
1527struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1623struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1528int do_write_data_page(struct page *, struct f2fs_io_info *); 1624int do_write_data_page(struct page *, struct f2fs_io_info *);
1529int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 1625int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
1626void init_extent_cache_info(struct f2fs_sb_info *);
1627int __init create_extent_cache(void);
1628void destroy_extent_cache(void);
1530void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); 1629void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
1531int f2fs_release_page(struct page *, gfp_t); 1630int f2fs_release_page(struct page *, gfp_t);
1532 1631
@@ -1554,7 +1653,7 @@ struct f2fs_stat_info {
1554 struct f2fs_sb_info *sbi; 1653 struct f2fs_sb_info *sbi;
1555 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1654 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1556 int main_area_segs, main_area_sections, main_area_zones; 1655 int main_area_segs, main_area_sections, main_area_zones;
1557 int hit_ext, total_ext; 1656 int hit_ext, total_ext, ext_tree, ext_node;
1558 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1657 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1559 int nats, dirty_nats, sits, dirty_sits, fnids; 1658 int nats, dirty_nats, sits, dirty_sits, fnids;
1560 int total_count, utilization; 1659 int total_count, utilization;
@@ -1566,7 +1665,9 @@ struct f2fs_stat_info {
1566 int dirty_count, node_pages, meta_pages; 1665 int dirty_count, node_pages, meta_pages;
1567 int prefree_count, call_count, cp_count; 1666 int prefree_count, call_count, cp_count;
1568 int tot_segs, node_segs, data_segs, free_segs, free_secs; 1667 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1668 int bg_node_segs, bg_data_segs;
1569 int tot_blks, data_blks, node_blks; 1669 int tot_blks, data_blks, node_blks;
1670 int bg_data_blks, bg_node_blks;
1570 int curseg[NR_CURSEG_TYPE]; 1671 int curseg[NR_CURSEG_TYPE];
1571 int cursec[NR_CURSEG_TYPE]; 1672 int cursec[NR_CURSEG_TYPE];
1572 int curzone[NR_CURSEG_TYPE]; 1673 int curzone[NR_CURSEG_TYPE];
@@ -1615,31 +1716,36 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1615 ((sbi)->block_count[(curseg)->alloc_type]++) 1716 ((sbi)->block_count[(curseg)->alloc_type]++)
1616#define stat_inc_inplace_blocks(sbi) \ 1717#define stat_inc_inplace_blocks(sbi) \
1617 (atomic_inc(&(sbi)->inplace_count)) 1718 (atomic_inc(&(sbi)->inplace_count))
1618#define stat_inc_seg_count(sbi, type) \ 1719#define stat_inc_seg_count(sbi, type, gc_type) \
1619 do { \ 1720 do { \
1620 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1721 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1621 (si)->tot_segs++; \ 1722 (si)->tot_segs++; \
1622 if (type == SUM_TYPE_DATA) \ 1723 if (type == SUM_TYPE_DATA) { \
1623 si->data_segs++; \ 1724 si->data_segs++; \
1624 else \ 1725 si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \
1726 } else { \
1625 si->node_segs++; \ 1727 si->node_segs++; \
1728 si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \
1729 } \
1626 } while (0) 1730 } while (0)
1627 1731
1628#define stat_inc_tot_blk_count(si, blks) \ 1732#define stat_inc_tot_blk_count(si, blks) \
1629 (si->tot_blks += (blks)) 1733 (si->tot_blks += (blks))
1630 1734
1631#define stat_inc_data_blk_count(sbi, blks) \ 1735#define stat_inc_data_blk_count(sbi, blks, gc_type) \
1632 do { \ 1736 do { \
1633 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1737 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1634 stat_inc_tot_blk_count(si, blks); \ 1738 stat_inc_tot_blk_count(si, blks); \
1635 si->data_blks += (blks); \ 1739 si->data_blks += (blks); \
1740 si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \
1636 } while (0) 1741 } while (0)
1637 1742
1638#define stat_inc_node_blk_count(sbi, blks) \ 1743#define stat_inc_node_blk_count(sbi, blks, gc_type) \
1639 do { \ 1744 do { \
1640 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1745 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
1641 stat_inc_tot_blk_count(si, blks); \ 1746 stat_inc_tot_blk_count(si, blks); \
1642 si->node_blks += (blks); \ 1747 si->node_blks += (blks); \
1748 si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
1643 } while (0) 1749 } while (0)
1644 1750
1645int f2fs_build_stats(struct f2fs_sb_info *); 1751int f2fs_build_stats(struct f2fs_sb_info *);
@@ -1661,10 +1767,10 @@ void f2fs_destroy_root_stats(void);
1661#define stat_inc_seg_type(sbi, curseg) 1767#define stat_inc_seg_type(sbi, curseg)
1662#define stat_inc_block_count(sbi, curseg) 1768#define stat_inc_block_count(sbi, curseg)
1663#define stat_inc_inplace_blocks(sbi) 1769#define stat_inc_inplace_blocks(sbi)
1664#define stat_inc_seg_count(si, type) 1770#define stat_inc_seg_count(sbi, type, gc_type)
1665#define stat_inc_tot_blk_count(si, blks) 1771#define stat_inc_tot_blk_count(si, blks)
1666#define stat_inc_data_blk_count(si, blks) 1772#define stat_inc_data_blk_count(sbi, blks, gc_type)
1667#define stat_inc_node_blk_count(sbi, blks) 1773#define stat_inc_node_blk_count(sbi, blks, gc_type)
1668 1774
1669static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } 1775static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1670static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } 1776static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
@@ -1688,6 +1794,7 @@ extern struct kmem_cache *inode_entry_slab;
1688 */ 1794 */
1689bool f2fs_may_inline(struct inode *); 1795bool f2fs_may_inline(struct inode *);
1690void read_inline_data(struct page *, struct page *); 1796void read_inline_data(struct page *, struct page *);
1797bool truncate_inline_inode(struct page *, u64);
1691int f2fs_read_inline_data(struct inode *, struct page *); 1798int f2fs_read_inline_data(struct inode *, struct page *);
1692int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); 1799int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
1693int f2fs_convert_inline_inode(struct inode *); 1800int f2fs_convert_inline_inode(struct inode *);
@@ -1697,7 +1804,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
1697 struct page **); 1804 struct page **);
1698struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); 1805struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
1699int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); 1806int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
1700int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); 1807int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
1808 nid_t, umode_t);
1701void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, 1809void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
1702 struct inode *, struct inode *); 1810 struct inode *, struct inode *);
1703bool f2fs_empty_inline_dir(struct inode *); 1811bool f2fs_empty_inline_dir(struct inode *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index df6a0596eccf..a6f3f6186588 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -241,6 +241,8 @@ go_write:
241 * will be used only for fsynced inodes after checkpoint. 241 * will be used only for fsynced inodes after checkpoint.
242 */ 242 */
243 try_to_fix_pino(inode); 243 try_to_fix_pino(inode);
244 clear_inode_flag(fi, FI_APPEND_WRITE);
245 clear_inode_flag(fi, FI_UPDATE_WRITE);
244 goto out; 246 goto out;
245 } 247 }
246sync_nodes: 248sync_nodes:
@@ -433,8 +435,12 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
433 continue; 435 continue;
434 436
435 dn->data_blkaddr = NULL_ADDR; 437 dn->data_blkaddr = NULL_ADDR;
436 update_extent_cache(dn); 438 set_data_blkaddr(dn);
439 f2fs_update_extent_cache(dn);
437 invalidate_blocks(sbi, blkaddr); 440 invalidate_blocks(sbi, blkaddr);
441 if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
442 clear_inode_flag(F2FS_I(dn->inode),
443 FI_FIRST_BLOCK_WRITTEN);
438 nr_free++; 444 nr_free++;
439 } 445 }
440 if (nr_free) { 446 if (nr_free) {
@@ -454,15 +460,16 @@ void truncate_data_blocks(struct dnode_of_data *dn)
454 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); 460 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
455} 461}
456 462
457static int truncate_partial_data_page(struct inode *inode, u64 from) 463static int truncate_partial_data_page(struct inode *inode, u64 from,
464 bool force)
458{ 465{
459 unsigned offset = from & (PAGE_CACHE_SIZE - 1); 466 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
460 struct page *page; 467 struct page *page;
461 468
462 if (!offset) 469 if (!offset && !force)
463 return 0; 470 return 0;
464 471
465 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); 472 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force);
466 if (IS_ERR(page)) 473 if (IS_ERR(page))
467 return 0; 474 return 0;
468 475
@@ -473,7 +480,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from)
473 480
474 f2fs_wait_on_page_writeback(page, DATA); 481 f2fs_wait_on_page_writeback(page, DATA);
475 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 482 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
476 set_page_dirty(page); 483 if (!force)
484 set_page_dirty(page);
477out: 485out:
478 f2fs_put_page(page, 1); 486 f2fs_put_page(page, 1);
479 return 0; 487 return 0;
@@ -487,6 +495,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
487 pgoff_t free_from; 495 pgoff_t free_from;
488 int count = 0, err = 0; 496 int count = 0, err = 0;
489 struct page *ipage; 497 struct page *ipage;
498 bool truncate_page = false;
490 499
491 trace_f2fs_truncate_blocks_enter(inode, from); 500 trace_f2fs_truncate_blocks_enter(inode, from);
492 501
@@ -502,7 +511,10 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
502 } 511 }
503 512
504 if (f2fs_has_inline_data(inode)) { 513 if (f2fs_has_inline_data(inode)) {
514 if (truncate_inline_inode(ipage, from))
515 set_page_dirty(ipage);
505 f2fs_put_page(ipage, 1); 516 f2fs_put_page(ipage, 1);
517 truncate_page = true;
506 goto out; 518 goto out;
507 } 519 }
508 520
@@ -533,7 +545,7 @@ out:
533 545
534 /* lastly zero out the first data page */ 546 /* lastly zero out the first data page */
535 if (!err) 547 if (!err)
536 err = truncate_partial_data_page(inode, from); 548 err = truncate_partial_data_page(inode, from, truncate_page);
537 549
538 trace_f2fs_truncate_blocks_exit(inode, err); 550 trace_f2fs_truncate_blocks_exit(inode, err);
539 return err; 551 return err;
@@ -997,6 +1009,9 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
997 if (!f2fs_is_volatile_file(inode)) 1009 if (!f2fs_is_volatile_file(inode))
998 return 0; 1010 return 0;
999 1011
1012 if (!f2fs_is_first_block_written(inode))
1013 return truncate_partial_data_page(inode, 0, true);
1014
1000 punch_hole(inode, 0, F2FS_BLKSIZE); 1015 punch_hole(inode, 0, F2FS_BLKSIZE);
1001 return 0; 1016 return 0;
1002} 1017}
@@ -1029,6 +1044,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
1029 return ret; 1044 return ret;
1030} 1045}
1031 1046
1047static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1048{
1049 struct inode *inode = file_inode(filp);
1050 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1051 struct super_block *sb = sbi->sb;
1052 __u32 in;
1053
1054 if (!capable(CAP_SYS_ADMIN))
1055 return -EPERM;
1056
1057 if (get_user(in, (__u32 __user *)arg))
1058 return -EFAULT;
1059
1060 switch (in) {
1061 case F2FS_GOING_DOWN_FULLSYNC:
1062 sb = freeze_bdev(sb->s_bdev);
1063 if (sb && !IS_ERR(sb)) {
1064 f2fs_stop_checkpoint(sbi);
1065 thaw_bdev(sb->s_bdev, sb);
1066 }
1067 break;
1068 case F2FS_GOING_DOWN_METASYNC:
1069 /* do checkpoint only */
1070 f2fs_sync_fs(sb, 1);
1071 f2fs_stop_checkpoint(sbi);
1072 break;
1073 case F2FS_GOING_DOWN_NOSYNC:
1074 f2fs_stop_checkpoint(sbi);
1075 break;
1076 default:
1077 return -EINVAL;
1078 }
1079 return 0;
1080}
1081
1032static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 1082static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
1033{ 1083{
1034 struct inode *inode = file_inode(filp); 1084 struct inode *inode = file_inode(filp);
@@ -1078,6 +1128,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1078 return f2fs_ioc_release_volatile_write(filp); 1128 return f2fs_ioc_release_volatile_write(filp);
1079 case F2FS_IOC_ABORT_VOLATILE_WRITE: 1129 case F2FS_IOC_ABORT_VOLATILE_WRITE:
1080 return f2fs_ioc_abort_volatile_write(filp); 1130 return f2fs_ioc_abort_volatile_write(filp);
1131 case F2FS_IOC_SHUTDOWN:
1132 return f2fs_ioc_shutdown(filp, arg);
1081 case FITRIM: 1133 case FITRIM:
1082 return f2fs_ioc_fitrim(filp, arg); 1134 return f2fs_ioc_fitrim(filp, arg);
1083 default: 1135 default:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 76adbc3641f1..ed58211fe79b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -435,7 +435,7 @@ next_step:
435 set_page_dirty(node_page); 435 set_page_dirty(node_page);
436 } 436 }
437 f2fs_put_page(node_page, 1); 437 f2fs_put_page(node_page, 1);
438 stat_inc_node_blk_count(sbi, 1); 438 stat_inc_node_blk_count(sbi, 1, gc_type);
439 } 439 }
440 440
441 if (initial) { 441 if (initial) {
@@ -622,7 +622,7 @@ next_step:
622 if (IS_ERR(data_page)) 622 if (IS_ERR(data_page))
623 continue; 623 continue;
624 move_data_page(inode, data_page, gc_type); 624 move_data_page(inode, data_page, gc_type);
625 stat_inc_data_blk_count(sbi, 1); 625 stat_inc_data_blk_count(sbi, 1, gc_type);
626 } 626 }
627 } 627 }
628 628
@@ -680,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
680 } 680 }
681 blk_finish_plug(&plug); 681 blk_finish_plug(&plug);
682 682
683 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); 683 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
684 stat_inc_call_count(sbi->stat_info); 684 stat_inc_call_count(sbi->stat_info);
685 685
686 f2fs_put_page(sum_page, 1); 686 f2fs_put_page(sum_page, 1);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1484c00133cd..8140e4f0e538 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -21,7 +21,7 @@ bool f2fs_may_inline(struct inode *inode)
21 if (f2fs_is_atomic_file(inode)) 21 if (f2fs_is_atomic_file(inode))
22 return false; 22 return false;
23 23
24 if (!S_ISREG(inode->i_mode)) 24 if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
25 return false; 25 return false;
26 26
27 if (i_size_read(inode) > MAX_INLINE_DATA) 27 if (i_size_read(inode) > MAX_INLINE_DATA)
@@ -50,10 +50,19 @@ void read_inline_data(struct page *page, struct page *ipage)
50 SetPageUptodate(page); 50 SetPageUptodate(page);
51} 51}
52 52
53static void truncate_inline_data(struct page *ipage) 53bool truncate_inline_inode(struct page *ipage, u64 from)
54{ 54{
55 void *addr;
56
57 if (from >= MAX_INLINE_DATA)
58 return false;
59
60 addr = inline_data_addr(ipage);
61
55 f2fs_wait_on_page_writeback(ipage, NODE); 62 f2fs_wait_on_page_writeback(ipage, NODE);
56 memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA); 63 memset(addr + from, 0, MAX_INLINE_DATA - from);
64
65 return true;
57} 66}
58 67
59int f2fs_read_inline_data(struct inode *inode, struct page *page) 68int f2fs_read_inline_data(struct inode *inode, struct page *page)
@@ -122,7 +131,8 @@ no_update:
122 set_page_writeback(page); 131 set_page_writeback(page);
123 fio.blk_addr = dn->data_blkaddr; 132 fio.blk_addr = dn->data_blkaddr;
124 write_data_page(page, dn, &fio); 133 write_data_page(page, dn, &fio);
125 update_extent_cache(dn); 134 set_data_blkaddr(dn);
135 f2fs_update_extent_cache(dn);
126 f2fs_wait_on_page_writeback(page, DATA); 136 f2fs_wait_on_page_writeback(page, DATA);
127 if (dirty) 137 if (dirty)
128 inode_dec_dirty_pages(dn->inode); 138 inode_dec_dirty_pages(dn->inode);
@@ -131,7 +141,7 @@ no_update:
131 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); 141 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
132 142
133 /* clear inline data and flag after data writeback */ 143 /* clear inline data and flag after data writeback */
134 truncate_inline_data(dn->inode_page); 144 truncate_inline_inode(dn->inode_page, 0);
135clear_out: 145clear_out:
136 stat_dec_inline_inode(dn->inode); 146 stat_dec_inline_inode(dn->inode);
137 f2fs_clear_inline_inode(dn->inode); 147 f2fs_clear_inline_inode(dn->inode);
@@ -245,7 +255,7 @@ process_inline:
245 if (f2fs_has_inline_data(inode)) { 255 if (f2fs_has_inline_data(inode)) {
246 ipage = get_node_page(sbi, inode->i_ino); 256 ipage = get_node_page(sbi, inode->i_ino);
247 f2fs_bug_on(sbi, IS_ERR(ipage)); 257 f2fs_bug_on(sbi, IS_ERR(ipage));
248 truncate_inline_data(ipage); 258 truncate_inline_inode(ipage, 0);
249 f2fs_clear_inline_inode(inode); 259 f2fs_clear_inline_inode(inode);
250 update_inode(inode, ipage); 260 update_inode(inode, ipage);
251 f2fs_put_page(ipage, 1); 261 f2fs_put_page(ipage, 1);
@@ -363,7 +373,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
363 set_page_dirty(page); 373 set_page_dirty(page);
364 374
365 /* clear inline dir and flag after data writeback */ 375 /* clear inline dir and flag after data writeback */
366 truncate_inline_data(ipage); 376 truncate_inline_inode(ipage, 0);
367 377
368 stat_dec_inline_dir(dir); 378 stat_dec_inline_dir(dir);
369 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); 379 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
@@ -380,21 +390,18 @@ out:
380} 390}
381 391
382int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, 392int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
383 struct inode *inode) 393 struct inode *inode, nid_t ino, umode_t mode)
384{ 394{
385 struct f2fs_sb_info *sbi = F2FS_I_SB(dir); 395 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
386 struct page *ipage; 396 struct page *ipage;
387 unsigned int bit_pos; 397 unsigned int bit_pos;
388 f2fs_hash_t name_hash; 398 f2fs_hash_t name_hash;
389 struct f2fs_dir_entry *de;
390 size_t namelen = name->len; 399 size_t namelen = name->len;
391 struct f2fs_inline_dentry *dentry_blk = NULL; 400 struct f2fs_inline_dentry *dentry_blk = NULL;
401 struct f2fs_dentry_ptr d;
392 int slots = GET_DENTRY_SLOTS(namelen); 402 int slots = GET_DENTRY_SLOTS(namelen);
393 struct page *page; 403 struct page *page = NULL;
394 int err = 0; 404 int err = 0;
395 int i;
396
397 name_hash = f2fs_dentry_hash(name);
398 405
399 ipage = get_node_page(sbi, dir->i_ino); 406 ipage = get_node_page(sbi, dir->i_ino);
400 if (IS_ERR(ipage)) 407 if (IS_ERR(ipage))
@@ -410,32 +417,34 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
410 goto out; 417 goto out;
411 } 418 }
412 419
413 down_write(&F2FS_I(inode)->i_sem); 420 if (inode) {
414 page = init_inode_metadata(inode, dir, name, ipage); 421 down_write(&F2FS_I(inode)->i_sem);
415 if (IS_ERR(page)) { 422 page = init_inode_metadata(inode, dir, name, ipage);
416 err = PTR_ERR(page); 423 if (IS_ERR(page)) {
417 goto fail; 424 err = PTR_ERR(page);
425 goto fail;
426 }
418 } 427 }
419 428
420 f2fs_wait_on_page_writeback(ipage, NODE); 429 f2fs_wait_on_page_writeback(ipage, NODE);
421 de = &dentry_blk->dentry[bit_pos]; 430
422 de->hash_code = name_hash; 431 name_hash = f2fs_dentry_hash(name);
423 de->name_len = cpu_to_le16(namelen); 432 make_dentry_ptr(&d, (void *)dentry_blk, 2);
424 memcpy(dentry_blk->filename[bit_pos], name->name, name->len); 433 f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
425 de->ino = cpu_to_le32(inode->i_ino); 434
426 set_de_type(de, inode);
427 for (i = 0; i < slots; i++)
428 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
429 set_page_dirty(ipage); 435 set_page_dirty(ipage);
430 436
431 /* we don't need to mark_inode_dirty now */ 437 /* we don't need to mark_inode_dirty now */
432 F2FS_I(inode)->i_pino = dir->i_ino; 438 if (inode) {
433 update_inode(inode, page); 439 F2FS_I(inode)->i_pino = dir->i_ino;
434 f2fs_put_page(page, 1); 440 update_inode(inode, page);
441 f2fs_put_page(page, 1);
442 }
435 443
436 update_parent_metadata(dir, inode, 0); 444 update_parent_metadata(dir, inode, 0);
437fail: 445fail:
438 up_write(&F2FS_I(inode)->i_sem); 446 if (inode)
447 up_write(&F2FS_I(inode)->i_sem);
439 448
440 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { 449 if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
441 update_inode(dir, ipage); 450 update_inode(dir, ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2d002e3738a7..e622ec95409e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -51,6 +51,15 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
51 } 51 }
52} 52}
53 53
54static bool __written_first_block(struct f2fs_inode *ri)
55{
56 block_t addr = le32_to_cpu(ri->i_addr[0]);
57
58 if (addr != NEW_ADDR && addr != NULL_ADDR)
59 return true;
60 return false;
61}
62
54static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) 63static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
55{ 64{
56 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 65 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
@@ -130,7 +139,8 @@ static int do_read_inode(struct inode *inode)
130 fi->i_pino = le32_to_cpu(ri->i_pino); 139 fi->i_pino = le32_to_cpu(ri->i_pino);
131 fi->i_dir_level = ri->i_dir_level; 140 fi->i_dir_level = ri->i_dir_level;
132 141
133 get_extent_info(&fi->ext, ri->i_ext); 142 f2fs_init_extent_cache(inode, &ri->i_ext);
143
134 get_inline_info(fi, ri); 144 get_inline_info(fi, ri);
135 145
136 /* check data exist */ 146 /* check data exist */
@@ -140,6 +150,9 @@ static int do_read_inode(struct inode *inode)
140 /* get rdev by using inline_info */ 150 /* get rdev by using inline_info */
141 __get_inode_rdev(inode, ri); 151 __get_inode_rdev(inode, ri);
142 152
153 if (__written_first_block(ri))
154 set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
155
143 f2fs_put_page(node_page, 1); 156 f2fs_put_page(node_page, 1);
144 157
145 stat_inc_inline_inode(inode); 158 stat_inc_inline_inode(inode);
@@ -220,7 +233,11 @@ void update_inode(struct inode *inode, struct page *node_page)
220 ri->i_links = cpu_to_le32(inode->i_nlink); 233 ri->i_links = cpu_to_le32(inode->i_nlink);
221 ri->i_size = cpu_to_le64(i_size_read(inode)); 234 ri->i_size = cpu_to_le64(i_size_read(inode));
222 ri->i_blocks = cpu_to_le64(inode->i_blocks); 235 ri->i_blocks = cpu_to_le64(inode->i_blocks);
236
237 read_lock(&F2FS_I(inode)->ext_lock);
223 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); 238 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
239 read_unlock(&F2FS_I(inode)->ext_lock);
240
224 set_raw_inline(F2FS_I(inode), ri); 241 set_raw_inline(F2FS_I(inode), ri);
225 242
226 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 243 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -328,6 +345,12 @@ void f2fs_evict_inode(struct inode *inode)
328no_delete: 345no_delete:
329 stat_dec_inline_dir(inode); 346 stat_dec_inline_dir(inode);
330 stat_dec_inline_inode(inode); 347 stat_dec_inline_inode(inode);
348
349 /* update extent info in inode */
350 if (inode->i_nlink)
351 f2fs_preserve_extent_tree(inode);
352 f2fs_destroy_extent_tree(inode);
353
331 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 354 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
332 if (xnid) 355 if (xnid)
333 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); 356 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e79639a9787a..407dde3d7a92 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -14,6 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dcache.h> 16#include <linux/dcache.h>
17#include <linux/namei.h>
17 18
18#include "f2fs.h" 19#include "f2fs.h"
19#include "node.h" 20#include "node.h"
@@ -187,6 +188,44 @@ struct dentry *f2fs_get_parent(struct dentry *child)
187 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino)); 188 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
188} 189}
189 190
191static int __recover_dot_dentries(struct inode *dir, nid_t pino)
192{
193 struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
194 struct qstr dot = QSTR_INIT(".", 1);
195 struct qstr dotdot = QSTR_INIT("..", 2);
196 struct f2fs_dir_entry *de;
197 struct page *page;
198 int err = 0;
199
200 f2fs_lock_op(sbi);
201
202 de = f2fs_find_entry(dir, &dot, &page);
203 if (de) {
204 f2fs_dentry_kunmap(dir, page);
205 f2fs_put_page(page, 0);
206 } else {
207 err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
208 if (err)
209 goto out;
210 }
211
212 de = f2fs_find_entry(dir, &dotdot, &page);
213 if (de) {
214 f2fs_dentry_kunmap(dir, page);
215 f2fs_put_page(page, 0);
216 } else {
217 err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
218 }
219out:
220 if (!err) {
221 clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS);
222 mark_inode_dirty(dir);
223 }
224
225 f2fs_unlock_op(sbi);
226 return err;
227}
228
190static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, 229static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
191 unsigned int flags) 230 unsigned int flags)
192{ 231{
@@ -206,6 +245,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
206 inode = f2fs_iget(dir->i_sb, ino); 245 inode = f2fs_iget(dir->i_sb, ino);
207 if (IS_ERR(inode)) 246 if (IS_ERR(inode))
208 return ERR_CAST(inode); 247 return ERR_CAST(inode);
248
249 if (f2fs_has_inline_dots(inode)) {
250 int err;
251
252 err = __recover_dot_dentries(inode, dir->i_ino);
253 if (err) {
254 iget_failed(inode);
255 return ERR_PTR(err);
256 }
257 }
209 } 258 }
210 259
211 return d_splice_alias(inode, dentry); 260 return d_splice_alias(inode, dentry);
@@ -247,6 +296,23 @@ fail:
247 return err; 296 return err;
248} 297}
249 298
299static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
300{
301 struct page *page;
302
303 page = page_follow_link_light(dentry, nd);
304 if (IS_ERR(page))
305 return page;
306
307 /* this is broken symlink case */
308 if (*nd_get_link(nd) == 0) {
309 kunmap(page);
310 page_cache_release(page);
311 return ERR_PTR(-ENOENT);
312 }
313 return page;
314}
315
250static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 316static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
251 const char *symname) 317 const char *symname)
252{ 318{
@@ -276,6 +342,17 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
276 d_instantiate(dentry, inode); 342 d_instantiate(dentry, inode);
277 unlock_new_inode(inode); 343 unlock_new_inode(inode);
278 344
345 /*
346 * Let's flush symlink data in order to avoid broken symlink as much as
347 * possible. Nevertheless, fsyncing is the best way, but there is no
348 * way to get a file descriptor in order to flush that.
349 *
350 * Note that, it needs to do dir->fsync to make this recoverable.
351 * If the symlink path is stored into inline_data, there is no
352 * performance regression.
353 */
354 filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1);
355
279 if (IS_DIRSYNC(dir)) 356 if (IS_DIRSYNC(dir))
280 f2fs_sync_fs(sbi->sb, 1); 357 f2fs_sync_fs(sbi->sb, 1);
281 return err; 358 return err;
@@ -693,6 +770,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
693 f2fs_unlock_op(sbi); 770 f2fs_unlock_op(sbi);
694 771
695 alloc_nid_done(sbi, inode->i_ino); 772 alloc_nid_done(sbi, inode->i_ino);
773
774 stat_inc_inline_inode(inode);
696 d_tmpfile(dentry, inode); 775 d_tmpfile(dentry, inode);
697 unlock_new_inode(inode); 776 unlock_new_inode(inode);
698 return 0; 777 return 0;
@@ -729,7 +808,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
729 808
730const struct inode_operations f2fs_symlink_inode_operations = { 809const struct inode_operations f2fs_symlink_inode_operations = {
731 .readlink = generic_readlink, 810 .readlink = generic_readlink,
732 .follow_link = page_follow_link_light, 811 .follow_link = f2fs_follow_link,
733 .put_link = page_put_link, 812 .put_link = page_put_link,
734 .getattr = f2fs_getattr, 813 .getattr = f2fs_getattr,
735 .setattr = f2fs_setattr, 814 .setattr = f2fs_setattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 97bd9d3db882..8ab0cf1930bd 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
41 /* only uses low memory */ 41 /* only uses low memory */
42 avail_ram = val.totalram - val.totalhigh; 42 avail_ram = val.totalram - val.totalhigh;
43 43
44 /* give 25%, 25%, 50%, 50% memory for each components respectively */ 44 /*
45 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
46 */
45 if (type == FREE_NIDS) { 47 if (type == FREE_NIDS) {
46 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 48 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
47 PAGE_CACHE_SHIFT; 49 PAGE_CACHE_SHIFT;
@@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
62 mem_size += (sbi->im[i].ino_num * 64 mem_size += (sbi->im[i].ino_num *
63 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; 65 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
64 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 66 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
67 } else if (type == EXTENT_CACHE) {
68 mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
69 atomic_read(&sbi->total_ext_node) *
70 sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
71 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
65 } else { 72 } else {
66 if (sbi->sb->s_bdi->dirty_exceeded) 73 if (sbi->sb->s_bdi->dirty_exceeded)
67 return false; 74 return false;
@@ -494,7 +501,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
494 501
495 /* if inline_data is set, should not report any block indices */ 502 /* if inline_data is set, should not report any block indices */
496 if (f2fs_has_inline_data(dn->inode) && index) { 503 if (f2fs_has_inline_data(dn->inode) && index) {
497 err = -EINVAL; 504 err = -ENOENT;
498 f2fs_put_page(npage[0], 1); 505 f2fs_put_page(npage[0], 1);
499 goto release_out; 506 goto release_out;
500 } 507 }
@@ -995,6 +1002,7 @@ static int read_node_page(struct page *page, int rw)
995 get_node_info(sbi, page->index, &ni); 1002 get_node_info(sbi, page->index, &ni);
996 1003
997 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1004 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1005 ClearPageUptodate(page);
998 f2fs_put_page(page, 1); 1006 f2fs_put_page(page, 1);
999 return -ENOENT; 1007 return -ENOENT;
1000 } 1008 }
@@ -1306,6 +1314,7 @@ static int f2fs_write_node_page(struct page *page,
1306 1314
1307 /* This page is already truncated */ 1315 /* This page is already truncated */
1308 if (unlikely(ni.blk_addr == NULL_ADDR)) { 1316 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1317 ClearPageUptodate(page);
1309 dec_page_count(sbi, F2FS_DIRTY_NODES); 1318 dec_page_count(sbi, F2FS_DIRTY_NODES);
1310 unlock_page(page); 1319 unlock_page(page);
1311 return 0; 1320 return 0;
@@ -1821,6 +1830,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1821 struct f2fs_nat_block *nat_blk; 1830 struct f2fs_nat_block *nat_blk;
1822 struct nat_entry *ne, *cur; 1831 struct nat_entry *ne, *cur;
1823 struct page *page = NULL; 1832 struct page *page = NULL;
1833 struct f2fs_nm_info *nm_i = NM_I(sbi);
1824 1834
1825 /* 1835 /*
1826 * there are two steps to flush nat entries: 1836 * there are two steps to flush nat entries:
@@ -1874,7 +1884,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1874 1884
1875 f2fs_bug_on(sbi, set->entry_cnt); 1885 f2fs_bug_on(sbi, set->entry_cnt);
1876 1886
1887 down_write(&nm_i->nat_tree_lock);
1877 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); 1888 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1889 up_write(&nm_i->nat_tree_lock);
1878 kmem_cache_free(nat_entry_set_slab, set); 1890 kmem_cache_free(nat_entry_set_slab, set);
1879} 1891}
1880 1892
@@ -1902,6 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1902 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) 1914 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1903 remove_nats_in_journal(sbi); 1915 remove_nats_in_journal(sbi);
1904 1916
1917 down_write(&nm_i->nat_tree_lock);
1905 while ((found = __gang_lookup_nat_set(nm_i, 1918 while ((found = __gang_lookup_nat_set(nm_i,
1906 set_idx, SETVEC_SIZE, setvec))) { 1919 set_idx, SETVEC_SIZE, setvec))) {
1907 unsigned idx; 1920 unsigned idx;
@@ -1910,6 +1923,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1910 __adjust_nat_entry_set(setvec[idx], &sets, 1923 __adjust_nat_entry_set(setvec[idx], &sets,
1911 MAX_NAT_JENTRIES(sum)); 1924 MAX_NAT_JENTRIES(sum));
1912 } 1925 }
1926 up_write(&nm_i->nat_tree_lock);
1913 1927
1914 /* flush dirty nats in nat entry set */ 1928 /* flush dirty nats in nat entry set */
1915 list_for_each_entry_safe(set, tmp, &sets, set_list) 1929 list_for_each_entry_safe(set, tmp, &sets, set_list)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index f405bbf2435a..c56026f1725c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -120,6 +120,7 @@ enum mem_type {
120 NAT_ENTRIES, /* indicates the cached nat entry */ 120 NAT_ENTRIES, /* indicates the cached nat entry */
121 DIRTY_DENTS, /* indicates dirty dentry pages */ 121 DIRTY_DENTS, /* indicates dirty dentry pages */
122 INO_ENTRIES, /* indicates inode entries */ 122 INO_ENTRIES, /* indicates inode entries */
123 EXTENT_CACHE, /* indicates extent cache */
123 BASE_CHECK, /* check kernel status */ 124 BASE_CHECK, /* check kernel status */
124}; 125};
125 126
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 41afb9534bbd..8d8ea99f2156 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -93,10 +93,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
93 } 93 }
94retry: 94retry:
95 de = f2fs_find_entry(dir, &name, &page); 95 de = f2fs_find_entry(dir, &name, &page);
96 if (de && inode->i_ino == le32_to_cpu(de->ino)) { 96 if (de && inode->i_ino == le32_to_cpu(de->ino))
97 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
98 goto out_unmap_put; 97 goto out_unmap_put;
99 } 98
100 if (de) { 99 if (de) {
101 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); 100 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
102 if (IS_ERR(einode)) { 101 if (IS_ERR(einode)) {
@@ -115,7 +114,7 @@ retry:
115 iput(einode); 114 iput(einode);
116 goto retry; 115 goto retry;
117 } 116 }
118 err = __f2fs_add_link(dir, &name, inode); 117 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
119 if (err) 118 if (err)
120 goto out_err; 119 goto out_err;
121 120
@@ -187,11 +186,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
187 goto next; 186 goto next;
188 187
189 entry = get_fsync_inode(head, ino_of_node(page)); 188 entry = get_fsync_inode(head, ino_of_node(page));
190 if (entry) { 189 if (!entry) {
191 if (IS_INODE(page) && is_dent_dnode(page))
192 set_inode_flag(F2FS_I(entry->inode),
193 FI_INC_LINK);
194 } else {
195 if (IS_INODE(page) && is_dent_dnode(page)) { 190 if (IS_INODE(page) && is_dent_dnode(page)) {
196 err = recover_inode_page(sbi, page); 191 err = recover_inode_page(sbi, page);
197 if (err) 192 if (err)
@@ -212,8 +207,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
212 if (IS_ERR(entry->inode)) { 207 if (IS_ERR(entry->inode)) {
213 err = PTR_ERR(entry->inode); 208 err = PTR_ERR(entry->inode);
214 kmem_cache_free(fsync_entry_slab, entry); 209 kmem_cache_free(fsync_entry_slab, entry);
215 if (err == -ENOENT) 210 if (err == -ENOENT) {
211 err = 0;
216 goto next; 212 goto next;
213 }
217 break; 214 break;
218 } 215 }
219 list_add_tail(&entry->list, head); 216 list_add_tail(&entry->list, head);
@@ -256,6 +253,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
256 struct f2fs_summary_block *sum_node; 253 struct f2fs_summary_block *sum_node;
257 struct f2fs_summary sum; 254 struct f2fs_summary sum;
258 struct page *sum_page, *node_page; 255 struct page *sum_page, *node_page;
256 struct dnode_of_data tdn = *dn;
259 nid_t ino, nid; 257 nid_t ino, nid;
260 struct inode *inode; 258 struct inode *inode;
261 unsigned int offset; 259 unsigned int offset;
@@ -283,17 +281,15 @@ got_it:
283 /* Use the locked dnode page and inode */ 281 /* Use the locked dnode page and inode */
284 nid = le32_to_cpu(sum.nid); 282 nid = le32_to_cpu(sum.nid);
285 if (dn->inode->i_ino == nid) { 283 if (dn->inode->i_ino == nid) {
286 struct dnode_of_data tdn = *dn;
287 tdn.nid = nid; 284 tdn.nid = nid;
285 if (!dn->inode_page_locked)
286 lock_page(dn->inode_page);
288 tdn.node_page = dn->inode_page; 287 tdn.node_page = dn->inode_page;
289 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 288 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
290 truncate_data_blocks_range(&tdn, 1); 289 goto truncate_out;
291 return 0;
292 } else if (dn->nid == nid) { 290 } else if (dn->nid == nid) {
293 struct dnode_of_data tdn = *dn;
294 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); 291 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
295 truncate_data_blocks_range(&tdn, 1); 292 goto truncate_out;
296 return 0;
297 } 293 }
298 294
299 /* Get the node page */ 295 /* Get the node page */
@@ -317,18 +313,33 @@ got_it:
317 bidx = start_bidx_of_node(offset, F2FS_I(inode)) + 313 bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
318 le16_to_cpu(sum.ofs_in_node); 314 le16_to_cpu(sum.ofs_in_node);
319 315
320 if (ino != dn->inode->i_ino) { 316 /*
321 truncate_hole(inode, bidx, bidx + 1); 317 * if inode page is locked, unlock temporarily, but its reference
318 * count keeps alive.
319 */
320 if (ino == dn->inode->i_ino && dn->inode_page_locked)
321 unlock_page(dn->inode_page);
322
323 set_new_dnode(&tdn, inode, NULL, NULL, 0);
324 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
325 goto out;
326
327 if (tdn.data_blkaddr == blkaddr)
328 truncate_data_blocks_range(&tdn, 1);
329
330 f2fs_put_dnode(&tdn);
331out:
332 if (ino != dn->inode->i_ino)
322 iput(inode); 333 iput(inode);
323 } else { 334 else if (dn->inode_page_locked)
324 struct dnode_of_data tdn; 335 lock_page(dn->inode_page);
325 set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0); 336 return 0;
326 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) 337
327 return 0; 338truncate_out:
328 if (tdn.data_blkaddr != NULL_ADDR) 339 if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
329 truncate_data_blocks_range(&tdn, 1); 340 truncate_data_blocks_range(&tdn, 1);
330 f2fs_put_page(tdn.node_page, 1); 341 if (dn->inode->i_ino == nid && !dn->inode_page_locked)
331 } 342 unlock_page(dn->inode_page);
332 return 0; 343 return 0;
333} 344}
334 345
@@ -384,7 +395,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
384 src = datablock_addr(dn.node_page, dn.ofs_in_node); 395 src = datablock_addr(dn.node_page, dn.ofs_in_node);
385 dest = datablock_addr(page, dn.ofs_in_node); 396 dest = datablock_addr(page, dn.ofs_in_node);
386 397
387 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) { 398 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR &&
399 dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) {
400
388 if (src == NULL_ADDR) { 401 if (src == NULL_ADDR) {
389 err = reserve_new_block(&dn); 402 err = reserve_new_block(&dn);
390 /* We should not get -ENOSPC */ 403 /* We should not get -ENOSPC */
@@ -401,14 +414,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
401 /* write dummy data page */ 414 /* write dummy data page */
402 recover_data_page(sbi, NULL, &sum, src, dest); 415 recover_data_page(sbi, NULL, &sum, src, dest);
403 dn.data_blkaddr = dest; 416 dn.data_blkaddr = dest;
404 update_extent_cache(&dn); 417 set_data_blkaddr(&dn);
418 f2fs_update_extent_cache(&dn);
405 recovered++; 419 recovered++;
406 } 420 }
407 dn.ofs_in_node++; 421 dn.ofs_in_node++;
408 } 422 }
409 423
410 /* write node page in place */
411 set_summary(&sum, dn.nid, 0, 0);
412 if (IS_INODE(dn.node_page)) 424 if (IS_INODE(dn.node_page))
413 sync_inode_page(&dn); 425 sync_inode_page(&dn);
414 426
@@ -552,7 +564,7 @@ out:
552 mutex_unlock(&sbi->cp_mutex); 564 mutex_unlock(&sbi->cp_mutex);
553 } else if (need_writecp) { 565 } else if (need_writecp) {
554 struct cp_control cpc = { 566 struct cp_control cpc = {
555 .reason = CP_SYNC, 567 .reason = CP_RECOVERY,
556 }; 568 };
557 mutex_unlock(&sbi->cp_mutex); 569 mutex_unlock(&sbi->cp_mutex);
558 write_checkpoint(sbi, &cpc); 570 write_checkpoint(sbi, &cpc);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index daee4ab913da..f939660941bb 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -205,6 +205,8 @@ retry:
205 list_add_tail(&new->list, &fi->inmem_pages); 205 list_add_tail(&new->list, &fi->inmem_pages);
206 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 206 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
207 mutex_unlock(&fi->inmem_lock); 207 mutex_unlock(&fi->inmem_lock);
208
209 trace_f2fs_register_inmem_page(page, INMEM);
208} 210}
209 211
210void commit_inmem_pages(struct inode *inode, bool abort) 212void commit_inmem_pages(struct inode *inode, bool abort)
@@ -238,11 +240,13 @@ void commit_inmem_pages(struct inode *inode, bool abort)
238 f2fs_wait_on_page_writeback(cur->page, DATA); 240 f2fs_wait_on_page_writeback(cur->page, DATA);
239 if (clear_page_dirty_for_io(cur->page)) 241 if (clear_page_dirty_for_io(cur->page))
240 inode_dec_dirty_pages(inode); 242 inode_dec_dirty_pages(inode);
243 trace_f2fs_commit_inmem_page(cur->page, INMEM);
241 do_write_data_page(cur->page, &fio); 244 do_write_data_page(cur->page, &fio);
242 submit_bio = true; 245 submit_bio = true;
243 } 246 }
244 f2fs_put_page(cur->page, 1); 247 f2fs_put_page(cur->page, 1);
245 } else { 248 } else {
249 trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
246 put_page(cur->page); 250 put_page(cur->page);
247 } 251 }
248 radix_tree_delete(&fi->inmem_root, cur->page->index); 252 radix_tree_delete(&fi->inmem_root, cur->page->index);
@@ -277,6 +281,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
277 281
278void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) 282void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
279{ 283{
284 /* try to shrink extent cache when there is no enough memory */
285 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
286
280 /* check the # of cached NAT entries and prefree segments */ 287 /* check the # of cached NAT entries and prefree segments */
281 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || 288 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
282 excess_prefree_segs(sbi) || 289 excess_prefree_segs(sbi) ||
@@ -549,7 +556,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
549 556
550 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); 557 end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
551 558
552 if (end - start < cpc->trim_minlen) 559 if (force && end - start < cpc->trim_minlen)
553 continue; 560 continue;
554 561
555 __add_discard_entry(sbi, cpc, start, end); 562 __add_discard_entry(sbi, cpc, start, end);
@@ -1164,6 +1171,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1164 curseg = CURSEG_I(sbi, type); 1171 curseg = CURSEG_I(sbi, type);
1165 1172
1166 mutex_lock(&curseg->curseg_mutex); 1173 mutex_lock(&curseg->curseg_mutex);
1174 mutex_lock(&sit_i->sentry_lock);
1167 1175
1168 /* direct_io'ed data is aligned to the segment for better performance */ 1176 /* direct_io'ed data is aligned to the segment for better performance */
1169 if (direct_io && curseg->next_blkoff) 1177 if (direct_io && curseg->next_blkoff)
@@ -1178,7 +1186,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1178 */ 1186 */
1179 __add_sum_entry(sbi, type, sum); 1187 __add_sum_entry(sbi, type, sum);
1180 1188
1181 mutex_lock(&sit_i->sentry_lock);
1182 __refresh_next_blkoff(sbi, curseg); 1189 __refresh_next_blkoff(sbi, curseg);
1183 1190
1184 stat_inc_block_count(sbi, curseg); 1191 stat_inc_block_count(sbi, curseg);
@@ -1730,6 +1737,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1730 mutex_lock(&curseg->curseg_mutex); 1737 mutex_lock(&curseg->curseg_mutex);
1731 mutex_lock(&sit_i->sentry_lock); 1738 mutex_lock(&sit_i->sentry_lock);
1732 1739
1740 if (!sit_i->dirty_sentries)
1741 goto out;
1742
1733 /* 1743 /*
1734 * add and account sit entries of dirty bitmap in sit entry 1744 * add and account sit entries of dirty bitmap in sit entry
1735 * set temporarily 1745 * set temporarily
@@ -1744,9 +1754,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1744 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL)) 1754 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1745 remove_sits_in_journal(sbi); 1755 remove_sits_in_journal(sbi);
1746 1756
1747 if (!sit_i->dirty_sentries)
1748 goto out;
1749
1750 /* 1757 /*
1751 * there are two steps to flush sit entries: 1758 * there are two steps to flush sit entries:
1752 * #1, flush sit entries to journal in current cold data summary block. 1759 * #1, flush sit entries to journal in current cold data summary block.
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7fd35111cf62..85d7fa7514b2 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -336,7 +336,8 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
336 clear_bit(segno, free_i->free_segmap); 336 clear_bit(segno, free_i->free_segmap);
337 free_i->free_segments++; 337 free_i->free_segments++;
338 338
339 next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno); 339 next = find_next_bit(free_i->free_segmap,
340 start_segno + sbi->segs_per_sec, start_segno);
340 if (next >= start_segno + sbi->segs_per_sec) { 341 if (next >= start_segno + sbi->segs_per_sec) {
341 clear_bit(secno, free_i->free_secmap); 342 clear_bit(secno, free_i->free_secmap);
342 free_i->free_sections++; 343 free_i->free_sections++;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f2fe666a6ea9..160b88346b24 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -57,6 +57,8 @@ enum {
57 Opt_flush_merge, 57 Opt_flush_merge,
58 Opt_nobarrier, 58 Opt_nobarrier,
59 Opt_fastboot, 59 Opt_fastboot,
60 Opt_extent_cache,
61 Opt_noinline_data,
60 Opt_err, 62 Opt_err,
61}; 63};
62 64
@@ -78,6 +80,8 @@ static match_table_t f2fs_tokens = {
78 {Opt_flush_merge, "flush_merge"}, 80 {Opt_flush_merge, "flush_merge"},
79 {Opt_nobarrier, "nobarrier"}, 81 {Opt_nobarrier, "nobarrier"},
80 {Opt_fastboot, "fastboot"}, 82 {Opt_fastboot, "fastboot"},
83 {Opt_extent_cache, "extent_cache"},
84 {Opt_noinline_data, "noinline_data"},
81 {Opt_err, NULL}, 85 {Opt_err, NULL},
82}; 86};
83 87
@@ -367,6 +371,12 @@ static int parse_options(struct super_block *sb, char *options)
367 case Opt_fastboot: 371 case Opt_fastboot:
368 set_opt(sbi, FASTBOOT); 372 set_opt(sbi, FASTBOOT);
369 break; 373 break;
374 case Opt_extent_cache:
375 set_opt(sbi, EXTENT_CACHE);
376 break;
377 case Opt_noinline_data:
378 clear_opt(sbi, INLINE_DATA);
379 break;
370 default: 380 default:
371 f2fs_msg(sb, KERN_ERR, 381 f2fs_msg(sb, KERN_ERR,
372 "Unrecognized mount option \"%s\" or missing value", 382 "Unrecognized mount option \"%s\" or missing value",
@@ -392,7 +402,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
392 atomic_set(&fi->dirty_pages, 0); 402 atomic_set(&fi->dirty_pages, 0);
393 fi->i_current_depth = 1; 403 fi->i_current_depth = 1;
394 fi->i_advise = 0; 404 fi->i_advise = 0;
395 rwlock_init(&fi->ext.ext_lock); 405 rwlock_init(&fi->ext_lock);
396 init_rwsem(&fi->i_sem); 406 init_rwsem(&fi->i_sem);
397 INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); 407 INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
398 INIT_LIST_HEAD(&fi->inmem_pages); 408 INIT_LIST_HEAD(&fi->inmem_pages);
@@ -591,6 +601,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
591 seq_puts(seq, ",disable_ext_identify"); 601 seq_puts(seq, ",disable_ext_identify");
592 if (test_opt(sbi, INLINE_DATA)) 602 if (test_opt(sbi, INLINE_DATA))
593 seq_puts(seq, ",inline_data"); 603 seq_puts(seq, ",inline_data");
604 else
605 seq_puts(seq, ",noinline_data");
594 if (test_opt(sbi, INLINE_DENTRY)) 606 if (test_opt(sbi, INLINE_DENTRY))
595 seq_puts(seq, ",inline_dentry"); 607 seq_puts(seq, ",inline_dentry");
596 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 608 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
@@ -599,6 +611,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
599 seq_puts(seq, ",nobarrier"); 611 seq_puts(seq, ",nobarrier");
600 if (test_opt(sbi, FASTBOOT)) 612 if (test_opt(sbi, FASTBOOT))
601 seq_puts(seq, ",fastboot"); 613 seq_puts(seq, ",fastboot");
614 if (test_opt(sbi, EXTENT_CACHE))
615 seq_puts(seq, ",extent_cache");
602 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 616 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
603 617
604 return 0; 618 return 0;
@@ -959,7 +973,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
959 struct buffer_head *raw_super_buf; 973 struct buffer_head *raw_super_buf;
960 struct inode *root; 974 struct inode *root;
961 long err = -EINVAL; 975 long err = -EINVAL;
962 bool retry = true; 976 bool retry = true, need_fsck = false;
963 char *options = NULL; 977 char *options = NULL;
964 int i; 978 int i;
965 979
@@ -984,6 +998,7 @@ try_onemore:
984 sbi->active_logs = NR_CURSEG_TYPE; 998 sbi->active_logs = NR_CURSEG_TYPE;
985 999
986 set_opt(sbi, BG_GC); 1000 set_opt(sbi, BG_GC);
1001 set_opt(sbi, INLINE_DATA);
987 1002
988#ifdef CONFIG_F2FS_FS_XATTR 1003#ifdef CONFIG_F2FS_FS_XATTR
989 set_opt(sbi, XATTR_USER); 1004 set_opt(sbi, XATTR_USER);
@@ -1020,7 +1035,6 @@ try_onemore:
1020 sbi->raw_super = raw_super; 1035 sbi->raw_super = raw_super;
1021 sbi->raw_super_buf = raw_super_buf; 1036 sbi->raw_super_buf = raw_super_buf;
1022 mutex_init(&sbi->gc_mutex); 1037 mutex_init(&sbi->gc_mutex);
1023 mutex_init(&sbi->writepages);
1024 mutex_init(&sbi->cp_mutex); 1038 mutex_init(&sbi->cp_mutex);
1025 init_rwsem(&sbi->node_write); 1039 init_rwsem(&sbi->node_write);
1026 clear_sbi_flag(sbi, SBI_POR_DOING); 1040 clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -1072,6 +1086,8 @@ try_onemore:
1072 INIT_LIST_HEAD(&sbi->dir_inode_list); 1086 INIT_LIST_HEAD(&sbi->dir_inode_list);
1073 spin_lock_init(&sbi->dir_inode_lock); 1087 spin_lock_init(&sbi->dir_inode_lock);
1074 1088
1089 init_extent_cache_info(sbi);
1090
1075 init_ino_entry_info(sbi); 1091 init_ino_entry_info(sbi);
1076 1092
1077 /* setup f2fs internal modules */ 1093 /* setup f2fs internal modules */
@@ -1146,9 +1162,6 @@ try_onemore:
1146 if (err) 1162 if (err)
1147 goto free_proc; 1163 goto free_proc;
1148 1164
1149 if (!retry)
1150 set_sbi_flag(sbi, SBI_NEED_FSCK);
1151
1152 /* recover fsynced data */ 1165 /* recover fsynced data */
1153 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1166 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1154 /* 1167 /*
@@ -1160,8 +1173,13 @@ try_onemore:
1160 err = -EROFS; 1173 err = -EROFS;
1161 goto free_kobj; 1174 goto free_kobj;
1162 } 1175 }
1176
1177 if (need_fsck)
1178 set_sbi_flag(sbi, SBI_NEED_FSCK);
1179
1163 err = recover_fsync_data(sbi); 1180 err = recover_fsync_data(sbi);
1164 if (err) { 1181 if (err) {
1182 need_fsck = true;
1165 f2fs_msg(sb, KERN_ERR, 1183 f2fs_msg(sb, KERN_ERR,
1166 "Cannot recover all fsync data errno=%ld", err); 1184 "Cannot recover all fsync data errno=%ld", err);
1167 goto free_kobj; 1185 goto free_kobj;
@@ -1212,7 +1230,7 @@ free_sbi:
1212 1230
1213 /* give only one another chance */ 1231 /* give only one another chance */
1214 if (retry) { 1232 if (retry) {
1215 retry = 0; 1233 retry = false;
1216 shrink_dcache_sb(sb); 1234 shrink_dcache_sb(sb);
1217 goto try_onemore; 1235 goto try_onemore;
1218 } 1236 }
@@ -1278,10 +1296,13 @@ static int __init init_f2fs_fs(void)
1278 err = create_checkpoint_caches(); 1296 err = create_checkpoint_caches();
1279 if (err) 1297 if (err)
1280 goto free_segment_manager_caches; 1298 goto free_segment_manager_caches;
1299 err = create_extent_cache();
1300 if (err)
1301 goto free_checkpoint_caches;
1281 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); 1302 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
1282 if (!f2fs_kset) { 1303 if (!f2fs_kset) {
1283 err = -ENOMEM; 1304 err = -ENOMEM;
1284 goto free_checkpoint_caches; 1305 goto free_extent_cache;
1285 } 1306 }
1286 err = register_filesystem(&f2fs_fs_type); 1307 err = register_filesystem(&f2fs_fs_type);
1287 if (err) 1308 if (err)
@@ -1292,6 +1313,8 @@ static int __init init_f2fs_fs(void)
1292 1313
1293free_kset: 1314free_kset:
1294 kset_unregister(f2fs_kset); 1315 kset_unregister(f2fs_kset);
1316free_extent_cache:
1317 destroy_extent_cache();
1295free_checkpoint_caches: 1318free_checkpoint_caches:
1296 destroy_checkpoint_caches(); 1319 destroy_checkpoint_caches();
1297free_segment_manager_caches: 1320free_segment_manager_caches:
@@ -1309,6 +1332,7 @@ static void __exit exit_f2fs_fs(void)
1309 remove_proc_entry("fs/f2fs", NULL); 1332 remove_proc_entry("fs/f2fs", NULL);
1310 f2fs_destroy_root_stats(); 1333 f2fs_destroy_root_stats();
1311 unregister_filesystem(&f2fs_fs_type); 1334 unregister_filesystem(&f2fs_fs_type);
1335 destroy_extent_cache();
1312 destroy_checkpoint_caches(); 1336 destroy_checkpoint_caches();
1313 destroy_segment_manager_caches(); 1337 destroy_segment_manager_caches();
1314 destroy_node_manager_caches(); 1338 destroy_node_manager_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 5072bf9ae0ef..b0fd2f2d0716 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -135,7 +135,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
135 if (strcmp(name, "") != 0) 135 if (strcmp(name, "") != 0)
136 return -EINVAL; 136 return -EINVAL;
137 137
138 *((char *)buffer) = F2FS_I(inode)->i_advise; 138 if (buffer)
139 *((char *)buffer) = F2FS_I(inode)->i_advise;
139 return sizeof(char); 140 return sizeof(char);
140} 141}
141 142
@@ -152,6 +153,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
152 return -EINVAL; 153 return -EINVAL;
153 154
154 F2FS_I(inode)->i_advise |= *(char *)value; 155 F2FS_I(inode)->i_advise |= *(char *)value;
156 mark_inode_dirty(inode);
155 return 0; 157 return 0;
156} 158}
157 159