aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 23:30:07 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 23:30:07 -0400
commitb54ecfb7022d93e8d342ed4a2512d858d0682c0c (patch)
tree8eda913e4079141bca296eec35827dc8ab170332
parentae9b475ebed96afe51d6bcf10dc7aee9c8d89ed7 (diff)
parentb65ee14818e67127aa242fe1dbd3711b9c095cc0 (diff)
Merge tag 'for-f2fs-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This series includes patches to: - add nobarrier mount option - support tmpfile and rename2 - enhance the fdatasync behavior - fix the error path - fix the recovery routine - refactor a part of the checkpoint procedure - reduce some lock contentions" * tag 'for-f2fs-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (40 commits) f2fs: use for_each_set_bit to simplify the code f2fs: add f2fs_balance_fs for expand_inode_data f2fs: invalidate xattr node page when evict inode f2fs: avoid skipping recover_inline_xattr after recover_inline_data f2fs: add tracepoint for f2fs_direct_IO f2fs: reduce competition among node page writes f2fs: fix coding style f2fs: remove redundant lines in allocate_data_block f2fs: add tracepoint for f2fs_issue_flush f2fs: avoid retrying wrong recovery routine when error was occurred f2fs: test before set/clear bits f2fs: fix wrong condition for unlikely f2fs: enable in-place-update for fdatasync f2fs: skip unnecessary data writes during fsync f2fs: add info of appended or updated data writes f2fs: use radix_tree for ino management f2fs: add infra for ino management f2fs: punch the core function for inode management f2fs: add nobarrier mount option f2fs: fix to put root inode in error path of fill_super ...
-rw-r--r--Documentation/filesystems/f2fs.txt5
-rw-r--r--fs/f2fs/acl.c6
-rw-r--r--fs/f2fs/checkpoint.c178
-rw-r--r--fs/f2fs/data.c59
-rw-r--r--fs/f2fs/debug.c19
-rw-r--r--fs/f2fs/dir.c87
-rw-r--r--fs/f2fs/f2fs.h50
-rw-r--r--fs/f2fs/file.c45
-rw-r--r--fs/f2fs/gc.c7
-rw-r--r--fs/f2fs/hash.c4
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c12
-rw-r--r--fs/f2fs/namei.c246
-rw-r--r--fs/f2fs/node.c273
-rw-r--r--fs/f2fs/node.h7
-rw-r--r--fs/f2fs/recovery.c22
-rw-r--r--fs/f2fs/segment.c38
-rw-r--r--fs/f2fs/segment.h8
-rw-r--r--fs/f2fs/super.c21
-rw-r--r--include/trace/events/f2fs.h87
20 files changed, 883 insertions, 292 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 51afba17bbae..a2046a7d0a9d 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -126,6 +126,11 @@ flush_merge Merge concurrent cache_flush commands as much as possible
126 to eliminate redundant command issues. If the underlying 126 to eliminate redundant command issues. If the underlying
127 device handles the cache_flush command relatively slowly, 127 device handles the cache_flush command relatively slowly,
128 recommend to enable this option. 128 recommend to enable this option.
129nobarrier This option can be used if underlying storage guarantees
130 its cached data should be written to the non-volatile area.
131 If this option is set, no cache_flush commands are issued
132 but f2fs still guarantees the write ordering of all the
133 data writes.
129 134
130================================================================================ 135================================================================================
131DEBUGFS ENTRIES 136DEBUGFS ENTRIES
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index dbe2141d10ad..83b9b5a8d112 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type,
203 size_t size = 0; 203 size_t size = 0;
204 int error; 204 int error;
205 205
206 if (acl) {
207 error = posix_acl_valid(acl);
208 if (error < 0)
209 return error;
210 }
211
212 switch (type) { 206 switch (type) {
213 case ACL_TYPE_ACCESS: 207 case ACL_TYPE_ACCESS:
214 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; 208 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c1d370..6aeed5bada52 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
22#include "segment.h" 22#include "segment.h"
23#include <trace/events/f2fs.h> 23#include <trace/events/f2fs.h>
24 24
25static struct kmem_cache *orphan_entry_slab; 25static struct kmem_cache *ino_entry_slab;
26static struct kmem_cache *inode_entry_slab; 26static struct kmem_cache *inode_entry_slab;
27 27
28/* 28/*
@@ -282,72 +282,120 @@ const struct address_space_operations f2fs_meta_aops = {
282 .set_page_dirty = f2fs_set_meta_page_dirty, 282 .set_page_dirty = f2fs_set_meta_page_dirty,
283}; 283};
284 284
285static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
286{
287 struct ino_entry *e;
288retry:
289 spin_lock(&sbi->ino_lock[type]);
290
291 e = radix_tree_lookup(&sbi->ino_root[type], ino);
292 if (!e) {
293 e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
294 if (!e) {
295 spin_unlock(&sbi->ino_lock[type]);
296 goto retry;
297 }
298 if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
299 spin_unlock(&sbi->ino_lock[type]);
300 kmem_cache_free(ino_entry_slab, e);
301 goto retry;
302 }
303 memset(e, 0, sizeof(struct ino_entry));
304 e->ino = ino;
305
306 list_add_tail(&e->list, &sbi->ino_list[type]);
307 }
308 spin_unlock(&sbi->ino_lock[type]);
309}
310
311static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
312{
313 struct ino_entry *e;
314
315 spin_lock(&sbi->ino_lock[type]);
316 e = radix_tree_lookup(&sbi->ino_root[type], ino);
317 if (e) {
318 list_del(&e->list);
319 radix_tree_delete(&sbi->ino_root[type], ino);
320 if (type == ORPHAN_INO)
321 sbi->n_orphans--;
322 spin_unlock(&sbi->ino_lock[type]);
323 kmem_cache_free(ino_entry_slab, e);
324 return;
325 }
326 spin_unlock(&sbi->ino_lock[type]);
327}
328
329void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
330{
331 /* add new dirty ino entry into list */
332 __add_ino_entry(sbi, ino, type);
333}
334
335void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
336{
337 /* remove dirty ino entry from list */
338 __remove_ino_entry(sbi, ino, type);
339}
340
341/* mode should be APPEND_INO or UPDATE_INO */
342bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
343{
344 struct ino_entry *e;
345 spin_lock(&sbi->ino_lock[mode]);
346 e = radix_tree_lookup(&sbi->ino_root[mode], ino);
347 spin_unlock(&sbi->ino_lock[mode]);
348 return e ? true : false;
349}
350
351static void release_dirty_inode(struct f2fs_sb_info *sbi)
352{
353 struct ino_entry *e, *tmp;
354 int i;
355
356 for (i = APPEND_INO; i <= UPDATE_INO; i++) {
357 spin_lock(&sbi->ino_lock[i]);
358 list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
359 list_del(&e->list);
360 radix_tree_delete(&sbi->ino_root[i], e->ino);
361 kmem_cache_free(ino_entry_slab, e);
362 }
363 spin_unlock(&sbi->ino_lock[i]);
364 }
365}
366
285int acquire_orphan_inode(struct f2fs_sb_info *sbi) 367int acquire_orphan_inode(struct f2fs_sb_info *sbi)
286{ 368{
287 int err = 0; 369 int err = 0;
288 370
289 spin_lock(&sbi->orphan_inode_lock); 371 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
290 if (unlikely(sbi->n_orphans >= sbi->max_orphans)) 372 if (unlikely(sbi->n_orphans >= sbi->max_orphans))
291 err = -ENOSPC; 373 err = -ENOSPC;
292 else 374 else
293 sbi->n_orphans++; 375 sbi->n_orphans++;
294 spin_unlock(&sbi->orphan_inode_lock); 376 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
295 377
296 return err; 378 return err;
297} 379}
298 380
299void release_orphan_inode(struct f2fs_sb_info *sbi) 381void release_orphan_inode(struct f2fs_sb_info *sbi)
300{ 382{
301 spin_lock(&sbi->orphan_inode_lock); 383 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
302 f2fs_bug_on(sbi->n_orphans == 0); 384 f2fs_bug_on(sbi->n_orphans == 0);
303 sbi->n_orphans--; 385 sbi->n_orphans--;
304 spin_unlock(&sbi->orphan_inode_lock); 386 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
305} 387}
306 388
307void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 389void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
308{ 390{
309 struct list_head *head; 391 /* add new orphan ino entry into list */
310 struct orphan_inode_entry *new, *orphan; 392 __add_ino_entry(sbi, ino, ORPHAN_INO);
311
312 new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
313 new->ino = ino;
314
315 spin_lock(&sbi->orphan_inode_lock);
316 head = &sbi->orphan_inode_list;
317 list_for_each_entry(orphan, head, list) {
318 if (orphan->ino == ino) {
319 spin_unlock(&sbi->orphan_inode_lock);
320 kmem_cache_free(orphan_entry_slab, new);
321 return;
322 }
323
324 if (orphan->ino > ino)
325 break;
326 }
327
328 /* add new orphan entry into list which is sorted by inode number */
329 list_add_tail(&new->list, &orphan->list);
330 spin_unlock(&sbi->orphan_inode_lock);
331} 393}
332 394
333void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 395void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
334{ 396{
335 struct list_head *head; 397 /* remove orphan entry from orphan list */
336 struct orphan_inode_entry *orphan; 398 __remove_ino_entry(sbi, ino, ORPHAN_INO);
337
338 spin_lock(&sbi->orphan_inode_lock);
339 head = &sbi->orphan_inode_list;
340 list_for_each_entry(orphan, head, list) {
341 if (orphan->ino == ino) {
342 list_del(&orphan->list);
343 f2fs_bug_on(sbi->n_orphans == 0);
344 sbi->n_orphans--;
345 spin_unlock(&sbi->orphan_inode_lock);
346 kmem_cache_free(orphan_entry_slab, orphan);
347 return;
348 }
349 }
350 spin_unlock(&sbi->orphan_inode_lock);
351} 399}
352 400
353static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) 401static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -401,14 +449,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
401 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + 449 unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
402 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); 450 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
403 struct page *page = NULL; 451 struct page *page = NULL;
404 struct orphan_inode_entry *orphan = NULL; 452 struct ino_entry *orphan = NULL;
405 453
406 for (index = 0; index < orphan_blocks; index++) 454 for (index = 0; index < orphan_blocks; index++)
407 grab_meta_page(sbi, start_blk + index); 455 grab_meta_page(sbi, start_blk + index);
408 456
409 index = 1; 457 index = 1;
410 spin_lock(&sbi->orphan_inode_lock); 458 spin_lock(&sbi->ino_lock[ORPHAN_INO]);
411 head = &sbi->orphan_inode_list; 459 head = &sbi->ino_list[ORPHAN_INO];
412 460
413 /* loop for each orphan inode entry and write them in journal block */ 461 /* loop for each orphan inode entry and write them in journal block */
414 list_for_each_entry(orphan, head, list) { 462 list_for_each_entry(orphan, head, list) {
@@ -448,7 +496,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
448 f2fs_put_page(page, 1); 496 f2fs_put_page(page, 1);
449 } 497 }
450 498
451 spin_unlock(&sbi->orphan_inode_lock); 499 spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
452} 500}
453 501
454static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, 502static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -714,10 +762,10 @@ retry_flush_dents:
714 * until finishing nat/sit flush. 762 * until finishing nat/sit flush.
715 */ 763 */
716retry_flush_nodes: 764retry_flush_nodes:
717 mutex_lock(&sbi->node_write); 765 down_write(&sbi->node_write);
718 766
719 if (get_pages(sbi, F2FS_DIRTY_NODES)) { 767 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
720 mutex_unlock(&sbi->node_write); 768 up_write(&sbi->node_write);
721 sync_node_pages(sbi, 0, &wbc); 769 sync_node_pages(sbi, 0, &wbc);
722 goto retry_flush_nodes; 770 goto retry_flush_nodes;
723 } 771 }
@@ -726,7 +774,7 @@ retry_flush_nodes:
726 774
727static void unblock_operations(struct f2fs_sb_info *sbi) 775static void unblock_operations(struct f2fs_sb_info *sbi)
728{ 776{
729 mutex_unlock(&sbi->node_write); 777 up_write(&sbi->node_write);
730 f2fs_unlock_all(sbi); 778 f2fs_unlock_all(sbi);
731} 779}
732 780
@@ -748,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
748static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) 796static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
749{ 797{
750 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); 798 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
799 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
751 nid_t last_nid = 0; 800 nid_t last_nid = 0;
752 block_t start_blk; 801 block_t start_blk;
753 struct page *cp_page; 802 struct page *cp_page;
@@ -761,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
761 * This avoids to conduct wrong roll-forward operations and uses 810 * This avoids to conduct wrong roll-forward operations and uses
762 * metapages, so should be called prior to sync_meta_pages below. 811 * metapages, so should be called prior to sync_meta_pages below.
763 */ 812 */
764 discard_next_dnode(sbi); 813 discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
765 814
766 /* Flush all the NAT/SIT pages */ 815 /* Flush all the NAT/SIT pages */
767 while (get_pages(sbi, F2FS_DIRTY_META)) 816 while (get_pages(sbi, F2FS_DIRTY_META))
@@ -885,8 +934,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
885 /* Here, we only have one bio having CP pack */ 934 /* Here, we only have one bio having CP pack */
886 sync_meta_pages(sbi, META_FLUSH, LONG_MAX); 935 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
887 936
888 if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { 937 if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
889 clear_prefree_segments(sbi); 938 clear_prefree_segments(sbi);
939 release_dirty_inode(sbi);
890 F2FS_RESET_SB_DIRT(sbi); 940 F2FS_RESET_SB_DIRT(sbi);
891 } 941 }
892} 942}
@@ -932,31 +982,37 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
932 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); 982 trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
933} 983}
934 984
935void init_orphan_info(struct f2fs_sb_info *sbi) 985void init_ino_entry_info(struct f2fs_sb_info *sbi)
936{ 986{
937 spin_lock_init(&sbi->orphan_inode_lock); 987 int i;
938 INIT_LIST_HEAD(&sbi->orphan_inode_list); 988
939 sbi->n_orphans = 0; 989 for (i = 0; i < MAX_INO_ENTRY; i++) {
990 INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
991 spin_lock_init(&sbi->ino_lock[i]);
992 INIT_LIST_HEAD(&sbi->ino_list[i]);
993 }
994
940 /* 995 /*
941 * considering 512 blocks in a segment 8 blocks are needed for cp 996 * considering 512 blocks in a segment 8 blocks are needed for cp
942 * and log segment summaries. Remaining blocks are used to keep 997 * and log segment summaries. Remaining blocks are used to keep
943 * orphan entries with the limitation one reserved segment 998 * orphan entries with the limitation one reserved segment
944 * for cp pack we can have max 1020*504 orphan entries 999 * for cp pack we can have max 1020*504 orphan entries
945 */ 1000 */
1001 sbi->n_orphans = 0;
946 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) 1002 sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
947 * F2FS_ORPHANS_PER_BLOCK; 1003 * F2FS_ORPHANS_PER_BLOCK;
948} 1004}
949 1005
950int __init create_checkpoint_caches(void) 1006int __init create_checkpoint_caches(void)
951{ 1007{
952 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", 1008 ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
953 sizeof(struct orphan_inode_entry)); 1009 sizeof(struct ino_entry));
954 if (!orphan_entry_slab) 1010 if (!ino_entry_slab)
955 return -ENOMEM; 1011 return -ENOMEM;
956 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 1012 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
957 sizeof(struct dir_inode_entry)); 1013 sizeof(struct dir_inode_entry));
958 if (!inode_entry_slab) { 1014 if (!inode_entry_slab) {
959 kmem_cache_destroy(orphan_entry_slab); 1015 kmem_cache_destroy(ino_entry_slab);
960 return -ENOMEM; 1016 return -ENOMEM;
961 } 1017 }
962 return 0; 1018 return 0;
@@ -964,6 +1020,6 @@ int __init create_checkpoint_caches(void)
964 1020
965void destroy_checkpoint_caches(void) 1021void destroy_checkpoint_caches(void)
966{ 1022{
967 kmem_cache_destroy(orphan_entry_slab); 1023 kmem_cache_destroy(ino_entry_slab);
968 kmem_cache_destroy(inode_entry_slab); 1024 kmem_cache_destroy(inode_entry_slab);
969} 1025}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619edb5f..03313099c51c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
139 /* change META to META_FLUSH in the checkpoint procedure */ 139 /* change META to META_FLUSH in the checkpoint procedure */
140 if (type >= META_FLUSH) { 140 if (type >= META_FLUSH) {
141 io->fio.type = META_FLUSH; 141 io->fio.type = META_FLUSH;
142 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; 142 if (test_opt(sbi, NOBARRIER))
143 io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
144 else
145 io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
143 } 146 }
144 __submit_merged_bio(io); 147 __submit_merged_bio(io);
145 up_write(&io->io_rwsem); 148 up_write(&io->io_rwsem);
@@ -626,8 +629,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
626 if (check_extent_cache(inode, pgofs, bh_result)) 629 if (check_extent_cache(inode, pgofs, bh_result))
627 goto out; 630 goto out;
628 631
629 if (create) 632 if (create) {
633 f2fs_balance_fs(sbi);
630 f2fs_lock_op(sbi); 634 f2fs_lock_op(sbi);
635 }
631 636
632 /* When reading holes, we need its node page */ 637 /* When reading holes, we need its node page */
633 set_new_dnode(&dn, inode, NULL, NULL, 0); 638 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -784,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
784 !is_cold_data(page) && 789 !is_cold_data(page) &&
785 need_inplace_update(inode))) { 790 need_inplace_update(inode))) {
786 rewrite_data_page(page, old_blkaddr, fio); 791 rewrite_data_page(page, old_blkaddr, fio);
792 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
787 } else { 793 } else {
788 write_data_page(page, &dn, &new_blkaddr, fio); 794 write_data_page(page, &dn, &new_blkaddr, fio);
789 update_extent_cache(new_blkaddr, &dn); 795 update_extent_cache(new_blkaddr, &dn);
796 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
790 } 797 }
791out_writepage: 798out_writepage:
792 f2fs_put_dnode(&dn); 799 f2fs_put_dnode(&dn);
@@ -914,6 +921,16 @@ skip_write:
914 return 0; 921 return 0;
915} 922}
916 923
924static void f2fs_write_failed(struct address_space *mapping, loff_t to)
925{
926 struct inode *inode = mapping->host;
927
928 if (to > inode->i_size) {
929 truncate_pagecache(inode, inode->i_size);
930 truncate_blocks(inode, inode->i_size);
931 }
932}
933
917static int f2fs_write_begin(struct file *file, struct address_space *mapping, 934static int f2fs_write_begin(struct file *file, struct address_space *mapping,
918 loff_t pos, unsigned len, unsigned flags, 935 loff_t pos, unsigned len, unsigned flags,
919 struct page **pagep, void **fsdata) 936 struct page **pagep, void **fsdata)
@@ -931,11 +948,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
931repeat: 948repeat:
932 err = f2fs_convert_inline_data(inode, pos + len); 949 err = f2fs_convert_inline_data(inode, pos + len);
933 if (err) 950 if (err)
934 return err; 951 goto fail;
935 952
936 page = grab_cache_page_write_begin(mapping, index, flags); 953 page = grab_cache_page_write_begin(mapping, index, flags);
937 if (!page) 954 if (!page) {
938 return -ENOMEM; 955 err = -ENOMEM;
956 goto fail;
957 }
939 958
940 /* to avoid latency during memory pressure */ 959 /* to avoid latency during memory pressure */
941 unlock_page(page); 960 unlock_page(page);
@@ -949,10 +968,9 @@ repeat:
949 set_new_dnode(&dn, inode, NULL, NULL, 0); 968 set_new_dnode(&dn, inode, NULL, NULL, 0);
950 err = f2fs_reserve_block(&dn, index); 969 err = f2fs_reserve_block(&dn, index);
951 f2fs_unlock_op(sbi); 970 f2fs_unlock_op(sbi);
952
953 if (err) { 971 if (err) {
954 f2fs_put_page(page, 0); 972 f2fs_put_page(page, 0);
955 return err; 973 goto fail;
956 } 974 }
957inline_data: 975inline_data:
958 lock_page(page); 976 lock_page(page);
@@ -982,19 +1000,20 @@ inline_data:
982 err = f2fs_read_inline_data(inode, page); 1000 err = f2fs_read_inline_data(inode, page);
983 if (err) { 1001 if (err) {
984 page_cache_release(page); 1002 page_cache_release(page);
985 return err; 1003 goto fail;
986 } 1004 }
987 } else { 1005 } else {
988 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 1006 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
989 READ_SYNC); 1007 READ_SYNC);
990 if (err) 1008 if (err)
991 return err; 1009 goto fail;
992 } 1010 }
993 1011
994 lock_page(page); 1012 lock_page(page);
995 if (unlikely(!PageUptodate(page))) { 1013 if (unlikely(!PageUptodate(page))) {
996 f2fs_put_page(page, 1); 1014 f2fs_put_page(page, 1);
997 return -EIO; 1015 err = -EIO;
1016 goto fail;
998 } 1017 }
999 if (unlikely(page->mapping != mapping)) { 1018 if (unlikely(page->mapping != mapping)) {
1000 f2fs_put_page(page, 1); 1019 f2fs_put_page(page, 1);
@@ -1005,6 +1024,9 @@ out:
1005 SetPageUptodate(page); 1024 SetPageUptodate(page);
1006 clear_cold_data(page); 1025 clear_cold_data(page);
1007 return 0; 1026 return 0;
1027fail:
1028 f2fs_write_failed(mapping, pos + len);
1029 return err;
1008} 1030}
1009 1031
1010static int f2fs_write_end(struct file *file, 1032static int f2fs_write_end(struct file *file,
@@ -1016,7 +1038,6 @@ static int f2fs_write_end(struct file *file,
1016 1038
1017 trace_f2fs_write_end(inode, pos, len, copied); 1039 trace_f2fs_write_end(inode, pos, len, copied);
1018 1040
1019 SetPageUptodate(page);
1020 set_page_dirty(page); 1041 set_page_dirty(page);
1021 1042
1022 if (pos + copied > i_size_read(inode)) { 1043 if (pos + copied > i_size_read(inode)) {
@@ -1050,7 +1071,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1050 struct iov_iter *iter, loff_t offset) 1071 struct iov_iter *iter, loff_t offset)
1051{ 1072{
1052 struct file *file = iocb->ki_filp; 1073 struct file *file = iocb->ki_filp;
1053 struct inode *inode = file->f_mapping->host; 1074 struct address_space *mapping = file->f_mapping;
1075 struct inode *inode = mapping->host;
1076 size_t count = iov_iter_count(iter);
1077 int err;
1054 1078
1055 /* Let buffer I/O handle the inline data case. */ 1079 /* Let buffer I/O handle the inline data case. */
1056 if (f2fs_has_inline_data(inode)) 1080 if (f2fs_has_inline_data(inode))
@@ -1062,8 +1086,15 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1062 /* clear fsync mark to recover these blocks */ 1086 /* clear fsync mark to recover these blocks */
1063 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); 1087 fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1064 1088
1065 return blockdev_direct_IO(rw, iocb, inode, iter, offset, 1089 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1066 get_data_block); 1090
1091 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
1092 if (err < 0 && (rw & WRITE))
1093 f2fs_write_failed(mapping, offset + count);
1094
1095 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
1096
1097 return err;
1067} 1098}
1068 1099
1069static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 1100static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index b52c12cf5873..a441ba33be11 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@ get_cache:
167 si->cache_mem += npages << PAGE_CACHE_SHIFT; 167 si->cache_mem += npages << PAGE_CACHE_SHIFT;
168 npages = META_MAPPING(sbi)->nrpages; 168 npages = META_MAPPING(sbi)->nrpages;
169 si->cache_mem += npages << PAGE_CACHE_SHIFT; 169 si->cache_mem += npages << PAGE_CACHE_SHIFT;
170 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); 170 si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); 171 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
172} 172}
173 173
@@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void)
345 345
346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); 346 f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
347 if (!f2fs_debugfs_root) 347 if (!f2fs_debugfs_root)
348 goto bail; 348 return;
349 349
350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, 350 file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
351 NULL, &stat_fops); 351 NULL, &stat_fops);
352 if (!file) 352 if (!file) {
353 goto free_debugfs_dir; 353 debugfs_remove(f2fs_debugfs_root);
354 354 f2fs_debugfs_root = NULL;
355 return; 355 }
356
357free_debugfs_dir:
358 debugfs_remove(f2fs_debugfs_root);
359
360bail:
361 f2fs_debugfs_root = NULL;
362 return;
363} 356}
364 357
365void f2fs_destroy_root_stats(void) 358void f2fs_destroy_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a4addd72ebbd..bcf893c3d903 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level,
77 return bidx; 77 return bidx;
78} 78}
79 79
80static bool early_match_name(const char *name, size_t namelen, 80static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
81 f2fs_hash_t namehash, struct f2fs_dir_entry *de) 81 struct f2fs_dir_entry *de)
82{ 82{
83 if (le16_to_cpu(de->name_len) != namelen) 83 if (le16_to_cpu(de->name_len) != namelen)
84 return false; 84 return false;
@@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen,
90} 90}
91 91
92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, 92static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
93 const char *name, size_t namelen, int *max_slots, 93 struct qstr *name, int *max_slots,
94 f2fs_hash_t namehash, struct page **res_page) 94 f2fs_hash_t namehash, struct page **res_page)
95{ 95{
96 struct f2fs_dir_entry *de; 96 struct f2fs_dir_entry *de;
@@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
109 continue; 109 continue;
110 } 110 }
111 de = &dentry_blk->dentry[bit_pos]; 111 de = &dentry_blk->dentry[bit_pos];
112 if (early_match_name(name, namelen, namehash, de)) { 112 if (early_match_name(name->len, namehash, de)) {
113 if (!memcmp(dentry_blk->filename[bit_pos], 113 if (!memcmp(dentry_blk->filename[bit_pos],
114 name, namelen)) { 114 name->name,
115 name->len)) {
115 *res_page = dentry_page; 116 *res_page = dentry_page;
116 goto found; 117 goto found;
117 } 118 }
@@ -120,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
120 *max_slots = max_len; 121 *max_slots = max_len;
121 max_len = 0; 122 max_len = 0;
122 } 123 }
124
125 /*
126 * For the most part, it should be a bug when name_len is zero.
127 * We stop here to figure out where the bug occurred.
128 */
129 f2fs_bug_on(!de->name_len);
130
123 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); 131 bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
124 } 132 }
125 133
@@ -132,10 +140,10 @@ found:
132} 140}
133 141
134static struct f2fs_dir_entry *find_in_level(struct inode *dir, 142static struct f2fs_dir_entry *find_in_level(struct inode *dir,
135 unsigned int level, const char *name, size_t namelen, 143 unsigned int level, struct qstr *name,
136 f2fs_hash_t namehash, struct page **res_page) 144 f2fs_hash_t namehash, struct page **res_page)
137{ 145{
138 int s = GET_DENTRY_SLOTS(namelen); 146 int s = GET_DENTRY_SLOTS(name->len);
139 unsigned int nbucket, nblock; 147 unsigned int nbucket, nblock;
140 unsigned int bidx, end_block; 148 unsigned int bidx, end_block;
141 struct page *dentry_page; 149 struct page *dentry_page;
@@ -160,8 +168,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
160 continue; 168 continue;
161 } 169 }
162 170
163 de = find_in_block(dentry_page, name, namelen, 171 de = find_in_block(dentry_page, name, &max_slots,
164 &max_slots, namehash, res_page); 172 namehash, res_page);
165 if (de) 173 if (de)
166 break; 174 break;
167 175
@@ -187,8 +195,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
187struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, 195struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
188 struct qstr *child, struct page **res_page) 196 struct qstr *child, struct page **res_page)
189{ 197{
190 const char *name = child->name;
191 size_t namelen = child->len;
192 unsigned long npages = dir_blocks(dir); 198 unsigned long npages = dir_blocks(dir);
193 struct f2fs_dir_entry *de = NULL; 199 struct f2fs_dir_entry *de = NULL;
194 f2fs_hash_t name_hash; 200 f2fs_hash_t name_hash;
@@ -200,12 +206,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
200 206
201 *res_page = NULL; 207 *res_page = NULL;
202 208
203 name_hash = f2fs_dentry_hash(name, namelen); 209 name_hash = f2fs_dentry_hash(child);
204 max_depth = F2FS_I(dir)->i_current_depth; 210 max_depth = F2FS_I(dir)->i_current_depth;
205 211
206 for (level = 0; level < max_depth; level++) { 212 for (level = 0; level < max_depth; level++) {
207 de = find_in_level(dir, level, name, 213 de = find_in_level(dir, level, child, name_hash, res_page);
208 namelen, name_hash, res_page);
209 if (de) 214 if (de)
210 break; 215 break;
211 } 216 }
@@ -298,14 +303,13 @@ static int make_empty_dir(struct inode *inode,
298 struct page *dentry_page; 303 struct page *dentry_page;
299 struct f2fs_dentry_block *dentry_blk; 304 struct f2fs_dentry_block *dentry_blk;
300 struct f2fs_dir_entry *de; 305 struct f2fs_dir_entry *de;
301 void *kaddr;
302 306
303 dentry_page = get_new_data_page(inode, page, 0, true); 307 dentry_page = get_new_data_page(inode, page, 0, true);
304 if (IS_ERR(dentry_page)) 308 if (IS_ERR(dentry_page))
305 return PTR_ERR(dentry_page); 309 return PTR_ERR(dentry_page);
306 310
307 kaddr = kmap_atomic(dentry_page); 311
308 dentry_blk = (struct f2fs_dentry_block *)kaddr; 312 dentry_blk = kmap_atomic(dentry_page);
309 313
310 de = &dentry_blk->dentry[0]; 314 de = &dentry_blk->dentry[0];
311 de->name_len = cpu_to_le16(1); 315 de->name_len = cpu_to_le16(1);
@@ -323,7 +327,7 @@ static int make_empty_dir(struct inode *inode,
323 327
324 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); 328 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
325 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); 329 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
326 kunmap_atomic(kaddr); 330 kunmap_atomic(dentry_blk);
327 331
328 set_page_dirty(dentry_page); 332 set_page_dirty(dentry_page);
329 f2fs_put_page(dentry_page, 1); 333 f2fs_put_page(dentry_page, 1);
@@ -333,11 +337,12 @@ static int make_empty_dir(struct inode *inode,
333static struct page *init_inode_metadata(struct inode *inode, 337static struct page *init_inode_metadata(struct inode *inode,
334 struct inode *dir, const struct qstr *name) 338 struct inode *dir, const struct qstr *name)
335{ 339{
340 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
336 struct page *page; 341 struct page *page;
337 int err; 342 int err;
338 343
339 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { 344 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
340 page = new_inode_page(inode, name); 345 page = new_inode_page(inode);
341 if (IS_ERR(page)) 346 if (IS_ERR(page))
342 return page; 347 return page;
343 348
@@ -362,7 +367,8 @@ static struct page *init_inode_metadata(struct inode *inode,
362 set_cold_node(inode, page); 367 set_cold_node(inode, page);
363 } 368 }
364 369
365 init_dent_inode(name, page); 370 if (name)
371 init_dent_inode(name, page);
366 372
367 /* 373 /*
368 * This file should be checkpointed during fsync. 374 * This file should be checkpointed during fsync.
@@ -370,6 +376,12 @@ static struct page *init_inode_metadata(struct inode *inode,
370 */ 376 */
371 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { 377 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
372 file_lost_pino(inode); 378 file_lost_pino(inode);
379 /*
380 * If we link the tmpfile to an alias through the linkat path,
381 * we should remove this inode from the orphan list.
382 */
383 if (inode->i_nlink == 0)
384 remove_orphan_inode(sbi, inode->i_ino);
373 inc_nlink(inode); 385 inc_nlink(inode);
374 } 386 }
375 return page; 387 return page;
@@ -453,7 +465,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
453 int err = 0; 465 int err = 0;
454 int i; 466 int i;
455 467
456 dentry_hash = f2fs_dentry_hash(name->name, name->len); 468 dentry_hash = f2fs_dentry_hash(name);
457 level = 0; 469 level = 0;
458 current_depth = F2FS_I(dir)->i_current_depth; 470 current_depth = F2FS_I(dir)->i_current_depth;
459 if (F2FS_I(dir)->chash == dentry_hash) { 471 if (F2FS_I(dir)->chash == dentry_hash) {
@@ -529,6 +541,27 @@ fail:
529 return err; 541 return err;
530} 542}
531 543
/*
 * Set up the inode metadata page for @inode without recording a name:
 * init_inode_metadata() is called with a NULL name, and this function
 * itself adds no directory entry under @dir.
 * (Presumably the backend for O_TMPFILE-style creation -- confirm against
 * the caller in namei.c.)
 *
 * The whole sequence runs with @inode's i_sem held for writing.
 *
 * Returns 0 on success, or the error carried by the page pointer that
 * init_inode_metadata() returned. On that error path FI_NEW_INODE is
 * left as it was.
 */
544int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
545{
546 struct page *page;
547 int err = 0;
548
549 down_write(&F2FS_I(inode)->i_sem);
 /* NULL name: there is no dentry name to store for this inode */
550 page = init_inode_metadata(inode, dir, NULL);
551 if (IS_ERR(page)) {
552 err = PTR_ERR(page);
553 goto fail;
554 }
555 /* we don't need to mark_inode_dirty now */
556 update_inode(inode, page);
557 f2fs_put_page(page, 1);
558
 /* only reached on success: the inode is no longer flagged as new */
559 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
560fail:
 /* common exit: drop i_sem on both the success and the error path */
561 up_write(&F2FS_I(inode)->i_sem);
562 return err;
563}
564
532/* 565/*
533 * It only removes the dentry from the dentry page,corresponding name 566 * It only removes the dentry from the dentry page,corresponding name
534 * entry in name page does not need to be touched during deletion. 567 * entry in name page does not need to be touched during deletion.
@@ -541,14 +574,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
541 struct address_space *mapping = page->mapping; 574 struct address_space *mapping = page->mapping;
542 struct inode *dir = mapping->host; 575 struct inode *dir = mapping->host;
543 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); 576 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
544 void *kaddr = page_address(page);
545 int i; 577 int i;
546 578
547 lock_page(page); 579 lock_page(page);
548 f2fs_wait_on_page_writeback(page, DATA); 580 f2fs_wait_on_page_writeback(page, DATA);
549 581
550 dentry_blk = (struct f2fs_dentry_block *)kaddr; 582 dentry_blk = page_address(page);
551 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; 583 bit_pos = dentry - dentry_blk->dentry;
552 for (i = 0; i < slots; i++) 584 for (i = 0; i < slots; i++)
553 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); 585 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
554 586
@@ -603,7 +635,6 @@ bool f2fs_empty_dir(struct inode *dir)
603 unsigned long nblock = dir_blocks(dir); 635 unsigned long nblock = dir_blocks(dir);
604 636
605 for (bidx = 0; bidx < nblock; bidx++) { 637 for (bidx = 0; bidx < nblock; bidx++) {
606 void *kaddr;
607 dentry_page = get_lock_data_page(dir, bidx); 638 dentry_page = get_lock_data_page(dir, bidx);
608 if (IS_ERR(dentry_page)) { 639 if (IS_ERR(dentry_page)) {
609 if (PTR_ERR(dentry_page) == -ENOENT) 640 if (PTR_ERR(dentry_page) == -ENOENT)
@@ -612,8 +643,8 @@ bool f2fs_empty_dir(struct inode *dir)
612 return false; 643 return false;
613 } 644 }
614 645
615 kaddr = kmap_atomic(dentry_page); 646
616 dentry_blk = (struct f2fs_dentry_block *)kaddr; 647 dentry_blk = kmap_atomic(dentry_page);
617 if (bidx == 0) 648 if (bidx == 0)
618 bit_pos = 2; 649 bit_pos = 2;
619 else 650 else
@@ -621,7 +652,7 @@ bool f2fs_empty_dir(struct inode *dir)
621 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, 652 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
622 NR_DENTRY_IN_BLOCK, 653 NR_DENTRY_IN_BLOCK,
623 bit_pos); 654 bit_pos);
624 kunmap_atomic(kaddr); 655 kunmap_atomic(dentry_blk);
625 656
626 f2fs_put_page(dentry_page, 1); 657 f2fs_put_page(dentry_page, 1);
627 658
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 58df97e174d0..4dab5338a97a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,6 +41,7 @@
41#define F2FS_MOUNT_INLINE_XATTR 0x00000080 41#define F2FS_MOUNT_INLINE_XATTR 0x00000080
42#define F2FS_MOUNT_INLINE_DATA 0x00000100 42#define F2FS_MOUNT_INLINE_DATA 0x00000100
43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 43#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
44#define F2FS_MOUNT_NOBARRIER 0x00000400
44 45
45#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) 46#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
46#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) 47#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -99,8 +100,15 @@ enum {
99 META_SSA 100 META_SSA
100}; 101};
101 102
102/* for the list of orphan inodes */ 103/* for the list of ino */
103struct orphan_inode_entry { 104enum {
105 ORPHAN_INO, /* for orphan ino list */
106 APPEND_INO, /* for append ino list */
107 UPDATE_INO, /* for update ino list */
108 MAX_INO_ENTRY, /* max. list */
109};
110
111struct ino_entry {
104 struct list_head list; /* list head */ 112 struct list_head list; /* list head */
105 nid_t ino; /* inode number */ 113 nid_t ino; /* inode number */
106}; 114};
@@ -256,6 +264,8 @@ struct f2fs_nm_info {
256 unsigned int nat_cnt; /* the # of cached nat entries */ 264 unsigned int nat_cnt; /* the # of cached nat entries */
257 struct list_head nat_entries; /* cached nat entry list (clean) */ 265 struct list_head nat_entries; /* cached nat entry list (clean) */
258 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 266 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
267 struct list_head nat_entry_set; /* nat entry set list */
268 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
259 269
260 /* free node ids management */ 270 /* free node ids management */
261 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 271 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
@@ -442,14 +452,17 @@ struct f2fs_sb_info {
442 struct inode *meta_inode; /* cache meta blocks */ 452 struct inode *meta_inode; /* cache meta blocks */
443 struct mutex cp_mutex; /* checkpoint procedure lock */ 453 struct mutex cp_mutex; /* checkpoint procedure lock */
444 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 454 struct rw_semaphore cp_rwsem; /* blocking FS operations */
445 struct mutex node_write; /* locking node writes */ 455 struct rw_semaphore node_write; /* locking node writes */
446 struct mutex writepages; /* mutex for writepages() */ 456 struct mutex writepages; /* mutex for writepages() */
447 bool por_doing; /* recovery is doing or not */ 457 bool por_doing; /* recovery is doing or not */
448 wait_queue_head_t cp_wait; 458 wait_queue_head_t cp_wait;
449 459
450 /* for orphan inode management */ 460 /* for inode management */
451 struct list_head orphan_inode_list; /* orphan inode list */ 461 struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
452 spinlock_t orphan_inode_lock; /* for orphan inode list */ 462 spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
463 struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
464
465 /* for orphan inode, use 0'th array */
453 unsigned int n_orphans; /* # of orphan inodes */ 466 unsigned int n_orphans; /* # of orphan inodes */
454 unsigned int max_orphans; /* max orphan inodes */ 467 unsigned int max_orphans; /* max orphan inodes */
455 468
@@ -768,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
768 if (flag == NAT_BITMAP) 781 if (flag == NAT_BITMAP)
769 return &ckpt->sit_nat_version_bitmap; 782 return &ckpt->sit_nat_version_bitmap;
770 else 783 else
771 return ((unsigned char *)ckpt + F2FS_BLKSIZE); 784 return (unsigned char *)ckpt + F2FS_BLKSIZE;
772 } else { 785 } else {
773 offset = (flag == NAT_BITMAP) ? 786 offset = (flag == NAT_BITMAP) ?
774 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; 787 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
@@ -983,11 +996,15 @@ enum {
983 FI_NO_EXTENT, /* not to use the extent cache */ 996 FI_NO_EXTENT, /* not to use the extent cache */
984 FI_INLINE_XATTR, /* used for inline xattr */ 997 FI_INLINE_XATTR, /* used for inline xattr */
985 FI_INLINE_DATA, /* used for inline data*/ 998 FI_INLINE_DATA, /* used for inline data*/
999 FI_APPEND_WRITE, /* inode has appended data */
1000 FI_UPDATE_WRITE, /* inode has in-place-update data */
1001 FI_NEED_IPU, /* used for ipu for fdatasync */
986}; 1002};
987 1003
988static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1004static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
989{ 1005{
990 set_bit(flag, &fi->flags); 1006 if (!test_bit(flag, &fi->flags))
1007 set_bit(flag, &fi->flags);
991} 1008}
992 1009
993static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) 1010static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -997,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
997 1014
998static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) 1015static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
999{ 1016{
1000 clear_bit(flag, &fi->flags); 1017 if (test_bit(flag, &fi->flags))
1018 clear_bit(flag, &fi->flags);
1001} 1019}
1002 1020
1003static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) 1021static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
@@ -1136,6 +1154,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
1136int update_dent_inode(struct inode *, const struct qstr *); 1154int update_dent_inode(struct inode *, const struct qstr *);
1137int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); 1155int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
1138void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); 1156void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
1157int f2fs_do_tmpfile(struct inode *, struct inode *);
1139int f2fs_make_empty(struct inode *, struct inode *); 1158int f2fs_make_empty(struct inode *, struct inode *);
1140bool f2fs_empty_dir(struct inode *); 1159bool f2fs_empty_dir(struct inode *);
1141 1160
@@ -1155,7 +1174,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
1155/* 1174/*
1156 * hash.c 1175 * hash.c
1157 */ 1176 */
1158f2fs_hash_t f2fs_dentry_hash(const char *, size_t); 1177f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
1159 1178
1160/* 1179/*
1161 * node.c 1180 * node.c
@@ -1173,7 +1192,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t);
1173int truncate_xattr_node(struct inode *, struct page *); 1192int truncate_xattr_node(struct inode *, struct page *);
1174int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); 1193int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
1175void remove_inode_page(struct inode *); 1194void remove_inode_page(struct inode *);
1176struct page *new_inode_page(struct inode *, const struct qstr *); 1195struct page *new_inode_page(struct inode *);
1177struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); 1196struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
1178void ra_node_page(struct f2fs_sb_info *, nid_t); 1197void ra_node_page(struct f2fs_sb_info *, nid_t);
1179struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); 1198struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
@@ -1185,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t);
1185void alloc_nid_failed(struct f2fs_sb_info *, nid_t); 1204void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
1186void recover_node_page(struct f2fs_sb_info *, struct page *, 1205void recover_node_page(struct f2fs_sb_info *, struct page *,
1187 struct f2fs_summary *, struct node_info *, block_t); 1206 struct f2fs_summary *, struct node_info *, block_t);
1207void recover_inline_xattr(struct inode *, struct page *);
1188bool recover_xattr_data(struct inode *, struct page *, block_t); 1208bool recover_xattr_data(struct inode *, struct page *, block_t);
1189int recover_inode_page(struct f2fs_sb_info *, struct page *); 1209int recover_inode_page(struct f2fs_sb_info *, struct page *);
1190int restore_node_summary(struct f2fs_sb_info *, unsigned int, 1210int restore_node_summary(struct f2fs_sb_info *, unsigned int,
@@ -1206,7 +1226,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
1206void invalidate_blocks(struct f2fs_sb_info *, block_t); 1226void invalidate_blocks(struct f2fs_sb_info *, block_t);
1207void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); 1227void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1208void clear_prefree_segments(struct f2fs_sb_info *); 1228void clear_prefree_segments(struct f2fs_sb_info *);
1209void discard_next_dnode(struct f2fs_sb_info *); 1229void discard_next_dnode(struct f2fs_sb_info *, block_t);
1210int npages_for_summary_flush(struct f2fs_sb_info *); 1230int npages_for_summary_flush(struct f2fs_sb_info *);
1211void allocate_new_segments(struct f2fs_sb_info *); 1231void allocate_new_segments(struct f2fs_sb_info *);
1212struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1232struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
@@ -1240,6 +1260,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
1240struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); 1260struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
1241int ra_meta_pages(struct f2fs_sb_info *, int, int, int); 1261int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
1242long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); 1262long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
1263void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1264void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
1265bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
1243int acquire_orphan_inode(struct f2fs_sb_info *); 1266int acquire_orphan_inode(struct f2fs_sb_info *);
1244void release_orphan_inode(struct f2fs_sb_info *); 1267void release_orphan_inode(struct f2fs_sb_info *);
1245void add_orphan_inode(struct f2fs_sb_info *, nid_t); 1268void add_orphan_inode(struct f2fs_sb_info *, nid_t);
@@ -1251,7 +1274,7 @@ void add_dirty_dir_inode(struct inode *);
1251void remove_dirty_dir_inode(struct inode *); 1274void remove_dirty_dir_inode(struct inode *);
1252void sync_dirty_dir_inodes(struct f2fs_sb_info *); 1275void sync_dirty_dir_inodes(struct f2fs_sb_info *);
1253void write_checkpoint(struct f2fs_sb_info *, bool); 1276void write_checkpoint(struct f2fs_sb_info *, bool);
1254void init_orphan_info(struct f2fs_sb_info *); 1277void init_ino_entry_info(struct f2fs_sb_info *);
1255int __init create_checkpoint_caches(void); 1278int __init create_checkpoint_caches(void);
1256void destroy_checkpoint_caches(void); 1279void destroy_checkpoint_caches(void);
1257 1280
@@ -1295,7 +1318,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *);
1295struct f2fs_stat_info { 1318struct f2fs_stat_info {
1296 struct list_head stat_list; 1319 struct list_head stat_list;
1297 struct f2fs_sb_info *sbi; 1320 struct f2fs_sb_info *sbi;
1298 struct mutex stat_lock;
1299 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; 1321 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1300 int main_area_segs, main_area_sections, main_area_zones; 1322 int main_area_segs, main_area_sections, main_area_zones;
1301 int hit_ext, total_ext; 1323 int hit_ext, total_ext;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7d8b96275092..208f1a9bd569 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -127,12 +127,30 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
127 return 0; 127 return 0;
128 128
129 trace_f2fs_sync_file_enter(inode); 129 trace_f2fs_sync_file_enter(inode);
130
131 /* if fdatasync is triggered, let's do in-place-update */
132 if (datasync)
133 set_inode_flag(fi, FI_NEED_IPU);
134
130 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 135 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
136 if (datasync)
137 clear_inode_flag(fi, FI_NEED_IPU);
131 if (ret) { 138 if (ret) {
132 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 139 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
133 return ret; 140 return ret;
134 } 141 }
135 142
143 /*
144 * if there is no written data, don't waste time to write recovery info.
145 */
146 if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
147 !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
148 if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
149 exist_written_data(sbi, inode->i_ino, UPDATE_INO))
150 goto flush_out;
151 goto out;
152 }
153
136 /* guarantee free sections for fsync */ 154 /* guarantee free sections for fsync */
137 f2fs_balance_fs(sbi); 155 f2fs_balance_fs(sbi);
138 156
@@ -188,6 +206,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
188 ret = wait_on_node_pages_writeback(sbi, inode->i_ino); 206 ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
189 if (ret) 207 if (ret)
190 goto out; 208 goto out;
209
210 /* once recovery info is written, don't need to track this */
211 remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
212 clear_inode_flag(fi, FI_APPEND_WRITE);
213flush_out:
214 remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
215 clear_inode_flag(fi, FI_UPDATE_WRITE);
191 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); 216 ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
192 } 217 }
193out: 218out:
@@ -206,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
206 231
207 /* find first dirty page index */ 232 /* find first dirty page index */
208 pagevec_init(&pvec, 0); 233 pagevec_init(&pvec, 0);
209 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); 234 nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
210 pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; 235 PAGECACHE_TAG_DIRTY, 1);
236 pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
211 pagevec_release(&pvec); 237 pagevec_release(&pvec);
212 return pgofs; 238 return pgofs;
213} 239}
@@ -272,8 +298,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
272 } 298 }
273 } 299 }
274 300
275 end_offset = IS_INODE(dn.node_page) ? 301 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
276 ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
277 302
278 /* find data/hole in dnode block */ 303 /* find data/hole in dnode block */
279 for (; dn.ofs_in_node < end_offset; 304 for (; dn.ofs_in_node < end_offset;
@@ -380,13 +405,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
380 return; 405 return;
381 406
382 lock_page(page); 407 lock_page(page);
383 if (unlikely(page->mapping != inode->i_mapping)) { 408 if (unlikely(!PageUptodate(page) ||
384 f2fs_put_page(page, 1); 409 page->mapping != inode->i_mapping))
385 return; 410 goto out;
386 } 411
387 f2fs_wait_on_page_writeback(page, DATA); 412 f2fs_wait_on_page_writeback(page, DATA);
388 zero_user(page, offset, PAGE_CACHE_SIZE - offset); 413 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
389 set_page_dirty(page); 414 set_page_dirty(page);
415
416out:
390 f2fs_put_page(page, 1); 417 f2fs_put_page(page, 1);
391} 418}
392 419
@@ -645,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
645 loff_t off_start, off_end; 672 loff_t off_start, off_end;
646 int ret = 0; 673 int ret = 0;
647 674
675 f2fs_balance_fs(sbi);
676
648 ret = inode_newsize_ok(inode, (len + offset)); 677 ret = inode_newsize_ok(inode, (len + offset));
649 if (ret) 678 if (ret)
650 return ret; 679 return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b90dbe55403a..d7947d90ccc3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) 186static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
187{ 187{
188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 188 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
189 unsigned int hint = 0;
190 unsigned int secno; 189 unsigned int secno;
191 190
192 /* 191 /*
@@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
194 * selected by background GC before. 193 * selected by background GC before.
195 * Those segments guarantee they have small valid blocks. 194 * Those segments guarantee they have small valid blocks.
196 */ 195 */
197next: 196 for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) {
198 secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++);
199 if (secno < TOTAL_SECS(sbi)) {
200 if (sec_usage_check(sbi, secno)) 197 if (sec_usage_check(sbi, secno))
201 goto next; 198 continue;
202 clear_bit(secno, dirty_i->victim_secmap); 199 clear_bit(secno, dirty_i->victim_secmap);
203 return secno * sbi->segs_per_sec; 200 return secno * sbi->segs_per_sec;
204 } 201 }
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 6eb8d269b53b..948d17bf7281 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num)
69 *buf++ = pad; 69 *buf++ = pad;
70} 70}
71 71
72f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) 72f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
73{ 73{
74 __u32 hash; 74 __u32 hash;
75 f2fs_hash_t f2fs_hash; 75 f2fs_hash_t f2fs_hash;
76 const char *p; 76 const char *p;
77 __u32 in[8], buf[4]; 77 __u32 in[8], buf[4];
78 const char *name = name_info->name;
79 size_t len = name_info->len;
78 80
79 if ((len <= 2) && (name[0] == '.') && 81 if ((len <= 2) && (name[0] == '.') &&
80 (name[1] == '.' || name[1] == '\0')) 82 (name[1] == '.' || name[1] == '\0'))
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1bba5228c197..5beeccef9ae1 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode,
172 stat_inc_inline_inode(inode); 172 stat_inc_inline_inode(inode);
173 } 173 }
174 174
175 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
175 sync_inode_page(&dn); 176 sync_inode_page(&dn);
176 f2fs_put_dnode(&dn); 177 f2fs_put_dnode(&dn);
177 178
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2cf6962f6cc8..2c39999f3868 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -267,13 +267,14 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
267void f2fs_evict_inode(struct inode *inode) 267void f2fs_evict_inode(struct inode *inode)
268{ 268{
269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 269 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
270 271
271 trace_f2fs_evict_inode(inode); 272 trace_f2fs_evict_inode(inode);
272 truncate_inode_pages_final(&inode->i_data); 273 truncate_inode_pages_final(&inode->i_data);
273 274
274 if (inode->i_ino == F2FS_NODE_INO(sbi) || 275 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
275 inode->i_ino == F2FS_META_INO(sbi)) 276 inode->i_ino == F2FS_META_INO(sbi))
276 goto no_delete; 277 goto out_clear;
277 278
278 f2fs_bug_on(get_dirty_dents(inode)); 279 f2fs_bug_on(get_dirty_dents(inode));
279 remove_dirty_dir_inode(inode); 280 remove_dirty_dir_inode(inode);
@@ -295,6 +296,13 @@ void f2fs_evict_inode(struct inode *inode)
295 296
296 sb_end_intwrite(inode->i_sb); 297 sb_end_intwrite(inode->i_sb);
297no_delete: 298no_delete:
298 clear_inode(inode);
299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); 299 invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
300 if (xnid)
301 invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
302 if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
303 add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
304 if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
305 add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
306out_clear:
307 clear_inode(inode);
300} 308}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a6bdddc33ce2..27b03776ffd2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -13,6 +13,7 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/dcache.h>
16 17
17#include "f2fs.h" 18#include "f2fs.h"
18#include "node.h" 19#include "node.h"
@@ -22,14 +23,13 @@
22 23
23static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) 24static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
24{ 25{
25 struct super_block *sb = dir->i_sb; 26 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
26 struct f2fs_sb_info *sbi = F2FS_SB(sb);
27 nid_t ino; 27 nid_t ino;
28 struct inode *inode; 28 struct inode *inode;
29 bool nid_free = false; 29 bool nid_free = false;
30 int err; 30 int err;
31 31
32 inode = new_inode(sb); 32 inode = new_inode(dir->i_sb);
33 if (!inode) 33 if (!inode)
34 return ERR_PTR(-ENOMEM); 34 return ERR_PTR(-ENOMEM);
35 35
@@ -102,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 102static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
103 bool excl) 103 bool excl)
104{ 104{
105 struct super_block *sb = dir->i_sb; 105 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
106 struct f2fs_sb_info *sbi = F2FS_SB(sb);
107 struct inode *inode; 106 struct inode *inode;
108 nid_t ino = 0; 107 nid_t ino = 0;
109 int err; 108 int err;
@@ -146,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
146 struct dentry *dentry) 145 struct dentry *dentry)
147{ 146{
148 struct inode *inode = old_dentry->d_inode; 147 struct inode *inode = old_dentry->d_inode;
149 struct super_block *sb = dir->i_sb; 148 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
150 struct f2fs_sb_info *sbi = F2FS_SB(sb);
151 int err; 149 int err;
152 150
153 f2fs_balance_fs(sbi); 151 f2fs_balance_fs(sbi);
@@ -207,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
207 205
208static int f2fs_unlink(struct inode *dir, struct dentry *dentry) 206static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
209{ 207{
210 struct super_block *sb = dir->i_sb; 208 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
211 struct f2fs_sb_info *sbi = F2FS_SB(sb);
212 struct inode *inode = dentry->d_inode; 209 struct inode *inode = dentry->d_inode;
213 struct f2fs_dir_entry *de; 210 struct f2fs_dir_entry *de;
214 struct page *page; 211 struct page *page;
@@ -242,8 +239,7 @@ fail:
242static int f2fs_symlink(struct inode *dir, struct dentry *dentry, 239static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
243 const char *symname) 240 const char *symname)
244{ 241{
245 struct super_block *sb = dir->i_sb; 242 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
246 struct f2fs_sb_info *sbi = F2FS_SB(sb);
247 struct inode *inode; 243 struct inode *inode;
248 size_t symlen = strlen(symname) + 1; 244 size_t symlen = strlen(symname) + 1;
249 int err; 245 int err;
@@ -330,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
330static int f2fs_mknod(struct inode *dir, struct dentry *dentry, 326static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
331 umode_t mode, dev_t rdev) 327 umode_t mode, dev_t rdev)
332{ 328{
333 struct super_block *sb = dir->i_sb; 329 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
334 struct f2fs_sb_info *sbi = F2FS_SB(sb);
335 struct inode *inode; 330 struct inode *inode;
336 int err = 0; 331 int err = 0;
337 332
@@ -369,8 +364,7 @@ out:
369static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, 364static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
370 struct inode *new_dir, struct dentry *new_dentry) 365 struct inode *new_dir, struct dentry *new_dentry)
371{ 366{
372 struct super_block *sb = old_dir->i_sb; 367 struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb);
373 struct f2fs_sb_info *sbi = F2FS_SB(sb);
374 struct inode *old_inode = old_dentry->d_inode; 368 struct inode *old_inode = old_dentry->d_inode;
375 struct inode *new_inode = new_dentry->d_inode; 369 struct inode *new_inode = new_dentry->d_inode;
376 struct page *old_dir_page; 370 struct page *old_dir_page;
@@ -393,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
393 goto out_old; 387 goto out_old;
394 } 388 }
395 389
396 f2fs_lock_op(sbi);
397
398 if (new_inode) { 390 if (new_inode) {
399 391
400 err = -ENOTEMPTY; 392 err = -ENOTEMPTY;
@@ -407,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
407 if (!new_entry) 399 if (!new_entry)
408 goto out_dir; 400 goto out_dir;
409 401
402 f2fs_lock_op(sbi);
403
410 err = acquire_orphan_inode(sbi); 404 err = acquire_orphan_inode(sbi);
411 if (err) 405 if (err)
412 goto put_out_dir; 406 goto put_out_dir;
@@ -435,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
435 update_inode_page(old_inode); 429 update_inode_page(old_inode);
436 update_inode_page(new_inode); 430 update_inode_page(new_inode);
437 } else { 431 } else {
432 f2fs_lock_op(sbi);
433
438 err = f2fs_add_link(new_dentry, old_inode); 434 err = f2fs_add_link(new_dentry, old_inode);
439 if (err) 435 if (err) {
436 f2fs_unlock_op(sbi);
440 goto out_dir; 437 goto out_dir;
438 }
441 439
442 if (old_dir_entry) { 440 if (old_dir_entry) {
443 inc_nlink(new_dir); 441 inc_nlink(new_dir);
@@ -472,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
472 return 0; 470 return 0;
473 471
474put_out_dir: 472put_out_dir:
473 f2fs_unlock_op(sbi);
475 kunmap(new_page); 474 kunmap(new_page);
476 f2fs_put_page(new_page, 0); 475 f2fs_put_page(new_page, 0);
477out_dir: 476out_dir:
@@ -479,7 +478,151 @@ out_dir:
479 kunmap(old_dir_page); 478 kunmap(old_dir_page);
480 f2fs_put_page(old_dir_page, 0); 479 f2fs_put_page(old_dir_page, 0);
481 } 480 }
481out_old:
482 kunmap(old_page);
483 f2fs_put_page(old_page, 0);
484out:
485 return err;
486}
487
488static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
489 struct inode *new_dir, struct dentry *new_dentry)
490{
491 struct super_block *sb = old_dir->i_sb;
492 struct f2fs_sb_info *sbi = F2FS_SB(sb);
493 struct inode *old_inode = old_dentry->d_inode;
494 struct inode *new_inode = new_dentry->d_inode;
495 struct page *old_dir_page, *new_dir_page;
496 struct page *old_page, *new_page;
497 struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
498 struct f2fs_dir_entry *old_entry, *new_entry;
499 int old_nlink = 0, new_nlink = 0;
500 int err = -ENOENT;
501
502 f2fs_balance_fs(sbi);
503
504 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
505 if (!old_entry)
506 goto out;
507
508 new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page);
509 if (!new_entry)
510 goto out_old;
511
512 /* prepare for updating ".." directory entry info later */
513 if (old_dir != new_dir) {
514 if (S_ISDIR(old_inode->i_mode)) {
515 err = -EIO;
516 old_dir_entry = f2fs_parent_dir(old_inode,
517 &old_dir_page);
518 if (!old_dir_entry)
519 goto out_new;
520 }
521
522 if (S_ISDIR(new_inode->i_mode)) {
523 err = -EIO;
524 new_dir_entry = f2fs_parent_dir(new_inode,
525 &new_dir_page);
526 if (!new_dir_entry)
527 goto out_old_dir;
528 }
529 }
530
531 /*
532 * If cross rename between file and directory those are not
533 * in the same directory, we will inc nlink of file's parent
534 * later, so we should check upper boundary of its nlink.
535 */
536 if ((!old_dir_entry || !new_dir_entry) &&
537 old_dir_entry != new_dir_entry) {
538 old_nlink = old_dir_entry ? -1 : 1;
539 new_nlink = -old_nlink;
540 err = -EMLINK;
541 if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
542 (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
543 goto out_new_dir;
544 }
545
546 f2fs_lock_op(sbi);
547
548 err = update_dent_inode(old_inode, &new_dentry->d_name);
549 if (err)
550 goto out_unlock;
551
552 err = update_dent_inode(new_inode, &old_dentry->d_name);
553 if (err)
554 goto out_undo;
555
556 /* update ".." directory entry info of old dentry */
557 if (old_dir_entry)
558 f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
559
560 /* update ".." directory entry info of new dentry */
561 if (new_dir_entry)
562 f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir);
563
564 /* update directory entry info of old dir inode */
565 f2fs_set_link(old_dir, old_entry, old_page, new_inode);
566
567 down_write(&F2FS_I(old_inode)->i_sem);
568 file_lost_pino(old_inode);
569 up_write(&F2FS_I(old_inode)->i_sem);
570
571 update_inode_page(old_inode);
572
573 old_dir->i_ctime = CURRENT_TIME;
574 if (old_nlink) {
575 down_write(&F2FS_I(old_dir)->i_sem);
576 if (old_nlink < 0)
577 drop_nlink(old_dir);
578 else
579 inc_nlink(old_dir);
580 up_write(&F2FS_I(old_dir)->i_sem);
581 }
582 mark_inode_dirty(old_dir);
583 update_inode_page(old_dir);
584
585 /* update directory entry info of new dir inode */
586 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
587
588 down_write(&F2FS_I(new_inode)->i_sem);
589 file_lost_pino(new_inode);
590 up_write(&F2FS_I(new_inode)->i_sem);
591
592 update_inode_page(new_inode);
593
594 new_dir->i_ctime = CURRENT_TIME;
595 if (new_nlink) {
596 down_write(&F2FS_I(new_dir)->i_sem);
597 if (new_nlink < 0)
598 drop_nlink(new_dir);
599 else
600 inc_nlink(new_dir);
601 up_write(&F2FS_I(new_dir)->i_sem);
602 }
603 mark_inode_dirty(new_dir);
604 update_inode_page(new_dir);
605
606 f2fs_unlock_op(sbi);
607 return 0;
608out_undo:
609 /* Still we may fail to recover name info of f2fs_inode here */
610 update_dent_inode(old_inode, &old_dentry->d_name);
611out_unlock:
482 f2fs_unlock_op(sbi); 612 f2fs_unlock_op(sbi);
613out_new_dir:
614 if (new_dir_entry) {
615 kunmap(new_dir_page);
616 f2fs_put_page(new_dir_page, 0);
617 }
618out_old_dir:
619 if (old_dir_entry) {
620 kunmap(old_dir_page);
621 f2fs_put_page(old_dir_page, 0);
622 }
623out_new:
624 kunmap(new_page);
625 f2fs_put_page(new_page, 0);
483out_old: 626out_old:
484 kunmap(old_page); 627 kunmap(old_page);
485 f2fs_put_page(old_page, 0); 628 f2fs_put_page(old_page, 0);
@@ -487,6 +630,71 @@ out:
487 return err; 630 return err;
488} 631}
489 632
633static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
634 struct inode *new_dir, struct dentry *new_dentry,
635 unsigned int flags)
636{
637 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
638 return -EINVAL;
639
640 if (flags & RENAME_EXCHANGE) {
641 return f2fs_cross_rename(old_dir, old_dentry,
642 new_dir, new_dentry);
643 }
644 /*
645 * VFS has already handled the new dentry existence case,
646 * here, we just deal with "RENAME_NOREPLACE" as regular rename.
647 */
648 return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry);
649}
650
651static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
652{
653 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
654 struct inode *inode;
655 int err;
656
657 inode = f2fs_new_inode(dir, mode);
658 if (IS_ERR(inode))
659 return PTR_ERR(inode);
660
661 inode->i_op = &f2fs_file_inode_operations;
662 inode->i_fop = &f2fs_file_operations;
663 inode->i_mapping->a_ops = &f2fs_dblock_aops;
664
665 f2fs_lock_op(sbi);
666 err = acquire_orphan_inode(sbi);
667 if (err)
668 goto out;
669
670 err = f2fs_do_tmpfile(inode, dir);
671 if (err)
672 goto release_out;
673
674 /*
675 * add this non-linked tmpfile to orphan list, in this way we could
676 * remove all unused data of tmpfile after abnormal power-off.
677 */
678 add_orphan_inode(sbi, inode->i_ino);
679 f2fs_unlock_op(sbi);
680
681 alloc_nid_done(sbi, inode->i_ino);
682 d_tmpfile(dentry, inode);
683 unlock_new_inode(inode);
684 return 0;
685
686release_out:
687 release_orphan_inode(sbi);
688out:
689 f2fs_unlock_op(sbi);
690 clear_nlink(inode);
691 unlock_new_inode(inode);
692 make_bad_inode(inode);
693 iput(inode);
694 alloc_nid_failed(sbi, inode->i_ino);
695 return err;
696}
697
490const struct inode_operations f2fs_dir_inode_operations = { 698const struct inode_operations f2fs_dir_inode_operations = {
491 .create = f2fs_create, 699 .create = f2fs_create,
492 .lookup = f2fs_lookup, 700 .lookup = f2fs_lookup,
@@ -497,6 +705,8 @@ const struct inode_operations f2fs_dir_inode_operations = {
497 .rmdir = f2fs_rmdir, 705 .rmdir = f2fs_rmdir,
498 .mknod = f2fs_mknod, 706 .mknod = f2fs_mknod,
499 .rename = f2fs_rename, 707 .rename = f2fs_rename,
708 .rename2 = f2fs_rename2,
709 .tmpfile = f2fs_tmpfile,
500 .getattr = f2fs_getattr, 710 .getattr = f2fs_getattr,
501 .setattr = f2fs_setattr, 711 .setattr = f2fs_setattr,
502 .get_acl = f2fs_get_acl, 712 .get_acl = f2fs_get_acl,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4b697ccc9b0c..d3d90d284631 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
25 25
26static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
27static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
28static struct kmem_cache *nat_entry_set_slab;
28 29
29bool available_free_memory(struct f2fs_sb_info *sbi, int type) 30bool available_free_memory(struct f2fs_sb_info *sbi, int type)
30{ 31{
@@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
90 91
91 /* get current nat block page with lock */ 92 /* get current nat block page with lock */
92 src_page = get_meta_page(sbi, src_off); 93 src_page = get_meta_page(sbi, src_off);
93
94 /* Dirty src_page means that it is already the new target NAT page. */
95 if (PageDirty(src_page))
96 return src_page;
97
98 dst_page = grab_meta_page(sbi, dst_off); 94 dst_page = grab_meta_page(sbi, dst_off);
95 f2fs_bug_on(PageDirty(src_page));
99 96
100 src_addr = page_address(src_page); 97 src_addr = page_address(src_page);
101 dst_addr = page_address(dst_page); 98 dst_addr = page_address(dst_page);
@@ -845,7 +842,7 @@ void remove_inode_page(struct inode *inode)
845 truncate_node(&dn); 842 truncate_node(&dn);
846} 843}
847 844
848struct page *new_inode_page(struct inode *inode, const struct qstr *name) 845struct page *new_inode_page(struct inode *inode)
849{ 846{
850 struct dnode_of_data dn; 847 struct dnode_of_data dn;
851 848
@@ -1234,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,
1234 if (wbc->for_reclaim) 1231 if (wbc->for_reclaim)
1235 goto redirty_out; 1232 goto redirty_out;
1236 1233
1237 mutex_lock(&sbi->node_write); 1234 down_read(&sbi->node_write);
1238 set_page_writeback(page); 1235 set_page_writeback(page);
1239 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1236 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
1240 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); 1237 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
1241 dec_page_count(sbi, F2FS_DIRTY_NODES); 1238 dec_page_count(sbi, F2FS_DIRTY_NODES);
1242 mutex_unlock(&sbi->node_write); 1239 up_read(&sbi->node_write);
1243 unlock_page(page); 1240 unlock_page(page);
1244 return 0; 1241 return 0;
1245 1242
@@ -1552,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1552 clear_node_page_dirty(page); 1549 clear_node_page_dirty(page);
1553} 1550}
1554 1551
1555static void recover_inline_xattr(struct inode *inode, struct page *page) 1552void recover_inline_xattr(struct inode *inode, struct page *page)
1556{ 1553{
1557 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 1554 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1558 void *src_addr, *dst_addr; 1555 void *src_addr, *dst_addr;
@@ -1591,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1591 nid_t new_xnid = nid_of_node(page); 1588 nid_t new_xnid = nid_of_node(page);
1592 struct node_info ni; 1589 struct node_info ni;
1593 1590
1594 recover_inline_xattr(inode, page);
1595
1596 if (!f2fs_has_xattr_block(ofs_of_node(page))) 1591 if (!f2fs_has_xattr_block(ofs_of_node(page)))
1597 return false; 1592 return false;
1598 1593
@@ -1744,7 +1739,90 @@ skip:
1744 return err; 1739 return err;
1745} 1740}
1746 1741
1747static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) 1742static struct nat_entry_set *grab_nat_entry_set(void)
1743{
1744 struct nat_entry_set *nes =
1745 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1746
1747 nes->entry_cnt = 0;
1748 INIT_LIST_HEAD(&nes->set_list);
1749 INIT_LIST_HEAD(&nes->entry_list);
1750 return nes;
1751}
1752
1753static void release_nat_entry_set(struct nat_entry_set *nes,
1754 struct f2fs_nm_info *nm_i)
1755{
1756 f2fs_bug_on(!list_empty(&nes->entry_list));
1757
1758 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1759 list_del(&nes->set_list);
1760 kmem_cache_free(nat_entry_set_slab, nes);
1761}
1762
1763static void adjust_nat_entry_set(struct nat_entry_set *nes,
1764 struct list_head *head)
1765{
1766 struct nat_entry_set *next = nes;
1767
1768 if (list_is_last(&nes->set_list, head))
1769 return;
1770
1771 list_for_each_entry_continue(next, head, set_list)
1772 if (nes->entry_cnt <= next->entry_cnt)
1773 break;
1774
1775 list_move_tail(&nes->set_list, &next->set_list);
1776}
1777
1778static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1779{
1780 struct nat_entry_set *nes;
1781 nid_t start_nid = START_NID(ne->ni.nid);
1782
1783 list_for_each_entry(nes, head, set_list) {
1784 if (nes->start_nid == start_nid) {
1785 list_move_tail(&ne->list, &nes->entry_list);
1786 nes->entry_cnt++;
1787 adjust_nat_entry_set(nes, head);
1788 return;
1789 }
1790 }
1791
1792 nes = grab_nat_entry_set();
1793
1794 nes->start_nid = start_nid;
1795 list_move_tail(&ne->list, &nes->entry_list);
1796 nes->entry_cnt++;
1797 list_add(&nes->set_list, head);
1798}
1799
1800static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1801{
1802 struct f2fs_nm_info *nm_i = NM_I(sbi);
1803 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1804 struct list_head *set_list = &nm_i->nat_entry_set;
1805 struct nat_entry *ne, *tmp;
1806
1807 write_lock(&nm_i->nat_tree_lock);
1808 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1809 if (nat_get_blkaddr(ne) == NEW_ADDR)
1810 continue;
1811 add_nat_entry(ne, set_list);
1812 nm_i->dirty_nat_cnt++;
1813 }
1814 write_unlock(&nm_i->nat_tree_lock);
1815}
1816
1817static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1818{
1819 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1820 return true;
1821 else
1822 return false;
1823}
1824
1825static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1748{ 1826{
1749 struct f2fs_nm_info *nm_i = NM_I(sbi); 1827 struct f2fs_nm_info *nm_i = NM_I(sbi);
1750 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1828 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1752,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1752 int i; 1830 int i;
1753 1831
1754 mutex_lock(&curseg->curseg_mutex); 1832 mutex_lock(&curseg->curseg_mutex);
1755
1756 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1757 mutex_unlock(&curseg->curseg_mutex);
1758 return false;
1759 }
1760
1761 for (i = 0; i < nats_in_cursum(sum); i++) { 1833 for (i = 0; i < nats_in_cursum(sum); i++) {
1762 struct nat_entry *ne; 1834 struct nat_entry *ne;
1763 struct f2fs_nat_entry raw_ne; 1835 struct f2fs_nat_entry raw_ne;
@@ -1767,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1767retry: 1839retry:
1768 write_lock(&nm_i->nat_tree_lock); 1840 write_lock(&nm_i->nat_tree_lock);
1769 ne = __lookup_nat_cache(nm_i, nid); 1841 ne = __lookup_nat_cache(nm_i, nid);
1770 if (ne) { 1842 if (ne)
1771 __set_nat_cache_dirty(nm_i, ne); 1843 goto found;
1772 write_unlock(&nm_i->nat_tree_lock); 1844
1773 continue;
1774 }
1775 ne = grab_nat_entry(nm_i, nid); 1845 ne = grab_nat_entry(nm_i, nid);
1776 if (!ne) { 1846 if (!ne) {
1777 write_unlock(&nm_i->nat_tree_lock); 1847 write_unlock(&nm_i->nat_tree_lock);
1778 goto retry; 1848 goto retry;
1779 } 1849 }
1780 node_info_from_raw_nat(&ne->ni, &raw_ne); 1850 node_info_from_raw_nat(&ne->ni, &raw_ne);
1851found:
1781 __set_nat_cache_dirty(nm_i, ne); 1852 __set_nat_cache_dirty(nm_i, ne);
1782 write_unlock(&nm_i->nat_tree_lock); 1853 write_unlock(&nm_i->nat_tree_lock);
1783 } 1854 }
1784 update_nats_in_cursum(sum, -i); 1855 update_nats_in_cursum(sum, -i);
1785 mutex_unlock(&curseg->curseg_mutex); 1856 mutex_unlock(&curseg->curseg_mutex);
1786 return true;
1787} 1857}
1788 1858
1789/* 1859/*
@@ -1794,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1794 struct f2fs_nm_info *nm_i = NM_I(sbi); 1864 struct f2fs_nm_info *nm_i = NM_I(sbi);
1795 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1865 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1796 struct f2fs_summary_block *sum = curseg->sum_blk; 1866 struct f2fs_summary_block *sum = curseg->sum_blk;
1797 struct nat_entry *ne, *cur; 1867 struct nat_entry_set *nes, *tmp;
1798 struct page *page = NULL; 1868 struct list_head *head = &nm_i->nat_entry_set;
1799 struct f2fs_nat_block *nat_blk = NULL; 1869 bool to_journal = true;
1800 nid_t start_nid = 0, end_nid = 0;
1801 bool flushed;
1802 1870
1803 flushed = flush_nats_in_journal(sbi); 1871 /* merge nat entries of dirty list to nat entry set temporarily */
1804 1872 merge_nats_in_set(sbi);
1805 if (!flushed)
1806 mutex_lock(&curseg->curseg_mutex);
1807
1808 /* 1) flush dirty nat caches */
1809 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
1810 nid_t nid;
1811 struct f2fs_nat_entry raw_ne;
1812 int offset = -1;
1813
1814 if (nat_get_blkaddr(ne) == NEW_ADDR)
1815 continue;
1816 1873
1817 nid = nat_get_nid(ne); 1874 /*
1875 * if there are no enough space in journal to store dirty nat
1876 * entries, remove all entries from journal and merge them
1877 * into nat entry set.
1878 */
1879 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
1880 remove_nats_in_journal(sbi);
1818 1881
1819 if (flushed) 1882 /*
1820 goto to_nat_page; 1883 * merge nat entries of dirty list to nat entry set temporarily
1884 */
1885 merge_nats_in_set(sbi);
1886 }
1821 1887
1822 /* if there is room for nat enries in curseg->sumpage */ 1888 if (!nm_i->dirty_nat_cnt)
1823 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1889 return;
1824 if (offset >= 0) {
1825 raw_ne = nat_in_journal(sum, offset);
1826 goto flush_now;
1827 }
1828to_nat_page:
1829 if (!page || (start_nid > nid || nid > end_nid)) {
1830 if (page) {
1831 f2fs_put_page(page, 1);
1832 page = NULL;
1833 }
1834 start_nid = START_NID(nid);
1835 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1836 1890
1837 /* 1891 /*
1838 * get nat block with dirty flag, increased reference 1892 * there are two steps to flush nat entries:
1839 * count, mapped and lock 1893 * #1, flush nat entries to journal in current hot data summary block.
1840 */ 1894 * #2, flush nat entries to nat page.
1895 */
1896 list_for_each_entry_safe(nes, tmp, head, set_list) {
1897 struct f2fs_nat_block *nat_blk;
1898 struct nat_entry *ne, *cur;
1899 struct page *page;
1900 nid_t start_nid = nes->start_nid;
1901
1902 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
1903 to_journal = false;
1904
1905 if (to_journal) {
1906 mutex_lock(&curseg->curseg_mutex);
1907 } else {
1841 page = get_next_nat_page(sbi, start_nid); 1908 page = get_next_nat_page(sbi, start_nid);
1842 nat_blk = page_address(page); 1909 nat_blk = page_address(page);
1910 f2fs_bug_on(!nat_blk);
1843 } 1911 }
1844 1912
1845 f2fs_bug_on(!nat_blk); 1913 /* flush dirty nats in nat entry set */
1846 raw_ne = nat_blk->entries[nid - start_nid]; 1914 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
1847flush_now: 1915 struct f2fs_nat_entry *raw_ne;
1848 raw_nat_from_node_info(&raw_ne, &ne->ni); 1916 nid_t nid = nat_get_nid(ne);
1849 1917 int offset;
1850 if (offset < 0) { 1918
1851 nat_blk->entries[nid - start_nid] = raw_ne; 1919 if (to_journal) {
1852 } else { 1920 offset = lookup_journal_in_cursum(sum,
1853 nat_in_journal(sum, offset) = raw_ne; 1921 NAT_JOURNAL, nid, 1);
1854 nid_in_journal(sum, offset) = cpu_to_le32(nid); 1922 f2fs_bug_on(offset < 0);
1855 } 1923 raw_ne = &nat_in_journal(sum, offset);
1924 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1925 } else {
1926 raw_ne = &nat_blk->entries[nid - start_nid];
1927 }
1928 raw_nat_from_node_info(raw_ne, &ne->ni);
1856 1929
1857 if (nat_get_blkaddr(ne) == NULL_ADDR && 1930 if (nat_get_blkaddr(ne) == NULL_ADDR &&
1858 add_free_nid(sbi, nid, false) <= 0) { 1931 add_free_nid(sbi, nid, false) <= 0) {
1859 write_lock(&nm_i->nat_tree_lock); 1932 write_lock(&nm_i->nat_tree_lock);
1860 __del_from_nat_cache(nm_i, ne); 1933 __del_from_nat_cache(nm_i, ne);
1861 write_unlock(&nm_i->nat_tree_lock); 1934 write_unlock(&nm_i->nat_tree_lock);
1862 } else { 1935 } else {
1863 write_lock(&nm_i->nat_tree_lock); 1936 write_lock(&nm_i->nat_tree_lock);
1864 __clear_nat_cache_dirty(nm_i, ne); 1937 __clear_nat_cache_dirty(nm_i, ne);
1865 write_unlock(&nm_i->nat_tree_lock); 1938 write_unlock(&nm_i->nat_tree_lock);
1939 }
1866 } 1940 }
1941
1942 if (to_journal)
1943 mutex_unlock(&curseg->curseg_mutex);
1944 else
1945 f2fs_put_page(page, 1);
1946
1947 release_nat_entry_set(nes, nm_i);
1867 } 1948 }
1868 if (!flushed) 1949
1869 mutex_unlock(&curseg->curseg_mutex); 1950 f2fs_bug_on(!list_empty(head));
1870 f2fs_put_page(page, 1); 1951 f2fs_bug_on(nm_i->dirty_nat_cnt);
1871} 1952}
1872 1953
1873static int init_node_manager(struct f2fs_sb_info *sbi) 1954static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1896,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1896 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1977 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1897 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 INIT_LIST_HEAD(&nm_i->nat_entries);
1898 INIT_LIST_HEAD(&nm_i->dirty_nat_entries); 1979 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1980 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1899 1981
1900 mutex_init(&nm_i->build_lock); 1982 mutex_init(&nm_i->build_lock);
1901 spin_lock_init(&nm_i->free_nid_list_lock); 1983 spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1976,19 +2058,30 @@ int __init create_node_manager_caches(void)
1976 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2058 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1977 sizeof(struct nat_entry)); 2059 sizeof(struct nat_entry));
1978 if (!nat_entry_slab) 2060 if (!nat_entry_slab)
1979 return -ENOMEM; 2061 goto fail;
1980 2062
1981 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2063 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1982 sizeof(struct free_nid)); 2064 sizeof(struct free_nid));
1983 if (!free_nid_slab) { 2065 if (!free_nid_slab)
1984 kmem_cache_destroy(nat_entry_slab); 2066 goto destory_nat_entry;
1985 return -ENOMEM; 2067
1986 } 2068 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2069 sizeof(struct nat_entry_set));
2070 if (!nat_entry_set_slab)
2071 goto destory_free_nid;
1987 return 0; 2072 return 0;
2073
2074destory_free_nid:
2075 kmem_cache_destroy(free_nid_slab);
2076destory_nat_entry:
2077 kmem_cache_destroy(nat_entry_slab);
2078fail:
2079 return -ENOMEM;
1988} 2080}
1989 2081
1990void destroy_node_manager_caches(void) 2082void destroy_node_manager_caches(void)
1991{ 2083{
2084 kmem_cache_destroy(nat_entry_set_slab);
1992 kmem_cache_destroy(free_nid_slab); 2085 kmem_cache_destroy(free_nid_slab);
1993 kmem_cache_destroy(nat_entry_slab); 2086 kmem_cache_destroy(nat_entry_slab);
1994} 2087}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
89 DIRTY_DENTS /* indicates dirty dentry pages */ 89 DIRTY_DENTS /* indicates dirty dentry pages */
90}; 90};
91 91
92struct nat_entry_set {
93 struct list_head set_list; /* link with all nat sets */
94 struct list_head entry_list; /* link with dirty nat entries */
95 nid_t start_nid; /* start nid of nats in set */
96 unsigned int entry_cnt; /* the # of nat entries in set */
97};
98
92/* 99/*
93 * For free nid mangement 100 * For free nid mangement
94 */ 101 */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368a4a86..fe1c6d921ba2 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
300 struct node_info ni; 300 struct node_info ni;
301 int err = 0, recovered = 0; 301 int err = 0, recovered = 0;
302 302
303 recover_inline_xattr(inode, page);
304
303 if (recover_inline_data(inode, page)) 305 if (recover_inline_data(inode, page))
304 goto out; 306 goto out;
305 307
@@ -434,7 +436,9 @@ next:
434 436
435int recover_fsync_data(struct f2fs_sb_info *sbi) 437int recover_fsync_data(struct f2fs_sb_info *sbi)
436{ 438{
439 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
437 struct list_head inode_list; 440 struct list_head inode_list;
441 block_t blkaddr;
438 int err; 442 int err;
439 bool need_writecp = false; 443 bool need_writecp = false;
440 444
@@ -447,6 +451,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
447 451
448 /* step #1: find fsynced inode numbers */ 452 /* step #1: find fsynced inode numbers */
449 sbi->por_doing = true; 453 sbi->por_doing = true;
454
455 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
456
450 err = find_fsync_dnodes(sbi, &inode_list); 457 err = find_fsync_dnodes(sbi, &inode_list);
451 if (err) 458 if (err)
452 goto out; 459 goto out;
@@ -462,8 +469,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
462out: 469out:
463 destroy_fsync_dnodes(&inode_list); 470 destroy_fsync_dnodes(&inode_list);
464 kmem_cache_destroy(fsync_entry_slab); 471 kmem_cache_destroy(fsync_entry_slab);
472
473 if (err) {
474 truncate_inode_pages_final(NODE_MAPPING(sbi));
475 truncate_inode_pages_final(META_MAPPING(sbi));
476 }
477
465 sbi->por_doing = false; 478 sbi->por_doing = false;
466 if (!err && need_writecp) 479 if (err) {
480 discard_next_dnode(sbi, blkaddr);
481
482 /* Flush all the NAT/SIT pages */
483 while (get_pages(sbi, F2FS_DIRTY_META))
484 sync_meta_pages(sbi, META, LONG_MAX);
485 } else if (need_writecp) {
467 write_checkpoint(sbi, false); 486 write_checkpoint(sbi, false);
487 }
468 return err; 488 return err;
469} 489}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04613df710a..0dfeebae2a50 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -239,6 +239,12 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; 239 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
240 struct flush_cmd cmd; 240 struct flush_cmd cmd;
241 241
242 trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
243 test_opt(sbi, FLUSH_MERGE));
244
245 if (test_opt(sbi, NOBARRIER))
246 return 0;
247
242 if (!test_opt(sbi, FLUSH_MERGE)) 248 if (!test_opt(sbi, FLUSH_MERGE))
243 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); 249 return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
244 250
@@ -272,13 +278,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
272 return -ENOMEM; 278 return -ENOMEM;
273 spin_lock_init(&fcc->issue_lock); 279 spin_lock_init(&fcc->issue_lock);
274 init_waitqueue_head(&fcc->flush_wait_queue); 280 init_waitqueue_head(&fcc->flush_wait_queue);
275 sbi->sm_info->cmd_control_info = fcc; 281 SM_I(sbi)->cmd_control_info = fcc;
276 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, 282 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
277 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); 283 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
278 if (IS_ERR(fcc->f2fs_issue_flush)) { 284 if (IS_ERR(fcc->f2fs_issue_flush)) {
279 err = PTR_ERR(fcc->f2fs_issue_flush); 285 err = PTR_ERR(fcc->f2fs_issue_flush);
280 kfree(fcc); 286 kfree(fcc);
281 sbi->sm_info->cmd_control_info = NULL; 287 SM_I(sbi)->cmd_control_info = NULL;
282 return err; 288 return err;
283 } 289 }
284 290
@@ -287,13 +293,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi)
287 293
288void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) 294void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
289{ 295{
290 struct flush_cmd_control *fcc = 296 struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
291 sbi->sm_info->cmd_control_info;
292 297
293 if (fcc && fcc->f2fs_issue_flush) 298 if (fcc && fcc->f2fs_issue_flush)
294 kthread_stop(fcc->f2fs_issue_flush); 299 kthread_stop(fcc->f2fs_issue_flush);
295 kfree(fcc); 300 kfree(fcc);
296 sbi->sm_info->cmd_control_info = NULL; 301 SM_I(sbi)->cmd_control_info = NULL;
297} 302}
298 303
299static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, 304static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
@@ -377,11 +382,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
377 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); 382 return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
378} 383}
379 384
380void discard_next_dnode(struct f2fs_sb_info *sbi) 385void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
381{ 386{
382 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
383 block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
384
385 if (f2fs_issue_discard(sbi, blkaddr, 1)) { 387 if (f2fs_issue_discard(sbi, blkaddr, 1)) {
386 struct page *page = grab_meta_page(sbi, blkaddr); 388 struct page *page = grab_meta_page(sbi, blkaddr);
387 /* zero-filled page */ 389 /* zero-filled page */
@@ -437,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi,
437static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) 439static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
438{ 440{
439 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); 441 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
440 unsigned int segno = -1; 442 unsigned int segno;
441 unsigned int total_segs = TOTAL_SEGS(sbi); 443 unsigned int total_segs = TOTAL_SEGS(sbi);
442 444
443 mutex_lock(&dirty_i->seglist_lock); 445 mutex_lock(&dirty_i->seglist_lock);
444 while (1) { 446 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs)
445 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
446 segno + 1);
447 if (segno >= total_segs)
448 break;
449 __set_test_and_free(sbi, segno); 447 __set_test_and_free(sbi, segno);
450 }
451 mutex_unlock(&dirty_i->seglist_lock); 448 mutex_unlock(&dirty_i->seglist_lock);
452} 449}
453 450
@@ -974,14 +971,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
974{ 971{
975 struct sit_info *sit_i = SIT_I(sbi); 972 struct sit_info *sit_i = SIT_I(sbi);
976 struct curseg_info *curseg; 973 struct curseg_info *curseg;
977 unsigned int old_cursegno;
978 974
979 curseg = CURSEG_I(sbi, type); 975 curseg = CURSEG_I(sbi, type);
980 976
981 mutex_lock(&curseg->curseg_mutex); 977 mutex_lock(&curseg->curseg_mutex);
982 978
983 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 979 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
984 old_cursegno = curseg->segno;
985 980
986 /* 981 /*
987 * __add_sum_entry should be resided under the curseg_mutex 982 * __add_sum_entry should be resided under the curseg_mutex
@@ -1002,7 +997,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1002 * since SSR needs latest valid block information. 997 * since SSR needs latest valid block information.
1003 */ 998 */
1004 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); 999 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
1005 locate_dirty_segment(sbi, old_cursegno);
1006 1000
1007 mutex_unlock(&sit_i->sentry_lock); 1001 mutex_unlock(&sit_i->sentry_lock);
1008 1002
@@ -1532,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1532 struct page *page = NULL; 1526 struct page *page = NULL;
1533 struct f2fs_sit_block *raw_sit = NULL; 1527 struct f2fs_sit_block *raw_sit = NULL;
1534 unsigned int start = 0, end = 0; 1528 unsigned int start = 0, end = 0;
1535 unsigned int segno = -1; 1529 unsigned int segno;
1536 bool flushed; 1530 bool flushed;
1537 1531
1538 mutex_lock(&curseg->curseg_mutex); 1532 mutex_lock(&curseg->curseg_mutex);
@@ -1544,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1544 */ 1538 */
1545 flushed = flush_sits_in_journal(sbi); 1539 flushed = flush_sits_in_journal(sbi);
1546 1540
1547 while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { 1541 for_each_set_bit(segno, bitmap, nsegs) {
1548 struct seg_entry *se = get_seg_entry(sbi, segno); 1542 struct seg_entry *se = get_seg_entry(sbi, segno);
1549 int sit_offset, offset; 1543 int sit_offset, offset;
1550 1544
@@ -1703,7 +1697,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
1703 struct curseg_info *array; 1697 struct curseg_info *array;
1704 int i; 1698 int i;
1705 1699
1706 array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); 1700 array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1707 if (!array) 1701 if (!array)
1708 return -ENOMEM; 1702 return -ENOMEM;
1709 1703
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7091204680f4..55973f7b0330 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
347 if (test_and_clear_bit(segno, free_i->free_segmap)) { 347 if (test_and_clear_bit(segno, free_i->free_segmap)) {
348 free_i->free_segments++; 348 free_i->free_segments++;
349 349
350 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), 350 next = find_next_bit(free_i->free_segmap,
351 start_segno); 351 start_segno + sbi->segs_per_sec, start_segno);
352 if (next >= start_segno + sbi->segs_per_sec) { 352 if (next >= start_segno + sbi->segs_per_sec) {
353 if (test_and_clear_bit(secno, free_i->free_secmap)) 353 if (test_and_clear_bit(secno, free_i->free_secmap))
354 free_i->free_sections++; 354 free_i->free_sections++;
@@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
486 if (S_ISDIR(inode->i_mode)) 486 if (S_ISDIR(inode->i_mode))
487 return false; 487 return false;
488 488
489 /* this is only set during fdatasync */
490 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
491 return true;
492
489 switch (SM_I(sbi)->ipu_policy) { 493 switch (SM_I(sbi)->ipu_policy) {
490 case F2FS_IPU_FORCE: 494 case F2FS_IPU_FORCE:
491 return true; 495 return true;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f96d9372ade..657582fc7601 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@ enum {
52 Opt_inline_xattr, 52 Opt_inline_xattr,
53 Opt_inline_data, 53 Opt_inline_data,
54 Opt_flush_merge, 54 Opt_flush_merge,
55 Opt_nobarrier,
55 Opt_err, 56 Opt_err,
56}; 57};
57 58
@@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
69 {Opt_inline_xattr, "inline_xattr"}, 70 {Opt_inline_xattr, "inline_xattr"},
70 {Opt_inline_data, "inline_data"}, 71 {Opt_inline_data, "inline_data"},
71 {Opt_flush_merge, "flush_merge"}, 72 {Opt_flush_merge, "flush_merge"},
73 {Opt_nobarrier, "nobarrier"},
72 {Opt_err, NULL}, 74 {Opt_err, NULL},
73}; 75};
74 76
@@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
339 case Opt_flush_merge: 341 case Opt_flush_merge:
340 set_opt(sbi, FLUSH_MERGE); 342 set_opt(sbi, FLUSH_MERGE);
341 break; 343 break;
344 case Opt_nobarrier:
345 set_opt(sbi, NOBARRIER);
346 break;
342 default: 347 default:
343 f2fs_msg(sb, KERN_ERR, 348 f2fs_msg(sb, KERN_ERR,
344 "Unrecognized mount option \"%s\" or missing value", 349 "Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
544 seq_puts(seq, ",inline_data"); 549 seq_puts(seq, ",inline_data");
545 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) 550 if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
546 seq_puts(seq, ",flush_merge"); 551 seq_puts(seq, ",flush_merge");
552 if (test_opt(sbi, NOBARRIER))
553 seq_puts(seq, ",nobarrier");
547 seq_printf(seq, ",active_logs=%u", sbi->active_logs); 554 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
548 555
549 return 0; 556 return 0;
@@ -615,7 +622,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
615 * Previous and new state of filesystem is RO, 622 * Previous and new state of filesystem is RO,
616 * so skip checking GC and FLUSH_MERGE conditions. 623 * so skip checking GC and FLUSH_MERGE conditions.
617 */ 624 */
618 if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) 625 if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
619 goto skip; 626 goto skip;
620 627
621 /* 628 /*
@@ -642,8 +649,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
642 */ 649 */
643 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { 650 if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
644 destroy_flush_cmd_control(sbi); 651 destroy_flush_cmd_control(sbi);
645 } else if (test_opt(sbi, FLUSH_MERGE) && 652 } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
646 !sbi->sm_info->cmd_control_info) {
647 err = create_flush_cmd_control(sbi); 653 err = create_flush_cmd_control(sbi);
648 if (err) 654 if (err)
649 goto restore_gc; 655 goto restore_gc;
@@ -947,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
947 mutex_init(&sbi->gc_mutex); 953 mutex_init(&sbi->gc_mutex);
948 mutex_init(&sbi->writepages); 954 mutex_init(&sbi->writepages);
949 mutex_init(&sbi->cp_mutex); 955 mutex_init(&sbi->cp_mutex);
950 mutex_init(&sbi->node_write); 956 init_rwsem(&sbi->node_write);
951 sbi->por_doing = false; 957 sbi->por_doing = false;
952 spin_lock_init(&sbi->stat_lock); 958 spin_lock_init(&sbi->stat_lock);
953 959
@@ -997,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
997 INIT_LIST_HEAD(&sbi->dir_inode_list); 1003 INIT_LIST_HEAD(&sbi->dir_inode_list);
998 spin_lock_init(&sbi->dir_inode_lock); 1004 spin_lock_init(&sbi->dir_inode_lock);
999 1005
1000 init_orphan_info(sbi); 1006 init_ino_entry_info(sbi);
1001 1007
1002 /* setup f2fs internal modules */ 1008 /* setup f2fs internal modules */
1003 err = build_segment_manager(sbi); 1009 err = build_segment_manager(sbi);
@@ -1034,8 +1040,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1034 goto free_node_inode; 1040 goto free_node_inode;
1035 } 1041 }
1036 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 1042 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
1043 iput(root);
1037 err = -EINVAL; 1044 err = -EINVAL;
1038 goto free_root_inode; 1045 goto free_node_inode;
1039 } 1046 }
1040 1047
1041 sb->s_root = d_make_root(root); /* allocate root dentry */ 1048 sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -1082,7 +1089,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
1082 * If filesystem is not mounted as read-only then 1089 * If filesystem is not mounted as read-only then
1083 * do start the gc_thread. 1090 * do start the gc_thread.
1084 */ 1091 */
1085 if (!(sb->s_flags & MS_RDONLY)) { 1092 if (!f2fs_readonly(sb)) {
1086 /* After POR, we can run background GC thread.*/ 1093 /* After POR, we can run background GC thread.*/
1087 err = start_gc_thread(sbi); 1094 err = start_gc_thread(sbi);
1088 if (err) 1095 if (err)
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index b983990b4a9f..d06d44363fea 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -587,6 +587,69 @@ TRACE_EVENT(f2fs_fallocate,
587 __entry->ret) 587 __entry->ret)
588); 588);
589 589
590TRACE_EVENT(f2fs_direct_IO_enter,
591
592 TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
593
594 TP_ARGS(inode, offset, len, rw),
595
596 TP_STRUCT__entry(
597 __field(dev_t, dev)
598 __field(ino_t, ino)
599 __field(loff_t, pos)
600 __field(unsigned long, len)
601 __field(int, rw)
602 ),
603
604 TP_fast_assign(
605 __entry->dev = inode->i_sb->s_dev;
606 __entry->ino = inode->i_ino;
607 __entry->pos = offset;
608 __entry->len = len;
609 __entry->rw = rw;
610 ),
611
612 TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d",
613 show_dev_ino(__entry),
614 __entry->pos,
615 __entry->len,
616 __entry->rw)
617);
618
619TRACE_EVENT(f2fs_direct_IO_exit,
620
621 TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
622 int rw, int ret),
623
624 TP_ARGS(inode, offset, len, rw, ret),
625
626 TP_STRUCT__entry(
627 __field(dev_t, dev)
628 __field(ino_t, ino)
629 __field(loff_t, pos)
630 __field(unsigned long, len)
631 __field(int, rw)
632 __field(int, ret)
633 ),
634
635 TP_fast_assign(
636 __entry->dev = inode->i_sb->s_dev;
637 __entry->ino = inode->i_ino;
638 __entry->pos = offset;
639 __entry->len = len;
640 __entry->rw = rw;
641 __entry->ret = ret;
642 ),
643
644 TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu "
645 "rw = %d ret = %d",
646 show_dev_ino(__entry),
647 __entry->pos,
648 __entry->len,
649 __entry->rw,
650 __entry->ret)
651);
652
590TRACE_EVENT(f2fs_reserve_new_block, 653TRACE_EVENT(f2fs_reserve_new_block,
591 654
592 TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), 655 TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node),
@@ -926,6 +989,30 @@ TRACE_EVENT(f2fs_issue_discard,
926 (unsigned long long)__entry->blkstart, 989 (unsigned long long)__entry->blkstart,
927 (unsigned long long)__entry->blklen) 990 (unsigned long long)__entry->blklen)
928); 991);
992
993TRACE_EVENT(f2fs_issue_flush,
994
995 TP_PROTO(struct super_block *sb, bool nobarrier, bool flush_merge),
996
997 TP_ARGS(sb, nobarrier, flush_merge),
998
999 TP_STRUCT__entry(
1000 __field(dev_t, dev)
1001 __field(bool, nobarrier)
1002 __field(bool, flush_merge)
1003 ),
1004
1005 TP_fast_assign(
1006 __entry->dev = sb->s_dev;
1007 __entry->nobarrier = nobarrier;
1008 __entry->flush_merge = flush_merge;
1009 ),
1010
1011 TP_printk("dev = (%d,%d), %s %s",
1012 show_dev(__entry),
1013 __entry->nobarrier ? "skip (nobarrier)" : "issue",
1014 __entry->flush_merge ? " with flush_merge" : "")
1015);
929#endif /* _TRACE_F2FS_H */ 1016#endif /* _TRACE_F2FS_H */
930 1017
931 /* This part must be outside protection */ 1018 /* This part must be outside protection */