about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-12 22:28:50 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-02-12 22:28:50 -0500
commit: c7d7b98671552abade78834c522b7308bda73c0d (patch)
tree: 062dab589e90a8006d2be5d55f8f95f4f16be93b /fs
parent: 818099574b04c5301eacbbcd441022b353a65466 (diff)
parent: 1a118ccfd60fc78e64c0a3ab9e85075545839d6e (diff)
Merge tag 'for-f2fs-3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "Major changes are to:
   - add f2fs_io_tracer and F2FS_IOC_GETVERSION
   - fix wrong acl assignment from parent
   - fix accessing wrong data blocks
   - fix wrong condition check for f2fs_sync_fs
   - align start block address for direct_io
   - add and refactor the readahead flows of FS metadata
   - refactor atomic and volatile write policies

  But most of patches are for clean-ups and minor bug fixes. Some of them refactor old code too"

* tag 'for-f2fs-3.20' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (64 commits)
  f2fs: use spinlock for segmap_lock instead of rwlock
  f2fs: fix accessing wrong indexed data blocks
  f2fs: avoid variable length array
  f2fs: fix sparse warnings
  f2fs: allocate data blocks in advance for f2fs_direct_IO
  f2fs: introduce macros to convert bytes and blocks in f2fs
  f2fs: call set_buffer_new for get_block
  f2fs: check node page contents all the time
  f2fs: avoid data offset overflow when lseeking huge file
  f2fs: fix to use highmem for pages of newly created directory
  f2fs: introduce a batched trim
  f2fs: merge {invalidate,release}page for meta/node/data pages
  f2fs: show the number of writeback pages in stat
  f2fs: keep PagePrivate during releasepage
  f2fs: should fail mount when trying to recover data on read-only dev
  f2fs: split UMOUNT and FASTBOOT flags
  f2fs: avoid write_checkpoint if f2fs is mounted readonly
  f2fs: support norecovery mount option
  f2fs: fix not to drop mount options when retrying fill_super
  f2fs: merge flags in struct f2fs_sb_info
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/f2fs/Kconfig 10
-rw-r--r-- fs/f2fs/Makefile 1
-rw-r--r-- fs/f2fs/acl.c 6
-rw-r--r-- fs/f2fs/checkpoint.c 95
-rw-r--r-- fs/f2fs/data.c 218
-rw-r--r-- fs/f2fs/debug.c 59
-rw-r--r-- fs/f2fs/dir.c 3
-rw-r--r-- fs/f2fs/f2fs.h 120
-rw-r--r-- fs/f2fs/file.c 100
-rw-r--r-- fs/f2fs/gc.c 38
-rw-r--r-- fs/f2fs/gc.h 33
-rw-r--r-- fs/f2fs/inline.c 32
-rw-r--r-- fs/f2fs/inode.c 37
-rw-r--r-- fs/f2fs/namei.c 2
-rw-r--r-- fs/f2fs/node.c 154
-rw-r--r-- fs/f2fs/node.h 45
-rw-r--r-- fs/f2fs/recovery.c 11
-rw-r--r-- fs/f2fs/segment.c 194
-rw-r--r-- fs/f2fs/segment.h 29
-rw-r--r-- fs/f2fs/super.c 75
-rw-r--r-- fs/f2fs/trace.c 159
-rw-r--r-- fs/f2fs/trace.h 46
22 files changed, 986 insertions, 481 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 736a348509f7..94e2d2ffabe1 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -71,3 +71,13 @@ config F2FS_CHECK_FS
71 Enables BUG_ONs which check the filesystem consistency in runtime. 71 Enables BUG_ONs which check the filesystem consistency in runtime.
72 72
73 If you want to improve the performance, say N. 73 If you want to improve the performance, say N.
74
75config F2FS_IO_TRACE
76 bool "F2FS IO tracer"
77 depends on F2FS_FS
78 depends on FUNCTION_TRACER
79 help
80 F2FS IO trace is based on a function trace, which gathers process
81 information and block IO patterns in the filesystem level.
82
83 If unsure, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 2e35da12d292..d92397731db8 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -5,3 +5,4 @@ f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
5f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o 5f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
6f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o 6f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
7f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o 7f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
8f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 1ccb26bc2a0b..742202779bd5 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -62,7 +62,7 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
62 if (count == 0) 62 if (count == 0)
63 return NULL; 63 return NULL;
64 64
65 acl = posix_acl_alloc(count, GFP_KERNEL); 65 acl = posix_acl_alloc(count, GFP_NOFS);
66 if (!acl) 66 if (!acl)
67 return ERR_PTR(-ENOMEM); 67 return ERR_PTR(-ENOMEM);
68 68
@@ -116,7 +116,7 @@ static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
116 int i; 116 int i;
117 117
118 f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * 118 f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
119 sizeof(struct f2fs_acl_entry), GFP_KERNEL); 119 sizeof(struct f2fs_acl_entry), GFP_NOFS);
120 if (!f2fs_acl) 120 if (!f2fs_acl)
121 return ERR_PTR(-ENOMEM); 121 return ERR_PTR(-ENOMEM);
122 122
@@ -396,7 +396,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
396 posix_acl_release(default_acl); 396 posix_acl_release(default_acl);
397 } 397 }
398 if (acl) { 398 if (acl) {
399 if (error) 399 if (!error)
400 error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, 400 error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
401 ipage); 401 ipage);
402 posix_acl_release(acl); 402 posix_acl_release(acl);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e6c271fefaca..7f794b72b3b7 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -20,10 +20,11 @@
20#include "f2fs.h" 20#include "f2fs.h"
21#include "node.h" 21#include "node.h"
22#include "segment.h" 22#include "segment.h"
23#include "trace.h"
23#include <trace/events/f2fs.h> 24#include <trace/events/f2fs.h>
24 25
25static struct kmem_cache *ino_entry_slab; 26static struct kmem_cache *ino_entry_slab;
26static struct kmem_cache *inode_entry_slab; 27struct kmem_cache *inode_entry_slab;
27 28
28/* 29/*
29 * We guarantee no failure on the returned page. 30 * We guarantee no failure on the returned page.
@@ -50,6 +51,11 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
50{ 51{
51 struct address_space *mapping = META_MAPPING(sbi); 52 struct address_space *mapping = META_MAPPING(sbi);
52 struct page *page; 53 struct page *page;
54 struct f2fs_io_info fio = {
55 .type = META,
56 .rw = READ_SYNC | REQ_META | REQ_PRIO,
57 .blk_addr = index,
58 };
53repeat: 59repeat:
54 page = grab_cache_page(mapping, index); 60 page = grab_cache_page(mapping, index);
55 if (!page) { 61 if (!page) {
@@ -59,8 +65,7 @@ repeat:
59 if (PageUptodate(page)) 65 if (PageUptodate(page))
60 goto out; 66 goto out;
61 67
62 if (f2fs_submit_page_bio(sbi, page, index, 68 if (f2fs_submit_page_bio(sbi, page, &fio))
63 READ_SYNC | REQ_META | REQ_PRIO))
64 goto repeat; 69 goto repeat;
65 70
66 lock_page(page); 71 lock_page(page);
@@ -112,14 +117,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
112 block_t prev_blk_addr = 0; 117 block_t prev_blk_addr = 0;
113 struct page *page; 118 struct page *page;
114 block_t blkno = start; 119 block_t blkno = start;
115
116 struct f2fs_io_info fio = { 120 struct f2fs_io_info fio = {
117 .type = META, 121 .type = META,
118 .rw = READ_SYNC | REQ_META | REQ_PRIO 122 .rw = READ_SYNC | REQ_META | REQ_PRIO
119 }; 123 };
120 124
121 for (; nrpages-- > 0; blkno++) { 125 for (; nrpages-- > 0; blkno++) {
122 block_t blk_addr;
123 126
124 if (!is_valid_blkaddr(sbi, blkno, type)) 127 if (!is_valid_blkaddr(sbi, blkno, type))
125 goto out; 128 goto out;
@@ -130,27 +133,27 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
130 NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) 133 NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
131 blkno = 0; 134 blkno = 0;
132 /* get nat block addr */ 135 /* get nat block addr */
133 blk_addr = current_nat_addr(sbi, 136 fio.blk_addr = current_nat_addr(sbi,
134 blkno * NAT_ENTRY_PER_BLOCK); 137 blkno * NAT_ENTRY_PER_BLOCK);
135 break; 138 break;
136 case META_SIT: 139 case META_SIT:
137 /* get sit block addr */ 140 /* get sit block addr */
138 blk_addr = current_sit_addr(sbi, 141 fio.blk_addr = current_sit_addr(sbi,
139 blkno * SIT_ENTRY_PER_BLOCK); 142 blkno * SIT_ENTRY_PER_BLOCK);
140 if (blkno != start && prev_blk_addr + 1 != blk_addr) 143 if (blkno != start && prev_blk_addr + 1 != fio.blk_addr)
141 goto out; 144 goto out;
142 prev_blk_addr = blk_addr; 145 prev_blk_addr = fio.blk_addr;
143 break; 146 break;
144 case META_SSA: 147 case META_SSA:
145 case META_CP: 148 case META_CP:
146 case META_POR: 149 case META_POR:
147 blk_addr = blkno; 150 fio.blk_addr = blkno;
148 break; 151 break;
149 default: 152 default:
150 BUG(); 153 BUG();
151 } 154 }
152 155
153 page = grab_cache_page(META_MAPPING(sbi), blk_addr); 156 page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr);
154 if (!page) 157 if (!page)
155 continue; 158 continue;
156 if (PageUptodate(page)) { 159 if (PageUptodate(page)) {
@@ -158,7 +161,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
158 continue; 161 continue;
159 } 162 }
160 163
161 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); 164 f2fs_submit_page_mbio(sbi, page, &fio);
162 f2fs_put_page(page, 0); 165 f2fs_put_page(page, 0);
163 } 166 }
164out: 167out:
@@ -187,7 +190,7 @@ static int f2fs_write_meta_page(struct page *page,
187 190
188 trace_f2fs_writepage(page, META); 191 trace_f2fs_writepage(page, META);
189 192
190 if (unlikely(sbi->por_doing)) 193 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
191 goto redirty_out; 194 goto redirty_out;
192 if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) 195 if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
193 goto redirty_out; 196 goto redirty_out;
@@ -299,6 +302,8 @@ static int f2fs_set_meta_page_dirty(struct page *page)
299 if (!PageDirty(page)) { 302 if (!PageDirty(page)) {
300 __set_page_dirty_nobuffers(page); 303 __set_page_dirty_nobuffers(page);
301 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); 304 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
305 SetPagePrivate(page);
306 f2fs_trace_pid(page);
302 return 1; 307 return 1;
303 } 308 }
304 return 0; 309 return 0;
@@ -308,6 +313,8 @@ const struct address_space_operations f2fs_meta_aops = {
308 .writepage = f2fs_write_meta_page, 313 .writepage = f2fs_write_meta_page,
309 .writepages = f2fs_write_meta_pages, 314 .writepages = f2fs_write_meta_pages,
310 .set_page_dirty = f2fs_set_meta_page_dirty, 315 .set_page_dirty = f2fs_set_meta_page_dirty,
316 .invalidatepage = f2fs_invalidate_page,
317 .releasepage = f2fs_release_page,
311}; 318};
312 319
313static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) 320static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -462,7 +469,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
462 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) 469 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
463 return; 470 return;
464 471
465 sbi->por_doing = true; 472 set_sbi_flag(sbi, SBI_POR_DOING);
466 473
467 start_blk = __start_cp_addr(sbi) + 1 + 474 start_blk = __start_cp_addr(sbi) + 1 +
468 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); 475 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
@@ -483,7 +490,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
483 } 490 }
484 /* clear Orphan Flag */ 491 /* clear Orphan Flag */
485 clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); 492 clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
486 sbi->por_doing = false; 493 clear_sbi_flag(sbi, SBI_POR_DOING);
487 return; 494 return;
488} 495}
489 496
@@ -567,7 +574,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
567 if (crc_offset >= blk_size) 574 if (crc_offset >= blk_size)
568 goto invalid_cp1; 575 goto invalid_cp1;
569 576
570 crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); 577 crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
571 if (!f2fs_crc_valid(crc, cp_block, crc_offset)) 578 if (!f2fs_crc_valid(crc, cp_block, crc_offset))
572 goto invalid_cp1; 579 goto invalid_cp1;
573 580
@@ -582,7 +589,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
582 if (crc_offset >= blk_size) 589 if (crc_offset >= blk_size)
583 goto invalid_cp2; 590 goto invalid_cp2;
584 591
585 crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); 592 crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
586 if (!f2fs_crc_valid(crc, cp_block, crc_offset)) 593 if (!f2fs_crc_valid(crc, cp_block, crc_offset))
587 goto invalid_cp2; 594 goto invalid_cp2;
588 595
@@ -669,7 +676,7 @@ fail_no_cp:
669 return -EINVAL; 676 return -EINVAL;
670} 677}
671 678
672static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) 679static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
673{ 680{
674 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 681 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
675 682
@@ -686,7 +693,7 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
686void update_dirty_page(struct inode *inode, struct page *page) 693void update_dirty_page(struct inode *inode, struct page *page)
687{ 694{
688 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 695 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
689 struct dir_inode_entry *new; 696 struct inode_entry *new;
690 int ret = 0; 697 int ret = 0;
691 698
692 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode)) 699 if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
@@ -710,12 +717,13 @@ void update_dirty_page(struct inode *inode, struct page *page)
710 kmem_cache_free(inode_entry_slab, new); 717 kmem_cache_free(inode_entry_slab, new);
711out: 718out:
712 SetPagePrivate(page); 719 SetPagePrivate(page);
720 f2fs_trace_pid(page);
713} 721}
714 722
715void add_dirty_dir_inode(struct inode *inode) 723void add_dirty_dir_inode(struct inode *inode)
716{ 724{
717 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 725 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
718 struct dir_inode_entry *new = 726 struct inode_entry *new =
719 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); 727 f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
720 int ret = 0; 728 int ret = 0;
721 729
@@ -733,7 +741,7 @@ void add_dirty_dir_inode(struct inode *inode)
733void remove_dirty_dir_inode(struct inode *inode) 741void remove_dirty_dir_inode(struct inode *inode)
734{ 742{
735 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 743 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
736 struct dir_inode_entry *entry; 744 struct inode_entry *entry;
737 745
738 if (!S_ISDIR(inode->i_mode)) 746 if (!S_ISDIR(inode->i_mode))
739 return; 747 return;
@@ -763,7 +771,7 @@ void remove_dirty_dir_inode(struct inode *inode)
763void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) 771void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
764{ 772{
765 struct list_head *head; 773 struct list_head *head;
766 struct dir_inode_entry *entry; 774 struct inode_entry *entry;
767 struct inode *inode; 775 struct inode *inode;
768retry: 776retry:
769 if (unlikely(f2fs_cp_error(sbi))) 777 if (unlikely(f2fs_cp_error(sbi)))
@@ -776,7 +784,7 @@ retry:
776 spin_unlock(&sbi->dir_inode_lock); 784 spin_unlock(&sbi->dir_inode_lock);
777 return; 785 return;
778 } 786 }
779 entry = list_entry(head->next, struct dir_inode_entry, list); 787 entry = list_entry(head->next, struct inode_entry, list);
780 inode = igrab(entry->inode); 788 inode = igrab(entry->inode);
781 spin_unlock(&sbi->dir_inode_lock); 789 spin_unlock(&sbi->dir_inode_lock);
782 if (inode) { 790 if (inode) {
@@ -922,7 +930,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
922 ckpt->next_free_nid = cpu_to_le32(last_nid); 930 ckpt->next_free_nid = cpu_to_le32(last_nid);
923 931
924 /* 2 cp + n data seg summary + orphan inode blocks */ 932 /* 2 cp + n data seg summary + orphan inode blocks */
925 data_sum_blocks = npages_for_summary_flush(sbi); 933 data_sum_blocks = npages_for_summary_flush(sbi, false);
926 if (data_sum_blocks < NR_CURSEG_DATA_TYPE) 934 if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
927 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); 935 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
928 else 936 else
@@ -932,24 +940,31 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
932 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + 940 ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
933 orphan_blocks); 941 orphan_blocks);
934 942
935 if (cpc->reason == CP_UMOUNT) { 943 if (__remain_node_summaries(cpc->reason))
936 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
937 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ 944 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
938 cp_payload_blks + data_sum_blocks + 945 cp_payload_blks + data_sum_blocks +
939 orphan_blocks + NR_CURSEG_NODE_TYPE); 946 orphan_blocks + NR_CURSEG_NODE_TYPE);
940 } else { 947 else
941 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
942 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + 948 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
943 cp_payload_blks + data_sum_blocks + 949 cp_payload_blks + data_sum_blocks +
944 orphan_blocks); 950 orphan_blocks);
945 } 951
952 if (cpc->reason == CP_UMOUNT)
953 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
954 else
955 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
956
957 if (cpc->reason == CP_FASTBOOT)
958 set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
959 else
960 clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
946 961
947 if (orphan_num) 962 if (orphan_num)
948 set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 963 set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
949 else 964 else
950 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); 965 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
951 966
952 if (sbi->need_fsck) 967 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
953 set_ckpt_flags(ckpt, CP_FSCK_FLAG); 968 set_ckpt_flags(ckpt, CP_FSCK_FLAG);
954 969
955 /* update SIT/NAT bitmap */ 970 /* update SIT/NAT bitmap */
@@ -966,15 +981,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
966 /* write out checkpoint buffer at block 0 */ 981 /* write out checkpoint buffer at block 0 */
967 cp_page = grab_meta_page(sbi, start_blk++); 982 cp_page = grab_meta_page(sbi, start_blk++);
968 kaddr = page_address(cp_page); 983 kaddr = page_address(cp_page);
969 memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); 984 memcpy(kaddr, ckpt, F2FS_BLKSIZE);
970 set_page_dirty(cp_page); 985 set_page_dirty(cp_page);
971 f2fs_put_page(cp_page, 1); 986 f2fs_put_page(cp_page, 1);
972 987
973 for (i = 1; i < 1 + cp_payload_blks; i++) { 988 for (i = 1; i < 1 + cp_payload_blks; i++) {
974 cp_page = grab_meta_page(sbi, start_blk++); 989 cp_page = grab_meta_page(sbi, start_blk++);
975 kaddr = page_address(cp_page); 990 kaddr = page_address(cp_page);
976 memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, 991 memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, F2FS_BLKSIZE);
977 (1 << sbi->log_blocksize));
978 set_page_dirty(cp_page); 992 set_page_dirty(cp_page);
979 f2fs_put_page(cp_page, 1); 993 f2fs_put_page(cp_page, 1);
980 } 994 }
@@ -986,7 +1000,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
986 1000
987 write_data_summaries(sbi, start_blk); 1001 write_data_summaries(sbi, start_blk);
988 start_blk += data_sum_blocks; 1002 start_blk += data_sum_blocks;
989 if (cpc->reason == CP_UMOUNT) { 1003 if (__remain_node_summaries(cpc->reason)) {
990 write_node_summaries(sbi, start_blk); 1004 write_node_summaries(sbi, start_blk);
991 start_blk += NR_CURSEG_NODE_TYPE; 1005 start_blk += NR_CURSEG_NODE_TYPE;
992 } 1006 }
@@ -994,7 +1008,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
994 /* writeout checkpoint block */ 1008 /* writeout checkpoint block */
995 cp_page = grab_meta_page(sbi, start_blk); 1009 cp_page = grab_meta_page(sbi, start_blk);
996 kaddr = page_address(cp_page); 1010 kaddr = page_address(cp_page);
997 memcpy(kaddr, ckpt, (1 << sbi->log_blocksize)); 1011 memcpy(kaddr, ckpt, F2FS_BLKSIZE);
998 set_page_dirty(cp_page); 1012 set_page_dirty(cp_page);
999 f2fs_put_page(cp_page, 1); 1013 f2fs_put_page(cp_page, 1);
1000 1014
@@ -1023,7 +1037,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1023 return; 1037 return;
1024 1038
1025 clear_prefree_segments(sbi); 1039 clear_prefree_segments(sbi);
1026 F2FS_RESET_SB_DIRT(sbi); 1040 clear_sbi_flag(sbi, SBI_IS_DIRTY);
1027} 1041}
1028 1042
1029/* 1043/*
@@ -1038,10 +1052,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1038 1052
1039 mutex_lock(&sbi->cp_mutex); 1053 mutex_lock(&sbi->cp_mutex);
1040 1054
1041 if (!sbi->s_dirty && cpc->reason != CP_DISCARD) 1055 if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1056 cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT)
1042 goto out; 1057 goto out;
1043 if (unlikely(f2fs_cp_error(sbi))) 1058 if (unlikely(f2fs_cp_error(sbi)))
1044 goto out; 1059 goto out;
1060 if (f2fs_readonly(sbi->sb))
1061 goto out;
1045 if (block_operations(sbi)) 1062 if (block_operations(sbi))
1046 goto out; 1063 goto out;
1047 1064
@@ -1102,8 +1119,8 @@ int __init create_checkpoint_caches(void)
1102 sizeof(struct ino_entry)); 1119 sizeof(struct ino_entry));
1103 if (!ino_entry_slab) 1120 if (!ino_entry_slab)
1104 return -ENOMEM; 1121 return -ENOMEM;
1105 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", 1122 inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
1106 sizeof(struct dir_inode_entry)); 1123 sizeof(struct inode_entry));
1107 if (!inode_entry_slab) { 1124 if (!inode_entry_slab) {
1108 kmem_cache_destroy(ino_entry_slab); 1125 kmem_cache_destroy(ino_entry_slab);
1109 return -ENOMEM; 1126 return -ENOMEM;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7ec697b37f19..985ed023a750 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -22,6 +22,7 @@
22#include "f2fs.h" 22#include "f2fs.h"
23#include "node.h" 23#include "node.h"
24#include "segment.h" 24#include "segment.h"
25#include "trace.h"
25#include <trace/events/f2fs.h> 26#include <trace/events/f2fs.h>
26 27
27static void f2fs_read_end_io(struct bio *bio, int err) 28static void f2fs_read_end_io(struct bio *bio, int err)
@@ -95,11 +96,9 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
95 return; 96 return;
96 97
97 if (is_read_io(fio->rw)) 98 if (is_read_io(fio->rw))
98 trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, 99 trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
99 fio->type, io->bio);
100 else 100 else
101 trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, 101 trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
102 fio->type, io->bio);
103 102
104 submit_bio(fio->rw, io->bio); 103 submit_bio(fio->rw, io->bio);
105 io->bio = NULL; 104 io->bio = NULL;
@@ -132,14 +131,15 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
132 * Return unlocked page. 131 * Return unlocked page.
133 */ 132 */
134int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page, 133int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
135 block_t blk_addr, int rw) 134 struct f2fs_io_info *fio)
136{ 135{
137 struct bio *bio; 136 struct bio *bio;
138 137
139 trace_f2fs_submit_page_bio(page, blk_addr, rw); 138 trace_f2fs_submit_page_bio(page, fio);
139 f2fs_trace_ios(page, fio, 0);
140 140
141 /* Allocate a new bio */ 141 /* Allocate a new bio */
142 bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw)); 142 bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw));
143 143
144 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { 144 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
145 bio_put(bio); 145 bio_put(bio);
@@ -147,12 +147,12 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
147 return -EFAULT; 147 return -EFAULT;
148 } 148 }
149 149
150 submit_bio(rw, bio); 150 submit_bio(fio->rw, bio);
151 return 0; 151 return 0;
152} 152}
153 153
154void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, 154void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
155 block_t blk_addr, struct f2fs_io_info *fio) 155 struct f2fs_io_info *fio)
156{ 156{
157 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); 157 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
158 struct f2fs_bio_info *io; 158 struct f2fs_bio_info *io;
@@ -160,21 +160,21 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
160 160
161 io = is_read ? &sbi->read_io : &sbi->write_io[btype]; 161 io = is_read ? &sbi->read_io : &sbi->write_io[btype];
162 162
163 verify_block_addr(sbi, blk_addr); 163 verify_block_addr(sbi, fio->blk_addr);
164 164
165 down_write(&io->io_rwsem); 165 down_write(&io->io_rwsem);
166 166
167 if (!is_read) 167 if (!is_read)
168 inc_page_count(sbi, F2FS_WRITEBACK); 168 inc_page_count(sbi, F2FS_WRITEBACK);
169 169
170 if (io->bio && (io->last_block_in_bio != blk_addr - 1 || 170 if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
171 io->fio.rw != fio->rw)) 171 io->fio.rw != fio->rw))
172 __submit_merged_bio(io); 172 __submit_merged_bio(io);
173alloc_new: 173alloc_new:
174 if (io->bio == NULL) { 174 if (io->bio == NULL) {
175 int bio_blocks = MAX_BIO_BLOCKS(sbi); 175 int bio_blocks = MAX_BIO_BLOCKS(sbi);
176 176
177 io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read); 177 io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
178 io->fio = *fio; 178 io->fio = *fio;
179 } 179 }
180 180
@@ -184,10 +184,11 @@ alloc_new:
184 goto alloc_new; 184 goto alloc_new;
185 } 185 }
186 186
187 io->last_block_in_bio = blk_addr; 187 io->last_block_in_bio = fio->blk_addr;
188 f2fs_trace_ios(page, fio, 0);
188 189
189 up_write(&io->io_rwsem); 190 up_write(&io->io_rwsem);
190 trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); 191 trace_f2fs_submit_page_mbio(page, fio);
191} 192}
192 193
193/* 194/*
@@ -196,7 +197,7 @@ alloc_new:
196 * ->node_page 197 * ->node_page
197 * update block addresses in the node page 198 * update block addresses in the node page
198 */ 199 */
199static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) 200static void __set_data_blkaddr(struct dnode_of_data *dn)
200{ 201{
201 struct f2fs_node *rn; 202 struct f2fs_node *rn;
202 __le32 *addr_array; 203 __le32 *addr_array;
@@ -209,7 +210,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
209 210
210 /* Get physical address of data block */ 211 /* Get physical address of data block */
211 addr_array = blkaddr_in_node(rn); 212 addr_array = blkaddr_in_node(rn);
212 addr_array[ofs_in_node] = cpu_to_le32(new_addr); 213 addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
213 set_page_dirty(node_page); 214 set_page_dirty(node_page);
214} 215}
215 216
@@ -224,8 +225,8 @@ int reserve_new_block(struct dnode_of_data *dn)
224 225
225 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); 226 trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
226 227
227 __set_data_blkaddr(dn, NEW_ADDR);
228 dn->data_blkaddr = NEW_ADDR; 228 dn->data_blkaddr = NEW_ADDR;
229 __set_data_blkaddr(dn);
229 mark_inode_dirty(dn->inode); 230 mark_inode_dirty(dn->inode);
230 sync_inode_page(dn); 231 sync_inode_page(dn);
231 return 0; 232 return 0;
@@ -273,7 +274,7 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
273 unsigned int blkbits = inode->i_sb->s_blocksize_bits; 274 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
274 size_t count; 275 size_t count;
275 276
276 clear_buffer_new(bh_result); 277 set_buffer_new(bh_result);
277 map_bh(bh_result, inode->i_sb, 278 map_bh(bh_result, inode->i_sb,
278 start_blkaddr + pgofs - start_fofs); 279 start_blkaddr + pgofs - start_fofs);
279 count = end_fofs - pgofs + 1; 280 count = end_fofs - pgofs + 1;
@@ -290,23 +291,24 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
290 return 0; 291 return 0;
291} 292}
292 293
293void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) 294void update_extent_cache(struct dnode_of_data *dn)
294{ 295{
295 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 296 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
296 pgoff_t fofs, start_fofs, end_fofs; 297 pgoff_t fofs, start_fofs, end_fofs;
297 block_t start_blkaddr, end_blkaddr; 298 block_t start_blkaddr, end_blkaddr;
298 int need_update = true; 299 int need_update = true;
299 300
300 f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR); 301 f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
301 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
302 dn->ofs_in_node;
303 302
304 /* Update the page address in the parent node */ 303 /* Update the page address in the parent node */
305 __set_data_blkaddr(dn, blk_addr); 304 __set_data_blkaddr(dn);
306 305
307 if (is_inode_flag_set(fi, FI_NO_EXTENT)) 306 if (is_inode_flag_set(fi, FI_NO_EXTENT))
308 return; 307 return;
309 308
309 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
310 dn->ofs_in_node;
311
310 write_lock(&fi->ext.ext_lock); 312 write_lock(&fi->ext.ext_lock);
311 313
312 start_fofs = fi->ext.fofs; 314 start_fofs = fi->ext.fofs;
@@ -320,16 +322,16 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
320 322
321 /* Initial extent */ 323 /* Initial extent */
322 if (fi->ext.len == 0) { 324 if (fi->ext.len == 0) {
323 if (blk_addr != NULL_ADDR) { 325 if (dn->data_blkaddr != NULL_ADDR) {
324 fi->ext.fofs = fofs; 326 fi->ext.fofs = fofs;
325 fi->ext.blk_addr = blk_addr; 327 fi->ext.blk_addr = dn->data_blkaddr;
326 fi->ext.len = 1; 328 fi->ext.len = 1;
327 } 329 }
328 goto end_update; 330 goto end_update;
329 } 331 }
330 332
331 /* Front merge */ 333 /* Front merge */
332 if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) { 334 if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) {
333 fi->ext.fofs--; 335 fi->ext.fofs--;
334 fi->ext.blk_addr--; 336 fi->ext.blk_addr--;
335 fi->ext.len++; 337 fi->ext.len++;
@@ -337,7 +339,7 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
337 } 339 }
338 340
339 /* Back merge */ 341 /* Back merge */
340 if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) { 342 if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) {
341 fi->ext.len++; 343 fi->ext.len++;
342 goto end_update; 344 goto end_update;
343 } 345 }
@@ -376,6 +378,10 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
376 struct dnode_of_data dn; 378 struct dnode_of_data dn;
377 struct page *page; 379 struct page *page;
378 int err; 380 int err;
381 struct f2fs_io_info fio = {
382 .type = DATA,
383 .rw = sync ? READ_SYNC : READA,
384 };
379 385
380 page = find_get_page(mapping, index); 386 page = find_get_page(mapping, index);
381 if (page && PageUptodate(page)) 387 if (page && PageUptodate(page))
@@ -404,8 +410,8 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
404 return page; 410 return page;
405 } 411 }
406 412
407 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, dn.data_blkaddr, 413 fio.blk_addr = dn.data_blkaddr;
408 sync ? READ_SYNC : READA); 414 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
409 if (err) 415 if (err)
410 return ERR_PTR(err); 416 return ERR_PTR(err);
411 417
@@ -430,7 +436,10 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
430 struct dnode_of_data dn; 436 struct dnode_of_data dn;
431 struct page *page; 437 struct page *page;
432 int err; 438 int err;
433 439 struct f2fs_io_info fio = {
440 .type = DATA,
441 .rw = READ_SYNC,
442 };
434repeat: 443repeat:
435 page = grab_cache_page(mapping, index); 444 page = grab_cache_page(mapping, index);
436 if (!page) 445 if (!page)
@@ -464,8 +473,8 @@ repeat:
464 return page; 473 return page;
465 } 474 }
466 475
467 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, 476 fio.blk_addr = dn.data_blkaddr;
468 dn.data_blkaddr, READ_SYNC); 477 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
469 if (err) 478 if (err)
470 return ERR_PTR(err); 479 return ERR_PTR(err);
471 480
@@ -515,8 +524,12 @@ repeat:
515 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 524 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
516 SetPageUptodate(page); 525 SetPageUptodate(page);
517 } else { 526 } else {
518 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, 527 struct f2fs_io_info fio = {
519 dn.data_blkaddr, READ_SYNC); 528 .type = DATA,
529 .rw = READ_SYNC,
530 .blk_addr = dn.data_blkaddr,
531 };
532 err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
520 if (err) 533 if (err)
521 goto put_err; 534 goto put_err;
522 535
@@ -550,30 +563,25 @@ static int __allocate_data_block(struct dnode_of_data *dn)
550 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 563 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
551 struct f2fs_inode_info *fi = F2FS_I(dn->inode); 564 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
552 struct f2fs_summary sum; 565 struct f2fs_summary sum;
553 block_t new_blkaddr;
554 struct node_info ni; 566 struct node_info ni;
567 int seg = CURSEG_WARM_DATA;
555 pgoff_t fofs; 568 pgoff_t fofs;
556 int type;
557 569
558 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) 570 if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
559 return -EPERM; 571 return -EPERM;
560 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) 572 if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
561 return -ENOSPC; 573 return -ENOSPC;
562 574
563 __set_data_blkaddr(dn, NEW_ADDR);
564 dn->data_blkaddr = NEW_ADDR;
565
566 get_node_info(sbi, dn->nid, &ni); 575 get_node_info(sbi, dn->nid, &ni);
567 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 576 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
568 577
569 type = CURSEG_WARM_DATA; 578 if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
579 seg = CURSEG_DIRECT_IO;
570 580
571 allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type); 581 allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg);
572 582
573 /* direct IO doesn't use extent cache to maximize the performance */ 583 /* direct IO doesn't use extent cache to maximize the performance */
574 set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT); 584 __set_data_blkaddr(dn);
575 update_extent_cache(new_blkaddr, dn);
576 clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
577 585
578 /* update i_size */ 586 /* update i_size */
579 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + 587 fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -581,10 +589,59 @@ static int __allocate_data_block(struct dnode_of_data *dn)
581 if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT)) 589 if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
582 i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); 590 i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
583 591
584 dn->data_blkaddr = new_blkaddr;
585 return 0; 592 return 0;
586} 593}
587 594
595static void __allocate_data_blocks(struct inode *inode, loff_t offset,
596 size_t count)
597{
598 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
599 struct dnode_of_data dn;
600 u64 start = F2FS_BYTES_TO_BLK(offset);
601 u64 len = F2FS_BYTES_TO_BLK(count);
602 bool allocated;
603 u64 end_offset;
604
605 while (len) {
606 f2fs_balance_fs(sbi);
607 f2fs_lock_op(sbi);
608
609 /* When reading holes, we need its node page */
610 set_new_dnode(&dn, inode, NULL, NULL, 0);
611 if (get_dnode_of_data(&dn, start, ALLOC_NODE))
612 goto out;
613
614 allocated = false;
615 end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
616
617 while (dn.ofs_in_node < end_offset && len) {
618 if (dn.data_blkaddr == NULL_ADDR) {
619 if (__allocate_data_block(&dn))
620 goto sync_out;
621 allocated = true;
622 }
623 len--;
624 start++;
625 dn.ofs_in_node++;
626 }
627
628 if (allocated)
629 sync_inode_page(&dn);
630
631 f2fs_put_dnode(&dn);
632 f2fs_unlock_op(sbi);
633 }
634 return;
635
636sync_out:
637 if (allocated)
638 sync_inode_page(&dn);
639 f2fs_put_dnode(&dn);
640out:
641 f2fs_unlock_op(sbi);
642 return;
643}
644
588/* 645/*
589 * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh. 646 * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
590 * If original data blocks are allocated, then give them to blockdev. 647 * If original data blocks are allocated, then give them to blockdev.
@@ -610,10 +667,8 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
610 if (check_extent_cache(inode, pgofs, bh_result)) 667 if (check_extent_cache(inode, pgofs, bh_result))
611 goto out; 668 goto out;
612 669
613 if (create) { 670 if (create)
614 f2fs_balance_fs(F2FS_I_SB(inode));
615 f2fs_lock_op(F2FS_I_SB(inode)); 671 f2fs_lock_op(F2FS_I_SB(inode));
616 }
617 672
618 /* When reading holes, we need its node page */ 673 /* When reading holes, we need its node page */
619 set_new_dnode(&dn, inode, NULL, NULL, 0); 674 set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -627,12 +682,14 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
627 goto put_out; 682 goto put_out;
628 683
629 if (dn.data_blkaddr != NULL_ADDR) { 684 if (dn.data_blkaddr != NULL_ADDR) {
685 set_buffer_new(bh_result);
630 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 686 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
631 } else if (create) { 687 } else if (create) {
632 err = __allocate_data_block(&dn); 688 err = __allocate_data_block(&dn);
633 if (err) 689 if (err)
634 goto put_out; 690 goto put_out;
635 allocated = true; 691 allocated = true;
692 set_buffer_new(bh_result);
636 map_bh(bh_result, inode->i_sb, dn.data_blkaddr); 693 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
637 } else { 694 } else {
638 goto put_out; 695 goto put_out;
@@ -745,7 +802,6 @@ static int f2fs_read_data_pages(struct file *file,
745int do_write_data_page(struct page *page, struct f2fs_io_info *fio) 802int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
746{ 803{
747 struct inode *inode = page->mapping->host; 804 struct inode *inode = page->mapping->host;
748 block_t old_blkaddr, new_blkaddr;
749 struct dnode_of_data dn; 805 struct dnode_of_data dn;
750 int err = 0; 806 int err = 0;
751 807
@@ -754,10 +810,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
754 if (err) 810 if (err)
755 return err; 811 return err;
756 812
757 old_blkaddr = dn.data_blkaddr; 813 fio->blk_addr = dn.data_blkaddr;
758 814
759 /* This page is already truncated */ 815 /* This page is already truncated */
760 if (old_blkaddr == NULL_ADDR) 816 if (fio->blk_addr == NULL_ADDR)
761 goto out_writepage; 817 goto out_writepage;
762 818
763 set_page_writeback(page); 819 set_page_writeback(page);
@@ -766,14 +822,14 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
766 * If current allocation needs SSR, 822 * If current allocation needs SSR,
767 * it had better in-place writes for updated data. 823 * it had better in-place writes for updated data.
768 */ 824 */
769 if (unlikely(old_blkaddr != NEW_ADDR && 825 if (unlikely(fio->blk_addr != NEW_ADDR &&
770 !is_cold_data(page) && 826 !is_cold_data(page) &&
771 need_inplace_update(inode))) { 827 need_inplace_update(inode))) {
772 rewrite_data_page(page, old_blkaddr, fio); 828 rewrite_data_page(page, fio);
773 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); 829 set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
774 } else { 830 } else {
775 write_data_page(page, &dn, &new_blkaddr, fio); 831 write_data_page(page, &dn, fio);
776 update_extent_cache(new_blkaddr, &dn); 832 update_extent_cache(&dn);
777 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); 833 set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
778 } 834 }
779out_writepage: 835out_writepage:
@@ -812,7 +868,12 @@ static int f2fs_write_data_page(struct page *page,
812 868
813 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 869 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
814write: 870write:
815 if (unlikely(sbi->por_doing)) 871 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
872 goto redirty_out;
873 if (f2fs_is_drop_cache(inode))
874 goto out;
875 if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
876 available_free_memory(sbi, BASE_CHECK))
816 goto redirty_out; 877 goto redirty_out;
817 878
818 /* Dentry blocks are controlled by checkpoint */ 879 /* Dentry blocks are controlled by checkpoint */
@@ -826,7 +887,6 @@ write:
826 /* we should bypass data pages to proceed the kworkder jobs */ 887 /* we should bypass data pages to proceed the kworkder jobs */
827 if (unlikely(f2fs_cp_error(sbi))) { 888 if (unlikely(f2fs_cp_error(sbi))) {
828 SetPageError(page); 889 SetPageError(page);
829 unlock_page(page);
830 goto out; 890 goto out;
831 } 891 }
832 892
@@ -1002,8 +1062,12 @@ put_next:
1002 if (dn.data_blkaddr == NEW_ADDR) { 1062 if (dn.data_blkaddr == NEW_ADDR) {
1003 zero_user_segment(page, 0, PAGE_CACHE_SIZE); 1063 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
1004 } else { 1064 } else {
1005 err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, 1065 struct f2fs_io_info fio = {
1006 READ_SYNC); 1066 .type = DATA,
1067 .rw = READ_SYNC,
1068 .blk_addr = dn.data_blkaddr,
1069 };
1070 err = f2fs_submit_page_bio(sbi, page, &fio);
1007 if (err) 1071 if (err)
1008 goto fail; 1072 goto fail;
1009 1073
@@ -1092,6 +1156,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1092 1156
1093 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 1157 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1094 1158
1159 if (rw & WRITE)
1160 __allocate_data_blocks(inode, offset, count);
1161
1095 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); 1162 err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
1096 if (err < 0 && (rw & WRITE)) 1163 if (err < 0 && (rw & WRITE))
1097 f2fs_write_failed(mapping, offset + count); 1164 f2fs_write_failed(mapping, offset + count);
@@ -1101,24 +1168,33 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1101 return err; 1168 return err;
1102} 1169}
1103 1170
1104static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, 1171void f2fs_invalidate_page(struct page *page, unsigned int offset,
1105 unsigned int length) 1172 unsigned int length)
1106{ 1173{
1107 struct inode *inode = page->mapping->host; 1174 struct inode *inode = page->mapping->host;
1175 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1108 1176
1109 if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) 1177 if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
1178 (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
1110 return; 1179 return;
1111 1180
1112 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) 1181 if (PageDirty(page)) {
1113 invalidate_inmem_page(inode, page); 1182 if (inode->i_ino == F2FS_META_INO(sbi))
1114 1183 dec_page_count(sbi, F2FS_DIRTY_META);
1115 if (PageDirty(page)) 1184 else if (inode->i_ino == F2FS_NODE_INO(sbi))
1116 inode_dec_dirty_pages(inode); 1185 dec_page_count(sbi, F2FS_DIRTY_NODES);
1186 else
1187 inode_dec_dirty_pages(inode);
1188 }
1117 ClearPagePrivate(page); 1189 ClearPagePrivate(page);
1118} 1190}
1119 1191
1120static int f2fs_release_data_page(struct page *page, gfp_t wait) 1192int f2fs_release_page(struct page *page, gfp_t wait)
1121{ 1193{
1194 /* If this is dirty page, keep PagePrivate */
1195 if (PageDirty(page))
1196 return 0;
1197
1122 ClearPagePrivate(page); 1198 ClearPagePrivate(page);
1123 return 1; 1199 return 1;
1124} 1200}
@@ -1132,7 +1208,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
1132 1208
1133 SetPageUptodate(page); 1209 SetPageUptodate(page);
1134 1210
1135 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { 1211 if (f2fs_is_atomic_file(inode)) {
1136 register_inmem_page(inode, page); 1212 register_inmem_page(inode, page);
1137 return 1; 1213 return 1;
1138 } 1214 }
@@ -1168,8 +1244,8 @@ const struct address_space_operations f2fs_dblock_aops = {
1168 .write_begin = f2fs_write_begin, 1244 .write_begin = f2fs_write_begin,
1169 .write_end = f2fs_write_end, 1245 .write_end = f2fs_write_end,
1170 .set_page_dirty = f2fs_set_data_page_dirty, 1246 .set_page_dirty = f2fs_set_data_page_dirty,
1171 .invalidatepage = f2fs_invalidate_data_page, 1247 .invalidatepage = f2fs_invalidate_page,
1172 .releasepage = f2fs_release_data_page, 1248 .releasepage = f2fs_release_page,
1173 .direct_IO = f2fs_direct_IO, 1249 .direct_IO = f2fs_direct_IO,
1174 .bmap = f2fs_bmap, 1250 .bmap = f2fs_bmap,
1175}; 1251};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 91e8f699ab30..e671373cc8ab 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -40,6 +40,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
40 si->ndirty_dirs = sbi->n_dirty_dirs; 40 si->ndirty_dirs = sbi->n_dirty_dirs;
41 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); 41 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
42 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); 42 si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
43 si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
43 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; 44 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
44 si->rsvd_segs = reserved_segments(sbi); 45 si->rsvd_segs = reserved_segments(sbi);
45 si->overp_segs = overprovision_segments(sbi); 46 si->overp_segs = overprovision_segments(sbi);
@@ -57,7 +58,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
57 si->node_pages = NODE_MAPPING(sbi)->nrpages; 58 si->node_pages = NODE_MAPPING(sbi)->nrpages;
58 si->meta_pages = META_MAPPING(sbi)->nrpages; 59 si->meta_pages = META_MAPPING(sbi)->nrpages;
59 si->nats = NM_I(sbi)->nat_cnt; 60 si->nats = NM_I(sbi)->nat_cnt;
60 si->sits = SIT_I(sbi)->dirty_sentries; 61 si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
62 si->sits = MAIN_SEGS(sbi);
63 si->dirty_sits = SIT_I(sbi)->dirty_sentries;
61 si->fnids = NM_I(sbi)->fcnt; 64 si->fnids = NM_I(sbi)->fcnt;
62 si->bg_gc = sbi->bg_gc; 65 si->bg_gc = sbi->bg_gc;
63 si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) 66 si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
@@ -79,6 +82,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
79 si->segment_count[i] = sbi->segment_count[i]; 82 si->segment_count[i] = sbi->segment_count[i];
80 si->block_count[i] = sbi->block_count[i]; 83 si->block_count[i] = sbi->block_count[i];
81 } 84 }
85
86 si->inplace_count = atomic_read(&sbi->inplace_count);
82} 87}
83 88
84/* 89/*
@@ -137,6 +142,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
137 si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); 142 si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry);
138 si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); 143 si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi));
139 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); 144 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi);
145 si->base_mem += SIT_VBLOCK_MAP_SIZE;
140 if (sbi->segs_per_sec > 1) 146 if (sbi->segs_per_sec > 1)
141 si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); 147 si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry);
142 si->base_mem += __bitmap_size(sbi, SIT_BITMAP); 148 si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
@@ -159,20 +165,32 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
159 si->base_mem += sizeof(struct f2fs_nm_info); 165 si->base_mem += sizeof(struct f2fs_nm_info);
160 si->base_mem += __bitmap_size(sbi, NAT_BITMAP); 166 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
161 167
168get_cache:
169 si->cache_mem = 0;
170
162 /* build gc */ 171 /* build gc */
163 si->base_mem += sizeof(struct f2fs_gc_kthread); 172 if (sbi->gc_thread)
173 si->cache_mem += sizeof(struct f2fs_gc_kthread);
174
175 /* build merge flush thread */
176 if (SM_I(sbi)->cmd_control_info)
177 si->cache_mem += sizeof(struct flush_cmd_control);
164 178
165get_cache:
166 /* free nids */ 179 /* free nids */
167 si->cache_mem = NM_I(sbi)->fcnt; 180 si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid);
168 si->cache_mem += NM_I(sbi)->nat_cnt; 181 si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
169 npages = NODE_MAPPING(sbi)->nrpages; 182 si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
170 si->cache_mem += npages << PAGE_CACHE_SHIFT; 183 sizeof(struct nat_entry_set);
171 npages = META_MAPPING(sbi)->nrpages; 184 si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
172 si->cache_mem += npages << PAGE_CACHE_SHIFT; 185 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
173 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
174 for (i = 0; i <= UPDATE_INO; i++) 186 for (i = 0; i <= UPDATE_INO; i++)
175 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); 187 si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
188
189 si->page_mem = 0;
190 npages = NODE_MAPPING(sbi)->nrpages;
191 si->page_mem += npages << PAGE_CACHE_SHIFT;
192 npages = META_MAPPING(sbi)->nrpages;
193 si->page_mem += npages << PAGE_CACHE_SHIFT;
176} 194}
177 195
178static int stat_show(struct seq_file *s, void *v) 196static int stat_show(struct seq_file *s, void *v)
@@ -250,16 +268,16 @@ static int stat_show(struct seq_file *s, void *v)
250 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", 268 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
251 si->hit_ext, si->total_ext); 269 si->hit_ext, si->total_ext);
252 seq_puts(s, "\nBalancing F2FS Async:\n"); 270 seq_puts(s, "\nBalancing F2FS Async:\n");
253 seq_printf(s, " - inmem: %4d\n", 271 seq_printf(s, " - inmem: %4d, wb: %4d\n",
254 si->inmem_pages); 272 si->inmem_pages, si->wb_pages);
255 seq_printf(s, " - nodes: %4d in %4d\n", 273 seq_printf(s, " - nodes: %4d in %4d\n",
256 si->ndirty_node, si->node_pages); 274 si->ndirty_node, si->node_pages);
257 seq_printf(s, " - dents: %4d in dirs:%4d\n", 275 seq_printf(s, " - dents: %4d in dirs:%4d\n",
258 si->ndirty_dent, si->ndirty_dirs); 276 si->ndirty_dent, si->ndirty_dirs);
259 seq_printf(s, " - meta: %4d in %4d\n", 277 seq_printf(s, " - meta: %4d in %4d\n",
260 si->ndirty_meta, si->meta_pages); 278 si->ndirty_meta, si->meta_pages);
261 seq_printf(s, " - NATs: %9d\n - SITs: %9d\n", 279 seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
262 si->nats, si->sits); 280 si->dirty_nats, si->nats, si->dirty_sits, si->sits);
263 seq_printf(s, " - free_nids: %9d\n", 281 seq_printf(s, " - free_nids: %9d\n",
264 si->fnids); 282 si->fnids);
265 seq_puts(s, "\nDistribution of User Blocks:"); 283 seq_puts(s, "\nDistribution of User Blocks:");
@@ -277,6 +295,7 @@ static int stat_show(struct seq_file *s, void *v)
277 for (j = 0; j < si->util_free; j++) 295 for (j = 0; j < si->util_free; j++)
278 seq_putc(s, '-'); 296 seq_putc(s, '-');
279 seq_puts(s, "]\n\n"); 297 seq_puts(s, "]\n\n");
298 seq_printf(s, "IPU: %u blocks\n", si->inplace_count);
280 seq_printf(s, "SSR: %u blocks in %u segments\n", 299 seq_printf(s, "SSR: %u blocks in %u segments\n",
281 si->block_count[SSR], si->segment_count[SSR]); 300 si->block_count[SSR], si->segment_count[SSR]);
282 seq_printf(s, "LFS: %u blocks in %u segments\n", 301 seq_printf(s, "LFS: %u blocks in %u segments\n",
@@ -289,9 +308,14 @@ static int stat_show(struct seq_file *s, void *v)
289 308
290 /* memory footprint */ 309 /* memory footprint */
291 update_mem_info(si->sbi); 310 update_mem_info(si->sbi);
292 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n", 311 seq_printf(s, "\nMemory: %u KB\n",
293 (si->base_mem + si->cache_mem) >> 10, 312 (si->base_mem + si->cache_mem + si->page_mem) >> 10);
294 si->base_mem >> 10, si->cache_mem >> 10); 313 seq_printf(s, " - static: %u KB\n",
314 si->base_mem >> 10);
315 seq_printf(s, " - cached: %u KB\n",
316 si->cache_mem >> 10);
317 seq_printf(s, " - paged : %u KB\n",
318 si->page_mem >> 10);
295 } 319 }
296 mutex_unlock(&f2fs_stat_mutex); 320 mutex_unlock(&f2fs_stat_mutex);
297 return 0; 321 return 0;
@@ -331,6 +355,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
331 355
332 atomic_set(&sbi->inline_inode, 0); 356 atomic_set(&sbi->inline_inode, 0);
333 atomic_set(&sbi->inline_dir, 0); 357 atomic_set(&sbi->inline_dir, 0);
358 atomic_set(&sbi->inplace_count, 0);
334 359
335 mutex_lock(&f2fs_stat_mutex); 360 mutex_lock(&f2fs_stat_mutex);
336 list_add_tail(&si->stat_list, &f2fs_stat_list); 361 list_add_tail(&si->stat_list, &f2fs_stat_list);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b1a7d5737cd0..b74097a7f6d9 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -286,8 +286,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
286 f2fs_wait_on_page_writeback(page, type); 286 f2fs_wait_on_page_writeback(page, type);
287 de->ino = cpu_to_le32(inode->i_ino); 287 de->ino = cpu_to_le32(inode->i_ino);
288 set_de_type(de, inode); 288 set_de_type(de, inode);
289 if (!f2fs_has_inline_dentry(dir)) 289 f2fs_dentry_kunmap(dir, page);
290 kunmap(page);
291 set_page_dirty(page); 290 set_page_dirty(page);
292 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 291 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
293 mark_inode_dirty(dir); 292 mark_inode_dirty(dir);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ec58bb2373fc..7fa3313ab0e2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -28,7 +28,7 @@
28 do { \ 28 do { \
29 if (unlikely(condition)) { \ 29 if (unlikely(condition)) { \
30 WARN_ON(1); \ 30 WARN_ON(1); \
31 sbi->need_fsck = true; \ 31 set_sbi_flag(sbi, SBI_NEED_FSCK); \
32 } \ 32 } \
33 } while (0) 33 } while (0)
34#define f2fs_down_write(x, y) down_write(x) 34#define f2fs_down_write(x, y) down_write(x)
@@ -100,10 +100,15 @@ enum {
100 100
101enum { 101enum {
102 CP_UMOUNT, 102 CP_UMOUNT,
103 CP_FASTBOOT,
103 CP_SYNC, 104 CP_SYNC,
104 CP_DISCARD, 105 CP_DISCARD,
105}; 106};
106 107
108#define DEF_BATCHED_TRIM_SECTIONS 32
109#define BATCHED_TRIM_SEGMENTS(sbi) \
110 (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
111
107struct cp_control { 112struct cp_control {
108 int reason; 113 int reason;
109 __u64 trim_start; 114 __u64 trim_start;
@@ -136,8 +141,14 @@ struct ino_entry {
136 nid_t ino; /* inode number */ 141 nid_t ino; /* inode number */
137}; 142};
138 143
139/* for the list of directory inodes */ 144/*
140struct dir_inode_entry { 145 * for the list of directory inodes or gc inodes.
146 * NOTE: there are two slab users for this structure, if we add/modify/delete
147 * fields in structure for one of slab users, it may affect fields or size of
148 * other one, in this condition, it's better to split both of slab and related
149 * data structure.
150 */
151struct inode_entry {
141 struct list_head list; /* list head */ 152 struct list_head list; /* list head */
142 struct inode *inode; /* vfs inode pointer */ 153 struct inode *inode; /* vfs inode pointer */
143}; 154};
@@ -196,11 +207,14 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
196 */ 207 */
197#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS 208#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
198#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS 209#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
210#define F2FS_IOC_GETVERSION FS_IOC_GETVERSION
199 211
200#define F2FS_IOCTL_MAGIC 0xf5 212#define F2FS_IOCTL_MAGIC 0xf5
201#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) 213#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
202#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) 214#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
203#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) 215#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
216#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
217#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
204 218
205#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 219#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
206/* 220/*
@@ -295,7 +309,7 @@ struct f2fs_inode_info {
295 nid_t i_xattr_nid; /* node id that contains xattrs */ 309 nid_t i_xattr_nid; /* node id that contains xattrs */
296 unsigned long long xattr_ver; /* cp version of xattr modification */ 310 unsigned long long xattr_ver; /* cp version of xattr modification */
297 struct extent_info ext; /* in-memory extent cache entry */ 311 struct extent_info ext; /* in-memory extent cache entry */
298 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ 312 struct inode_entry *dirty_dir; /* the pointer of dirty dir */
299 313
300 struct radix_tree_root inmem_root; /* radix tree for inmem pages */ 314 struct radix_tree_root inmem_root; /* radix tree for inmem pages */
301 struct list_head inmem_pages; /* inmemory pages managed by f2fs */ 315 struct list_head inmem_pages; /* inmemory pages managed by f2fs */
@@ -398,7 +412,8 @@ enum {
398 CURSEG_HOT_NODE, /* direct node blocks of directory files */ 412 CURSEG_HOT_NODE, /* direct node blocks of directory files */
399 CURSEG_WARM_NODE, /* direct node blocks of normal files */ 413 CURSEG_WARM_NODE, /* direct node blocks of normal files */
400 CURSEG_COLD_NODE, /* indirect node blocks */ 414 CURSEG_COLD_NODE, /* indirect node blocks */
401 NO_CHECK_TYPE 415 NO_CHECK_TYPE,
416 CURSEG_DIRECT_IO, /* to use for the direct IO path */
402}; 417};
403 418
404struct flush_cmd { 419struct flush_cmd {
@@ -437,6 +452,9 @@ struct f2fs_sm_info {
437 int nr_discards; /* # of discards in the list */ 452 int nr_discards; /* # of discards in the list */
438 int max_discards; /* max. discards to be issued */ 453 int max_discards; /* max. discards to be issued */
439 454
455 /* for batched trimming */
456 unsigned int trim_sections; /* # of sections to trim */
457
440 struct list_head sit_entry_set; /* sit entry set list */ 458 struct list_head sit_entry_set; /* sit entry set list */
441 459
442 unsigned int ipu_policy; /* in-place-update policy */ 460 unsigned int ipu_policy; /* in-place-update policy */
@@ -489,6 +507,7 @@ enum page_type {
489struct f2fs_io_info { 507struct f2fs_io_info {
490 enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ 508 enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
491 int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */ 509 int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
510 block_t blk_addr; /* block address to be written */
492}; 511};
493 512
494#define is_read_io(rw) (((rw) & 1) == READ) 513#define is_read_io(rw) (((rw) & 1) == READ)
@@ -508,13 +527,20 @@ struct inode_management {
508 unsigned long ino_num; /* number of entries */ 527 unsigned long ino_num; /* number of entries */
509}; 528};
510 529
530/* For s_flag in struct f2fs_sb_info */
531enum {
532 SBI_IS_DIRTY, /* dirty flag for checkpoint */
533 SBI_IS_CLOSE, /* specify unmounting */
534 SBI_NEED_FSCK, /* need fsck.f2fs to fix */
535 SBI_POR_DOING, /* recovery is doing or not */
536};
537
511struct f2fs_sb_info { 538struct f2fs_sb_info {
512 struct super_block *sb; /* pointer to VFS super block */ 539 struct super_block *sb; /* pointer to VFS super block */
513 struct proc_dir_entry *s_proc; /* proc entry */ 540 struct proc_dir_entry *s_proc; /* proc entry */
514 struct buffer_head *raw_super_buf; /* buffer head of raw sb */ 541 struct buffer_head *raw_super_buf; /* buffer head of raw sb */
515 struct f2fs_super_block *raw_super; /* raw super block pointer */ 542 struct f2fs_super_block *raw_super; /* raw super block pointer */
516 int s_dirty; /* dirty flag for checkpoint */ 543 int s_flag; /* flags for sbi */
517 bool need_fsck; /* need fsck.f2fs to fix */
518 544
519 /* for node-related operations */ 545 /* for node-related operations */
520 struct f2fs_nm_info *nm_info; /* node manager */ 546 struct f2fs_nm_info *nm_info; /* node manager */
@@ -534,7 +560,6 @@ struct f2fs_sb_info {
534 struct rw_semaphore cp_rwsem; /* blocking FS operations */ 560 struct rw_semaphore cp_rwsem; /* blocking FS operations */
535 struct rw_semaphore node_write; /* locking node writes */ 561 struct rw_semaphore node_write; /* locking node writes */
536 struct mutex writepages; /* mutex for writepages() */ 562 struct mutex writepages; /* mutex for writepages() */
537 bool por_doing; /* recovery is doing or not */
538 wait_queue_head_t cp_wait; 563 wait_queue_head_t cp_wait;
539 564
540 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ 565 struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@@ -589,6 +614,7 @@ struct f2fs_sb_info {
589 struct f2fs_stat_info *stat_info; /* FS status information */ 614 struct f2fs_stat_info *stat_info; /* FS status information */
590 unsigned int segment_count[2]; /* # of allocated segments */ 615 unsigned int segment_count[2]; /* # of allocated segments */
591 unsigned int block_count[2]; /* # of allocated blocks */ 616 unsigned int block_count[2]; /* # of allocated blocks */
617 atomic_t inplace_count; /* # of inplace update */
592 int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ 618 int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
593 atomic_t inline_inode; /* # of inline_data inodes */ 619 atomic_t inline_inode; /* # of inline_data inodes */
594 atomic_t inline_dir; /* # of inline_dentry inodes */ 620 atomic_t inline_dir; /* # of inline_dentry inodes */
@@ -686,14 +712,19 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi)
686 return sbi->node_inode->i_mapping; 712 return sbi->node_inode->i_mapping;
687} 713}
688 714
689static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) 715static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type)
690{ 716{
691 sbi->s_dirty = 1; 717 return sbi->s_flag & (0x01 << type);
692} 718}
693 719
694static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) 720static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
695{ 721{
696 sbi->s_dirty = 0; 722 sbi->s_flag |= (0x01 << type);
723}
724
725static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type)
726{
727 sbi->s_flag &= ~(0x01 << type);
697} 728}
698 729
699static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) 730static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
@@ -741,6 +772,28 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
741 up_write(&sbi->cp_rwsem); 772 up_write(&sbi->cp_rwsem);
742} 773}
743 774
775static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
776{
777 int reason = CP_SYNC;
778
779 if (test_opt(sbi, FASTBOOT))
780 reason = CP_FASTBOOT;
781 if (is_sbi_flag_set(sbi, SBI_IS_CLOSE))
782 reason = CP_UMOUNT;
783 return reason;
784}
785
786static inline bool __remain_node_summaries(int reason)
787{
788 return (reason == CP_UMOUNT || reason == CP_FASTBOOT);
789}
790
791static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi)
792{
793 return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) ||
794 is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG));
795}
796
744/* 797/*
745 * Check whether the given nid is within node id range. 798 * Check whether the given nid is within node id range.
746 */ 799 */
@@ -805,7 +858,7 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
805static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) 858static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
806{ 859{
807 atomic_inc(&sbi->nr_pages[count_type]); 860 atomic_inc(&sbi->nr_pages[count_type]);
808 F2FS_SET_SB_DIRT(sbi); 861 set_sbi_flag(sbi, SBI_IS_DIRTY);
809} 862}
810 863
811static inline void inode_inc_dirty_pages(struct inode *inode) 864static inline void inode_inc_dirty_pages(struct inode *inode)
@@ -1113,6 +1166,7 @@ enum {
1113 FI_NEED_IPU, /* used for ipu per file */ 1166 FI_NEED_IPU, /* used for ipu per file */
1114 FI_ATOMIC_FILE, /* indicate atomic file */ 1167 FI_ATOMIC_FILE, /* indicate atomic file */
1115 FI_VOLATILE_FILE, /* indicate volatile file */ 1168 FI_VOLATILE_FILE, /* indicate volatile file */
1169 FI_DROP_CACHE, /* drop dirty page cache */
1116 FI_DATA_EXIST, /* indicate data exists */ 1170 FI_DATA_EXIST, /* indicate data exists */
1117}; 1171};
1118 1172
@@ -1220,6 +1274,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode)
1220 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); 1274 return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
1221} 1275}
1222 1276
1277static inline bool f2fs_is_drop_cache(struct inode *inode)
1278{
1279 return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE);
1280}
1281
1223static inline void *inline_data_addr(struct page *page) 1282static inline void *inline_data_addr(struct page *page)
1224{ 1283{
1225 struct f2fs_inode *ri = F2FS_INODE(page); 1284 struct f2fs_inode *ri = F2FS_INODE(page);
@@ -1389,7 +1448,6 @@ void destroy_node_manager_caches(void);
1389 * segment.c 1448 * segment.c
1390 */ 1449 */
1391void register_inmem_page(struct inode *, struct page *); 1450void register_inmem_page(struct inode *, struct page *);
1392void invalidate_inmem_page(struct inode *, struct page *);
1393void commit_inmem_pages(struct inode *, bool); 1451void commit_inmem_pages(struct inode *, bool);
1394void f2fs_balance_fs(struct f2fs_sb_info *); 1452void f2fs_balance_fs(struct f2fs_sb_info *);
1395void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1453void f2fs_balance_fs_bg(struct f2fs_sb_info *);
@@ -1401,16 +1459,16 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
1401void clear_prefree_segments(struct f2fs_sb_info *); 1459void clear_prefree_segments(struct f2fs_sb_info *);
1402void release_discard_addrs(struct f2fs_sb_info *); 1460void release_discard_addrs(struct f2fs_sb_info *);
1403void discard_next_dnode(struct f2fs_sb_info *, block_t); 1461void discard_next_dnode(struct f2fs_sb_info *, block_t);
1404int npages_for_summary_flush(struct f2fs_sb_info *); 1462int npages_for_summary_flush(struct f2fs_sb_info *, bool);
1405void allocate_new_segments(struct f2fs_sb_info *); 1463void allocate_new_segments(struct f2fs_sb_info *);
1406int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); 1464int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
1407struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); 1465struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
1408void write_meta_page(struct f2fs_sb_info *, struct page *); 1466void write_meta_page(struct f2fs_sb_info *, struct page *);
1409void write_node_page(struct f2fs_sb_info *, struct page *, 1467void write_node_page(struct f2fs_sb_info *, struct page *,
1410 struct f2fs_io_info *, unsigned int, block_t, block_t *); 1468 unsigned int, struct f2fs_io_info *);
1411void write_data_page(struct page *, struct dnode_of_data *, block_t *, 1469void write_data_page(struct page *, struct dnode_of_data *,
1412 struct f2fs_io_info *); 1470 struct f2fs_io_info *);
1413void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); 1471void rewrite_data_page(struct page *, struct f2fs_io_info *);
1414void recover_data_page(struct f2fs_sb_info *, struct page *, 1472void recover_data_page(struct f2fs_sb_info *, struct page *,
1415 struct f2fs_summary *, block_t, block_t); 1473 struct f2fs_summary *, block_t, block_t);
1416void allocate_data_block(struct f2fs_sb_info *, struct page *, 1474void allocate_data_block(struct f2fs_sb_info *, struct page *,
@@ -1457,17 +1515,20 @@ void destroy_checkpoint_caches(void);
1457 * data.c 1515 * data.c
1458 */ 1516 */
1459void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int); 1517void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
1460int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *, block_t, int); 1518int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
1461void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *, block_t, 1519 struct f2fs_io_info *);
1520void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
1462 struct f2fs_io_info *); 1521 struct f2fs_io_info *);
1463int reserve_new_block(struct dnode_of_data *); 1522int reserve_new_block(struct dnode_of_data *);
1464int f2fs_reserve_block(struct dnode_of_data *, pgoff_t); 1523int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
1465void update_extent_cache(block_t, struct dnode_of_data *); 1524void update_extent_cache(struct dnode_of_data *);
1466struct page *find_data_page(struct inode *, pgoff_t, bool); 1525struct page *find_data_page(struct inode *, pgoff_t, bool);
1467struct page *get_lock_data_page(struct inode *, pgoff_t); 1526struct page *get_lock_data_page(struct inode *, pgoff_t);
1468struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); 1527struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
1469int do_write_data_page(struct page *, struct f2fs_io_info *); 1528int do_write_data_page(struct page *, struct f2fs_io_info *);
1470int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); 1529int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
1530void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
1531int f2fs_release_page(struct page *, gfp_t);
1471 1532
1472/* 1533/*
1473 * gc.c 1534 * gc.c
@@ -1477,8 +1538,6 @@ void stop_gc_thread(struct f2fs_sb_info *);
1477block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); 1538block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
1478int f2fs_gc(struct f2fs_sb_info *); 1539int f2fs_gc(struct f2fs_sb_info *);
1479void build_gc_manager(struct f2fs_sb_info *); 1540void build_gc_manager(struct f2fs_sb_info *);
1480int __init create_gc_caches(void);
1481void destroy_gc_caches(void);
1482 1541
1483/* 1542/*
1484 * recovery.c 1543 * recovery.c
@@ -1497,9 +1556,9 @@ struct f2fs_stat_info {
1497 int main_area_segs, main_area_sections, main_area_zones; 1556 int main_area_segs, main_area_sections, main_area_zones;
1498 int hit_ext, total_ext; 1557 int hit_ext, total_ext;
1499 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; 1558 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1500 int nats, sits, fnids; 1559 int nats, dirty_nats, sits, dirty_sits, fnids;
1501 int total_count, utilization; 1560 int total_count, utilization;
1502 int bg_gc, inline_inode, inline_dir, inmem_pages; 1561 int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages;
1503 unsigned int valid_count, valid_node_count, valid_inode_count; 1562 unsigned int valid_count, valid_node_count, valid_inode_count;
1504 unsigned int bimodal, avg_vblocks; 1563 unsigned int bimodal, avg_vblocks;
1505 int util_free, util_valid, util_invalid; 1564 int util_free, util_valid, util_invalid;
@@ -1514,7 +1573,8 @@ struct f2fs_stat_info {
1514 1573
1515 unsigned int segment_count[2]; 1574 unsigned int segment_count[2];
1516 unsigned int block_count[2]; 1575 unsigned int block_count[2];
1517 unsigned base_mem, cache_mem; 1576 unsigned int inplace_count;
1577 unsigned base_mem, cache_mem, page_mem;
1518}; 1578};
1519 1579
1520static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) 1580static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
@@ -1553,7 +1613,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
1553 ((sbi)->segment_count[(curseg)->alloc_type]++) 1613 ((sbi)->segment_count[(curseg)->alloc_type]++)
1554#define stat_inc_block_count(sbi, curseg) \ 1614#define stat_inc_block_count(sbi, curseg) \
1555 ((sbi)->block_count[(curseg)->alloc_type]++) 1615 ((sbi)->block_count[(curseg)->alloc_type]++)
1556 1616#define stat_inc_inplace_blocks(sbi) \
1617 (atomic_inc(&(sbi)->inplace_count))
1557#define stat_inc_seg_count(sbi, type) \ 1618#define stat_inc_seg_count(sbi, type) \
1558 do { \ 1619 do { \
1559 struct f2fs_stat_info *si = F2FS_STAT(sbi); \ 1620 struct f2fs_stat_info *si = F2FS_STAT(sbi); \
@@ -1599,6 +1660,7 @@ void f2fs_destroy_root_stats(void);
1599#define stat_dec_inline_dir(inode) 1660#define stat_dec_inline_dir(inode)
1600#define stat_inc_seg_type(sbi, curseg) 1661#define stat_inc_seg_type(sbi, curseg)
1601#define stat_inc_block_count(sbi, curseg) 1662#define stat_inc_block_count(sbi, curseg)
1663#define stat_inc_inplace_blocks(sbi)
1602#define stat_inc_seg_count(si, type) 1664#define stat_inc_seg_count(si, type)
1603#define stat_inc_tot_blk_count(si, blks) 1665#define stat_inc_tot_blk_count(si, blks)
1604#define stat_inc_data_blk_count(si, blks) 1666#define stat_inc_data_blk_count(si, blks)
@@ -1619,6 +1681,7 @@ extern const struct address_space_operations f2fs_meta_aops;
1619extern const struct inode_operations f2fs_dir_inode_operations; 1681extern const struct inode_operations f2fs_dir_inode_operations;
1620extern const struct inode_operations f2fs_symlink_inode_operations; 1682extern const struct inode_operations f2fs_symlink_inode_operations;
1621extern const struct inode_operations f2fs_special_inode_operations; 1683extern const struct inode_operations f2fs_special_inode_operations;
1684extern struct kmem_cache *inode_entry_slab;
1622 1685
1623/* 1686/*
1624 * inline.c 1687 * inline.c
@@ -1629,7 +1692,6 @@ int f2fs_read_inline_data(struct inode *, struct page *);
1629int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); 1692int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
1630int f2fs_convert_inline_inode(struct inode *); 1693int f2fs_convert_inline_inode(struct inode *);
1631int f2fs_write_inline_data(struct inode *, struct page *); 1694int f2fs_write_inline_data(struct inode *, struct page *);
1632void truncate_inline_data(struct page *, u64);
1633bool recover_inline_data(struct inode *, struct page *); 1695bool recover_inline_data(struct inode *, struct page *);
1634struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, 1696struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
1635 struct page **); 1697 struct page **);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5674ba13102b..98dac27bc3f7 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -26,6 +26,7 @@
26#include "segment.h" 26#include "segment.h"
27#include "xattr.h" 27#include "xattr.h"
28#include "acl.h" 28#include "acl.h"
29#include "trace.h"
29#include <trace/events/f2fs.h> 30#include <trace/events/f2fs.h>
30 31
31static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, 32static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
@@ -245,6 +246,10 @@ go_write:
245sync_nodes: 246sync_nodes:
246 sync_node_pages(sbi, ino, &wbc); 247 sync_node_pages(sbi, ino, &wbc);
247 248
249 /* if cp_error was enabled, we should avoid infinite loop */
250 if (unlikely(f2fs_cp_error(sbi)))
251 goto out;
252
248 if (need_inode_block_update(sbi, ino)) { 253 if (need_inode_block_update(sbi, ino)) {
249 mark_inode_dirty_sync(inode); 254 mark_inode_dirty_sync(inode);
250 f2fs_write_inode(inode, NULL); 255 f2fs_write_inode(inode, NULL);
@@ -264,6 +269,7 @@ flush_out:
264 ret = f2fs_issue_flush(sbi); 269 ret = f2fs_issue_flush(sbi);
265out: 270out:
266 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); 271 trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
272 f2fs_trace_ios(NULL, NULL, 1);
267 return ret; 273 return ret;
268} 274}
269 275
@@ -350,7 +356,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
350 /* find data/hole in dnode block */ 356 /* find data/hole in dnode block */
351 for (; dn.ofs_in_node < end_offset; 357 for (; dn.ofs_in_node < end_offset;
352 dn.ofs_in_node++, pgofs++, 358 dn.ofs_in_node++, pgofs++,
353 data_ofs = pgofs << PAGE_CACHE_SHIFT) { 359 data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
354 block_t blkaddr; 360 block_t blkaddr;
355 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); 361 blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
356 362
@@ -426,7 +432,8 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
426 if (blkaddr == NULL_ADDR) 432 if (blkaddr == NULL_ADDR)
427 continue; 433 continue;
428 434
429 update_extent_cache(NULL_ADDR, dn); 435 dn->data_blkaddr = NULL_ADDR;
436 update_extent_cache(dn);
430 invalidate_blocks(sbi, blkaddr); 437 invalidate_blocks(sbi, blkaddr);
431 nr_free++; 438 nr_free++;
432 } 439 }
@@ -483,8 +490,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
483 490
484 trace_f2fs_truncate_blocks_enter(inode, from); 491 trace_f2fs_truncate_blocks_enter(inode, from);
485 492
486 free_from = (pgoff_t) 493 free_from = (pgoff_t)F2FS_BYTES_TO_BLK(from + blocksize - 1);
487 ((from + blocksize - 1) >> (sbi->log_blocksize));
488 494
489 if (lock) 495 if (lock)
490 f2fs_lock_op(sbi); 496 f2fs_lock_op(sbi);
@@ -835,6 +841,19 @@ static long f2fs_fallocate(struct file *file, int mode,
835 return ret; 841 return ret;
836} 842}
837 843
844static int f2fs_release_file(struct inode *inode, struct file *filp)
845{
846 /* some remained atomic pages should discarded */
847 if (f2fs_is_atomic_file(inode))
848 commit_inmem_pages(inode, true);
849 if (f2fs_is_volatile_file(inode)) {
850 set_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
851 filemap_fdatawrite(inode->i_mapping);
852 clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
853 }
854 return 0;
855}
856
838#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL)) 857#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
839#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL) 858#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
840 859
@@ -905,29 +924,30 @@ out:
905 return ret; 924 return ret;
906} 925}
907 926
927static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
928{
929 struct inode *inode = file_inode(filp);
930
931 return put_user(inode->i_generation, (int __user *)arg);
932}
933
908static int f2fs_ioc_start_atomic_write(struct file *filp) 934static int f2fs_ioc_start_atomic_write(struct file *filp)
909{ 935{
910 struct inode *inode = file_inode(filp); 936 struct inode *inode = file_inode(filp);
911 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
912 937
913 if (!inode_owner_or_capable(inode)) 938 if (!inode_owner_or_capable(inode))
914 return -EACCES; 939 return -EACCES;
915 940
916 f2fs_balance_fs(sbi); 941 f2fs_balance_fs(F2FS_I_SB(inode));
942
943 if (f2fs_is_atomic_file(inode))
944 return 0;
917 945
918 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); 946 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
919 947
920 return f2fs_convert_inline_inode(inode); 948 return f2fs_convert_inline_inode(inode);
921} 949}
922 950
923static int f2fs_release_file(struct inode *inode, struct file *filp)
924{
925 /* some remained atomic pages should discarded */
926 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
927 commit_inmem_pages(inode, true);
928 return 0;
929}
930
931static int f2fs_ioc_commit_atomic_write(struct file *filp) 951static int f2fs_ioc_commit_atomic_write(struct file *filp)
932{ 952{
933 struct inode *inode = file_inode(filp); 953 struct inode *inode = file_inode(filp);
@@ -948,6 +968,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
948 968
949 ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); 969 ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
950 mnt_drop_write_file(filp); 970 mnt_drop_write_file(filp);
971 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
951 return ret; 972 return ret;
952} 973}
953 974
@@ -958,11 +979,56 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
958 if (!inode_owner_or_capable(inode)) 979 if (!inode_owner_or_capable(inode))
959 return -EACCES; 980 return -EACCES;
960 981
982 if (f2fs_is_volatile_file(inode))
983 return 0;
984
961 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); 985 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
962 986
963 return f2fs_convert_inline_inode(inode); 987 return f2fs_convert_inline_inode(inode);
964} 988}
965 989
990static int f2fs_ioc_release_volatile_write(struct file *filp)
991{
992 struct inode *inode = file_inode(filp);
993
994 if (!inode_owner_or_capable(inode))
995 return -EACCES;
996
997 if (!f2fs_is_volatile_file(inode))
998 return 0;
999
1000 punch_hole(inode, 0, F2FS_BLKSIZE);
1001 return 0;
1002}
1003
1004static int f2fs_ioc_abort_volatile_write(struct file *filp)
1005{
1006 struct inode *inode = file_inode(filp);
1007 int ret;
1008
1009 if (!inode_owner_or_capable(inode))
1010 return -EACCES;
1011
1012 ret = mnt_want_write_file(filp);
1013 if (ret)
1014 return ret;
1015
1016 f2fs_balance_fs(F2FS_I_SB(inode));
1017
1018 if (f2fs_is_atomic_file(inode)) {
1019 commit_inmem_pages(inode, false);
1020 clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
1021 }
1022
1023 if (f2fs_is_volatile_file(inode)) {
1024 clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1025 filemap_fdatawrite(inode->i_mapping);
1026 set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
1027 }
1028 mnt_drop_write_file(filp);
1029 return ret;
1030}
1031
966static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 1032static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
967{ 1033{
968 struct inode *inode = file_inode(filp); 1034 struct inode *inode = file_inode(filp);
@@ -1000,12 +1066,18 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
1000 return f2fs_ioc_getflags(filp, arg); 1066 return f2fs_ioc_getflags(filp, arg);
1001 case F2FS_IOC_SETFLAGS: 1067 case F2FS_IOC_SETFLAGS:
1002 return f2fs_ioc_setflags(filp, arg); 1068 return f2fs_ioc_setflags(filp, arg);
1069 case F2FS_IOC_GETVERSION:
1070 return f2fs_ioc_getversion(filp, arg);
1003 case F2FS_IOC_START_ATOMIC_WRITE: 1071 case F2FS_IOC_START_ATOMIC_WRITE:
1004 return f2fs_ioc_start_atomic_write(filp); 1072 return f2fs_ioc_start_atomic_write(filp);
1005 case F2FS_IOC_COMMIT_ATOMIC_WRITE: 1073 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
1006 return f2fs_ioc_commit_atomic_write(filp); 1074 return f2fs_ioc_commit_atomic_write(filp);
1007 case F2FS_IOC_START_VOLATILE_WRITE: 1075 case F2FS_IOC_START_VOLATILE_WRITE:
1008 return f2fs_ioc_start_volatile_write(filp); 1076 return f2fs_ioc_start_volatile_write(filp);
1077 case F2FS_IOC_RELEASE_VOLATILE_WRITE:
1078 return f2fs_ioc_release_volatile_write(filp);
1079 case F2FS_IOC_ABORT_VOLATILE_WRITE:
1080 return f2fs_ioc_abort_volatile_write(filp);
1009 case FITRIM: 1081 case FITRIM:
1010 return f2fs_ioc_fitrim(filp, arg); 1082 return f2fs_ioc_fitrim(filp, arg);
1011 default: 1083 default:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index eec0933a4819..76adbc3641f1 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -24,8 +24,6 @@
24#include "gc.h" 24#include "gc.h"
25#include <trace/events/f2fs.h> 25#include <trace/events/f2fs.h>
26 26
27static struct kmem_cache *winode_slab;
28
29static int gc_thread_func(void *data) 27static int gc_thread_func(void *data)
30{ 28{
31 struct f2fs_sb_info *sbi = data; 29 struct f2fs_sb_info *sbi = data;
@@ -46,7 +44,7 @@ static int gc_thread_func(void *data)
46 break; 44 break;
47 45
48 if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { 46 if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
49 wait_ms = increase_sleep_time(gc_th, wait_ms); 47 increase_sleep_time(gc_th, &wait_ms);
50 continue; 48 continue;
51 } 49 }
52 50
@@ -67,15 +65,15 @@ static int gc_thread_func(void *data)
67 continue; 65 continue;
68 66
69 if (!is_idle(sbi)) { 67 if (!is_idle(sbi)) {
70 wait_ms = increase_sleep_time(gc_th, wait_ms); 68 increase_sleep_time(gc_th, &wait_ms);
71 mutex_unlock(&sbi->gc_mutex); 69 mutex_unlock(&sbi->gc_mutex);
72 continue; 70 continue;
73 } 71 }
74 72
75 if (has_enough_invalid_blocks(sbi)) 73 if (has_enough_invalid_blocks(sbi))
76 wait_ms = decrease_sleep_time(gc_th, wait_ms); 74 decrease_sleep_time(gc_th, &wait_ms);
77 else 75 else
78 wait_ms = increase_sleep_time(gc_th, wait_ms); 76 increase_sleep_time(gc_th, &wait_ms);
79 77
80 stat_inc_bggc_count(sbi); 78 stat_inc_bggc_count(sbi);
81 79
@@ -356,13 +354,10 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
356 iput(inode); 354 iput(inode);
357 return; 355 return;
358 } 356 }
359 new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); 357 new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
360 new_ie->inode = inode; 358 new_ie->inode = inode;
361retry: 359
362 if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { 360 f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
363 cond_resched();
364 goto retry;
365 }
366 list_add_tail(&new_ie->list, &gc_list->ilist); 361 list_add_tail(&new_ie->list, &gc_list->ilist);
367} 362}
368 363
@@ -373,7 +368,7 @@ static void put_gc_inode(struct gc_inode_list *gc_list)
373 radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); 368 radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
374 iput(ie->inode); 369 iput(ie->inode);
375 list_del(&ie->list); 370 list_del(&ie->list);
376 kmem_cache_free(winode_slab, ie); 371 kmem_cache_free(inode_entry_slab, ie);
377 } 372 }
378} 373}
379 374
@@ -703,8 +698,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
703 .iroot = RADIX_TREE_INIT(GFP_NOFS), 698 .iroot = RADIX_TREE_INIT(GFP_NOFS),
704 }; 699 };
705 700
706 cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; 701 cpc.reason = __get_cp_reason(sbi);
707
708gc_more: 702gc_more:
709 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) 703 if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
710 goto stop; 704 goto stop;
@@ -750,17 +744,3 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
750{ 744{
751 DIRTY_I(sbi)->v_ops = &default_v_ops; 745 DIRTY_I(sbi)->v_ops = &default_v_ops;
752} 746}
753
754int __init create_gc_caches(void)
755{
756 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
757 sizeof(struct inode_entry));
758 if (!winode_slab)
759 return -ENOMEM;
760 return 0;
761}
762
763void destroy_gc_caches(void)
764{
765 kmem_cache_destroy(winode_slab);
766}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 6ff7ad38463e..b4a65be9f7d3 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -35,11 +35,6 @@ struct f2fs_gc_kthread {
35 unsigned int gc_idle; 35 unsigned int gc_idle;
36}; 36};
37 37
38struct inode_entry {
39 struct list_head list;
40 struct inode *inode;
41};
42
43struct gc_inode_list { 38struct gc_inode_list {
44 struct list_head ilist; 39 struct list_head ilist;
45 struct radix_tree_root iroot; 40 struct radix_tree_root iroot;
@@ -69,26 +64,26 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
69 return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; 64 return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
70} 65}
71 66
72static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) 67static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th,
68 long *wait)
73{ 69{
74 if (wait == gc_th->no_gc_sleep_time) 70 if (*wait == gc_th->no_gc_sleep_time)
75 return wait; 71 return;
76 72
77 wait += gc_th->min_sleep_time; 73 *wait += gc_th->min_sleep_time;
78 if (wait > gc_th->max_sleep_time) 74 if (*wait > gc_th->max_sleep_time)
79 wait = gc_th->max_sleep_time; 75 *wait = gc_th->max_sleep_time;
80 return wait;
81} 76}
82 77
83static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) 78static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
79 long *wait)
84{ 80{
85 if (wait == gc_th->no_gc_sleep_time) 81 if (*wait == gc_th->no_gc_sleep_time)
86 wait = gc_th->max_sleep_time; 82 *wait = gc_th->max_sleep_time;
87 83
88 wait -= gc_th->min_sleep_time; 84 *wait -= gc_th->min_sleep_time;
89 if (wait <= gc_th->min_sleep_time) 85 if (*wait <= gc_th->min_sleep_time)
90 wait = gc_th->min_sleep_time; 86 *wait = gc_th->min_sleep_time;
91 return wait;
92} 87}
93 88
94static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) 89static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index f2d3c581e776..1484c00133cd 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -50,6 +50,12 @@ void read_inline_data(struct page *page, struct page *ipage)
50 SetPageUptodate(page); 50 SetPageUptodate(page);
51} 51}
52 52
53static void truncate_inline_data(struct page *ipage)
54{
55 f2fs_wait_on_page_writeback(ipage, NODE);
56 memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA);
57}
58
53int f2fs_read_inline_data(struct inode *inode, struct page *page) 59int f2fs_read_inline_data(struct inode *inode, struct page *page)
54{ 60{
55 struct page *ipage; 61 struct page *ipage;
@@ -79,7 +85,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
79int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) 85int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
80{ 86{
81 void *src_addr, *dst_addr; 87 void *src_addr, *dst_addr;
82 block_t new_blk_addr;
83 struct f2fs_io_info fio = { 88 struct f2fs_io_info fio = {
84 .type = DATA, 89 .type = DATA,
85 .rw = WRITE_SYNC | REQ_PRIO, 90 .rw = WRITE_SYNC | REQ_PRIO,
@@ -115,9 +120,9 @@ no_update:
115 120
116 /* write data page to try to make data consistent */ 121 /* write data page to try to make data consistent */
117 set_page_writeback(page); 122 set_page_writeback(page);
118 123 fio.blk_addr = dn->data_blkaddr;
119 write_data_page(page, dn, &new_blk_addr, &fio); 124 write_data_page(page, dn, &fio);
120 update_extent_cache(new_blk_addr, dn); 125 update_extent_cache(dn);
121 f2fs_wait_on_page_writeback(page, DATA); 126 f2fs_wait_on_page_writeback(page, DATA);
122 if (dirty) 127 if (dirty)
123 inode_dec_dirty_pages(dn->inode); 128 inode_dec_dirty_pages(dn->inode);
@@ -126,7 +131,7 @@ no_update:
126 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); 131 set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
127 132
128 /* clear inline data and flag after data writeback */ 133 /* clear inline data and flag after data writeback */
129 truncate_inline_data(dn->inode_page, 0); 134 truncate_inline_data(dn->inode_page);
130clear_out: 135clear_out:
131 stat_dec_inline_inode(dn->inode); 136 stat_dec_inline_inode(dn->inode);
132 f2fs_clear_inline_inode(dn->inode); 137 f2fs_clear_inline_inode(dn->inode);
@@ -199,19 +204,6 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
199 return 0; 204 return 0;
200} 205}
201 206
202void truncate_inline_data(struct page *ipage, u64 from)
203{
204 void *addr;
205
206 if (from >= MAX_INLINE_DATA)
207 return;
208
209 f2fs_wait_on_page_writeback(ipage, NODE);
210
211 addr = inline_data_addr(ipage);
212 memset(addr + from, 0, MAX_INLINE_DATA - from);
213}
214
215bool recover_inline_data(struct inode *inode, struct page *npage) 207bool recover_inline_data(struct inode *inode, struct page *npage)
216{ 208{
217 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 209 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -253,7 +245,7 @@ process_inline:
253 if (f2fs_has_inline_data(inode)) { 245 if (f2fs_has_inline_data(inode)) {
254 ipage = get_node_page(sbi, inode->i_ino); 246 ipage = get_node_page(sbi, inode->i_ino);
255 f2fs_bug_on(sbi, IS_ERR(ipage)); 247 f2fs_bug_on(sbi, IS_ERR(ipage));
256 truncate_inline_data(ipage, 0); 248 truncate_inline_data(ipage);
257 f2fs_clear_inline_inode(inode); 249 f2fs_clear_inline_inode(inode);
258 update_inode(inode, ipage); 250 update_inode(inode, ipage);
259 f2fs_put_page(ipage, 1); 251 f2fs_put_page(ipage, 1);
@@ -371,7 +363,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
371 set_page_dirty(page); 363 set_page_dirty(page);
372 364
373 /* clear inline dir and flag after data writeback */ 365 /* clear inline dir and flag after data writeback */
374 truncate_inline_data(ipage, 0); 366 truncate_inline_data(ipage);
375 367
376 stat_dec_inline_dir(dir); 368 stat_dec_inline_dir(dir);
377 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); 369 clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 196cc7843aaf..2d002e3738a7 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -67,29 +67,23 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
67 } 67 }
68} 68}
69 69
70static int __recover_inline_status(struct inode *inode, struct page *ipage) 70static void __recover_inline_status(struct inode *inode, struct page *ipage)
71{ 71{
72 void *inline_data = inline_data_addr(ipage); 72 void *inline_data = inline_data_addr(ipage);
73 struct f2fs_inode *ri; 73 __le32 *start = inline_data;
74 void *zbuf; 74 __le32 *end = start + MAX_INLINE_DATA / sizeof(__le32);
75 75
76 zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); 76 while (start < end) {
77 if (!zbuf) 77 if (*start++) {
78 return -ENOMEM; 78 f2fs_wait_on_page_writeback(ipage, NODE);
79 79
80 if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { 80 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
81 kfree(zbuf); 81 set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage));
82 return 0; 82 set_page_dirty(ipage);
83 return;
84 }
83 } 85 }
84 kfree(zbuf); 86 return;
85
86 f2fs_wait_on_page_writeback(ipage, NODE);
87 set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
88
89 ri = F2FS_INODE(ipage);
90 set_raw_inline(F2FS_I(inode), ri);
91 set_page_dirty(ipage);
92 return 0;
93} 87}
94 88
95static int do_read_inode(struct inode *inode) 89static int do_read_inode(struct inode *inode)
@@ -98,7 +92,6 @@ static int do_read_inode(struct inode *inode)
98 struct f2fs_inode_info *fi = F2FS_I(inode); 92 struct f2fs_inode_info *fi = F2FS_I(inode);
99 struct page *node_page; 93 struct page *node_page;
100 struct f2fs_inode *ri; 94 struct f2fs_inode *ri;
101 int err = 0;
102 95
103 /* Check if ino is within scope */ 96 /* Check if ino is within scope */
104 if (check_nid_range(sbi, inode->i_ino)) { 97 if (check_nid_range(sbi, inode->i_ino)) {
@@ -142,7 +135,7 @@ static int do_read_inode(struct inode *inode)
142 135
143 /* check data exist */ 136 /* check data exist */
144 if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) 137 if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
145 err = __recover_inline_status(inode, node_page); 138 __recover_inline_status(inode, node_page);
146 139
147 /* get rdev by using inline_info */ 140 /* get rdev by using inline_info */
148 __get_inode_rdev(inode, ri); 141 __get_inode_rdev(inode, ri);
@@ -152,7 +145,7 @@ static int do_read_inode(struct inode *inode)
152 stat_inc_inline_inode(inode); 145 stat_inc_inline_inode(inode);
153 stat_inc_inline_dir(inode); 146 stat_inc_inline_dir(inode);
154 147
155 return err; 148 return 0;
156} 149}
157 150
158struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) 151struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
@@ -304,7 +297,7 @@ void f2fs_evict_inode(struct inode *inode)
304 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 297 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
305 298
306 /* some remained atomic pages should discarded */ 299 /* some remained atomic pages should discarded */
307 if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) 300 if (f2fs_is_atomic_file(inode))
308 commit_inmem_pages(inode, true); 301 commit_inmem_pages(inode, true);
309 302
310 trace_f2fs_evict_inode(inode); 303 trace_f2fs_evict_inode(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 547a2deeb1ac..e79639a9787a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -299,7 +299,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
299 inode->i_op = &f2fs_dir_inode_operations; 299 inode->i_op = &f2fs_dir_inode_operations;
300 inode->i_fop = &f2fs_dir_operations; 300 inode->i_fop = &f2fs_dir_operations;
301 inode->i_mapping->a_ops = &f2fs_dblock_aops; 301 inode->i_mapping->a_ops = &f2fs_dblock_aops;
302 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); 302 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
303 303
304 set_inode_flag(F2FS_I(inode), FI_INC_LINK); 304 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
305 f2fs_lock_op(sbi); 305 f2fs_lock_op(sbi);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f83326ca32ef..97bd9d3db882 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -19,6 +19,7 @@
19#include "f2fs.h" 19#include "f2fs.h"
20#include "node.h" 20#include "node.h"
21#include "segment.h" 21#include "segment.h"
22#include "trace.h"
22#include <trace/events/f2fs.h> 23#include <trace/events/f2fs.h>
23 24
24#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) 25#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock)
@@ -57,12 +58,13 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
57 } else if (type == INO_ENTRIES) { 58 } else if (type == INO_ENTRIES) {
58 int i; 59 int i;
59 60
60 if (sbi->sb->s_bdi->dirty_exceeded)
61 return false;
62 for (i = 0; i <= UPDATE_INO; i++) 61 for (i = 0; i <= UPDATE_INO; i++)
63 mem_size += (sbi->im[i].ino_num * 62 mem_size += (sbi->im[i].ino_num *
64 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; 63 sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
65 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); 64 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
65 } else {
66 if (sbi->sb->s_bdi->dirty_exceeded)
67 return false;
66 } 68 }
67 return res; 69 return res;
68} 70}
@@ -268,7 +270,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
268 e = __lookup_nat_cache(nm_i, ni->nid); 270 e = __lookup_nat_cache(nm_i, ni->nid);
269 if (!e) { 271 if (!e) {
270 e = grab_nat_entry(nm_i, ni->nid); 272 e = grab_nat_entry(nm_i, ni->nid);
271 e->ni = *ni; 273 copy_node_info(&e->ni, ni);
272 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); 274 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
273 } else if (new_blkaddr == NEW_ADDR) { 275 } else if (new_blkaddr == NEW_ADDR) {
274 /* 276 /*
@@ -276,7 +278,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
276 * previous nat entry can be remained in nat cache. 278 * previous nat entry can be remained in nat cache.
277 * So, reinitialize it with new information. 279 * So, reinitialize it with new information.
278 */ 280 */
279 e->ni = *ni; 281 copy_node_info(&e->ni, ni);
280 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); 282 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
281 } 283 }
282 284
@@ -346,7 +348,6 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
346 struct nat_entry *e; 348 struct nat_entry *e;
347 int i; 349 int i;
348 350
349 memset(&ne, 0, sizeof(struct f2fs_nat_entry));
350 ni->nid = nid; 351 ni->nid = nid;
351 352
352 /* Check nat cache */ 353 /* Check nat cache */
@@ -361,6 +362,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
361 if (e) 362 if (e)
362 return; 363 return;
363 364
365 memset(&ne, 0, sizeof(struct f2fs_nat_entry));
366
364 /* Check current segment summary */ 367 /* Check current segment summary */
365 mutex_lock(&curseg->curseg_mutex); 368 mutex_lock(&curseg->curseg_mutex);
366 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); 369 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
@@ -471,7 +474,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
471{ 474{
472 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 475 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
473 struct page *npage[4]; 476 struct page *npage[4];
474 struct page *parent; 477 struct page *parent = NULL;
475 int offset[4]; 478 int offset[4];
476 unsigned int noffset[4]; 479 unsigned int noffset[4];
477 nid_t nids[4]; 480 nid_t nids[4];
@@ -488,6 +491,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
488 if (IS_ERR(npage[0])) 491 if (IS_ERR(npage[0]))
489 return PTR_ERR(npage[0]); 492 return PTR_ERR(npage[0]);
490 } 493 }
494
495 /* if inline_data is set, should not report any block indices */
496 if (f2fs_has_inline_data(dn->inode) && index) {
497 err = -EINVAL;
498 f2fs_put_page(npage[0], 1);
499 goto release_out;
500 }
501
491 parent = npage[0]; 502 parent = npage[0];
492 if (level != 0) 503 if (level != 0)
493 nids[1] = get_nid(parent, offset[0], true); 504 nids[1] = get_nid(parent, offset[0], true);
@@ -585,7 +596,7 @@ static void truncate_node(struct dnode_of_data *dn)
585 } 596 }
586invalidate: 597invalidate:
587 clear_node_page_dirty(dn->node_page); 598 clear_node_page_dirty(dn->node_page);
588 F2FS_SET_SB_DIRT(sbi); 599 set_sbi_flag(sbi, SBI_IS_DIRTY);
589 600
590 f2fs_put_page(dn->node_page, 1); 601 f2fs_put_page(dn->node_page, 1);
591 602
@@ -976,6 +987,10 @@ static int read_node_page(struct page *page, int rw)
976{ 987{
977 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 988 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
978 struct node_info ni; 989 struct node_info ni;
990 struct f2fs_io_info fio = {
991 .type = NODE,
992 .rw = rw,
993 };
979 994
980 get_node_info(sbi, page->index, &ni); 995 get_node_info(sbi, page->index, &ni);
981 996
@@ -987,7 +1002,8 @@ static int read_node_page(struct page *page, int rw)
987 if (PageUptodate(page)) 1002 if (PageUptodate(page))
988 return LOCKED_PAGE; 1003 return LOCKED_PAGE;
989 1004
990 return f2fs_submit_page_bio(sbi, page, ni.blk_addr, rw); 1005 fio.blk_addr = ni.blk_addr;
1006 return f2fs_submit_page_bio(sbi, page, &fio);
991} 1007}
992 1008
993/* 1009/*
@@ -1028,11 +1044,11 @@ repeat:
1028 err = read_node_page(page, READ_SYNC); 1044 err = read_node_page(page, READ_SYNC);
1029 if (err < 0) 1045 if (err < 0)
1030 return ERR_PTR(err); 1046 return ERR_PTR(err);
1031 else if (err == LOCKED_PAGE) 1047 else if (err != LOCKED_PAGE)
1032 goto got_it; 1048 lock_page(page);
1033 1049
1034 lock_page(page);
1035 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { 1050 if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
1051 ClearPageUptodate(page);
1036 f2fs_put_page(page, 1); 1052 f2fs_put_page(page, 1);
1037 return ERR_PTR(-EIO); 1053 return ERR_PTR(-EIO);
1038 } 1054 }
@@ -1040,7 +1056,6 @@ repeat:
1040 f2fs_put_page(page, 1); 1056 f2fs_put_page(page, 1);
1041 goto repeat; 1057 goto repeat;
1042 } 1058 }
1043got_it:
1044 return page; 1059 return page;
1045} 1060}
1046 1061
@@ -1268,7 +1283,6 @@ static int f2fs_write_node_page(struct page *page,
1268{ 1283{
1269 struct f2fs_sb_info *sbi = F2FS_P_SB(page); 1284 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1270 nid_t nid; 1285 nid_t nid;
1271 block_t new_addr;
1272 struct node_info ni; 1286 struct node_info ni;
1273 struct f2fs_io_info fio = { 1287 struct f2fs_io_info fio = {
1274 .type = NODE, 1288 .type = NODE,
@@ -1277,7 +1291,7 @@ static int f2fs_write_node_page(struct page *page,
1277 1291
1278 trace_f2fs_writepage(page, NODE); 1292 trace_f2fs_writepage(page, NODE);
1279 1293
1280 if (unlikely(sbi->por_doing)) 1294 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1281 goto redirty_out; 1295 goto redirty_out;
1282 if (unlikely(f2fs_cp_error(sbi))) 1296 if (unlikely(f2fs_cp_error(sbi)))
1283 goto redirty_out; 1297 goto redirty_out;
@@ -1303,9 +1317,11 @@ static int f2fs_write_node_page(struct page *page,
1303 } else { 1317 } else {
1304 down_read(&sbi->node_write); 1318 down_read(&sbi->node_write);
1305 } 1319 }
1320
1306 set_page_writeback(page); 1321 set_page_writeback(page);
1307 write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); 1322 fio.blk_addr = ni.blk_addr;
1308 set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); 1323 write_node_page(sbi, page, nid, &fio);
1324 set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
1309 dec_page_count(sbi, F2FS_DIRTY_NODES); 1325 dec_page_count(sbi, F2FS_DIRTY_NODES);
1310 up_read(&sbi->node_write); 1326 up_read(&sbi->node_write);
1311 unlock_page(page); 1327 unlock_page(page);
@@ -1355,26 +1371,12 @@ static int f2fs_set_node_page_dirty(struct page *page)
1355 __set_page_dirty_nobuffers(page); 1371 __set_page_dirty_nobuffers(page);
1356 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); 1372 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
1357 SetPagePrivate(page); 1373 SetPagePrivate(page);
1374 f2fs_trace_pid(page);
1358 return 1; 1375 return 1;
1359 } 1376 }
1360 return 0; 1377 return 0;
1361} 1378}
1362 1379
1363static void f2fs_invalidate_node_page(struct page *page, unsigned int offset,
1364 unsigned int length)
1365{
1366 struct inode *inode = page->mapping->host;
1367 if (PageDirty(page))
1368 dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_NODES);
1369 ClearPagePrivate(page);
1370}
1371
1372static int f2fs_release_node_page(struct page *page, gfp_t wait)
1373{
1374 ClearPagePrivate(page);
1375 return 1;
1376}
1377
1378/* 1380/*
1379 * Structure of the f2fs node operations 1381 * Structure of the f2fs node operations
1380 */ 1382 */
@@ -1382,8 +1384,8 @@ const struct address_space_operations f2fs_node_aops = {
1382 .writepage = f2fs_write_node_page, 1384 .writepage = f2fs_write_node_page,
1383 .writepages = f2fs_write_node_pages, 1385 .writepages = f2fs_write_node_pages,
1384 .set_page_dirty = f2fs_set_node_page_dirty, 1386 .set_page_dirty = f2fs_set_node_page_dirty,
1385 .invalidatepage = f2fs_invalidate_node_page, 1387 .invalidatepage = f2fs_invalidate_page,
1386 .releasepage = f2fs_release_node_page, 1388 .releasepage = f2fs_release_page,
1387}; 1389};
1388 1390
1389static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, 1391static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
@@ -1726,80 +1728,41 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1726 return 0; 1728 return 0;
1727} 1729}
1728 1730
1729/*
1730 * ra_sum_pages() merge contiguous pages into one bio and submit.
1731 * these pre-read pages are allocated in bd_inode's mapping tree.
1732 */
1733static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages,
1734 int start, int nrpages)
1735{
1736 struct inode *inode = sbi->sb->s_bdev->bd_inode;
1737 struct address_space *mapping = inode->i_mapping;
1738 int i, page_idx = start;
1739 struct f2fs_io_info fio = {
1740 .type = META,
1741 .rw = READ_SYNC | REQ_META | REQ_PRIO
1742 };
1743
1744 for (i = 0; page_idx < start + nrpages; page_idx++, i++) {
1745 /* alloc page in bd_inode for reading node summary info */
1746 pages[i] = grab_cache_page(mapping, page_idx);
1747 if (!pages[i])
1748 break;
1749 f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio);
1750 }
1751
1752 f2fs_submit_merged_bio(sbi, META, READ);
1753 return i;
1754}
1755
1756int restore_node_summary(struct f2fs_sb_info *sbi, 1731int restore_node_summary(struct f2fs_sb_info *sbi,
1757 unsigned int segno, struct f2fs_summary_block *sum) 1732 unsigned int segno, struct f2fs_summary_block *sum)
1758{ 1733{
1759 struct f2fs_node *rn; 1734 struct f2fs_node *rn;
1760 struct f2fs_summary *sum_entry; 1735 struct f2fs_summary *sum_entry;
1761 struct inode *inode = sbi->sb->s_bdev->bd_inode;
1762 block_t addr; 1736 block_t addr;
1763 int bio_blocks = MAX_BIO_BLOCKS(sbi); 1737 int bio_blocks = MAX_BIO_BLOCKS(sbi);
1764 struct page *pages[bio_blocks]; 1738 int i, idx, last_offset, nrpages;
1765 int i, idx, last_offset, nrpages, err = 0;
1766 1739
1767 /* scan the node segment */ 1740 /* scan the node segment */
1768 last_offset = sbi->blocks_per_seg; 1741 last_offset = sbi->blocks_per_seg;
1769 addr = START_BLOCK(sbi, segno); 1742 addr = START_BLOCK(sbi, segno);
1770 sum_entry = &sum->entries[0]; 1743 sum_entry = &sum->entries[0];
1771 1744
1772 for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { 1745 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
1773 nrpages = min(last_offset - i, bio_blocks); 1746 nrpages = min(last_offset - i, bio_blocks);
1774 1747
1775 /* readahead node pages */ 1748 /* readahead node pages */
1776 nrpages = ra_sum_pages(sbi, pages, addr, nrpages); 1749 ra_meta_pages(sbi, addr, nrpages, META_POR);
1777 if (!nrpages)
1778 return -ENOMEM;
1779 1750
1780 for (idx = 0; idx < nrpages; idx++) { 1751 for (idx = addr; idx < addr + nrpages; idx++) {
1781 if (err) 1752 struct page *page = get_meta_page(sbi, idx);
1782 goto skip;
1783 1753
1784 lock_page(pages[idx]); 1754 rn = F2FS_NODE(page);
1785 if (unlikely(!PageUptodate(pages[idx]))) { 1755 sum_entry->nid = rn->footer.nid;
1786 err = -EIO; 1756 sum_entry->version = 0;
1787 } else { 1757 sum_entry->ofs_in_node = 0;
1788 rn = F2FS_NODE(pages[idx]); 1758 sum_entry++;
1789 sum_entry->nid = rn->footer.nid; 1759 f2fs_put_page(page, 1);
1790 sum_entry->version = 0;
1791 sum_entry->ofs_in_node = 0;
1792 sum_entry++;
1793 }
1794 unlock_page(pages[idx]);
1795skip:
1796 page_cache_release(pages[idx]);
1797 } 1760 }
1798 1761
1799 invalidate_mapping_pages(inode->i_mapping, addr, 1762 invalidate_mapping_pages(META_MAPPING(sbi), addr,
1800 addr + nrpages); 1763 addr + nrpages);
1801 } 1764 }
1802 return err; 1765 return 0;
1803} 1766}
1804 1767
1805static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 1768static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -1923,7 +1886,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1923 struct f2fs_nm_info *nm_i = NM_I(sbi); 1886 struct f2fs_nm_info *nm_i = NM_I(sbi);
1924 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1887 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1925 struct f2fs_summary_block *sum = curseg->sum_blk; 1888 struct f2fs_summary_block *sum = curseg->sum_blk;
1926 struct nat_entry_set *setvec[NATVEC_SIZE]; 1889 struct nat_entry_set *setvec[SETVEC_SIZE];
1927 struct nat_entry_set *set, *tmp; 1890 struct nat_entry_set *set, *tmp;
1928 unsigned int found; 1891 unsigned int found;
1929 nid_t set_idx = 0; 1892 nid_t set_idx = 0;
@@ -1940,7 +1903,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1940 remove_nats_in_journal(sbi); 1903 remove_nats_in_journal(sbi);
1941 1904
1942 while ((found = __gang_lookup_nat_set(nm_i, 1905 while ((found = __gang_lookup_nat_set(nm_i,
1943 set_idx, NATVEC_SIZE, setvec))) { 1906 set_idx, SETVEC_SIZE, setvec))) {
1944 unsigned idx; 1907 unsigned idx;
1945 set_idx = setvec[found - 1]->set + 1; 1908 set_idx = setvec[found - 1]->set + 1;
1946 for (idx = 0; idx < found; idx++) 1909 for (idx = 0; idx < found; idx++)
@@ -2020,6 +1983,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2020 struct f2fs_nm_info *nm_i = NM_I(sbi); 1983 struct f2fs_nm_info *nm_i = NM_I(sbi);
2021 struct free_nid *i, *next_i; 1984 struct free_nid *i, *next_i;
2022 struct nat_entry *natvec[NATVEC_SIZE]; 1985 struct nat_entry *natvec[NATVEC_SIZE];
1986 struct nat_entry_set *setvec[SETVEC_SIZE];
2023 nid_t nid = 0; 1987 nid_t nid = 0;
2024 unsigned int found; 1988 unsigned int found;
2025 1989
@@ -2044,11 +2008,27 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
2044 while ((found = __gang_lookup_nat_cache(nm_i, 2008 while ((found = __gang_lookup_nat_cache(nm_i,
2045 nid, NATVEC_SIZE, natvec))) { 2009 nid, NATVEC_SIZE, natvec))) {
2046 unsigned idx; 2010 unsigned idx;
2011
2047 nid = nat_get_nid(natvec[found - 1]) + 1; 2012 nid = nat_get_nid(natvec[found - 1]) + 1;
2048 for (idx = 0; idx < found; idx++) 2013 for (idx = 0; idx < found; idx++)
2049 __del_from_nat_cache(nm_i, natvec[idx]); 2014 __del_from_nat_cache(nm_i, natvec[idx]);
2050 } 2015 }
2051 f2fs_bug_on(sbi, nm_i->nat_cnt); 2016 f2fs_bug_on(sbi, nm_i->nat_cnt);
2017
2018 /* destroy nat set cache */
2019 nid = 0;
2020 while ((found = __gang_lookup_nat_set(nm_i,
2021 nid, SETVEC_SIZE, setvec))) {
2022 unsigned idx;
2023
2024 nid = setvec[found - 1]->set + 1;
2025 for (idx = 0; idx < found; idx++) {
2026 /* entry_cnt is not zero, when cp_error was occurred */
2027 f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
2028 radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
2029 kmem_cache_free(nat_entry_set_slab, setvec[idx]);
2030 }
2031 }
2052 up_write(&nm_i->nat_tree_lock); 2032 up_write(&nm_i->nat_tree_lock);
2053 2033
2054 kfree(nm_i->nat_bitmap); 2034 kfree(nm_i->nat_bitmap);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index d10b6448a671..f405bbf2435a 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -25,10 +25,19 @@
25 25
26/* vector size for gang look-up from nat cache that consists of radix tree */ 26/* vector size for gang look-up from nat cache that consists of radix tree */
27#define NATVEC_SIZE 64 27#define NATVEC_SIZE 64
28#define SETVEC_SIZE 32
28 29
29/* return value for read_node_page */ 30/* return value for read_node_page */
30#define LOCKED_PAGE 1 31#define LOCKED_PAGE 1
31 32
33/* For flag in struct node_info */
34enum {
35 IS_CHECKPOINTED, /* is it checkpointed before? */
36 HAS_FSYNCED_INODE, /* is the inode fsynced before? */
37 HAS_LAST_FSYNC, /* has the latest node fsync mark? */
38 IS_DIRTY, /* this nat entry is dirty? */
39};
40
32/* 41/*
33 * For node information 42 * For node information
34 */ 43 */
@@ -37,18 +46,11 @@ struct node_info {
37 nid_t ino; /* inode number of the node's owner */ 46 nid_t ino; /* inode number of the node's owner */
38 block_t blk_addr; /* block address of the node */ 47 block_t blk_addr; /* block address of the node */
39 unsigned char version; /* version of the node */ 48 unsigned char version; /* version of the node */
40}; 49 unsigned char flag; /* for node information bits */
41
42enum {
43 IS_CHECKPOINTED, /* is it checkpointed before? */
44 HAS_FSYNCED_INODE, /* is the inode fsynced before? */
45 HAS_LAST_FSYNC, /* has the latest node fsync mark? */
46 IS_DIRTY, /* this nat entry is dirty? */
47}; 50};
48 51
49struct nat_entry { 52struct nat_entry {
50 struct list_head list; /* for clean or dirty nat list */ 53 struct list_head list; /* for clean or dirty nat list */
51 unsigned char flag; /* for node information bits */
52 struct node_info ni; /* in-memory node information */ 54 struct node_info ni; /* in-memory node information */
53}; 55};
54 56
@@ -63,20 +65,30 @@ struct nat_entry {
63 65
64#define inc_node_version(version) (++version) 66#define inc_node_version(version) (++version)
65 67
68static inline void copy_node_info(struct node_info *dst,
69 struct node_info *src)
70{
71 dst->nid = src->nid;
72 dst->ino = src->ino;
73 dst->blk_addr = src->blk_addr;
74 dst->version = src->version;
75 /* should not copy flag here */
76}
77
66static inline void set_nat_flag(struct nat_entry *ne, 78static inline void set_nat_flag(struct nat_entry *ne,
67 unsigned int type, bool set) 79 unsigned int type, bool set)
68{ 80{
69 unsigned char mask = 0x01 << type; 81 unsigned char mask = 0x01 << type;
70 if (set) 82 if (set)
71 ne->flag |= mask; 83 ne->ni.flag |= mask;
72 else 84 else
73 ne->flag &= ~mask; 85 ne->ni.flag &= ~mask;
74} 86}
75 87
76static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type) 88static inline bool get_nat_flag(struct nat_entry *ne, unsigned int type)
77{ 89{
78 unsigned char mask = 0x01 << type; 90 unsigned char mask = 0x01 << type;
79 return ne->flag & mask; 91 return ne->ni.flag & mask;
80} 92}
81 93
82static inline void nat_reset_flag(struct nat_entry *ne) 94static inline void nat_reset_flag(struct nat_entry *ne)
@@ -108,6 +120,7 @@ enum mem_type {
108 NAT_ENTRIES, /* indicates the cached nat entry */ 120 NAT_ENTRIES, /* indicates the cached nat entry */
109 DIRTY_DENTS, /* indicates dirty dentry pages */ 121 DIRTY_DENTS, /* indicates dirty dentry pages */
110 INO_ENTRIES, /* indicates inode entries */ 122 INO_ENTRIES, /* indicates inode entries */
123 BASE_CHECK, /* check kernel status */
111}; 124};
112 125
113struct nat_entry_set { 126struct nat_entry_set {
@@ -200,11 +213,19 @@ static inline void fill_node_footer(struct page *page, nid_t nid,
200 nid_t ino, unsigned int ofs, bool reset) 213 nid_t ino, unsigned int ofs, bool reset)
201{ 214{
202 struct f2fs_node *rn = F2FS_NODE(page); 215 struct f2fs_node *rn = F2FS_NODE(page);
216 unsigned int old_flag = 0;
217
203 if (reset) 218 if (reset)
204 memset(rn, 0, sizeof(*rn)); 219 memset(rn, 0, sizeof(*rn));
220 else
221 old_flag = le32_to_cpu(rn->footer.flag);
222
205 rn->footer.nid = cpu_to_le32(nid); 223 rn->footer.nid = cpu_to_le32(nid);
206 rn->footer.ino = cpu_to_le32(ino); 224 rn->footer.ino = cpu_to_le32(ino);
207 rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); 225
226 /* should remain old flag bits such as COLD_BIT_SHIFT */
227 rn->footer.flag = cpu_to_le32((ofs << OFFSET_BIT_SHIFT) |
228 (old_flag & OFFSET_BIT_MASK));
208} 229}
209 230
210static inline void copy_node_footer(struct page *dst, struct page *src) 231static inline void copy_node_footer(struct page *dst, struct page *src)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9160a37e1c7a..41afb9534bbd 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -346,6 +346,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
346 if (IS_INODE(page)) { 346 if (IS_INODE(page)) {
347 recover_inline_xattr(inode, page); 347 recover_inline_xattr(inode, page);
348 } else if (f2fs_has_xattr_block(ofs_of_node(page))) { 348 } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
349 /*
350 * Deprecated; xattr blocks should be found from cold log.
351 * But, we should remain this for backward compatibility.
352 */
349 recover_xattr_data(inode, page, blkaddr); 353 recover_xattr_data(inode, page, blkaddr);
350 goto out; 354 goto out;
351 } 355 }
@@ -396,7 +400,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
396 400
397 /* write dummy data page */ 401 /* write dummy data page */
398 recover_data_page(sbi, NULL, &sum, src, dest); 402 recover_data_page(sbi, NULL, &sum, src, dest);
399 update_extent_cache(dest, &dn); 403 dn.data_blkaddr = dest;
404 update_extent_cache(&dn);
400 recovered++; 405 recovered++;
401 } 406 }
402 dn.ofs_in_node++; 407 dn.ofs_in_node++;
@@ -503,7 +508,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
503 INIT_LIST_HEAD(&inode_list); 508 INIT_LIST_HEAD(&inode_list);
504 509
505 /* step #1: find fsynced inode numbers */ 510 /* step #1: find fsynced inode numbers */
506 sbi->por_doing = true; 511 set_sbi_flag(sbi, SBI_POR_DOING);
507 512
508 /* prevent checkpoint */ 513 /* prevent checkpoint */
509 mutex_lock(&sbi->cp_mutex); 514 mutex_lock(&sbi->cp_mutex);
@@ -536,7 +541,7 @@ out:
536 truncate_inode_pages_final(META_MAPPING(sbi)); 541 truncate_inode_pages_final(META_MAPPING(sbi));
537 } 542 }
538 543
539 sbi->por_doing = false; 544 clear_sbi_flag(sbi, SBI_POR_DOING);
540 if (err) { 545 if (err) {
541 discard_next_dnode(sbi, blkaddr); 546 discard_next_dnode(sbi, blkaddr);
542 547
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 42607a679923..daee4ab913da 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -20,6 +20,7 @@
20#include "f2fs.h" 20#include "f2fs.h"
21#include "segment.h" 21#include "segment.h"
22#include "node.h" 22#include "node.h"
23#include "trace.h"
23#include <trace/events/f2fs.h> 24#include <trace/events/f2fs.h>
24 25
25#define __reverse_ffz(x) __reverse_ffs(~(x)) 26#define __reverse_ffz(x) __reverse_ffs(~(x))
@@ -181,6 +182,7 @@ void register_inmem_page(struct inode *inode, struct page *page)
181 int err; 182 int err;
182 183
183 SetPagePrivate(page); 184 SetPagePrivate(page);
185 f2fs_trace_pid(page);
184 186
185 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); 187 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
186 188
@@ -205,23 +207,6 @@ retry:
205 mutex_unlock(&fi->inmem_lock); 207 mutex_unlock(&fi->inmem_lock);
206} 208}
207 209
208void invalidate_inmem_page(struct inode *inode, struct page *page)
209{
210 struct f2fs_inode_info *fi = F2FS_I(inode);
211 struct inmem_pages *cur;
212
213 mutex_lock(&fi->inmem_lock);
214 cur = radix_tree_lookup(&fi->inmem_root, page->index);
215 if (cur) {
216 radix_tree_delete(&fi->inmem_root, cur->page->index);
217 f2fs_put_page(cur->page, 0);
218 list_del(&cur->list);
219 kmem_cache_free(inmem_entry_slab, cur);
220 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
221 }
222 mutex_unlock(&fi->inmem_lock);
223}
224
225void commit_inmem_pages(struct inode *inode, bool abort) 210void commit_inmem_pages(struct inode *inode, bool abort)
226{ 211{
227 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 212 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -230,7 +215,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
230 bool submit_bio = false; 215 bool submit_bio = false;
231 struct f2fs_io_info fio = { 216 struct f2fs_io_info fio = {
232 .type = DATA, 217 .type = DATA,
233 .rw = WRITE_SYNC, 218 .rw = WRITE_SYNC | REQ_PRIO,
234 }; 219 };
235 220
236 /* 221 /*
@@ -240,33 +225,38 @@ void commit_inmem_pages(struct inode *inode, bool abort)
240 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this 225 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
241 * inode becomes free by iget_locked in f2fs_iget. 226 * inode becomes free by iget_locked in f2fs_iget.
242 */ 227 */
243 if (!abort) 228 if (!abort) {
244 f2fs_balance_fs(sbi); 229 f2fs_balance_fs(sbi);
245 230 f2fs_lock_op(sbi);
246 f2fs_lock_op(sbi); 231 }
247 232
248 mutex_lock(&fi->inmem_lock); 233 mutex_lock(&fi->inmem_lock);
249 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { 234 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
250 lock_page(cur->page); 235 if (!abort) {
251 if (!abort && cur->page->mapping == inode->i_mapping) { 236 lock_page(cur->page);
252 f2fs_wait_on_page_writeback(cur->page, DATA); 237 if (cur->page->mapping == inode->i_mapping) {
253 if (clear_page_dirty_for_io(cur->page)) 238 f2fs_wait_on_page_writeback(cur->page, DATA);
254 inode_dec_dirty_pages(inode); 239 if (clear_page_dirty_for_io(cur->page))
255 do_write_data_page(cur->page, &fio); 240 inode_dec_dirty_pages(inode);
256 submit_bio = true; 241 do_write_data_page(cur->page, &fio);
242 submit_bio = true;
243 }
244 f2fs_put_page(cur->page, 1);
245 } else {
246 put_page(cur->page);
257 } 247 }
258 radix_tree_delete(&fi->inmem_root, cur->page->index); 248 radix_tree_delete(&fi->inmem_root, cur->page->index);
259 f2fs_put_page(cur->page, 1);
260 list_del(&cur->list); 249 list_del(&cur->list);
261 kmem_cache_free(inmem_entry_slab, cur); 250 kmem_cache_free(inmem_entry_slab, cur);
262 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); 251 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
263 } 252 }
264 if (submit_bio)
265 f2fs_submit_merged_bio(sbi, DATA, WRITE);
266 mutex_unlock(&fi->inmem_lock); 253 mutex_unlock(&fi->inmem_lock);
267 254
268 filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); 255 if (!abort) {
269 f2fs_unlock_op(sbi); 256 f2fs_unlock_op(sbi);
257 if (submit_bio)
258 f2fs_submit_merged_bio(sbi, DATA, WRITE);
259 }
270} 260}
271 261
272/* 262/*
@@ -290,7 +280,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
290 /* check the # of cached NAT entries and prefree segments */ 280 /* check the # of cached NAT entries and prefree segments */
291 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || 281 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
292 excess_prefree_segs(sbi) || 282 excess_prefree_segs(sbi) ||
293 available_free_memory(sbi, INO_ENTRIES)) 283 !available_free_memory(sbi, INO_ENTRIES))
294 f2fs_sync_fs(sbi->sb, true); 284 f2fs_sync_fs(sbi->sb, true);
295} 285}
296 286
@@ -515,12 +505,13 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
515 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); 505 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
516 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 506 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
517 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 507 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
518 unsigned long dmap[entries]; 508 unsigned long *dmap = SIT_I(sbi)->tmp_map;
519 unsigned int start = 0, end = -1; 509 unsigned int start = 0, end = -1;
520 bool force = (cpc->reason == CP_DISCARD); 510 bool force = (cpc->reason == CP_DISCARD);
521 int i; 511 int i;
522 512
523 if (!force && !test_opt(sbi, DISCARD)) 513 if (!force && (!test_opt(sbi, DISCARD) ||
514 SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards))
524 return; 515 return;
525 516
526 if (force && !se->valid_blocks) { 517 if (force && !se->valid_blocks) {
@@ -548,7 +539,8 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
548 539
549 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ 540 /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
550 for (i = 0; i < entries; i++) 541 for (i = 0; i < entries; i++)
551 dmap[i] = ~(cur_map[i] | ckpt_map[i]); 542 dmap[i] = force ? ~ckpt_map[i] :
543 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
552 544
553 while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { 545 while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
554 start = __find_rev_next_bit(dmap, max_blocks, end + 1); 546 start = __find_rev_next_bit(dmap, max_blocks, end + 1);
@@ -735,7 +727,7 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
735/* 727/*
736 * Calculate the number of current summary pages for writing 728 * Calculate the number of current summary pages for writing
737 */ 729 */
738int npages_for_summary_flush(struct f2fs_sb_info *sbi) 730int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
739{ 731{
740 int valid_sum_count = 0; 732 int valid_sum_count = 0;
741 int i, sum_in_page; 733 int i, sum_in_page;
@@ -743,8 +735,13 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi)
743 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 735 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
744 if (sbi->ckpt->alloc_type[i] == SSR) 736 if (sbi->ckpt->alloc_type[i] == SSR)
745 valid_sum_count += sbi->blocks_per_seg; 737 valid_sum_count += sbi->blocks_per_seg;
746 else 738 else {
747 valid_sum_count += curseg_blkoff(sbi, i); 739 if (for_ra)
740 valid_sum_count += le16_to_cpu(
741 F2FS_CKPT(sbi)->cur_data_blkoff[i]);
742 else
743 valid_sum_count += curseg_blkoff(sbi, i);
744 }
748 } 745 }
749 746
750 sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE - 747 sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
@@ -803,7 +800,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
803 int go_left = 0; 800 int go_left = 0;
804 int i; 801 int i;
805 802
806 write_lock(&free_i->segmap_lock); 803 spin_lock(&free_i->segmap_lock);
807 804
808 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { 805 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
809 segno = find_next_zero_bit(free_i->free_segmap, 806 segno = find_next_zero_bit(free_i->free_segmap,
@@ -876,7 +873,7 @@ got_it:
876 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); 873 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
877 __set_inuse(sbi, segno); 874 __set_inuse(sbi, segno);
878 *newseg = segno; 875 *newseg = segno;
879 write_unlock(&free_i->segmap_lock); 876 spin_unlock(&free_i->segmap_lock);
880} 877}
881 878
882static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) 879static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
@@ -927,7 +924,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
927{ 924{
928 struct seg_entry *se = get_seg_entry(sbi, seg->segno); 925 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
929 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); 926 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
930 unsigned long target_map[entries]; 927 unsigned long *target_map = SIT_I(sbi)->tmp_map;
931 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; 928 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
932 unsigned long *cur_map = (unsigned long *)se->cur_valid_map; 929 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
933 int i, pos; 930 int i, pos;
@@ -1027,18 +1024,22 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
1027 stat_inc_seg_type(sbi, curseg); 1024 stat_inc_seg_type(sbi, curseg);
1028} 1025}
1029 1026
1027static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
1028{
1029 struct curseg_info *curseg = CURSEG_I(sbi, type);
1030 unsigned int old_segno;
1031
1032 old_segno = curseg->segno;
1033 SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
1034 locate_dirty_segment(sbi, old_segno);
1035}
1036
1030void allocate_new_segments(struct f2fs_sb_info *sbi) 1037void allocate_new_segments(struct f2fs_sb_info *sbi)
1031{ 1038{
1032 struct curseg_info *curseg;
1033 unsigned int old_curseg;
1034 int i; 1039 int i;
1035 1040
1036 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { 1041 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
1037 curseg = CURSEG_I(sbi, i); 1042 __allocate_new_segments(sbi, i);
1038 old_curseg = curseg->segno;
1039 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
1040 locate_dirty_segment(sbi, old_curseg);
1041 }
1042} 1043}
1043 1044
1044static const struct segment_allocation default_salloc_ops = { 1045static const struct segment_allocation default_salloc_ops = {
@@ -1047,8 +1048,8 @@ static const struct segment_allocation default_salloc_ops = {
1047 1048
1048int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) 1049int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1049{ 1050{
1050 __u64 start = range->start >> sbi->log_blocksize; 1051 __u64 start = F2FS_BYTES_TO_BLK(range->start);
1051 __u64 end = start + (range->len >> sbi->log_blocksize) - 1; 1052 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
1052 unsigned int start_segno, end_segno; 1053 unsigned int start_segno, end_segno;
1053 struct cp_control cpc; 1054 struct cp_control cpc;
1054 1055
@@ -1065,16 +1066,21 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
1065 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : 1066 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
1066 GET_SEGNO(sbi, end); 1067 GET_SEGNO(sbi, end);
1067 cpc.reason = CP_DISCARD; 1068 cpc.reason = CP_DISCARD;
1068 cpc.trim_start = start_segno; 1069 cpc.trim_minlen = F2FS_BYTES_TO_BLK(range->minlen);
1069 cpc.trim_end = end_segno;
1070 cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1071 1070
1072 /* do checkpoint to issue discard commands safely */ 1071 /* do checkpoint to issue discard commands safely */
1073 mutex_lock(&sbi->gc_mutex); 1072 for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
1074 write_checkpoint(sbi, &cpc); 1073 cpc.trim_start = start_segno;
1075 mutex_unlock(&sbi->gc_mutex); 1074 cpc.trim_end = min_t(unsigned int, rounddown(start_segno +
1075 BATCHED_TRIM_SEGMENTS(sbi),
1076 sbi->segs_per_sec) - 1, end_segno);
1077
1078 mutex_lock(&sbi->gc_mutex);
1079 write_checkpoint(sbi, &cpc);
1080 mutex_unlock(&sbi->gc_mutex);
1081 }
1076out: 1082out:
1077 range->len = cpc.trimmed << sbi->log_blocksize; 1083 range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
1078 return 0; 1084 return 0;
1079} 1085}
1080 1086
@@ -1151,11 +1157,18 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1151{ 1157{
1152 struct sit_info *sit_i = SIT_I(sbi); 1158 struct sit_info *sit_i = SIT_I(sbi);
1153 struct curseg_info *curseg; 1159 struct curseg_info *curseg;
1160 bool direct_io = (type == CURSEG_DIRECT_IO);
1161
1162 type = direct_io ? CURSEG_WARM_DATA : type;
1154 1163
1155 curseg = CURSEG_I(sbi, type); 1164 curseg = CURSEG_I(sbi, type);
1156 1165
1157 mutex_lock(&curseg->curseg_mutex); 1166 mutex_lock(&curseg->curseg_mutex);
1158 1167
1168 /* direct_io'ed data is aligned to the segment for better performance */
1169 if (direct_io && curseg->next_blkoff)
1170 __allocate_new_segments(sbi, type);
1171
1159 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); 1172 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
1160 1173
1161 /* 1174 /*
@@ -1187,39 +1200,39 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
1187} 1200}
1188 1201
1189static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, 1202static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
1190 block_t old_blkaddr, block_t *new_blkaddr, 1203 struct f2fs_summary *sum,
1191 struct f2fs_summary *sum, struct f2fs_io_info *fio) 1204 struct f2fs_io_info *fio)
1192{ 1205{
1193 int type = __get_segment_type(page, fio->type); 1206 int type = __get_segment_type(page, fio->type);
1194 1207
1195 allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type); 1208 allocate_data_block(sbi, page, fio->blk_addr, &fio->blk_addr, sum, type);
1196 1209
1197 /* writeout dirty page into bdev */ 1210 /* writeout dirty page into bdev */
1198 f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio); 1211 f2fs_submit_page_mbio(sbi, page, fio);
1199} 1212}
1200 1213
1201void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) 1214void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
1202{ 1215{
1203 struct f2fs_io_info fio = { 1216 struct f2fs_io_info fio = {
1204 .type = META, 1217 .type = META,
1205 .rw = WRITE_SYNC | REQ_META | REQ_PRIO 1218 .rw = WRITE_SYNC | REQ_META | REQ_PRIO,
1219 .blk_addr = page->index,
1206 }; 1220 };
1207 1221
1208 set_page_writeback(page); 1222 set_page_writeback(page);
1209 f2fs_submit_page_mbio(sbi, page, page->index, &fio); 1223 f2fs_submit_page_mbio(sbi, page, &fio);
1210} 1224}
1211 1225
1212void write_node_page(struct f2fs_sb_info *sbi, struct page *page, 1226void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
1213 struct f2fs_io_info *fio, 1227 unsigned int nid, struct f2fs_io_info *fio)
1214 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
1215{ 1228{
1216 struct f2fs_summary sum; 1229 struct f2fs_summary sum;
1217 set_summary(&sum, nid, 0, 0); 1230 set_summary(&sum, nid, 0, 0);
1218 do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio); 1231 do_write_page(sbi, page, &sum, fio);
1219} 1232}
1220 1233
1221void write_data_page(struct page *page, struct dnode_of_data *dn, 1234void write_data_page(struct page *page, struct dnode_of_data *dn,
1222 block_t *new_blkaddr, struct f2fs_io_info *fio) 1235 struct f2fs_io_info *fio)
1223{ 1236{
1224 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1237 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1225 struct f2fs_summary sum; 1238 struct f2fs_summary sum;
@@ -1228,14 +1241,14 @@ void write_data_page(struct page *page, struct dnode_of_data *dn,
1228 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); 1241 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
1229 get_node_info(sbi, dn->nid, &ni); 1242 get_node_info(sbi, dn->nid, &ni);
1230 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1243 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1231 1244 do_write_page(sbi, page, &sum, fio);
1232 do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio); 1245 dn->data_blkaddr = fio->blk_addr;
1233} 1246}
1234 1247
1235void rewrite_data_page(struct page *page, block_t old_blkaddr, 1248void rewrite_data_page(struct page *page, struct f2fs_io_info *fio)
1236 struct f2fs_io_info *fio)
1237{ 1249{
1238 f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio); 1250 stat_inc_inplace_blocks(F2FS_P_SB(page));
1251 f2fs_submit_page_mbio(F2FS_P_SB(page), page, fio);
1239} 1252}
1240 1253
1241void recover_data_page(struct f2fs_sb_info *sbi, 1254void recover_data_page(struct f2fs_sb_info *sbi,
@@ -1393,7 +1406,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1393 segno = le32_to_cpu(ckpt->cur_data_segno[type]); 1406 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1394 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type - 1407 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1395 CURSEG_HOT_DATA]); 1408 CURSEG_HOT_DATA]);
1396 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) 1409 if (__exist_node_summaries(sbi))
1397 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type); 1410 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1398 else 1411 else
1399 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type); 1412 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
@@ -1402,7 +1415,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1402 CURSEG_HOT_NODE]); 1415 CURSEG_HOT_NODE]);
1403 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type - 1416 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1404 CURSEG_HOT_NODE]); 1417 CURSEG_HOT_NODE]);
1405 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) 1418 if (__exist_node_summaries(sbi))
1406 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE, 1419 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1407 type - CURSEG_HOT_NODE); 1420 type - CURSEG_HOT_NODE);
1408 else 1421 else
@@ -1413,7 +1426,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1413 sum = (struct f2fs_summary_block *)page_address(new); 1426 sum = (struct f2fs_summary_block *)page_address(new);
1414 1427
1415 if (IS_NODESEG(type)) { 1428 if (IS_NODESEG(type)) {
1416 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) { 1429 if (__exist_node_summaries(sbi)) {
1417 struct f2fs_summary *ns = &sum->entries[0]; 1430 struct f2fs_summary *ns = &sum->entries[0];
1418 int i; 1431 int i;
1419 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { 1432 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
@@ -1450,12 +1463,22 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1450 int err; 1463 int err;
1451 1464
1452 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { 1465 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1466 int npages = npages_for_summary_flush(sbi, true);
1467
1468 if (npages >= 2)
1469 ra_meta_pages(sbi, start_sum_block(sbi), npages,
1470 META_CP);
1471
1453 /* restore for compacted data summary */ 1472 /* restore for compacted data summary */
1454 if (read_compacted_summaries(sbi)) 1473 if (read_compacted_summaries(sbi))
1455 return -EINVAL; 1474 return -EINVAL;
1456 type = CURSEG_HOT_NODE; 1475 type = CURSEG_HOT_NODE;
1457 } 1476 }
1458 1477
1478 if (__exist_node_summaries(sbi))
1479 ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
1480 NR_CURSEG_TYPE - type, META_CP);
1481
1459 for (; type <= CURSEG_COLD_NODE; type++) { 1482 for (; type <= CURSEG_COLD_NODE; type++) {
1460 err = read_normal_summaries(sbi, type); 1483 err = read_normal_summaries(sbi, type);
1461 if (err) 1484 if (err)
@@ -1549,8 +1572,7 @@ void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1549 1572
1550void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) 1573void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1551{ 1574{
1552 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) 1575 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1553 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1554} 1576}
1555 1577
1556int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, 1578int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
@@ -1754,7 +1776,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
1754 se = get_seg_entry(sbi, segno); 1776 se = get_seg_entry(sbi, segno);
1755 1777
1756 /* add discard candidates */ 1778 /* add discard candidates */
1757 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) { 1779 if (cpc->reason != CP_DISCARD) {
1758 cpc->trim_start = segno; 1780 cpc->trim_start = segno;
1759 add_discard_addrs(sbi, cpc); 1781 add_discard_addrs(sbi, cpc);
1760 } 1782 }
@@ -1833,6 +1855,10 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
1833 return -ENOMEM; 1855 return -ENOMEM;
1834 } 1856 }
1835 1857
1858 sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1859 if (!sit_i->tmp_map)
1860 return -ENOMEM;
1861
1836 if (sbi->segs_per_sec > 1) { 1862 if (sbi->segs_per_sec > 1) {
1837 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) * 1863 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1838 sizeof(struct sec_entry)); 1864 sizeof(struct sec_entry));
@@ -1897,7 +1923,7 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
1897 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi)); 1923 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1898 free_i->free_segments = 0; 1924 free_i->free_segments = 0;
1899 free_i->free_sections = 0; 1925 free_i->free_sections = 0;
1900 rwlock_init(&free_i->segmap_lock); 1926 spin_lock_init(&free_i->segmap_lock);
1901 return 0; 1927 return 0;
1902} 1928}
1903 1929
@@ -2110,6 +2136,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
2110 sm_info->nr_discards = 0; 2136 sm_info->nr_discards = 0;
2111 sm_info->max_discards = 0; 2137 sm_info->max_discards = 0;
2112 2138
2139 sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
2140
2113 INIT_LIST_HEAD(&sm_info->sit_entry_set); 2141 INIT_LIST_HEAD(&sm_info->sit_entry_set);
2114 2142
2115 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { 2143 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
@@ -2212,6 +2240,8 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
2212 kfree(sit_i->sentries[start].ckpt_valid_map); 2240 kfree(sit_i->sentries[start].ckpt_valid_map);
2213 } 2241 }
2214 } 2242 }
2243 kfree(sit_i->tmp_map);
2244
2215 vfree(sit_i->sentries); 2245 vfree(sit_i->sentries);
2216 vfree(sit_i->sec_entries); 2246 vfree(sit_i->sec_entries);
2217 kfree(sit_i->dirty_sentries_bitmap); 2247 kfree(sit_i->dirty_sentries_bitmap);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7f327c0ba4e3..7fd35111cf62 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -189,6 +189,7 @@ struct sit_info {
189 char *sit_bitmap; /* SIT bitmap pointer */ 189 char *sit_bitmap; /* SIT bitmap pointer */
190 unsigned int bitmap_size; /* SIT bitmap size */ 190 unsigned int bitmap_size; /* SIT bitmap size */
191 191
192 unsigned long *tmp_map; /* bitmap for temporal use */
192 unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ 193 unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
193 unsigned int dirty_sentries; /* # of dirty sentries */ 194 unsigned int dirty_sentries; /* # of dirty sentries */
194 unsigned int sents_per_block; /* # of SIT entries per block */ 195 unsigned int sents_per_block; /* # of SIT entries per block */
@@ -207,7 +208,7 @@ struct free_segmap_info {
207 unsigned int start_segno; /* start segment number logically */ 208 unsigned int start_segno; /* start segment number logically */
208 unsigned int free_segments; /* # of free segments */ 209 unsigned int free_segments; /* # of free segments */
209 unsigned int free_sections; /* # of free sections */ 210 unsigned int free_sections; /* # of free sections */
210 rwlock_t segmap_lock; /* free segmap lock */ 211 spinlock_t segmap_lock; /* free segmap lock */
211 unsigned long *free_segmap; /* free segment bitmap */ 212 unsigned long *free_segmap; /* free segment bitmap */
212 unsigned long *free_secmap; /* free section bitmap */ 213 unsigned long *free_secmap; /* free section bitmap */
213}; 214};
@@ -318,9 +319,9 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
318 unsigned int max, unsigned int segno) 319 unsigned int max, unsigned int segno)
319{ 320{
320 unsigned int ret; 321 unsigned int ret;
321 read_lock(&free_i->segmap_lock); 322 spin_lock(&free_i->segmap_lock);
322 ret = find_next_bit(free_i->free_segmap, max, segno); 323 ret = find_next_bit(free_i->free_segmap, max, segno);
323 read_unlock(&free_i->segmap_lock); 324 spin_unlock(&free_i->segmap_lock);
324 return ret; 325 return ret;
325} 326}
326 327
@@ -331,7 +332,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
331 unsigned int start_segno = secno * sbi->segs_per_sec; 332 unsigned int start_segno = secno * sbi->segs_per_sec;
332 unsigned int next; 333 unsigned int next;
333 334
334 write_lock(&free_i->segmap_lock); 335 spin_lock(&free_i->segmap_lock);
335 clear_bit(segno, free_i->free_segmap); 336 clear_bit(segno, free_i->free_segmap);
336 free_i->free_segments++; 337 free_i->free_segments++;
337 338
@@ -340,7 +341,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
340 clear_bit(secno, free_i->free_secmap); 341 clear_bit(secno, free_i->free_secmap);
341 free_i->free_sections++; 342 free_i->free_sections++;
342 } 343 }
343 write_unlock(&free_i->segmap_lock); 344 spin_unlock(&free_i->segmap_lock);
344} 345}
345 346
346static inline void __set_inuse(struct f2fs_sb_info *sbi, 347static inline void __set_inuse(struct f2fs_sb_info *sbi,
@@ -362,7 +363,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
362 unsigned int start_segno = secno * sbi->segs_per_sec; 363 unsigned int start_segno = secno * sbi->segs_per_sec;
363 unsigned int next; 364 unsigned int next;
364 365
365 write_lock(&free_i->segmap_lock); 366 spin_lock(&free_i->segmap_lock);
366 if (test_and_clear_bit(segno, free_i->free_segmap)) { 367 if (test_and_clear_bit(segno, free_i->free_segmap)) {
367 free_i->free_segments++; 368 free_i->free_segments++;
368 369
@@ -373,7 +374,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
373 free_i->free_sections++; 374 free_i->free_sections++;
374 } 375 }
375 } 376 }
376 write_unlock(&free_i->segmap_lock); 377 spin_unlock(&free_i->segmap_lock);
377} 378}
378 379
379static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, 380static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
@@ -381,13 +382,13 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
381{ 382{
382 struct free_segmap_info *free_i = FREE_I(sbi); 383 struct free_segmap_info *free_i = FREE_I(sbi);
383 unsigned int secno = segno / sbi->segs_per_sec; 384 unsigned int secno = segno / sbi->segs_per_sec;
384 write_lock(&free_i->segmap_lock); 385 spin_lock(&free_i->segmap_lock);
385 if (!test_and_set_bit(segno, free_i->free_segmap)) { 386 if (!test_and_set_bit(segno, free_i->free_segmap)) {
386 free_i->free_segments--; 387 free_i->free_segments--;
387 if (!test_and_set_bit(secno, free_i->free_secmap)) 388 if (!test_and_set_bit(secno, free_i->free_secmap))
388 free_i->free_sections--; 389 free_i->free_sections--;
389 } 390 }
390 write_unlock(&free_i->segmap_lock); 391 spin_unlock(&free_i->segmap_lock);
391} 392}
392 393
393static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, 394static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
@@ -460,7 +461,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed)
460 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); 461 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
461 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); 462 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
462 463
463 if (unlikely(sbi->por_doing)) 464 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
464 return false; 465 return false;
465 466
466 return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + 467 return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs +
@@ -599,13 +600,13 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
599static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) 600static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
600{ 601{
601 if (segno > TOTAL_SEGS(sbi) - 1) 602 if (segno > TOTAL_SEGS(sbi) - 1)
602 sbi->need_fsck = true; 603 set_sbi_flag(sbi, SBI_NEED_FSCK);
603} 604}
604 605
605static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) 606static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
606{ 607{
607 if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi)) 608 if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
608 sbi->need_fsck = true; 609 set_sbi_flag(sbi, SBI_NEED_FSCK);
609} 610}
610 611
611/* 612/*
@@ -616,11 +617,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
616{ 617{
617 /* check segment usage */ 618 /* check segment usage */
618 if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg) 619 if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
619 sbi->need_fsck = true; 620 set_sbi_flag(sbi, SBI_NEED_FSCK);
620 621
621 /* check boundary of a given segment number */ 622 /* check boundary of a given segment number */
622 if (segno > TOTAL_SEGS(sbi) - 1) 623 if (segno > TOTAL_SEGS(sbi) - 1)
623 sbi->need_fsck = true; 624 set_sbi_flag(sbi, SBI_NEED_FSCK);
624} 625}
625#endif 626#endif
626 627
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f71421d70475..f2fe666a6ea9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -30,6 +30,7 @@
30#include "segment.h" 30#include "segment.h"
31#include "xattr.h" 31#include "xattr.h"
32#include "gc.h" 32#include "gc.h"
33#include "trace.h"
33 34
34#define CREATE_TRACE_POINTS 35#define CREATE_TRACE_POINTS
35#include <trace/events/f2fs.h> 36#include <trace/events/f2fs.h>
@@ -41,6 +42,7 @@ static struct kset *f2fs_kset;
41enum { 42enum {
42 Opt_gc_background, 43 Opt_gc_background,
43 Opt_disable_roll_forward, 44 Opt_disable_roll_forward,
45 Opt_norecovery,
44 Opt_discard, 46 Opt_discard,
45 Opt_noheap, 47 Opt_noheap,
46 Opt_user_xattr, 48 Opt_user_xattr,
@@ -61,6 +63,7 @@ enum {
61static match_table_t f2fs_tokens = { 63static match_table_t f2fs_tokens = {
62 {Opt_gc_background, "background_gc=%s"}, 64 {Opt_gc_background, "background_gc=%s"},
63 {Opt_disable_roll_forward, "disable_roll_forward"}, 65 {Opt_disable_roll_forward, "disable_roll_forward"},
66 {Opt_norecovery, "norecovery"},
64 {Opt_discard, "discard"}, 67 {Opt_discard, "discard"},
65 {Opt_noheap, "no_heap"}, 68 {Opt_noheap, "no_heap"},
66 {Opt_user_xattr, "user_xattr"}, 69 {Opt_user_xattr, "user_xattr"},
@@ -192,6 +195,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
192F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); 195F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
193F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); 196F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
194F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); 197F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
198F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
195F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); 199F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
196F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); 200F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
197F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); 201F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
@@ -207,6 +211,7 @@ static struct attribute *f2fs_attrs[] = {
207 ATTR_LIST(gc_idle), 211 ATTR_LIST(gc_idle),
208 ATTR_LIST(reclaim_segments), 212 ATTR_LIST(reclaim_segments),
209 ATTR_LIST(max_small_discards), 213 ATTR_LIST(max_small_discards),
214 ATTR_LIST(batched_trim_sections),
210 ATTR_LIST(ipu_policy), 215 ATTR_LIST(ipu_policy),
211 ATTR_LIST(min_ipu_util), 216 ATTR_LIST(min_ipu_util),
212 ATTR_LIST(min_fsync_blocks), 217 ATTR_LIST(min_fsync_blocks),
@@ -286,6 +291,12 @@ static int parse_options(struct super_block *sb, char *options)
286 case Opt_disable_roll_forward: 291 case Opt_disable_roll_forward:
287 set_opt(sbi, DISABLE_ROLL_FORWARD); 292 set_opt(sbi, DISABLE_ROLL_FORWARD);
288 break; 293 break;
294 case Opt_norecovery:
295 /* this option mounts f2fs with ro */
296 set_opt(sbi, DISABLE_ROLL_FORWARD);
297 if (!f2fs_readonly(sb))
298 return -EINVAL;
299 break;
289 case Opt_discard: 300 case Opt_discard:
290 set_opt(sbi, DISCARD); 301 set_opt(sbi, DISCARD);
291 break; 302 break;
@@ -446,8 +457,13 @@ static void f2fs_put_super(struct super_block *sb)
446 f2fs_destroy_stats(sbi); 457 f2fs_destroy_stats(sbi);
447 stop_gc_thread(sbi); 458 stop_gc_thread(sbi);
448 459
449 /* We don't need to do checkpoint when it's clean */ 460 /*
450 if (sbi->s_dirty) { 461 * We don't need to do checkpoint when superblock is clean.
462 * But, the previous checkpoint was not done by umount, it needs to do
463 * clean checkpoint again.
464 */
465 if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
466 !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) {
451 struct cp_control cpc = { 467 struct cp_control cpc = {
452 .reason = CP_UMOUNT, 468 .reason = CP_UMOUNT,
453 }; 469 };
@@ -486,13 +502,15 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
486 if (sync) { 502 if (sync) {
487 struct cp_control cpc; 503 struct cp_control cpc;
488 504
489 cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; 505 cpc.reason = __get_cp_reason(sbi);
506
490 mutex_lock(&sbi->gc_mutex); 507 mutex_lock(&sbi->gc_mutex);
491 write_checkpoint(sbi, &cpc); 508 write_checkpoint(sbi, &cpc);
492 mutex_unlock(&sbi->gc_mutex); 509 mutex_unlock(&sbi->gc_mutex);
493 } else { 510 } else {
494 f2fs_balance_fs(sbi); 511 f2fs_balance_fs(sbi);
495 } 512 }
513 f2fs_trace_ios(NULL, NULL, 1);
496 514
497 return 0; 515 return 0;
498} 516}
@@ -887,7 +905,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
887 atomic_set(&sbi->nr_pages[i], 0); 905 atomic_set(&sbi->nr_pages[i], 0);
888 906
889 sbi->dir_level = DEF_DIR_LEVEL; 907 sbi->dir_level = DEF_DIR_LEVEL;
890 sbi->need_fsck = false; 908 clear_sbi_flag(sbi, SBI_NEED_FSCK);
891} 909}
892 910
893/* 911/*
@@ -942,6 +960,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
942 struct inode *root; 960 struct inode *root;
943 long err = -EINVAL; 961 long err = -EINVAL;
944 bool retry = true; 962 bool retry = true;
963 char *options = NULL;
945 int i; 964 int i;
946 965
947try_onemore: 966try_onemore:
@@ -973,9 +992,15 @@ try_onemore:
973 set_opt(sbi, POSIX_ACL); 992 set_opt(sbi, POSIX_ACL);
974#endif 993#endif
975 /* parse mount options */ 994 /* parse mount options */
976 err = parse_options(sb, (char *)data); 995 options = kstrdup((const char *)data, GFP_KERNEL);
977 if (err) 996 if (data && !options) {
997 err = -ENOMEM;
978 goto free_sb_buf; 998 goto free_sb_buf;
999 }
1000
1001 err = parse_options(sb, options);
1002 if (err)
1003 goto free_options;
979 1004
980 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize)); 1005 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
981 sb->s_max_links = F2FS_LINK_MAX; 1006 sb->s_max_links = F2FS_LINK_MAX;
@@ -998,7 +1023,7 @@ try_onemore:
998 mutex_init(&sbi->writepages); 1023 mutex_init(&sbi->writepages);
999 mutex_init(&sbi->cp_mutex); 1024 mutex_init(&sbi->cp_mutex);
1000 init_rwsem(&sbi->node_write); 1025 init_rwsem(&sbi->node_write);
1001 sbi->por_doing = false; 1026 clear_sbi_flag(sbi, SBI_POR_DOING);
1002 spin_lock_init(&sbi->stat_lock); 1027 spin_lock_init(&sbi->stat_lock);
1003 1028
1004 init_rwsem(&sbi->read_io.io_rwsem); 1029 init_rwsem(&sbi->read_io.io_rwsem);
@@ -1019,7 +1044,7 @@ try_onemore:
1019 if (IS_ERR(sbi->meta_inode)) { 1044 if (IS_ERR(sbi->meta_inode)) {
1020 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); 1045 f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
1021 err = PTR_ERR(sbi->meta_inode); 1046 err = PTR_ERR(sbi->meta_inode);
1022 goto free_sb_buf; 1047 goto free_options;
1023 } 1048 }
1024 1049
1025 err = get_valid_checkpoint(sbi); 1050 err = get_valid_checkpoint(sbi);
@@ -1122,10 +1147,19 @@ try_onemore:
1122 goto free_proc; 1147 goto free_proc;
1123 1148
1124 if (!retry) 1149 if (!retry)
1125 sbi->need_fsck = true; 1150 set_sbi_flag(sbi, SBI_NEED_FSCK);
1126 1151
1127 /* recover fsynced data */ 1152 /* recover fsynced data */
1128 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { 1153 if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
1154 /*
1155 * mount should be failed, when device has readonly mode, and
1156 * previous checkpoint was not done by clean system shutdown.
1157 */
1158 if (bdev_read_only(sb->s_bdev) &&
1159 !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) {
1160 err = -EROFS;
1161 goto free_kobj;
1162 }
1129 err = recover_fsync_data(sbi); 1163 err = recover_fsync_data(sbi);
1130 if (err) { 1164 if (err) {
1131 f2fs_msg(sb, KERN_ERR, 1165 f2fs_msg(sb, KERN_ERR,
@@ -1144,6 +1178,7 @@ try_onemore:
1144 if (err) 1178 if (err)
1145 goto free_kobj; 1179 goto free_kobj;
1146 } 1180 }
1181 kfree(options);
1147 return 0; 1182 return 0;
1148 1183
1149free_kobj: 1184free_kobj:
@@ -1168,6 +1203,8 @@ free_cp:
1168free_meta_inode: 1203free_meta_inode:
1169 make_bad_inode(sbi->meta_inode); 1204 make_bad_inode(sbi->meta_inode);
1170 iput(sbi->meta_inode); 1205 iput(sbi->meta_inode);
1206free_options:
1207 kfree(options);
1171free_sb_buf: 1208free_sb_buf:
1172 brelse(raw_super_buf); 1209 brelse(raw_super_buf);
1173free_sbi: 1210free_sbi:
@@ -1188,11 +1225,18 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
1188 return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super); 1225 return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
1189} 1226}
1190 1227
1228static void kill_f2fs_super(struct super_block *sb)
1229{
1230 if (sb->s_root)
1231 set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
1232 kill_block_super(sb);
1233}
1234
1191static struct file_system_type f2fs_fs_type = { 1235static struct file_system_type f2fs_fs_type = {
1192 .owner = THIS_MODULE, 1236 .owner = THIS_MODULE,
1193 .name = "f2fs", 1237 .name = "f2fs",
1194 .mount = f2fs_mount, 1238 .mount = f2fs_mount,
1195 .kill_sb = kill_block_super, 1239 .kill_sb = kill_f2fs_super,
1196 .fs_flags = FS_REQUIRES_DEV, 1240 .fs_flags = FS_REQUIRES_DEV,
1197}; 1241};
1198MODULE_ALIAS_FS("f2fs"); 1242MODULE_ALIAS_FS("f2fs");
@@ -1220,6 +1264,8 @@ static int __init init_f2fs_fs(void)
1220{ 1264{
1221 int err; 1265 int err;
1222 1266
1267 f2fs_build_trace_ios();
1268
1223 err = init_inodecache(); 1269 err = init_inodecache();
1224 if (err) 1270 if (err)
1225 goto fail; 1271 goto fail;
@@ -1229,12 +1275,9 @@ static int __init init_f2fs_fs(void)
1229 err = create_segment_manager_caches(); 1275 err = create_segment_manager_caches();
1230 if (err) 1276 if (err)
1231 goto free_node_manager_caches; 1277 goto free_node_manager_caches;
1232 err = create_gc_caches();
1233 if (err)
1234 goto free_segment_manager_caches;
1235 err = create_checkpoint_caches(); 1278 err = create_checkpoint_caches();
1236 if (err) 1279 if (err)
1237 goto free_gc_caches; 1280 goto free_segment_manager_caches;
1238 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); 1281 f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
1239 if (!f2fs_kset) { 1282 if (!f2fs_kset) {
1240 err = -ENOMEM; 1283 err = -ENOMEM;
@@ -1251,8 +1294,6 @@ free_kset:
1251 kset_unregister(f2fs_kset); 1294 kset_unregister(f2fs_kset);
1252free_checkpoint_caches: 1295free_checkpoint_caches:
1253 destroy_checkpoint_caches(); 1296 destroy_checkpoint_caches();
1254free_gc_caches:
1255 destroy_gc_caches();
1256free_segment_manager_caches: 1297free_segment_manager_caches:
1257 destroy_segment_manager_caches(); 1298 destroy_segment_manager_caches();
1258free_node_manager_caches: 1299free_node_manager_caches:
@@ -1269,11 +1310,11 @@ static void __exit exit_f2fs_fs(void)
1269 f2fs_destroy_root_stats(); 1310 f2fs_destroy_root_stats();
1270 unregister_filesystem(&f2fs_fs_type); 1311 unregister_filesystem(&f2fs_fs_type);
1271 destroy_checkpoint_caches(); 1312 destroy_checkpoint_caches();
1272 destroy_gc_caches();
1273 destroy_segment_manager_caches(); 1313 destroy_segment_manager_caches();
1274 destroy_node_manager_caches(); 1314 destroy_node_manager_caches();
1275 destroy_inodecache(); 1315 destroy_inodecache();
1276 kset_unregister(f2fs_kset); 1316 kset_unregister(f2fs_kset);
1317 f2fs_destroy_trace_ios();
1277} 1318}
1278 1319
1279module_init(init_f2fs_fs) 1320module_init(init_f2fs_fs)
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
new file mode 100644
index 000000000000..875aa8179bc1
--- /dev/null
+++ b/fs/f2fs/trace.c
@@ -0,0 +1,159 @@
1/*
2 * f2fs IO tracer
3 *
4 * Copyright (c) 2014 Motorola Mobility
5 * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/sched.h>
14#include <linux/radix-tree.h>
15
16#include "f2fs.h"
17#include "trace.h"
18
19static RADIX_TREE(pids, GFP_ATOMIC);
20static spinlock_t pids_lock;
21static struct last_io_info last_io;
22
23static inline void __print_last_io(void)
24{
25 if (!last_io.len)
26 return;
27
28 trace_printk("%3x:%3x %4x %-16s %2x %5x %12x %4x\n",
29 last_io.major, last_io.minor,
30 last_io.pid, "----------------",
31 last_io.type,
32 last_io.fio.rw, last_io.fio.blk_addr,
33 last_io.len);
34 memset(&last_io, 0, sizeof(last_io));
35}
36
37static int __file_type(struct inode *inode, pid_t pid)
38{
39 if (f2fs_is_atomic_file(inode))
40 return __ATOMIC_FILE;
41 else if (f2fs_is_volatile_file(inode))
42 return __VOLATILE_FILE;
43 else if (S_ISDIR(inode->i_mode))
44 return __DIR_FILE;
45 else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode)))
46 return __NODE_FILE;
47 else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode)))
48 return __META_FILE;
49 else if (pid)
50 return __NORMAL_FILE;
51 else
52 return __MISC_FILE;
53}
54
55void f2fs_trace_pid(struct page *page)
56{
57 struct inode *inode = page->mapping->host;
58 pid_t pid = task_pid_nr(current);
59 void *p;
60
61 page->private = pid;
62
63 if (radix_tree_preload(GFP_NOFS))
64 return;
65
66 spin_lock(&pids_lock);
67 p = radix_tree_lookup(&pids, pid);
68 if (p == current)
69 goto out;
70 if (p)
71 radix_tree_delete(&pids, pid);
72
73 f2fs_radix_tree_insert(&pids, pid, current);
74
75 trace_printk("%3x:%3x %4x %-16s\n",
76 MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
77 pid, current->comm);
78out:
79 spin_unlock(&pids_lock);
80 radix_tree_preload_end();
81}
82
83void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush)
84{
85 struct inode *inode;
86 pid_t pid;
87 int major, minor;
88
89 if (flush) {
90 __print_last_io();
91 return;
92 }
93
94 inode = page->mapping->host;
95 pid = page_private(page);
96
97 major = MAJOR(inode->i_sb->s_dev);
98 minor = MINOR(inode->i_sb->s_dev);
99
100 if (last_io.major == major && last_io.minor == minor &&
101 last_io.pid == pid &&
102 last_io.type == __file_type(inode, pid) &&
103 last_io.fio.rw == fio->rw &&
104 last_io.fio.blk_addr + last_io.len == fio->blk_addr) {
105 last_io.len++;
106 return;
107 }
108
109 __print_last_io();
110
111 last_io.major = major;
112 last_io.minor = minor;
113 last_io.pid = pid;
114 last_io.type = __file_type(inode, pid);
115 last_io.fio = *fio;
116 last_io.len = 1;
117 return;
118}
119
120void f2fs_build_trace_ios(void)
121{
122 spin_lock_init(&pids_lock);
123}
124
125#define PIDVEC_SIZE 128
126static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
127 unsigned int max_items)
128{
129 struct radix_tree_iter iter;
130 void **slot;
131 unsigned int ret = 0;
132
133 if (unlikely(!max_items))
134 return 0;
135
136 radix_tree_for_each_slot(slot, &pids, &iter, first_index) {
137 results[ret] = iter.index;
138 if (++ret == PIDVEC_SIZE)
139 break;
140 }
141 return ret;
142}
143
144void f2fs_destroy_trace_ios(void)
145{
146 pid_t pid[PIDVEC_SIZE];
147 pid_t next_pid = 0;
148 unsigned int found;
149
150 spin_lock(&pids_lock);
151 while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
152 unsigned idx;
153
154 next_pid = pid[found - 1] + 1;
155 for (idx = 0; idx < found; idx++)
156 radix_tree_delete(&pids, pid[idx]);
157 }
158 spin_unlock(&pids_lock);
159}
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
new file mode 100644
index 000000000000..1041dbeb52ae
--- /dev/null
+++ b/fs/f2fs/trace.h
@@ -0,0 +1,46 @@
1/*
2 * f2fs IO tracer
3 *
4 * Copyright (c) 2014 Motorola Mobility
5 * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef __F2FS_TRACE_H__
12#define __F2FS_TRACE_H__
13
14#ifdef CONFIG_F2FS_IO_TRACE
15#include <trace/events/f2fs.h>
16
/*
 * File classification reported by the I/O tracer.  The numeric value is
 * what gets printed in the trace line, so the values are spelled out.
 */
enum file_type {
	__NORMAL_FILE	= 0,
	__DIR_FILE	= 1,
	__NODE_FILE	= 2,
	__META_FILE	= 3,
	__ATOMIC_FILE	= 4,
	__VOLATILE_FILE	= 5,
	__MISC_FILE	= 6,
};
26
27struct last_io_info {
28 int major, minor;
29 pid_t pid;
30 enum file_type type;
31 struct f2fs_io_info fio;
32 block_t len;
33};
34
/* I/O tracer entry points, available when CONFIG_F2FS_IO_TRACE is set. */
void f2fs_trace_pid(struct page *page);
void f2fs_trace_ios(struct page *page, struct f2fs_io_info *fio, int flush);
void f2fs_build_trace_ios(void);
void f2fs_destroy_trace_ios(void);
39#else
/*
 * Without CONFIG_F2FS_IO_TRACE the tracer hooks expand to nothing, so
 * call sites need no #ifdef and their arguments are not evaluated.
 */
#define f2fs_trace_pid(p)
#define f2fs_trace_ios(p, i, n)
#define f2fs_build_trace_ios()
#define f2fs_destroy_trace_ios()
44
45#endif
46#endif /* __F2FS_TRACE_H__ */