diff options
author | Jaegeuk Kim <jaegeuk@kernel.org> | 2014-10-06 20:39:50 -0400 |
---|---|---|
committer | Jaegeuk Kim <jaegeuk@kernel.org> | 2014-10-06 20:39:50 -0400 |
commit | 88b88a66797159949cec32eaab12b4968f6fae2d (patch) | |
tree | da80cebeb9cc86d5f9f16718ce7921eef16ed30e | |
parent | 120c2cba1d76494a68e36a11eb630cb335ed1494 (diff) |
f2fs: support atomic writes
This patch introduces very limited functionality for atomic write support.
In order to support atomic write, this patch adds two ioctls:
o F2FS_IOC_START_ATOMIC_WRITE
o F2FS_IOC_COMMIT_ATOMIC_WRITE
The database engine should be aware of the following sequence.
1. open
-> ioctl(F2FS_IOC_START_ATOMIC_WRITE);
2. writes
: all the written data will be treated as atomic pages.
3. commit
-> ioctl(F2FS_IOC_COMMIT_ATOMIC_WRITE);
: this flushes all the data blocks to the disk, which the f2fs recovery
procedure will then expose on an all-or-nothing basis.
4. repeat from #2.
The IO patterns should be:
    ,- START_ATOMIC_WRITE               ,- COMMIT_ATOMIC_WRITE
 CP | D D D D D D | FSYNC | D D D D | FSYNC ...
                      `- COMMIT_ATOMIC_WRITE
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r-- | fs/f2fs/data.c | 5 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 21 | ||||
-rw-r--r-- | fs/f2fs/file.c | 39 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 3 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 4 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 63 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 7 | ||||
-rw-r--r-- | fs/f2fs/super.c | 2 |
8 files changed, 139 insertions, 5 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 13ab72084913..8bbd60633f37 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -1052,7 +1052,10 @@ static int f2fs_write_end(struct file *file, | |||
1052 | 1052 | ||
1053 | trace_f2fs_write_end(inode, pos, len, copied); | 1053 | trace_f2fs_write_end(inode, pos, len, copied); |
1054 | 1054 | ||
1055 | set_page_dirty(page); | 1055 | if (f2fs_is_atomic_file(inode)) |
1056 | register_inmem_page(inode, page); | ||
1057 | else | ||
1058 | set_page_dirty(page); | ||
1056 | 1059 | ||
1057 | if (pos + copied > i_size_read(inode)) { | 1060 | if (pos + copied > i_size_read(inode)) { |
1058 | i_size_write(inode, pos + copied); | 1061 | i_size_write(inode, pos + copied); |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a397f7ac9945..07fda632430b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -192,8 +192,12 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, | |||
192 | /* | 192 | /* |
193 | * ioctl commands | 193 | * ioctl commands |
194 | */ | 194 | */ |
195 | #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS | 195 | #define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS |
196 | #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS | 196 | #define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS |
197 | |||
198 | #define F2FS_IOCTL_MAGIC 0xf5 | ||
199 | #define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1) | ||
200 | #define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2) | ||
197 | 201 | ||
198 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 202 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
199 | /* | 203 | /* |
@@ -263,6 +267,9 @@ struct f2fs_inode_info { | |||
263 | unsigned long long xattr_ver; /* cp version of xattr modification */ | 267 | unsigned long long xattr_ver; /* cp version of xattr modification */ |
264 | struct extent_info ext; /* in-memory extent cache entry */ | 268 | struct extent_info ext; /* in-memory extent cache entry */ |
265 | struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ | 269 | struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ |
270 | |||
271 | struct list_head inmem_pages; /* inmemory pages managed by f2fs */ | ||
272 | struct mutex inmem_lock; /* lock for inmemory pages */ | ||
266 | }; | 273 | }; |
267 | 274 | ||
268 | static inline void get_extent_info(struct extent_info *ext, | 275 | static inline void get_extent_info(struct extent_info *ext, |
@@ -1051,7 +1058,8 @@ enum { | |||
1051 | FI_INLINE_DATA, /* used for inline data*/ | 1058 | FI_INLINE_DATA, /* used for inline data*/ |
1052 | FI_APPEND_WRITE, /* inode has appended data */ | 1059 | FI_APPEND_WRITE, /* inode has appended data */ |
1053 | FI_UPDATE_WRITE, /* inode has in-place-update data */ | 1060 | FI_UPDATE_WRITE, /* inode has in-place-update data */ |
1054 | FI_NEED_IPU, /* used fo ipu for fdatasync */ | 1061 | FI_NEED_IPU, /* used for ipu per file */ |
1062 | FI_ATOMIC_FILE, /* indicate atomic file */ | ||
1055 | }; | 1063 | }; |
1056 | 1064 | ||
1057 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | 1065 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
@@ -1138,6 +1146,11 @@ static inline int f2fs_has_inline_data(struct inode *inode) | |||
1138 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); | 1146 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); |
1139 | } | 1147 | } |
1140 | 1148 | ||
1149 | static inline bool f2fs_is_atomic_file(struct inode *inode) | ||
1150 | { | ||
1151 | return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); | ||
1152 | } | ||
1153 | |||
1141 | static inline void *inline_data_addr(struct page *page) | 1154 | static inline void *inline_data_addr(struct page *page) |
1142 | { | 1155 | { |
1143 | struct f2fs_inode *ri = F2FS_INODE(page); | 1156 | struct f2fs_inode *ri = F2FS_INODE(page); |
@@ -1275,6 +1288,8 @@ void destroy_node_manager_caches(void); | |||
1275 | /* | 1288 | /* |
1276 | * segment.c | 1289 | * segment.c |
1277 | */ | 1290 | */ |
1291 | void register_inmem_page(struct inode *, struct page *); | ||
1292 | void commit_inmem_pages(struct inode *, bool); | ||
1278 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1293 | void f2fs_balance_fs(struct f2fs_sb_info *); |
1279 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); | 1294 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); |
1280 | int f2fs_issue_flush(struct f2fs_sb_info *); | 1295 | int f2fs_issue_flush(struct f2fs_sb_info *); |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 735e9a20f939..3708b80125d0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -862,6 +862,41 @@ out: | |||
862 | return ret; | 862 | return ret; |
863 | } | 863 | } |
864 | 864 | ||
865 | static int f2fs_ioc_start_atomic_write(struct file *filp) | ||
866 | { | ||
867 | struct inode *inode = file_inode(filp); | ||
868 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
869 | |||
870 | if (!inode_owner_or_capable(inode)) | ||
871 | return -EACCES; | ||
872 | |||
873 | f2fs_balance_fs(sbi); | ||
874 | |||
875 | set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); | ||
876 | |||
877 | return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); | ||
878 | } | ||
879 | |||
880 | static int f2fs_ioc_commit_atomic_write(struct file *filp) | ||
881 | { | ||
882 | struct inode *inode = file_inode(filp); | ||
883 | int ret; | ||
884 | |||
885 | if (!inode_owner_or_capable(inode)) | ||
886 | return -EACCES; | ||
887 | |||
888 | ret = mnt_want_write_file(filp); | ||
889 | if (ret) | ||
890 | return ret; | ||
891 | |||
892 | if (f2fs_is_atomic_file(inode)) | ||
893 | commit_inmem_pages(inode, false); | ||
894 | |||
895 | ret = f2fs_sync_file(filp, 0, LONG_MAX, 0); | ||
896 | mnt_drop_write_file(filp); | ||
897 | return ret; | ||
898 | } | ||
899 | |||
865 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) | 900 | static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) |
866 | { | 901 | { |
867 | struct inode *inode = file_inode(filp); | 902 | struct inode *inode = file_inode(filp); |
@@ -899,6 +934,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
899 | return f2fs_ioc_getflags(filp, arg); | 934 | return f2fs_ioc_getflags(filp, arg); |
900 | case F2FS_IOC_SETFLAGS: | 935 | case F2FS_IOC_SETFLAGS: |
901 | return f2fs_ioc_setflags(filp, arg); | 936 | return f2fs_ioc_setflags(filp, arg); |
937 | case F2FS_IOC_START_ATOMIC_WRITE: | ||
938 | return f2fs_ioc_start_atomic_write(filp); | ||
939 | case F2FS_IOC_COMMIT_ATOMIC_WRITE: | ||
940 | return f2fs_ioc_commit_atomic_write(filp); | ||
902 | case FITRIM: | 941 | case FITRIM: |
903 | return f2fs_ioc_fitrim(filp, arg); | 942 | return f2fs_ioc_fitrim(filp, arg); |
904 | default: | 943 | default: |
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 6aef11d69e33..88036fd75797 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c | |||
@@ -21,6 +21,9 @@ bool f2fs_may_inline(struct inode *inode) | |||
21 | if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) | 21 | if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) |
22 | return false; | 22 | return false; |
23 | 23 | ||
24 | if (f2fs_is_atomic_file(inode)) | ||
25 | return false; | ||
26 | |||
24 | nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; | 27 | nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; |
25 | if (inode->i_blocks > nr_blocks) | 28 | if (inode->i_blocks > nr_blocks) |
26 | return false; | 29 | return false; |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 63923eef1ffe..1b85f72d0071 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -269,6 +269,10 @@ void f2fs_evict_inode(struct inode *inode) | |||
269 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | 269 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); |
270 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; | 270 | nid_t xnid = F2FS_I(inode)->i_xattr_nid; |
271 | 271 | ||
272 | /* some remained atomic pages should discarded */ | ||
273 | if (f2fs_is_atomic_file(inode)) | ||
274 | commit_inmem_pages(inode, true); | ||
275 | |||
272 | trace_f2fs_evict_inode(inode); | 276 | trace_f2fs_evict_inode(inode); |
273 | truncate_inode_pages_final(&inode->i_data); | 277 | truncate_inode_pages_final(&inode->i_data); |
274 | 278 | ||
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4d1c49a55e0c..923cb76fdc46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | static struct kmem_cache *discard_entry_slab; | 27 | static struct kmem_cache *discard_entry_slab; |
28 | static struct kmem_cache *sit_entry_set_slab; | 28 | static struct kmem_cache *sit_entry_set_slab; |
29 | static struct kmem_cache *inmem_entry_slab; | ||
29 | 30 | ||
30 | /* | 31 | /* |
31 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since | 32 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since |
@@ -173,6 +174,60 @@ found_middle: | |||
173 | return result + __reverse_ffz(tmp); | 174 | return result + __reverse_ffz(tmp); |
174 | } | 175 | } |
175 | 176 | ||
177 | void register_inmem_page(struct inode *inode, struct page *page) | ||
178 | { | ||
179 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
180 | struct inmem_pages *new; | ||
181 | |||
182 | new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); | ||
183 | |||
184 | /* add atomic page indices to the list */ | ||
185 | new->page = page; | ||
186 | INIT_LIST_HEAD(&new->list); | ||
187 | |||
188 | /* increase reference count with clean state */ | ||
189 | mutex_lock(&fi->inmem_lock); | ||
190 | get_page(page); | ||
191 | list_add_tail(&new->list, &fi->inmem_pages); | ||
192 | mutex_unlock(&fi->inmem_lock); | ||
193 | } | ||
194 | |||
195 | void commit_inmem_pages(struct inode *inode, bool abort) | ||
196 | { | ||
197 | struct f2fs_sb_info *sbi = F2FS_I_SB(inode); | ||
198 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
199 | struct inmem_pages *cur, *tmp; | ||
200 | bool submit_bio = false; | ||
201 | struct f2fs_io_info fio = { | ||
202 | .type = DATA, | ||
203 | .rw = WRITE_SYNC, | ||
204 | }; | ||
205 | |||
206 | f2fs_balance_fs(sbi); | ||
207 | f2fs_lock_op(sbi); | ||
208 | |||
209 | mutex_lock(&fi->inmem_lock); | ||
210 | list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) { | ||
211 | lock_page(cur->page); | ||
212 | if (!abort && cur->page->mapping == inode->i_mapping) { | ||
213 | f2fs_wait_on_page_writeback(cur->page, DATA); | ||
214 | if (clear_page_dirty_for_io(cur->page)) | ||
215 | inode_dec_dirty_pages(inode); | ||
216 | do_write_data_page(cur->page, &fio); | ||
217 | submit_bio = true; | ||
218 | } | ||
219 | f2fs_put_page(cur->page, 1); | ||
220 | list_del(&cur->list); | ||
221 | kmem_cache_free(inmem_entry_slab, cur); | ||
222 | } | ||
223 | if (submit_bio) | ||
224 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | ||
225 | mutex_unlock(&fi->inmem_lock); | ||
226 | |||
227 | filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX); | ||
228 | f2fs_unlock_op(sbi); | ||
229 | } | ||
230 | |||
176 | /* | 231 | /* |
177 | * This function balances dirty node and dentry pages. | 232 | * This function balances dirty node and dentry pages. |
178 | * In addition, it controls garbage collection. | 233 | * In addition, it controls garbage collection. |
@@ -2148,8 +2203,15 @@ int __init create_segment_manager_caches(void) | |||
2148 | sizeof(struct nat_entry_set)); | 2203 | sizeof(struct nat_entry_set)); |
2149 | if (!sit_entry_set_slab) | 2204 | if (!sit_entry_set_slab) |
2150 | goto destory_discard_entry; | 2205 | goto destory_discard_entry; |
2206 | |||
2207 | inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", | ||
2208 | sizeof(struct inmem_pages)); | ||
2209 | if (!inmem_entry_slab) | ||
2210 | goto destroy_sit_entry_set; | ||
2151 | return 0; | 2211 | return 0; |
2152 | 2212 | ||
2213 | destroy_sit_entry_set: | ||
2214 | kmem_cache_destroy(sit_entry_set_slab); | ||
2153 | destory_discard_entry: | 2215 | destory_discard_entry: |
2154 | kmem_cache_destroy(discard_entry_slab); | 2216 | kmem_cache_destroy(discard_entry_slab); |
2155 | fail: | 2217 | fail: |
@@ -2160,4 +2222,5 @@ void destroy_segment_manager_caches(void) | |||
2160 | { | 2222 | { |
2161 | kmem_cache_destroy(sit_entry_set_slab); | 2223 | kmem_cache_destroy(sit_entry_set_slab); |
2162 | kmem_cache_destroy(discard_entry_slab); | 2224 | kmem_cache_destroy(discard_entry_slab); |
2225 | kmem_cache_destroy(inmem_entry_slab); | ||
2163 | } | 2226 | } |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index afb73627a8ec..2495bec1c621 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -175,6 +175,11 @@ struct segment_allocation { | |||
175 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); | 175 | void (*allocate_segment)(struct f2fs_sb_info *, int, bool); |
176 | }; | 176 | }; |
177 | 177 | ||
178 | struct inmem_pages { | ||
179 | struct list_head list; | ||
180 | struct page *page; | ||
181 | }; | ||
182 | |||
178 | struct sit_info { | 183 | struct sit_info { |
179 | const struct segment_allocation *s_ops; | 184 | const struct segment_allocation *s_ops; |
180 | 185 | ||
@@ -504,7 +509,7 @@ static inline bool need_inplace_update(struct inode *inode) | |||
504 | unsigned int policy = SM_I(sbi)->ipu_policy; | 509 | unsigned int policy = SM_I(sbi)->ipu_policy; |
505 | 510 | ||
506 | /* IPU can be done only for the user data */ | 511 | /* IPU can be done only for the user data */ |
507 | if (S_ISDIR(inode->i_mode)) | 512 | if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) |
508 | return false; | 513 | return false; |
509 | 514 | ||
510 | if (policy & (0x1 << F2FS_IPU_FORCE)) | 515 | if (policy & (0x1 << F2FS_IPU_FORCE)) |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bb6b568d6ad4..41d6f700f4ee 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -373,6 +373,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) | |||
373 | fi->i_advise = 0; | 373 | fi->i_advise = 0; |
374 | rwlock_init(&fi->ext.ext_lock); | 374 | rwlock_init(&fi->ext.ext_lock); |
375 | init_rwsem(&fi->i_sem); | 375 | init_rwsem(&fi->i_sem); |
376 | INIT_LIST_HEAD(&fi->inmem_pages); | ||
377 | mutex_init(&fi->inmem_lock); | ||
376 | 378 | ||
377 | set_inode_flag(fi, FI_NEW_INODE); | 379 | set_inode_flag(fi, FI_NEW_INODE); |
378 | 380 | ||