aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJaegeuk Kim <jaegeuk@kernel.org>2014-10-06 20:39:50 -0400
committerJaegeuk Kim <jaegeuk@kernel.org>2014-10-06 20:39:50 -0400
commit88b88a66797159949cec32eaab12b4968f6fae2d (patch)
treeda80cebeb9cc86d5f9f16718ce7921eef16ed30e
parent120c2cba1d76494a68e36a11eb630cb335ed1494 (diff)
f2fs: support atomic writes
This patch introduces very limited functionality for atomic write support.
In order to support atomic writes, this patch adds two ioctls:
 o F2FS_IOC_START_ATOMIC_WRITE
 o F2FS_IOC_COMMIT_ATOMIC_WRITE

The database engine should be aware of the following sequence.
1. open
 -> ioctl(F2FS_IOC_START_ATOMIC_WRITE);
2. writes
  : all the written data will be treated as atomic pages.
3. commit
 -> ioctl(F2FS_IOC_COMMIT_ATOMIC_WRITE);
  : this flushes all the data blocks to the disk, which will be shown all or
    nothing by the f2fs recovery procedure.
4. repeat from #2.

The IO patterns should be:

    ,- START_ATOMIC_WRITE             ,- COMMIT_ATOMIC_WRITE
 CP | D D D D D D | FSYNC | D D D D | FSYNC ...
                    `- COMMIT_ATOMIC_WRITE

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r--fs/f2fs/data.c5
-rw-r--r--fs/f2fs/f2fs.h21
-rw-r--r--fs/f2fs/file.c39
-rw-r--r--fs/f2fs/inline.c3
-rw-r--r--fs/f2fs/inode.c4
-rw-r--r--fs/f2fs/segment.c63
-rw-r--r--fs/f2fs/segment.h7
-rw-r--r--fs/f2fs/super.c2
8 files changed, 139 insertions, 5 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 13ab72084913..8bbd60633f37 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1052,7 +1052,10 @@ static int f2fs_write_end(struct file *file,
1052 1052
1053 trace_f2fs_write_end(inode, pos, len, copied); 1053 trace_f2fs_write_end(inode, pos, len, copied);
1054 1054
1055 set_page_dirty(page); 1055 if (f2fs_is_atomic_file(inode))
1056 register_inmem_page(inode, page);
1057 else
1058 set_page_dirty(page);
1056 1059
1057 if (pos + copied > i_size_read(inode)) { 1060 if (pos + copied > i_size_read(inode)) {
1058 i_size_write(inode, pos + copied); 1061 i_size_write(inode, pos + copied);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a397f7ac9945..07fda632430b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -192,8 +192,12 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
192/* 192/*
193 * ioctl commands 193 * ioctl commands
194 */ 194 */
195#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS 195#define F2FS_IOC_GETFLAGS FS_IOC_GETFLAGS
196#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS 196#define F2FS_IOC_SETFLAGS FS_IOC_SETFLAGS
197
198#define F2FS_IOCTL_MAGIC 0xf5
199#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
200#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
197 201
198#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 202#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
199/* 203/*
@@ -263,6 +267,9 @@ struct f2fs_inode_info {
263 unsigned long long xattr_ver; /* cp version of xattr modification */ 267 unsigned long long xattr_ver; /* cp version of xattr modification */
264 struct extent_info ext; /* in-memory extent cache entry */ 268 struct extent_info ext; /* in-memory extent cache entry */
265 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ 269 struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */
270
271 struct list_head inmem_pages; /* inmemory pages managed by f2fs */
272 struct mutex inmem_lock; /* lock for inmemory pages */
266}; 273};
267 274
268static inline void get_extent_info(struct extent_info *ext, 275static inline void get_extent_info(struct extent_info *ext,
@@ -1051,7 +1058,8 @@ enum {
1051 FI_INLINE_DATA, /* used for inline data*/ 1058 FI_INLINE_DATA, /* used for inline data*/
1052 FI_APPEND_WRITE, /* inode has appended data */ 1059 FI_APPEND_WRITE, /* inode has appended data */
1053 FI_UPDATE_WRITE, /* inode has in-place-update data */ 1060 FI_UPDATE_WRITE, /* inode has in-place-update data */
1054 FI_NEED_IPU, /* used fo ipu for fdatasync */ 1061 FI_NEED_IPU, /* used for ipu per file */
1062 FI_ATOMIC_FILE, /* indicate atomic file */
1055}; 1063};
1056 1064
1057static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) 1065static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1138,6 +1146,11 @@ static inline int f2fs_has_inline_data(struct inode *inode)
1138 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); 1146 return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
1139} 1147}
1140 1148
1149static inline bool f2fs_is_atomic_file(struct inode *inode)
1150{
1151 return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
1152}
1153
1141static inline void *inline_data_addr(struct page *page) 1154static inline void *inline_data_addr(struct page *page)
1142{ 1155{
1143 struct f2fs_inode *ri = F2FS_INODE(page); 1156 struct f2fs_inode *ri = F2FS_INODE(page);
@@ -1275,6 +1288,8 @@ void destroy_node_manager_caches(void);
1275/* 1288/*
1276 * segment.c 1289 * segment.c
1277 */ 1290 */
1291void register_inmem_page(struct inode *, struct page *);
1292void commit_inmem_pages(struct inode *, bool);
1278void f2fs_balance_fs(struct f2fs_sb_info *); 1293void f2fs_balance_fs(struct f2fs_sb_info *);
1279void f2fs_balance_fs_bg(struct f2fs_sb_info *); 1294void f2fs_balance_fs_bg(struct f2fs_sb_info *);
1280int f2fs_issue_flush(struct f2fs_sb_info *); 1295int f2fs_issue_flush(struct f2fs_sb_info *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 735e9a20f939..3708b80125d0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -862,6 +862,41 @@ out:
862 return ret; 862 return ret;
863} 863}
864 864
865static int f2fs_ioc_start_atomic_write(struct file *filp)
866{
867 struct inode *inode = file_inode(filp);
868 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
869
870 if (!inode_owner_or_capable(inode))
871 return -EACCES;
872
873 f2fs_balance_fs(sbi);
874
875 set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
876
877 return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
878}
879
880static int f2fs_ioc_commit_atomic_write(struct file *filp)
881{
882 struct inode *inode = file_inode(filp);
883 int ret;
884
885 if (!inode_owner_or_capable(inode))
886 return -EACCES;
887
888 ret = mnt_want_write_file(filp);
889 if (ret)
890 return ret;
891
892 if (f2fs_is_atomic_file(inode))
893 commit_inmem_pages(inode, false);
894
895 ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
896 mnt_drop_write_file(filp);
897 return ret;
898}
899
865static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) 900static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
866{ 901{
867 struct inode *inode = file_inode(filp); 902 struct inode *inode = file_inode(filp);
@@ -899,6 +934,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
899 return f2fs_ioc_getflags(filp, arg); 934 return f2fs_ioc_getflags(filp, arg);
900 case F2FS_IOC_SETFLAGS: 935 case F2FS_IOC_SETFLAGS:
901 return f2fs_ioc_setflags(filp, arg); 936 return f2fs_ioc_setflags(filp, arg);
937 case F2FS_IOC_START_ATOMIC_WRITE:
938 return f2fs_ioc_start_atomic_write(filp);
939 case F2FS_IOC_COMMIT_ATOMIC_WRITE:
940 return f2fs_ioc_commit_atomic_write(filp);
902 case FITRIM: 941 case FITRIM:
903 return f2fs_ioc_fitrim(filp, arg); 942 return f2fs_ioc_fitrim(filp, arg);
904 default: 943 default:
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 6aef11d69e33..88036fd75797 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -21,6 +21,9 @@ bool f2fs_may_inline(struct inode *inode)
21 if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) 21 if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
22 return false; 22 return false;
23 23
24 if (f2fs_is_atomic_file(inode))
25 return false;
26
24 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; 27 nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
25 if (inode->i_blocks > nr_blocks) 28 if (inode->i_blocks > nr_blocks)
26 return false; 29 return false;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 63923eef1ffe..1b85f72d0071 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -269,6 +269,10 @@ void f2fs_evict_inode(struct inode *inode)
269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
270 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 270 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
271 271
272 /* any remaining atomic pages should be discarded */
273 if (f2fs_is_atomic_file(inode))
274 commit_inmem_pages(inode, true);
275
272 trace_f2fs_evict_inode(inode); 276 trace_f2fs_evict_inode(inode);
273 truncate_inode_pages_final(&inode->i_data); 277 truncate_inode_pages_final(&inode->i_data);
274 278
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4d1c49a55e0c..923cb76fdc46 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -26,6 +26,7 @@
26 26
27static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *sit_entry_set_slab; 28static struct kmem_cache *sit_entry_set_slab;
29static struct kmem_cache *inmem_entry_slab;
29 30
30/* 31/*
31 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 32 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -173,6 +174,60 @@ found_middle:
173 return result + __reverse_ffz(tmp); 174 return result + __reverse_ffz(tmp);
174} 175}
175 176
177void register_inmem_page(struct inode *inode, struct page *page)
178{
179 struct f2fs_inode_info *fi = F2FS_I(inode);
180 struct inmem_pages *new;
181
182 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
183
184 /* add atomic page indices to the list */
185 new->page = page;
186 INIT_LIST_HEAD(&new->list);
187
188 /* increase reference count with clean state */
189 mutex_lock(&fi->inmem_lock);
190 get_page(page);
191 list_add_tail(&new->list, &fi->inmem_pages);
192 mutex_unlock(&fi->inmem_lock);
193}
194
195void commit_inmem_pages(struct inode *inode, bool abort)
196{
197 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
198 struct f2fs_inode_info *fi = F2FS_I(inode);
199 struct inmem_pages *cur, *tmp;
200 bool submit_bio = false;
201 struct f2fs_io_info fio = {
202 .type = DATA,
203 .rw = WRITE_SYNC,
204 };
205
206 f2fs_balance_fs(sbi);
207 f2fs_lock_op(sbi);
208
209 mutex_lock(&fi->inmem_lock);
210 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
211 lock_page(cur->page);
212 if (!abort && cur->page->mapping == inode->i_mapping) {
213 f2fs_wait_on_page_writeback(cur->page, DATA);
214 if (clear_page_dirty_for_io(cur->page))
215 inode_dec_dirty_pages(inode);
216 do_write_data_page(cur->page, &fio);
217 submit_bio = true;
218 }
219 f2fs_put_page(cur->page, 1);
220 list_del(&cur->list);
221 kmem_cache_free(inmem_entry_slab, cur);
222 }
223 if (submit_bio)
224 f2fs_submit_merged_bio(sbi, DATA, WRITE);
225 mutex_unlock(&fi->inmem_lock);
226
227 filemap_fdatawait_range(inode->i_mapping, 0, LLONG_MAX);
228 f2fs_unlock_op(sbi);
229}
230
176/* 231/*
177 * This function balances dirty node and dentry pages. 232 * This function balances dirty node and dentry pages.
178 * In addition, it controls garbage collection. 233 * In addition, it controls garbage collection.
@@ -2148,8 +2203,15 @@ int __init create_segment_manager_caches(void)
2148 sizeof(struct nat_entry_set)); 2203 sizeof(struct nat_entry_set));
2149 if (!sit_entry_set_slab) 2204 if (!sit_entry_set_slab)
2150 goto destory_discard_entry; 2205 goto destory_discard_entry;
2206
2207 inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
2208 sizeof(struct inmem_pages));
2209 if (!inmem_entry_slab)
2210 goto destroy_sit_entry_set;
2151 return 0; 2211 return 0;
2152 2212
2213destroy_sit_entry_set:
2214 kmem_cache_destroy(sit_entry_set_slab);
2153destory_discard_entry: 2215destory_discard_entry:
2154 kmem_cache_destroy(discard_entry_slab); 2216 kmem_cache_destroy(discard_entry_slab);
2155fail: 2217fail:
@@ -2160,4 +2222,5 @@ void destroy_segment_manager_caches(void)
2160{ 2222{
2161 kmem_cache_destroy(sit_entry_set_slab); 2223 kmem_cache_destroy(sit_entry_set_slab);
2162 kmem_cache_destroy(discard_entry_slab); 2224 kmem_cache_destroy(discard_entry_slab);
2225 kmem_cache_destroy(inmem_entry_slab);
2163} 2226}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index afb73627a8ec..2495bec1c621 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -175,6 +175,11 @@ struct segment_allocation {
175 void (*allocate_segment)(struct f2fs_sb_info *, int, bool); 175 void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
176}; 176};
177 177
178struct inmem_pages {
179 struct list_head list;
180 struct page *page;
181};
182
178struct sit_info { 183struct sit_info {
179 const struct segment_allocation *s_ops; 184 const struct segment_allocation *s_ops;
180 185
@@ -504,7 +509,7 @@ static inline bool need_inplace_update(struct inode *inode)
504 unsigned int policy = SM_I(sbi)->ipu_policy; 509 unsigned int policy = SM_I(sbi)->ipu_policy;
505 510
506 /* IPU can be done only for the user data */ 511 /* IPU can be done only for the user data */
507 if (S_ISDIR(inode->i_mode)) 512 if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
508 return false; 513 return false;
509 514
510 if (policy & (0x1 << F2FS_IPU_FORCE)) 515 if (policy & (0x1 << F2FS_IPU_FORCE))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bb6b568d6ad4..41d6f700f4ee 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -373,6 +373,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
373 fi->i_advise = 0; 373 fi->i_advise = 0;
374 rwlock_init(&fi->ext.ext_lock); 374 rwlock_init(&fi->ext.ext_lock);
375 init_rwsem(&fi->i_sem); 375 init_rwsem(&fi->i_sem);
376 INIT_LIST_HEAD(&fi->inmem_pages);
377 mutex_init(&fi->inmem_lock);
376 378
377 set_inode_flag(fi, FI_NEW_INODE); 379 set_inode_flag(fi, FI_NEW_INODE);
378 380