Diffstat (limited to 'fs/f2fs')
-rw-r--r--  fs/f2fs/Kconfig      |   53
-rw-r--r--  fs/f2fs/Makefile     |    7
-rw-r--r--  fs/f2fs/acl.c        |  414
-rw-r--r--  fs/f2fs/acl.h        |   57
-rw-r--r--  fs/f2fs/checkpoint.c |  794
-rw-r--r--  fs/f2fs/data.c       |  702
-rw-r--r--  fs/f2fs/debug.c      |  361
-rw-r--r--  fs/f2fs/dir.c        |  672
-rw-r--r--  fs/f2fs/f2fs.h       | 1083
-rw-r--r--  fs/f2fs/file.c       |  636
-rw-r--r--  fs/f2fs/gc.c         |  742
-rw-r--r--  fs/f2fs/gc.h         |  117
-rw-r--r--  fs/f2fs/hash.c       |   97
-rw-r--r--  fs/f2fs/inode.c      |  268
-rw-r--r--  fs/f2fs/namei.c      |  503
-rw-r--r--  fs/f2fs/node.c       | 1764
-rw-r--r--  fs/f2fs/node.h       |  353
-rw-r--r--  fs/f2fs/recovery.c   |  375
-rw-r--r--  fs/f2fs/segment.c    | 1791
-rw-r--r--  fs/f2fs/segment.h    |  618
-rw-r--r--  fs/f2fs/super.c      |  657
-rw-r--r--  fs/f2fs/xattr.c      |  440
-rw-r--r--  fs/f2fs/xattr.h      |  145
23 files changed, 12649 insertions(+), 0 deletions(-)
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
new file mode 100644
index 000000000000..fd27e7e6326e
--- /dev/null
+++ b/fs/f2fs/Kconfig
@@ -0,0 +1,53 @@
1config F2FS_FS
2 tristate "F2FS filesystem support (EXPERIMENTAL)"
3 depends on BLOCK
4 help
5 F2FS is based on Log-structured File System (LFS), which supports
6 versatile "flash-friendly" features. The design has been focused on
7	  addressing the fundamental issues in LFS, which are the snowball
8	  effect of the wandering tree and the high cleaning overhead.
9
10	  Since flash-based storage devices show different characteristics according
11	  to their internal geometry or flash memory management scheme (aka FTL),
12	  F2FS and its tools support various parameters, not only for configuring the
13	  on-disk layout but also for selecting allocation and cleaning algorithms.
14
15 If unsure, say N.
16
17config F2FS_STAT_FS
18 bool "F2FS Status Information"
19 depends on F2FS_FS && DEBUG_FS
20 default y
21 help
22 /sys/kernel/debug/f2fs/ contains information about all the partitions
23	  mounted as f2fs. Each file shows information for the whole filesystem.
24
25 /sys/kernel/debug/f2fs/status includes:
26 - major file system information managed by f2fs currently
27 - average SIT information about whole segments
28 - current memory footprint consumed by f2fs.
29
30config F2FS_FS_XATTR
31 bool "F2FS extended attributes"
32 depends on F2FS_FS
33 default y
34 help
35 Extended attributes are name:value pairs associated with inodes by
36 the kernel or by users (see the attr(5) manual page, or visit
37 <http://acl.bestbits.at/> for details).
38
39 If unsure, say N.
40
41config F2FS_FS_POSIX_ACL
42 bool "F2FS Access Control Lists"
43 depends on F2FS_FS_XATTR
44 select FS_POSIX_ACL
45 default y
46 help
47	  POSIX Access Control Lists (ACLs) support permissions for users and
48	  groups beyond the owner/group/world scheme.
49
50 To learn more about Access Control Lists, visit the POSIX ACLs for
51 Linux website <http://acl.bestbits.at/>.
52
53	  If you don't know what Access Control Lists are, say N.
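As a quick illustration (editor's sketch, not part of the patch; it assumes BLOCK and DEBUG_FS are already enabled), a kernel .config fragment building f2fs as a module with all of the features declared above would read:

    CONFIG_F2FS_FS=m
    CONFIG_F2FS_STAT_FS=y
    CONFIG_F2FS_FS_XATTR=y
    CONFIG_F2FS_FS_POSIX_ACL=y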
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
new file mode 100644
index 000000000000..27a0820340b9
--- /dev/null
+++ b/fs/f2fs/Makefile
@@ -0,0 +1,7 @@
1obj-$(CONFIG_F2FS_FS) += f2fs.o
2
3f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
4f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
5f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
6f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
7f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
new file mode 100644
index 000000000000..fed74d193ffb
--- /dev/null
+++ b/fs/f2fs/acl.c
@@ -0,0 +1,414 @@
1/*
2 * fs/f2fs/acl.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/acl.c
8 *
9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#include <linux/f2fs_fs.h>
16#include "f2fs.h"
17#include "xattr.h"
18#include "acl.h"
19
20#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
21 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
22
23static inline size_t f2fs_acl_size(int count)
24{
25 if (count <= 4) {
26 return sizeof(struct f2fs_acl_header) +
27 count * sizeof(struct f2fs_acl_entry_short);
28 } else {
29 return sizeof(struct f2fs_acl_header) +
30 4 * sizeof(struct f2fs_acl_entry_short) +
31 (count - 4) * sizeof(struct f2fs_acl_entry);
32 }
33}
34
35static inline int f2fs_acl_count(size_t size)
36{
37 ssize_t s;
38 size -= sizeof(struct f2fs_acl_header);
39 s = size - 4 * sizeof(struct f2fs_acl_entry_short);
40 if (s < 0) {
41 if (size % sizeof(struct f2fs_acl_entry_short))
42 return -1;
43 return size / sizeof(struct f2fs_acl_entry_short);
44 } else {
45 if (s % sizeof(struct f2fs_acl_entry))
46 return -1;
47 return s / sizeof(struct f2fs_acl_entry) + 4;
48 }
49}
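/*
 * Editor's worked example (not part of the patch; struct sizes taken
 * from acl.h below: 4-byte header, 4-byte short entry, 8-byte full
 * entry): an ACL with 6 entries gives
 *   f2fs_acl_size(6)  = 4 + 4*4 + 2*8 = 36 bytes, and going back,
 *   f2fs_acl_count(36) = (36 - 4 - 4*4) / 8 + 4 = 6.
 * The two helpers invert each other because the on-disk format always
 * stores up to four id-less (short-form) entries before any full ones.
 */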
50
51static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
52{
53 int i, count;
54 struct posix_acl *acl;
55 struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value;
56 struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
57 const char *end = value + size;
58
59 if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
60 return ERR_PTR(-EINVAL);
61
62 count = f2fs_acl_count(size);
63 if (count < 0)
64 return ERR_PTR(-EINVAL);
65 if (count == 0)
66 return NULL;
67
68 acl = posix_acl_alloc(count, GFP_KERNEL);
69 if (!acl)
70 return ERR_PTR(-ENOMEM);
71
72 for (i = 0; i < count; i++) {
73
74 if ((char *)entry > end)
75 goto fail;
76
77 acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag);
78 acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm);
79
80 switch (acl->a_entries[i].e_tag) {
81 case ACL_USER_OBJ:
82 case ACL_GROUP_OBJ:
83 case ACL_MASK:
84 case ACL_OTHER:
85 acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
86 entry = (struct f2fs_acl_entry *)((char *)entry +
87 sizeof(struct f2fs_acl_entry_short));
88 break;
89
90 case ACL_USER:
91 acl->a_entries[i].e_uid =
92 make_kuid(&init_user_ns,
93 le32_to_cpu(entry->e_id));
94 entry = (struct f2fs_acl_entry *)((char *)entry +
95 sizeof(struct f2fs_acl_entry));
96 break;
97 case ACL_GROUP:
98 acl->a_entries[i].e_gid =
99 make_kgid(&init_user_ns,
100 le32_to_cpu(entry->e_id));
101 entry = (struct f2fs_acl_entry *)((char *)entry +
102 sizeof(struct f2fs_acl_entry));
103 break;
104 default:
105 goto fail;
106 }
107 }
108 if ((char *)entry != end)
109 goto fail;
110 return acl;
111fail:
112 posix_acl_release(acl);
113 return ERR_PTR(-EINVAL);
114}
115
116static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
117{
118 struct f2fs_acl_header *f2fs_acl;
119 struct f2fs_acl_entry *entry;
120 int i;
121
122 f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
123 sizeof(struct f2fs_acl_entry), GFP_KERNEL);
124 if (!f2fs_acl)
125 return ERR_PTR(-ENOMEM);
126
127 f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION);
128 entry = (struct f2fs_acl_entry *)(f2fs_acl + 1);
129
130 for (i = 0; i < acl->a_count; i++) {
131
132 entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag);
133 entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm);
134
135 switch (acl->a_entries[i].e_tag) {
136 case ACL_USER:
137 entry->e_id = cpu_to_le32(
138 from_kuid(&init_user_ns,
139 acl->a_entries[i].e_uid));
140 entry = (struct f2fs_acl_entry *)((char *)entry +
141 sizeof(struct f2fs_acl_entry));
142 break;
143 case ACL_GROUP:
144 entry->e_id = cpu_to_le32(
145 from_kgid(&init_user_ns,
146 acl->a_entries[i].e_gid));
147 entry = (struct f2fs_acl_entry *)((char *)entry +
148 sizeof(struct f2fs_acl_entry));
149 break;
150 case ACL_USER_OBJ:
151 case ACL_GROUP_OBJ:
152 case ACL_MASK:
153 case ACL_OTHER:
154 entry = (struct f2fs_acl_entry *)((char *)entry +
155 sizeof(struct f2fs_acl_entry_short));
156 break;
157 default:
158 goto fail;
159 }
160 }
161 *size = f2fs_acl_size(acl->a_count);
162 return (void *)f2fs_acl;
163
164fail:
165 kfree(f2fs_acl);
166 return ERR_PTR(-EINVAL);
167}
168
169struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
170{
171 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
172 int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
173 void *value = NULL;
174 struct posix_acl *acl;
175 int retval;
176
177 if (!test_opt(sbi, POSIX_ACL))
178 return NULL;
179
180 acl = get_cached_acl(inode, type);
181 if (acl != ACL_NOT_CACHED)
182 return acl;
183
184 if (type == ACL_TYPE_ACCESS)
185 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
186
187 retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
188 if (retval > 0) {
189 value = kmalloc(retval, GFP_KERNEL);
190 if (!value)
191 return ERR_PTR(-ENOMEM);
192 retval = f2fs_getxattr(inode, name_index, "", value, retval);
193 }
194
195 if (retval < 0) {
196 if (retval == -ENODATA)
197 acl = NULL;
198 else
199 acl = ERR_PTR(retval);
200 } else {
201 acl = f2fs_acl_from_disk(value, retval);
202 }
203 kfree(value);
204 if (!IS_ERR(acl))
205 set_cached_acl(inode, type, acl);
206
207 return acl;
208}
209
210static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
211{
212 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
213 struct f2fs_inode_info *fi = F2FS_I(inode);
214 int name_index;
215 void *value = NULL;
216 size_t size = 0;
217 int error;
218
219 if (!test_opt(sbi, POSIX_ACL))
220 return 0;
221 if (S_ISLNK(inode->i_mode))
222 return -EOPNOTSUPP;
223
224 switch (type) {
225 case ACL_TYPE_ACCESS:
226 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
227 if (acl) {
228 error = posix_acl_equiv_mode(acl, &inode->i_mode);
229 if (error < 0)
230 return error;
231 set_acl_inode(fi, inode->i_mode);
232 if (error == 0)
233 acl = NULL;
234 }
235 break;
236
237 case ACL_TYPE_DEFAULT:
238 name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
239 if (!S_ISDIR(inode->i_mode))
240 return acl ? -EACCES : 0;
241 break;
242
243 default:
244 return -EINVAL;
245 }
246
247 if (acl) {
248 value = f2fs_acl_to_disk(acl, &size);
249 if (IS_ERR(value)) {
250 cond_clear_inode_flag(fi, FI_ACL_MODE);
251 return (int)PTR_ERR(value);
252 }
253 }
254
255 error = f2fs_setxattr(inode, name_index, "", value, size);
256
257 kfree(value);
258 if (!error)
259 set_cached_acl(inode, type, acl);
260
261 cond_clear_inode_flag(fi, FI_ACL_MODE);
262 return error;
263}
264
265int f2fs_init_acl(struct inode *inode, struct inode *dir)
266{
267 struct posix_acl *acl = NULL;
268 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
269 int error = 0;
270
271 if (!S_ISLNK(inode->i_mode)) {
272 if (test_opt(sbi, POSIX_ACL)) {
273 acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT);
274 if (IS_ERR(acl))
275 return PTR_ERR(acl);
276 }
277 if (!acl)
278 inode->i_mode &= ~current_umask();
279 }
280
281 if (test_opt(sbi, POSIX_ACL) && acl) {
282
283 if (S_ISDIR(inode->i_mode)) {
284 error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
285 if (error)
286 goto cleanup;
287 }
288 error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
289 if (error < 0)
290 return error;
291 if (error > 0)
292 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
293 }
294cleanup:
295 posix_acl_release(acl);
296 return error;
297}
298
299int f2fs_acl_chmod(struct inode *inode)
300{
301 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
302 struct posix_acl *acl;
303 int error;
304 mode_t mode = get_inode_mode(inode);
305
306 if (!test_opt(sbi, POSIX_ACL))
307 return 0;
308 if (S_ISLNK(mode))
309 return -EOPNOTSUPP;
310
311 acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS);
312 if (IS_ERR(acl) || !acl)
313 return PTR_ERR(acl);
314
315 error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
316 if (error)
317 return error;
318 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
319 posix_acl_release(acl);
320 return error;
321}
322
323static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list,
324 size_t list_size, const char *name, size_t name_len, int type)
325{
326 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
327 const char *xname = POSIX_ACL_XATTR_DEFAULT;
328 size_t size;
329
330 if (!test_opt(sbi, POSIX_ACL))
331 return 0;
332
333 if (type == ACL_TYPE_ACCESS)
334 xname = POSIX_ACL_XATTR_ACCESS;
335
336 size = strlen(xname) + 1;
337 if (list && size <= list_size)
338 memcpy(list, xname, size);
339 return size;
340}
341
342static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name,
343 void *buffer, size_t size, int type)
344{
345 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
346 struct posix_acl *acl;
347 int error;
348
349 if (strcmp(name, "") != 0)
350 return -EINVAL;
351 if (!test_opt(sbi, POSIX_ACL))
352 return -EOPNOTSUPP;
353
354 acl = f2fs_get_acl(dentry->d_inode, type);
355 if (IS_ERR(acl))
356 return PTR_ERR(acl);
357 if (!acl)
358 return -ENODATA;
359 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
360 posix_acl_release(acl);
361
362 return error;
363}
364
365static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
366 const void *value, size_t size, int flags, int type)
367{
368 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
369 struct inode *inode = dentry->d_inode;
370 struct posix_acl *acl = NULL;
371 int error;
372
373 if (strcmp(name, "") != 0)
374 return -EINVAL;
375 if (!test_opt(sbi, POSIX_ACL))
376 return -EOPNOTSUPP;
377 if (!inode_owner_or_capable(inode))
378 return -EPERM;
379
380 if (value) {
381 acl = posix_acl_from_xattr(&init_user_ns, value, size);
382 if (IS_ERR(acl))
383 return PTR_ERR(acl);
384 if (acl) {
385 error = posix_acl_valid(acl);
386 if (error)
387 goto release_and_out;
388 }
389 } else {
390 acl = NULL;
391 }
392
393 error = f2fs_set_acl(inode, type, acl);
394
395release_and_out:
396 posix_acl_release(acl);
397 return error;
398}
399
400const struct xattr_handler f2fs_xattr_acl_default_handler = {
401 .prefix = POSIX_ACL_XATTR_DEFAULT,
402 .flags = ACL_TYPE_DEFAULT,
403 .list = f2fs_xattr_list_acl,
404 .get = f2fs_xattr_get_acl,
405 .set = f2fs_xattr_set_acl,
406};
407
408const struct xattr_handler f2fs_xattr_acl_access_handler = {
409 .prefix = POSIX_ACL_XATTR_ACCESS,
410 .flags = ACL_TYPE_ACCESS,
411 .list = f2fs_xattr_list_acl,
412 .get = f2fs_xattr_get_acl,
413 .set = f2fs_xattr_set_acl,
414};
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
new file mode 100644
index 000000000000..80f430674417
--- /dev/null
+++ b/fs/f2fs/acl.h
@@ -0,0 +1,57 @@
1/*
2 * fs/f2fs/acl.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/acl.h
8 *
9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#ifndef __F2FS_ACL_H__
16#define __F2FS_ACL_H__
17
18#include <linux/posix_acl_xattr.h>
19
20#define F2FS_ACL_VERSION 0x0001
21
22struct f2fs_acl_entry {
23 __le16 e_tag;
24 __le16 e_perm;
25 __le32 e_id;
26};
27
28struct f2fs_acl_entry_short {
29 __le16 e_tag;
30 __le16 e_perm;
31};
32
33struct f2fs_acl_header {
34 __le32 a_version;
35};
36
37#ifdef CONFIG_F2FS_FS_POSIX_ACL
38
39extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type);
40extern int f2fs_acl_chmod(struct inode *inode);
41extern int f2fs_init_acl(struct inode *inode, struct inode *dir);
42#else
43#define f2fs_check_acl NULL
44#define f2fs_get_acl NULL
45#define f2fs_set_acl NULL
46
47static inline int f2fs_acl_chmod(struct inode *inode)
48{
49 return 0;
50}
51
52static inline int f2fs_init_acl(struct inode *inode, struct inode *dir)
53{
54 return 0;
55}
56#endif
57#endif /* __F2FS_ACL_H__ */
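One design note worth calling out: when CONFIG_F2FS_FS_POSIX_ACL is off, f2fs_init_acl() and f2fs_acl_chmod() compile down to the inline stubs above that return 0, so call sites need no #ifdef guards. A hypothetical caller (sketch only; the function name below is illustrative, not from this patch) builds unchanged either way:

    static int example_create_hook(struct inode *dir, struct inode *inode)
    {
    	/* becomes a no-op returning 0 when ACL support is compiled out */
    	int err = f2fs_init_acl(inode, dir);
    	if (err)
    		return err;
    	/* ... continue with dentry setup ... */
    	return 0;
    }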
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
new file mode 100644
index 000000000000..6ef36c37e2be
--- /dev/null
+++ b/fs/f2fs/checkpoint.c
@@ -0,0 +1,794 @@
1/*
2 * fs/f2fs/checkpoint.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/bio.h>
13#include <linux/mpage.h>
14#include <linux/writeback.h>
15#include <linux/blkdev.h>
16#include <linux/f2fs_fs.h>
17#include <linux/pagevec.h>
18#include <linux/swap.h>
19
20#include "f2fs.h"
21#include "node.h"
22#include "segment.h"
23
24static struct kmem_cache *orphan_entry_slab;
25static struct kmem_cache *inode_entry_slab;
26
27/*
28 * We guarantee no failure on the returned page.
29 */
30struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
31{
32 struct address_space *mapping = sbi->meta_inode->i_mapping;
33 struct page *page = NULL;
34repeat:
35 page = grab_cache_page(mapping, index);
36 if (!page) {
37 cond_resched();
38 goto repeat;
39 }
40
41	/* We wait for writeback only inside grab_meta_page() */
42 wait_on_page_writeback(page);
43 SetPageUptodate(page);
44 return page;
45}
46
47/*
48 * We guarantee no failure on the returned page.
49 */
50struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
51{
52 struct address_space *mapping = sbi->meta_inode->i_mapping;
53 struct page *page;
54repeat:
55 page = grab_cache_page(mapping, index);
56 if (!page) {
57 cond_resched();
58 goto repeat;
59 }
60 if (f2fs_readpage(sbi, page, index, READ_SYNC)) {
61 f2fs_put_page(page, 1);
62 goto repeat;
63 }
64 mark_page_accessed(page);
65
66	/* We do not allow returning an erroneous page */
67 return page;
68}
69
70static int f2fs_write_meta_page(struct page *page,
71 struct writeback_control *wbc)
72{
73 struct inode *inode = page->mapping->host;
74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
75 int err;
76
77 wait_on_page_writeback(page);
78
79 err = write_meta_page(sbi, page, wbc);
80 if (err) {
81 wbc->pages_skipped++;
82 set_page_dirty(page);
83 }
84
85 dec_page_count(sbi, F2FS_DIRTY_META);
86
87 /* In this case, we should not unlock this page */
88 if (err != AOP_WRITEPAGE_ACTIVATE)
89 unlock_page(page);
90 return err;
91}
92
93static int f2fs_write_meta_pages(struct address_space *mapping,
94 struct writeback_control *wbc)
95{
96 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
97 struct block_device *bdev = sbi->sb->s_bdev;
98 long written;
99
100 if (wbc->for_kupdate)
101 return 0;
102
103 if (get_pages(sbi, F2FS_DIRTY_META) == 0)
104 return 0;
105
106	/* if mounting has failed, skip writing meta pages */
107 mutex_lock(&sbi->cp_mutex);
108 written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
109 mutex_unlock(&sbi->cp_mutex);
110 wbc->nr_to_write -= written;
111 return 0;
112}
113
114long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
115 long nr_to_write)
116{
117 struct address_space *mapping = sbi->meta_inode->i_mapping;
118 pgoff_t index = 0, end = LONG_MAX;
119 struct pagevec pvec;
120 long nwritten = 0;
121 struct writeback_control wbc = {
122 .for_reclaim = 0,
123 };
124
125 pagevec_init(&pvec, 0);
126
127 while (index <= end) {
128 int i, nr_pages;
129 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
130 PAGECACHE_TAG_DIRTY,
131 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
132 if (nr_pages == 0)
133 break;
134
135 for (i = 0; i < nr_pages; i++) {
136 struct page *page = pvec.pages[i];
137 lock_page(page);
138 BUG_ON(page->mapping != mapping);
139 BUG_ON(!PageDirty(page));
140 clear_page_dirty_for_io(page);
141 f2fs_write_meta_page(page, &wbc);
142 if (nwritten++ >= nr_to_write)
143 break;
144 }
145 pagevec_release(&pvec);
146 cond_resched();
147 }
148
149 if (nwritten)
150 f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
151
152 return nwritten;
153}
154
155static int f2fs_set_meta_page_dirty(struct page *page)
156{
157 struct address_space *mapping = page->mapping;
158 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
159
160 SetPageUptodate(page);
161 if (!PageDirty(page)) {
162 __set_page_dirty_nobuffers(page);
163 inc_page_count(sbi, F2FS_DIRTY_META);
164 F2FS_SET_SB_DIRT(sbi);
165 return 1;
166 }
167 return 0;
168}
169
170const struct address_space_operations f2fs_meta_aops = {
171 .writepage = f2fs_write_meta_page,
172 .writepages = f2fs_write_meta_pages,
173 .set_page_dirty = f2fs_set_meta_page_dirty,
174};
175
176int check_orphan_space(struct f2fs_sb_info *sbi)
177{
178 unsigned int max_orphans;
179 int err = 0;
180
181 /*
182	 * Considering a 512-block segment, 5 blocks are needed for the cp pack
183	 * and the log segment summaries; the remaining blocks keep orphan
184	 * entries. With one reserved segment for the cp pack, we can hold at
185	 * most (512 - 5) * 1020 = 507 * 1020 orphan entries.
186 */
187 max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
188 mutex_lock(&sbi->orphan_inode_mutex);
189 if (sbi->n_orphans >= max_orphans)
190 err = -ENOSPC;
191 mutex_unlock(&sbi->orphan_inode_mutex);
192 return err;
193}
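/*
 * Editor's illustration (assumes the default 512-block segments cited
 * in the comment above): max_orphans = (512 - 5) * 1020 = 517140, so
 * this check only starts returning -ENOSPC once more than half a
 * million open-but-unlinked inodes have accumulated.
 */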
194
195void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
196{
197 struct list_head *head, *this;
198 struct orphan_inode_entry *new = NULL, *orphan = NULL;
199
200 mutex_lock(&sbi->orphan_inode_mutex);
201 head = &sbi->orphan_inode_list;
202 list_for_each(this, head) {
203 orphan = list_entry(this, struct orphan_inode_entry, list);
204 if (orphan->ino == ino)
205 goto out;
206 if (orphan->ino > ino)
207 break;
208 orphan = NULL;
209 }
210retry:
211 new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
212 if (!new) {
213 cond_resched();
214 goto retry;
215 }
216 new->ino = ino;
217 INIT_LIST_HEAD(&new->list);
218
219	/* add the new entry into the list, which is sorted by inode number */
220 if (orphan) {
221 struct orphan_inode_entry *prev;
222
223 /* get previous entry */
224 prev = list_entry(orphan->list.prev, typeof(*prev), list);
225 if (&prev->list != head)
226 /* insert new orphan inode entry */
227 list_add(&new->list, &prev->list);
228 else
229 list_add(&new->list, head);
230 } else {
231 list_add_tail(&new->list, head);
232 }
233 sbi->n_orphans++;
234out:
235 mutex_unlock(&sbi->orphan_inode_mutex);
236}
237
238void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
239{
240 struct list_head *this, *next, *head;
241 struct orphan_inode_entry *orphan;
242
243 mutex_lock(&sbi->orphan_inode_mutex);
244 head = &sbi->orphan_inode_list;
245 list_for_each_safe(this, next, head) {
246 orphan = list_entry(this, struct orphan_inode_entry, list);
247 if (orphan->ino == ino) {
248 list_del(&orphan->list);
249 kmem_cache_free(orphan_entry_slab, orphan);
250 sbi->n_orphans--;
251 break;
252 }
253 }
254 mutex_unlock(&sbi->orphan_inode_mutex);
255}
256
257static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
258{
259 struct inode *inode = f2fs_iget(sbi->sb, ino);
260 BUG_ON(IS_ERR(inode));
261 clear_nlink(inode);
262
263 /* truncate all the data during iput */
264 iput(inode);
265}
266
267int recover_orphan_inodes(struct f2fs_sb_info *sbi)
268{
269 block_t start_blk, orphan_blkaddr, i, j;
270
271 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
272 return 0;
273
274 sbi->por_doing = 1;
275 start_blk = __start_cp_addr(sbi) + 1;
276 orphan_blkaddr = __start_sum_addr(sbi) - 1;
277
278 for (i = 0; i < orphan_blkaddr; i++) {
279 struct page *page = get_meta_page(sbi, start_blk + i);
280 struct f2fs_orphan_block *orphan_blk;
281
282 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
283 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
284 nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
285 recover_orphan_inode(sbi, ino);
286 }
287 f2fs_put_page(page, 1);
288 }
289 /* clear Orphan Flag */
290 clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
291 sbi->por_doing = 0;
292 return 0;
293}
294
295static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
296{
297 struct list_head *head, *this, *next;
298 struct f2fs_orphan_block *orphan_blk = NULL;
299 struct page *page = NULL;
300 unsigned int nentries = 0;
301 unsigned short index = 1;
302 unsigned short orphan_blocks;
303
304 orphan_blocks = (unsigned short)((sbi->n_orphans +
305 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
306
307 mutex_lock(&sbi->orphan_inode_mutex);
308 head = &sbi->orphan_inode_list;
309
310	/* loop over each orphan inode entry and write them into orphan blocks */
311 list_for_each_safe(this, next, head) {
312 struct orphan_inode_entry *orphan;
313
314 orphan = list_entry(this, struct orphan_inode_entry, list);
315
316 if (nentries == F2FS_ORPHANS_PER_BLOCK) {
317 /*
318			 * the current orphan block is full (1020 entries),
319			 * so we need to flush it and grab another page
320			 * for the next orphan block
321 */
322 orphan_blk->blk_addr = cpu_to_le16(index);
323 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
324 orphan_blk->entry_count = cpu_to_le32(nentries);
325 set_page_dirty(page);
326 f2fs_put_page(page, 1);
327 index++;
328 start_blk++;
329 nentries = 0;
330 page = NULL;
331 }
332 if (page)
333 goto page_exist;
334
335 page = grab_meta_page(sbi, start_blk);
336 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
337 memset(orphan_blk, 0, sizeof(*orphan_blk));
338page_exist:
339 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
340 }
341 if (!page)
342 goto end;
343
344 orphan_blk->blk_addr = cpu_to_le16(index);
345 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
346 orphan_blk->entry_count = cpu_to_le32(nentries);
347 set_page_dirty(page);
348 f2fs_put_page(page, 1);
349end:
350 mutex_unlock(&sbi->orphan_inode_mutex);
351}
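/*
 * Editor's worked example (derived from the loop above, not part of
 * the patch): with n_orphans = 2500, orphan_blocks = ceil(2500/1020)
 * = 3; the loop emits blocks holding 1020, 1020, and 460 entries,
 * stamped with blk_addr = 1, 2, 3 respectively and blk_count = 3 in
 * each, so recovery can tell how many orphan blocks follow the cp
 * header.
 */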
352
353static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
354 block_t cp_addr, unsigned long long *version)
355{
356 struct page *cp_page_1, *cp_page_2 = NULL;
357 unsigned long blk_size = sbi->blocksize;
358 struct f2fs_checkpoint *cp_block;
359 unsigned long long cur_version = 0, pre_version = 0;
360 unsigned int crc = 0;
361 size_t crc_offset;
362
363 /* Read the 1st cp block in this CP pack */
364 cp_page_1 = get_meta_page(sbi, cp_addr);
365
366 /* get the version number */
367 cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
368 crc_offset = le32_to_cpu(cp_block->checksum_offset);
369 if (crc_offset >= blk_size)
370 goto invalid_cp1;
371
372 crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
373 if (!f2fs_crc_valid(crc, cp_block, crc_offset))
374 goto invalid_cp1;
375
376 pre_version = le64_to_cpu(cp_block->checkpoint_ver);
377
378 /* Read the 2nd cp block in this CP pack */
379 cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
380 cp_page_2 = get_meta_page(sbi, cp_addr);
381
382 cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
383 crc_offset = le32_to_cpu(cp_block->checksum_offset);
384 if (crc_offset >= blk_size)
385 goto invalid_cp2;
386
387 crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
388 if (!f2fs_crc_valid(crc, cp_block, crc_offset))
389 goto invalid_cp2;
390
391 cur_version = le64_to_cpu(cp_block->checkpoint_ver);
392
393 if (cur_version == pre_version) {
394 *version = cur_version;
395 f2fs_put_page(cp_page_2, 1);
396 return cp_page_1;
397 }
398invalid_cp2:
399 f2fs_put_page(cp_page_2, 1);
400invalid_cp1:
401 f2fs_put_page(cp_page_1, 1);
402 return NULL;
403}
404
405int get_valid_checkpoint(struct f2fs_sb_info *sbi)
406{
407 struct f2fs_checkpoint *cp_block;
408 struct f2fs_super_block *fsb = sbi->raw_super;
409 struct page *cp1, *cp2, *cur_page;
410 unsigned long blk_size = sbi->blocksize;
411 unsigned long long cp1_version = 0, cp2_version = 0;
412 unsigned long long cp_start_blk_no;
413
414 sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
415 if (!sbi->ckpt)
416 return -ENOMEM;
417 /*
418	 * Finding the valid cp block involves reading both
419	 * sets (cp pack 1 and cp pack 2)
420 */
421 cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
422 cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
423
424 /* The second checkpoint pack should start at the next segment */
425 cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
426 cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
427
428 if (cp1 && cp2) {
429 if (ver_after(cp2_version, cp1_version))
430 cur_page = cp2;
431 else
432 cur_page = cp1;
433 } else if (cp1) {
434 cur_page = cp1;
435 } else if (cp2) {
436 cur_page = cp2;
437 } else {
438 goto fail_no_cp;
439 }
440
441 cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
442 memcpy(sbi->ckpt, cp_block, blk_size);
443
444 f2fs_put_page(cp1, 1);
445 f2fs_put_page(cp2, 1);
446 return 0;
447
448fail_no_cp:
449 kfree(sbi->ckpt);
450 return -EINVAL;
451}
452
453void set_dirty_dir_page(struct inode *inode, struct page *page)
454{
455 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
456 struct list_head *head = &sbi->dir_inode_list;
457 struct dir_inode_entry *new;
458 struct list_head *this;
459
460 if (!S_ISDIR(inode->i_mode))
461 return;
462retry:
463 new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
464 if (!new) {
465 cond_resched();
466 goto retry;
467 }
468 new->inode = inode;
469 INIT_LIST_HEAD(&new->list);
470
471 spin_lock(&sbi->dir_inode_lock);
472 list_for_each(this, head) {
473 struct dir_inode_entry *entry;
474 entry = list_entry(this, struct dir_inode_entry, list);
475 if (entry->inode == inode) {
476 kmem_cache_free(inode_entry_slab, new);
477 goto out;
478 }
479 }
480 list_add_tail(&new->list, head);
481 sbi->n_dirty_dirs++;
482
483 BUG_ON(!S_ISDIR(inode->i_mode));
484out:
485 inc_page_count(sbi, F2FS_DIRTY_DENTS);
486 inode_inc_dirty_dents(inode);
487 SetPagePrivate(page);
488
489 spin_unlock(&sbi->dir_inode_lock);
490}
491
492void remove_dirty_dir_inode(struct inode *inode)
493{
494 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
495 struct list_head *head = &sbi->dir_inode_list;
496 struct list_head *this;
497
498 if (!S_ISDIR(inode->i_mode))
499 return;
500
501 spin_lock(&sbi->dir_inode_lock);
502 if (atomic_read(&F2FS_I(inode)->dirty_dents))
503 goto out;
504
505 list_for_each(this, head) {
506 struct dir_inode_entry *entry;
507 entry = list_entry(this, struct dir_inode_entry, list);
508 if (entry->inode == inode) {
509 list_del(&entry->list);
510 kmem_cache_free(inode_entry_slab, entry);
511 sbi->n_dirty_dirs--;
512 break;
513 }
514 }
515out:
516 spin_unlock(&sbi->dir_inode_lock);
517}
518
519void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
520{
521 struct list_head *head = &sbi->dir_inode_list;
522 struct dir_inode_entry *entry;
523 struct inode *inode;
524retry:
525 spin_lock(&sbi->dir_inode_lock);
526 if (list_empty(head)) {
527 spin_unlock(&sbi->dir_inode_lock);
528 return;
529 }
530 entry = list_entry(head->next, struct dir_inode_entry, list);
531 inode = igrab(entry->inode);
532 spin_unlock(&sbi->dir_inode_lock);
533 if (inode) {
534 filemap_flush(inode->i_mapping);
535 iput(inode);
536 } else {
537 /*
538		 * We should submit the bio, since several dentry pages
539		 * in the freeing inode may still be under writeback.
540 */
541 f2fs_submit_bio(sbi, DATA, true);
542 }
543 goto retry;
544}
545
546/*
547 * Freeze all the FS-operations for checkpoint.
548 */
549void block_operations(struct f2fs_sb_info *sbi)
550{
551 int t;
552 struct writeback_control wbc = {
553 .sync_mode = WB_SYNC_ALL,
554 .nr_to_write = LONG_MAX,
555 .for_reclaim = 0,
556 };
557
558 /* Stop renaming operation */
559 mutex_lock_op(sbi, RENAME);
560 mutex_lock_op(sbi, DENTRY_OPS);
561
562retry_dents:
563 /* write all the dirty dentry pages */
564 sync_dirty_dir_inodes(sbi);
565
566 mutex_lock_op(sbi, DATA_WRITE);
567 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
568 mutex_unlock_op(sbi, DATA_WRITE);
569 goto retry_dents;
570 }
571
572 /* block all the operations */
573 for (t = DATA_NEW; t <= NODE_TRUNC; t++)
574 mutex_lock_op(sbi, t);
575
576 mutex_lock(&sbi->write_inode);
577
578 /*
579	 * POR: we should ensure that there are no dirty node pages
580 * until finishing nat/sit flush.
581 */
582retry:
583 sync_node_pages(sbi, 0, &wbc);
584
585 mutex_lock_op(sbi, NODE_WRITE);
586
587 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
588 mutex_unlock_op(sbi, NODE_WRITE);
589 goto retry;
590 }
591 mutex_unlock(&sbi->write_inode);
592}
593
594static void unblock_operations(struct f2fs_sb_info *sbi)
595{
596 int t;
597 for (t = NODE_WRITE; t >= RENAME; t--)
598 mutex_unlock_op(sbi, t);
599}
600
601static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
602{
603 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
604 nid_t last_nid = 0;
605 block_t start_blk;
606 struct page *cp_page;
607 unsigned int data_sum_blocks, orphan_blocks;
608 unsigned int crc32 = 0;
609 void *kaddr;
610 int i;
611
612 /* Flush all the NAT/SIT pages */
613 while (get_pages(sbi, F2FS_DIRTY_META))
614 sync_meta_pages(sbi, META, LONG_MAX);
615
616 next_free_nid(sbi, &last_nid);
617
618 /*
619 * modify checkpoint
620 * version number is already updated
621 */
622 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
623 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
624 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
625 for (i = 0; i < 3; i++) {
626 ckpt->cur_node_segno[i] =
627 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
628 ckpt->cur_node_blkoff[i] =
629 cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
630 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
631 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
632 }
633 for (i = 0; i < 3; i++) {
634 ckpt->cur_data_segno[i] =
635 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
636 ckpt->cur_data_blkoff[i] =
637 cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
638 ckpt->alloc_type[i + CURSEG_HOT_DATA] =
639 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
640 }
641
642 ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
643 ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
644 ckpt->next_free_nid = cpu_to_le32(last_nid);
645
646 /* 2 cp + n data seg summary + orphan inode blocks */
647 data_sum_blocks = npages_for_summary_flush(sbi);
648 if (data_sum_blocks < 3)
649 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
650 else
651 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
652
653 orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
654 / F2FS_ORPHANS_PER_BLOCK;
655 ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
656
657 if (is_umount) {
658 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
659 ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
660 data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
661 } else {
662 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
663 ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
664 data_sum_blocks + orphan_blocks);
665 }
666
667 if (sbi->n_orphans)
668 set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
669 else
670 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
671
672 /* update SIT/NAT bitmap */
673 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
674 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
675
676 crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
677 *(__le32 *)((unsigned char *)ckpt +
678 le32_to_cpu(ckpt->checksum_offset))
679 = cpu_to_le32(crc32);
680
681 start_blk = __start_cp_addr(sbi);
682
683 /* write out checkpoint buffer at block 0 */
684 cp_page = grab_meta_page(sbi, start_blk++);
685 kaddr = page_address(cp_page);
686 memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
687 set_page_dirty(cp_page);
688 f2fs_put_page(cp_page, 1);
689
690 if (sbi->n_orphans) {
691 write_orphan_inodes(sbi, start_blk);
692 start_blk += orphan_blocks;
693 }
694
695 write_data_summaries(sbi, start_blk);
696 start_blk += data_sum_blocks;
697 if (is_umount) {
698 write_node_summaries(sbi, start_blk);
699 start_blk += NR_CURSEG_NODE_TYPE;
700 }
701
702	/* write out the checkpoint block */
703 cp_page = grab_meta_page(sbi, start_blk);
704 kaddr = page_address(cp_page);
705 memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
706 set_page_dirty(cp_page);
707 f2fs_put_page(cp_page, 1);
708
709	/* wait for writeback of previously submitted node/meta pages */
710 while (get_pages(sbi, F2FS_WRITEBACK))
711 congestion_wait(BLK_RW_ASYNC, HZ / 50);
712
713 filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
714 filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
715
716 /* update user_block_counts */
717 sbi->last_valid_block_count = sbi->total_valid_block_count;
718 sbi->alloc_valid_block_count = 0;
719
720	/* Here, we have only one bio containing the CP pack */
721 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))
722 sbi->sb->s_flags |= MS_RDONLY;
723 else
724 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
725
726 clear_prefree_segments(sbi);
727 F2FS_RESET_SB_DIRT(sbi);
728}
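/*
 * Editor's sketch of the resulting CP pack layout (from the block
 * accounting above): [cp header][orphan blocks, if any][data summary
 * blocks][node summary blocks, umount only][cp footer]. For a regular
 * checkpoint with no orphans and one compact data summary block this
 * is just 2 + 1 = 3 blocks; the identical header/footer copies are
 * what lets validate_checkpoint() detect a torn pack at mount time.
 */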
729
730/*
731	 * We guarantee that this checkpoint procedure will not fail.
732 */
733void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount)
734{
735 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
736 unsigned long long ckpt_ver;
737
738 if (!blocked) {
739 mutex_lock(&sbi->cp_mutex);
740 block_operations(sbi);
741 }
742
743 f2fs_submit_bio(sbi, DATA, true);
744 f2fs_submit_bio(sbi, NODE, true);
745 f2fs_submit_bio(sbi, META, true);
746
747 /*
748 * update checkpoint pack index
749 * Increase the version number so that
750	 * SIT entries and seg summaries are written in the correct place
751 */
752 ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver);
753 ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
754
755 /* write cached NAT/SIT entries to NAT/SIT area */
756 flush_nat_entries(sbi);
757 flush_sit_entries(sbi);
758
759 reset_victim_segmap(sbi);
760
761 /* unlock all the fs_lock[] in do_checkpoint() */
762 do_checkpoint(sbi, is_umount);
763
764 unblock_operations(sbi);
765 mutex_unlock(&sbi->cp_mutex);
766}
767
768void init_orphan_info(struct f2fs_sb_info *sbi)
769{
770 mutex_init(&sbi->orphan_inode_mutex);
771 INIT_LIST_HEAD(&sbi->orphan_inode_list);
772 sbi->n_orphans = 0;
773}
774
775int create_checkpoint_caches(void)
776{
777 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
778 sizeof(struct orphan_inode_entry), NULL);
779 if (unlikely(!orphan_entry_slab))
780 return -ENOMEM;
781 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
782 sizeof(struct dir_inode_entry), NULL);
783 if (unlikely(!inode_entry_slab)) {
784 kmem_cache_destroy(orphan_entry_slab);
785 return -ENOMEM;
786 }
787 return 0;
788}
789
790void destroy_checkpoint_caches(void)
791{
792 kmem_cache_destroy(orphan_entry_slab);
793 kmem_cache_destroy(inode_entry_slab);
794}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
new file mode 100644
index 000000000000..655aeabc1dd4
--- /dev/null
+++ b/fs/f2fs/data.c
@@ -0,0 +1,702 @@
1/*
2 * fs/f2fs/data.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h>
14#include <linux/mpage.h>
15#include <linux/writeback.h>
16#include <linux/backing-dev.h>
17#include <linux/blkdev.h>
18#include <linux/bio.h>
19
20#include "f2fs.h"
21#include "node.h"
22#include "segment.h"
23
24/*
25 * Lock ordering for the change of data block address:
26 * ->data_page
27 * ->node_page
28 * update block addresses in the node page
29 */
30static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
31{
32 struct f2fs_node *rn;
33 __le32 *addr_array;
34 struct page *node_page = dn->node_page;
35 unsigned int ofs_in_node = dn->ofs_in_node;
36
37 wait_on_page_writeback(node_page);
38
39 rn = (struct f2fs_node *)page_address(node_page);
40
41 /* Get physical address of data block */
42 addr_array = blkaddr_in_node(rn);
43 addr_array[ofs_in_node] = cpu_to_le32(new_addr);
44 set_page_dirty(node_page);
45}
46
47int reserve_new_block(struct dnode_of_data *dn)
48{
49 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
50
51 if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
52 return -EPERM;
53 if (!inc_valid_block_count(sbi, dn->inode, 1))
54 return -ENOSPC;
55
56 __set_data_blkaddr(dn, NEW_ADDR);
57 dn->data_blkaddr = NEW_ADDR;
58 sync_inode_page(dn);
59 return 0;
60}
61
62static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
63 struct buffer_head *bh_result)
64{
65 struct f2fs_inode_info *fi = F2FS_I(inode);
66 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
67 pgoff_t start_fofs, end_fofs;
68 block_t start_blkaddr;
69
70 read_lock(&fi->ext.ext_lock);
71 if (fi->ext.len == 0) {
72 read_unlock(&fi->ext.ext_lock);
73 return 0;
74 }
75
76 sbi->total_hit_ext++;
77 start_fofs = fi->ext.fofs;
78 end_fofs = fi->ext.fofs + fi->ext.len - 1;
79 start_blkaddr = fi->ext.blk_addr;
80
81 if (pgofs >= start_fofs && pgofs <= end_fofs) {
82 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
83 size_t count;
84
85 clear_buffer_new(bh_result);
86 map_bh(bh_result, inode->i_sb,
87 start_blkaddr + pgofs - start_fofs);
88 count = end_fofs - pgofs + 1;
89 if (count < (UINT_MAX >> blkbits))
90 bh_result->b_size = (count << blkbits);
91 else
92 bh_result->b_size = UINT_MAX;
93
94 sbi->read_hit_ext++;
95 read_unlock(&fi->ext.ext_lock);
96 return 1;
97 }
98 read_unlock(&fi->ext.ext_lock);
99 return 0;
100}
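/*
 * Editor's worked example (not part of the patch): with a cached
 * extent {fofs = 10, len = 5, blk_addr = 100}, a lookup for pgofs = 12
 * hits, maps the buffer head to block 100 + (12 - 10) = 102, and sets
 * b_size to cover count = 14 - 12 + 1 = 3 remaining blocks of the
 * extent, so one cache hit can satisfy a multi-block read without
 * touching the node page.
 */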
101
102void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
103{
104 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
105 pgoff_t fofs, start_fofs, end_fofs;
106 block_t start_blkaddr, end_blkaddr;
107
108 BUG_ON(blk_addr == NEW_ADDR);
109 fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node;
110
111 /* Update the page address in the parent node */
112 __set_data_blkaddr(dn, blk_addr);
113
114 write_lock(&fi->ext.ext_lock);
115
116 start_fofs = fi->ext.fofs;
117 end_fofs = fi->ext.fofs + fi->ext.len - 1;
118 start_blkaddr = fi->ext.blk_addr;
119 end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
120
121 /* Drop and initialize the matched extent */
122 if (fi->ext.len == 1 && fofs == start_fofs)
123 fi->ext.len = 0;
124
125 /* Initial extent */
126 if (fi->ext.len == 0) {
127 if (blk_addr != NULL_ADDR) {
128 fi->ext.fofs = fofs;
129 fi->ext.blk_addr = blk_addr;
130 fi->ext.len = 1;
131 }
132 goto end_update;
133 }
134
135	/* Front merge */
136 if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
137 fi->ext.fofs--;
138 fi->ext.blk_addr--;
139 fi->ext.len++;
140 goto end_update;
141 }
142
143 /* Back merge */
144 if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
145 fi->ext.len++;
146 goto end_update;
147 }
148
149 /* Split the existing extent */
150 if (fi->ext.len > 1 &&
151 fofs >= start_fofs && fofs <= end_fofs) {
152 if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
153 fi->ext.len = fofs - start_fofs;
154 } else {
155 fi->ext.fofs = fofs + 1;
156 fi->ext.blk_addr = start_blkaddr +
157 fofs - start_fofs + 1;
158 fi->ext.len -= fofs - start_fofs + 1;
159 }
160 goto end_update;
161 }
162 write_unlock(&fi->ext.ext_lock);
163 return;
164
165end_update:
166 write_unlock(&fi->ext.ext_lock);
167 sync_inode_page(dn);
168 return;
169}
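/*
 * Editor's worked examples for the cases above (not part of the
 * patch), starting from the extent {fofs = 10, len = 5, blk_addr =
 * 100}: a front merge (writing fofs 9 to block 99) yields {9, 6, 99};
 * a back merge (fofs 15 at block 105) yields {10, 6, 100}; a
 * non-contiguous rewrite of fofs 13 splits the extent and keeps the
 * larger head, shrinking it to {10, 3, 100}.
 */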
170
171struct page *find_data_page(struct inode *inode, pgoff_t index)
172{
173 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
174 struct address_space *mapping = inode->i_mapping;
175 struct dnode_of_data dn;
176 struct page *page;
177 int err;
178
179 page = find_get_page(mapping, index);
180 if (page && PageUptodate(page))
181 return page;
182 f2fs_put_page(page, 0);
183
184 set_new_dnode(&dn, inode, NULL, NULL, 0);
185 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
186 if (err)
187 return ERR_PTR(err);
188 f2fs_put_dnode(&dn);
189
190 if (dn.data_blkaddr == NULL_ADDR)
191 return ERR_PTR(-ENOENT);
192
193	/* fallocate() may leave no cached page while the block address is NEW_ADDR */
194 if (dn.data_blkaddr == NEW_ADDR)
195 return ERR_PTR(-EINVAL);
196
197 page = grab_cache_page(mapping, index);
198 if (!page)
199 return ERR_PTR(-ENOMEM);
200
201 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
202 if (err) {
203 f2fs_put_page(page, 1);
204 return ERR_PTR(err);
205 }
206 unlock_page(page);
207 return page;
208}
209
210/*
211	 * If this function tries to access a hole, it returns an error,
212	 * because the callers (functions in dir.c and GC) need to know
213	 * whether this page exists or not.
214 */
215struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
216{
217 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
218 struct address_space *mapping = inode->i_mapping;
219 struct dnode_of_data dn;
220 struct page *page;
221 int err;
222
223 set_new_dnode(&dn, inode, NULL, NULL, 0);
224 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
225 if (err)
226 return ERR_PTR(err);
227 f2fs_put_dnode(&dn);
228
229 if (dn.data_blkaddr == NULL_ADDR)
230 return ERR_PTR(-ENOENT);
231
232 page = grab_cache_page(mapping, index);
233 if (!page)
234 return ERR_PTR(-ENOMEM);
235
236 if (PageUptodate(page))
237 return page;
238
239 BUG_ON(dn.data_blkaddr == NEW_ADDR);
240 BUG_ON(dn.data_blkaddr == NULL_ADDR);
241
242 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
243 if (err) {
244 f2fs_put_page(page, 1);
245 return ERR_PTR(err);
246 }
247 return page;
248}
249
250/*
251 * Caller ensures that this data page is never allocated.
252 * A new zero-filled data page is allocated in the page cache.
253 */
254struct page *get_new_data_page(struct inode *inode, pgoff_t index,
255 bool new_i_size)
256{
257 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
258 struct address_space *mapping = inode->i_mapping;
259 struct page *page;
260 struct dnode_of_data dn;
261 int err;
262
263 set_new_dnode(&dn, inode, NULL, NULL, 0);
264 err = get_dnode_of_data(&dn, index, 0);
265 if (err)
266 return ERR_PTR(err);
267
268 if (dn.data_blkaddr == NULL_ADDR) {
269 if (reserve_new_block(&dn)) {
270 f2fs_put_dnode(&dn);
271 return ERR_PTR(-ENOSPC);
272 }
273 }
274 f2fs_put_dnode(&dn);
275
276 page = grab_cache_page(mapping, index);
277 if (!page)
278 return ERR_PTR(-ENOMEM);
279
280 if (PageUptodate(page))
281 return page;
282
283 if (dn.data_blkaddr == NEW_ADDR) {
284 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
285 } else {
286 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
287 if (err) {
288 f2fs_put_page(page, 1);
289 return ERR_PTR(err);
290 }
291 }
292 SetPageUptodate(page);
293
294 if (new_i_size &&
295 i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
296 i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
297 mark_inode_dirty_sync(inode);
298 }
299 return page;
300}
301
302static void read_end_io(struct bio *bio, int err)
303{
304 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
305 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
306
307 do {
308 struct page *page = bvec->bv_page;
309
310 if (--bvec >= bio->bi_io_vec)
311 prefetchw(&bvec->bv_page->flags);
312
313 if (uptodate) {
314 SetPageUptodate(page);
315 } else {
316 ClearPageUptodate(page);
317 SetPageError(page);
318 }
319 unlock_page(page);
320 } while (bvec >= bio->bi_io_vec);
321 kfree(bio->bi_private);
322 bio_put(bio);
323}
324
325/*
326 * Fill the locked page with data located in the block address.
327 * Read operation is synchronous, and caller must unlock the page.
328 */
329int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
330 block_t blk_addr, int type)
331{
332 struct block_device *bdev = sbi->sb->s_bdev;
333 bool sync = (type == READ_SYNC);
334 struct bio *bio;
335
336	/* This page may already have been read by another thread */
337 if (PageUptodate(page)) {
338 if (!sync)
339 unlock_page(page);
340 return 0;
341 }
342
343 down_read(&sbi->bio_sem);
344
345 /* Allocate a new bio */
346 bio = f2fs_bio_alloc(bdev, 1);
347
348 /* Initialize the bio */
349 bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
350 bio->bi_end_io = read_end_io;
351
352 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
353 kfree(bio->bi_private);
354 bio_put(bio);
355 up_read(&sbi->bio_sem);
356 return -EFAULT;
357 }
358
359 submit_bio(type, bio);
360 up_read(&sbi->bio_sem);
361
362 /* wait for read completion if sync */
363 if (sync) {
364 lock_page(page);
365 if (PageError(page))
366 return -EIO;
367 }
368 return 0;
369}
370
371/*
372	 * This function should be used only by the data read flow, since it
373	 * does not check the "create" flag that indicates block allocation.
374	 * The reason for this special functionality is to exploit the VFS
375	 * readahead mechanism.
376 */
377static int get_data_block_ro(struct inode *inode, sector_t iblock,
378 struct buffer_head *bh_result, int create)
379{
380 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
381 unsigned maxblocks = bh_result->b_size >> blkbits;
382 struct dnode_of_data dn;
383 pgoff_t pgofs;
384 int err;
385
386 /* Get the page offset from the block offset(iblock) */
387 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
388
389 if (check_extent_cache(inode, pgofs, bh_result))
390 return 0;
391
392	/* When reading holes, we still need the node page */
393 set_new_dnode(&dn, inode, NULL, NULL, 0);
394 err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE);
395 if (err)
396 return (err == -ENOENT) ? 0 : err;
397
398 /* It does not support data allocation */
399 BUG_ON(create);
400
401 if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
402 int i;
403 unsigned int end_offset;
404
405 end_offset = IS_INODE(dn.node_page) ?
406 ADDRS_PER_INODE :
407 ADDRS_PER_BLOCK;
408
409 clear_buffer_new(bh_result);
410
411		/* Give as many consecutive addresses as possible for readahead */
412 for (i = 0; i < end_offset - dn.ofs_in_node; i++)
413 if (((datablock_addr(dn.node_page,
414 dn.ofs_in_node + i))
415 != (dn.data_blkaddr + i)) || maxblocks == i)
416 break;
417 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
418 bh_result->b_size = (i << blkbits);
419 }
420 f2fs_put_dnode(&dn);
421 return 0;
422}
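/*
 * Editor's illustration of the readahead loop above (not part of the
 * patch): if the node page holds block addresses 200, 201, 202, 207
 * starting at ofs_in_node, the scan stops at i = 3 (207 != 200 + 3),
 * so map_bh() exposes one 3-block contiguous run and
 * mpage_readpages() can issue it as a single bio.
 */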
423
424static int f2fs_read_data_page(struct file *file, struct page *page)
425{
426 return mpage_readpage(page, get_data_block_ro);
427}
428
429static int f2fs_read_data_pages(struct file *file,
430 struct address_space *mapping,
431 struct list_head *pages, unsigned nr_pages)
432{
433 return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
434}
435
436int do_write_data_page(struct page *page)
437{
438 struct inode *inode = page->mapping->host;
439 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
440 block_t old_blk_addr, new_blk_addr;
441 struct dnode_of_data dn;
442 int err = 0;
443
444 set_new_dnode(&dn, inode, NULL, NULL, 0);
445 err = get_dnode_of_data(&dn, page->index, RDONLY_NODE);
446 if (err)
447 return err;
448
449 old_blk_addr = dn.data_blkaddr;
450
451 /* This page is already truncated */
452 if (old_blk_addr == NULL_ADDR)
453 goto out_writepage;
454
455 set_page_writeback(page);
456
457 /*
458	 * If the current allocation needs SSR,
459	 * in-place writes are preferred for updated data.
460 */
461 if (old_blk_addr != NEW_ADDR && !is_cold_data(page) &&
462 need_inplace_update(inode)) {
463 rewrite_data_page(F2FS_SB(inode->i_sb), page,
464 old_blk_addr);
465 } else {
466 write_data_page(inode, page, &dn,
467 old_blk_addr, &new_blk_addr);
468 update_extent_cache(new_blk_addr, &dn);
469 F2FS_I(inode)->data_version =
470 le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
471 }
472out_writepage:
473 f2fs_put_dnode(&dn);
474 return err;
475}
476
477static int f2fs_write_data_page(struct page *page,
478 struct writeback_control *wbc)
479{
480 struct inode *inode = page->mapping->host;
481 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
482 loff_t i_size = i_size_read(inode);
483 const pgoff_t end_index = ((unsigned long long) i_size)
484 >> PAGE_CACHE_SHIFT;
485 unsigned offset;
486 int err = 0;
487
488 if (page->index < end_index)
489 goto out;
490
491 /*
492	 * If the offset is out of range of the file size,
493 * this page does not have to be written to disk.
494 */
495 offset = i_size & (PAGE_CACHE_SIZE - 1);
496 if ((page->index >= end_index + 1) || !offset) {
497 if (S_ISDIR(inode->i_mode)) {
498 dec_page_count(sbi, F2FS_DIRTY_DENTS);
499 inode_dec_dirty_dents(inode);
500 }
501 goto unlock_out;
502 }
503
504 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
505out:
506 if (sbi->por_doing)
507 goto redirty_out;
508
509 if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page))
510 goto redirty_out;
511
512 mutex_lock_op(sbi, DATA_WRITE);
513 if (S_ISDIR(inode->i_mode)) {
514 dec_page_count(sbi, F2FS_DIRTY_DENTS);
515 inode_dec_dirty_dents(inode);
516 }
517 err = do_write_data_page(page);
518 if (err && err != -ENOENT) {
519 wbc->pages_skipped++;
520 set_page_dirty(page);
521 }
522 mutex_unlock_op(sbi, DATA_WRITE);
523
524 if (wbc->for_reclaim)
525 f2fs_submit_bio(sbi, DATA, true);
526
527 if (err == -ENOENT)
528 goto unlock_out;
529
530 clear_cold_data(page);
531 unlock_page(page);
532
533 if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode))
534 f2fs_balance_fs(sbi);
535 return 0;
536
537unlock_out:
538 unlock_page(page);
539 return (err == -ENOENT) ? 0 : err;
540
541redirty_out:
542 wbc->pages_skipped++;
543 set_page_dirty(page);
544 return AOP_WRITEPAGE_ACTIVATE;
545}
546
547#define MAX_DESIRED_PAGES_WP 4096
548
549static int f2fs_write_data_pages(struct address_space *mapping,
550 struct writeback_control *wbc)
551{
552 struct inode *inode = mapping->host;
553 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
554 int ret;
555 long excess_nrtw = 0, desired_nrtw;
556
557 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
558 desired_nrtw = MAX_DESIRED_PAGES_WP;
559 excess_nrtw = desired_nrtw - wbc->nr_to_write;
560 wbc->nr_to_write = desired_nrtw;
561 }
562
563 if (!S_ISDIR(inode->i_mode))
564 mutex_lock(&sbi->writepages);
565 ret = generic_writepages(mapping, wbc);
566 if (!S_ISDIR(inode->i_mode))
567 mutex_unlock(&sbi->writepages);
568 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
569
570 remove_dirty_dir_inode(inode);
571
572 wbc->nr_to_write -= excess_nrtw;
573 return ret;
574}
575
576static int f2fs_write_begin(struct file *file, struct address_space *mapping,
577 loff_t pos, unsigned len, unsigned flags,
578 struct page **pagep, void **fsdata)
579{
580 struct inode *inode = mapping->host;
581 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
582 struct page *page;
583 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
584 struct dnode_of_data dn;
585 int err = 0;
586
587 /* for nobh_write_end */
588 *fsdata = NULL;
589
590 f2fs_balance_fs(sbi);
591
592 page = grab_cache_page_write_begin(mapping, index, flags);
593 if (!page)
594 return -ENOMEM;
595 *pagep = page;
596
597 mutex_lock_op(sbi, DATA_NEW);
598
599 set_new_dnode(&dn, inode, NULL, NULL, 0);
600 err = get_dnode_of_data(&dn, index, 0);
601 if (err) {
602 mutex_unlock_op(sbi, DATA_NEW);
603 f2fs_put_page(page, 1);
604 return err;
605 }
606
607 if (dn.data_blkaddr == NULL_ADDR) {
608 err = reserve_new_block(&dn);
609 if (err) {
610 f2fs_put_dnode(&dn);
611 mutex_unlock_op(sbi, DATA_NEW);
612 f2fs_put_page(page, 1);
613 return err;
614 }
615 }
616 f2fs_put_dnode(&dn);
617
618 mutex_unlock_op(sbi, DATA_NEW);
619
620 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
621 return 0;
622
623 if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
624 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
625 unsigned end = start + len;
626
627 /* Reading beyond i_size is simple: memset to zero */
628 zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
629 return 0;
630 }
631
632 if (dn.data_blkaddr == NEW_ADDR) {
633 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
634 } else {
635 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
636 if (err) {
637 f2fs_put_page(page, 1);
638 return err;
639 }
640 }
641 SetPageUptodate(page);
642 clear_cold_data(page);
643 return 0;
644}
645
646static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
647 const struct iovec *iov, loff_t offset, unsigned long nr_segs)
648{
649 struct file *file = iocb->ki_filp;
650 struct inode *inode = file->f_mapping->host;
651
652 if (rw == WRITE)
653 return 0;
654
655 /* Needs synchronization with the cleaner */
656 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
657 get_data_block_ro);
658}
659
660static void f2fs_invalidate_data_page(struct page *page, unsigned long offset)
661{
662 struct inode *inode = page->mapping->host;
663 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
664 if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
665 dec_page_count(sbi, F2FS_DIRTY_DENTS);
666 inode_dec_dirty_dents(inode);
667 }
668 ClearPagePrivate(page);
669}
670
671static int f2fs_release_data_page(struct page *page, gfp_t wait)
672{
673 ClearPagePrivate(page);
674 return 0;
675}
676
677static int f2fs_set_data_page_dirty(struct page *page)
678{
679 struct address_space *mapping = page->mapping;
680 struct inode *inode = mapping->host;
681
682 SetPageUptodate(page);
683 if (!PageDirty(page)) {
684 __set_page_dirty_nobuffers(page);
685 set_dirty_dir_page(inode, page);
686 return 1;
687 }
688 return 0;
689}
690
691const struct address_space_operations f2fs_dblock_aops = {
692 .readpage = f2fs_read_data_page,
693 .readpages = f2fs_read_data_pages,
694 .writepage = f2fs_write_data_page,
695 .writepages = f2fs_write_data_pages,
696 .write_begin = f2fs_write_begin,
697 .write_end = nobh_write_end,
698 .set_page_dirty = f2fs_set_data_page_dirty,
699 .invalidatepage = f2fs_invalidate_data_page,
700 .releasepage = f2fs_release_data_page,
701 .direct_IO = f2fs_direct_IO,
702};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
new file mode 100644
index 000000000000..0e0380a588ad
--- /dev/null
+++ b/fs/f2fs/debug.c
@@ -0,0 +1,361 @@
1/*
2 * f2fs debugging statistics
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 * Copyright (c) 2012 Linux Foundation
7 * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/fs.h>
15#include <linux/backing-dev.h>
16#include <linux/proc_fs.h>
17#include <linux/f2fs_fs.h>
18#include <linux/blkdev.h>
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21
22#include "f2fs.h"
23#include "node.h"
24#include "segment.h"
25#include "gc.h"
26
27static LIST_HEAD(f2fs_stat_list);
28static struct dentry *debugfs_root;
29
30static void update_general_status(struct f2fs_sb_info *sbi)
31{
32 struct f2fs_stat_info *si = sbi->stat_info;
33 int i;
34
35	/* validity check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext;
37 si->total_ext = sbi->total_hit_ext;
38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
39 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
40 si->ndirty_dirs = sbi->n_dirty_dirs;
41 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
42 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
43 si->rsvd_segs = reserved_segments(sbi);
44 si->overp_segs = overprovision_segments(sbi);
45 si->valid_count = valid_user_blocks(sbi);
46 si->valid_node_count = valid_node_count(sbi);
47 si->valid_inode_count = valid_inode_count(sbi);
48 si->utilization = utilization(sbi);
49
50 si->free_segs = free_segments(sbi);
51 si->free_secs = free_sections(sbi);
52 si->prefree_count = prefree_segments(sbi);
53 si->dirty_count = dirty_segments(sbi);
54 si->node_pages = sbi->node_inode->i_mapping->nrpages;
55 si->meta_pages = sbi->meta_inode->i_mapping->nrpages;
56 si->nats = NM_I(sbi)->nat_cnt;
57 si->sits = SIT_I(sbi)->dirty_sentries;
58 si->fnids = NM_I(sbi)->fcnt;
59 si->bg_gc = sbi->bg_gc;
60 si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
61 * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
62 / 2;
63 si->util_valid = (int)(written_block_count(sbi) >>
64 sbi->log_blocks_per_seg)
65 * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
66 / 2;
67 si->util_invalid = 50 - si->util_free - si->util_valid;
68 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
69 struct curseg_info *curseg = CURSEG_I(sbi, i);
70 si->curseg[i] = curseg->segno;
71 si->cursec[i] = curseg->segno / sbi->segs_per_sec;
72 si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
73 }
74
75 for (i = 0; i < 2; i++) {
76 si->segment_count[i] = sbi->segment_count[i];
77 si->block_count[i] = sbi->block_count[i];
78 }
79}
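Note that util_free, util_valid and util_invalid are deliberately halved so the three sum to 50 rather than 100; stat_show() below draws one '-' per point, which keeps the distribution bar at most 50 characters wide. A standalone sketch of that arithmetic, with hypothetical per-partition counts in segments:

#include <stdio.h>

int main(void)
{
	/* hypothetical numbers, already scaled to segments */
	int user_segs = 1000, free_segs = 300, written_segs = 650;

	int util_free = free_segs * 100 / user_segs / 2;	/* 15 */
	int util_valid = written_segs * 100 / user_segs / 2;	/* 32 */
	int util_invalid = 50 - util_free - util_valid;		/* 3 */

	printf("valid=%d invalid=%d free=%d (sums to 50)\n",
	       util_valid, util_invalid, util_free);
	return 0;
}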
80
81/*
82 * This function calculates the BDF of every segment
83 */
84static void update_sit_info(struct f2fs_sb_info *sbi)
85{
86 struct f2fs_stat_info *si = sbi->stat_info;
87 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
88 struct sit_info *sit_i = SIT_I(sbi);
89 unsigned int segno, vblocks;
90 int ndirty = 0;
91
92 bimodal = 0;
93 total_vblocks = 0;
94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
95 hblks_per_sec = blks_per_sec / 2;
96 mutex_lock(&sit_i->sentry_lock);
97 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
98 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
99 dist = abs(vblocks - hblks_per_sec);
100 bimodal += dist * dist;
101
102 if (vblocks > 0 && vblocks < blks_per_sec) {
103 total_vblocks += vblocks;
104 ndirty++;
105 }
106 }
107 mutex_unlock(&sit_i->sentry_lock);
108 dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100;
109 si->bimodal = bimodal / dist;
110 if (si->dirty_count)
111 si->avg_vblocks = total_vblocks / ndirty;
112 else
113 si->avg_vblocks = 0;
114}
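In other words, the BDF grows with the squared distance of each section's valid-block count from the half-full point, normalized so that a fully bimodal layout (every section either empty or full) reads 100. A user-space sketch with hypothetical section sizes and counts:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* hypothetical: 4 sections, 512 blocks per section */
	unsigned vblocks[4] = { 0, 100, 412, 512 };
	unsigned blks_per_sec = 512, hblks = blks_per_sec / 2;
	unsigned nsec = 4, bimodal = 0, i;

	for (i = 0; i < nsec; i++) {
		unsigned dist = abs((int)vblocks[i] - (int)hblks);
		bimodal += dist * dist;
	}
	/* normalize: all-empty-or-full sections would read 100 */
	bimodal /= nsec * hblks * hblks / 100;
	printf("BDF = %u\n", bimodal);	/* 68 here; higher = more bimodal */
	return 0;
}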
115
116/*
117 * This function calculates the memory footprint.
118 */
119static void update_mem_info(struct f2fs_sb_info *sbi)
120{
121 struct f2fs_stat_info *si = sbi->stat_info;
122 unsigned npages;
123
124 if (si->base_mem)
125 goto get_cache;
126
127 si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
128 si->base_mem += 2 * sizeof(struct f2fs_inode_info);
129 si->base_mem += sizeof(*sbi->ckpt);
130
131 /* build sm */
132 si->base_mem += sizeof(struct f2fs_sm_info);
133
134 /* build sit */
135 si->base_mem += sizeof(struct sit_info);
136 si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry);
137 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
138 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi);
139 if (sbi->segs_per_sec > 1)
140 si->base_mem += sbi->total_sections *
141 sizeof(struct sec_entry);
142 si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
143
144 /* build free segmap */
145 si->base_mem += sizeof(struct free_segmap_info);
146 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
147 si->base_mem += f2fs_bitmap_size(sbi->total_sections);
148
149 /* build curseg */
150 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
151 si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE;
152
153 /* build dirty segmap */
154 si->base_mem += sizeof(struct dirty_seglist_info);
155 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
156 si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi));
157
158	/* build nm */
159 si->base_mem += sizeof(struct f2fs_nm_info);
160 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
161
162 /* build gc */
163 si->base_mem += sizeof(struct f2fs_gc_kthread);
164
165get_cache:
166 /* free nids */
167 si->cache_mem = NM_I(sbi)->fcnt;
168 si->cache_mem += NM_I(sbi)->nat_cnt;
169 npages = sbi->node_inode->i_mapping->nrpages;
170 si->cache_mem += npages << PAGE_CACHE_SHIFT;
171 npages = sbi->meta_inode->i_mapping->nrpages;
172 si->cache_mem += npages << PAGE_CACHE_SHIFT;
173 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
174 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
175}
176
177static int stat_show(struct seq_file *s, void *v)
178{
179 struct f2fs_stat_info *si, *next;
180 int i = 0;
181 int j;
182
183 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
184
185 mutex_lock(&si->stat_lock);
186 if (!si->sbi) {
187 mutex_unlock(&si->stat_lock);
188 continue;
189 }
190 update_general_status(si->sbi);
191
192 seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++);
193 seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ",
194 si->nat_area_segs, si->sit_area_segs);
195 seq_printf(s, "[SSA: %d] [MAIN: %d",
196 si->ssa_area_segs, si->main_area_segs);
197 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
198 si->overp_segs, si->rsvd_segs);
199 seq_printf(s, "Utilization: %d%% (%d valid blocks)\n",
200 si->utilization, si->valid_count);
201 seq_printf(s, " - Node: %u (Inode: %u, ",
202 si->valid_node_count, si->valid_inode_count);
203 seq_printf(s, "Other: %u)\n - Data: %u\n",
204 si->valid_node_count - si->valid_inode_count,
205 si->valid_count - si->valid_node_count);
206 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
207 si->main_area_segs, si->main_area_sections,
208 si->main_area_zones);
209 seq_printf(s, " - COLD data: %d, %d, %d\n",
210 si->curseg[CURSEG_COLD_DATA],
211 si->cursec[CURSEG_COLD_DATA],
212 si->curzone[CURSEG_COLD_DATA]);
213 seq_printf(s, " - WARM data: %d, %d, %d\n",
214 si->curseg[CURSEG_WARM_DATA],
215 si->cursec[CURSEG_WARM_DATA],
216 si->curzone[CURSEG_WARM_DATA]);
217 seq_printf(s, " - HOT data: %d, %d, %d\n",
218 si->curseg[CURSEG_HOT_DATA],
219 si->cursec[CURSEG_HOT_DATA],
220 si->curzone[CURSEG_HOT_DATA]);
221 seq_printf(s, " - Dir dnode: %d, %d, %d\n",
222 si->curseg[CURSEG_HOT_NODE],
223 si->cursec[CURSEG_HOT_NODE],
224 si->curzone[CURSEG_HOT_NODE]);
225 seq_printf(s, " - File dnode: %d, %d, %d\n",
226 si->curseg[CURSEG_WARM_NODE],
227 si->cursec[CURSEG_WARM_NODE],
228 si->curzone[CURSEG_WARM_NODE]);
229 seq_printf(s, " - Indir nodes: %d, %d, %d\n",
230 si->curseg[CURSEG_COLD_NODE],
231 si->cursec[CURSEG_COLD_NODE],
232 si->curzone[CURSEG_COLD_NODE]);
233 seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
234 si->main_area_segs - si->dirty_count -
235 si->prefree_count - si->free_segs,
236 si->dirty_count);
237 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
238 si->prefree_count, si->free_segs, si->free_secs);
239 seq_printf(s, "GC calls: %d (BG: %d)\n",
240 si->call_count, si->bg_gc);
241 seq_printf(s, " - data segments : %d\n", si->data_segs);
242 seq_printf(s, " - node segments : %d\n", si->node_segs);
243 seq_printf(s, "Try to move %d blocks\n", si->tot_blks);
244 seq_printf(s, " - data blocks : %d\n", si->data_blks);
245 seq_printf(s, " - node blocks : %d\n", si->node_blks);
246 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
247 si->hit_ext, si->total_ext);
248 seq_printf(s, "\nBalancing F2FS Async:\n");
249 seq_printf(s, " - nodes %4d in %4d\n",
250 si->ndirty_node, si->node_pages);
251 seq_printf(s, " - dents %4d in dirs:%4d\n",
252 si->ndirty_dent, si->ndirty_dirs);
253 seq_printf(s, " - meta %4d in %4d\n",
254 si->ndirty_meta, si->meta_pages);
255 seq_printf(s, " - NATs %5d > %lu\n",
256 si->nats, NM_WOUT_THRESHOLD);
257 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
258 si->sits, si->fnids);
259 seq_printf(s, "\nDistribution of User Blocks:");
260 seq_printf(s, " [ valid | invalid | free ]\n");
261 seq_printf(s, " [");
262
263 for (j = 0; j < si->util_valid; j++)
264 seq_printf(s, "-");
265 seq_printf(s, "|");
266
267 for (j = 0; j < si->util_invalid; j++)
268 seq_printf(s, "-");
269 seq_printf(s, "|");
270
271 for (j = 0; j < si->util_free; j++)
272 seq_printf(s, "-");
273 seq_printf(s, "]\n\n");
274 seq_printf(s, "SSR: %u blocks in %u segments\n",
275 si->block_count[SSR], si->segment_count[SSR]);
276 seq_printf(s, "LFS: %u blocks in %u segments\n",
277 si->block_count[LFS], si->segment_count[LFS]);
278
279 /* segment usage info */
280 update_sit_info(si->sbi);
281 seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n",
282 si->bimodal, si->avg_vblocks);
283
284 /* memory footprint */
285 update_mem_info(si->sbi);
286 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
287 (si->base_mem + si->cache_mem) >> 10,
288 si->base_mem >> 10, si->cache_mem >> 10);
289 mutex_unlock(&si->stat_lock);
290 }
291 return 0;
292}
293
294static int stat_open(struct inode *inode, struct file *file)
295{
296 return single_open(file, stat_show, inode->i_private);
297}
298
299static const struct file_operations stat_fops = {
300 .open = stat_open,
301 .read = seq_read,
302 .llseek = seq_lseek,
303 .release = single_release,
304};
305
306static int init_stats(struct f2fs_sb_info *sbi)
307{
308 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
309 struct f2fs_stat_info *si;
310
311 sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
312 if (!sbi->stat_info)
313 return -ENOMEM;
314
315 si = sbi->stat_info;
316 mutex_init(&si->stat_lock);
317 list_add_tail(&si->stat_list, &f2fs_stat_list);
318
319 si->all_area_segs = le32_to_cpu(raw_super->segment_count);
320 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
321 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
322 si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa);
323 si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
324 si->main_area_sections = le32_to_cpu(raw_super->section_count);
325 si->main_area_zones = si->main_area_sections /
326 le32_to_cpu(raw_super->secs_per_zone);
327 si->sbi = sbi;
328 return 0;
329}
330
331int f2fs_build_stats(struct f2fs_sb_info *sbi)
332{
333 int retval;
334
335 retval = init_stats(sbi);
336 if (retval)
337 return retval;
338
339 if (!debugfs_root)
340 debugfs_root = debugfs_create_dir("f2fs", NULL);
341
342 debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
343 return 0;
344}
345
346void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
347{
348 struct f2fs_stat_info *si = sbi->stat_info;
349
350 list_del(&si->stat_list);
351 mutex_lock(&si->stat_lock);
352 si->sbi = NULL;
353 mutex_unlock(&si->stat_lock);
354 kfree(sbi->stat_info);
355}
356
357void destroy_root_stats(void)
358{
359 debugfs_remove_recursive(debugfs_root);
360 debugfs_root = NULL;
361}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
new file mode 100644
index 000000000000..b4e24f32b54e
--- /dev/null
+++ b/fs/f2fs/dir.c
@@ -0,0 +1,672 @@
1/*
2 * fs/f2fs/dir.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include "f2fs.h"
14#include "acl.h"
15
16static unsigned long dir_blocks(struct inode *inode)
17{
18 return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
19 >> PAGE_CACHE_SHIFT;
20}
21
22static unsigned int dir_buckets(unsigned int level)
23{
24 if (level < MAX_DIR_HASH_DEPTH / 2)
25 return 1 << level;
26 else
27 return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
28}
29
30static unsigned int bucket_blocks(unsigned int level)
31{
32 if (level < MAX_DIR_HASH_DEPTH / 2)
33 return 2;
34 else
35 return 4;
36}
37
38static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
39 [F2FS_FT_UNKNOWN] = DT_UNKNOWN,
40 [F2FS_FT_REG_FILE] = DT_REG,
41 [F2FS_FT_DIR] = DT_DIR,
42 [F2FS_FT_CHRDEV] = DT_CHR,
43 [F2FS_FT_BLKDEV] = DT_BLK,
44 [F2FS_FT_FIFO] = DT_FIFO,
45 [F2FS_FT_SOCK] = DT_SOCK,
46 [F2FS_FT_SYMLINK] = DT_LNK,
47};
48
49#define S_SHIFT 12
50static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
51 [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
52 [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
53 [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV,
54 [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV,
55 [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO,
56 [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK,
57 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
58};
59
60static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
61{
62 mode_t mode = inode->i_mode;
63 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
64}
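For reference, the table lookup above reduces to shifting the file-type bits of i_mode down by S_SHIFT; a minimal sketch (the mode value is hypothetical):

#include <stdio.h>
#include <sys/stat.h>

#define S_SHIFT 12

int main(void)
{
	mode_t mode = S_IFDIR | 0755;	/* hypothetical directory mode */

	/* S_IFDIR is 0040000, so this prints 4 (the F2FS_FT_DIR slot) */
	printf("f2fs_type_by_mode index: %u\n",
	       (unsigned)((mode & S_IFMT) >> S_SHIFT));
	return 0;
}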
65
66static unsigned long dir_block_index(unsigned int level, unsigned int idx)
67{
68 unsigned long i;
69 unsigned long bidx = 0;
70
71 for (i = 0; i < level; i++)
72 bidx += dir_buckets(i) * bucket_blocks(i);
73 bidx += idx * bucket_blocks(level);
74 return bidx;
75}
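Taken together, these three helpers define the multi-level hash table layout: each level appends dir_buckets(level) buckets of bucket_blocks(level) blocks after all previous levels. A user-space sketch of the resulting block indices, assuming MAX_DIR_HASH_DEPTH is 63 as in include/linux/f2fs_fs.h and a hypothetical hash value:

#include <stdio.h>

#define MAX_DIR_HASH_DEPTH 63	/* assumed, from include/linux/f2fs_fs.h */

static unsigned int dir_buckets(unsigned int level)
{
	return level < MAX_DIR_HASH_DEPTH / 2 ?
		1u << level : 1u << (MAX_DIR_HASH_DEPTH / 2 - 1);
}

static unsigned int bucket_blocks(unsigned int level)
{
	return level < MAX_DIR_HASH_DEPTH / 2 ? 2 : 4;
}

static unsigned long dir_block_index(unsigned int level, unsigned int idx)
{
	unsigned long i, bidx = 0;

	for (i = 0; i < level; i++)
		bidx += dir_buckets(i) * bucket_blocks(i);
	return bidx + idx * bucket_blocks(level);
}

int main(void)
{
	unsigned int hash = 0x9c3d;	/* hypothetical f2fs_dentry_hash value */
	unsigned int level;

	for (level = 0; level < 4; level++)
		printf("level %u: bucket %u starts at block %lu\n",
		       level, hash % dir_buckets(level),
		       dir_block_index(level, hash % dir_buckets(level)));
	return 0;
}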
76
77static bool early_match_name(const char *name, int namelen,
78 f2fs_hash_t namehash, struct f2fs_dir_entry *de)
79{
80 if (le16_to_cpu(de->name_len) != namelen)
81 return false;
82
83 if (de->hash_code != namehash)
84 return false;
85
86 return true;
87}
88
89static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
90 const char *name, int namelen, int *max_slots,
91 f2fs_hash_t namehash, struct page **res_page)
92{
93 struct f2fs_dir_entry *de;
94 unsigned long bit_pos, end_pos, next_pos;
95 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
96 int slots;
97
98 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
99 NR_DENTRY_IN_BLOCK, 0);
100 while (bit_pos < NR_DENTRY_IN_BLOCK) {
101 de = &dentry_blk->dentry[bit_pos];
102 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
103
104 if (early_match_name(name, namelen, namehash, de)) {
105 if (!memcmp(dentry_blk->filename[bit_pos],
106 name, namelen)) {
107 *res_page = dentry_page;
108 goto found;
109 }
110 }
111 next_pos = bit_pos + slots;
112 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
113 NR_DENTRY_IN_BLOCK, next_pos);
114 if (bit_pos >= NR_DENTRY_IN_BLOCK)
115 end_pos = NR_DENTRY_IN_BLOCK;
116 else
117 end_pos = bit_pos;
118 if (*max_slots < end_pos - next_pos)
119 *max_slots = end_pos - next_pos;
120 }
121
122 de = NULL;
123 kunmap(dentry_page);
124found:
125 return de;
126}
127
128static struct f2fs_dir_entry *find_in_level(struct inode *dir,
129 unsigned int level, const char *name, int namelen,
130 f2fs_hash_t namehash, struct page **res_page)
131{
132 int s = GET_DENTRY_SLOTS(namelen);
133 unsigned int nbucket, nblock;
134 unsigned int bidx, end_block;
135 struct page *dentry_page;
136 struct f2fs_dir_entry *de = NULL;
137 bool room = false;
138 int max_slots = 0;
139
140 BUG_ON(level > MAX_DIR_HASH_DEPTH);
141
142 nbucket = dir_buckets(level);
143 nblock = bucket_blocks(level);
144
145 bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
146 end_block = bidx + nblock;
147
148 for (; bidx < end_block; bidx++) {
149 /* no need to allocate new dentry pages to all the indices */
150 dentry_page = find_data_page(dir, bidx);
151 if (IS_ERR(dentry_page)) {
152 room = true;
153 continue;
154 }
155
156 de = find_in_block(dentry_page, name, namelen,
157 &max_slots, namehash, res_page);
158 if (de)
159 break;
160
161 if (max_slots >= s)
162 room = true;
163 f2fs_put_page(dentry_page, 0);
164 }
165
166 if (!de && room && F2FS_I(dir)->chash != namehash) {
167 F2FS_I(dir)->chash = namehash;
168 F2FS_I(dir)->clevel = level;
169 }
170
171 return de;
172}
173
174/*
175 * Find an entry in the specified directory with the wanted name.
176 * It returns the page where the entry was found (as a parameter - res_page),
177 * and the entry itself. Page is returned mapped and unlocked.
178 * Entry is guaranteed to be valid.
179 */
180struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
181 struct qstr *child, struct page **res_page)
182{
183 const char *name = child->name;
184 int namelen = child->len;
185 unsigned long npages = dir_blocks(dir);
186 struct f2fs_dir_entry *de = NULL;
187 f2fs_hash_t name_hash;
188 unsigned int max_depth;
189 unsigned int level;
190
191 if (npages == 0)
192 return NULL;
193
194 *res_page = NULL;
195
196 name_hash = f2fs_dentry_hash(name, namelen);
197 max_depth = F2FS_I(dir)->i_current_depth;
198
199 for (level = 0; level < max_depth; level++) {
200 de = find_in_level(dir, level, name,
201 namelen, name_hash, res_page);
202 if (de)
203 break;
204 }
205 if (!de && F2FS_I(dir)->chash != name_hash) {
206 F2FS_I(dir)->chash = name_hash;
207 F2FS_I(dir)->clevel = level - 1;
208 }
209 return de;
210}
211
212struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
213{
214 struct page *page = NULL;
215 struct f2fs_dir_entry *de = NULL;
216 struct f2fs_dentry_block *dentry_blk = NULL;
217
218 page = get_lock_data_page(dir, 0);
219 if (IS_ERR(page))
220 return NULL;
221
222 dentry_blk = kmap(page);
223 de = &dentry_blk->dentry[1];
224 *p = page;
225 unlock_page(page);
226 return de;
227}
228
229ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
230{
231 ino_t res = 0;
232 struct f2fs_dir_entry *de;
233 struct page *page;
234
235 de = f2fs_find_entry(dir, qstr, &page);
236 if (de) {
237 res = le32_to_cpu(de->ino);
238 kunmap(page);
239 f2fs_put_page(page, 0);
240 }
241
242 return res;
243}
244
245void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
246 struct page *page, struct inode *inode)
247{
248 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
249
250 mutex_lock_op(sbi, DENTRY_OPS);
251 lock_page(page);
252 wait_on_page_writeback(page);
253 de->ino = cpu_to_le32(inode->i_ino);
254 set_de_type(de, inode);
255 kunmap(page);
256 set_page_dirty(page);
257 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
258 mark_inode_dirty(dir);
259
260 /* update parent inode number before releasing dentry page */
261 F2FS_I(inode)->i_pino = dir->i_ino;
262
263 f2fs_put_page(page, 1);
264 mutex_unlock_op(sbi, DENTRY_OPS);
265}
266
267void init_dent_inode(struct dentry *dentry, struct page *ipage)
268{
269 struct f2fs_node *rn;
270
271 if (IS_ERR(ipage))
272 return;
273
274 wait_on_page_writeback(ipage);
275
276 /* copy dentry info. to this inode page */
277 rn = (struct f2fs_node *)page_address(ipage);
278 rn->i.i_namelen = cpu_to_le32(dentry->d_name.len);
279 memcpy(rn->i.i_name, dentry->d_name.name, dentry->d_name.len);
280 set_page_dirty(ipage);
281}
282
283static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
284{
285 struct inode *dir = dentry->d_parent->d_inode;
286
287 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
288 int err;
289 err = new_inode_page(inode, dentry);
290 if (err)
291 return err;
292
293 if (S_ISDIR(inode->i_mode)) {
294 err = f2fs_make_empty(inode, dir);
295 if (err) {
296 remove_inode_page(inode);
297 return err;
298 }
299 }
300
301 err = f2fs_init_acl(inode, dir);
302 if (err) {
303 remove_inode_page(inode);
304 return err;
305 }
306 } else {
307 struct page *ipage;
308 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
309 if (IS_ERR(ipage))
310 return PTR_ERR(ipage);
311 init_dent_inode(dentry, ipage);
312 f2fs_put_page(ipage, 1);
313 }
314 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
315 inc_nlink(inode);
316 f2fs_write_inode(inode, NULL);
317 }
318 return 0;
319}
320
321static void update_parent_metadata(struct inode *dir, struct inode *inode,
322 unsigned int current_depth)
323{
324 bool need_dir_update = false;
325
326 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
327 if (S_ISDIR(inode->i_mode)) {
328 inc_nlink(dir);
329 need_dir_update = true;
330 }
331 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
332 }
333 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
334 if (F2FS_I(dir)->i_current_depth != current_depth) {
335 F2FS_I(dir)->i_current_depth = current_depth;
336 need_dir_update = true;
337 }
338
339 if (need_dir_update)
340 f2fs_write_inode(dir, NULL);
341 else
342 mark_inode_dirty(dir);
343
344 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
345 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
346}
347
348static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
349{
350 int bit_start = 0;
351 int zero_start, zero_end;
352next:
353 zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap,
354 NR_DENTRY_IN_BLOCK,
355 bit_start);
356 if (zero_start >= NR_DENTRY_IN_BLOCK)
357 return NR_DENTRY_IN_BLOCK;
358
359 zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap,
360 NR_DENTRY_IN_BLOCK,
361 zero_start);
362 if (zero_end - zero_start >= slots)
363 return zero_start;
364
365 bit_start = zero_end + 1;
366
367 if (zero_end + 1 >= NR_DENTRY_IN_BLOCK)
368 return NR_DENTRY_IN_BLOCK;
369 goto next;
370}
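The same scan, written as a user-space analogue with one byte per slot instead of the packed little-endian bitmap the kernel code uses, may make the run-finding logic easier to follow; the block size and occupancy pattern are hypothetical:

#include <stdio.h>

#define NSLOTS 16	/* toy block with 16 dentry slots */

/* toy analogue of room_for_filename(): first run of 'slots' free slots */
static int find_room(const unsigned char *bitmap, int slots)
{
	int start = 0;

	while (start < NSLOTS) {
		int zs = start, ze;

		while (zs < NSLOTS && bitmap[zs])
			zs++;			/* skip occupied slots */
		if (zs >= NSLOTS)
			return NSLOTS;
		ze = zs;
		while (ze < NSLOTS && !bitmap[ze])
			ze++;			/* measure the free run */
		if (ze - zs >= slots)
			return zs;
		start = ze + 1;
	}
	return NSLOTS;
}

int main(void)
{
	/* occupied: 0-2 and 5; free runs: [3,4] and [6,15] */
	unsigned char bm[NSLOTS] = { 1, 1, 1, 0, 0, 1 };

	printf("2 slots -> %d, 3 slots -> %d\n",
	       find_room(bm, 2), find_room(bm, 3));	/* 3 and 6 */
	return 0;
}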
371
372int f2fs_add_link(struct dentry *dentry, struct inode *inode)
373{
374 unsigned int bit_pos;
375 unsigned int level;
376 unsigned int current_depth;
377 unsigned long bidx, block;
378 f2fs_hash_t dentry_hash;
379 struct f2fs_dir_entry *de;
380 unsigned int nbucket, nblock;
381 struct inode *dir = dentry->d_parent->d_inode;
382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
383 const char *name = dentry->d_name.name;
384 int namelen = dentry->d_name.len;
385 struct page *dentry_page = NULL;
386 struct f2fs_dentry_block *dentry_blk = NULL;
387 int slots = GET_DENTRY_SLOTS(namelen);
388 int err = 0;
389 int i;
390
391 dentry_hash = f2fs_dentry_hash(name, dentry->d_name.len);
392 level = 0;
393 current_depth = F2FS_I(dir)->i_current_depth;
394 if (F2FS_I(dir)->chash == dentry_hash) {
395 level = F2FS_I(dir)->clevel;
396 F2FS_I(dir)->chash = 0;
397 }
398
399start:
400 if (current_depth == MAX_DIR_HASH_DEPTH)
401 return -ENOSPC;
402
403 /* Increase the depth, if required */
404 if (level == current_depth)
405 ++current_depth;
406
407 nbucket = dir_buckets(level);
408 nblock = bucket_blocks(level);
409
410 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
411
412 for (block = bidx; block <= (bidx + nblock - 1); block++) {
413 mutex_lock_op(sbi, DENTRY_OPS);
414 dentry_page = get_new_data_page(dir, block, true);
415 if (IS_ERR(dentry_page)) {
416 mutex_unlock_op(sbi, DENTRY_OPS);
417 return PTR_ERR(dentry_page);
418 }
419
420 dentry_blk = kmap(dentry_page);
421 bit_pos = room_for_filename(dentry_blk, slots);
422 if (bit_pos < NR_DENTRY_IN_BLOCK)
423 goto add_dentry;
424
425 kunmap(dentry_page);
426 f2fs_put_page(dentry_page, 1);
427 mutex_unlock_op(sbi, DENTRY_OPS);
428 }
429
430	/* Move to the next level to find an empty slot for the new dentry */
431 ++level;
432 goto start;
433add_dentry:
434 err = init_inode_metadata(inode, dentry);
435 if (err)
436 goto fail;
437
438 wait_on_page_writeback(dentry_page);
439
440 de = &dentry_blk->dentry[bit_pos];
441 de->hash_code = dentry_hash;
442 de->name_len = cpu_to_le16(namelen);
443 memcpy(dentry_blk->filename[bit_pos], name, namelen);
444 de->ino = cpu_to_le32(inode->i_ino);
445 set_de_type(de, inode);
446 for (i = 0; i < slots; i++)
447 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
448 set_page_dirty(dentry_page);
449
450 update_parent_metadata(dir, inode, current_depth);
451
452 /* update parent inode number before releasing dentry page */
453 F2FS_I(inode)->i_pino = dir->i_ino;
454fail:
455 kunmap(dentry_page);
456 f2fs_put_page(dentry_page, 1);
457 mutex_unlock_op(sbi, DENTRY_OPS);
458 return err;
459}
460
461/*
462 * It only removes the dentry from the dentry page; the corresponding name
463 * entry in the name page does not need to be touched during deletion.
464 */
465void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
466 struct inode *inode)
467{
468 struct f2fs_dentry_block *dentry_blk;
469 unsigned int bit_pos;
470 struct address_space *mapping = page->mapping;
471 struct inode *dir = mapping->host;
472 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
473 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
474 void *kaddr = page_address(page);
475 int i;
476
477 mutex_lock_op(sbi, DENTRY_OPS);
478
479 lock_page(page);
480 wait_on_page_writeback(page);
481
482 dentry_blk = (struct f2fs_dentry_block *)kaddr;
483 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
484 for (i = 0; i < slots; i++)
485 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
486
487 /* Let's check and deallocate this dentry page */
488 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
489 NR_DENTRY_IN_BLOCK,
490 0);
491 kunmap(page); /* kunmap - pair of f2fs_find_entry */
492 set_page_dirty(page);
493
494 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
495
496 if (inode && S_ISDIR(inode->i_mode)) {
497 drop_nlink(dir);
498 f2fs_write_inode(dir, NULL);
499 } else {
500 mark_inode_dirty(dir);
501 }
502
503 if (inode) {
504 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
505 drop_nlink(inode);
506 if (S_ISDIR(inode->i_mode)) {
507 drop_nlink(inode);
508 i_size_write(inode, 0);
509 }
510 f2fs_write_inode(inode, NULL);
511 if (inode->i_nlink == 0)
512 add_orphan_inode(sbi, inode->i_ino);
513 }
514
515 if (bit_pos == NR_DENTRY_IN_BLOCK) {
516 truncate_hole(dir, page->index, page->index + 1);
517 clear_page_dirty_for_io(page);
518 ClearPageUptodate(page);
519 dec_page_count(sbi, F2FS_DIRTY_DENTS);
520 inode_dec_dirty_dents(dir);
521 }
522 f2fs_put_page(page, 1);
523
524 mutex_unlock_op(sbi, DENTRY_OPS);
525}
526
527int f2fs_make_empty(struct inode *inode, struct inode *parent)
528{
529 struct page *dentry_page;
530 struct f2fs_dentry_block *dentry_blk;
531 struct f2fs_dir_entry *de;
532 void *kaddr;
533
534 dentry_page = get_new_data_page(inode, 0, true);
535 if (IS_ERR(dentry_page))
536 return PTR_ERR(dentry_page);
537
538 kaddr = kmap_atomic(dentry_page);
539 dentry_blk = (struct f2fs_dentry_block *)kaddr;
540
541 de = &dentry_blk->dentry[0];
542 de->name_len = cpu_to_le16(1);
543 de->hash_code = 0;
544 de->ino = cpu_to_le32(inode->i_ino);
545 memcpy(dentry_blk->filename[0], ".", 1);
546 set_de_type(de, inode);
547
548 de = &dentry_blk->dentry[1];
549 de->hash_code = 0;
550 de->name_len = cpu_to_le16(2);
551 de->ino = cpu_to_le32(parent->i_ino);
552 memcpy(dentry_blk->filename[1], "..", 2);
553 set_de_type(de, inode);
554
555 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
556 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
557 kunmap_atomic(kaddr);
558
559 set_page_dirty(dentry_page);
560 f2fs_put_page(dentry_page, 1);
561 return 0;
562}
563
564bool f2fs_empty_dir(struct inode *dir)
565{
566 unsigned long bidx;
567 struct page *dentry_page;
568 unsigned int bit_pos;
569 struct f2fs_dentry_block *dentry_blk;
570 unsigned long nblock = dir_blocks(dir);
571
572 for (bidx = 0; bidx < nblock; bidx++) {
573 void *kaddr;
574 dentry_page = get_lock_data_page(dir, bidx);
575 if (IS_ERR(dentry_page)) {
576 if (PTR_ERR(dentry_page) == -ENOENT)
577 continue;
578 else
579 return false;
580 }
581
582 kaddr = kmap_atomic(dentry_page);
583 dentry_blk = (struct f2fs_dentry_block *)kaddr;
584 if (bidx == 0)
585 bit_pos = 2;
586 else
587 bit_pos = 0;
588 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
589 NR_DENTRY_IN_BLOCK,
590 bit_pos);
591 kunmap_atomic(kaddr);
592
593 f2fs_put_page(dentry_page, 1);
594
595 if (bit_pos < NR_DENTRY_IN_BLOCK)
596 return false;
597 }
598 return true;
599}
600
601static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
602{
603 unsigned long pos = file->f_pos;
604 struct inode *inode = file->f_dentry->d_inode;
605 unsigned long npages = dir_blocks(inode);
606 unsigned char *types = NULL;
607 unsigned int bit_pos = 0, start_bit_pos = 0;
608 int over = 0;
609 struct f2fs_dentry_block *dentry_blk = NULL;
610 struct f2fs_dir_entry *de = NULL;
611 struct page *dentry_page = NULL;
612 unsigned int n = 0;
613 unsigned char d_type = DT_UNKNOWN;
614 int slots;
615
616 types = f2fs_filetype_table;
617 bit_pos = (pos % NR_DENTRY_IN_BLOCK);
618 n = (pos / NR_DENTRY_IN_BLOCK);
619
620 for ( ; n < npages; n++) {
621 dentry_page = get_lock_data_page(inode, n);
622 if (IS_ERR(dentry_page))
623 continue;
624
625 start_bit_pos = bit_pos;
626 dentry_blk = kmap(dentry_page);
627 while (bit_pos < NR_DENTRY_IN_BLOCK) {
628 d_type = DT_UNKNOWN;
629 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
630 NR_DENTRY_IN_BLOCK,
631 bit_pos);
632 if (bit_pos >= NR_DENTRY_IN_BLOCK)
633 break;
634
635 de = &dentry_blk->dentry[bit_pos];
636 if (types && de->file_type < F2FS_FT_MAX)
637 d_type = types[de->file_type];
638
639 over = filldir(dirent,
640 dentry_blk->filename[bit_pos],
641 le16_to_cpu(de->name_len),
642 (n * NR_DENTRY_IN_BLOCK) + bit_pos,
643 le32_to_cpu(de->ino), d_type);
644 if (over) {
645 file->f_pos += bit_pos - start_bit_pos;
646 goto success;
647 }
648 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
649 bit_pos += slots;
650 }
651 bit_pos = 0;
652 file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK;
653 kunmap(dentry_page);
654 f2fs_put_page(dentry_page, 1);
655 dentry_page = NULL;
656 }
657success:
658 if (dentry_page && !IS_ERR(dentry_page)) {
659 kunmap(dentry_page);
660 f2fs_put_page(dentry_page, 1);
661 }
662
663 return 0;
664}
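file->f_pos thus encodes a flat dentry-slot cursor: block n covers positions [n * NR_DENTRY_IN_BLOCK, (n + 1) * NR_DENTRY_IN_BLOCK). A minimal sketch of how a position is decoded on re-entry, assuming NR_DENTRY_IN_BLOCK is 214 as in include/linux/f2fs_fs.h:

#include <stdio.h>

#define NR_DENTRY_IN_BLOCK 214	/* assumed, from include/linux/f2fs_fs.h */

int main(void)
{
	unsigned long pos = 500;	/* hypothetical file->f_pos */

	/* f2fs_readdir() resumes at this block and slot */
	printf("block=%lu slot=%lu\n",
	       pos / NR_DENTRY_IN_BLOCK, pos % NR_DENTRY_IN_BLOCK);
	return 0;
}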
665
666const struct file_operations f2fs_dir_operations = {
667 .llseek = generic_file_llseek,
668 .read = generic_read_dir,
669 .readdir = f2fs_readdir,
670 .fsync = f2fs_sync_file,
671 .unlocked_ioctl = f2fs_ioctl,
672};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
new file mode 100644
index 000000000000..a18d63db2fb6
--- /dev/null
+++ b/fs/f2fs/f2fs.h
@@ -0,0 +1,1083 @@
1/*
2 * fs/f2fs/f2fs.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef _LINUX_F2FS_H
12#define _LINUX_F2FS_H
13
14#include <linux/types.h>
15#include <linux/page-flags.h>
16#include <linux/buffer_head.h>
17#include <linux/slab.h>
18#include <linux/crc32.h>
19#include <linux/magic.h>
20
21/*
22 * For mount options
23 */
24#define F2FS_MOUNT_BG_GC 0x00000001
25#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
26#define F2FS_MOUNT_DISCARD 0x00000004
27#define F2FS_MOUNT_NOHEAP 0x00000008
28#define F2FS_MOUNT_XATTR_USER 0x00000010
29#define F2FS_MOUNT_POSIX_ACL 0x00000020
30#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
31
32#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
33#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
34#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option)
35
36#define ver_after(a, b) (typecheck(unsigned long long, a) && \
37 typecheck(unsigned long long, b) && \
38 ((long long)((a) - (b)) > 0))
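The signed-difference trick in ver_after() stays correct even if the 64-bit version counter wraps around; a quick sketch:

#include <stdio.h>

int main(void)
{
	/* b is 2 versions before wraparound; a is 7 versions after b */
	unsigned long long a = 5, b = 0xFFFFFFFFFFFFFFFEull;

	/* (a - b) wraps to 7, which is positive as a signed value */
	printf("a after b: %d\n", (long long)(a - b) > 0);	/* prints 1 */
	return 0;
}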
39
40typedef u64 block_t;
41typedef u32 nid_t;
42
43struct f2fs_mount_info {
44 unsigned int opt;
45};
46
47static inline __u32 f2fs_crc32(void *buff, size_t len)
48{
49 return crc32_le(F2FS_SUPER_MAGIC, buff, len);
50}
51
52static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size)
53{
54 return f2fs_crc32(buff, buff_size) == blk_crc;
55}
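crc32_le() here is the kernel's reflected CRC-32 seeded with F2FS_SUPER_MAGIC instead of the usual ~0, so f2fs checksums are not interchangeable with plain zlib CRCs. A user-space sketch, assuming F2FS_SUPER_MAGIC is 0xF2F52010 as in include/linux/magic.h and using a bitwise CRC that matches crc32_le() semantics:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define F2FS_SUPER_MAGIC 0xF2F52010u	/* assumed, from linux/magic.h */

/* bitwise reflected CRC-32 (poly 0xEDB88320), seed in, no final xor */
static uint32_t crc32_le(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (crc & 1 ? 0xEDB88320u : 0);
	}
	return crc;
}

int main(void)
{
	const char blk[] = "checkpoint payload";	/* hypothetical data */
	uint32_t crc = crc32_le(F2FS_SUPER_MAGIC, blk, strlen(blk));

	/* f2fs_crc_valid() simply recomputes and compares */
	printf("crc=0x%08x valid=%d\n", crc,
	       crc32_le(F2FS_SUPER_MAGIC, blk, strlen(blk)) == crc);
	return 0;
}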
56
57/*
58 * For checkpoint manager
59 */
60enum {
61 NAT_BITMAP,
62 SIT_BITMAP
63};
64
65/* for the list of orphan inodes */
66struct orphan_inode_entry {
67 struct list_head list; /* list head */
68 nid_t ino; /* inode number */
69};
70
71/* for the list of directory inodes */
72struct dir_inode_entry {
73 struct list_head list; /* list head */
74 struct inode *inode; /* vfs inode pointer */
75};
76
77/* for the list of fsync inodes, used only during recovery */
78struct fsync_inode_entry {
79 struct list_head list; /* list head */
80 struct inode *inode; /* vfs inode pointer */
81 block_t blkaddr; /* block address locating the last inode */
82};
83
84#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats))
85#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits))
86
87#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne)
88#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid)
89#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se)
90#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno)
91
92static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
93{
94 int before = nats_in_cursum(rs);
95 rs->n_nats = cpu_to_le16(before + i);
96 return before;
97}
98
99static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
100{
101 int before = sits_in_cursum(rs);
102 rs->n_sits = cpu_to_le16(before + i);
103 return before;
104}
105
106/*
107 * For INODE and NODE manager
108 */
109#define XATTR_NODE_OFFSET (-1) /*
110 * store xattrs in one node block per
111 * file, keeping -1 as its node offset to
112 * distinguish it from index node blocks.
113 */
114#define RDONLY_NODE 1 /*
115 * specify a read-only mode when getting
116 * a node block. 0 is read-write mode.
117 * used by get_dnode_of_data().
118 */
119#define F2FS_LINK_MAX 32000 /* maximum link count per file */
120
121/* for in-memory extent cache entry */
122struct extent_info {
123 rwlock_t ext_lock; /* rwlock for consistency */
124 unsigned int fofs; /* start offset in a file */
125 u32 blk_addr; /* start block address of the extent */
126	unsigned int len;	/* length of the extent */
127};
128
129/*
130 * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
131 */
132#define FADVISE_COLD_BIT 0x01
133
134struct f2fs_inode_info {
135 struct inode vfs_inode; /* serve a vfs inode */
136 unsigned long i_flags; /* keep an inode flags for ioctl */
137 unsigned char i_advise; /* use to give file attribute hints */
138 unsigned int i_current_depth; /* use only in directory structure */
139 unsigned int i_pino; /* parent inode number */
140 umode_t i_acl_mode; /* keep file acl mode temporarily */
141
142	/* Fields below are used internally by f2fs */
143 unsigned long flags; /* use to pass per-file flags */
144	unsigned long long data_version;/* latest version of data for fsync */
145 atomic_t dirty_dents; /* # of dirty dentry pages */
146 f2fs_hash_t chash; /* hash value of given file name */
147 unsigned int clevel; /* maximum level of given file name */
148 nid_t i_xattr_nid; /* node id that contains xattrs */
149 struct extent_info ext; /* in-memory extent cache entry */
150};
151
152static inline void get_extent_info(struct extent_info *ext,
153 struct f2fs_extent i_ext)
154{
155 write_lock(&ext->ext_lock);
156 ext->fofs = le32_to_cpu(i_ext.fofs);
157 ext->blk_addr = le32_to_cpu(i_ext.blk_addr);
158 ext->len = le32_to_cpu(i_ext.len);
159 write_unlock(&ext->ext_lock);
160}
161
162static inline void set_raw_extent(struct extent_info *ext,
163 struct f2fs_extent *i_ext)
164{
165 read_lock(&ext->ext_lock);
166 i_ext->fofs = cpu_to_le32(ext->fofs);
167 i_ext->blk_addr = cpu_to_le32(ext->blk_addr);
168 i_ext->len = cpu_to_le32(ext->len);
169 read_unlock(&ext->ext_lock);
170}
171
172struct f2fs_nm_info {
173 block_t nat_blkaddr; /* base disk address of NAT */
174 nid_t max_nid; /* maximum possible node ids */
175 nid_t init_scan_nid; /* the first nid to be scanned */
176 nid_t next_scan_nid; /* the next nid to be scanned */
177
178 /* NAT cache management */
179 struct radix_tree_root nat_root;/* root of the nat entry cache */
180	rwlock_t nat_tree_lock;	/* protect the nat entry cache */
181 unsigned int nat_cnt; /* the # of cached nat entries */
182 struct list_head nat_entries; /* cached nat entry list (clean) */
183 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
184
185 /* free node ids management */
186 struct list_head free_nid_list; /* a list for free nids */
187 spinlock_t free_nid_list_lock; /* protect free nid list */
188 unsigned int fcnt; /* the number of free node id */
189 struct mutex build_lock; /* lock for build free nids */
190
191 /* for checkpoint */
192 char *nat_bitmap; /* NAT bitmap pointer */
193 int bitmap_size; /* bitmap size */
194};
195
196/*
197 * This structure is used as one of the function parameters.
198 * All the information is dedicated to a given direct node block determined
199 * by the data offset in a file.
200 */
201struct dnode_of_data {
202 struct inode *inode; /* vfs inode pointer */
203 struct page *inode_page; /* its inode page, NULL is possible */
204 struct page *node_page; /* cached direct node page */
205 nid_t nid; /* node id of the direct node block */
206 unsigned int ofs_in_node; /* data offset in the node page */
207 bool inode_page_locked; /* inode page is locked or not */
208 block_t data_blkaddr; /* block address of the node block */
209};
210
211static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
212 struct page *ipage, struct page *npage, nid_t nid)
213{
214 dn->inode = inode;
215 dn->inode_page = ipage;
216 dn->node_page = npage;
217 dn->nid = nid;
218 dn->inode_page_locked = 0;
219}
220
221/*
222 * For SIT manager
223 *
224 * By default, there are 6 active log areas across the whole main area.
225 * When considering hot and cold data separation to reduce cleaning overhead,
226 * we split 3 for data logs and 3 for node logs as hot, warm, and cold types,
227 * respectively.
228 * In the current design, you should not change these numbers directly.
229 * Instead, with a mount option such as active_logs=x, you can select 2, 4,
230 * or 6 logs individually according to the underlying devices. (default: 6)
231 * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for
232 * data and 8 for node logs.
233 */
234#define NR_CURSEG_DATA_TYPE (3)
235#define NR_CURSEG_NODE_TYPE (3)
236#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
237
238enum {
239 CURSEG_HOT_DATA = 0, /* directory entry blocks */
240 CURSEG_WARM_DATA, /* data blocks */
241 CURSEG_COLD_DATA, /* multimedia or GCed data blocks */
242 CURSEG_HOT_NODE, /* direct node blocks of directory files */
243 CURSEG_WARM_NODE, /* direct node blocks of normal files */
244 CURSEG_COLD_NODE, /* indirect node blocks */
245 NO_CHECK_TYPE
246};
247
248struct f2fs_sm_info {
249 struct sit_info *sit_info; /* whole segment information */
250 struct free_segmap_info *free_info; /* free segment information */
251 struct dirty_seglist_info *dirty_info; /* dirty segment information */
252 struct curseg_info *curseg_array; /* active segment information */
253
254 struct list_head wblist_head; /* list of under-writeback pages */
255 spinlock_t wblist_lock; /* lock for checkpoint */
256
257 block_t seg0_blkaddr; /* block address of 0'th segment */
258 block_t main_blkaddr; /* start block address of main area */
259 block_t ssa_blkaddr; /* start block address of SSA area */
260
261 unsigned int segment_count; /* total # of segments */
262 unsigned int main_segments; /* # of segments in main area */
263 unsigned int reserved_segments; /* # of reserved segments */
264 unsigned int ovp_segments; /* # of overprovision segments */
265};
266
267/*
268 * For directory operation
269 */
270#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1)
271#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2)
272#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3)
273#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4)
274#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5)
275
276/*
277 * For superblock
278 */
279/*
280 * COUNT_TYPE for monitoring
281 *
282 * f2fs monitors the number of several block types such as on-writeback,
283 * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
284 */
285enum count_type {
286 F2FS_WRITEBACK,
287 F2FS_DIRTY_DENTS,
288 F2FS_DIRTY_NODES,
289 F2FS_DIRTY_META,
290 NR_COUNT_TYPE,
291};
292
293/*
294 * FS_LOCK nesting subclasses for the lock validator:
295 *
296 * The locking order between these classes is
297 * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW
298 * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC
299 */
300enum lock_type {
301 RENAME, /* for renaming operations */
302 DENTRY_OPS, /* for directory operations */
303 DATA_WRITE, /* for data write */
304 DATA_NEW, /* for data allocation */
305 DATA_TRUNC, /* for data truncate */
306 NODE_NEW, /* for node allocation */
307 NODE_TRUNC, /* for node truncate */
308 NODE_WRITE, /* for node write */
309 NR_LOCK_TYPE,
310};
311
312/*
313 * The below are the page types of bios used in submit_bio().
314 * The available types are:
315 * DATA	User data pages. They operate in async mode.
316 * NODE	Node pages. They operate in async mode.
317 * META	FS metadata pages such as SIT, NAT, CP.
318 * NR_PAGE_TYPE	The number of page types.
319 * META_FLUSH	Makes sure the previous pages are written
320 *			by waiting for the bio's completion.
321 * ...	Can only be used with META.
322 */
323enum page_type {
324 DATA,
325 NODE,
326 META,
327 NR_PAGE_TYPE,
328 META_FLUSH,
329};
330
331struct f2fs_sb_info {
332 struct super_block *sb; /* pointer to VFS super block */
333 struct buffer_head *raw_super_buf; /* buffer head of raw sb */
334 struct f2fs_super_block *raw_super; /* raw super block pointer */
335 int s_dirty; /* dirty flag for checkpoint */
336
337 /* for node-related operations */
338 struct f2fs_nm_info *nm_info; /* node manager */
339 struct inode *node_inode; /* cache node blocks */
340
341 /* for segment-related operations */
342 struct f2fs_sm_info *sm_info; /* segment manager */
343 struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */
344 sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */
345 struct rw_semaphore bio_sem; /* IO semaphore */
346
347 /* for checkpoint */
348 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
349 struct inode *meta_inode; /* cache meta blocks */
350 struct mutex cp_mutex; /* for checkpoint procedure */
351 struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */
352 struct mutex write_inode; /* mutex for write inode */
353 struct mutex writepages; /* mutex for writepages() */
354	int por_doing;			/* whether recovery is in progress */
355
356 /* for orphan inode management */
357 struct list_head orphan_inode_list; /* orphan inode list */
358 struct mutex orphan_inode_mutex; /* for orphan inode list */
359 unsigned int n_orphans; /* # of orphan inodes */
360
361 /* for directory inode management */
362 struct list_head dir_inode_list; /* dir inode list */
363 spinlock_t dir_inode_lock; /* for dir inode list lock */
364 unsigned int n_dirty_dirs; /* # of dir inodes */
365
366 /* basic file system units */
367 unsigned int log_sectors_per_block; /* log2 sectors per block */
368 unsigned int log_blocksize; /* log2 block size */
369 unsigned int blocksize; /* block size */
370	unsigned int root_ino_num;		/* root inode number */
371	unsigned int node_ino_num;		/* node inode number */
372	unsigned int meta_ino_num;		/* meta inode number */
373 unsigned int log_blocks_per_seg; /* log2 blocks per segment */
374 unsigned int blocks_per_seg; /* blocks per segment */
375 unsigned int segs_per_sec; /* segments per section */
376 unsigned int secs_per_zone; /* sections per zone */
377 unsigned int total_sections; /* total section count */
378 unsigned int total_node_count; /* total node block count */
379 unsigned int total_valid_node_count; /* valid node block count */
380 unsigned int total_valid_inode_count; /* valid inode count */
381 int active_logs; /* # of active logs */
382
383 block_t user_block_count; /* # of user blocks */
384 block_t total_valid_block_count; /* # of valid blocks */
385 block_t alloc_valid_block_count; /* # of allocated blocks */
386 block_t last_valid_block_count; /* for recovery */
387 u32 s_next_generation; /* for NFS support */
388 atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */
389
390 struct f2fs_mount_info mount_opt; /* mount options */
391
392 /* for cleaning operations */
393 struct mutex gc_mutex; /* mutex for GC */
394 struct f2fs_gc_kthread *gc_thread; /* GC thread */
395
396 /*
397 * for stat information.
398 * one is for the LFS mode, and the other is for the SSR mode.
399 */
400 struct f2fs_stat_info *stat_info; /* FS status information */
401 unsigned int segment_count[2]; /* # of allocated segments */
402 unsigned int block_count[2]; /* # of allocated blocks */
403 unsigned int last_victim[2]; /* last victim segment # */
404 int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
405 int bg_gc; /* background gc calls */
406 spinlock_t stat_lock; /* lock for stat operations */
407};
408
409/*
410 * Inline functions
411 */
412static inline struct f2fs_inode_info *F2FS_I(struct inode *inode)
413{
414 return container_of(inode, struct f2fs_inode_info, vfs_inode);
415}
416
417static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb)
418{
419 return sb->s_fs_info;
420}
421
422static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
423{
424 return (struct f2fs_super_block *)(sbi->raw_super);
425}
426
427static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
428{
429 return (struct f2fs_checkpoint *)(sbi->ckpt);
430}
431
432static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
433{
434 return (struct f2fs_nm_info *)(sbi->nm_info);
435}
436
437static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi)
438{
439 return (struct f2fs_sm_info *)(sbi->sm_info);
440}
441
442static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
443{
444 return (struct sit_info *)(SM_I(sbi)->sit_info);
445}
446
447static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
448{
449 return (struct free_segmap_info *)(SM_I(sbi)->free_info);
450}
451
452static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
453{
454 return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
455}
456
457static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
458{
459 sbi->s_dirty = 1;
460}
461
462static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
463{
464 sbi->s_dirty = 0;
465}
466
467static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
468{
469 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
470 return ckpt_flags & f;
471}
472
473static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
474{
475 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
476 ckpt_flags |= f;
477 cp->ckpt_flags = cpu_to_le32(ckpt_flags);
478}
479
480static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
481{
482 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
483 ckpt_flags &= (~f);
484 cp->ckpt_flags = cpu_to_le32(ckpt_flags);
485}
486
487static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t)
488{
489 mutex_lock_nested(&sbi->fs_lock[t], t);
490}
491
492static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t)
493{
494 mutex_unlock(&sbi->fs_lock[t]);
495}
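A sketch of how a compound operation is expected to take these locks, following the documented order (e.g. DENTRY_OPS before DATA_NEW); the function below is hypothetical and only illustrates the nesting:

/* hypothetical illustration only: acquire in the documented order
 * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW -> ... and release
 * in reverse, so mutex_lock_nested() sees consistent subclasses.
 */
static void example_dentry_alloc(struct f2fs_sb_info *sbi)
{
	mutex_lock_op(sbi, DENTRY_OPS);		/* subclass 1 */
	mutex_lock_op(sbi, DATA_NEW);		/* subclass 3 */
	/* ... add a dentry and allocate its data block ... */
	mutex_unlock_op(sbi, DATA_NEW);
	mutex_unlock_op(sbi, DENTRY_OPS);
}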
496
497/*
498 * Check whether the given nid is within node id range.
499 */
500static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
501{
502 BUG_ON((nid >= NM_I(sbi)->max_nid));
503}
504
505#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1
506
507/*
508 * Check whether the inode has blocks or not
509 */
510static inline int F2FS_HAS_BLOCKS(struct inode *inode)
511{
512 if (F2FS_I(inode)->i_xattr_nid)
513 return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1);
514 else
515 return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS);
516}
517
518static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
519 struct inode *inode, blkcnt_t count)
520{
521 block_t valid_block_count;
522
523 spin_lock(&sbi->stat_lock);
524 valid_block_count =
525 sbi->total_valid_block_count + (block_t)count;
526 if (valid_block_count > sbi->user_block_count) {
527 spin_unlock(&sbi->stat_lock);
528 return false;
529 }
530 inode->i_blocks += count;
531 sbi->total_valid_block_count = valid_block_count;
532 sbi->alloc_valid_block_count += (block_t)count;
533 spin_unlock(&sbi->stat_lock);
534 return true;
535}
536
537static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
538 struct inode *inode,
539 blkcnt_t count)
540{
541 spin_lock(&sbi->stat_lock);
542 BUG_ON(sbi->total_valid_block_count < (block_t) count);
543 BUG_ON(inode->i_blocks < count);
544 inode->i_blocks -= count;
545 sbi->total_valid_block_count -= (block_t)count;
546 spin_unlock(&sbi->stat_lock);
547 return 0;
548}
549
550static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
551{
552 atomic_inc(&sbi->nr_pages[count_type]);
553 F2FS_SET_SB_DIRT(sbi);
554}
555
556static inline void inode_inc_dirty_dents(struct inode *inode)
557{
558 atomic_inc(&F2FS_I(inode)->dirty_dents);
559}
560
561static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
562{
563 atomic_dec(&sbi->nr_pages[count_type]);
564}
565
566static inline void inode_dec_dirty_dents(struct inode *inode)
567{
568 atomic_dec(&F2FS_I(inode)->dirty_dents);
569}
570
571static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
572{
573 return atomic_read(&sbi->nr_pages[count_type]);
574}
575
576static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
577{
578 block_t ret;
579 spin_lock(&sbi->stat_lock);
580 ret = sbi->total_valid_block_count;
581 spin_unlock(&sbi->stat_lock);
582 return ret;
583}
584
585static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
586{
587 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
588
589 /* return NAT or SIT bitmap */
590 if (flag == NAT_BITMAP)
591 return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
592 else if (flag == SIT_BITMAP)
593 return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
594
595 return 0;
596}
597
598static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
599{
600 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
601 int offset = (flag == NAT_BITMAP) ?
602 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
603 return &ckpt->sit_nat_version_bitmap + offset;
604}
605
606static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
607{
608 block_t start_addr;
609 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
610 unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver);
611
612 start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
613
614 /*
615	 * an odd numbered checkpoint should be at cp segment 0,
616	 * and an even numbered one must be at cp segment 1
617 */
618 if (!(ckpt_version & 1))
619 start_addr += sbi->blocks_per_seg;
620
621 return start_addr;
622}
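So checkpoint packs ping-pong between two fixed segments based on version parity. A user-space sketch of the address computation, with hypothetical layout values (the default segment holds 512 4 KiB blocks):

#include <stdio.h>

int main(void)
{
	unsigned long cp_blkaddr = 512;		/* hypothetical cp area start */
	unsigned int blocks_per_seg = 512;	/* hypothetical default geometry */
	unsigned long long ver;

	/* odd versions land in segment 0, even versions in segment 1 */
	for (ver = 41; ver <= 44; ver++)
		printf("ckpt ver %llu -> pack at block %lu\n", ver,
		       cp_blkaddr + (ver & 1 ? 0 : blocks_per_seg));
	return 0;
}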
623
624static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
625{
626 return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
627}
628
629static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
630 struct inode *inode,
631 unsigned int count)
632{
633 block_t valid_block_count;
634 unsigned int valid_node_count;
635
636 spin_lock(&sbi->stat_lock);
637
638 valid_block_count = sbi->total_valid_block_count + (block_t)count;
639 sbi->alloc_valid_block_count += (block_t)count;
640 valid_node_count = sbi->total_valid_node_count + count;
641
642 if (valid_block_count > sbi->user_block_count) {
643 spin_unlock(&sbi->stat_lock);
644 return false;
645 }
646
647 if (valid_node_count > sbi->total_node_count) {
648 spin_unlock(&sbi->stat_lock);
649 return false;
650 }
651
652 if (inode)
653 inode->i_blocks += count;
654 sbi->total_valid_node_count = valid_node_count;
655 sbi->total_valid_block_count = valid_block_count;
656 spin_unlock(&sbi->stat_lock);
657
658 return true;
659}
660
661static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
662 struct inode *inode,
663 unsigned int count)
664{
665 spin_lock(&sbi->stat_lock);
666
667 BUG_ON(sbi->total_valid_block_count < count);
668 BUG_ON(sbi->total_valid_node_count < count);
669 BUG_ON(inode->i_blocks < count);
670
671 inode->i_blocks -= count;
672 sbi->total_valid_node_count -= count;
673 sbi->total_valid_block_count -= (block_t)count;
674
675 spin_unlock(&sbi->stat_lock);
676}
677
678static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
679{
680 unsigned int ret;
681 spin_lock(&sbi->stat_lock);
682 ret = sbi->total_valid_node_count;
683 spin_unlock(&sbi->stat_lock);
684 return ret;
685}
686
687static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
688{
689 spin_lock(&sbi->stat_lock);
690 BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count);
691 sbi->total_valid_inode_count++;
692 spin_unlock(&sbi->stat_lock);
693}
694
695static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
696{
697 spin_lock(&sbi->stat_lock);
698 BUG_ON(!sbi->total_valid_inode_count);
699 sbi->total_valid_inode_count--;
700 spin_unlock(&sbi->stat_lock);
701 return 0;
702}
703
704static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
705{
706 unsigned int ret;
707 spin_lock(&sbi->stat_lock);
708 ret = sbi->total_valid_inode_count;
709 spin_unlock(&sbi->stat_lock);
710 return ret;
711}
712
713static inline void f2fs_put_page(struct page *page, int unlock)
714{
715 if (!page || IS_ERR(page))
716 return;
717
718 if (unlock) {
719 BUG_ON(!PageLocked(page));
720 unlock_page(page);
721 }
722 page_cache_release(page);
723}
724
725static inline void f2fs_put_dnode(struct dnode_of_data *dn)
726{
727 if (dn->node_page)
728 f2fs_put_page(dn->node_page, 1);
729 if (dn->inode_page && dn->node_page != dn->inode_page)
730 f2fs_put_page(dn->inode_page, 0);
731 dn->node_page = NULL;
732 dn->inode_page = NULL;
733}
734
735static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
736 size_t size, void (*ctor)(void *))
737{
738 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
739}
740
741#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
742
743static inline bool IS_INODE(struct page *page)
744{
745 struct f2fs_node *p = (struct f2fs_node *)page_address(page);
746 return RAW_IS_INODE(p);
747}
748
749static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
750{
751 return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr;
752}
753
754static inline block_t datablock_addr(struct page *node_page,
755 unsigned int offset)
756{
757 struct f2fs_node *raw_node;
758 __le32 *addr_array;
759 raw_node = (struct f2fs_node *)page_address(node_page);
760 addr_array = blkaddr_in_node(raw_node);
761 return le32_to_cpu(addr_array[offset]);
762}
763
764static inline int f2fs_test_bit(unsigned int nr, char *addr)
765{
766 int mask;
767
768 addr += (nr >> 3);
769 mask = 1 << (7 - (nr & 0x07));
770 return mask & *addr;
771}
772
773static inline int f2fs_set_bit(unsigned int nr, char *addr)
774{
775 int mask;
776 int ret;
777
778 addr += (nr >> 3);
779 mask = 1 << (7 - (nr & 0x07));
780 ret = mask & *addr;
781 *addr |= mask;
782 return ret;
783}
784
785static inline int f2fs_clear_bit(unsigned int nr, char *addr)
786{
787 int mask;
788 int ret;
789
790 addr += (nr >> 3);
791 mask = 1 << (7 - (nr & 0x07));
792 ret = mask & *addr;
793 *addr &= ~mask;
794 return ret;
795}
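Unlike the generic little-endian bitops used on dentry bitmaps above, these helpers number bits from the most significant bit of each byte (bit 0 is mask 0x80 of byte 0), which is the layout the NAT/SIT version bitmaps use here. A quick user-space check:

#include <stdio.h>

/* mirrors f2fs_set_bit() above: bit 0 is the MSB of byte 0 */
static void set_bit_msb(unsigned int nr, char *addr)
{
	int mask;

	addr += nr >> 3;
	mask = 1 << (7 - (nr & 0x07));
	*addr |= mask;
}

int main(void)
{
	char map[2] = { 0, 0 };

	set_bit_msb(0, map);	/* byte 0 becomes 0x80 */
	set_bit_msb(9, map);	/* byte 1 becomes 0x40 */
	printf("%02x %02x\n", (unsigned char)map[0], (unsigned char)map[1]);
	return 0;
}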
796
797/* used for f2fs_inode_info->flags */
798enum {
799 FI_NEW_INODE, /* indicate newly allocated inode */
800 FI_NEED_CP, /* need to do checkpoint during fsync */
801 FI_INC_LINK, /* need to increment i_nlink */
802 FI_ACL_MODE, /* indicate acl mode */
803 FI_NO_ALLOC, /* should not allocate any blocks */
804};
805
806static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
807{
808 set_bit(flag, &fi->flags);
809}
810
811static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
812{
813 return test_bit(flag, &fi->flags);
814}
815
816static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
817{
818 clear_bit(flag, &fi->flags);
819}
820
821static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
822{
823 fi->i_acl_mode = mode;
824 set_inode_flag(fi, FI_ACL_MODE);
825}
826
827static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
828{
829	if (is_inode_flag_set(fi, flag)) {
830		clear_inode_flag(fi, flag);
831 return 1;
832 }
833 return 0;
834}
835
836/*
837 * file.c
838 */
839int f2fs_sync_file(struct file *, loff_t, loff_t, int);
840void truncate_data_blocks(struct dnode_of_data *);
841void f2fs_truncate(struct inode *);
842int f2fs_setattr(struct dentry *, struct iattr *);
843int truncate_hole(struct inode *, pgoff_t, pgoff_t);
844long f2fs_ioctl(struct file *, unsigned int, unsigned long);
845
846/*
847 * inode.c
848 */
849void f2fs_set_inode_flags(struct inode *);
850struct inode *f2fs_iget_nowait(struct super_block *, unsigned long);
851struct inode *f2fs_iget(struct super_block *, unsigned long);
852void update_inode(struct inode *, struct page *);
853int f2fs_write_inode(struct inode *, struct writeback_control *);
854void f2fs_evict_inode(struct inode *);
855
856/*
857 * namei.c
858 */
859struct dentry *f2fs_get_parent(struct dentry *child);
860
861/*
862 * dir.c
863 */
864struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *,
865 struct page **);
866struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
867ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
868void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
869 struct page *, struct inode *);
870void init_dent_inode(struct dentry *, struct page *);
871int f2fs_add_link(struct dentry *, struct inode *);
872void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
873int f2fs_make_empty(struct inode *, struct inode *);
874bool f2fs_empty_dir(struct inode *);
875
876/*
877 * super.c
878 */
879int f2fs_sync_fs(struct super_block *, int);
880
881/*
882 * hash.c
883 */
884f2fs_hash_t f2fs_dentry_hash(const char *, int);
885
886/*
887 * node.c
888 */
889struct dnode_of_data;
890struct node_info;
891
892int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
893void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
894int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
895int truncate_inode_blocks(struct inode *, pgoff_t);
896int remove_inode_page(struct inode *);
897int new_inode_page(struct inode *, struct dentry *);
898struct page *new_node_page(struct dnode_of_data *, unsigned int);
899void ra_node_page(struct f2fs_sb_info *, nid_t);
900struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
901struct page *get_node_page_ra(struct page *, int);
902void sync_inode_page(struct dnode_of_data *);
903int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
904bool alloc_nid(struct f2fs_sb_info *, nid_t *);
905void alloc_nid_done(struct f2fs_sb_info *, nid_t);
906void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
907void recover_node_page(struct f2fs_sb_info *, struct page *,
908 struct f2fs_summary *, struct node_info *, block_t);
909int recover_inode_page(struct f2fs_sb_info *, struct page *);
910int restore_node_summary(struct f2fs_sb_info *, unsigned int,
911 struct f2fs_summary_block *);
912void flush_nat_entries(struct f2fs_sb_info *);
913int build_node_manager(struct f2fs_sb_info *);
914void destroy_node_manager(struct f2fs_sb_info *);
915int create_node_manager_caches(void);
916void destroy_node_manager_caches(void);
917
918/*
919 * segment.c
920 */
921void f2fs_balance_fs(struct f2fs_sb_info *);
922void invalidate_blocks(struct f2fs_sb_info *, block_t);
923void locate_dirty_segment(struct f2fs_sb_info *, unsigned int);
924void clear_prefree_segments(struct f2fs_sb_info *);
925int npages_for_summary_flush(struct f2fs_sb_info *);
926void allocate_new_segments(struct f2fs_sb_info *);
927struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
928struct bio *f2fs_bio_alloc(struct block_device *, int);
929void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
930int write_meta_page(struct f2fs_sb_info *, struct page *,
931 struct writeback_control *);
932void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
933 block_t, block_t *);
934void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
935 block_t, block_t *);
936void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t);
937void recover_data_page(struct f2fs_sb_info *, struct page *,
938 struct f2fs_summary *, block_t, block_t);
939void rewrite_node_page(struct f2fs_sb_info *, struct page *,
940 struct f2fs_summary *, block_t, block_t);
941void write_data_summaries(struct f2fs_sb_info *, block_t);
942void write_node_summaries(struct f2fs_sb_info *, block_t);
943int lookup_journal_in_cursum(struct f2fs_summary_block *,
944 int, unsigned int, int);
945void flush_sit_entries(struct f2fs_sb_info *);
946int build_segment_manager(struct f2fs_sb_info *);
947void reset_victim_segmap(struct f2fs_sb_info *);
948void destroy_segment_manager(struct f2fs_sb_info *);
949
950/*
951 * checkpoint.c
952 */
953struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
954struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
955long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
956int check_orphan_space(struct f2fs_sb_info *);
957void add_orphan_inode(struct f2fs_sb_info *, nid_t);
958void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
959int recover_orphan_inodes(struct f2fs_sb_info *);
960int get_valid_checkpoint(struct f2fs_sb_info *);
961void set_dirty_dir_page(struct inode *, struct page *);
962void remove_dirty_dir_inode(struct inode *);
963void sync_dirty_dir_inodes(struct f2fs_sb_info *);
964void block_operations(struct f2fs_sb_info *);
965void write_checkpoint(struct f2fs_sb_info *, bool, bool);
966void init_orphan_info(struct f2fs_sb_info *);
967int create_checkpoint_caches(void);
968void destroy_checkpoint_caches(void);
969
970/*
971 * data.c
972 */
973int reserve_new_block(struct dnode_of_data *);
974void update_extent_cache(block_t, struct dnode_of_data *);
975struct page *find_data_page(struct inode *, pgoff_t);
976struct page *get_lock_data_page(struct inode *, pgoff_t);
977struct page *get_new_data_page(struct inode *, pgoff_t, bool);
978int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int);
979int do_write_data_page(struct page *);
980
981/*
982 * gc.c
983 */
984int start_gc_thread(struct f2fs_sb_info *);
985void stop_gc_thread(struct f2fs_sb_info *);
986block_t start_bidx_of_node(unsigned int);
987int f2fs_gc(struct f2fs_sb_info *, int);
988void build_gc_manager(struct f2fs_sb_info *);
989int create_gc_caches(void);
990void destroy_gc_caches(void);
991
992/*
993 * recovery.c
994 */
995void recover_fsync_data(struct f2fs_sb_info *);
996bool space_for_roll_forward(struct f2fs_sb_info *);
997
998/*
999 * debug.c
1000 */
1001#ifdef CONFIG_F2FS_STAT_FS
1002struct f2fs_stat_info {
1003 struct list_head stat_list;
1004 struct f2fs_sb_info *sbi;
1005 struct mutex stat_lock;
1006 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1007 int main_area_segs, main_area_sections, main_area_zones;
1008 int hit_ext, total_ext;
1009 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1010 int nats, sits, fnids;
1011 int total_count, utilization;
1012 int bg_gc;
1013 unsigned int valid_count, valid_node_count, valid_inode_count;
1014 unsigned int bimodal, avg_vblocks;
1015 int util_free, util_valid, util_invalid;
1016 int rsvd_segs, overp_segs;
1017 int dirty_count, node_pages, meta_pages;
1018 int prefree_count, call_count;
1019 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1020 int tot_blks, data_blks, node_blks;
1021 int curseg[NR_CURSEG_TYPE];
1022 int cursec[NR_CURSEG_TYPE];
1023 int curzone[NR_CURSEG_TYPE];
1024
1025 unsigned int segment_count[2];
1026 unsigned int block_count[2];
1027 unsigned base_mem, cache_mem;
1028};
1029
1030#define stat_inc_call_count(si) ((si)->call_count++)
1031
1032#define stat_inc_seg_count(sbi, type) \
1033 do { \
1034 struct f2fs_stat_info *si = sbi->stat_info; \
1035 (si)->tot_segs++; \
1036 if (type == SUM_TYPE_DATA) \
1037 si->data_segs++; \
1038 else \
1039 si->node_segs++; \
1040 } while (0)
1041
1042#define stat_inc_tot_blk_count(si, blks) \
1043 (si->tot_blks += (blks))
1044
1045#define stat_inc_data_blk_count(sbi, blks) \
1046 do { \
1047 struct f2fs_stat_info *si = sbi->stat_info; \
1048 stat_inc_tot_blk_count(si, blks); \
1049 si->data_blks += (blks); \
1050 } while (0)
1051
1052#define stat_inc_node_blk_count(sbi, blks) \
1053 do { \
1054 struct f2fs_stat_info *si = sbi->stat_info; \
1055 stat_inc_tot_blk_count(si, blks); \
1056 si->node_blks += (blks); \
1057 } while (0)
1058
1059int f2fs_build_stats(struct f2fs_sb_info *);
1060void f2fs_destroy_stats(struct f2fs_sb_info *);
1061void destroy_root_stats(void);
1062#else
1063#define stat_inc_call_count(si)
1064#define stat_inc_seg_count(sbi, type)
1065#define stat_inc_tot_blk_count(si, blks)
1066#define stat_inc_data_blk_count(sbi, blks)
1067#define stat_inc_node_blk_count(sbi, blks)
1068
1069static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1070static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
1071static inline void destroy_root_stats(void) { }
1072#endif
1073
1074extern const struct file_operations f2fs_dir_operations;
1075extern const struct file_operations f2fs_file_operations;
1076extern const struct inode_operations f2fs_file_inode_operations;
1077extern const struct address_space_operations f2fs_dblock_aops;
1078extern const struct address_space_operations f2fs_node_aops;
1079extern const struct address_space_operations f2fs_meta_aops;
1080extern const struct inode_operations f2fs_dir_inode_operations;
1081extern const struct inode_operations f2fs_symlink_inode_operations;
1082extern const struct inode_operations f2fs_special_inode_operations;
1083#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
new file mode 100644
index 000000000000..f9e085dfb1f0
--- /dev/null
+++ b/fs/f2fs/file.c
@@ -0,0 +1,636 @@
1/*
2 * fs/f2fs/file.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/stat.h>
14#include <linux/buffer_head.h>
15#include <linux/writeback.h>
16#include <linux/falloc.h>
17#include <linux/types.h>
18#include <linux/uaccess.h>
19#include <linux/mount.h>
20
21#include "f2fs.h"
22#include "node.h"
23#include "segment.h"
24#include "xattr.h"
25#include "acl.h"
26
27static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
28 struct vm_fault *vmf)
29{
30 struct page *page = vmf->page;
31 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
32 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
33 block_t old_blk_addr;
34 struct dnode_of_data dn;
35 int err;
36
37 f2fs_balance_fs(sbi);
38
39 sb_start_pagefault(inode->i_sb);
40
41 mutex_lock_op(sbi, DATA_NEW);
42
43 /* block allocation */
44 set_new_dnode(&dn, inode, NULL, NULL, 0);
45 err = get_dnode_of_data(&dn, page->index, 0);
46 if (err) {
47 mutex_unlock_op(sbi, DATA_NEW);
48 goto out;
49 }
50
51 old_blk_addr = dn.data_blkaddr;
52
53 if (old_blk_addr == NULL_ADDR) {
54 err = reserve_new_block(&dn);
55 if (err) {
56 f2fs_put_dnode(&dn);
57 mutex_unlock_op(sbi, DATA_NEW);
58 goto out;
59 }
60 }
61 f2fs_put_dnode(&dn);
62
63 mutex_unlock_op(sbi, DATA_NEW);
64
65 lock_page(page);
66 if (page->mapping != inode->i_mapping ||
67 page_offset(page) >= i_size_read(inode) ||
68 !PageUptodate(page)) {
69 unlock_page(page);
70 err = -EFAULT;
71 goto out;
72 }
73
74 /*
75 * check to see if the page is mapped already (no holes)
76 */
77 if (PageMappedToDisk(page))
78 goto out;
79
80 /* fill the page */
81 wait_on_page_writeback(page);
82
83 /* page is wholly or partially inside EOF */
84 if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) {
85 unsigned offset;
86 offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
87 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
88 }
89 set_page_dirty(page);
90 SetPageUptodate(page);
91
92 file_update_time(vma->vm_file);
93out:
94 sb_end_pagefault(inode->i_sb);
95 return block_page_mkwrite_return(err);
96}
97
98static const struct vm_operations_struct f2fs_file_vm_ops = {
99 .fault = filemap_fault,
100 .page_mkwrite = f2fs_vm_page_mkwrite,
101};
102
103static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
104{
105 struct dentry *dentry;
106 nid_t pino;
107
108 inode = igrab(inode);
109 dentry = d_find_any_alias(inode);
110 if (!dentry) {
111 iput(inode);
112 return 0;
113 }
114 pino = dentry->d_parent->d_inode->i_ino;
115 dput(dentry);
116 iput(inode);
117 return !is_checkpointed_node(sbi, pino);
118}
119
120int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
121{
122 struct inode *inode = file->f_mapping->host;
123 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
124 unsigned long long cur_version;
125 int ret = 0;
126 bool need_cp = false;
127 struct writeback_control wbc = {
128 .sync_mode = WB_SYNC_ALL,
129 .nr_to_write = LONG_MAX,
130 .for_reclaim = 0,
131 };
132
133 if (inode->i_sb->s_flags & MS_RDONLY)
134 return 0;
135
136 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
137 if (ret)
138 return ret;
139
140 mutex_lock(&inode->i_mutex);
141
142 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
143 goto out;
144
145 mutex_lock(&sbi->cp_mutex);
146 cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
147 mutex_unlock(&sbi->cp_mutex);
148
149 if (F2FS_I(inode)->data_version != cur_version &&
150 !(inode->i_state & I_DIRTY))
151 goto out;
152 F2FS_I(inode)->data_version--;
153
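	/*
	 * A full checkpoint (rather than node-log roll-forward) is needed
	 * when the file is not a singly-linked regular file, is flagged
	 * FI_NEED_CP, there is no room left to roll forward at mount time,
	 * or its parent directory has not been checkpointed yet.
	 */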
154 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
155 need_cp = true;
156 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
157 need_cp = true;
158 if (!space_for_roll_forward(sbi))
159 need_cp = true;
160 if (need_to_sync_dir(sbi, inode))
161 need_cp = true;
162
163 f2fs_write_inode(inode, NULL);
164
165 if (need_cp) {
166 /* all the dirty node pages should be flushed for POR */
167 ret = f2fs_sync_fs(inode->i_sb, 1);
168 clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
169 } else {
170 while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0)
171 f2fs_write_inode(inode, NULL);
172 filemap_fdatawait_range(sbi->node_inode->i_mapping,
173 0, LONG_MAX);
174 }
175out:
176 mutex_unlock(&inode->i_mutex);
177 return ret;
178}
179
180static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
181{
182 file_accessed(file);
183 vma->vm_ops = &f2fs_file_vm_ops;
184 return 0;
185}
186
187static int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
188{
189 int nr_free = 0, ofs = dn->ofs_in_node;
190 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
191 struct f2fs_node *raw_node;
192 __le32 *addr;
193
194 raw_node = page_address(dn->node_page);
195 addr = blkaddr_in_node(raw_node) + ofs;
196
197 for ( ; count > 0; count--, addr++, dn->ofs_in_node++) {
198 block_t blkaddr = le32_to_cpu(*addr);
199 if (blkaddr == NULL_ADDR)
200 continue;
201
202 update_extent_cache(NULL_ADDR, dn);
203 invalidate_blocks(sbi, blkaddr);
204 dec_valid_block_count(sbi, dn->inode, 1);
205 nr_free++;
206 }
207 if (nr_free) {
208 set_page_dirty(dn->node_page);
209 sync_inode_page(dn);
210 }
211 dn->ofs_in_node = ofs;
212 return nr_free;
213}
214
215void truncate_data_blocks(struct dnode_of_data *dn)
216{
217 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
218}
219
220static void truncate_partial_data_page(struct inode *inode, u64 from)
221{
222 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
223 struct page *page;
224
225 if (!offset)
226 return;
227
228 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT);
229 if (IS_ERR(page))
230 return;
231
232 lock_page(page);
233 wait_on_page_writeback(page);
234 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
235 set_page_dirty(page);
236 f2fs_put_page(page, 1);
237}
238
239static int truncate_blocks(struct inode *inode, u64 from)
240{
241 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
242 unsigned int blocksize = inode->i_sb->s_blocksize;
243 struct dnode_of_data dn;
244 pgoff_t free_from;
245 int count = 0;
246 int err;
247
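	/* first block index to free: round the byte offset up to a block */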
248 free_from = (pgoff_t)
249 ((from + blocksize - 1) >> (sbi->log_blocksize));
250
251 mutex_lock_op(sbi, DATA_TRUNC);
252
253 set_new_dnode(&dn, inode, NULL, NULL, 0);
254 err = get_dnode_of_data(&dn, free_from, RDONLY_NODE);
255 if (err) {
256 if (err == -ENOENT)
257 goto free_next;
258 mutex_unlock_op(sbi, DATA_TRUNC);
259 return err;
260 }
261
262 if (IS_INODE(dn.node_page))
263 count = ADDRS_PER_INODE;
264 else
265 count = ADDRS_PER_BLOCK;
266
267 count -= dn.ofs_in_node;
268 BUG_ON(count < 0);
269 if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
270 truncate_data_blocks_range(&dn, count);
271 free_from += count;
272 }
273
274 f2fs_put_dnode(&dn);
275free_next:
276 err = truncate_inode_blocks(inode, free_from);
277 mutex_unlock_op(sbi, DATA_TRUNC);
278
279 /* lastly zero out the first data page */
280 truncate_partial_data_page(inode, from);
281
282 return err;
283}
284
285void f2fs_truncate(struct inode *inode)
286{
287 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
288 S_ISLNK(inode->i_mode)))
289 return;
290
291 if (!truncate_blocks(inode, i_size_read(inode))) {
292 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
293 mark_inode_dirty(inode);
294 }
295
296 f2fs_balance_fs(F2FS_SB(inode->i_sb));
297}
298
299static int f2fs_getattr(struct vfsmount *mnt,
300 struct dentry *dentry, struct kstat *stat)
301{
302 struct inode *inode = dentry->d_inode;
303 generic_fillattr(inode, stat);
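	/* i_blocks is in filesystem blocks; stat expects 512-byte sectors */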
304 stat->blocks <<= 3;
305 return 0;
306}
307
308#ifdef CONFIG_F2FS_FS_POSIX_ACL
309static void __setattr_copy(struct inode *inode, const struct iattr *attr)
310{
311 struct f2fs_inode_info *fi = F2FS_I(inode);
312 unsigned int ia_valid = attr->ia_valid;
313
314 if (ia_valid & ATTR_UID)
315 inode->i_uid = attr->ia_uid;
316 if (ia_valid & ATTR_GID)
317 inode->i_gid = attr->ia_gid;
318 if (ia_valid & ATTR_ATIME)
319 inode->i_atime = timespec_trunc(attr->ia_atime,
320 inode->i_sb->s_time_gran);
321 if (ia_valid & ATTR_MTIME)
322 inode->i_mtime = timespec_trunc(attr->ia_mtime,
323 inode->i_sb->s_time_gran);
324 if (ia_valid & ATTR_CTIME)
325 inode->i_ctime = timespec_trunc(attr->ia_ctime,
326 inode->i_sb->s_time_gran);
327 if (ia_valid & ATTR_MODE) {
328 umode_t mode = attr->ia_mode;
329
330 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
331 mode &= ~S_ISGID;
332 set_acl_inode(fi, mode);
333 }
334}
335#else
336#define __setattr_copy setattr_copy
337#endif
338
339int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
340{
341 struct inode *inode = dentry->d_inode;
342 struct f2fs_inode_info *fi = F2FS_I(inode);
343 int err;
344
345 err = inode_change_ok(inode, attr);
346 if (err)
347 return err;
348
349 if ((attr->ia_valid & ATTR_SIZE) &&
350 attr->ia_size != i_size_read(inode)) {
351 truncate_setsize(inode, attr->ia_size);
352 f2fs_truncate(inode);
353 }
354
355 __setattr_copy(inode, attr);
356
357 if (attr->ia_valid & ATTR_MODE) {
358 err = f2fs_acl_chmod(inode);
359 if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
360 inode->i_mode = fi->i_acl_mode;
361 clear_inode_flag(fi, FI_ACL_MODE);
362 }
363 }
364
365 mark_inode_dirty(inode);
366 return err;
367}
368
369const struct inode_operations f2fs_file_inode_operations = {
370 .getattr = f2fs_getattr,
371 .setattr = f2fs_setattr,
372 .get_acl = f2fs_get_acl,
373#ifdef CONFIG_F2FS_FS_XATTR
374 .setxattr = generic_setxattr,
375 .getxattr = generic_getxattr,
376 .listxattr = f2fs_listxattr,
377 .removexattr = generic_removexattr,
378#endif
379};
380
381static void fill_zero(struct inode *inode, pgoff_t index,
382 loff_t start, loff_t len)
383{
384 struct page *page;
385
386 if (!len)
387 return;
388
389 page = get_new_data_page(inode, index, false);
390
391 if (!IS_ERR(page)) {
392 wait_on_page_writeback(page);
393 zero_user(page, start, len);
394 set_page_dirty(page);
395 f2fs_put_page(page, 1);
396 }
397}
398
399int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
400{
401 pgoff_t index;
402 int err;
403
404 for (index = pg_start; index < pg_end; index++) {
405 struct dnode_of_data dn;
406 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
407
408 mutex_lock_op(sbi, DATA_TRUNC);
409 set_new_dnode(&dn, inode, NULL, NULL, 0);
410 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
411 if (err) {
412 mutex_unlock_op(sbi, DATA_TRUNC);
413 if (err == -ENOENT)
414 continue;
415 return err;
416 }
417
418 if (dn.data_blkaddr != NULL_ADDR)
419 truncate_data_blocks_range(&dn, 1);
420 f2fs_put_dnode(&dn);
421 mutex_unlock_op(sbi, DATA_TRUNC);
422 }
423 return 0;
424}
425
426static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
427{
428 pgoff_t pg_start, pg_end;
429 loff_t off_start, off_end;
430 int ret = 0;
431
432 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
433 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
434
435 off_start = offset & (PAGE_CACHE_SIZE - 1);
436 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
437
438 if (pg_start == pg_end) {
439 fill_zero(inode, pg_start, off_start,
440 off_end - off_start);
441 } else {
442 if (off_start)
443 fill_zero(inode, pg_start++, off_start,
444 PAGE_CACHE_SIZE - off_start);
445 if (off_end)
446 fill_zero(inode, pg_end, 0, off_end);
447
448 if (pg_start < pg_end) {
449 struct address_space *mapping = inode->i_mapping;
450 loff_t blk_start, blk_end;
451
452			blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
453			blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
454 truncate_inode_pages_range(mapping, blk_start,
455 blk_end - 1);
456 ret = truncate_hole(inode, pg_start, pg_end);
457 }
458 }
459
460 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
461 i_size_read(inode) <= (offset + len)) {
462 i_size_write(inode, offset);
463 mark_inode_dirty(inode);
464 }
465
466 return ret;
467}
468
469static int expand_inode_data(struct inode *inode, loff_t offset,
470 loff_t len, int mode)
471{
472 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
473 pgoff_t index, pg_start, pg_end;
474 loff_t new_size = i_size_read(inode);
475 loff_t off_start, off_end;
476 int ret = 0;
477
478 ret = inode_newsize_ok(inode, (len + offset));
479 if (ret)
480 return ret;
481
482 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
483 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
484
485 off_start = offset & (PAGE_CACHE_SIZE - 1);
486 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
487
488 for (index = pg_start; index <= pg_end; index++) {
489 struct dnode_of_data dn;
490
491 mutex_lock_op(sbi, DATA_NEW);
492
493 set_new_dnode(&dn, inode, NULL, NULL, 0);
494 ret = get_dnode_of_data(&dn, index, 0);
495 if (ret) {
496 mutex_unlock_op(sbi, DATA_NEW);
497 break;
498 }
499
500 if (dn.data_blkaddr == NULL_ADDR) {
501 ret = reserve_new_block(&dn);
502 if (ret) {
503 f2fs_put_dnode(&dn);
504 mutex_unlock_op(sbi, DATA_NEW);
505 break;
506 }
507 }
508 f2fs_put_dnode(&dn);
509
510 mutex_unlock_op(sbi, DATA_NEW);
511
512 if (pg_start == pg_end)
513 new_size = offset + len;
514 else if (index == pg_start && off_start)
515			new_size = ((loff_t)index + 1) << PAGE_CACHE_SHIFT;
516		else if (index == pg_end)
517			new_size = ((loff_t)index << PAGE_CACHE_SHIFT) + off_end;
518 else
519 new_size += PAGE_CACHE_SIZE;
520 }
521
522 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
523 i_size_read(inode) < new_size) {
524 i_size_write(inode, new_size);
525 mark_inode_dirty(inode);
526 }
527
528 return ret;
529}
530
531static long f2fs_fallocate(struct file *file, int mode,
532 loff_t offset, loff_t len)
533{
534 struct inode *inode = file->f_path.dentry->d_inode;
535 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
536 long ret;
537
538 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
539 return -EOPNOTSUPP;
540
541 if (mode & FALLOC_FL_PUNCH_HOLE)
542 ret = punch_hole(inode, offset, len, mode);
543 else
544 ret = expand_inode_data(inode, offset, len, mode);
545
546 f2fs_balance_fs(sbi);
547 return ret;
548}
549
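/*
 * Following ext4's flag-masking convention: regular files accept every
 * user flag except DIRSYNC and TOPDIR; other non-directory inodes keep
 * only NODUMP and NOATIME. Directories pass through unmasked.
 */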
550#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
551#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
552
553static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
554{
555 if (S_ISDIR(mode))
556 return flags;
557 else if (S_ISREG(mode))
558 return flags & F2FS_REG_FLMASK;
559 else
560 return flags & F2FS_OTHER_FLMASK;
561}
562
563long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
564{
565 struct inode *inode = filp->f_dentry->d_inode;
566 struct f2fs_inode_info *fi = F2FS_I(inode);
567 unsigned int flags;
568 int ret;
569
570 switch (cmd) {
571 case FS_IOC_GETFLAGS:
572 flags = fi->i_flags & FS_FL_USER_VISIBLE;
573 return put_user(flags, (int __user *) arg);
574 case FS_IOC_SETFLAGS:
575 {
576 unsigned int oldflags;
577
578 ret = mnt_want_write(filp->f_path.mnt);
579 if (ret)
580 return ret;
581
582 if (!inode_owner_or_capable(inode)) {
583 ret = -EACCES;
584 goto out;
585 }
586
587 if (get_user(flags, (int __user *) arg)) {
588 ret = -EFAULT;
589 goto out;
590 }
591
592 flags = f2fs_mask_flags(inode->i_mode, flags);
593
594 mutex_lock(&inode->i_mutex);
595
596 oldflags = fi->i_flags;
597
598 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
599 if (!capable(CAP_LINUX_IMMUTABLE)) {
600 mutex_unlock(&inode->i_mutex);
601 ret = -EPERM;
602 goto out;
603 }
604 }
605
606 flags = flags & FS_FL_USER_MODIFIABLE;
607 flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
608 fi->i_flags = flags;
609 mutex_unlock(&inode->i_mutex);
610
611 f2fs_set_inode_flags(inode);
612 inode->i_ctime = CURRENT_TIME;
613 mark_inode_dirty(inode);
614out:
615 mnt_drop_write(filp->f_path.mnt);
616 return ret;
617 }
618 default:
619 return -ENOTTY;
620 }
621}
622
623const struct file_operations f2fs_file_operations = {
624 .llseek = generic_file_llseek,
625 .read = do_sync_read,
626 .write = do_sync_write,
627 .aio_read = generic_file_aio_read,
628 .aio_write = generic_file_aio_write,
629 .open = generic_file_open,
630 .mmap = f2fs_file_mmap,
631 .fsync = f2fs_sync_file,
632 .fallocate = f2fs_fallocate,
633 .unlocked_ioctl = f2fs_ioctl,
634 .splice_read = generic_file_splice_read,
635 .splice_write = generic_file_splice_write,
636};
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
new file mode 100644
index 000000000000..644aa3808273
--- /dev/null
+++ b/fs/f2fs/gc.c
@@ -0,0 +1,742 @@
1/*
2 * fs/f2fs/gc.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/module.h>
13#include <linux/backing-dev.h>
14#include <linux/proc_fs.h>
15#include <linux/init.h>
16#include <linux/f2fs_fs.h>
17#include <linux/kthread.h>
18#include <linux/delay.h>
19#include <linux/freezer.h>
20#include <linux/blkdev.h>
21
22#include "f2fs.h"
23#include "node.h"
24#include "segment.h"
25#include "gc.h"
26
27static struct kmem_cache *winode_slab;
28
29static int gc_thread_func(void *data)
30{
31 struct f2fs_sb_info *sbi = data;
32 wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
33 long wait_ms;
34
35 wait_ms = GC_THREAD_MIN_SLEEP_TIME;
36
37 do {
38 if (try_to_freeze())
39 continue;
40 else
41 wait_event_interruptible_timeout(*wq,
42 kthread_should_stop(),
43 msecs_to_jiffies(wait_ms));
44 if (kthread_should_stop())
45 break;
46
47 f2fs_balance_fs(sbi);
48
49 if (!test_opt(sbi, BG_GC))
50 continue;
51
52 /*
53 * [GC triggering condition]
54 * 0. GC is not conducted currently.
55 * 1. There are enough dirty segments.
56 * 2. IO subsystem is idle by checking the # of writeback pages.
57 * 3. IO subsystem is idle by checking the # of requests in
58 * bdev's request list.
59 *
60		 * Note: we should avoid triggering GC too frequently, because
61		 * some segments may be invalidated soon afterwards by user
62		 * updates or deletions.
63		 * So we wait a while to let dirty segments accumulate.
64 */
65 if (!mutex_trylock(&sbi->gc_mutex))
66 continue;
67
68 if (!is_idle(sbi)) {
69 wait_ms = increase_sleep_time(wait_ms);
70 mutex_unlock(&sbi->gc_mutex);
71 continue;
72 }
73
74 if (has_enough_invalid_blocks(sbi))
75 wait_ms = decrease_sleep_time(wait_ms);
76 else
77 wait_ms = increase_sleep_time(wait_ms);
78
79 sbi->bg_gc++;
80
81 if (f2fs_gc(sbi, 1) == GC_NONE)
82 wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
83 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
84 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
85
86 } while (!kthread_should_stop());
87 return 0;
88}
89
90int start_gc_thread(struct f2fs_sb_info *sbi)
91{
92 struct f2fs_gc_kthread *gc_th;
93
94 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
95 if (!gc_th)
96 return -ENOMEM;
97
98 sbi->gc_thread = gc_th;
99 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
100 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
101 GC_THREAD_NAME);
102 if (IS_ERR(gc_th->f2fs_gc_task)) {
103 kfree(gc_th);
104 return -ENOMEM;
105 }
106 return 0;
107}
108
109void stop_gc_thread(struct f2fs_sb_info *sbi)
110{
111 struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
112 if (!gc_th)
113 return;
114 kthread_stop(gc_th->f2fs_gc_task);
115 kfree(gc_th);
116 sbi->gc_thread = NULL;
117}
118
119static int select_gc_type(int gc_type)
120{
121 return (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
122}
123
124static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
125 int type, struct victim_sel_policy *p)
126{
127 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
128
129 if (p->alloc_mode) {
130 p->gc_mode = GC_GREEDY;
131 p->dirty_segmap = dirty_i->dirty_segmap[type];
132 p->ofs_unit = 1;
133 } else {
134 p->gc_mode = select_gc_type(gc_type);
135 p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
136 p->ofs_unit = sbi->segs_per_sec;
137 }
138 p->offset = sbi->last_victim[p->gc_mode];
139}
140
141static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
142 struct victim_sel_policy *p)
143{
144 if (p->gc_mode == GC_GREEDY)
145 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
146 else if (p->gc_mode == GC_CB)
147 return UINT_MAX;
148 else /* No other gc_mode */
149 return 0;
150}
151
152static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
153{
154 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
155 unsigned int segno;
156
157 /*
158	 * If the gc_type is FG_GC, we can reuse the victim segments
159	 * that were selected by background GC earlier.
160	 * Those segments are guaranteed to have few valid blocks.
161 */
162 segno = find_next_bit(dirty_i->victim_segmap[BG_GC],
163 TOTAL_SEGS(sbi), 0);
164 if (segno < TOTAL_SEGS(sbi)) {
165 clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
166 return segno;
167 }
168 return NULL_SEGNO;
169}
170
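/*
 * Cost-benefit victim selection, following the classic LFS cleaning
 * policy: benefit = age * (100 - u) / (100 + u), where u is the
 * section's valid-block utilization in percent. The result is
 * inverted (subtracted from UINT_MAX) so that a smaller returned
 * cost always marks a better victim for the minimum search.
 */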
171static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
172{
173 struct sit_info *sit_i = SIT_I(sbi);
174 unsigned int secno = GET_SECNO(sbi, segno);
175 unsigned int start = secno * sbi->segs_per_sec;
176 unsigned long long mtime = 0;
177 unsigned int vblocks;
178 unsigned char age = 0;
179 unsigned char u;
180 unsigned int i;
181
182 for (i = 0; i < sbi->segs_per_sec; i++)
183 mtime += get_seg_entry(sbi, start + i)->mtime;
184 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
185
186 mtime = div_u64(mtime, sbi->segs_per_sec);
187 vblocks = div_u64(vblocks, sbi->segs_per_sec);
188
189 u = (vblocks * 100) >> sbi->log_blocks_per_seg;
190
191 /* Handle if the system time is changed by user */
192 if (mtime < sit_i->min_mtime)
193 sit_i->min_mtime = mtime;
194 if (mtime > sit_i->max_mtime)
195 sit_i->max_mtime = mtime;
196 if (sit_i->max_mtime != sit_i->min_mtime)
197 age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
198 sit_i->max_mtime - sit_i->min_mtime);
199
200 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
201}
202
203static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
204 struct victim_sel_policy *p)
205{
206 if (p->alloc_mode == SSR)
207 return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
208
209 /* alloc_mode == LFS */
210 if (p->gc_mode == GC_GREEDY)
211 return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
212 else
213 return get_cb_cost(sbi, segno);
214}
215
216/*
217 * This function is called from two paths:
218 * one is garbage collection and the other is SSR segment selection.
219 * When called during GC, it just picks a victim segment and does not
220 * remove it from the dirty seglist.
221 * When called for SSR segment selection, it finds the segment with the
222 * fewest valid blocks and removes it from the dirty seglist.
223 */
224static int get_victim_by_default(struct f2fs_sb_info *sbi,
225 unsigned int *result, int gc_type, int type, char alloc_mode)
226{
227 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
228 struct victim_sel_policy p;
229 unsigned int segno;
230 int nsearched = 0;
231
232 p.alloc_mode = alloc_mode;
233 select_policy(sbi, gc_type, type, &p);
234
235 p.min_segno = NULL_SEGNO;
236 p.min_cost = get_max_cost(sbi, &p);
237
238 mutex_lock(&dirty_i->seglist_lock);
239
240 if (p.alloc_mode == LFS && gc_type == FG_GC) {
241 p.min_segno = check_bg_victims(sbi);
242 if (p.min_segno != NULL_SEGNO)
243 goto got_it;
244 }
245
246 while (1) {
247 unsigned long cost;
248
249 segno = find_next_bit(p.dirty_segmap,
250 TOTAL_SEGS(sbi), p.offset);
251 if (segno >= TOTAL_SEGS(sbi)) {
252 if (sbi->last_victim[p.gc_mode]) {
253 sbi->last_victim[p.gc_mode] = 0;
254 p.offset = 0;
255 continue;
256 }
257 break;
258 }
259 p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
260
261 if (test_bit(segno, dirty_i->victim_segmap[FG_GC]))
262 continue;
263 if (gc_type == BG_GC &&
264 test_bit(segno, dirty_i->victim_segmap[BG_GC]))
265 continue;
266 if (IS_CURSEC(sbi, GET_SECNO(sbi, segno)))
267 continue;
268
269 cost = get_gc_cost(sbi, segno, &p);
270
271 if (p.min_cost > cost) {
272 p.min_segno = segno;
273 p.min_cost = cost;
274 }
275
276 if (cost == get_max_cost(sbi, &p))
277 continue;
278
279 if (nsearched++ >= MAX_VICTIM_SEARCH) {
280 sbi->last_victim[p.gc_mode] = segno;
281 break;
282 }
283 }
284got_it:
285 if (p.min_segno != NULL_SEGNO) {
286 *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
287 if (p.alloc_mode == LFS) {
288 int i;
289 for (i = 0; i < p.ofs_unit; i++)
290 set_bit(*result + i,
291 dirty_i->victim_segmap[gc_type]);
292 }
293 }
294 mutex_unlock(&dirty_i->seglist_lock);
295
296 return (p.min_segno == NULL_SEGNO) ? 0 : 1;
297}
298
299static const struct victim_selection default_v_ops = {
300 .get_victim = get_victim_by_default,
301};
302
303static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
304{
305 struct list_head *this;
306 struct inode_entry *ie;
307
308 list_for_each(this, ilist) {
309 ie = list_entry(this, struct inode_entry, list);
310 if (ie->inode->i_ino == ino)
311 return ie->inode;
312 }
313 return NULL;
314}
315
316static void add_gc_inode(struct inode *inode, struct list_head *ilist)
317{
318 struct list_head *this;
319 struct inode_entry *new_ie, *ie;
320
321 list_for_each(this, ilist) {
322 ie = list_entry(this, struct inode_entry, list);
323 if (ie->inode == inode) {
324 iput(inode);
325 return;
326 }
327 }
328repeat:
329 new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
330 if (!new_ie) {
331 cond_resched();
332 goto repeat;
333 }
334 new_ie->inode = inode;
335 list_add_tail(&new_ie->list, ilist);
336}
337
338static void put_gc_inode(struct list_head *ilist)
339{
340 struct inode_entry *ie, *next_ie;
341 list_for_each_entry_safe(ie, next_ie, ilist, list) {
342 iput(ie->inode);
343 list_del(&ie->list);
344 kmem_cache_free(winode_slab, ie);
345 }
346}
347
348static int check_valid_map(struct f2fs_sb_info *sbi,
349 unsigned int segno, int offset)
350{
351 struct sit_info *sit_i = SIT_I(sbi);
352 struct seg_entry *sentry;
353 int ret;
354
355 mutex_lock(&sit_i->sentry_lock);
356 sentry = get_seg_entry(sbi, segno);
357 ret = f2fs_test_bit(offset, sentry->cur_valid_map);
358 mutex_unlock(&sit_i->sentry_lock);
359 return ret ? GC_OK : GC_NEXT;
360}
361
362/*
363 * This function compares the node address in the summary with that in
364 * the NAT. If the node is valid, it is written out with cold status;
365 * otherwise (an invalid node) it is ignored.
366 */
367static int gc_node_segment(struct f2fs_sb_info *sbi,
368 struct f2fs_summary *sum, unsigned int segno, int gc_type)
369{
370 bool initial = true;
371 struct f2fs_summary *entry;
372 int off;
373
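	/*
	 * Two passes over the summary entries: the first issues readahead
	 * for all valid node pages, the second dirties them for writeback.
	 */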
374next_step:
375 entry = sum;
376 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
377 nid_t nid = le32_to_cpu(entry->nid);
378 struct page *node_page;
379 int err;
380
381 /*
382		 * Make sure the free segments can hold all the dirty node
383		 * pages that must be written out before the next CP.
384		 * So check the space required for the dirty node pages.
385 */
386 if (should_do_checkpoint(sbi)) {
387 mutex_lock(&sbi->cp_mutex);
388 block_operations(sbi);
389 return GC_BLOCKED;
390 }
391
392 err = check_valid_map(sbi, segno, off);
393 if (err == GC_ERROR)
394 return err;
395 else if (err == GC_NEXT)
396 continue;
397
398 if (initial) {
399 ra_node_page(sbi, nid);
400 continue;
401 }
402 node_page = get_node_page(sbi, nid);
403 if (IS_ERR(node_page))
404 continue;
405
406 /* set page dirty and write it */
407 if (!PageWriteback(node_page))
408 set_page_dirty(node_page);
409 f2fs_put_page(node_page, 1);
410 stat_inc_node_blk_count(sbi, 1);
411 }
412 if (initial) {
413 initial = false;
414 goto next_step;
415 }
416
417 if (gc_type == FG_GC) {
418 struct writeback_control wbc = {
419 .sync_mode = WB_SYNC_ALL,
420 .nr_to_write = LONG_MAX,
421 .for_reclaim = 0,
422 };
423 sync_node_pages(sbi, 0, &wbc);
424 }
425 return GC_DONE;
426}
427
428/*
429 * Calculate the start block index of the data covered by this node page
430 */
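/*
 * In the node-offset numbering, ofs 0 is the inode itself, ofs 1-2 are
 * the two direct nodes, and higher offsets belong to (double) indirect
 * nodes and their children. For example, node_ofs 1 (the first direct
 * node) yields a start block index of ADDRS_PER_INODE.
 */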
431block_t start_bidx_of_node(unsigned int node_ofs)
432{
433 block_t start_bidx;
434 unsigned int bidx, indirect_blks;
435 int dec;
436
437 indirect_blks = 2 * NIDS_PER_BLOCK + 4;
438
439 start_bidx = 1;
440 if (node_ofs == 0) {
441 start_bidx = 0;
442 } else if (node_ofs <= 2) {
443 bidx = node_ofs - 1;
444 } else if (node_ofs <= indirect_blks) {
445 dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
446 bidx = node_ofs - 2 - dec;
447 } else {
448 dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
449 bidx = node_ofs - 5 - dec;
450 }
451
452 if (start_bidx)
453 start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
454 return start_bidx;
455}
456
457static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
458 struct node_info *dni, block_t blkaddr, unsigned int *nofs)
459{
460 struct page *node_page;
461 nid_t nid;
462 unsigned int ofs_in_node;
463 block_t source_blkaddr;
464
465 nid = le32_to_cpu(sum->nid);
466 ofs_in_node = le16_to_cpu(sum->ofs_in_node);
467
468 node_page = get_node_page(sbi, nid);
469 if (IS_ERR(node_page))
470 return GC_NEXT;
471
472 get_node_info(sbi, nid, dni);
473
474 if (sum->version != dni->version) {
475 f2fs_put_page(node_page, 1);
476 return GC_NEXT;
477 }
478
479 *nofs = ofs_of_node(node_page);
480 source_blkaddr = datablock_addr(node_page, ofs_in_node);
481 f2fs_put_page(node_page, 1);
482
483 if (source_blkaddr != blkaddr)
484 return GC_NEXT;
485 return GC_OK;
486}
487
488static void move_data_page(struct inode *inode, struct page *page, int gc_type)
489{
490 if (page->mapping != inode->i_mapping)
491 goto out;
492
493 if (inode != page->mapping->host)
494 goto out;
495
496 if (PageWriteback(page))
497 goto out;
498
499 if (gc_type == BG_GC) {
500 set_page_dirty(page);
501 set_cold_data(page);
502 } else {
503 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
504 mutex_lock_op(sbi, DATA_WRITE);
505 if (clear_page_dirty_for_io(page) &&
506 S_ISDIR(inode->i_mode)) {
507 dec_page_count(sbi, F2FS_DIRTY_DENTS);
508 inode_dec_dirty_dents(inode);
509 }
510 set_cold_data(page);
511 do_write_data_page(page);
512 mutex_unlock_op(sbi, DATA_WRITE);
513 clear_cold_data(page);
514 }
515out:
516 f2fs_put_page(page, 1);
517}
518
519/*
520 * This function tries to get the parent node of a victim data block and
521 * checks the block's validity. If the block is valid, it is copied with
522 * cold status and the parent node is updated.
523 * If the parent node is not valid, or the data block address differs,
524 * the victim data block is ignored.
525 */
526static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
527 struct list_head *ilist, unsigned int segno, int gc_type)
528{
529 struct super_block *sb = sbi->sb;
530 struct f2fs_summary *entry;
531 block_t start_addr;
532 int err, off;
533 int phase = 0;
534
535 start_addr = START_BLOCK(sbi, segno);
536
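	/*
	 * Four phases over the summary entries: phase 0 reads ahead the
	 * dnode pages, phase 1 reads ahead the inode pages, phase 2 grabs
	 * the inodes and touches their data pages, and phase 3 actually
	 * moves the data pages.
	 */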
537next_step:
538 entry = sum;
539 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
540 struct page *data_page;
541 struct inode *inode;
542 struct node_info dni; /* dnode info for the data */
543 unsigned int ofs_in_node, nofs;
544 block_t start_bidx;
545
546 /*
547		 * Make sure the free segments can hold all the dirty node
548		 * pages that must be written out before the next CP.
549		 * So check the space required for the dirty node pages.
550 */
551 if (should_do_checkpoint(sbi)) {
552 mutex_lock(&sbi->cp_mutex);
553 block_operations(sbi);
554 err = GC_BLOCKED;
555 goto stop;
556 }
557
558 err = check_valid_map(sbi, segno, off);
559 if (err == GC_ERROR)
560 goto stop;
561 else if (err == GC_NEXT)
562 continue;
563
564 if (phase == 0) {
565 ra_node_page(sbi, le32_to_cpu(entry->nid));
566 continue;
567 }
568
569 /* Get an inode by ino with checking validity */
570 err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs);
571 if (err == GC_ERROR)
572 goto stop;
573 else if (err == GC_NEXT)
574 continue;
575
576 if (phase == 1) {
577 ra_node_page(sbi, dni.ino);
578 continue;
579 }
580
581 start_bidx = start_bidx_of_node(nofs);
582 ofs_in_node = le16_to_cpu(entry->ofs_in_node);
583
584 if (phase == 2) {
585 inode = f2fs_iget_nowait(sb, dni.ino);
586 if (IS_ERR(inode))
587 continue;
588
589 data_page = find_data_page(inode,
590 start_bidx + ofs_in_node);
591 if (IS_ERR(data_page))
592 goto next_iput;
593
594 f2fs_put_page(data_page, 0);
595 add_gc_inode(inode, ilist);
596 } else {
597 inode = find_gc_inode(dni.ino, ilist);
598 if (inode) {
599 data_page = get_lock_data_page(inode,
600 start_bidx + ofs_in_node);
601 if (IS_ERR(data_page))
602 continue;
603 move_data_page(inode, data_page, gc_type);
604 stat_inc_data_blk_count(sbi, 1);
605 }
606 }
607 continue;
608next_iput:
609 iput(inode);
610 }
611 if (++phase < 4)
612 goto next_step;
613 err = GC_DONE;
614stop:
615 if (gc_type == FG_GC)
616 f2fs_submit_bio(sbi, DATA, true);
617 return err;
618}
619
620static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
621 int gc_type, int type)
622{
623 struct sit_info *sit_i = SIT_I(sbi);
624 int ret;
625 mutex_lock(&sit_i->sentry_lock);
626 ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS);
627 mutex_unlock(&sit_i->sentry_lock);
628 return ret;
629}
630
631static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
632 struct list_head *ilist, int gc_type)
633{
634 struct page *sum_page;
635 struct f2fs_summary_block *sum;
636 int ret = GC_DONE;
637
638 /* read segment summary of victim */
639 sum_page = get_sum_page(sbi, segno);
640 if (IS_ERR(sum_page))
641 return GC_ERROR;
642
643 /*
644	 * CP needs to lock sum_page. At this point we don't need to
645	 * lock this page, because the summary page is not going anywhere.
646	 * Also, the page will not be updated before GC is done.
647 */
648 unlock_page(sum_page);
649 sum = page_address(sum_page);
650
651 switch (GET_SUM_TYPE((&sum->footer))) {
652 case SUM_TYPE_NODE:
653 ret = gc_node_segment(sbi, sum->entries, segno, gc_type);
654 break;
655 case SUM_TYPE_DATA:
656 ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
657 break;
658 }
659 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
660 stat_inc_call_count(sbi->stat_info);
661
662 f2fs_put_page(sum_page, 0);
663 return ret;
664}
665
666int f2fs_gc(struct f2fs_sb_info *sbi, int nGC)
667{
668 unsigned int segno;
669 int old_free_secs, cur_free_secs;
670 int gc_status, nfree;
671 struct list_head ilist;
672 int gc_type = BG_GC;
673
674 INIT_LIST_HEAD(&ilist);
675gc_more:
676 nfree = 0;
677 gc_status = GC_NONE;
678
679 if (has_not_enough_free_secs(sbi))
680 old_free_secs = reserved_sections(sbi);
681 else
682 old_free_secs = free_sections(sbi);
683
684 while (sbi->sb->s_flags & MS_ACTIVE) {
685 int i;
686 if (has_not_enough_free_secs(sbi))
687 gc_type = FG_GC;
688
689 cur_free_secs = free_sections(sbi) + nfree;
690
691 /* We got free space successfully. */
692 if (nGC < cur_free_secs - old_free_secs)
693 break;
694
695 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
696 break;
697
698 for (i = 0; i < sbi->segs_per_sec; i++) {
699 /*
700			 * do_garbage_collect returns one of three gc_status
701			 * values: GC_ERROR, GC_DONE, or GC_BLOCKED.
702			 * If GC finishes uncleanly, we have to return the
703			 * victim to the dirty segment list.
704 */
705 gc_status = do_garbage_collect(sbi, segno + i,
706 &ilist, gc_type);
707 if (gc_status != GC_DONE)
708 goto stop;
709 nfree++;
710 }
711 }
712stop:
713 if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
714 write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
715 if (nfree)
716 goto gc_more;
717 }
718 mutex_unlock(&sbi->gc_mutex);
719
720 put_gc_inode(&ilist);
721 BUG_ON(!list_empty(&ilist));
722 return gc_status;
723}
724
725void build_gc_manager(struct f2fs_sb_info *sbi)
726{
727 DIRTY_I(sbi)->v_ops = &default_v_ops;
728}
729
730int create_gc_caches(void)
731{
732 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
733 sizeof(struct inode_entry), NULL);
734 if (!winode_slab)
735 return -ENOMEM;
736 return 0;
737}
738
739void destroy_gc_caches(void)
740{
741 kmem_cache_destroy(winode_slab);
742}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
new file mode 100644
index 000000000000..b026d9354ccd
--- /dev/null
+++ b/fs/f2fs/gc.h
@@ -0,0 +1,117 @@
1/*
2 * fs/f2fs/gc.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#define GC_THREAD_NAME "f2fs_gc_task"
12#define GC_THREAD_MIN_WB_PAGES 1 /*
13 * a threshold to determine
14 * whether IO subsystem is idle
15 * or not
16 */
17#define GC_THREAD_MIN_SLEEP_TIME 10000 /* milliseconds */
18#define GC_THREAD_MAX_SLEEP_TIME 30000
19#define GC_THREAD_NOGC_SLEEP_TIME 10000
20#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
21#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
22
23/* Search max. number of dirty segments to select a victim segment */
24#define MAX_VICTIM_SEARCH 20
25
26enum {
27 GC_NONE = 0,
28 GC_ERROR,
29 GC_OK,
30 GC_NEXT,
31 GC_BLOCKED,
32 GC_DONE,
33};
34
35struct f2fs_gc_kthread {
36 struct task_struct *f2fs_gc_task;
37 wait_queue_head_t gc_wait_queue_head;
38};
39
40struct inode_entry {
41 struct list_head list;
42 struct inode *inode;
43};
44
45/*
46 * inline functions
47 */
48static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
49{
50 if (free_segments(sbi) < overprovision_segments(sbi))
51 return 0;
52 else
53 return (free_segments(sbi) - overprovision_segments(sbi))
54 << sbi->log_blocks_per_seg;
55}
56
57static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
58{
59 return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
60}
61
62static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
63{
64 block_t reclaimable_user_blocks = sbi->user_block_count -
65 written_block_count(sbi);
66 return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
67}
68
69static inline long increase_sleep_time(long wait)
70{
71 wait += GC_THREAD_MIN_SLEEP_TIME;
72 if (wait > GC_THREAD_MAX_SLEEP_TIME)
73 wait = GC_THREAD_MAX_SLEEP_TIME;
74 return wait;
75}
76
77static inline long decrease_sleep_time(long wait)
78{
79 wait -= GC_THREAD_MIN_SLEEP_TIME;
80 if (wait <= GC_THREAD_MIN_SLEEP_TIME)
81 wait = GC_THREAD_MIN_SLEEP_TIME;
82 return wait;
83}
84
85static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
86{
87 block_t invalid_user_blocks = sbi->user_block_count -
88 written_block_count(sbi);
89 /*
90	 * Background GC is triggered under the following conditions:
91	 * 1. there are a fair number of invalid blocks, and
92	 * 2. there is not enough free space.
93 */
94 if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
95 free_user_blocks(sbi) < limit_free_user_blocks(sbi))
96 return true;
97 return false;
98}
99
100static inline int is_idle(struct f2fs_sb_info *sbi)
101{
102 struct block_device *bdev = sbi->sb->s_bdev;
103 struct request_queue *q = bdev_get_queue(bdev);
104 struct request_list *rl = &q->root_rl;
105 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
106}
107
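/*
 * Conservatively estimate (rounding up per section) how many sections
 * are needed to flush all dirty node and dentry pages, and ask for a
 * checkpoint when the free sections may not cover that demand.
 */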
108static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi)
109{
110 unsigned int pages_per_sec = sbi->segs_per_sec *
111 (1 << sbi->log_blocks_per_seg);
112 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
113 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
114 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
115 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
116 return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2);
117}
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
new file mode 100644
index 000000000000..a60f04200f8b
--- /dev/null
+++ b/fs/f2fs/hash.c
@@ -0,0 +1,97 @@
1/*
2 * fs/f2fs/hash.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext3/hash.c
8 *
9 * Copyright (C) 2002 by Theodore Ts'o
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#include <linux/types.h>
16#include <linux/fs.h>
17#include <linux/f2fs_fs.h>
18#include <linux/cryptohash.h>
19#include <linux/pagemap.h>
20
21#include "f2fs.h"
22
23/*
24 * Hashing code copied from ext3
25 */
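/* TEA key-schedule constant: floor(2^32 / golden ratio) */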
26#define DELTA 0x9E3779B9
27
28static void TEA_transform(unsigned int buf[4], unsigned int const in[])
29{
30 __u32 sum = 0;
31 __u32 b0 = buf[0], b1 = buf[1];
32 __u32 a = in[0], b = in[1], c = in[2], d = in[3];
33 int n = 16;
34
35 do {
36 sum += DELTA;
37 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
38 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
39 } while (--n);
40
41 buf[0] += b0;
42 buf[1] += b1;
43}
44
45static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
46{
47 unsigned pad, val;
48 int i;
49
50 pad = (__u32)len | ((__u32)len << 8);
51 pad |= pad << 16;
52
53 val = pad;
54 if (len > num * 4)
55 len = num * 4;
56 for (i = 0; i < len; i++) {
57 if ((i % 4) == 0)
58 val = pad;
59 val = msg[i] + (val << 8);
60 if ((i % 4) == 3) {
61 *buf++ = val;
62 val = pad;
63 num--;
64 }
65 }
66 if (--num >= 0)
67 *buf++ = val;
68 while (--num >= 0)
69 *buf++ = pad;
70}
71
72f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
73{
74 __u32 hash, minor_hash;
75 f2fs_hash_t f2fs_hash;
76 const char *p;
77 __u32 in[8], buf[4];
78
79 /* Initialize the default seed for the hash checksum functions */
80 buf[0] = 0x67452301;
81 buf[1] = 0xefcdab89;
82 buf[2] = 0x98badcfe;
83 buf[3] = 0x10325476;
84
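	/* fold the name through TEA, 16 input bytes per round */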
85 p = name;
86 while (len > 0) {
87 str2hashbuf(p, len, in, 4);
88 TEA_transform(buf, in);
89 len -= 16;
90 p += 16;
91 }
92 hash = buf[0];
93 minor_hash = buf[1];
94
95 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
96 return f2fs_hash;
97}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
new file mode 100644
index 000000000000..df5fb381ebf1
--- /dev/null
+++ b/fs/f2fs/inode.c
@@ -0,0 +1,268 @@
1/*
2 * fs/f2fs/inode.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h>
14#include <linux/writeback.h>
15
16#include "f2fs.h"
17#include "node.h"
18
19struct f2fs_iget_args {
20 u64 ino;
21 int on_free;
22};
23
24void f2fs_set_inode_flags(struct inode *inode)
25{
26 unsigned int flags = F2FS_I(inode)->i_flags;
27
28 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
29 S_NOATIME | S_DIRSYNC);
30
31 if (flags & FS_SYNC_FL)
32 inode->i_flags |= S_SYNC;
33 if (flags & FS_APPEND_FL)
34 inode->i_flags |= S_APPEND;
35 if (flags & FS_IMMUTABLE_FL)
36 inode->i_flags |= S_IMMUTABLE;
37 if (flags & FS_NOATIME_FL)
38 inode->i_flags |= S_NOATIME;
39 if (flags & FS_DIRSYNC_FL)
40 inode->i_flags |= S_DIRSYNC;
41}
42
43static int f2fs_iget_test(struct inode *inode, void *data)
44{
45 struct f2fs_iget_args *args = data;
46
47 if (inode->i_ino != args->ino)
48 return 0;
49 if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
50 args->on_free = 1;
51 return 0;
52 }
53 return 1;
54}
55
56struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino)
57{
58 struct f2fs_iget_args args = {
59 .ino = ino,
60 .on_free = 0
61 };
62 struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args);
63
64 if (inode)
65 return inode;
66 if (!args.on_free)
67 return f2fs_iget(sb, ino);
68 return ERR_PTR(-ENOENT);
69}
70
71static int do_read_inode(struct inode *inode)
72{
73 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
74 struct f2fs_inode_info *fi = F2FS_I(inode);
75 struct page *node_page;
76 struct f2fs_node *rn;
77 struct f2fs_inode *ri;
78
79 /* Check if ino is within scope */
80 check_nid_range(sbi, inode->i_ino);
81
82 node_page = get_node_page(sbi, inode->i_ino);
83 if (IS_ERR(node_page))
84 return PTR_ERR(node_page);
85
86 rn = page_address(node_page);
87 ri = &(rn->i);
88
89 inode->i_mode = le16_to_cpu(ri->i_mode);
90 i_uid_write(inode, le32_to_cpu(ri->i_uid));
91 i_gid_write(inode, le32_to_cpu(ri->i_gid));
92 set_nlink(inode, le32_to_cpu(ri->i_links));
93 inode->i_size = le64_to_cpu(ri->i_size);
94 inode->i_blocks = le64_to_cpu(ri->i_blocks);
95
96 inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
97 inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
98 inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
99 inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
100 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
101 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
102 inode->i_generation = le32_to_cpu(ri->i_generation);
103
104 fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
105 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
106 fi->i_flags = le32_to_cpu(ri->i_flags);
107 fi->flags = 0;
108 fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1;
109 fi->i_advise = ri->i_advise;
110 fi->i_pino = le32_to_cpu(ri->i_pino);
111 get_extent_info(&fi->ext, ri->i_ext);
112 f2fs_put_page(node_page, 1);
113 return 0;
114}
115
116struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
117{
118 struct f2fs_sb_info *sbi = F2FS_SB(sb);
119 struct inode *inode;
120 int ret;
121
122 inode = iget_locked(sb, ino);
123 if (!inode)
124 return ERR_PTR(-ENOMEM);
125 if (!(inode->i_state & I_NEW))
126 return inode;
127 if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
128 goto make_now;
129
130 ret = do_read_inode(inode);
131 if (ret)
132 goto bad_inode;
133
134 if (!sbi->por_doing && inode->i_nlink == 0) {
135 ret = -ENOENT;
136 goto bad_inode;
137 }
138
139make_now:
140 if (ino == F2FS_NODE_INO(sbi)) {
141 inode->i_mapping->a_ops = &f2fs_node_aops;
142 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
143 } else if (ino == F2FS_META_INO(sbi)) {
144 inode->i_mapping->a_ops = &f2fs_meta_aops;
145 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
146 } else if (S_ISREG(inode->i_mode)) {
147 inode->i_op = &f2fs_file_inode_operations;
148 inode->i_fop = &f2fs_file_operations;
149 inode->i_mapping->a_ops = &f2fs_dblock_aops;
150 } else if (S_ISDIR(inode->i_mode)) {
151 inode->i_op = &f2fs_dir_inode_operations;
152 inode->i_fop = &f2fs_dir_operations;
153 inode->i_mapping->a_ops = &f2fs_dblock_aops;
154 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE |
155 __GFP_ZERO);
156 } else if (S_ISLNK(inode->i_mode)) {
157 inode->i_op = &f2fs_symlink_inode_operations;
158 inode->i_mapping->a_ops = &f2fs_dblock_aops;
159 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
160 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
161 inode->i_op = &f2fs_special_inode_operations;
162 init_special_inode(inode, inode->i_mode, inode->i_rdev);
163 } else {
164 ret = -EIO;
165 goto bad_inode;
166 }
167 unlock_new_inode(inode);
168
169 return inode;
170
171bad_inode:
172 iget_failed(inode);
173 return ERR_PTR(ret);
174}
175
176void update_inode(struct inode *inode, struct page *node_page)
177{
178 struct f2fs_node *rn;
179 struct f2fs_inode *ri;
180
181 wait_on_page_writeback(node_page);
182
183 rn = page_address(node_page);
184 ri = &(rn->i);
185
186 ri->i_mode = cpu_to_le16(inode->i_mode);
187 ri->i_advise = F2FS_I(inode)->i_advise;
188 ri->i_uid = cpu_to_le32(i_uid_read(inode));
189 ri->i_gid = cpu_to_le32(i_gid_read(inode));
190 ri->i_links = cpu_to_le32(inode->i_nlink);
191 ri->i_size = cpu_to_le64(i_size_read(inode));
192 ri->i_blocks = cpu_to_le64(inode->i_blocks);
193 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
194
195 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
196 ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
197 ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
198 ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
199 ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
200 ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
201 ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
202 ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
203 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
204 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
205 ri->i_generation = cpu_to_le32(inode->i_generation);
206 set_page_dirty(node_page);
207}
208
209int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
210{
211 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
212 struct page *node_page;
213 bool need_lock = false;
214
215 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
216 inode->i_ino == F2FS_META_INO(sbi))
217 return 0;
218
219 node_page = get_node_page(sbi, inode->i_ino);
220 if (IS_ERR(node_page))
221 return PTR_ERR(node_page);
222
223 if (!PageDirty(node_page)) {
224 need_lock = true;
225 f2fs_put_page(node_page, 1);
226 mutex_lock(&sbi->write_inode);
227 node_page = get_node_page(sbi, inode->i_ino);
228 if (IS_ERR(node_page)) {
229 mutex_unlock(&sbi->write_inode);
230 return PTR_ERR(node_page);
231 }
232 }
233 update_inode(inode, node_page);
234 f2fs_put_page(node_page, 1);
235 if (need_lock)
236 mutex_unlock(&sbi->write_inode);
237 return 0;
238}
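The need_lock dance above deserves a remark (my reading; nothing in the commit states the intent):

/*
 * If the node page was clean, it is re-acquired under
 * sbi->write_inode before update_inode() dirties it, presumably
 * so that a concurrent checkpoint cannot slip in between fetching
 * the page and redirtying it.
 */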
239
240/*
241 * Called at the last iput() if i_nlink is zero
242 */
243void f2fs_evict_inode(struct inode *inode)
244{
245 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
246
247 truncate_inode_pages(&inode->i_data, 0);
248
249 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
250 inode->i_ino == F2FS_META_INO(sbi))
251 goto no_delete;
252
253 BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
254 remove_dirty_dir_inode(inode);
255
256 if (inode->i_nlink || is_bad_inode(inode))
257 goto no_delete;
258
259 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
260 i_size_write(inode, 0);
261
262 if (F2FS_HAS_BLOCKS(inode))
263 f2fs_truncate(inode);
264
265 remove_inode_page(inode);
266no_delete:
267 clear_inode(inode);
268}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
new file mode 100644
index 000000000000..89b7675dc377
--- /dev/null
+++ b/fs/f2fs/namei.c
@@ -0,0 +1,503 @@
1/*
2 * fs/f2fs/namei.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/pagemap.h>
14#include <linux/sched.h>
15#include <linux/ctype.h>
16
17#include "f2fs.h"
18#include "xattr.h"
19#include "acl.h"
20
21static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
22{
23 struct super_block *sb = dir->i_sb;
24 struct f2fs_sb_info *sbi = F2FS_SB(sb);
25 nid_t ino;
26 struct inode *inode;
27 bool nid_free = false;
28 int err;
29
30 inode = new_inode(sb);
31 if (!inode)
32 return ERR_PTR(-ENOMEM);
33
34 mutex_lock_op(sbi, NODE_NEW);
35 if (!alloc_nid(sbi, &ino)) {
36 mutex_unlock_op(sbi, NODE_NEW);
37 err = -ENOSPC;
38 goto fail;
39 }
40 mutex_unlock_op(sbi, NODE_NEW);
41
42 inode->i_uid = current_fsuid();
43
44 if (dir->i_mode & S_ISGID) {
45 inode->i_gid = dir->i_gid;
46 if (S_ISDIR(mode))
47 mode |= S_ISGID;
48 } else {
49 inode->i_gid = current_fsgid();
50 }
51
52 inode->i_ino = ino;
53 inode->i_mode = mode;
54 inode->i_blocks = 0;
55 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
56 inode->i_generation = sbi->s_next_generation++;
57
58 err = insert_inode_locked(inode);
59 if (err) {
60 err = -EINVAL;
61 nid_free = true;
62 goto out;
63 }
64
65 mark_inode_dirty(inode);
66 return inode;
67
68out:
69 clear_nlink(inode);
70 unlock_new_inode(inode);
71fail:
72 iput(inode);
73 if (nid_free)
74 alloc_nid_failed(sbi, ino);
75 return ERR_PTR(err);
76}
77
78static int is_multimedia_file(const unsigned char *s, const char *sub)
79{
80 int slen = strlen(s);
81 int sublen = strlen(sub);
82 int ret;
83
84 if (sublen > slen)
85 return 1;
86
87 ret = memcmp(s + slen - sublen, sub, sublen);
88 if (ret) { /* compare upper case */
89 int i;
90 char upper_sub[8];
91 for (i = 0; i < sublen && i < sizeof(upper_sub); i++)
92 upper_sub[i] = toupper(sub[i]);
93 return memcmp(s + slen - sublen, upper_sub, sublen);
94 }
95
96 return ret;
97}
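The return convention here follows memcmp(): 0 means the name ends with the extension. A quick sketch of the intended behaviour (hypothetical file names, not part of the commit):

/*
 * is_multimedia_file("a.mp4", "mp4") == 0   -> match
 * is_multimedia_file("a.MP4", "mp4") == 0   -> match via the toupper pass
 * is_multimedia_file("a.txt", "mp4") != 0   -> no match
 */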
98
99/*
100 * Set multimedia files as cold files for hot/cold data separation
101 */
102static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
103 const unsigned char *name)
104{
105 int i;
106 __u8 (*extlist)[8] = sbi->raw_super->extension_list;
107
108 int count = le32_to_cpu(sbi->raw_super->extension_count);
109 for (i = 0; i < count; i++) {
110 if (!is_multimedia_file(name, extlist[i])) {
111 F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
112 break;
113 }
114 }
115}
116
117static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
118 bool excl)
119{
120 struct super_block *sb = dir->i_sb;
121 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 struct inode *inode;
123 nid_t ino = 0;
124 int err;
125
126 inode = f2fs_new_inode(dir, mode);
127 if (IS_ERR(inode))
128 return PTR_ERR(inode);
129
130 if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
131 set_cold_file(sbi, inode, dentry->d_name.name);
132
133 inode->i_op = &f2fs_file_inode_operations;
134 inode->i_fop = &f2fs_file_operations;
135 inode->i_mapping->a_ops = &f2fs_dblock_aops;
136 ino = inode->i_ino;
137
138 err = f2fs_add_link(dentry, inode);
139 if (err)
140 goto out;
141
142 alloc_nid_done(sbi, ino);
143
144 if (!sbi->por_doing)
145 d_instantiate(dentry, inode);
146 unlock_new_inode(inode);
147
148 f2fs_balance_fs(sbi);
149 return 0;
150out:
151 clear_nlink(inode);
152 unlock_new_inode(inode);
153 iput(inode);
154 alloc_nid_failed(sbi, ino);
155 return err;
156}
157
158static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
159 struct dentry *dentry)
160{
161 struct inode *inode = old_dentry->d_inode;
162 struct super_block *sb = dir->i_sb;
163 struct f2fs_sb_info *sbi = F2FS_SB(sb);
164 int err;
165
166 inode->i_ctime = CURRENT_TIME;
167 atomic_inc(&inode->i_count);
168
169 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
170 err = f2fs_add_link(dentry, inode);
171 if (err)
172 goto out;
173
174 d_instantiate(dentry, inode);
175
176 f2fs_balance_fs(sbi);
177 return 0;
178out:
179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
180 iput(inode);
181 return err;
182}
183
184struct dentry *f2fs_get_parent(struct dentry *child)
185{
186 struct qstr dotdot = QSTR_INIT("..", 2);
187 unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
188 if (!ino)
189 return ERR_PTR(-ENOENT);
190 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
191}
192
193static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
194 unsigned int flags)
195{
196 struct inode *inode = NULL;
197 struct f2fs_dir_entry *de;
198 struct page *page;
199
200 if (dentry->d_name.len > F2FS_MAX_NAME_LEN)
201 return ERR_PTR(-ENAMETOOLONG);
202
203 de = f2fs_find_entry(dir, &dentry->d_name, &page);
204 if (de) {
205 nid_t ino = le32_to_cpu(de->ino);
206 kunmap(page);
207 f2fs_put_page(page, 0);
208
209 inode = f2fs_iget(dir->i_sb, ino);
210 if (IS_ERR(inode))
211 return ERR_CAST(inode);
212 }
213
214 return d_splice_alias(inode, dentry);
215}
216
217static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
218{
219 struct super_block *sb = dir->i_sb;
220 struct f2fs_sb_info *sbi = F2FS_SB(sb);
221 struct inode *inode = dentry->d_inode;
222 struct f2fs_dir_entry *de;
223 struct page *page;
224 int err = -ENOENT;
225
226 de = f2fs_find_entry(dir, &dentry->d_name, &page);
227 if (!de)
228 goto fail;
229
230 err = check_orphan_space(sbi);
231 if (err) {
232 kunmap(page);
233 f2fs_put_page(page, 0);
234 goto fail;
235 }
236
237 f2fs_delete_entry(de, page, inode);
238
239 /* In order to evict this inode, we set it dirty */
240 mark_inode_dirty(inode);
241 f2fs_balance_fs(sbi);
242fail:
243 return err;
244}
245
246static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
247 const char *symname)
248{
249 struct super_block *sb = dir->i_sb;
250 struct f2fs_sb_info *sbi = F2FS_SB(sb);
251 struct inode *inode;
252 unsigned symlen = strlen(symname) + 1;
253 int err;
254
255 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
256 if (IS_ERR(inode))
257 return PTR_ERR(inode);
258
259 inode->i_op = &f2fs_symlink_inode_operations;
260 inode->i_mapping->a_ops = &f2fs_dblock_aops;
261
262 err = f2fs_add_link(dentry, inode);
263 if (err)
264 goto out;
265
266 err = page_symlink(inode, symname, symlen);
267 alloc_nid_done(sbi, inode->i_ino);
268
269 d_instantiate(dentry, inode);
270 unlock_new_inode(inode);
271
272 f2fs_balance_fs(sbi);
273
274 return err;
275out:
276 clear_nlink(inode);
277 unlock_new_inode(inode);
278 iput(inode);
279 alloc_nid_failed(sbi, inode->i_ino);
280 return err;
281}
282
283static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
284{
285 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
286 struct inode *inode;
287 int err;
288
289 inode = f2fs_new_inode(dir, S_IFDIR | mode);
290 if (IS_ERR(inode))
291 return PTR_ERR(inode);
292
293 inode->i_op = &f2fs_dir_inode_operations;
294 inode->i_fop = &f2fs_dir_operations;
295 inode->i_mapping->a_ops = &f2fs_dblock_aops;
296 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
297
298 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
299 err = f2fs_add_link(dentry, inode);
300 if (err)
301 goto out_fail;
302
303 alloc_nid_done(sbi, inode->i_ino);
304
305 d_instantiate(dentry, inode);
306 unlock_new_inode(inode);
307
308 f2fs_balance_fs(sbi);
309 return 0;
310
311out_fail:
312 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
313 clear_nlink(inode);
314 unlock_new_inode(inode);
315 iput(inode);
316 alloc_nid_failed(sbi, inode->i_ino);
317 return err;
318}
319
320static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
321{
322 struct inode *inode = dentry->d_inode;
323 if (f2fs_empty_dir(inode))
324 return f2fs_unlink(dir, dentry);
325 return -ENOTEMPTY;
326}
327
328static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
329 umode_t mode, dev_t rdev)
330{
331 struct super_block *sb = dir->i_sb;
332 struct f2fs_sb_info *sbi = F2FS_SB(sb);
333 struct inode *inode;
334 int err = 0;
335
336 if (!new_valid_dev(rdev))
337 return -EINVAL;
338
339 inode = f2fs_new_inode(dir, mode);
340 if (IS_ERR(inode))
341 return PTR_ERR(inode);
342
343 init_special_inode(inode, inode->i_mode, rdev);
344 inode->i_op = &f2fs_special_inode_operations;
345
346 err = f2fs_add_link(dentry, inode);
347 if (err)
348 goto out;
349
350 alloc_nid_done(sbi, inode->i_ino);
351 d_instantiate(dentry, inode);
352 unlock_new_inode(inode);
353
354 f2fs_balance_fs(sbi);
355
356 return 0;
357out:
358 clear_nlink(inode);
359 unlock_new_inode(inode);
360 iput(inode);
361 alloc_nid_failed(sbi, inode->i_ino);
362 return err;
363}
364
365static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
366 struct inode *new_dir, struct dentry *new_dentry)
367{
368 struct super_block *sb = old_dir->i_sb;
369 struct f2fs_sb_info *sbi = F2FS_SB(sb);
370 struct inode *old_inode = old_dentry->d_inode;
371 struct inode *new_inode = new_dentry->d_inode;
372 struct page *old_dir_page;
373 struct page *old_page;
374 struct f2fs_dir_entry *old_dir_entry = NULL;
375 struct f2fs_dir_entry *old_entry;
376 struct f2fs_dir_entry *new_entry;
377 int err = -ENOENT;
378
379 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
380 if (!old_entry)
381 goto out;
382
383 if (S_ISDIR(old_inode->i_mode)) {
384 err = -EIO;
385 old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page);
386 if (!old_dir_entry)
387 goto out_old;
388 }
389
390 mutex_lock_op(sbi, RENAME);
391
392 if (new_inode) {
393 struct page *new_page;
394
395 err = -ENOTEMPTY;
396 if (old_dir_entry && !f2fs_empty_dir(new_inode))
397 goto out_dir;
398
399 err = -ENOENT;
400 new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name,
401 &new_page);
402 if (!new_entry)
403 goto out_dir;
404
405 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
406
407 new_inode->i_ctime = CURRENT_TIME;
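		/*
		 * A victim directory carries two links, the entry in
		 * its parent and its own "." entry, hence the extra
		 * drop_nlink() when old_dir_entry is set.
		 */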
408 if (old_dir_entry)
409 drop_nlink(new_inode);
410 drop_nlink(new_inode);
411 if (!new_inode->i_nlink)
412 add_orphan_inode(sbi, new_inode->i_ino);
413 f2fs_write_inode(new_inode, NULL);
414 } else {
415 err = f2fs_add_link(new_dentry, old_inode);
416 if (err)
417 goto out_dir;
418
419 if (old_dir_entry) {
420 inc_nlink(new_dir);
421 f2fs_write_inode(new_dir, NULL);
422 }
423 }
424
425 old_inode->i_ctime = CURRENT_TIME;
426 set_inode_flag(F2FS_I(old_inode), FI_NEED_CP);
427 mark_inode_dirty(old_inode);
428
429 f2fs_delete_entry(old_entry, old_page, NULL);
430
431 if (old_dir_entry) {
432 if (old_dir != new_dir) {
433 f2fs_set_link(old_inode, old_dir_entry,
434 old_dir_page, new_dir);
435 } else {
436 kunmap(old_dir_page);
437 f2fs_put_page(old_dir_page, 0);
438 }
439 drop_nlink(old_dir);
440 f2fs_write_inode(old_dir, NULL);
441 }
442
443 mutex_unlock_op(sbi, RENAME);
444
445 f2fs_balance_fs(sbi);
446 return 0;
447
448out_dir:
449 if (old_dir_entry) {
450 kunmap(old_dir_page);
451 f2fs_put_page(old_dir_page, 0);
452 }
453 mutex_unlock_op(sbi, RENAME);
454out_old:
455 kunmap(old_page);
456 f2fs_put_page(old_page, 0);
457out:
458 return err;
459}
460
461const struct inode_operations f2fs_dir_inode_operations = {
462 .create = f2fs_create,
463 .lookup = f2fs_lookup,
464 .link = f2fs_link,
465 .unlink = f2fs_unlink,
466 .symlink = f2fs_symlink,
467 .mkdir = f2fs_mkdir,
468 .rmdir = f2fs_rmdir,
469 .mknod = f2fs_mknod,
470 .rename = f2fs_rename,
471 .setattr = f2fs_setattr,
472 .get_acl = f2fs_get_acl,
473#ifdef CONFIG_F2FS_FS_XATTR
474 .setxattr = generic_setxattr,
475 .getxattr = generic_getxattr,
476 .listxattr = f2fs_listxattr,
477 .removexattr = generic_removexattr,
478#endif
479};
480
481const struct inode_operations f2fs_symlink_inode_operations = {
482 .readlink = generic_readlink,
483 .follow_link = page_follow_link_light,
484 .put_link = page_put_link,
485 .setattr = f2fs_setattr,
486#ifdef CONFIG_F2FS_FS_XATTR
487 .setxattr = generic_setxattr,
488 .getxattr = generic_getxattr,
489 .listxattr = f2fs_listxattr,
490 .removexattr = generic_removexattr,
491#endif
492};
493
494const struct inode_operations f2fs_special_inode_operations = {
495 .setattr = f2fs_setattr,
496 .get_acl = f2fs_get_acl,
497#ifdef CONFIG_F2FS_FS_XATTR
498 .setxattr = generic_setxattr,
499 .getxattr = generic_getxattr,
500 .listxattr = f2fs_listxattr,
501 .removexattr = generic_removexattr,
502#endif
503};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
new file mode 100644
index 000000000000..19870361497e
--- /dev/null
+++ b/fs/f2fs/node.c
@@ -0,0 +1,1764 @@
1/*
2 * fs/f2fs/node.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/mpage.h>
14#include <linux/backing-dev.h>
15#include <linux/blkdev.h>
16#include <linux/pagevec.h>
17#include <linux/swap.h>
18
19#include "f2fs.h"
20#include "node.h"
21#include "segment.h"
22
23static struct kmem_cache *nat_entry_slab;
24static struct kmem_cache *free_nid_slab;
25
26static void clear_node_page_dirty(struct page *page)
27{
28 struct address_space *mapping = page->mapping;
29 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
30	unsigned long flags;
31
32 if (PageDirty(page)) {
33 spin_lock_irqsave(&mapping->tree_lock, flags);
34 radix_tree_tag_clear(&mapping->page_tree,
35 page_index(page),
36 PAGECACHE_TAG_DIRTY);
37 spin_unlock_irqrestore(&mapping->tree_lock, flags);
38
39 clear_page_dirty_for_io(page);
40 dec_page_count(sbi, F2FS_DIRTY_NODES);
41 }
42 ClearPageUptodate(page);
43}
44
45static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
46{
47 pgoff_t index = current_nat_addr(sbi, nid);
48 return get_meta_page(sbi, index);
49}
50
51static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
52{
53 struct page *src_page;
54 struct page *dst_page;
55 pgoff_t src_off;
56 pgoff_t dst_off;
57 void *src_addr;
58 void *dst_addr;
59 struct f2fs_nm_info *nm_i = NM_I(sbi);
60
61 src_off = current_nat_addr(sbi, nid);
62 dst_off = next_nat_addr(sbi, src_off);
63
64 /* get current nat block page with lock */
65 src_page = get_meta_page(sbi, src_off);
66
67 /* Dirty src_page means that it is already the new target NAT page. */
68 if (PageDirty(src_page))
69 return src_page;
70
71 dst_page = grab_meta_page(sbi, dst_off);
72
73 src_addr = page_address(src_page);
74 dst_addr = page_address(dst_page);
75 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
76 set_page_dirty(dst_page);
77 f2fs_put_page(src_page, 1);
78
79 set_to_next_nat(nm_i, nid);
80
81 return dst_page;
82}
83
84/*
85 * Readahead NAT pages
86 */
87static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
88{
89 struct address_space *mapping = sbi->meta_inode->i_mapping;
90 struct f2fs_nm_info *nm_i = NM_I(sbi);
91 struct page *page;
92 pgoff_t index;
93 int i;
94
95 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
96 if (nid >= nm_i->max_nid)
97 nid = 0;
98 index = current_nat_addr(sbi, nid);
99
100 page = grab_cache_page(mapping, index);
101 if (!page)
102 continue;
103 if (f2fs_readpage(sbi, page, index, READ)) {
104 f2fs_put_page(page, 1);
105 continue;
106 }
107 page_cache_release(page);
108 }
109}
110
111static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
112{
113 return radix_tree_lookup(&nm_i->nat_root, n);
114}
115
116static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
117 nid_t start, unsigned int nr, struct nat_entry **ep)
118{
119 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
120}
121
122static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
123{
124 list_del(&e->list);
125 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
126 nm_i->nat_cnt--;
127 kmem_cache_free(nat_entry_slab, e);
128}
129
130int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
131{
132 struct f2fs_nm_info *nm_i = NM_I(sbi);
133 struct nat_entry *e;
134 int is_cp = 1;
135
136 read_lock(&nm_i->nat_tree_lock);
137 e = __lookup_nat_cache(nm_i, nid);
138 if (e && !e->checkpointed)
139 is_cp = 0;
140 read_unlock(&nm_i->nat_tree_lock);
141 return is_cp;
142}
143
144static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
145{
146 struct nat_entry *new;
147
148 new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
149 if (!new)
150 return NULL;
151 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
152 kmem_cache_free(nat_entry_slab, new);
153 return NULL;
154 }
155 memset(new, 0, sizeof(struct nat_entry));
156 nat_set_nid(new, nid);
157 list_add_tail(&new->list, &nm_i->nat_entries);
158 nm_i->nat_cnt++;
159 return new;
160}
161
162static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
163 struct f2fs_nat_entry *ne)
164{
165 struct nat_entry *e;
166retry:
167 write_lock(&nm_i->nat_tree_lock);
168 e = __lookup_nat_cache(nm_i, nid);
169 if (!e) {
170 e = grab_nat_entry(nm_i, nid);
171 if (!e) {
172 write_unlock(&nm_i->nat_tree_lock);
173 goto retry;
174 }
175 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
176 nat_set_ino(e, le32_to_cpu(ne->ino));
177 nat_set_version(e, ne->version);
178 e->checkpointed = true;
179 }
180 write_unlock(&nm_i->nat_tree_lock);
181}
182
183static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
184 block_t new_blkaddr)
185{
186 struct f2fs_nm_info *nm_i = NM_I(sbi);
187 struct nat_entry *e;
188retry:
189 write_lock(&nm_i->nat_tree_lock);
190 e = __lookup_nat_cache(nm_i, ni->nid);
191 if (!e) {
192 e = grab_nat_entry(nm_i, ni->nid);
193 if (!e) {
194 write_unlock(&nm_i->nat_tree_lock);
195 goto retry;
196 }
197 e->ni = *ni;
198 e->checkpointed = true;
199 BUG_ON(ni->blk_addr == NEW_ADDR);
200 } else if (new_blkaddr == NEW_ADDR) {
201 /*
202		 * when a nid is reallocated, the
203		 * previous nat entry may remain in the nat cache.
204		 * So, reinitialize it with the new information.
205 */
206 e->ni = *ni;
207 BUG_ON(ni->blk_addr != NULL_ADDR);
208 }
209
210 if (new_blkaddr == NEW_ADDR)
211 e->checkpointed = false;
212
213 /* sanity check */
214 BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
215 BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
216 new_blkaddr == NULL_ADDR);
217 BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
218 new_blkaddr == NEW_ADDR);
219 BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
220 nat_get_blkaddr(e) != NULL_ADDR &&
221 new_blkaddr == NEW_ADDR);
222
223	/* increment version number as the node is removed */
224 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
225 unsigned char version = nat_get_version(e);
226 nat_set_version(e, inc_node_version(version));
227 }
228
229 /* change address */
230 nat_set_blkaddr(e, new_blkaddr);
231 __set_nat_cache_dirty(nm_i, e);
232 write_unlock(&nm_i->nat_tree_lock);
233}
234
235static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
236{
237 struct f2fs_nm_info *nm_i = NM_I(sbi);
238
239 if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
240 return 0;
241
242 write_lock(&nm_i->nat_tree_lock);
243 while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
244 struct nat_entry *ne;
245 ne = list_first_entry(&nm_i->nat_entries,
246 struct nat_entry, list);
247 __del_from_nat_cache(nm_i, ne);
248 nr_shrink--;
249 }
250 write_unlock(&nm_i->nat_tree_lock);
251 return nr_shrink;
252}
253
254/*
255 * This function always returns success
256 */
257void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
258{
259 struct f2fs_nm_info *nm_i = NM_I(sbi);
260 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
261 struct f2fs_summary_block *sum = curseg->sum_blk;
262 nid_t start_nid = START_NID(nid);
263 struct f2fs_nat_block *nat_blk;
264 struct page *page = NULL;
265 struct f2fs_nat_entry ne;
266 struct nat_entry *e;
267 int i;
268
269 memset(&ne, 0, sizeof(struct f2fs_nat_entry));
270 ni->nid = nid;
271
272 /* Check nat cache */
273 read_lock(&nm_i->nat_tree_lock);
274 e = __lookup_nat_cache(nm_i, nid);
275 if (e) {
276 ni->ino = nat_get_ino(e);
277 ni->blk_addr = nat_get_blkaddr(e);
278 ni->version = nat_get_version(e);
279 }
280 read_unlock(&nm_i->nat_tree_lock);
281 if (e)
282 return;
283
284 /* Check current segment summary */
285 mutex_lock(&curseg->curseg_mutex);
286 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
287 if (i >= 0) {
288 ne = nat_in_journal(sum, i);
289 node_info_from_raw_nat(ni, &ne);
290 }
291 mutex_unlock(&curseg->curseg_mutex);
292 if (i >= 0)
293 goto cache;
294
295 /* Fill node_info from nat page */
296 page = get_current_nat_page(sbi, start_nid);
297 nat_blk = (struct f2fs_nat_block *)page_address(page);
298 ne = nat_blk->entries[nid - start_nid];
299 node_info_from_raw_nat(ni, &ne);
300 f2fs_put_page(page, 1);
301cache:
302 /* cache nat entry */
303 cache_nat_entry(NM_I(sbi), nid, &ne);
304}
305
306/*
307 * The maximum depth is four.
308 * Offset[0] will have raw inode offset.
309 */
310static int get_node_path(long block, int offset[4], unsigned int noffset[4])
311{
312 const long direct_index = ADDRS_PER_INODE;
313 const long direct_blks = ADDRS_PER_BLOCK;
314 const long dptrs_per_blk = NIDS_PER_BLOCK;
315 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
316 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
317 int n = 0;
318 int level = 0;
319
320 noffset[0] = 0;
321
322 if (block < direct_index) {
323 offset[n++] = block;
324 level = 0;
325 goto got;
326 }
327 block -= direct_index;
328 if (block < direct_blks) {
329 offset[n++] = NODE_DIR1_BLOCK;
330 noffset[n] = 1;
331 offset[n++] = block;
332 level = 1;
333 goto got;
334 }
335 block -= direct_blks;
336 if (block < direct_blks) {
337 offset[n++] = NODE_DIR2_BLOCK;
338 noffset[n] = 2;
339 offset[n++] = block;
340 level = 1;
341 goto got;
342 }
343 block -= direct_blks;
344 if (block < indirect_blks) {
345 offset[n++] = NODE_IND1_BLOCK;
346 noffset[n] = 3;
347 offset[n++] = block / direct_blks;
348 noffset[n] = 4 + offset[n - 1];
349 offset[n++] = block % direct_blks;
350 level = 2;
351 goto got;
352 }
353 block -= indirect_blks;
354 if (block < indirect_blks) {
355 offset[n++] = NODE_IND2_BLOCK;
356 noffset[n] = 4 + dptrs_per_blk;
357 offset[n++] = block / direct_blks;
358 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
359 offset[n++] = block % direct_blks;
360 level = 2;
361 goto got;
362 }
363 block -= indirect_blks;
364 if (block < dindirect_blks) {
365 offset[n++] = NODE_DIND_BLOCK;
366 noffset[n] = 5 + (dptrs_per_blk * 2);
367 offset[n++] = block / indirect_blks;
368 noffset[n] = 6 + (dptrs_per_blk * 2) +
369 offset[n - 1] * (dptrs_per_blk + 1);
370 offset[n++] = (block / direct_blks) % dptrs_per_blk;
371 noffset[n] = 7 + (dptrs_per_blk * 2) +
372 offset[n - 2] * (dptrs_per_blk + 1) +
373 offset[n - 1];
374 offset[n++] = block % direct_blks;
375 level = 3;
376 goto got;
377 } else {
378 BUG();
379 }
380got:
381 return level;
382}
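A worked example of the mapping, assuming the usual 4KB-block constants (ADDRS_PER_INODE = 923, ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018; f2fs.h in this commit is authoritative):

/*
 * block 5000:  5000 -  923 = 4077   (past the in-inode pointers)
 *              4077 - 1018 = 3059   (past NODE_DIR1_BLOCK)
 *              3059 - 1018 = 2041   (past NODE_DIR2_BLOCK)
 *              2041 < 1018 * 1018   -> single-indirect range
 * so level = 2, offset[0] = NODE_IND1_BLOCK,
 *    offset[1] = 2041 / 1018 = 2, offset[2] = 2041 % 1018 = 5
 */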
383
384/*
385 * Caller should call f2fs_put_dnode(dn).
386 */
387int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
388{
389 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
390 struct page *npage[4];
391 struct page *parent;
392 int offset[4];
393 unsigned int noffset[4];
394 nid_t nids[4];
395 int level, i;
396 int err = 0;
397
398 level = get_node_path(index, offset, noffset);
399
400 nids[0] = dn->inode->i_ino;
401 npage[0] = get_node_page(sbi, nids[0]);
402 if (IS_ERR(npage[0]))
403 return PTR_ERR(npage[0]);
404
405 parent = npage[0];
406 nids[1] = get_nid(parent, offset[0], true);
407 dn->inode_page = npage[0];
408 dn->inode_page_locked = true;
409
410 /* get indirect or direct nodes */
411 for (i = 1; i <= level; i++) {
412 bool done = false;
413
414 if (!nids[i] && !ro) {
415 mutex_lock_op(sbi, NODE_NEW);
416
417 /* alloc new node */
418 if (!alloc_nid(sbi, &(nids[i]))) {
419 mutex_unlock_op(sbi, NODE_NEW);
420 err = -ENOSPC;
421 goto release_pages;
422 }
423
424 dn->nid = nids[i];
425 npage[i] = new_node_page(dn, noffset[i]);
426 if (IS_ERR(npage[i])) {
427 alloc_nid_failed(sbi, nids[i]);
428 mutex_unlock_op(sbi, NODE_NEW);
429 err = PTR_ERR(npage[i]);
430 goto release_pages;
431 }
432
433 set_nid(parent, offset[i - 1], nids[i], i == 1);
434 alloc_nid_done(sbi, nids[i]);
435 mutex_unlock_op(sbi, NODE_NEW);
436 done = true;
437 } else if (ro && i == level && level > 1) {
438 npage[i] = get_node_page_ra(parent, offset[i - 1]);
439 if (IS_ERR(npage[i])) {
440 err = PTR_ERR(npage[i]);
441 goto release_pages;
442 }
443 done = true;
444 }
445 if (i == 1) {
446 dn->inode_page_locked = false;
447 unlock_page(parent);
448 } else {
449 f2fs_put_page(parent, 1);
450 }
451
452 if (!done) {
453 npage[i] = get_node_page(sbi, nids[i]);
454 if (IS_ERR(npage[i])) {
455 err = PTR_ERR(npage[i]);
456 f2fs_put_page(npage[0], 0);
457 goto release_out;
458 }
459 }
460 if (i < level) {
461 parent = npage[i];
462 nids[i + 1] = get_nid(parent, offset[i], false);
463 }
464 }
465 dn->nid = nids[level];
466 dn->ofs_in_node = offset[level];
467 dn->node_page = npage[level];
468 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
469 return 0;
470
471release_pages:
472 f2fs_put_page(parent, 1);
473 if (i > 1)
474 f2fs_put_page(npage[0], 0);
475release_out:
476 dn->inode_page = NULL;
477 dn->node_page = NULL;
478 return err;
479}
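A minimal caller sketch (error handling trimmed; RDONLY_NODE is the read-only value this commit's callers appear to pass for the ro argument):

	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, RDONLY_NODE);
	if (err)
		return err;
	/* dn.data_blkaddr now holds the block address for this offset */
	f2fs_put_dnode(&dn);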
480
481static void truncate_node(struct dnode_of_data *dn)
482{
483 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
484 struct node_info ni;
485
486 get_node_info(sbi, dn->nid, &ni);
487 BUG_ON(ni.blk_addr == NULL_ADDR);
488
489 if (ni.blk_addr != NULL_ADDR)
490 invalidate_blocks(sbi, ni.blk_addr);
491
492 /* Deallocate node address */
493 dec_valid_node_count(sbi, dn->inode, 1);
494 set_node_addr(sbi, &ni, NULL_ADDR);
495
496 if (dn->nid == dn->inode->i_ino) {
497 remove_orphan_inode(sbi, dn->nid);
498 dec_valid_inode_count(sbi);
499 } else {
500 sync_inode_page(dn);
501 }
502
503 clear_node_page_dirty(dn->node_page);
504 F2FS_SET_SB_DIRT(sbi);
505
506 f2fs_put_page(dn->node_page, 1);
507 dn->node_page = NULL;
508}
509
510static int truncate_dnode(struct dnode_of_data *dn)
511{
512 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
513 struct page *page;
514
515 if (dn->nid == 0)
516 return 1;
517
518 /* get direct node */
519 page = get_node_page(sbi, dn->nid);
520 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
521 return 1;
522 else if (IS_ERR(page))
523 return PTR_ERR(page);
524
525 /* Make dnode_of_data for parameter */
526 dn->node_page = page;
527 dn->ofs_in_node = 0;
528 truncate_data_blocks(dn);
529 truncate_node(dn);
530 return 1;
531}
532
533static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
534 int ofs, int depth)
535{
536 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
537 struct dnode_of_data rdn = *dn;
538 struct page *page;
539 struct f2fs_node *rn;
540 nid_t child_nid;
541 unsigned int child_nofs;
542 int freed = 0;
543 int i, ret;
544
545 if (dn->nid == 0)
546 return NIDS_PER_BLOCK + 1;
547
548 page = get_node_page(sbi, dn->nid);
549 if (IS_ERR(page))
550 return PTR_ERR(page);
551
552 rn = (struct f2fs_node *)page_address(page);
553 if (depth < 3) {
554 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
555 child_nid = le32_to_cpu(rn->in.nid[i]);
556 if (child_nid == 0)
557 continue;
558 rdn.nid = child_nid;
559 ret = truncate_dnode(&rdn);
560 if (ret < 0)
561 goto out_err;
562 set_nid(page, i, 0, false);
563 }
564 } else {
565 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
566 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
567 child_nid = le32_to_cpu(rn->in.nid[i]);
568 if (child_nid == 0) {
569 child_nofs += NIDS_PER_BLOCK + 1;
570 continue;
571 }
572 rdn.nid = child_nid;
573 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
574 if (ret == (NIDS_PER_BLOCK + 1)) {
575 set_nid(page, i, 0, false);
576 child_nofs += ret;
577 } else if (ret < 0 && ret != -ENOENT) {
578 goto out_err;
579 }
580 }
581 freed = child_nofs;
582 }
583
584 if (!ofs) {
585 /* remove current indirect node */
586 dn->node_page = page;
587 truncate_node(dn);
588 freed++;
589 } else {
590 f2fs_put_page(page, 1);
591 }
592 return freed;
593
594out_err:
595 f2fs_put_page(page, 1);
596 return ret;
597}
598
599static int truncate_partial_nodes(struct dnode_of_data *dn,
600 struct f2fs_inode *ri, int *offset, int depth)
601{
602 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
603 struct page *pages[2];
604 nid_t nid[3];
605 nid_t child_nid;
606 int err = 0;
607 int i;
608 int idx = depth - 2;
609
610 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
611 if (!nid[0])
612 return 0;
613
614 /* get indirect nodes in the path */
615 for (i = 0; i < depth - 1; i++) {
616	/* reference count will be increased */
617 pages[i] = get_node_page(sbi, nid[i]);
618 if (IS_ERR(pages[i])) {
619 depth = i + 1;
620 err = PTR_ERR(pages[i]);
621 goto fail;
622 }
623 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
624 }
625
626 /* free direct nodes linked to a partial indirect node */
627 for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
628 child_nid = get_nid(pages[idx], i, false);
629 if (!child_nid)
630 continue;
631 dn->nid = child_nid;
632 err = truncate_dnode(dn);
633 if (err < 0)
634 goto fail;
635 set_nid(pages[idx], i, 0, false);
636 }
637
638 if (offset[depth - 1] == 0) {
639 dn->node_page = pages[idx];
640 dn->nid = nid[idx];
641 truncate_node(dn);
642 } else {
643 f2fs_put_page(pages[idx], 1);
644 }
645 offset[idx]++;
646 offset[depth - 1] = 0;
647fail:
648 for (i = depth - 3; i >= 0; i--)
649 f2fs_put_page(pages[i], 1);
650 return err;
651}
652
653/*
654 * All the block addresses of data and nodes should be nullified.
655 */
656int truncate_inode_blocks(struct inode *inode, pgoff_t from)
657{
658 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
659 int err = 0, cont = 1;
660 int level, offset[4], noffset[4];
661 unsigned int nofs;
662 struct f2fs_node *rn;
663 struct dnode_of_data dn;
664 struct page *page;
665
666 level = get_node_path(from, offset, noffset);
667
668 page = get_node_page(sbi, inode->i_ino);
669 if (IS_ERR(page))
670 return PTR_ERR(page);
671
672 set_new_dnode(&dn, inode, page, NULL, 0);
673 unlock_page(page);
674
675 rn = page_address(page);
676 switch (level) {
677 case 0:
678 case 1:
679 nofs = noffset[1];
680 break;
681 case 2:
682 nofs = noffset[1];
683 if (!offset[level - 1])
684 goto skip_partial;
685 err = truncate_partial_nodes(&dn, &rn->i, offset, level);
686 if (err < 0 && err != -ENOENT)
687 goto fail;
688 nofs += 1 + NIDS_PER_BLOCK;
689 break;
690 case 3:
691 nofs = 5 + 2 * NIDS_PER_BLOCK;
692 if (!offset[level - 1])
693 goto skip_partial;
694 err = truncate_partial_nodes(&dn, &rn->i, offset, level);
695 if (err < 0 && err != -ENOENT)
696 goto fail;
697 break;
698 default:
699 BUG();
700 }
701
702skip_partial:
703 while (cont) {
704 dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
705 switch (offset[0]) {
706 case NODE_DIR1_BLOCK:
707 case NODE_DIR2_BLOCK:
708 err = truncate_dnode(&dn);
709 break;
710
711 case NODE_IND1_BLOCK:
712 case NODE_IND2_BLOCK:
713 err = truncate_nodes(&dn, nofs, offset[1], 2);
714 break;
715
716 case NODE_DIND_BLOCK:
717 err = truncate_nodes(&dn, nofs, offset[1], 3);
718 cont = 0;
719 break;
720
721 default:
722 BUG();
723 }
724 if (err < 0 && err != -ENOENT)
725 goto fail;
726 if (offset[1] == 0 &&
727 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
728 lock_page(page);
729 wait_on_page_writeback(page);
730 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
731 set_page_dirty(page);
732 unlock_page(page);
733 }
734 offset[1] = 0;
735 offset[0]++;
736 nofs += err;
737 }
738fail:
739 f2fs_put_page(page, 0);
740 return err > 0 ? 0 : err;
741}
742
743int remove_inode_page(struct inode *inode)
744{
745 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
746 struct page *page;
747 nid_t ino = inode->i_ino;
748 struct dnode_of_data dn;
749
750 mutex_lock_op(sbi, NODE_TRUNC);
751 page = get_node_page(sbi, ino);
752 if (IS_ERR(page)) {
753 mutex_unlock_op(sbi, NODE_TRUNC);
754 return PTR_ERR(page);
755 }
756
757 if (F2FS_I(inode)->i_xattr_nid) {
758 nid_t nid = F2FS_I(inode)->i_xattr_nid;
759 struct page *npage = get_node_page(sbi, nid);
760
761 if (IS_ERR(npage)) {
762 mutex_unlock_op(sbi, NODE_TRUNC);
763 return PTR_ERR(npage);
764 }
765
766 F2FS_I(inode)->i_xattr_nid = 0;
767 set_new_dnode(&dn, inode, page, npage, nid);
768 dn.inode_page_locked = 1;
769 truncate_node(&dn);
770 }
771 if (inode->i_blocks == 1) {
772	/* internally calls f2fs_put_page() */
773 set_new_dnode(&dn, inode, page, page, ino);
774 truncate_node(&dn);
775 } else if (inode->i_blocks == 0) {
776 struct node_info ni;
777 get_node_info(sbi, inode->i_ino, &ni);
778
779	/* called after f2fs_new_inode() has failed */
780 BUG_ON(ni.blk_addr != NULL_ADDR);
781 f2fs_put_page(page, 1);
782 } else {
783 BUG();
784 }
785 mutex_unlock_op(sbi, NODE_TRUNC);
786 return 0;
787}
788
789int new_inode_page(struct inode *inode, struct dentry *dentry)
790{
791 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
792 struct page *page;
793 struct dnode_of_data dn;
794
795 /* allocate inode page for new inode */
796 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
797 mutex_lock_op(sbi, NODE_NEW);
798 page = new_node_page(&dn, 0);
799 init_dent_inode(dentry, page);
800 mutex_unlock_op(sbi, NODE_NEW);
801 if (IS_ERR(page))
802 return PTR_ERR(page);
803 f2fs_put_page(page, 1);
804 return 0;
805}
806
807struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
808{
809 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
810 struct address_space *mapping = sbi->node_inode->i_mapping;
811 struct node_info old_ni, new_ni;
812 struct page *page;
813 int err;
814
815 if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
816 return ERR_PTR(-EPERM);
817
818 page = grab_cache_page(mapping, dn->nid);
819 if (!page)
820 return ERR_PTR(-ENOMEM);
821
822 get_node_info(sbi, dn->nid, &old_ni);
823
824 SetPageUptodate(page);
825 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
826
827 /* Reinitialize old_ni with new node page */
828 BUG_ON(old_ni.blk_addr != NULL_ADDR);
829 new_ni = old_ni;
830 new_ni.ino = dn->inode->i_ino;
831
832 if (!inc_valid_node_count(sbi, dn->inode, 1)) {
833 err = -ENOSPC;
834 goto fail;
835 }
836 set_node_addr(sbi, &new_ni, NEW_ADDR);
837
838 dn->node_page = page;
839 sync_inode_page(dn);
840 set_page_dirty(page);
841 set_cold_node(dn->inode, page);
842 if (ofs == 0)
843 inc_valid_inode_count(sbi);
844
845 return page;
846
847fail:
848 f2fs_put_page(page, 1);
849 return ERR_PTR(err);
850}
851
852static int read_node_page(struct page *page, int type)
853{
854 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
855 struct node_info ni;
856
857 get_node_info(sbi, page->index, &ni);
858
859 if (ni.blk_addr == NULL_ADDR)
860 return -ENOENT;
861 return f2fs_readpage(sbi, page, ni.blk_addr, type);
862}
863
864/*
865 * Readahead a node page
866 */
867void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
868{
869 struct address_space *mapping = sbi->node_inode->i_mapping;
870 struct page *apage;
871
872 apage = find_get_page(mapping, nid);
873 if (apage && PageUptodate(apage))
874 goto release_out;
875 f2fs_put_page(apage, 0);
876
877 apage = grab_cache_page(mapping, nid);
878 if (!apage)
879 return;
880
881 if (read_node_page(apage, READA))
882 goto unlock_out;
883
884 page_cache_release(apage);
885 return;
886
887unlock_out:
888 unlock_page(apage);
889release_out:
890 page_cache_release(apage);
891}
892
893struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
894{
895 int err;
896 struct page *page;
897 struct address_space *mapping = sbi->node_inode->i_mapping;
898
899 page = grab_cache_page(mapping, nid);
900 if (!page)
901 return ERR_PTR(-ENOMEM);
902
903 err = read_node_page(page, READ_SYNC);
904 if (err) {
905 f2fs_put_page(page, 1);
906 return ERR_PTR(err);
907 }
908
909 BUG_ON(nid != nid_of_node(page));
910 mark_page_accessed(page);
911 return page;
912}
913
914/*
915 * Return a locked page for the desired node page,
916 * and read ahead up to MAX_RA_NODE sibling node pages.
917 */
918struct page *get_node_page_ra(struct page *parent, int start)
919{
920 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
921 struct address_space *mapping = sbi->node_inode->i_mapping;
922 int i, end;
923 int err = 0;
924 nid_t nid;
925 struct page *page;
926
927 /* First, try getting the desired direct node. */
928 nid = get_nid(parent, start, false);
929 if (!nid)
930 return ERR_PTR(-ENOENT);
931
932 page = find_get_page(mapping, nid);
933 if (page && PageUptodate(page))
934 goto page_hit;
935 f2fs_put_page(page, 0);
936
937repeat:
938 page = grab_cache_page(mapping, nid);
939 if (!page)
940 return ERR_PTR(-ENOMEM);
941
942 err = read_node_page(page, READA);
943 if (err) {
944 f2fs_put_page(page, 1);
945 return ERR_PTR(err);
946 }
947
948 /* Then, try readahead for siblings of the desired node */
949 end = start + MAX_RA_NODE;
950 end = min(end, NIDS_PER_BLOCK);
951 for (i = start + 1; i < end; i++) {
952 nid = get_nid(parent, i, false);
953 if (!nid)
954 continue;
955 ra_node_page(sbi, nid);
956 }
957
958page_hit:
959 lock_page(page);
960 if (PageError(page)) {
961 f2fs_put_page(page, 1);
962 return ERR_PTR(-EIO);
963 }
964
965 /* Has the page been truncated? */
966 if (page->mapping != mapping) {
967 f2fs_put_page(page, 1);
968 goto repeat;
969 }
970 return page;
971}
972
973void sync_inode_page(struct dnode_of_data *dn)
974{
975 if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
976 update_inode(dn->inode, dn->node_page);
977 } else if (dn->inode_page) {
978 if (!dn->inode_page_locked)
979 lock_page(dn->inode_page);
980 update_inode(dn->inode, dn->inode_page);
981 if (!dn->inode_page_locked)
982 unlock_page(dn->inode_page);
983 } else {
984 f2fs_write_inode(dn->inode, NULL);
985 }
986}
987
988int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
989 struct writeback_control *wbc)
990{
991 struct address_space *mapping = sbi->node_inode->i_mapping;
992 pgoff_t index, end;
993 struct pagevec pvec;
994 int step = ino ? 2 : 0;
995 int nwritten = 0, wrote = 0;
996
997 pagevec_init(&pvec, 0);
998
999next_step:
1000 index = 0;
1001 end = LONG_MAX;
1002
1003 while (index <= end) {
1004 int i, nr_pages;
1005 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1006 PAGECACHE_TAG_DIRTY,
1007 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1008 if (nr_pages == 0)
1009 break;
1010
1011 for (i = 0; i < nr_pages; i++) {
1012 struct page *page = pvec.pages[i];
1013
1014 /*
1015 * flushing sequence with step:
1016 * 0. indirect nodes
1017 * 1. dentry dnodes
1018 * 2. file dnodes
1019 */
1020 if (step == 0 && IS_DNODE(page))
1021 continue;
1022 if (step == 1 && (!IS_DNODE(page) ||
1023 is_cold_node(page)))
1024 continue;
1025 if (step == 2 && (!IS_DNODE(page) ||
1026 !is_cold_node(page)))
1027 continue;
1028
1029 /*
1030			 * In fsync mode,
1031			 * we should not skip writing node pages.
1032 */
1033 if (ino && ino_of_node(page) == ino)
1034 lock_page(page);
1035 else if (!trylock_page(page))
1036 continue;
1037
1038 if (unlikely(page->mapping != mapping)) {
1039continue_unlock:
1040 unlock_page(page);
1041 continue;
1042 }
1043 if (ino && ino_of_node(page) != ino)
1044 goto continue_unlock;
1045
1046 if (!PageDirty(page)) {
1047 /* someone wrote it for us */
1048 goto continue_unlock;
1049 }
1050
1051 if (!clear_page_dirty_for_io(page))
1052 goto continue_unlock;
1053
1054 /* called by fsync() */
1055 if (ino && IS_DNODE(page)) {
1056 int mark = !is_checkpointed_node(sbi, ino);
1057 set_fsync_mark(page, 1);
1058 if (IS_INODE(page))
1059 set_dentry_mark(page, mark);
1060 nwritten++;
1061 } else {
1062 set_fsync_mark(page, 0);
1063 set_dentry_mark(page, 0);
1064 }
1065 mapping->a_ops->writepage(page, wbc);
1066 wrote++;
1067
1068 if (--wbc->nr_to_write == 0)
1069 break;
1070 }
1071 pagevec_release(&pvec);
1072 cond_resched();
1073
1074 if (wbc->nr_to_write == 0) {
1075 step = 2;
1076 break;
1077 }
1078 }
1079
1080 if (step < 2) {
1081 step++;
1082 goto next_step;
1083 }
1084
1085 if (wrote)
1086 f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
1087
1088 return nwritten;
1089}
1090
1091static int f2fs_write_node_page(struct page *page,
1092 struct writeback_control *wbc)
1093{
1094 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1095 nid_t nid;
1096 unsigned int nofs;
1097 block_t new_addr;
1098 struct node_info ni;
1099
1100 if (wbc->for_reclaim) {
1101 dec_page_count(sbi, F2FS_DIRTY_NODES);
1102 wbc->pages_skipped++;
1103 set_page_dirty(page);
1104 return AOP_WRITEPAGE_ACTIVATE;
1105 }
1106
1107 wait_on_page_writeback(page);
1108
1109 mutex_lock_op(sbi, NODE_WRITE);
1110
1111 /* get old block addr of this node page */
1112 nid = nid_of_node(page);
1113 nofs = ofs_of_node(page);
1114 BUG_ON(page->index != nid);
1115
1116 get_node_info(sbi, nid, &ni);
1117
1118 /* This page is already truncated */
1119 if (ni.blk_addr == NULL_ADDR)
1120 return 0;
1121
1122 set_page_writeback(page);
1123
1124 /* insert node offset */
1125 write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
1126 set_node_addr(sbi, &ni, new_addr);
1127 dec_page_count(sbi, F2FS_DIRTY_NODES);
1128
1129 mutex_unlock_op(sbi, NODE_WRITE);
1130 unlock_page(page);
1131 return 0;
1132}
1133
1134static int f2fs_write_node_pages(struct address_space *mapping,
1135 struct writeback_control *wbc)
1136{
1137 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1138 struct block_device *bdev = sbi->sb->s_bdev;
1139 long nr_to_write = wbc->nr_to_write;
1140
1141 if (wbc->for_kupdate)
1142 return 0;
1143
1144 if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
1145 return 0;
1146
1147 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1148 write_checkpoint(sbi, false, false);
1149 return 0;
1150 }
1151
1152	/* if mounting has failed, skip writing node pages */
1153 wbc->nr_to_write = bio_get_nr_vecs(bdev);
1154 sync_node_pages(sbi, 0, wbc);
1155 wbc->nr_to_write = nr_to_write -
1156 (bio_get_nr_vecs(bdev) - wbc->nr_to_write);
1157 return 0;
1158}
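The nr_to_write juggling above appears to cap each pass at one bio's worth of pages: nr_to_write is temporarily set to bio_get_nr_vecs(bdev), sync_node_pages() decrements it as it writes, and the number actually consumed is then folded back into the caller's budget.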
1159
1160static int f2fs_set_node_page_dirty(struct page *page)
1161{
1162 struct address_space *mapping = page->mapping;
1163 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1164
1165 SetPageUptodate(page);
1166 if (!PageDirty(page)) {
1167 __set_page_dirty_nobuffers(page);
1168 inc_page_count(sbi, F2FS_DIRTY_NODES);
1169 SetPagePrivate(page);
1170 return 1;
1171 }
1172 return 0;
1173}
1174
1175static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
1176{
1177 struct inode *inode = page->mapping->host;
1178 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1179 if (PageDirty(page))
1180 dec_page_count(sbi, F2FS_DIRTY_NODES);
1181 ClearPagePrivate(page);
1182}
1183
1184static int f2fs_release_node_page(struct page *page, gfp_t wait)
1185{
1186 ClearPagePrivate(page);
1187 return 0;
1188}
1189
1190/*
1191 * Structure of the f2fs node operations
1192 */
1193const struct address_space_operations f2fs_node_aops = {
1194 .writepage = f2fs_write_node_page,
1195 .writepages = f2fs_write_node_pages,
1196 .set_page_dirty = f2fs_set_node_page_dirty,
1197 .invalidatepage = f2fs_invalidate_node_page,
1198 .releasepage = f2fs_release_node_page,
1199};
1200
1201static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
1202{
1203 struct list_head *this;
1204 struct free_nid *i = NULL;
1205 list_for_each(this, head) {
1206 i = list_entry(this, struct free_nid, list);
1207 if (i->nid == n)
1208 break;
1209 i = NULL;
1210 }
1211 return i;
1212}
1213
1214static void __del_from_free_nid_list(struct free_nid *i)
1215{
1216 list_del(&i->list);
1217 kmem_cache_free(free_nid_slab, i);
1218}
1219
1220static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1221{
1222 struct free_nid *i;
1223
1224 if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
1225 return 0;
1226retry:
1227 i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1228 if (!i) {
1229 cond_resched();
1230 goto retry;
1231 }
1232 i->nid = nid;
1233 i->state = NID_NEW;
1234
1235 spin_lock(&nm_i->free_nid_list_lock);
1236 if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
1237 spin_unlock(&nm_i->free_nid_list_lock);
1238 kmem_cache_free(free_nid_slab, i);
1239 return 0;
1240 }
1241 list_add_tail(&i->list, &nm_i->free_nid_list);
1242 nm_i->fcnt++;
1243 spin_unlock(&nm_i->free_nid_list_lock);
1244 return 1;
1245}
1246
1247static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1248{
1249 struct free_nid *i;
1250 spin_lock(&nm_i->free_nid_list_lock);
1251 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1252 if (i && i->state == NID_NEW) {
1253 __del_from_free_nid_list(i);
1254 nm_i->fcnt--;
1255 }
1256 spin_unlock(&nm_i->free_nid_list_lock);
1257}
1258
1259static int scan_nat_page(struct f2fs_nm_info *nm_i,
1260 struct page *nat_page, nid_t start_nid)
1261{
1262 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1263 block_t blk_addr;
1264 int fcnt = 0;
1265 int i;
1266
1267	/* nid 0 should not be used */
1268 if (start_nid == 0)
1269 ++start_nid;
1270
1271 i = start_nid % NAT_ENTRY_PER_BLOCK;
1272
1273 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1274 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1275 BUG_ON(blk_addr == NEW_ADDR);
1276 if (blk_addr == NULL_ADDR)
1277 fcnt += add_free_nid(nm_i, start_nid);
1278 }
1279 return fcnt;
1280}
1281
1282static void build_free_nids(struct f2fs_sb_info *sbi)
1283{
1284 struct free_nid *fnid, *next_fnid;
1285 struct f2fs_nm_info *nm_i = NM_I(sbi);
1286 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1287 struct f2fs_summary_block *sum = curseg->sum_blk;
1288 nid_t nid = 0;
1289 bool is_cycled = false;
1290 int fcnt = 0;
1291 int i;
1292
1293 nid = nm_i->next_scan_nid;
1294 nm_i->init_scan_nid = nid;
1295
1296 ra_nat_pages(sbi, nid);
1297
1298 while (1) {
1299 struct page *page = get_current_nat_page(sbi, nid);
1300
1301 fcnt += scan_nat_page(nm_i, page, nid);
1302 f2fs_put_page(page, 1);
1303
1304 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1305
1306 if (nid >= nm_i->max_nid) {
1307 nid = 0;
1308 is_cycled = true;
1309 }
1310 if (fcnt > MAX_FREE_NIDS)
1311 break;
1312 if (is_cycled && nm_i->init_scan_nid <= nid)
1313 break;
1314 }
1315
1316 nm_i->next_scan_nid = nid;
1317
1318 /* find free nids from current sum_pages */
1319 mutex_lock(&curseg->curseg_mutex);
1320 for (i = 0; i < nats_in_cursum(sum); i++) {
1321 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1322 nid = le32_to_cpu(nid_in_journal(sum, i));
1323 if (addr == NULL_ADDR)
1324 add_free_nid(nm_i, nid);
1325 else
1326 remove_free_nid(nm_i, nid);
1327 }
1328 mutex_unlock(&curseg->curseg_mutex);
1329
1330	/* drop nids that are already allocated from the free nid list */
1331 list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
1332 struct nat_entry *ne;
1333
1334 read_lock(&nm_i->nat_tree_lock);
1335 ne = __lookup_nat_cache(nm_i, fnid->nid);
1336 if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
1337 remove_free_nid(nm_i, fnid->nid);
1338 read_unlock(&nm_i->nat_tree_lock);
1339 }
1340}
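In summary (my paraphrase of the loop above): the scan resumes from next_scan_nid, walks whole NAT blocks until more than MAX_FREE_NIDS candidates are collected or the nid space wraps back past the starting point, then reconciles the list against the NAT journal in the current summary block, and finally drops any candidate whose NAT cache entry shows it is already in use.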
1341
1342/*
1343 * If this function returns success, the caller can obtain a new nid
1344 * from the second parameter of this function.
1345 * The returned nid can be used as an ino as well as a nid when a new inode is created.
1346 */
1347bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1348{
1349 struct f2fs_nm_info *nm_i = NM_I(sbi);
1350 struct free_nid *i = NULL;
1351 struct list_head *this;
1352retry:
1353 mutex_lock(&nm_i->build_lock);
1354 if (!nm_i->fcnt) {
1355 /* scan NAT in order to build free nid list */
1356 build_free_nids(sbi);
1357 if (!nm_i->fcnt) {
1358 mutex_unlock(&nm_i->build_lock);
1359 return false;
1360 }
1361 }
1362 mutex_unlock(&nm_i->build_lock);
1363
1364 /*
1365	 * We check fcnt again because the previous check was racy:
1366	 * we didn't hold free_nid_list_lock, so another thread
1367	 * could have consumed all of the free nids.
1368 */
1369 spin_lock(&nm_i->free_nid_list_lock);
1370 if (!nm_i->fcnt) {
1371 spin_unlock(&nm_i->free_nid_list_lock);
1372 goto retry;
1373 }
1374
1375 BUG_ON(list_empty(&nm_i->free_nid_list));
1376 list_for_each(this, &nm_i->free_nid_list) {
1377 i = list_entry(this, struct free_nid, list);
1378 if (i->state == NID_NEW)
1379 break;
1380 }
1381
1382 BUG_ON(i->state != NID_NEW);
1383 *nid = i->nid;
1384 i->state = NID_ALLOC;
1385 nm_i->fcnt--;
1386 spin_unlock(&nm_i->free_nid_list_lock);
1387 return true;
1388}
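The free-nid protocol, as the callers in namei.c use it (a sketch; do_work() is a hypothetical user of the nid, and error handling is elided):

	nid_t nid;

	if (!alloc_nid(sbi, &nid))		/* reserve a nid, state NID_ALLOC */
		return -ENOSPC;

	if (do_work(nid))
		alloc_nid_failed(sbi, nid);	/* recycle it as NID_NEW */
	else
		alloc_nid_done(sbi, nid);	/* retire it from the list */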
1389
1390/*
1391 * alloc_nid() should be called prior to this function.
1392 */
1393void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1394{
1395 struct f2fs_nm_info *nm_i = NM_I(sbi);
1396 struct free_nid *i;
1397
1398 spin_lock(&nm_i->free_nid_list_lock);
1399 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1400 if (i) {
1401 BUG_ON(i->state != NID_ALLOC);
1402 __del_from_free_nid_list(i);
1403 }
1404 spin_unlock(&nm_i->free_nid_list_lock);
1405}
1406
1407/*
1408 * alloc_nid() should be called prior to this function.
1409 */
1410void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1411{
1412 alloc_nid_done(sbi, nid);
1413 add_free_nid(NM_I(sbi), nid);
1414}
1415
1416void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1417 struct f2fs_summary *sum, struct node_info *ni,
1418 block_t new_blkaddr)
1419{
1420 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1421 set_node_addr(sbi, ni, new_blkaddr);
1422 clear_node_page_dirty(page);
1423}
1424
1425int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1426{
1427 struct address_space *mapping = sbi->node_inode->i_mapping;
1428 struct f2fs_node *src, *dst;
1429 nid_t ino = ino_of_node(page);
1430 struct node_info old_ni, new_ni;
1431 struct page *ipage;
1432
1433 ipage = grab_cache_page(mapping, ino);
1434 if (!ipage)
1435 return -ENOMEM;
1436
1437	/* this ino must not be handed out again from the free nid list */
1438 remove_free_nid(NM_I(sbi), ino);
1439
1440 get_node_info(sbi, ino, &old_ni);
1441 SetPageUptodate(ipage);
1442 fill_node_footer(ipage, ino, ino, 0, true);
1443
1444 src = (struct f2fs_node *)page_address(page);
1445 dst = (struct f2fs_node *)page_address(ipage);
1446
1447 memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
1448 dst->i.i_size = 0;
1449 dst->i.i_blocks = cpu_to_le64(1);
1450 dst->i.i_links = cpu_to_le32(1);
1451 dst->i.i_xattr_nid = 0;
1452
1453 new_ni = old_ni;
1454 new_ni.ino = ino;
1455
1456 set_node_addr(sbi, &new_ni, NEW_ADDR);
1457 inc_valid_inode_count(sbi);
1458
1459 f2fs_put_page(ipage, 1);
1460 return 0;
1461}
1462
1463int restore_node_summary(struct f2fs_sb_info *sbi,
1464 unsigned int segno, struct f2fs_summary_block *sum)
1465{
1466 struct f2fs_node *rn;
1467 struct f2fs_summary *sum_entry;
1468 struct page *page;
1469 block_t addr;
1470 int i, last_offset;
1471
1472	/* allocate a temporary page for reading the node blocks */
1473	page = alloc_page(GFP_NOFS | __GFP_ZERO);
1474	if (!page)
1475		return -ENOMEM;
1476 lock_page(page);
1477
1478 /* scan the node segment */
1479 last_offset = sbi->blocks_per_seg;
1480 addr = START_BLOCK(sbi, segno);
1481 sum_entry = &sum->entries[0];
1482
1483 for (i = 0; i < last_offset; i++, sum_entry++) {
1484 if (f2fs_readpage(sbi, page, addr, READ_SYNC))
1485 goto out;
1486
1487 rn = (struct f2fs_node *)page_address(page);
1488 sum_entry->nid = rn->footer.nid;
1489 sum_entry->version = 0;
1490 sum_entry->ofs_in_node = 0;
1491 addr++;
1492
1493 /*
1494		 * In order to read the next node page,
1495		 * we must clear the PageUptodate flag.
1496 */
1497 ClearPageUptodate(page);
1498 }
1499out:
1500 unlock_page(page);
1501 __free_pages(page, 0);
1502 return 0;
1503}
1504
1505static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1506{
1507 struct f2fs_nm_info *nm_i = NM_I(sbi);
1508 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1509 struct f2fs_summary_block *sum = curseg->sum_blk;
1510 int i;
1511
1512 mutex_lock(&curseg->curseg_mutex);
1513
1514 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1515 mutex_unlock(&curseg->curseg_mutex);
1516 return false;
1517 }
1518
1519 for (i = 0; i < nats_in_cursum(sum); i++) {
1520 struct nat_entry *ne;
1521 struct f2fs_nat_entry raw_ne;
1522 nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
1523
1524 raw_ne = nat_in_journal(sum, i);
1525retry:
1526 write_lock(&nm_i->nat_tree_lock);
1527 ne = __lookup_nat_cache(nm_i, nid);
1528 if (ne) {
1529 __set_nat_cache_dirty(nm_i, ne);
1530 write_unlock(&nm_i->nat_tree_lock);
1531 continue;
1532 }
1533 ne = grab_nat_entry(nm_i, nid);
1534 if (!ne) {
1535 write_unlock(&nm_i->nat_tree_lock);
1536 goto retry;
1537 }
1538 nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
1539 nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
1540 nat_set_version(ne, raw_ne.version);
1541 __set_nat_cache_dirty(nm_i, ne);
1542 write_unlock(&nm_i->nat_tree_lock);
1543 }
1544 update_nats_in_cursum(sum, -i);
1545 mutex_unlock(&curseg->curseg_mutex);
1546 return true;
1547}
1548
1549/*
1550 * This function is called during the checkpointing process.
1551 */
1552void flush_nat_entries(struct f2fs_sb_info *sbi)
1553{
1554 struct f2fs_nm_info *nm_i = NM_I(sbi);
1555 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1556 struct f2fs_summary_block *sum = curseg->sum_blk;
1557 struct list_head *cur, *n;
1558 struct page *page = NULL;
1559 struct f2fs_nat_block *nat_blk = NULL;
1560 nid_t start_nid = 0, end_nid = 0;
1561 bool flushed;
1562
1563 flushed = flush_nats_in_journal(sbi);
1564
1565 if (!flushed)
1566 mutex_lock(&curseg->curseg_mutex);
1567
1568 /* 1) flush dirty nat caches */
1569 list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
1570 struct nat_entry *ne;
1571 nid_t nid;
1572 struct f2fs_nat_entry raw_ne;
1573 int offset = -1;
1574 block_t old_blkaddr, new_blkaddr;
1575
1576 ne = list_entry(cur, struct nat_entry, list);
1577 nid = nat_get_nid(ne);
1578
1579 if (nat_get_blkaddr(ne) == NEW_ADDR)
1580 continue;
1581 if (flushed)
1582 goto to_nat_page;
1583
1584 /* if there is room for nat entries in the current summary block */
1585 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1586 if (offset >= 0) {
1587 raw_ne = nat_in_journal(sum, offset);
1588 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1589 goto flush_now;
1590 }
1591to_nat_page:
1592 if (!page || (start_nid > nid || nid > end_nid)) {
1593 if (page) {
1594 f2fs_put_page(page, 1);
1595 page = NULL;
1596 }
1597 start_nid = START_NID(nid);
1598 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1599
1600 /*
1601 * get the nat block page: dirtied, with an elevated
1602 * reference count, mapped and locked
1603 */
1604 page = get_next_nat_page(sbi, start_nid);
1605 nat_blk = page_address(page);
1606 }
1607
1608 BUG_ON(!nat_blk);
1609 raw_ne = nat_blk->entries[nid - start_nid];
1610 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1611flush_now:
1612 new_blkaddr = nat_get_blkaddr(ne);
1613
1614 raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
1615 raw_ne.block_addr = cpu_to_le32(new_blkaddr);
1616 raw_ne.version = nat_get_version(ne);
1617
1618 if (offset < 0) {
1619 nat_blk->entries[nid - start_nid] = raw_ne;
1620 } else {
1621 nat_in_journal(sum, offset) = raw_ne;
1622 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1623 }
1624
1625 if (nat_get_blkaddr(ne) == NULL_ADDR) {
1626 write_lock(&nm_i->nat_tree_lock);
1627 __del_from_nat_cache(nm_i, ne);
1628 write_unlock(&nm_i->nat_tree_lock);
1629
1630 /* We can reuse this freed nid at this point */
1631 add_free_nid(NM_I(sbi), nid);
1632 } else {
1633 write_lock(&nm_i->nat_tree_lock);
1634 __clear_nat_cache_dirty(nm_i, ne);
1635 ne->checkpointed = true;
1636 write_unlock(&nm_i->nat_tree_lock);
1637 }
1638 }
1639 if (!flushed)
1640 mutex_unlock(&curseg->curseg_mutex);
1641 f2fs_put_page(page, 1);
1642
1643 /* 2) shrink nat caches if necessary */
1644 try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
1645}
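/*
 * Sketch of the flush destinations above: when the hot-data journal
 * still has room (and was not just flushed by flush_nats_in_journal),
 * a dirty entry is written back into its in-journal NAT slot;
 * otherwise it goes to its on-disk NAT block via get_next_nat_page(),
 * which targets the alternate copy selected by set_to_next_nat().
 */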
1646
1647static int init_node_manager(struct f2fs_sb_info *sbi)
1648{
1649 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
1650 struct f2fs_nm_info *nm_i = NM_I(sbi);
1651 unsigned char *version_bitmap;
1652 unsigned int nat_segs, nat_blocks;
1653
1654 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
1655
1656 /* segment_count_nat includes the pair of segments, so divide by 2 */
1657 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1658 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1659 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1660 nm_i->fcnt = 0;
1661 nm_i->nat_cnt = 0;
1662
1663 INIT_LIST_HEAD(&nm_i->free_nid_list);
1664 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1665 INIT_LIST_HEAD(&nm_i->nat_entries);
1666 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1667
1668 mutex_init(&nm_i->build_lock);
1669 spin_lock_init(&nm_i->free_nid_list_lock);
1670 rwlock_init(&nm_i->nat_tree_lock);
1671
1672 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1673 nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1674 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1675
1676 nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
1677 if (!nm_i->nat_bitmap)
1678 return -ENOMEM;
1679 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1680 if (!version_bitmap)
1681 return -EFAULT;
1682
1683 /* copy version bitmap */
1684 memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size);
1685 return 0;
1686}
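/*
 * Worked example of the max_nid arithmetic above, under an assumed
 * geometry: with segment_count_nat == 4 (two usable NAT segments after
 * halving for the paired copies), log_blocks_per_seg == 9 (512 blocks
 * per segment) and NAT_ENTRY_PER_BLOCK == 455, nat_blocks is
 * 2 * 512 == 1024 and max_nid == 455 * 1024 == 465920 node ids.
 */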
1687
1688int build_node_manager(struct f2fs_sb_info *sbi)
1689{
1690 int err;
1691
1692 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
1693 if (!sbi->nm_info)
1694 return -ENOMEM;
1695
1696 err = init_node_manager(sbi);
1697 if (err)
1698 return err;
1699
1700 build_free_nids(sbi);
1701 return 0;
1702}
1703
1704void destroy_node_manager(struct f2fs_sb_info *sbi)
1705{
1706 struct f2fs_nm_info *nm_i = NM_I(sbi);
1707 struct free_nid *i, *next_i;
1708 struct nat_entry *natvec[NATVEC_SIZE];
1709 nid_t nid = 0;
1710 unsigned int found;
1711
1712 if (!nm_i)
1713 return;
1714
1715 /* destroy free nid list */
1716 spin_lock(&nm_i->free_nid_list_lock);
1717 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1718 BUG_ON(i->state == NID_ALLOC);
1719 __del_from_free_nid_list(i);
1720 nm_i->fcnt--;
1721 }
1722 BUG_ON(nm_i->fcnt);
1723 spin_unlock(&nm_i->free_nid_list_lock);
1724
1725 /* destroy nat cache */
1726 write_lock(&nm_i->nat_tree_lock);
1727 while ((found = __gang_lookup_nat_cache(nm_i,
1728 nid, NATVEC_SIZE, natvec))) {
1729 unsigned idx;
1730 for (idx = 0; idx < found; idx++) {
1731 struct nat_entry *e = natvec[idx];
1732 nid = nat_get_nid(e) + 1;
1733 __del_from_nat_cache(nm_i, e);
1734 }
1735 }
1736 BUG_ON(nm_i->nat_cnt);
1737 write_unlock(&nm_i->nat_tree_lock);
1738
1739 kfree(nm_i->nat_bitmap);
1740 sbi->nm_info = NULL;
1741 kfree(nm_i);
1742}
1743
1744int create_node_manager_caches(void)
1745{
1746 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1747 sizeof(struct nat_entry), NULL);
1748 if (!nat_entry_slab)
1749 return -ENOMEM;
1750
1751 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1752 sizeof(struct free_nid), NULL);
1753 if (!free_nid_slab) {
1754 kmem_cache_destroy(nat_entry_slab);
1755 return -ENOMEM;
1756 }
1757 return 0;
1758}
1759
1760void destroy_node_manager_caches(void)
1761{
1762 kmem_cache_destroy(free_nid_slab);
1763 kmem_cache_destroy(nat_entry_slab);
1764}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
new file mode 100644
index 000000000000..afdb130f782e
--- /dev/null
+++ b/fs/f2fs/node.h
@@ -0,0 +1,353 @@
1/*
2 * fs/f2fs/node.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11/* start node id of a node block dedicated to the given node id */
12#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
13
14/* node block offset on the NAT area dedicated to the given start node id */
15#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK)
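/*
 * Worked example, assuming 4KB blocks and 9-byte raw NAT entries so
 * that NAT_ENTRY_PER_BLOCK == 455: nid 1000 lives in the NAT block
 * whose first nid is START_NID(1000) == (1000 / 455) * 455 == 910,
 * and that block sits at NAT_BLOCK_OFFSET(910) == 2 in the NAT area.
 */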
16
17/* # of pages to perform readahead before building free nids */
18#define FREE_NID_PAGES 4
19
20/* maximum # of free node ids to produce during build_free_nids */
21#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
22
23/* maximum readahead size for node during getting data blocks */
24#define MAX_RA_NODE 128
25
26/* maximum cached nat entries to manage memory footprint */
27#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK)
28
29/* vector size for gang look-up from nat cache that consists of radix tree */
30#define NATVEC_SIZE 64
31
32/*
33 * For node information
34 */
35struct node_info {
36 nid_t nid; /* node id */
37 nid_t ino; /* inode number of the node's owner */
38 block_t blk_addr; /* block address of the node */
39 unsigned char version; /* version of the node */
40};
41
42struct nat_entry {
43 struct list_head list; /* for clean or dirty nat list */
44 bool checkpointed; /* whether it is checkpointed or not */
45 struct node_info ni; /* in-memory node information */
46};
47
48#define nat_get_nid(nat) (nat->ni.nid)
49#define nat_set_nid(nat, n) (nat->ni.nid = n)
50#define nat_get_blkaddr(nat) (nat->ni.blk_addr)
51#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b)
52#define nat_get_ino(nat) (nat->ni.ino)
53#define nat_set_ino(nat, i) (nat->ni.ino = i)
54#define nat_get_version(nat) (nat->ni.version)
55#define nat_set_version(nat, v) (nat->ni.version = v)
56
57#define __set_nat_cache_dirty(nm_i, ne) \
58 list_move_tail(&ne->list, &nm_i->dirty_nat_entries)
59#define __clear_nat_cache_dirty(nm_i, ne) \
60 list_move_tail(&ne->list, &nm_i->nat_entries)
61#define inc_node_version(version) (++version)
62
63static inline void node_info_from_raw_nat(struct node_info *ni,
64 struct f2fs_nat_entry *raw_ne)
65{
66 ni->ino = le32_to_cpu(raw_ne->ino);
67 ni->blk_addr = le32_to_cpu(raw_ne->block_addr);
68 ni->version = raw_ne->version;
69}
70
71/*
72 * For free nid management
73 */
74enum nid_state {
75 NID_NEW, /* newly added to free nid list */
76 NID_ALLOC /* it is allocated */
77};
78
79struct free_nid {
80 struct list_head list; /* for free node id list */
81 nid_t nid; /* node id */
82 int state; /* in use or not: NID_NEW or NID_ALLOC */
83};
84
85static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
86{
87 struct f2fs_nm_info *nm_i = NM_I(sbi);
88 struct free_nid *fnid;
89
90 if (nm_i->fcnt <= 0)
91 return -1;
92 spin_lock(&nm_i->free_nid_list_lock);
93 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
94 *nid = fnid->nid;
95 spin_unlock(&nm_i->free_nid_list_lock);
96 return 0;
97}
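/*
 * Minimal usage sketch (hypothetical caller): peek at the next cached
 * free nid without claiming it; actually reserving a nid is done by
 * the allocation path in node.c.
 *
 *	nid_t nid;
 *	if (!next_free_nid(sbi, &nid))
 *		pr_debug("next free nid candidate: %u\n", nid);
 */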
98
99/*
100 * inline functions
101 */
102static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
103{
104 struct f2fs_nm_info *nm_i = NM_I(sbi);
105 memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
106}
107
108static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
109{
110 struct f2fs_nm_info *nm_i = NM_I(sbi);
111 pgoff_t block_off;
112 pgoff_t block_addr;
113 int seg_off;
114
115 block_off = NAT_BLOCK_OFFSET(start);
116 seg_off = block_off >> sbi->log_blocks_per_seg;
117
118 block_addr = (pgoff_t)(nm_i->nat_blkaddr +
119 (seg_off << sbi->log_blocks_per_seg << 1) +
120 (block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
121
122 if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
123 block_addr += sbi->blocks_per_seg;
124
125 return block_addr;
126}
127
128static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
129 pgoff_t block_addr)
130{
131 struct f2fs_nm_info *nm_i = NM_I(sbi);
132
133 block_addr -= nm_i->nat_blkaddr;
134 if ((block_addr >> sbi->log_blocks_per_seg) % 2)
135 block_addr -= sbi->blocks_per_seg;
136 else
137 block_addr += sbi->blocks_per_seg;
138
139 return block_addr + nm_i->nat_blkaddr;
140}
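/*
 * The NAT area keeps two copies of each NAT block in paired segments,
 * and nat_bitmap selects the live copy per block. Worked example with
 * an assumed log_blocks_per_seg == 9: for block_off == 700, seg_off ==
 * 700 >> 9 == 1, so current_nat_addr() yields nat_blkaddr + (1 << 10)
 * + (700 & 511) == nat_blkaddr + 1212, plus one more segment of 512
 * blocks when the bitmap marks the second copy as current.
 * next_nat_addr() maps that address onto the other copy of the pair,
 * which is where set_to_next_nat() redirects the next write.
 */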
141
142static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
143{
144 unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
145
146 if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
147 f2fs_clear_bit(block_off, nm_i->nat_bitmap);
148 else
149 f2fs_set_bit(block_off, nm_i->nat_bitmap);
150}
151
152static inline void fill_node_footer(struct page *page, nid_t nid,
153 nid_t ino, unsigned int ofs, bool reset)
154{
155 void *kaddr = page_address(page);
156 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
157 if (reset)
158 memset(rn, 0, sizeof(*rn));
159 rn->footer.nid = cpu_to_le32(nid);
160 rn->footer.ino = cpu_to_le32(ino);
161 rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT);
162}
163
164static inline void copy_node_footer(struct page *dst, struct page *src)
165{
166 void *src_addr = page_address(src);
167 void *dst_addr = page_address(dst);
168 struct f2fs_node *src_rn = (struct f2fs_node *)src_addr;
169 struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr;
170 memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
171}
172
173static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
174{
175 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
176 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
177 void *kaddr = page_address(page);
178 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
179 rn->footer.cp_ver = ckpt->checkpoint_ver;
180 rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
181}
182
183static inline nid_t ino_of_node(struct page *node_page)
184{
185 void *kaddr = page_address(node_page);
186 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
187 return le32_to_cpu(rn->footer.ino);
188}
189
190static inline nid_t nid_of_node(struct page *node_page)
191{
192 void *kaddr = page_address(node_page);
193 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
194 return le32_to_cpu(rn->footer.nid);
195}
196
197static inline unsigned int ofs_of_node(struct page *node_page)
198{
199 void *kaddr = page_address(node_page);
200 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
201 unsigned flag = le32_to_cpu(rn->footer.flag);
202 return flag >> OFFSET_BIT_SHIFT;
203}
204
205static inline unsigned long long cpver_of_node(struct page *node_page)
206{
207 void *kaddr = page_address(node_page);
208 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
209 return le64_to_cpu(rn->footer.cp_ver);
210}
211
212static inline block_t next_blkaddr_of_node(struct page *node_page)
213{
214 void *kaddr = page_address(node_page);
215 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
216 return le32_to_cpu(rn->footer.next_blkaddr);
217}
218
219/*
220 * f2fs assigns the following node offsets described as (num).
221 * N = NIDS_PER_BLOCK
222 *
223 * Inode block (0)
224 * |- direct node (1)
225 * |- direct node (2)
226 * |- indirect node (3)
227 * | `- direct node (4 => 4 + N - 1)
228 * |- indirect node (4 + N)
229 * | `- direct node (5 + N => 5 + 2N - 1)
230 * `- double indirect node (5 + 2N)
231 * `- indirect node (6 + 2N)
232 * `- direct node (x(N + 1))
233 */
234static inline bool IS_DNODE(struct page *node_page)
235{
236 unsigned int ofs = ofs_of_node(node_page);
237 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
238 ofs == 5 + 2 * NIDS_PER_BLOCK)
239 return false;
240 if (ofs >= 6 + 2 * NIDS_PER_BLOCK) {
241 ofs -= 6 + 2 * NIDS_PER_BLOCK;
242 if (!((long int)ofs % (NIDS_PER_BLOCK + 1)))
243 return false;
244 }
245 return true;
246}
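/*
 * Worked example for the check above, with a hypothetical
 * NIDS_PER_BLOCK N == 1018: offsets 3, 4 + N == 1022 and
 * 5 + 2N == 2041 are the (double) indirect nodes, hence not dnodes.
 * From 6 + 2N == 2042 on, each group of N + 1 offsets starts with an
 * indirect node (remainder 0, not a dnode) followed by its N direct
 * nodes (remainders 1..N, which are dnodes).
 */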
247
248static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
249{
250 struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
251
252 wait_on_page_writeback(p);
253
254 if (i)
255 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
256 else
257 rn->in.nid[off] = cpu_to_le32(nid);
258 set_page_dirty(p);
259}
260
261static inline nid_t get_nid(struct page *p, int off, bool i)
262{
263 struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
264 if (i)
265 return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
266 return le32_to_cpu(rn->in.nid[off]);
267}
268
269/*
270 * Coldness identification:
271 * - Mark cold files in f2fs_inode_info
272 * - Mark cold node blocks in their node footer
273 * - Mark cold data pages in page cache
274 */
275static inline int is_cold_file(struct inode *inode)
276{
277 return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT;
278}
279
280static inline int is_cold_data(struct page *page)
281{
282 return PageChecked(page);
283}
284
285static inline void set_cold_data(struct page *page)
286{
287 SetPageChecked(page);
288}
289
290static inline void clear_cold_data(struct page *page)
291{
292 ClearPageChecked(page);
293}
294
295static inline int is_cold_node(struct page *page)
296{
297 void *kaddr = page_address(page);
298 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
299 unsigned int flag = le32_to_cpu(rn->footer.flag);
300 return flag & (0x1 << COLD_BIT_SHIFT);
301}
302
303static inline unsigned char is_fsync_dnode(struct page *page)
304{
305 void *kaddr = page_address(page);
306 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
307 unsigned int flag = le32_to_cpu(rn->footer.flag);
308 return flag & (0x1 << FSYNC_BIT_SHIFT);
309}
310
311static inline unsigned char is_dent_dnode(struct page *page)
312{
313 void *kaddr = page_address(page);
314 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
315 unsigned int flag = le32_to_cpu(rn->footer.flag);
316 return flag & (0x1 << DENT_BIT_SHIFT);
317}
318
319static inline void set_cold_node(struct inode *inode, struct page *page)
320{
321 struct f2fs_node *rn = (struct f2fs_node *)page_address(page);
322 unsigned int flag = le32_to_cpu(rn->footer.flag);
323
324 if (S_ISDIR(inode->i_mode))
325 flag &= ~(0x1 << COLD_BIT_SHIFT);
326 else
327 flag |= (0x1 << COLD_BIT_SHIFT);
328 rn->footer.flag = cpu_to_le32(flag);
329}
330
331static inline void set_fsync_mark(struct page *page, int mark)
332{
333 void *kaddr = page_address(page);
334 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
335 unsigned int flag = le32_to_cpu(rn->footer.flag);
336 if (mark)
337 flag |= (0x1 << FSYNC_BIT_SHIFT);
338 else
339 flag &= ~(0x1 << FSYNC_BIT_SHIFT);
340 rn->footer.flag = cpu_to_le32(flag);
341}
342
343static inline void set_dentry_mark(struct page *page, int mark)
344{
345 void *kaddr = page_address(page);
346 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
347 unsigned int flag = le32_to_cpu(rn->footer.flag);
348 if (mark)
349 flag |= (0x1 << DENT_BIT_SHIFT);
350 else
351 flag &= ~(0x1 << DENT_BIT_SHIFT);
352 rn->footer.flag = cpu_to_le32(flag);
353}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
new file mode 100644
index 000000000000..b07e9b6ef376
--- /dev/null
+++ b/fs/f2fs/recovery.c
@@ -0,0 +1,375 @@
1/*
2 * fs/f2fs/recovery.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include "f2fs.h"
14#include "node.h"
15#include "segment.h"
16
17static struct kmem_cache *fsync_entry_slab;
18
19bool space_for_roll_forward(struct f2fs_sb_info *sbi)
20{
21 if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
22 > sbi->user_block_count)
23 return false;
24 return true;
25}
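/*
 * Sketch with assumed counters: if last_valid_block_count == 900000,
 * alloc_valid_block_count == 50000 and user_block_count == 1000000,
 * then 950000 <= 1000000 and there is room to roll forward, i.e. to
 * rewrite the fsync'ed blocks during recovery.
 */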
26
27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
28 nid_t ino)
29{
30 struct list_head *this;
31 struct fsync_inode_entry *entry;
32
33 list_for_each(this, head) {
34 entry = list_entry(this, struct fsync_inode_entry, list);
35 if (entry->inode->i_ino == ino)
36 return entry;
37 }
38 return NULL;
39}
40
41static int recover_dentry(struct page *ipage, struct inode *inode)
42{
43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
44 struct f2fs_inode *raw_inode = &(raw_node->i);
45 struct dentry dent, parent;
46 struct f2fs_dir_entry *de;
47 struct page *page;
48 struct inode *dir;
49 int err = 0;
50
51 if (!is_dent_dnode(ipage))
52 goto out;
53
54 dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino));
55 if (IS_ERR(dir)) {
56 err = -EINVAL;
57 goto out;
58 }
59
60 parent.d_inode = dir;
61 dent.d_parent = &parent;
62 dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
63 dent.d_name.name = raw_inode->i_name;
64
65 de = f2fs_find_entry(dir, &dent.d_name, &page);
66 if (de) {
67 kunmap(page);
68 f2fs_put_page(page, 0);
69 } else {
70 f2fs_add_link(&dent, inode);
71 }
72 iput(dir);
73out:
74 kunmap(ipage);
75 return err;
76}
77
78static int recover_inode(struct inode *inode, struct page *node_page)
79{
80 void *kaddr = page_address(node_page);
81 struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
82 struct f2fs_inode *raw_inode = &(raw_node->i);
83
84 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
85 i_size_write(inode, le64_to_cpu(raw_inode->i_size));
86 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
87 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
88 inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
89 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
90 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
91 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
92
93 return recover_dentry(node_page, inode);
94}
95
96static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
97{
98 unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
99 struct curseg_info *curseg;
100 struct page *page;
101 block_t blkaddr;
102 int err = 0;
103
104 /* get node pages in the current segment */
105 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
106 blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
107
108 /* read node page */
109 page = alloc_page(GFP_F2FS_ZERO);
110 if (!page)
111 return -ENOMEM;
112 lock_page(page);
113
114 while (1) {
115 struct fsync_inode_entry *entry;
116
117 if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
118 goto out;
119
120 if (cp_ver != cpver_of_node(page))
121 goto out;
122
123 if (!is_fsync_dnode(page))
124 goto next;
125
126 entry = get_fsync_inode(head, ino_of_node(page));
127 if (entry) {
128 entry->blkaddr = blkaddr;
129 if (IS_INODE(page) && is_dent_dnode(page))
130 set_inode_flag(F2FS_I(entry->inode),
131 FI_INC_LINK);
132 } else {
133 if (IS_INODE(page) && is_dent_dnode(page)) {
134 if (recover_inode_page(sbi, page)) {
135 err = -ENOMEM;
136 goto out;
137 }
138 }
139
140 /* add this fsync inode to the list */
141 entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS);
142 if (!entry) {
143 err = -ENOMEM;
144 goto out;
145 }
146
147 INIT_LIST_HEAD(&entry->list);
148 list_add_tail(&entry->list, head);
149
150 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
151 if (IS_ERR(entry->inode)) {
152 err = PTR_ERR(entry->inode);
153 goto out;
154 }
155 entry->blkaddr = blkaddr;
156 }
157 if (IS_INODE(page)) {
158 err = recover_inode(entry->inode, page);
159 if (err)
160 goto out;
161 }
162next:
163 /* follow the chain to the next node block */
164 blkaddr = next_blkaddr_of_node(page);
165 ClearPageUptodate(page);
166 }
167out:
168 unlock_page(page);
169 __free_pages(page, 0);
170 return err;
171}
172
173static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
174 struct list_head *head)
175{
176 struct list_head *this;
177 struct fsync_inode_entry *entry;
178 list_for_each(this, head) {
179 entry = list_entry(this, struct fsync_inode_entry, list);
180 iput(entry->inode);
181 list_del(&entry->list);
182 kmem_cache_free(fsync_entry_slab, entry);
183 }
184}
185
186static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
187 block_t blkaddr)
188{
189 struct seg_entry *sentry;
190 unsigned int segno = GET_SEGNO(sbi, blkaddr);
191 unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
192 (sbi->blocks_per_seg - 1);
193 struct f2fs_summary sum;
194 nid_t ino;
195 void *kaddr;
196 struct inode *inode;
197 struct page *node_page;
198 block_t bidx;
199 int i;
200
201 sentry = get_seg_entry(sbi, segno);
202 if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
203 return;
204
205 /* Get the previous summary */
206 for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
207 struct curseg_info *curseg = CURSEG_I(sbi, i);
208 if (curseg->segno == segno) {
209 sum = curseg->sum_blk->entries[blkoff];
210 break;
211 }
212 }
213 if (i > CURSEG_COLD_DATA) {
214 struct page *sum_page = get_sum_page(sbi, segno);
215 struct f2fs_summary_block *sum_node;
216 kaddr = page_address(sum_page);
217 sum_node = (struct f2fs_summary_block *)kaddr;
218 sum = sum_node->entries[blkoff];
219 f2fs_put_page(sum_page, 1);
220 }
221
222 /* Get the node page */
223 node_page = get_node_page(sbi, le32_to_cpu(sum.nid));
224 bidx = start_bidx_of_node(ofs_of_node(node_page)) +
225 le16_to_cpu(sum.ofs_in_node);
226 ino = ino_of_node(node_page);
227 f2fs_put_page(node_page, 1);
228
229 /* Deallocate previous index in the node page */
230 inode = f2fs_iget_nowait(sbi->sb, ino);
231 truncate_hole(inode, bidx, bidx + 1);
232 iput(inode);
233}
234
235static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
236 struct page *page, block_t blkaddr)
237{
238 unsigned int start, end;
239 struct dnode_of_data dn;
240 struct f2fs_summary sum;
241 struct node_info ni;
242
243 start = start_bidx_of_node(ofs_of_node(page));
244 if (IS_INODE(page))
245 end = start + ADDRS_PER_INODE;
246 else
247 end = start + ADDRS_PER_BLOCK;
248
249 set_new_dnode(&dn, inode, NULL, NULL, 0);
250 if (get_dnode_of_data(&dn, start, 0))
251 return;
252
253 wait_on_page_writeback(dn.node_page);
254
255 get_node_info(sbi, dn.nid, &ni);
256 BUG_ON(ni.ino != ino_of_node(page));
257 BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));
258
259 for (; start < end; start++) {
260 block_t src, dest;
261
262 src = datablock_addr(dn.node_page, dn.ofs_in_node);
263 dest = datablock_addr(page, dn.ofs_in_node);
264
265 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
266 if (src == NULL_ADDR) {
267 int err = reserve_new_block(&dn);
268 /* We should not get -ENOSPC */
269 BUG_ON(err);
270 }
271
272 /* Check the previous node page having this index */
273 check_index_in_prev_nodes(sbi, dest);
274
275 set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
276
277 /* write dummy data page */
278 recover_data_page(sbi, NULL, &sum, src, dest);
279 update_extent_cache(dest, &dn);
280 }
281 dn.ofs_in_node++;
282 }
283
284 /* write node page in place */
285 set_summary(&sum, dn.nid, 0, 0);
286 if (IS_INODE(dn.node_page))
287 sync_inode_page(&dn);
288
289 copy_node_footer(dn.node_page, page);
290 fill_node_footer(dn.node_page, dn.nid, ni.ino,
291 ofs_of_node(page), false);
292 set_page_dirty(dn.node_page);
293
294 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
295 f2fs_put_dnode(&dn);
296}
297
298static void recover_data(struct f2fs_sb_info *sbi,
299 struct list_head *head, int type)
300{
301 unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
302 struct curseg_info *curseg;
303 struct page *page;
304 block_t blkaddr;
305
306 /* get node pages in the current segment */
307 curseg = CURSEG_I(sbi, type);
308 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
309
310 /* read node page */
311 page = alloc_page(GFP_NOFS | __GFP_ZERO);
312 if (!page)
313 return;
314 lock_page(page);
315
316 while (1) {
317 struct fsync_inode_entry *entry;
318
319 if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
320 goto out;
321
322 if (cp_ver != cpver_of_node(page))
323 goto out;
324
325 entry = get_fsync_inode(head, ino_of_node(page));
326 if (!entry)
327 goto next;
328
329 do_recover_data(sbi, entry->inode, page, blkaddr);
330
331 if (entry->blkaddr == blkaddr) {
332 iput(entry->inode);
333 list_del(&entry->list);
334 kmem_cache_free(fsync_entry_slab, entry);
335 }
336next:
337 /* follow the chain to the next node block */
338 blkaddr = next_blkaddr_of_node(page);
339 ClearPageUptodate(page);
340 }
341out:
342 unlock_page(page);
343 __free_pages(page, 0);
344
345 allocate_new_segments(sbi);
346}
347
348void recover_fsync_data(struct f2fs_sb_info *sbi)
349{
350 struct list_head inode_list;
351
352 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
353 sizeof(struct fsync_inode_entry), NULL);
354 if (unlikely(!fsync_entry_slab))
355 return;
356
357 INIT_LIST_HEAD(&inode_list);
358
359 /* step #1: find fsynced inode numbers */
360 if (find_fsync_dnodes(sbi, &inode_list))
361 goto out;
362
363 if (list_empty(&inode_list))
364 goto out;
365
366 /* step #2: recover data */
367 sbi->por_doing = 1;
368 recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
369 sbi->por_doing = 0;
370 BUG_ON(!list_empty(&inode_list));
371out:
372 destroy_fsync_dnodes(sbi, &inode_list);
373 kmem_cache_destroy(fsync_entry_slab);
374 write_checkpoint(sbi, false, false);
375}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
new file mode 100644
index 000000000000..1b26e4ea1016
--- /dev/null
+++ b/fs/f2fs/segment.c
@@ -0,0 +1,1791 @@
1/*
2 * fs/f2fs/segment.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/bio.h>
14#include <linux/blkdev.h>
15#include <linux/vmalloc.h>
16
17#include "f2fs.h"
18#include "segment.h"
19#include "node.h"
20
21static int need_to_flush(struct f2fs_sb_info *sbi)
22{
23 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
24 sbi->segs_per_sec;
25 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
26 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
27 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
28 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
29
30 if (sbi->por_doing)
31 return 0;
32
33 if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
34 reserved_sections(sbi)))
35 return 1;
36 return 0;
37}
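/*
 * Worked example for the section arithmetic above (assumed geometry:
 * log_blocks_per_seg == 9 and segs_per_sec == 1, so 512 pages per
 * section): 1000 dirty node pages round up to node_secs == 2 and 100
 * dirty dentry pages to dent_secs == 1, so flushing is needed once
 * free_sections() falls to 2 + 2 * 1 + reserved_sections() or below.
 */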
38
39/*
40 * This function balances dirty node and dentry pages.
41 * In addition, it controls garbage collection.
42 */
43void f2fs_balance_fs(struct f2fs_sb_info *sbi)
44{
45 struct writeback_control wbc = {
46 .sync_mode = WB_SYNC_ALL,
47 .nr_to_write = LONG_MAX,
48 .for_reclaim = 0,
49 };
50
51 if (sbi->por_doing)
52 return;
53
54 /*
55 * Flush dirty dentry and node pages while enough free segments
56 * still remain. After that, trigger GC if free sections run short.
57 */
58 if (need_to_flush(sbi)) {
59 sync_dirty_dir_inodes(sbi);
60 sync_node_pages(sbi, 0, &wbc);
61 }
62
63 if (has_not_enough_free_secs(sbi)) {
64 mutex_lock(&sbi->gc_mutex);
65 f2fs_gc(sbi, 1);
66 }
67}
68
69static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
70 enum dirty_type dirty_type)
71{
72 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
73
74 /* need not be added */
75 if (IS_CURSEG(sbi, segno))
76 return;
77
78 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
79 dirty_i->nr_dirty[dirty_type]++;
80
81 if (dirty_type == DIRTY) {
82 struct seg_entry *sentry = get_seg_entry(sbi, segno);
83 dirty_type = sentry->type;
84 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
85 dirty_i->nr_dirty[dirty_type]++;
86 }
87}
88
89static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
90 enum dirty_type dirty_type)
91{
92 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
93
94 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
95 dirty_i->nr_dirty[dirty_type]--;
96
97 if (dirty_type == DIRTY) {
98 struct seg_entry *sentry = get_seg_entry(sbi, segno);
99 dirty_type = sentry->type;
100 if (test_and_clear_bit(segno,
101 dirty_i->dirty_segmap[dirty_type]))
102 dirty_i->nr_dirty[dirty_type]--;
103 clear_bit(segno, dirty_i->victim_segmap[FG_GC]);
104 clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
105 }
106}
107
108/*
109 * Errors such as -ENOMEM should not occur here.
110 * Adding a dirty entry into the seglist is not a critical operation.
111 * If a given segment is one of the current working segments, it won't be added.
112 */
113void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
114{
115 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
116 unsigned short valid_blocks;
117
118 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
119 return;
120
121 mutex_lock(&dirty_i->seglist_lock);
122
123 valid_blocks = get_valid_blocks(sbi, segno, 0);
124
125 if (valid_blocks == 0) {
126 __locate_dirty_segment(sbi, segno, PRE);
127 __remove_dirty_segment(sbi, segno, DIRTY);
128 } else if (valid_blocks < sbi->blocks_per_seg) {
129 __locate_dirty_segment(sbi, segno, DIRTY);
130 } else {
131 /* Recovery routine with SSR needs this */
132 __remove_dirty_segment(sbi, segno, DIRTY);
133 }
134
135 mutex_unlock(&dirty_i->seglist_lock);
136 return;
137}
138
139/*
140 * Should call clear_prefree_segments after checkpoint is done.
141 */
142static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
143{
144 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
145 unsigned int segno, offset = 0;
146 unsigned int total_segs = TOTAL_SEGS(sbi);
147
148 mutex_lock(&dirty_i->seglist_lock);
149 while (1) {
150 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
151 offset);
152 if (segno >= total_segs)
153 break;
154 __set_test_and_free(sbi, segno);
155 offset = segno + 1;
156 }
157 mutex_unlock(&dirty_i->seglist_lock);
158}
159
160void clear_prefree_segments(struct f2fs_sb_info *sbi)
161{
162 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
163 unsigned int segno, offset = 0;
164 unsigned int total_segs = TOTAL_SEGS(sbi);
165
166 mutex_lock(&dirty_i->seglist_lock);
167 while (1) {
168 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
169 offset);
170 if (segno >= total_segs)
171 break;
172
173 offset = segno + 1;
174 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
175 dirty_i->nr_dirty[PRE]--;
176
177 /* issue a discard (trim) for the freed segment */
178 if (test_opt(sbi, DISCARD))
179 blkdev_issue_discard(sbi->sb->s_bdev,
180 START_BLOCK(sbi, segno) <<
181 sbi->log_sectors_per_block,
182 1 << (sbi->log_sectors_per_block +
183 sbi->log_blocks_per_seg),
184 GFP_NOFS, 0);
185 }
186 mutex_unlock(&dirty_i->seglist_lock);
187}
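/*
 * Discard arithmetic sketch (assumed log_sectors_per_block == 3, i.e.
 * 4KB blocks on 512B sectors, and log_blocks_per_seg == 9): each
 * prefree segment is trimmed as 1 << (3 + 9) == 4096 sectors starting
 * at sector START_BLOCK(sbi, segno) << 3.
 */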
188
189static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
190{
191 struct sit_info *sit_i = SIT_I(sbi);
192 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap))
193 sit_i->dirty_sentries++;
194}
195
196static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
197 unsigned int segno, int modified)
198{
199 struct seg_entry *se = get_seg_entry(sbi, segno);
200 se->type = type;
201 if (modified)
202 __mark_sit_entry_dirty(sbi, segno);
203}
204
205static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
206{
207 struct seg_entry *se;
208 unsigned int segno, offset;
209 long int new_vblocks;
210
211 segno = GET_SEGNO(sbi, blkaddr);
212
213 se = get_seg_entry(sbi, segno);
214 new_vblocks = se->valid_blocks + del;
215 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
216
217 BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
218 (new_vblocks > sbi->blocks_per_seg)));
219
220 se->valid_blocks = new_vblocks;
221 se->mtime = get_mtime(sbi);
222 SIT_I(sbi)->max_mtime = se->mtime;
223
224 /* Update valid block bitmap */
225 if (del > 0) {
226 if (f2fs_set_bit(offset, se->cur_valid_map))
227 BUG();
228 } else {
229 if (!f2fs_clear_bit(offset, se->cur_valid_map))
230 BUG();
231 }
232 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
233 se->ckpt_valid_blocks += del;
234
235 __mark_sit_entry_dirty(sbi, segno);
236
237 /* update total number of valid blocks to be written in ckpt area */
238 SIT_I(sbi)->written_valid_blocks += del;
239
240 if (sbi->segs_per_sec > 1)
241 get_sec_entry(sbi, segno)->valid_blocks += del;
242}
243
244static void refresh_sit_entry(struct f2fs_sb_info *sbi,
245 block_t old_blkaddr, block_t new_blkaddr)
246{
247 update_sit_entry(sbi, new_blkaddr, 1);
248 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
249 update_sit_entry(sbi, old_blkaddr, -1);
250}
251
252void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
253{
254 unsigned int segno = GET_SEGNO(sbi, addr);
255 struct sit_info *sit_i = SIT_I(sbi);
256
257 BUG_ON(addr == NULL_ADDR);
258 if (addr == NEW_ADDR)
259 return;
260
261 /* add it into sit main buffer */
262 mutex_lock(&sit_i->sentry_lock);
263
264 update_sit_entry(sbi, addr, -1);
265
266 /* add it into dirty seglist */
267 locate_dirty_segment(sbi, segno);
268
269 mutex_unlock(&sit_i->sentry_lock);
270}
271
272/*
273 * This function must be called with curseg_mutex held
274 */
275static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
276 struct f2fs_summary *sum, unsigned short offset)
277{
278 struct curseg_info *curseg = CURSEG_I(sbi, type);
279 void *addr = curseg->sum_blk;
280 addr += offset * sizeof(struct f2fs_summary);
281 memcpy(addr, sum, sizeof(struct f2fs_summary));
282 return;
283}
284
285/*
286 * Calculate the number of current summary pages for writing
287 */
288int npages_for_summary_flush(struct f2fs_sb_info *sbi)
289{
290 int total_size_bytes = 0;
291 int valid_sum_count = 0;
292 int i, sum_space;
293
294 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
295 if (sbi->ckpt->alloc_type[i] == SSR)
296 valid_sum_count += sbi->blocks_per_seg;
297 else
298 valid_sum_count += curseg_blkoff(sbi, i);
299 }
300
301 total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
302 + sizeof(struct nat_journal) + 2
303 + sizeof(struct sit_journal) + 2;
304 sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
305 if (total_size_bytes < sum_space)
306 return 1;
307 else if (total_size_bytes < 2 * sum_space)
308 return 2;
309 return 3;
310}
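/*
 * Worked example, assuming 4KB pages and SUMMARY_SIZE == 7, so each
 * entry is budgeted SUMMARY_SIZE + 1 == 8 bytes here: 300 valid
 * summaries across the three data logs come to 300 * 8 == 2400 bytes
 * plus the two journal areas, which still fits below
 * PAGE_CACHE_SIZE - SUM_FOOTER_SIZE, so a single compacted summary
 * page suffices; the result is capped at three pages.
 */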
311
312/*
313 * Caller should put this summary page
314 */
315struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
316{
317 return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
318}
319
320static void write_sum_page(struct f2fs_sb_info *sbi,
321 struct f2fs_summary_block *sum_blk, block_t blk_addr)
322{
323 struct page *page = grab_meta_page(sbi, blk_addr);
324 void *kaddr = page_address(page);
325 memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
326 set_page_dirty(page);
327 f2fs_put_page(page, 1);
328}
329
330static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
331 int ofs_unit, int type)
332{
333 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
334 unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
335 unsigned int segno, next_segno, i;
336 int ofs = 0;
337
338 /*
339 * If there are not enough reserved sections,
340 * we should not reuse prefree segments.
341 */
342 if (has_not_enough_free_secs(sbi))
343 return NULL_SEGNO;
344
345 /*
346 * NODE pages should not reuse prefree segments,
347 * since that information is needed for sudden-power-off recovery (SPOR).
348 */
349 if (IS_NODESEG(type))
350 return NULL_SEGNO;
351next:
352 segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++);
353 ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit;
354 if (segno < TOTAL_SEGS(sbi)) {
355 /* skip intermediate segments in a section */
356 if (segno % ofs_unit)
357 goto next;
358
359 /* skip if whole section is not prefree */
360 next_segno = find_next_zero_bit(prefree_segmap,
361 TOTAL_SEGS(sbi), segno + 1);
362 if (next_segno - segno < ofs_unit)
363 goto next;
364
365 /* skip if whole section was not free at the last checkpoint */
366 for (i = 0; i < ofs_unit; i++)
367 if (get_seg_entry(sbi, segno)->ckpt_valid_blocks)
368 goto next;
369 return segno;
370 }
371 return NULL_SEGNO;
372}
373
374/*
375 * Find a new segment from the free segment bitmap in the right order.
376 * This function must succeed; otherwise it BUGs.
377 */
378static void get_new_segment(struct f2fs_sb_info *sbi,
379 unsigned int *newseg, bool new_sec, int dir)
380{
381 struct free_segmap_info *free_i = FREE_I(sbi);
382 unsigned int total_secs = sbi->total_sections;
383 unsigned int segno, secno, zoneno;
384 unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone;
385 unsigned int hint = *newseg / sbi->segs_per_sec;
386 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
387 unsigned int left_start = hint;
388 bool init = true;
389 int go_left = 0;
390 int i;
391
392 write_lock(&free_i->segmap_lock);
393
394 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
395 segno = find_next_zero_bit(free_i->free_segmap,
396 TOTAL_SEGS(sbi), *newseg + 1);
397 if (segno < TOTAL_SEGS(sbi))
398 goto got_it;
399 }
400find_other_zone:
401 secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint);
402 if (secno >= total_secs) {
403 if (dir == ALLOC_RIGHT) {
404 secno = find_next_zero_bit(free_i->free_secmap,
405 total_secs, 0);
406 BUG_ON(secno >= total_secs);
407 } else {
408 go_left = 1;
409 left_start = hint - 1;
410 }
411 }
412 if (go_left == 0)
413 goto skip_left;
414
415 while (test_bit(left_start, free_i->free_secmap)) {
416 if (left_start > 0) {
417 left_start--;
418 continue;
419 }
420 left_start = find_next_zero_bit(free_i->free_secmap,
421 total_secs, 0);
422 BUG_ON(left_start >= total_secs);
423 break;
424 }
425 secno = left_start;
426skip_left:
427 hint = secno;
428 segno = secno * sbi->segs_per_sec;
429 zoneno = secno / sbi->secs_per_zone;
430
431 /* give up on finding another zone */
432 if (!init)
433 goto got_it;
434 if (sbi->secs_per_zone == 1)
435 goto got_it;
436 if (zoneno == old_zoneno)
437 goto got_it;
438 if (dir == ALLOC_LEFT) {
439 if (!go_left && zoneno + 1 >= total_zones)
440 goto got_it;
441 if (go_left && zoneno == 0)
442 goto got_it;
443 }
444 for (i = 0; i < NR_CURSEG_TYPE; i++)
445 if (CURSEG_I(sbi, i)->zone == zoneno)
446 break;
447
448 if (i < NR_CURSEG_TYPE) {
449 /* zone is in user, try another */
450 if (go_left)
451 hint = zoneno * sbi->secs_per_zone - 1;
452 else if (zoneno + 1 >= total_zones)
453 hint = 0;
454 else
455 hint = (zoneno + 1) * sbi->secs_per_zone;
456 init = false;
457 goto find_other_zone;
458 }
459got_it:
460 /* set it as dirty segment in free segmap */
461 BUG_ON(test_bit(segno, free_i->free_segmap));
462 __set_inuse(sbi, segno);
463 *newseg = segno;
464 write_unlock(&free_i->segmap_lock);
465}
466
467static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
468{
469 struct curseg_info *curseg = CURSEG_I(sbi, type);
470 struct summary_footer *sum_footer;
471
472 curseg->segno = curseg->next_segno;
473 curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
474 curseg->next_blkoff = 0;
475 curseg->next_segno = NULL_SEGNO;
476
477 sum_footer = &(curseg->sum_blk->footer);
478 memset(sum_footer, 0, sizeof(struct summary_footer));
479 if (IS_DATASEG(type))
480 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
481 if (IS_NODESEG(type))
482 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
483 __set_sit_entry_type(sbi, type, curseg->segno, modified);
484}
485
486/*
487 * Allocate a current working segment.
488 * This function always allocates a free segment in LFS manner.
489 */
490static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
491{
492 struct curseg_info *curseg = CURSEG_I(sbi, type);
493 unsigned int segno = curseg->segno;
494 int dir = ALLOC_LEFT;
495
496 write_sum_page(sbi, curseg->sum_blk,
497 GET_SUM_BLOCK(sbi, curseg->segno));
498 if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
499 dir = ALLOC_RIGHT;
500
501 if (test_opt(sbi, NOHEAP))
502 dir = ALLOC_RIGHT;
503
504 get_new_segment(sbi, &segno, new_sec, dir);
505 curseg->next_segno = segno;
506 reset_curseg(sbi, type, 1);
507 curseg->alloc_type = LFS;
508}
509
510static void __next_free_blkoff(struct f2fs_sb_info *sbi,
511 struct curseg_info *seg, block_t start)
512{
513 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
514 block_t ofs;
515 for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
516 if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
517 && !f2fs_test_bit(ofs, se->cur_valid_map))
518 break;
519 }
520 seg->next_blkoff = ofs;
521}
522
523/*
524 * If a segment is written in LFS manner, the next block offset is just
525 * obtained by increasing the current block offset. However, if a segment is
526 * written in SSR manner, the next block offset is obtained via __next_free_blkoff()
527 */
528static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
529 struct curseg_info *seg)
530{
531 if (seg->alloc_type == SSR)
532 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
533 else
534 seg->next_blkoff++;
535}
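/*
 * Illustration with a hypothetical validity bitmap: in LFS mode a
 * fresh segment is filled front to back, so next_blkoff simply goes
 * 0, 1, 2, ...; in SSR mode the segment already holds valid blocks,
 * so if offsets 0 and 1 are set in cur_valid_map or ckpt_valid_map,
 * __next_free_blkoff() starts at 2, and each later refresh skips any
 * further in-use offsets.
 */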
536
537/*
538 * This function always allocates a used segment (from the dirty seglist) in
539 * SSR manner, so it must restore the existing summary information for the valid blocks
540 */
541static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
542{
543 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
544 struct curseg_info *curseg = CURSEG_I(sbi, type);
545 unsigned int new_segno = curseg->next_segno;
546 struct f2fs_summary_block *sum_node;
547 struct page *sum_page;
548
549 write_sum_page(sbi, curseg->sum_blk,
550 GET_SUM_BLOCK(sbi, curseg->segno));
551 __set_test_and_inuse(sbi, new_segno);
552
553 mutex_lock(&dirty_i->seglist_lock);
554 __remove_dirty_segment(sbi, new_segno, PRE);
555 __remove_dirty_segment(sbi, new_segno, DIRTY);
556 mutex_unlock(&dirty_i->seglist_lock);
557
558 reset_curseg(sbi, type, 1);
559 curseg->alloc_type = SSR;
560 __next_free_blkoff(sbi, curseg, 0);
561
562 if (reuse) {
563 sum_page = get_sum_page(sbi, new_segno);
564 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
565 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
566 f2fs_put_page(sum_page, 1);
567 }
568}
569
570/*
571 * Flush out the current segment and replace it with a new one.
572 * This function must succeed; otherwise it BUGs.
573 */
574static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
575 int type, bool force)
576{
577 struct curseg_info *curseg = CURSEG_I(sbi, type);
578 unsigned int ofs_unit;
579
580 if (force) {
581 new_curseg(sbi, type, true);
582 goto out;
583 }
584
585 ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec;
586 curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
587
588 if (curseg->next_segno != NULL_SEGNO)
589 change_curseg(sbi, type, false);
590 else if (type == CURSEG_WARM_NODE)
591 new_curseg(sbi, type, false);
592 else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
593 change_curseg(sbi, type, true);
594 else
595 new_curseg(sbi, type, false);
596out:
597 sbi->segment_count[curseg->alloc_type]++;
598}
599
600void allocate_new_segments(struct f2fs_sb_info *sbi)
601{
602 struct curseg_info *curseg;
603 unsigned int old_curseg;
604 int i;
605
606 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
607 curseg = CURSEG_I(sbi, i);
608 old_curseg = curseg->segno;
609 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
610 locate_dirty_segment(sbi, old_curseg);
611 }
612}
613
614static const struct segment_allocation default_salloc_ops = {
615 .allocate_segment = allocate_segment_by_default,
616};
617
618static void f2fs_end_io_write(struct bio *bio, int err)
619{
620 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
621 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
622 struct bio_private *p = bio->bi_private;
623
624 do {
625 struct page *page = bvec->bv_page;
626
627 if (--bvec >= bio->bi_io_vec)
628 prefetchw(&bvec->bv_page->flags);
629 if (!uptodate) {
630 SetPageError(page);
631 if (page->mapping)
632 set_bit(AS_EIO, &page->mapping->flags);
633 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
634 set_page_dirty(page);
635 }
636 end_page_writeback(page);
637 dec_page_count(p->sbi, F2FS_WRITEBACK);
638 } while (bvec >= bio->bi_io_vec);
639
640 if (p->is_sync)
641 complete(p->wait);
642 kfree(p);
643 bio_put(bio);
644}
645
646struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
647{
648 struct bio *bio;
649 struct bio_private *priv;
650retry:
651 priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
652 if (!priv) {
653 cond_resched();
654 goto retry;
655 }
656
657 /* No failure on bio allocation */
658 bio = bio_alloc(GFP_NOIO, npages);
659 bio->bi_bdev = bdev;
660 bio->bi_private = priv;
661 return bio;
662}
663
664static void do_submit_bio(struct f2fs_sb_info *sbi,
665 enum page_type type, bool sync)
666{
667 int rw = sync ? WRITE_SYNC : WRITE;
668 enum page_type btype = type > META ? META : type;
669
670 if (type >= META_FLUSH)
671 rw = WRITE_FLUSH_FUA;
672
673 if (sbi->bio[btype]) {
674 struct bio_private *p = sbi->bio[btype]->bi_private;
675 p->sbi = sbi;
676 sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
677 if (type == META_FLUSH) {
678 DECLARE_COMPLETION_ONSTACK(wait);
679 p->is_sync = true;
680 p->wait = &wait;
681 submit_bio(rw, sbi->bio[btype]);
682 wait_for_completion(&wait);
683 } else {
684 p->is_sync = false;
685 submit_bio(rw, sbi->bio[btype]);
686 }
687 sbi->bio[btype] = NULL;
688 }
689}
690
691void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
692{
693 down_write(&sbi->bio_sem);
694 do_submit_bio(sbi, type, sync);
695 up_write(&sbi->bio_sem);
696}
697
698static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
699 block_t blk_addr, enum page_type type)
700{
701 struct block_device *bdev = sbi->sb->s_bdev;
702
703 verify_block_addr(sbi, blk_addr);
704
705 down_write(&sbi->bio_sem);
706
707 inc_page_count(sbi, F2FS_WRITEBACK);
708
709 if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
710 do_submit_bio(sbi, type, false);
711alloc_new:
712 if (sbi->bio[type] == NULL) {
713 sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev));
714 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
715 /*
716 * The end_io will be assigned at the submission phase.
717 * Until then, let bio_add_page() merge consecutive IOs as much
718 * as possible.
719 */
720 }
721
722 if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
723 PAGE_CACHE_SIZE) {
724 do_submit_bio(sbi, type, false);
725 goto alloc_new;
726 }
727
728 sbi->last_block_in_bio[type] = blk_addr;
729
730 up_write(&sbi->bio_sem);
731}
732
733static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
734{
735 struct curseg_info *curseg = CURSEG_I(sbi, type);
736 if (curseg->next_blkoff < sbi->blocks_per_seg)
737 return true;
738 return false;
739}
740
741static int __get_segment_type_2(struct page *page, enum page_type p_type)
742{
743 if (p_type == DATA)
744 return CURSEG_HOT_DATA;
745 else
746 return CURSEG_HOT_NODE;
747}
748
749static int __get_segment_type_4(struct page *page, enum page_type p_type)
750{
751 if (p_type == DATA) {
752 struct inode *inode = page->mapping->host;
753
754 if (S_ISDIR(inode->i_mode))
755 return CURSEG_HOT_DATA;
756 else
757 return CURSEG_COLD_DATA;
758 } else {
759 if (IS_DNODE(page) && !is_cold_node(page))
760 return CURSEG_HOT_NODE;
761 else
762 return CURSEG_COLD_NODE;
763 }
764}
765
766static int __get_segment_type_6(struct page *page, enum page_type p_type)
767{
768 if (p_type == DATA) {
769 struct inode *inode = page->mapping->host;
770
771 if (S_ISDIR(inode->i_mode))
772 return CURSEG_HOT_DATA;
773 else if (is_cold_data(page) || is_cold_file(inode))
774 return CURSEG_COLD_DATA;
775 else
776 return CURSEG_WARM_DATA;
777 } else {
778 if (IS_DNODE(page))
779 return is_cold_node(page) ? CURSEG_WARM_NODE :
780 CURSEG_HOT_NODE;
781 else
782 return CURSEG_COLD_NODE;
783 }
784}
785
786static int __get_segment_type(struct page *page, enum page_type p_type)
787{
788 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
789 switch (sbi->active_logs) {
790 case 2:
791 return __get_segment_type_2(page, p_type);
792 case 4:
793 return __get_segment_type_4(page, p_type);
794 case 6:
795 return __get_segment_type_6(page, p_type);
796 default:
797 BUG();
798 }
799}
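/*
 * Summary of the log mapping implemented by the three helpers above:
 *
 *  active_logs == 2: all DATA to hot data, all NODE to hot node
 *  active_logs == 4: DATA - hot (dir) / cold (file);
 *                    NODE - hot (non-cold dnode) / cold (the rest)
 *  active_logs == 6: DATA - hot (dir) / cold (cold page or file) / warm;
 *                    NODE - hot (dnode) / warm (cold dnode) / cold (indirect)
 */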
800
801static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
802 block_t old_blkaddr, block_t *new_blkaddr,
803 struct f2fs_summary *sum, enum page_type p_type)
804{
805 struct sit_info *sit_i = SIT_I(sbi);
806 struct curseg_info *curseg;
807 unsigned int old_cursegno;
808 int type;
809
810 type = __get_segment_type(page, p_type);
811 curseg = CURSEG_I(sbi, type);
812
813 mutex_lock(&curseg->curseg_mutex);
814
815 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
816 old_cursegno = curseg->segno;
817
818 /*
819 * __add_sum_entry must be called with curseg_mutex held
820 * because this function updates a summary entry in the
821 * current summary block.
822 */
823 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
824
825 mutex_lock(&sit_i->sentry_lock);
826 __refresh_next_blkoff(sbi, curseg);
827 sbi->block_count[curseg->alloc_type]++;
828
829 /*
830 * SIT information should be updated before segment allocation,
831 * since SSR needs latest valid block information.
832 */
833 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
834
835 if (!__has_curseg_space(sbi, type))
836 sit_i->s_ops->allocate_segment(sbi, type, false);
837
838 locate_dirty_segment(sbi, old_cursegno);
839 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
840 mutex_unlock(&sit_i->sentry_lock);
841
842 if (p_type == NODE)
843 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
844
845 /* writeout dirty page into bdev */
846 submit_write_page(sbi, page, *new_blkaddr, p_type);
847
848 mutex_unlock(&curseg->curseg_mutex);
849}
850
851int write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
852 struct writeback_control *wbc)
853{
854 if (wbc->for_reclaim)
855 return AOP_WRITEPAGE_ACTIVATE;
856
857 set_page_writeback(page);
858 submit_write_page(sbi, page, page->index, META);
859 return 0;
860}
861
862void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
863 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
864{
865 struct f2fs_summary sum;
866 set_summary(&sum, nid, 0, 0);
867 do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
868}
869
870void write_data_page(struct inode *inode, struct page *page,
871 struct dnode_of_data *dn, block_t old_blkaddr,
872 block_t *new_blkaddr)
873{
874 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
875 struct f2fs_summary sum;
876 struct node_info ni;
877
878 BUG_ON(old_blkaddr == NULL_ADDR);
879 get_node_info(sbi, dn->nid, &ni);
880 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
881
882 do_write_page(sbi, page, old_blkaddr,
883 new_blkaddr, &sum, DATA);
884}
885
886void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
887 block_t old_blk_addr)
888{
889 submit_write_page(sbi, page, old_blk_addr, DATA);
890}
891
892void recover_data_page(struct f2fs_sb_info *sbi,
893 struct page *page, struct f2fs_summary *sum,
894 block_t old_blkaddr, block_t new_blkaddr)
895{
896 struct sit_info *sit_i = SIT_I(sbi);
897 struct curseg_info *curseg;
898 unsigned int segno, old_cursegno;
899 struct seg_entry *se;
900 int type;
901
902 segno = GET_SEGNO(sbi, new_blkaddr);
903 se = get_seg_entry(sbi, segno);
904 type = se->type;
905
906 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
907 if (old_blkaddr == NULL_ADDR)
908 type = CURSEG_COLD_DATA;
909 else
910 type = CURSEG_WARM_DATA;
911 }
912 curseg = CURSEG_I(sbi, type);
913
914 mutex_lock(&curseg->curseg_mutex);
915 mutex_lock(&sit_i->sentry_lock);
916
917 old_cursegno = curseg->segno;
918
919 /* change the current segment */
920 if (segno != curseg->segno) {
921 curseg->next_segno = segno;
922 change_curseg(sbi, type, true);
923 }
924
925 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
926 (sbi->blocks_per_seg - 1);
927 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
928
929 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
930
931 locate_dirty_segment(sbi, old_cursegno);
932 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
933
934 mutex_unlock(&sit_i->sentry_lock);
935 mutex_unlock(&curseg->curseg_mutex);
936}
937
938void rewrite_node_page(struct f2fs_sb_info *sbi,
939 struct page *page, struct f2fs_summary *sum,
940 block_t old_blkaddr, block_t new_blkaddr)
941{
942 struct sit_info *sit_i = SIT_I(sbi);
943 int type = CURSEG_WARM_NODE;
944 struct curseg_info *curseg;
945 unsigned int segno, old_cursegno;
946 block_t next_blkaddr = next_blkaddr_of_node(page);
947 unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
948
949 curseg = CURSEG_I(sbi, type);
950
951 mutex_lock(&curseg->curseg_mutex);
952 mutex_lock(&sit_i->sentry_lock);
953
954 segno = GET_SEGNO(sbi, new_blkaddr);
955 old_cursegno = curseg->segno;
956
957 /* change the current segment */
958 if (segno != curseg->segno) {
959 curseg->next_segno = segno;
960 change_curseg(sbi, type, true);
961 }
962 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
963 (sbi->blocks_per_seg - 1);
964 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
965
966 /* change the current log to the next block addr in advance */
967 if (next_segno != segno) {
968 curseg->next_segno = next_segno;
969 change_curseg(sbi, type, true);
970 }
971 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
972 (sbi->blocks_per_seg - 1);
973
974 /* rewrite node page */
975 set_page_writeback(page);
976 submit_write_page(sbi, page, new_blkaddr, NODE);
977 f2fs_submit_bio(sbi, NODE, true);
978 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
979
980 locate_dirty_segment(sbi, old_cursegno);
981 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
982
983 mutex_unlock(&sit_i->sentry_lock);
984 mutex_unlock(&curseg->curseg_mutex);
985}
986
987static int read_compacted_summaries(struct f2fs_sb_info *sbi)
988{
989 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
990 struct curseg_info *seg_i;
991 unsigned char *kaddr;
992 struct page *page;
993 block_t start;
994 int i, j, offset;
995
996 start = start_sum_block(sbi);
997
998 page = get_meta_page(sbi, start++);
999 kaddr = (unsigned char *)page_address(page);
1000
1001 /* Step 1: restore nat cache */
1002 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1003 memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1004
1005 /* Step 2: restore sit cache */
1006 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1007 memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1008 SUM_JOURNAL_SIZE);
1009 offset = 2 * SUM_JOURNAL_SIZE;
1010
1011 /* Step 3: restore summary entries */
1012 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1013 unsigned short blk_off;
1014 unsigned int segno;
1015
1016 seg_i = CURSEG_I(sbi, i);
1017 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1018 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1019 seg_i->next_segno = segno;
1020 reset_curseg(sbi, i, 0);
1021 seg_i->alloc_type = ckpt->alloc_type[i];
1022 seg_i->next_blkoff = blk_off;
1023
1024 if (seg_i->alloc_type == SSR)
1025 blk_off = sbi->blocks_per_seg;
1026
1027 for (j = 0; j < blk_off; j++) {
1028 struct f2fs_summary *s;
1029 s = (struct f2fs_summary *)(kaddr + offset);
1030 seg_i->sum_blk->entries[j] = *s;
1031 offset += SUMMARY_SIZE;
1032 if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1033 SUM_FOOTER_SIZE)
1034 continue;
1035
1036 f2fs_put_page(page, 1);
1037 page = NULL;
1038
1039 page = get_meta_page(sbi, start++);
1040 kaddr = (unsigned char *)page_address(page);
1041 offset = 0;
1042 }
1043 }
1044 f2fs_put_page(page, 1);
1045 return 0;
1046}
1047
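read_compacted_summaries() walks a packed layout: the NAT journal and the SIT journal come first, then raw summary entries for the three data logs, spilling into the next meta page whenever the next entry would cross into the footer area. A rough capacity check for that layout, with illustrative byte sizes (SUMMARY_SIZE = 7, SUM_FOOTER_SIZE = 5 and SUM_JOURNAL_SIZE = 253 are assumptions here, not defined in this patch):

#include <stdio.h>

int main(void)
{
    const int page = 4096;      /* PAGE_CACHE_SIZE with 4KB blocks */
    const int footer = 5;       /* assumed SUM_FOOTER_SIZE */
    const int journal = 253;    /* assumed SUM_JOURNAL_SIZE */
    const int sum = 7;          /* assumed SUMMARY_SIZE */

    /* the first page carries both journals; later pages hold only entries */
    int first = (page - footer - 2 * journal) / sum;
    int later = (page - footer) / sum;

    printf("summary entries: %d in the first page, %d per later page\n",
           first, later);
    return 0;
}
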
1048static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1049{
1050 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1051 struct f2fs_summary_block *sum;
1052 struct curseg_info *curseg;
1053 struct page *new;
1054 unsigned short blk_off;
1055 unsigned int segno = 0;
1056 block_t blk_addr = 0;
1057
1058 /* get segment number and block addr */
1059 if (IS_DATASEG(type)) {
1060 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1061 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1062 CURSEG_HOT_DATA]);
1063 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1064 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1065 else
1066 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1067 } else {
1068 segno = le32_to_cpu(ckpt->cur_node_segno[type -
1069 CURSEG_HOT_NODE]);
1070 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1071 CURSEG_HOT_NODE]);
1072 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1073 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1074 type - CURSEG_HOT_NODE);
1075 else
1076 blk_addr = GET_SUM_BLOCK(sbi, segno);
1077 }
1078
1079 new = get_meta_page(sbi, blk_addr);
1080 sum = (struct f2fs_summary_block *)page_address(new);
1081
1082 if (IS_NODESEG(type)) {
1083 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
1084 struct f2fs_summary *ns = &sum->entries[0];
1085 int i;
1086 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1087 ns->version = 0;
1088 ns->ofs_in_node = 0;
1089 }
1090 } else {
1091 if (restore_node_summary(sbi, segno, sum)) {
1092 f2fs_put_page(new, 1);
1093 return -EINVAL;
1094 }
1095 }
1096 }
1097
1098 /* set uncompleted segment to curseg */
1099 curseg = CURSEG_I(sbi, type);
1100 mutex_lock(&curseg->curseg_mutex);
1101 memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1102 curseg->next_segno = segno;
1103 reset_curseg(sbi, type, 0);
1104 curseg->alloc_type = ckpt->alloc_type[type];
1105 curseg->next_blkoff = blk_off;
1106 mutex_unlock(&curseg->curseg_mutex);
1107 f2fs_put_page(new, 1);
1108 return 0;
1109}
1110
1111static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1112{
1113 int type = CURSEG_HOT_DATA;
1114
1115 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1116 /* restore for compacted data summary */
1117 if (read_compacted_summaries(sbi))
1118 return -EINVAL;
1119 type = CURSEG_HOT_NODE;
1120 }
1121
1122 for (; type <= CURSEG_COLD_NODE; type++)
1123 if (read_normal_summaries(sbi, type))
1124 return -EINVAL;
1125 return 0;
1126}
1127
1128static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1129{
1130 struct page *page;
1131 unsigned char *kaddr;
1132 struct f2fs_summary *summary;
1133 struct curseg_info *seg_i;
1134 int written_size = 0;
1135 int i, j;
1136
1137 page = grab_meta_page(sbi, blkaddr++);
1138 kaddr = (unsigned char *)page_address(page);
1139
1140 /* Step 1: write nat cache */
1141 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1142 memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1143 written_size += SUM_JOURNAL_SIZE;
1144
1145 /* Step 2: write sit cache */
1146 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1147 memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1148 SUM_JOURNAL_SIZE);
1149 written_size += SUM_JOURNAL_SIZE;
1150
1151 set_page_dirty(page);
1152
1153 /* Step 3: write summary entries */
1154 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1155 unsigned short blkoff;
1156 seg_i = CURSEG_I(sbi, i);
1157 if (sbi->ckpt->alloc_type[i] == SSR)
1158 blkoff = sbi->blocks_per_seg;
1159 else
1160 blkoff = curseg_blkoff(sbi, i);
1161
1162 for (j = 0; j < blkoff; j++) {
1163 if (!page) {
1164 page = grab_meta_page(sbi, blkaddr++);
1165 kaddr = (unsigned char *)page_address(page);
1166 written_size = 0;
1167 }
1168 summary = (struct f2fs_summary *)(kaddr + written_size);
1169 *summary = seg_i->sum_blk->entries[j];
1170 written_size += SUMMARY_SIZE;
1171 set_page_dirty(page);
1172
1173 if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1174 SUM_FOOTER_SIZE)
1175 continue;
1176
1177 f2fs_put_page(page, 1);
1178 page = NULL;
1179 }
1180 }
1181 if (page)
1182 f2fs_put_page(page, 1);
1183}
1184
1185static void write_normal_summaries(struct f2fs_sb_info *sbi,
1186 block_t blkaddr, int type)
1187{
1188 int i, end;
1189 if (IS_DATASEG(type))
1190 end = type + NR_CURSEG_DATA_TYPE;
1191 else
1192 end = type + NR_CURSEG_NODE_TYPE;
1193
1194 for (i = type; i < end; i++) {
1195 struct curseg_info *sum = CURSEG_I(sbi, i);
1196 mutex_lock(&sum->curseg_mutex);
1197 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1198 mutex_unlock(&sum->curseg_mutex);
1199 }
1200}
1201
1202void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1203{
1204 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1205 write_compacted_summaries(sbi, start_blk);
1206 else
1207 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1208}
1209
1210void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1211{
1212 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1213 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1214 return;
1215}
1216
1217int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1218 unsigned int val, int alloc)
1219{
1220 int i;
1221
1222 if (type == NAT_JOURNAL) {
1223 for (i = 0; i < nats_in_cursum(sum); i++) {
1224 if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1225 return i;
1226 }
1227 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1228 return update_nats_in_cursum(sum, 1);
1229 } else if (type == SIT_JOURNAL) {
1230 for (i = 0; i < sits_in_cursum(sum); i++)
1231 if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1232 return i;
1233 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1234 return update_sits_in_cursum(sum, 1);
1235 }
1236 return -1;
1237}
1238
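lookup_journal_in_cursum() is the single lookup/insert primitive for both in-summary journals: it returns the index of an existing entry, appends a fresh slot when alloc is set and the journal still has room, and returns -1 otherwise. A toy user-space model of that contract (the fixed array and its capacity below are illustrative, not f2fs's real journal layout):

#include <assert.h>

#define CAP 6                   /* illustrative journal capacity */

static int n_keys;
static unsigned int keys[CAP];

/* return the index of val; optionally append a fresh slot; -1 on failure */
static int lookup(unsigned int val, int alloc)
{
    int i;

    for (i = 0; i < n_keys; i++)
        if (keys[i] == val)
            return i;
    if (alloc && n_keys < CAP) {
        keys[n_keys] = val;
        return n_keys++;
    }
    return -1;
}

int main(void)
{
    assert(lookup(42, 1) == 0);     /* inserted at slot 0 */
    assert(lookup(42, 0) == 0);     /* found again */
    assert(lookup(7, 0) == -1);     /* absent and no allocation allowed */
    return 0;
}
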
1239static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1240 unsigned int segno)
1241{
1242 struct sit_info *sit_i = SIT_I(sbi);
1243 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno);
1244 block_t blk_addr = sit_i->sit_base_addr + offset;
1245
1246 check_seg_range(sbi, segno);
1247
1248 /* calculate sit block address */
1249 if (f2fs_test_bit(offset, sit_i->sit_bitmap))
1250 blk_addr += sit_i->sit_blocks;
1251
1252 return get_meta_page(sbi, blk_addr);
1253}
1254
1255static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1256 unsigned int start)
1257{
1258 struct sit_info *sit_i = SIT_I(sbi);
1259 struct page *src_page, *dst_page;
1260 pgoff_t src_off, dst_off;
1261 void *src_addr, *dst_addr;
1262
1263 src_off = current_sit_addr(sbi, start);
1264 dst_off = next_sit_addr(sbi, src_off);
1265
1266 /* get current sit block page without lock */
1267 src_page = get_meta_page(sbi, src_off);
1268 dst_page = grab_meta_page(sbi, dst_off);
1269 BUG_ON(PageDirty(src_page));
1270
1271 src_addr = page_address(src_page);
1272 dst_addr = page_address(dst_page);
1273 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1274
1275 set_page_dirty(dst_page);
1276 f2fs_put_page(src_page, 1);
1277
1278 set_to_next_sit(sit_i, start);
1279
1280 return dst_page;
1281}
1282
1283static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
1284{
1285 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1286 struct f2fs_summary_block *sum = curseg->sum_blk;
1287 int i;
1288
1289 /*
1290 * If the journal area in the current summary is full of sit entries,
1291 * all the sit entries will be flushed. Otherwise the sit entries
1292 * cannot be replaced with newly hot sit entries.
1293 */
1294 if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) {
1295 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1296 unsigned int segno;
1297 segno = le32_to_cpu(segno_in_journal(sum, i));
1298 __mark_sit_entry_dirty(sbi, segno);
1299 }
1300 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1301 return true;
1302 }
1303 return false;
1304}
1305
1306/*
1307 * CP calls this function, which flushes SIT entries including sit_journal,
1308 * and moves prefree segs to free segs.
1309 */
1310void flush_sit_entries(struct f2fs_sb_info *sbi)
1311{
1312 struct sit_info *sit_i = SIT_I(sbi);
1313 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1314 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1315 struct f2fs_summary_block *sum = curseg->sum_blk;
1316 unsigned long nsegs = TOTAL_SEGS(sbi);
1317 struct page *page = NULL;
1318 struct f2fs_sit_block *raw_sit = NULL;
1319 unsigned int start = 0, end = 0;
1320 unsigned int segno = -1;
1321 bool flushed;
1322
1323 mutex_lock(&curseg->curseg_mutex);
1324 mutex_lock(&sit_i->sentry_lock);
1325
1326 /*
1327 * "flushed" indicates whether sit entries in journal are flushed
1328 * to the SIT area or not.
1329 */
1330 flushed = flush_sits_in_journal(sbi);
1331
1332 while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
1333 struct seg_entry *se = get_seg_entry(sbi, segno);
1334 int sit_offset, offset;
1335
1336 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1337
1338 if (flushed)
1339 goto to_sit_page;
1340
1341 offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
1342 if (offset >= 0) {
1343 segno_in_journal(sum, offset) = cpu_to_le32(segno);
1344 seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
1345 goto flush_done;
1346 }
1347to_sit_page:
1348 if (!page || (start > segno) || (segno > end)) {
1349 if (page) {
1350 f2fs_put_page(page, 1);
1351 page = NULL;
1352 }
1353
1354 start = START_SEGNO(sit_i, segno);
1355 end = start + SIT_ENTRY_PER_BLOCK - 1;
1356
1357 /* read sit block that will be updated */
1358 page = get_next_sit_page(sbi, start);
1359 raw_sit = page_address(page);
1360 }
1361
1362 /* update entry in SIT block */
1363 seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
1364flush_done:
1365 __clear_bit(segno, bitmap);
1366 sit_i->dirty_sentries--;
1367 }
1368 mutex_unlock(&sit_i->sentry_lock);
1369 mutex_unlock(&curseg->curseg_mutex);
1370
1371 /* writeout last modified SIT block */
1372 f2fs_put_page(page, 1);
1373
1374 set_prefree_as_free_segments(sbi);
1375}
1376
1377static int build_sit_info(struct f2fs_sb_info *sbi)
1378{
1379 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1380 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1381 struct sit_info *sit_i;
1382 unsigned int sit_segs, start;
1383 char *src_bitmap, *dst_bitmap;
1384 unsigned int bitmap_size;
1385
1386 /* allocate memory for SIT information */
1387 sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1388 if (!sit_i)
1389 return -ENOMEM;
1390
1391 SM_I(sbi)->sit_info = sit_i;
1392
1393 sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry));
1394 if (!sit_i->sentries)
1395 return -ENOMEM;
1396
1397 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1398 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1399 if (!sit_i->dirty_sentries_bitmap)
1400 return -ENOMEM;
1401
1402 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1403 sit_i->sentries[start].cur_valid_map
1404 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1405 sit_i->sentries[start].ckpt_valid_map
1406 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1407 if (!sit_i->sentries[start].cur_valid_map
1408 || !sit_i->sentries[start].ckpt_valid_map)
1409 return -ENOMEM;
1410 }
1411
1412 if (sbi->segs_per_sec > 1) {
1413 sit_i->sec_entries = vzalloc(sbi->total_sections *
1414 sizeof(struct sec_entry));
1415 if (!sit_i->sec_entries)
1416 return -ENOMEM;
1417 }
1418
1419 /* get information related with SIT */
1420 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1421
1422 /* setup SIT bitmap from checkpoint pack */
1423 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1424 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1425
1426 dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1427 if (!dst_bitmap)
1428 return -ENOMEM;
1429 memcpy(dst_bitmap, src_bitmap, bitmap_size);
1430
1431 /* init SIT information */
1432 sit_i->s_ops = &default_salloc_ops;
1433
1434 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1435 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1436 sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1437 sit_i->sit_bitmap = dst_bitmap;
1438 sit_i->bitmap_size = bitmap_size;
1439 sit_i->dirty_sentries = 0;
1440 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1441 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1442 sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1443 mutex_init(&sit_i->sentry_lock);
1444 return 0;
1445}
1446
1447static int build_free_segmap(struct f2fs_sb_info *sbi)
1448{
1449 struct f2fs_sm_info *sm_info = SM_I(sbi);
1450 struct free_segmap_info *free_i;
1451 unsigned int bitmap_size, sec_bitmap_size;
1452
1453 /* allocate memory for free segmap information */
1454 free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1455 if (!free_i)
1456 return -ENOMEM;
1457
1458 SM_I(sbi)->free_info = free_i;
1459
1460 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1461 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1462 if (!free_i->free_segmap)
1463 return -ENOMEM;
1464
1465 sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections);
1466 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1467 if (!free_i->free_secmap)
1468 return -ENOMEM;
1469
1470 /* set all segments as dirty temporarily */
1471 memset(free_i->free_segmap, 0xff, bitmap_size);
1472 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1473
1474 /* init free segmap information */
1475 free_i->start_segno =
1476 (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
1477 free_i->free_segments = 0;
1478 free_i->free_sections = 0;
1479 rwlock_init(&free_i->segmap_lock);
1480 return 0;
1481}
1482
1483static int build_curseg(struct f2fs_sb_info *sbi)
1484{
1485 struct curseg_info *array;
1486 int i;
1487
1488 array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
1489 if (!array)
1490 return -ENOMEM;
1491
1492 SM_I(sbi)->curseg_array = array;
1493
1494 for (i = 0; i < NR_CURSEG_TYPE; i++) {
1495 mutex_init(&array[i].curseg_mutex);
1496 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1497 if (!array[i].sum_blk)
1498 return -ENOMEM;
1499 array[i].segno = NULL_SEGNO;
1500 array[i].next_blkoff = 0;
1501 }
1502 return restore_curseg_summaries(sbi);
1503}
1504
1505static void build_sit_entries(struct f2fs_sb_info *sbi)
1506{
1507 struct sit_info *sit_i = SIT_I(sbi);
1508 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1509 struct f2fs_summary_block *sum = curseg->sum_blk;
1510 unsigned int start;
1511
1512 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1513 struct seg_entry *se = &sit_i->sentries[start];
1514 struct f2fs_sit_block *sit_blk;
1515 struct f2fs_sit_entry sit;
1516 struct page *page;
1517 int i;
1518
1519 mutex_lock(&curseg->curseg_mutex);
1520 for (i = 0; i < sits_in_cursum(sum); i++) {
1521 if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
1522 sit = sit_in_journal(sum, i);
1523 mutex_unlock(&curseg->curseg_mutex);
1524 goto got_it;
1525 }
1526 }
1527 mutex_unlock(&curseg->curseg_mutex);
1528 page = get_current_sit_page(sbi, start);
1529 sit_blk = (struct f2fs_sit_block *)page_address(page);
1530 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1531 f2fs_put_page(page, 1);
1532got_it:
1533 check_block_count(sbi, start, &sit);
1534 seg_info_from_raw_sit(se, &sit);
1535 if (sbi->segs_per_sec > 1) {
1536 struct sec_entry *e = get_sec_entry(sbi, start);
1537 e->valid_blocks += se->valid_blocks;
1538 }
1539 }
1540}
1541
1542static void init_free_segmap(struct f2fs_sb_info *sbi)
1543{
1544 unsigned int start;
1545 int type;
1546
1547 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1548 struct seg_entry *sentry = get_seg_entry(sbi, start);
1549 if (!sentry->valid_blocks)
1550 __set_free(sbi, start);
1551 }
1552
1553 /* set the current segments as in use */
1554 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1555 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1556 __set_test_and_inuse(sbi, curseg_t->segno);
1557 }
1558}
1559
1560static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1561{
1562 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1563 struct free_segmap_info *free_i = FREE_I(sbi);
1564 unsigned int segno = 0, offset = 0;
1565 unsigned short valid_blocks;
1566
1567 while (segno < TOTAL_SEGS(sbi)) {
1568 /* find dirty segment based on free segmap */
1569 segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset);
1570 if (segno >= TOTAL_SEGS(sbi))
1571 break;
1572 offset = segno + 1;
1573 valid_blocks = get_valid_blocks(sbi, segno, 0);
1574 if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks)
1575 continue;
1576 mutex_lock(&dirty_i->seglist_lock);
1577 __locate_dirty_segment(sbi, segno, DIRTY);
1578 mutex_unlock(&dirty_i->seglist_lock);
1579 }
1580}
1581
1582static int init_victim_segmap(struct f2fs_sb_info *sbi)
1583{
1584 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1585 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1586
1587 dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1588 dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1589 if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
1590 return -ENOMEM;
1591 return 0;
1592}
1593
1594static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1595{
1596 struct dirty_seglist_info *dirty_i;
1597 unsigned int bitmap_size, i;
1598
1599 /* allocate memory for dirty segments list information */
1600 dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
1601 if (!dirty_i)
1602 return -ENOMEM;
1603
1604 SM_I(sbi)->dirty_info = dirty_i;
1605 mutex_init(&dirty_i->seglist_lock);
1606
1607 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1608
1609 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1610 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1611 dirty_i->nr_dirty[i] = 0;
1612 if (!dirty_i->dirty_segmap[i])
1613 return -ENOMEM;
1614 }
1615
1616 init_dirty_segmap(sbi);
1617 return init_victim_segmap(sbi);
1618}
1619
1620/*
1621 * Update min, max modified time for cost-benefit GC algorithm
1622 */
1623static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1624{
1625 struct sit_info *sit_i = SIT_I(sbi);
1626 unsigned int segno;
1627
1628 mutex_lock(&sit_i->sentry_lock);
1629
1630 sit_i->min_mtime = LLONG_MAX;
1631
1632 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
1633 unsigned int i;
1634 unsigned long long mtime = 0;
1635
1636 for (i = 0; i < sbi->segs_per_sec; i++)
1637 mtime += get_seg_entry(sbi, segno + i)->mtime;
1638
1639 mtime = div_u64(mtime, sbi->segs_per_sec);
1640
1641 if (sit_i->min_mtime > mtime)
1642 sit_i->min_mtime = mtime;
1643 }
1644 sit_i->max_mtime = get_mtime(sbi);
1645 mutex_unlock(&sit_i->sentry_lock);
1646}
1647
1648int build_segment_manager(struct f2fs_sb_info *sbi)
1649{
1650 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1651 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1652 struct f2fs_sm_info *sm_info;
1653 int err;
1654
1655 sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
1656 if (!sm_info)
1657 return -ENOMEM;
1658
1659 /* init sm info */
1660 sbi->sm_info = sm_info;
1661 INIT_LIST_HEAD(&sm_info->wblist_head);
1662 spin_lock_init(&sm_info->wblist_lock);
1663 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1664 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1665 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
1666 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1667 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1668 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1669 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1670
1671 err = build_sit_info(sbi);
1672 if (err)
1673 return err;
1674 err = build_free_segmap(sbi);
1675 if (err)
1676 return err;
1677 err = build_curseg(sbi);
1678 if (err)
1679 return err;
1680
1681 /* reinit free segmap based on SIT */
1682 build_sit_entries(sbi);
1683
1684 init_free_segmap(sbi);
1685 err = build_dirty_segmap(sbi);
1686 if (err)
1687 return err;
1688
1689 init_min_max_mtime(sbi);
1690 return 0;
1691}
1692
1693static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
1694 enum dirty_type dirty_type)
1695{
1696 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1697
1698 mutex_lock(&dirty_i->seglist_lock);
1699 kfree(dirty_i->dirty_segmap[dirty_type]);
1700 dirty_i->nr_dirty[dirty_type] = 0;
1701 mutex_unlock(&dirty_i->seglist_lock);
1702}
1703
1704void reset_victim_segmap(struct f2fs_sb_info *sbi)
1705{
1706 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1707 memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
1708}
1709
1710static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
1711{
1712 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1713
1714 kfree(dirty_i->victim_segmap[FG_GC]);
1715 kfree(dirty_i->victim_segmap[BG_GC]);
1716}
1717
1718static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
1719{
1720 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1721 int i;
1722
1723 if (!dirty_i)
1724 return;
1725
1726 /* discard pre-free/dirty segments list */
1727 for (i = 0; i < NR_DIRTY_TYPE; i++)
1728 discard_dirty_segmap(sbi, i);
1729
1730 destroy_victim_segmap(sbi);
1731 SM_I(sbi)->dirty_info = NULL;
1732 kfree(dirty_i);
1733}
1734
1735static void destroy_curseg(struct f2fs_sb_info *sbi)
1736{
1737 struct curseg_info *array = SM_I(sbi)->curseg_array;
1738 int i;
1739
1740 if (!array)
1741 return;
1742 SM_I(sbi)->curseg_array = NULL;
1743 for (i = 0; i < NR_CURSEG_TYPE; i++)
1744 kfree(array[i].sum_blk);
1745 kfree(array);
1746}
1747
1748static void destroy_free_segmap(struct f2fs_sb_info *sbi)
1749{
1750 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
1751 if (!free_i)
1752 return;
1753 SM_I(sbi)->free_info = NULL;
1754 kfree(free_i->free_segmap);
1755 kfree(free_i->free_secmap);
1756 kfree(free_i);
1757}
1758
1759static void destroy_sit_info(struct f2fs_sb_info *sbi)
1760{
1761 struct sit_info *sit_i = SIT_I(sbi);
1762 unsigned int start;
1763
1764 if (!sit_i)
1765 return;
1766
1767 if (sit_i->sentries) {
1768 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1769 kfree(sit_i->sentries[start].cur_valid_map);
1770 kfree(sit_i->sentries[start].ckpt_valid_map);
1771 }
1772 }
1773 vfree(sit_i->sentries);
1774 vfree(sit_i->sec_entries);
1775 kfree(sit_i->dirty_sentries_bitmap);
1776
1777 SM_I(sbi)->sit_info = NULL;
1778 kfree(sit_i->sit_bitmap);
1779 kfree(sit_i);
1780}
1781
1782void destroy_segment_manager(struct f2fs_sb_info *sbi)
1783{
1784 struct f2fs_sm_info *sm_info = SM_I(sbi);
1785 destroy_dirty_segmap(sbi);
1786 destroy_curseg(sbi);
1787 destroy_free_segmap(sbi);
1788 destroy_sit_info(sbi);
1789 sbi->sm_info = NULL;
1790 kfree(sm_info);
1791}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
new file mode 100644
index 000000000000..0948405af6f5
--- /dev/null
+++ b/fs/f2fs/segment.h
@@ -0,0 +1,618 @@
1/*
2 * fs/f2fs/segment.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11/* constant macro */
12#define NULL_SEGNO ((unsigned int)(~0))
13
14/* V: Logical segment # in volume, R: Relative segment # in main area */
15#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
16#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
17
18#define IS_DATASEG(t) \
19 ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \
20 (t == CURSEG_WARM_DATA))
21
22#define IS_NODESEG(t) \
23 ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \
24 (t == CURSEG_WARM_NODE))
25
26#define IS_CURSEG(sbi, segno) \
27 ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
28 (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
29 (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
30 (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
31 (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
32 (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
33
34#define IS_CURSEC(sbi, secno) \
35 ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
36 sbi->segs_per_sec) || \
37 (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
38 sbi->segs_per_sec) || \
39 (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
40 sbi->segs_per_sec) || \
41 (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
42 sbi->segs_per_sec) || \
43 (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
44 sbi->segs_per_sec) || \
45 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
46 sbi->segs_per_sec))
47
48#define START_BLOCK(sbi, segno) \
49 (SM_I(sbi)->seg0_blkaddr + \
50 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
51#define NEXT_FREE_BLKADDR(sbi, curseg) \
52 (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
53
54#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr)
55
56#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_SEGNO(sbi, blk_addr) \
61 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
62 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
63 GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
64#define GET_SECNO(sbi, segno) \
65 ((segno) / sbi->segs_per_sec)
66#define GET_ZONENO_FROM_SEGNO(sbi, segno) \
67 ((segno / sbi->segs_per_sec) / sbi->secs_per_zone)
68
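These macros form a simple division chain from a block address down to a zone: blocks group into segments (log_blocks_per_seg), segments into sections (segs_per_sec), and sections into zones (secs_per_zone). A sketch of the chain with assumed geometry (512-block segments, 4 segments per section, 2 sections per zone; all three values are illustrative):

#include <stdio.h>

int main(void)
{
    const unsigned log_blocks_per_seg = 9;  /* assumed */
    const unsigned segs_per_sec = 4;        /* assumed */
    const unsigned secs_per_zone = 2;       /* assumed */
    unsigned blk_off = 123456;              /* offset from segment 0 */

    unsigned segno = blk_off >> log_blocks_per_seg;     /* GET_SEGNO_FROM_SEG0 */
    unsigned secno = segno / segs_per_sec;              /* GET_SECNO */
    unsigned zone = secno / secs_per_zone;              /* GET_ZONENO_FROM_SEGNO */

    printf("blk %u -> seg %u, sec %u, zone %u\n", blk_off, segno, secno, zone);
    return 0;
}
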
69#define GET_SUM_BLOCK(sbi, segno) \
70 ((sbi->sm_info->ssa_blkaddr) + segno)
71
72#define GET_SUM_TYPE(footer) ((footer)->entry_type)
73#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type)
74
75#define SIT_ENTRY_OFFSET(sit_i, segno) \
76 (segno % sit_i->sents_per_block)
77#define SIT_BLOCK_OFFSET(sit_i, segno) \
78 (segno / SIT_ENTRY_PER_BLOCK)
79#define START_SEGNO(sit_i, segno) \
80 (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK)
81#define f2fs_bitmap_size(nr) \
82 (BITS_TO_LONGS(nr) * sizeof(unsigned long))
83#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
84
85#define SECTOR_FROM_BLOCK(sbi, blk_addr) \
86 (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
87
88/* during checkpoint, bio_private is used to synchronize the last bio */
89struct bio_private {
90 struct f2fs_sb_info *sbi;
91 bool is_sync;
92 void *wait;
93};
94
95/*
96 * indicate a block allocation direction: RIGHT and LEFT.
97 * RIGHT means allocating new sections towards the end of volume.
98 * LEFT means the opposite direction.
99 */
100enum {
101 ALLOC_RIGHT = 0,
102 ALLOC_LEFT
103};
104
105/*
106 * In the victim_sel_policy->alloc_mode, there are two block allocation modes.
107 * LFS writes data sequentially with cleaning operations.
108 * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
109 */
110enum {
111 LFS = 0,
112 SSR
113};
114
115/*
116 * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
117 * GC_CB is based on cost-benefit algorithm.
118 * GC_GREEDY is based on greedy algorithm.
119 */
120enum {
121 GC_CB = 0,
122 GC_GREEDY
123};
124
125/*
126 * BG_GC means the background cleaning job.
127 * FG_GC means the on-demand cleaning job.
128 */
129enum {
130 BG_GC = 0,
131 FG_GC
132};
133
134/* for a function parameter to select a victim segment */
135struct victim_sel_policy {
136 int alloc_mode; /* LFS or SSR */
137 int gc_mode; /* GC_CB or GC_GREEDY */
138 unsigned long *dirty_segmap; /* dirty segment bitmap */
139 unsigned int offset; /* last scanned bitmap offset */
140 unsigned int ofs_unit; /* bitmap search unit */
141 unsigned int min_cost; /* minimum cost */
142 unsigned int min_segno; /* segment # having min. cost */
143};
144
145struct seg_entry {
146 unsigned short valid_blocks; /* # of valid blocks */
147 unsigned char *cur_valid_map; /* validity bitmap of blocks */
148 /*
149 * # of valid blocks and the validity bitmap stored in the last
150 * checkpoint pack. This information is used by the SSR mode.
151 */
152 unsigned short ckpt_valid_blocks;
153 unsigned char *ckpt_valid_map;
154 unsigned char type; /* segment type like CURSEG_XXX_TYPE */
155 unsigned long long mtime; /* modification time of the segment */
156};
157
158struct sec_entry {
159 unsigned int valid_blocks; /* # of valid blocks in a section */
160};
161
162struct segment_allocation {
163 void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
164};
165
166struct sit_info {
167 const struct segment_allocation *s_ops;
168
169 block_t sit_base_addr; /* start block address of SIT area */
170 block_t sit_blocks; /* # of blocks used by SIT area */
171 block_t written_valid_blocks; /* # of valid blocks in main area */
172 char *sit_bitmap; /* SIT bitmap pointer */
173 unsigned int bitmap_size; /* SIT bitmap size */
174
175 unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
176 unsigned int dirty_sentries; /* # of dirty sentries */
177 unsigned int sents_per_block; /* # of SIT entries per block */
178 struct mutex sentry_lock; /* to protect SIT cache */
179 struct seg_entry *sentries; /* SIT segment-level cache */
180 struct sec_entry *sec_entries; /* SIT section-level cache */
181
182 /* for cost-benefit algorithm in cleaning procedure */
183 unsigned long long elapsed_time; /* elapsed time after mount */
184 unsigned long long mounted_time; /* mount time */
185 unsigned long long min_mtime; /* min. modification time */
186 unsigned long long max_mtime; /* max. modification time */
187};
188
189struct free_segmap_info {
190 unsigned int start_segno; /* start segment number logically */
191 unsigned int free_segments; /* # of free segments */
192 unsigned int free_sections; /* # of free sections */
193 rwlock_t segmap_lock; /* free segmap lock */
194 unsigned long *free_segmap; /* free segment bitmap */
195 unsigned long *free_secmap; /* free section bitmap */
196};
197
198/* Notice: The order of dirty type is the same as CURSEG_XXX in f2fs.h */
199enum dirty_type {
200 DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */
201 DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */
202 DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */
203 DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */
204 DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */
205 DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */
206 DIRTY, /* to count # of dirty segments */
207 PRE, /* to count # of entirely obsolete segments */
208 NR_DIRTY_TYPE
209};
210
211struct dirty_seglist_info {
212 const struct victim_selection *v_ops; /* victim selection operation */
213 unsigned long *dirty_segmap[NR_DIRTY_TYPE];
214 struct mutex seglist_lock; /* lock for segment bitmaps */
215 int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
216 unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */
217};
218
219/* victim selection function for cleaning and SSR */
220struct victim_selection {
221 int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
222 int, int, char);
223};
224
225/* for active log information */
226struct curseg_info {
227 struct mutex curseg_mutex; /* lock for consistency */
228 struct f2fs_summary_block *sum_blk; /* cached summary block */
229 unsigned char alloc_type; /* current allocation type */
230 unsigned int segno; /* current segment number */
231 unsigned short next_blkoff; /* next block offset to write */
232 unsigned int zone; /* current zone number */
233 unsigned int next_segno; /* preallocated segment */
234};
235
236/*
237 * inline functions
238 */
239static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
240{
241 return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
242}
243
244static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi,
245 unsigned int segno)
246{
247 struct sit_info *sit_i = SIT_I(sbi);
248 return &sit_i->sentries[segno];
249}
250
251static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi,
252 unsigned int segno)
253{
254 struct sit_info *sit_i = SIT_I(sbi);
255 return &sit_i->sec_entries[GET_SECNO(sbi, segno)];
256}
257
258static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
259 unsigned int segno, int section)
260{
261 /*
262 * In order to get # of valid blocks in a section instantly from many
263 * segments, f2fs manages two counting structures separately.
264 */
265 if (section > 1)
266 return get_sec_entry(sbi, segno)->valid_blocks;
267 else
268 return get_seg_entry(sbi, segno)->valid_blocks;
269}
270
271static inline void seg_info_from_raw_sit(struct seg_entry *se,
272 struct f2fs_sit_entry *rs)
273{
274 se->valid_blocks = GET_SIT_VBLOCKS(rs);
275 se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
276 memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
277 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
278 se->type = GET_SIT_TYPE(rs);
279 se->mtime = le64_to_cpu(rs->mtime);
280}
281
282static inline void seg_info_to_raw_sit(struct seg_entry *se,
283 struct f2fs_sit_entry *rs)
284{
285 unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) |
286 se->valid_blocks;
287 rs->vblocks = cpu_to_le16(raw_vblocks);
288 memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
289 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
290 se->ckpt_valid_blocks = se->valid_blocks;
291 rs->mtime = cpu_to_le64(se->mtime);
292}
293
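seg_info_to_raw_sit() packs the segment type and the valid-block count into one little-endian 16-bit field: the count occupies the low SIT_VBLOCKS_SHIFT bits and the type sits above it, which is what GET_SIT_VBLOCKS() and GET_SIT_TYPE() unpack on the read side. A round-trip sketch, assuming a 10-bit count field (wide enough for 512 blocks per segment; the shift value is an assumption here):

#include <assert.h>
#include <stdint.h>

#define VBLOCKS_SHIFT 10                        /* assumed SIT_VBLOCKS_SHIFT */
#define VBLOCKS_MASK  ((1u << VBLOCKS_SHIFT) - 1)

int main(void)
{
    unsigned type = 3, valid_blocks = 300;

    uint16_t raw = (uint16_t)((type << VBLOCKS_SHIFT) | valid_blocks);

    assert((raw & VBLOCKS_MASK) == valid_blocks);   /* GET_SIT_VBLOCKS */
    assert((raw >> VBLOCKS_SHIFT) == type);         /* GET_SIT_TYPE */
    return 0;
}
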
294static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
295 unsigned int max, unsigned int segno)
296{
297 unsigned int ret;
298 read_lock(&free_i->segmap_lock);
299 ret = find_next_bit(free_i->free_segmap, max, segno);
300 read_unlock(&free_i->segmap_lock);
301 return ret;
302}
303
304static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
305{
306 struct free_segmap_info *free_i = FREE_I(sbi);
307 unsigned int secno = segno / sbi->segs_per_sec;
308 unsigned int start_segno = secno * sbi->segs_per_sec;
309 unsigned int next;
310
311 write_lock(&free_i->segmap_lock);
312 clear_bit(segno, free_i->free_segmap);
313 free_i->free_segments++;
314
315 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno);
316 if (next >= start_segno + sbi->segs_per_sec) {
317 clear_bit(secno, free_i->free_secmap);
318 free_i->free_sections++;
319 }
320 write_unlock(&free_i->segmap_lock);
321}
322
323static inline void __set_inuse(struct f2fs_sb_info *sbi,
324 unsigned int segno)
325{
326 struct free_segmap_info *free_i = FREE_I(sbi);
327 unsigned int secno = segno / sbi->segs_per_sec;
328 set_bit(segno, free_i->free_segmap);
329 free_i->free_segments--;
330 if (!test_and_set_bit(secno, free_i->free_secmap))
331 free_i->free_sections--;
332}
333
334static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
335 unsigned int segno)
336{
337 struct free_segmap_info *free_i = FREE_I(sbi);
338 unsigned int secno = segno / sbi->segs_per_sec;
339 unsigned int start_segno = secno * sbi->segs_per_sec;
340 unsigned int next;
341
342 write_lock(&free_i->segmap_lock);
343 if (test_and_clear_bit(segno, free_i->free_segmap)) {
344 free_i->free_segments++;
345
346 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi),
347 start_segno);
348 if (next >= start_segno + sbi->segs_per_sec) {
349 if (test_and_clear_bit(secno, free_i->free_secmap))
350 free_i->free_sections++;
351 }
352 }
353 write_unlock(&free_i->segmap_lock);
354}
355
356static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
357 unsigned int segno)
358{
359 struct free_segmap_info *free_i = FREE_I(sbi);
360 unsigned int secno = segno / sbi->segs_per_sec;
361 write_lock(&free_i->segmap_lock);
362 if (!test_and_set_bit(segno, free_i->free_segmap)) {
363 free_i->free_segments--;
364 if (!test_and_set_bit(secno, free_i->free_secmap))
365 free_i->free_sections--;
366 }
367 write_unlock(&free_i->segmap_lock);
368}
369
370static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
371 void *dst_addr)
372{
373 struct sit_info *sit_i = SIT_I(sbi);
374 memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
375}
376
377static inline block_t written_block_count(struct f2fs_sb_info *sbi)
378{
379 struct sit_info *sit_i = SIT_I(sbi);
380 block_t vblocks;
381
382 mutex_lock(&sit_i->sentry_lock);
383 vblocks = sit_i->written_valid_blocks;
384 mutex_unlock(&sit_i->sentry_lock);
385
386 return vblocks;
387}
388
389static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
390{
391 struct free_segmap_info *free_i = FREE_I(sbi);
392 unsigned int free_segs;
393
394 read_lock(&free_i->segmap_lock);
395 free_segs = free_i->free_segments;
396 read_unlock(&free_i->segmap_lock);
397
398 return free_segs;
399}
400
401static inline int reserved_segments(struct f2fs_sb_info *sbi)
402{
403 return SM_I(sbi)->reserved_segments;
404}
405
406static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
407{
408 struct free_segmap_info *free_i = FREE_I(sbi);
409 unsigned int free_secs;
410
411 read_lock(&free_i->segmap_lock);
412 free_secs = free_i->free_sections;
413 read_unlock(&free_i->segmap_lock);
414
415 return free_secs;
416}
417
418static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
419{
420 return DIRTY_I(sbi)->nr_dirty[PRE];
421}
422
423static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
424{
425 return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] +
426 DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] +
427 DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] +
428 DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] +
429 DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] +
430 DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
431}
432
433static inline int overprovision_segments(struct f2fs_sb_info *sbi)
434{
435 return SM_I(sbi)->ovp_segments;
436}
437
438static inline int overprovision_sections(struct f2fs_sb_info *sbi)
439{
440 return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec;
441}
442
443static inline int reserved_sections(struct f2fs_sb_info *sbi)
444{
445 return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec;
446}
447
448static inline bool need_SSR(struct f2fs_sb_info *sbi)
449{
450 return (free_sections(sbi) < overprovision_sections(sbi));
451}
452
453static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
454{
455 struct curseg_info *curseg = CURSEG_I(sbi, type);
456 return DIRTY_I(sbi)->v_ops->get_victim(sbi,
457 &(curseg)->next_segno, BG_GC, type, SSR);
458}
459
460static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi)
461{
462 return free_sections(sbi) <= reserved_sections(sbi);
463}
464
465static inline int utilization(struct f2fs_sb_info *sbi)
466{
467 return (long int)valid_user_blocks(sbi) * 100 /
468 (long int)sbi->user_block_count;
469}
470
471/*
472 * Sometimes it is better for f2fs to drop the out-of-place update policy.
473 * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write
474 * data in the original place, like other traditional file systems.
475 * But it is currently set to 100 percent, which means in-place update
476 * is disabled. See need_inplace_update() below.
477 */
478#define MIN_IPU_UTIL 100
479static inline bool need_inplace_update(struct inode *inode)
480{
481 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
482 if (S_ISDIR(inode->i_mode))
483 return false;
484 if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL)
485 return true;
486 return false;
487}
488
489static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
490 int type)
491{
492 struct curseg_info *curseg = CURSEG_I(sbi, type);
493 return curseg->segno;
494}
495
496static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi,
497 int type)
498{
499 struct curseg_info *curseg = CURSEG_I(sbi, type);
500 return curseg->alloc_type;
501}
502
503static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
504{
505 struct curseg_info *curseg = CURSEG_I(sbi, type);
506 return curseg->next_blkoff;
507}
508
509static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
510{
511 unsigned int end_segno = SM_I(sbi)->segment_count - 1;
512 BUG_ON(segno > end_segno);
513}
514
515/*
516 * This function is used only for debugging.
517 * NOTE: In the future, we should remove this function.
518 */
519static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
520{
521 struct f2fs_sm_info *sm_info = SM_I(sbi);
522 block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg;
523 block_t start_addr = sm_info->seg0_blkaddr;
524 block_t end_addr = start_addr + total_blks - 1;
525 BUG_ON(blk_addr < start_addr);
526 BUG_ON(blk_addr > end_addr);
527}
528
529/*
530 * A summary block is always treated as an invalid block
531 */
532static inline void check_block_count(struct f2fs_sb_info *sbi,
533 int segno, struct f2fs_sit_entry *raw_sit)
534{
535 struct f2fs_sm_info *sm_info = SM_I(sbi);
536 unsigned int end_segno = sm_info->segment_count - 1;
537 int valid_blocks = 0;
538 int i;
539
540 /* check segment usage */
541 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
542
543 /* check boundary of a given segment number */
544 BUG_ON(segno > end_segno);
545
546 /* check bitmap with valid block count */
547 for (i = 0; i < sbi->blocks_per_seg; i++)
548 if (f2fs_test_bit(i, raw_sit->valid_map))
549 valid_blocks++;
550 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
551}
552
553static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
554 unsigned int start)
555{
556 struct sit_info *sit_i = SIT_I(sbi);
557 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start);
558 block_t blk_addr = sit_i->sit_base_addr + offset;
559
560 check_seg_range(sbi, start);
561
562 /* calculate sit block address */
563 if (f2fs_test_bit(offset, sit_i->sit_bitmap))
564 blk_addr += sit_i->sit_blocks;
565
566 return blk_addr;
567}
568
569static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi,
570 pgoff_t block_addr)
571{
572 struct sit_info *sit_i = SIT_I(sbi);
573 block_addr -= sit_i->sit_base_addr;
574 if (block_addr < sit_i->sit_blocks)
575 block_addr += sit_i->sit_blocks;
576 else
577 block_addr -= sit_i->sit_blocks;
578
579 return block_addr + sit_i->sit_base_addr;
580}
581
582static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
583{
584 unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start);
585
586 if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
587 f2fs_clear_bit(block_off, sit_i->sit_bitmap);
588 else
589 f2fs_set_bit(block_off, sit_i->sit_bitmap);
590}
591
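current_sit_addr(), next_sit_addr() and set_to_next_sit() together implement an A/B scheme: the SIT area holds two copies of every SIT block, sit_bitmap records which copy is live, updates are always written to the other copy, and flipping the bitmap bit commits the switch. A stand-alone model of the address flip (the sit_base_addr and sit_blocks values below are illustrative):

#include <assert.h>

int main(void)
{
    const unsigned long base = 1000, sit_blocks = 64;   /* assumed layout */
    unsigned long cur = base + 5;                       /* copy A of block 5 */

    /* next_sit_addr(): jump to the mirror copy */
    unsigned long off = cur - base;
    unsigned long next = base + (off < sit_blocks ? off + sit_blocks
                                                  : off - sit_blocks);
    assert(next == base + 5 + sit_blocks);              /* copy B */

    /* applying the flip twice returns to the original copy */
    off = next - base;
    next = base + (off < sit_blocks ? off + sit_blocks : off - sit_blocks);
    assert(next == cur);
    return 0;
}
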
592static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
593{
594 struct sit_info *sit_i = SIT_I(sbi);
595 return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec -
596 sit_i->mounted_time;
597}
598
599static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
600 unsigned int ofs_in_node, unsigned char version)
601{
602 sum->nid = cpu_to_le32(nid);
603 sum->ofs_in_node = cpu_to_le16(ofs_in_node);
604 sum->version = version;
605}
606
607static inline block_t start_sum_block(struct f2fs_sb_info *sbi)
608{
609 return __start_cp_addr(sbi) +
610 le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
611}
612
613static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
614{
615 return __start_cp_addr(sbi) +
616 le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count)
617 - (base + 1) + type;
618}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
new file mode 100644
index 000000000000..13867322cf5a
--- /dev/null
+++ b/fs/f2fs/super.c
@@ -0,0 +1,657 @@
1/*
2 * fs/f2fs/super.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/statfs.h>
15#include <linux/proc_fs.h>
16#include <linux/buffer_head.h>
17#include <linux/backing-dev.h>
18#include <linux/kthread.h>
19#include <linux/parser.h>
20#include <linux/mount.h>
21#include <linux/seq_file.h>
22#include <linux/random.h>
23#include <linux/exportfs.h>
24#include <linux/f2fs_fs.h>
25
26#include "f2fs.h"
27#include "node.h"
28#include "xattr.h"
29
30static struct kmem_cache *f2fs_inode_cachep;
31
32enum {
33 Opt_gc_background_off,
34 Opt_disable_roll_forward,
35 Opt_discard,
36 Opt_noheap,
37 Opt_nouser_xattr,
38 Opt_noacl,
39 Opt_active_logs,
40 Opt_disable_ext_identify,
41 Opt_err,
42};
43
44static match_table_t f2fs_tokens = {
45 {Opt_gc_background_off, "background_gc_off"},
46 {Opt_disable_roll_forward, "disable_roll_forward"},
47 {Opt_discard, "discard"},
48 {Opt_noheap, "no_heap"},
49 {Opt_nouser_xattr, "nouser_xattr"},
50 {Opt_noacl, "noacl"},
51 {Opt_active_logs, "active_logs=%u"},
52 {Opt_disable_ext_identify, "disable_ext_identify"},
53 {Opt_err, NULL},
54};
55
56static void init_once(void *foo)
57{
58 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
59
60 inode_init_once(&fi->vfs_inode);
61}
62
63static struct inode *f2fs_alloc_inode(struct super_block *sb)
64{
65 struct f2fs_inode_info *fi;
66
67 fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO);
68 if (!fi)
69 return NULL;
70
71 init_once((void *) fi);
72
73 /* Initialize f2fs-specific inode info */
74 fi->vfs_inode.i_version = 1;
75 atomic_set(&fi->dirty_dents, 0);
76 fi->i_current_depth = 1;
77 fi->i_advise = 0;
78 rwlock_init(&fi->ext.ext_lock);
79
80 set_inode_flag(fi, FI_NEW_INODE);
81
82 return &fi->vfs_inode;
83}
84
85static void f2fs_i_callback(struct rcu_head *head)
86{
87 struct inode *inode = container_of(head, struct inode, i_rcu);
88 kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
89}
90
91static void f2fs_destroy_inode(struct inode *inode)
92{
93 call_rcu(&inode->i_rcu, f2fs_i_callback);
94}
95
96static void f2fs_put_super(struct super_block *sb)
97{
98 struct f2fs_sb_info *sbi = F2FS_SB(sb);
99
100 f2fs_destroy_stats(sbi);
101 stop_gc_thread(sbi);
102
103 write_checkpoint(sbi, false, true);
104
105 iput(sbi->node_inode);
106 iput(sbi->meta_inode);
107
108 /* destroy f2fs internal modules */
109 destroy_node_manager(sbi);
110 destroy_segment_manager(sbi);
111
112 kfree(sbi->ckpt);
113
114 sb->s_fs_info = NULL;
115 brelse(sbi->raw_super_buf);
116 kfree(sbi);
117}
118
119int f2fs_sync_fs(struct super_block *sb, int sync)
120{
121 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 int ret = 0;
123
124 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
125 return 0;
126
127 if (sync)
128 write_checkpoint(sbi, false, false);
129
130 return ret;
131}
132
133static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
134{
135 struct super_block *sb = dentry->d_sb;
136 struct f2fs_sb_info *sbi = F2FS_SB(sb);
137 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
138 block_t total_count, user_block_count, start_count, ovp_count;
139
140 total_count = le64_to_cpu(sbi->raw_super->block_count);
141 user_block_count = sbi->user_block_count;
142 start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
143 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
144 buf->f_type = F2FS_SUPER_MAGIC;
145 buf->f_bsize = sbi->blocksize;
146
147 buf->f_blocks = total_count - start_count;
148 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
149 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
150
151 buf->f_files = valid_inode_count(sbi);
152 buf->f_ffree = sbi->total_node_count - valid_node_count(sbi);
153
154 buf->f_namelen = F2FS_MAX_NAME_LEN;
155 buf->f_fsid.val[0] = (u32)id;
156 buf->f_fsid.val[1] = (u32)(id >> 32);
157
158 return 0;
159}
160
161static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
162{
163 struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
164
165 if (test_opt(sbi, BG_GC))
166 seq_puts(seq, ",background_gc_on");
167 else
168 seq_puts(seq, ",background_gc_off");
169 if (test_opt(sbi, DISABLE_ROLL_FORWARD))
170 seq_puts(seq, ",disable_roll_forward");
171 if (test_opt(sbi, DISCARD))
172 seq_puts(seq, ",discard");
173 if (test_opt(sbi, NOHEAP))
174 seq_puts(seq, ",no_heap_alloc");
175#ifdef CONFIG_F2FS_FS_XATTR
176 if (test_opt(sbi, XATTR_USER))
177 seq_puts(seq, ",user_xattr");
178 else
179 seq_puts(seq, ",nouser_xattr");
180#endif
181#ifdef CONFIG_F2FS_FS_POSIX_ACL
182 if (test_opt(sbi, POSIX_ACL))
183 seq_puts(seq, ",acl");
184 else
185 seq_puts(seq, ",noacl");
186#endif
187 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
188 seq_puts(seq, ",disable_ext_indentify");
189
190 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
191
192 return 0;
193}
194
195static struct super_operations f2fs_sops = {
196 .alloc_inode = f2fs_alloc_inode,
197 .destroy_inode = f2fs_destroy_inode,
198 .write_inode = f2fs_write_inode,
199 .show_options = f2fs_show_options,
200 .evict_inode = f2fs_evict_inode,
201 .put_super = f2fs_put_super,
202 .sync_fs = f2fs_sync_fs,
203 .statfs = f2fs_statfs,
204};
205
206static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
207 u64 ino, u32 generation)
208{
209 struct f2fs_sb_info *sbi = F2FS_SB(sb);
210 struct inode *inode;
211
212 if (ino < F2FS_ROOT_INO(sbi))
213 return ERR_PTR(-ESTALE);
214
215 /*
216 * f2fs_iget isn't quite right if the inode is currently unallocated!
217 * However f2fs_iget currently does appropriate checks to handle stale
218 * inodes so everything is OK.
219 */
220 inode = f2fs_iget(sb, ino);
221 if (IS_ERR(inode))
222 return ERR_CAST(inode);
223 if (generation && inode->i_generation != generation) {
224 /* we didn't find the right inode.. */
225 iput(inode);
226 return ERR_PTR(-ESTALE);
227 }
228 return inode;
229}
230
231static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
232 int fh_len, int fh_type)
233{
234 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
235 f2fs_nfs_get_inode);
236}
237
238static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
239 int fh_len, int fh_type)
240{
241 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
242 f2fs_nfs_get_inode);
243}
244
245static const struct export_operations f2fs_export_ops = {
246 .fh_to_dentry = f2fs_fh_to_dentry,
247 .fh_to_parent = f2fs_fh_to_parent,
248 .get_parent = f2fs_get_parent,
249};
250
251static int parse_options(struct f2fs_sb_info *sbi, char *options)
252{
253 substring_t args[MAX_OPT_ARGS];
254 char *p;
255 int arg = 0;
256
257 if (!options)
258 return 0;
259
260 while ((p = strsep(&options, ",")) != NULL) {
261 int token;
262 if (!*p)
263 continue;
264 /*
265 * Initialize args struct so we know whether arg was
266 * found; some options take optional arguments.
267 */
268 args[0].to = args[0].from = NULL;
269 token = match_token(p, f2fs_tokens, args);
270
271 switch (token) {
272 case Opt_gc_background_off:
273 clear_opt(sbi, BG_GC);
274 break;
275 case Opt_disable_roll_forward:
276 set_opt(sbi, DISABLE_ROLL_FORWARD);
277 break;
278 case Opt_discard:
279 set_opt(sbi, DISCARD);
280 break;
281 case Opt_noheap:
282 set_opt(sbi, NOHEAP);
283 break;
284#ifdef CONFIG_F2FS_FS_XATTR
285 case Opt_nouser_xattr:
286 clear_opt(sbi, XATTR_USER);
287 break;
288#else
289 case Opt_nouser_xattr:
290 pr_info("nouser_xattr options not supported\n");
291 break;
292#endif
293#ifdef CONFIG_F2FS_FS_POSIX_ACL
294 case Opt_noacl:
295 clear_opt(sbi, POSIX_ACL);
296 break;
297#else
298 case Opt_noacl:
299 pr_info("noacl options not supported\n");
300 break;
301#endif
302 case Opt_active_logs:
303 if (args->from && match_int(args, &arg))
304 return -EINVAL;
305 if (arg != 2 && arg != 4 && arg != 6)
306 return -EINVAL;
307 sbi->active_logs = arg;
308 break;
309 case Opt_disable_ext_identify:
310 set_opt(sbi, DISABLE_EXT_IDENTIFY);
311 break;
312 default:
313 pr_err("Unrecognized mount option \"%s\" or missing value\n",
314 p);
315 return -EINVAL;
316 }
317 }
318 return 0;
319}
320
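Given this token table, a representative invocation might look like mount -t f2fs -o active_logs=4,discard,no_heap /dev/sdb1 /mnt/f2fs (device and mount point are placeholders); parse_options() accepts only 2, 4, or 6 for active_logs, and any unrecognized option fails the mount with -EINVAL.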
321static loff_t max_file_size(unsigned bits)
322{
323 loff_t result = ADDRS_PER_INODE;
324 loff_t leaf_count = ADDRS_PER_BLOCK;
325
326 /* two direct node blocks */
327 result += (leaf_count * 2);
328
329 /* two indirect node blocks */
330 leaf_count *= NIDS_PER_BLOCK;
331 result += (leaf_count * 2);
332
333 /* one double indirect node block */
334 leaf_count *= NIDS_PER_BLOCK;
335 result += leaf_count;
336
337 result <<= bits;
338 return result;
339}
340
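The geometry behind max_file_size() is the classic direct/indirect tree: direct pointers in the inode itself, two direct node blocks, two indirect node blocks, and one double-indirect node block. A stand-alone evaluation with assumed constants (ADDRS_PER_INODE = 923, ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018, 4KB blocks; these values are taken as assumptions here, not from this file):

#include <stdio.h>

int main(void)
{
    const long long addrs_per_inode = 923;   /* assumed ADDRS_PER_INODE */
    const long long addrs_per_block = 1018;  /* assumed ADDRS_PER_BLOCK */
    const long long nids_per_block = 1018;   /* assumed NIDS_PER_BLOCK */
    const int bits = 12;                     /* log2 of a 4KB block */

    long long result = addrs_per_inode;
    long long leaf = addrs_per_block;

    result += leaf * 2;                      /* two direct node blocks */
    leaf *= nids_per_block;
    result += leaf * 2;                      /* two indirect node blocks */
    leaf *= nids_per_block;
    result += leaf;                          /* one double-indirect node block */

    printf("max file size: %lld bytes (~%.2f TiB)\n",
           result << bits, (double)(result << bits) / (1ULL << 40));
    return 0;
}
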
341static int sanity_check_raw_super(struct f2fs_super_block *raw_super)
342{
343 unsigned int blocksize;
344
345 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic))
346 return 1;
347
348 /* Currently, support only 4KB block size */
349 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
350 if (blocksize != PAGE_CACHE_SIZE)
351 return 1;
352 if (le32_to_cpu(raw_super->log_sectorsize) !=
353 F2FS_LOG_SECTOR_SIZE)
354 return 1;
355 if (le32_to_cpu(raw_super->log_sectors_per_block) !=
356 F2FS_LOG_SECTORS_PER_BLOCK)
357 return 1;
358 return 0;
359}
360
361static int sanity_check_ckpt(struct f2fs_super_block *raw_super,
362 struct f2fs_checkpoint *ckpt)
363{
364 unsigned int total, fsmeta;
365
366 total = le32_to_cpu(raw_super->segment_count);
367 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
368 fsmeta += le32_to_cpu(raw_super->segment_count_sit);
369 fsmeta += le32_to_cpu(raw_super->segment_count_nat);
370 fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
371 fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
372
373 if (fsmeta >= total)
374 return 1;
375 return 0;
376}
377
378static void init_sb_info(struct f2fs_sb_info *sbi)
379{
380 struct f2fs_super_block *raw_super = sbi->raw_super;
381 int i;
382
383 sbi->log_sectors_per_block =
384 le32_to_cpu(raw_super->log_sectors_per_block);
385 sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
386 sbi->blocksize = 1 << sbi->log_blocksize;
387 sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
388 sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
389 sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
390 sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
391 sbi->total_sections = le32_to_cpu(raw_super->section_count);
392 sbi->total_node_count =
393 (le32_to_cpu(raw_super->segment_count_nat) / 2)
394 * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
395 sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
396 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
397 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
398
399 for (i = 0; i < NR_COUNT_TYPE; i++)
400 atomic_set(&sbi->nr_pages[i], 0);
401}
402
403static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
404{
405 struct f2fs_sb_info *sbi;
406 struct f2fs_super_block *raw_super;
407 struct buffer_head *raw_super_buf;
408 struct inode *root;
409 long err = -EINVAL;
410 int i;
411
412 /* allocate memory for f2fs-specific super block info */
413 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
414 if (!sbi)
415 return -ENOMEM;
416
417 /* set a temporary block size */
418 if (!sb_set_blocksize(sb, F2FS_BLKSIZE))
419 goto free_sbi;
420
421 /* read f2fs raw super block */
422 raw_super_buf = sb_bread(sb, 0);
423 if (!raw_super_buf) {
424 err = -EIO;
425 goto free_sbi;
426 }
427 raw_super = (struct f2fs_super_block *)
428 ((char *)raw_super_buf->b_data + F2FS_SUPER_OFFSET);
429
430 /* init some FS parameters */
431 sbi->active_logs = NR_CURSEG_TYPE;
432
433 set_opt(sbi, BG_GC);
434
435#ifdef CONFIG_F2FS_FS_XATTR
436 set_opt(sbi, XATTR_USER);
437#endif
438#ifdef CONFIG_F2FS_FS_POSIX_ACL
439 set_opt(sbi, POSIX_ACL);
440#endif
441 /* parse mount options */
442 if (parse_options(sbi, (char *)data))
443 goto free_sb_buf;
444
445 /* sanity checking of raw super */
446 if (sanity_check_raw_super(raw_super))
447 goto free_sb_buf;
448
449 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
450 sb->s_max_links = F2FS_LINK_MAX;
451 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
452
453 sb->s_op = &f2fs_sops;
454 sb->s_xattr = f2fs_xattr_handlers;
455 sb->s_export_op = &f2fs_export_ops;
456 sb->s_magic = F2FS_SUPER_MAGIC;
457 sb->s_fs_info = sbi;
458 sb->s_time_gran = 1;
459 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
460 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
461 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
462
463 /* init f2fs-specific super block info */
464 sbi->sb = sb;
465 sbi->raw_super = raw_super;
466 sbi->raw_super_buf = raw_super_buf;
467 mutex_init(&sbi->gc_mutex);
468 mutex_init(&sbi->write_inode);
469 mutex_init(&sbi->writepages);
470 mutex_init(&sbi->cp_mutex);
471 for (i = 0; i < NR_LOCK_TYPE; i++)
472 mutex_init(&sbi->fs_lock[i]);
473 sbi->por_doing = 0;
474 spin_lock_init(&sbi->stat_lock);
475 init_rwsem(&sbi->bio_sem);
476 init_sb_info(sbi);
477
478 /* get an inode for meta space */
479 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
480 if (IS_ERR(sbi->meta_inode)) {
481 err = PTR_ERR(sbi->meta_inode);
482 goto free_sb_buf;
483 }
484
485 err = get_valid_checkpoint(sbi);
486 if (err)
487 goto free_meta_inode;
488
489 /* sanity checking of checkpoint */
490 err = -EINVAL;
491 if (sanity_check_ckpt(raw_super, sbi->ckpt))
492 goto free_cp;
493
494 sbi->total_valid_node_count =
495 le32_to_cpu(sbi->ckpt->valid_node_count);
496 sbi->total_valid_inode_count =
497 le32_to_cpu(sbi->ckpt->valid_inode_count);
498 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
499 sbi->total_valid_block_count =
500 le64_to_cpu(sbi->ckpt->valid_block_count);
501 sbi->last_valid_block_count = sbi->total_valid_block_count;
502 sbi->alloc_valid_block_count = 0;
503 INIT_LIST_HEAD(&sbi->dir_inode_list);
504 spin_lock_init(&sbi->dir_inode_lock);
505
506 /* init super block */
507 if (!sb_set_blocksize(sb, sbi->blocksize))
508 goto free_cp;
509
510 init_orphan_info(sbi);
511
512 /* setup f2fs internal modules */
513 err = build_segment_manager(sbi);
514 if (err)
515 goto free_sm;
516 err = build_node_manager(sbi);
517 if (err)
518 goto free_nm;
519
520 build_gc_manager(sbi);
521
522 /* get an inode for node space */
523 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
524 if (IS_ERR(sbi->node_inode)) {
525 err = PTR_ERR(sbi->node_inode);
526 goto free_nm;
527 }
528
529 /* if there are any orphan nodes, free them */
530 err = -EINVAL;
531 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
532 recover_orphan_inodes(sbi))
533 goto free_node_inode;
534
535 /* read root inode and dentry */
536 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
537 if (IS_ERR(root)) {
538 err = PTR_ERR(root);
539 goto free_node_inode;
540 }
541 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size)
542 goto free_root_inode;
543
544 sb->s_root = d_make_root(root); /* allocate root dentry */
545 if (!sb->s_root) {
546 err = -ENOMEM;
547 goto free_root_inode;
548 }
549
550 /* recover fsynced data */
551 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
552 !test_opt(sbi, DISABLE_ROLL_FORWARD))
553 recover_fsync_data(sbi);
554
555 /* After POR, we can run background GC thread */
556 err = start_gc_thread(sbi);
557 if (err)
558 goto fail;
559
560 err = f2fs_build_stats(sbi);
561 if (err)
562 goto fail;
563
564 return 0;
565fail:
566 stop_gc_thread(sbi);
567free_root_inode:
568 dput(sb->s_root);
569 sb->s_root = NULL;
570free_node_inode:
571 iput(sbi->node_inode);
572free_nm:
573 destroy_node_manager(sbi);
574free_sm:
575 destroy_segment_manager(sbi);
576free_cp:
577 kfree(sbi->ckpt);
578free_meta_inode:
579 make_bad_inode(sbi->meta_inode);
580 iput(sbi->meta_inode);
581free_sb_buf:
582 brelse(raw_super_buf);
583free_sbi:
584 kfree(sbi);
585 return err;
586}
587
588static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
589 const char *dev_name, void *data)
590{
591 return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
592}
593
594static struct file_system_type f2fs_fs_type = {
595 .owner = THIS_MODULE,
596 .name = "f2fs",
597 .mount = f2fs_mount,
598 .kill_sb = kill_block_super,
599 .fs_flags = FS_REQUIRES_DEV,
600};
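
Everything in f2fs_fill_super() is driven by an ordinary mount once init_f2fs_fs() below has registered this filesystem type. A userspace sketch, with device and mountpoint paths as examples only:

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
        /* NULL data means no mount options, so parse_options() leaves the
         * defaults set above (BG_GC, XATTR_USER, POSIX_ACL) in place. */
        if (mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0, NULL) != 0) {
            perror("mount");
            return 1;
        }
        return 0;
    }
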
601
602static int init_inodecache(void)
603{
604 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
605 sizeof(struct f2fs_inode_info), NULL);
606 if (f2fs_inode_cachep == NULL)
607 return -ENOMEM;
608 return 0;
609}
610
611static void destroy_inodecache(void)
612{
613 /*
614 * Make sure all delayed RCU-freed inodes are flushed before we
615 * destroy the cache.
616 */
617 rcu_barrier();
618 kmem_cache_destroy(f2fs_inode_cachep);
619}
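
The rcu_barrier() above is needed because inodes from this cache are freed through call_rcu(), so every pending callback must drain before the cache itself is destroyed. A generic sketch of that pattern (names are hypothetical; f2fs's actual RCU callback lives earlier in super.c, outside this excerpt):

    /* Assumes <linux/slab.h> and <linux/rcupdate.h>. */
    static struct kmem_cache *my_cachep;

    struct my_obj {
        struct rcu_head rcu;
        /* ... payload ... */
    };

    static void my_obj_free_rcu(struct rcu_head *head)
    {
        struct my_obj *obj = container_of(head, struct my_obj, rcu);

        kmem_cache_free(my_cachep, obj);   /* runs after a grace period */
    }

    /* free path:     call_rcu(&obj->rcu, my_obj_free_rcu);         */
    /* teardown path: rcu_barrier(); kmem_cache_destroy(my_cachep); */
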
620
621static int __init init_f2fs_fs(void)
622{
623 int err;
624
625 err = init_inodecache();
626 if (err)
627 goto fail;
628 err = create_node_manager_caches();
629 if (err)
630 goto fail;
631 err = create_gc_caches();
632 if (err)
633 goto fail;
634 err = create_checkpoint_caches();
635 if (err)
636 goto fail;
637 return register_filesystem(&f2fs_fs_type);
638fail:
639 return err;
640}
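
One caveat worth spelling out: the single fail: label returns without destroying the caches that earlier steps did create, so a failure in, say, create_gc_caches() leaks the inode and node-manager caches. A hedged sketch of a leak-free body for init_f2fs_fs(), reusing only the helpers already referenced above:

    err = init_inodecache();
    if (err)
        goto fail;
    err = create_node_manager_caches();
    if (err)
        goto free_inodecache;
    err = create_gc_caches();
    if (err)
        goto free_node_manager_caches;
    err = create_checkpoint_caches();
    if (err)
        goto free_gc_caches;
    err = register_filesystem(&f2fs_fs_type);
    if (err)
        goto free_checkpoint_caches;
    return 0;

free_checkpoint_caches:
    destroy_checkpoint_caches();
free_gc_caches:
    destroy_gc_caches();
free_node_manager_caches:
    destroy_node_manager_caches();
free_inodecache:
    destroy_inodecache();
fail:
    return err;
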
641
642static void __exit exit_f2fs_fs(void)
643{
644 destroy_root_stats();
645 unregister_filesystem(&f2fs_fs_type);
646 destroy_checkpoint_caches();
647 destroy_gc_caches();
648 destroy_node_manager_caches();
649 destroy_inodecache();
650}
651
652module_init(init_f2fs_fs)
653module_exit(exit_f2fs_fs)
654
655MODULE_AUTHOR("Samsung Electronics's Praesto Team");
656MODULE_DESCRIPTION("Flash Friendly File System");
657MODULE_LICENSE("GPL");
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
new file mode 100644
index 000000000000..7d52e8dc0c59
--- /dev/null
+++ b/fs/f2fs/xattr.c
@@ -0,0 +1,440 @@
1/*
2 * fs/f2fs/xattr.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/xattr.c
8 *
9 * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
10 *
11 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
12 * Extended attributes for symlinks and special files added per
13 * suggestion of Luka Renko <luka.renko@hermes.si>.
14 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
15 * Red Hat Inc.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License version 2 as
19 * published by the Free Software Foundation.
20 */
21#include <linux/rwsem.h>
22#include <linux/f2fs_fs.h>
23#include "f2fs.h"
24#include "xattr.h"
25
26static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
27 size_t list_size, const char *name, size_t name_len, int type)
28{
29 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
30 int total_len, prefix_len = 0;
31 const char *prefix = NULL;
32
33 switch (type) {
34 case F2FS_XATTR_INDEX_USER:
35 if (!test_opt(sbi, XATTR_USER))
36 return -EOPNOTSUPP;
37 prefix = XATTR_USER_PREFIX;
38 prefix_len = XATTR_USER_PREFIX_LEN;
39 break;
40 case F2FS_XATTR_INDEX_TRUSTED:
41 if (!capable(CAP_SYS_ADMIN))
42 return -EPERM;
43 prefix = XATTR_TRUSTED_PREFIX;
44 prefix_len = XATTR_TRUSTED_PREFIX_LEN;
45 break;
46 default:
47 return -EINVAL;
48 }
49
50 total_len = prefix_len + name_len + 1;
51 if (list && total_len <= list_size) {
52 memcpy(list, prefix, prefix_len);
53 memcpy(list+prefix_len, name, name_len);
54 list[prefix_len + name_len] = '\0';
55 }
56 return total_len;
57}
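
Because the handler copies nothing when list is NULL yet still returns the prefixed length, the usual two-pass listxattr(2) protocol works unchanged. A userspace sketch (the path is an example):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/xattr.h>

    int main(void)
    {
        ssize_t len = listxattr("/mnt/f2fs/file", NULL, 0);  /* pass 1: size only */
        if (len <= 0)
            return 1;

        char *buf = malloc(len);
        if (!buf)
            return 1;
        len = listxattr("/mnt/f2fs/file", buf, len);         /* pass 2: the names */
        for (ssize_t off = 0; off < len; off += strlen(buf + off) + 1)
            printf("%s\n", buf + off);       /* NUL-separated, e.g. "user.foo" */
        free(buf);
        return 0;
    }
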
58
59static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
60 void *buffer, size_t size, int type)
61{
62 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
63
64 switch (type) {
65 case F2FS_XATTR_INDEX_USER:
66 if (!test_opt(sbi, XATTR_USER))
67 return -EOPNOTSUPP;
68 break;
69 case F2FS_XATTR_INDEX_TRUSTED:
70 if (!capable(CAP_SYS_ADMIN))
71 return -EPERM;
72 break;
73 default:
74 return -EINVAL;
75 }
76 if (strcmp(name, "") == 0)
77 return -EINVAL;
78 return f2fs_getxattr(dentry->d_inode, type, name,
79 buffer, size);
80}
81
82static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
83 const void *value, size_t size, int flags, int type)
84{
85 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
86
87 switch (type) {
88 case F2FS_XATTR_INDEX_USER:
89 if (!test_opt(sbi, XATTR_USER))
90 return -EOPNOTSUPP;
91 break;
92 case F2FS_XATTR_INDEX_TRUSTED:
93 if (!capable(CAP_SYS_ADMIN))
94 return -EPERM;
95 break;
96 default:
97 return -EINVAL;
98 }
99 if (strcmp(name, "") == 0)
100 return -EINVAL;
101
102 return f2fs_setxattr(dentry->d_inode, type, name, value, size);
103}
104
105static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
106 size_t list_size, const char *name, size_t name_len, int type)
107{
108 const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
109 size_t size;
110
111 if (type != F2FS_XATTR_INDEX_ADVISE)
112 return 0;
113
114 size = strlen(xname) + 1;
115 if (list && size <= list_size)
116 memcpy(list, xname, size);
117 return size;
118}
119
120static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
121 void *buffer, size_t size, int type)
122{
123 struct inode *inode = dentry->d_inode;
124
125 if (strcmp(name, "") != 0)
126 return -EINVAL;
127
128 if (buffer) *((char *)buffer) = F2FS_I(inode)->i_advise;
129 return sizeof(char);
130}
131
132static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
133 const void *value, size_t size, int flags, int type)
134{
135 struct inode *inode = dentry->d_inode;
136
137 if (strcmp(name, "") != 0)
138 return -EINVAL;
139 if (!inode_owner_or_capable(inode))
140 return -EPERM;
141 if (value == NULL)
142 return -EINVAL;
143
144 F2FS_I(inode)->i_advise |= *(char *)value;
145 return 0;
146}
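
These three callbacks expose the in-memory i_advise byte as a one-byte "system.advise" attribute, and setting it ORs new hint bits in rather than overwriting. A userspace sketch; the 0x01 bit is a made-up example, since the real flag values are defined in f2fs.h outside this hunk:

    #include <sys/xattr.h>

    int mark_advise(const char *path)
    {
        char advise = 0x01;   /* hypothetical hint bit */

        /* ORed into F2FS_I(inode)->i_advise by f2fs_xattr_advise_set() */
        return setxattr(path, "system.advise", &advise, sizeof(advise), 0);
    }
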
147
148const struct xattr_handler f2fs_xattr_user_handler = {
149 .prefix = XATTR_USER_PREFIX,
150 .flags = F2FS_XATTR_INDEX_USER,
151 .list = f2fs_xattr_generic_list,
152 .get = f2fs_xattr_generic_get,
153 .set = f2fs_xattr_generic_set,
154};
155
156const struct xattr_handler f2fs_xattr_trusted_handler = {
157 .prefix = XATTR_TRUSTED_PREFIX,
158 .flags = F2FS_XATTR_INDEX_TRUSTED,
159 .list = f2fs_xattr_generic_list,
160 .get = f2fs_xattr_generic_get,
161 .set = f2fs_xattr_generic_set,
162};
163
164const struct xattr_handler f2fs_xattr_advise_handler = {
165 .prefix = F2FS_SYSTEM_ADVISE_PREFIX,
166 .flags = F2FS_XATTR_INDEX_ADVISE,
167 .list = f2fs_xattr_advise_list,
168 .get = f2fs_xattr_advise_get,
169 .set = f2fs_xattr_advise_set,
170};
171
172static const struct xattr_handler *f2fs_xattr_handler_map[] = {
173 [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
174#ifdef CONFIG_F2FS_FS_POSIX_ACL
175 [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler,
176 [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler,
177#endif
178 [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
179 [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
180};
181
182const struct xattr_handler *f2fs_xattr_handlers[] = {
183 &f2fs_xattr_user_handler,
184#ifdef CONFIG_F2FS_FS_POSIX_ACL
185 &f2fs_xattr_acl_access_handler,
186 &f2fs_xattr_acl_default_handler,
187#endif
188 &f2fs_xattr_trusted_handler,
189 &f2fs_xattr_advise_handler,
190 NULL,
191};
192
193static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
194{
195 const struct xattr_handler *handler = NULL;
196
197 if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
198 handler = f2fs_xattr_handler_map[name_index];
199 return handler;
200}
201
202int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
203 void *buffer, size_t buffer_size)
204{
205 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
206 struct f2fs_inode_info *fi = F2FS_I(inode);
207 struct f2fs_xattr_entry *entry;
208 struct page *page;
209 void *base_addr;
210 int error = 0, found = 0;
211 int value_len, name_len;
212
213 if (name == NULL)
214 return -EINVAL;
215 name_len = strlen(name);
216
217 if (!fi->i_xattr_nid)
218 return -ENODATA;
219
220 page = get_node_page(sbi, fi->i_xattr_nid);
221 base_addr = page_address(page);
222
223 list_for_each_xattr(entry, base_addr) {
224 if (entry->e_name_index != name_index)
225 continue;
226 if (entry->e_name_len != name_len)
227 continue;
228 if (!memcmp(entry->e_name, name, name_len)) {
229 found = 1;
230 break;
231 }
232 }
233 if (!found) {
234 error = -ENODATA;
235 goto cleanup;
236 }
237
238 value_len = le16_to_cpu(entry->e_value_size);
239
240 if (buffer && value_len > buffer_size) {
241 error = -ERANGE;
242 goto cleanup;
243 }
244
245 if (buffer) {
246 char *pval = entry->e_name + entry->e_name_len;
247 memcpy(buffer, pval, value_len);
248 }
249 error = value_len;
250
251cleanup:
252 f2fs_put_page(page, 1);
253 return error;
254}
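
Since the memcpy is skipped when buffer is NULL but value_len is still returned, callers can probe for the size first. A userspace sketch of that convention (path and name are examples):

    #include <stdlib.h>
    #include <sys/xattr.h>

    static void *read_xattr(const char *path, const char *name, ssize_t *lenp)
    {
        ssize_t len = getxattr(path, name, NULL, 0);   /* length query */
        if (len < 0)
            return NULL;

        void *val = malloc(len);
        if (val && getxattr(path, name, val, len) != len) {
            free(val);     /* value changed between the two calls */
            val = NULL;
        }
        *lenp = len;
        return val;
    }
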
255
256ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
257{
258 struct inode *inode = dentry->d_inode;
259 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
260 struct f2fs_inode_info *fi = F2FS_I(inode);
261 struct f2fs_xattr_entry *entry;
262 struct page *page;
263 void *base_addr;
264 int error = 0;
265 size_t rest = buffer_size;
266
267 if (!fi->i_xattr_nid)
268 return 0;
269
270 page = get_node_page(sbi, fi->i_xattr_nid);
271 base_addr = page_address(page);
272
273 list_for_each_xattr(entry, base_addr) {
274 const struct xattr_handler *handler =
275 f2fs_xattr_handler(entry->e_name_index);
276 size_t size;
277
278 if (!handler)
279 continue;
280
281 size = handler->list(dentry, buffer, rest, entry->e_name,
282 entry->e_name_len, handler->flags);
283 if (buffer && size > rest) {
284 error = -ERANGE;
285 goto cleanup;
286 }
287
288 if (buffer)
289 buffer += size;
290 rest -= size;
291 }
292 error = buffer_size - rest;
293cleanup:
294 f2fs_put_page(page, 1);
295 return error;
296}
297
298int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
299 const void *value, size_t value_len)
300{
301 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
302 struct f2fs_inode_info *fi = F2FS_I(inode);
303 struct f2fs_xattr_header *header = NULL;
304 struct f2fs_xattr_entry *here, *last;
305 struct page *page;
306 void *base_addr;
307 int error, found, free, name_len, newsize;
308 char *pval;
309
310 if (name == NULL)
311 return -EINVAL;
312 name_len = strlen(name);
313
314 if (value == NULL)
315 value_len = 0;
316
317 if (name_len > 255 || value_len > MAX_VALUE_LEN)
318 return -ERANGE;
319
320 mutex_lock_op(sbi, NODE_NEW);
321 if (!fi->i_xattr_nid) {
322 /* Allocate new attribute block */
323 struct dnode_of_data dn;
324
325 if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
326 mutex_unlock_op(sbi, NODE_NEW);
327 return -ENOSPC;
328 }
329 set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
330 mark_inode_dirty(inode);
331
332 page = new_node_page(&dn, XATTR_NODE_OFFSET);
333 if (IS_ERR(page)) {
334 alloc_nid_failed(sbi, fi->i_xattr_nid);
335 fi->i_xattr_nid = 0;
336 mutex_unlock_op(sbi, NODE_NEW);
337 return PTR_ERR(page);
338 }
339
340 alloc_nid_done(sbi, fi->i_xattr_nid);
341 base_addr = page_address(page);
342 header = XATTR_HDR(base_addr);
343 header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
344 header->h_refcount = cpu_to_le32(1);
345 } else {
346 /* The inode already has an extended attribute block. */
347 page = get_node_page(sbi, fi->i_xattr_nid);
348 if (IS_ERR(page)) {
349 mutex_unlock_op(sbi, NODE_NEW);
350 return PTR_ERR(page);
351 }
352
353 base_addr = page_address(page);
354 header = XATTR_HDR(base_addr);
355 }
356
357 if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
358 error = -EIO;
359 goto cleanup;
360 }
361
362 /* find entry with wanted name. */
363 found = 0;
364 list_for_each_xattr(here, base_addr) {
365 if (here->e_name_index != name_index)
366 continue;
367 if (here->e_name_len != name_len)
368 continue;
369 if (!memcmp(here->e_name, name, name_len)) {
370 found = 1;
371 break;
372 }
373 }
374
375 last = here;
376
377 while (!IS_XATTR_LAST_ENTRY(last))
378 last = XATTR_NEXT_ENTRY(last);
379
380 newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
381 name_len + value_len);
382
383 /* 1. Check space */
384 if (value) {
385 /* If value is NULL, this is a removal and no space check
386 * is needed. For an add or update, calculate the free space.
387 */
388 free = MIN_OFFSET - ((char *)last - (char *)header);
389 if (found)
390 free = free - ENTRY_SIZE(here);
391
392 if (free < newsize) {
393 error = -ENOSPC;
394 goto cleanup;
395 }
396 }
397
398 /* 2. Remove old entry */
399 if (found) {
400 /* The existing entry is removed here; if no entry was
401 * found, there is nothing to remove.
402 */
403 struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
404 int oldsize = ENTRY_SIZE(here);
405
406 memmove(here, next, (char *)last - (char *)next);
407 last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
408 memset(last, 0, oldsize);
409 }
410
411 /* 3. Write new entry */
412 if (value) {
413 /* Any old entry was removed above, so simply
414 * write the new entry here. */
415 memset(last, 0, newsize);
416 last->e_name_index = name_index;
417 last->e_name_len = name_len;
418 memcpy(last->e_name, name, name_len);
419 pval = last->e_name + name_len;
420 memcpy(pval, value, value_len);
421 last->e_value_size = cpu_to_le16(value_len);
422 }
423
424 set_page_dirty(page);
425 f2fs_put_page(page, 1);
426
427 if (is_inode_flag_set(fi, FI_ACL_MODE)) {
428 inode->i_mode = fi->i_acl_mode;
429 inode->i_ctime = CURRENT_TIME;
430 clear_inode_flag(fi, FI_ACL_MODE);
431 }
432 f2fs_write_inode(inode, NULL);
433 mutex_unlock_op(sbi, NODE_NEW);
434
435 return 0;
436cleanup:
437 f2fs_put_page(page, 1);
438 mutex_unlock_op(sbi, NODE_NEW);
439 return error;
440}
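
Steps 2 and 3 above implement update as remove-then-append: entries are packed back to back, so deleting one is a memmove of everything behind it, and the replacement goes at the new end, where the freshly zeroed tail doubles as the list terminator. A toy, standalone sketch of just that byte shuffling:

    #include <string.h>

    static void replace_packed(char *here, char *next, char *last,
                               const char *new_entry, size_t newsize)
    {
        size_t oldsize = next - here;

        memmove(here, next, last - next);   /* close the gap */
        last -= oldsize;
        memset(last, 0, oldsize);           /* keep the tail zeroed */
        memcpy(last, new_entry, newsize);   /* append the replacement */
    }
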
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
new file mode 100644
index 000000000000..49c9558305e3
--- /dev/null
+++ b/fs/f2fs/xattr.h
@@ -0,0 +1,145 @@
1/*
2 * fs/f2fs/xattr.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/xattr.h
8 *
9 * On-disk format of extended attributes for the ext2 filesystem.
10 *
11 * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2 as
15 * published by the Free Software Foundation.
16 */
17#ifndef __F2FS_XATTR_H__
18#define __F2FS_XATTR_H__
19
20#include <linux/init.h>
21#include <linux/xattr.h>
22
23/* Magic value in attribute blocks */
24#define F2FS_XATTR_MAGIC 0xF2F52011
25
26/* Maximum number of references to one attribute block */
27#define F2FS_XATTR_REFCOUNT_MAX 1024
28
29/* Name indexes */
30#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise"
31#define F2FS_XATTR_INDEX_USER 1
32#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2
33#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
34#define F2FS_XATTR_INDEX_TRUSTED 4
35#define F2FS_XATTR_INDEX_LUSTRE 5
36#define F2FS_XATTR_INDEX_SECURITY 6
37#define F2FS_XATTR_INDEX_ADVISE 7
38
39struct f2fs_xattr_header {
40 __le32 h_magic; /* magic number for identification */
41 __le32 h_refcount; /* reference count */
42 __u32 h_reserved[4]; /* zero right now */
43};
44
45struct f2fs_xattr_entry {
46 __u8 e_name_index;
47 __u8 e_name_len;
48 __le16 e_value_size; /* size of attribute value */
49 char e_name[0]; /* attribute name */
50};
51
52#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr))
53#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr))
54#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr)+1))
55#define XATTR_ROUND (3)
56
57#define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND)
58
59#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \
60 (entry)->e_name_len + le16_to_cpu((entry)->e_value_size)))
61
62#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\
63 ENTRY_SIZE(entry)))
64
65#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
66
67#define list_for_each_xattr(entry, addr) \
68 for (entry = XATTR_FIRST_ENTRY(addr);\
69 !IS_XATTR_LAST_ENTRY(entry);\
70 entry = XATTR_NEXT_ENTRY(entry))
71
72
73#define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \
74 sizeof(struct node_footer) - \
75 sizeof(__u32))
76
77#define MAX_VALUE_LEN (MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \
78 sizeof(struct f2fs_xattr_entry))
79
80/*
81 * On-disk structure of f2fs_xattr
82 * We use only 1 block for xattr.
83 *
84 * +--------------------+
85 * | f2fs_xattr_header |
86 * | |
87 * +--------------------+
88 * | f2fs_xattr_entry |
89 * | .e_name_index = 1 |
90 * | .e_name_len = 3 |
91 * | .e_value_size = 14 |
92 * | .e_name = "foo" |
93 * | "value_of_xattr" |<- value_offs = e_name + e_name_len
94 * +--------------------+
95 * | f2fs_xattr_entry |
96 * | .e_name_index = 4 |
97 * | .e_name = "bar" |
98 * +--------------------+
99 * | |
100 * | Free |
101 * | |
102 * +--------------------+<- MIN_OFFSET
103 * | node_footer |
104 * | (nid, ino, offset) |
105 * +--------------------+
106 *
107 */
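
Plugging numbers into the macros makes the diagram concrete. A sketch assuming 4KB pages and sizeof(struct node_footer) == 24; node_footer is defined in node.h, outside this patch, so that size is an assumption:

    unsigned int min_offset = (4096 - 24 - 4 + 3) & ~3;  /* MIN_OFFSET    = 4068 */
    unsigned int max_value  = 4068 - 24 - 4;             /* MAX_VALUE_LEN = 4040 */
    unsigned int foo_entry  = (4 + 3 + 14 + 3) & ~3;     /* "foo" entry above: 24 */
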
108
109#ifdef CONFIG_F2FS_FS_XATTR
110extern const struct xattr_handler f2fs_xattr_user_handler;
111extern const struct xattr_handler f2fs_xattr_trusted_handler;
112extern const struct xattr_handler f2fs_xattr_acl_access_handler;
113extern const struct xattr_handler f2fs_xattr_acl_default_handler;
114extern const struct xattr_handler f2fs_xattr_advise_handler;
115
116extern const struct xattr_handler *f2fs_xattr_handlers[];
117
118extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
119 const void *value, size_t value_len);
120extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
121 void *buffer, size_t buffer_size);
122extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
123 size_t buffer_size);
124
125#else
126
127#define f2fs_xattr_handlers NULL
128static inline int f2fs_setxattr(struct inode *inode, int name_index,
129 const char *name, const void *value, size_t value_len)
130{
131 return -EOPNOTSUPP;
132}
133static inline int f2fs_getxattr(struct inode *inode, int name_index,
134 const char *name, void *buffer, size_t buffer_size)
135{
136 return -EOPNOTSUPP;
137}
138static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
139 size_t buffer_size)
140{
141 return -EOPNOTSUPP;
142}
143#endif
144
145#endif /* __F2FS_XATTR_H__ */