author		Linus Torvalds <torvalds@linux-foundation.org>	2012-12-20 16:54:51 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-20 16:54:52 -0500
commit		a13eea6bd9ee62ceacfc5243d54c84396bc86cb4 (patch)
tree		46192468880c144f2b367deb5188267866ee1fac
parent		fcc16882ac4532aaa644bff444f0c5d6228ba71e (diff)
parent		6666e6aa9f36b2bfd6b30072c07b34f2a24becf1 (diff)
Merge tag 'for-3.8-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull new F2FS filesystem from Jaegeuk Kim:
 "Introduce a new file system, Flash-Friendly File System (F2FS), to
  Linux 3.8.

  Highlights:
   - Add initial f2fs source codes
   - Fix an endian conversion bug
   - Fix build failures on random configs
   - Fix the power-off-recovery routine
   - Minor cleanup, coding style, and typos patches"

From the Kconfig help text:

  F2FS is based on Log-structured File System (LFS), which supports
  versatile "flash-friendly" features. The design has been focused on
  addressing the fundamental issues in LFS, which are snowball effect
  of wandering tree and high cleaning overhead.

  Since flash-based storages show different characteristics according to
  the internal geometry or flash memory management schemes aka FTL, F2FS
  and tools support various parameters not only for configuring on-disk
  layout, but also for selecting allocation and cleaning algorithms.

and there's an article by Neil Brown about it on lwn.net:

  http://lwn.net/Articles/518988/

* tag 'for-3.8-merge' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (36 commits)
  f2fs: fix tracking parent inode number
  f2fs: cleanup the f2fs_bio_alloc routine
  f2fs: introduce accessor to retrieve number of dentry slots
  f2fs: remove redundant call to f2fs_put_page in delete entry
  f2fs: make use of GFP_F2FS_ZERO for setting gfp_mask
  f2fs: rewrite f2fs_bio_alloc to make it simpler
  f2fs: fix a typo in f2fs documentation
  f2fs: remove unused variable
  f2fs: move error condition for mkdir at proper place
  f2fs: remove unneeded initialization
  f2fs: check read only condition before beginning write out
  f2fs: remove unneeded memset from init_once
  f2fs: show error in case of invalid mount arguments
  f2fs: fix the compiler warning for uninitialized use of variable
  f2fs: resolve build failures
  f2fs: adjust kernel coding style
  f2fs: fix endian conversion bugs reported by sparse
  f2fs: remove unneeded version.h header file from f2fs.h
  f2fs: update the f2fs document
  f2fs: update Kconfig and Makefile
  ...
-rw-r--r--	Documentation/filesystems/00-INDEX	2
-rw-r--r--	Documentation/filesystems/f2fs.txt	421
-rw-r--r--	fs/Kconfig	1
-rw-r--r--	fs/Makefile	1
-rw-r--r--	fs/f2fs/Kconfig	53
-rw-r--r--	fs/f2fs/Makefile	7
-rw-r--r--	fs/f2fs/acl.c	414
-rw-r--r--	fs/f2fs/acl.h	57
-rw-r--r--	fs/f2fs/checkpoint.c	794
-rw-r--r--	fs/f2fs/data.c	702
-rw-r--r--	fs/f2fs/debug.c	361
-rw-r--r--	fs/f2fs/dir.c	672
-rw-r--r--	fs/f2fs/f2fs.h	1083
-rw-r--r--	fs/f2fs/file.c	636
-rw-r--r--	fs/f2fs/gc.c	742
-rw-r--r--	fs/f2fs/gc.h	117
-rw-r--r--	fs/f2fs/hash.c	97
-rw-r--r--	fs/f2fs/inode.c	268
-rw-r--r--	fs/f2fs/namei.c	503
-rw-r--r--	fs/f2fs/node.c	1764
-rw-r--r--	fs/f2fs/node.h	353
-rw-r--r--	fs/f2fs/recovery.c	375
-rw-r--r--	fs/f2fs/segment.c	1791
-rw-r--r--	fs/f2fs/segment.h	618
-rw-r--r--	fs/f2fs/super.c	657
-rw-r--r--	fs/f2fs/xattr.c	440
-rw-r--r--	fs/f2fs/xattr.h	145
-rw-r--r--	include/linux/f2fs_fs.h	413
-rw-r--r--	include/uapi/linux/magic.h	1
29 files changed, 13488 insertions, 0 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 7b52ba7bf32a..8042050eb265 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -50,6 +50,8 @@ ext4.txt
 	- info, mount options and specifications for the Ext4 filesystem.
 files.txt
 	- info on file management in the Linux kernel.
+f2fs.txt
+	- info and mount options for the F2FS filesystem.
 fuse.txt
 	- info on the Filesystem in User SpacE including mount options.
 gfs2.txt
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
new file mode 100644
index 000000000000..8fbd8b46ee34
--- /dev/null
+++ b/Documentation/filesystems/f2fs.txt
@@ -0,0 +1,421 @@
1================================================================================
2WHAT IS Flash-Friendly File System (F2FS)?
3================================================================================
4
5NAND flash memory-based storage devices, such as SSDs, eMMC, and SD cards, have
6been adopted on a variety of systems ranging from mobile to server systems. Since
7they are known to have different characteristics from conventional rotating
8disks, a file system, as an upper layer to the storage device, should adapt to
9these characteristics at the design level, starting from scratch.
10
11F2FS is a file system exploiting NAND flash memory-based storage devices, which
12is based on Log-structured File System (LFS). The design has been focused on
13addressing the fundamental issues in LFS, which are snowball effect of wandering
14tree and high cleaning overhead.
15
16Since a NAND flash memory-based storage device shows different characteristics
17according to its internal geometry or flash memory management scheme, namely FTL,
18F2FS and its tools support various parameters not only for configuring on-disk
19layout, but also for selecting allocation and cleaning algorithms.
20
21The file system formatting tool, "mkfs.f2fs", is available from the following
22git tree:
23>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
24
25For reporting bugs and sending patches, please use the following mailing list:
26>> linux-f2fs-devel@lists.sourceforge.net
27
28================================================================================
29BACKGROUND AND DESIGN ISSUES
30================================================================================
31
32Log-structured File System (LFS)
33--------------------------------
34"A log-structured file system writes all modifications to disk sequentially in
35a log-like structure, thereby speeding up both file writing and crash recovery.
36The log is the only structure on disk; it contains indexing information so that
37files can be read back from the log efficiently. In order to maintain large free
38areas on disk for fast writing, we divide the log into segments and use a
39segment cleaner to compress the live information from heavily fragmented
40segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and
41implementation of a log-structured file system", ACM Trans. Computer Systems
4210, 1, 26–52.
43
44Wandering Tree Problem
45----------------------
46In LFS, when file data is updated and written to the end of the log, its direct
47pointer block is updated due to the changed location. Then the indirect pointer
48block is also updated due to the direct pointer block update. In this manner,
49the upper index structures such as the inode, inode map, and checkpoint block
50are also updated recursively. This is called the wandering tree problem [1],
51and in order to enhance performance, the update propagation should be eliminated
52or relaxed as much as possible.
53
54[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/
55
56Cleaning Overhead
57-----------------
58Since LFS is based on out-of-place writes, it produces many obsolete blocks
59scattered across the whole storage. In order to serve new empty log space, it
60needs to reclaim these obsolete blocks transparently to users. This job is
61called the cleaning process.
62
63The process consists of four operations as follows.
641. A victim segment is selected by referencing the segment usage table.
652. It loads the parent index structures of all the data in the victim, as
66 identified by segment summary blocks.
673. It checks the cross-reference between the data and its parent index structure.
684. It moves valid data selectively.
69
70This cleaning job may cause unexpectedly long delays, so the most important goal
71is to hide these latencies from users. It should also reduce the amount of valid
72data to be moved, and move it quickly as well.
73
74================================================================================
75KEY FEATURES
76================================================================================
77
78Flash Awareness
79---------------
80- Enlarge the random write area for better performance, while providing high
81 spatial locality
82- Align FS data structures to the operational units in FTL on a best-effort basis
83
84Wandering Tree Problem
85----------------------
86- Use the term “node” to represent inodes as well as various pointer blocks
87- Introduce the Node Address Table (NAT) containing the locations of all “node”
88 blocks; this cuts off the update propagation.
89
90Cleaning Overhead
91-----------------
92- Support a background cleaning process
93- Support greedy and cost-benefit algorithms for victim selection policies
94- Support multi-head logs for static/dynamic hot and cold data separation
95- Introduce adaptive logging for efficient block allocation
96
97================================================================================
98MOUNT OPTIONS
99================================================================================
100
101background_gc_off Turn off cleaning operations, namely garbage collection,
102 triggered in the background when the I/O subsystem is idle.
103disable_roll_forward Disable the roll-forward recovery routine
104discard Issue discard/TRIM commands when a segment is cleaned.
105no_heap Disable heap-style segment allocation, which finds free
106 segments for data from the beginning of the main area, and
107 for nodes from the end of the main area.
108nouser_xattr Disable Extended User Attributes. Note: xattr is enabled
109 by default if CONFIG_F2FS_FS_XATTR is selected.
110noacl Disable POSIX Access Control List. Note: acl is enabled
111 by default if CONFIG_F2FS_FS_POSIX_ACL is selected.
112active_logs=%u Support configuring the number of active logs. In the
113 current design, f2fs supports only 2, 4, and 6 logs.
114 Default number is 6.
115disable_ext_identify Disable the extension list configured by mkfs, so f2fs
116 is not aware of cold files such as media files.
117
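A typical invocation combining a few of the options above might look like the
following (an illustrative example, not part of the original document):

	# mount -t f2fs -o background_gc_off,active_logs=4,noacl /dev/block_device /mnt/f2fs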
118================================================================================
119DEBUGFS ENTRIES
120================================================================================
121
122/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as
123f2fs. Each file shows information about f2fs as a whole.
124
125/sys/kernel/debug/f2fs/status includes:
126 - major file system information managed by f2fs currently
127 - average SIT information about whole segments
128 - current memory footprint consumed by f2fs.
129
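The status file is plain text, so it can be inspected directly once an f2fs
partition is mounted (illustrative):

	# cat /sys/kernel/debug/f2fs/status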
130================================================================================
131USAGE
132================================================================================
133
1341. Download userland tools and compile them.
135
1362. Skip this step if f2fs was compiled statically into the kernel.
137 Otherwise, insert the f2fs.ko module.
138 # insmod f2fs.ko
139
1403. Create a directory to be used as the mount point
141 # mkdir /mnt/f2fs
142
1434. Format the block device, and then mount as f2fs
144 # mkfs.f2fs -l label /dev/block_device
145 # mount -t f2fs /dev/block_device /mnt/f2fs
146
147Format options
148--------------
149-l [label] : Give a volume label, up to a 256-character Unicode name.
150-a [0 or 1] : Split the start location of each area for heap-based allocation.
151 1 is set by default, which enables this behavior.
152-o [int] : Set overprovision ratio in percent over volume size.
153 5 is set by default.
154-s [int] : Set the number of segments per section.
155 1 is set by default.
156-z [int] : Set the number of sections per zone.
157 1 is set by default.
158-e [str] : Set the basic extension list, e.g. "mp3,gif,mov".
159
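For example, the following (illustrative) invocation formats a volume with a
label, two segments per section, and a custom cold-file extension list:

	# mkfs.f2fs -l f2fs_vol -s 2 -e mp3,gif,mov /dev/block_device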
160================================================================================
161DESIGN
162================================================================================
163
164On-disk Layout
165--------------
166
167F2FS divides the whole volume into a number of segments, each of which is fixed
168at 2MB in size. A section is composed of consecutive segments, and a zone
169consists of a set of sections. By default, the section and zone sizes are both
170set to one segment, but users can easily modify these sizes with mkfs.
171
172F2FS splits the entire volume into six areas, and all the areas except the
173superblock consist of multiple segments as described below.
174
175 align with the zone size <-|
176 |-> align with the segment size
177 _________________________________________________________________________
178 | | | Node | Segment | Segment | |
179 | Superblock | Checkpoint | Address | Info. | Summary | Main |
180 | (SB) | (CP) | Table (NAT) | Table (SIT) | Area (SSA) | |
181 |____________|_____2______|______N______|______N______|______N_____|__N___|
182 . .
183 . .
184 . .
185 ._________________________________________.
186 |_Segment_|_..._|_Segment_|_..._|_Segment_|
187 . .
188 ._________._________
189 |_section_|__...__|_
190 . .
191 .________.
192 |__zone__|
193
194- Superblock (SB)
195 : It is located at the beginning of the partition, and there are two copies
196 to guard against file system corruption. It contains basic partition
197 information and some default parameters of f2fs.
198
199- Checkpoint (CP)
200 : It contains file system information, bitmaps for valid NAT/SIT sets, orphan
201 inode lists, and summary entries of current active segments.
202
203- Node Address Table (NAT)
204 : It is composed of a block address table for all the node blocks stored in
205 Main area.
206
207- Segment Information Table (SIT)
208 : It contains segment information such as valid block count and bitmap for the
209 validity of all the blocks.
210
211- Segment Summary Area (SSA)
212 : It contains summary entries which contain the owner information of all the
213 data and node blocks stored in Main area.
214
215- Main Area
216 : It contains file and directory data including their indices.
217
218In order to avoid misalignment between the file system and the flash-based
219storage, F2FS aligns the start block address of CP with the segment size. It
220also aligns the start block address of the Main area with the zone size by
221reserving some segments in the SSA area.
222
223See the following survey for additional technical details.
224https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey
225
226File System Metadata Structure
227------------------------------
228
229F2FS adopts a checkpointing scheme to maintain file system consistency. At
230mount time, F2FS first tries to find the last valid checkpoint data by scanning
231the CP area. In order to reduce the scanning time, F2FS uses only two copies of
232CP. One of them always indicates the last valid data; this is called the shadow
233copy mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism.
234
235For file system consistency, each CP points to which NAT and SIT copies are
236valid, as shown below.
237
238 +--------+----------+---------+
239 | CP | NAT | SIT |
240 +--------+----------+---------+
241 . . . .
242 . . . .
243 . . . .
244 +-------+-------+--------+--------+--------+--------+
245 | CP #0 | CP #1 | NAT #0 | NAT #1 | SIT #0 | SIT #1 |
246 +-------+-------+--------+--------+--------+--------+
247 | ^ ^
248 | | |
249 `----------------------------------------'
250
251Index Structure
252---------------
253
254The key data structure used to manage data locations is the "node". Similar to
255traditional file structures, F2FS has three types of nodes: inode, direct node,
256and indirect node. F2FS assigns 4KB to an inode block, which contains 923 data
257block indices, two direct node pointers, two indirect node pointers, and one
258double indirect node pointer, as described below. One direct node block contains
2591018 data block addresses, and one indirect node block also contains 1018 node
260block addresses. Thus, one inode block (i.e., a file) covers:
261
262 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB.
263
264 Inode block (4KB)
265 |- data (923)
266 |- direct node (2)
267 | `- data (1018)
268 |- indirect node (2)
269 | `- direct node (1018)
270 | `- data (1018)
271 `- double indirect node (1)
272 `- indirect node (1018)
273 `- direct node (1018)
274 `- data (1018)
275
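As a quick sanity check of the coverage figure above, the following standalone
C snippet (an editorial sketch, not part of this patch) recomputes the maximum
file size from the fan-outs described in this section:

	#include <stdio.h>

	int main(void)
	{
		/* fan-outs from the inode layout described above */
		unsigned long long blocks = 923ULL		/* in-inode indices */
			+ 2ULL * 1018				/* two direct nodes */
			+ 2ULL * 1018 * 1018			/* two indirect nodes */
			+ 1018ULL * 1018 * 1018;		/* one double indirect node */

		/* 4KB per block; prints ~3.94, matching the 3.94TB figure above */
		printf("max file size = %.2f TB\n",
		       blocks * 4096.0 / (double)(1ULL << 40));
		return 0;
	}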
276Note that all the node blocks are mapped by the NAT, which means the location of
277each node is translated through the NAT. In consideration of the wandering
278tree problem, F2FS is able to cut off the propagation of node updates caused by
279leaf data writes.
280
281Directory Structure
282-------------------
283
284A directory entry occupies 11 bytes, which consists of the following attributes.
285
286- hash hash value of the file name
287- ino inode number
288- len the length of file name
289- type file type such as directory, symlink, etc
290
291A dentry block consists of 214 dentry slots and file names. A bitmap is used
292therein to represent whether each dentry is valid or not. A dentry block occupies
2934KB with the following composition.
294
295 Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) +
296 dentries(11 * 214 bytes) + file name (8 * 214 bytes)
297
298 [Bucket]
299 +--------------------------------+
300 |dentry block 1 | dentry block 2 |
301 +--------------------------------+
302 . .
303 . .
304 . [Dentry Block Structure: 4KB] .
305 +--------+----------+----------+------------+
306 | bitmap | reserved | dentries | file names |
307 +--------+----------+----------+------------+
308 [Dentry Block: 4KB] . .
309 . .
310 . .
311 +------+------+-----+------+
312 | hash | ino | len | type |
313 +------+------+-----+------+
314 [Dentry Structure: 11 bytes]
315
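The 11-byte entry and the 4KB block arithmetic can also be written down as C
structures. This is an editorial sketch with illustrative names, consistent
with the sizes given in the text (27 + 3 + 214 * 11 + 214 * 8 = 4096 bytes):

	#include <linux/types.h>

	#define NR_DENTRY_IN_BLOCK	214
	#define SLOT_LEN		8	/* one file-name slot, in bytes */

	struct dir_entry {			/* 11 bytes on disk */
		__le32	hash_code;		/* hash value of the file name */
		__le32	ino;			/* inode number */
		__le16	name_len;		/* length of the file name */
		__u8	file_type;		/* directory, symlink, etc. */
	} __attribute__((packed));

	struct dentry_block {			/* 4096 bytes in total */
		__u8	dentry_bitmap[27];	/* one validity bit per slot */
		__u8	reserved[3];
		struct dir_entry dentry[NR_DENTRY_IN_BLOCK];
		__u8	filename[NR_DENTRY_IN_BLOCK][SLOT_LEN];
	} __attribute__((packed));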
316F2FS implements multi-level hash tables for the directory structure. Each level
317has a hash table with a dedicated number of hash buckets as shown below. Note
318that "A(2B)" means a bucket that includes 2 data blocks.
319
320----------------------
321A : bucket
322B : block
323N : MAX_DIR_HASH_DEPTH
324----------------------
325
326level #0 | A(2B)
327 |
328level #1 | A(2B) - A(2B)
329 |
330level #2 | A(2B) - A(2B) - A(2B) - A(2B)
331 . | . . . .
332level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
333 . | . . . .
334level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
335
336The numbers of blocks and buckets are determined by:
337
338 ,- 2, if n < MAX_DIR_HASH_DEPTH / 2,
339 # of blocks in level #n = |
340 `- 4, otherwise
341
342 ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2,
343 # of buckets in level #n = |
344 `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), otherwise
345
346When F2FS looks up a file name in a directory, first the hash value of the file
347name is calculated. Then, F2FS scans the hash table in level #0 to find the
348dentry consisting of the file name and its inode number. If not found, F2FS
349scans the next hash table in level #1. In this way, F2FS scans the hash tables
350of each level incrementally from 0 to N. In each level, F2FS needs to scan only
351one bucket, determined by the following equation, which gives O(log(# of files))
352complexity.
353
354 bucket number to scan in level #n = (hash value) % (# of buckets in level #n)
355
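Put as code, the two piecewise formulas and the bucket-selection rule look like
the following sketch (editorial; MAX_DIR_HASH_DEPTH is passed as a parameter
here rather than taken from the kernel headers):

	/* blocks per bucket at a given level: 2 below the midpoint, else 4 */
	static unsigned int bucket_blocks(unsigned int level, unsigned int max_depth)
	{
		return level < max_depth / 2 ? 2 : 4;
	}

	/* buckets at a given level: 2^n below the midpoint, then capped */
	static unsigned int dir_buckets(unsigned int level, unsigned int max_depth)
	{
		if (level < max_depth / 2)
			return 1U << level;
		return 1U << (max_depth / 2 - 1);
	}

	/* the single bucket to scan in level #n for a given name hash */
	static unsigned int bucket_to_scan(unsigned int hash, unsigned int level,
					   unsigned int max_depth)
	{
		return hash % dir_buckets(level, max_depth);
	}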
356In the case of file creation, F2FS finds empty consecutive slots that cover the
357file name. F2FS searches for empty slots in the hash tables of all levels from
3580 to N in the same way as the lookup operation.
359
360The following figure shows an example of two cases holding children.
361 --------------> Dir <--------------
362 | |
363 child child
364
365 child - child [hole] - child
366
367 child - child - child [hole] - [hole] - child
368
369 Case 1: Case 2:
370 Number of children = 6, Number of children = 3,
371 File size = 7 File size = 7
372
373Default Block Allocation
374------------------------
375
376At runtime, F2FS manages six active logs inside the "Main" area: Hot/Warm/Cold
377node and Hot/Warm/Cold data (see the sketch after this list).
378
379- Hot node contains direct node blocks of directories.
380- Warm node contains direct node blocks except hot node blocks.
381- Cold node contains indirect node blocks.
382- Hot data contains dentry blocks.
383- Warm data contains data blocks except hot and cold data blocks.
384- Cold data contains multimedia data or migrated data blocks.
385
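The six logs correspond to the CURSEG_* segment types used elsewhere in this
patch (e.g. CURSEG_HOT_NODE and CURSEG_HOT_DATA in checkpoint.c). As a rough
sketch of the separation, with illustrative names:

	enum log_type {
		HOT_NODE,	/* direct node blocks of directories */
		WARM_NODE,	/* other direct node blocks */
		COLD_NODE,	/* indirect node blocks */
		HOT_DATA,	/* dentry blocks */
		WARM_DATA,	/* ordinary data blocks */
		COLD_DATA,	/* multimedia or migrated data blocks */
	};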
386LFS has two schemes for free space management: threaded log and
387copy-and-compaction. The copy-and-compaction scheme, known as cleaning, is
388well-suited for devices showing very good sequential write performance, since
389free segments are served all the time for writing new data. However, it suffers
390from cleaning overhead under high utilization. Conversely, the threaded log
391scheme suffers from random writes, but needs no cleaning process. F2FS adopts a
392hybrid scheme where the copy-and-compaction scheme is adopted by default, but
393the policy is dynamically changed to the threaded log scheme according to the
394file system status.
395
396In order to align F2FS with the underlying flash-based storage, F2FS allocates a
397segment in the unit of a section. F2FS expects the section size to be the same
398as the unit size of garbage collection in the FTL. Furthermore, with respect to
399the mapping granularity in the FTL, F2FS allocates each section of the active
400logs from a different zone as much as possible, since the FTL can write the data
401in the active logs into one allocation unit according to its mapping granularity.
402
403Cleaning process
404----------------
405
406F2FS does cleaning both on demand and in the background. On-demand cleaning is
407triggered when there are not enough free segments to serve VFS calls. The
408background cleaner is operated by a kernel thread and triggers the cleaning job
409when the system is idle.
410
411F2FS supports two victim selection policies: the greedy and cost-benefit
412algorithms. The greedy algorithm selects the victim segment having the smallest
413number of valid blocks. The cost-benefit algorithm selects a victim segment
414according to the segment age and the number of valid blocks, in order to address
415the log block thrashing problem of the greedy algorithm. F2FS adopts the greedy
416algorithm for the on-demand cleaner, while the background cleaner adopts the
417cost-benefit algorithm.
418
419In order to identify whether the data in a victim segment are valid or not,
420F2FS manages a bitmap. Each bit represents the validity of a block, and the
421bitmap is composed of a bit stream covering all blocks in the main area.
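To make the greedy policy concrete, here is a minimal editorial sketch of
victim selection over a per-segment valid-block-count array (the cost-benefit
variant would additionally weigh segment age); names and types are
illustrative, not this patch's implementation:

	#include <limits.h>

	/* pick the segment with the fewest valid blocks; -1 if none */
	static int pick_greedy_victim(const unsigned int *valid_blocks, int nr_segs)
	{
		unsigned int min_valid = UINT_MAX;
		int seg, victim = -1;

		for (seg = 0; seg < nr_segs; seg++) {
			if (valid_blocks[seg] < min_valid) {
				min_valid = valid_blocks[seg];
				victim = seg;
			}
		}
		return victim;
	}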
diff --git a/fs/Kconfig b/fs/Kconfig
index eaff24a19502..cfe512fd1caf 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -220,6 +220,7 @@ source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
 source "fs/exofs/Kconfig"
+source "fs/f2fs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index 1d7af79288a0..9d53192236fc 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -123,6 +123,7 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/
 obj-$(CONFIG_OCFS2_FS)	+= ocfs2/
 obj-$(CONFIG_BTRFS_FS)	+= btrfs/
 obj-$(CONFIG_GFS2_FS)	+= gfs2/
+obj-$(CONFIG_F2FS_FS)	+= f2fs/
 obj-y			+= exofs/ # Multiple modules
 obj-$(CONFIG_CEPH_FS)	+= ceph/
 obj-$(CONFIG_PSTORE)	+= pstore/
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
new file mode 100644
index 000000000000..fd27e7e6326e
--- /dev/null
+++ b/fs/f2fs/Kconfig
@@ -0,0 +1,53 @@
1config F2FS_FS
2 tristate "F2FS filesystem support (EXPERIMENTAL)"
3 depends on BLOCK
4 help
5 F2FS is based on Log-structured File System (LFS), which supports
6 versatile "flash-friendly" features. The design has been focused on
7 addressing the fundamental issues in LFS, which are snowball effect
8 of wandering tree and high cleaning overhead.
9
10 Since flash-based storages show different characteristics according to
11 the internal geometry or flash memory management schemes aka FTL, F2FS
12 and tools support various parameters not only for configuring on-disk
13 layout, but also for selecting allocation and cleaning algorithms.
14
15 If unsure, say N.
16
17config F2FS_STAT_FS
18 bool "F2FS Status Information"
19 depends on F2FS_FS && DEBUG_FS
20 default y
21 help
22 /sys/kernel/debug/f2fs/ contains information about all the partitions
23 mounted as f2fs. Each file shows information about f2fs as a whole.
24
25 /sys/kernel/debug/f2fs/status includes:
26 - major file system information managed by f2fs currently
27 - average SIT information about whole segments
28 - current memory footprint consumed by f2fs.
29
30config F2FS_FS_XATTR
31 bool "F2FS extended attributes"
32 depends on F2FS_FS
33 default y
34 help
35 Extended attributes are name:value pairs associated with inodes by
36 the kernel or by users (see the attr(5) manual page, or visit
37 <http://acl.bestbits.at/> for details).
38
39 If unsure, say N.
40
41config F2FS_FS_POSIX_ACL
42 bool "F2FS Access Control Lists"
43 depends on F2FS_FS_XATTR
44 select FS_POSIX_ACL
45 default y
46 help
47 POSIX Access Control Lists (ACLs) support permissions for users and
48 groups beyond the owner/group/world scheme.
49
50 To learn more about Access Control Lists, visit the POSIX ACLs for
51 Linux website <http://acl.bestbits.at/>.
52
53 If you don't know what Access Control Lists are, say N.
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
new file mode 100644
index 000000000000..27a0820340b9
--- /dev/null
+++ b/fs/f2fs/Makefile
@@ -0,0 +1,7 @@
1obj-$(CONFIG_F2FS_FS) += f2fs.o
2
3f2fs-y := dir.o file.o inode.o namei.o hash.o super.o
4f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
5f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
6f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
7f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
new file mode 100644
index 000000000000..fed74d193ffb
--- /dev/null
+++ b/fs/f2fs/acl.c
@@ -0,0 +1,414 @@
1/*
2 * fs/f2fs/acl.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/acl.c
8 *
9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#include <linux/f2fs_fs.h>
16#include "f2fs.h"
17#include "xattr.h"
18#include "acl.h"
19
20#define get_inode_mode(i) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
21 (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
22
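/*
 * Editor's note: on disk an ACL is a 4-byte header followed by short
 * entries (tag + perm) for the owner/group/mask/other tags and full
 * entries (tag + perm + id) for named users and groups; f2fs_acl_size()
 * and f2fs_acl_count() below size that layout assuming at most four
 * short entries.
 */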
23static inline size_t f2fs_acl_size(int count)
24{
25 if (count <= 4) {
26 return sizeof(struct f2fs_acl_header) +
27 count * sizeof(struct f2fs_acl_entry_short);
28 } else {
29 return sizeof(struct f2fs_acl_header) +
30 4 * sizeof(struct f2fs_acl_entry_short) +
31 (count - 4) * sizeof(struct f2fs_acl_entry);
32 }
33}
34
35static inline int f2fs_acl_count(size_t size)
36{
37 ssize_t s;
38 size -= sizeof(struct f2fs_acl_header);
39 s = size - 4 * sizeof(struct f2fs_acl_entry_short);
40 if (s < 0) {
41 if (size % sizeof(struct f2fs_acl_entry_short))
42 return -1;
43 return size / sizeof(struct f2fs_acl_entry_short);
44 } else {
45 if (s % sizeof(struct f2fs_acl_entry))
46 return -1;
47 return s / sizeof(struct f2fs_acl_entry) + 4;
48 }
49}
50
51static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size)
52{
53 int i, count;
54 struct posix_acl *acl;
55 struct f2fs_acl_header *hdr = (struct f2fs_acl_header *)value;
56 struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1);
57 const char *end = value + size;
58
59 if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION))
60 return ERR_PTR(-EINVAL);
61
62 count = f2fs_acl_count(size);
63 if (count < 0)
64 return ERR_PTR(-EINVAL);
65 if (count == 0)
66 return NULL;
67
68 acl = posix_acl_alloc(count, GFP_KERNEL);
69 if (!acl)
70 return ERR_PTR(-ENOMEM);
71
72 for (i = 0; i < count; i++) {
73
74 if ((char *)entry > end)
75 goto fail;
76
77 acl->a_entries[i].e_tag = le16_to_cpu(entry->e_tag);
78 acl->a_entries[i].e_perm = le16_to_cpu(entry->e_perm);
79
80 switch (acl->a_entries[i].e_tag) {
81 case ACL_USER_OBJ:
82 case ACL_GROUP_OBJ:
83 case ACL_MASK:
84 case ACL_OTHER:
85 acl->a_entries[i].e_id = ACL_UNDEFINED_ID;
86 entry = (struct f2fs_acl_entry *)((char *)entry +
87 sizeof(struct f2fs_acl_entry_short));
88 break;
89
90 case ACL_USER:
91 acl->a_entries[i].e_uid =
92 make_kuid(&init_user_ns,
93 le32_to_cpu(entry->e_id));
94 entry = (struct f2fs_acl_entry *)((char *)entry +
95 sizeof(struct f2fs_acl_entry));
96 break;
97 case ACL_GROUP:
98 acl->a_entries[i].e_gid =
99 make_kgid(&init_user_ns,
100 le32_to_cpu(entry->e_id));
101 entry = (struct f2fs_acl_entry *)((char *)entry +
102 sizeof(struct f2fs_acl_entry));
103 break;
104 default:
105 goto fail;
106 }
107 }
108 if ((char *)entry != end)
109 goto fail;
110 return acl;
111fail:
112 posix_acl_release(acl);
113 return ERR_PTR(-EINVAL);
114}
115
116static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
117{
118 struct f2fs_acl_header *f2fs_acl;
119 struct f2fs_acl_entry *entry;
120 int i;
121
122 f2fs_acl = kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
123 sizeof(struct f2fs_acl_entry), GFP_KERNEL);
124 if (!f2fs_acl)
125 return ERR_PTR(-ENOMEM);
126
127 f2fs_acl->a_version = cpu_to_le32(F2FS_ACL_VERSION);
128 entry = (struct f2fs_acl_entry *)(f2fs_acl + 1);
129
130 for (i = 0; i < acl->a_count; i++) {
131
132 entry->e_tag = cpu_to_le16(acl->a_entries[i].e_tag);
133 entry->e_perm = cpu_to_le16(acl->a_entries[i].e_perm);
134
135 switch (acl->a_entries[i].e_tag) {
136 case ACL_USER:
137 entry->e_id = cpu_to_le32(
138 from_kuid(&init_user_ns,
139 acl->a_entries[i].e_uid));
140 entry = (struct f2fs_acl_entry *)((char *)entry +
141 sizeof(struct f2fs_acl_entry));
142 break;
143 case ACL_GROUP:
144 entry->e_id = cpu_to_le32(
145 from_kgid(&init_user_ns,
146 acl->a_entries[i].e_gid));
147 entry = (struct f2fs_acl_entry *)((char *)entry +
148 sizeof(struct f2fs_acl_entry));
149 break;
150 case ACL_USER_OBJ:
151 case ACL_GROUP_OBJ:
152 case ACL_MASK:
153 case ACL_OTHER:
154 entry = (struct f2fs_acl_entry *)((char *)entry +
155 sizeof(struct f2fs_acl_entry_short));
156 break;
157 default:
158 goto fail;
159 }
160 }
161 *size = f2fs_acl_size(acl->a_count);
162 return (void *)f2fs_acl;
163
164fail:
165 kfree(f2fs_acl);
166 return ERR_PTR(-EINVAL);
167}
168
169struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
170{
171 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
172 int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
173 void *value = NULL;
174 struct posix_acl *acl;
175 int retval;
176
177 if (!test_opt(sbi, POSIX_ACL))
178 return NULL;
179
180 acl = get_cached_acl(inode, type);
181 if (acl != ACL_NOT_CACHED)
182 return acl;
183
184 if (type == ACL_TYPE_ACCESS)
185 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
186
187 retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
188 if (retval > 0) {
189 value = kmalloc(retval, GFP_KERNEL);
190 if (!value)
191 return ERR_PTR(-ENOMEM);
192 retval = f2fs_getxattr(inode, name_index, "", value, retval);
193 }
194
195 if (retval < 0) {
196 if (retval == -ENODATA)
197 acl = NULL;
198 else
199 acl = ERR_PTR(retval);
200 } else {
201 acl = f2fs_acl_from_disk(value, retval);
202 }
203 kfree(value);
204 if (!IS_ERR(acl))
205 set_cached_acl(inode, type, acl);
206
207 return acl;
208}
209
210static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
211{
212 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
213 struct f2fs_inode_info *fi = F2FS_I(inode);
214 int name_index;
215 void *value = NULL;
216 size_t size = 0;
217 int error;
218
219 if (!test_opt(sbi, POSIX_ACL))
220 return 0;
221 if (S_ISLNK(inode->i_mode))
222 return -EOPNOTSUPP;
223
224 switch (type) {
225 case ACL_TYPE_ACCESS:
226 name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
227 if (acl) {
228 error = posix_acl_equiv_mode(acl, &inode->i_mode);
229 if (error < 0)
230 return error;
231 set_acl_inode(fi, inode->i_mode);
232 if (error == 0)
233 acl = NULL;
234 }
235 break;
236
237 case ACL_TYPE_DEFAULT:
238 name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
239 if (!S_ISDIR(inode->i_mode))
240 return acl ? -EACCES : 0;
241 break;
242
243 default:
244 return -EINVAL;
245 }
246
247 if (acl) {
248 value = f2fs_acl_to_disk(acl, &size);
249 if (IS_ERR(value)) {
250 cond_clear_inode_flag(fi, FI_ACL_MODE);
251 return (int)PTR_ERR(value);
252 }
253 }
254
255 error = f2fs_setxattr(inode, name_index, "", value, size);
256
257 kfree(value);
258 if (!error)
259 set_cached_acl(inode, type, acl);
260
261 cond_clear_inode_flag(fi, FI_ACL_MODE);
262 return error;
263}
264
265int f2fs_init_acl(struct inode *inode, struct inode *dir)
266{
267 struct posix_acl *acl = NULL;
268 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
269 int error = 0;
270
271 if (!S_ISLNK(inode->i_mode)) {
272 if (test_opt(sbi, POSIX_ACL)) {
273 acl = f2fs_get_acl(dir, ACL_TYPE_DEFAULT);
274 if (IS_ERR(acl))
275 return PTR_ERR(acl);
276 }
277 if (!acl)
278 inode->i_mode &= ~current_umask();
279 }
280
281 if (test_opt(sbi, POSIX_ACL) && acl) {
282
283 if (S_ISDIR(inode->i_mode)) {
284 error = f2fs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
285 if (error)
286 goto cleanup;
287 }
288 error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
289 if (error < 0)
290 return error;
291 if (error > 0)
292 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
293 }
294cleanup:
295 posix_acl_release(acl);
296 return error;
297}
298
299int f2fs_acl_chmod(struct inode *inode)
300{
301 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
302 struct posix_acl *acl;
303 int error;
304 mode_t mode = get_inode_mode(inode);
305
306 if (!test_opt(sbi, POSIX_ACL))
307 return 0;
308 if (S_ISLNK(mode))
309 return -EOPNOTSUPP;
310
311 acl = f2fs_get_acl(inode, ACL_TYPE_ACCESS);
312 if (IS_ERR(acl) || !acl)
313 return PTR_ERR(acl);
314
315 error = posix_acl_chmod(&acl, GFP_KERNEL, mode);
316 if (error)
317 return error;
318 error = f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl);
319 posix_acl_release(acl);
320 return error;
321}
322
323static size_t f2fs_xattr_list_acl(struct dentry *dentry, char *list,
324 size_t list_size, const char *name, size_t name_len, int type)
325{
326 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
327 const char *xname = POSIX_ACL_XATTR_DEFAULT;
328 size_t size;
329
330 if (!test_opt(sbi, POSIX_ACL))
331 return 0;
332
333 if (type == ACL_TYPE_ACCESS)
334 xname = POSIX_ACL_XATTR_ACCESS;
335
336 size = strlen(xname) + 1;
337 if (list && size <= list_size)
338 memcpy(list, xname, size);
339 return size;
340}
341
342static int f2fs_xattr_get_acl(struct dentry *dentry, const char *name,
343 void *buffer, size_t size, int type)
344{
345 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
346 struct posix_acl *acl;
347 int error;
348
349 if (strcmp(name, "") != 0)
350 return -EINVAL;
351 if (!test_opt(sbi, POSIX_ACL))
352 return -EOPNOTSUPP;
353
354 acl = f2fs_get_acl(dentry->d_inode, type);
355 if (IS_ERR(acl))
356 return PTR_ERR(acl);
357 if (!acl)
358 return -ENODATA;
359 error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
360 posix_acl_release(acl);
361
362 return error;
363}
364
365static int f2fs_xattr_set_acl(struct dentry *dentry, const char *name,
366 const void *value, size_t size, int flags, int type)
367{
368 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
369 struct inode *inode = dentry->d_inode;
370 struct posix_acl *acl = NULL;
371 int error;
372
373 if (strcmp(name, "") != 0)
374 return -EINVAL;
375 if (!test_opt(sbi, POSIX_ACL))
376 return -EOPNOTSUPP;
377 if (!inode_owner_or_capable(inode))
378 return -EPERM;
379
380 if (value) {
381 acl = posix_acl_from_xattr(&init_user_ns, value, size);
382 if (IS_ERR(acl))
383 return PTR_ERR(acl);
384 if (acl) {
385 error = posix_acl_valid(acl);
386 if (error)
387 goto release_and_out;
388 }
389 } else {
390 acl = NULL;
391 }
392
393 error = f2fs_set_acl(inode, type, acl);
394
395release_and_out:
396 posix_acl_release(acl);
397 return error;
398}
399
400const struct xattr_handler f2fs_xattr_acl_default_handler = {
401 .prefix = POSIX_ACL_XATTR_DEFAULT,
402 .flags = ACL_TYPE_DEFAULT,
403 .list = f2fs_xattr_list_acl,
404 .get = f2fs_xattr_get_acl,
405 .set = f2fs_xattr_set_acl,
406};
407
408const struct xattr_handler f2fs_xattr_acl_access_handler = {
409 .prefix = POSIX_ACL_XATTR_ACCESS,
410 .flags = ACL_TYPE_ACCESS,
411 .list = f2fs_xattr_list_acl,
412 .get = f2fs_xattr_get_acl,
413 .set = f2fs_xattr_set_acl,
414};
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
new file mode 100644
index 000000000000..80f430674417
--- /dev/null
+++ b/fs/f2fs/acl.h
@@ -0,0 +1,57 @@
1/*
2 * fs/f2fs/acl.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/acl.h
8 *
9 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#ifndef __F2FS_ACL_H__
16#define __F2FS_ACL_H__
17
18#include <linux/posix_acl_xattr.h>
19
20#define F2FS_ACL_VERSION 0x0001
21
22struct f2fs_acl_entry {
23 __le16 e_tag;
24 __le16 e_perm;
25 __le32 e_id;
26};
27
28struct f2fs_acl_entry_short {
29 __le16 e_tag;
30 __le16 e_perm;
31};
32
33struct f2fs_acl_header {
34 __le32 a_version;
35};
36
37#ifdef CONFIG_F2FS_FS_POSIX_ACL
38
39extern struct posix_acl *f2fs_get_acl(struct inode *inode, int type);
40extern int f2fs_acl_chmod(struct inode *inode);
41extern int f2fs_init_acl(struct inode *inode, struct inode *dir);
42#else
43#define f2fs_check_acl NULL
44#define f2fs_get_acl NULL
45#define f2fs_set_acl NULL
46
47static inline int f2fs_acl_chmod(struct inode *inode)
48{
49 return 0;
50}
51
52static inline int f2fs_init_acl(struct inode *inode, struct inode *dir)
53{
54 return 0;
55}
56#endif
57#endif /* __F2FS_ACL_H__ */
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
new file mode 100644
index 000000000000..6ef36c37e2be
--- /dev/null
+++ b/fs/f2fs/checkpoint.c
@@ -0,0 +1,794 @@
1/*
2 * fs/f2fs/checkpoint.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/bio.h>
13#include <linux/mpage.h>
14#include <linux/writeback.h>
15#include <linux/blkdev.h>
16#include <linux/f2fs_fs.h>
17#include <linux/pagevec.h>
18#include <linux/swap.h>
19
20#include "f2fs.h"
21#include "node.h"
22#include "segment.h"
23
24static struct kmem_cache *orphan_entry_slab;
25static struct kmem_cache *inode_entry_slab;
26
27/*
28 * We guarantee no failure on the returned page.
29 */
30struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
31{
32 struct address_space *mapping = sbi->meta_inode->i_mapping;
33 struct page *page = NULL;
34repeat:
35 page = grab_cache_page(mapping, index);
36 if (!page) {
37 cond_resched();
38 goto repeat;
39 }
40
41 /* We wait for writeback only inside grab_meta_page() */
42 wait_on_page_writeback(page);
43 SetPageUptodate(page);
44 return page;
45}
46
47/*
48 * We guarantee no failure on the returned page.
49 */
50struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
51{
52 struct address_space *mapping = sbi->meta_inode->i_mapping;
53 struct page *page;
54repeat:
55 page = grab_cache_page(mapping, index);
56 if (!page) {
57 cond_resched();
58 goto repeat;
59 }
60 if (f2fs_readpage(sbi, page, index, READ_SYNC)) {
61 f2fs_put_page(page, 1);
62 goto repeat;
63 }
64 mark_page_accessed(page);
65
66 /* We do not allow returning an erroneous page */
67 return page;
68}
69
70static int f2fs_write_meta_page(struct page *page,
71 struct writeback_control *wbc)
72{
73 struct inode *inode = page->mapping->host;
74 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
75 int err;
76
77 wait_on_page_writeback(page);
78
79 err = write_meta_page(sbi, page, wbc);
80 if (err) {
81 wbc->pages_skipped++;
82 set_page_dirty(page);
83 }
84
85 dec_page_count(sbi, F2FS_DIRTY_META);
86
87 /* In this case, we should not unlock this page */
88 if (err != AOP_WRITEPAGE_ACTIVATE)
89 unlock_page(page);
90 return err;
91}
92
93static int f2fs_write_meta_pages(struct address_space *mapping,
94 struct writeback_control *wbc)
95{
96 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
97 struct block_device *bdev = sbi->sb->s_bdev;
98 long written;
99
100 if (wbc->for_kupdate)
101 return 0;
102
103 if (get_pages(sbi, F2FS_DIRTY_META) == 0)
104 return 0;
105
106 /* if mounting failed, skip writing node pages */
107 mutex_lock(&sbi->cp_mutex);
108 written = sync_meta_pages(sbi, META, bio_get_nr_vecs(bdev));
109 mutex_unlock(&sbi->cp_mutex);
110 wbc->nr_to_write -= written;
111 return 0;
112}
113
114long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
115 long nr_to_write)
116{
117 struct address_space *mapping = sbi->meta_inode->i_mapping;
118 pgoff_t index = 0, end = LONG_MAX;
119 struct pagevec pvec;
120 long nwritten = 0;
121 struct writeback_control wbc = {
122 .for_reclaim = 0,
123 };
124
125 pagevec_init(&pvec, 0);
126
127 while (index <= end) {
128 int i, nr_pages;
129 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
130 PAGECACHE_TAG_DIRTY,
131 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
132 if (nr_pages == 0)
133 break;
134
135 for (i = 0; i < nr_pages; i++) {
136 struct page *page = pvec.pages[i];
137 lock_page(page);
138 BUG_ON(page->mapping != mapping);
139 BUG_ON(!PageDirty(page));
140 clear_page_dirty_for_io(page);
141 f2fs_write_meta_page(page, &wbc);
142 if (nwritten++ >= nr_to_write)
143 break;
144 }
145 pagevec_release(&pvec);
146 cond_resched();
147 }
148
149 if (nwritten)
150 f2fs_submit_bio(sbi, type, nr_to_write == LONG_MAX);
151
152 return nwritten;
153}
154
155static int f2fs_set_meta_page_dirty(struct page *page)
156{
157 struct address_space *mapping = page->mapping;
158 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
159
160 SetPageUptodate(page);
161 if (!PageDirty(page)) {
162 __set_page_dirty_nobuffers(page);
163 inc_page_count(sbi, F2FS_DIRTY_META);
164 F2FS_SET_SB_DIRT(sbi);
165 return 1;
166 }
167 return 0;
168}
169
170const struct address_space_operations f2fs_meta_aops = {
171 .writepage = f2fs_write_meta_page,
172 .writepages = f2fs_write_meta_pages,
173 .set_page_dirty = f2fs_set_meta_page_dirty,
174};
175
176int check_orphan_space(struct f2fs_sb_info *sbi)
177{
178 unsigned int max_orphans;
179 int err = 0;
180
181 /*
182 * Considering 512 blocks in a segment, 5 blocks are needed for the cp
183 * and log segment summaries. The remaining blocks are used to keep
184 * orphan entries. With the limitation of one reserved segment for the
185 * cp pack, we can have at most (512 - 5) * 1020 = 507 * 1020 orphan entries.
186 */
187 max_orphans = (sbi->blocks_per_seg - 5) * F2FS_ORPHANS_PER_BLOCK;
188 mutex_lock(&sbi->orphan_inode_mutex);
189 if (sbi->n_orphans >= max_orphans)
190 err = -ENOSPC;
191 mutex_unlock(&sbi->orphan_inode_mutex);
192 return err;
193}
194
195void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
196{
197 struct list_head *head, *this;
198 struct orphan_inode_entry *new = NULL, *orphan = NULL;
199
200 mutex_lock(&sbi->orphan_inode_mutex);
201 head = &sbi->orphan_inode_list;
202 list_for_each(this, head) {
203 orphan = list_entry(this, struct orphan_inode_entry, list);
204 if (orphan->ino == ino)
205 goto out;
206 if (orphan->ino > ino)
207 break;
208 orphan = NULL;
209 }
210retry:
211 new = kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
212 if (!new) {
213 cond_resched();
214 goto retry;
215 }
216 new->ino = ino;
217 INIT_LIST_HEAD(&new->list);
218
219 /* add the new entry into the list, which is sorted by inode number */
220 if (orphan) {
221 struct orphan_inode_entry *prev;
222
223 /* get previous entry */
224 prev = list_entry(orphan->list.prev, typeof(*prev), list);
225 if (&prev->list != head)
226 /* insert new orphan inode entry */
227 list_add(&new->list, &prev->list);
228 else
229 list_add(&new->list, head);
230 } else {
231 list_add_tail(&new->list, head);
232 }
233 sbi->n_orphans++;
234out:
235 mutex_unlock(&sbi->orphan_inode_mutex);
236}
237
238void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
239{
240 struct list_head *this, *next, *head;
241 struct orphan_inode_entry *orphan;
242
243 mutex_lock(&sbi->orphan_inode_mutex);
244 head = &sbi->orphan_inode_list;
245 list_for_each_safe(this, next, head) {
246 orphan = list_entry(this, struct orphan_inode_entry, list);
247 if (orphan->ino == ino) {
248 list_del(&orphan->list);
249 kmem_cache_free(orphan_entry_slab, orphan);
250 sbi->n_orphans--;
251 break;
252 }
253 }
254 mutex_unlock(&sbi->orphan_inode_mutex);
255}
256
257static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
258{
259 struct inode *inode = f2fs_iget(sbi->sb, ino);
260 BUG_ON(IS_ERR(inode));
261 clear_nlink(inode);
262
263 /* truncate all the data during iput */
264 iput(inode);
265}
266
267int recover_orphan_inodes(struct f2fs_sb_info *sbi)
268{
269 block_t start_blk, orphan_blkaddr, i, j;
270
271 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
272 return 0;
273
274 sbi->por_doing = 1;
275 start_blk = __start_cp_addr(sbi) + 1;
276 orphan_blkaddr = __start_sum_addr(sbi) - 1;
277
278 for (i = 0; i < orphan_blkaddr; i++) {
279 struct page *page = get_meta_page(sbi, start_blk + i);
280 struct f2fs_orphan_block *orphan_blk;
281
282 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
283 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
284 nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
285 recover_orphan_inode(sbi, ino);
286 }
287 f2fs_put_page(page, 1);
288 }
289 /* clear Orphan Flag */
290 clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
291 sbi->por_doing = 0;
292 return 0;
293}
294
295static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
296{
297 struct list_head *head, *this, *next;
298 struct f2fs_orphan_block *orphan_blk = NULL;
299 struct page *page = NULL;
300 unsigned int nentries = 0;
301 unsigned short index = 1;
302 unsigned short orphan_blocks;
303
304 orphan_blocks = (unsigned short)((sbi->n_orphans +
305 (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
306
307 mutex_lock(&sbi->orphan_inode_mutex);
308 head = &sbi->orphan_inode_list;
309
310 /* loop over each orphan inode entry and write them into journal blocks */
311 list_for_each_safe(this, next, head) {
312 struct orphan_inode_entry *orphan;
313
314 orphan = list_entry(this, struct orphan_inode_entry, list);
315
316 if (nentries == F2FS_ORPHANS_PER_BLOCK) {
317 /*
318 * when an orphan block becomes full of 1020 entries,
319 * we need to flush the current orphan block and
320 * bring another one into memory
321 */
322 orphan_blk->blk_addr = cpu_to_le16(index);
323 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
324 orphan_blk->entry_count = cpu_to_le32(nentries);
325 set_page_dirty(page);
326 f2fs_put_page(page, 1);
327 index++;
328 start_blk++;
329 nentries = 0;
330 page = NULL;
331 }
332 if (page)
333 goto page_exist;
334
335 page = grab_meta_page(sbi, start_blk);
336 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
337 memset(orphan_blk, 0, sizeof(*orphan_blk));
338page_exist:
339 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
340 }
341 if (!page)
342 goto end;
343
344 orphan_blk->blk_addr = cpu_to_le16(index);
345 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
346 orphan_blk->entry_count = cpu_to_le32(nentries);
347 set_page_dirty(page);
348 f2fs_put_page(page, 1);
349end:
350 mutex_unlock(&sbi->orphan_inode_mutex);
351}
352
353static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
354 block_t cp_addr, unsigned long long *version)
355{
356 struct page *cp_page_1, *cp_page_2 = NULL;
357 unsigned long blk_size = sbi->blocksize;
358 struct f2fs_checkpoint *cp_block;
359 unsigned long long cur_version = 0, pre_version = 0;
360 unsigned int crc = 0;
361 size_t crc_offset;
362
363 /* Read the 1st cp block in this CP pack */
364 cp_page_1 = get_meta_page(sbi, cp_addr);
365
366 /* get the version number */
367 cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
368 crc_offset = le32_to_cpu(cp_block->checksum_offset);
369 if (crc_offset >= blk_size)
370 goto invalid_cp1;
371
372 crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
373 if (!f2fs_crc_valid(crc, cp_block, crc_offset))
374 goto invalid_cp1;
375
376 pre_version = le64_to_cpu(cp_block->checkpoint_ver);
377
378 /* Read the 2nd cp block in this CP pack */
379 cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
380 cp_page_2 = get_meta_page(sbi, cp_addr);
381
382 cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
383 crc_offset = le32_to_cpu(cp_block->checksum_offset);
384 if (crc_offset >= blk_size)
385 goto invalid_cp2;
386
387 crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset);
388 if (!f2fs_crc_valid(crc, cp_block, crc_offset))
389 goto invalid_cp2;
390
391 cur_version = le64_to_cpu(cp_block->checkpoint_ver);
392
393 if (cur_version == pre_version) {
394 *version = cur_version;
395 f2fs_put_page(cp_page_2, 1);
396 return cp_page_1;
397 }
398invalid_cp2:
399 f2fs_put_page(cp_page_2, 1);
400invalid_cp1:
401 f2fs_put_page(cp_page_1, 1);
402 return NULL;
403}
404
405int get_valid_checkpoint(struct f2fs_sb_info *sbi)
406{
407 struct f2fs_checkpoint *cp_block;
408 struct f2fs_super_block *fsb = sbi->raw_super;
409 struct page *cp1, *cp2, *cur_page;
410 unsigned long blk_size = sbi->blocksize;
411 unsigned long long cp1_version = 0, cp2_version = 0;
412 unsigned long long cp_start_blk_no;
413
414 sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
415 if (!sbi->ckpt)
416 return -ENOMEM;
417 /*
418 * Finding the valid cp block involves reading both
419 * sets (cp pack 1 and cp pack 2)
420 */
421 cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
422 cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
423
424 /* The second checkpoint pack should start at the next segment */
425 cp_start_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
426 cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
427
428 if (cp1 && cp2) {
429 if (ver_after(cp2_version, cp1_version))
430 cur_page = cp2;
431 else
432 cur_page = cp1;
433 } else if (cp1) {
434 cur_page = cp1;
435 } else if (cp2) {
436 cur_page = cp2;
437 } else {
438 goto fail_no_cp;
439 }
440
441 cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
442 memcpy(sbi->ckpt, cp_block, blk_size);
443
444 f2fs_put_page(cp1, 1);
445 f2fs_put_page(cp2, 1);
446 return 0;
447
448fail_no_cp:
449 kfree(sbi->ckpt);
450 return -EINVAL;
451}
452
453void set_dirty_dir_page(struct inode *inode, struct page *page)
454{
455 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
456 struct list_head *head = &sbi->dir_inode_list;
457 struct dir_inode_entry *new;
458 struct list_head *this;
459
460 if (!S_ISDIR(inode->i_mode))
461 return;
462retry:
463 new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
464 if (!new) {
465 cond_resched();
466 goto retry;
467 }
468 new->inode = inode;
469 INIT_LIST_HEAD(&new->list);
470
471 spin_lock(&sbi->dir_inode_lock);
472 list_for_each(this, head) {
473 struct dir_inode_entry *entry;
474 entry = list_entry(this, struct dir_inode_entry, list);
475 if (entry->inode == inode) {
476 kmem_cache_free(inode_entry_slab, new);
477 goto out;
478 }
479 }
480 list_add_tail(&new->list, head);
481 sbi->n_dirty_dirs++;
482
483 BUG_ON(!S_ISDIR(inode->i_mode));
484out:
485 inc_page_count(sbi, F2FS_DIRTY_DENTS);
486 inode_inc_dirty_dents(inode);
487 SetPagePrivate(page);
488
489 spin_unlock(&sbi->dir_inode_lock);
490}
491
492void remove_dirty_dir_inode(struct inode *inode)
493{
494 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
495 struct list_head *head = &sbi->dir_inode_list;
496 struct list_head *this;
497
498 if (!S_ISDIR(inode->i_mode))
499 return;
500
501 spin_lock(&sbi->dir_inode_lock);
502 if (atomic_read(&F2FS_I(inode)->dirty_dents))
503 goto out;
504
505 list_for_each(this, head) {
506 struct dir_inode_entry *entry;
507 entry = list_entry(this, struct dir_inode_entry, list);
508 if (entry->inode == inode) {
509 list_del(&entry->list);
510 kmem_cache_free(inode_entry_slab, entry);
511 sbi->n_dirty_dirs--;
512 break;
513 }
514 }
515out:
516 spin_unlock(&sbi->dir_inode_lock);
517}
518
519void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
520{
521 struct list_head *head = &sbi->dir_inode_list;
522 struct dir_inode_entry *entry;
523 struct inode *inode;
524retry:
525 spin_lock(&sbi->dir_inode_lock);
526 if (list_empty(head)) {
527 spin_unlock(&sbi->dir_inode_lock);
528 return;
529 }
530 entry = list_entry(head->next, struct dir_inode_entry, list);
531 inode = igrab(entry->inode);
532 spin_unlock(&sbi->dir_inode_lock);
533 if (inode) {
534 filemap_flush(inode->i_mapping);
535 iput(inode);
536 } else {
537 /*
538 * We should submit the bio, since several dentry pages
539 * under writeback still exist in the inode being freed.
540 */
541 f2fs_submit_bio(sbi, DATA, true);
542 }
543 goto retry;
544}
545
546/*
547 * Freeze all the FS-operations for checkpoint.
548 */
549void block_operations(struct f2fs_sb_info *sbi)
550{
551 int t;
552 struct writeback_control wbc = {
553 .sync_mode = WB_SYNC_ALL,
554 .nr_to_write = LONG_MAX,
555 .for_reclaim = 0,
556 };
557
558 /* Stop renaming operation */
559 mutex_lock_op(sbi, RENAME);
560 mutex_lock_op(sbi, DENTRY_OPS);
561
562retry_dents:
563 /* write all the dirty dentry pages */
564 sync_dirty_dir_inodes(sbi);
565
566 mutex_lock_op(sbi, DATA_WRITE);
567 if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
568 mutex_unlock_op(sbi, DATA_WRITE);
569 goto retry_dents;
570 }
571
572 /* block all the operations */
573 for (t = DATA_NEW; t <= NODE_TRUNC; t++)
574 mutex_lock_op(sbi, t);
575
576 mutex_lock(&sbi->write_inode);
577
578 /*
579 * POR: we should ensure that there are no dirty node pages
580 * until the nat/sit flush is finished.
581 */
582retry:
583 sync_node_pages(sbi, 0, &wbc);
584
585 mutex_lock_op(sbi, NODE_WRITE);
586
587 if (get_pages(sbi, F2FS_DIRTY_NODES)) {
588 mutex_unlock_op(sbi, NODE_WRITE);
589 goto retry;
590 }
591 mutex_unlock(&sbi->write_inode);
592}
593
594static void unblock_operations(struct f2fs_sb_info *sbi)
595{
596 int t;
597 for (t = NODE_WRITE; t >= RENAME; t--)
598 mutex_unlock_op(sbi, t);
599}
600
601static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
602{
603 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
604 nid_t last_nid = 0;
605 block_t start_blk;
606 struct page *cp_page;
607 unsigned int data_sum_blocks, orphan_blocks;
608 unsigned int crc32 = 0;
609 void *kaddr;
610 int i;
611
612 /* Flush all the NAT/SIT pages */
613 while (get_pages(sbi, F2FS_DIRTY_META))
614 sync_meta_pages(sbi, META, LONG_MAX);
615
616 next_free_nid(sbi, &last_nid);
617
618 /*
619 * modify checkpoint
620 * version number is already updated
621 */
622 ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
623 ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
624 ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
625 for (i = 0; i < 3; i++) {
626 ckpt->cur_node_segno[i] =
627 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
628 ckpt->cur_node_blkoff[i] =
629 cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
630 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
631 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
632 }
633 for (i = 0; i < 3; i++) {
634 ckpt->cur_data_segno[i] =
635 cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
636 ckpt->cur_data_blkoff[i] =
637 cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
638 ckpt->alloc_type[i + CURSEG_HOT_DATA] =
639 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
640 }
641
642 ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
643 ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
644 ckpt->next_free_nid = cpu_to_le32(last_nid);
645
646 /* 2 cp + n data seg summary + orphan inode blocks */
647 data_sum_blocks = npages_for_summary_flush(sbi);
648 if (data_sum_blocks < 3)
649 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
650 else
651 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
652
653 orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
654 / F2FS_ORPHANS_PER_BLOCK;
655 ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
656
657 if (is_umount) {
658 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
659 ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
660 data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
661 } else {
662 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
663 ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
664 data_sum_blocks + orphan_blocks);
665 }
666
667 if (sbi->n_orphans)
668 set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
669 else
670 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
671
672 /* update SIT/NAT bitmap */
673 get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
674 get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
675
676 crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
677 *(__le32 *)((unsigned char *)ckpt +
678 le32_to_cpu(ckpt->checksum_offset))
679 = cpu_to_le32(crc32);
680
681 start_blk = __start_cp_addr(sbi);
682
683 /* write out checkpoint buffer at block 0 */
684 cp_page = grab_meta_page(sbi, start_blk++);
685 kaddr = page_address(cp_page);
686 memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
687 set_page_dirty(cp_page);
688 f2fs_put_page(cp_page, 1);
689
690 if (sbi->n_orphans) {
691 write_orphan_inodes(sbi, start_blk);
692 start_blk += orphan_blocks;
693 }
694
695 write_data_summaries(sbi, start_blk);
696 start_blk += data_sum_blocks;
697 if (is_umount) {
698 write_node_summaries(sbi, start_blk);
699 start_blk += NR_CURSEG_NODE_TYPE;
700 }
701
702 /* writeout checkpoint block */
703 cp_page = grab_meta_page(sbi, start_blk);
704 kaddr = page_address(cp_page);
705 memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
706 set_page_dirty(cp_page);
707 f2fs_put_page(cp_page, 1);
708
709 /* wait for previous submitted node/meta pages writeback */
710 while (get_pages(sbi, F2FS_WRITEBACK))
711 congestion_wait(BLK_RW_ASYNC, HZ / 50);
712
713 filemap_fdatawait_range(sbi->node_inode->i_mapping, 0, LONG_MAX);
714 filemap_fdatawait_range(sbi->meta_inode->i_mapping, 0, LONG_MAX);
715
716 /* update user_block_counts */
717 sbi->last_valid_block_count = sbi->total_valid_block_count;
718 sbi->alloc_valid_block_count = 0;
719
 720	/* Here, we have only one bio containing the CP pack */
721 if (is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))
722 sbi->sb->s_flags |= MS_RDONLY;
723 else
724 sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
725
726 clear_prefree_segments(sbi);
727 F2FS_RESET_SB_DIRT(sbi);
728}
729
730/*
 731 * We guarantee that this checkpoint procedure will not fail.
732 */
733void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount)
734{
735 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
736 unsigned long long ckpt_ver;
737
738 if (!blocked) {
739 mutex_lock(&sbi->cp_mutex);
740 block_operations(sbi);
741 }
742
743 f2fs_submit_bio(sbi, DATA, true);
744 f2fs_submit_bio(sbi, NODE, true);
745 f2fs_submit_bio(sbi, META, true);
746
747 /*
748 * update checkpoint pack index
749 * Increase the version number so that
 750	 * SIT entries and seg summaries are written in the correct place
751 */
752 ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver);
753 ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
754
755 /* write cached NAT/SIT entries to NAT/SIT area */
756 flush_nat_entries(sbi);
757 flush_sit_entries(sbi);
758
759 reset_victim_segmap(sbi);
760
761 /* unlock all the fs_lock[] in do_checkpoint() */
762 do_checkpoint(sbi, is_umount);
763
764 unblock_operations(sbi);
765 mutex_unlock(&sbi->cp_mutex);
766}
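
For orientation, a hypothetical caller sketch (not the actual super.c code): with blocked == false, write_checkpoint() takes cp_mutex and calls block_operations() itself, so a sync-style entry point only has to pass the flags; is_umount == true additionally writes the node summaries and sets CP_UMOUNT_FLAG.

/* Hypothetical caller sketch, assuming the definitions above. */
static int f2fs_sync_fs_sketch(struct super_block *sb, int sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(sb);

	if (sync)
		write_checkpoint(sbi, false, false);
	return 0;
}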
767
768void init_orphan_info(struct f2fs_sb_info *sbi)
769{
770 mutex_init(&sbi->orphan_inode_mutex);
771 INIT_LIST_HEAD(&sbi->orphan_inode_list);
772 sbi->n_orphans = 0;
773}
774
775int create_checkpoint_caches(void)
776{
777 orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
778 sizeof(struct orphan_inode_entry), NULL);
779 if (unlikely(!orphan_entry_slab))
780 return -ENOMEM;
781 inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
782 sizeof(struct dir_inode_entry), NULL);
783 if (unlikely(!inode_entry_slab)) {
784 kmem_cache_destroy(orphan_entry_slab);
785 return -ENOMEM;
786 }
787 return 0;
788}
789
790void destroy_checkpoint_caches(void)
791{
792 kmem_cache_destroy(orphan_entry_slab);
793 kmem_cache_destroy(inode_entry_slab);
794}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
new file mode 100644
index 000000000000..655aeabc1dd4
--- /dev/null
+++ b/fs/f2fs/data.c
@@ -0,0 +1,702 @@
1/*
2 * fs/f2fs/data.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h>
14#include <linux/mpage.h>
15#include <linux/writeback.h>
16#include <linux/backing-dev.h>
17#include <linux/blkdev.h>
18#include <linux/bio.h>
19
20#include "f2fs.h"
21#include "node.h"
22#include "segment.h"
23
24/*
25 * Lock ordering for the change of data block address:
26 * ->data_page
27 * ->node_page
28 * update block addresses in the node page
29 */
30static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
31{
32 struct f2fs_node *rn;
33 __le32 *addr_array;
34 struct page *node_page = dn->node_page;
35 unsigned int ofs_in_node = dn->ofs_in_node;
36
37 wait_on_page_writeback(node_page);
38
39 rn = (struct f2fs_node *)page_address(node_page);
40
41 /* Get physical address of data block */
42 addr_array = blkaddr_in_node(rn);
43 addr_array[ofs_in_node] = cpu_to_le32(new_addr);
44 set_page_dirty(node_page);
45}
46
47int reserve_new_block(struct dnode_of_data *dn)
48{
49 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
50
51 if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
52 return -EPERM;
53 if (!inc_valid_block_count(sbi, dn->inode, 1))
54 return -ENOSPC;
55
56 __set_data_blkaddr(dn, NEW_ADDR);
57 dn->data_blkaddr = NEW_ADDR;
58 sync_inode_page(dn);
59 return 0;
60}
61
62static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
63 struct buffer_head *bh_result)
64{
65 struct f2fs_inode_info *fi = F2FS_I(inode);
66 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
67 pgoff_t start_fofs, end_fofs;
68 block_t start_blkaddr;
69
70 read_lock(&fi->ext.ext_lock);
71 if (fi->ext.len == 0) {
72 read_unlock(&fi->ext.ext_lock);
73 return 0;
74 }
75
76 sbi->total_hit_ext++;
77 start_fofs = fi->ext.fofs;
78 end_fofs = fi->ext.fofs + fi->ext.len - 1;
79 start_blkaddr = fi->ext.blk_addr;
80
81 if (pgofs >= start_fofs && pgofs <= end_fofs) {
82 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
83 size_t count;
84
85 clear_buffer_new(bh_result);
86 map_bh(bh_result, inode->i_sb,
87 start_blkaddr + pgofs - start_fofs);
88 count = end_fofs - pgofs + 1;
89 if (count < (UINT_MAX >> blkbits))
90 bh_result->b_size = (count << blkbits);
91 else
92 bh_result->b_size = UINT_MAX;
93
94 sbi->read_hit_ext++;
95 read_unlock(&fi->ext.ext_lock);
96 return 1;
97 }
98 read_unlock(&fi->ext.ext_lock);
99 return 0;
100}
101
102void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
103{
104 struct f2fs_inode_info *fi = F2FS_I(dn->inode);
105 pgoff_t fofs, start_fofs, end_fofs;
106 block_t start_blkaddr, end_blkaddr;
107
108 BUG_ON(blk_addr == NEW_ADDR);
109 fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node;
110
111 /* Update the page address in the parent node */
112 __set_data_blkaddr(dn, blk_addr);
113
114 write_lock(&fi->ext.ext_lock);
115
116 start_fofs = fi->ext.fofs;
117 end_fofs = fi->ext.fofs + fi->ext.len - 1;
118 start_blkaddr = fi->ext.blk_addr;
119 end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
120
121 /* Drop and initialize the matched extent */
122 if (fi->ext.len == 1 && fofs == start_fofs)
123 fi->ext.len = 0;
124
125 /* Initial extent */
126 if (fi->ext.len == 0) {
127 if (blk_addr != NULL_ADDR) {
128 fi->ext.fofs = fofs;
129 fi->ext.blk_addr = blk_addr;
130 fi->ext.len = 1;
131 }
132 goto end_update;
133 }
134
 135	/* Front merge */
136 if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
137 fi->ext.fofs--;
138 fi->ext.blk_addr--;
139 fi->ext.len++;
140 goto end_update;
141 }
142
143 /* Back merge */
144 if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
145 fi->ext.len++;
146 goto end_update;
147 }
148
149 /* Split the existing extent */
150 if (fi->ext.len > 1 &&
151 fofs >= start_fofs && fofs <= end_fofs) {
152 if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
153 fi->ext.len = fofs - start_fofs;
154 } else {
155 fi->ext.fofs = fofs + 1;
156 fi->ext.blk_addr = start_blkaddr +
157 fofs - start_fofs + 1;
158 fi->ext.len -= fofs - start_fofs + 1;
159 }
160 goto end_update;
161 }
162 write_unlock(&fi->ext.ext_lock);
163 return;
164
165end_update:
166 write_unlock(&fi->ext.ext_lock);
167 sync_inode_page(dn);
168 return;
169}
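
The merge/split cases above are easiest to see on concrete offsets. Below is a small userspace model of the single-extent cache, simplified in that it ignores the NULL_ADDR drop path and locking; the main() walks an extent through a back merge, a front merge, and an interior split:

#include <stdio.h>

/* Userspace model of the single-extent cache updated above: an
 * extent (fofs, blk, len) is grown by front/back merges and shrunk
 * toward the larger half on an interior overwrite. */
struct extent { unsigned int fofs, blk, len; };

static void update_extent(struct extent *e, unsigned int fofs,
			  unsigned int blk)
{
	unsigned int end = e->fofs + e->len - 1;

	if (e->len == 0) {                               /* initial */
		e->fofs = fofs; e->blk = blk; e->len = 1;
	} else if (fofs == e->fofs - 1 && blk == e->blk - 1) {
		e->fofs--; e->blk--; e->len++;           /* front merge */
	} else if (fofs == end + 1 && blk == e->blk + e->len) {
		e->len++;                                /* back merge */
	} else if (fofs >= e->fofs && fofs <= end) {     /* split */
		if (end - fofs < e->len / 2) {
			e->len = fofs - e->fofs;         /* keep front */
		} else {
			e->blk += fofs - e->fofs + 1;    /* keep back */
			e->len -= fofs - e->fofs + 1;
			e->fofs = fofs + 1;
		}
	}
}

int main(void)
{
	struct extent e = { 0, 0, 0 };

	update_extent(&e, 10, 100);   /* initial: file 10 -> block 100 */
	update_extent(&e, 11, 101);   /* back merge */
	update_extent(&e, 9, 99);     /* front merge: now [9..11] */
	update_extent(&e, 10, 500);   /* interior write breaks the run */
	printf("fofs=%u blk=%u len=%u\n", e.fofs, e.blk, e.len);
	return 0;
}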
170
171struct page *find_data_page(struct inode *inode, pgoff_t index)
172{
173 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
174 struct address_space *mapping = inode->i_mapping;
175 struct dnode_of_data dn;
176 struct page *page;
177 int err;
178
179 page = find_get_page(mapping, index);
180 if (page && PageUptodate(page))
181 return page;
182 f2fs_put_page(page, 0);
183
184 set_new_dnode(&dn, inode, NULL, NULL, 0);
185 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
186 if (err)
187 return ERR_PTR(err);
188 f2fs_put_dnode(&dn);
189
190 if (dn.data_blkaddr == NULL_ADDR)
191 return ERR_PTR(-ENOENT);
192
193 /* By fallocate(), there is no cached page, but with NEW_ADDR */
194 if (dn.data_blkaddr == NEW_ADDR)
195 return ERR_PTR(-EINVAL);
196
197 page = grab_cache_page(mapping, index);
198 if (!page)
199 return ERR_PTR(-ENOMEM);
200
201 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
202 if (err) {
203 f2fs_put_page(page, 1);
204 return ERR_PTR(err);
205 }
206 unlock_page(page);
207 return page;
208}
209
210/*
 211 * If it tries to access a hole, return an error, because the
 212 * callers (functions in dir.c and GC) should be able to know
 213 * whether the page exists or not.
214 */
215struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
216{
217 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
218 struct address_space *mapping = inode->i_mapping;
219 struct dnode_of_data dn;
220 struct page *page;
221 int err;
222
223 set_new_dnode(&dn, inode, NULL, NULL, 0);
224 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
225 if (err)
226 return ERR_PTR(err);
227 f2fs_put_dnode(&dn);
228
229 if (dn.data_blkaddr == NULL_ADDR)
230 return ERR_PTR(-ENOENT);
231
232 page = grab_cache_page(mapping, index);
233 if (!page)
234 return ERR_PTR(-ENOMEM);
235
236 if (PageUptodate(page))
237 return page;
238
239 BUG_ON(dn.data_blkaddr == NEW_ADDR);
240 BUG_ON(dn.data_blkaddr == NULL_ADDR);
241
242 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
243 if (err) {
244 f2fs_put_page(page, 1);
245 return ERR_PTR(err);
246 }
247 return page;
248}
249
250/*
251 * Caller ensures that this data page is never allocated.
252 * A new zero-filled data page is allocated in the page cache.
253 */
254struct page *get_new_data_page(struct inode *inode, pgoff_t index,
255 bool new_i_size)
256{
257 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
258 struct address_space *mapping = inode->i_mapping;
259 struct page *page;
260 struct dnode_of_data dn;
261 int err;
262
263 set_new_dnode(&dn, inode, NULL, NULL, 0);
264 err = get_dnode_of_data(&dn, index, 0);
265 if (err)
266 return ERR_PTR(err);
267
268 if (dn.data_blkaddr == NULL_ADDR) {
269 if (reserve_new_block(&dn)) {
270 f2fs_put_dnode(&dn);
271 return ERR_PTR(-ENOSPC);
272 }
273 }
274 f2fs_put_dnode(&dn);
275
276 page = grab_cache_page(mapping, index);
277 if (!page)
278 return ERR_PTR(-ENOMEM);
279
280 if (PageUptodate(page))
281 return page;
282
283 if (dn.data_blkaddr == NEW_ADDR) {
284 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
285 } else {
286 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
287 if (err) {
288 f2fs_put_page(page, 1);
289 return ERR_PTR(err);
290 }
291 }
292 SetPageUptodate(page);
293
294 if (new_i_size &&
295 i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
296 i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
297 mark_inode_dirty_sync(inode);
298 }
299 return page;
300}
301
302static void read_end_io(struct bio *bio, int err)
303{
304 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
305 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
306
307 do {
308 struct page *page = bvec->bv_page;
309
310 if (--bvec >= bio->bi_io_vec)
311 prefetchw(&bvec->bv_page->flags);
312
313 if (uptodate) {
314 SetPageUptodate(page);
315 } else {
316 ClearPageUptodate(page);
317 SetPageError(page);
318 }
319 unlock_page(page);
320 } while (bvec >= bio->bi_io_vec);
321 kfree(bio->bi_private);
322 bio_put(bio);
323}
324
325/*
326 * Fill the locked page with data located in the block address.
327 * Read operation is synchronous, and caller must unlock the page.
328 */
329int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page,
330 block_t blk_addr, int type)
331{
332 struct block_device *bdev = sbi->sb->s_bdev;
333 bool sync = (type == READ_SYNC);
334 struct bio *bio;
335
 336	/* This page may already have been read by other threads */
337 if (PageUptodate(page)) {
338 if (!sync)
339 unlock_page(page);
340 return 0;
341 }
342
343 down_read(&sbi->bio_sem);
344
345 /* Allocate a new bio */
346 bio = f2fs_bio_alloc(bdev, 1);
347
348 /* Initialize the bio */
349 bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
350 bio->bi_end_io = read_end_io;
351
352 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
353 kfree(bio->bi_private);
354 bio_put(bio);
355 up_read(&sbi->bio_sem);
356 return -EFAULT;
357 }
358
359 submit_bio(type, bio);
360 up_read(&sbi->bio_sem);
361
362 /* wait for read completion if sync */
363 if (sync) {
364 lock_page(page);
365 if (PageError(page))
366 return -EIO;
367 }
368 return 0;
369}
370
371/*
 372 * This function should be used only by the data read flow, which
 373 * does not check the "create" flag that indicates block allocation.
 374 * The reason for this special functionality is to exploit the VFS
 375 * readahead mechanism.
376 */
377static int get_data_block_ro(struct inode *inode, sector_t iblock,
378 struct buffer_head *bh_result, int create)
379{
380 unsigned int blkbits = inode->i_sb->s_blocksize_bits;
381 unsigned maxblocks = bh_result->b_size >> blkbits;
382 struct dnode_of_data dn;
383 pgoff_t pgofs;
384 int err;
385
386 /* Get the page offset from the block offset(iblock) */
387 pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
388
389 if (check_extent_cache(inode, pgofs, bh_result))
390 return 0;
391
392 /* When reading holes, we need its node page */
393 set_new_dnode(&dn, inode, NULL, NULL, 0);
394 err = get_dnode_of_data(&dn, pgofs, RDONLY_NODE);
395 if (err)
396 return (err == -ENOENT) ? 0 : err;
397
398 /* It does not support data allocation */
399 BUG_ON(create);
400
401 if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
402 int i;
403 unsigned int end_offset;
404
405 end_offset = IS_INODE(dn.node_page) ?
406 ADDRS_PER_INODE :
407 ADDRS_PER_BLOCK;
408
409 clear_buffer_new(bh_result);
410
 411		/* Give more consecutive addresses for readahead */
412 for (i = 0; i < end_offset - dn.ofs_in_node; i++)
413 if (((datablock_addr(dn.node_page,
414 dn.ofs_in_node + i))
415 != (dn.data_blkaddr + i)) || maxblocks == i)
416 break;
417 map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
418 bh_result->b_size = (i << blkbits);
419 }
420 f2fs_put_dnode(&dn);
421 return 0;
422}
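
The loop above extends the mapping across physically consecutive blocks so that one buffer_head can cover a whole run for readahead. A standalone rendition of just that counting step, with made-up block addresses:

#include <stdio.h>

/* Toy version of the readahead loop in get_data_block_ro(): count
 * how many block addresses, starting from the first, are physically
 * consecutive, capped at maxblocks. */
int main(void)
{
	unsigned int addrs[] = { 100, 101, 102, 200, 201 };
	unsigned int n = sizeof(addrs) / sizeof(addrs[0]);
	unsigned int maxblocks = 8, i;

	for (i = 0; i < n; i++)
		if (addrs[i] != addrs[0] + i || i == maxblocks)
			break;
	printf("mappable run: %u blocks starting at %u\n", i, addrs[0]);
	return 0;
}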
423
424static int f2fs_read_data_page(struct file *file, struct page *page)
425{
426 return mpage_readpage(page, get_data_block_ro);
427}
428
429static int f2fs_read_data_pages(struct file *file,
430 struct address_space *mapping,
431 struct list_head *pages, unsigned nr_pages)
432{
433 return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
434}
435
436int do_write_data_page(struct page *page)
437{
438 struct inode *inode = page->mapping->host;
439 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
440 block_t old_blk_addr, new_blk_addr;
441 struct dnode_of_data dn;
442 int err = 0;
443
444 set_new_dnode(&dn, inode, NULL, NULL, 0);
445 err = get_dnode_of_data(&dn, page->index, RDONLY_NODE);
446 if (err)
447 return err;
448
449 old_blk_addr = dn.data_blkaddr;
450
451 /* This page is already truncated */
452 if (old_blk_addr == NULL_ADDR)
453 goto out_writepage;
454
455 set_page_writeback(page);
456
457 /*
 458	 * If the current allocation needs SSR, it is better to do
 459	 * in-place writes for the updated data.
460 */
461 if (old_blk_addr != NEW_ADDR && !is_cold_data(page) &&
462 need_inplace_update(inode)) {
463 rewrite_data_page(F2FS_SB(inode->i_sb), page,
464 old_blk_addr);
465 } else {
466 write_data_page(inode, page, &dn,
467 old_blk_addr, &new_blk_addr);
468 update_extent_cache(new_blk_addr, &dn);
469 F2FS_I(inode)->data_version =
470 le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
471 }
472out_writepage:
473 f2fs_put_dnode(&dn);
474 return err;
475}
476
477static int f2fs_write_data_page(struct page *page,
478 struct writeback_control *wbc)
479{
480 struct inode *inode = page->mapping->host;
481 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
482 loff_t i_size = i_size_read(inode);
483 const pgoff_t end_index = ((unsigned long long) i_size)
484 >> PAGE_CACHE_SHIFT;
485 unsigned offset;
486 int err = 0;
487
488 if (page->index < end_index)
489 goto out;
490
491 /*
 492	 * If the offset is out of range of the file size,
 493	 * this page does not have to be written to disk.
494 */
495 offset = i_size & (PAGE_CACHE_SIZE - 1);
496 if ((page->index >= end_index + 1) || !offset) {
497 if (S_ISDIR(inode->i_mode)) {
498 dec_page_count(sbi, F2FS_DIRTY_DENTS);
499 inode_dec_dirty_dents(inode);
500 }
501 goto unlock_out;
502 }
503
504 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
505out:
506 if (sbi->por_doing)
507 goto redirty_out;
508
509 if (wbc->for_reclaim && !S_ISDIR(inode->i_mode) && !is_cold_data(page))
510 goto redirty_out;
511
512 mutex_lock_op(sbi, DATA_WRITE);
513 if (S_ISDIR(inode->i_mode)) {
514 dec_page_count(sbi, F2FS_DIRTY_DENTS);
515 inode_dec_dirty_dents(inode);
516 }
517 err = do_write_data_page(page);
518 if (err && err != -ENOENT) {
519 wbc->pages_skipped++;
520 set_page_dirty(page);
521 }
522 mutex_unlock_op(sbi, DATA_WRITE);
523
524 if (wbc->for_reclaim)
525 f2fs_submit_bio(sbi, DATA, true);
526
527 if (err == -ENOENT)
528 goto unlock_out;
529
530 clear_cold_data(page);
531 unlock_page(page);
532
533 if (!wbc->for_reclaim && !S_ISDIR(inode->i_mode))
534 f2fs_balance_fs(sbi);
535 return 0;
536
537unlock_out:
538 unlock_page(page);
539 return (err == -ENOENT) ? 0 : err;
540
541redirty_out:
542 wbc->pages_skipped++;
543 set_page_dirty(page);
544 return AOP_WRITEPAGE_ACTIVATE;
545}
546
547#define MAX_DESIRED_PAGES_WP 4096
548
549static int f2fs_write_data_pages(struct address_space *mapping,
550 struct writeback_control *wbc)
551{
552 struct inode *inode = mapping->host;
553 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
554 int ret;
555 long excess_nrtw = 0, desired_nrtw;
556
557 if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
558 desired_nrtw = MAX_DESIRED_PAGES_WP;
559 excess_nrtw = desired_nrtw - wbc->nr_to_write;
560 wbc->nr_to_write = desired_nrtw;
561 }
562
563 if (!S_ISDIR(inode->i_mode))
564 mutex_lock(&sbi->writepages);
565 ret = generic_writepages(mapping, wbc);
566 if (!S_ISDIR(inode->i_mode))
567 mutex_unlock(&sbi->writepages);
568 f2fs_submit_bio(sbi, DATA, (wbc->sync_mode == WB_SYNC_ALL));
569
570 remove_dirty_dir_inode(inode);
571
572 wbc->nr_to_write -= excess_nrtw;
573 return ret;
574}
575
576static int f2fs_write_begin(struct file *file, struct address_space *mapping,
577 loff_t pos, unsigned len, unsigned flags,
578 struct page **pagep, void **fsdata)
579{
580 struct inode *inode = mapping->host;
581 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
582 struct page *page;
583 pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
584 struct dnode_of_data dn;
585 int err = 0;
586
587 /* for nobh_write_end */
588 *fsdata = NULL;
589
590 f2fs_balance_fs(sbi);
591
592 page = grab_cache_page_write_begin(mapping, index, flags);
593 if (!page)
594 return -ENOMEM;
595 *pagep = page;
596
597 mutex_lock_op(sbi, DATA_NEW);
598
599 set_new_dnode(&dn, inode, NULL, NULL, 0);
600 err = get_dnode_of_data(&dn, index, 0);
601 if (err) {
602 mutex_unlock_op(sbi, DATA_NEW);
603 f2fs_put_page(page, 1);
604 return err;
605 }
606
607 if (dn.data_blkaddr == NULL_ADDR) {
608 err = reserve_new_block(&dn);
609 if (err) {
610 f2fs_put_dnode(&dn);
611 mutex_unlock_op(sbi, DATA_NEW);
612 f2fs_put_page(page, 1);
613 return err;
614 }
615 }
616 f2fs_put_dnode(&dn);
617
618 mutex_unlock_op(sbi, DATA_NEW);
619
620 if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
621 return 0;
622
623 if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
624 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
625 unsigned end = start + len;
626
627 /* Reading beyond i_size is simple: memset to zero */
628 zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
629 return 0;
630 }
631
632 if (dn.data_blkaddr == NEW_ADDR) {
633 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
634 } else {
635 err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC);
636 if (err) {
637 f2fs_put_page(page, 1);
638 return err;
639 }
640 }
641 SetPageUptodate(page);
642 clear_cold_data(page);
643 return 0;
644}
645
646static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
647 const struct iovec *iov, loff_t offset, unsigned long nr_segs)
648{
649 struct file *file = iocb->ki_filp;
650 struct inode *inode = file->f_mapping->host;
651
652 if (rw == WRITE)
653 return 0;
654
655 /* Needs synchronization with the cleaner */
656 return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
657 get_data_block_ro);
658}
659
660static void f2fs_invalidate_data_page(struct page *page, unsigned long offset)
661{
662 struct inode *inode = page->mapping->host;
663 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
664 if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
665 dec_page_count(sbi, F2FS_DIRTY_DENTS);
666 inode_dec_dirty_dents(inode);
667 }
668 ClearPagePrivate(page);
669}
670
671static int f2fs_release_data_page(struct page *page, gfp_t wait)
672{
673 ClearPagePrivate(page);
674 return 0;
675}
676
677static int f2fs_set_data_page_dirty(struct page *page)
678{
679 struct address_space *mapping = page->mapping;
680 struct inode *inode = mapping->host;
681
682 SetPageUptodate(page);
683 if (!PageDirty(page)) {
684 __set_page_dirty_nobuffers(page);
685 set_dirty_dir_page(inode, page);
686 return 1;
687 }
688 return 0;
689}
690
691const struct address_space_operations f2fs_dblock_aops = {
692 .readpage = f2fs_read_data_page,
693 .readpages = f2fs_read_data_pages,
694 .writepage = f2fs_write_data_page,
695 .writepages = f2fs_write_data_pages,
696 .write_begin = f2fs_write_begin,
697 .write_end = nobh_write_end,
698 .set_page_dirty = f2fs_set_data_page_dirty,
699 .invalidatepage = f2fs_invalidate_data_page,
700 .releasepage = f2fs_release_data_page,
701 .direct_IO = f2fs_direct_IO,
702};
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
new file mode 100644
index 000000000000..0e0380a588ad
--- /dev/null
+++ b/fs/f2fs/debug.c
@@ -0,0 +1,361 @@
1/*
2 * f2fs debugging statistics
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 * Copyright (c) 2012 Linux Foundation
7 * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 */
13
14#include <linux/fs.h>
15#include <linux/backing-dev.h>
16#include <linux/proc_fs.h>
17#include <linux/f2fs_fs.h>
18#include <linux/blkdev.h>
19#include <linux/debugfs.h>
20#include <linux/seq_file.h>
21
22#include "f2fs.h"
23#include "node.h"
24#include "segment.h"
25#include "gc.h"
26
27static LIST_HEAD(f2fs_stat_list);
28static struct dentry *debugfs_root;
29
30static void update_general_status(struct f2fs_sb_info *sbi)
31{
32 struct f2fs_stat_info *si = sbi->stat_info;
33 int i;
34
 35	/* validity check of the segment numbers */
36 si->hit_ext = sbi->read_hit_ext;
37 si->total_ext = sbi->total_hit_ext;
38 si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
39 si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
40 si->ndirty_dirs = sbi->n_dirty_dirs;
41 si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
42 si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
43 si->rsvd_segs = reserved_segments(sbi);
44 si->overp_segs = overprovision_segments(sbi);
45 si->valid_count = valid_user_blocks(sbi);
46 si->valid_node_count = valid_node_count(sbi);
47 si->valid_inode_count = valid_inode_count(sbi);
48 si->utilization = utilization(sbi);
49
50 si->free_segs = free_segments(sbi);
51 si->free_secs = free_sections(sbi);
52 si->prefree_count = prefree_segments(sbi);
53 si->dirty_count = dirty_segments(sbi);
54 si->node_pages = sbi->node_inode->i_mapping->nrpages;
55 si->meta_pages = sbi->meta_inode->i_mapping->nrpages;
56 si->nats = NM_I(sbi)->nat_cnt;
57 si->sits = SIT_I(sbi)->dirty_sentries;
58 si->fnids = NM_I(sbi)->fcnt;
59 si->bg_gc = sbi->bg_gc;
60 si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
61 * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
62 / 2;
63 si->util_valid = (int)(written_block_count(sbi) >>
64 sbi->log_blocks_per_seg)
65 * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
66 / 2;
67 si->util_invalid = 50 - si->util_free - si->util_valid;
68 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
69 struct curseg_info *curseg = CURSEG_I(sbi, i);
70 si->curseg[i] = curseg->segno;
71 si->cursec[i] = curseg->segno / sbi->segs_per_sec;
72 si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
73 }
74
75 for (i = 0; i < 2; i++) {
76 si->segment_count[i] = sbi->segment_count[i];
77 si->block_count[i] = sbi->block_count[i];
78 }
79}
80
81/*
 82 * This function calculates the BDF of every segment
83 */
84static void update_sit_info(struct f2fs_sb_info *sbi)
85{
86 struct f2fs_stat_info *si = sbi->stat_info;
87 unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist;
88 struct sit_info *sit_i = SIT_I(sbi);
89 unsigned int segno, vblocks;
90 int ndirty = 0;
91
92 bimodal = 0;
93 total_vblocks = 0;
94 blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
95 hblks_per_sec = blks_per_sec / 2;
96 mutex_lock(&sit_i->sentry_lock);
97 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
98 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
99 dist = abs(vblocks - hblks_per_sec);
100 bimodal += dist * dist;
101
102 if (vblocks > 0 && vblocks < blks_per_sec) {
103 total_vblocks += vblocks;
104 ndirty++;
105 }
106 }
107 mutex_unlock(&sit_i->sentry_lock);
108 dist = sbi->total_sections * hblks_per_sec * hblks_per_sec / 100;
109 si->bimodal = bimodal / dist;
110 if (si->dirty_count)
111 si->avg_vblocks = total_vblocks / ndirty;
112 else
113 si->avg_vblocks = 0;
114}
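
With the normalization used above, the BDF comes out as a percentage of the worst case, in which every section is either completely empty or completely full. A standalone rendition with made-up per-section valid-block counts:

#include <stdio.h>
#include <stdlib.h>

/* Toy version of the BDF computation in update_sit_info(): sum the
 * squared distance of each section's valid-block count from the
 * half-full point, then normalize against the maximum possible sum. */
int main(void)
{
	unsigned int vblocks[] = { 0, 256, 512, 100, 412 }; /* per section */
	unsigned int nsecs = 5, blks_per_sec = 512;
	unsigned int hblks = blks_per_sec / 2;
	unsigned int bimodal = 0, dist, i;

	for (i = 0; i < nsecs; i++) {
		dist = abs((int)vblocks[i] - (int)hblks);
		bimodal += dist * dist;
	}
	/* same normalization as the kernel code */
	dist = nsecs * hblks * hblks / 100;
	printf("BDF: %u%%\n", bimodal / dist);
	return 0;
}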
115
116/*
 117 * This function calculates the memory footprint.
118 */
119static void update_mem_info(struct f2fs_sb_info *sbi)
120{
121 struct f2fs_stat_info *si = sbi->stat_info;
122 unsigned npages;
123
124 if (si->base_mem)
125 goto get_cache;
126
127 si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
128 si->base_mem += 2 * sizeof(struct f2fs_inode_info);
129 si->base_mem += sizeof(*sbi->ckpt);
130
131 /* build sm */
132 si->base_mem += sizeof(struct f2fs_sm_info);
133
134 /* build sit */
135 si->base_mem += sizeof(struct sit_info);
136 si->base_mem += TOTAL_SEGS(sbi) * sizeof(struct seg_entry);
137 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
138 si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * TOTAL_SEGS(sbi);
139 if (sbi->segs_per_sec > 1)
140 si->base_mem += sbi->total_sections *
141 sizeof(struct sec_entry);
142 si->base_mem += __bitmap_size(sbi, SIT_BITMAP);
143
144 /* build free segmap */
145 si->base_mem += sizeof(struct free_segmap_info);
146 si->base_mem += f2fs_bitmap_size(TOTAL_SEGS(sbi));
147 si->base_mem += f2fs_bitmap_size(sbi->total_sections);
148
149 /* build curseg */
150 si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
151 si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE;
152
153 /* build dirty segmap */
154 si->base_mem += sizeof(struct dirty_seglist_info);
155 si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi));
156 si->base_mem += 2 * f2fs_bitmap_size(TOTAL_SEGS(sbi));
157
 158	/* build nm */
159 si->base_mem += sizeof(struct f2fs_nm_info);
160 si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
161
162 /* build gc */
163 si->base_mem += sizeof(struct f2fs_gc_kthread);
164
165get_cache:
166 /* free nids */
167 si->cache_mem = NM_I(sbi)->fcnt;
168 si->cache_mem += NM_I(sbi)->nat_cnt;
169 npages = sbi->node_inode->i_mapping->nrpages;
170 si->cache_mem += npages << PAGE_CACHE_SHIFT;
171 npages = sbi->meta_inode->i_mapping->nrpages;
172 si->cache_mem += npages << PAGE_CACHE_SHIFT;
173 si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
174 si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
175}
176
177static int stat_show(struct seq_file *s, void *v)
178{
179 struct f2fs_stat_info *si, *next;
180 int i = 0;
181 int j;
182
183 list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) {
184
185 mutex_lock(&si->stat_lock);
186 if (!si->sbi) {
187 mutex_unlock(&si->stat_lock);
188 continue;
189 }
190 update_general_status(si->sbi);
191
192 seq_printf(s, "\n=====[ partition info. #%d ]=====\n", i++);
193 seq_printf(s, "[SB: 1] [CP: 2] [NAT: %d] [SIT: %d] ",
194 si->nat_area_segs, si->sit_area_segs);
195 seq_printf(s, "[SSA: %d] [MAIN: %d",
196 si->ssa_area_segs, si->main_area_segs);
197 seq_printf(s, "(OverProv:%d Resv:%d)]\n\n",
198 si->overp_segs, si->rsvd_segs);
199 seq_printf(s, "Utilization: %d%% (%d valid blocks)\n",
200 si->utilization, si->valid_count);
201 seq_printf(s, " - Node: %u (Inode: %u, ",
202 si->valid_node_count, si->valid_inode_count);
203 seq_printf(s, "Other: %u)\n - Data: %u\n",
204 si->valid_node_count - si->valid_inode_count,
205 si->valid_count - si->valid_node_count);
206 seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
207 si->main_area_segs, si->main_area_sections,
208 si->main_area_zones);
209 seq_printf(s, " - COLD data: %d, %d, %d\n",
210 si->curseg[CURSEG_COLD_DATA],
211 si->cursec[CURSEG_COLD_DATA],
212 si->curzone[CURSEG_COLD_DATA]);
213 seq_printf(s, " - WARM data: %d, %d, %d\n",
214 si->curseg[CURSEG_WARM_DATA],
215 si->cursec[CURSEG_WARM_DATA],
216 si->curzone[CURSEG_WARM_DATA]);
217 seq_printf(s, " - HOT data: %d, %d, %d\n",
218 si->curseg[CURSEG_HOT_DATA],
219 si->cursec[CURSEG_HOT_DATA],
220 si->curzone[CURSEG_HOT_DATA]);
221 seq_printf(s, " - Dir dnode: %d, %d, %d\n",
222 si->curseg[CURSEG_HOT_NODE],
223 si->cursec[CURSEG_HOT_NODE],
224 si->curzone[CURSEG_HOT_NODE]);
225 seq_printf(s, " - File dnode: %d, %d, %d\n",
226 si->curseg[CURSEG_WARM_NODE],
227 si->cursec[CURSEG_WARM_NODE],
228 si->curzone[CURSEG_WARM_NODE]);
229 seq_printf(s, " - Indir nodes: %d, %d, %d\n",
230 si->curseg[CURSEG_COLD_NODE],
231 si->cursec[CURSEG_COLD_NODE],
232 si->curzone[CURSEG_COLD_NODE]);
233 seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
234 si->main_area_segs - si->dirty_count -
235 si->prefree_count - si->free_segs,
236 si->dirty_count);
237 seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
238 si->prefree_count, si->free_segs, si->free_secs);
239 seq_printf(s, "GC calls: %d (BG: %d)\n",
240 si->call_count, si->bg_gc);
241 seq_printf(s, " - data segments : %d\n", si->data_segs);
242 seq_printf(s, " - node segments : %d\n", si->node_segs);
243 seq_printf(s, "Try to move %d blocks\n", si->tot_blks);
244 seq_printf(s, " - data blocks : %d\n", si->data_blks);
245 seq_printf(s, " - node blocks : %d\n", si->node_blks);
246 seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
247 si->hit_ext, si->total_ext);
248 seq_printf(s, "\nBalancing F2FS Async:\n");
249 seq_printf(s, " - nodes %4d in %4d\n",
250 si->ndirty_node, si->node_pages);
251 seq_printf(s, " - dents %4d in dirs:%4d\n",
252 si->ndirty_dent, si->ndirty_dirs);
253 seq_printf(s, " - meta %4d in %4d\n",
254 si->ndirty_meta, si->meta_pages);
255 seq_printf(s, " - NATs %5d > %lu\n",
256 si->nats, NM_WOUT_THRESHOLD);
257 seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n",
258 si->sits, si->fnids);
259 seq_printf(s, "\nDistribution of User Blocks:");
260 seq_printf(s, " [ valid | invalid | free ]\n");
261 seq_printf(s, " [");
262
263 for (j = 0; j < si->util_valid; j++)
264 seq_printf(s, "-");
265 seq_printf(s, "|");
266
267 for (j = 0; j < si->util_invalid; j++)
268 seq_printf(s, "-");
269 seq_printf(s, "|");
270
271 for (j = 0; j < si->util_free; j++)
272 seq_printf(s, "-");
273 seq_printf(s, "]\n\n");
274 seq_printf(s, "SSR: %u blocks in %u segments\n",
275 si->block_count[SSR], si->segment_count[SSR]);
276 seq_printf(s, "LFS: %u blocks in %u segments\n",
277 si->block_count[LFS], si->segment_count[LFS]);
278
279 /* segment usage info */
280 update_sit_info(si->sbi);
281 seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n",
282 si->bimodal, si->avg_vblocks);
283
284 /* memory footprint */
285 update_mem_info(si->sbi);
286 seq_printf(s, "\nMemory: %u KB = static: %u + cached: %u\n",
287 (si->base_mem + si->cache_mem) >> 10,
288 si->base_mem >> 10, si->cache_mem >> 10);
289 mutex_unlock(&si->stat_lock);
290 }
291 return 0;
292}
293
294static int stat_open(struct inode *inode, struct file *file)
295{
296 return single_open(file, stat_show, inode->i_private);
297}
298
299static const struct file_operations stat_fops = {
300 .open = stat_open,
301 .read = seq_read,
302 .llseek = seq_lseek,
303 .release = single_release,
304};
305
306static int init_stats(struct f2fs_sb_info *sbi)
307{
308 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
309 struct f2fs_stat_info *si;
310
311 sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL);
312 if (!sbi->stat_info)
313 return -ENOMEM;
314
315 si = sbi->stat_info;
316 mutex_init(&si->stat_lock);
317 list_add_tail(&si->stat_list, &f2fs_stat_list);
318
319 si->all_area_segs = le32_to_cpu(raw_super->segment_count);
320 si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit);
321 si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat);
322 si->ssa_area_segs = le32_to_cpu(raw_super->segment_count_ssa);
323 si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
324 si->main_area_sections = le32_to_cpu(raw_super->section_count);
325 si->main_area_zones = si->main_area_sections /
326 le32_to_cpu(raw_super->secs_per_zone);
327 si->sbi = sbi;
328 return 0;
329}
330
331int f2fs_build_stats(struct f2fs_sb_info *sbi)
332{
333 int retval;
334
335 retval = init_stats(sbi);
336 if (retval)
337 return retval;
338
339 if (!debugfs_root)
340 debugfs_root = debugfs_create_dir("f2fs", NULL);
341
342 debugfs_create_file("status", S_IRUGO, debugfs_root, NULL, &stat_fops);
343 return 0;
344}
345
346void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
347{
348 struct f2fs_stat_info *si = sbi->stat_info;
349
350 list_del(&si->stat_list);
351 mutex_lock(&si->stat_lock);
352 si->sbi = NULL;
353 mutex_unlock(&si->stat_lock);
354 kfree(sbi->stat_info);
355}
356
357void destroy_root_stats(void)
358{
359 debugfs_remove_recursive(debugfs_root);
360 debugfs_root = NULL;
361}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
new file mode 100644
index 000000000000..b4e24f32b54e
--- /dev/null
+++ b/fs/f2fs/dir.c
@@ -0,0 +1,672 @@
1/*
2 * fs/f2fs/dir.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include "f2fs.h"
14#include "acl.h"
15
16static unsigned long dir_blocks(struct inode *inode)
17{
18 return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
19 >> PAGE_CACHE_SHIFT;
20}
21
22static unsigned int dir_buckets(unsigned int level)
23{
24 if (level < MAX_DIR_HASH_DEPTH / 2)
25 return 1 << level;
26 else
27 return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
28}
29
30static unsigned int bucket_blocks(unsigned int level)
31{
32 if (level < MAX_DIR_HASH_DEPTH / 2)
33 return 2;
34 else
35 return 4;
36}
37
38static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
39 [F2FS_FT_UNKNOWN] = DT_UNKNOWN,
40 [F2FS_FT_REG_FILE] = DT_REG,
41 [F2FS_FT_DIR] = DT_DIR,
42 [F2FS_FT_CHRDEV] = DT_CHR,
43 [F2FS_FT_BLKDEV] = DT_BLK,
44 [F2FS_FT_FIFO] = DT_FIFO,
45 [F2FS_FT_SOCK] = DT_SOCK,
46 [F2FS_FT_SYMLINK] = DT_LNK,
47};
48
49#define S_SHIFT 12
50static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
51 [S_IFREG >> S_SHIFT] = F2FS_FT_REG_FILE,
52 [S_IFDIR >> S_SHIFT] = F2FS_FT_DIR,
53 [S_IFCHR >> S_SHIFT] = F2FS_FT_CHRDEV,
54 [S_IFBLK >> S_SHIFT] = F2FS_FT_BLKDEV,
55 [S_IFIFO >> S_SHIFT] = F2FS_FT_FIFO,
56 [S_IFSOCK >> S_SHIFT] = F2FS_FT_SOCK,
57 [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
58};
59
60static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
61{
62 mode_t mode = inode->i_mode;
63 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
64}
65
66static unsigned long dir_block_index(unsigned int level, unsigned int idx)
67{
68 unsigned long i;
69 unsigned long bidx = 0;
70
71 for (i = 0; i < level; i++)
72 bidx += dir_buckets(i) * bucket_blocks(i);
73 bidx += idx * bucket_blocks(level);
74 return bidx;
75}
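
Taken together, the three helpers above define a multi-level hash table laid out linearly in the directory file: level N holds dir_buckets(N) buckets of bucket_blocks(N) blocks each, placed after all the blocks of levels 0 through N-1. A userspace copy of the helpers plus a main() that locates a hash's bucket at each level; the MAX_DIR_HASH_DEPTH value is assumed for illustration:

#include <stdio.h>

#define MAX_DIR_HASH_DEPTH 64 /* assumed value, for illustration only */

static unsigned int dir_buckets(unsigned int level)
{
	if (level < MAX_DIR_HASH_DEPTH / 2)
		return 1 << level;       /* buckets double per level */
	else
		return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
}

static unsigned int bucket_blocks(unsigned int level)
{
	/* shallow levels use 2-block buckets, deep levels 4-block */
	return level < MAX_DIR_HASH_DEPTH / 2 ? 2 : 4;
}

static unsigned long dir_block_index(unsigned int level, unsigned int idx)
{
	unsigned long i, bidx = 0;

	for (i = 0; i < level; i++)       /* skip all shallower levels */
		bidx += dir_buckets(i) * bucket_blocks(i);
	return bidx + idx * bucket_blocks(level);
}

int main(void)
{
	unsigned int level, hash = 0x5f2f;

	for (level = 0; level < 4; level++)
		printf("level %u: bucket %u starts at block %lu\n",
		       level, hash % dir_buckets(level),
		       dir_block_index(level, hash % dir_buckets(level)));
	return 0;
}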
76
77static bool early_match_name(const char *name, int namelen,
78 f2fs_hash_t namehash, struct f2fs_dir_entry *de)
79{
80 if (le16_to_cpu(de->name_len) != namelen)
81 return false;
82
83 if (de->hash_code != namehash)
84 return false;
85
86 return true;
87}
88
89static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
90 const char *name, int namelen, int *max_slots,
91 f2fs_hash_t namehash, struct page **res_page)
92{
93 struct f2fs_dir_entry *de;
94 unsigned long bit_pos, end_pos, next_pos;
95 struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
96 int slots;
97
98 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
99 NR_DENTRY_IN_BLOCK, 0);
100 while (bit_pos < NR_DENTRY_IN_BLOCK) {
101 de = &dentry_blk->dentry[bit_pos];
102 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
103
104 if (early_match_name(name, namelen, namehash, de)) {
105 if (!memcmp(dentry_blk->filename[bit_pos],
106 name, namelen)) {
107 *res_page = dentry_page;
108 goto found;
109 }
110 }
111 next_pos = bit_pos + slots;
112 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
113 NR_DENTRY_IN_BLOCK, next_pos);
114 if (bit_pos >= NR_DENTRY_IN_BLOCK)
115 end_pos = NR_DENTRY_IN_BLOCK;
116 else
117 end_pos = bit_pos;
118 if (*max_slots < end_pos - next_pos)
119 *max_slots = end_pos - next_pos;
120 }
121
122 de = NULL;
123 kunmap(dentry_page);
124found:
125 return de;
126}
127
128static struct f2fs_dir_entry *find_in_level(struct inode *dir,
129 unsigned int level, const char *name, int namelen,
130 f2fs_hash_t namehash, struct page **res_page)
131{
132 int s = GET_DENTRY_SLOTS(namelen);
133 unsigned int nbucket, nblock;
134 unsigned int bidx, end_block;
135 struct page *dentry_page;
136 struct f2fs_dir_entry *de = NULL;
137 bool room = false;
138 int max_slots = 0;
139
140 BUG_ON(level > MAX_DIR_HASH_DEPTH);
141
142 nbucket = dir_buckets(level);
143 nblock = bucket_blocks(level);
144
145 bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
146 end_block = bidx + nblock;
147
148 for (; bidx < end_block; bidx++) {
149 /* no need to allocate new dentry pages to all the indices */
150 dentry_page = find_data_page(dir, bidx);
151 if (IS_ERR(dentry_page)) {
152 room = true;
153 continue;
154 }
155
156 de = find_in_block(dentry_page, name, namelen,
157 &max_slots, namehash, res_page);
158 if (de)
159 break;
160
161 if (max_slots >= s)
162 room = true;
163 f2fs_put_page(dentry_page, 0);
164 }
165
166 if (!de && room && F2FS_I(dir)->chash != namehash) {
167 F2FS_I(dir)->chash = namehash;
168 F2FS_I(dir)->clevel = level;
169 }
170
171 return de;
172}
173
174/*
175 * Find an entry in the specified directory with the wanted name.
176 * It returns the page where the entry was found (as a parameter - res_page),
177 * and the entry itself. Page is returned mapped and unlocked.
178 * Entry is guaranteed to be valid.
179 */
180struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
181 struct qstr *child, struct page **res_page)
182{
183 const char *name = child->name;
184 int namelen = child->len;
185 unsigned long npages = dir_blocks(dir);
186 struct f2fs_dir_entry *de = NULL;
187 f2fs_hash_t name_hash;
188 unsigned int max_depth;
189 unsigned int level;
190
191 if (npages == 0)
192 return NULL;
193
194 *res_page = NULL;
195
196 name_hash = f2fs_dentry_hash(name, namelen);
197 max_depth = F2FS_I(dir)->i_current_depth;
198
199 for (level = 0; level < max_depth; level++) {
200 de = find_in_level(dir, level, name,
201 namelen, name_hash, res_page);
202 if (de)
203 break;
204 }
205 if (!de && F2FS_I(dir)->chash != name_hash) {
206 F2FS_I(dir)->chash = name_hash;
207 F2FS_I(dir)->clevel = level - 1;
208 }
209 return de;
210}
211
212struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
213{
214 struct page *page = NULL;
215 struct f2fs_dir_entry *de = NULL;
216 struct f2fs_dentry_block *dentry_blk = NULL;
217
218 page = get_lock_data_page(dir, 0);
219 if (IS_ERR(page))
220 return NULL;
221
222 dentry_blk = kmap(page);
223 de = &dentry_blk->dentry[1];
224 *p = page;
225 unlock_page(page);
226 return de;
227}
228
229ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
230{
231 ino_t res = 0;
232 struct f2fs_dir_entry *de;
233 struct page *page;
234
235 de = f2fs_find_entry(dir, qstr, &page);
236 if (de) {
237 res = le32_to_cpu(de->ino);
238 kunmap(page);
239 f2fs_put_page(page, 0);
240 }
241
242 return res;
243}
244
245void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
246 struct page *page, struct inode *inode)
247{
248 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
249
250 mutex_lock_op(sbi, DENTRY_OPS);
251 lock_page(page);
252 wait_on_page_writeback(page);
253 de->ino = cpu_to_le32(inode->i_ino);
254 set_de_type(de, inode);
255 kunmap(page);
256 set_page_dirty(page);
257 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
258 mark_inode_dirty(dir);
259
260 /* update parent inode number before releasing dentry page */
261 F2FS_I(inode)->i_pino = dir->i_ino;
262
263 f2fs_put_page(page, 1);
264 mutex_unlock_op(sbi, DENTRY_OPS);
265}
266
267void init_dent_inode(struct dentry *dentry, struct page *ipage)
268{
269 struct f2fs_node *rn;
270
271 if (IS_ERR(ipage))
272 return;
273
274 wait_on_page_writeback(ipage);
275
276 /* copy dentry info. to this inode page */
277 rn = (struct f2fs_node *)page_address(ipage);
278 rn->i.i_namelen = cpu_to_le32(dentry->d_name.len);
279 memcpy(rn->i.i_name, dentry->d_name.name, dentry->d_name.len);
280 set_page_dirty(ipage);
281}
282
283static int init_inode_metadata(struct inode *inode, struct dentry *dentry)
284{
285 struct inode *dir = dentry->d_parent->d_inode;
286
287 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
288 int err;
289 err = new_inode_page(inode, dentry);
290 if (err)
291 return err;
292
293 if (S_ISDIR(inode->i_mode)) {
294 err = f2fs_make_empty(inode, dir);
295 if (err) {
296 remove_inode_page(inode);
297 return err;
298 }
299 }
300
301 err = f2fs_init_acl(inode, dir);
302 if (err) {
303 remove_inode_page(inode);
304 return err;
305 }
306 } else {
307 struct page *ipage;
308 ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino);
309 if (IS_ERR(ipage))
310 return PTR_ERR(ipage);
311 init_dent_inode(dentry, ipage);
312 f2fs_put_page(ipage, 1);
313 }
314 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) {
315 inc_nlink(inode);
316 f2fs_write_inode(inode, NULL);
317 }
318 return 0;
319}
320
321static void update_parent_metadata(struct inode *dir, struct inode *inode,
322 unsigned int current_depth)
323{
324 bool need_dir_update = false;
325
326 if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
327 if (S_ISDIR(inode->i_mode)) {
328 inc_nlink(dir);
329 need_dir_update = true;
330 }
331 clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
332 }
333 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
334 if (F2FS_I(dir)->i_current_depth != current_depth) {
335 F2FS_I(dir)->i_current_depth = current_depth;
336 need_dir_update = true;
337 }
338
339 if (need_dir_update)
340 f2fs_write_inode(dir, NULL);
341 else
342 mark_inode_dirty(dir);
343
344 if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
345 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
346}
347
348static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
349{
350 int bit_start = 0;
351 int zero_start, zero_end;
352next:
353 zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap,
354 NR_DENTRY_IN_BLOCK,
355 bit_start);
356 if (zero_start >= NR_DENTRY_IN_BLOCK)
357 return NR_DENTRY_IN_BLOCK;
358
359 zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap,
360 NR_DENTRY_IN_BLOCK,
361 zero_start);
362 if (zero_end - zero_start >= slots)
363 return zero_start;
364
365 bit_start = zero_end + 1;
366
367 if (zero_end + 1 >= NR_DENTRY_IN_BLOCK)
368 return NR_DENTRY_IN_BLOCK;
369 goto next;
370}
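
room_for_filename() scans the dentry bitmap for a run of free slots long enough for the name, since long names occupy multiple consecutive slots. A simplified userspace rendition over a 32-bit toy bitmap, using plain shifts instead of the kernel's find_next_*_bit_le helpers:

#include <stdio.h>

#define NR_DENTRY 32 /* toy size; the real NR_DENTRY_IN_BLOCK differs */

/* Find the first run of 'slots' consecutive zero bits, or return
 * NR_DENTRY when no such run exists (same contract as above). */
static int room_for_filename(unsigned int bitmap, int slots)
{
	int zero_start, zero_end, bit_start = 0;

	while (bit_start < NR_DENTRY) {
		for (zero_start = bit_start; zero_start < NR_DENTRY &&
		     (bitmap >> zero_start & 1); zero_start++)
			;                 /* skip used slots */
		if (zero_start >= NR_DENTRY)
			return NR_DENTRY;
		for (zero_end = zero_start; zero_end < NR_DENTRY &&
		     !(bitmap >> zero_end & 1); zero_end++)
			;                 /* measure the free run */
		if (zero_end - zero_start >= slots)
			return zero_start;
		bit_start = zero_end + 1;
	}
	return NR_DENTRY;
}

int main(void)
{
	/* bits 0-2 and 5 are in use: 0b100111 */
	printf("3 slots fit at bit %d\n", room_for_filename(0x27, 3));
	return 0;
}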
371
372int f2fs_add_link(struct dentry *dentry, struct inode *inode)
373{
374 unsigned int bit_pos;
375 unsigned int level;
376 unsigned int current_depth;
377 unsigned long bidx, block;
378 f2fs_hash_t dentry_hash;
379 struct f2fs_dir_entry *de;
380 unsigned int nbucket, nblock;
381 struct inode *dir = dentry->d_parent->d_inode;
382 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
383 const char *name = dentry->d_name.name;
384 int namelen = dentry->d_name.len;
385 struct page *dentry_page = NULL;
386 struct f2fs_dentry_block *dentry_blk = NULL;
387 int slots = GET_DENTRY_SLOTS(namelen);
388 int err = 0;
389 int i;
390
391 dentry_hash = f2fs_dentry_hash(name, dentry->d_name.len);
392 level = 0;
393 current_depth = F2FS_I(dir)->i_current_depth;
394 if (F2FS_I(dir)->chash == dentry_hash) {
395 level = F2FS_I(dir)->clevel;
396 F2FS_I(dir)->chash = 0;
397 }
398
399start:
400 if (current_depth == MAX_DIR_HASH_DEPTH)
401 return -ENOSPC;
402
403 /* Increase the depth, if required */
404 if (level == current_depth)
405 ++current_depth;
406
407 nbucket = dir_buckets(level);
408 nblock = bucket_blocks(level);
409
410 bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
411
412 for (block = bidx; block <= (bidx + nblock - 1); block++) {
413 mutex_lock_op(sbi, DENTRY_OPS);
414 dentry_page = get_new_data_page(dir, block, true);
415 if (IS_ERR(dentry_page)) {
416 mutex_unlock_op(sbi, DENTRY_OPS);
417 return PTR_ERR(dentry_page);
418 }
419
420 dentry_blk = kmap(dentry_page);
421 bit_pos = room_for_filename(dentry_blk, slots);
422 if (bit_pos < NR_DENTRY_IN_BLOCK)
423 goto add_dentry;
424
425 kunmap(dentry_page);
426 f2fs_put_page(dentry_page, 1);
427 mutex_unlock_op(sbi, DENTRY_OPS);
428 }
429
 430	/* Move to the next level to find an empty slot for the new dentry */
431 ++level;
432 goto start;
433add_dentry:
434 err = init_inode_metadata(inode, dentry);
435 if (err)
436 goto fail;
437
438 wait_on_page_writeback(dentry_page);
439
440 de = &dentry_blk->dentry[bit_pos];
441 de->hash_code = dentry_hash;
442 de->name_len = cpu_to_le16(namelen);
443 memcpy(dentry_blk->filename[bit_pos], name, namelen);
444 de->ino = cpu_to_le32(inode->i_ino);
445 set_de_type(de, inode);
446 for (i = 0; i < slots; i++)
447 test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
448 set_page_dirty(dentry_page);
449
450 update_parent_metadata(dir, inode, current_depth);
451
452 /* update parent inode number before releasing dentry page */
453 F2FS_I(inode)->i_pino = dir->i_ino;
454fail:
455 kunmap(dentry_page);
456 f2fs_put_page(dentry_page, 1);
457 mutex_unlock_op(sbi, DENTRY_OPS);
458 return err;
459}
460
461/*
 462 * It only removes the dentry from the dentry page; the corresponding
 463 * name entry in the name page does not need to be touched during deletion.
464 */
465void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
466 struct inode *inode)
467{
468 struct f2fs_dentry_block *dentry_blk;
469 unsigned int bit_pos;
470 struct address_space *mapping = page->mapping;
471 struct inode *dir = mapping->host;
472 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
473 int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
474 void *kaddr = page_address(page);
475 int i;
476
477 mutex_lock_op(sbi, DENTRY_OPS);
478
479 lock_page(page);
480 wait_on_page_writeback(page);
481
482 dentry_blk = (struct f2fs_dentry_block *)kaddr;
483 bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry;
484 for (i = 0; i < slots; i++)
485 test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
486
487 /* Let's check and deallocate this dentry page */
488 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
489 NR_DENTRY_IN_BLOCK,
490 0);
491 kunmap(page); /* kunmap - pair of f2fs_find_entry */
492 set_page_dirty(page);
493
494 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
495
496 if (inode && S_ISDIR(inode->i_mode)) {
497 drop_nlink(dir);
498 f2fs_write_inode(dir, NULL);
499 } else {
500 mark_inode_dirty(dir);
501 }
502
503 if (inode) {
504 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
505 drop_nlink(inode);
506 if (S_ISDIR(inode->i_mode)) {
507 drop_nlink(inode);
508 i_size_write(inode, 0);
509 }
510 f2fs_write_inode(inode, NULL);
511 if (inode->i_nlink == 0)
512 add_orphan_inode(sbi, inode->i_ino);
513 }
514
515 if (bit_pos == NR_DENTRY_IN_BLOCK) {
516 truncate_hole(dir, page->index, page->index + 1);
517 clear_page_dirty_for_io(page);
518 ClearPageUptodate(page);
519 dec_page_count(sbi, F2FS_DIRTY_DENTS);
520 inode_dec_dirty_dents(dir);
521 }
522 f2fs_put_page(page, 1);
523
524 mutex_unlock_op(sbi, DENTRY_OPS);
525}
526
527int f2fs_make_empty(struct inode *inode, struct inode *parent)
528{
529 struct page *dentry_page;
530 struct f2fs_dentry_block *dentry_blk;
531 struct f2fs_dir_entry *de;
532 void *kaddr;
533
534 dentry_page = get_new_data_page(inode, 0, true);
535 if (IS_ERR(dentry_page))
536 return PTR_ERR(dentry_page);
537
538 kaddr = kmap_atomic(dentry_page);
539 dentry_blk = (struct f2fs_dentry_block *)kaddr;
540
541 de = &dentry_blk->dentry[0];
542 de->name_len = cpu_to_le16(1);
543 de->hash_code = 0;
544 de->ino = cpu_to_le32(inode->i_ino);
545 memcpy(dentry_blk->filename[0], ".", 1);
546 set_de_type(de, inode);
547
548 de = &dentry_blk->dentry[1];
549 de->hash_code = 0;
550 de->name_len = cpu_to_le16(2);
551 de->ino = cpu_to_le32(parent->i_ino);
552 memcpy(dentry_blk->filename[1], "..", 2);
553 set_de_type(de, inode);
554
555 test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
556 test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
557 kunmap_atomic(kaddr);
558
559 set_page_dirty(dentry_page);
560 f2fs_put_page(dentry_page, 1);
561 return 0;
562}
563
564bool f2fs_empty_dir(struct inode *dir)
565{
566 unsigned long bidx;
567 struct page *dentry_page;
568 unsigned int bit_pos;
569 struct f2fs_dentry_block *dentry_blk;
570 unsigned long nblock = dir_blocks(dir);
571
572 for (bidx = 0; bidx < nblock; bidx++) {
573 void *kaddr;
574 dentry_page = get_lock_data_page(dir, bidx);
575 if (IS_ERR(dentry_page)) {
576 if (PTR_ERR(dentry_page) == -ENOENT)
577 continue;
578 else
579 return false;
580 }
581
582 kaddr = kmap_atomic(dentry_page);
583 dentry_blk = (struct f2fs_dentry_block *)kaddr;
584 if (bidx == 0)
585 bit_pos = 2;
586 else
587 bit_pos = 0;
588 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
589 NR_DENTRY_IN_BLOCK,
590 bit_pos);
591 kunmap_atomic(kaddr);
592
593 f2fs_put_page(dentry_page, 1);
594
595 if (bit_pos < NR_DENTRY_IN_BLOCK)
596 return false;
597 }
598 return true;
599}
600
601static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir)
602{
603 unsigned long pos = file->f_pos;
604 struct inode *inode = file->f_dentry->d_inode;
605 unsigned long npages = dir_blocks(inode);
606 unsigned char *types = NULL;
607 unsigned int bit_pos = 0, start_bit_pos = 0;
608 int over = 0;
609 struct f2fs_dentry_block *dentry_blk = NULL;
610 struct f2fs_dir_entry *de = NULL;
611 struct page *dentry_page = NULL;
612 unsigned int n = 0;
613 unsigned char d_type = DT_UNKNOWN;
614 int slots;
615
616 types = f2fs_filetype_table;
617 bit_pos = (pos % NR_DENTRY_IN_BLOCK);
618 n = (pos / NR_DENTRY_IN_BLOCK);
619
620 for ( ; n < npages; n++) {
621 dentry_page = get_lock_data_page(inode, n);
622 if (IS_ERR(dentry_page))
623 continue;
624
625 start_bit_pos = bit_pos;
626 dentry_blk = kmap(dentry_page);
627 while (bit_pos < NR_DENTRY_IN_BLOCK) {
628 d_type = DT_UNKNOWN;
629 bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
630 NR_DENTRY_IN_BLOCK,
631 bit_pos);
632 if (bit_pos >= NR_DENTRY_IN_BLOCK)
633 break;
634
635 de = &dentry_blk->dentry[bit_pos];
636 if (types && de->file_type < F2FS_FT_MAX)
637 d_type = types[de->file_type];
638
639 over = filldir(dirent,
640 dentry_blk->filename[bit_pos],
641 le16_to_cpu(de->name_len),
642 (n * NR_DENTRY_IN_BLOCK) + bit_pos,
643 le32_to_cpu(de->ino), d_type);
644 if (over) {
645 file->f_pos += bit_pos - start_bit_pos;
646 goto success;
647 }
648 slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
649 bit_pos += slots;
650 }
651 bit_pos = 0;
652 file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK;
653 kunmap(dentry_page);
654 f2fs_put_page(dentry_page, 1);
655 dentry_page = NULL;
656 }
657success:
658 if (dentry_page && !IS_ERR(dentry_page)) {
659 kunmap(dentry_page);
660 f2fs_put_page(dentry_page, 1);
661 }
662
663 return 0;
664}
665
666const struct file_operations f2fs_dir_operations = {
667 .llseek = generic_file_llseek,
668 .read = generic_read_dir,
669 .readdir = f2fs_readdir,
670 .fsync = f2fs_sync_file,
671 .unlocked_ioctl = f2fs_ioctl,
672};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
new file mode 100644
index 000000000000..a18d63db2fb6
--- /dev/null
+++ b/fs/f2fs/f2fs.h
@@ -0,0 +1,1083 @@
1/*
2 * fs/f2fs/f2fs.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef _LINUX_F2FS_H
12#define _LINUX_F2FS_H
13
14#include <linux/types.h>
15#include <linux/page-flags.h>
16#include <linux/buffer_head.h>
17#include <linux/slab.h>
18#include <linux/crc32.h>
19#include <linux/magic.h>
20
21/*
22 * For mount options
23 */
24#define F2FS_MOUNT_BG_GC 0x00000001
25#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
26#define F2FS_MOUNT_DISCARD 0x00000004
27#define F2FS_MOUNT_NOHEAP 0x00000008
28#define F2FS_MOUNT_XATTR_USER 0x00000010
29#define F2FS_MOUNT_POSIX_ACL 0x00000020
30#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
31
32#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
33#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
34#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option)
35
36#define ver_after(a, b) (typecheck(unsigned long long, a) && \
37 typecheck(unsigned long long, b) && \
38 ((long long)((a) - (b)) > 0))
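
ver_after() compares checkpoint versions through a signed difference rather than a plain greater-than, so the ordering stays correct even across a (theoretical) counter wrap. A userspace rendition without the typecheck() guards:

#include <stdio.h>

/* Wrap-safe version comparison: the signed difference is positive
 * exactly when a is "after" b, even if the counter wrapped. */
#define ver_after(a, b) ((long long)((a) - (b)) > 0)

int main(void)
{
	unsigned long long old = 0xFFFFFFFFFFFFFFFFULL; /* about to wrap */
	unsigned long long new = old + 1;               /* wraps to 0 */

	printf("plain compare: %d, ver_after: %d\n",
	       new > old, (int)ver_after(new, old));
	return 0;
}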
39
40typedef u64 block_t;
41typedef u32 nid_t;
42
43struct f2fs_mount_info {
44 unsigned int opt;
45};
46
47static inline __u32 f2fs_crc32(void *buff, size_t len)
48{
49 return crc32_le(F2FS_SUPER_MAGIC, buff, len);
50}
51
52static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size)
53{
54 return f2fs_crc32(buff, buff_size) == blk_crc;
55}
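
do_checkpoint() uses this pair to stamp and later verify the checkpoint block: the CRC is computed over the block up to checksum_offset and stored at that offset. A standalone sketch with a bitwise stand-in for the kernel's crc32_le() (reflected CRC-32, caller-supplied seed, no final inversion); the equivalence to the kernel routine is an assumption of this sketch:

#include <stdio.h>
#include <stddef.h>

typedef unsigned int u32;

#define F2FS_SUPER_MAGIC 0xF2F52010 /* added to magic.h by this series */

/* Bitwise stand-in for the kernel's crc32_le(): seed is the initial
 * value, polynomial 0xEDB88320, no pre/post inversion (assumed). */
static u32 crc32_le(u32 crc, const unsigned char *p, size_t len)
{
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1));
	}
	return crc;
}

static u32 f2fs_crc32(void *buff, size_t len)
{
	return crc32_le(F2FS_SUPER_MAGIC, buff, len);
}

int main(void)
{
	char blk[64] = "checkpoint payload";
	u32 crc = f2fs_crc32(blk, sizeof(blk));

	/* validation mirrors f2fs_crc_valid() */
	printf("crc=0x%08x valid=%d\n", crc,
	       f2fs_crc32(blk, sizeof(blk)) == crc);
	return 0;
}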
56
57/*
58 * For checkpoint manager
59 */
60enum {
61 NAT_BITMAP,
62 SIT_BITMAP
63};
64
65/* for the list of orphan inodes */
66struct orphan_inode_entry {
67 struct list_head list; /* list head */
68 nid_t ino; /* inode number */
69};
70
71/* for the list of directory inodes */
72struct dir_inode_entry {
73 struct list_head list; /* list head */
74 struct inode *inode; /* vfs inode pointer */
75};
76
77/* for the list of fsync inodes, used only during recovery */
78struct fsync_inode_entry {
79 struct list_head list; /* list head */
80 struct inode *inode; /* vfs inode pointer */
81 block_t blkaddr; /* block address locating the last inode */
82};
83
84#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats))
85#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits))
86
87#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne)
88#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid)
89#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se)
90#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno)
91
92static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
93{
94 int before = nats_in_cursum(rs);
95 rs->n_nats = cpu_to_le16(before + i);
96 return before;
97}
98
99static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
100{
101 int before = sits_in_cursum(rs);
102 rs->n_sits = cpu_to_le16(before + i);
103 return before;
104}
105
106/*
107 * For INODE and NODE manager
108 */
109#define XATTR_NODE_OFFSET (-1) /*
110					 * store xattrs in one node block per
111					 * file, keeping -1 as its node offset to
112					 * distinguish it from index node blocks.
113 */
114#define RDONLY_NODE 1 /*
115					 * specify read-only mode when getting
116					 * a node block; 0 is read-write mode.
117					 * Used by get_dnode_of_data().
118 */
119#define F2FS_LINK_MAX 32000 /* maximum link count per file */
120
121/* for in-memory extent cache entry */
122struct extent_info {
123 rwlock_t ext_lock; /* rwlock for consistency */
124 unsigned int fofs; /* start offset in a file */
125 u32 blk_addr; /* start block address of the extent */
126	unsigned int len;		/* length of the extent */
127};
128
129/*
130 * i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
131 */
132#define FADVISE_COLD_BIT 0x01
133
134struct f2fs_inode_info {
135 struct inode vfs_inode; /* serve a vfs inode */
136	unsigned long i_flags;		/* keep inode flags for ioctl */
137	unsigned char i_advise;		/* used to give file attribute hints */
138	unsigned int i_current_depth;	/* used only in directory structure */
139 unsigned int i_pino; /* parent inode number */
140 umode_t i_acl_mode; /* keep file acl mode temporarily */
141
142	/* Used below internally in f2fs */
143	unsigned long flags;		/* used to pass per-file flags */
144	unsigned long long data_version;/* latest version of data for fsync */
145 atomic_t dirty_dents; /* # of dirty dentry pages */
146 f2fs_hash_t chash; /* hash value of given file name */
147 unsigned int clevel; /* maximum level of given file name */
148 nid_t i_xattr_nid; /* node id that contains xattrs */
149 struct extent_info ext; /* in-memory extent cache entry */
150};
151
152static inline void get_extent_info(struct extent_info *ext,
153 struct f2fs_extent i_ext)
154{
155 write_lock(&ext->ext_lock);
156 ext->fofs = le32_to_cpu(i_ext.fofs);
157 ext->blk_addr = le32_to_cpu(i_ext.blk_addr);
158 ext->len = le32_to_cpu(i_ext.len);
159 write_unlock(&ext->ext_lock);
160}
161
162static inline void set_raw_extent(struct extent_info *ext,
163 struct f2fs_extent *i_ext)
164{
165 read_lock(&ext->ext_lock);
166 i_ext->fofs = cpu_to_le32(ext->fofs);
167 i_ext->blk_addr = cpu_to_le32(ext->blk_addr);
168 i_ext->len = cpu_to_le32(ext->len);
169 read_unlock(&ext->ext_lock);
170}
171
172struct f2fs_nm_info {
173 block_t nat_blkaddr; /* base disk address of NAT */
174 nid_t max_nid; /* maximum possible node ids */
175 nid_t init_scan_nid; /* the first nid to be scanned */
176 nid_t next_scan_nid; /* the next nid to be scanned */
177
178 /* NAT cache management */
179 struct radix_tree_root nat_root;/* root of the nat entry cache */
180	rwlock_t nat_tree_lock;		/* protect the nat entry cache */
181 unsigned int nat_cnt; /* the # of cached nat entries */
182 struct list_head nat_entries; /* cached nat entry list (clean) */
183 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
184
185 /* free node ids management */
186 struct list_head free_nid_list; /* a list for free nids */
187 spinlock_t free_nid_list_lock; /* protect free nid list */
188 unsigned int fcnt; /* the number of free node id */
189 struct mutex build_lock; /* lock for build free nids */
190
191 /* for checkpoint */
192 char *nat_bitmap; /* NAT bitmap pointer */
193 int bitmap_size; /* bitmap size */
194};
195
196/*
197 * This structure is used as a function parameter.
198 * All the information is dedicated to a given direct node block, determined
199 * by the data offset in a file.
200 */
201struct dnode_of_data {
202 struct inode *inode; /* vfs inode pointer */
203 struct page *inode_page; /* its inode page, NULL is possible */
204 struct page *node_page; /* cached direct node page */
205 nid_t nid; /* node id of the direct node block */
206 unsigned int ofs_in_node; /* data offset in the node page */
207 bool inode_page_locked; /* inode page is locked or not */
208 block_t data_blkaddr; /* block address of the node block */
209};
210
211static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
212 struct page *ipage, struct page *npage, nid_t nid)
213{
214 dn->inode = inode;
215 dn->inode_page = ipage;
216 dn->node_page = npage;
217 dn->nid = nid;
218 dn->inode_page_locked = 0;
219}
220
221/*
222 * For SIT manager
223 *
224 * By default, there are 6 active log areas across the whole main area.
225 * To separate hot and cold data while reducing cleaning overhead,
226 * we split them into 3 data logs and 3 node logs, typed hot, warm, and
227 * cold, respectively.
228 * In the current design, you should not change these numbers directly.
229 * Instead, the mount option active_logs=x selects 2, 4, or 6
230 * logs according to the underlying devices. (default: 6)
231 * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for
232 * data and 8 for node logs.
233 */
234#define NR_CURSEG_DATA_TYPE (3)
235#define NR_CURSEG_NODE_TYPE (3)
236#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
237
238enum {
239 CURSEG_HOT_DATA = 0, /* directory entry blocks */
240 CURSEG_WARM_DATA, /* data blocks */
241 CURSEG_COLD_DATA, /* multimedia or GCed data blocks */
242 CURSEG_HOT_NODE, /* direct node blocks of directory files */
243 CURSEG_WARM_NODE, /* direct node blocks of normal files */
244 CURSEG_COLD_NODE, /* indirect node blocks */
245 NO_CHECK_TYPE
246};
247
248struct f2fs_sm_info {
249 struct sit_info *sit_info; /* whole segment information */
250 struct free_segmap_info *free_info; /* free segment information */
251 struct dirty_seglist_info *dirty_info; /* dirty segment information */
252 struct curseg_info *curseg_array; /* active segment information */
253
254 struct list_head wblist_head; /* list of under-writeback pages */
255 spinlock_t wblist_lock; /* lock for checkpoint */
256
257 block_t seg0_blkaddr; /* block address of 0'th segment */
258 block_t main_blkaddr; /* start block address of main area */
259 block_t ssa_blkaddr; /* start block address of SSA area */
260
261 unsigned int segment_count; /* total # of segments */
262 unsigned int main_segments; /* # of segments in main area */
263 unsigned int reserved_segments; /* # of reserved segments */
264 unsigned int ovp_segments; /* # of overprovision segments */
265};
266
267/*
268 * For directory operation
269 */
270#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1)
271#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2)
272#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3)
273#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4)
274#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5)
275
276/*
277 * For superblock
278 */
279/*
280 * COUNT_TYPE for monitoring
281 *
282 * f2fs monitors the number of several block types, such as pages under
283 * writeback, dirty dentry blocks, dirty node blocks, and dirty meta blocks.
284 */
285enum count_type {
286 F2FS_WRITEBACK,
287 F2FS_DIRTY_DENTS,
288 F2FS_DIRTY_NODES,
289 F2FS_DIRTY_META,
290 NR_COUNT_TYPE,
291};
292
293/*
294 * FS_LOCK nesting subclasses for the lock validator:
295 *
296 * The locking order between these classes is
297 * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW
298 * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC
299 */
300enum lock_type {
301 RENAME, /* for renaming operations */
302 DENTRY_OPS, /* for directory operations */
303 DATA_WRITE, /* for data write */
304 DATA_NEW, /* for data allocation */
305 DATA_TRUNC, /* for data truncate */
306 NODE_NEW, /* for node allocation */
307 NODE_TRUNC, /* for node truncate */
308 NODE_WRITE, /* for node write */
309 NR_LOCK_TYPE,
310};
311
312/*
313 * The below are the page types of bios used in submit_bio().
314 * The available types are:
315 * DATA			User data pages. It operates in async mode.
316 * NODE			Node pages. It operates in async mode.
317 * META			FS metadata pages such as SIT, NAT, CP.
318 * NR_PAGE_TYPE		The number of page types.
319 * META_FLUSH		Make sure the previous pages are written,
320 *			waiting for the bio's completion.
321 * ...			Can only be used with META.
322 */
323enum page_type {
324 DATA,
325 NODE,
326 META,
327 NR_PAGE_TYPE,
328 META_FLUSH,
329};
330
331struct f2fs_sb_info {
332 struct super_block *sb; /* pointer to VFS super block */
333 struct buffer_head *raw_super_buf; /* buffer head of raw sb */
334 struct f2fs_super_block *raw_super; /* raw super block pointer */
335 int s_dirty; /* dirty flag for checkpoint */
336
337 /* for node-related operations */
338 struct f2fs_nm_info *nm_info; /* node manager */
339 struct inode *node_inode; /* cache node blocks */
340
341 /* for segment-related operations */
342 struct f2fs_sm_info *sm_info; /* segment manager */
343 struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */
344 sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */
345 struct rw_semaphore bio_sem; /* IO semaphore */
346
347 /* for checkpoint */
348 struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
349 struct inode *meta_inode; /* cache meta blocks */
350 struct mutex cp_mutex; /* for checkpoint procedure */
351 struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */
352 struct mutex write_inode; /* mutex for write inode */
353 struct mutex writepages; /* mutex for writepages() */
354 int por_doing; /* recovery is doing or not */
355
356 /* for orphan inode management */
357 struct list_head orphan_inode_list; /* orphan inode list */
358 struct mutex orphan_inode_mutex; /* for orphan inode list */
359 unsigned int n_orphans; /* # of orphan inodes */
360
361 /* for directory inode management */
362 struct list_head dir_inode_list; /* dir inode list */
363 spinlock_t dir_inode_lock; /* for dir inode list lock */
364 unsigned int n_dirty_dirs; /* # of dir inodes */
365
366 /* basic file system units */
367 unsigned int log_sectors_per_block; /* log2 sectors per block */
368 unsigned int log_blocksize; /* log2 block size */
369 unsigned int blocksize; /* block size */
370	unsigned int root_ino_num;	/* root inode number */
371	unsigned int node_ino_num;	/* node inode number */
372	unsigned int meta_ino_num;	/* meta inode number */
373 unsigned int log_blocks_per_seg; /* log2 blocks per segment */
374 unsigned int blocks_per_seg; /* blocks per segment */
375 unsigned int segs_per_sec; /* segments per section */
376 unsigned int secs_per_zone; /* sections per zone */
377 unsigned int total_sections; /* total section count */
378 unsigned int total_node_count; /* total node block count */
379 unsigned int total_valid_node_count; /* valid node block count */
380 unsigned int total_valid_inode_count; /* valid inode count */
381 int active_logs; /* # of active logs */
382
383 block_t user_block_count; /* # of user blocks */
384 block_t total_valid_block_count; /* # of valid blocks */
385 block_t alloc_valid_block_count; /* # of allocated blocks */
386 block_t last_valid_block_count; /* for recovery */
387 u32 s_next_generation; /* for NFS support */
388 atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */
389
390 struct f2fs_mount_info mount_opt; /* mount options */
391
392 /* for cleaning operations */
393 struct mutex gc_mutex; /* mutex for GC */
394 struct f2fs_gc_kthread *gc_thread; /* GC thread */
395
396 /*
397 * for stat information.
398 * one is for the LFS mode, and the other is for the SSR mode.
399 */
400 struct f2fs_stat_info *stat_info; /* FS status information */
401 unsigned int segment_count[2]; /* # of allocated segments */
402 unsigned int block_count[2]; /* # of allocated blocks */
403 unsigned int last_victim[2]; /* last victim segment # */
404 int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
405 int bg_gc; /* background gc calls */
406 spinlock_t stat_lock; /* lock for stat operations */
407};
408
409/*
410 * Inline functions
411 */
412static inline struct f2fs_inode_info *F2FS_I(struct inode *inode)
413{
414 return container_of(inode, struct f2fs_inode_info, vfs_inode);
415}
416
417static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb)
418{
419 return sb->s_fs_info;
420}
421
422static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
423{
424 return (struct f2fs_super_block *)(sbi->raw_super);
425}
426
427static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
428{
429 return (struct f2fs_checkpoint *)(sbi->ckpt);
430}
431
432static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi)
433{
434 return (struct f2fs_nm_info *)(sbi->nm_info);
435}
436
437static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi)
438{
439 return (struct f2fs_sm_info *)(sbi->sm_info);
440}
441
442static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi)
443{
444 return (struct sit_info *)(SM_I(sbi)->sit_info);
445}
446
447static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi)
448{
449 return (struct free_segmap_info *)(SM_I(sbi)->free_info);
450}
451
452static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi)
453{
454 return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info);
455}
456
457static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi)
458{
459 sbi->s_dirty = 1;
460}
461
462static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi)
463{
464 sbi->s_dirty = 0;
465}
466
467static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
468{
469 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
470 return ckpt_flags & f;
471}
472
473static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
474{
475 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
476 ckpt_flags |= f;
477 cp->ckpt_flags = cpu_to_le32(ckpt_flags);
478}
479
480static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
481{
482 unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
483 ckpt_flags &= (~f);
484 cp->ckpt_flags = cpu_to_le32(ckpt_flags);
485}
486
487static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t)
488{
489 mutex_lock_nested(&sbi->fs_lock[t], t);
490}
491
492static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t)
493{
494 mutex_unlock(&sbi->fs_lock[t]);
495}
496
497/*
498 * Check whether the given nid is within node id range.
499 */
500static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
501{
502 BUG_ON((nid >= NM_I(sbi)->max_nid));
503}
504
505#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1
506
507/*
508 * Check whether the inode has blocks or not
509 */
510static inline int F2FS_HAS_BLOCKS(struct inode *inode)
511{
512 if (F2FS_I(inode)->i_xattr_nid)
513 return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1);
514 else
515 return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS);
516}
517
518static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
519 struct inode *inode, blkcnt_t count)
520{
521 block_t valid_block_count;
522
523 spin_lock(&sbi->stat_lock);
524 valid_block_count =
525 sbi->total_valid_block_count + (block_t)count;
526 if (valid_block_count > sbi->user_block_count) {
527 spin_unlock(&sbi->stat_lock);
528 return false;
529 }
530 inode->i_blocks += count;
531 sbi->total_valid_block_count = valid_block_count;
532 sbi->alloc_valid_block_count += (block_t)count;
533 spin_unlock(&sbi->stat_lock);
534 return true;
535}
536
537static inline int dec_valid_block_count(struct f2fs_sb_info *sbi,
538 struct inode *inode,
539 blkcnt_t count)
540{
541 spin_lock(&sbi->stat_lock);
542 BUG_ON(sbi->total_valid_block_count < (block_t) count);
543 BUG_ON(inode->i_blocks < count);
544 inode->i_blocks -= count;
545 sbi->total_valid_block_count -= (block_t)count;
546 spin_unlock(&sbi->stat_lock);
547 return 0;
548}
549
550static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
551{
552 atomic_inc(&sbi->nr_pages[count_type]);
553 F2FS_SET_SB_DIRT(sbi);
554}
555
556static inline void inode_inc_dirty_dents(struct inode *inode)
557{
558 atomic_inc(&F2FS_I(inode)->dirty_dents);
559}
560
561static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
562{
563 atomic_dec(&sbi->nr_pages[count_type]);
564}
565
566static inline void inode_dec_dirty_dents(struct inode *inode)
567{
568 atomic_dec(&F2FS_I(inode)->dirty_dents);
569}
570
571static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
572{
573 return atomic_read(&sbi->nr_pages[count_type]);
574}
575
576static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
577{
578 block_t ret;
579 spin_lock(&sbi->stat_lock);
580 ret = sbi->total_valid_block_count;
581 spin_unlock(&sbi->stat_lock);
582 return ret;
583}
584
585static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
586{
587 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
588
589 /* return NAT or SIT bitmap */
590 if (flag == NAT_BITMAP)
591 return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
592 else if (flag == SIT_BITMAP)
593 return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
594
595 return 0;
596}
597
598static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
599{
600 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
601 int offset = (flag == NAT_BITMAP) ?
602 le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
603 return &ckpt->sit_nat_version_bitmap + offset;
604}
605
606static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
607{
608 block_t start_addr;
609 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
610 unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver);
611
612 start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
613
614 /*
615	 * an odd-numbered checkpoint should be at cp segment 0,
616	 * and an even-numbered one must be at cp segment 1
617 */
618 if (!(ckpt_version & 1))
619 start_addr += sbi->blocks_per_seg;
620
621 return start_addr;
622}
623
624static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
625{
626 return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
627}
628
629static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi,
630 struct inode *inode,
631 unsigned int count)
632{
633 block_t valid_block_count;
634 unsigned int valid_node_count;
635
636 spin_lock(&sbi->stat_lock);
637
638 valid_block_count = sbi->total_valid_block_count + (block_t)count;
639 sbi->alloc_valid_block_count += (block_t)count;
640 valid_node_count = sbi->total_valid_node_count + count;
641
642 if (valid_block_count > sbi->user_block_count) {
643 spin_unlock(&sbi->stat_lock);
644 return false;
645 }
646
647 if (valid_node_count > sbi->total_node_count) {
648 spin_unlock(&sbi->stat_lock);
649 return false;
650 }
651
652 if (inode)
653 inode->i_blocks += count;
654 sbi->total_valid_node_count = valid_node_count;
655 sbi->total_valid_block_count = valid_block_count;
656 spin_unlock(&sbi->stat_lock);
657
658 return true;
659}
660
661static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
662 struct inode *inode,
663 unsigned int count)
664{
665 spin_lock(&sbi->stat_lock);
666
667 BUG_ON(sbi->total_valid_block_count < count);
668 BUG_ON(sbi->total_valid_node_count < count);
669 BUG_ON(inode->i_blocks < count);
670
671 inode->i_blocks -= count;
672 sbi->total_valid_node_count -= count;
673 sbi->total_valid_block_count -= (block_t)count;
674
675 spin_unlock(&sbi->stat_lock);
676}
677
678static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi)
679{
680 unsigned int ret;
681 spin_lock(&sbi->stat_lock);
682 ret = sbi->total_valid_node_count;
683 spin_unlock(&sbi->stat_lock);
684 return ret;
685}
686
687static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi)
688{
689 spin_lock(&sbi->stat_lock);
690 BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count);
691 sbi->total_valid_inode_count++;
692 spin_unlock(&sbi->stat_lock);
693}
694
695static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi)
696{
697 spin_lock(&sbi->stat_lock);
698 BUG_ON(!sbi->total_valid_inode_count);
699 sbi->total_valid_inode_count--;
700 spin_unlock(&sbi->stat_lock);
701 return 0;
702}
703
704static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
705{
706 unsigned int ret;
707 spin_lock(&sbi->stat_lock);
708 ret = sbi->total_valid_inode_count;
709 spin_unlock(&sbi->stat_lock);
710 return ret;
711}
712
713static inline void f2fs_put_page(struct page *page, int unlock)
714{
715 if (!page || IS_ERR(page))
716 return;
717
718 if (unlock) {
719 BUG_ON(!PageLocked(page));
720 unlock_page(page);
721 }
722 page_cache_release(page);
723}
724
725static inline void f2fs_put_dnode(struct dnode_of_data *dn)
726{
727 if (dn->node_page)
728 f2fs_put_page(dn->node_page, 1);
729 if (dn->inode_page && dn->node_page != dn->inode_page)
730 f2fs_put_page(dn->inode_page, 0);
731 dn->node_page = NULL;
732 dn->inode_page = NULL;
733}
734
735static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name,
736 size_t size, void (*ctor)(void *))
737{
738 return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor);
739}
740
741#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino)
742
743static inline bool IS_INODE(struct page *page)
744{
745 struct f2fs_node *p = (struct f2fs_node *)page_address(page);
746 return RAW_IS_INODE(p);
747}
748
749static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
750{
751 return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr;
752}
753
754static inline block_t datablock_addr(struct page *node_page,
755 unsigned int offset)
756{
757 struct f2fs_node *raw_node;
758 __le32 *addr_array;
759 raw_node = (struct f2fs_node *)page_address(node_page);
760 addr_array = blkaddr_in_node(raw_node);
761 return le32_to_cpu(addr_array[offset]);
762}
763
764static inline int f2fs_test_bit(unsigned int nr, char *addr)
765{
766 int mask;
767
768 addr += (nr >> 3);
769 mask = 1 << (7 - (nr & 0x07));
770 return mask & *addr;
771}
772
773static inline int f2fs_set_bit(unsigned int nr, char *addr)
774{
775 int mask;
776 int ret;
777
778 addr += (nr >> 3);
779 mask = 1 << (7 - (nr & 0x07));
780 ret = mask & *addr;
781 *addr |= mask;
782 return ret;
783}
784
785static inline int f2fs_clear_bit(unsigned int nr, char *addr)
786{
787 int mask;
788 int ret;
789
790 addr += (nr >> 3);
791 mask = 1 << (7 - (nr & 0x07));
792 ret = mask & *addr;
793 *addr &= ~mask;
794 return ret;
795}
796
797/* used for f2fs_inode_info->flags */
798enum {
799 FI_NEW_INODE, /* indicate newly allocated inode */
800 FI_NEED_CP, /* need to do checkpoint during fsync */
801 FI_INC_LINK, /* need to increment i_nlink */
802 FI_ACL_MODE, /* indicate acl mode */
803 FI_NO_ALLOC, /* should not allocate any blocks */
804};
805
806static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
807{
808 set_bit(flag, &fi->flags);
809}
810
811static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
812{
813 return test_bit(flag, &fi->flags);
814}
815
816static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
817{
818 clear_bit(flag, &fi->flags);
819}
820
821static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
822{
823 fi->i_acl_mode = mode;
824 set_inode_flag(fi, FI_ACL_MODE);
825}
826
827static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
828{
829 if (is_inode_flag_set(fi, FI_ACL_MODE)) {
830 clear_inode_flag(fi, FI_ACL_MODE);
831 return 1;
832 }
833 return 0;
834}
835
836/*
837 * file.c
838 */
839int f2fs_sync_file(struct file *, loff_t, loff_t, int);
840void truncate_data_blocks(struct dnode_of_data *);
841void f2fs_truncate(struct inode *);
842int f2fs_setattr(struct dentry *, struct iattr *);
843int truncate_hole(struct inode *, pgoff_t, pgoff_t);
844long f2fs_ioctl(struct file *, unsigned int, unsigned long);
845
846/*
847 * inode.c
848 */
849void f2fs_set_inode_flags(struct inode *);
850struct inode *f2fs_iget_nowait(struct super_block *, unsigned long);
851struct inode *f2fs_iget(struct super_block *, unsigned long);
852void update_inode(struct inode *, struct page *);
853int f2fs_write_inode(struct inode *, struct writeback_control *);
854void f2fs_evict_inode(struct inode *);
855
856/*
857 * namei.c
858 */
859struct dentry *f2fs_get_parent(struct dentry *child);
860
861/*
862 * dir.c
863 */
864struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *,
865 struct page **);
866struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
867ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
868void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
869 struct page *, struct inode *);
870void init_dent_inode(struct dentry *, struct page *);
871int f2fs_add_link(struct dentry *, struct inode *);
872void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
873int f2fs_make_empty(struct inode *, struct inode *);
874bool f2fs_empty_dir(struct inode *);
875
876/*
877 * super.c
878 */
879int f2fs_sync_fs(struct super_block *, int);
880
881/*
882 * hash.c
883 */
884f2fs_hash_t f2fs_dentry_hash(const char *, int);
885
886/*
887 * node.c
888 */
889struct dnode_of_data;
890struct node_info;
891
892int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
893void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
894int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
895int truncate_inode_blocks(struct inode *, pgoff_t);
896int remove_inode_page(struct inode *);
897int new_inode_page(struct inode *, struct dentry *);
898struct page *new_node_page(struct dnode_of_data *, unsigned int);
899void ra_node_page(struct f2fs_sb_info *, nid_t);
900struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
901struct page *get_node_page_ra(struct page *, int);
902void sync_inode_page(struct dnode_of_data *);
903int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
904bool alloc_nid(struct f2fs_sb_info *, nid_t *);
905void alloc_nid_done(struct f2fs_sb_info *, nid_t);
906void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
907void recover_node_page(struct f2fs_sb_info *, struct page *,
908 struct f2fs_summary *, struct node_info *, block_t);
909int recover_inode_page(struct f2fs_sb_info *, struct page *);
910int restore_node_summary(struct f2fs_sb_info *, unsigned int,
911 struct f2fs_summary_block *);
912void flush_nat_entries(struct f2fs_sb_info *);
913int build_node_manager(struct f2fs_sb_info *);
914void destroy_node_manager(struct f2fs_sb_info *);
915int create_node_manager_caches(void);
916void destroy_node_manager_caches(void);
917
918/*
919 * segment.c
920 */
921void f2fs_balance_fs(struct f2fs_sb_info *);
922void invalidate_blocks(struct f2fs_sb_info *, block_t);
923void locate_dirty_segment(struct f2fs_sb_info *, unsigned int);
924void clear_prefree_segments(struct f2fs_sb_info *);
925int npages_for_summary_flush(struct f2fs_sb_info *);
926void allocate_new_segments(struct f2fs_sb_info *);
927struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
928struct bio *f2fs_bio_alloc(struct block_device *, int);
929void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
930int write_meta_page(struct f2fs_sb_info *, struct page *,
931 struct writeback_control *);
932void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
933 block_t, block_t *);
934void write_data_page(struct inode *, struct page *, struct dnode_of_data*,
935 block_t, block_t *);
936void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t);
937void recover_data_page(struct f2fs_sb_info *, struct page *,
938 struct f2fs_summary *, block_t, block_t);
939void rewrite_node_page(struct f2fs_sb_info *, struct page *,
940 struct f2fs_summary *, block_t, block_t);
941void write_data_summaries(struct f2fs_sb_info *, block_t);
942void write_node_summaries(struct f2fs_sb_info *, block_t);
943int lookup_journal_in_cursum(struct f2fs_summary_block *,
944 int, unsigned int, int);
945void flush_sit_entries(struct f2fs_sb_info *);
946int build_segment_manager(struct f2fs_sb_info *);
947void reset_victim_segmap(struct f2fs_sb_info *);
948void destroy_segment_manager(struct f2fs_sb_info *);
949
950/*
951 * checkpoint.c
952 */
953struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
954struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
955long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
956int check_orphan_space(struct f2fs_sb_info *);
957void add_orphan_inode(struct f2fs_sb_info *, nid_t);
958void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
959int recover_orphan_inodes(struct f2fs_sb_info *);
960int get_valid_checkpoint(struct f2fs_sb_info *);
961void set_dirty_dir_page(struct inode *, struct page *);
962void remove_dirty_dir_inode(struct inode *);
963void sync_dirty_dir_inodes(struct f2fs_sb_info *);
964void block_operations(struct f2fs_sb_info *);
965void write_checkpoint(struct f2fs_sb_info *, bool, bool);
966void init_orphan_info(struct f2fs_sb_info *);
967int create_checkpoint_caches(void);
968void destroy_checkpoint_caches(void);
969
970/*
971 * data.c
972 */
973int reserve_new_block(struct dnode_of_data *);
974void update_extent_cache(block_t, struct dnode_of_data *);
975struct page *find_data_page(struct inode *, pgoff_t);
976struct page *get_lock_data_page(struct inode *, pgoff_t);
977struct page *get_new_data_page(struct inode *, pgoff_t, bool);
978int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int);
979int do_write_data_page(struct page *);
980
981/*
982 * gc.c
983 */
984int start_gc_thread(struct f2fs_sb_info *);
985void stop_gc_thread(struct f2fs_sb_info *);
986block_t start_bidx_of_node(unsigned int);
987int f2fs_gc(struct f2fs_sb_info *, int);
988void build_gc_manager(struct f2fs_sb_info *);
989int create_gc_caches(void);
990void destroy_gc_caches(void);
991
992/*
993 * recovery.c
994 */
995void recover_fsync_data(struct f2fs_sb_info *);
996bool space_for_roll_forward(struct f2fs_sb_info *);
997
998/*
999 * debug.c
1000 */
1001#ifdef CONFIG_F2FS_STAT_FS
1002struct f2fs_stat_info {
1003 struct list_head stat_list;
1004 struct f2fs_sb_info *sbi;
1005 struct mutex stat_lock;
1006 int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
1007 int main_area_segs, main_area_sections, main_area_zones;
1008 int hit_ext, total_ext;
1009 int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
1010 int nats, sits, fnids;
1011 int total_count, utilization;
1012 int bg_gc;
1013 unsigned int valid_count, valid_node_count, valid_inode_count;
1014 unsigned int bimodal, avg_vblocks;
1015 int util_free, util_valid, util_invalid;
1016 int rsvd_segs, overp_segs;
1017 int dirty_count, node_pages, meta_pages;
1018 int prefree_count, call_count;
1019 int tot_segs, node_segs, data_segs, free_segs, free_secs;
1020 int tot_blks, data_blks, node_blks;
1021 int curseg[NR_CURSEG_TYPE];
1022 int cursec[NR_CURSEG_TYPE];
1023 int curzone[NR_CURSEG_TYPE];
1024
1025 unsigned int segment_count[2];
1026 unsigned int block_count[2];
1027 unsigned base_mem, cache_mem;
1028};
1029
1030#define stat_inc_call_count(si) ((si)->call_count++)
1031
1032#define stat_inc_seg_count(sbi, type) \
1033 do { \
1034 struct f2fs_stat_info *si = sbi->stat_info; \
1035 (si)->tot_segs++; \
1036 if (type == SUM_TYPE_DATA) \
1037 si->data_segs++; \
1038 else \
1039 si->node_segs++; \
1040 } while (0)
1041
1042#define stat_inc_tot_blk_count(si, blks) \
1043 (si->tot_blks += (blks))
1044
1045#define stat_inc_data_blk_count(sbi, blks) \
1046 do { \
1047 struct f2fs_stat_info *si = sbi->stat_info; \
1048 stat_inc_tot_blk_count(si, blks); \
1049 si->data_blks += (blks); \
1050 } while (0)
1051
1052#define stat_inc_node_blk_count(sbi, blks) \
1053 do { \
1054 struct f2fs_stat_info *si = sbi->stat_info; \
1055 stat_inc_tot_blk_count(si, blks); \
1056 si->node_blks += (blks); \
1057 } while (0)
1058
1059int f2fs_build_stats(struct f2fs_sb_info *);
1060void f2fs_destroy_stats(struct f2fs_sb_info *);
1061void destroy_root_stats(void);
1062#else
1063#define stat_inc_call_count(si)
1064#define stat_inc_seg_count(si, type)
1065#define stat_inc_tot_blk_count(si, blks)
1066#define stat_inc_data_blk_count(si, blks)
1067#define stat_inc_node_blk_count(sbi, blks)
1068
1069static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
1070static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
1071static inline void destroy_root_stats(void) { }
1072#endif
1073
1074extern const struct file_operations f2fs_dir_operations;
1075extern const struct file_operations f2fs_file_operations;
1076extern const struct inode_operations f2fs_file_inode_operations;
1077extern const struct address_space_operations f2fs_dblock_aops;
1078extern const struct address_space_operations f2fs_node_aops;
1079extern const struct address_space_operations f2fs_meta_aops;
1080extern const struct inode_operations f2fs_dir_inode_operations;
1081extern const struct inode_operations f2fs_symlink_inode_operations;
1082extern const struct inode_operations f2fs_special_inode_operations;
1083#endif
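
One convention in the header above deserves a callout: f2fs_test_bit(),
f2fs_set_bit() and f2fs_clear_bit() number bits from the most significant
bit of each byte (mask = 1 << (7 - (nr & 0x07))), matching the on-disk
bitmap layout rather than the kernel's native test_bit() ordering. A
stand-alone sketch of the same convention, using a hypothetical two-byte
bitmap:

	#include <assert.h>

	/* MSB-first bit test, as in f2fs.h above: bit 0 is bit 7 of byte 0 */
	static int f2fs_test_bit(unsigned int nr, char *addr)
	{
		addr += (nr >> 3);
		return (1 << (7 - (nr & 0x07))) & *addr;
	}

	int main(void)
	{
		char bitmap[2] = { (char)0x80, 0x01 };	/* 10000000 00000001 */

		assert(f2fs_test_bit(0, bitmap));	/* MSB of byte 0 is set */
		assert(!f2fs_test_bit(7, bitmap));	/* LSB of byte 0 is clear */
		assert(f2fs_test_bit(15, bitmap));	/* LSB of byte 1 is set */
		return 0;
	}
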
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
new file mode 100644
index 000000000000..f9e085dfb1f0
--- /dev/null
+++ b/fs/f2fs/file.c
@@ -0,0 +1,636 @@
1/*
2 * fs/f2fs/file.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/stat.h>
14#include <linux/buffer_head.h>
15#include <linux/writeback.h>
16#include <linux/falloc.h>
17#include <linux/types.h>
18#include <linux/uaccess.h>
19#include <linux/mount.h>
20
21#include "f2fs.h"
22#include "node.h"
23#include "segment.h"
24#include "xattr.h"
25#include "acl.h"
26
27static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
28 struct vm_fault *vmf)
29{
30 struct page *page = vmf->page;
31 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
32 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
33 block_t old_blk_addr;
34 struct dnode_of_data dn;
35 int err;
36
37 f2fs_balance_fs(sbi);
38
39 sb_start_pagefault(inode->i_sb);
40
41 mutex_lock_op(sbi, DATA_NEW);
42
43 /* block allocation */
44 set_new_dnode(&dn, inode, NULL, NULL, 0);
45 err = get_dnode_of_data(&dn, page->index, 0);
46 if (err) {
47 mutex_unlock_op(sbi, DATA_NEW);
48 goto out;
49 }
50
51 old_blk_addr = dn.data_blkaddr;
52
53 if (old_blk_addr == NULL_ADDR) {
54 err = reserve_new_block(&dn);
55 if (err) {
56 f2fs_put_dnode(&dn);
57 mutex_unlock_op(sbi, DATA_NEW);
58 goto out;
59 }
60 }
61 f2fs_put_dnode(&dn);
62
63 mutex_unlock_op(sbi, DATA_NEW);
64
65 lock_page(page);
66 if (page->mapping != inode->i_mapping ||
67 page_offset(page) >= i_size_read(inode) ||
68 !PageUptodate(page)) {
69 unlock_page(page);
70 err = -EFAULT;
71 goto out;
72 }
73
74 /*
75 * check to see if the page is mapped already (no holes)
76 */
77 if (PageMappedToDisk(page))
78 goto out;
79
80 /* fill the page */
81 wait_on_page_writeback(page);
82
83 /* page is wholly or partially inside EOF */
84 if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) {
85 unsigned offset;
86 offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
87 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
88 }
89 set_page_dirty(page);
90 SetPageUptodate(page);
91
92 file_update_time(vma->vm_file);
93out:
94 sb_end_pagefault(inode->i_sb);
95 return block_page_mkwrite_return(err);
96}
97
98static const struct vm_operations_struct f2fs_file_vm_ops = {
99 .fault = filemap_fault,
100 .page_mkwrite = f2fs_vm_page_mkwrite,
101};
102
103static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
104{
105 struct dentry *dentry;
106 nid_t pino;
107
108 inode = igrab(inode);
109 dentry = d_find_any_alias(inode);
110 if (!dentry) {
111 iput(inode);
112 return 0;
113 }
114 pino = dentry->d_parent->d_inode->i_ino;
115 dput(dentry);
116 iput(inode);
117 return !is_checkpointed_node(sbi, pino);
118}
119
120int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
121{
122 struct inode *inode = file->f_mapping->host;
123 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
124 unsigned long long cur_version;
125 int ret = 0;
126 bool need_cp = false;
127 struct writeback_control wbc = {
128 .sync_mode = WB_SYNC_ALL,
129 .nr_to_write = LONG_MAX,
130 .for_reclaim = 0,
131 };
132
133 if (inode->i_sb->s_flags & MS_RDONLY)
134 return 0;
135
136 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
137 if (ret)
138 return ret;
139
140 mutex_lock(&inode->i_mutex);
141
142 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
143 goto out;
144
145 mutex_lock(&sbi->cp_mutex);
146 cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
147 mutex_unlock(&sbi->cp_mutex);
148
149 if (F2FS_I(inode)->data_version != cur_version &&
150 !(inode->i_state & I_DIRTY))
151 goto out;
152 F2FS_I(inode)->data_version--;
153
154 if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
155 need_cp = true;
156 if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
157 need_cp = true;
158 if (!space_for_roll_forward(sbi))
159 need_cp = true;
160 if (need_to_sync_dir(sbi, inode))
161 need_cp = true;
162
163 f2fs_write_inode(inode, NULL);
164
165 if (need_cp) {
166 /* all the dirty node pages should be flushed for POR */
167 ret = f2fs_sync_fs(inode->i_sb, 1);
168 clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
169 } else {
170 while (sync_node_pages(sbi, inode->i_ino, &wbc) == 0)
171 f2fs_write_inode(inode, NULL);
172 filemap_fdatawait_range(sbi->node_inode->i_mapping,
173 0, LONG_MAX);
174 }
175out:
176 mutex_unlock(&inode->i_mutex);
177 return ret;
178}
179
180static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
181{
182 file_accessed(file);
183 vma->vm_ops = &f2fs_file_vm_ops;
184 return 0;
185}
186
187static int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
188{
189 int nr_free = 0, ofs = dn->ofs_in_node;
190 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
191 struct f2fs_node *raw_node;
192 __le32 *addr;
193
194 raw_node = page_address(dn->node_page);
195 addr = blkaddr_in_node(raw_node) + ofs;
196
197 for ( ; count > 0; count--, addr++, dn->ofs_in_node++) {
198 block_t blkaddr = le32_to_cpu(*addr);
199 if (blkaddr == NULL_ADDR)
200 continue;
201
202 update_extent_cache(NULL_ADDR, dn);
203 invalidate_blocks(sbi, blkaddr);
204 dec_valid_block_count(sbi, dn->inode, 1);
205 nr_free++;
206 }
207 if (nr_free) {
208 set_page_dirty(dn->node_page);
209 sync_inode_page(dn);
210 }
211 dn->ofs_in_node = ofs;
212 return nr_free;
213}
214
215void truncate_data_blocks(struct dnode_of_data *dn)
216{
217 truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
218}
219
220static void truncate_partial_data_page(struct inode *inode, u64 from)
221{
222 unsigned offset = from & (PAGE_CACHE_SIZE - 1);
223 struct page *page;
224
225 if (!offset)
226 return;
227
228 page = find_data_page(inode, from >> PAGE_CACHE_SHIFT);
229 if (IS_ERR(page))
230 return;
231
232 lock_page(page);
233 wait_on_page_writeback(page);
234 zero_user(page, offset, PAGE_CACHE_SIZE - offset);
235 set_page_dirty(page);
236 f2fs_put_page(page, 1);
237}
238
239static int truncate_blocks(struct inode *inode, u64 from)
240{
241 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
242 unsigned int blocksize = inode->i_sb->s_blocksize;
243 struct dnode_of_data dn;
244 pgoff_t free_from;
245 int count = 0;
246 int err;
247
248 free_from = (pgoff_t)
249 ((from + blocksize - 1) >> (sbi->log_blocksize));
250
251 mutex_lock_op(sbi, DATA_TRUNC);
252
253 set_new_dnode(&dn, inode, NULL, NULL, 0);
254 err = get_dnode_of_data(&dn, free_from, RDONLY_NODE);
255 if (err) {
256 if (err == -ENOENT)
257 goto free_next;
258 mutex_unlock_op(sbi, DATA_TRUNC);
259 return err;
260 }
261
262 if (IS_INODE(dn.node_page))
263 count = ADDRS_PER_INODE;
264 else
265 count = ADDRS_PER_BLOCK;
266
267 count -= dn.ofs_in_node;
268 BUG_ON(count < 0);
269 if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
270 truncate_data_blocks_range(&dn, count);
271 free_from += count;
272 }
273
274 f2fs_put_dnode(&dn);
275free_next:
276 err = truncate_inode_blocks(inode, free_from);
277 mutex_unlock_op(sbi, DATA_TRUNC);
278
279 /* lastly zero out the first data page */
280 truncate_partial_data_page(inode, from);
281
282 return err;
283}
284
285void f2fs_truncate(struct inode *inode)
286{
287 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
288 S_ISLNK(inode->i_mode)))
289 return;
290
291 if (!truncate_blocks(inode, i_size_read(inode))) {
292 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
293 mark_inode_dirty(inode);
294 }
295
296 f2fs_balance_fs(F2FS_SB(inode->i_sb));
297}
298
299static int f2fs_getattr(struct vfsmount *mnt,
300 struct dentry *dentry, struct kstat *stat)
301{
302 struct inode *inode = dentry->d_inode;
303 generic_fillattr(inode, stat);
304 stat->blocks <<= 3;
305 return 0;
306}
307
308#ifdef CONFIG_F2FS_FS_POSIX_ACL
309static void __setattr_copy(struct inode *inode, const struct iattr *attr)
310{
311 struct f2fs_inode_info *fi = F2FS_I(inode);
312 unsigned int ia_valid = attr->ia_valid;
313
314 if (ia_valid & ATTR_UID)
315 inode->i_uid = attr->ia_uid;
316 if (ia_valid & ATTR_GID)
317 inode->i_gid = attr->ia_gid;
318 if (ia_valid & ATTR_ATIME)
319 inode->i_atime = timespec_trunc(attr->ia_atime,
320 inode->i_sb->s_time_gran);
321 if (ia_valid & ATTR_MTIME)
322 inode->i_mtime = timespec_trunc(attr->ia_mtime,
323 inode->i_sb->s_time_gran);
324 if (ia_valid & ATTR_CTIME)
325 inode->i_ctime = timespec_trunc(attr->ia_ctime,
326 inode->i_sb->s_time_gran);
327 if (ia_valid & ATTR_MODE) {
328 umode_t mode = attr->ia_mode;
329
330 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
331 mode &= ~S_ISGID;
332 set_acl_inode(fi, mode);
333 }
334}
335#else
336#define __setattr_copy setattr_copy
337#endif
338
339int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
340{
341 struct inode *inode = dentry->d_inode;
342 struct f2fs_inode_info *fi = F2FS_I(inode);
343 int err;
344
345 err = inode_change_ok(inode, attr);
346 if (err)
347 return err;
348
349 if ((attr->ia_valid & ATTR_SIZE) &&
350 attr->ia_size != i_size_read(inode)) {
351 truncate_setsize(inode, attr->ia_size);
352 f2fs_truncate(inode);
353 }
354
355 __setattr_copy(inode, attr);
356
357 if (attr->ia_valid & ATTR_MODE) {
358 err = f2fs_acl_chmod(inode);
359 if (err || is_inode_flag_set(fi, FI_ACL_MODE)) {
360 inode->i_mode = fi->i_acl_mode;
361 clear_inode_flag(fi, FI_ACL_MODE);
362 }
363 }
364
365 mark_inode_dirty(inode);
366 return err;
367}
368
369const struct inode_operations f2fs_file_inode_operations = {
370 .getattr = f2fs_getattr,
371 .setattr = f2fs_setattr,
372 .get_acl = f2fs_get_acl,
373#ifdef CONFIG_F2FS_FS_XATTR
374 .setxattr = generic_setxattr,
375 .getxattr = generic_getxattr,
376 .listxattr = f2fs_listxattr,
377 .removexattr = generic_removexattr,
378#endif
379};
380
381static void fill_zero(struct inode *inode, pgoff_t index,
382 loff_t start, loff_t len)
383{
384 struct page *page;
385
386 if (!len)
387 return;
388
389 page = get_new_data_page(inode, index, false);
390
391 if (!IS_ERR(page)) {
392 wait_on_page_writeback(page);
393 zero_user(page, start, len);
394 set_page_dirty(page);
395 f2fs_put_page(page, 1);
396 }
397}
398
399int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
400{
401 pgoff_t index;
402 int err;
403
404 for (index = pg_start; index < pg_end; index++) {
405 struct dnode_of_data dn;
406 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
407
408 mutex_lock_op(sbi, DATA_TRUNC);
409 set_new_dnode(&dn, inode, NULL, NULL, 0);
410 err = get_dnode_of_data(&dn, index, RDONLY_NODE);
411 if (err) {
412 mutex_unlock_op(sbi, DATA_TRUNC);
413 if (err == -ENOENT)
414 continue;
415 return err;
416 }
417
418 if (dn.data_blkaddr != NULL_ADDR)
419 truncate_data_blocks_range(&dn, 1);
420 f2fs_put_dnode(&dn);
421 mutex_unlock_op(sbi, DATA_TRUNC);
422 }
423 return 0;
424}
425
426static int punch_hole(struct inode *inode, loff_t offset, loff_t len, int mode)
427{
428 pgoff_t pg_start, pg_end;
429 loff_t off_start, off_end;
430 int ret = 0;
431
432 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
433 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
434
435 off_start = offset & (PAGE_CACHE_SIZE - 1);
436 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
437
438 if (pg_start == pg_end) {
439 fill_zero(inode, pg_start, off_start,
440 off_end - off_start);
441 } else {
442 if (off_start)
443 fill_zero(inode, pg_start++, off_start,
444 PAGE_CACHE_SIZE - off_start);
445 if (off_end)
446 fill_zero(inode, pg_end, 0, off_end);
447
448 if (pg_start < pg_end) {
449 struct address_space *mapping = inode->i_mapping;
450 loff_t blk_start, blk_end;
451
452 blk_start = pg_start << PAGE_CACHE_SHIFT;
453 blk_end = pg_end << PAGE_CACHE_SHIFT;
454 truncate_inode_pages_range(mapping, blk_start,
455 blk_end - 1);
456 ret = truncate_hole(inode, pg_start, pg_end);
457 }
458 }
459
460 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
461 i_size_read(inode) <= (offset + len)) {
462 i_size_write(inode, offset);
463 mark_inode_dirty(inode);
464 }
465
466 return ret;
467}
468
469static int expand_inode_data(struct inode *inode, loff_t offset,
470 loff_t len, int mode)
471{
472 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
473 pgoff_t index, pg_start, pg_end;
474 loff_t new_size = i_size_read(inode);
475 loff_t off_start, off_end;
476 int ret = 0;
477
478 ret = inode_newsize_ok(inode, (len + offset));
479 if (ret)
480 return ret;
481
482 pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
483 pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
484
485 off_start = offset & (PAGE_CACHE_SIZE - 1);
486 off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
487
488 for (index = pg_start; index <= pg_end; index++) {
489 struct dnode_of_data dn;
490
491 mutex_lock_op(sbi, DATA_NEW);
492
493 set_new_dnode(&dn, inode, NULL, NULL, 0);
494 ret = get_dnode_of_data(&dn, index, 0);
495 if (ret) {
496 mutex_unlock_op(sbi, DATA_NEW);
497 break;
498 }
499
500 if (dn.data_blkaddr == NULL_ADDR) {
501 ret = reserve_new_block(&dn);
502 if (ret) {
503 f2fs_put_dnode(&dn);
504 mutex_unlock_op(sbi, DATA_NEW);
505 break;
506 }
507 }
508 f2fs_put_dnode(&dn);
509
510 mutex_unlock_op(sbi, DATA_NEW);
511
512 if (pg_start == pg_end)
513 new_size = offset + len;
514 else if (index == pg_start && off_start)
515 new_size = (index + 1) << PAGE_CACHE_SHIFT;
516 else if (index == pg_end)
517 new_size = (index << PAGE_CACHE_SHIFT) + off_end;
518 else
519 new_size += PAGE_CACHE_SIZE;
520 }
521
522 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
523 i_size_read(inode) < new_size) {
524 i_size_write(inode, new_size);
525 mark_inode_dirty(inode);
526 }
527
528 return ret;
529}
530
531static long f2fs_fallocate(struct file *file, int mode,
532 loff_t offset, loff_t len)
533{
534 struct inode *inode = file->f_path.dentry->d_inode;
535 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
536 long ret;
537
538 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
539 return -EOPNOTSUPP;
540
541 if (mode & FALLOC_FL_PUNCH_HOLE)
542 ret = punch_hole(inode, offset, len, mode);
543 else
544 ret = expand_inode_data(inode, offset, len, mode);
545
546 f2fs_balance_fs(sbi);
547 return ret;
548}
549
550#define F2FS_REG_FLMASK (~(FS_DIRSYNC_FL | FS_TOPDIR_FL))
551#define F2FS_OTHER_FLMASK (FS_NODUMP_FL | FS_NOATIME_FL)
552
553static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
554{
555 if (S_ISDIR(mode))
556 return flags;
557 else if (S_ISREG(mode))
558 return flags & F2FS_REG_FLMASK;
559 else
560 return flags & F2FS_OTHER_FLMASK;
561}
562
563long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
564{
565 struct inode *inode = filp->f_dentry->d_inode;
566 struct f2fs_inode_info *fi = F2FS_I(inode);
567 unsigned int flags;
568 int ret;
569
570 switch (cmd) {
571 case FS_IOC_GETFLAGS:
572 flags = fi->i_flags & FS_FL_USER_VISIBLE;
573 return put_user(flags, (int __user *) arg);
574 case FS_IOC_SETFLAGS:
575 {
576 unsigned int oldflags;
577
578 ret = mnt_want_write(filp->f_path.mnt);
579 if (ret)
580 return ret;
581
582 if (!inode_owner_or_capable(inode)) {
583 ret = -EACCES;
584 goto out;
585 }
586
587 if (get_user(flags, (int __user *) arg)) {
588 ret = -EFAULT;
589 goto out;
590 }
591
592 flags = f2fs_mask_flags(inode->i_mode, flags);
593
594 mutex_lock(&inode->i_mutex);
595
596 oldflags = fi->i_flags;
597
598 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
599 if (!capable(CAP_LINUX_IMMUTABLE)) {
600 mutex_unlock(&inode->i_mutex);
601 ret = -EPERM;
602 goto out;
603 }
604 }
605
606 flags = flags & FS_FL_USER_MODIFIABLE;
607 flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
608 fi->i_flags = flags;
609 mutex_unlock(&inode->i_mutex);
610
611 f2fs_set_inode_flags(inode);
612 inode->i_ctime = CURRENT_TIME;
613 mark_inode_dirty(inode);
614out:
615 mnt_drop_write(filp->f_path.mnt);
616 return ret;
617 }
618 default:
619 return -ENOTTY;
620 }
621}
622
623const struct file_operations f2fs_file_operations = {
624 .llseek = generic_file_llseek,
625 .read = do_sync_read,
626 .write = do_sync_write,
627 .aio_read = generic_file_aio_read,
628 .aio_write = generic_file_aio_write,
629 .open = generic_file_open,
630 .mmap = f2fs_file_mmap,
631 .fsync = f2fs_sync_file,
632 .fallocate = f2fs_fallocate,
633 .unlocked_ioctl = f2fs_ioctl,
634 .splice_read = generic_file_splice_read,
635 .splice_write = generic_file_splice_write,
636};
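
The hole-punching path above reduces to page arithmetic: punch_hole() splits
the byte range [offset, offset + len) into a partial first page, whole pages
in between, and a partial last page; the partial pages are zeroed via
fill_zero() and the whole pages are dropped via truncate_hole(). A user-space
sketch of just that arithmetic, assuming 4KB pages and hypothetical offset
and length values:

	#include <stdio.h>

	#define PAGE_CACHE_SHIFT	12	/* assume 4KB pages */
	#define PAGE_CACHE_SIZE		(1UL << PAGE_CACHE_SHIFT)

	int main(void)
	{
		unsigned long offset = 1000, len = 10000;	/* hypothetical hole */
		unsigned long pg_start = offset >> PAGE_CACHE_SHIFT;
		unsigned long pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
		unsigned long off_start = offset & (PAGE_CACHE_SIZE - 1);
		unsigned long off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);

		if (pg_start == pg_end) {
			printf("zero bytes [%lu, %lu) of page %lu\n",
			       off_start, off_end, pg_start);
		} else {
			if (off_start)		/* partial head page */
				printf("zero tail of page %lu from byte %lu\n",
				       pg_start++, off_start);
			if (off_end)		/* partial tail page */
				printf("zero head of page %lu up to byte %lu\n",
				       pg_end, off_end);
			if (pg_start < pg_end)	/* whole pages in between */
				printf("truncate pages [%lu, %lu)\n",
				       pg_start, pg_end);
		}
		return 0;
	}
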
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
new file mode 100644
index 000000000000..644aa3808273
--- /dev/null
+++ b/fs/f2fs/gc.c
@@ -0,0 +1,742 @@
1/*
2 * fs/f2fs/gc.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/module.h>
13#include <linux/backing-dev.h>
14#include <linux/proc_fs.h>
15#include <linux/init.h>
16#include <linux/f2fs_fs.h>
17#include <linux/kthread.h>
18#include <linux/delay.h>
19#include <linux/freezer.h>
20#include <linux/blkdev.h>
21
22#include "f2fs.h"
23#include "node.h"
24#include "segment.h"
25#include "gc.h"
26
27static struct kmem_cache *winode_slab;
28
29static int gc_thread_func(void *data)
30{
31 struct f2fs_sb_info *sbi = data;
32 wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
33 long wait_ms;
34
35 wait_ms = GC_THREAD_MIN_SLEEP_TIME;
36
37 do {
38 if (try_to_freeze())
39 continue;
40 else
41 wait_event_interruptible_timeout(*wq,
42 kthread_should_stop(),
43 msecs_to_jiffies(wait_ms));
44 if (kthread_should_stop())
45 break;
46
47 f2fs_balance_fs(sbi);
48
49 if (!test_opt(sbi, BG_GC))
50 continue;
51
52 /*
53 * [GC triggering condition]
54 * 0. GC is not conducted currently.
55 * 1. There are enough dirty segments.
56 * 2. IO subsystem is idle by checking the # of writeback pages.
57 * 3. IO subsystem is idle by checking the # of requests in
58 * bdev's request list.
59 *
60		 * Note) We have to avoid triggering GCs too frequently, because
61		 *	some segments may be invalidated soon afterwards by user
62		 *	updates or deletions. So we wait some time to collect more
63		 *	dirty segments.
64 */
65 if (!mutex_trylock(&sbi->gc_mutex))
66 continue;
67
68 if (!is_idle(sbi)) {
69 wait_ms = increase_sleep_time(wait_ms);
70 mutex_unlock(&sbi->gc_mutex);
71 continue;
72 }
73
74 if (has_enough_invalid_blocks(sbi))
75 wait_ms = decrease_sleep_time(wait_ms);
76 else
77 wait_ms = increase_sleep_time(wait_ms);
78
79 sbi->bg_gc++;
80
81 if (f2fs_gc(sbi, 1) == GC_NONE)
82 wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
83 else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
84 wait_ms = GC_THREAD_MAX_SLEEP_TIME;
85
86 } while (!kthread_should_stop());
87 return 0;
88}
89
90int start_gc_thread(struct f2fs_sb_info *sbi)
91{
92 struct f2fs_gc_kthread *gc_th;
93
94 gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
95 if (!gc_th)
96 return -ENOMEM;
97
98 sbi->gc_thread = gc_th;
99 init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
100 sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
101 GC_THREAD_NAME);
102 if (IS_ERR(gc_th->f2fs_gc_task)) {
103 kfree(gc_th);
104 return -ENOMEM;
105 }
106 return 0;
107}
108
109void stop_gc_thread(struct f2fs_sb_info *sbi)
110{
111 struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
112 if (!gc_th)
113 return;
114 kthread_stop(gc_th->f2fs_gc_task);
115 kfree(gc_th);
116 sbi->gc_thread = NULL;
117}
118
119static int select_gc_type(int gc_type)
120{
121 return (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
122}
123
124static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
125 int type, struct victim_sel_policy *p)
126{
127 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
128
129 if (p->alloc_mode) {
130 p->gc_mode = GC_GREEDY;
131 p->dirty_segmap = dirty_i->dirty_segmap[type];
132 p->ofs_unit = 1;
133 } else {
134 p->gc_mode = select_gc_type(gc_type);
135 p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
136 p->ofs_unit = sbi->segs_per_sec;
137 }
138 p->offset = sbi->last_victim[p->gc_mode];
139}
140
141static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
142 struct victim_sel_policy *p)
143{
144 if (p->gc_mode == GC_GREEDY)
145 return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
146 else if (p->gc_mode == GC_CB)
147 return UINT_MAX;
148 else /* No other gc_mode */
149 return 0;
150}
151
152static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
153{
154 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
155 unsigned int segno;
156
157 /*
158	 * If the gc_type is FG_GC, we can select victim segments that were
159	 * previously chosen by background GC.
160	 * Those segments are guaranteed to have few valid blocks.
161 */
162 segno = find_next_bit(dirty_i->victim_segmap[BG_GC],
163 TOTAL_SEGS(sbi), 0);
164 if (segno < TOTAL_SEGS(sbi)) {
165 clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
166 return segno;
167 }
168 return NULL_SEGNO;
169}
170
171static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
172{
173 struct sit_info *sit_i = SIT_I(sbi);
174 unsigned int secno = GET_SECNO(sbi, segno);
175 unsigned int start = secno * sbi->segs_per_sec;
176 unsigned long long mtime = 0;
177 unsigned int vblocks;
178 unsigned char age = 0;
179 unsigned char u;
180 unsigned int i;
181
182 for (i = 0; i < sbi->segs_per_sec; i++)
183 mtime += get_seg_entry(sbi, start + i)->mtime;
184 vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
185
186 mtime = div_u64(mtime, sbi->segs_per_sec);
187 vblocks = div_u64(vblocks, sbi->segs_per_sec);
188
189 u = (vblocks * 100) >> sbi->log_blocks_per_seg;
190
191 /* Handle if the system time is changed by user */
192 if (mtime < sit_i->min_mtime)
193 sit_i->min_mtime = mtime;
194 if (mtime > sit_i->max_mtime)
195 sit_i->max_mtime = mtime;
196 if (sit_i->max_mtime != sit_i->min_mtime)
197 age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
198 sit_i->max_mtime - sit_i->min_mtime);
199
200 return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
201}
202
203static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
204 struct victim_sel_policy *p)
205{
206 if (p->alloc_mode == SSR)
207 return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
208
209 /* alloc_mode == LFS */
210 if (p->gc_mode == GC_GREEDY)
211 return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
212 else
213 return get_cb_cost(sbi, segno);
214}
215
216/*
217 * This function is called from two paths.
218 * One is garbage collection and the other is SSR segment selection.
219 * When it is called during GC, it just gets a victim segment
220 * and does not remove it from the dirty seglist.
221 * When it is called from SSR segment selection, it finds a segment
222 * with the minimum valid blocks and removes it from the dirty seglist.
223 */
224static int get_victim_by_default(struct f2fs_sb_info *sbi,
225 unsigned int *result, int gc_type, int type, char alloc_mode)
226{
227 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
228 struct victim_sel_policy p;
229 unsigned int segno;
230 int nsearched = 0;
231
232 p.alloc_mode = alloc_mode;
233 select_policy(sbi, gc_type, type, &p);
234
235 p.min_segno = NULL_SEGNO;
236 p.min_cost = get_max_cost(sbi, &p);
237
238 mutex_lock(&dirty_i->seglist_lock);
239
240 if (p.alloc_mode == LFS && gc_type == FG_GC) {
241 p.min_segno = check_bg_victims(sbi);
242 if (p.min_segno != NULL_SEGNO)
243 goto got_it;
244 }
245
246 while (1) {
247 unsigned long cost;
248
249 segno = find_next_bit(p.dirty_segmap,
250 TOTAL_SEGS(sbi), p.offset);
251 if (segno >= TOTAL_SEGS(sbi)) {
252 if (sbi->last_victim[p.gc_mode]) {
253 sbi->last_victim[p.gc_mode] = 0;
254 p.offset = 0;
255 continue;
256 }
257 break;
258 }
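		/* advance the scan cursor to the next ofs_unit-aligned slot */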
259 p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;
260
261 if (test_bit(segno, dirty_i->victim_segmap[FG_GC]))
262 continue;
263 if (gc_type == BG_GC &&
264 test_bit(segno, dirty_i->victim_segmap[BG_GC]))
265 continue;
266 if (IS_CURSEC(sbi, GET_SECNO(sbi, segno)))
267 continue;
268
269 cost = get_gc_cost(sbi, segno, &p);
270
271 if (p.min_cost > cost) {
272 p.min_segno = segno;
273 p.min_cost = cost;
274 }
275
276 if (cost == get_max_cost(sbi, &p))
277 continue;
278
279 if (nsearched++ >= MAX_VICTIM_SEARCH) {
280 sbi->last_victim[p.gc_mode] = segno;
281 break;
282 }
283 }
284got_it:
285 if (p.min_segno != NULL_SEGNO) {
286 *result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
287 if (p.alloc_mode == LFS) {
288 int i;
289 for (i = 0; i < p.ofs_unit; i++)
290 set_bit(*result + i,
291 dirty_i->victim_segmap[gc_type]);
292 }
293 }
294 mutex_unlock(&dirty_i->seglist_lock);
295
296 return (p.min_segno == NULL_SEGNO) ? 0 : 1;
297}
298
299static const struct victim_selection default_v_ops = {
300 .get_victim = get_victim_by_default,
301};
302
303static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
304{
305 struct list_head *this;
306 struct inode_entry *ie;
307
308 list_for_each(this, ilist) {
309 ie = list_entry(this, struct inode_entry, list);
310 if (ie->inode->i_ino == ino)
311 return ie->inode;
312 }
313 return NULL;
314}
315
316static void add_gc_inode(struct inode *inode, struct list_head *ilist)
317{
318 struct list_head *this;
319 struct inode_entry *new_ie, *ie;
320
321 list_for_each(this, ilist) {
322 ie = list_entry(this, struct inode_entry, list);
323 if (ie->inode == inode) {
324 iput(inode);
325 return;
326 }
327 }
328repeat:
329 new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
330 if (!new_ie) {
331 cond_resched();
332 goto repeat;
333 }
334 new_ie->inode = inode;
335 list_add_tail(&new_ie->list, ilist);
336}
337
338static void put_gc_inode(struct list_head *ilist)
339{
340 struct inode_entry *ie, *next_ie;
341 list_for_each_entry_safe(ie, next_ie, ilist, list) {
342 iput(ie->inode);
343 list_del(&ie->list);
344 kmem_cache_free(winode_slab, ie);
345 }
346}
347
348static int check_valid_map(struct f2fs_sb_info *sbi,
349 unsigned int segno, int offset)
350{
351 struct sit_info *sit_i = SIT_I(sbi);
352 struct seg_entry *sentry;
353 int ret;
354
355 mutex_lock(&sit_i->sentry_lock);
356 sentry = get_seg_entry(sbi, segno);
357 ret = f2fs_test_bit(offset, sentry->cur_valid_map);
358 mutex_unlock(&sit_i->sentry_lock);
359 return ret ? GC_OK : GC_NEXT;
360}
361
362/*
363 * This function compares the node address recorded in the summary with
364 * the one in the NAT. If the node is valid, it is migrated with cold
365 * status; otherwise (an invalid node) it is ignored.
366 */
367static int gc_node_segment(struct f2fs_sb_info *sbi,
368 struct f2fs_summary *sum, unsigned int segno, int gc_type)
369{
370 bool initial = true;
371 struct f2fs_summary *entry;
372 int off;
373
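	/*
	 * Two passes over the summary entries: the first pass only
	 * issues readahead for the node pages, the second pass dirties
	 * the valid ones so they get written out to a new location.
	 */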
374next_step:
375 entry = sum;
376 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
377 nid_t nid = le32_to_cpu(entry->nid);
378 struct page *node_page;
379 int err;
380
381		/*
382		 * Make sure enough free segments remain to write out
383		 * all the dirty node pages before the next checkpoint,
384		 * so check the space needed by dirty node pages.
385		 */
386 if (should_do_checkpoint(sbi)) {
387 mutex_lock(&sbi->cp_mutex);
388 block_operations(sbi);
389 return GC_BLOCKED;
390 }
391
392 err = check_valid_map(sbi, segno, off);
393 if (err == GC_ERROR)
394 return err;
395 else if (err == GC_NEXT)
396 continue;
397
398 if (initial) {
399 ra_node_page(sbi, nid);
400 continue;
401 }
402 node_page = get_node_page(sbi, nid);
403 if (IS_ERR(node_page))
404 continue;
405
406 /* set page dirty and write it */
407 if (!PageWriteback(node_page))
408 set_page_dirty(node_page);
409 f2fs_put_page(node_page, 1);
410 stat_inc_node_blk_count(sbi, 1);
411 }
412 if (initial) {
413 initial = false;
414 goto next_step;
415 }
416
417 if (gc_type == FG_GC) {
418 struct writeback_control wbc = {
419 .sync_mode = WB_SYNC_ALL,
420 .nr_to_write = LONG_MAX,
421 .for_reclaim = 0,
422 };
423 sync_node_pages(sbi, 0, &wbc);
424 }
425 return GC_DONE;
426}
427
428/*
429 * Calculate the start block index of the data covered by this node page
430 */
431block_t start_bidx_of_node(unsigned int node_ofs)
432{
433 block_t start_bidx;
434 unsigned int bidx, indirect_blks;
435 int dec;
436
437 indirect_blks = 2 * NIDS_PER_BLOCK + 4;
438
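	/*
	 * node_ofs is this node page's index within the inode's node
	 * tree: 0 is the inode itself, 1 and 2 the two direct node
	 * blocks, then the (double) indirect blocks; "dec" discounts
	 * the indirect node pages, which carry no data addresses.
	 */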
439 start_bidx = 1;
440 if (node_ofs == 0) {
441 start_bidx = 0;
442 } else if (node_ofs <= 2) {
443 bidx = node_ofs - 1;
444 } else if (node_ofs <= indirect_blks) {
445 dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
446 bidx = node_ofs - 2 - dec;
447 } else {
448 dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
449 bidx = node_ofs - 5 - dec;
450 }
451
452 if (start_bidx)
453 start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
454 return start_bidx;
455}
456
457static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
458 struct node_info *dni, block_t blkaddr, unsigned int *nofs)
459{
460 struct page *node_page;
461 nid_t nid;
462 unsigned int ofs_in_node;
463 block_t source_blkaddr;
464
465 nid = le32_to_cpu(sum->nid);
466 ofs_in_node = le16_to_cpu(sum->ofs_in_node);
467
468 node_page = get_node_page(sbi, nid);
469 if (IS_ERR(node_page))
470 return GC_NEXT;
471
472 get_node_info(sbi, nid, dni);
473
474 if (sum->version != dni->version) {
475 f2fs_put_page(node_page, 1);
476 return GC_NEXT;
477 }
478
479 *nofs = ofs_of_node(node_page);
480 source_blkaddr = datablock_addr(node_page, ofs_in_node);
481 f2fs_put_page(node_page, 1);
482
483 if (source_blkaddr != blkaddr)
484 return GC_NEXT;
485 return GC_OK;
486}
487
488static void move_data_page(struct inode *inode, struct page *page, int gc_type)
489{
490 if (page->mapping != inode->i_mapping)
491 goto out;
492
493 if (inode != page->mapping->host)
494 goto out;
495
496 if (PageWriteback(page))
497 goto out;
498
499 if (gc_type == BG_GC) {
500 set_page_dirty(page);
501 set_cold_data(page);
502 } else {
503 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
504 mutex_lock_op(sbi, DATA_WRITE);
505 if (clear_page_dirty_for_io(page) &&
506 S_ISDIR(inode->i_mode)) {
507 dec_page_count(sbi, F2FS_DIRTY_DENTS);
508 inode_dec_dirty_dents(inode);
509 }
510 set_cold_data(page);
511 do_write_data_page(page);
512 mutex_unlock_op(sbi, DATA_WRITE);
513 clear_cold_data(page);
514 }
515out:
516 f2fs_put_page(page, 1);
517}
518
519/*
520 * This function tries to get the parent node of a victim data block,
521 * and checks the data block's validity. If the block is valid, it is
522 * copied with cold status and the parent node is updated.
523 * If the parent node is not valid or the data block address differs,
524 * the victim data block is ignored.
525 */
526static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
527 struct list_head *ilist, unsigned int segno, int gc_type)
528{
529 struct super_block *sb = sbi->sb;
530 struct f2fs_summary *entry;
531 block_t start_addr;
532 int err, off;
533 int phase = 0;
534
535 start_addr = START_BLOCK(sbi, segno);
536
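	/*
	 * Four phases over the summary entries:
	 * 0: readahead the node pages, 1: readahead the inode pages,
	 * 2: grab the inodes and readahead their data pages,
	 * 3: move the valid data pages.
	 */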
537next_step:
538 entry = sum;
539 for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
540 struct page *data_page;
541 struct inode *inode;
542 struct node_info dni; /* dnode info for the data */
543 unsigned int ofs_in_node, nofs;
544 block_t start_bidx;
545
546		/*
547		 * Make sure enough free segments remain to write out
548		 * all the dirty node pages before the next checkpoint,
549		 * so check the space needed by dirty node pages.
550		 */
551 if (should_do_checkpoint(sbi)) {
552 mutex_lock(&sbi->cp_mutex);
553 block_operations(sbi);
554 err = GC_BLOCKED;
555 goto stop;
556 }
557
558 err = check_valid_map(sbi, segno, off);
559 if (err == GC_ERROR)
560 goto stop;
561 else if (err == GC_NEXT)
562 continue;
563
564 if (phase == 0) {
565 ra_node_page(sbi, le32_to_cpu(entry->nid));
566 continue;
567 }
568
569		/* Get the inode by ino, after checking the dnode's validity */
570 err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs);
571 if (err == GC_ERROR)
572 goto stop;
573 else if (err == GC_NEXT)
574 continue;
575
576 if (phase == 1) {
577 ra_node_page(sbi, dni.ino);
578 continue;
579 }
580
581 start_bidx = start_bidx_of_node(nofs);
582 ofs_in_node = le16_to_cpu(entry->ofs_in_node);
583
584 if (phase == 2) {
585 inode = f2fs_iget_nowait(sb, dni.ino);
586 if (IS_ERR(inode))
587 continue;
588
589 data_page = find_data_page(inode,
590 start_bidx + ofs_in_node);
591 if (IS_ERR(data_page))
592 goto next_iput;
593
594 f2fs_put_page(data_page, 0);
595 add_gc_inode(inode, ilist);
596 } else {
597 inode = find_gc_inode(dni.ino, ilist);
598 if (inode) {
599 data_page = get_lock_data_page(inode,
600 start_bidx + ofs_in_node);
601 if (IS_ERR(data_page))
602 continue;
603 move_data_page(inode, data_page, gc_type);
604 stat_inc_data_blk_count(sbi, 1);
605 }
606 }
607 continue;
608next_iput:
609 iput(inode);
610 }
611 if (++phase < 4)
612 goto next_step;
613 err = GC_DONE;
614stop:
615 if (gc_type == FG_GC)
616 f2fs_submit_bio(sbi, DATA, true);
617 return err;
618}
619
620static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
621 int gc_type, int type)
622{
623 struct sit_info *sit_i = SIT_I(sbi);
624 int ret;
625 mutex_lock(&sit_i->sentry_lock);
626 ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS);
627 mutex_unlock(&sit_i->sentry_lock);
628 return ret;
629}
630
631static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
632 struct list_head *ilist, int gc_type)
633{
634 struct page *sum_page;
635 struct f2fs_summary_block *sum;
636 int ret = GC_DONE;
637
638 /* read segment summary of victim */
639 sum_page = get_sum_page(sbi, segno);
640 if (IS_ERR(sum_page))
641 return GC_ERROR;
642
643	/*
644	 * CP needs to lock sum_page. At this point, we don't need to lock
645	 * this page ourselves, because the summary page is not going
646	 * anywhere, nor will it be updated before GC is done.
647	 */
648 unlock_page(sum_page);
649 sum = page_address(sum_page);
650
651 switch (GET_SUM_TYPE((&sum->footer))) {
652 case SUM_TYPE_NODE:
653 ret = gc_node_segment(sbi, sum->entries, segno, gc_type);
654 break;
655 case SUM_TYPE_DATA:
656 ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
657 break;
658 }
659 stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
660 stat_inc_call_count(sbi->stat_info);
661
662 f2fs_put_page(sum_page, 0);
663 return ret;
664}
665
666int f2fs_gc(struct f2fs_sb_info *sbi, int nGC)
667{
668 unsigned int segno;
669 int old_free_secs, cur_free_secs;
670 int gc_status, nfree;
671 struct list_head ilist;
672 int gc_type = BG_GC;
673
674 INIT_LIST_HEAD(&ilist);
675gc_more:
676 nfree = 0;
677 gc_status = GC_NONE;
678
679 if (has_not_enough_free_secs(sbi))
680 old_free_secs = reserved_sections(sbi);
681 else
682 old_free_secs = free_sections(sbi);
683
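	/*
	 * Loop until GC has gained more than nGC free sections over
	 * the baseline captured above.
	 */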
684 while (sbi->sb->s_flags & MS_ACTIVE) {
685 int i;
686 if (has_not_enough_free_secs(sbi))
687 gc_type = FG_GC;
688
689 cur_free_secs = free_sections(sbi) + nfree;
690
691 /* We got free space successfully. */
692 if (nGC < cur_free_secs - old_free_secs)
693 break;
694
695 if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
696 break;
697
698 for (i = 0; i < sbi->segs_per_sec; i++) {
699			/*
700			 * do_garbage_collect returns one of three gc_status
701			 * values: GC_ERROR, GC_DONE, or GC_BLOCKED.
702			 * If GC finishes uncleanly, we have to return
703			 * the victim to the dirty segment list.
704			 */
705 gc_status = do_garbage_collect(sbi, segno + i,
706 &ilist, gc_type);
707 if (gc_status != GC_DONE)
708 goto stop;
709 nfree++;
710 }
711 }
712stop:
713 if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
714 write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
715 if (nfree)
716 goto gc_more;
717 }
718 mutex_unlock(&sbi->gc_mutex);
719
720 put_gc_inode(&ilist);
721 BUG_ON(!list_empty(&ilist));
722 return gc_status;
723}
724
725void build_gc_manager(struct f2fs_sb_info *sbi)
726{
727 DIRTY_I(sbi)->v_ops = &default_v_ops;
728}
729
730int create_gc_caches(void)
731{
732 winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
733 sizeof(struct inode_entry), NULL);
734 if (!winode_slab)
735 return -ENOMEM;
736 return 0;
737}
738
739void destroy_gc_caches(void)
740{
741 kmem_cache_destroy(winode_slab);
742}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
new file mode 100644
index 000000000000..b026d9354ccd
--- /dev/null
+++ b/fs/f2fs/gc.h
@@ -0,0 +1,117 @@
1/*
2 * fs/f2fs/gc.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#define GC_THREAD_NAME "f2fs_gc_task"
12#define GC_THREAD_MIN_WB_PAGES 1 /*
13 * a threshold to determine
14 * whether IO subsystem is idle
15 * or not
16 */
17#define GC_THREAD_MIN_SLEEP_TIME 10000 /* milliseconds */
18#define GC_THREAD_MAX_SLEEP_TIME 30000
19#define GC_THREAD_NOGC_SLEEP_TIME 10000
20#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
21#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
22
23/* Search max. number of dirty segments to select a victim segment */
24#define MAX_VICTIM_SEARCH 20
25
26enum {
27 GC_NONE = 0,
28 GC_ERROR,
29 GC_OK,
30 GC_NEXT,
31 GC_BLOCKED,
32 GC_DONE,
33};
34
35struct f2fs_gc_kthread {
36 struct task_struct *f2fs_gc_task;
37 wait_queue_head_t gc_wait_queue_head;
38};
39
40struct inode_entry {
41 struct list_head list;
42 struct inode *inode;
43};
44
45/*
46 * inline functions
47 */
48static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
49{
50 if (free_segments(sbi) < overprovision_segments(sbi))
51 return 0;
52 else
53 return (free_segments(sbi) - overprovision_segments(sbi))
54 << sbi->log_blocks_per_seg;
55}
56
57static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
58{
59 return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
60}
61
62static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
63{
64 block_t reclaimable_user_blocks = sbi->user_block_count -
65 written_block_count(sbi);
66 return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
67}
68
69static inline long increase_sleep_time(long wait)
70{
71 wait += GC_THREAD_MIN_SLEEP_TIME;
72 if (wait > GC_THREAD_MAX_SLEEP_TIME)
73 wait = GC_THREAD_MAX_SLEEP_TIME;
74 return wait;
75}
76
77static inline long decrease_sleep_time(long wait)
78{
79 wait -= GC_THREAD_MIN_SLEEP_TIME;
80 if (wait <= GC_THREAD_MIN_SLEEP_TIME)
81 wait = GC_THREAD_MIN_SLEEP_TIME;
82 return wait;
83}
84
85static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
86{
87 block_t invalid_user_blocks = sbi->user_block_count -
88 written_block_count(sbi);
89	/*
90	 * Background GC is triggered under the following conditions:
91	 * 1. there are a large number of invalid blocks, and
92	 * 2. there is not enough free space.
93	 */
94 if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
95 free_user_blocks(sbi) < limit_free_user_blocks(sbi))
96 return true;
97 return false;
98}
99
100static inline int is_idle(struct f2fs_sb_info *sbi)
101{
102 struct block_device *bdev = sbi->sb->s_bdev;
103 struct request_queue *q = bdev_get_queue(bdev);
104 struct request_list *rl = &q->root_rl;
105 return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
106}
107
108static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi)
109{
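	/*
	 * A sketch of the intent: estimate how many sections the dirty
	 * node and dentry pages would consume, with dentry pages
	 * weighted twice (flushing dentry blocks presumably dirties
	 * their node blocks too), and checkpoint while two spare
	 * sections still remain.
	 */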
110 unsigned int pages_per_sec = sbi->segs_per_sec *
111 (1 << sbi->log_blocks_per_seg);
112 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
113 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
114 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
115 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
116 return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2);
117}
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
new file mode 100644
index 000000000000..a60f04200f8b
--- /dev/null
+++ b/fs/f2fs/hash.c
@@ -0,0 +1,97 @@
1/*
2 * fs/f2fs/hash.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext3/hash.c
8 *
9 * Copyright (C) 2002 by Theodore Ts'o
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15#include <linux/types.h>
16#include <linux/fs.h>
17#include <linux/f2fs_fs.h>
18#include <linux/cryptohash.h>
19#include <linux/pagemap.h>
20
21#include "f2fs.h"
22
23/*
24 * Hashing code copied from ext3
25 */
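/* DELTA is the TEA key-schedule constant, floor(2^32 / golden ratio) */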
26#define DELTA 0x9E3779B9
27
28static void TEA_transform(unsigned int buf[4], unsigned int const in[])
29{
30 __u32 sum = 0;
31 __u32 b0 = buf[0], b1 = buf[1];
32 __u32 a = in[0], b = in[1], c = in[2], d = in[3];
33 int n = 16;
34
35 do {
36 sum += DELTA;
37 b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b);
38 b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d);
39 } while (--n);
40
41 buf[0] += b0;
42 buf[1] += b1;
43}
44
45static void str2hashbuf(const char *msg, int len, unsigned int *buf, int num)
46{
47 unsigned pad, val;
48 int i;
49
50 pad = (__u32)len | ((__u32)len << 8);
51 pad |= pad << 16;
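	/* pad now carries the length byte replicated into all four bytes */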
52
53 val = pad;
54 if (len > num * 4)
55 len = num * 4;
56 for (i = 0; i < len; i++) {
57 if ((i % 4) == 0)
58 val = pad;
59 val = msg[i] + (val << 8);
60 if ((i % 4) == 3) {
61 *buf++ = val;
62 val = pad;
63 num--;
64 }
65 }
66 if (--num >= 0)
67 *buf++ = val;
68 while (--num >= 0)
69 *buf++ = pad;
70}
71
72f2fs_hash_t f2fs_dentry_hash(const char *name, int len)
73{
74 __u32 hash, minor_hash;
75 f2fs_hash_t f2fs_hash;
76 const char *p;
77 __u32 in[8], buf[4];
78
79 /* Initialize the default seed for the hash checksum functions */
80 buf[0] = 0x67452301;
81 buf[1] = 0xefcdab89;
82 buf[2] = 0x98badcfe;
83 buf[3] = 0x10325476;
84
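	/* mix the name into the hash state, 16 bytes (four words) at a time */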
85 p = name;
86 while (len > 0) {
87 str2hashbuf(p, len, in, 4);
88 TEA_transform(buf, in);
89 len -= 16;
90 p += 16;
91 }
92 hash = buf[0];
93 minor_hash = buf[1];
94
95 f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
96 return f2fs_hash;
97}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
new file mode 100644
index 000000000000..df5fb381ebf1
--- /dev/null
+++ b/fs/f2fs/inode.c
@@ -0,0 +1,268 @@
1/*
2 * fs/f2fs/inode.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/buffer_head.h>
14#include <linux/writeback.h>
15
16#include "f2fs.h"
17#include "node.h"
18
19struct f2fs_iget_args {
20 u64 ino;
21 int on_free;
22};
23
24void f2fs_set_inode_flags(struct inode *inode)
25{
26 unsigned int flags = F2FS_I(inode)->i_flags;
27
28 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE |
29 S_NOATIME | S_DIRSYNC);
30
31 if (flags & FS_SYNC_FL)
32 inode->i_flags |= S_SYNC;
33 if (flags & FS_APPEND_FL)
34 inode->i_flags |= S_APPEND;
35 if (flags & FS_IMMUTABLE_FL)
36 inode->i_flags |= S_IMMUTABLE;
37 if (flags & FS_NOATIME_FL)
38 inode->i_flags |= S_NOATIME;
39 if (flags & FS_DIRSYNC_FL)
40 inode->i_flags |= S_DIRSYNC;
41}
42
43static int f2fs_iget_test(struct inode *inode, void *data)
44{
45 struct f2fs_iget_args *args = data;
46
47 if (inode->i_ino != args->ino)
48 return 0;
49 if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
50 args->on_free = 1;
51 return 0;
52 }
53 return 1;
54}
55
56struct inode *f2fs_iget_nowait(struct super_block *sb, unsigned long ino)
57{
58 struct f2fs_iget_args args = {
59 .ino = ino,
60 .on_free = 0
61 };
62 struct inode *inode = ilookup5(sb, ino, f2fs_iget_test, &args);
63
64 if (inode)
65 return inode;
66 if (!args.on_free)
67 return f2fs_iget(sb, ino);
68 return ERR_PTR(-ENOENT);
69}
70
71static int do_read_inode(struct inode *inode)
72{
73 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
74 struct f2fs_inode_info *fi = F2FS_I(inode);
75 struct page *node_page;
76 struct f2fs_node *rn;
77 struct f2fs_inode *ri;
78
79 /* Check if ino is within scope */
80 check_nid_range(sbi, inode->i_ino);
81
82 node_page = get_node_page(sbi, inode->i_ino);
83 if (IS_ERR(node_page))
84 return PTR_ERR(node_page);
85
86 rn = page_address(node_page);
87 ri = &(rn->i);
88
89 inode->i_mode = le16_to_cpu(ri->i_mode);
90 i_uid_write(inode, le32_to_cpu(ri->i_uid));
91 i_gid_write(inode, le32_to_cpu(ri->i_gid));
92 set_nlink(inode, le32_to_cpu(ri->i_links));
93 inode->i_size = le64_to_cpu(ri->i_size);
94 inode->i_blocks = le64_to_cpu(ri->i_blocks);
95
96 inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
97 inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
98 inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
99 inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
100 inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
101 inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
102 inode->i_generation = le32_to_cpu(ri->i_generation);
103
104 fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
105 fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
106 fi->i_flags = le32_to_cpu(ri->i_flags);
107 fi->flags = 0;
108 fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1;
109 fi->i_advise = ri->i_advise;
110 fi->i_pino = le32_to_cpu(ri->i_pino);
111 get_extent_info(&fi->ext, ri->i_ext);
112 f2fs_put_page(node_page, 1);
113 return 0;
114}
115
116struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
117{
118 struct f2fs_sb_info *sbi = F2FS_SB(sb);
119 struct inode *inode;
120 int ret;
121
122 inode = iget_locked(sb, ino);
123 if (!inode)
124 return ERR_PTR(-ENOMEM);
125 if (!(inode->i_state & I_NEW))
126 return inode;
127 if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
128 goto make_now;
129
130 ret = do_read_inode(inode);
131 if (ret)
132 goto bad_inode;
133
134 if (!sbi->por_doing && inode->i_nlink == 0) {
135 ret = -ENOENT;
136 goto bad_inode;
137 }
138
139make_now:
140 if (ino == F2FS_NODE_INO(sbi)) {
141 inode->i_mapping->a_ops = &f2fs_node_aops;
142 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
143 } else if (ino == F2FS_META_INO(sbi)) {
144 inode->i_mapping->a_ops = &f2fs_meta_aops;
145 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
146 } else if (S_ISREG(inode->i_mode)) {
147 inode->i_op = &f2fs_file_inode_operations;
148 inode->i_fop = &f2fs_file_operations;
149 inode->i_mapping->a_ops = &f2fs_dblock_aops;
150 } else if (S_ISDIR(inode->i_mode)) {
151 inode->i_op = &f2fs_dir_inode_operations;
152 inode->i_fop = &f2fs_dir_operations;
153 inode->i_mapping->a_ops = &f2fs_dblock_aops;
154 mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE |
155 __GFP_ZERO);
156 } else if (S_ISLNK(inode->i_mode)) {
157 inode->i_op = &f2fs_symlink_inode_operations;
158 inode->i_mapping->a_ops = &f2fs_dblock_aops;
159 } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
160 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
161 inode->i_op = &f2fs_special_inode_operations;
162 init_special_inode(inode, inode->i_mode, inode->i_rdev);
163 } else {
164 ret = -EIO;
165 goto bad_inode;
166 }
167 unlock_new_inode(inode);
168
169 return inode;
170
171bad_inode:
172 iget_failed(inode);
173 return ERR_PTR(ret);
174}
175
176void update_inode(struct inode *inode, struct page *node_page)
177{
178 struct f2fs_node *rn;
179 struct f2fs_inode *ri;
180
181 wait_on_page_writeback(node_page);
182
183 rn = page_address(node_page);
184 ri = &(rn->i);
185
186 ri->i_mode = cpu_to_le16(inode->i_mode);
187 ri->i_advise = F2FS_I(inode)->i_advise;
188 ri->i_uid = cpu_to_le32(i_uid_read(inode));
189 ri->i_gid = cpu_to_le32(i_gid_read(inode));
190 ri->i_links = cpu_to_le32(inode->i_nlink);
191 ri->i_size = cpu_to_le64(i_size_read(inode));
192 ri->i_blocks = cpu_to_le64(inode->i_blocks);
193 set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
194
195 ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
196 ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
197 ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
198 ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
199 ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
200 ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
201 ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth);
202 ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
203 ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
204 ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
205 ri->i_generation = cpu_to_le32(inode->i_generation);
206 set_page_dirty(node_page);
207}
208
209int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
210{
211 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
212 struct page *node_page;
213 bool need_lock = false;
214
215 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
216 inode->i_ino == F2FS_META_INO(sbi))
217 return 0;
218
219 node_page = get_node_page(sbi, inode->i_ino);
220 if (IS_ERR(node_page))
221 return PTR_ERR(node_page);
222
223 if (!PageDirty(node_page)) {
224 need_lock = true;
225 f2fs_put_page(node_page, 1);
226 mutex_lock(&sbi->write_inode);
227 node_page = get_node_page(sbi, inode->i_ino);
228 if (IS_ERR(node_page)) {
229 mutex_unlock(&sbi->write_inode);
230 return PTR_ERR(node_page);
231 }
232 }
233 update_inode(inode, node_page);
234 f2fs_put_page(node_page, 1);
235 if (need_lock)
236 mutex_unlock(&sbi->write_inode);
237 return 0;
238}
239
240/*
241 * Called at the last iput() if i_nlink is zero
242 */
243void f2fs_evict_inode(struct inode *inode)
244{
245 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
246
247 truncate_inode_pages(&inode->i_data, 0);
248
249 if (inode->i_ino == F2FS_NODE_INO(sbi) ||
250 inode->i_ino == F2FS_META_INO(sbi))
251 goto no_delete;
252
253 BUG_ON(atomic_read(&F2FS_I(inode)->dirty_dents));
254 remove_dirty_dir_inode(inode);
255
256 if (inode->i_nlink || is_bad_inode(inode))
257 goto no_delete;
258
259 set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
260 i_size_write(inode, 0);
261
262 if (F2FS_HAS_BLOCKS(inode))
263 f2fs_truncate(inode);
264
265 remove_inode_page(inode);
266no_delete:
267 clear_inode(inode);
268}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
new file mode 100644
index 000000000000..89b7675dc377
--- /dev/null
+++ b/fs/f2fs/namei.c
@@ -0,0 +1,503 @@
1/*
2 * fs/f2fs/namei.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/pagemap.h>
14#include <linux/sched.h>
15#include <linux/ctype.h>
16
17#include "f2fs.h"
18#include "xattr.h"
19#include "acl.h"
20
21static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
22{
23 struct super_block *sb = dir->i_sb;
24 struct f2fs_sb_info *sbi = F2FS_SB(sb);
25 nid_t ino;
26 struct inode *inode;
27 bool nid_free = false;
28 int err;
29
30 inode = new_inode(sb);
31 if (!inode)
32 return ERR_PTR(-ENOMEM);
33
34 mutex_lock_op(sbi, NODE_NEW);
35 if (!alloc_nid(sbi, &ino)) {
36 mutex_unlock_op(sbi, NODE_NEW);
37 err = -ENOSPC;
38 goto fail;
39 }
40 mutex_unlock_op(sbi, NODE_NEW);
41
42 inode->i_uid = current_fsuid();
43
44 if (dir->i_mode & S_ISGID) {
45 inode->i_gid = dir->i_gid;
46 if (S_ISDIR(mode))
47 mode |= S_ISGID;
48 } else {
49 inode->i_gid = current_fsgid();
50 }
51
52 inode->i_ino = ino;
53 inode->i_mode = mode;
54 inode->i_blocks = 0;
55 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
56 inode->i_generation = sbi->s_next_generation++;
57
58 err = insert_inode_locked(inode);
59 if (err) {
60 err = -EINVAL;
61 nid_free = true;
62 goto out;
63 }
64
65 mark_inode_dirty(inode);
66 return inode;
67
68out:
69 clear_nlink(inode);
70 unlock_new_inode(inode);
71fail:
72 iput(inode);
73 if (nid_free)
74 alloc_nid_failed(sbi, ino);
75 return ERR_PTR(err);
76}
77
78static int is_multimedia_file(const unsigned char *s, const char *sub)
79{
80 int slen = strlen(s);
81 int sublen = strlen(sub);
82 int ret;
83
84 if (sublen > slen)
85 return 1;
86
87 ret = memcmp(s + slen - sublen, sub, sublen);
88 if (ret) { /* compare upper case */
89 int i;
90 char upper_sub[8];
91 for (i = 0; i < sublen && i < sizeof(upper_sub); i++)
92 upper_sub[i] = toupper(sub[i]);
93 return memcmp(s + slen - sublen, upper_sub, sublen);
94 }
95
96 return ret;
97}
98
99/*
100 * Set multimedia files as cold files for hot/cold data separation
101 */
102static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
103 const unsigned char *name)
104{
105 int i;
106 __u8 (*extlist)[8] = sbi->raw_super->extension_list;
107
108 int count = le32_to_cpu(sbi->raw_super->extension_count);
109 for (i = 0; i < count; i++) {
110 if (!is_multimedia_file(name, extlist[i])) {
111 F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
112 break;
113 }
114 }
115}
116
117static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
118 bool excl)
119{
120 struct super_block *sb = dir->i_sb;
121 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 struct inode *inode;
123 nid_t ino = 0;
124 int err;
125
126 inode = f2fs_new_inode(dir, mode);
127 if (IS_ERR(inode))
128 return PTR_ERR(inode);
129
130 if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
131 set_cold_file(sbi, inode, dentry->d_name.name);
132
133 inode->i_op = &f2fs_file_inode_operations;
134 inode->i_fop = &f2fs_file_operations;
135 inode->i_mapping->a_ops = &f2fs_dblock_aops;
136 ino = inode->i_ino;
137
138 err = f2fs_add_link(dentry, inode);
139 if (err)
140 goto out;
141
142 alloc_nid_done(sbi, ino);
143
144 if (!sbi->por_doing)
145 d_instantiate(dentry, inode);
146 unlock_new_inode(inode);
147
148 f2fs_balance_fs(sbi);
149 return 0;
150out:
151 clear_nlink(inode);
152 unlock_new_inode(inode);
153 iput(inode);
154 alloc_nid_failed(sbi, ino);
155 return err;
156}
157
158static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
159 struct dentry *dentry)
160{
161 struct inode *inode = old_dentry->d_inode;
162 struct super_block *sb = dir->i_sb;
163 struct f2fs_sb_info *sbi = F2FS_SB(sb);
164 int err;
165
166 inode->i_ctime = CURRENT_TIME;
167 atomic_inc(&inode->i_count);
168
169 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
170 err = f2fs_add_link(dentry, inode);
171 if (err)
172 goto out;
173
174 d_instantiate(dentry, inode);
175
176 f2fs_balance_fs(sbi);
177 return 0;
178out:
179 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
180 iput(inode);
181 return err;
182}
183
184struct dentry *f2fs_get_parent(struct dentry *child)
185{
186 struct qstr dotdot = QSTR_INIT("..", 2);
187 unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
188 if (!ino)
189 return ERR_PTR(-ENOENT);
190 return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
191}
192
193static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
194 unsigned int flags)
195{
196 struct inode *inode = NULL;
197 struct f2fs_dir_entry *de;
198 struct page *page;
199
200 if (dentry->d_name.len > F2FS_MAX_NAME_LEN)
201 return ERR_PTR(-ENAMETOOLONG);
202
203 de = f2fs_find_entry(dir, &dentry->d_name, &page);
204 if (de) {
205 nid_t ino = le32_to_cpu(de->ino);
206 kunmap(page);
207 f2fs_put_page(page, 0);
208
209 inode = f2fs_iget(dir->i_sb, ino);
210 if (IS_ERR(inode))
211 return ERR_CAST(inode);
212 }
213
214 return d_splice_alias(inode, dentry);
215}
216
217static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
218{
219 struct super_block *sb = dir->i_sb;
220 struct f2fs_sb_info *sbi = F2FS_SB(sb);
221 struct inode *inode = dentry->d_inode;
222 struct f2fs_dir_entry *de;
223 struct page *page;
224 int err = -ENOENT;
225
226 de = f2fs_find_entry(dir, &dentry->d_name, &page);
227 if (!de)
228 goto fail;
229
230 err = check_orphan_space(sbi);
231 if (err) {
232 kunmap(page);
233 f2fs_put_page(page, 0);
234 goto fail;
235 }
236
237 f2fs_delete_entry(de, page, inode);
238
239 /* In order to evict this inode, we set it dirty */
240 mark_inode_dirty(inode);
241 f2fs_balance_fs(sbi);
242fail:
243 return err;
244}
245
246static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
247 const char *symname)
248{
249 struct super_block *sb = dir->i_sb;
250 struct f2fs_sb_info *sbi = F2FS_SB(sb);
251 struct inode *inode;
252 unsigned symlen = strlen(symname) + 1;
253 int err;
254
255 inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
256 if (IS_ERR(inode))
257 return PTR_ERR(inode);
258
259 inode->i_op = &f2fs_symlink_inode_operations;
260 inode->i_mapping->a_ops = &f2fs_dblock_aops;
261
262 err = f2fs_add_link(dentry, inode);
263 if (err)
264 goto out;
265
266 err = page_symlink(inode, symname, symlen);
267 alloc_nid_done(sbi, inode->i_ino);
268
269 d_instantiate(dentry, inode);
270 unlock_new_inode(inode);
271
272 f2fs_balance_fs(sbi);
273
274 return err;
275out:
276 clear_nlink(inode);
277 unlock_new_inode(inode);
278 iput(inode);
279 alloc_nid_failed(sbi, inode->i_ino);
280 return err;
281}
282
283static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
284{
285 struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
286 struct inode *inode;
287 int err;
288
289 inode = f2fs_new_inode(dir, S_IFDIR | mode);
290 if (IS_ERR(inode))
291 return PTR_ERR(inode);
292
293 inode->i_op = &f2fs_dir_inode_operations;
294 inode->i_fop = &f2fs_dir_operations;
295 inode->i_mapping->a_ops = &f2fs_dblock_aops;
296 mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
297
298 set_inode_flag(F2FS_I(inode), FI_INC_LINK);
299 err = f2fs_add_link(dentry, inode);
300 if (err)
301 goto out_fail;
302
303 alloc_nid_done(sbi, inode->i_ino);
304
305 d_instantiate(dentry, inode);
306 unlock_new_inode(inode);
307
308 f2fs_balance_fs(sbi);
309 return 0;
310
311out_fail:
312 clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
313 clear_nlink(inode);
314 unlock_new_inode(inode);
315 iput(inode);
316 alloc_nid_failed(sbi, inode->i_ino);
317 return err;
318}
319
320static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
321{
322 struct inode *inode = dentry->d_inode;
323 if (f2fs_empty_dir(inode))
324 return f2fs_unlink(dir, dentry);
325 return -ENOTEMPTY;
326}
327
328static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
329 umode_t mode, dev_t rdev)
330{
331 struct super_block *sb = dir->i_sb;
332 struct f2fs_sb_info *sbi = F2FS_SB(sb);
333 struct inode *inode;
334 int err = 0;
335
336 if (!new_valid_dev(rdev))
337 return -EINVAL;
338
339 inode = f2fs_new_inode(dir, mode);
340 if (IS_ERR(inode))
341 return PTR_ERR(inode);
342
343 init_special_inode(inode, inode->i_mode, rdev);
344 inode->i_op = &f2fs_special_inode_operations;
345
346 err = f2fs_add_link(dentry, inode);
347 if (err)
348 goto out;
349
350 alloc_nid_done(sbi, inode->i_ino);
351 d_instantiate(dentry, inode);
352 unlock_new_inode(inode);
353
354 f2fs_balance_fs(sbi);
355
356 return 0;
357out:
358 clear_nlink(inode);
359 unlock_new_inode(inode);
360 iput(inode);
361 alloc_nid_failed(sbi, inode->i_ino);
362 return err;
363}
364
365static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
366 struct inode *new_dir, struct dentry *new_dentry)
367{
368 struct super_block *sb = old_dir->i_sb;
369 struct f2fs_sb_info *sbi = F2FS_SB(sb);
370 struct inode *old_inode = old_dentry->d_inode;
371 struct inode *new_inode = new_dentry->d_inode;
372 struct page *old_dir_page;
373 struct page *old_page;
374 struct f2fs_dir_entry *old_dir_entry = NULL;
375 struct f2fs_dir_entry *old_entry;
376 struct f2fs_dir_entry *new_entry;
377 int err = -ENOENT;
378
379 old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
380 if (!old_entry)
381 goto out;
382
383 if (S_ISDIR(old_inode->i_mode)) {
384 err = -EIO;
385 old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page);
386 if (!old_dir_entry)
387 goto out_old;
388 }
389
390 mutex_lock_op(sbi, RENAME);
391
392 if (new_inode) {
393 struct page *new_page;
394
395 err = -ENOTEMPTY;
396 if (old_dir_entry && !f2fs_empty_dir(new_inode))
397 goto out_dir;
398
399 err = -ENOENT;
400 new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name,
401 &new_page);
402 if (!new_entry)
403 goto out_dir;
404
405 f2fs_set_link(new_dir, new_entry, new_page, old_inode);
406
407 new_inode->i_ctime = CURRENT_TIME;
408 if (old_dir_entry)
409 drop_nlink(new_inode);
410 drop_nlink(new_inode);
411 if (!new_inode->i_nlink)
412 add_orphan_inode(sbi, new_inode->i_ino);
413 f2fs_write_inode(new_inode, NULL);
414 } else {
415 err = f2fs_add_link(new_dentry, old_inode);
416 if (err)
417 goto out_dir;
418
419 if (old_dir_entry) {
420 inc_nlink(new_dir);
421 f2fs_write_inode(new_dir, NULL);
422 }
423 }
424
425 old_inode->i_ctime = CURRENT_TIME;
426 set_inode_flag(F2FS_I(old_inode), FI_NEED_CP);
427 mark_inode_dirty(old_inode);
428
429 f2fs_delete_entry(old_entry, old_page, NULL);
430
431 if (old_dir_entry) {
432 if (old_dir != new_dir) {
433 f2fs_set_link(old_inode, old_dir_entry,
434 old_dir_page, new_dir);
435 } else {
436 kunmap(old_dir_page);
437 f2fs_put_page(old_dir_page, 0);
438 }
439 drop_nlink(old_dir);
440 f2fs_write_inode(old_dir, NULL);
441 }
442
443 mutex_unlock_op(sbi, RENAME);
444
445 f2fs_balance_fs(sbi);
446 return 0;
447
448out_dir:
449 if (old_dir_entry) {
450 kunmap(old_dir_page);
451 f2fs_put_page(old_dir_page, 0);
452 }
453 mutex_unlock_op(sbi, RENAME);
454out_old:
455 kunmap(old_page);
456 f2fs_put_page(old_page, 0);
457out:
458 return err;
459}
460
461const struct inode_operations f2fs_dir_inode_operations = {
462 .create = f2fs_create,
463 .lookup = f2fs_lookup,
464 .link = f2fs_link,
465 .unlink = f2fs_unlink,
466 .symlink = f2fs_symlink,
467 .mkdir = f2fs_mkdir,
468 .rmdir = f2fs_rmdir,
469 .mknod = f2fs_mknod,
470 .rename = f2fs_rename,
471 .setattr = f2fs_setattr,
472 .get_acl = f2fs_get_acl,
473#ifdef CONFIG_F2FS_FS_XATTR
474 .setxattr = generic_setxattr,
475 .getxattr = generic_getxattr,
476 .listxattr = f2fs_listxattr,
477 .removexattr = generic_removexattr,
478#endif
479};
480
481const struct inode_operations f2fs_symlink_inode_operations = {
482 .readlink = generic_readlink,
483 .follow_link = page_follow_link_light,
484 .put_link = page_put_link,
485 .setattr = f2fs_setattr,
486#ifdef CONFIG_F2FS_FS_XATTR
487 .setxattr = generic_setxattr,
488 .getxattr = generic_getxattr,
489 .listxattr = f2fs_listxattr,
490 .removexattr = generic_removexattr,
491#endif
492};
493
494const struct inode_operations f2fs_special_inode_operations = {
495 .setattr = f2fs_setattr,
496 .get_acl = f2fs_get_acl,
497#ifdef CONFIG_F2FS_FS_XATTR
498 .setxattr = generic_setxattr,
499 .getxattr = generic_getxattr,
500 .listxattr = f2fs_listxattr,
501 .removexattr = generic_removexattr,
502#endif
503};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
new file mode 100644
index 000000000000..19870361497e
--- /dev/null
+++ b/fs/f2fs/node.c
@@ -0,0 +1,1764 @@
1/*
2 * fs/f2fs/node.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/mpage.h>
14#include <linux/backing-dev.h>
15#include <linux/blkdev.h>
16#include <linux/pagevec.h>
17#include <linux/swap.h>
18
19#include "f2fs.h"
20#include "node.h"
21#include "segment.h"
22
23static struct kmem_cache *nat_entry_slab;
24static struct kmem_cache *free_nid_slab;
25
26static void clear_node_page_dirty(struct page *page)
27{
28 struct address_space *mapping = page->mapping;
29 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
30	unsigned long flags;
31
32 if (PageDirty(page)) {
33 spin_lock_irqsave(&mapping->tree_lock, flags);
34 radix_tree_tag_clear(&mapping->page_tree,
35 page_index(page),
36 PAGECACHE_TAG_DIRTY);
37 spin_unlock_irqrestore(&mapping->tree_lock, flags);
38
39 clear_page_dirty_for_io(page);
40 dec_page_count(sbi, F2FS_DIRTY_NODES);
41 }
42 ClearPageUptodate(page);
43}
44
45static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
46{
47 pgoff_t index = current_nat_addr(sbi, nid);
48 return get_meta_page(sbi, index);
49}
50
51static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
52{
53 struct page *src_page;
54 struct page *dst_page;
55 pgoff_t src_off;
56 pgoff_t dst_off;
57 void *src_addr;
58 void *dst_addr;
59 struct f2fs_nm_info *nm_i = NM_I(sbi);
60
61 src_off = current_nat_addr(sbi, nid);
62 dst_off = next_nat_addr(sbi, src_off);
63
64 /* get current nat block page with lock */
65 src_page = get_meta_page(sbi, src_off);
66
67 /* Dirty src_page means that it is already the new target NAT page. */
68 if (PageDirty(src_page))
69 return src_page;
70
71 dst_page = grab_meta_page(sbi, dst_off);
72
73 src_addr = page_address(src_page);
74 dst_addr = page_address(dst_page);
75 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
76 set_page_dirty(dst_page);
77 f2fs_put_page(src_page, 1);
78
79 set_to_next_nat(nm_i, nid);
80
81 return dst_page;
82}
83
84/*
85 * Readahead NAT pages
86 */
87static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
88{
89 struct address_space *mapping = sbi->meta_inode->i_mapping;
90 struct f2fs_nm_info *nm_i = NM_I(sbi);
91 struct page *page;
92 pgoff_t index;
93 int i;
94
95 for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
96 if (nid >= nm_i->max_nid)
97 nid = 0;
98 index = current_nat_addr(sbi, nid);
99
100 page = grab_cache_page(mapping, index);
101 if (!page)
102 continue;
103 if (f2fs_readpage(sbi, page, index, READ)) {
104 f2fs_put_page(page, 1);
105 continue;
106 }
107 page_cache_release(page);
108 }
109}
110
111static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
112{
113 return radix_tree_lookup(&nm_i->nat_root, n);
114}
115
116static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
117 nid_t start, unsigned int nr, struct nat_entry **ep)
118{
119 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
120}
121
122static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
123{
124 list_del(&e->list);
125 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
126 nm_i->nat_cnt--;
127 kmem_cache_free(nat_entry_slab, e);
128}
129
130int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
131{
132 struct f2fs_nm_info *nm_i = NM_I(sbi);
133 struct nat_entry *e;
134 int is_cp = 1;
135
136 read_lock(&nm_i->nat_tree_lock);
137 e = __lookup_nat_cache(nm_i, nid);
138 if (e && !e->checkpointed)
139 is_cp = 0;
140 read_unlock(&nm_i->nat_tree_lock);
141 return is_cp;
142}
143
144static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
145{
146 struct nat_entry *new;
147
148 new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
149 if (!new)
150 return NULL;
151 if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
152 kmem_cache_free(nat_entry_slab, new);
153 return NULL;
154 }
155 memset(new, 0, sizeof(struct nat_entry));
156 nat_set_nid(new, nid);
157 list_add_tail(&new->list, &nm_i->nat_entries);
158 nm_i->nat_cnt++;
159 return new;
160}
161
162static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
163 struct f2fs_nat_entry *ne)
164{
165 struct nat_entry *e;
166retry:
167 write_lock(&nm_i->nat_tree_lock);
168 e = __lookup_nat_cache(nm_i, nid);
169 if (!e) {
170 e = grab_nat_entry(nm_i, nid);
171 if (!e) {
172 write_unlock(&nm_i->nat_tree_lock);
173 goto retry;
174 }
175 nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
176 nat_set_ino(e, le32_to_cpu(ne->ino));
177 nat_set_version(e, ne->version);
178 e->checkpointed = true;
179 }
180 write_unlock(&nm_i->nat_tree_lock);
181}
182
183static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
184 block_t new_blkaddr)
185{
186 struct f2fs_nm_info *nm_i = NM_I(sbi);
187 struct nat_entry *e;
188retry:
189 write_lock(&nm_i->nat_tree_lock);
190 e = __lookup_nat_cache(nm_i, ni->nid);
191 if (!e) {
192 e = grab_nat_entry(nm_i, ni->nid);
193 if (!e) {
194 write_unlock(&nm_i->nat_tree_lock);
195 goto retry;
196 }
197 e->ni = *ni;
198 e->checkpointed = true;
199 BUG_ON(ni->blk_addr == NEW_ADDR);
200 } else if (new_blkaddr == NEW_ADDR) {
201		/*
202		 * When a nid is reallocated, the previous nat entry
203		 * may remain in the nat cache.
204		 * So, reinitialize it with the new information.
205		 */
206 e->ni = *ni;
207 BUG_ON(ni->blk_addr != NULL_ADDR);
208 }
209
210 if (new_blkaddr == NEW_ADDR)
211 e->checkpointed = false;
212
213 /* sanity check */
214 BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
215 BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
216 new_blkaddr == NULL_ADDR);
217 BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
218 new_blkaddr == NEW_ADDR);
219 BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
220 nat_get_blkaddr(e) != NULL_ADDR &&
221 new_blkaddr == NEW_ADDR);
222
223	/* increment the version number, as the node is removed */
224 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
225 unsigned char version = nat_get_version(e);
226 nat_set_version(e, inc_node_version(version));
227 }
228
229 /* change address */
230 nat_set_blkaddr(e, new_blkaddr);
231 __set_nat_cache_dirty(nm_i, e);
232 write_unlock(&nm_i->nat_tree_lock);
233}
234
235static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
236{
237 struct f2fs_nm_info *nm_i = NM_I(sbi);
238
239 if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
240 return 0;
241
242 write_lock(&nm_i->nat_tree_lock);
243 while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
244 struct nat_entry *ne;
245 ne = list_first_entry(&nm_i->nat_entries,
246 struct nat_entry, list);
247 __del_from_nat_cache(nm_i, ne);
248 nr_shrink--;
249 }
250 write_unlock(&nm_i->nat_tree_lock);
251 return nr_shrink;
252}
253
254/*
255 * This function always succeeds.
256 */
257void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
258{
259 struct f2fs_nm_info *nm_i = NM_I(sbi);
260 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
261 struct f2fs_summary_block *sum = curseg->sum_blk;
262 nid_t start_nid = START_NID(nid);
263 struct f2fs_nat_block *nat_blk;
264 struct page *page = NULL;
265 struct f2fs_nat_entry ne;
266 struct nat_entry *e;
267 int i;
268
269 memset(&ne, 0, sizeof(struct f2fs_nat_entry));
270 ni->nid = nid;
271
272 /* Check nat cache */
273 read_lock(&nm_i->nat_tree_lock);
274 e = __lookup_nat_cache(nm_i, nid);
275 if (e) {
276 ni->ino = nat_get_ino(e);
277 ni->blk_addr = nat_get_blkaddr(e);
278 ni->version = nat_get_version(e);
279 }
280 read_unlock(&nm_i->nat_tree_lock);
281 if (e)
282 return;
283
284 /* Check current segment summary */
285 mutex_lock(&curseg->curseg_mutex);
286 i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
287 if (i >= 0) {
288 ne = nat_in_journal(sum, i);
289 node_info_from_raw_nat(ni, &ne);
290 }
291 mutex_unlock(&curseg->curseg_mutex);
292 if (i >= 0)
293 goto cache;
294
295 /* Fill node_info from nat page */
296 page = get_current_nat_page(sbi, start_nid);
297 nat_blk = (struct f2fs_nat_block *)page_address(page);
298 ne = nat_blk->entries[nid - start_nid];
299 node_info_from_raw_nat(ni, &ne);
300 f2fs_put_page(page, 1);
301cache:
302 /* cache nat entry */
303 cache_nat_entry(NM_I(sbi), nid, &ne);
304}
305
306/*
307 * The maximum depth is four.
308 * offset[0] holds the offset within the raw inode.
309 */
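/*
 * The block index space is, in order: ADDRS_PER_INODE direct pointers
 * held in the inode, two direct node blocks, two indirect node blocks,
 * and one double-indirect node block.
 */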
310static int get_node_path(long block, int offset[4], unsigned int noffset[4])
311{
312 const long direct_index = ADDRS_PER_INODE;
313 const long direct_blks = ADDRS_PER_BLOCK;
314 const long dptrs_per_blk = NIDS_PER_BLOCK;
315 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
316 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
317 int n = 0;
318 int level = 0;
319
320 noffset[0] = 0;
321
322 if (block < direct_index) {
323 offset[n++] = block;
324 level = 0;
325 goto got;
326 }
327 block -= direct_index;
328 if (block < direct_blks) {
329 offset[n++] = NODE_DIR1_BLOCK;
330 noffset[n] = 1;
331 offset[n++] = block;
332 level = 1;
333 goto got;
334 }
335 block -= direct_blks;
336 if (block < direct_blks) {
337 offset[n++] = NODE_DIR2_BLOCK;
338 noffset[n] = 2;
339 offset[n++] = block;
340 level = 1;
341 goto got;
342 }
343 block -= direct_blks;
344 if (block < indirect_blks) {
345 offset[n++] = NODE_IND1_BLOCK;
346 noffset[n] = 3;
347 offset[n++] = block / direct_blks;
348 noffset[n] = 4 + offset[n - 1];
349 offset[n++] = block % direct_blks;
350 level = 2;
351 goto got;
352 }
353 block -= indirect_blks;
354 if (block < indirect_blks) {
355 offset[n++] = NODE_IND2_BLOCK;
356 noffset[n] = 4 + dptrs_per_blk;
357 offset[n++] = block / direct_blks;
358 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
359 offset[n++] = block % direct_blks;
360 level = 2;
361 goto got;
362 }
363 block -= indirect_blks;
364 if (block < dindirect_blks) {
365 offset[n++] = NODE_DIND_BLOCK;
366 noffset[n] = 5 + (dptrs_per_blk * 2);
367 offset[n++] = block / indirect_blks;
368 noffset[n] = 6 + (dptrs_per_blk * 2) +
369 offset[n - 1] * (dptrs_per_blk + 1);
370 offset[n++] = (block / direct_blks) % dptrs_per_blk;
371 noffset[n] = 7 + (dptrs_per_blk * 2) +
372 offset[n - 2] * (dptrs_per_blk + 1) +
373 offset[n - 1];
374 offset[n++] = block % direct_blks;
375 level = 3;
376 goto got;
377 } else {
378 BUG();
379 }
380got:
381 return level;
382}
383
384/*
385 * Caller should call f2fs_put_dnode(dn).
386 */
387int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
388{
389 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
390 struct page *npage[4];
391 struct page *parent;
392 int offset[4];
393 unsigned int noffset[4];
394 nid_t nids[4];
395 int level, i;
396 int err = 0;
397
398 level = get_node_path(index, offset, noffset);
399
400 nids[0] = dn->inode->i_ino;
401 npage[0] = get_node_page(sbi, nids[0]);
402 if (IS_ERR(npage[0]))
403 return PTR_ERR(npage[0]);
404
405 parent = npage[0];
406 nids[1] = get_nid(parent, offset[0], true);
407 dn->inode_page = npage[0];
408 dn->inode_page_locked = true;
409
410 /* get indirect or direct nodes */
411 for (i = 1; i <= level; i++) {
412 bool done = false;
413
414 if (!nids[i] && !ro) {
415 mutex_lock_op(sbi, NODE_NEW);
416
417 /* alloc new node */
418 if (!alloc_nid(sbi, &(nids[i]))) {
419 mutex_unlock_op(sbi, NODE_NEW);
420 err = -ENOSPC;
421 goto release_pages;
422 }
423
424 dn->nid = nids[i];
425 npage[i] = new_node_page(dn, noffset[i]);
426 if (IS_ERR(npage[i])) {
427 alloc_nid_failed(sbi, nids[i]);
428 mutex_unlock_op(sbi, NODE_NEW);
429 err = PTR_ERR(npage[i]);
430 goto release_pages;
431 }
432
433 set_nid(parent, offset[i - 1], nids[i], i == 1);
434 alloc_nid_done(sbi, nids[i]);
435 mutex_unlock_op(sbi, NODE_NEW);
436 done = true;
437 } else if (ro && i == level && level > 1) {
438 npage[i] = get_node_page_ra(parent, offset[i - 1]);
439 if (IS_ERR(npage[i])) {
440 err = PTR_ERR(npage[i]);
441 goto release_pages;
442 }
443 done = true;
444 }
445 if (i == 1) {
446 dn->inode_page_locked = false;
447 unlock_page(parent);
448 } else {
449 f2fs_put_page(parent, 1);
450 }
451
452 if (!done) {
453 npage[i] = get_node_page(sbi, nids[i]);
454 if (IS_ERR(npage[i])) {
455 err = PTR_ERR(npage[i]);
456 f2fs_put_page(npage[0], 0);
457 goto release_out;
458 }
459 }
460 if (i < level) {
461 parent = npage[i];
462 nids[i + 1] = get_nid(parent, offset[i], false);
463 }
464 }
465 dn->nid = nids[level];
466 dn->ofs_in_node = offset[level];
467 dn->node_page = npage[level];
468 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
469 return 0;
470
471release_pages:
472 f2fs_put_page(parent, 1);
473 if (i > 1)
474 f2fs_put_page(npage[0], 0);
475release_out:
476 dn->inode_page = NULL;
477 dn->node_page = NULL;
478 return err;
479}
480
481static void truncate_node(struct dnode_of_data *dn)
482{
483 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
484 struct node_info ni;
485
486 get_node_info(sbi, dn->nid, &ni);
487 BUG_ON(ni.blk_addr == NULL_ADDR);
488
489 if (ni.blk_addr != NULL_ADDR)
490 invalidate_blocks(sbi, ni.blk_addr);
491
492 /* Deallocate node address */
493 dec_valid_node_count(sbi, dn->inode, 1);
494 set_node_addr(sbi, &ni, NULL_ADDR);
495
496 if (dn->nid == dn->inode->i_ino) {
497 remove_orphan_inode(sbi, dn->nid);
498 dec_valid_inode_count(sbi);
499 } else {
500 sync_inode_page(dn);
501 }
502
503 clear_node_page_dirty(dn->node_page);
504 F2FS_SET_SB_DIRT(sbi);
505
506 f2fs_put_page(dn->node_page, 1);
507 dn->node_page = NULL;
508}
509
510static int truncate_dnode(struct dnode_of_data *dn)
511{
512 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
513 struct page *page;
514
515 if (dn->nid == 0)
516 return 1;
517
518 /* get direct node */
519 page = get_node_page(sbi, dn->nid);
520 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
521 return 1;
522 else if (IS_ERR(page))
523 return PTR_ERR(page);
524
525 /* Make dnode_of_data for parameter */
526 dn->node_page = page;
527 dn->ofs_in_node = 0;
528 truncate_data_blocks(dn);
529 truncate_node(dn);
530 return 1;
531}
532
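/*
 * Frees the sub-tree of node pages rooted at dn->nid, "depth" levels
 * deep, and returns the number of node pages freed. A return value of
 * NIDS_PER_BLOCK + 1 tells the caller that the indirect node itself
 * was freed along with all of its children.
 */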
533static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
534 int ofs, int depth)
535{
536 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
537 struct dnode_of_data rdn = *dn;
538 struct page *page;
539 struct f2fs_node *rn;
540 nid_t child_nid;
541 unsigned int child_nofs;
542 int freed = 0;
543 int i, ret;
544
545 if (dn->nid == 0)
546 return NIDS_PER_BLOCK + 1;
547
548 page = get_node_page(sbi, dn->nid);
549 if (IS_ERR(page))
550 return PTR_ERR(page);
551
552 rn = (struct f2fs_node *)page_address(page);
553 if (depth < 3) {
554 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
555 child_nid = le32_to_cpu(rn->in.nid[i]);
556 if (child_nid == 0)
557 continue;
558 rdn.nid = child_nid;
559 ret = truncate_dnode(&rdn);
560 if (ret < 0)
561 goto out_err;
562 set_nid(page, i, 0, false);
563 }
564 } else {
565 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
566 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
567 child_nid = le32_to_cpu(rn->in.nid[i]);
568 if (child_nid == 0) {
569 child_nofs += NIDS_PER_BLOCK + 1;
570 continue;
571 }
572 rdn.nid = child_nid;
573 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
574 if (ret == (NIDS_PER_BLOCK + 1)) {
575 set_nid(page, i, 0, false);
576 child_nofs += ret;
577 } else if (ret < 0 && ret != -ENOENT) {
578 goto out_err;
579 }
580 }
581 freed = child_nofs;
582 }
583
584 if (!ofs) {
585 /* remove current indirect node */
586 dn->node_page = page;
587 truncate_node(dn);
588 freed++;
589 } else {
590 f2fs_put_page(page, 1);
591 }
592 return freed;
593
594out_err:
595 f2fs_put_page(page, 1);
596 return ret;
597}
598
599static int truncate_partial_nodes(struct dnode_of_data *dn,
600 struct f2fs_inode *ri, int *offset, int depth)
601{
602 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
603 struct page *pages[2];
604 nid_t nid[3];
605 nid_t child_nid;
606 int err = 0;
607 int i;
608 int idx = depth - 2;
609
610 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
611 if (!nid[0])
612 return 0;
613
614 /* get indirect nodes in the path */
615 for (i = 0; i < depth - 1; i++) {
616		/* the reference count will be increased */
617 pages[i] = get_node_page(sbi, nid[i]);
618 if (IS_ERR(pages[i])) {
619 depth = i + 1;
620 err = PTR_ERR(pages[i]);
621 goto fail;
622 }
623 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
624 }
625
626 /* free direct nodes linked to a partial indirect node */
627 for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
628 child_nid = get_nid(pages[idx], i, false);
629 if (!child_nid)
630 continue;
631 dn->nid = child_nid;
632 err = truncate_dnode(dn);
633 if (err < 0)
634 goto fail;
635 set_nid(pages[idx], i, 0, false);
636 }
637
638 if (offset[depth - 1] == 0) {
639 dn->node_page = pages[idx];
640 dn->nid = nid[idx];
641 truncate_node(dn);
642 } else {
643 f2fs_put_page(pages[idx], 1);
644 }
645 offset[idx]++;
646 offset[depth - 1] = 0;
647fail:
648 for (i = depth - 3; i >= 0; i--)
649 f2fs_put_page(pages[i], 1);
650 return err;
651}
652
653/*
654 * All the block addresses of data and nodes should be nullified.
655 */
656int truncate_inode_blocks(struct inode *inode, pgoff_t from)
657{
658 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
659 int err = 0, cont = 1;
660 int level, offset[4], noffset[4];
661 unsigned int nofs;
662 struct f2fs_node *rn;
663 struct dnode_of_data dn;
664 struct page *page;
665
666 level = get_node_path(from, offset, noffset);
667
668 page = get_node_page(sbi, inode->i_ino);
669 if (IS_ERR(page))
670 return PTR_ERR(page);
671
672 set_new_dnode(&dn, inode, page, NULL, 0);
673 unlock_page(page);
674
675 rn = page_address(page);
676 switch (level) {
677 case 0:
678 case 1:
679 nofs = noffset[1];
680 break;
681 case 2:
682 nofs = noffset[1];
683 if (!offset[level - 1])
684 goto skip_partial;
685 err = truncate_partial_nodes(&dn, &rn->i, offset, level);
686 if (err < 0 && err != -ENOENT)
687 goto fail;
688 nofs += 1 + NIDS_PER_BLOCK;
689 break;
690 case 3:
691 nofs = 5 + 2 * NIDS_PER_BLOCK;
692 if (!offset[level - 1])
693 goto skip_partial;
694 err = truncate_partial_nodes(&dn, &rn->i, offset, level);
695 if (err < 0 && err != -ENOENT)
696 goto fail;
697 break;
698 default:
699 BUG();
700 }
701
702skip_partial:
703 while (cont) {
704 dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
705 switch (offset[0]) {
706 case NODE_DIR1_BLOCK:
707 case NODE_DIR2_BLOCK:
708 err = truncate_dnode(&dn);
709 break;
710
711 case NODE_IND1_BLOCK:
712 case NODE_IND2_BLOCK:
713 err = truncate_nodes(&dn, nofs, offset[1], 2);
714 break;
715
716 case NODE_DIND_BLOCK:
717 err = truncate_nodes(&dn, nofs, offset[1], 3);
718 cont = 0;
719 break;
720
721 default:
722 BUG();
723 }
724 if (err < 0 && err != -ENOENT)
725 goto fail;
726 if (offset[1] == 0 &&
727 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
728 lock_page(page);
729 wait_on_page_writeback(page);
730 rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
731 set_page_dirty(page);
732 unlock_page(page);
733 }
734 offset[1] = 0;
735 offset[0]++;
736 nofs += err;
737 }
738fail:
739 f2fs_put_page(page, 0);
740 return err > 0 ? 0 : err;
741}
742
743int remove_inode_page(struct inode *inode)
744{
745 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
746 struct page *page;
747 nid_t ino = inode->i_ino;
748 struct dnode_of_data dn;
749
750 mutex_lock_op(sbi, NODE_TRUNC);
751 page = get_node_page(sbi, ino);
752 if (IS_ERR(page)) {
753 mutex_unlock_op(sbi, NODE_TRUNC);
754 return PTR_ERR(page);
755 }
756
757 if (F2FS_I(inode)->i_xattr_nid) {
758 nid_t nid = F2FS_I(inode)->i_xattr_nid;
759 struct page *npage = get_node_page(sbi, nid);
760
761 if (IS_ERR(npage)) {
762 mutex_unlock_op(sbi, NODE_TRUNC);
763 return PTR_ERR(npage);
764 }
765
766 F2FS_I(inode)->i_xattr_nid = 0;
767 set_new_dnode(&dn, inode, page, npage, nid);
768 dn.inode_page_locked = 1;
769 truncate_node(&dn);
770 }
771 if (inode->i_blocks == 1) {
772 /* internally calls f2fs_put_page() */
773 set_new_dnode(&dn, inode, page, page, ino);
774 truncate_node(&dn);
775 } else if (inode->i_blocks == 0) {
776 struct node_info ni;
777 get_node_info(sbi, inode->i_ino, &ni);
778
779 /* reached when f2fs_new_inode() has failed */
780 BUG_ON(ni.blk_addr != NULL_ADDR);
781 f2fs_put_page(page, 1);
782 } else {
783 BUG();
784 }
785 mutex_unlock_op(sbi, NODE_TRUNC);
786 return 0;
787}
788
789int new_inode_page(struct inode *inode, struct dentry *dentry)
790{
791 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
792 struct page *page;
793 struct dnode_of_data dn;
794
795 /* allocate inode page for new inode */
796 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
797 mutex_lock_op(sbi, NODE_NEW);
798 page = new_node_page(&dn, 0);
799 mutex_unlock_op(sbi, NODE_NEW);
800 if (IS_ERR(page))
801 return PTR_ERR(page);
802 init_dent_inode(dentry, page);
803 f2fs_put_page(page, 1);
804 return 0;
805}
806
807struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
808{
809 struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
810 struct address_space *mapping = sbi->node_inode->i_mapping;
811 struct node_info old_ni, new_ni;
812 struct page *page;
813 int err;
814
815 if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
816 return ERR_PTR(-EPERM);
817
818 page = grab_cache_page(mapping, dn->nid);
819 if (!page)
820 return ERR_PTR(-ENOMEM);
821
822 get_node_info(sbi, dn->nid, &old_ni);
823
824 SetPageUptodate(page);
825 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
826
827 /* Reinitialize old_ni with new node page */
828 BUG_ON(old_ni.blk_addr != NULL_ADDR);
829 new_ni = old_ni;
830 new_ni.ino = dn->inode->i_ino;
831
832 if (!inc_valid_node_count(sbi, dn->inode, 1)) {
833 err = -ENOSPC;
834 goto fail;
835 }
836 set_node_addr(sbi, &new_ni, NEW_ADDR);
837
838 dn->node_page = page;
839 sync_inode_page(dn);
840 set_page_dirty(page);
841 set_cold_node(dn->inode, page);
842 if (ofs == 0)
843 inc_valid_inode_count(sbi);
844
845 return page;
846
847fail:
848 f2fs_put_page(page, 1);
849 return ERR_PTR(err);
850}
851
852static int read_node_page(struct page *page, int type)
853{
854 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
855 struct node_info ni;
856
857 get_node_info(sbi, page->index, &ni);
858
859 if (ni.blk_addr == NULL_ADDR)
860 return -ENOENT;
861 return f2fs_readpage(sbi, page, ni.blk_addr, type);
862}
863
864/*
865 * Readahead a node page
866 */
867void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
868{
869 struct address_space *mapping = sbi->node_inode->i_mapping;
870 struct page *apage;
871
872 apage = find_get_page(mapping, nid);
873 if (apage && PageUptodate(apage))
874 goto release_out;
875 f2fs_put_page(apage, 0);
876
877 apage = grab_cache_page(mapping, nid);
878 if (!apage)
879 return;
880
881 if (read_node_page(apage, READA))
882 goto unlock_out;
883
884 page_cache_release(apage);
885 return;
886
887unlock_out:
888 unlock_page(apage);
889release_out:
890 page_cache_release(apage);
891}
892
893struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
894{
895 int err;
896 struct page *page;
897 struct address_space *mapping = sbi->node_inode->i_mapping;
898
899 page = grab_cache_page(mapping, nid);
900 if (!page)
901 return ERR_PTR(-ENOMEM);
902
903 err = read_node_page(page, READ_SYNC);
904 if (err) {
905 f2fs_put_page(page, 1);
906 return ERR_PTR(err);
907 }
908
909 BUG_ON(nid != nid_of_node(page));
910 mark_page_accessed(page);
911 return page;
912}
913
914/*
915 * Return a locked page for the desired node page,
916 * and read ahead up to MAX_RA_NODE sibling node pages.
917 */
918struct page *get_node_page_ra(struct page *parent, int start)
919{
920 struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
921 struct address_space *mapping = sbi->node_inode->i_mapping;
922 int i, end;
923 int err = 0;
924 nid_t nid;
925 struct page *page;
926
927 /* First, try getting the desired direct node. */
928 nid = get_nid(parent, start, false);
929 if (!nid)
930 return ERR_PTR(-ENOENT);
931
932 page = find_get_page(mapping, nid);
933 if (page && PageUptodate(page))
934 goto page_hit;
935 f2fs_put_page(page, 0);
936
937repeat:
938 page = grab_cache_page(mapping, nid);
939 if (!page)
940 return ERR_PTR(-ENOMEM);
941
942 err = read_node_page(page, READA);
943 if (err) {
944 f2fs_put_page(page, 1);
945 return ERR_PTR(err);
946 }
947
948 /* Then, try readahead for siblings of the desired node */
949 end = start + MAX_RA_NODE;
950 end = min(end, NIDS_PER_BLOCK);
951 for (i = start + 1; i < end; i++) {
952 nid = get_nid(parent, i, false);
953 if (!nid)
954 continue;
955 ra_node_page(sbi, nid);
956 }
957
958page_hit:
959 lock_page(page);
960 if (PageError(page)) {
961 f2fs_put_page(page, 1);
962 return ERR_PTR(-EIO);
963 }
964
965 /* Has the page been truncated? */
966 if (page->mapping != mapping) {
967 f2fs_put_page(page, 1);
968 goto repeat;
969 }
970 return page;
971}
972
973void sync_inode_page(struct dnode_of_data *dn)
974{
975 if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
976 update_inode(dn->inode, dn->node_page);
977 } else if (dn->inode_page) {
978 if (!dn->inode_page_locked)
979 lock_page(dn->inode_page);
980 update_inode(dn->inode, dn->inode_page);
981 if (!dn->inode_page_locked)
982 unlock_page(dn->inode_page);
983 } else {
984 f2fs_write_inode(dn->inode, NULL);
985 }
986}
987
988int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
989 struct writeback_control *wbc)
990{
991 struct address_space *mapping = sbi->node_inode->i_mapping;
992 pgoff_t index, end;
993 struct pagevec pvec;
994 int step = ino ? 2 : 0;
995 int nwritten = 0, wrote = 0;
996
997 pagevec_init(&pvec, 0);
998
999next_step:
1000 index = 0;
1001 end = LONG_MAX;
1002
1003 while (index <= end) {
1004 int i, nr_pages;
1005 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1006 PAGECACHE_TAG_DIRTY,
1007 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1008 if (nr_pages == 0)
1009 break;
1010
1011 for (i = 0; i < nr_pages; i++) {
1012 struct page *page = pvec.pages[i];
1013
1014 /*
1015 * flushing sequence with step:
1016 * 0. indirect nodes
1017 * 1. dentry dnodes
1018 * 2. file dnodes
1019 */
1020 if (step == 0 && IS_DNODE(page))
1021 continue;
1022 if (step == 1 && (!IS_DNODE(page) ||
1023 is_cold_node(page)))
1024 continue;
1025 if (step == 2 && (!IS_DNODE(page) ||
1026 !is_cold_node(page)))
1027 continue;
1028
1029 /*
1030 * In fsync mode, we must not skip writing
1031 * node pages that belong to the target inode.
1032 */
1033 if (ino && ino_of_node(page) == ino)
1034 lock_page(page);
1035 else if (!trylock_page(page))
1036 continue;
1037
1038 if (unlikely(page->mapping != mapping)) {
1039continue_unlock:
1040 unlock_page(page);
1041 continue;
1042 }
1043 if (ino && ino_of_node(page) != ino)
1044 goto continue_unlock;
1045
1046 if (!PageDirty(page)) {
1047 /* someone wrote it for us */
1048 goto continue_unlock;
1049 }
1050
1051 if (!clear_page_dirty_for_io(page))
1052 goto continue_unlock;
1053
1054 /* called by fsync() */
1055 if (ino && IS_DNODE(page)) {
1056 int mark = !is_checkpointed_node(sbi, ino);
1057 set_fsync_mark(page, 1);
1058 if (IS_INODE(page))
1059 set_dentry_mark(page, mark);
1060 nwritten++;
1061 } else {
1062 set_fsync_mark(page, 0);
1063 set_dentry_mark(page, 0);
1064 }
1065 mapping->a_ops->writepage(page, wbc);
1066 wrote++;
1067
1068 if (--wbc->nr_to_write == 0)
1069 break;
1070 }
1071 pagevec_release(&pvec);
1072 cond_resched();
1073
1074 if (wbc->nr_to_write == 0) {
1075 step = 2;
1076 break;
1077 }
1078 }
1079
1080 if (step < 2) {
1081 step++;
1082 goto next_step;
1083 }
1084
1085 if (wrote)
1086 f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);
1087
1088 return nwritten;
1089}
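The three-step ordering above leans on the cold-node convention from node.h: set_cold_node() marks file nodes cold and leaves directory nodes non-cold, so step 1 (non-cold dnodes) flushes dentry dnodes and step 2 (cold dnodes) flushes file dnodes. A standalone model of that classification, with made-up types, might look like this:

#include <stdio.h>
#include <stdbool.h>

struct node_pg {
	int nid;
	bool is_dnode;	/* direct node holding data block addresses? */
	bool is_cold;	/* file dnodes are cold, dentry dnodes are not */
};

static void flush(const struct node_pg *p, int n)
{
	for (int step = 0; step <= 2; step++) {
		for (int i = 0; i < n; i++) {
			if (step == 0 && p[i].is_dnode)
				continue;	/* indirect nodes first */
			if (step == 1 && (!p[i].is_dnode || p[i].is_cold))
				continue;	/* then dentry dnodes */
			if (step == 2 && (!p[i].is_dnode || !p[i].is_cold))
				continue;	/* file dnodes last */
			printf("step %d: write nid %d\n", step, p[i].nid);
		}
	}
}

int main(void)
{
	struct node_pg pgs[] = {
		{ 1, false, false },	/* indirect node */
		{ 2, true,  false },	/* dentry dnode (directory) */
		{ 3, true,  true  },	/* file dnode */
	};

	flush(pgs, 3);
	return 0;
}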
1090
1091static int f2fs_write_node_page(struct page *page,
1092 struct writeback_control *wbc)
1093{
1094 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
1095 nid_t nid;
1096 unsigned int nofs;
1097 block_t new_addr;
1098 struct node_info ni;
1099
1100 if (wbc->for_reclaim) {
1101 dec_page_count(sbi, F2FS_DIRTY_NODES);
1102 wbc->pages_skipped++;
1103 set_page_dirty(page);
1104 return AOP_WRITEPAGE_ACTIVATE;
1105 }
1106
1107 wait_on_page_writeback(page);
1108
1109 mutex_lock_op(sbi, NODE_WRITE);
1110
1111 /* get old block addr of this node page */
1112 nid = nid_of_node(page);
1113 nofs = ofs_of_node(page);
1114 BUG_ON(page->index != nid);
1115
1116 get_node_info(sbi, nid, &ni);
1117
1118 /* This page is already truncated */
1119 if (ni.blk_addr == NULL_ADDR)
1120 goto out;
1121
1122 set_page_writeback(page);
1123
1124 /* insert node offset */
1125 write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
1126 set_node_addr(sbi, &ni, new_addr);
1127out:
1128 dec_page_count(sbi, F2FS_DIRTY_NODES);
1129 mutex_unlock_op(sbi, NODE_WRITE);
1130 unlock_page(page);
1131 return 0;
1132}
1133
1134static int f2fs_write_node_pages(struct address_space *mapping,
1135 struct writeback_control *wbc)
1136{
1137 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1138 struct block_device *bdev = sbi->sb->s_bdev;
1139 long nr_to_write = wbc->nr_to_write;
1140
1141 if (wbc->for_kupdate)
1142 return 0;
1143
1144 if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
1145 return 0;
1146
1147 if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
1148 write_checkpoint(sbi, false, false);
1149 return 0;
1150 }
1151
1152 /* write only as many dirty node pages as fit in a single bio */
1153 wbc->nr_to_write = bio_get_nr_vecs(bdev);
1154 sync_node_pages(sbi, 0, wbc);
1155 wbc->nr_to_write = nr_to_write -
1156 (bio_get_nr_vecs(bdev) - wbc->nr_to_write);
1157 return 0;
1158}
1159
1160static int f2fs_set_node_page_dirty(struct page *page)
1161{
1162 struct address_space *mapping = page->mapping;
1163 struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
1164
1165 SetPageUptodate(page);
1166 if (!PageDirty(page)) {
1167 __set_page_dirty_nobuffers(page);
1168 inc_page_count(sbi, F2FS_DIRTY_NODES);
1169 SetPagePrivate(page);
1170 return 1;
1171 }
1172 return 0;
1173}
1174
1175static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
1176{
1177 struct inode *inode = page->mapping->host;
1178 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
1179 if (PageDirty(page))
1180 dec_page_count(sbi, F2FS_DIRTY_NODES);
1181 ClearPagePrivate(page);
1182}
1183
1184static int f2fs_release_node_page(struct page *page, gfp_t wait)
1185{
1186 ClearPagePrivate(page);
1187 return 0;
1188}
1189
1190/*
1191 * Structure of the f2fs node operations
1192 */
1193const struct address_space_operations f2fs_node_aops = {
1194 .writepage = f2fs_write_node_page,
1195 .writepages = f2fs_write_node_pages,
1196 .set_page_dirty = f2fs_set_node_page_dirty,
1197 .invalidatepage = f2fs_invalidate_node_page,
1198 .releasepage = f2fs_release_node_page,
1199};
1200
1201static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
1202{
1203 struct list_head *this;
1204 struct free_nid *i = NULL;
1205 list_for_each(this, head) {
1206 i = list_entry(this, struct free_nid, list);
1207 if (i->nid == n)
1208 break;
1209 i = NULL;
1210 }
1211 return i;
1212}
1213
1214static void __del_from_free_nid_list(struct free_nid *i)
1215{
1216 list_del(&i->list);
1217 kmem_cache_free(free_nid_slab, i);
1218}
1219
1220static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1221{
1222 struct free_nid *i;
1223
1224 if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
1225 return 0;
1226retry:
1227 i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1228 if (!i) {
1229 cond_resched();
1230 goto retry;
1231 }
1232 i->nid = nid;
1233 i->state = NID_NEW;
1234
1235 spin_lock(&nm_i->free_nid_list_lock);
1236 if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
1237 spin_unlock(&nm_i->free_nid_list_lock);
1238 kmem_cache_free(free_nid_slab, i);
1239 return 0;
1240 }
1241 list_add_tail(&i->list, &nm_i->free_nid_list);
1242 nm_i->fcnt++;
1243 spin_unlock(&nm_i->free_nid_list_lock);
1244 return 1;
1245}
1246
1247static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1248{
1249 struct free_nid *i;
1250 spin_lock(&nm_i->free_nid_list_lock);
1251 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1252 if (i && i->state == NID_NEW) {
1253 __del_from_free_nid_list(i);
1254 nm_i->fcnt--;
1255 }
1256 spin_unlock(&nm_i->free_nid_list_lock);
1257}
1258
1259static int scan_nat_page(struct f2fs_nm_info *nm_i,
1260 struct page *nat_page, nid_t start_nid)
1261{
1262 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1263 block_t blk_addr;
1264 int fcnt = 0;
1265 int i;
1266
1267 /* nid 0 must not be used */
1268 if (start_nid == 0)
1269 ++start_nid;
1270
1271 i = start_nid % NAT_ENTRY_PER_BLOCK;
1272
1273 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1274 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1275 BUG_ON(blk_addr == NEW_ADDR);
1276 if (blk_addr == NULL_ADDR)
1277 fcnt += add_free_nid(nm_i, start_nid);
1278 }
1279 return fcnt;
1280}
1281
1282static void build_free_nids(struct f2fs_sb_info *sbi)
1283{
1284 struct free_nid *fnid, *next_fnid;
1285 struct f2fs_nm_info *nm_i = NM_I(sbi);
1286 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1287 struct f2fs_summary_block *sum = curseg->sum_blk;
1288 nid_t nid = 0;
1289 bool is_cycled = false;
1290 int fcnt = 0;
1291 int i;
1292
1293 nid = nm_i->next_scan_nid;
1294 nm_i->init_scan_nid = nid;
1295
1296 ra_nat_pages(sbi, nid);
1297
1298 while (1) {
1299 struct page *page = get_current_nat_page(sbi, nid);
1300
1301 fcnt += scan_nat_page(nm_i, page, nid);
1302 f2fs_put_page(page, 1);
1303
1304 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1305
1306 if (nid >= nm_i->max_nid) {
1307 nid = 0;
1308 is_cycled = true;
1309 }
1310 if (fcnt > MAX_FREE_NIDS)
1311 break;
1312 if (is_cycled && nm_i->init_scan_nid <= nid)
1313 break;
1314 }
1315
1316 nm_i->next_scan_nid = nid;
1317
1318 /* find free nids from current sum_pages */
1319 mutex_lock(&curseg->curseg_mutex);
1320 for (i = 0; i < nats_in_cursum(sum); i++) {
1321 block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
1322 nid = le32_to_cpu(nid_in_journal(sum, i));
1323 if (addr == NULL_ADDR)
1324 add_free_nid(nm_i, nid);
1325 else
1326 remove_free_nid(nm_i, nid);
1327 }
1328 mutex_unlock(&curseg->curseg_mutex);
1329
1330 /* drop free nids that the nat cache shows as already allocated */
1331 list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
1332 struct nat_entry *ne;
1333
1334 read_lock(&nm_i->nat_tree_lock);
1335 ne = __lookup_nat_cache(nm_i, fnid->nid);
1336 if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
1337 remove_free_nid(nm_i, fnid->nid);
1338 read_unlock(&nm_i->nat_tree_lock);
1339 }
1340}
1341
1342/*
1343 * If this function returns success, the caller can obtain a new nid
1344 * from the second parameter of this function.
1345 * The returned nid can be used as an ino as well as a nid when an inode is created.
1346 */
1347bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1348{
1349 struct f2fs_nm_info *nm_i = NM_I(sbi);
1350 struct free_nid *i = NULL;
1351 struct list_head *this;
1352retry:
1353 mutex_lock(&nm_i->build_lock);
1354 if (!nm_i->fcnt) {
1355 /* scan NAT in order to build free nid list */
1356 build_free_nids(sbi);
1357 if (!nm_i->fcnt) {
1358 mutex_unlock(&nm_i->build_lock);
1359 return false;
1360 }
1361 }
1362 mutex_unlock(&nm_i->build_lock);
1363
1364 /*
1365 * We check fcnt again because the previous check was racy:
1366 * we did not hold free_nid_list_lock, so another thread
1367 * could have consumed all of the free nids.
1368 */
1369 spin_lock(&nm_i->free_nid_list_lock);
1370 if (!nm_i->fcnt) {
1371 spin_unlock(&nm_i->free_nid_list_lock);
1372 goto retry;
1373 }
1374
1375 BUG_ON(list_empty(&nm_i->free_nid_list));
1376 list_for_each(this, &nm_i->free_nid_list) {
1377 i = list_entry(this, struct free_nid, list);
1378 if (i->state == NID_NEW)
1379 break;
1380 }
1381
1382 BUG_ON(i->state != NID_NEW);
1383 *nid = i->nid;
1384 i->state = NID_ALLOC;
1385 nm_i->fcnt--;
1386 spin_unlock(&nm_i->free_nid_list_lock);
1387 return true;
1388}
1389
1390/*
1391 * alloc_nid() should be called prior to this function.
1392 */
1393void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1394{
1395 struct f2fs_nm_info *nm_i = NM_I(sbi);
1396 struct free_nid *i;
1397
1398 spin_lock(&nm_i->free_nid_list_lock);
1399 i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
1400 if (i) {
1401 BUG_ON(i->state != NID_ALLOC);
1402 __del_from_free_nid_list(i);
1403 }
1404 spin_unlock(&nm_i->free_nid_list_lock);
1405}
1406
1407/*
1408 * alloc_nid() should be called prior to this function.
1409 */
1410void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1411{
1412 alloc_nid_done(sbi, nid);
1413 add_free_nid(NM_I(sbi), nid);
1414}
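Taken together, these helpers form a small state machine for free node ids: add_free_nid() queues a NID_NEW entry, alloc_nid() hands one out and moves it to NID_ALLOC, alloc_nid_done() retires it, and alloc_nid_failed() recycles it. A userspace sketch of that lifecycle, using a plain singly linked list instead of the kernel's locked list (all names illustrative):

#include <stdio.h>
#include <stdlib.h>

enum nid_state { NID_NEW, NID_ALLOC };

struct free_nid {
	unsigned int nid;
	enum nid_state state;
	struct free_nid *next;
};

static struct free_nid *head;

static void add_free_nid(unsigned int nid)
{
	struct free_nid *i = malloc(sizeof(*i));

	if (!i)
		return;
	i->nid = nid;
	i->state = NID_NEW;
	i->next = head;
	head = i;
}

static int alloc_nid(unsigned int *nid)
{
	for (struct free_nid *i = head; i; i = i->next) {
		if (i->state == NID_NEW) {
			i->state = NID_ALLOC;
			*nid = i->nid;
			return 1;
		}
	}
	return 0;	/* the kernel would rebuild the list from the NAT */
}

static void alloc_nid_done(unsigned int nid)
{
	for (struct free_nid **pp = &head; *pp; pp = &(*pp)->next) {
		if ((*pp)->nid == nid && (*pp)->state == NID_ALLOC) {
			struct free_nid *i = *pp;

			*pp = i->next;
			free(i);
			return;
		}
	}
}

static void alloc_nid_failed(unsigned int nid)
{
	alloc_nid_done(nid);	/* drop the NID_ALLOC entry ... */
	add_free_nid(nid);	/* ... and make the nid reusable again */
}

int main(void)
{
	unsigned int nid;

	add_free_nid(7);
	if (alloc_nid(&nid))
		printf("allocated nid %u\n", nid);
	alloc_nid_failed(nid);	/* e.g. inode creation failed */
	printf("reusable again: %d\n", alloc_nid(&nid));
	return 0;
}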
1415
1416void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
1417 struct f2fs_summary *sum, struct node_info *ni,
1418 block_t new_blkaddr)
1419{
1420 rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
1421 set_node_addr(sbi, ni, new_blkaddr);
1422 clear_node_page_dirty(page);
1423}
1424
1425int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
1426{
1427 struct address_space *mapping = sbi->node_inode->i_mapping;
1428 struct f2fs_node *src, *dst;
1429 nid_t ino = ino_of_node(page);
1430 struct node_info old_ni, new_ni;
1431 struct page *ipage;
1432
1433 ipage = grab_cache_page(mapping, ino);
1434 if (!ipage)
1435 return -ENOMEM;
1436
1437 /* this ino must not be reused from the free nid list */
1438 remove_free_nid(NM_I(sbi), ino);
1439
1440 get_node_info(sbi, ino, &old_ni);
1441 SetPageUptodate(ipage);
1442 fill_node_footer(ipage, ino, ino, 0, true);
1443
1444 src = (struct f2fs_node *)page_address(page);
1445 dst = (struct f2fs_node *)page_address(ipage);
1446
1447 memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
1448 dst->i.i_size = 0;
1449 dst->i.i_blocks = cpu_to_le64(1);
1450 dst->i.i_links = cpu_to_le32(1);
1451 dst->i.i_xattr_nid = 0;
1452
1453 new_ni = old_ni;
1454 new_ni.ino = ino;
1455
1456 set_node_addr(sbi, &new_ni, NEW_ADDR);
1457 inc_valid_inode_count(sbi);
1458
1459 f2fs_put_page(ipage, 1);
1460 return 0;
1461}
1462
1463int restore_node_summary(struct f2fs_sb_info *sbi,
1464 unsigned int segno, struct f2fs_summary_block *sum)
1465{
1466 struct f2fs_node *rn;
1467 struct f2fs_summary *sum_entry;
1468 struct page *page;
1469 block_t addr;
1470 int i, last_offset;
1471
1472 /* allocate a temporary page for reading node blocks */
1473 page = alloc_page(GFP_NOFS | __GFP_ZERO);
1474 if (!page)
1475 return -ENOMEM;
1476 lock_page(page);
1477
1478 /* scan the node segment */
1479 last_offset = sbi->blocks_per_seg;
1480 addr = START_BLOCK(sbi, segno);
1481 sum_entry = &sum->entries[0];
1482
1483 for (i = 0; i < last_offset; i++, sum_entry++) {
1484 if (f2fs_readpage(sbi, page, addr, READ_SYNC))
1485 goto out;
1486
1487 rn = (struct f2fs_node *)page_address(page);
1488 sum_entry->nid = rn->footer.nid;
1489 sum_entry->version = 0;
1490 sum_entry->ofs_in_node = 0;
1491 addr++;
1492
1493 /*
1494 * In order to read next node page,
1495 * we must clear PageUptodate flag.
1496 */
1497 ClearPageUptodate(page);
1498 }
1499out:
1500 unlock_page(page);
1501 __free_pages(page, 0);
1502 return 0;
1503}
1504
1505static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1506{
1507 struct f2fs_nm_info *nm_i = NM_I(sbi);
1508 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1509 struct f2fs_summary_block *sum = curseg->sum_blk;
1510 int i;
1511
1512 mutex_lock(&curseg->curseg_mutex);
1513
1514 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1515 mutex_unlock(&curseg->curseg_mutex);
1516 return false;
1517 }
1518
1519 for (i = 0; i < nats_in_cursum(sum); i++) {
1520 struct nat_entry *ne;
1521 struct f2fs_nat_entry raw_ne;
1522 nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
1523
1524 raw_ne = nat_in_journal(sum, i);
1525retry:
1526 write_lock(&nm_i->nat_tree_lock);
1527 ne = __lookup_nat_cache(nm_i, nid);
1528 if (ne) {
1529 __set_nat_cache_dirty(nm_i, ne);
1530 write_unlock(&nm_i->nat_tree_lock);
1531 continue;
1532 }
1533 ne = grab_nat_entry(nm_i, nid);
1534 if (!ne) {
1535 write_unlock(&nm_i->nat_tree_lock);
1536 goto retry;
1537 }
1538 nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
1539 nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
1540 nat_set_version(ne, raw_ne.version);
1541 __set_nat_cache_dirty(nm_i, ne);
1542 write_unlock(&nm_i->nat_tree_lock);
1543 }
1544 update_nats_in_cursum(sum, -i);
1545 mutex_unlock(&curseg->curseg_mutex);
1546 return true;
1547}
1548
1549/*
1550 * This function is called during the checkpointing process.
1551 */
1552void flush_nat_entries(struct f2fs_sb_info *sbi)
1553{
1554 struct f2fs_nm_info *nm_i = NM_I(sbi);
1555 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1556 struct f2fs_summary_block *sum = curseg->sum_blk;
1557 struct list_head *cur, *n;
1558 struct page *page = NULL;
1559 struct f2fs_nat_block *nat_blk = NULL;
1560 nid_t start_nid = 0, end_nid = 0;
1561 bool flushed;
1562
1563 flushed = flush_nats_in_journal(sbi);
1564
1565 if (!flushed)
1566 mutex_lock(&curseg->curseg_mutex);
1567
1568 /* 1) flush dirty nat caches */
1569 list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
1570 struct nat_entry *ne;
1571 nid_t nid;
1572 struct f2fs_nat_entry raw_ne;
1573 int offset = -1;
1574 block_t old_blkaddr, new_blkaddr;
1575
1576 ne = list_entry(cur, struct nat_entry, list);
1577 nid = nat_get_nid(ne);
1578
1579 if (nat_get_blkaddr(ne) == NEW_ADDR)
1580 continue;
1581 if (flushed)
1582 goto to_nat_page;
1583
1584 /* if there is room for nat entries in the current summary page */
1585 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
1586 if (offset >= 0) {
1587 raw_ne = nat_in_journal(sum, offset);
1588 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1589 goto flush_now;
1590 }
1591to_nat_page:
1592 if (!page || (start_nid > nid || nid > end_nid)) {
1593 if (page) {
1594 f2fs_put_page(page, 1);
1595 page = NULL;
1596 }
1597 start_nid = START_NID(nid);
1598 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1599
1600 /*
1601 * get the nat block page: dirty, with an elevated
1602 * reference count, mapped and locked
1603 */
1604 page = get_next_nat_page(sbi, start_nid);
1605 nat_blk = page_address(page);
1606 }
1607
1608 BUG_ON(!nat_blk);
1609 raw_ne = nat_blk->entries[nid - start_nid];
1610 old_blkaddr = le32_to_cpu(raw_ne.block_addr);
1611flush_now:
1612 new_blkaddr = nat_get_blkaddr(ne);
1613
1614 raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
1615 raw_ne.block_addr = cpu_to_le32(new_blkaddr);
1616 raw_ne.version = nat_get_version(ne);
1617
1618 if (offset < 0) {
1619 nat_blk->entries[nid - start_nid] = raw_ne;
1620 } else {
1621 nat_in_journal(sum, offset) = raw_ne;
1622 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1623 }
1624
1625 if (nat_get_blkaddr(ne) == NULL_ADDR) {
1626 write_lock(&nm_i->nat_tree_lock);
1627 __del_from_nat_cache(nm_i, ne);
1628 write_unlock(&nm_i->nat_tree_lock);
1629
1630 /* We can reuse this freed nid at this point */
1631 add_free_nid(NM_I(sbi), nid);
1632 } else {
1633 write_lock(&nm_i->nat_tree_lock);
1634 __clear_nat_cache_dirty(nm_i, ne);
1635 ne->checkpointed = true;
1636 write_unlock(&nm_i->nat_tree_lock);
1637 }
1638 }
1639 if (!flushed)
1640 mutex_unlock(&curseg->curseg_mutex);
1641 f2fs_put_page(page, 1);
1642
1643 /* 2) shrink nat caches if necessary */
1644 try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
1645}
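The flush path above chooses between two destinations for each dirty NAT entry: the NAT journal kept in the current hot-data summary block while it has room, or the entry's home NAT block otherwise (always the latter once flush_nats_in_journal() has spilled the journal). A toy version of that placement decision; NAT_JOURNAL_ENTRIES = 38 is an assumption derived from the on-disk summary layout:

#include <stdio.h>

#define NAT_JOURNAL_ENTRIES 38	/* assumed journal capacity */

static int journal_used;

static const char *place_nat_entry(int journal_flushed)
{
	if (!journal_flushed && journal_used < NAT_JOURNAL_ENTRIES) {
		journal_used++;
		return "NAT journal in the current summary block";
	}
	return "home NAT block (paged in, updated, written back)";
}

int main(void)
{
	printf("nid 10 -> %s\n", place_nat_entry(0));
	printf("nid 11 -> %s\n", place_nat_entry(1));
	return 0;
}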
1646
1647static int init_node_manager(struct f2fs_sb_info *sbi)
1648{
1649 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
1650 struct f2fs_nm_info *nm_i = NM_I(sbi);
1651 unsigned char *version_bitmap;
1652 unsigned int nat_segs, nat_blocks;
1653
1654 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
1655
1656 /* segment_count_nat includes the pair segment, so divide by 2 */
1657 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
1658 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
1659 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
1660 nm_i->fcnt = 0;
1661 nm_i->nat_cnt = 0;
1662
1663 INIT_LIST_HEAD(&nm_i->free_nid_list);
1664 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1665 INIT_LIST_HEAD(&nm_i->nat_entries);
1666 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1667
1668 mutex_init(&nm_i->build_lock);
1669 spin_lock_init(&nm_i->free_nid_list_lock);
1670 rwlock_init(&nm_i->nat_tree_lock);
1671
1672 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
1673 nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1674 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
1675
1676 nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
1677 if (!nm_i->nat_bitmap)
1678 return -ENOMEM;
1679 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
1680 if (!version_bitmap)
1681 return -EFAULT;
1682
1683 /* copy version bitmap */
1684 memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size);
1685 return 0;
1686}
1687
1688int build_node_manager(struct f2fs_sb_info *sbi)
1689{
1690 int err;
1691
1692 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
1693 if (!sbi->nm_info)
1694 return -ENOMEM;
1695
1696 err = init_node_manager(sbi);
1697 if (err)
1698 return err;
1699
1700 build_free_nids(sbi);
1701 return 0;
1702}
1703
1704void destroy_node_manager(struct f2fs_sb_info *sbi)
1705{
1706 struct f2fs_nm_info *nm_i = NM_I(sbi);
1707 struct free_nid *i, *next_i;
1708 struct nat_entry *natvec[NATVEC_SIZE];
1709 nid_t nid = 0;
1710 unsigned int found;
1711
1712 if (!nm_i)
1713 return;
1714
1715 /* destroy free nid list */
1716 spin_lock(&nm_i->free_nid_list_lock);
1717 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
1718 BUG_ON(i->state == NID_ALLOC);
1719 __del_from_free_nid_list(i);
1720 nm_i->fcnt--;
1721 }
1722 BUG_ON(nm_i->fcnt);
1723 spin_unlock(&nm_i->free_nid_list_lock);
1724
1725 /* destroy nat cache */
1726 write_lock(&nm_i->nat_tree_lock);
1727 while ((found = __gang_lookup_nat_cache(nm_i,
1728 nid, NATVEC_SIZE, natvec))) {
1729 unsigned idx;
1730 for (idx = 0; idx < found; idx++) {
1731 struct nat_entry *e = natvec[idx];
1732 nid = nat_get_nid(e) + 1;
1733 __del_from_nat_cache(nm_i, e);
1734 }
1735 }
1736 BUG_ON(nm_i->nat_cnt);
1737 write_unlock(&nm_i->nat_tree_lock);
1738
1739 kfree(nm_i->nat_bitmap);
1740 sbi->nm_info = NULL;
1741 kfree(nm_i);
1742}
1743
1744int create_node_manager_caches(void)
1745{
1746 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1747 sizeof(struct nat_entry), NULL);
1748 if (!nat_entry_slab)
1749 return -ENOMEM;
1750
1751 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1752 sizeof(struct free_nid), NULL);
1753 if (!free_nid_slab) {
1754 kmem_cache_destroy(nat_entry_slab);
1755 return -ENOMEM;
1756 }
1757 return 0;
1758}
1759
1760void destroy_node_manager_caches(void)
1761{
1762 kmem_cache_destroy(free_nid_slab);
1763 kmem_cache_destroy(nat_entry_slab);
1764}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
new file mode 100644
index 000000000000..afdb130f782e
--- /dev/null
+++ b/fs/f2fs/node.h
@@ -0,0 +1,353 @@
1/*
2 * fs/f2fs/node.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11/* start node id of a node block dedicated to the given node id */
12#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
13
14/* node block offset on the NAT area dedicated to the given start node id */
15#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
16
17/* # of pages to perform readahead before building free nids */
18#define FREE_NID_PAGES 4
19
20/* maximum # of free node ids to produce during build_free_nids */
21#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
22
23/* maximum readahead size for node pages while fetching data blocks */
24#define MAX_RA_NODE 128
25
26/* maximum cached nat entries to manage memory footprint */
27#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK)
28
29/* vector size for gang look-up from nat cache that consists of radix tree */
30#define NATVEC_SIZE 64
31
32/*
33 * For node information
34 */
35struct node_info {
36 nid_t nid; /* node id */
37 nid_t ino; /* inode number of the node's owner */
38 block_t blk_addr; /* block address of the node */
39 unsigned char version; /* version of the node */
40};
41
42struct nat_entry {
43 struct list_head list; /* for clean or dirty nat list */
44 bool checkpointed; /* whether it is checkpointed or not */
45 struct node_info ni; /* in-memory node information */
46};
47
48#define nat_get_nid(nat) (nat->ni.nid)
49#define nat_set_nid(nat, n) (nat->ni.nid = n)
50#define nat_get_blkaddr(nat) (nat->ni.blk_addr)
51#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b)
52#define nat_get_ino(nat) (nat->ni.ino)
53#define nat_set_ino(nat, i) (nat->ni.ino = i)
54#define nat_get_version(nat) (nat->ni.version)
55#define nat_set_version(nat, v) (nat->ni.version = v)
56
57#define __set_nat_cache_dirty(nm_i, ne) \
58 list_move_tail(&ne->list, &nm_i->dirty_nat_entries);
59#define __clear_nat_cache_dirty(nm_i, ne) \
60 list_move_tail(&ne->list, &nm_i->nat_entries);
61#define inc_node_version(version) (++version)
62
63static inline void node_info_from_raw_nat(struct node_info *ni,
64 struct f2fs_nat_entry *raw_ne)
65{
66 ni->ino = le32_to_cpu(raw_ne->ino);
67 ni->blk_addr = le32_to_cpu(raw_ne->block_addr);
68 ni->version = raw_ne->version;
69}
70
71/*
72 * For free nid management
73 */
74enum nid_state {
75 NID_NEW, /* newly added to free nid list */
76 NID_ALLOC /* it is allocated */
77};
78
79struct free_nid {
80 struct list_head list; /* for free node id list */
81 nid_t nid; /* node id */
82 int state; /* in use or not: NID_NEW or NID_ALLOC */
83};
84
85static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
86{
87 struct f2fs_nm_info *nm_i = NM_I(sbi);
88 struct free_nid *fnid;
89
90 if (nm_i->fcnt <= 0)
91 return -1;
92 spin_lock(&nm_i->free_nid_list_lock);
93 fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
94 *nid = fnid->nid;
95 spin_unlock(&nm_i->free_nid_list_lock);
96 return 0;
97}
98
99/*
100 * inline functions
101 */
102static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
103{
104 struct f2fs_nm_info *nm_i = NM_I(sbi);
105 memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
106}
107
108static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
109{
110 struct f2fs_nm_info *nm_i = NM_I(sbi);
111 pgoff_t block_off;
112 pgoff_t block_addr;
113 int seg_off;
114
115 block_off = NAT_BLOCK_OFFSET(start);
116 seg_off = block_off >> sbi->log_blocks_per_seg;
117
118 block_addr = (pgoff_t)(nm_i->nat_blkaddr +
119 (seg_off << sbi->log_blocks_per_seg << 1) +
120 (block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
121
122 if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
123 block_addr += sbi->blocks_per_seg;
124
125 return block_addr;
126}
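current_nat_addr() and next_nat_addr() encode the two-copy scheme for the NAT area: every NAT block exists in a pair of segment sets, and one bitmap bit per block selects the live copy, which set_to_next_nat() flips at checkpoint time. A self-contained sketch of the addressing, with example parameters only:

#include <stdio.h>

#define LOG_BLOCKS_PER_SEG 9			/* 512 blocks per segment */
#define BLOCKS_PER_SEG (1 << LOG_BLOCKS_PER_SEG)

static unsigned long nat_blkaddr = 1024;	/* example NAT base address */
static unsigned char second_copy_live[16];	/* one bit per block in the kernel */

static unsigned long nat_block_addr(unsigned long block_off)
{
	unsigned long seg_off = block_off >> LOG_BLOCKS_PER_SEG;
	unsigned long addr = nat_blkaddr +
		(seg_off << LOG_BLOCKS_PER_SEG << 1) +	/* skip both copies */
		(block_off & (BLOCKS_PER_SEG - 1));

	if (second_copy_live[block_off])
		addr += BLOCKS_PER_SEG;			/* use the pair segment */
	return addr;
}

int main(void)
{
	printf("NAT block 0, copy A: %lu\n", nat_block_addr(0));
	second_copy_live[0] = 1;	/* as set_to_next_nat() would flip it */
	printf("NAT block 0, copy B: %lu\n", nat_block_addr(0));
	return 0;
}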
127
128static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi,
129 pgoff_t block_addr)
130{
131 struct f2fs_nm_info *nm_i = NM_I(sbi);
132
133 block_addr -= nm_i->nat_blkaddr;
134 if ((block_addr >> sbi->log_blocks_per_seg) % 2)
135 block_addr -= sbi->blocks_per_seg;
136 else
137 block_addr += sbi->blocks_per_seg;
138
139 return block_addr + nm_i->nat_blkaddr;
140}
141
142static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
143{
144 unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
145
146 if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
147 f2fs_clear_bit(block_off, nm_i->nat_bitmap);
148 else
149 f2fs_set_bit(block_off, nm_i->nat_bitmap);
150}
151
152static inline void fill_node_footer(struct page *page, nid_t nid,
153 nid_t ino, unsigned int ofs, bool reset)
154{
155 void *kaddr = page_address(page);
156 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
157 if (reset)
158 memset(rn, 0, sizeof(*rn));
159 rn->footer.nid = cpu_to_le32(nid);
160 rn->footer.ino = cpu_to_le32(ino);
161 rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT);
162}
163
164static inline void copy_node_footer(struct page *dst, struct page *src)
165{
166 void *src_addr = page_address(src);
167 void *dst_addr = page_address(dst);
168 struct f2fs_node *src_rn = (struct f2fs_node *)src_addr;
169 struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr;
170 memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer));
171}
172
173static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
174{
175 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
176 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
177 void *kaddr = page_address(page);
178 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
179 rn->footer.cp_ver = ckpt->checkpoint_ver;
180 rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
181}
182
183static inline nid_t ino_of_node(struct page *node_page)
184{
185 void *kaddr = page_address(node_page);
186 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
187 return le32_to_cpu(rn->footer.ino);
188}
189
190static inline nid_t nid_of_node(struct page *node_page)
191{
192 void *kaddr = page_address(node_page);
193 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
194 return le32_to_cpu(rn->footer.nid);
195}
196
197static inline unsigned int ofs_of_node(struct page *node_page)
198{
199 void *kaddr = page_address(node_page);
200 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
201 unsigned flag = le32_to_cpu(rn->footer.flag);
202 return flag >> OFFSET_BIT_SHIFT;
203}
204
205static inline unsigned long long cpver_of_node(struct page *node_page)
206{
207 void *kaddr = page_address(node_page);
208 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
209 return le64_to_cpu(rn->footer.cp_ver);
210}
211
212static inline block_t next_blkaddr_of_node(struct page *node_page)
213{
214 void *kaddr = page_address(node_page);
215 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
216 return le32_to_cpu(rn->footer.next_blkaddr);
217}
218
219/*
220 * f2fs assigns the following node offsets described as (num).
221 * N = NIDS_PER_BLOCK
222 *
223 * Inode block (0)
224 * |- direct node (1)
225 * |- direct node (2)
226 * |- indirect node (3)
227 * | `- direct node (4 => 4 + N - 1)
228 * |- indirect node (4 + N)
229 * | `- direct node (5 + N => 5 + 2N - 1)
230 * `- double indirect node (5 + 2N)
231 *     `- indirect node ((6 + 2N) + x(N + 1))
232 *           `- direct node
233 */
234static inline bool IS_DNODE(struct page *node_page)
235{
236 unsigned int ofs = ofs_of_node(node_page);
237 if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
238 ofs == 5 + 2 * NIDS_PER_BLOCK)
239 return false;
240 if (ofs >= 6 + 2 * NIDS_PER_BLOCK) {
241 ofs -= 6 + 2 * NIDS_PER_BLOCK;
242 if (!((long int)ofs % (NIDS_PER_BLOCK + 1)))
243 return false;
244 }
245 return true;
246}
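Plugging small numbers into the layout comment above makes the classification concrete. The sketch below shrinks NIDS_PER_BLOCK to 4 so the output stays readable and enumerates which node offsets count as direct nodes; the logic mirrors IS_DNODE() but is not part of the patch:

#include <stdio.h>
#include <stdbool.h>

#define N 4	/* stand-in for NIDS_PER_BLOCK */

static bool is_dnode_ofs(unsigned int ofs)
{
	if (ofs == 3 || ofs == 4 + N || ofs == 5 + 2 * N)
		return false;	/* the two indirect roots and the double root */
	if (ofs >= 6 + 2 * N && !((ofs - (6 + 2 * N)) % (N + 1)))
		return false;	/* indirect nodes under the double node */
	return true;
}

int main(void)
{
	/* offset 0 is the inode block itself, also treated as a dnode */
	for (unsigned int ofs = 0; ofs <= 7 + 3 * N; ofs++)
		printf("ofs %2u: %s\n", ofs,
		       is_dnode_ofs(ofs) ? "direct" : "indirect");
	return 0;
}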
247
248static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
249{
250 struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
251
252 wait_on_page_writeback(p);
253
254 if (i)
255 rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
256 else
257 rn->in.nid[off] = cpu_to_le32(nid);
258 set_page_dirty(p);
259}
260
261static inline nid_t get_nid(struct page *p, int off, bool i)
262{
263 struct f2fs_node *rn = (struct f2fs_node *)page_address(p);
264 if (i)
265 return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]);
266 return le32_to_cpu(rn->in.nid[off]);
267}
268
269/*
270 * Coldness identification:
271 * - Mark cold files in f2fs_inode_info
272 * - Mark cold node blocks in their node footer
273 * - Mark cold data pages in page cache
274 */
275static inline int is_cold_file(struct inode *inode)
276{
277 return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT;
278}
279
280static inline int is_cold_data(struct page *page)
281{
282 return PageChecked(page);
283}
284
285static inline void set_cold_data(struct page *page)
286{
287 SetPageChecked(page);
288}
289
290static inline void clear_cold_data(struct page *page)
291{
292 ClearPageChecked(page);
293}
294
295static inline int is_cold_node(struct page *page)
296{
297 void *kaddr = page_address(page);
298 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
299 unsigned int flag = le32_to_cpu(rn->footer.flag);
300 return flag & (0x1 << COLD_BIT_SHIFT);
301}
302
303static inline unsigned char is_fsync_dnode(struct page *page)
304{
305 void *kaddr = page_address(page);
306 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
307 unsigned int flag = le32_to_cpu(rn->footer.flag);
308 return flag & (0x1 << FSYNC_BIT_SHIFT);
309}
310
311static inline unsigned char is_dent_dnode(struct page *page)
312{
313 void *kaddr = page_address(page);
314 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
315 unsigned int flag = le32_to_cpu(rn->footer.flag);
316 return flag & (0x1 << DENT_BIT_SHIFT);
317}
318
319static inline void set_cold_node(struct inode *inode, struct page *page)
320{
321 struct f2fs_node *rn = (struct f2fs_node *)page_address(page);
322 unsigned int flag = le32_to_cpu(rn->footer.flag);
323
324 if (S_ISDIR(inode->i_mode))
325 flag &= ~(0x1 << COLD_BIT_SHIFT);
326 else
327 flag |= (0x1 << COLD_BIT_SHIFT);
328 rn->footer.flag = cpu_to_le32(flag);
329}
330
331static inline void set_fsync_mark(struct page *page, int mark)
332{
333 void *kaddr = page_address(page);
334 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
335 unsigned int flag = le32_to_cpu(rn->footer.flag);
336 if (mark)
337 flag |= (0x1 << FSYNC_BIT_SHIFT);
338 else
339 flag &= ~(0x1 << FSYNC_BIT_SHIFT);
340 rn->footer.flag = cpu_to_le32(flag);
341}
342
343static inline void set_dentry_mark(struct page *page, int mark)
344{
345 void *kaddr = page_address(page);
346 struct f2fs_node *rn = (struct f2fs_node *)kaddr;
347 unsigned int flag = le32_to_cpu(rn->footer.flag);
348 if (mark)
349 flag |= (0x1 << DENT_BIT_SHIFT);
350 else
351 flag &= ~(0x1 << DENT_BIT_SHIFT);
352 rn->footer.flag = cpu_to_le32(flag);
353}
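All of the footer-flag helpers above manipulate a single 32-bit word: the node offset lives above OFFSET_BIT_SHIFT and the cold/fsync/dentry markers occupy the low bits. A standalone illustration, assuming the shift values COLD_BIT_SHIFT = 0, FSYNC_BIT_SHIFT = 1, DENT_BIT_SHIFT = 2, OFFSET_BIT_SHIFT = 3 declared in f2fs.h:

#include <stdio.h>
#include <stdint.h>

enum { COLD_BIT_SHIFT, FSYNC_BIT_SHIFT, DENT_BIT_SHIFT, OFFSET_BIT_SHIFT };

int main(void)
{
	uint32_t flag = 0;

	flag = 42u << OFFSET_BIT_SHIFT;	/* fill_node_footer() with ofs 42 */
	flag |= 1u << COLD_BIT_SHIFT;	/* set_cold_node() on a regular file */
	flag |= 1u << FSYNC_BIT_SHIFT;	/* set_fsync_mark(page, 1) */

	printf("ofs   = %u\n", flag >> OFFSET_BIT_SHIFT);	/* 42 */
	printf("cold  = %u\n", (flag >> COLD_BIT_SHIFT) & 1);	/* 1 */
	printf("fsync = %u\n", (flag >> FSYNC_BIT_SHIFT) & 1);	/* 1 */
	printf("dent  = %u\n", (flag >> DENT_BIT_SHIFT) & 1);	/* 0 */
	return 0;
}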
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
new file mode 100644
index 000000000000..b07e9b6ef376
--- /dev/null
+++ b/fs/f2fs/recovery.c
@@ -0,0 +1,375 @@
1/*
2 * fs/f2fs/recovery.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include "f2fs.h"
14#include "node.h"
15#include "segment.h"
16
17static struct kmem_cache *fsync_entry_slab;
18
19bool space_for_roll_forward(struct f2fs_sb_info *sbi)
20{
21 if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
22 > sbi->user_block_count)
23 return false;
24 return true;
25}
26
27static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
28 nid_t ino)
29{
30 struct list_head *this;
31 struct fsync_inode_entry *entry;
32
33 list_for_each(this, head) {
34 entry = list_entry(this, struct fsync_inode_entry, list);
35 if (entry->inode->i_ino == ino)
36 return entry;
37 }
38 return NULL;
39}
40
41static int recover_dentry(struct page *ipage, struct inode *inode)
42{
43 struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage);
44 struct f2fs_inode *raw_inode = &(raw_node->i);
45 struct dentry dent, parent;
46 struct f2fs_dir_entry *de;
47 struct page *page;
48 struct inode *dir;
49 int err = 0;
50
51 if (!is_dent_dnode(ipage))
52 goto out;
53
54 dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino));
55 if (IS_ERR(dir)) {
56 err = PTR_ERR(dir);
57 goto out;
58 }
59
60 parent.d_inode = dir;
61 dent.d_parent = &parent;
62 dent.d_name.len = le32_to_cpu(raw_inode->i_namelen);
63 dent.d_name.name = raw_inode->i_name;
64
65 de = f2fs_find_entry(dir, &dent.d_name, &page);
66 if (de) {
67 kunmap(page);
68 f2fs_put_page(page, 0);
69 } else {
70 f2fs_add_link(&dent, inode);
71 }
72 iput(dir);
73out:
74 kunmap(ipage);
75 return err;
76}
77
78static int recover_inode(struct inode *inode, struct page *node_page)
79{
80 void *kaddr = page_address(node_page);
81 struct f2fs_node *raw_node = (struct f2fs_node *)kaddr;
82 struct f2fs_inode *raw_inode = &(raw_node->i);
83
84 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
85 i_size_write(inode, le64_to_cpu(raw_inode->i_size));
86 inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_atime);
87 inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
88 inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
89 inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_atime_nsec);
90 inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
91 inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
92
93 return recover_dentry(node_page, inode);
94}
95
96static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
97{
98 unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
99 struct curseg_info *curseg;
100 struct page *page;
101 block_t blkaddr;
102 int err = 0;
103
104 /* get node pages in the current segment */
105 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
106 blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff;
107
108 /* read node page */
109 page = alloc_page(GFP_F2FS_ZERO);
110 if (!page)
111 return -ENOMEM;
112 lock_page(page);
113
114 while (1) {
115 struct fsync_inode_entry *entry;
116
117 if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
118 goto out;
119
120 if (cp_ver != cpver_of_node(page))
121 goto out;
122
123 if (!is_fsync_dnode(page))
124 goto next;
125
126 entry = get_fsync_inode(head, ino_of_node(page));
127 if (entry) {
128 entry->blkaddr = blkaddr;
129 if (IS_INODE(page) && is_dent_dnode(page))
130 set_inode_flag(F2FS_I(entry->inode),
131 FI_INC_LINK);
132 } else {
133 if (IS_INODE(page) && is_dent_dnode(page)) {
134 if (recover_inode_page(sbi, page)) {
135 err = -ENOMEM;
136 goto out;
137 }
138 }
139
140 /* add this fsync inode to the list */
141 entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS);
142 if (!entry) {
143 err = -ENOMEM;
144 goto out;
145 }
146
147 entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
148 if (IS_ERR(entry->inode)) {
149 err = PTR_ERR(entry->inode);
150 kmem_cache_free(fsync_entry_slab, entry);
151 goto out;
152 }
153 entry->blkaddr = blkaddr;
154 INIT_LIST_HEAD(&entry->list);
155 list_add_tail(&entry->list, head);
156 }
157 if (IS_INODE(page)) {
158 err = recover_inode(entry->inode, page);
159 if (err)
160 goto out;
161 }
162next:
163 /* check next segment */
164 blkaddr = next_blkaddr_of_node(page);
165 ClearPageUptodate(page);
166 }
167out:
168 unlock_page(page);
169 __free_pages(page, 0);
170 return err;
171}
172
173static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi,
174 struct list_head *head)
175{
176 struct list_head *this;
177 struct fsync_inode_entry *entry;
178 list_for_each(this, head) {
179 entry = list_entry(this, struct fsync_inode_entry, list);
180 iput(entry->inode);
181 list_del(&entry->list);
182 kmem_cache_free(fsync_entry_slab, entry);
183 }
184}
185
186static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
187 block_t blkaddr)
188{
189 struct seg_entry *sentry;
190 unsigned int segno = GET_SEGNO(sbi, blkaddr);
191 unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) &
192 (sbi->blocks_per_seg - 1);
193 struct f2fs_summary sum;
194 nid_t ino;
195 void *kaddr;
196 struct inode *inode;
197 struct page *node_page;
198 block_t bidx;
199 int i;
200
201 sentry = get_seg_entry(sbi, segno);
202 if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
203 return;
204
205 /* Get the previous summary */
206 for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
207 struct curseg_info *curseg = CURSEG_I(sbi, i);
208 if (curseg->segno == segno) {
209 sum = curseg->sum_blk->entries[blkoff];
210 break;
211 }
212 }
213 if (i > CURSEG_COLD_DATA) {
214 struct page *sum_page = get_sum_page(sbi, segno);
215 struct f2fs_summary_block *sum_node;
216 kaddr = page_address(sum_page);
217 sum_node = (struct f2fs_summary_block *)kaddr;
218 sum = sum_node->entries[blkoff];
219 f2fs_put_page(sum_page, 1);
220 }
221
222 /* Get the node page */
223 node_page = get_node_page(sbi, le32_to_cpu(sum.nid));
224 bidx = start_bidx_of_node(ofs_of_node(node_page)) +
225 le16_to_cpu(sum.ofs_in_node);
226 ino = ino_of_node(node_page);
227 f2fs_put_page(node_page, 1);
228
229 /* Deallocate previous index in the node page */
230 inode = f2fs_iget_nowait(sbi->sb, ino);
231 truncate_hole(inode, bidx, bidx + 1);
232 iput(inode);
233}
234
235static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
236 struct page *page, block_t blkaddr)
237{
238 unsigned int start, end;
239 struct dnode_of_data dn;
240 struct f2fs_summary sum;
241 struct node_info ni;
242
243 start = start_bidx_of_node(ofs_of_node(page));
244 if (IS_INODE(page))
245 end = start + ADDRS_PER_INODE;
246 else
247 end = start + ADDRS_PER_BLOCK;
248
249 set_new_dnode(&dn, inode, NULL, NULL, 0);
250 if (get_dnode_of_data(&dn, start, 0))
251 return;
252
253 wait_on_page_writeback(dn.node_page);
254
255 get_node_info(sbi, dn.nid, &ni);
256 BUG_ON(ni.ino != ino_of_node(page));
257 BUG_ON(ofs_of_node(dn.node_page) != ofs_of_node(page));
258
259 for (; start < end; start++) {
260 block_t src, dest;
261
262 src = datablock_addr(dn.node_page, dn.ofs_in_node);
263 dest = datablock_addr(page, dn.ofs_in_node);
264
265 if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
266 if (src == NULL_ADDR) {
267 int err = reserve_new_block(&dn);
268 /* We should not get -ENOSPC */
269 BUG_ON(err);
270 }
271
272 /* Check the previous node page having this index */
273 check_index_in_prev_nodes(sbi, dest);
274
275 set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
276
277 /* write dummy data page */
278 recover_data_page(sbi, NULL, &sum, src, dest);
279 update_extent_cache(dest, &dn);
280 }
281 dn.ofs_in_node++;
282 }
283
284 /* write node page in place */
285 set_summary(&sum, dn.nid, 0, 0);
286 if (IS_INODE(dn.node_page))
287 sync_inode_page(&dn);
288
289 copy_node_footer(dn.node_page, page);
290 fill_node_footer(dn.node_page, dn.nid, ni.ino,
291 ofs_of_node(page), false);
292 set_page_dirty(dn.node_page);
293
294 recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
295 f2fs_put_dnode(&dn);
296}
297
298static void recover_data(struct f2fs_sb_info *sbi,
299 struct list_head *head, int type)
300{
301 unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
302 struct curseg_info *curseg;
303 struct page *page;
304 block_t blkaddr;
305
306 /* get node pages in the current segment */
307 curseg = CURSEG_I(sbi, type);
308 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
309
310 /* read node page */
311 page = alloc_page(GFP_NOFS | __GFP_ZERO);
312 if (!page)
313 return;
314 lock_page(page);
315
316 while (1) {
317 struct fsync_inode_entry *entry;
318
319 if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
320 goto out;
321
322 if (cp_ver != cpver_of_node(page))
323 goto out;
324
325 entry = get_fsync_inode(head, ino_of_node(page));
326 if (!entry)
327 goto next;
328
329 do_recover_data(sbi, entry->inode, page, blkaddr);
330
331 if (entry->blkaddr == blkaddr) {
332 iput(entry->inode);
333 list_del(&entry->list);
334 kmem_cache_free(fsync_entry_slab, entry);
335 }
336next:
337 /* check next segment */
338 blkaddr = next_blkaddr_of_node(page);
339 ClearPageUptodate(page);
340 }
341out:
342 unlock_page(page);
343 __free_pages(page, 0);
344
345 allocate_new_segments(sbi);
346}
347
348void recover_fsync_data(struct f2fs_sb_info *sbi)
349{
350 struct list_head inode_list;
351
352 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
353 sizeof(struct fsync_inode_entry), NULL);
354 if (unlikely(!fsync_entry_slab))
355 return;
356
357 INIT_LIST_HEAD(&inode_list);
358
359 /* step #1: find fsynced inode numbers */
360 if (find_fsync_dnodes(sbi, &inode_list))
361 goto out;
362
363 if (list_empty(&inode_list))
364 goto out;
365
366 /* step #2: recover data */
367 sbi->por_doing = 1;
368 recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
369 sbi->por_doing = 0;
370 BUG_ON(!list_empty(&inode_list));
371out:
372 destroy_fsync_dnodes(sbi, &inode_list);
373 kmem_cache_destroy(fsync_entry_slab);
374 write_checkpoint(sbi, false, false);
375}
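The roll-forward pass above hinges on two node footer fields: next_blkaddr chains the warm node blocks written by fsync, and cp_ver stamps each block with the checkpoint version, so the scan stops at the first stale block. A minimal userspace model of that walk, illustrative only:

#include <stdio.h>
#include <stdint.h>

struct node_blk {
	uint64_t cp_ver;	/* checkpoint version in the footer */
	uint32_t ino;
	uint32_t next_blkaddr;	/* chain to the next fsynced node block */
};

int main(void)
{
	/* a toy node segment: the array index stands in for a block address */
	struct node_blk seg[4] = {
		[1] = { .cp_ver = 5, .ino = 100, .next_blkaddr = 2 },
		[2] = { .cp_ver = 5, .ino = 101, .next_blkaddr = 3 },
		[3] = { .cp_ver = 4, .ino = 102, .next_blkaddr = 0 }, /* stale */
	};
	uint64_t cur_cp_ver = 5;
	uint32_t blkaddr = 1;	/* first block written after the checkpoint */

	while (seg[blkaddr].cp_ver == cur_cp_ver) {
		printf("recover ino %u at blkaddr %u\n",
		       seg[blkaddr].ino, blkaddr);
		blkaddr = seg[blkaddr].next_blkaddr;
	}
	return 0;
}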
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
new file mode 100644
index 000000000000..1b26e4ea1016
--- /dev/null
+++ b/fs/f2fs/segment.c
@@ -0,0 +1,1791 @@
1/*
2 * fs/f2fs/segment.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/bio.h>
14#include <linux/blkdev.h>
15#include <linux/vmalloc.h>
16
17#include "f2fs.h"
18#include "segment.h"
19#include "node.h"
20
21static int need_to_flush(struct f2fs_sb_info *sbi)
22{
23 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
24 sbi->segs_per_sec;
25 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
26 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
27 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
28 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
29
30 if (sbi->por_doing)
31 return 0;
32
33 if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
34 reserved_sections(sbi)))
35 return 1;
36 return 0;
37}
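need_to_flush() converts the dirty node and dentry page counts into whole sections, rounding up, and weights dentry sections double before comparing against the free sections. The same test recast as a standalone sketch, with arbitrary example parameters:

#include <stdio.h>

#define LOG_BLOCKS_PER_SEG 9
#define SEGS_PER_SEC 1

static int secs_needed(int dirty_pages)
{
	int pages_per_sec = (1 << LOG_BLOCKS_PER_SEG) * SEGS_PER_SEC;

	/* round up to whole sections, as the kernel expression does */
	return ((dirty_pages + pages_per_sec - 1)
		>> LOG_BLOCKS_PER_SEG) / SEGS_PER_SEC;
}

int main(void)
{
	int node_secs = secs_needed(700);	/* dirty node pages */
	int dent_secs = secs_needed(100);	/* dirty dentry pages */
	int free_secs = 8, reserved_secs = 3;

	if (free_secs <= node_secs + 2 * dent_secs + reserved_secs)
		printf("flush needed\n");
	else
		printf("no flush needed\n");
	return 0;
}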
38
39/*
40 * This function balances dirty node and dentry pages.
41 * In addition, it controls garbage collection.
42 */
43void f2fs_balance_fs(struct f2fs_sb_info *sbi)
44{
45 struct writeback_control wbc = {
46 .sync_mode = WB_SYNC_ALL,
47 .nr_to_write = LONG_MAX,
48 .for_reclaim = 0,
49 };
50
51 if (sbi->por_doing)
52 return;
53
54 /*
55 * We should write a checkpoint while there are too many dirty
56 * node pages and enough free segments remain. After that, do GC.
57 */
58 if (need_to_flush(sbi)) {
59 sync_dirty_dir_inodes(sbi);
60 sync_node_pages(sbi, 0, &wbc);
61 }
62
63 if (has_not_enough_free_secs(sbi)) {
64 mutex_lock(&sbi->gc_mutex);
65 f2fs_gc(sbi, 1);
66 }
67}
68
69static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
70 enum dirty_type dirty_type)
71{
72 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
73
74 /* need not be added */
75 if (IS_CURSEG(sbi, segno))
76 return;
77
78 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
79 dirty_i->nr_dirty[dirty_type]++;
80
81 if (dirty_type == DIRTY) {
82 struct seg_entry *sentry = get_seg_entry(sbi, segno);
83 dirty_type = sentry->type;
84 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
85 dirty_i->nr_dirty[dirty_type]++;
86 }
87}
88
89static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
90 enum dirty_type dirty_type)
91{
92 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
93
94 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
95 dirty_i->nr_dirty[dirty_type]--;
96
97 if (dirty_type == DIRTY) {
98 struct seg_entry *sentry = get_seg_entry(sbi, segno);
99 dirty_type = sentry->type;
100 if (test_and_clear_bit(segno,
101 dirty_i->dirty_segmap[dirty_type]))
102 dirty_i->nr_dirty[dirty_type]--;
103 clear_bit(segno, dirty_i->victim_segmap[FG_GC]);
104 clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
105 }
106}
107
108/*
109 * No error such as -ENOMEM should occur here, since
110 * adding a dirty entry into the seglist is not a critical operation.
111 * If a given segment is one of current working segments, it won't be added.
112 */
113void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
114{
115 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
116 unsigned short valid_blocks;
117
118 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
119 return;
120
121 mutex_lock(&dirty_i->seglist_lock);
122
123 valid_blocks = get_valid_blocks(sbi, segno, 0);
124
125 if (valid_blocks == 0) {
126 __locate_dirty_segment(sbi, segno, PRE);
127 __remove_dirty_segment(sbi, segno, DIRTY);
128 } else if (valid_blocks < sbi->blocks_per_seg) {
129 __locate_dirty_segment(sbi, segno, DIRTY);
130 } else {
131 /* Recovery routine with SSR needs this */
132 __remove_dirty_segment(sbi, segno, DIRTY);
133 }
134
135 mutex_unlock(&dirty_i->seglist_lock);
136 return;
137}
138
139/*
140 * clear_prefree_segments should be called after the checkpoint is done.
141 */
142static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
143{
144 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
145 unsigned int segno, offset = 0;
146 unsigned int total_segs = TOTAL_SEGS(sbi);
147
148 mutex_lock(&dirty_i->seglist_lock);
149 while (1) {
150 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
151 offset);
152 if (segno >= total_segs)
153 break;
154 __set_test_and_free(sbi, segno);
155 offset = segno + 1;
156 }
157 mutex_unlock(&dirty_i->seglist_lock);
158}
159
160void clear_prefree_segments(struct f2fs_sb_info *sbi)
161{
162 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
163 unsigned int segno, offset = 0;
164 unsigned int total_segs = TOTAL_SEGS(sbi);
165
166 mutex_lock(&dirty_i->seglist_lock);
167 while (1) {
168 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
169 offset);
170 if (segno >= total_segs)
171 break;
172
173 offset = segno + 1;
174 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
175 dirty_i->nr_dirty[PRE]--;
176
177 /* Let's use trim */
178 if (test_opt(sbi, DISCARD))
179 blkdev_issue_discard(sbi->sb->s_bdev,
180 START_BLOCK(sbi, segno) <<
181 sbi->log_sectors_per_block,
182 1 << (sbi->log_sectors_per_block +
183 sbi->log_blocks_per_seg),
184 GFP_NOFS, 0);
185 }
186 mutex_unlock(&dirty_i->seglist_lock);
187}
188
189static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
190{
191 struct sit_info *sit_i = SIT_I(sbi);
192 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap))
193 sit_i->dirty_sentries++;
194}
195
196static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
197 unsigned int segno, int modified)
198{
199 struct seg_entry *se = get_seg_entry(sbi, segno);
200 se->type = type;
201 if (modified)
202 __mark_sit_entry_dirty(sbi, segno);
203}
204
205static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
206{
207 struct seg_entry *se;
208 unsigned int segno, offset;
209 long int new_vblocks;
210
211 segno = GET_SEGNO(sbi, blkaddr);
212
213 se = get_seg_entry(sbi, segno);
214 new_vblocks = se->valid_blocks + del;
215 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
216
217 BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
218 (new_vblocks > sbi->blocks_per_seg)));
219
220 se->valid_blocks = new_vblocks;
221 se->mtime = get_mtime(sbi);
222 SIT_I(sbi)->max_mtime = se->mtime;
223
224 /* Update valid block bitmap */
225 if (del > 0) {
226 if (f2fs_set_bit(offset, se->cur_valid_map))
227 BUG();
228 } else {
229 if (!f2fs_clear_bit(offset, se->cur_valid_map))
230 BUG();
231 }
232 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
233 se->ckpt_valid_blocks += del;
234
235 __mark_sit_entry_dirty(sbi, segno);
236
237 /* update total number of valid blocks to be written in ckpt area */
238 SIT_I(sbi)->written_valid_blocks += del;
239
240 if (sbi->segs_per_sec > 1)
241 get_sec_entry(sbi, segno)->valid_blocks += del;
242}
243
244static void refresh_sit_entry(struct f2fs_sb_info *sbi,
245 block_t old_blkaddr, block_t new_blkaddr)
246{
247 update_sit_entry(sbi, new_blkaddr, 1);
248 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
249 update_sit_entry(sbi, old_blkaddr, -1);
250}
251
252void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
253{
254 unsigned int segno = GET_SEGNO(sbi, addr);
255 struct sit_info *sit_i = SIT_I(sbi);
256
257 BUG_ON(addr == NULL_ADDR);
258 if (addr == NEW_ADDR)
259 return;
260
261 /* add it into sit main buffer */
262 mutex_lock(&sit_i->sentry_lock);
263
264 update_sit_entry(sbi, addr, -1);
265
266 /* add it into dirty seglist */
267 locate_dirty_segment(sbi, segno);
268
269 mutex_unlock(&sit_i->sentry_lock);
270}
271
272/*
273 * This function should be resided under the curseg_mutex lock
274 */
275static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
276 struct f2fs_summary *sum, unsigned short offset)
277{
278 struct curseg_info *curseg = CURSEG_I(sbi, type);
279 void *addr = curseg->sum_blk;
280 addr += offset * sizeof(struct f2fs_summary);
281 memcpy(addr, sum, sizeof(struct f2fs_summary));
282 return;
283}
284
285/*
286 * Calculate the number of current summary pages for writing
287 */
288int npages_for_summary_flush(struct f2fs_sb_info *sbi)
289{
290 int total_size_bytes = 0;
291 int valid_sum_count = 0;
292 int i, sum_space;
293
294 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
295 if (sbi->ckpt->alloc_type[i] == SSR)
296 valid_sum_count += sbi->blocks_per_seg;
297 else
298 valid_sum_count += curseg_blkoff(sbi, i);
299 }
300
301 total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
302 + sizeof(struct nat_journal) + 2
303 + sizeof(struct sit_journal) + 2;
304 sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
305 if (total_size_bytes < sum_space)
306 return 1;
307 else if (total_size_bytes < 2 * sum_space)
308 return 2;
309 return 3;
310}
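
To see why the result is capped at three pages, it helps to plug in numbers. The sketch below assumes a 7-byte f2fs_summary, a 5-byte summary footer, and a 4 KB page; the two journal sizes are placeholders rather than the real struct sizes:

#include <stdio.h>

#define SUMMARY_SIZE	7	/* assumed sizeof(struct f2fs_summary) */
#define SUM_FOOTER_SIZE	5	/* assumed footer size */
#define PAGE_SIZE_	4096

int main(void)
{
	int nat_journal = 505, sit_journal = 505;	/* hypothetical sizes */
	int valid_sum_count = 3 * 100;	/* e.g. three data logs, 100 blocks each */
	int total = valid_sum_count * (SUMMARY_SIZE + 1)
			+ nat_journal + 2 + sit_journal + 2;
	int sum_space = PAGE_SIZE_ - SUM_FOOTER_SIZE;

	/* Same thresholds as npages_for_summary_flush(). */
	printf("%d bytes -> %d page(s)\n", total,
	       total < sum_space ? 1 : total < 2 * sum_space ? 2 : 3);
	return 0;
}
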
311
312/*
313 * Caller should put this summary page
314 */
315struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
316{
317 return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
318}
319
320static void write_sum_page(struct f2fs_sb_info *sbi,
321 struct f2fs_summary_block *sum_blk, block_t blk_addr)
322{
323 struct page *page = grab_meta_page(sbi, blk_addr);
324 void *kaddr = page_address(page);
325 memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
326 set_page_dirty(page);
327 f2fs_put_page(page, 1);
328}
329
330static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
331 int ofs_unit, int type)
332{
333 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
334 unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
335 unsigned int segno, next_segno, i;
336 int ofs = 0;
337
338 /*
	 * If there are not enough reserved sections,
340 * we should not reuse prefree segments.
341 */
342 if (has_not_enough_free_secs(sbi))
343 return NULL_SEGNO;
344
345 /*
	 * A NODE page should not reuse a prefree segment,
	 * since that information is used for SPOR (sudden-power-off recovery).
348 */
349 if (IS_NODESEG(type))
350 return NULL_SEGNO;
351next:
352 segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++);
353 ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit;
354 if (segno < TOTAL_SEGS(sbi)) {
355 /* skip intermediate segments in a section */
356 if (segno % ofs_unit)
357 goto next;
358
359 /* skip if whole section is not prefree */
360 next_segno = find_next_zero_bit(prefree_segmap,
361 TOTAL_SEGS(sbi), segno + 1);
362 if (next_segno - segno < ofs_unit)
363 goto next;
364
365 /* skip if whole section was not free at the last checkpoint */
366 for (i = 0; i < ofs_unit; i++)
			if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks)
368 goto next;
369 return segno;
370 }
371 return NULL_SEGNO;
372}
373
374/*
375 * Find a new segment from the free segments bitmap in the right order.
376 * This function must succeed; otherwise it BUGs.
377 */
378static void get_new_segment(struct f2fs_sb_info *sbi,
379 unsigned int *newseg, bool new_sec, int dir)
380{
381 struct free_segmap_info *free_i = FREE_I(sbi);
382 unsigned int total_secs = sbi->total_sections;
383 unsigned int segno, secno, zoneno;
384 unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone;
385 unsigned int hint = *newseg / sbi->segs_per_sec;
386 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
387 unsigned int left_start = hint;
388 bool init = true;
389 int go_left = 0;
390 int i;
391
392 write_lock(&free_i->segmap_lock);
393
394 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
395 segno = find_next_zero_bit(free_i->free_segmap,
396 TOTAL_SEGS(sbi), *newseg + 1);
397 if (segno < TOTAL_SEGS(sbi))
398 goto got_it;
399 }
400find_other_zone:
401 secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint);
402 if (secno >= total_secs) {
403 if (dir == ALLOC_RIGHT) {
404 secno = find_next_zero_bit(free_i->free_secmap,
405 total_secs, 0);
406 BUG_ON(secno >= total_secs);
407 } else {
408 go_left = 1;
409 left_start = hint - 1;
410 }
411 }
412 if (go_left == 0)
413 goto skip_left;
414
415 while (test_bit(left_start, free_i->free_secmap)) {
416 if (left_start > 0) {
417 left_start--;
418 continue;
419 }
420 left_start = find_next_zero_bit(free_i->free_secmap,
421 total_secs, 0);
422 BUG_ON(left_start >= total_secs);
423 break;
424 }
425 secno = left_start;
426skip_left:
427 hint = secno;
428 segno = secno * sbi->segs_per_sec;
429 zoneno = secno / sbi->secs_per_zone;
430
431 /* give up on finding another zone */
432 if (!init)
433 goto got_it;
434 if (sbi->secs_per_zone == 1)
435 goto got_it;
436 if (zoneno == old_zoneno)
437 goto got_it;
438 if (dir == ALLOC_LEFT) {
439 if (!go_left && zoneno + 1 >= total_zones)
440 goto got_it;
441 if (go_left && zoneno == 0)
442 goto got_it;
443 }
444 for (i = 0; i < NR_CURSEG_TYPE; i++)
445 if (CURSEG_I(sbi, i)->zone == zoneno)
446 break;
447
448 if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
450 if (go_left)
451 hint = zoneno * sbi->secs_per_zone - 1;
452 else if (zoneno + 1 >= total_zones)
453 hint = 0;
454 else
455 hint = (zoneno + 1) * sbi->secs_per_zone;
456 init = false;
457 goto find_other_zone;
458 }
459got_it:
460 /* set it as dirty segment in free segmap */
461 BUG_ON(test_bit(segno, free_i->free_segmap));
462 __set_inuse(sbi, segno);
463 *newseg = segno;
464 write_unlock(&free_i->segmap_lock);
465}
466
467static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
468{
469 struct curseg_info *curseg = CURSEG_I(sbi, type);
470 struct summary_footer *sum_footer;
471
472 curseg->segno = curseg->next_segno;
473 curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
474 curseg->next_blkoff = 0;
475 curseg->next_segno = NULL_SEGNO;
476
477 sum_footer = &(curseg->sum_blk->footer);
478 memset(sum_footer, 0, sizeof(struct summary_footer));
479 if (IS_DATASEG(type))
480 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
481 if (IS_NODESEG(type))
482 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
483 __set_sit_entry_type(sbi, type, curseg->segno, modified);
484}
485
486/*
487 * Allocate a current working segment.
488 * This function always allocates a free segment in LFS manner.
489 */
490static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
491{
492 struct curseg_info *curseg = CURSEG_I(sbi, type);
493 unsigned int segno = curseg->segno;
494 int dir = ALLOC_LEFT;
495
496 write_sum_page(sbi, curseg->sum_blk,
497 GET_SUM_BLOCK(sbi, curseg->segno));
498 if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
499 dir = ALLOC_RIGHT;
500
501 if (test_opt(sbi, NOHEAP))
502 dir = ALLOC_RIGHT;
503
504 get_new_segment(sbi, &segno, new_sec, dir);
505 curseg->next_segno = segno;
506 reset_curseg(sbi, type, 1);
507 curseg->alloc_type = LFS;
508}
509
510static void __next_free_blkoff(struct f2fs_sb_info *sbi,
511 struct curseg_info *seg, block_t start)
512{
513 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
514 block_t ofs;
515 for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
516 if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
517 && !f2fs_test_bit(ofs, se->cur_valid_map))
518 break;
519 }
520 seg->next_blkoff = ofs;
521}
522
523/*
524 * If a segment is written in LFS manner, the next block offset is simply
525 * obtained by increasing the current block offset. If a segment is written
526 * in SSR manner, the next block offset is obtained via __next_free_blkoff().
527 */
528static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
529 struct curseg_info *seg)
530{
531 if (seg->alloc_type == SSR)
532 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
533 else
534 seg->next_blkoff++;
535}
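
In short, LFS just bumps a cursor while SSR must scan past blocks that are valid either now or as of the last checkpoint. A user-space sketch of the SSR-style scan, using byte-per-block maps for readability instead of the packed bitmaps f2fs uses:

#include <stdio.h>

/* Byte-per-block maps for clarity; f2fs packs these into bit arrays. */
static unsigned next_free_blkoff(const char *ckpt_map, const char *cur_map,
				 unsigned start, unsigned blocks_per_seg)
{
	unsigned ofs;

	for (ofs = start; ofs < blocks_per_seg; ofs++)
		if (!ckpt_map[ofs] && !cur_map[ofs])
			break;	/* free both now and at last checkpoint */
	return ofs;
}

int main(void)
{
	char ckpt[8] = { 1, 1, 0, 0, 1, 0, 0, 0 };
	char cur[8]  = { 1, 1, 1, 0, 0, 0, 0, 0 };

	/* SSR scans past in-use blocks; LFS would simply return start. */
	printf("next free offset: %u\n", next_free_blkoff(ckpt, cur, 0, 8));
	return 0;
}
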
536
537/*
538 * This function always allocates a used segment (from the dirty seglist) in
539 * SSR manner, so it must recover the existing valid-block information.
540 */
541static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
542{
543 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
544 struct curseg_info *curseg = CURSEG_I(sbi, type);
545 unsigned int new_segno = curseg->next_segno;
546 struct f2fs_summary_block *sum_node;
547 struct page *sum_page;
548
549 write_sum_page(sbi, curseg->sum_blk,
550 GET_SUM_BLOCK(sbi, curseg->segno));
551 __set_test_and_inuse(sbi, new_segno);
552
553 mutex_lock(&dirty_i->seglist_lock);
554 __remove_dirty_segment(sbi, new_segno, PRE);
555 __remove_dirty_segment(sbi, new_segno, DIRTY);
556 mutex_unlock(&dirty_i->seglist_lock);
557
558 reset_curseg(sbi, type, 1);
559 curseg->alloc_type = SSR;
560 __next_free_blkoff(sbi, curseg, 0);
561
562 if (reuse) {
563 sum_page = get_sum_page(sbi, new_segno);
564 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
565 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
566 f2fs_put_page(sum_page, 1);
567 }
568}
569
570/*
571 * Flush out the current segment and replace it with a new one.
572 * This function must succeed; otherwise it BUGs.
573 */
574static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
575 int type, bool force)
576{
577 struct curseg_info *curseg = CURSEG_I(sbi, type);
578 unsigned int ofs_unit;
579
580 if (force) {
581 new_curseg(sbi, type, true);
582 goto out;
583 }
584
585 ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec;
586 curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
587
588 if (curseg->next_segno != NULL_SEGNO)
589 change_curseg(sbi, type, false);
590 else if (type == CURSEG_WARM_NODE)
591 new_curseg(sbi, type, false);
592 else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
593 change_curseg(sbi, type, true);
594 else
595 new_curseg(sbi, type, false);
596out:
597 sbi->segment_count[curseg->alloc_type]++;
598}
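
Collapsed to its decision ladder, the default allocator tries, in order: a forced fresh section, a wholly prefree section, plain LFS for warm node logs, SSR when free space is tight and a victim segment exists, and finally plain LFS. A readable restatement under those assumptions, with booleans standing in for the real predicates:

#include <stdio.h>

enum alloc_path { NEW_SEC, REUSE_PREFREE, NEW_LFS, CHANGE_SSR };

static enum alloc_path pick(int force, int has_prefree, int is_warm_node,
			    int need_ssr, int has_victim)
{
	if (force)
		return NEW_SEC;		/* fresh segment in a new section */
	if (has_prefree)
		return REUSE_PREFREE;	/* wholly invalid section, reusable */
	if (is_warm_node)
		return NEW_LFS;		/* warm node log always goes LFS */
	if (need_ssr && has_victim)
		return CHANGE_SSR;	/* recycle slack space */
	return NEW_LFS;
}

int main(void)
{
	printf("%d %d\n", pick(0, 0, 0, 1, 1), pick(0, 0, 1, 1, 1));
	return 0;
}
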
599
600void allocate_new_segments(struct f2fs_sb_info *sbi)
601{
602 struct curseg_info *curseg;
603 unsigned int old_curseg;
604 int i;
605
606 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
607 curseg = CURSEG_I(sbi, i);
608 old_curseg = curseg->segno;
609 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
610 locate_dirty_segment(sbi, old_curseg);
611 }
612}
613
614static const struct segment_allocation default_salloc_ops = {
615 .allocate_segment = allocate_segment_by_default,
616};
617
618static void f2fs_end_io_write(struct bio *bio, int err)
619{
620 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
621 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
622 struct bio_private *p = bio->bi_private;
623
624 do {
625 struct page *page = bvec->bv_page;
626
627 if (--bvec >= bio->bi_io_vec)
628 prefetchw(&bvec->bv_page->flags);
629 if (!uptodate) {
630 SetPageError(page);
631 if (page->mapping)
632 set_bit(AS_EIO, &page->mapping->flags);
633 set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG);
634 set_page_dirty(page);
635 }
636 end_page_writeback(page);
637 dec_page_count(p->sbi, F2FS_WRITEBACK);
638 } while (bvec >= bio->bi_io_vec);
639
640 if (p->is_sync)
641 complete(p->wait);
642 kfree(p);
643 bio_put(bio);
644}
645
646struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages)
647{
648 struct bio *bio;
649 struct bio_private *priv;
650retry:
651 priv = kmalloc(sizeof(struct bio_private), GFP_NOFS);
652 if (!priv) {
653 cond_resched();
654 goto retry;
655 }
656
657 /* No failure on bio allocation */
658 bio = bio_alloc(GFP_NOIO, npages);
659 bio->bi_bdev = bdev;
660 bio->bi_private = priv;
661 return bio;
662}
663
664static void do_submit_bio(struct f2fs_sb_info *sbi,
665 enum page_type type, bool sync)
666{
667 int rw = sync ? WRITE_SYNC : WRITE;
668 enum page_type btype = type > META ? META : type;
669
670 if (type >= META_FLUSH)
671 rw = WRITE_FLUSH_FUA;
672
673 if (sbi->bio[btype]) {
674 struct bio_private *p = sbi->bio[btype]->bi_private;
675 p->sbi = sbi;
676 sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
677 if (type == META_FLUSH) {
678 DECLARE_COMPLETION_ONSTACK(wait);
679 p->is_sync = true;
680 p->wait = &wait;
681 submit_bio(rw, sbi->bio[btype]);
682 wait_for_completion(&wait);
683 } else {
684 p->is_sync = false;
685 submit_bio(rw, sbi->bio[btype]);
686 }
687 sbi->bio[btype] = NULL;
688 }
689}
690
691void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
692{
693 down_write(&sbi->bio_sem);
694 do_submit_bio(sbi, type, sync);
695 up_write(&sbi->bio_sem);
696}
697
698static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
699 block_t blk_addr, enum page_type type)
700{
701 struct block_device *bdev = sbi->sb->s_bdev;
702
703 verify_block_addr(sbi, blk_addr);
704
705 down_write(&sbi->bio_sem);
706
707 inc_page_count(sbi, F2FS_WRITEBACK);
708
709 if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
710 do_submit_bio(sbi, type, false);
711alloc_new:
712 if (sbi->bio[type] == NULL) {
713 sbi->bio[type] = f2fs_bio_alloc(bdev, bio_get_nr_vecs(bdev));
714 sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
715 /*
		 * The end_io will be assigned at the submission phase.
717 * Until then, let bio_add_page() merge consecutive IOs as much
718 * as possible.
719 */
720 }
721
722 if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
723 PAGE_CACHE_SIZE) {
724 do_submit_bio(sbi, type, false);
725 goto alloc_new;
726 }
727
728 sbi->last_block_in_bio[type] = blk_addr;
729
730 up_write(&sbi->bio_sem);
731}
732
733static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
734{
735 struct curseg_info *curseg = CURSEG_I(sbi, type);
736 if (curseg->next_blkoff < sbi->blocks_per_seg)
737 return true;
738 return false;
739}
740
741static int __get_segment_type_2(struct page *page, enum page_type p_type)
742{
743 if (p_type == DATA)
744 return CURSEG_HOT_DATA;
745 else
746 return CURSEG_HOT_NODE;
747}
748
749static int __get_segment_type_4(struct page *page, enum page_type p_type)
750{
751 if (p_type == DATA) {
752 struct inode *inode = page->mapping->host;
753
754 if (S_ISDIR(inode->i_mode))
755 return CURSEG_HOT_DATA;
756 else
757 return CURSEG_COLD_DATA;
758 } else {
759 if (IS_DNODE(page) && !is_cold_node(page))
760 return CURSEG_HOT_NODE;
761 else
762 return CURSEG_COLD_NODE;
763 }
764}
765
766static int __get_segment_type_6(struct page *page, enum page_type p_type)
767{
768 if (p_type == DATA) {
769 struct inode *inode = page->mapping->host;
770
771 if (S_ISDIR(inode->i_mode))
772 return CURSEG_HOT_DATA;
773 else if (is_cold_data(page) || is_cold_file(inode))
774 return CURSEG_COLD_DATA;
775 else
776 return CURSEG_WARM_DATA;
777 } else {
778 if (IS_DNODE(page))
779 return is_cold_node(page) ? CURSEG_WARM_NODE :
780 CURSEG_HOT_NODE;
781 else
782 return CURSEG_COLD_NODE;
783 }
784}
785
786static int __get_segment_type(struct page *page, enum page_type p_type)
787{
788 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
789 switch (sbi->active_logs) {
790 case 2:
791 return __get_segment_type_2(page, p_type);
792 case 4:
793 return __get_segment_type_4(page, p_type);
794 case 6:
795 return __get_segment_type_6(page, p_type);
796 default:
797 BUG();
798 }
799}
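
With six active logs, data pages separate into directory data (hot), explicitly cold data or files (cold), and everything else (warm). A compressed user-space restatement of that data-side mapping, where is_dir and is_cold stand in for S_ISDIR() and the is_cold_data()/is_cold_file() checks:

#include <stdio.h>

enum { HOT_DATA, WARM_DATA, COLD_DATA };

/* is_dir/is_cold mimic the real predicates; this is a sketch only. */
static int data_temperature(int is_dir, int is_cold)
{
	if (is_dir)
		return HOT_DATA;	/* directory blocks: hot */
	if (is_cold)
		return COLD_DATA;	/* cold-hinted file data */
	return WARM_DATA;		/* ordinary file data */
}

int main(void)
{
	printf("%d %d %d\n", data_temperature(1, 0),
	       data_temperature(0, 1), data_temperature(0, 0));
	return 0;
}
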
800
801static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
802 block_t old_blkaddr, block_t *new_blkaddr,
803 struct f2fs_summary *sum, enum page_type p_type)
804{
805 struct sit_info *sit_i = SIT_I(sbi);
806 struct curseg_info *curseg;
807 unsigned int old_cursegno;
808 int type;
809
810 type = __get_segment_type(page, p_type);
811 curseg = CURSEG_I(sbi, type);
812
813 mutex_lock(&curseg->curseg_mutex);
814
815 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
816 old_cursegno = curseg->segno;
817
818 /*
	 * __add_sum_entry must be called with curseg_mutex held,
	 * because this function updates a summary entry in the
	 * current summary block.
822 */
823 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
824
825 mutex_lock(&sit_i->sentry_lock);
826 __refresh_next_blkoff(sbi, curseg);
827 sbi->block_count[curseg->alloc_type]++;
828
829 /*
830 * SIT information should be updated before segment allocation,
	 * since SSR needs the latest valid-block information.
832 */
833 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
834
835 if (!__has_curseg_space(sbi, type))
836 sit_i->s_ops->allocate_segment(sbi, type, false);
837
838 locate_dirty_segment(sbi, old_cursegno);
839 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
840 mutex_unlock(&sit_i->sentry_lock);
841
842 if (p_type == NODE)
843 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
844
845 /* writeout dirty page into bdev */
846 submit_write_page(sbi, page, *new_blkaddr, p_type);
847
848 mutex_unlock(&curseg->curseg_mutex);
849}
850
851int write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
852 struct writeback_control *wbc)
853{
854 if (wbc->for_reclaim)
855 return AOP_WRITEPAGE_ACTIVATE;
856
857 set_page_writeback(page);
858 submit_write_page(sbi, page, page->index, META);
859 return 0;
860}
861
862void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
863 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
864{
865 struct f2fs_summary sum;
866 set_summary(&sum, nid, 0, 0);
867 do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
868}
869
870void write_data_page(struct inode *inode, struct page *page,
871 struct dnode_of_data *dn, block_t old_blkaddr,
872 block_t *new_blkaddr)
873{
874 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
875 struct f2fs_summary sum;
876 struct node_info ni;
877
878 BUG_ON(old_blkaddr == NULL_ADDR);
879 get_node_info(sbi, dn->nid, &ni);
880 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
881
882 do_write_page(sbi, page, old_blkaddr,
883 new_blkaddr, &sum, DATA);
884}
885
886void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
887 block_t old_blk_addr)
888{
889 submit_write_page(sbi, page, old_blk_addr, DATA);
890}
891
892void recover_data_page(struct f2fs_sb_info *sbi,
893 struct page *page, struct f2fs_summary *sum,
894 block_t old_blkaddr, block_t new_blkaddr)
895{
896 struct sit_info *sit_i = SIT_I(sbi);
897 struct curseg_info *curseg;
898 unsigned int segno, old_cursegno;
899 struct seg_entry *se;
900 int type;
901
902 segno = GET_SEGNO(sbi, new_blkaddr);
903 se = get_seg_entry(sbi, segno);
904 type = se->type;
905
906 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
907 if (old_blkaddr == NULL_ADDR)
908 type = CURSEG_COLD_DATA;
909 else
910 type = CURSEG_WARM_DATA;
911 }
912 curseg = CURSEG_I(sbi, type);
913
914 mutex_lock(&curseg->curseg_mutex);
915 mutex_lock(&sit_i->sentry_lock);
916
917 old_cursegno = curseg->segno;
918
919 /* change the current segment */
920 if (segno != curseg->segno) {
921 curseg->next_segno = segno;
922 change_curseg(sbi, type, true);
923 }
924
925 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
926 (sbi->blocks_per_seg - 1);
927 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
928
929 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
930
931 locate_dirty_segment(sbi, old_cursegno);
932 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
933
934 mutex_unlock(&sit_i->sentry_lock);
935 mutex_unlock(&curseg->curseg_mutex);
936}
937
938void rewrite_node_page(struct f2fs_sb_info *sbi,
939 struct page *page, struct f2fs_summary *sum,
940 block_t old_blkaddr, block_t new_blkaddr)
941{
942 struct sit_info *sit_i = SIT_I(sbi);
943 int type = CURSEG_WARM_NODE;
944 struct curseg_info *curseg;
945 unsigned int segno, old_cursegno;
946 block_t next_blkaddr = next_blkaddr_of_node(page);
947 unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
948
949 curseg = CURSEG_I(sbi, type);
950
951 mutex_lock(&curseg->curseg_mutex);
952 mutex_lock(&sit_i->sentry_lock);
953
954 segno = GET_SEGNO(sbi, new_blkaddr);
955 old_cursegno = curseg->segno;
956
957 /* change the current segment */
958 if (segno != curseg->segno) {
959 curseg->next_segno = segno;
960 change_curseg(sbi, type, true);
961 }
962 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
963 (sbi->blocks_per_seg - 1);
964 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
965
966 /* change the current log to the next block addr in advance */
967 if (next_segno != segno) {
968 curseg->next_segno = next_segno;
969 change_curseg(sbi, type, true);
970 }
971 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
972 (sbi->blocks_per_seg - 1);
973
974 /* rewrite node page */
975 set_page_writeback(page);
976 submit_write_page(sbi, page, new_blkaddr, NODE);
977 f2fs_submit_bio(sbi, NODE, true);
978 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
979
980 locate_dirty_segment(sbi, old_cursegno);
981 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
982
983 mutex_unlock(&sit_i->sentry_lock);
984 mutex_unlock(&curseg->curseg_mutex);
985}
986
987static int read_compacted_summaries(struct f2fs_sb_info *sbi)
988{
989 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
990 struct curseg_info *seg_i;
991 unsigned char *kaddr;
992 struct page *page;
993 block_t start;
994 int i, j, offset;
995
996 start = start_sum_block(sbi);
997
998 page = get_meta_page(sbi, start++);
999 kaddr = (unsigned char *)page_address(page);
1000
1001 /* Step 1: restore nat cache */
1002 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1003 memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1004
1005 /* Step 2: restore sit cache */
1006 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1007 memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1008 SUM_JOURNAL_SIZE);
1009 offset = 2 * SUM_JOURNAL_SIZE;
1010
1011 /* Step 3: restore summary entries */
1012 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1013 unsigned short blk_off;
1014 unsigned int segno;
1015
1016 seg_i = CURSEG_I(sbi, i);
1017 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1018 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1019 seg_i->next_segno = segno;
1020 reset_curseg(sbi, i, 0);
1021 seg_i->alloc_type = ckpt->alloc_type[i];
1022 seg_i->next_blkoff = blk_off;
1023
1024 if (seg_i->alloc_type == SSR)
1025 blk_off = sbi->blocks_per_seg;
1026
1027 for (j = 0; j < blk_off; j++) {
1028 struct f2fs_summary *s;
1029 s = (struct f2fs_summary *)(kaddr + offset);
1030 seg_i->sum_blk->entries[j] = *s;
1031 offset += SUMMARY_SIZE;
1032 if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1033 SUM_FOOTER_SIZE)
1034 continue;
1035
1036 f2fs_put_page(page, 1);
1037 page = NULL;
1038
1039 page = get_meta_page(sbi, start++);
1040 kaddr = (unsigned char *)page_address(page);
1041 offset = 0;
1042 }
1043 }
1044 f2fs_put_page(page, 1);
1045 return 0;
1046}
1047
1048static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1049{
1050 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1051 struct f2fs_summary_block *sum;
1052 struct curseg_info *curseg;
1053 struct page *new;
1054 unsigned short blk_off;
1055 unsigned int segno = 0;
1056 block_t blk_addr = 0;
1057
1058 /* get segment number and block addr */
1059 if (IS_DATASEG(type)) {
1060 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1061 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1062 CURSEG_HOT_DATA]);
1063 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1064 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1065 else
1066 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1067 } else {
1068 segno = le32_to_cpu(ckpt->cur_node_segno[type -
1069 CURSEG_HOT_NODE]);
1070 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1071 CURSEG_HOT_NODE]);
1072 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
1073 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1074 type - CURSEG_HOT_NODE);
1075 else
1076 blk_addr = GET_SUM_BLOCK(sbi, segno);
1077 }
1078
1079 new = get_meta_page(sbi, blk_addr);
1080 sum = (struct f2fs_summary_block *)page_address(new);
1081
1082 if (IS_NODESEG(type)) {
1083 if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
1084 struct f2fs_summary *ns = &sum->entries[0];
1085 int i;
1086 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1087 ns->version = 0;
1088 ns->ofs_in_node = 0;
1089 }
1090 } else {
1091 if (restore_node_summary(sbi, segno, sum)) {
1092 f2fs_put_page(new, 1);
1093 return -EINVAL;
1094 }
1095 }
1096 }
1097
	/* set the incomplete segment as curseg */
1099 curseg = CURSEG_I(sbi, type);
1100 mutex_lock(&curseg->curseg_mutex);
1101 memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1102 curseg->next_segno = segno;
1103 reset_curseg(sbi, type, 0);
1104 curseg->alloc_type = ckpt->alloc_type[type];
1105 curseg->next_blkoff = blk_off;
1106 mutex_unlock(&curseg->curseg_mutex);
1107 f2fs_put_page(new, 1);
1108 return 0;
1109}
1110
1111static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1112{
1113 int type = CURSEG_HOT_DATA;
1114
1115 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1116 /* restore for compacted data summary */
1117 if (read_compacted_summaries(sbi))
1118 return -EINVAL;
1119 type = CURSEG_HOT_NODE;
1120 }
1121
1122 for (; type <= CURSEG_COLD_NODE; type++)
1123 if (read_normal_summaries(sbi, type))
1124 return -EINVAL;
1125 return 0;
1126}
1127
1128static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1129{
1130 struct page *page;
1131 unsigned char *kaddr;
1132 struct f2fs_summary *summary;
1133 struct curseg_info *seg_i;
1134 int written_size = 0;
1135 int i, j;
1136
1137 page = grab_meta_page(sbi, blkaddr++);
1138 kaddr = (unsigned char *)page_address(page);
1139
1140 /* Step 1: write nat cache */
1141 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1142 memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1143 written_size += SUM_JOURNAL_SIZE;
1144
1145 /* Step 2: write sit cache */
1146 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1147 memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1148 SUM_JOURNAL_SIZE);
1149 written_size += SUM_JOURNAL_SIZE;
1150
1151 set_page_dirty(page);
1152
1153 /* Step 3: write summary entries */
1154 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1155 unsigned short blkoff;
1156 seg_i = CURSEG_I(sbi, i);
1157 if (sbi->ckpt->alloc_type[i] == SSR)
1158 blkoff = sbi->blocks_per_seg;
1159 else
1160 blkoff = curseg_blkoff(sbi, i);
1161
1162 for (j = 0; j < blkoff; j++) {
1163 if (!page) {
1164 page = grab_meta_page(sbi, blkaddr++);
1165 kaddr = (unsigned char *)page_address(page);
1166 written_size = 0;
1167 }
1168 summary = (struct f2fs_summary *)(kaddr + written_size);
1169 *summary = seg_i->sum_blk->entries[j];
1170 written_size += SUMMARY_SIZE;
1171 set_page_dirty(page);
1172
1173 if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1174 SUM_FOOTER_SIZE)
1175 continue;
1176
1177 f2fs_put_page(page, 1);
1178 page = NULL;
1179 }
1180 }
1181 if (page)
1182 f2fs_put_page(page, 1);
1183}
1184
1185static void write_normal_summaries(struct f2fs_sb_info *sbi,
1186 block_t blkaddr, int type)
1187{
1188 int i, end;
1189 if (IS_DATASEG(type))
1190 end = type + NR_CURSEG_DATA_TYPE;
1191 else
1192 end = type + NR_CURSEG_NODE_TYPE;
1193
1194 for (i = type; i < end; i++) {
1195 struct curseg_info *sum = CURSEG_I(sbi, i);
1196 mutex_lock(&sum->curseg_mutex);
1197 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1198 mutex_unlock(&sum->curseg_mutex);
1199 }
1200}
1201
1202void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1203{
1204 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1205 write_compacted_summaries(sbi, start_blk);
1206 else
1207 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1208}
1209
1210void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1211{
1212 if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1213 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1214 return;
1215}
1216
1217int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1218 unsigned int val, int alloc)
1219{
1220 int i;
1221
1222 if (type == NAT_JOURNAL) {
1223 for (i = 0; i < nats_in_cursum(sum); i++) {
1224 if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1225 return i;
1226 }
1227 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1228 return update_nats_in_cursum(sum, 1);
1229 } else if (type == SIT_JOURNAL) {
1230 for (i = 0; i < sits_in_cursum(sum); i++)
1231 if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1232 return i;
1233 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1234 return update_sits_in_cursum(sum, 1);
1235 }
1236 return -1;
1237}
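
The journal is a small unsorted array, so lookup is a linear scan and allocation is an append bounded by the *_JOURNAL_ENTRIES limit. A user-space model of the same contract -- return an index on a hit or successful append, or -1 otherwise -- with a hypothetical six-entry journal:

#include <stdio.h>

#define JOURNAL_ENTRIES 6	/* stand-in for NAT/SIT_JOURNAL_ENTRIES */

static unsigned keys[JOURNAL_ENTRIES];
static int nr_entries;

static int lookup_journal(unsigned val, int alloc)
{
	int i;

	for (i = 0; i < nr_entries; i++)
		if (keys[i] == val)
			return i;		/* hit: reuse the slot */
	if (alloc && nr_entries < JOURNAL_ENTRIES) {
		keys[nr_entries] = val;
		return nr_entries++;		/* append a fresh slot */
	}
	return -1;				/* full, or lookup-only miss */
}

int main(void)
{
	printf("%d %d %d\n", lookup_journal(7, 1), lookup_journal(7, 0),
	       lookup_journal(9, 0));
	return 0;
}
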
1238
1239static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1240 unsigned int segno)
1241{
1242 struct sit_info *sit_i = SIT_I(sbi);
1243 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno);
1244 block_t blk_addr = sit_i->sit_base_addr + offset;
1245
1246 check_seg_range(sbi, segno);
1247
1248 /* calculate sit block address */
1249 if (f2fs_test_bit(offset, sit_i->sit_bitmap))
1250 blk_addr += sit_i->sit_blocks;
1251
1252 return get_meta_page(sbi, blk_addr);
1253}
1254
1255static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1256 unsigned int start)
1257{
1258 struct sit_info *sit_i = SIT_I(sbi);
1259 struct page *src_page, *dst_page;
1260 pgoff_t src_off, dst_off;
1261 void *src_addr, *dst_addr;
1262
1263 src_off = current_sit_addr(sbi, start);
1264 dst_off = next_sit_addr(sbi, src_off);
1265
1266 /* get current sit block page without lock */
1267 src_page = get_meta_page(sbi, src_off);
1268 dst_page = grab_meta_page(sbi, dst_off);
1269 BUG_ON(PageDirty(src_page));
1270
1271 src_addr = page_address(src_page);
1272 dst_addr = page_address(dst_page);
1273 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1274
1275 set_page_dirty(dst_page);
1276 f2fs_put_page(src_page, 1);
1277
1278 set_to_next_sit(sit_i, start);
1279
1280 return dst_page;
1281}
1282
1283static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
1284{
1285 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1286 struct f2fs_summary_block *sum = curseg->sum_blk;
1287 int i;
1288
1289 /*
	 * If the journal area in the current summary block is full of sit
	 * entries, all of them are flushed; otherwise they could not be
	 * replaced by newly dirtied (hot) sit entries.
1293 */
1294 if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) {
1295 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1296 unsigned int segno;
1297 segno = le32_to_cpu(segno_in_journal(sum, i));
1298 __mark_sit_entry_dirty(sbi, segno);
1299 }
1300 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1301 return 1;
1302 }
1303 return 0;
1304}
1305
1306/*
1307 * The checkpoint path calls this function, which flushes SIT entries
1308 * (including the sit journal) and moves prefree segments to free segments.
1309 */
1310void flush_sit_entries(struct f2fs_sb_info *sbi)
1311{
1312 struct sit_info *sit_i = SIT_I(sbi);
1313 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1314 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1315 struct f2fs_summary_block *sum = curseg->sum_blk;
1316 unsigned long nsegs = TOTAL_SEGS(sbi);
1317 struct page *page = NULL;
1318 struct f2fs_sit_block *raw_sit = NULL;
1319 unsigned int start = 0, end = 0;
1320 unsigned int segno = -1;
1321 bool flushed;
1322
1323 mutex_lock(&curseg->curseg_mutex);
1324 mutex_lock(&sit_i->sentry_lock);
1325
1326 /*
1327 * "flushed" indicates whether sit entries in journal are flushed
1328 * to the SIT area or not.
1329 */
1330 flushed = flush_sits_in_journal(sbi);
1331
1332 while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
1333 struct seg_entry *se = get_seg_entry(sbi, segno);
1334 int sit_offset, offset;
1335
1336 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1337
1338 if (flushed)
1339 goto to_sit_page;
1340
1341 offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
1342 if (offset >= 0) {
1343 segno_in_journal(sum, offset) = cpu_to_le32(segno);
1344 seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
1345 goto flush_done;
1346 }
1347to_sit_page:
1348 if (!page || (start > segno) || (segno > end)) {
1349 if (page) {
1350 f2fs_put_page(page, 1);
1351 page = NULL;
1352 }
1353
1354 start = START_SEGNO(sit_i, segno);
1355 end = start + SIT_ENTRY_PER_BLOCK - 1;
1356
1357 /* read sit block that will be updated */
1358 page = get_next_sit_page(sbi, start);
1359 raw_sit = page_address(page);
1360 }
1361
		/* update entry in SIT block */
1363 seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
1364flush_done:
1365 __clear_bit(segno, bitmap);
1366 sit_i->dirty_sentries--;
1367 }
1368 mutex_unlock(&sit_i->sentry_lock);
1369 mutex_unlock(&curseg->curseg_mutex);
1370
1371 /* writeout last modified SIT block */
1372 f2fs_put_page(page, 1);
1373
1374 set_prefree_as_free_segments(sbi);
1375}
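
One detail worth noting in the loop above: at most one SIT block page is kept pinned, and it is swapped only when the next dirty segno falls outside the [start, end] window of the current block. A sketch of that windowing, assuming SIT_ENTRY_PER_BLOCK is 55 (a 4 KB block divided by the 74-byte raw SIT entry):

#include <stdio.h>

#define SIT_ENTRY_PER_BLOCK 55	/* assumed: 4096 / sizeof(f2fs_sit_entry) */

int main(void)
{
	unsigned dirty[] = { 3, 17, 54, 55, 200 };	/* dirty segnos, in order */
	unsigned start = 0, end = 0;
	int have_page = 0;
	unsigned i;

	for (i = 0; i < sizeof(dirty) / sizeof(dirty[0]); i++) {
		unsigned segno = dirty[i];

		/* Re-read a SIT block only when leaving the current window. */
		if (!have_page || segno < start || segno > end) {
			start = (segno / SIT_ENTRY_PER_BLOCK)
					* SIT_ENTRY_PER_BLOCK;
			end = start + SIT_ENTRY_PER_BLOCK - 1;
			have_page = 1;
			printf("load SIT block for segs %u..%u\n", start, end);
		}
		printf("  update entry %u\n", segno % SIT_ENTRY_PER_BLOCK);
	}
	return 0;
}
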
1376
1377static int build_sit_info(struct f2fs_sb_info *sbi)
1378{
1379 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1380 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1381 struct sit_info *sit_i;
1382 unsigned int sit_segs, start;
1383 char *src_bitmap, *dst_bitmap;
1384 unsigned int bitmap_size;
1385
1386 /* allocate memory for SIT information */
1387 sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1388 if (!sit_i)
1389 return -ENOMEM;
1390
1391 SM_I(sbi)->sit_info = sit_i;
1392
1393 sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry));
1394 if (!sit_i->sentries)
1395 return -ENOMEM;
1396
1397 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1398 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1399 if (!sit_i->dirty_sentries_bitmap)
1400 return -ENOMEM;
1401
1402 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1403 sit_i->sentries[start].cur_valid_map
1404 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1405 sit_i->sentries[start].ckpt_valid_map
1406 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1407 if (!sit_i->sentries[start].cur_valid_map
1408 || !sit_i->sentries[start].ckpt_valid_map)
1409 return -ENOMEM;
1410 }
1411
1412 if (sbi->segs_per_sec > 1) {
1413 sit_i->sec_entries = vzalloc(sbi->total_sections *
1414 sizeof(struct sec_entry));
1415 if (!sit_i->sec_entries)
1416 return -ENOMEM;
1417 }
1418
1419 /* get information related with SIT */
1420 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1421
	/* setup SIT bitmap from checkpoint pack */
1423 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1424 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1425
1426 dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1427 if (!dst_bitmap)
1428 return -ENOMEM;
1429 memcpy(dst_bitmap, src_bitmap, bitmap_size);
1430
1431 /* init SIT information */
1432 sit_i->s_ops = &default_salloc_ops;
1433
1434 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1435 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1436 sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1437 sit_i->sit_bitmap = dst_bitmap;
1438 sit_i->bitmap_size = bitmap_size;
1439 sit_i->dirty_sentries = 0;
1440 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1441 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1442 sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1443 mutex_init(&sit_i->sentry_lock);
1444 return 0;
1445}
1446
1447static int build_free_segmap(struct f2fs_sb_info *sbi)
1448{
1449 struct f2fs_sm_info *sm_info = SM_I(sbi);
1450 struct free_segmap_info *free_i;
1451 unsigned int bitmap_size, sec_bitmap_size;
1452
1453 /* allocate memory for free segmap information */
1454 free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1455 if (!free_i)
1456 return -ENOMEM;
1457
1458 SM_I(sbi)->free_info = free_i;
1459
1460 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1461 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1462 if (!free_i->free_segmap)
1463 return -ENOMEM;
1464
1465 sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections);
1466 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1467 if (!free_i->free_secmap)
1468 return -ENOMEM;
1469
	/* temporarily mark all segments as in use */
1471 memset(free_i->free_segmap, 0xff, bitmap_size);
1472 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1473
1474 /* init free segmap information */
1475 free_i->start_segno =
1476 (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
1477 free_i->free_segments = 0;
1478 free_i->free_sections = 0;
1479 rwlock_init(&free_i->segmap_lock);
1480 return 0;
1481}
1482
1483static int build_curseg(struct f2fs_sb_info *sbi)
1484{
1485 struct curseg_info *array;
1486 int i;
1487
1488 array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
1489 if (!array)
1490 return -ENOMEM;
1491
1492 SM_I(sbi)->curseg_array = array;
1493
1494 for (i = 0; i < NR_CURSEG_TYPE; i++) {
1495 mutex_init(&array[i].curseg_mutex);
1496 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1497 if (!array[i].sum_blk)
1498 return -ENOMEM;
1499 array[i].segno = NULL_SEGNO;
1500 array[i].next_blkoff = 0;
1501 }
1502 return restore_curseg_summaries(sbi);
1503}
1504
1505static void build_sit_entries(struct f2fs_sb_info *sbi)
1506{
1507 struct sit_info *sit_i = SIT_I(sbi);
1508 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1509 struct f2fs_summary_block *sum = curseg->sum_blk;
1510 unsigned int start;
1511
1512 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1513 struct seg_entry *se = &sit_i->sentries[start];
1514 struct f2fs_sit_block *sit_blk;
1515 struct f2fs_sit_entry sit;
1516 struct page *page;
1517 int i;
1518
1519 mutex_lock(&curseg->curseg_mutex);
1520 for (i = 0; i < sits_in_cursum(sum); i++) {
1521 if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
1522 sit = sit_in_journal(sum, i);
1523 mutex_unlock(&curseg->curseg_mutex);
1524 goto got_it;
1525 }
1526 }
1527 mutex_unlock(&curseg->curseg_mutex);
1528 page = get_current_sit_page(sbi, start);
1529 sit_blk = (struct f2fs_sit_block *)page_address(page);
1530 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1531 f2fs_put_page(page, 1);
1532got_it:
1533 check_block_count(sbi, start, &sit);
1534 seg_info_from_raw_sit(se, &sit);
1535 if (sbi->segs_per_sec > 1) {
1536 struct sec_entry *e = get_sec_entry(sbi, start);
1537 e->valid_blocks += se->valid_blocks;
1538 }
1539 }
1540}
1541
1542static void init_free_segmap(struct f2fs_sb_info *sbi)
1543{
1544 unsigned int start;
1545 int type;
1546
1547 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1548 struct seg_entry *sentry = get_seg_entry(sbi, start);
1549 if (!sentry->valid_blocks)
1550 __set_free(sbi, start);
1551 }
1552
	/* mark the current segments as in use */
1554 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1555 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1556 __set_test_and_inuse(sbi, curseg_t->segno);
1557 }
1558}
1559
1560static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1561{
1562 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1563 struct free_segmap_info *free_i = FREE_I(sbi);
1564 unsigned int segno = 0, offset = 0;
1565 unsigned short valid_blocks;
1566
1567 while (segno < TOTAL_SEGS(sbi)) {
1568 /* find dirty segment based on free segmap */
1569 segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset);
1570 if (segno >= TOTAL_SEGS(sbi))
1571 break;
1572 offset = segno + 1;
1573 valid_blocks = get_valid_blocks(sbi, segno, 0);
1574 if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks)
1575 continue;
1576 mutex_lock(&dirty_i->seglist_lock);
1577 __locate_dirty_segment(sbi, segno, DIRTY);
1578 mutex_unlock(&dirty_i->seglist_lock);
1579 }
1580}
1581
1582static int init_victim_segmap(struct f2fs_sb_info *sbi)
1583{
1584 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1585 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1586
1587 dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1588 dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1589 if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
1590 return -ENOMEM;
1591 return 0;
1592}
1593
1594static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1595{
1596 struct dirty_seglist_info *dirty_i;
1597 unsigned int bitmap_size, i;
1598
1599 /* allocate memory for dirty segments list information */
1600 dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
1601 if (!dirty_i)
1602 return -ENOMEM;
1603
1604 SM_I(sbi)->dirty_info = dirty_i;
1605 mutex_init(&dirty_i->seglist_lock);
1606
1607 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1608
1609 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1610 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1611 dirty_i->nr_dirty[i] = 0;
1612 if (!dirty_i->dirty_segmap[i])
1613 return -ENOMEM;
1614 }
1615
1616 init_dirty_segmap(sbi);
1617 return init_victim_segmap(sbi);
1618}
1619
1620/*
1621 * Update min, max modified time for cost-benefit GC algorithm
1622 */
1623static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1624{
1625 struct sit_info *sit_i = SIT_I(sbi);
1626 unsigned int segno;
1627
1628 mutex_lock(&sit_i->sentry_lock);
1629
1630 sit_i->min_mtime = LLONG_MAX;
1631
1632 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
1633 unsigned int i;
1634 unsigned long long mtime = 0;
1635
1636 for (i = 0; i < sbi->segs_per_sec; i++)
1637 mtime += get_seg_entry(sbi, segno + i)->mtime;
1638
1639 mtime = div_u64(mtime, sbi->segs_per_sec);
1640
1641 if (sit_i->min_mtime > mtime)
1642 sit_i->min_mtime = mtime;
1643 }
1644 sit_i->max_mtime = get_mtime(sbi);
1645 mutex_unlock(&sit_i->sentry_lock);
1646}
1647
1648int build_segment_manager(struct f2fs_sb_info *sbi)
1649{
1650 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1651 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1652 struct f2fs_sm_info *sm_info;
1653 int err;
1654
1655 sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
1656 if (!sm_info)
1657 return -ENOMEM;
1658
1659 /* init sm info */
1660 sbi->sm_info = sm_info;
1661 INIT_LIST_HEAD(&sm_info->wblist_head);
1662 spin_lock_init(&sm_info->wblist_lock);
1663 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1664 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1665 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
1666 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1667 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1668 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1669 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1670
1671 err = build_sit_info(sbi);
1672 if (err)
1673 return err;
1674 err = build_free_segmap(sbi);
1675 if (err)
1676 return err;
1677 err = build_curseg(sbi);
1678 if (err)
1679 return err;
1680
1681 /* reinit free segmap based on SIT */
1682 build_sit_entries(sbi);
1683
1684 init_free_segmap(sbi);
1685 err = build_dirty_segmap(sbi);
1686 if (err)
1687 return err;
1688
1689 init_min_max_mtime(sbi);
1690 return 0;
1691}
1692
1693static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
1694 enum dirty_type dirty_type)
1695{
1696 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1697
1698 mutex_lock(&dirty_i->seglist_lock);
1699 kfree(dirty_i->dirty_segmap[dirty_type]);
1700 dirty_i->nr_dirty[dirty_type] = 0;
1701 mutex_unlock(&dirty_i->seglist_lock);
1702}
1703
1704void reset_victim_segmap(struct f2fs_sb_info *sbi)
1705{
1706 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1707 memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
1708}
1709
1710static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
1711{
1712 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1713
1714 kfree(dirty_i->victim_segmap[FG_GC]);
1715 kfree(dirty_i->victim_segmap[BG_GC]);
1716}
1717
1718static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
1719{
1720 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1721 int i;
1722
1723 if (!dirty_i)
1724 return;
1725
1726 /* discard pre-free/dirty segments list */
1727 for (i = 0; i < NR_DIRTY_TYPE; i++)
1728 discard_dirty_segmap(sbi, i);
1729
1730 destroy_victim_segmap(sbi);
1731 SM_I(sbi)->dirty_info = NULL;
1732 kfree(dirty_i);
1733}
1734
1735static void destroy_curseg(struct f2fs_sb_info *sbi)
1736{
1737 struct curseg_info *array = SM_I(sbi)->curseg_array;
1738 int i;
1739
1740 if (!array)
1741 return;
1742 SM_I(sbi)->curseg_array = NULL;
1743 for (i = 0; i < NR_CURSEG_TYPE; i++)
1744 kfree(array[i].sum_blk);
1745 kfree(array);
1746}
1747
1748static void destroy_free_segmap(struct f2fs_sb_info *sbi)
1749{
1750 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
1751 if (!free_i)
1752 return;
1753 SM_I(sbi)->free_info = NULL;
1754 kfree(free_i->free_segmap);
1755 kfree(free_i->free_secmap);
1756 kfree(free_i);
1757}
1758
1759static void destroy_sit_info(struct f2fs_sb_info *sbi)
1760{
1761 struct sit_info *sit_i = SIT_I(sbi);
1762 unsigned int start;
1763
1764 if (!sit_i)
1765 return;
1766
1767 if (sit_i->sentries) {
1768 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1769 kfree(sit_i->sentries[start].cur_valid_map);
1770 kfree(sit_i->sentries[start].ckpt_valid_map);
1771 }
1772 }
1773 vfree(sit_i->sentries);
1774 vfree(sit_i->sec_entries);
1775 kfree(sit_i->dirty_sentries_bitmap);
1776
1777 SM_I(sbi)->sit_info = NULL;
1778 kfree(sit_i->sit_bitmap);
1779 kfree(sit_i);
1780}
1781
1782void destroy_segment_manager(struct f2fs_sb_info *sbi)
1783{
1784 struct f2fs_sm_info *sm_info = SM_I(sbi);
1785 destroy_dirty_segmap(sbi);
1786 destroy_curseg(sbi);
1787 destroy_free_segmap(sbi);
1788 destroy_sit_info(sbi);
1789 sbi->sm_info = NULL;
1790 kfree(sm_info);
1791}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
new file mode 100644
index 000000000000..0948405af6f5
--- /dev/null
+++ b/fs/f2fs/segment.h
@@ -0,0 +1,618 @@
1/*
2 * fs/f2fs/segment.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11/* constant macro */
12#define NULL_SEGNO ((unsigned int)(~0))
13
14/* V: Logical segment # in volume, R: Relative segment # in main area */
15#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
16#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
17
18#define IS_DATASEG(t) \
19 ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \
20 (t == CURSEG_WARM_DATA))
21
22#define IS_NODESEG(t) \
23 ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \
24 (t == CURSEG_WARM_NODE))
25
26#define IS_CURSEG(sbi, segno) \
27 ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
28 (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
29 (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
30 (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
31 (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
32 (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
33
34#define IS_CURSEC(sbi, secno) \
35 ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
36 sbi->segs_per_sec) || \
37 (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
38 sbi->segs_per_sec) || \
39 (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
40 sbi->segs_per_sec) || \
41 (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
42 sbi->segs_per_sec) || \
43 (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
44 sbi->segs_per_sec) || \
45 (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
	  sbi->segs_per_sec))
47
48#define START_BLOCK(sbi, segno) \
49 (SM_I(sbi)->seg0_blkaddr + \
50 (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
51#define NEXT_FREE_BLKADDR(sbi, curseg) \
52 (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
53
54#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr)
55
56#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \
57 ((blk_addr) - SM_I(sbi)->seg0_blkaddr)
58#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
59 (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
60#define GET_SEGNO(sbi, blk_addr) \
61 (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
62 NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
63 GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
64#define GET_SECNO(sbi, segno) \
65 ((segno) / sbi->segs_per_sec)
66#define GET_ZONENO_FROM_SEGNO(sbi, segno) \
67 ((segno / sbi->segs_per_sec) / sbi->secs_per_zone)
68
69#define GET_SUM_BLOCK(sbi, segno) \
70 ((sbi->sm_info->ssa_blkaddr) + segno)
71
72#define GET_SUM_TYPE(footer) ((footer)->entry_type)
73#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type)
74
75#define SIT_ENTRY_OFFSET(sit_i, segno) \
76 (segno % sit_i->sents_per_block)
77#define SIT_BLOCK_OFFSET(sit_i, segno) \
78 (segno / SIT_ENTRY_PER_BLOCK)
79#define START_SEGNO(sit_i, segno) \
80 (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK)
81#define f2fs_bitmap_size(nr) \
82 (BITS_TO_LONGS(nr) * sizeof(unsigned long))
83#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments)
84
85#define SECTOR_FROM_BLOCK(sbi, blk_addr) \
86 (blk_addr << ((sbi)->log_blocksize - F2FS_LOG_SECTOR_SIZE))
87
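
For the common geometry of 4 KB blocks and 512 B sectors, log_blocksize is 12 and F2FS_LOG_SECTOR_SIZE is 9, so SECTOR_FROM_BLOCK reduces to a left shift by 3. A one-line check under those assumed defaults:

#include <stdio.h>

int main(void)
{
	unsigned log_blocksize = 12;	/* 4 KB blocks (assumed default) */
	unsigned log_sector = 9;	/* 512 B sectors */
	unsigned long long blk_addr = 100;

	/* 100 * (4096 / 512) = 800 */
	printf("%llu\n", blk_addr << (log_blocksize - log_sector));
	return 0;
}
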
88/* during checkpoint, bio_private is used to synchronize the last bio */
89struct bio_private {
90 struct f2fs_sb_info *sbi;
91 bool is_sync;
92 void *wait;
93};
94
95/*
96 * indicate a block allocation direction: RIGHT and LEFT.
97 * RIGHT means allocating new sections towards the end of volume.
98 * LEFT means the opposite direction.
99 */
100enum {
101 ALLOC_RIGHT = 0,
102 ALLOC_LEFT
103};
104
105/*
106 * In the victim_sel_policy->alloc_mode, there are two block allocation modes.
107 * LFS writes data sequentially with cleaning operations.
108 * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
109 */
110enum {
111 LFS = 0,
112 SSR
113};
114
115/*
116 * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
117 * GC_CB is based on cost-benefit algorithm.
118 * GC_GREEDY is based on greedy algorithm.
119 */
120enum {
121 GC_CB = 0,
122 GC_GREEDY
123};
124
125/*
126 * BG_GC means the background cleaning job.
127 * FG_GC means the on-demand cleaning job.
128 */
129enum {
130 BG_GC = 0,
131 FG_GC
132};
133
134/* for a function parameter to select a victim segment */
135struct victim_sel_policy {
136 int alloc_mode; /* LFS or SSR */
137 int gc_mode; /* GC_CB or GC_GREEDY */
138 unsigned long *dirty_segmap; /* dirty segment bitmap */
139 unsigned int offset; /* last scanned bitmap offset */
140 unsigned int ofs_unit; /* bitmap search unit */
141 unsigned int min_cost; /* minimum cost */
142 unsigned int min_segno; /* segment # having min. cost */
143};
144
145struct seg_entry {
146 unsigned short valid_blocks; /* # of valid blocks */
147 unsigned char *cur_valid_map; /* validity bitmap of blocks */
148 /*
	 * # of valid blocks and the validity bitmap stored in the last
150 * checkpoint pack. This information is used by the SSR mode.
151 */
152 unsigned short ckpt_valid_blocks;
153 unsigned char *ckpt_valid_map;
154 unsigned char type; /* segment type like CURSEG_XXX_TYPE */
155 unsigned long long mtime; /* modification time of the segment */
156};
157
158struct sec_entry {
159 unsigned int valid_blocks; /* # of valid blocks in a section */
160};
161
162struct segment_allocation {
163 void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
164};
165
166struct sit_info {
167 const struct segment_allocation *s_ops;
168
169 block_t sit_base_addr; /* start block address of SIT area */
170 block_t sit_blocks; /* # of blocks used by SIT area */
171 block_t written_valid_blocks; /* # of valid blocks in main area */
172 char *sit_bitmap; /* SIT bitmap pointer */
173 unsigned int bitmap_size; /* SIT bitmap size */
174
175 unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
176 unsigned int dirty_sentries; /* # of dirty sentries */
177 unsigned int sents_per_block; /* # of SIT entries per block */
178 struct mutex sentry_lock; /* to protect SIT cache */
179 struct seg_entry *sentries; /* SIT segment-level cache */
180 struct sec_entry *sec_entries; /* SIT section-level cache */
181
182 /* for cost-benefit algorithm in cleaning procedure */
183 unsigned long long elapsed_time; /* elapsed time after mount */
184 unsigned long long mounted_time; /* mount time */
185 unsigned long long min_mtime; /* min. modification time */
186 unsigned long long max_mtime; /* max. modification time */
187};
188
189struct free_segmap_info {
190 unsigned int start_segno; /* start segment number logically */
191 unsigned int free_segments; /* # of free segments */
192 unsigned int free_sections; /* # of free sections */
193 rwlock_t segmap_lock; /* free segmap lock */
194 unsigned long *free_segmap; /* free segment bitmap */
195 unsigned long *free_secmap; /* free section bitmap */
196};
197
/* Notice: The order of dirty_type matches CURSEG_XXX in f2fs.h */
199enum dirty_type {
200 DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */
201 DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */
202 DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */
203 DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */
204 DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */
205 DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */
206 DIRTY, /* to count # of dirty segments */
207 PRE, /* to count # of entirely obsolete segments */
208 NR_DIRTY_TYPE
209};
210
211struct dirty_seglist_info {
	const struct victim_selection *v_ops;	/* victim selection operation */
213 unsigned long *dirty_segmap[NR_DIRTY_TYPE];
214 struct mutex seglist_lock; /* lock for segment bitmaps */
215 int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
216 unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */
217};
218
219/* victim selection function for cleaning and SSR */
220struct victim_selection {
221 int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
222 int, int, char);
223};
224
225/* for active log information */
226struct curseg_info {
227 struct mutex curseg_mutex; /* lock for consistency */
228 struct f2fs_summary_block *sum_blk; /* cached summary block */
229 unsigned char alloc_type; /* current allocation type */
230 unsigned int segno; /* current segment number */
231 unsigned short next_blkoff; /* next block offset to write */
232 unsigned int zone; /* current zone number */
233 unsigned int next_segno; /* preallocated segment */
234};
235
236/*
237 * inline functions
238 */
239static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
240{
241 return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
242}
243
244static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi,
245 unsigned int segno)
246{
247 struct sit_info *sit_i = SIT_I(sbi);
248 return &sit_i->sentries[segno];
249}
250
251static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi,
252 unsigned int segno)
253{
254 struct sit_info *sit_i = SIT_I(sbi);
255 return &sit_i->sec_entries[GET_SECNO(sbi, segno)];
256}
257
258static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
259 unsigned int segno, int section)
260{
261 /*
262 * In order to get # of valid blocks in a section instantly from many
263 * segments, f2fs manages two counting structures separately.
264 */
265 if (section > 1)
266 return get_sec_entry(sbi, segno)->valid_blocks;
267 else
268 return get_seg_entry(sbi, segno)->valid_blocks;
269}
270
271static inline void seg_info_from_raw_sit(struct seg_entry *se,
272 struct f2fs_sit_entry *rs)
273{
274 se->valid_blocks = GET_SIT_VBLOCKS(rs);
275 se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
276 memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
277 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
278 se->type = GET_SIT_TYPE(rs);
279 se->mtime = le64_to_cpu(rs->mtime);
280}
281
282static inline void seg_info_to_raw_sit(struct seg_entry *se,
283 struct f2fs_sit_entry *rs)
284{
285 unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) |
286 se->valid_blocks;
287 rs->vblocks = cpu_to_le16(raw_vblocks);
288 memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
289 memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
290 se->ckpt_valid_blocks = se->valid_blocks;
291 rs->mtime = cpu_to_le64(se->mtime);
292}
293
294static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
295 unsigned int max, unsigned int segno)
296{
297 unsigned int ret;
298 read_lock(&free_i->segmap_lock);
299 ret = find_next_bit(free_i->free_segmap, max, segno);
300 read_unlock(&free_i->segmap_lock);
301 return ret;
302}
303
304static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
305{
306 struct free_segmap_info *free_i = FREE_I(sbi);
307 unsigned int secno = segno / sbi->segs_per_sec;
308 unsigned int start_segno = secno * sbi->segs_per_sec;
309 unsigned int next;
310
311 write_lock(&free_i->segmap_lock);
312 clear_bit(segno, free_i->free_segmap);
313 free_i->free_segments++;
314
315 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno);
316 if (next >= start_segno + sbi->segs_per_sec) {
317 clear_bit(secno, free_i->free_secmap);
318 free_i->free_sections++;
319 }
320 write_unlock(&free_i->segmap_lock);
321}
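A worked example for __set_free() above, with invented numbers: say segs_per_sec = 4 and segno = 10.

/*
 * secno = 10 / 4 = 2, start_segno = 2 * 4 = 8. A set bit in free_segmap
 * means "in use", so after clear_bit(10), find_next_bit() scans from
 * segment 8; if it returns 12 or more, no segment of section 2 is still
 * in use, and the section is marked free in free_secmap as well.
 */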
322
323static inline void __set_inuse(struct f2fs_sb_info *sbi,
324 unsigned int segno)
325{
326 struct free_segmap_info *free_i = FREE_I(sbi);
327 unsigned int secno = segno / sbi->segs_per_sec;
328 set_bit(segno, free_i->free_segmap);
329 free_i->free_segments--;
330 if (!test_and_set_bit(secno, free_i->free_secmap))
331 free_i->free_sections--;
332}
333
334static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
335 unsigned int segno)
336{
337 struct free_segmap_info *free_i = FREE_I(sbi);
338 unsigned int secno = segno / sbi->segs_per_sec;
339 unsigned int start_segno = secno * sbi->segs_per_sec;
340 unsigned int next;
341
342 write_lock(&free_i->segmap_lock);
343 if (test_and_clear_bit(segno, free_i->free_segmap)) {
344 free_i->free_segments++;
345
346 next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi),
347 start_segno);
348 if (next >= start_segno + sbi->segs_per_sec) {
349 if (test_and_clear_bit(secno, free_i->free_secmap))
350 free_i->free_sections++;
351 }
352 }
353 write_unlock(&free_i->segmap_lock);
354}
355
356static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
357 unsigned int segno)
358{
359 struct free_segmap_info *free_i = FREE_I(sbi);
360 unsigned int secno = segno / sbi->segs_per_sec;
361 write_lock(&free_i->segmap_lock);
362 if (!test_and_set_bit(segno, free_i->free_segmap)) {
363 free_i->free_segments--;
364 if (!test_and_set_bit(secno, free_i->free_secmap))
365 free_i->free_sections--;
366 }
367 write_unlock(&free_i->segmap_lock);
368}
369
370static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
371 void *dst_addr)
372{
373 struct sit_info *sit_i = SIT_I(sbi);
374 memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
375}
376
377static inline block_t written_block_count(struct f2fs_sb_info *sbi)
378{
379 struct sit_info *sit_i = SIT_I(sbi);
380 block_t vblocks;
381
382 mutex_lock(&sit_i->sentry_lock);
383 vblocks = sit_i->written_valid_blocks;
384 mutex_unlock(&sit_i->sentry_lock);
385
386 return vblocks;
387}
388
389static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
390{
391 struct free_segmap_info *free_i = FREE_I(sbi);
392 unsigned int free_segs;
393
394 read_lock(&free_i->segmap_lock);
395 free_segs = free_i->free_segments;
396 read_unlock(&free_i->segmap_lock);
397
398 return free_segs;
399}
400
401static inline int reserved_segments(struct f2fs_sb_info *sbi)
402{
403 return SM_I(sbi)->reserved_segments;
404}
405
406static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
407{
408 struct free_segmap_info *free_i = FREE_I(sbi);
409 unsigned int free_secs;
410
411 read_lock(&free_i->segmap_lock);
412 free_secs = free_i->free_sections;
413 read_unlock(&free_i->segmap_lock);
414
415 return free_secs;
416}
417
418static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi)
419{
420 return DIRTY_I(sbi)->nr_dirty[PRE];
421}
422
423static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi)
424{
425 return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] +
426 DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] +
427 DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] +
428 DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] +
429 DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] +
430 DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE];
431}
432
433static inline int overprovision_segments(struct f2fs_sb_info *sbi)
434{
435 return SM_I(sbi)->ovp_segments;
436}
437
438static inline int overprovision_sections(struct f2fs_sb_info *sbi)
439{
440 return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec;
441}
442
443static inline int reserved_sections(struct f2fs_sb_info *sbi)
444{
445 return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec;
446}
447
448static inline bool need_SSR(struct f2fs_sb_info *sbi)
449{
450 return (free_sections(sbi) < overprovision_sections(sbi));
451}
452
453static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
454{
455 struct curseg_info *curseg = CURSEG_I(sbi, type);
456 return DIRTY_I(sbi)->v_ops->get_victim(sbi,
457 &(curseg)->next_segno, BG_GC, type, SSR);
458}
459
460static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi)
461{
462 return free_sections(sbi) <= reserved_sections(sbi);
463}
464
465static inline int utilization(struct f2fs_sb_info *sbi)
466{
467 return (long int)valid_user_blocks(sbi) * 100 /
468 (long int)sbi->user_block_count;
469}
470
471/*
472 * Sometimes it may be better for f2fs to drop its out-of-place update
473 * policy. So, if fs utilization is over MIN_IPU_UTIL, f2fs tries to write
474 * data in the original place, like other traditional file systems. But
475 * MIN_IPU_UTIL is currently set to 100 percent, which disables in-place
476 * update, since utilization() never exceeds 100. See need_inplace_update().
477 */
478#define MIN_IPU_UTIL 100
479static inline bool need_inplace_update(struct inode *inode)
480{
481 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
482 if (S_ISDIR(inode->i_mode))
483 return false;
484 if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL)
485 return true;
486 return false;
487}
488
489static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
490 int type)
491{
492 struct curseg_info *curseg = CURSEG_I(sbi, type);
493 return curseg->segno;
494}
495
496static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi,
497 int type)
498{
499 struct curseg_info *curseg = CURSEG_I(sbi, type);
500 return curseg->alloc_type;
501}
502
503static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
504{
505 struct curseg_info *curseg = CURSEG_I(sbi, type);
506 return curseg->next_blkoff;
507}
508
509static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
510{
511 unsigned int end_segno = SM_I(sbi)->segment_count - 1;
512 BUG_ON(segno > end_segno);
513}
514
515/*
516 * This function is used only for debugging.
517 * NOTE: In the future, we have to remove this function.
518 */
519static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
520{
521 struct f2fs_sm_info *sm_info = SM_I(sbi);
522 block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg;
523 block_t start_addr = sm_info->seg0_blkaddr;
524 block_t end_addr = start_addr + total_blks - 1;
525 BUG_ON(blk_addr < start_addr);
526 BUG_ON(blk_addr > end_addr);
527}
528
529/*
530 * A summary block is always treated as an invalid block.
531 */
532static inline void check_block_count(struct f2fs_sb_info *sbi,
533 int segno, struct f2fs_sit_entry *raw_sit)
534{
535 struct f2fs_sm_info *sm_info = SM_I(sbi);
536 unsigned int end_segno = sm_info->segment_count - 1;
537 int valid_blocks = 0;
538 int i;
539
540 /* check segment usage */
541 BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
542
543 /* check boundary of a given segment number */
544 BUG_ON(segno > end_segno);
545
546 /* check bitmap with valid block count */
547 for (i = 0; i < sbi->blocks_per_seg; i++)
548 if (f2fs_test_bit(i, raw_sit->valid_map))
549 valid_blocks++;
550 BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
551}
552
553static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
554 unsigned int start)
555{
556 struct sit_info *sit_i = SIT_I(sbi);
557 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start);
558 block_t blk_addr = sit_i->sit_base_addr + offset;
559
560 check_seg_range(sbi, start);
561
562 /* calculate sit block address */
563 if (f2fs_test_bit(offset, sit_i->sit_bitmap))
564 blk_addr += sit_i->sit_blocks;
565
566 return blk_addr;
567}
568
569static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi,
570 pgoff_t block_addr)
571{
572 struct sit_info *sit_i = SIT_I(sbi);
573 block_addr -= sit_i->sit_base_addr;
574 if (block_addr < sit_i->sit_blocks)
575 block_addr += sit_i->sit_blocks;
576 else
577 block_addr -= sit_i->sit_blocks;
578
579 return block_addr + sit_i->sit_base_addr;
580}
581
582static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
583{
584 unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start);
585
586 if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
587 f2fs_clear_bit(block_off, sit_i->sit_bitmap);
588 else
589 f2fs_set_bit(block_off, sit_i->sit_bitmap);
590}
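The three helpers above implement the SIT's two-copy (ping-pong) scheme: every SIT block exists twice, sit_blocks apart, and a bitmap bit selects the live copy. An illustration with invented numbers:

/*
 * Suppose sit_base_addr = 1000, sit_blocks = 64, offset = 5. With the
 * bitmap bit clear, current_sit_addr() returns 1005; next_sit_addr(1005)
 * returns 1069, the shadow copy that the next write goes to; and
 * set_to_next_sit() then flips the bit so 1069 becomes the live copy.
 */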
591
592static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
593{
594 struct sit_info *sit_i = SIT_I(sbi);
595 return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec -
596 sit_i->mounted_time;
597}
598
599static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
600 unsigned int ofs_in_node, unsigned char version)
601{
602 sum->nid = cpu_to_le32(nid);
603 sum->ofs_in_node = cpu_to_le16(ofs_in_node);
604 sum->version = version;
605}
606
607static inline block_t start_sum_block(struct f2fs_sb_info *sbi)
608{
609 return __start_cp_addr(sbi) +
610 le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
611}
612
613static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
614{
615 return __start_cp_addr(sbi) +
616 le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count)
617 - (base + 1) + type;
618}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
new file mode 100644
index 000000000000..13867322cf5a
--- /dev/null
+++ b/fs/f2fs/super.c
@@ -0,0 +1,657 @@
1/*
2 * fs/f2fs/super.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/statfs.h>
15#include <linux/proc_fs.h>
16#include <linux/buffer_head.h>
17#include <linux/backing-dev.h>
18#include <linux/kthread.h>
19#include <linux/parser.h>
20#include <linux/mount.h>
21#include <linux/seq_file.h>
22#include <linux/random.h>
23#include <linux/exportfs.h>
24#include <linux/f2fs_fs.h>
25
26#include "f2fs.h"
27#include "node.h"
28#include "xattr.h"
29
30static struct kmem_cache *f2fs_inode_cachep;
31
32enum {
33 Opt_gc_background_off,
34 Opt_disable_roll_forward,
35 Opt_discard,
36 Opt_noheap,
37 Opt_nouser_xattr,
38 Opt_noacl,
39 Opt_active_logs,
40 Opt_disable_ext_identify,
41 Opt_err,
42};
43
44static match_table_t f2fs_tokens = {
45 {Opt_gc_background_off, "background_gc_off"},
46 {Opt_disable_roll_forward, "disable_roll_forward"},
47 {Opt_discard, "discard"},
48 {Opt_noheap, "no_heap"},
49 {Opt_nouser_xattr, "nouser_xattr"},
50 {Opt_noacl, "noacl"},
51 {Opt_active_logs, "active_logs=%u"},
52 {Opt_disable_ext_identify, "disable_ext_identify"},
53 {Opt_err, NULL},
54};
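For illustration (not part of this diff), here is how a mount option string flows through this table in parse_options() below:

/*
 * Hypothetical invocation: mount -t f2fs -o no_heap,active_logs=6 ...
 * strsep() yields "no_heap" and "active_logs=6"; match_token() maps them
 * to Opt_noheap (which sets NOHEAP) and Opt_active_logs with args[0]
 * holding "6", so match_int() stores 6 in sbi->active_logs.
 */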
55
56static void init_once(void *foo)
57{
58 struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
59
60 inode_init_once(&fi->vfs_inode);
61}
62
63static struct inode *f2fs_alloc_inode(struct super_block *sb)
64{
65 struct f2fs_inode_info *fi;
66
67 fi = kmem_cache_alloc(f2fs_inode_cachep, GFP_NOFS | __GFP_ZERO);
68 if (!fi)
69 return NULL;
70
71 init_once((void *) fi);
72
73 /* Initialize f2fs-specific inode info */
74 fi->vfs_inode.i_version = 1;
75 atomic_set(&fi->dirty_dents, 0);
76 fi->i_current_depth = 1;
77 fi->i_advise = 0;
78 rwlock_init(&fi->ext.ext_lock);
79
80 set_inode_flag(fi, FI_NEW_INODE);
81
82 return &fi->vfs_inode;
83}
84
85static void f2fs_i_callback(struct rcu_head *head)
86{
87 struct inode *inode = container_of(head, struct inode, i_rcu);
88 kmem_cache_free(f2fs_inode_cachep, F2FS_I(inode));
89}
90
91static void f2fs_destroy_inode(struct inode *inode)
92{
93 call_rcu(&inode->i_rcu, f2fs_i_callback);
94}
95
96static void f2fs_put_super(struct super_block *sb)
97{
98 struct f2fs_sb_info *sbi = F2FS_SB(sb);
99
100 f2fs_destroy_stats(sbi);
101 stop_gc_thread(sbi);
102
103 write_checkpoint(sbi, false, true);
104
105 iput(sbi->node_inode);
106 iput(sbi->meta_inode);
107
108 /* destroy f2fs internal modules */
109 destroy_node_manager(sbi);
110 destroy_segment_manager(sbi);
111
112 kfree(sbi->ckpt);
113
114 sb->s_fs_info = NULL;
115 brelse(sbi->raw_super_buf);
116 kfree(sbi);
117}
118
119int f2fs_sync_fs(struct super_block *sb, int sync)
120{
121 struct f2fs_sb_info *sbi = F2FS_SB(sb);
122 int ret = 0;
123
124 if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES))
125 return 0;
126
127 if (sync)
128 write_checkpoint(sbi, false, false);
129
130 return ret;
131}
132
133static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
134{
135 struct super_block *sb = dentry->d_sb;
136 struct f2fs_sb_info *sbi = F2FS_SB(sb);
137 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
138 block_t total_count, user_block_count, start_count, ovp_count;
139
140 total_count = le64_to_cpu(sbi->raw_super->block_count);
141 user_block_count = sbi->user_block_count;
142 start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
143 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
144 buf->f_type = F2FS_SUPER_MAGIC;
145 buf->f_bsize = sbi->blocksize;
146
147 buf->f_blocks = total_count - start_count;
148 buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count;
149 buf->f_bavail = user_block_count - valid_user_blocks(sbi);
150
151 buf->f_files = valid_inode_count(sbi);
152 buf->f_ffree = sbi->total_node_count - valid_node_count(sbi);
153
154 buf->f_namelen = F2FS_MAX_NAME_LEN;
155 buf->f_fsid.val[0] = (u32)id;
156 buf->f_fsid.val[1] = (u32)(id >> 32);
157
158 return 0;
159}
160
161static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
162{
163 struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
164
165 if (test_opt(sbi, BG_GC))
166 seq_puts(seq, ",background_gc_on");
167 else
168 seq_puts(seq, ",background_gc_off");
169 if (test_opt(sbi, DISABLE_ROLL_FORWARD))
170 seq_puts(seq, ",disable_roll_forward");
171 if (test_opt(sbi, DISCARD))
172 seq_puts(seq, ",discard");
173 if (test_opt(sbi, NOHEAP))
174 seq_puts(seq, ",no_heap_alloc");
175#ifdef CONFIG_F2FS_FS_XATTR
176 if (test_opt(sbi, XATTR_USER))
177 seq_puts(seq, ",user_xattr");
178 else
179 seq_puts(seq, ",nouser_xattr");
180#endif
181#ifdef CONFIG_F2FS_FS_POSIX_ACL
182 if (test_opt(sbi, POSIX_ACL))
183 seq_puts(seq, ",acl");
184 else
185 seq_puts(seq, ",noacl");
186#endif
187 if (test_opt(sbi, DISABLE_EXT_IDENTIFY))
188 seq_puts(seq, ",disable_ext_identify");
189
190 seq_printf(seq, ",active_logs=%u", sbi->active_logs);
191
192 return 0;
193}
194
195static struct super_operations f2fs_sops = {
196 .alloc_inode = f2fs_alloc_inode,
197 .destroy_inode = f2fs_destroy_inode,
198 .write_inode = f2fs_write_inode,
199 .show_options = f2fs_show_options,
200 .evict_inode = f2fs_evict_inode,
201 .put_super = f2fs_put_super,
202 .sync_fs = f2fs_sync_fs,
203 .statfs = f2fs_statfs,
204};
205
206static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
207 u64 ino, u32 generation)
208{
209 struct f2fs_sb_info *sbi = F2FS_SB(sb);
210 struct inode *inode;
211
212 if (ino < F2FS_ROOT_INO(sbi))
213 return ERR_PTR(-ESTALE);
214
215 /*
216 * f2fs_iget isn't quite right if the inode is currently unallocated!
217 * However f2fs_iget currently does appropriate checks to handle stale
218 * inodes so everything is OK.
219 */
220 inode = f2fs_iget(sb, ino);
221 if (IS_ERR(inode))
222 return ERR_CAST(inode);
223 if (generation && inode->i_generation != generation) {
224 /* we didn't find the right inode.. */
225 iput(inode);
226 return ERR_PTR(-ESTALE);
227 }
228 return inode;
229}
230
231static struct dentry *f2fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
232 int fh_len, int fh_type)
233{
234 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
235 f2fs_nfs_get_inode);
236}
237
238static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
239 int fh_len, int fh_type)
240{
241 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
242 f2fs_nfs_get_inode);
243}
244
245static const struct export_operations f2fs_export_ops = {
246 .fh_to_dentry = f2fs_fh_to_dentry,
247 .fh_to_parent = f2fs_fh_to_parent,
248 .get_parent = f2fs_get_parent,
249};
250
251static int parse_options(struct f2fs_sb_info *sbi, char *options)
252{
253 substring_t args[MAX_OPT_ARGS];
254 char *p;
255 int arg = 0;
256
257 if (!options)
258 return 0;
259
260 while ((p = strsep(&options, ",")) != NULL) {
261 int token;
262 if (!*p)
263 continue;
264 /*
265 * Initialize args struct so we know whether arg was
266 * found; some options take optional arguments.
267 */
268 args[0].to = args[0].from = NULL;
269 token = match_token(p, f2fs_tokens, args);
270
271 switch (token) {
272 case Opt_gc_background_off:
273 clear_opt(sbi, BG_GC);
274 break;
275 case Opt_disable_roll_forward:
276 set_opt(sbi, DISABLE_ROLL_FORWARD);
277 break;
278 case Opt_discard:
279 set_opt(sbi, DISCARD);
280 break;
281 case Opt_noheap:
282 set_opt(sbi, NOHEAP);
283 break;
284#ifdef CONFIG_F2FS_FS_XATTR
285 case Opt_nouser_xattr:
286 clear_opt(sbi, XATTR_USER);
287 break;
288#else
289 case Opt_nouser_xattr:
290 pr_info("nouser_xattr option not supported\n");
291 break;
292#endif
293#ifdef CONFIG_F2FS_FS_POSIX_ACL
294 case Opt_noacl:
295 clear_opt(sbi, POSIX_ACL);
296 break;
297#else
298 case Opt_noacl:
299 pr_info("noacl option not supported\n");
300 break;
301#endif
302 case Opt_active_logs:
303 if (args->from && match_int(args, &arg))
304 return -EINVAL;
305 if (arg != 2 && arg != 4 && arg != 6)
306 return -EINVAL;
307 sbi->active_logs = arg;
308 break;
309 case Opt_disable_ext_identify:
310 set_opt(sbi, DISABLE_EXT_IDENTIFY);
311 break;
312 default:
313 pr_err("Unrecognized mount option \"%s\" or missing value\n",
314 p);
315 return -EINVAL;
316 }
317 }
318 return 0;
319}
320
321static loff_t max_file_size(unsigned bits)
322{
323 loff_t result = ADDRS_PER_INODE;
324 loff_t leaf_count = ADDRS_PER_BLOCK;
325
326 /* two direct node blocks */
327 result += (leaf_count * 2);
328
329 /* two indirect node blocks */
330 leaf_count *= NIDS_PER_BLOCK;
331 result += (leaf_count * 2);
332
333 /* one double indirect node block */
334 leaf_count *= NIDS_PER_BLOCK;
335 result += leaf_count;
336
337 result <<= bits;
338 return result;
339}
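As a cross-check (editor's sketch, not part of this diff), plugging in the index constants defined later in include/linux/f2fs_fs.h — ADDRS_PER_INODE = 923, ADDRS_PER_BLOCK = NIDS_PER_BLOCK = 1018 — with 4KB blocks (bits = 12) gives roughly 3.94 TiB:

#include <stdio.h>

int main(void)
{
	long long leaf = 1018;			/* ADDRS_PER_BLOCK */
	long long blocks = 923			/* ADDRS_PER_INODE */
			+ 2 * leaf		/* two direct node blocks */
			+ 2 * leaf * 1018	/* two indirect node blocks */
			+ leaf * 1018 * 1018;	/* one double indirect block */

	/* prints: 1057053439 blocks, 4329690886144 bytes (~3.94 TiB) */
	printf("%lld blocks, %lld bytes\n", blocks, blocks << 12);
	return 0;
}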
340
341static int sanity_check_raw_super(struct f2fs_super_block *raw_super)
342{
343 unsigned int blocksize;
344
345 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic))
346 return 1;
347
348 /* Currently, only a 4KB block size is supported */
349 blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
350 if (blocksize != PAGE_CACHE_SIZE)
351 return 1;
352 if (le32_to_cpu(raw_super->log_sectorsize) !=
353 F2FS_LOG_SECTOR_SIZE)
354 return 1;
355 if (le32_to_cpu(raw_super->log_sectors_per_block) !=
356 F2FS_LOG_SECTORS_PER_BLOCK)
357 return 1;
358 return 0;
359}
360
361static int sanity_check_ckpt(struct f2fs_super_block *raw_super,
362 struct f2fs_checkpoint *ckpt)
363{
364 unsigned int total, fsmeta;
365
366 total = le32_to_cpu(raw_super->segment_count);
367 fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
368 fsmeta += le32_to_cpu(raw_super->segment_count_sit);
369 fsmeta += le32_to_cpu(raw_super->segment_count_nat);
370 fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
371 fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
372
373 if (fsmeta >= total)
374 return 1;
375 return 0;
376}
377
378static void init_sb_info(struct f2fs_sb_info *sbi)
379{
380 struct f2fs_super_block *raw_super = sbi->raw_super;
381 int i;
382
383 sbi->log_sectors_per_block =
384 le32_to_cpu(raw_super->log_sectors_per_block);
385 sbi->log_blocksize = le32_to_cpu(raw_super->log_blocksize);
386 sbi->blocksize = 1 << sbi->log_blocksize;
387 sbi->log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
388 sbi->blocks_per_seg = 1 << sbi->log_blocks_per_seg;
389 sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
390 sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
391 sbi->total_sections = le32_to_cpu(raw_super->section_count);
392 sbi->total_node_count =
393 (le32_to_cpu(raw_super->segment_count_nat) / 2)
394 * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
395 sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
396 sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
397 sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
398
399 for (i = 0; i < NR_COUNT_TYPE; i++)
400 atomic_set(&sbi->nr_pages[i], 0);
401}
402
403static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
404{
405 struct f2fs_sb_info *sbi;
406 struct f2fs_super_block *raw_super;
407 struct buffer_head *raw_super_buf;
408 struct inode *root;
409 long err = -EINVAL;
410 int i;
411
412 /* allocate memory for f2fs-specific super block info */
413 sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL);
414 if (!sbi)
415 return -ENOMEM;
416
417 /* set a temporary block size */
418 if (!sb_set_blocksize(sb, F2FS_BLKSIZE))
419 goto free_sbi;
420
421 /* read f2fs raw super block */
422 raw_super_buf = sb_bread(sb, 0);
423 if (!raw_super_buf) {
424 err = -EIO;
425 goto free_sbi;
426 }
427 raw_super = (struct f2fs_super_block *)
428 ((char *)raw_super_buf->b_data + F2FS_SUPER_OFFSET);
429
430 /* init some FS parameters */
431 sbi->active_logs = NR_CURSEG_TYPE;
432
433 set_opt(sbi, BG_GC);
434
435#ifdef CONFIG_F2FS_FS_XATTR
436 set_opt(sbi, XATTR_USER);
437#endif
438#ifdef CONFIG_F2FS_FS_POSIX_ACL
439 set_opt(sbi, POSIX_ACL);
440#endif
441 /* parse mount options */
442 if (parse_options(sbi, (char *)data))
443 goto free_sb_buf;
444
445 /* sanity checking of raw super */
446 if (sanity_check_raw_super(raw_super))
447 goto free_sb_buf;
448
449 sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
450 sb->s_max_links = F2FS_LINK_MAX;
451 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
452
453 sb->s_op = &f2fs_sops;
454 sb->s_xattr = f2fs_xattr_handlers;
455 sb->s_export_op = &f2fs_export_ops;
456 sb->s_magic = F2FS_SUPER_MAGIC;
457 sb->s_fs_info = sbi;
458 sb->s_time_gran = 1;
459 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
460 (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
461 memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
462
463 /* init f2fs-specific super block info */
464 sbi->sb = sb;
465 sbi->raw_super = raw_super;
466 sbi->raw_super_buf = raw_super_buf;
467 mutex_init(&sbi->gc_mutex);
468 mutex_init(&sbi->write_inode);
469 mutex_init(&sbi->writepages);
470 mutex_init(&sbi->cp_mutex);
471 for (i = 0; i < NR_LOCK_TYPE; i++)
472 mutex_init(&sbi->fs_lock[i]);
473 sbi->por_doing = 0;
474 spin_lock_init(&sbi->stat_lock);
475 init_rwsem(&sbi->bio_sem);
476 init_sb_info(sbi);
477
478 /* get an inode for meta space */
479 sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
480 if (IS_ERR(sbi->meta_inode)) {
481 err = PTR_ERR(sbi->meta_inode);
482 goto free_sb_buf;
483 }
484
485 err = get_valid_checkpoint(sbi);
486 if (err)
487 goto free_meta_inode;
488
489 /* sanity checking of checkpoint */
490 err = -EINVAL;
491 if (sanity_check_ckpt(raw_super, sbi->ckpt))
492 goto free_cp;
493
494 sbi->total_valid_node_count =
495 le32_to_cpu(sbi->ckpt->valid_node_count);
496 sbi->total_valid_inode_count =
497 le32_to_cpu(sbi->ckpt->valid_inode_count);
498 sbi->user_block_count = le64_to_cpu(sbi->ckpt->user_block_count);
499 sbi->total_valid_block_count =
500 le64_to_cpu(sbi->ckpt->valid_block_count);
501 sbi->last_valid_block_count = sbi->total_valid_block_count;
502 sbi->alloc_valid_block_count = 0;
503 INIT_LIST_HEAD(&sbi->dir_inode_list);
504 spin_lock_init(&sbi->dir_inode_lock);
505
506 /* init super block */
507 if (!sb_set_blocksize(sb, sbi->blocksize))
508 goto free_cp;
509
510 init_orphan_info(sbi);
511
512 /* setup f2fs internal modules */
513 err = build_segment_manager(sbi);
514 if (err)
515 goto free_sm;
516 err = build_node_manager(sbi);
517 if (err)
518 goto free_nm;
519
520 build_gc_manager(sbi);
521
522 /* get an inode for node space */
523 sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
524 if (IS_ERR(sbi->node_inode)) {
525 err = PTR_ERR(sbi->node_inode);
526 goto free_nm;
527 }
528
529 /* if there are any orphan nodes, free them */
530 err = -EINVAL;
531 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
532 recover_orphan_inodes(sbi))
533 goto free_node_inode;
534
535 /* read root inode and dentry */
536 root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
537 if (IS_ERR(root)) {
538 err = PTR_ERR(root);
539 goto free_node_inode;
540 }
541 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size)
542 goto free_root_inode;
543
544 sb->s_root = d_make_root(root); /* allocate root dentry */
545 if (!sb->s_root) {
546 err = -ENOMEM;
547 goto free_root_inode;
548 }
549
550 /* recover fsynced data */
551 if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) &&
552 !test_opt(sbi, DISABLE_ROLL_FORWARD))
553 recover_fsync_data(sbi);
554
555 /* After POR, we can run the background GC thread */
556 err = start_gc_thread(sbi);
557 if (err)
558 goto fail;
559
560 err = f2fs_build_stats(sbi);
561 if (err)
562 goto fail;
563
564 return 0;
565fail:
566 stop_gc_thread(sbi);
567free_root_inode:
568 dput(sb->s_root);
569 sb->s_root = NULL;
570free_node_inode:
571 iput(sbi->node_inode);
572free_nm:
573 destroy_node_manager(sbi);
574free_sm:
575 destroy_segment_manager(sbi);
576free_cp:
577 kfree(sbi->ckpt);
578free_meta_inode:
579 make_bad_inode(sbi->meta_inode);
580 iput(sbi->meta_inode);
581free_sb_buf:
582 brelse(raw_super_buf);
583free_sbi:
584 kfree(sbi);
585 return err;
586}
587
588static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
589 const char *dev_name, void *data)
590{
591 return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
592}
593
594static struct file_system_type f2fs_fs_type = {
595 .owner = THIS_MODULE,
596 .name = "f2fs",
597 .mount = f2fs_mount,
598 .kill_sb = kill_block_super,
599 .fs_flags = FS_REQUIRES_DEV,
600};
601
602static int init_inodecache(void)
603{
604 f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache",
605 sizeof(struct f2fs_inode_info), NULL);
606 if (f2fs_inode_cachep == NULL)
607 return -ENOMEM;
608 return 0;
609}
610
611static void destroy_inodecache(void)
612{
613 /*
614 * Make sure all delayed rcu free inodes are flushed before we
615 * destroy cache.
616 */
617 rcu_barrier();
618 kmem_cache_destroy(f2fs_inode_cachep);
619}
620
621static int __init init_f2fs_fs(void)
622{
623 int err;
624
625 err = init_inodecache();
626 if (err)
627 goto fail;
628 err = create_node_manager_caches();
629 if (err)
630 goto fail;
631 err = create_gc_caches();
632 if (err)
633 goto fail;
634 err = create_checkpoint_caches();
635 if (err)
636 goto fail;
637 return register_filesystem(&f2fs_fs_type);
638fail:
639 return err;
640}
641
642static void __exit exit_f2fs_fs(void)
643{
644 destroy_root_stats();
645 unregister_filesystem(&f2fs_fs_type);
646 destroy_checkpoint_caches();
647 destroy_gc_caches();
648 destroy_node_manager_caches();
649 destroy_inodecache();
650}
651
652module_init(init_f2fs_fs)
653module_exit(exit_f2fs_fs)
654
655MODULE_AUTHOR("Samsung Electronics's Praesto Team");
656MODULE_DESCRIPTION("Flash Friendly File System");
657MODULE_LICENSE("GPL");
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
new file mode 100644
index 000000000000..7d52e8dc0c59
--- /dev/null
+++ b/fs/f2fs/xattr.c
@@ -0,0 +1,440 @@
1/*
2 * fs/f2fs/xattr.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/xattr.c
8 *
9 * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
10 *
11 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
12 * Extended attributes for symlinks and special files added per
13 * suggestion of Luka Renko <luka.renko@hermes.si>.
14 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
15 * Red Hat Inc.
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License version 2 as
19 * published by the Free Software Foundation.
20 */
21#include <linux/rwsem.h>
22#include <linux/f2fs_fs.h>
23#include "f2fs.h"
24#include "xattr.h"
25
26static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list,
27 size_t list_size, const char *name, size_t name_len, int type)
28{
29 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
30 int total_len, prefix_len = 0;
31 const char *prefix = NULL;
32
33 switch (type) {
34 case F2FS_XATTR_INDEX_USER:
35 if (!test_opt(sbi, XATTR_USER))
36 return -EOPNOTSUPP;
37 prefix = XATTR_USER_PREFIX;
38 prefix_len = XATTR_USER_PREFIX_LEN;
39 break;
40 case F2FS_XATTR_INDEX_TRUSTED:
41 if (!capable(CAP_SYS_ADMIN))
42 return -EPERM;
43 prefix = XATTR_TRUSTED_PREFIX;
44 prefix_len = XATTR_TRUSTED_PREFIX_LEN;
45 break;
46 default:
47 return -EINVAL;
48 }
49
50 total_len = prefix_len + name_len + 1;
51 if (list && total_len <= list_size) {
52 memcpy(list, prefix, prefix_len);
53 memcpy(list+prefix_len, name, name_len);
54 list[prefix_len + name_len] = '\0';
55 }
56 return total_len;
57}
58
59static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
60 void *buffer, size_t size, int type)
61{
62 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
63
64 switch (type) {
65 case F2FS_XATTR_INDEX_USER:
66 if (!test_opt(sbi, XATTR_USER))
67 return -EOPNOTSUPP;
68 break;
69 case F2FS_XATTR_INDEX_TRUSTED:
70 if (!capable(CAP_SYS_ADMIN))
71 return -EPERM;
72 break;
73 default:
74 return -EINVAL;
75 }
76 if (strcmp(name, "") == 0)
77 return -EINVAL;
78 return f2fs_getxattr(dentry->d_inode, type, name,
79 buffer, size);
80}
81
82static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
83 const void *value, size_t size, int flags, int type)
84{
85 struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
86
87 switch (type) {
88 case F2FS_XATTR_INDEX_USER:
89 if (!test_opt(sbi, XATTR_USER))
90 return -EOPNOTSUPP;
91 break;
92 case F2FS_XATTR_INDEX_TRUSTED:
93 if (!capable(CAP_SYS_ADMIN))
94 return -EPERM;
95 break;
96 default:
97 return -EINVAL;
98 }
99 if (strcmp(name, "") == 0)
100 return -EINVAL;
101
102 return f2fs_setxattr(dentry->d_inode, type, name, value, size);
103}
104
105static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
106 size_t list_size, const char *name, size_t name_len, int type)
107{
108 const char *xname = F2FS_SYSTEM_ADVISE_PREFIX;
109 size_t size;
110
111 if (type != F2FS_XATTR_INDEX_ADVISE)
112 return 0;
113
114 size = strlen(xname) + 1;
115 if (list && size <= list_size)
116 memcpy(list, xname, size);
117 return size;
118}
119
120static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
121 void *buffer, size_t size, int type)
122{
123 struct inode *inode = dentry->d_inode;
124
125 if (strcmp(name, "") != 0)
126 return -EINVAL;
127
128 *((char *)buffer) = F2FS_I(inode)->i_advise;
129 return sizeof(char);
130}
131
132static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
133 const void *value, size_t size, int flags, int type)
134{
135 struct inode *inode = dentry->d_inode;
136
137 if (strcmp(name, "") != 0)
138 return -EINVAL;
139 if (!inode_owner_or_capable(inode))
140 return -EPERM;
141 if (value == NULL)
142 return -EINVAL;
143
144 F2FS_I(inode)->i_advise |= *(char *)value;
145 return 0;
146}
147
148const struct xattr_handler f2fs_xattr_user_handler = {
149 .prefix = XATTR_USER_PREFIX,
150 .flags = F2FS_XATTR_INDEX_USER,
151 .list = f2fs_xattr_generic_list,
152 .get = f2fs_xattr_generic_get,
153 .set = f2fs_xattr_generic_set,
154};
155
156const struct xattr_handler f2fs_xattr_trusted_handler = {
157 .prefix = XATTR_TRUSTED_PREFIX,
158 .flags = F2FS_XATTR_INDEX_TRUSTED,
159 .list = f2fs_xattr_generic_list,
160 .get = f2fs_xattr_generic_get,
161 .set = f2fs_xattr_generic_set,
162};
163
164const struct xattr_handler f2fs_xattr_advise_handler = {
165 .prefix = F2FS_SYSTEM_ADVISE_PREFIX,
166 .flags = F2FS_XATTR_INDEX_ADVISE,
167 .list = f2fs_xattr_advise_list,
168 .get = f2fs_xattr_advise_get,
169 .set = f2fs_xattr_advise_set,
170};
171
172static const struct xattr_handler *f2fs_xattr_handler_map[] = {
173 [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
174#ifdef CONFIG_F2FS_FS_POSIX_ACL
175 [F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &f2fs_xattr_acl_access_handler,
176 [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler,
177#endif
178 [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler,
179 [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
180};
181
182const struct xattr_handler *f2fs_xattr_handlers[] = {
183 &f2fs_xattr_user_handler,
184#ifdef CONFIG_F2FS_FS_POSIX_ACL
185 &f2fs_xattr_acl_access_handler,
186 &f2fs_xattr_acl_default_handler,
187#endif
188 &f2fs_xattr_trusted_handler,
189 &f2fs_xattr_advise_handler,
190 NULL,
191};
192
193static inline const struct xattr_handler *f2fs_xattr_handler(int name_index)
194{
195 const struct xattr_handler *handler = NULL;
196
197 if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map))
198 handler = f2fs_xattr_handler_map[name_index];
199 return handler;
200}
201
202int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
203 void *buffer, size_t buffer_size)
204{
205 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
206 struct f2fs_inode_info *fi = F2FS_I(inode);
207 struct f2fs_xattr_entry *entry;
208 struct page *page;
209 void *base_addr;
210 int error = 0, found = 0;
211 int value_len, name_len;
212
213 if (name == NULL)
214 return -EINVAL;
215 name_len = strlen(name);
216
217 if (!fi->i_xattr_nid)
218 return -ENODATA;
219
220 page = get_node_page(sbi, fi->i_xattr_nid);
221 base_addr = page_address(page);
222
223 list_for_each_xattr(entry, base_addr) {
224 if (entry->e_name_index != name_index)
225 continue;
226 if (entry->e_name_len != name_len)
227 continue;
228 if (!memcmp(entry->e_name, name, name_len)) {
229 found = 1;
230 break;
231 }
232 }
233 if (!found) {
234 error = -ENODATA;
235 goto cleanup;
236 }
237
238 value_len = le16_to_cpu(entry->e_value_size);
239
240 if (buffer && value_len > buffer_size) {
241 error = -ERANGE;
242 goto cleanup;
243 }
244
245 if (buffer) {
246 char *pval = entry->e_name + entry->e_name_len;
247 memcpy(buffer, pval, value_len);
248 }
249 error = value_len;
250
251cleanup:
252 f2fs_put_page(page, 1);
253 return error;
254}
255
256ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
257{
258 struct inode *inode = dentry->d_inode;
259 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
260 struct f2fs_inode_info *fi = F2FS_I(inode);
261 struct f2fs_xattr_entry *entry;
262 struct page *page;
263 void *base_addr;
264 int error = 0;
265 size_t rest = buffer_size;
266
267 if (!fi->i_xattr_nid)
268 return 0;
269
270 page = get_node_page(sbi, fi->i_xattr_nid);
271 base_addr = page_address(page);
272
273 list_for_each_xattr(entry, base_addr) {
274 const struct xattr_handler *handler =
275 f2fs_xattr_handler(entry->e_name_index);
276 size_t size;
277
278 if (!handler)
279 continue;
280
281 size = handler->list(dentry, buffer, rest, entry->e_name,
282 entry->e_name_len, handler->flags);
283 if (buffer && size > rest) {
284 error = -ERANGE;
285 goto cleanup;
286 }
287
288 if (buffer)
289 buffer += size;
290 rest -= size;
291 }
292 error = buffer_size - rest;
293cleanup:
294 f2fs_put_page(page, 1);
295 return error;
296}
297
298int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
299 const void *value, size_t value_len)
300{
301 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
302 struct f2fs_inode_info *fi = F2FS_I(inode);
303 struct f2fs_xattr_header *header = NULL;
304 struct f2fs_xattr_entry *here, *last;
305 struct page *page;
306 void *base_addr;
307 int error, found, free, name_len, newsize;
308 char *pval;
309
310 if (name == NULL)
311 return -EINVAL;
312 name_len = strlen(name);
313
314 if (value == NULL)
315 value_len = 0;
316
317 if (name_len > 255 || value_len > MAX_VALUE_LEN)
318 return -ERANGE;
319
320 mutex_lock_op(sbi, NODE_NEW);
321 if (!fi->i_xattr_nid) {
322 /* Allocate new attribute block */
323 struct dnode_of_data dn;
324
325 if (!alloc_nid(sbi, &fi->i_xattr_nid)) {
326 mutex_unlock_op(sbi, NODE_NEW);
327 return -ENOSPC;
328 }
329 set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid);
330 mark_inode_dirty(inode);
331
332 page = new_node_page(&dn, XATTR_NODE_OFFSET);
333 if (IS_ERR(page)) {
334 alloc_nid_failed(sbi, fi->i_xattr_nid);
335 fi->i_xattr_nid = 0;
336 mutex_unlock_op(sbi, NODE_NEW);
337 return PTR_ERR(page);
338 }
339
340 alloc_nid_done(sbi, fi->i_xattr_nid);
341 base_addr = page_address(page);
342 header = XATTR_HDR(base_addr);
343 header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC);
344 header->h_refcount = cpu_to_le32(1);
345 } else {
346 /* The inode already has an extended attribute block. */
347 page = get_node_page(sbi, fi->i_xattr_nid);
348 if (IS_ERR(page)) {
349 mutex_unlock_op(sbi, NODE_NEW);
350 return PTR_ERR(page);
351 }
352
353 base_addr = page_address(page);
354 header = XATTR_HDR(base_addr);
355 }
356
357 if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) {
358 error = -EIO;
359 goto cleanup;
360 }
361
362 /* find entry with wanted name. */
363 found = 0;
364 list_for_each_xattr(here, base_addr) {
365 if (here->e_name_index != name_index)
366 continue;
367 if (here->e_name_len != name_len)
368 continue;
369 if (!memcmp(here->e_name, name, name_len)) {
370 found = 1;
371 break;
372 }
373 }
374
375 last = here;
376
377 while (!IS_XATTR_LAST_ENTRY(last))
378 last = XATTR_NEXT_ENTRY(last);
379
380 newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) +
381 name_len + value_len);
382
383 /* 1. Check space */
384 if (value) {
385 /* If value is NULL, this is a remove operation.
386 * For an update operation, we calculate the free space.
387 */
388 free = MIN_OFFSET - ((char *)last - (char *)header);
389 if (found)
390 free = free - ENTRY_SIZE(here);
391
392 if (free < newsize) {
393 error = -ENOSPC;
394 goto cleanup;
395 }
396 }
397
398 /* 2. Remove old entry */
399 if (found) {
400 /* If the entry was found, remove the old entry.
401 * If it was not found, no removal is needed.
402 */
403 struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here);
404 int oldsize = ENTRY_SIZE(here);
405
406 memmove(here, next, (char *)last - (char *)next);
407 last = (struct f2fs_xattr_entry *)((char *)last - oldsize);
408 memset(last, 0, oldsize);
409 }
410
411 /* 3. Write new entry */
412 if (value) {
413 /* The old entry, if any, was removed above.
414 * We just write the new entry. */
415 memset(last, 0, newsize);
416 last->e_name_index = name_index;
417 last->e_name_len = name_len;
418 memcpy(last->e_name, name, name_len);
419 pval = last->e_name + name_len;
420 memcpy(pval, value, value_len);
421 last->e_value_size = cpu_to_le16(value_len);
422 }
423
424 set_page_dirty(page);
425 f2fs_put_page(page, 1);
426
427 if (is_inode_flag_set(fi, FI_ACL_MODE)) {
428 inode->i_mode = fi->i_acl_mode;
429 inode->i_ctime = CURRENT_TIME;
430 clear_inode_flag(fi, FI_ACL_MODE);
431 }
432 f2fs_write_inode(inode, NULL);
433 mutex_unlock_op(sbi, NODE_NEW);
434
435 return 0;
436cleanup:
437 f2fs_put_page(page, 1);
438 mutex_unlock_op(sbi, NODE_NEW);
439 return error;
440}
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
new file mode 100644
index 000000000000..49c9558305e3
--- /dev/null
+++ b/fs/f2fs/xattr.h
@@ -0,0 +1,145 @@
1/*
2 * fs/f2fs/xattr.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * Portions of this code from linux/fs/ext2/xattr.h
8 *
9 * On-disk format of extended attributes for the ext2 filesystem.
10 *
11 * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License version 2 as
15 * published by the Free Software Foundation.
16 */
17#ifndef __F2FS_XATTR_H__
18#define __F2FS_XATTR_H__
19
20#include <linux/init.h>
21#include <linux/xattr.h>
22
23/* Magic value in attribute blocks */
24#define F2FS_XATTR_MAGIC 0xF2F52011
25
26/* Maximum number of references to one attribute block */
27#define F2FS_XATTR_REFCOUNT_MAX 1024
28
29/* Name indexes */
30#define F2FS_SYSTEM_ADVISE_PREFIX "system.advise"
31#define F2FS_XATTR_INDEX_USER 1
32#define F2FS_XATTR_INDEX_POSIX_ACL_ACCESS 2
33#define F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
34#define F2FS_XATTR_INDEX_TRUSTED 4
35#define F2FS_XATTR_INDEX_LUSTRE 5
36#define F2FS_XATTR_INDEX_SECURITY 6
37#define F2FS_XATTR_INDEX_ADVISE 7
38
39struct f2fs_xattr_header {
40 __le32 h_magic; /* magic number for identification */
41 __le32 h_refcount; /* reference count */
42 __u32 h_reserved[4]; /* zero right now */
43};
44
45struct f2fs_xattr_entry {
46 __u8 e_name_index;
47 __u8 e_name_len;
48 __le16 e_value_size; /* size of attribute value */
49 char e_name[0]; /* attribute name */
50};
51
52#define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr))
53#define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr))
54#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr)+1))
55#define XATTR_ROUND (3)
56
57#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND)
58
59#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \
60 entry->e_name_len + le16_to_cpu(entry->e_value_size)))
61
62#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\
63 ENTRY_SIZE(entry)))
64
65#define IS_XATTR_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
66
67#define list_for_each_xattr(entry, addr) \
68 for (entry = XATTR_FIRST_ENTRY(addr);\
69 !IS_XATTR_LAST_ENTRY(entry);\
70 entry = XATTR_NEXT_ENTRY(entry))
71
72
73#define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \
74 sizeof(struct node_footer) - \
75 sizeof(__u32))
76
77#define MAX_VALUE_LEN (MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \
78 sizeof(struct f2fs_xattr_entry))
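A worked example for the macros above (editor's illustration, matching the "foo" entry in the layout diagram below):

/*
 * XATTR_ALIGN rounds up to a 4-byte boundary: (21 + 3) & ~3 = 24.
 * sizeof(struct f2fs_xattr_entry) is 4 (two __u8 plus one __le16), so an
 * entry named "foo" (e_name_len = 3) with a 14-byte value occupies
 * ENTRY_SIZE = XATTR_ALIGN(4 + 3 + 14) = XATTR_ALIGN(21) = 24 bytes.
 * IS_XATTR_LAST_ENTRY stops the walk at the first all-zero 32-bit word.
 */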
79
80/*
81 * On-disk structure of f2fs_xattr
82 * We use only 1 block for xattr.
83 *
84 * +--------------------+
85 * | f2fs_xattr_header |
86 * | |
87 * +--------------------+
88 * | f2fs_xattr_entry |
89 * | .e_name_index = 1 |
90 * | .e_name_len = 3 |
91 * | .e_value_size = 14 |
92 * | .e_name = "foo" |
93 * | "value_of_xattr" |<- value_offs = e_name + e_name_len
94 * +--------------------+
95 * | f2fs_xattr_entry |
96 * | .e_name_index = 4 |
97 * | .e_name = "bar" |
98 * +--------------------+
99 * | |
100 * | Free |
101 * | |
102 * +--------------------+<- MIN_OFFSET
103 * | node_footer |
104 * | (nid, ino, offset) |
105 * +--------------------+
106 *
107 **/
108
109#ifdef CONFIG_F2FS_FS_XATTR
110extern const struct xattr_handler f2fs_xattr_user_handler;
111extern const struct xattr_handler f2fs_xattr_trusted_handler;
112extern const struct xattr_handler f2fs_xattr_acl_access_handler;
113extern const struct xattr_handler f2fs_xattr_acl_default_handler;
114extern const struct xattr_handler f2fs_xattr_advise_handler;
115
116extern const struct xattr_handler *f2fs_xattr_handlers[];
117
118extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name,
119 const void *value, size_t value_len);
120extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name,
121 void *buffer, size_t buffer_size);
122extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
123 size_t buffer_size);
124
125#else
126
127#define f2fs_xattr_handlers NULL
128static inline int f2fs_setxattr(struct inode *inode, int name_index,
129 const char *name, const void *value, size_t value_len)
130{
131 return -EOPNOTSUPP;
132}
133static inline int f2fs_getxattr(struct inode *inode, int name_index,
134 const char *name, void *buffer, size_t buffer_size)
135{
136 return -EOPNOTSUPP;
137}
138static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
139 size_t buffer_size)
140{
141 return -EOPNOTSUPP;
142}
143#endif
144
145#endif /* __F2FS_XATTR_H__ */
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
new file mode 100644
index 000000000000..f9a12f6243a5
--- /dev/null
+++ b/include/linux/f2fs_fs.h
@@ -0,0 +1,413 @@
1/**
2 * include/linux/f2fs_fs.h
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#ifndef _LINUX_F2FS_FS_H
12#define _LINUX_F2FS_FS_H
13
14#include <linux/pagemap.h>
15#include <linux/types.h>
16
17#define F2FS_SUPER_OFFSET 1024 /* byte-size offset */
18#define F2FS_LOG_SECTOR_SIZE 9 /* 9 bits for 512 byte */
19#define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */
20#define F2FS_BLKSIZE 4096 /* support only 4KB block */
21#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
22
23#define NULL_ADDR 0x0U
24#define NEW_ADDR -1U
25
26#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num)
27#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
28#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
29
30/* This flag is used by node and meta inodes, and by recovery */
31#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
32
33/*
34 * For further optimization on multi-head logs, the on-disk layout supports a
35 * maximum of 16 logs by default. The number 16 is expected to cover all
36 * cases well enough. The implementation currently uses no more than 6 logs.
37 * Half the logs are used for nodes, and the other half are used for data.
38 */
39#define MAX_ACTIVE_LOGS 16
40#define MAX_ACTIVE_NODE_LOGS 8
41#define MAX_ACTIVE_DATA_LOGS 8
42
43/*
44 * For superblock
45 */
46struct f2fs_super_block {
47 __le32 magic; /* Magic Number */
48 __le16 major_ver; /* Major Version */
49 __le16 minor_ver; /* Minor Version */
50 __le32 log_sectorsize; /* log2 sector size in bytes */
51 __le32 log_sectors_per_block; /* log2 # of sectors per block */
52 __le32 log_blocksize; /* log2 block size in bytes */
53 __le32 log_blocks_per_seg; /* log2 # of blocks per segment */
54 __le32 segs_per_sec; /* # of segments per section */
55 __le32 secs_per_zone; /* # of sections per zone */
56 __le32 checksum_offset; /* checksum offset inside super block */
57 __le64 block_count; /* total # of user blocks */
58 __le32 section_count; /* total # of sections */
59 __le32 segment_count; /* total # of segments */
60 __le32 segment_count_ckpt; /* # of segments for checkpoint */
61 __le32 segment_count_sit; /* # of segments for SIT */
62 __le32 segment_count_nat; /* # of segments for NAT */
63 __le32 segment_count_ssa; /* # of segments for SSA */
64 __le32 segment_count_main; /* # of segments for main area */
65 __le32 segment0_blkaddr; /* start block address of segment 0 */
66 __le32 cp_blkaddr; /* start block address of checkpoint */
67 __le32 sit_blkaddr; /* start block address of SIT */
68 __le32 nat_blkaddr; /* start block address of NAT */
69 __le32 ssa_blkaddr; /* start block address of SSA */
70 __le32 main_blkaddr; /* start block address of main area */
71 __le32 root_ino; /* root inode number */
72 __le32 node_ino; /* node inode number */
73 __le32 meta_ino; /* meta inode number */
74 __u8 uuid[16]; /* 128-bit uuid for volume */
75 __le16 volume_name[512]; /* volume name */
76 __le32 extension_count; /* # of extensions below */
77 __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
78} __packed;
79
80/*
81 * For checkpoint
82 */
83#define CP_ERROR_FLAG 0x00000008
84#define CP_COMPACT_SUM_FLAG 0x00000004
85#define CP_ORPHAN_PRESENT_FLAG 0x00000002
86#define CP_UMOUNT_FLAG 0x00000001
87
88struct f2fs_checkpoint {
89 __le64 checkpoint_ver; /* checkpoint block version number */
90 __le64 user_block_count; /* # of user blocks */
91 __le64 valid_block_count; /* # of valid blocks in main area */
92 __le32 rsvd_segment_count; /* # of reserved segments for gc */
93 __le32 overprov_segment_count; /* # of overprovision segments */
94 __le32 free_segment_count; /* # of free segments in main area */
95
96 /* information of current node segments */
97 __le32 cur_node_segno[MAX_ACTIVE_NODE_LOGS];
98 __le16 cur_node_blkoff[MAX_ACTIVE_NODE_LOGS];
99 /* information of current data segments */
100 __le32 cur_data_segno[MAX_ACTIVE_DATA_LOGS];
101 __le16 cur_data_blkoff[MAX_ACTIVE_DATA_LOGS];
102 __le32 ckpt_flags; /* Flags : umount and journal_present */
103 __le32 cp_pack_total_block_count; /* total # of one cp pack */
104 __le32 cp_pack_start_sum; /* start block number of data summary */
105 __le32 valid_node_count; /* Total number of valid nodes */
106 __le32 valid_inode_count; /* Total number of valid inodes */
107 __le32 next_free_nid; /* Next free node number */
108 __le32 sit_ver_bitmap_bytesize; /* Default value 64 */
109 __le32 nat_ver_bitmap_bytesize; /* Default value 256 */
110 __le32 checksum_offset; /* checksum offset inside cp block */
111 __le64 elapsed_time; /* mounted time */
112 /* allocation type of current segment */
113 unsigned char alloc_type[MAX_ACTIVE_LOGS];
114
115 /* SIT and NAT version bitmap */
116 unsigned char sit_nat_version_bitmap[1];
117} __packed;
118
119/*
120 * For orphan inode management
121 */
122#define F2FS_ORPHANS_PER_BLOCK 1020
123
124struct f2fs_orphan_block {
125 __le32 ino[F2FS_ORPHANS_PER_BLOCK]; /* inode numbers */
126 __le32 reserved; /* reserved */
127 __le16 blk_addr; /* block index in current CP */
128 __le16 blk_count; /* Number of orphan inode blocks in CP */
129 __le32 entry_count; /* Total number of orphan nodes in current CP */
130 __le32 check_sum; /* CRC32 for orphan inode block */
131} __packed;
132
133/*
134 * For NODE structure
135 */
136struct f2fs_extent {
137 __le32 fofs; /* start file offset of the extent */
138 __le32 blk_addr; /* start block address of the extent */
139 __le32 len; /* length of the extent */
140} __packed;
141
142#define F2FS_MAX_NAME_LEN 256
143#define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
144#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
145#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */
146
147struct f2fs_inode {
148 __le16 i_mode; /* file mode */
149 __u8 i_advise; /* file hints */
150 __u8 i_reserved; /* reserved */
151 __le32 i_uid; /* user ID */
152 __le32 i_gid; /* group ID */
153 __le32 i_links; /* links count */
154 __le64 i_size; /* file size in bytes */
155 __le64 i_blocks; /* file size in blocks */
156 __le64 i_atime; /* access time */
157 __le64 i_ctime; /* change time */
158 __le64 i_mtime; /* modification time */
159 __le32 i_atime_nsec; /* access time in nano scale */
160 __le32 i_ctime_nsec; /* change time in nano scale */
161 __le32 i_mtime_nsec; /* modification time in nano scale */
162 __le32 i_generation; /* file version (for NFS) */
163 __le32 i_current_depth; /* only for directory depth */
164 __le32 i_xattr_nid; /* nid to save xattr */
165 __le32 i_flags; /* file attributes */
166 __le32 i_pino; /* parent inode number */
167 __le32 i_namelen; /* file name length */
168 __u8 i_name[F2FS_MAX_NAME_LEN]; /* file name for SPOR */
169
170 struct f2fs_extent i_ext; /* caching a largest extent */
171
172 __le32 i_addr[ADDRS_PER_INODE]; /* Pointers to data blocks */
173
174 __le32 i_nid[5]; /* direct(2), indirect(2),
175 double_indirect(1) node id */
176} __packed;
177
178struct direct_node {
179 __le32 addr[ADDRS_PER_BLOCK]; /* array of data block address */
180} __packed;
181
182struct indirect_node {
183 __le32 nid[NIDS_PER_BLOCK]; /* array of data block address */
184} __packed;
185
186enum {
187 COLD_BIT_SHIFT = 0,
188 FSYNC_BIT_SHIFT,
189 DENT_BIT_SHIFT,
190 OFFSET_BIT_SHIFT
191};
192
193struct node_footer {
194 __le32 nid; /* node id */
195 __le32 ino; /* inode number */
196 __le32 flag; /* include cold/fsync/dentry marks and offset */
197 __le64 cp_ver; /* checkpoint version */
198 __le32 next_blkaddr; /* next node page block address */
199} __packed;
200
201struct f2fs_node {
202 /* can be one of three types: inode, direct, and indirect types */
203 union {
204 struct f2fs_inode i;
205 struct direct_node dn;
206 struct indirect_node in;
207 };
208 struct node_footer footer;
209} __packed;
210
211/*
212 * For NAT entries
213 */
214#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry))
215
216struct f2fs_nat_entry {
217 __u8 version; /* latest version of cached nat entry */
218 __le32 ino; /* inode number */
219 __le32 block_addr; /* block address */
220} __packed;
221
222struct f2fs_nat_block {
223 struct f2fs_nat_entry entries[NAT_ENTRY_PER_BLOCK];
224} __packed;
225
226/*
227 * For SIT entries
228 *
229 * Each segment is 2MB in size by default, so a bitmap for the validity of
230 * the blocks therein occupies 64 bytes (512 bits).
231 * This is not allowed to change.
232 */
233#define SIT_VBLOCK_MAP_SIZE 64
234#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry))
235
236/*
237 * Note that f2fs_sit_entry->vblocks has the following bit-field information.
238 * [15:10] : allocation type such as CURSEG_XXXX_TYPE
239 * [9:0] : valid block count
240 */
241#define SIT_VBLOCKS_SHIFT 10
242#define SIT_VBLOCKS_MASK ((1 << SIT_VBLOCKS_SHIFT) - 1)
243#define GET_SIT_VBLOCKS(raw_sit) \
244 (le16_to_cpu((raw_sit)->vblocks) & SIT_VBLOCKS_MASK)
245#define GET_SIT_TYPE(raw_sit) \
246 ((le16_to_cpu((raw_sit)->vblocks) & ~SIT_VBLOCKS_MASK) \
247 >> SIT_VBLOCKS_SHIFT)
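A minimal standalone sketch of this packing, with invented values (not part of this diff):

#include <stdio.h>

int main(void)
{
	/* pack allocation type 2 with 300 valid blocks: [15:10] | [9:0] */
	unsigned short vblocks = (2 << 10) | 300;

	/* the same shift/mask as GET_SIT_TYPE / GET_SIT_VBLOCKS above */
	printf("type = %u, valid = %u\n",
	       vblocks >> 10, vblocks & ((1 << 10) - 1));	/* 2, 300 */
	return 0;
}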
248
249struct f2fs_sit_entry {
250 __le16 vblocks; /* reference above */
251 __u8 valid_map[SIT_VBLOCK_MAP_SIZE]; /* bitmap for valid blocks */
252 __le64 mtime; /* segment age for cleaning */
253} __packed;
254
255struct f2fs_sit_block {
256 struct f2fs_sit_entry entries[SIT_ENTRY_PER_BLOCK];
257} __packed;
258
259/*
260 * For segment summary
261 *
262 * One summary block contains exactly 512 summary entries, which represent
263 * exactly one 2MB segment by default. These basic units are not allowed to
264 * change.
265 * NOTE: For initializing fields, you must use set_summary.
266 *
267 * - For a data page, nid represents the dnode's nid.
268 * - For a node page, nid represents the node page's own nid.
269 *
270 * The ofs_in_node is used only by data pages. It is the offset from the
271 * beginning of the node page that is used to get a data block address.
272 * ex) data_blkaddr = (block_t)(nodepage_start_address + ofs_in_node)
273 */
274#define ENTRIES_IN_SUM 512
275#define SUMMARY_SIZE (7) /* sizeof(struct summary) */
276#define SUM_FOOTER_SIZE (5) /* sizeof(struct summary_footer) */
277#define SUM_ENTRY_SIZE (SUMMARY_SIZE * ENTRIES_IN_SUM)
278
279/* a summary entry for a 4KB-sized block in a segment */
280struct f2fs_summary {
281 __le32 nid; /* parent node id */
282 union {
283 __u8 reserved[3];
284 struct {
285 __u8 version; /* node version number */
286 __le16 ofs_in_node; /* block index in parent node */
287 } __packed;
288 };
289} __packed;
290
291/* summary block type, node or data, is stored to the summary_footer */
292#define SUM_TYPE_NODE (1)
293#define SUM_TYPE_DATA (0)
294
295struct summary_footer {
296 unsigned char entry_type; /* SUM_TYPE_XXX */
297 __u32 check_sum; /* summary checksum */
298} __packed;
299
300#define SUM_JOURNAL_SIZE (F2FS_BLKSIZE - SUM_FOOTER_SIZE -\
301 SUM_ENTRY_SIZE)
302#define NAT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\
303 sizeof(struct nat_journal_entry))
304#define NAT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\
305 sizeof(struct nat_journal_entry))
306#define SIT_JOURNAL_ENTRIES ((SUM_JOURNAL_SIZE - 2) /\
307 sizeof(struct sit_journal_entry))
308#define SIT_JOURNAL_RESERVED ((SUM_JOURNAL_SIZE - 2) %\
309 sizeof(struct sit_journal_entry))
310/*
311 * frequently updated NAT/SIT entries can be stored in the spare area in
312 * summary blocks
313 */
314enum {
315 NAT_JOURNAL = 0,
316 SIT_JOURNAL
317};
318
319struct nat_journal_entry {
320 __le32 nid;
321 struct f2fs_nat_entry ne;
322} __packed;
323
324struct nat_journal {
325 struct nat_journal_entry entries[NAT_JOURNAL_ENTRIES];
326 __u8 reserved[NAT_JOURNAL_RESERVED];
327} __packed;
328
329struct sit_journal_entry {
330 __le32 segno;
331 struct f2fs_sit_entry se;
332} __packed;
333
334struct sit_journal {
335 struct sit_journal_entry entries[SIT_JOURNAL_ENTRIES];
336 __u8 reserved[SIT_JOURNAL_RESERVED];
337} __packed;
338
339/* 4KB-sized summary block structure */
340struct f2fs_summary_block {
341 struct f2fs_summary entries[ENTRIES_IN_SUM];
342 union {
343 __le16 n_nats;
344 __le16 n_sits;
345 };
346 /* spare area is used by NAT or SIT journals */
347 union {
348 struct nat_journal nat_j;
349 struct sit_journal sit_j;
350 };
351 struct summary_footer footer;
352} __packed;
353
354/*
355 * For directory operations
356 */
357#define F2FS_DOT_HASH 0
358#define F2FS_DDOT_HASH F2FS_DOT_HASH
359#define F2FS_MAX_HASH (~((0x3ULL) << 62))
360#define F2FS_HASH_COL_BIT ((0x1ULL) << 63)
361
362typedef __le32 f2fs_hash_t;
363
364/* One directory entry slot covers an 8-byte-long file name */
365#define F2FS_NAME_LEN 8
366#define F2FS_NAME_LEN_BITS 3
367
368#define GET_DENTRY_SLOTS(x) ((x + F2FS_NAME_LEN - 1) >> F2FS_NAME_LEN_BITS)
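For example (editor's note), GET_DENTRY_SLOTS rounds a name length up to whole 8-byte slots:

/*
 * GET_DENTRY_SLOTS(1)  = (1 + 7)  >> 3 = 1 slot
 * GET_DENTRY_SLOTS(8)  = (8 + 7)  >> 3 = 1 slot
 * GET_DENTRY_SLOTS(20) = (20 + 7) >> 3 = 3 slots
 */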
369
370/* the number of dentry in a block */
371#define NR_DENTRY_IN_BLOCK 214
372
373/* MAX level for dir lookup */
374#define MAX_DIR_HASH_DEPTH 63
375
376#define SIZE_OF_DIR_ENTRY 11 /* by byte */
377#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
378 BITS_PER_BYTE)
379#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
380 F2FS_NAME_LEN) * \
381 NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
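A quick arithmetic check of the dentry block layout (editor's illustration): each of the 214 slots costs SIZE_OF_DIR_ENTRY + F2FS_NAME_LEN bytes, and the remainder of the 4KB block is reserved.

/*
 * 214 * (11 + 8) = 4066 bytes of entries and names,
 * (214 + 7) / 8  =   27 bytes of validity bitmap,
 * 4096 - (4066 + 27) = 3 bytes of SIZE_OF_RESERVED padding.
 */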
382
383/* One directory entry slot representing F2FS_NAME_LEN-sized file name */
384struct f2fs_dir_entry {
385 __le32 hash_code; /* hash code of file name */
386 __le32 ino; /* inode number */
387 __le16 name_len; /* length of file name */
388 __u8 file_type; /* file type */
389} __packed;
390
391/* 4KB-sized directory entry block */
392struct f2fs_dentry_block {
393 /* validity bitmap for directory entries in each block */
394 __u8 dentry_bitmap[SIZE_OF_DENTRY_BITMAP];
395 __u8 reserved[SIZE_OF_RESERVED];
396 struct f2fs_dir_entry dentry[NR_DENTRY_IN_BLOCK];
397 __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_NAME_LEN];
398} __packed;
399
400/* file types used in inode_info->flags */
401enum {
402 F2FS_FT_UNKNOWN,
403 F2FS_FT_REG_FILE,
404 F2FS_FT_DIR,
405 F2FS_FT_CHRDEV,
406 F2FS_FT_BLKDEV,
407 F2FS_FT_FIFO,
408 F2FS_FT_SOCK,
409 F2FS_FT_SYMLINK,
410 F2FS_FT_MAX
411};
412
413#endif /* _LINUX_F2FS_FS_H */
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 12f68c7ceba6..873e086ce3a1 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -23,6 +23,7 @@
23#define EXT4_SUPER_MAGIC 0xEF53
24#define BTRFS_SUPER_MAGIC 0x9123683E
25#define NILFS_SUPER_MAGIC 0x3434
26#define F2FS_SUPER_MAGIC 0xF2F52010
27#define HPFS_SUPER_MAGIC 0xf995e849
28#define ISOFS_SUPER_MAGIC 0x9660
29#define JFFS2_SUPER_MAGIC 0x72b6