aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
authorJaegeuk Kim <jaegeuk.kim@samsung.com>2012-11-02 04:09:16 -0400
committerJaegeuk Kim <jaegeuk.kim@samsung.com>2012-12-10 23:43:40 -0500
commit351df4b201157351c7d26bf12c3eeb9dbce98854 (patch)
tree54c06e4c6d4369f8786292e2556d6f1b70c94ab3 /fs/f2fs
parente05df3b115e7308afbca652769b54e4549fcc723 (diff)
f2fs: add segment operations
This adds specific functions not only to manage dirty/free segments, SIT pages, a cache for SIT entries, and summary entries, but also to allocate free blocks and write three types of pages: data, node, and meta. - F2FS maintains three types of bitmaps in memory, which indicate free, prefree, and dirty segments respectively. - The key information of an SIT entry consists of a segment number, the number of valid blocks in the segment, and a bitmap identifying which blocks therein are valid or invalid. - An SIT page is composed of a certain range of SIT entries, which is maintained by the address space of meta_inode. - To cache SIT entries, a simple array is used. The index for the array is the segment number. - A summary entry for data contains the parent node information. A summary entry for node contains its node offset from the inode. - F2FS manages information about six active logs and their summary entries in memory. Whenever one of them is changed, its summary entries are flushed to its SIT page maintained by the address space of meta_inode. - This patch adds a default block allocation function which supports a heap-based allocation policy. - This patch adds core functions to write data, node, and meta pages. Since LFS basically produces a series of sequential writes, F2FS merges sequential bios into a single one as much as possible to reduce the IO scheduling overhead. Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/segment.c1798
1 files changed, 1798 insertions, 0 deletions
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
new file mode 100644
index 00000000000..ed7c079cfc7
--- /dev/null
+++ b/fs/f2fs/segment.c
@@ -0,0 +1,1798 @@
1/**
2 * fs/f2fs/segment.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/bio.h>
14#include <linux/blkdev.h>
15#include <linux/vmalloc.h>
16
17#include "f2fs.h"
18#include "segment.h"
19#include "node.h"
20
21static int need_to_flush(struct f2fs_sb_info *sbi)
22{
23 unsigned int pages_per_sec = (1 << sbi->log_blocks_per_seg) *
24 sbi->segs_per_sec;
25 int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
26 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
27 int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
28 >> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
29
30 if (sbi->por_doing)
31 return 0;
32
33 if (free_sections(sbi) <= (node_secs + 2 * dent_secs +
34 reserved_sections(sbi)))
35 return 1;
36 return 0;
37}
38
39/**
40 * This function balances dirty node and dentry pages.
41 * In addition, it controls garbage collection.
42 */
43void f2fs_balance_fs(struct f2fs_sb_info *sbi)
44{
45 struct writeback_control wbc = {
46 .sync_mode = WB_SYNC_ALL,
47 .nr_to_write = LONG_MAX,
48 .for_reclaim = 0,
49 };
50
51 if (sbi->por_doing)
52 return;
53
54 /*
55 * We should do checkpoint when there are so many dirty node pages
56 * with enough free segments. After then, we should do GC.
57 */
58 if (need_to_flush(sbi)) {
59 sync_dirty_dir_inodes(sbi);
60 sync_node_pages(sbi, 0, &wbc);
61 }
62
63 if (has_not_enough_free_secs(sbi)) {
64 mutex_lock(&sbi->gc_mutex);
65 f2fs_gc(sbi, 1);
66 }
67}
68
69static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
70 enum dirty_type dirty_type)
71{
72 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
73
74 /* need not be added */
75 if (IS_CURSEG(sbi, segno))
76 return;
77
78 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
79 dirty_i->nr_dirty[dirty_type]++;
80
81 if (dirty_type == DIRTY) {
82 struct seg_entry *sentry = get_seg_entry(sbi, segno);
83 dirty_type = sentry->type;
84 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
85 dirty_i->nr_dirty[dirty_type]++;
86 }
87}
88
89static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
90 enum dirty_type dirty_type)
91{
92 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
93
94 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
95 dirty_i->nr_dirty[dirty_type]--;
96
97 if (dirty_type == DIRTY) {
98 struct seg_entry *sentry = get_seg_entry(sbi, segno);
99 dirty_type = sentry->type;
100 if (test_and_clear_bit(segno,
101 dirty_i->dirty_segmap[dirty_type]))
102 dirty_i->nr_dirty[dirty_type]--;
103 clear_bit(segno, dirty_i->victim_segmap[FG_GC]);
104 clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
105 }
106}
107
108/**
109 * Should not occur error such as -ENOMEM.
110 * Adding dirty entry into seglist is not critical operation.
111 * If a given segment is one of current working segments, it won't be added.
112 */
113void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
114{
115 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
116 unsigned short valid_blocks;
117
118 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
119 return;
120
121 mutex_lock(&dirty_i->seglist_lock);
122
123 valid_blocks = get_valid_blocks(sbi, segno, 0);
124
125 if (valid_blocks == 0) {
126 __locate_dirty_segment(sbi, segno, PRE);
127 __remove_dirty_segment(sbi, segno, DIRTY);
128 } else if (valid_blocks < sbi->blocks_per_seg) {
129 __locate_dirty_segment(sbi, segno, DIRTY);
130 } else {
131 /* Recovery routine with SSR needs this */
132 __remove_dirty_segment(sbi, segno, DIRTY);
133 }
134
135 mutex_unlock(&dirty_i->seglist_lock);
136 return;
137}
138
139/**
140 * Should call clear_prefree_segments after checkpoint is done.
141 */
142static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
143{
144 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
145 unsigned int segno, offset = 0;
146 unsigned int total_segs = TOTAL_SEGS(sbi);
147
148 mutex_lock(&dirty_i->seglist_lock);
149 while (1) {
150 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
151 offset);
152 if (segno >= total_segs)
153 break;
154 __set_test_and_free(sbi, segno);
155 offset = segno + 1;
156 }
157 mutex_unlock(&dirty_i->seglist_lock);
158}
159
160void clear_prefree_segments(struct f2fs_sb_info *sbi)
161{
162 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
163 unsigned int segno, offset = 0;
164 unsigned int total_segs = TOTAL_SEGS(sbi);
165
166 mutex_lock(&dirty_i->seglist_lock);
167 while (1) {
168 segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs,
169 offset);
170 if (segno >= total_segs)
171 break;
172
173 offset = segno + 1;
174 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE]))
175 dirty_i->nr_dirty[PRE]--;
176
177 /* Let's use trim */
178 if (test_opt(sbi, DISCARD))
179 blkdev_issue_discard(sbi->sb->s_bdev,
180 START_BLOCK(sbi, segno) <<
181 sbi->log_sectors_per_block,
182 1 << (sbi->log_sectors_per_block +
183 sbi->log_blocks_per_seg),
184 GFP_NOFS, 0);
185 }
186 mutex_unlock(&dirty_i->seglist_lock);
187}
188
189static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
190{
191 struct sit_info *sit_i = SIT_I(sbi);
192 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap))
193 sit_i->dirty_sentries++;
194}
195
196static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
197 unsigned int segno, int modified)
198{
199 struct seg_entry *se = get_seg_entry(sbi, segno);
200 se->type = type;
201 if (modified)
202 __mark_sit_entry_dirty(sbi, segno);
203}
204
205static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
206{
207 struct seg_entry *se;
208 unsigned int segno, offset;
209 long int new_vblocks;
210
211 segno = GET_SEGNO(sbi, blkaddr);
212
213 se = get_seg_entry(sbi, segno);
214 new_vblocks = se->valid_blocks + del;
215 offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1);
216
217 BUG_ON((new_vblocks >> (sizeof(unsigned short) << 3) ||
218 (new_vblocks > sbi->blocks_per_seg)));
219
220 se->valid_blocks = new_vblocks;
221 se->mtime = get_mtime(sbi);
222 SIT_I(sbi)->max_mtime = se->mtime;
223
224 /* Update valid block bitmap */
225 if (del > 0) {
226 if (f2fs_set_bit(offset, se->cur_valid_map))
227 BUG();
228 } else {
229 if (!f2fs_clear_bit(offset, se->cur_valid_map))
230 BUG();
231 }
232 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
233 se->ckpt_valid_blocks += del;
234
235 __mark_sit_entry_dirty(sbi, segno);
236
237 /* update total number of valid blocks to be written in ckpt area */
238 SIT_I(sbi)->written_valid_blocks += del;
239
240 if (sbi->segs_per_sec > 1)
241 get_sec_entry(sbi, segno)->valid_blocks += del;
242}
243
244static void refresh_sit_entry(struct f2fs_sb_info *sbi,
245 block_t old_blkaddr, block_t new_blkaddr)
246{
247 update_sit_entry(sbi, new_blkaddr, 1);
248 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
249 update_sit_entry(sbi, old_blkaddr, -1);
250}
251
252void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
253{
254 unsigned int segno = GET_SEGNO(sbi, addr);
255 struct sit_info *sit_i = SIT_I(sbi);
256
257 BUG_ON(addr == NULL_ADDR);
258 if (addr == NEW_ADDR)
259 return;
260
261 /* add it into sit main buffer */
262 mutex_lock(&sit_i->sentry_lock);
263
264 update_sit_entry(sbi, addr, -1);
265
266 /* add it into dirty seglist */
267 locate_dirty_segment(sbi, segno);
268
269 mutex_unlock(&sit_i->sentry_lock);
270}
271
272/**
273 * This function should be resided under the curseg_mutex lock
274 */
275static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
276 struct f2fs_summary *sum, unsigned short offset)
277{
278 struct curseg_info *curseg = CURSEG_I(sbi, type);
279 void *addr = curseg->sum_blk;
280 addr += offset * sizeof(struct f2fs_summary);
281 memcpy(addr, sum, sizeof(struct f2fs_summary));
282 return;
283}
284
285/**
286 * Calculate the number of current summary pages for writing
287 */
288int npages_for_summary_flush(struct f2fs_sb_info *sbi)
289{
290 int total_size_bytes = 0;
291 int valid_sum_count = 0;
292 int i, sum_space;
293
294 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
295 if (sbi->ckpt->alloc_type[i] == SSR)
296 valid_sum_count += sbi->blocks_per_seg;
297 else
298 valid_sum_count += curseg_blkoff(sbi, i);
299 }
300
301 total_size_bytes = valid_sum_count * (SUMMARY_SIZE + 1)
302 + sizeof(struct nat_journal) + 2
303 + sizeof(struct sit_journal) + 2;
304 sum_space = PAGE_CACHE_SIZE - SUM_FOOTER_SIZE;
305 if (total_size_bytes < sum_space)
306 return 1;
307 else if (total_size_bytes < 2 * sum_space)
308 return 2;
309 return 3;
310}
311
312/**
313 * Caller should put this summary page
314 */
315struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
316{
317 return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
318}
319
320static void write_sum_page(struct f2fs_sb_info *sbi,
321 struct f2fs_summary_block *sum_blk, block_t blk_addr)
322{
323 struct page *page = grab_meta_page(sbi, blk_addr);
324 void *kaddr = page_address(page);
325 memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
326 set_page_dirty(page);
327 f2fs_put_page(page, 1);
328}
329
330static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi,
331 int ofs_unit, int type)
332{
333 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
334 unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE];
335 unsigned int segno, next_segno, i;
336 int ofs = 0;
337
338 /*
339 * If there is not enough reserved sections,
340 * we should not reuse prefree segments.
341 */
342 if (has_not_enough_free_secs(sbi))
343 return NULL_SEGNO;
344
345 /*
346 * NODE page should not reuse prefree segment,
347 * since those information is used for SPOR.
348 */
349 if (IS_NODESEG(type))
350 return NULL_SEGNO;
351next:
352 segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs++);
353 ofs = ((segno / ofs_unit) * ofs_unit) + ofs_unit;
354 if (segno < TOTAL_SEGS(sbi)) {
355 /* skip intermediate segments in a section */
356 if (segno % ofs_unit)
357 goto next;
358
359 /* skip if whole section is not prefree */
360 next_segno = find_next_zero_bit(prefree_segmap,
361 TOTAL_SEGS(sbi), segno + 1);
362 if (next_segno - segno < ofs_unit)
363 goto next;
364
365 /* skip if whole section was not free at the last checkpoint */
366 for (i = 0; i < ofs_unit; i++)
367 if (get_seg_entry(sbi, segno)->ckpt_valid_blocks)
368 goto next;
369 return segno;
370 }
371 return NULL_SEGNO;
372}
373
374/**
375 * Find a new segment from the free segments bitmap to right order
376 * This function should be returned with success, otherwise BUG
377 */
378static void get_new_segment(struct f2fs_sb_info *sbi,
379 unsigned int *newseg, bool new_sec, int dir)
380{
381 struct free_segmap_info *free_i = FREE_I(sbi);
382 unsigned int total_secs = sbi->total_sections;
383 unsigned int segno, secno, zoneno;
384 unsigned int total_zones = sbi->total_sections / sbi->secs_per_zone;
385 unsigned int hint = *newseg / sbi->segs_per_sec;
386 unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
387 unsigned int left_start = hint;
388 bool init = true;
389 int go_left = 0;
390 int i;
391
392 write_lock(&free_i->segmap_lock);
393
394 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
395 segno = find_next_zero_bit(free_i->free_segmap,
396 TOTAL_SEGS(sbi), *newseg + 1);
397 if (segno < TOTAL_SEGS(sbi))
398 goto got_it;
399 }
400find_other_zone:
401 secno = find_next_zero_bit(free_i->free_secmap, total_secs, hint);
402 if (secno >= total_secs) {
403 if (dir == ALLOC_RIGHT) {
404 secno = find_next_zero_bit(free_i->free_secmap,
405 total_secs, 0);
406 BUG_ON(secno >= total_secs);
407 } else {
408 go_left = 1;
409 left_start = hint - 1;
410 }
411 }
412 if (go_left == 0)
413 goto skip_left;
414
415 while (test_bit(left_start, free_i->free_secmap)) {
416 if (left_start > 0) {
417 left_start--;
418 continue;
419 }
420 left_start = find_next_zero_bit(free_i->free_secmap,
421 total_secs, 0);
422 BUG_ON(left_start >= total_secs);
423 break;
424 }
425 secno = left_start;
426skip_left:
427 hint = secno;
428 segno = secno * sbi->segs_per_sec;
429 zoneno = secno / sbi->secs_per_zone;
430
431 /* give up on finding another zone */
432 if (!init)
433 goto got_it;
434 if (sbi->secs_per_zone == 1)
435 goto got_it;
436 if (zoneno == old_zoneno)
437 goto got_it;
438 if (dir == ALLOC_LEFT) {
439 if (!go_left && zoneno + 1 >= total_zones)
440 goto got_it;
441 if (go_left && zoneno == 0)
442 goto got_it;
443 }
444 for (i = 0; i < NR_CURSEG_TYPE; i++)
445 if (CURSEG_I(sbi, i)->zone == zoneno)
446 break;
447
448 if (i < NR_CURSEG_TYPE) {
449 /* zone is in user, try another */
450 if (go_left)
451 hint = zoneno * sbi->secs_per_zone - 1;
452 else if (zoneno + 1 >= total_zones)
453 hint = 0;
454 else
455 hint = (zoneno + 1) * sbi->secs_per_zone;
456 init = false;
457 goto find_other_zone;
458 }
459got_it:
460 /* set it as dirty segment in free segmap */
461 BUG_ON(test_bit(segno, free_i->free_segmap));
462 __set_inuse(sbi, segno);
463 *newseg = segno;
464 write_unlock(&free_i->segmap_lock);
465}
466
467static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
468{
469 struct curseg_info *curseg = CURSEG_I(sbi, type);
470 struct summary_footer *sum_footer;
471
472 curseg->segno = curseg->next_segno;
473 curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
474 curseg->next_blkoff = 0;
475 curseg->next_segno = NULL_SEGNO;
476
477 sum_footer = &(curseg->sum_blk->footer);
478 memset(sum_footer, 0, sizeof(struct summary_footer));
479 if (IS_DATASEG(type))
480 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
481 if (IS_NODESEG(type))
482 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
483 __set_sit_entry_type(sbi, type, curseg->segno, modified);
484}
485
486/**
487 * Allocate a current working segment.
488 * This function always allocates a free segment in LFS manner.
489 */
490static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
491{
492 struct curseg_info *curseg = CURSEG_I(sbi, type);
493 unsigned int segno = curseg->segno;
494 int dir = ALLOC_LEFT;
495
496 write_sum_page(sbi, curseg->sum_blk,
497 GET_SUM_BLOCK(sbi, curseg->segno));
498 if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
499 dir = ALLOC_RIGHT;
500
501 if (test_opt(sbi, NOHEAP))
502 dir = ALLOC_RIGHT;
503
504 get_new_segment(sbi, &segno, new_sec, dir);
505 curseg->next_segno = segno;
506 reset_curseg(sbi, type, 1);
507 curseg->alloc_type = LFS;
508}
509
510static void __next_free_blkoff(struct f2fs_sb_info *sbi,
511 struct curseg_info *seg, block_t start)
512{
513 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
514 block_t ofs;
515 for (ofs = start; ofs < sbi->blocks_per_seg; ofs++) {
516 if (!f2fs_test_bit(ofs, se->ckpt_valid_map)
517 && !f2fs_test_bit(ofs, se->cur_valid_map))
518 break;
519 }
520 seg->next_blkoff = ofs;
521}
522
523/**
524 * If a segment is written by LFS manner, next block offset is just obtained
525 * by increasing the current block offset. However, if a segment is written by
526 * SSR manner, next block offset obtained by calling __next_free_blkoff
527 */
528static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
529 struct curseg_info *seg)
530{
531 if (seg->alloc_type == SSR)
532 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
533 else
534 seg->next_blkoff++;
535}
536
537/**
538 * This function always allocates a used segment (from dirty seglist) by SSR
539 * manner, so it should recover the existing segment information of valid blocks
540 */
541static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
542{
543 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
544 struct curseg_info *curseg = CURSEG_I(sbi, type);
545 unsigned int new_segno = curseg->next_segno;
546 struct f2fs_summary_block *sum_node;
547 struct page *sum_page;
548
549 write_sum_page(sbi, curseg->sum_blk,
550 GET_SUM_BLOCK(sbi, curseg->segno));
551 __set_test_and_inuse(sbi, new_segno);
552
553 mutex_lock(&dirty_i->seglist_lock);
554 __remove_dirty_segment(sbi, new_segno, PRE);
555 __remove_dirty_segment(sbi, new_segno, DIRTY);
556 mutex_unlock(&dirty_i->seglist_lock);
557
558 reset_curseg(sbi, type, 1);
559 curseg->alloc_type = SSR;
560 __next_free_blkoff(sbi, curseg, 0);
561
562 if (reuse) {
563 sum_page = get_sum_page(sbi, new_segno);
564 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
565 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
566 f2fs_put_page(sum_page, 1);
567 }
568}
569
570/*
571 * flush out current segment and replace it with new segment
572 * This function should be returned with success, otherwise BUG
573 */
574static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
575 int type, bool force)
576{
577 struct curseg_info *curseg = CURSEG_I(sbi, type);
578 unsigned int ofs_unit;
579
580 if (force) {
581 new_curseg(sbi, type, true);
582 goto out;
583 }
584
585 ofs_unit = need_SSR(sbi) ? 1 : sbi->segs_per_sec;
586 curseg->next_segno = check_prefree_segments(sbi, ofs_unit, type);
587
588 if (curseg->next_segno != NULL_SEGNO)
589 change_curseg(sbi, type, false);
590 else if (type == CURSEG_WARM_NODE)
591 new_curseg(sbi, type, false);
592 else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
593 change_curseg(sbi, type, true);
594 else
595 new_curseg(sbi, type, false);
596out:
597 sbi->segment_count[curseg->alloc_type]++;
598}
599
600void allocate_new_segments(struct f2fs_sb_info *sbi)
601{
602 struct curseg_info *curseg;
603 unsigned int old_curseg;
604 int i;
605
606 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
607 curseg = CURSEG_I(sbi, i);
608 old_curseg = curseg->segno;
609 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
610 locate_dirty_segment(sbi, old_curseg);
611 }
612}
613
614static const struct segment_allocation default_salloc_ops = {
615 .allocate_segment = allocate_segment_by_default,
616};
617
618static void f2fs_end_io_write(struct bio *bio, int err)
619{
620 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
621 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
622 struct bio_private *p = bio->bi_private;
623
624 do {
625 struct page *page = bvec->bv_page;
626
627 if (--bvec >= bio->bi_io_vec)
628 prefetchw(&bvec->bv_page->flags);
629 if (!uptodate) {
630 SetPageError(page);
631 if (page->mapping)
632 set_bit(AS_EIO, &page->mapping->flags);
633 p->sbi->ckpt->ckpt_flags |= CP_ERROR_FLAG;
634 set_page_dirty(page);
635 }
636 end_page_writeback(page);
637 dec_page_count(p->sbi, F2FS_WRITEBACK);
638 } while (bvec >= bio->bi_io_vec);
639
640 if (p->is_sync)
641 complete(p->wait);
642 kfree(p);
643 bio_put(bio);
644}
645
646struct bio *f2fs_bio_alloc(struct block_device *bdev, sector_t first_sector,
647 int nr_vecs, gfp_t gfp_flags)
648{
649 struct bio *bio;
650repeat:
651 /* allocate new bio */
652 bio = bio_alloc(gfp_flags, nr_vecs);
653
654 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
655 while (!bio && (nr_vecs /= 2))
656 bio = bio_alloc(gfp_flags, nr_vecs);
657 }
658 if (bio) {
659 bio->bi_bdev = bdev;
660 bio->bi_sector = first_sector;
661retry:
662 bio->bi_private = kmalloc(sizeof(struct bio_private),
663 GFP_NOFS | __GFP_HIGH);
664 if (!bio->bi_private) {
665 cond_resched();
666 goto retry;
667 }
668 }
669 if (bio == NULL) {
670 cond_resched();
671 goto repeat;
672 }
673 return bio;
674}
675
676static void do_submit_bio(struct f2fs_sb_info *sbi,
677 enum page_type type, bool sync)
678{
679 int rw = sync ? WRITE_SYNC : WRITE;
680 enum page_type btype = type > META ? META : type;
681
682 if (type >= META_FLUSH)
683 rw = WRITE_FLUSH_FUA;
684
685 if (sbi->bio[btype]) {
686 struct bio_private *p = sbi->bio[btype]->bi_private;
687 p->sbi = sbi;
688 sbi->bio[btype]->bi_end_io = f2fs_end_io_write;
689 if (type == META_FLUSH) {
690 DECLARE_COMPLETION_ONSTACK(wait);
691 p->is_sync = true;
692 p->wait = &wait;
693 submit_bio(rw, sbi->bio[btype]);
694 wait_for_completion(&wait);
695 } else {
696 p->is_sync = false;
697 submit_bio(rw, sbi->bio[btype]);
698 }
699 sbi->bio[btype] = NULL;
700 }
701}
702
703void f2fs_submit_bio(struct f2fs_sb_info *sbi, enum page_type type, bool sync)
704{
705 down_write(&sbi->bio_sem);
706 do_submit_bio(sbi, type, sync);
707 up_write(&sbi->bio_sem);
708}
709
710static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page,
711 block_t blk_addr, enum page_type type)
712{
713 struct block_device *bdev = sbi->sb->s_bdev;
714
715 verify_block_addr(sbi, blk_addr);
716
717 down_write(&sbi->bio_sem);
718
719 inc_page_count(sbi, F2FS_WRITEBACK);
720
721 if (sbi->bio[type] && sbi->last_block_in_bio[type] != blk_addr - 1)
722 do_submit_bio(sbi, type, false);
723alloc_new:
724 if (sbi->bio[type] == NULL)
725 sbi->bio[type] = f2fs_bio_alloc(bdev,
726 blk_addr << (sbi->log_blocksize - 9),
727 bio_get_nr_vecs(bdev), GFP_NOFS | __GFP_HIGH);
728
729 if (bio_add_page(sbi->bio[type], page, PAGE_CACHE_SIZE, 0) <
730 PAGE_CACHE_SIZE) {
731 do_submit_bio(sbi, type, false);
732 goto alloc_new;
733 }
734
735 sbi->last_block_in_bio[type] = blk_addr;
736
737 up_write(&sbi->bio_sem);
738}
739
740static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
741{
742 struct curseg_info *curseg = CURSEG_I(sbi, type);
743 if (curseg->next_blkoff < sbi->blocks_per_seg)
744 return true;
745 return false;
746}
747
748static int __get_segment_type_2(struct page *page, enum page_type p_type)
749{
750 if (p_type == DATA)
751 return CURSEG_HOT_DATA;
752 else
753 return CURSEG_HOT_NODE;
754}
755
756static int __get_segment_type_4(struct page *page, enum page_type p_type)
757{
758 if (p_type == DATA) {
759 struct inode *inode = page->mapping->host;
760
761 if (S_ISDIR(inode->i_mode))
762 return CURSEG_HOT_DATA;
763 else
764 return CURSEG_COLD_DATA;
765 } else {
766 if (IS_DNODE(page) && !is_cold_node(page))
767 return CURSEG_HOT_NODE;
768 else
769 return CURSEG_COLD_NODE;
770 }
771}
772
773static int __get_segment_type_6(struct page *page, enum page_type p_type)
774{
775 if (p_type == DATA) {
776 struct inode *inode = page->mapping->host;
777
778 if (S_ISDIR(inode->i_mode))
779 return CURSEG_HOT_DATA;
780 else if (is_cold_data(page) || is_cold_file(inode))
781 return CURSEG_COLD_DATA;
782 else
783 return CURSEG_WARM_DATA;
784 } else {
785 if (IS_DNODE(page))
786 return is_cold_node(page) ? CURSEG_WARM_NODE :
787 CURSEG_HOT_NODE;
788 else
789 return CURSEG_COLD_NODE;
790 }
791}
792
793static int __get_segment_type(struct page *page, enum page_type p_type)
794{
795 struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
796 switch (sbi->active_logs) {
797 case 2:
798 return __get_segment_type_2(page, p_type);
799 case 4:
800 return __get_segment_type_4(page, p_type);
801 case 6:
802 return __get_segment_type_6(page, p_type);
803 default:
804 BUG();
805 }
806}
807
808static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
809 block_t old_blkaddr, block_t *new_blkaddr,
810 struct f2fs_summary *sum, enum page_type p_type)
811{
812 struct sit_info *sit_i = SIT_I(sbi);
813 struct curseg_info *curseg;
814 unsigned int old_cursegno;
815 int type;
816
817 type = __get_segment_type(page, p_type);
818 curseg = CURSEG_I(sbi, type);
819
820 mutex_lock(&curseg->curseg_mutex);
821
822 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
823 old_cursegno = curseg->segno;
824
825 /*
826 * __add_sum_entry should be resided under the curseg_mutex
827 * because, this function updates a summary entry in the
828 * current summary block.
829 */
830 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
831
832 mutex_lock(&sit_i->sentry_lock);
833 __refresh_next_blkoff(sbi, curseg);
834 sbi->block_count[curseg->alloc_type]++;
835
836 /*
837 * SIT information should be updated before segment allocation,
838 * since SSR needs latest valid block information.
839 */
840 refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
841
842 if (!__has_curseg_space(sbi, type))
843 sit_i->s_ops->allocate_segment(sbi, type, false);
844
845 locate_dirty_segment(sbi, old_cursegno);
846 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
847 mutex_unlock(&sit_i->sentry_lock);
848
849 if (p_type == NODE)
850 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
851
852 /* writeout dirty page into bdev */
853 submit_write_page(sbi, page, *new_blkaddr, p_type);
854
855 mutex_unlock(&curseg->curseg_mutex);
856}
857
858int write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
859 struct writeback_control *wbc)
860{
861 if (wbc->for_reclaim)
862 return AOP_WRITEPAGE_ACTIVATE;
863
864 set_page_writeback(page);
865 submit_write_page(sbi, page, page->index, META);
866 return 0;
867}
868
869void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
870 unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
871{
872 struct f2fs_summary sum;
873 set_summary(&sum, nid, 0, 0);
874 do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, NODE);
875}
876
877void write_data_page(struct inode *inode, struct page *page,
878 struct dnode_of_data *dn, block_t old_blkaddr,
879 block_t *new_blkaddr)
880{
881 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
882 struct f2fs_summary sum;
883 struct node_info ni;
884
885 BUG_ON(old_blkaddr == NULL_ADDR);
886 get_node_info(sbi, dn->nid, &ni);
887 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
888
889 do_write_page(sbi, page, old_blkaddr,
890 new_blkaddr, &sum, DATA);
891}
892
893void rewrite_data_page(struct f2fs_sb_info *sbi, struct page *page,
894 block_t old_blk_addr)
895{
896 submit_write_page(sbi, page, old_blk_addr, DATA);
897}
898
899void recover_data_page(struct f2fs_sb_info *sbi,
900 struct page *page, struct f2fs_summary *sum,
901 block_t old_blkaddr, block_t new_blkaddr)
902{
903 struct sit_info *sit_i = SIT_I(sbi);
904 struct curseg_info *curseg;
905 unsigned int segno, old_cursegno;
906 struct seg_entry *se;
907 int type;
908
909 segno = GET_SEGNO(sbi, new_blkaddr);
910 se = get_seg_entry(sbi, segno);
911 type = se->type;
912
913 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
914 if (old_blkaddr == NULL_ADDR)
915 type = CURSEG_COLD_DATA;
916 else
917 type = CURSEG_WARM_DATA;
918 }
919 curseg = CURSEG_I(sbi, type);
920
921 mutex_lock(&curseg->curseg_mutex);
922 mutex_lock(&sit_i->sentry_lock);
923
924 old_cursegno = curseg->segno;
925
926 /* change the current segment */
927 if (segno != curseg->segno) {
928 curseg->next_segno = segno;
929 change_curseg(sbi, type, true);
930 }
931
932 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
933 (sbi->blocks_per_seg - 1);
934 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
935
936 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
937
938 locate_dirty_segment(sbi, old_cursegno);
939 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
940
941 mutex_unlock(&sit_i->sentry_lock);
942 mutex_unlock(&curseg->curseg_mutex);
943}
944
945void rewrite_node_page(struct f2fs_sb_info *sbi,
946 struct page *page, struct f2fs_summary *sum,
947 block_t old_blkaddr, block_t new_blkaddr)
948{
949 struct sit_info *sit_i = SIT_I(sbi);
950 int type = CURSEG_WARM_NODE;
951 struct curseg_info *curseg;
952 unsigned int segno, old_cursegno;
953 block_t next_blkaddr = next_blkaddr_of_node(page);
954 unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr);
955
956 curseg = CURSEG_I(sbi, type);
957
958 mutex_lock(&curseg->curseg_mutex);
959 mutex_lock(&sit_i->sentry_lock);
960
961 segno = GET_SEGNO(sbi, new_blkaddr);
962 old_cursegno = curseg->segno;
963
964 /* change the current segment */
965 if (segno != curseg->segno) {
966 curseg->next_segno = segno;
967 change_curseg(sbi, type, true);
968 }
969 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) &
970 (sbi->blocks_per_seg - 1);
971 __add_sum_entry(sbi, type, sum, curseg->next_blkoff);
972
973 /* change the current log to the next block addr in advance */
974 if (next_segno != segno) {
975 curseg->next_segno = next_segno;
976 change_curseg(sbi, type, true);
977 }
978 curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) &
979 (sbi->blocks_per_seg - 1);
980
981 /* rewrite node page */
982 set_page_writeback(page);
983 submit_write_page(sbi, page, new_blkaddr, NODE);
984 f2fs_submit_bio(sbi, NODE, true);
985 refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
986
987 locate_dirty_segment(sbi, old_cursegno);
988 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
989
990 mutex_unlock(&sit_i->sentry_lock);
991 mutex_unlock(&curseg->curseg_mutex);
992}
993
994static int read_compacted_summaries(struct f2fs_sb_info *sbi)
995{
996 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
997 struct curseg_info *seg_i;
998 unsigned char *kaddr;
999 struct page *page;
1000 block_t start;
1001 int i, j, offset;
1002
1003 start = start_sum_block(sbi);
1004
1005 page = get_meta_page(sbi, start++);
1006 kaddr = (unsigned char *)page_address(page);
1007
1008 /* Step 1: restore nat cache */
1009 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1010 memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
1011
1012 /* Step 2: restore sit cache */
1013 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1014 memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
1015 SUM_JOURNAL_SIZE);
1016 offset = 2 * SUM_JOURNAL_SIZE;
1017
1018 /* Step 3: restore summary entries */
1019 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1020 unsigned short blk_off;
1021 unsigned int segno;
1022
1023 seg_i = CURSEG_I(sbi, i);
1024 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
1025 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
1026 seg_i->next_segno = segno;
1027 reset_curseg(sbi, i, 0);
1028 seg_i->alloc_type = ckpt->alloc_type[i];
1029 seg_i->next_blkoff = blk_off;
1030
1031 if (seg_i->alloc_type == SSR)
1032 blk_off = sbi->blocks_per_seg;
1033
1034 for (j = 0; j < blk_off; j++) {
1035 struct f2fs_summary *s;
1036 s = (struct f2fs_summary *)(kaddr + offset);
1037 seg_i->sum_blk->entries[j] = *s;
1038 offset += SUMMARY_SIZE;
1039 if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1040 SUM_FOOTER_SIZE)
1041 continue;
1042
1043 f2fs_put_page(page, 1);
1044 page = NULL;
1045
1046 page = get_meta_page(sbi, start++);
1047 kaddr = (unsigned char *)page_address(page);
1048 offset = 0;
1049 }
1050 }
1051 f2fs_put_page(page, 1);
1052 return 0;
1053}
1054
1055static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
1056{
1057 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1058 struct f2fs_summary_block *sum;
1059 struct curseg_info *curseg;
1060 struct page *new;
1061 unsigned short blk_off;
1062 unsigned int segno = 0;
1063 block_t blk_addr = 0;
1064
1065 /* get segment number and block addr */
1066 if (IS_DATASEG(type)) {
1067 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
1068 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
1069 CURSEG_HOT_DATA]);
1070 if (ckpt->ckpt_flags & CP_UMOUNT_FLAG)
1071 blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
1072 else
1073 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
1074 } else {
1075 segno = le32_to_cpu(ckpt->cur_node_segno[type -
1076 CURSEG_HOT_NODE]);
1077 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
1078 CURSEG_HOT_NODE]);
1079 if (ckpt->ckpt_flags & CP_UMOUNT_FLAG)
1080 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
1081 type - CURSEG_HOT_NODE);
1082 else
1083 blk_addr = GET_SUM_BLOCK(sbi, segno);
1084 }
1085
1086 new = get_meta_page(sbi, blk_addr);
1087 sum = (struct f2fs_summary_block *)page_address(new);
1088
1089 if (IS_NODESEG(type)) {
1090 if (ckpt->ckpt_flags & CP_UMOUNT_FLAG) {
1091 struct f2fs_summary *ns = &sum->entries[0];
1092 int i;
1093 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
1094 ns->version = 0;
1095 ns->ofs_in_node = 0;
1096 }
1097 } else {
1098 if (restore_node_summary(sbi, segno, sum)) {
1099 f2fs_put_page(new, 1);
1100 return -EINVAL;
1101 }
1102 }
1103 }
1104
1105 /* set uncompleted segment to curseg */
1106 curseg = CURSEG_I(sbi, type);
1107 mutex_lock(&curseg->curseg_mutex);
1108 memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
1109 curseg->next_segno = segno;
1110 reset_curseg(sbi, type, 0);
1111 curseg->alloc_type = ckpt->alloc_type[type];
1112 curseg->next_blkoff = blk_off;
1113 mutex_unlock(&curseg->curseg_mutex);
1114 f2fs_put_page(new, 1);
1115 return 0;
1116}
1117
1118static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1119{
1120 int type = CURSEG_HOT_DATA;
1121
1122 if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG) {
1123 /* restore for compacted data summary */
1124 if (read_compacted_summaries(sbi))
1125 return -EINVAL;
1126 type = CURSEG_HOT_NODE;
1127 }
1128
1129 for (; type <= CURSEG_COLD_NODE; type++)
1130 if (read_normal_summaries(sbi, type))
1131 return -EINVAL;
1132 return 0;
1133}
1134
1135static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
1136{
1137 struct page *page;
1138 unsigned char *kaddr;
1139 struct f2fs_summary *summary;
1140 struct curseg_info *seg_i;
1141 int written_size = 0;
1142 int i, j;
1143
1144 page = grab_meta_page(sbi, blkaddr++);
1145 kaddr = (unsigned char *)page_address(page);
1146
1147 /* Step 1: write nat cache */
1148 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
1149 memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
1150 written_size += SUM_JOURNAL_SIZE;
1151
1152 /* Step 2: write sit cache */
1153 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
1154 memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
1155 SUM_JOURNAL_SIZE);
1156 written_size += SUM_JOURNAL_SIZE;
1157
1158 set_page_dirty(page);
1159
1160 /* Step 3: write summary entries */
1161 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
1162 unsigned short blkoff;
1163 seg_i = CURSEG_I(sbi, i);
1164 if (sbi->ckpt->alloc_type[i] == SSR)
1165 blkoff = sbi->blocks_per_seg;
1166 else
1167 blkoff = curseg_blkoff(sbi, i);
1168
1169 for (j = 0; j < blkoff; j++) {
1170 if (!page) {
1171 page = grab_meta_page(sbi, blkaddr++);
1172 kaddr = (unsigned char *)page_address(page);
1173 written_size = 0;
1174 }
1175 summary = (struct f2fs_summary *)(kaddr + written_size);
1176 *summary = seg_i->sum_blk->entries[j];
1177 written_size += SUMMARY_SIZE;
1178 set_page_dirty(page);
1179
1180 if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
1181 SUM_FOOTER_SIZE)
1182 continue;
1183
1184 f2fs_put_page(page, 1);
1185 page = NULL;
1186 }
1187 }
1188 if (page)
1189 f2fs_put_page(page, 1);
1190}
1191
1192static void write_normal_summaries(struct f2fs_sb_info *sbi,
1193 block_t blkaddr, int type)
1194{
1195 int i, end;
1196 if (IS_DATASEG(type))
1197 end = type + NR_CURSEG_DATA_TYPE;
1198 else
1199 end = type + NR_CURSEG_NODE_TYPE;
1200
1201 for (i = type; i < end; i++) {
1202 struct curseg_info *sum = CURSEG_I(sbi, i);
1203 mutex_lock(&sum->curseg_mutex);
1204 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1205 mutex_unlock(&sum->curseg_mutex);
1206 }
1207}
1208
1209void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1210{
1211 if (sbi->ckpt->ckpt_flags & CP_COMPACT_SUM_FLAG)
1212 write_compacted_summaries(sbi, start_blk);
1213 else
1214 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1215}
1216
1217void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1218{
1219 if (sbi->ckpt->ckpt_flags & CP_UMOUNT_FLAG)
1220 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1221 return;
1222}
1223
1224int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1225 unsigned int val, int alloc)
1226{
1227 int i;
1228
1229 if (type == NAT_JOURNAL) {
1230 for (i = 0; i < nats_in_cursum(sum); i++) {
1231 if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1232 return i;
1233 }
1234 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1235 return update_nats_in_cursum(sum, 1);
1236 } else if (type == SIT_JOURNAL) {
1237 for (i = 0; i < sits_in_cursum(sum); i++)
1238 if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1239 return i;
1240 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1241 return update_sits_in_cursum(sum, 1);
1242 }
1243 return -1;
1244}
1245
1246static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1247 unsigned int segno)
1248{
1249 struct sit_info *sit_i = SIT_I(sbi);
1250 unsigned int offset = SIT_BLOCK_OFFSET(sit_i, segno);
1251 block_t blk_addr = sit_i->sit_base_addr + offset;
1252
1253 check_seg_range(sbi, segno);
1254
1255 /* calculate sit block address */
1256 if (f2fs_test_bit(offset, sit_i->sit_bitmap))
1257 blk_addr += sit_i->sit_blocks;
1258
1259 return get_meta_page(sbi, blk_addr);
1260}
1261
1262static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1263 unsigned int start)
1264{
1265 struct sit_info *sit_i = SIT_I(sbi);
1266 struct page *src_page, *dst_page;
1267 pgoff_t src_off, dst_off;
1268 void *src_addr, *dst_addr;
1269
1270 src_off = current_sit_addr(sbi, start);
1271 dst_off = next_sit_addr(sbi, src_off);
1272
1273 /* get current sit block page without lock */
1274 src_page = get_meta_page(sbi, src_off);
1275 dst_page = grab_meta_page(sbi, dst_off);
1276 BUG_ON(PageDirty(src_page));
1277
1278 src_addr = page_address(src_page);
1279 dst_addr = page_address(dst_page);
1280 memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
1281
1282 set_page_dirty(dst_page);
1283 f2fs_put_page(src_page, 1);
1284
1285 set_to_next_sit(sit_i, start);
1286
1287 return dst_page;
1288}
1289
1290static bool flush_sits_in_journal(struct f2fs_sb_info *sbi)
1291{
1292 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1293 struct f2fs_summary_block *sum = curseg->sum_blk;
1294 int i;
1295
1296 /*
1297 * If the journal area in the current summary is full of sit entries,
1298 * all the sit entries will be flushed. Otherwise the sit entries
1299 * are not able to replace with newly hot sit entries.
1300 */
1301 if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) {
1302 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1303 unsigned int segno;
1304 segno = le32_to_cpu(segno_in_journal(sum, i));
1305 __mark_sit_entry_dirty(sbi, segno);
1306 }
1307 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1308 return 1;
1309 }
1310 return 0;
1311}
1312
1313/**
1314 * CP calls this function, which flushes SIT entries including sit_journal,
1315 * and moves prefree segs to free segs.
1316 */
1317void flush_sit_entries(struct f2fs_sb_info *sbi)
1318{
1319 struct sit_info *sit_i = SIT_I(sbi);
1320 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1321 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1322 struct f2fs_summary_block *sum = curseg->sum_blk;
1323 unsigned long nsegs = TOTAL_SEGS(sbi);
1324 struct page *page = NULL;
1325 struct f2fs_sit_block *raw_sit = NULL;
1326 unsigned int start = 0, end = 0;
1327 unsigned int segno = -1;
1328 bool flushed;
1329
1330 mutex_lock(&curseg->curseg_mutex);
1331 mutex_lock(&sit_i->sentry_lock);
1332
1333 /*
1334 * "flushed" indicates whether sit entries in journal are flushed
1335 * to the SIT area or not.
1336 */
1337 flushed = flush_sits_in_journal(sbi);
1338
1339 while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) {
1340 struct seg_entry *se = get_seg_entry(sbi, segno);
1341 int sit_offset, offset;
1342
1343 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1344
1345 if (flushed)
1346 goto to_sit_page;
1347
1348 offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1);
1349 if (offset >= 0) {
1350 segno_in_journal(sum, offset) = cpu_to_le32(segno);
1351 seg_info_to_raw_sit(se, &sit_in_journal(sum, offset));
1352 goto flush_done;
1353 }
1354to_sit_page:
1355 if (!page || (start > segno) || (segno > end)) {
1356 if (page) {
1357 f2fs_put_page(page, 1);
1358 page = NULL;
1359 }
1360
1361 start = START_SEGNO(sit_i, segno);
1362 end = start + SIT_ENTRY_PER_BLOCK - 1;
1363
1364 /* read sit block that will be updated */
1365 page = get_next_sit_page(sbi, start);
1366 raw_sit = page_address(page);
1367 }
1368
1369 /* udpate entry in SIT block */
1370 seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]);
1371flush_done:
1372 __clear_bit(segno, bitmap);
1373 sit_i->dirty_sentries--;
1374 }
1375 mutex_unlock(&sit_i->sentry_lock);
1376 mutex_unlock(&curseg->curseg_mutex);
1377
1378 /* writeout last modified SIT block */
1379 f2fs_put_page(page, 1);
1380
1381 set_prefree_as_free_segments(sbi);
1382}
1383
1384static int build_sit_info(struct f2fs_sb_info *sbi)
1385{
1386 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1387 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1388 struct sit_info *sit_i;
1389 unsigned int sit_segs, start;
1390 char *src_bitmap, *dst_bitmap;
1391 unsigned int bitmap_size;
1392
1393 /* allocate memory for SIT information */
1394 sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1395 if (!sit_i)
1396 return -ENOMEM;
1397
1398 SM_I(sbi)->sit_info = sit_i;
1399
1400 sit_i->sentries = vzalloc(TOTAL_SEGS(sbi) * sizeof(struct seg_entry));
1401 if (!sit_i->sentries)
1402 return -ENOMEM;
1403
1404 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1405 sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1406 if (!sit_i->dirty_sentries_bitmap)
1407 return -ENOMEM;
1408
1409 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1410 sit_i->sentries[start].cur_valid_map
1411 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1412 sit_i->sentries[start].ckpt_valid_map
1413 = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1414 if (!sit_i->sentries[start].cur_valid_map
1415 || !sit_i->sentries[start].ckpt_valid_map)
1416 return -ENOMEM;
1417 }
1418
1419 if (sbi->segs_per_sec > 1) {
1420 sit_i->sec_entries = vzalloc(sbi->total_sections *
1421 sizeof(struct sec_entry));
1422 if (!sit_i->sec_entries)
1423 return -ENOMEM;
1424 }
1425
1426 /* get information related with SIT */
1427 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1428
1429 /* setup SIT bitmap from ckeckpoint pack */
1430 bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1431 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1432
1433 dst_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1434 if (!dst_bitmap)
1435 return -ENOMEM;
1436 memcpy(dst_bitmap, src_bitmap, bitmap_size);
1437
1438 /* init SIT information */
1439 sit_i->s_ops = &default_salloc_ops;
1440
1441 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1442 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1443 sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1444 sit_i->sit_bitmap = dst_bitmap;
1445 sit_i->bitmap_size = bitmap_size;
1446 sit_i->dirty_sentries = 0;
1447 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1448 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1449 sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1450 mutex_init(&sit_i->sentry_lock);
1451 return 0;
1452}
1453
1454static int build_free_segmap(struct f2fs_sb_info *sbi)
1455{
1456 struct f2fs_sm_info *sm_info = SM_I(sbi);
1457 struct free_segmap_info *free_i;
1458 unsigned int bitmap_size, sec_bitmap_size;
1459
1460 /* allocate memory for free segmap information */
1461 free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1462 if (!free_i)
1463 return -ENOMEM;
1464
1465 SM_I(sbi)->free_info = free_i;
1466
1467 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1468 free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1469 if (!free_i->free_segmap)
1470 return -ENOMEM;
1471
1472 sec_bitmap_size = f2fs_bitmap_size(sbi->total_sections);
1473 free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1474 if (!free_i->free_secmap)
1475 return -ENOMEM;
1476
1477 /* set all segments as dirty temporarily */
1478 memset(free_i->free_segmap, 0xff, bitmap_size);
1479 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1480
1481 /* init free segmap information */
1482 free_i->start_segno =
1483 (unsigned int) GET_SEGNO_FROM_SEG0(sbi, sm_info->main_blkaddr);
1484 free_i->free_segments = 0;
1485 free_i->free_sections = 0;
1486 rwlock_init(&free_i->segmap_lock);
1487 return 0;
1488}
1489
1490static int build_curseg(struct f2fs_sb_info *sbi)
1491{
1492 struct curseg_info *array = NULL;
1493 int i;
1494
1495 array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL);
1496 if (!array)
1497 return -ENOMEM;
1498
1499 SM_I(sbi)->curseg_array = array;
1500
1501 for (i = 0; i < NR_CURSEG_TYPE; i++) {
1502 mutex_init(&array[i].curseg_mutex);
1503 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1504 if (!array[i].sum_blk)
1505 return -ENOMEM;
1506 array[i].segno = NULL_SEGNO;
1507 array[i].next_blkoff = 0;
1508 }
1509 return restore_curseg_summaries(sbi);
1510}
1511
1512static void build_sit_entries(struct f2fs_sb_info *sbi)
1513{
1514 struct sit_info *sit_i = SIT_I(sbi);
1515 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1516 struct f2fs_summary_block *sum = curseg->sum_blk;
1517 unsigned int start;
1518
1519 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1520 struct seg_entry *se = &sit_i->sentries[start];
1521 struct f2fs_sit_block *sit_blk;
1522 struct f2fs_sit_entry sit;
1523 struct page *page;
1524 int i;
1525
1526 mutex_lock(&curseg->curseg_mutex);
1527 for (i = 0; i < sits_in_cursum(sum); i++) {
1528 if (le32_to_cpu(segno_in_journal(sum, i)) == start) {
1529 sit = sit_in_journal(sum, i);
1530 mutex_unlock(&curseg->curseg_mutex);
1531 goto got_it;
1532 }
1533 }
1534 mutex_unlock(&curseg->curseg_mutex);
1535 page = get_current_sit_page(sbi, start);
1536 sit_blk = (struct f2fs_sit_block *)page_address(page);
1537 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
1538 f2fs_put_page(page, 1);
1539got_it:
1540 check_block_count(sbi, start, &sit);
1541 seg_info_from_raw_sit(se, &sit);
1542 if (sbi->segs_per_sec > 1) {
1543 struct sec_entry *e = get_sec_entry(sbi, start);
1544 e->valid_blocks += se->valid_blocks;
1545 }
1546 }
1547}
1548
1549static void init_free_segmap(struct f2fs_sb_info *sbi)
1550{
1551 unsigned int start;
1552 int type;
1553
1554 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1555 struct seg_entry *sentry = get_seg_entry(sbi, start);
1556 if (!sentry->valid_blocks)
1557 __set_free(sbi, start);
1558 }
1559
1560 /* set use the current segments */
1561 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1562 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1563 __set_test_and_inuse(sbi, curseg_t->segno);
1564 }
1565}
1566
1567static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1568{
1569 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1570 struct free_segmap_info *free_i = FREE_I(sbi);
1571 unsigned int segno = 0, offset = 0;
1572 unsigned short valid_blocks;
1573
1574 while (segno < TOTAL_SEGS(sbi)) {
1575 /* find dirty segment based on free segmap */
1576 segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset);
1577 if (segno >= TOTAL_SEGS(sbi))
1578 break;
1579 offset = segno + 1;
1580 valid_blocks = get_valid_blocks(sbi, segno, 0);
1581 if (valid_blocks >= sbi->blocks_per_seg || !valid_blocks)
1582 continue;
1583 mutex_lock(&dirty_i->seglist_lock);
1584 __locate_dirty_segment(sbi, segno, DIRTY);
1585 mutex_unlock(&dirty_i->seglist_lock);
1586 }
1587}
1588
1589static int init_victim_segmap(struct f2fs_sb_info *sbi)
1590{
1591 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1592 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1593
1594 dirty_i->victim_segmap[FG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1595 dirty_i->victim_segmap[BG_GC] = kzalloc(bitmap_size, GFP_KERNEL);
1596 if (!dirty_i->victim_segmap[FG_GC] || !dirty_i->victim_segmap[BG_GC])
1597 return -ENOMEM;
1598 return 0;
1599}
1600
1601static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1602{
1603 struct dirty_seglist_info *dirty_i;
1604 unsigned int bitmap_size, i;
1605
1606 /* allocate memory for dirty segments list information */
1607 dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
1608 if (!dirty_i)
1609 return -ENOMEM;
1610
1611 SM_I(sbi)->dirty_info = dirty_i;
1612 mutex_init(&dirty_i->seglist_lock);
1613
1614 bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1615
1616 for (i = 0; i < NR_DIRTY_TYPE; i++) {
1617 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1618 dirty_i->nr_dirty[i] = 0;
1619 if (!dirty_i->dirty_segmap[i])
1620 return -ENOMEM;
1621 }
1622
1623 init_dirty_segmap(sbi);
1624 return init_victim_segmap(sbi);
1625}
1626
1627/**
1628 * Update min, max modified time for cost-benefit GC algorithm
1629 */
1630static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1631{
1632 struct sit_info *sit_i = SIT_I(sbi);
1633 unsigned int segno;
1634
1635 mutex_lock(&sit_i->sentry_lock);
1636
1637 sit_i->min_mtime = LLONG_MAX;
1638
1639 for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) {
1640 unsigned int i;
1641 unsigned long long mtime = 0;
1642
1643 for (i = 0; i < sbi->segs_per_sec; i++)
1644 mtime += get_seg_entry(sbi, segno + i)->mtime;
1645
1646 mtime = div_u64(mtime, sbi->segs_per_sec);
1647
1648 if (sit_i->min_mtime > mtime)
1649 sit_i->min_mtime = mtime;
1650 }
1651 sit_i->max_mtime = get_mtime(sbi);
1652 mutex_unlock(&sit_i->sentry_lock);
1653}
1654
1655int build_segment_manager(struct f2fs_sb_info *sbi)
1656{
1657 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1658 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1659 struct f2fs_sm_info *sm_info = NULL;
1660 int err;
1661
1662 sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
1663 if (!sm_info)
1664 return -ENOMEM;
1665
1666 /* init sm info */
1667 sbi->sm_info = sm_info;
1668 INIT_LIST_HEAD(&sm_info->wblist_head);
1669 spin_lock_init(&sm_info->wblist_lock);
1670 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
1671 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
1672 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
1673 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
1674 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
1675 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
1676 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
1677
1678 err = build_sit_info(sbi);
1679 if (err)
1680 return err;
1681 err = build_free_segmap(sbi);
1682 if (err)
1683 return err;
1684 err = build_curseg(sbi);
1685 if (err)
1686 return err;
1687
1688 /* reinit free segmap based on SIT */
1689 build_sit_entries(sbi);
1690
1691 init_free_segmap(sbi);
1692 err = build_dirty_segmap(sbi);
1693 if (err)
1694 return err;
1695
1696 init_min_max_mtime(sbi);
1697 return 0;
1698}
1699
1700static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
1701 enum dirty_type dirty_type)
1702{
1703 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1704
1705 mutex_lock(&dirty_i->seglist_lock);
1706 kfree(dirty_i->dirty_segmap[dirty_type]);
1707 dirty_i->nr_dirty[dirty_type] = 0;
1708 mutex_unlock(&dirty_i->seglist_lock);
1709}
1710
1711void reset_victim_segmap(struct f2fs_sb_info *sbi)
1712{
1713 unsigned int bitmap_size = f2fs_bitmap_size(TOTAL_SEGS(sbi));
1714 memset(DIRTY_I(sbi)->victim_segmap[FG_GC], 0, bitmap_size);
1715}
1716
1717static void destroy_victim_segmap(struct f2fs_sb_info *sbi)
1718{
1719 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1720
1721 kfree(dirty_i->victim_segmap[FG_GC]);
1722 kfree(dirty_i->victim_segmap[BG_GC]);
1723}
1724
1725static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
1726{
1727 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1728 int i;
1729
1730 if (!dirty_i)
1731 return;
1732
1733 /* discard pre-free/dirty segments list */
1734 for (i = 0; i < NR_DIRTY_TYPE; i++)
1735 discard_dirty_segmap(sbi, i);
1736
1737 destroy_victim_segmap(sbi);
1738 SM_I(sbi)->dirty_info = NULL;
1739 kfree(dirty_i);
1740}
1741
1742static void destroy_curseg(struct f2fs_sb_info *sbi)
1743{
1744 struct curseg_info *array = SM_I(sbi)->curseg_array;
1745 int i;
1746
1747 if (!array)
1748 return;
1749 SM_I(sbi)->curseg_array = NULL;
1750 for (i = 0; i < NR_CURSEG_TYPE; i++)
1751 kfree(array[i].sum_blk);
1752 kfree(array);
1753}
1754
1755static void destroy_free_segmap(struct f2fs_sb_info *sbi)
1756{
1757 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
1758 if (!free_i)
1759 return;
1760 SM_I(sbi)->free_info = NULL;
1761 kfree(free_i->free_segmap);
1762 kfree(free_i->free_secmap);
1763 kfree(free_i);
1764}
1765
1766static void destroy_sit_info(struct f2fs_sb_info *sbi)
1767{
1768 struct sit_info *sit_i = SIT_I(sbi);
1769 unsigned int start;
1770
1771 if (!sit_i)
1772 return;
1773
1774 if (sit_i->sentries) {
1775 for (start = 0; start < TOTAL_SEGS(sbi); start++) {
1776 kfree(sit_i->sentries[start].cur_valid_map);
1777 kfree(sit_i->sentries[start].ckpt_valid_map);
1778 }
1779 }
1780 vfree(sit_i->sentries);
1781 vfree(sit_i->sec_entries);
1782 kfree(sit_i->dirty_sentries_bitmap);
1783
1784 SM_I(sbi)->sit_info = NULL;
1785 kfree(sit_i->sit_bitmap);
1786 kfree(sit_i);
1787}
1788
1789void destroy_segment_manager(struct f2fs_sb_info *sbi)
1790{
1791 struct f2fs_sm_info *sm_info = SM_I(sbi);
1792 destroy_dirty_segmap(sbi);
1793 destroy_curseg(sbi);
1794 destroy_free_segmap(sbi);
1795 destroy_sit_info(sbi);
1796 sbi->sm_info = NULL;
1797 kfree(sm_info);
1798}