author    Jaegeuk Kim <jaegeuk.kim@samsung.com>	2012-11-02 04:13:01 -0400
committer Jaegeuk Kim <jaegeuk.kim@samsung.com>	2012-12-10 23:43:41 -0500
commit    7bc0900347e069a1676d28ad6f98cafaf8cfd6e9 (patch)
tree      d1c89e27d39e9137dbed44af293af8f7ce8da29e /fs/f2fs
parent    af48b85b8cd3fbb12c9b6759c16db6d69c0b03da (diff)
f2fs: add garbage collection functions
This adds on-demand and background cleaning functions.

- The basic background cleaning policy is to do cleaning jobs as much as
  possible whenever the system is idle. Once the background cleaning is
  done, the cleaner sleeps for an amount of time so as not to interfere
  with VFS calls. The time is adjusted dynamically according to the status
  of the whole segments: it is decreased when the following conditions are
  satisfied.

  . GC is not conducted currently, and
  . the IO subsystem is idle, judged by the number of requests in the
    bdev's request list, and
  . there are enough dirty segments.

  Otherwise, the time is increased incrementally up to the maximum time.
  Note that the minimum and maximum times are 10 secs and 30 secs by
  default.

- F2FS adopts a default victim selection policy where background cleaning
  uses a cost-benefit algorithm, while on-demand cleaning uses a greedy
  algorithm.

- The method of moving data during cleaning differs slightly between the
  background and on-demand cleaning schemes. In the case of background
  cleaning, F2FS loads the data and marks it dirty, expecting the data to
  be moved later by the flusher or the VM. In the case of on-demand
  cleaning, F2FS moves the data right away.

- In order to identify valid blocks in a victim segment, F2FS scans the
  bitmap of the segment, which is managed as an SIT entry.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
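The two victim scores can be illustrated with the following stand-alone
sketch (not part of the patch; it mirrors the formulas of get_gc_cost()
and get_cb_cost() in the diff below, with utilization u and age already
scaled to 0..100):

	/* Stand-alone sketch: greedy vs. cost-benefit victim scores. */
	#include <stdio.h>
	#include <limits.h>

	/* Greedy: the score is simply the number of valid blocks. */
	static unsigned int greedy_cost(unsigned int valid_blocks)
	{
		return valid_blocks;
	}

	/*
	 * Cost-benefit: prefer old (high age) and mostly-invalid (low u)
	 * sections; the value is subtracted from UINT_MAX because the
	 * caller always picks the minimum score.
	 */
	static unsigned int cb_cost(unsigned int u, unsigned int age)
	{
		return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
	}

	int main(void)
	{
		/* An old, 10% utilized section wins over a young, 50% one. */
		printf("old/empty : %u\n", cb_cost(10, 90));
		printf("young/full: %u\n", cb_cost(50, 20));
		printf("greedy    : %u vs %u\n", greedy_cost(51),
						greedy_cost(256));
		return 0;
	}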
Diffstat (limited to 'fs/f2fs')
-rw-r--r--	fs/f2fs/gc.c	742
-rw-r--r--	fs/f2fs/gc.h	117
2 files changed, 859 insertions, 0 deletions
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
new file mode 100644
index 000000000000..46774ce3ae03
--- /dev/null
+++ b/fs/f2fs/gc.c
@@ -0,0 +1,742 @@
/**
 * fs/f2fs/gc.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/blkdev.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "gc.h"

static struct kmem_cache *winode_slab;

static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	long wait_ms;

	wait_ms = GC_THREAD_MIN_SLEEP_TIME;

	do {
		if (try_to_freeze())
			continue;
		else
			wait_event_interruptible_timeout(*wq,
					kthread_should_stop(),
					msecs_to_jiffies(wait_ms));
		if (kthread_should_stop())
			break;

		f2fs_balance_fs(sbi);

		if (!test_opt(sbi, BG_GC))
			continue;

		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
		 * Note) We have to avoid triggering GCs too frequently,
		 * because some segments can be invalidated soon after by
		 * user update or deletion, so we should wait a while to
		 * collect more dirty segments.
		 */
		if (!mutex_trylock(&sbi->gc_mutex))
			continue;

		if (!is_idle(sbi)) {
			wait_ms = increase_sleep_time(wait_ms);
			mutex_unlock(&sbi->gc_mutex);
			continue;
		}

		if (has_enough_invalid_blocks(sbi))
			wait_ms = decrease_sleep_time(wait_ms);
		else
			wait_ms = increase_sleep_time(wait_ms);

		sbi->bg_gc++;

		if (f2fs_gc(sbi, 1) == GC_NONE)
			wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
		else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME)
			wait_ms = GC_THREAD_MAX_SLEEP_TIME;

	} while (!kthread_should_stop());
	return 0;
}

int start_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = NULL;

	gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
	if (!gc_th)
		return -ENOMEM;

	sbi->gc_thread = gc_th;
	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
				GC_THREAD_NAME);
	if (IS_ERR(gc_th->f2fs_gc_task)) {
		kfree(gc_th);
		return -ENOMEM;
	}
	return 0;
}

void stop_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	if (!gc_th)
		return;
	kthread_stop(gc_th->f2fs_gc_task);
	kfree(gc_th);
	sbi->gc_thread = NULL;
}

static int select_gc_type(int gc_type)
{
	return (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
}

static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
			int type, struct victim_sel_policy *p)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (p->alloc_mode) {
		p->gc_mode = GC_GREEDY;
		p->dirty_segmap = dirty_i->dirty_segmap[type];
		p->ofs_unit = 1;
	} else {
		p->gc_mode = select_gc_type(gc_type);
		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
		p->ofs_unit = sbi->segs_per_sec;
	}
	p->offset = sbi->last_victim[p->gc_mode];
}

static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p)
{
	if (p->gc_mode == GC_GREEDY)
		return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
	else if (p->gc_mode == GC_CB)
		return UINT_MAX;
	else /* No other gc_mode */
		return 0;
}

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	/*
	 * If the gc_type is FG_GC, we can select the victim segments
	 * selected by background GC before.
	 * Those segments are guaranteed to have few valid blocks.
	 */
	segno = find_next_bit(dirty_i->victim_segmap[BG_GC],
						TOTAL_SEGS(sbi), 0);
	if (segno < TOTAL_SEGS(sbi)) {
		clear_bit(segno, dirty_i->victim_segmap[BG_GC]);
		return segno;
	}
	return NULL_SEGNO;
}

static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int secno = GET_SECNO(sbi, segno);
	unsigned int start = secno * sbi->segs_per_sec;
	unsigned long long mtime = 0;
	unsigned int vblocks;
	unsigned char age = 0;
	unsigned char u;
	unsigned int i;

	for (i = 0; i < sbi->segs_per_sec; i++)
		mtime += get_seg_entry(sbi, start + i)->mtime;
	vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);

	mtime = div_u64(mtime, sbi->segs_per_sec);
	vblocks = div_u64(vblocks, sbi->segs_per_sec);

	u = (vblocks * 100) >> sbi->log_blocks_per_seg;

	/* Handle the case where the user changed the system time */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;
	if (sit_i->max_mtime != sit_i->min_mtime)
		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
				sit_i->max_mtime - sit_i->min_mtime);
	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}

static unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno,
			struct victim_sel_policy *p)
{
	if (p->alloc_mode == SSR)
		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

	/* alloc_mode == LFS */
	if (p->gc_mode == GC_GREEDY)
		return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
	else
		return get_cb_cost(sbi, segno);
}

/**
 * This function is called from two paths.
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and it does not remove it from the dirty seglist.
 * When it is called from SSR segment selection, it finds a segment
 * which has minimum valid blocks and removes it from the dirty seglist.
 */
static int get_victim_by_default(struct f2fs_sb_info *sbi,
		unsigned int *result, int gc_type, int type, char alloc_mode)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct victim_sel_policy p;
	unsigned int segno;
	int nsearched = 0;

	p.alloc_mode = alloc_mode;
	select_policy(sbi, gc_type, type, &p);

	p.min_segno = NULL_SEGNO;
	p.min_cost = get_max_cost(sbi, &p);

	mutex_lock(&dirty_i->seglist_lock);

	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

	while (1) {
		unsigned long cost;

		segno = find_next_bit(p.dirty_segmap,
						TOTAL_SEGS(sbi), p.offset);
		if (segno >= TOTAL_SEGS(sbi)) {
			if (sbi->last_victim[p.gc_mode]) {
				sbi->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}
		p.offset = ((segno / p.ofs_unit) * p.ofs_unit) + p.ofs_unit;

		if (test_bit(segno, dirty_i->victim_segmap[FG_GC]))
			continue;
		if (gc_type == BG_GC &&
			test_bit(segno, dirty_i->victim_segmap[BG_GC]))
			continue;
		if (IS_CURSEC(sbi, GET_SECNO(sbi, segno)))
			continue;

		cost = get_gc_cost(sbi, segno, &p);

		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
		}

		if (cost == get_max_cost(sbi, &p))
			continue;

		if (nsearched++ >= MAX_VICTIM_SEARCH) {
			sbi->last_victim[p.gc_mode] = segno;
			break;
		}
	}
got_it:
	if (p.min_segno != NULL_SEGNO) {
		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
		if (p.alloc_mode == LFS) {
			int i;
			for (i = 0; i < p.ofs_unit; i++)
				set_bit(*result + i,
					dirty_i->victim_segmap[gc_type]);
		}
	}
	mutex_unlock(&dirty_i->seglist_lock);

	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
}

static const struct victim_selection default_v_ops = {
	.get_victim = get_victim_by_default,
};

static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
{
	struct list_head *this;
	struct inode_entry *ie;

	list_for_each(this, ilist) {
		ie = list_entry(this, struct inode_entry, list);
		if (ie->inode->i_ino == ino)
			return ie->inode;
	}
	return NULL;
}

static void add_gc_inode(struct inode *inode, struct list_head *ilist)
{
	struct list_head *this;
	struct inode_entry *new_ie, *ie;

	list_for_each(this, ilist) {
		ie = list_entry(this, struct inode_entry, list);
		if (ie->inode == inode) {
			iput(inode);
			return;
		}
	}
repeat:
	new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS);
	if (!new_ie) {
		cond_resched();
		goto repeat;
	}
	new_ie->inode = inode;
	list_add_tail(&new_ie->list, ilist);
}

static void put_gc_inode(struct list_head *ilist)
{
	struct inode_entry *ie, *next_ie;
	list_for_each_entry_safe(ie, next_ie, ilist, list) {
		iput(ie->inode);
		list_del(&ie->list);
		kmem_cache_free(winode_slab, ie);
	}
}

static int check_valid_map(struct f2fs_sb_info *sbi,
				unsigned int segno, int offset)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct seg_entry *sentry;
	int ret;

	mutex_lock(&sit_i->sentry_lock);
	sentry = get_seg_entry(sbi, segno);
	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
	mutex_unlock(&sit_i->sentry_lock);
	return ret ? GC_OK : GC_NEXT;
}

/**
 * This function compares the node address recorded in the summary with
 * the one in the NAT. A valid node is copied with cold status; an
 * invalid node is ignored.
 */
static int gc_node_segment(struct f2fs_sb_info *sbi,
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	bool initial = true;
	struct f2fs_summary *entry;
	int off;

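	/*
	 * Two passes over the victim's summary entries: the first pass
	 * (initial == true) only issues readahead for every valid node
	 * page; the second pass dirties the pages so they are rewritten
	 * into a new segment by node page writeback.
	 */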
next_step:
	entry = sum;
	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
		int err;

		/*
		 * Stop and trigger a checkpoint if there may not be
		 * enough free segments to absorb all the dirty node
		 * pages that must be written before the next CP.
		 */
		if (should_do_checkpoint(sbi)) {
			mutex_lock(&sbi->cp_mutex);
			block_operations(sbi);
			return GC_BLOCKED;
		}

		err = check_valid_map(sbi, segno, off);
		if (err == GC_ERROR)
			return err;
		else if (err == GC_NEXT)
			continue;

		if (initial) {
			ra_node_page(sbi, nid);
			continue;
		}
		node_page = get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

		/* set page dirty and write it */
		if (!PageWriteback(node_page))
			set_page_dirty(node_page);
		f2fs_put_page(node_page, 1);
		stat_inc_node_blk_count(sbi, 1);
	}
	if (initial) {
		initial = false;
		goto next_step;
	}

	if (gc_type == FG_GC) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = LONG_MAX,
			.for_reclaim = 0,
		};
		sync_node_pages(sbi, 0, &wbc);
	}
	return GC_DONE;
}

/**
 * Calculate the start block index of the data addressed by this node page.
 */
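/*
 * node_ofs enumerates the node pages of an inode in file-offset order:
 * offset 0 is the inode page itself, offsets 1 and 2 are the two direct
 * node pages, and higher offsets walk the indirect trees. 'dec' discounts
 * the indirect node pages passed along the way, since they address other
 * node pages rather than data. For example, node_ofs == 1 yields
 * ADDRS_PER_INODE, the first data index beyond the inode's own pointers.
 */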
block_t start_bidx_of_node(unsigned int node_ofs)
{
	block_t start_bidx;
	unsigned int bidx, indirect_blks;
	int dec;

	indirect_blks = 2 * NIDS_PER_BLOCK + 4;

	start_bidx = 1;
	if (node_ofs == 0) {
		start_bidx = 0;
	} else if (node_ofs <= 2) {
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) {
		dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 2 - dec;
	} else {
		dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
		bidx = node_ofs - 5 - dec;
	}

	if (start_bidx)
		start_bidx = bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
	return start_bidx;
}

static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
	struct page *node_page;
	nid_t nid;
	unsigned int ofs_in_node;
	block_t source_blkaddr;

	nid = le32_to_cpu(sum->nid);
	ofs_in_node = le16_to_cpu(sum->ofs_in_node);

	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return GC_NEXT;

	get_node_info(sbi, nid, dni);

	if (sum->version != dni->version) {
		f2fs_put_page(node_page, 1);
		return GC_NEXT;
	}

	*nofs = ofs_of_node(node_page);
	source_blkaddr = datablock_addr(node_page, ofs_in_node);
	f2fs_put_page(node_page, 1);

	if (source_blkaddr != blkaddr)
		return GC_NEXT;
	return GC_OK;
}

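/*
 * Move a victim data block: background GC only dirties the page (tagging
 * it cold) so that the flusher or the VM writes it out later, while
 * foreground GC writes the block out immediately.
 */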
static void move_data_page(struct inode *inode, struct page *page, int gc_type)
{
	if (page->mapping != inode->i_mapping)
		goto out;

	if (inode != page->mapping->host)
		goto out;

	if (PageWriteback(page))
		goto out;

	if (gc_type == BG_GC) {
		set_page_dirty(page);
		set_cold_data(page);
	} else {
		struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
		mutex_lock_op(sbi, DATA_WRITE);
		if (clear_page_dirty_for_io(page) &&
			S_ISDIR(inode->i_mode)) {
			dec_page_count(sbi, F2FS_DIRTY_DENTS);
			inode_dec_dirty_dents(inode);
		}
		set_cold_data(page);
		do_write_data_page(page);
		mutex_unlock_op(sbi, DATA_WRITE);
		clear_cold_data(page);
	}
out:
	f2fs_put_page(page, 1);
}

/**
 * This function tries to get the parent node of a victim data block, and
 * checks the block's validity. If the block is valid, it is copied with
 * cold status and the parent node is modified accordingly.
 * If the parent node is not valid or the data block address differs,
 * the victim data block is ignored.
 */
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct list_head *ilist, unsigned int segno, int gc_type)
{
	struct super_block *sb = sbi->sb;
	struct f2fs_summary *entry;
	block_t start_addr;
	int err, off;
	int phase = 0;

	start_addr = START_BLOCK(sbi, segno);

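	/*
	 * Four phases over the victim's summary entries:
	 * 0) readahead the node pages named in the summaries,
	 * 1) validate each dnode and readahead the owning inodes,
	 * 2) iget the inodes, look up their data pages, and queue the
	 *    inodes on ilist,
	 * 3) lock each data page and move it (dirty it for BG_GC, write
	 *    it out for FG_GC).
	 */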
next_step:
	entry = sum;
	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		struct page *data_page;
		struct inode *inode;
		struct node_info dni; /* dnode info for the data */
		unsigned int ofs_in_node, nofs;
		block_t start_bidx;

		/*
		 * Stop and trigger a checkpoint if there may not be
		 * enough free segments to absorb all the dirty node
		 * pages that must be written before the next CP.
		 */
		if (should_do_checkpoint(sbi)) {
			mutex_lock(&sbi->cp_mutex);
			block_operations(sbi);
			err = GC_BLOCKED;
			goto stop;
		}

		err = check_valid_map(sbi, segno, off);
		if (err == GC_ERROR)
			goto stop;
		else if (err == GC_NEXT)
			continue;

		if (phase == 0) {
			ra_node_page(sbi, le32_to_cpu(entry->nid));
			continue;
		}

		/* Get an inode by ino, checking its validity */
		err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs);
		if (err == GC_ERROR)
			goto stop;
		else if (err == GC_NEXT)
			continue;

		if (phase == 1) {
			ra_node_page(sbi, dni.ino);
			continue;
		}

		start_bidx = start_bidx_of_node(nofs);
		ofs_in_node = le16_to_cpu(entry->ofs_in_node);

		if (phase == 2) {
			inode = f2fs_iget_nowait(sb, dni.ino);
			if (IS_ERR(inode))
				continue;

			data_page = find_data_page(inode,
					start_bidx + ofs_in_node);
			if (IS_ERR(data_page))
				goto next_iput;

			f2fs_put_page(data_page, 0);
			add_gc_inode(inode, ilist);
		} else {
			inode = find_gc_inode(dni.ino, ilist);
			if (inode) {
				data_page = get_lock_data_page(inode,
						start_bidx + ofs_in_node);
				if (IS_ERR(data_page))
					continue;
				move_data_page(inode, data_page, gc_type);
				stat_inc_data_blk_count(sbi, 1);
			}
		}
		continue;
next_iput:
		iput(inode);
	}
	if (++phase < 4)
		goto next_step;
	err = GC_DONE;
stop:
	if (gc_type == FG_GC)
		f2fs_submit_bio(sbi, DATA, true);
	return err;
}

static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
						int gc_type, int type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int ret;
	mutex_lock(&sit_i->sentry_lock);
	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS);
	mutex_unlock(&sit_i->sentry_lock);
	return ret;
}

static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
				struct list_head *ilist, int gc_type)
{
	struct page *sum_page;
	struct f2fs_summary_block *sum;
	int ret = GC_DONE;

	/* read segment summary of victim */
	sum_page = get_sum_page(sbi, segno);
	if (IS_ERR(sum_page))
		return GC_ERROR;

	/*
	 * Checkpoint needs to lock sum_page. We do not need to lock it
	 * here, because the summary page is not going anywhere, and it
	 * will not be updated until GC is done.
	 */
	unlock_page(sum_page);
	sum = page_address(sum_page);

	switch (GET_SUM_TYPE((&sum->footer))) {
	case SUM_TYPE_NODE:
		ret = gc_node_segment(sbi, sum->entries, segno, gc_type);
		break;
	case SUM_TYPE_DATA:
		ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
		break;
	}
	stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
	stat_inc_call_count(sbi->stat_info);

	f2fs_put_page(sum_page, 0);
	return ret;
}

int f2fs_gc(struct f2fs_sb_info *sbi, int nGC)
{
	unsigned int segno;
	int old_free_secs, cur_free_secs;
	int gc_status, nfree;
	struct list_head ilist;
	int gc_type = BG_GC;

	INIT_LIST_HEAD(&ilist);
gc_more:
	nfree = 0;
	gc_status = GC_NONE;

	if (has_not_enough_free_secs(sbi))
		old_free_secs = reserved_sections(sbi);
	else
		old_free_secs = free_sections(sbi);

	while (sbi->sb->s_flags & MS_ACTIVE) {
		int i;
		if (has_not_enough_free_secs(sbi))
			gc_type = FG_GC;

		cur_free_secs = free_sections(sbi) + nfree;

		/* We got free space successfully. */
		if (nGC < cur_free_secs - old_free_secs)
			break;

		if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
			break;

		for (i = 0; i < sbi->segs_per_sec; i++) {
			/*
			 * do_garbage_collect returns one of three codes:
			 * GC_ERROR, GC_DONE, and GC_BLOCKED.
			 * If GC finished uncleanly, we have to return
			 * the victim to the dirty segment list.
			 */
			gc_status = do_garbage_collect(sbi, segno + i,
					&ilist, gc_type);
			if (gc_status != GC_DONE)
				goto stop;
			nfree++;
		}
	}
stop:
	if (has_not_enough_free_secs(sbi) || gc_status == GC_BLOCKED) {
		write_checkpoint(sbi, (gc_status == GC_BLOCKED), false);
		if (nfree)
			goto gc_more;
	}
	mutex_unlock(&sbi->gc_mutex);

	put_gc_inode(&ilist);
	BUG_ON(!list_empty(&ilist));
	return gc_status;
}

void build_gc_manager(struct f2fs_sb_info *sbi)
{
	DIRTY_I(sbi)->v_ops = &default_v_ops;
}

int create_gc_caches(void)
{
	winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes",
			sizeof(struct inode_entry), NULL);
	if (!winode_slab)
		return -ENOMEM;
	return 0;
}

void destroy_gc_caches(void)
{
	kmem_cache_destroy(winode_slab);
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
new file mode 100644
index 000000000000..cf42a554ca0a
--- /dev/null
+++ b/fs/f2fs/gc.h
@@ -0,0 +1,117 @@
/**
 * fs/f2fs/gc.h
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#define GC_THREAD_NAME	"f2fs_gc_task"
#define GC_THREAD_MIN_WB_PAGES		1	/*
						 * a threshold to determine
						 * whether IO subsystem is idle
						 * or not
						 */
#define GC_THREAD_MIN_SLEEP_TIME	10000 /* milliseconds */
#define GC_THREAD_MAX_SLEEP_TIME	30000
#define GC_THREAD_NOGC_SLEEP_TIME	10000
#define LIMIT_INVALID_BLOCK	40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK	40 /* percentage over invalid + free space */

/* Search max. number of dirty segments to select a victim segment */
#define MAX_VICTIM_SEARCH	20

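/*
 * GC status codes, as used in gc.c: GC_NONE means no segment was cleaned,
 * GC_ERROR signals an error, GC_OK/GC_NEXT are per-block verdicts (valid,
 * or skip and continue), GC_BLOCKED means GC stopped early because a
 * checkpoint is pending, and GC_DONE means a victim was fully processed.
 */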
enum {
	GC_NONE = 0,
	GC_ERROR,
	GC_OK,
	GC_NEXT,
	GC_BLOCKED,
	GC_DONE,
};

struct f2fs_gc_kthread {
	struct task_struct *f2fs_gc_task;
	wait_queue_head_t gc_wait_queue_head;
};

struct inode_entry {
	struct list_head list;
	struct inode *inode;
};

/**
 * inline functions
 */
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
{
	if (free_segments(sbi) < overprovision_segments(sbi))
		return 0;
	else
		return (free_segments(sbi) - overprovision_segments(sbi))
			<< sbi->log_blocks_per_seg;
}

static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
{
	return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
}

static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
{
	block_t reclaimable_user_blocks = sbi->user_block_count -
		written_block_count(sbi);
	return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
}

static inline long increase_sleep_time(long wait)
{
	wait += GC_THREAD_MIN_SLEEP_TIME;
	if (wait > GC_THREAD_MAX_SLEEP_TIME)
		wait = GC_THREAD_MAX_SLEEP_TIME;
	return wait;
}

static inline long decrease_sleep_time(long wait)
{
	wait -= GC_THREAD_MIN_SLEEP_TIME;
	if (wait <= GC_THREAD_MIN_SLEEP_TIME)
		wait = GC_THREAD_MIN_SLEEP_TIME;
	return wait;
}

static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
{
	block_t invalid_user_blocks = sbi->user_block_count -
					written_block_count(sbi);
	/*
	 * Background GC is triggered when the following conditions hold:
	 * 1. There are a number of invalid blocks, and
	 * 2. There is not enough free space.
	 */
	if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
			free_user_blocks(sbi) < limit_free_user_blocks(sbi))
		return true;
	return false;
}

static inline int is_idle(struct f2fs_sb_info *sbi)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	struct request_list *rl = &q->root_rl;
	return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
}

static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi)
{
	unsigned int pages_per_sec = sbi->segs_per_sec *
			(1 << sbi->log_blocks_per_seg);
	int node_secs = ((get_pages(sbi, F2FS_DIRTY_NODES) + pages_per_sec - 1)
			>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
	int dent_secs = ((get_pages(sbi, F2FS_DIRTY_DENTS) + pages_per_sec - 1)
			>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
	return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2);
}
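
The back-off policy implemented by increase_sleep_time()/decrease_sleep_time()
can be replayed with the following stand-alone user-space sketch (not part of
the patch; constants copied from gc.h, the tick loop is a hypothetical
harness):

	/* Stand-alone sketch: replay the GC thread's sleep-time policy. */
	#include <stdio.h>

	#define MIN_MS	10000	/* GC_THREAD_MIN_SLEEP_TIME */
	#define MAX_MS	30000	/* GC_THREAD_MAX_SLEEP_TIME */

	static long increase(long w) { w += MIN_MS; return w > MAX_MS ? MAX_MS : w; }
	static long decrease(long w) { w -= MIN_MS; return w <= MIN_MS ? MIN_MS : w; }

	int main(void)
	{
		long wait_ms = MIN_MS;
		/* busy, busy, idle+dirty, idle+dirty -> 20000, 30000, 20000, 10000 ms */
		int idle[]  = { 0, 0, 1, 1 };
		int dirty[] = { 0, 0, 1, 1 };
		int i;

		for (i = 0; i < 4; i++) {
			if (!idle[i])
				wait_ms = increase(wait_ms);
			else if (dirty[i])
				wait_ms = decrease(wait_ms);
			else
				wait_ms = increase(wait_ms);
			printf("tick %d: sleep %ld ms\n", i, wait_ms);
		}
		return 0;
	}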
117}