aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
author: Chao Yu <chao2.yu@samsung.com> 2014-09-04 06:13:01 -0400
committer: Jaegeuk Kim <jaegeuk@kernel.org> 2014-09-09 16:15:05 -0400
commit: 184a5cd2ce281f1207d72adb9ae18e416ca371db (patch)
tree: b91e2cfcf93adc065377908fa601659ca24fcefc /fs/f2fs
parent: d3a14afd5ed1970519a2d6ed59f4062ec3ba821f (diff)
f2fs: refactor flush_sit_entries codes for reducing SIT writes
In commit aec71382c681 ("f2fs: refactor flush_nat_entries codes for reducing NAT writes"), we described the issue as below: "Although building the NAT journal in cursum reduces the read/write work for NAT blocks, the previous design leaves us with lower performance when writing checkpoints frequently in these cases: 1. if the journal in cursum is already full, it's a bit of a waste that we flush all nat entries to pages for persistence, but do not cache any entries. 2. if the journal in cursum is not full, we fill nat entries into the journal until the journal is full, then flush the remaining dirty entries to disk without merging the journaled entries, so these journaled entries may be flushed to disk at the next checkpoint but lost the chance to be flushed last time." Actually, we have the same problem in using the SIT journal area. In this patch, firstly we update the sit journal with as many dirty entries as possible. Secondly, if there is no space in the sit journal, we remove all entries from the journal and walk through the whole dirty entry bitmap of sit, accounting dirty sit entries located in the same SIT block to a sit entry set. All entry sets are linked to the list sit_entry_set in sm_info, sorted in ascending order by the count of entries in each set. Later we flush the entries of the sets which have the fewest entries into the journal, as many as we can, and then flush the dense sets with merged entries to disk. In this way we can use the sit journal area more effectively, and we also reduce SIT updates, resulting in a gain in performance and a longer lifetime for the flash device. In my testing environment, this patch clearly helps to reduce SIT block updates.
virtual machine + hard disk:
fsstress -p 20 -n 400 -l 5
          sit page num    cp count    sit pages/cp
based     2006.50         1349.75     1.486
patched   1566.25         1463.25     1.070

The latency of the merging operation is small when handling a great number of dirty SIT entries in flush_sit_entries:
latency(ns)    dirty sit count
36038          2151
49168          2123
37174          2232

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- fs/f2fs/f2fs.h 11
-rw-r--r-- fs/f2fs/node.c 13
-rw-r--r-- fs/f2fs/segment.c 227
-rw-r--r-- fs/f2fs/segment.h 6
4 files changed, 186 insertions, 71 deletions
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b389ced9090b..dd7b171a1b16 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -161,6 +161,15 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
161 return before; 161 return before;
162} 162}
163 163
164static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
165 int type)
166{
167 if (type == NAT_JOURNAL)
168 return nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES;
169
170 return sits_in_cursum(sum) + size <= SIT_JOURNAL_ENTRIES;
171}
172
164/* 173/*
165 * ioctl commands 174 * ioctl commands
166 */ 175 */
@@ -375,6 +384,8 @@ struct f2fs_sm_info {
375 int nr_discards; /* # of discards in the list */ 384 int nr_discards; /* # of discards in the list */
376 int max_discards; /* max. discards to be issued */ 385 int max_discards; /* max. discards to be issued */
377 386
387 struct list_head sit_entry_set; /* sit entry set list */
388
378 unsigned int ipu_policy; /* in-place-update policy */ 389 unsigned int ipu_policy; /* in-place-update policy */
379 unsigned int min_ipu_util; /* in-place-update threshold */ 390 unsigned int min_ipu_util; /* in-place-update threshold */
380 391
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 1af7879bfb75..b32eb565e6b3 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1798,14 +1798,6 @@ static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1798 write_unlock(&nm_i->nat_tree_lock); 1798 write_unlock(&nm_i->nat_tree_lock);
1799} 1799}
1800 1800
1801static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1802{
1803 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1804 return true;
1805 else
1806 return false;
1807}
1808
1809static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 1801static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1810{ 1802{
1811 struct f2fs_nm_info *nm_i = NM_I(sbi); 1803 struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1860,7 +1852,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1860 * entries, remove all entries from journal and merge them 1852 * entries, remove all entries from journal and merge them
1861 * into nat entry set. 1853 * into nat entry set.
1862 */ 1854 */
1863 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { 1855 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) {
1864 remove_nats_in_journal(sbi); 1856 remove_nats_in_journal(sbi);
1865 1857
1866 /* 1858 /*
@@ -1883,7 +1875,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1883 struct page *page; 1875 struct page *page;
1884 nid_t start_nid = nes->start_nid; 1876 nid_t start_nid = nes->start_nid;
1885 1877
1886 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) 1878 if (to_journal &&
1879 !__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL))
1887 to_journal = false; 1880 to_journal = false;
1888 1881
1889 if (to_journal) { 1882 if (to_journal) {
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a6b90a520894..d1ff2250cc35 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -25,6 +25,7 @@
25#define __reverse_ffz(x) __reverse_ffs(~(x)) 25#define __reverse_ffz(x) __reverse_ffs(~(x))
26 26
27static struct kmem_cache *discard_entry_slab; 27static struct kmem_cache *discard_entry_slab;
28static struct kmem_cache *sit_entry_set_slab;
28 29
29/* 30/*
30 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since 31 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
@@ -492,11 +493,16 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi)
492 } 493 }
493} 494}
494 495
495static void __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) 496static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
496{ 497{
497 struct sit_info *sit_i = SIT_I(sbi); 498 struct sit_info *sit_i = SIT_I(sbi);
498 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) 499
500 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
499 sit_i->dirty_sentries++; 501 sit_i->dirty_sentries++;
502 return false;
503 }
504
505 return true;
500} 506}
501 507
502static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type, 508static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
@@ -1443,27 +1449,86 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
1443 return dst_page; 1449 return dst_page;
1444} 1450}
1445 1451
1446static bool flush_sits_in_journal(struct f2fs_sb_info *sbi) 1452static struct sit_entry_set *grab_sit_entry_set(void)
1453{
1454 struct sit_entry_set *ses =
1455 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
1456
1457 ses->entry_cnt = 0;
1458 INIT_LIST_HEAD(&ses->set_list);
1459 return ses;
1460}
1461
1462static void release_sit_entry_set(struct sit_entry_set *ses)
1463{
1464 list_del(&ses->set_list);
1465 kmem_cache_free(sit_entry_set_slab, ses);
1466}
1467
1468static void adjust_sit_entry_set(struct sit_entry_set *ses,
1469 struct list_head *head)
1470{
1471 struct sit_entry_set *next = ses;
1472
1473 if (list_is_last(&ses->set_list, head))
1474 return;
1475
1476 list_for_each_entry_continue(next, head, set_list)
1477 if (ses->entry_cnt <= next->entry_cnt)
1478 break;
1479
1480 list_move_tail(&ses->set_list, &next->set_list);
1481}
1482
1483static void add_sit_entry(unsigned int segno, struct list_head *head)
1484{
1485 struct sit_entry_set *ses;
1486 unsigned int start_segno = START_SEGNO(segno);
1487
1488 list_for_each_entry(ses, head, set_list) {
1489 if (ses->start_segno == start_segno) {
1490 ses->entry_cnt++;
1491 adjust_sit_entry_set(ses, head);
1492 return;
1493 }
1494 }
1495
1496 ses = grab_sit_entry_set();
1497
1498 ses->start_segno = start_segno;
1499 ses->entry_cnt++;
1500 list_add(&ses->set_list, head);
1501}
1502
1503static void add_sits_in_set(struct f2fs_sb_info *sbi)
1504{
1505 struct f2fs_sm_info *sm_info = SM_I(sbi);
1506 struct list_head *set_list = &sm_info->sit_entry_set;
1507 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
1508 unsigned long nsegs = TOTAL_SEGS(sbi);
1509 unsigned int segno;
1510
1511 for_each_set_bit(segno, bitmap, nsegs)
1512 add_sit_entry(segno, set_list);
1513}
1514
1515static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
1447{ 1516{
1448 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1517 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1449 struct f2fs_summary_block *sum = curseg->sum_blk; 1518 struct f2fs_summary_block *sum = curseg->sum_blk;
1450 int i; 1519 int i;
1451 1520
1452 /* 1521 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
1453 * If the journal area in the current summary is full of sit entries, 1522 unsigned int segno;
1454 * all the sit entries will be flushed. Otherwise the sit entries 1523 bool dirtied;
1455 * are not able to replace with newly hot sit entries. 1524
1456 */ 1525 segno = le32_to_cpu(segno_in_journal(sum, i));
1457 if (sits_in_cursum(sum) >= SIT_JOURNAL_ENTRIES) { 1526 dirtied = __mark_sit_entry_dirty(sbi, segno);
1458 for (i = sits_in_cursum(sum) - 1; i >= 0; i--) { 1527
1459 unsigned int segno; 1528 if (!dirtied)
1460 segno = le32_to_cpu(segno_in_journal(sum, i)); 1529 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
1461 __mark_sit_entry_dirty(sbi, segno);
1462 }
1463 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1464 return true;
1465 } 1530 }
1466 return false; 1531 update_sits_in_cursum(sum, -sits_in_cursum(sum));
1467} 1532}
1468 1533
1469/* 1534/*
@@ -1476,68 +1541,95 @@ void flush_sit_entries(struct f2fs_sb_info *sbi)
1476 unsigned long *bitmap = sit_i->dirty_sentries_bitmap; 1541 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
1477 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); 1542 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
1478 struct f2fs_summary_block *sum = curseg->sum_blk; 1543 struct f2fs_summary_block *sum = curseg->sum_blk;
1544 struct sit_entry_set *ses, *tmp;
1545 struct list_head *head = &SM_I(sbi)->sit_entry_set;
1479 unsigned long nsegs = TOTAL_SEGS(sbi); 1546 unsigned long nsegs = TOTAL_SEGS(sbi);
1480 struct page *page = NULL; 1547 bool to_journal = true;
1481 struct f2fs_sit_block *raw_sit = NULL;
1482 unsigned int start = 0, end = 0;
1483 unsigned int segno;
1484 bool flushed;
1485 1548
1486 mutex_lock(&curseg->curseg_mutex); 1549 mutex_lock(&curseg->curseg_mutex);
1487 mutex_lock(&sit_i->sentry_lock); 1550 mutex_lock(&sit_i->sentry_lock);
1488 1551
1489 /* 1552 /*
1490 * "flushed" indicates whether sit entries in journal are flushed 1553 * add and account sit entries of dirty bitmap in sit entry
1491 * to the SIT area or not. 1554 * set temporarily
1492 */ 1555 */
1493 flushed = flush_sits_in_journal(sbi); 1556 add_sits_in_set(sbi);
1494
1495 for_each_set_bit(segno, bitmap, nsegs) {
1496 struct seg_entry *se = get_seg_entry(sbi, segno);
1497 int sit_offset, offset;
1498 1557
1499 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno); 1558 /*
1500 1559 * if there are no enough space in journal to store dirty sit
1501 /* add discard candidates */ 1560 * entries, remove all entries from journal and add and account
1502 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) 1561 * them in sit entry set.
1503 add_discard_addrs(sbi, segno, se); 1562 */
1563 if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
1564 remove_sits_in_journal(sbi);
1504 1565
1505 if (flushed) 1566 if (!sit_i->dirty_sentries)
1506 goto to_sit_page; 1567 goto out;
1507 1568
1508 offset = lookup_journal_in_cursum(sum, SIT_JOURNAL, segno, 1); 1569 /*
1509 if (offset >= 0) { 1570 * there are two steps to flush sit entries:
1510 segno_in_journal(sum, offset) = cpu_to_le32(segno); 1571 * #1, flush sit entries to journal in current cold data summary block.
1511 seg_info_to_raw_sit(se, &sit_in_journal(sum, offset)); 1572 * #2, flush sit entries to sit page.
1512 goto flush_done; 1573 */
1574 list_for_each_entry_safe(ses, tmp, head, set_list) {
1575 struct page *page;
1576 struct f2fs_sit_block *raw_sit = NULL;
1577 unsigned int start_segno = ses->start_segno;
1578 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
1579 nsegs);
1580 unsigned int segno = start_segno;
1581
1582 if (to_journal &&
1583 !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
1584 to_journal = false;
1585
1586 if (!to_journal) {
1587 page = get_next_sit_page(sbi, start_segno);
1588 raw_sit = page_address(page);
1513 } 1589 }
1514to_sit_page:
1515 if (!page || (start > segno) || (segno > end)) {
1516 if (page) {
1517 f2fs_put_page(page, 1);
1518 page = NULL;
1519 }
1520 1590
1521 start = START_SEGNO(segno); 1591 /* flush dirty sit entries in region of current sit set */
1522 end = start + SIT_ENTRY_PER_BLOCK - 1; 1592 for_each_set_bit_from(segno, bitmap, end) {
1593 int offset, sit_offset;
1594 struct seg_entry *se = get_seg_entry(sbi, segno);
1595
1596 /* add discard candidates */
1597 if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards)
1598 add_discard_addrs(sbi, segno, se);
1599
1600 if (to_journal) {
1601 offset = lookup_journal_in_cursum(sum,
1602 SIT_JOURNAL, segno, 1);
1603 f2fs_bug_on(sbi, offset < 0);
1604 segno_in_journal(sum, offset) =
1605 cpu_to_le32(segno);
1606 seg_info_to_raw_sit(se,
1607 &sit_in_journal(sum, offset));
1608 } else {
1609 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
1610 seg_info_to_raw_sit(se,
1611 &raw_sit->entries[sit_offset]);
1612 }
1523 1613
1524 /* read sit block that will be updated */ 1614 __clear_bit(segno, bitmap);
1525 page = get_next_sit_page(sbi, start); 1615 sit_i->dirty_sentries--;
1526 raw_sit = page_address(page); 1616 ses->entry_cnt--;
1527 } 1617 }
1528 1618
1529 /* udpate entry in SIT block */ 1619 if (!to_journal)
1530 seg_info_to_raw_sit(se, &raw_sit->entries[sit_offset]); 1620 f2fs_put_page(page, 1);
1531flush_done: 1621
1532 __clear_bit(segno, bitmap); 1622 f2fs_bug_on(sbi, ses->entry_cnt);
1533 sit_i->dirty_sentries--; 1623 release_sit_entry_set(ses);
1534 } 1624 }
1625
1626 f2fs_bug_on(sbi, !list_empty(head));
1627 f2fs_bug_on(sbi, sit_i->dirty_sentries);
1628
1629out:
1535 mutex_unlock(&sit_i->sentry_lock); 1630 mutex_unlock(&sit_i->sentry_lock);
1536 mutex_unlock(&curseg->curseg_mutex); 1631 mutex_unlock(&curseg->curseg_mutex);
1537 1632
1538 /* writeout last modified SIT block */
1539 f2fs_put_page(page, 1);
1540
1541 set_prefree_as_free_segments(sbi); 1633 set_prefree_as_free_segments(sbi);
1542} 1634}
1543 1635
@@ -1854,6 +1946,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
1854 sm_info->nr_discards = 0; 1946 sm_info->nr_discards = 0;
1855 sm_info->max_discards = 0; 1947 sm_info->max_discards = 0;
1856 1948
1949 INIT_LIST_HEAD(&sm_info->sit_entry_set);
1950
1857 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { 1951 if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
1858 err = create_flush_cmd_control(sbi); 1952 err = create_flush_cmd_control(sbi);
1859 if (err) 1953 if (err)
@@ -1983,11 +2077,22 @@ int __init create_segment_manager_caches(void)
1983 discard_entry_slab = f2fs_kmem_cache_create("discard_entry", 2077 discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
1984 sizeof(struct discard_entry)); 2078 sizeof(struct discard_entry));
1985 if (!discard_entry_slab) 2079 if (!discard_entry_slab)
1986 return -ENOMEM; 2080 goto fail;
2081
2082 sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2083 sizeof(struct nat_entry_set));
2084 if (!sit_entry_set_slab)
2085 goto destory_discard_entry;
1987 return 0; 2086 return 0;
2087
2088destory_discard_entry:
2089 kmem_cache_destroy(discard_entry_slab);
2090fail:
2091 return -ENOMEM;
1988} 2092}
1989 2093
1990void destroy_segment_manager_caches(void) 2094void destroy_segment_manager_caches(void)
1991{ 2095{
2096 kmem_cache_destroy(sit_entry_set_slab);
1992 kmem_cache_destroy(discard_entry_slab); 2097 kmem_cache_destroy(discard_entry_slab);
1993} 2098}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 2548bfdf0240..bed0dc967f29 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -237,6 +237,12 @@ struct curseg_info {
237 unsigned int next_segno; /* preallocated segment */ 237 unsigned int next_segno; /* preallocated segment */
238}; 238};
239 239
240struct sit_entry_set {
241 struct list_head set_list; /* link with all sit sets */
242 unsigned int start_segno; /* start segno of sits in set */
243 unsigned int entry_cnt; /* the # of sit entries in set */
244};
245
240/* 246/*
241 * inline functions 247 * inline functions
242 */ 248 */