aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChao Yu <chao2.yu@samsung.com>2014-06-23 21:18:20 -0400
committerJaegeuk Kim <jaegeuk@kernel.org>2014-07-09 17:04:25 -0400
commitaec71382c68135261ef6efc3d8a96b7149939446 (patch)
tree947ff0bb52e12693c0f551aaef1f70aacad735d2
parenta014e037be26b5c9ee6fb4e49e7804141cf3bb89 (diff)
f2fs: refactor flush_nat_entries codes for reducing NAT writes
Although building the NAT journal in cursum reduces the read/write work for NAT blocks, the previous design leaves us with lower performance when checkpoints are written frequently, in these cases:
1. If the journal in cursum is already full, it is wasteful that we flush all nat entries to NAT pages for persistence without caching any entries in the journal.
2. If the journal in cursum is not full, we fill nat entries into the journal until the journal is full, then flush the remaining dirty entries to disk without merging the journaled entries, so these journaled entries may be flushed to disk at the next checkpoint, having lost the chance to be flushed the last time.

In this patch we merge dirty entries located in the same NAT block into a nat entry set, and link all sets into a list, sorted in ascending order by the entry count of each set. Later we flush entries of sparse sets into the journal, as many as we can, and then flush the merged entries to disk. In this way we not only gain performance, but also extend the lifetime of the flash device.

In my testing environment, this patch noticeably reduces NAT block writes. In the hard disk test case, the time cost of fsstress is stably reduced by about 5%.

1. virtual machine + hard disk:
fsstress -p 20 -n 200 -l 5
		node num	cp count	nodes/cp
based		4599.6		1803.0		2.551
patched		2714.6		1829.6		1.483

2. virtual machine + 32g micro SD card:
fsstress -p 20 -n 200 -l 1 -w -f chown=0 -f creat=4 -f dwrite=0 -f fdatasync=4 -f fsync=4 -f link=0 -f mkdir=4 -f mknod=4 -f rename=5 -f rmdir=5 -f symlink=0 -f truncate=4 -f unlink=5 -f write=0 -S
		node num	cp count	nodes/cp
based		84.5		43.7		1.933
patched		49.2		40.0		1.23

The latency of the merging operation is acceptable even when handling an extreme case such as merging a great number of dirty nats:
latency(ns)	dirty nat count
3089219		24922
5129423		27422
4000250		24523

change log from v1:
 o fix wrong logic in add_nat_entry when grabbing a new nat entry set.
 o switch to creating the slab cache in create_node_manager_caches.
 o use GFP_ATOMIC instead of GFP_NOFS to avoid potential long latency.
change log from v2:
 o move comments to more appropriate positions, as suggested by Jaegeuk Kim.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/f2fs/node.c263
-rw-r--r--fs/f2fs/node.h7
3 files changed, 188 insertions, 84 deletions
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 3f0291b840ef..ec480b1a6e33 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -256,6 +256,8 @@ struct f2fs_nm_info {
256 unsigned int nat_cnt; /* the # of cached nat entries */ 256 unsigned int nat_cnt; /* the # of cached nat entries */
257 struct list_head nat_entries; /* cached nat entry list (clean) */ 257 struct list_head nat_entries; /* cached nat entry list (clean) */
258 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ 258 struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */
259 struct list_head nat_entry_set; /* nat entry set list */
260 unsigned int dirty_nat_cnt; /* total num of nat entries in set */
259 261
260 /* free node ids management */ 262 /* free node ids management */
261 struct radix_tree_root free_nid_root;/* root of the free_nid cache */ 263 struct radix_tree_root free_nid_root;/* root of the free_nid cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index de709f0a445e..a90f51d32482 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -25,6 +25,7 @@
25 25
26static struct kmem_cache *nat_entry_slab; 26static struct kmem_cache *nat_entry_slab;
27static struct kmem_cache *free_nid_slab; 27static struct kmem_cache *free_nid_slab;
28static struct kmem_cache *nat_entry_set_slab;
28 29
29bool available_free_memory(struct f2fs_sb_info *sbi, int type) 30bool available_free_memory(struct f2fs_sb_info *sbi, int type)
30{ 31{
@@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
90 91
91 /* get current nat block page with lock */ 92 /* get current nat block page with lock */
92 src_page = get_meta_page(sbi, src_off); 93 src_page = get_meta_page(sbi, src_off);
93
94 /* Dirty src_page means that it is already the new target NAT page. */
95 if (PageDirty(src_page))
96 return src_page;
97
98 dst_page = grab_meta_page(sbi, dst_off); 94 dst_page = grab_meta_page(sbi, dst_off);
95 f2fs_bug_on(PageDirty(src_page));
99 96
100 src_addr = page_address(src_page); 97 src_addr = page_address(src_page);
101 dst_addr = page_address(dst_page); 98 dst_addr = page_address(dst_page);
@@ -1744,7 +1741,90 @@ skip:
1744 return err; 1741 return err;
1745} 1742}
1746 1743
1747static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) 1744static struct nat_entry_set *grab_nat_entry_set(void)
1745{
1746 struct nat_entry_set *nes =
1747 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1748
1749 nes->entry_cnt = 0;
1750 INIT_LIST_HEAD(&nes->set_list);
1751 INIT_LIST_HEAD(&nes->entry_list);
1752 return nes;
1753}
1754
1755static void release_nat_entry_set(struct nat_entry_set *nes,
1756 struct f2fs_nm_info *nm_i)
1757{
1758 f2fs_bug_on(!list_empty(&nes->entry_list));
1759
1760 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1761 list_del(&nes->set_list);
1762 kmem_cache_free(nat_entry_set_slab, nes);
1763}
1764
1765static void adjust_nat_entry_set(struct nat_entry_set *nes,
1766 struct list_head *head)
1767{
1768 struct nat_entry_set *next = nes;
1769
1770 if (list_is_last(&nes->set_list, head))
1771 return;
1772
1773 list_for_each_entry_continue(next, head, set_list)
1774 if (nes->entry_cnt <= next->entry_cnt)
1775 break;
1776
1777 list_move_tail(&nes->set_list, &next->set_list);
1778}
1779
1780static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1781{
1782 struct nat_entry_set *nes;
1783 nid_t start_nid = START_NID(ne->ni.nid);
1784
1785 list_for_each_entry(nes, head, set_list) {
1786 if (nes->start_nid == start_nid) {
1787 list_move_tail(&ne->list, &nes->entry_list);
1788 nes->entry_cnt++;
1789 adjust_nat_entry_set(nes, head);
1790 return;
1791 }
1792 }
1793
1794 nes = grab_nat_entry_set();
1795
1796 nes->start_nid = start_nid;
1797 list_move_tail(&ne->list, &nes->entry_list);
1798 nes->entry_cnt++;
1799 list_add(&nes->set_list, head);
1800}
1801
1802static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1803{
1804 struct f2fs_nm_info *nm_i = NM_I(sbi);
1805 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1806 struct list_head *set_list = &nm_i->nat_entry_set;
1807 struct nat_entry *ne, *tmp;
1808
1809 write_lock(&nm_i->nat_tree_lock);
1810 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1811 if (nat_get_blkaddr(ne) == NEW_ADDR)
1812 continue;
1813 add_nat_entry(ne, set_list);
1814 nm_i->dirty_nat_cnt++;
1815 }
1816 write_unlock(&nm_i->nat_tree_lock);
1817}
1818
1819static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
1820{
1821 if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
1822 return true;
1823 else
1824 return false;
1825}
1826
1827static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1748{ 1828{
1749 struct f2fs_nm_info *nm_i = NM_I(sbi); 1829 struct f2fs_nm_info *nm_i = NM_I(sbi);
1750 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1830 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1752,12 +1832,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1752 int i; 1832 int i;
1753 1833
1754 mutex_lock(&curseg->curseg_mutex); 1834 mutex_lock(&curseg->curseg_mutex);
1755
1756 if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
1757 mutex_unlock(&curseg->curseg_mutex);
1758 return false;
1759 }
1760
1761 for (i = 0; i < nats_in_cursum(sum); i++) { 1835 for (i = 0; i < nats_in_cursum(sum); i++) {
1762 struct nat_entry *ne; 1836 struct nat_entry *ne;
1763 struct f2fs_nat_entry raw_ne; 1837 struct f2fs_nat_entry raw_ne;
@@ -1767,23 +1841,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
1767retry: 1841retry:
1768 write_lock(&nm_i->nat_tree_lock); 1842 write_lock(&nm_i->nat_tree_lock);
1769 ne = __lookup_nat_cache(nm_i, nid); 1843 ne = __lookup_nat_cache(nm_i, nid);
1770 if (ne) { 1844 if (ne)
1771 __set_nat_cache_dirty(nm_i, ne); 1845 goto found;
1772 write_unlock(&nm_i->nat_tree_lock); 1846
1773 continue;
1774 }
1775 ne = grab_nat_entry(nm_i, nid); 1847 ne = grab_nat_entry(nm_i, nid);
1776 if (!ne) { 1848 if (!ne) {
1777 write_unlock(&nm_i->nat_tree_lock); 1849 write_unlock(&nm_i->nat_tree_lock);
1778 goto retry; 1850 goto retry;
1779 } 1851 }
1780 node_info_from_raw_nat(&ne->ni, &raw_ne); 1852 node_info_from_raw_nat(&ne->ni, &raw_ne);
1853found:
1781 __set_nat_cache_dirty(nm_i, ne); 1854 __set_nat_cache_dirty(nm_i, ne);
1782 write_unlock(&nm_i->nat_tree_lock); 1855 write_unlock(&nm_i->nat_tree_lock);
1783 } 1856 }
1784 update_nats_in_cursum(sum, -i); 1857 update_nats_in_cursum(sum, -i);
1785 mutex_unlock(&curseg->curseg_mutex); 1858 mutex_unlock(&curseg->curseg_mutex);
1786 return true;
1787} 1859}
1788 1860
1789/* 1861/*
@@ -1794,80 +1866,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
1794 struct f2fs_nm_info *nm_i = NM_I(sbi); 1866 struct f2fs_nm_info *nm_i = NM_I(sbi);
1795 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); 1867 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1796 struct f2fs_summary_block *sum = curseg->sum_blk; 1868 struct f2fs_summary_block *sum = curseg->sum_blk;
1797 struct nat_entry *ne, *cur; 1869 struct nat_entry_set *nes, *tmp;
1798 struct page *page = NULL; 1870 struct list_head *head = &nm_i->nat_entry_set;
1799 struct f2fs_nat_block *nat_blk = NULL; 1871 bool to_journal = true;
1800 nid_t start_nid = 0, end_nid = 0;
1801 bool flushed;
1802
1803 flushed = flush_nats_in_journal(sbi);
1804 1872
1805 if (!flushed) 1873 /* merge nat entries of dirty list to nat entry set temporarily */
1806 mutex_lock(&curseg->curseg_mutex); 1874 merge_nats_in_set(sbi);
1807 1875
1808 /* 1) flush dirty nat caches */ 1876 /*
1809 list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) { 1877 * if there are no enough space in journal to store dirty nat
1810 nid_t nid; 1878 * entries, remove all entries from journal and merge them
1811 struct f2fs_nat_entry raw_ne; 1879 * into nat entry set.
1812 int offset = -1; 1880 */
1813 1881 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
1814 if (nat_get_blkaddr(ne) == NEW_ADDR) 1882 remove_nats_in_journal(sbi);
1815 continue;
1816
1817 nid = nat_get_nid(ne);
1818 1883
1819 if (flushed) 1884 /*
1820 goto to_nat_page; 1885 * merge nat entries of dirty list to nat entry set temporarily
1886 */
1887 merge_nats_in_set(sbi);
1888 }
1821 1889
1822 /* if there is room for nat enries in curseg->sumpage */ 1890 if (!nm_i->dirty_nat_cnt)
1823 offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); 1891 return;
1824 if (offset >= 0) {
1825 raw_ne = nat_in_journal(sum, offset);
1826 goto flush_now;
1827 }
1828to_nat_page:
1829 if (!page || (start_nid > nid || nid > end_nid)) {
1830 if (page) {
1831 f2fs_put_page(page, 1);
1832 page = NULL;
1833 }
1834 start_nid = START_NID(nid);
1835 end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
1836 1892
1837 /* 1893 /*
1838 * get nat block with dirty flag, increased reference 1894 * there are two steps to flush nat entries:
1839 * count, mapped and lock 1895 * #1, flush nat entries to journal in current hot data summary block.
1840 */ 1896 * #2, flush nat entries to nat page.
1897 */
1898 list_for_each_entry_safe(nes, tmp, head, set_list) {
1899 struct f2fs_nat_block *nat_blk;
1900 struct nat_entry *ne, *cur;
1901 struct page *page;
1902 nid_t start_nid = nes->start_nid;
1903
1904 if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
1905 to_journal = false;
1906
1907 if (to_journal) {
1908 mutex_lock(&curseg->curseg_mutex);
1909 } else {
1841 page = get_next_nat_page(sbi, start_nid); 1910 page = get_next_nat_page(sbi, start_nid);
1842 nat_blk = page_address(page); 1911 nat_blk = page_address(page);
1912 f2fs_bug_on(!nat_blk);
1843 } 1913 }
1844 1914
1845 f2fs_bug_on(!nat_blk); 1915 /* flush dirty nats in nat entry set */
1846 raw_ne = nat_blk->entries[nid - start_nid]; 1916 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
1847flush_now: 1917 struct f2fs_nat_entry *raw_ne;
1848 raw_nat_from_node_info(&raw_ne, &ne->ni); 1918 nid_t nid = nat_get_nid(ne);
1849 1919 int offset;
1850 if (offset < 0) { 1920
1851 nat_blk->entries[nid - start_nid] = raw_ne; 1921 if (to_journal) {
1852 } else { 1922 offset = lookup_journal_in_cursum(sum,
1853 nat_in_journal(sum, offset) = raw_ne; 1923 NAT_JOURNAL, nid, 1);
1854 nid_in_journal(sum, offset) = cpu_to_le32(nid); 1924 f2fs_bug_on(offset < 0);
1855 } 1925 raw_ne = &nat_in_journal(sum, offset);
1926 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1927 } else {
1928 raw_ne = &nat_blk->entries[nid - start_nid];
1929 }
1930 raw_nat_from_node_info(raw_ne, &ne->ni);
1856 1931
1857 if (nat_get_blkaddr(ne) == NULL_ADDR && 1932 if (nat_get_blkaddr(ne) == NULL_ADDR &&
1858 add_free_nid(sbi, nid, false) <= 0) { 1933 add_free_nid(sbi, nid, false) <= 0) {
1859 write_lock(&nm_i->nat_tree_lock); 1934 write_lock(&nm_i->nat_tree_lock);
1860 __del_from_nat_cache(nm_i, ne); 1935 __del_from_nat_cache(nm_i, ne);
1861 write_unlock(&nm_i->nat_tree_lock); 1936 write_unlock(&nm_i->nat_tree_lock);
1862 } else { 1937 } else {
1863 write_lock(&nm_i->nat_tree_lock); 1938 write_lock(&nm_i->nat_tree_lock);
1864 __clear_nat_cache_dirty(nm_i, ne); 1939 __clear_nat_cache_dirty(nm_i, ne);
1865 write_unlock(&nm_i->nat_tree_lock); 1940 write_unlock(&nm_i->nat_tree_lock);
1941 }
1866 } 1942 }
1943
1944 if (to_journal)
1945 mutex_unlock(&curseg->curseg_mutex);
1946 else
1947 f2fs_put_page(page, 1);
1948
1949 release_nat_entry_set(nes, nm_i);
1867 } 1950 }
1868 if (!flushed) 1951
1869 mutex_unlock(&curseg->curseg_mutex); 1952 f2fs_bug_on(!list_empty(head));
1870 f2fs_put_page(page, 1); 1953 f2fs_bug_on(nm_i->dirty_nat_cnt);
1871} 1954}
1872 1955
1873static int init_node_manager(struct f2fs_sb_info *sbi) 1956static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1896,6 +1979,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1896 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1979 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1897 INIT_LIST_HEAD(&nm_i->nat_entries); 1980 INIT_LIST_HEAD(&nm_i->nat_entries);
1898 INIT_LIST_HEAD(&nm_i->dirty_nat_entries); 1981 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1982 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1899 1983
1900 mutex_init(&nm_i->build_lock); 1984 mutex_init(&nm_i->build_lock);
1901 spin_lock_init(&nm_i->free_nid_list_lock); 1985 spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1976,19 +2060,30 @@ int __init create_node_manager_caches(void)
1976 nat_entry_slab = f2fs_kmem_cache_create("nat_entry", 2060 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
1977 sizeof(struct nat_entry)); 2061 sizeof(struct nat_entry));
1978 if (!nat_entry_slab) 2062 if (!nat_entry_slab)
1979 return -ENOMEM; 2063 goto fail;
1980 2064
1981 free_nid_slab = f2fs_kmem_cache_create("free_nid", 2065 free_nid_slab = f2fs_kmem_cache_create("free_nid",
1982 sizeof(struct free_nid)); 2066 sizeof(struct free_nid));
1983 if (!free_nid_slab) { 2067 if (!free_nid_slab)
1984 kmem_cache_destroy(nat_entry_slab); 2068 goto destory_nat_entry;
1985 return -ENOMEM; 2069
1986 } 2070 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2071 sizeof(struct nat_entry_set));
2072 if (!nat_entry_set_slab)
2073 goto destory_free_nid;
1987 return 0; 2074 return 0;
2075
2076destory_free_nid:
2077 kmem_cache_destroy(free_nid_slab);
2078destory_nat_entry:
2079 kmem_cache_destroy(nat_entry_slab);
2080fail:
2081 return -ENOMEM;
1988} 2082}
1989 2083
1990void destroy_node_manager_caches(void) 2084void destroy_node_manager_caches(void)
1991{ 2085{
2086 kmem_cache_destroy(nat_entry_set_slab);
1992 kmem_cache_destroy(free_nid_slab); 2087 kmem_cache_destroy(free_nid_slab);
1993 kmem_cache_destroy(nat_entry_slab); 2088 kmem_cache_destroy(nat_entry_slab);
1994} 2089}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 7281112cd1c8..8a116a407599 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -89,6 +89,13 @@ enum mem_type {
89 DIRTY_DENTS /* indicates dirty dentry pages */ 89 DIRTY_DENTS /* indicates dirty dentry pages */
90}; 90};
91 91
92struct nat_entry_set {
93 struct list_head set_list; /* link with all nat sets */
94 struct list_head entry_list; /* link with dirty nat entries */
95 nid_t start_nid; /* start nid of nats in set */
96 unsigned int entry_cnt; /* the # of nat entries in set */
97};
98
92/* 99/*
93 * For free nid mangement 100 * For free nid mangement
94 */ 101 */