aboutsummaryrefslogtreecommitdiffstats
path: root/fs/f2fs/node.c
diff options
context:
space:
mode:
authorJaegeuk Kim <jaegeuk@kernel.org>2014-09-22 14:40:48 -0400
committerJaegeuk Kim <jaegeuk@kernel.org>2014-09-30 18:30:41 -0400
commit309cc2b6e7ae6672ff9744fe07735ed234a8994e (patch)
tree0119d3d6d0cae0913b29ba6cfd419c089b2fb004 /fs/f2fs/node.c
parent4b2fecc84655055a6a1fe9151786992ac04b56ce (diff)
f2fs: refactor flush_nat_entries to remove costly reorganizing ops
Previously, f2fs tries to reorganize the dirty nat entries into multiple sets according to its nid ranges. This can improve the flushing nat pages, however, if there are a lot of cached nat entries, it becomes a bottleneck. This patch introduces a new set management flow by removing dirty nat list and adding a series of set operations when the nat entry becomes dirty. Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r--fs/f2fs/node.c299
1 files changed, 152 insertions, 147 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 21ed91b3cb54..44b8afef43d9 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -123,6 +123,57 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
123 kmem_cache_free(nat_entry_slab, e); 123 kmem_cache_free(nat_entry_slab, e);
124} 124}
125 125
126static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
127 struct nat_entry *ne)
128{
129 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
130 struct nat_entry_set *head;
131
132 if (get_nat_flag(ne, IS_DIRTY))
133 return;
134retry:
135 head = radix_tree_lookup(&nm_i->nat_set_root, set);
136 if (!head) {
137 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
138
139 INIT_LIST_HEAD(&head->entry_list);
140 INIT_LIST_HEAD(&head->set_list);
141 head->set = set;
142 head->entry_cnt = 0;
143
144 if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
145 cond_resched();
146 goto retry;
147 }
148 }
149 list_move_tail(&ne->list, &head->entry_list);
150 nm_i->dirty_nat_cnt++;
151 head->entry_cnt++;
152 set_nat_flag(ne, IS_DIRTY, true);
153}
154
155static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
156 struct nat_entry *ne)
157{
158 nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
159 struct nat_entry_set *head;
160
161 head = radix_tree_lookup(&nm_i->nat_set_root, set);
162 if (head) {
163 list_move_tail(&ne->list, &nm_i->nat_entries);
164 set_nat_flag(ne, IS_DIRTY, false);
165 head->entry_cnt--;
166 nm_i->dirty_nat_cnt--;
167 }
168}
169
170static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
171 nid_t start, unsigned int nr, struct nat_entry_set **ep)
172{
173 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
174 start, nr);
175}
176
126bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) 177bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
127{ 178{
128 struct f2fs_nm_info *nm_i = NM_I(sbi); 179 struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1739,79 +1790,6 @@ skip:
1739 return err; 1790 return err;
1740} 1791}
1741 1792
1742static struct nat_entry_set *grab_nat_entry_set(void)
1743{
1744 struct nat_entry_set *nes =
1745 f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
1746
1747 nes->entry_cnt = 0;
1748 INIT_LIST_HEAD(&nes->set_list);
1749 INIT_LIST_HEAD(&nes->entry_list);
1750 return nes;
1751}
1752
1753static void release_nat_entry_set(struct nat_entry_set *nes,
1754 struct f2fs_nm_info *nm_i)
1755{
1756 nm_i->dirty_nat_cnt -= nes->entry_cnt;
1757 list_del(&nes->set_list);
1758 kmem_cache_free(nat_entry_set_slab, nes);
1759}
1760
1761static void adjust_nat_entry_set(struct nat_entry_set *nes,
1762 struct list_head *head)
1763{
1764 struct nat_entry_set *next = nes;
1765
1766 if (list_is_last(&nes->set_list, head))
1767 return;
1768
1769 list_for_each_entry_continue(next, head, set_list)
1770 if (nes->entry_cnt <= next->entry_cnt)
1771 break;
1772
1773 list_move_tail(&nes->set_list, &next->set_list);
1774}
1775
1776static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
1777{
1778 struct nat_entry_set *nes;
1779 nid_t start_nid = START_NID(ne->ni.nid);
1780
1781 list_for_each_entry(nes, head, set_list) {
1782 if (nes->start_nid == start_nid) {
1783 list_move_tail(&ne->list, &nes->entry_list);
1784 nes->entry_cnt++;
1785 adjust_nat_entry_set(nes, head);
1786 return;
1787 }
1788 }
1789
1790 nes = grab_nat_entry_set();
1791
1792 nes->start_nid = start_nid;
1793 list_move_tail(&ne->list, &nes->entry_list);
1794 nes->entry_cnt++;
1795 list_add(&nes->set_list, head);
1796}
1797
1798static void merge_nats_in_set(struct f2fs_sb_info *sbi)
1799{
1800 struct f2fs_nm_info *nm_i = NM_I(sbi);
1801 struct list_head *dirty_list = &nm_i->dirty_nat_entries;
1802 struct list_head *set_list = &nm_i->nat_entry_set;
1803 struct nat_entry *ne, *tmp;
1804
1805 write_lock(&nm_i->nat_tree_lock);
1806 list_for_each_entry_safe(ne, tmp, dirty_list, list) {
1807 if (nat_get_blkaddr(ne) == NEW_ADDR)
1808 continue;
1809 add_nat_entry(ne, set_list);
1810 nm_i->dirty_nat_cnt++;
1811 }
1812 write_unlock(&nm_i->nat_tree_lock);
1813}
1814
1815static void remove_nats_in_journal(struct f2fs_sb_info *sbi) 1793static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
1816{ 1794{
1817 struct f2fs_nm_info *nm_i = NM_I(sbi); 1795 struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1846,101 +1824,129 @@ found:
1846 mutex_unlock(&curseg->curseg_mutex); 1824 mutex_unlock(&curseg->curseg_mutex);
1847} 1825}
1848 1826
1849/* 1827static void __adjust_nat_entry_set(struct nat_entry_set *nes,
1850 * This function is called during the checkpointing process. 1828 struct list_head *head, int max)
1851 */
1852void flush_nat_entries(struct f2fs_sb_info *sbi)
1853{ 1829{
1854 struct f2fs_nm_info *nm_i = NM_I(sbi); 1830 struct nat_entry_set *cur;
1855 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1856 struct f2fs_summary_block *sum = curseg->sum_blk;
1857 struct nat_entry_set *nes, *tmp;
1858 struct list_head *head = &nm_i->nat_entry_set;
1859 bool to_journal = true;
1860 1831
1861 /* merge nat entries of dirty list to nat entry set temporarily */ 1832 if (nes->entry_cnt >= max)
1862 merge_nats_in_set(sbi); 1833 goto add_out;
1863 1834
1864 /* 1835 list_for_each_entry(cur, head, set_list) {
1865 * if there are no enough space in journal to store dirty nat 1836 if (cur->entry_cnt >= nes->entry_cnt) {
1866 * entries, remove all entries from journal and merge them 1837 list_add(&nes->set_list, cur->set_list.prev);
1867 * into nat entry set. 1838 return;
1868 */ 1839 }
1869 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) {
1870 remove_nats_in_journal(sbi);
1871
1872 /*
1873 * merge nat entries of dirty list to nat entry set temporarily
1874 */
1875 merge_nats_in_set(sbi);
1876 } 1840 }
1841add_out:
1842 list_add_tail(&nes->set_list, head);
1843}
1877 1844
1878 if (!nm_i->dirty_nat_cnt) 1845static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
1879 return; 1846 struct nat_entry_set *set)
1847{
1848 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1849 struct f2fs_summary_block *sum = curseg->sum_blk;
1850 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
1851 bool to_journal = true;
1852 struct f2fs_nat_block *nat_blk;
1853 struct nat_entry *ne, *cur;
1854 struct page *page = NULL;
1880 1855
1881 /* 1856 /*
1882 * there are two steps to flush nat entries: 1857 * there are two steps to flush nat entries:
1883 * #1, flush nat entries to journal in current hot data summary block. 1858 * #1, flush nat entries to journal in current hot data summary block.
1884 * #2, flush nat entries to nat page. 1859 * #2, flush nat entries to nat page.
1885 */ 1860 */
1886 list_for_each_entry_safe(nes, tmp, head, set_list) { 1861 if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
1887 struct f2fs_nat_block *nat_blk; 1862 to_journal = false;
1888 struct nat_entry *ne, *cur; 1863
1889 struct page *page; 1864 if (to_journal) {
1890 nid_t start_nid = nes->start_nid; 1865 mutex_lock(&curseg->curseg_mutex);
1866 } else {
1867 page = get_next_nat_page(sbi, start_nid);
1868 nat_blk = page_address(page);
1869 f2fs_bug_on(sbi, !nat_blk);
1870 }
1891 1871
1892 if (to_journal && 1872 /* flush dirty nats in nat entry set */
1893 !__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL)) 1873 list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
1894 to_journal = false; 1874 struct f2fs_nat_entry *raw_ne;
1875 nid_t nid = nat_get_nid(ne);
1876 int offset;
1877
1878 if (nat_get_blkaddr(ne) == NEW_ADDR)
1879 continue;
1895 1880
1896 if (to_journal) { 1881 if (to_journal) {
1897 mutex_lock(&curseg->curseg_mutex); 1882 offset = lookup_journal_in_cursum(sum,
1883 NAT_JOURNAL, nid, 1);
1884 f2fs_bug_on(sbi, offset < 0);
1885 raw_ne = &nat_in_journal(sum, offset);
1886 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1898 } else { 1887 } else {
1899 page = get_next_nat_page(sbi, start_nid); 1888 raw_ne = &nat_blk->entries[nid - start_nid];
1900 nat_blk = page_address(page);
1901 f2fs_bug_on(sbi, !nat_blk);
1902 } 1889 }
1890 raw_nat_from_node_info(raw_ne, &ne->ni);
1903 1891
1904 /* flush dirty nats in nat entry set */ 1892 write_lock(&NM_I(sbi)->nat_tree_lock);
1905 list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { 1893 nat_reset_flag(ne);
1906 struct f2fs_nat_entry *raw_ne; 1894 __clear_nat_cache_dirty(NM_I(sbi), ne);
1907 nid_t nid = nat_get_nid(ne); 1895 write_unlock(&NM_I(sbi)->nat_tree_lock);
1908 int offset;
1909 1896
1910 if (to_journal) { 1897 if (nat_get_blkaddr(ne) == NULL_ADDR)
1911 offset = lookup_journal_in_cursum(sum, 1898 add_free_nid(sbi, nid, false);
1912 NAT_JOURNAL, nid, 1); 1899 }
1913 f2fs_bug_on(sbi, offset < 0);
1914 raw_ne = &nat_in_journal(sum, offset);
1915 nid_in_journal(sum, offset) = cpu_to_le32(nid);
1916 } else {
1917 raw_ne = &nat_blk->entries[nid - start_nid];
1918 }
1919 raw_nat_from_node_info(raw_ne, &ne->ni);
1920 1900
1921 if (nat_get_blkaddr(ne) == NULL_ADDR && 1901 if (to_journal)
1922 add_free_nid(sbi, nid, false) <= 0) { 1902 mutex_unlock(&curseg->curseg_mutex);
1923 write_lock(&nm_i->nat_tree_lock); 1903 else
1924 __del_from_nat_cache(nm_i, ne); 1904 f2fs_put_page(page, 1);
1925 write_unlock(&nm_i->nat_tree_lock);
1926 } else {
1927 write_lock(&nm_i->nat_tree_lock);
1928 nat_reset_flag(ne);
1929 __clear_nat_cache_dirty(nm_i, ne);
1930 write_unlock(&nm_i->nat_tree_lock);
1931 }
1932 }
1933 1905
1934 if (to_journal) 1906 if (!set->entry_cnt) {
1935 mutex_unlock(&curseg->curseg_mutex); 1907 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
1936 else 1908 kmem_cache_free(nat_entry_set_slab, set);
1937 f2fs_put_page(page, 1); 1909 }
1910}
1911
1912/*
1913 * This function is called during the checkpointing process.
1914 */
1915void flush_nat_entries(struct f2fs_sb_info *sbi)
1916{
1917 struct f2fs_nm_info *nm_i = NM_I(sbi);
1918 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
1919 struct f2fs_summary_block *sum = curseg->sum_blk;
1920 struct nat_entry_set *setvec[NATVEC_SIZE];
1921 struct nat_entry_set *set, *tmp;
1922 unsigned int found;
1923 nid_t set_idx = 0;
1924 LIST_HEAD(sets);
1938 1925
1939 f2fs_bug_on(sbi, !list_empty(&nes->entry_list)); 1926 /*
1940 release_nat_entry_set(nes, nm_i); 1927 * if there are no enough space in journal to store dirty nat
1928 * entries, remove all entries from journal and merge them
1929 * into nat entry set.
1930 */
1931 if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
1932 remove_nats_in_journal(sbi);
1933
1934 if (!nm_i->dirty_nat_cnt)
1935 return;
1936
1937 while ((found = __gang_lookup_nat_set(nm_i,
1938 set_idx, NATVEC_SIZE, setvec))) {
1939 unsigned idx;
1940 set_idx = setvec[found - 1]->set + 1;
1941 for (idx = 0; idx < found; idx++)
1942 __adjust_nat_entry_set(setvec[idx], &sets,
1943 MAX_NAT_JENTRIES(sum));
1941 } 1944 }
1942 1945
1943 f2fs_bug_on(sbi, !list_empty(head)); 1946 /* flush dirty nats in nat entry set */
1947 list_for_each_entry_safe(set, tmp, &sets, set_list)
1948 __flush_nat_entry_set(sbi, set);
1949
1944 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); 1950 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
1945} 1951}
1946 1952
@@ -1968,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
1968 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); 1974 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
1969 INIT_LIST_HEAD(&nm_i->free_nid_list); 1975 INIT_LIST_HEAD(&nm_i->free_nid_list);
1970 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); 1976 INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
1977 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
1971 INIT_LIST_HEAD(&nm_i->nat_entries); 1978 INIT_LIST_HEAD(&nm_i->nat_entries);
1972 INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
1973 INIT_LIST_HEAD(&nm_i->nat_entry_set);
1974 1979
1975 mutex_init(&nm_i->build_lock); 1980 mutex_init(&nm_i->build_lock);
1976 spin_lock_init(&nm_i->free_nid_list_lock); 1981 spin_lock_init(&nm_i->free_nid_list_lock);