diff options
author | Jaegeuk Kim <jaegeuk@kernel.org> | 2014-09-22 14:40:48 -0400 |
---|---|---|
committer | Jaegeuk Kim <jaegeuk@kernel.org> | 2014-09-30 18:30:41 -0400 |
commit | 309cc2b6e7ae6672ff9744fe07735ed234a8994e (patch) | |
tree | 0119d3d6d0cae0913b29ba6cfd419c089b2fb004 /fs/f2fs/node.c | |
parent | 4b2fecc84655055a6a1fe9151786992ac04b56ce (diff) |
f2fs: refactor flush_nat_entries to remove costly reorganizing ops
Previously, f2fs tries to reorganize the dirty nat entries into multiple sets
according to its nid ranges. This can improve the flushing nat pages, however,
if there are a lot of cached nat entries, it becomes a bottleneck.
This patch introduces a new set management flow by removing dirty nat list and
adding a series of set operations when the nat entry becomes dirty.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r-- | fs/f2fs/node.c | 299 |
1 files changed, 152 insertions, 147 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 21ed91b3cb54..44b8afef43d9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -123,6 +123,57 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) | |||
123 | kmem_cache_free(nat_entry_slab, e); | 123 | kmem_cache_free(nat_entry_slab, e); |
124 | } | 124 | } |
125 | 125 | ||
126 | static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, | ||
127 | struct nat_entry *ne) | ||
128 | { | ||
129 | nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); | ||
130 | struct nat_entry_set *head; | ||
131 | |||
132 | if (get_nat_flag(ne, IS_DIRTY)) | ||
133 | return; | ||
134 | retry: | ||
135 | head = radix_tree_lookup(&nm_i->nat_set_root, set); | ||
136 | if (!head) { | ||
137 | head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); | ||
138 | |||
139 | INIT_LIST_HEAD(&head->entry_list); | ||
140 | INIT_LIST_HEAD(&head->set_list); | ||
141 | head->set = set; | ||
142 | head->entry_cnt = 0; | ||
143 | |||
144 | if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { | ||
145 | cond_resched(); | ||
146 | goto retry; | ||
147 | } | ||
148 | } | ||
149 | list_move_tail(&ne->list, &head->entry_list); | ||
150 | nm_i->dirty_nat_cnt++; | ||
151 | head->entry_cnt++; | ||
152 | set_nat_flag(ne, IS_DIRTY, true); | ||
153 | } | ||
154 | |||
155 | static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, | ||
156 | struct nat_entry *ne) | ||
157 | { | ||
158 | nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; | ||
159 | struct nat_entry_set *head; | ||
160 | |||
161 | head = radix_tree_lookup(&nm_i->nat_set_root, set); | ||
162 | if (head) { | ||
163 | list_move_tail(&ne->list, &nm_i->nat_entries); | ||
164 | set_nat_flag(ne, IS_DIRTY, false); | ||
165 | head->entry_cnt--; | ||
166 | nm_i->dirty_nat_cnt--; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, | ||
171 | nid_t start, unsigned int nr, struct nat_entry_set **ep) | ||
172 | { | ||
173 | return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep, | ||
174 | start, nr); | ||
175 | } | ||
176 | |||
126 | bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | 177 | bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) |
127 | { | 178 | { |
128 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 179 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
@@ -1739,79 +1790,6 @@ skip: | |||
1739 | return err; | 1790 | return err; |
1740 | } | 1791 | } |
1741 | 1792 | ||
1742 | static struct nat_entry_set *grab_nat_entry_set(void) | ||
1743 | { | ||
1744 | struct nat_entry_set *nes = | ||
1745 | f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); | ||
1746 | |||
1747 | nes->entry_cnt = 0; | ||
1748 | INIT_LIST_HEAD(&nes->set_list); | ||
1749 | INIT_LIST_HEAD(&nes->entry_list); | ||
1750 | return nes; | ||
1751 | } | ||
1752 | |||
1753 | static void release_nat_entry_set(struct nat_entry_set *nes, | ||
1754 | struct f2fs_nm_info *nm_i) | ||
1755 | { | ||
1756 | nm_i->dirty_nat_cnt -= nes->entry_cnt; | ||
1757 | list_del(&nes->set_list); | ||
1758 | kmem_cache_free(nat_entry_set_slab, nes); | ||
1759 | } | ||
1760 | |||
1761 | static void adjust_nat_entry_set(struct nat_entry_set *nes, | ||
1762 | struct list_head *head) | ||
1763 | { | ||
1764 | struct nat_entry_set *next = nes; | ||
1765 | |||
1766 | if (list_is_last(&nes->set_list, head)) | ||
1767 | return; | ||
1768 | |||
1769 | list_for_each_entry_continue(next, head, set_list) | ||
1770 | if (nes->entry_cnt <= next->entry_cnt) | ||
1771 | break; | ||
1772 | |||
1773 | list_move_tail(&nes->set_list, &next->set_list); | ||
1774 | } | ||
1775 | |||
1776 | static void add_nat_entry(struct nat_entry *ne, struct list_head *head) | ||
1777 | { | ||
1778 | struct nat_entry_set *nes; | ||
1779 | nid_t start_nid = START_NID(ne->ni.nid); | ||
1780 | |||
1781 | list_for_each_entry(nes, head, set_list) { | ||
1782 | if (nes->start_nid == start_nid) { | ||
1783 | list_move_tail(&ne->list, &nes->entry_list); | ||
1784 | nes->entry_cnt++; | ||
1785 | adjust_nat_entry_set(nes, head); | ||
1786 | return; | ||
1787 | } | ||
1788 | } | ||
1789 | |||
1790 | nes = grab_nat_entry_set(); | ||
1791 | |||
1792 | nes->start_nid = start_nid; | ||
1793 | list_move_tail(&ne->list, &nes->entry_list); | ||
1794 | nes->entry_cnt++; | ||
1795 | list_add(&nes->set_list, head); | ||
1796 | } | ||
1797 | |||
1798 | static void merge_nats_in_set(struct f2fs_sb_info *sbi) | ||
1799 | { | ||
1800 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1801 | struct list_head *dirty_list = &nm_i->dirty_nat_entries; | ||
1802 | struct list_head *set_list = &nm_i->nat_entry_set; | ||
1803 | struct nat_entry *ne, *tmp; | ||
1804 | |||
1805 | write_lock(&nm_i->nat_tree_lock); | ||
1806 | list_for_each_entry_safe(ne, tmp, dirty_list, list) { | ||
1807 | if (nat_get_blkaddr(ne) == NEW_ADDR) | ||
1808 | continue; | ||
1809 | add_nat_entry(ne, set_list); | ||
1810 | nm_i->dirty_nat_cnt++; | ||
1811 | } | ||
1812 | write_unlock(&nm_i->nat_tree_lock); | ||
1813 | } | ||
1814 | |||
1815 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) | 1793 | static void remove_nats_in_journal(struct f2fs_sb_info *sbi) |
1816 | { | 1794 | { |
1817 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1795 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
@@ -1846,101 +1824,129 @@ found: | |||
1846 | mutex_unlock(&curseg->curseg_mutex); | 1824 | mutex_unlock(&curseg->curseg_mutex); |
1847 | } | 1825 | } |
1848 | 1826 | ||
1849 | /* | 1827 | static void __adjust_nat_entry_set(struct nat_entry_set *nes, |
1850 | * This function is called during the checkpointing process. | 1828 | struct list_head *head, int max) |
1851 | */ | ||
1852 | void flush_nat_entries(struct f2fs_sb_info *sbi) | ||
1853 | { | 1829 | { |
1854 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1830 | struct nat_entry_set *cur; |
1855 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1856 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1857 | struct nat_entry_set *nes, *tmp; | ||
1858 | struct list_head *head = &nm_i->nat_entry_set; | ||
1859 | bool to_journal = true; | ||
1860 | 1831 | ||
1861 | /* merge nat entries of dirty list to nat entry set temporarily */ | 1832 | if (nes->entry_cnt >= max) |
1862 | merge_nats_in_set(sbi); | 1833 | goto add_out; |
1863 | 1834 | ||
1864 | /* | 1835 | list_for_each_entry(cur, head, set_list) { |
1865 | * if there are no enough space in journal to store dirty nat | 1836 | if (cur->entry_cnt >= nes->entry_cnt) { |
1866 | * entries, remove all entries from journal and merge them | 1837 | list_add(&nes->set_list, cur->set_list.prev); |
1867 | * into nat entry set. | 1838 | return; |
1868 | */ | 1839 | } |
1869 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) { | ||
1870 | remove_nats_in_journal(sbi); | ||
1871 | |||
1872 | /* | ||
1873 | * merge nat entries of dirty list to nat entry set temporarily | ||
1874 | */ | ||
1875 | merge_nats_in_set(sbi); | ||
1876 | } | 1840 | } |
1841 | add_out: | ||
1842 | list_add_tail(&nes->set_list, head); | ||
1843 | } | ||
1877 | 1844 | ||
1878 | if (!nm_i->dirty_nat_cnt) | 1845 | static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, |
1879 | return; | 1846 | struct nat_entry_set *set) |
1847 | { | ||
1848 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1849 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1850 | nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK; | ||
1851 | bool to_journal = true; | ||
1852 | struct f2fs_nat_block *nat_blk; | ||
1853 | struct nat_entry *ne, *cur; | ||
1854 | struct page *page = NULL; | ||
1880 | 1855 | ||
1881 | /* | 1856 | /* |
1882 | * there are two steps to flush nat entries: | 1857 | * there are two steps to flush nat entries: |
1883 | * #1, flush nat entries to journal in current hot data summary block. | 1858 | * #1, flush nat entries to journal in current hot data summary block. |
1884 | * #2, flush nat entries to nat page. | 1859 | * #2, flush nat entries to nat page. |
1885 | */ | 1860 | */ |
1886 | list_for_each_entry_safe(nes, tmp, head, set_list) { | 1861 | if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL)) |
1887 | struct f2fs_nat_block *nat_blk; | 1862 | to_journal = false; |
1888 | struct nat_entry *ne, *cur; | 1863 | |
1889 | struct page *page; | 1864 | if (to_journal) { |
1890 | nid_t start_nid = nes->start_nid; | 1865 | mutex_lock(&curseg->curseg_mutex); |
1866 | } else { | ||
1867 | page = get_next_nat_page(sbi, start_nid); | ||
1868 | nat_blk = page_address(page); | ||
1869 | f2fs_bug_on(sbi, !nat_blk); | ||
1870 | } | ||
1891 | 1871 | ||
1892 | if (to_journal && | 1872 | /* flush dirty nats in nat entry set */ |
1893 | !__has_cursum_space(sum, nes->entry_cnt, NAT_JOURNAL)) | 1873 | list_for_each_entry_safe(ne, cur, &set->entry_list, list) { |
1894 | to_journal = false; | 1874 | struct f2fs_nat_entry *raw_ne; |
1875 | nid_t nid = nat_get_nid(ne); | ||
1876 | int offset; | ||
1877 | |||
1878 | if (nat_get_blkaddr(ne) == NEW_ADDR) | ||
1879 | continue; | ||
1895 | 1880 | ||
1896 | if (to_journal) { | 1881 | if (to_journal) { |
1897 | mutex_lock(&curseg->curseg_mutex); | 1882 | offset = lookup_journal_in_cursum(sum, |
1883 | NAT_JOURNAL, nid, 1); | ||
1884 | f2fs_bug_on(sbi, offset < 0); | ||
1885 | raw_ne = &nat_in_journal(sum, offset); | ||
1886 | nid_in_journal(sum, offset) = cpu_to_le32(nid); | ||
1898 | } else { | 1887 | } else { |
1899 | page = get_next_nat_page(sbi, start_nid); | 1888 | raw_ne = &nat_blk->entries[nid - start_nid]; |
1900 | nat_blk = page_address(page); | ||
1901 | f2fs_bug_on(sbi, !nat_blk); | ||
1902 | } | 1889 | } |
1890 | raw_nat_from_node_info(raw_ne, &ne->ni); | ||
1903 | 1891 | ||
1904 | /* flush dirty nats in nat entry set */ | 1892 | write_lock(&NM_I(sbi)->nat_tree_lock); |
1905 | list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { | 1893 | nat_reset_flag(ne); |
1906 | struct f2fs_nat_entry *raw_ne; | 1894 | __clear_nat_cache_dirty(NM_I(sbi), ne); |
1907 | nid_t nid = nat_get_nid(ne); | 1895 | write_unlock(&NM_I(sbi)->nat_tree_lock); |
1908 | int offset; | ||
1909 | 1896 | ||
1910 | if (to_journal) { | 1897 | if (nat_get_blkaddr(ne) == NULL_ADDR) |
1911 | offset = lookup_journal_in_cursum(sum, | 1898 | add_free_nid(sbi, nid, false); |
1912 | NAT_JOURNAL, nid, 1); | 1899 | } |
1913 | f2fs_bug_on(sbi, offset < 0); | ||
1914 | raw_ne = &nat_in_journal(sum, offset); | ||
1915 | nid_in_journal(sum, offset) = cpu_to_le32(nid); | ||
1916 | } else { | ||
1917 | raw_ne = &nat_blk->entries[nid - start_nid]; | ||
1918 | } | ||
1919 | raw_nat_from_node_info(raw_ne, &ne->ni); | ||
1920 | 1900 | ||
1921 | if (nat_get_blkaddr(ne) == NULL_ADDR && | 1901 | if (to_journal) |
1922 | add_free_nid(sbi, nid, false) <= 0) { | 1902 | mutex_unlock(&curseg->curseg_mutex); |
1923 | write_lock(&nm_i->nat_tree_lock); | 1903 | else |
1924 | __del_from_nat_cache(nm_i, ne); | 1904 | f2fs_put_page(page, 1); |
1925 | write_unlock(&nm_i->nat_tree_lock); | ||
1926 | } else { | ||
1927 | write_lock(&nm_i->nat_tree_lock); | ||
1928 | nat_reset_flag(ne); | ||
1929 | __clear_nat_cache_dirty(nm_i, ne); | ||
1930 | write_unlock(&nm_i->nat_tree_lock); | ||
1931 | } | ||
1932 | } | ||
1933 | 1905 | ||
1934 | if (to_journal) | 1906 | if (!set->entry_cnt) { |
1935 | mutex_unlock(&curseg->curseg_mutex); | 1907 | radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); |
1936 | else | 1908 | kmem_cache_free(nat_entry_set_slab, set); |
1937 | f2fs_put_page(page, 1); | 1909 | } |
1910 | } | ||
1911 | |||
1912 | /* | ||
1913 | * This function is called during the checkpointing process. | ||
1914 | */ | ||
1915 | void flush_nat_entries(struct f2fs_sb_info *sbi) | ||
1916 | { | ||
1917 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
1918 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | ||
1919 | struct f2fs_summary_block *sum = curseg->sum_blk; | ||
1920 | struct nat_entry_set *setvec[NATVEC_SIZE]; | ||
1921 | struct nat_entry_set *set, *tmp; | ||
1922 | unsigned int found; | ||
1923 | nid_t set_idx = 0; | ||
1924 | LIST_HEAD(sets); | ||
1938 | 1925 | ||
1939 | f2fs_bug_on(sbi, !list_empty(&nes->entry_list)); | 1926 | /* |
1940 | release_nat_entry_set(nes, nm_i); | 1927 | * if there are no enough space in journal to store dirty nat |
1928 | * entries, remove all entries from journal and merge them | ||
1929 | * into nat entry set. | ||
1930 | */ | ||
1931 | if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) | ||
1932 | remove_nats_in_journal(sbi); | ||
1933 | |||
1934 | if (!nm_i->dirty_nat_cnt) | ||
1935 | return; | ||
1936 | |||
1937 | while ((found = __gang_lookup_nat_set(nm_i, | ||
1938 | set_idx, NATVEC_SIZE, setvec))) { | ||
1939 | unsigned idx; | ||
1940 | set_idx = setvec[found - 1]->set + 1; | ||
1941 | for (idx = 0; idx < found; idx++) | ||
1942 | __adjust_nat_entry_set(setvec[idx], &sets, | ||
1943 | MAX_NAT_JENTRIES(sum)); | ||
1941 | } | 1944 | } |
1942 | 1945 | ||
1943 | f2fs_bug_on(sbi, !list_empty(head)); | 1946 | /* flush dirty nats in nat entry set */ |
1947 | list_for_each_entry_safe(set, tmp, &sets, set_list) | ||
1948 | __flush_nat_entry_set(sbi, set); | ||
1949 | |||
1944 | f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); | 1950 | f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); |
1945 | } | 1951 | } |
1946 | 1952 | ||
@@ -1968,9 +1974,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) | |||
1968 | INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); | 1974 | INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); |
1969 | INIT_LIST_HEAD(&nm_i->free_nid_list); | 1975 | INIT_LIST_HEAD(&nm_i->free_nid_list); |
1970 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); | 1976 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); |
1977 | INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); | ||
1971 | INIT_LIST_HEAD(&nm_i->nat_entries); | 1978 | INIT_LIST_HEAD(&nm_i->nat_entries); |
1972 | INIT_LIST_HEAD(&nm_i->dirty_nat_entries); | ||
1973 | INIT_LIST_HEAD(&nm_i->nat_entry_set); | ||
1974 | 1979 | ||
1975 | mutex_init(&nm_i->build_lock); | 1980 | mutex_init(&nm_i->build_lock); |
1976 | spin_lock_init(&nm_i->free_nid_list_lock); | 1981 | spin_lock_init(&nm_i->free_nid_list_lock); |