path: root/mm/vmscan.c
author     Johannes Weiner <hannes@cmpxchg.org>            2013-02-22 19:32:19 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-02-23 20:50:09 -0500
commit     9b4f98cdac9683ee9cdb28c582a81223f0c10a3f (patch)
tree       bca65df9e9bca718b7afc4fb15c56d104e0ba346 /mm/vmscan.c
parent     9a2651140ef740b3b67ad47ea3d0af75581aacc6 (diff)
mm: vmscan: compaction works against zones, not lruvecs
The restart logic for when reclaim operates back to back with compaction is
currently applied on the lruvec level.  But this does not make sense, because
the container of interest for compaction is a zone as a whole, not the zone
pages that are part of a certain memory cgroup.

Negative impact is bounded.  For one, the code checks that the lruvec has
enough reclaim candidates, so it does not risk getting stuck on a condition
that can not be fulfilled.  And the unfairness of hammering on one particular
memory cgroup to make progress in a zone will be amortized by the round robin
manner in which reclaim goes through the memory cgroups.  Still, this can
lead to unnecessary allocation latencies when the code elects to restart on a
hard to reclaim or small group when there are other, more reclaimable groups
in the zone.

Move this logic to the zone level and restart reclaim for all memory cgroups
in a zone when compaction requires more free pages from it.

[akpm@linux-foundation.org: no need for min_t]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Satoru Moriya <satoru.moriya@hds.com>
Cc: Simon Jeons <simon.jeons@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c | 179
1 file changed, 91 insertions(+), 88 deletions(-)
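Before the full diff, here is a small stand-alone C sketch of the control-flow change described above. It is not kernel code: the structures, the batch size, and the page counts are invented for illustration, and should_continue_reclaim()/shrink_lruvec() are reduced to toy stand-ins. The only point it demonstrates is where the compaction-driven restart check now sits: it wraps one pass over every lruvec in the zone, rather than re-running reclaim against a single lruvec.

/*
 * Toy model of the control flow described in the changelog -- not kernel
 * code.  All types, constants and numbers here are invented for illustration.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_LRUVECS 3                 /* pretend the zone holds three memcg lruvecs */

struct lruvec { long pages; };                       /* reclaimable pages left */
struct zone   { struct lruvec lruvec[NR_LRUVECS]; };

/* Stand-in for should_continue_reclaim(): keep going while compaction still
 * lacks free pages and the last round actually made progress. */
static bool should_continue_reclaim(long round, long total, long needed)
{
        return round > 0 && total < needed;
}

/* Stand-in for shrink_lruvec(): reclaim at most a small batch per call. */
static long shrink_lruvec(struct lruvec *lruvec)
{
        long batch = lruvec->pages < 4 ? lruvec->pages : 4;

        lruvec->pages -= batch;
        return batch;
}

/*
 * New scheme: shrink every lruvec in the zone once, then decide once, at the
 * zone level, whether another full round is needed.  Under the old scheme the
 * restart loop lived inside shrink_lruvec(), so one small or hard-to-reclaim
 * memcg could be restarted on repeatedly while its siblings sat untouched.
 */
static long shrink_zone(struct zone *zone, long needed)
{
        long total = 0, round;

        do {
                round = 0;
                for (int i = 0; i < NR_LRUVECS; i++)
                        round += shrink_lruvec(&zone->lruvec[i]);
                total += round;
        } while (should_continue_reclaim(round, total, needed));

        return total;
}

int main(void)
{
        struct zone zone = { .lruvec = { { 2 }, { 30 }, { 30 } } };

        printf("reclaimed %ld pages zone-wide\n", shrink_zone(&zone, 16));
        return 0;
}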
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d4f37634194e..ef123356c6ab 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1822,6 +1822,58 @@ out:
         }
 }
 
+/*
+ * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
+ */
+static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+{
+        unsigned long nr[NR_LRU_LISTS];
+        unsigned long nr_to_scan;
+        enum lru_list lru;
+        unsigned long nr_reclaimed = 0;
+        unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+        struct blk_plug plug;
+
+        get_scan_count(lruvec, sc, nr);
+
+        blk_start_plug(&plug);
+        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+                                        nr[LRU_INACTIVE_FILE]) {
+                for_each_evictable_lru(lru) {
+                        if (nr[lru]) {
+                                nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX);
+                                nr[lru] -= nr_to_scan;
+
+                                nr_reclaimed += shrink_list(lru, nr_to_scan,
+                                                            lruvec, sc);
+                        }
+                }
+                /*
+                 * On large memory systems, scan >> priority can become
+                 * really large. This is fine for the starting priority;
+                 * we want to put equal scanning pressure on each zone.
+                 * However, if the VM has a harder time of freeing pages,
+                 * with multiple processes reclaiming pages, the total
+                 * freeing target can get unreasonably large.
+                 */
+                if (nr_reclaimed >= nr_to_reclaim &&
+                    sc->priority < DEF_PRIORITY)
+                        break;
+        }
+        blk_finish_plug(&plug);
+        sc->nr_reclaimed += nr_reclaimed;
+
+        /*
+         * Even if we did not try to evict anon pages at all, we want to
+         * rebalance the anon lru active/inactive ratio.
+         */
+        if (inactive_anon_is_low(lruvec))
+                shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+                                   sc, LRU_ACTIVE_ANON);
+
+        throttle_vm_writeout(sc->gfp_mask);
+}
+
 /* Use reclaim/compaction for costly allocs or under memory pressure */
 static bool in_reclaim_compaction(struct scan_control *sc)
 {
@@ -1840,7 +1892,7 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  * calls try_to_compact_zone() that it will have enough free pages to succeed.
  * It will give up earlier than that if there is difficulty reclaiming pages.
  */
-static inline bool should_continue_reclaim(struct lruvec *lruvec,
+static inline bool should_continue_reclaim(struct zone *zone,
                                         unsigned long nr_reclaimed,
                                         unsigned long nr_scanned,
                                         struct scan_control *sc)
@@ -1880,15 +1932,15 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
          * inactive lists are large enough, continue reclaiming
          */
         pages_for_compaction = (2UL << sc->order);
-        inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
+        inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE);
         if (nr_swap_pages > 0)
-                inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
+                inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON);
         if (sc->nr_reclaimed < pages_for_compaction &&
                         inactive_lru_pages > pages_for_compaction)
                 return true;
 
         /* If compaction would go ahead or the allocation would succeed, stop */
-        switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
+        switch (compaction_suitable(zone, sc->order)) {
         case COMPACT_PARTIAL:
         case COMPACT_CONTINUE:
                 return false;
@@ -1897,98 +1949,49 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
         }
 }
 
-/*
- * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
- */
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
-        unsigned long nr[NR_LRU_LISTS];
-        unsigned long nr_to_scan;
-        enum lru_list lru;
         unsigned long nr_reclaimed, nr_scanned;
-        unsigned long nr_to_reclaim = sc->nr_to_reclaim;
-        struct blk_plug plug;
-
-restart:
-        nr_reclaimed = 0;
-        nr_scanned = sc->nr_scanned;
-        get_scan_count(lruvec, sc, nr);
-
-        blk_start_plug(&plug);
-        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
-                                        nr[LRU_INACTIVE_FILE]) {
-                for_each_evictable_lru(lru) {
-                        if (nr[lru]) {
-                                nr_to_scan = min_t(unsigned long,
-                                                   nr[lru], SWAP_CLUSTER_MAX);
-                                nr[lru] -= nr_to_scan;
-
-                                nr_reclaimed += shrink_list(lru, nr_to_scan,
-                                                            lruvec, sc);
-                        }
-                }
-                /*
-                 * On large memory systems, scan >> priority can become
-                 * really large. This is fine for the starting priority;
-                 * we want to put equal scanning pressure on each zone.
-                 * However, if the VM has a harder time of freeing pages,
-                 * with multiple processes reclaiming pages, the total
-                 * freeing target can get unreasonably large.
-                 */
-                if (nr_reclaimed >= nr_to_reclaim &&
-                    sc->priority < DEF_PRIORITY)
-                        break;
-        }
-        blk_finish_plug(&plug);
-        sc->nr_reclaimed += nr_reclaimed;
 
-        /*
-         * Even if we did not try to evict anon pages at all, we want to
-         * rebalance the anon lru active/inactive ratio.
-         */
-        if (inactive_anon_is_low(lruvec))
-                shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
-                                   sc, LRU_ACTIVE_ANON);
-
-        /* reclaim/compaction might need reclaim to continue */
-        if (should_continue_reclaim(lruvec, nr_reclaimed,
-                                    sc->nr_scanned - nr_scanned, sc))
-                goto restart;
+        do {
+                struct mem_cgroup *root = sc->target_mem_cgroup;
+                struct mem_cgroup_reclaim_cookie reclaim = {
+                        .zone = zone,
+                        .priority = sc->priority,
+                };
+                struct mem_cgroup *memcg;
 
-        throttle_vm_writeout(sc->gfp_mask);
-}
+                nr_reclaimed = sc->nr_reclaimed;
+                nr_scanned = sc->nr_scanned;
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-        struct mem_cgroup *root = sc->target_mem_cgroup;
-        struct mem_cgroup_reclaim_cookie reclaim = {
-                .zone = zone,
-                .priority = sc->priority,
-        };
-        struct mem_cgroup *memcg;
+                memcg = mem_cgroup_iter(root, NULL, &reclaim);
+                do {
+                        struct lruvec *lruvec;
 
-        memcg = mem_cgroup_iter(root, NULL, &reclaim);
-        do {
-                struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
-                shrink_lruvec(lruvec, sc);
+                        shrink_lruvec(lruvec, sc);
 
-                /*
-                 * Limit reclaim has historically picked one memcg and
-                 * scanned it with decreasing priority levels until
-                 * nr_to_reclaim had been reclaimed.  This priority
-                 * cycle is thus over after a single memcg.
-                 *
-                 * Direct reclaim and kswapd, on the other hand, have
-                 * to scan all memory cgroups to fulfill the overall
-                 * scan target for the zone.
-                 */
-                if (!global_reclaim(sc)) {
-                        mem_cgroup_iter_break(root, memcg);
-                        break;
-                }
-                memcg = mem_cgroup_iter(root, memcg, &reclaim);
-        } while (memcg);
+                        /*
+                         * Limit reclaim has historically picked one
+                         * memcg and scanned it with decreasing
+                         * priority levels until nr_to_reclaim had
+                         * been reclaimed.  This priority cycle is
+                         * thus over after a single memcg.
+                         *
+                         * Direct reclaim and kswapd, on the other
+                         * hand, have to scan all memory cgroups to
+                         * fulfill the overall scan target for the
+                         * zone.
+                         */
+                        if (!global_reclaim(sc)) {
+                                mem_cgroup_iter_break(root, memcg);
+                                break;
+                        }
+                        memcg = mem_cgroup_iter(root, memcg, &reclaim);
+                } while (memcg);
+        } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
+                                         sc->nr_scanned - nr_scanned, sc));
 }
 
 /* Returns true if compaction should go ahead for a high-order request */