author	Rik van Riel <riel@redhat.com>	2012-05-29 18:06:18 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-29 19:22:19 -0400
commit	e709ffd6169ccd259eb5874e853303e91e94e829 (patch)
tree	796b56c2507b8581492da73e354d651c9dd7076b /mm
parent	edad9d2c337d43278a9d5aeb0ed531c2e838f8a6 (diff)
mm: remove swap token code
The swap token code no longer fits in with the current VM model.  It
does not play well with cgroups or the better NUMA placement code in
development, since we have only one swap token globally.

It also has the potential to mess with scalability of the system, by
increasing the number of non-reclaimable pages on the active and
inactive anon LRU lists.

Last but not least, the swap token code has been broken for a year
without complaints, as reported by Konstantin Khlebnikov.  This suggests
we no longer have much use for it.

The days of sub-1G memory systems with heavy use of swap are over.  If
we ever need thrashing-reducing code in the future, we will have to
implement something that does scale.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Bob Picco <bpicco@meloft.net>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
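For orientation before the diff, here is a minimal sketch of the interface
this patch deletes, as seen by its callers in the hunks below.  The function
names (grab_swap_token(), has_swap_token(), put_swap_token(),
disable_swap_token()) are the real ones being removed; the standalone-header
framing and the comments are illustrative only, not actual kernel code:

	/*
	 * Illustrative summary only -- not a file from the kernel tree.
	 * A single global token marks one mm_struct whose pages reclaim
	 * should go easy on; this "one token globally" design is the
	 * limitation the commit message calls out.
	 */
	struct mm_struct;
	struct mem_cgroup;

	/* Fault path (mm/memory.c): contend for the token before swap read-in. */
	void grab_swap_token(struct mm_struct *mm);

	/* Reclaim (mm/rmap.c): the holder's pages are treated as recently referenced. */
	int has_swap_token(struct mm_struct *mm);

	/* Release the token when the mm gives it up, e.g. on process exit. */
	void put_swap_token(struct mm_struct *mm);

	/* Last-resort reclaim (mm/vmscan.c): override the token at priority 0. */
	void disable_swap_token(struct mem_cgroup *memcg);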
Diffstat (limited to 'mm')
-rw-r--r--	mm/Makefile	|   2 +-
-rw-r--r--	mm/memcontrol.c	|   1 -
-rw-r--r--	mm/memory.c	|   2 +-
-rw-r--r--	mm/rmap.c	|   6 ------
-rw-r--r--	mm/thrash.c	| 155 ---------------
-rw-r--r--	mm/vmscan.c	|   6 ------
6 files changed, 2 insertions(+), 170 deletions(-)
diff --git a/mm/Makefile b/mm/Makefile
index 8aada89efbbb..ccecbf9818f5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,7 +25,7 @@ endif
 obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
 
 obj-$(CONFIG_BOUNCE) += bounce.o
-obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
+obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
 obj-$(CONFIG_HAS_DMA) += dmapool.o
 obj-$(CONFIG_HUGETLBFS) += hugetlb.o
 obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f342778a0c0a..92675fe8a2ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5598,7 +5598,6 @@ static void mem_cgroup_move_task(struct cgroup *cont,
 	if (mm) {
 		if (mc.to)
 			mem_cgroup_move_charge(mm);
-		put_swap_token(mm);
 		mmput(mm);
 	}
 	if (mc.to)
diff --git a/mm/memory.c b/mm/memory.c
index e40f6759ba98..2bf9e110437c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2908,7 +2908,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry);
 	if (!page) {
-		grab_swap_token(mm); /* Contend for token _before_ read-in */
 		page = swapin_readahead(entry,
 					GFP_HIGHUSER_MOVABLE, vma, address);
 		if (!page) {
@@ -2938,6 +2937,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	locked = lock_page_or_retry(page, mm, flags);
+
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	if (!locked) {
 		ret |= VM_FAULT_RETRY;
diff --git a/mm/rmap.c b/mm/rmap.c
index 5b5ad584ffb7..0f3b7cda2a24 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -755,12 +755,6 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		pte_unmap_unlock(pte, ptl);
 	}
 
-	/* Pretend the page is referenced if the task has the
-	   swap token and is in the middle of a page fault. */
-	if (mm != current->mm && has_swap_token(mm) &&
-			rwsem_is_locked(&mm->mmap_sem))
-		referenced++;
-
 	(*mapcount)--;
 
 	if (referenced)
diff --git a/mm/thrash.c b/mm/thrash.c
deleted file mode 100644
index 57ad495dbd54..000000000000
--- a/mm/thrash.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * mm/thrash.c
- *
- * Copyright (C) 2004, Red Hat, Inc.
- * Copyright (C) 2004, Rik van Riel <riel@redhat.com>
- * Released under the GPL, see the file COPYING for details.
- *
- * Simple token based thrashing protection, using the algorithm
- * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html
- *
- * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com>
- * Improved algorithm to pass token:
- * Each task has a priority which is incremented if it contended
- * for the token in an interval less than its previous attempt.
- * If the token is acquired, that task's priority is boosted to prevent
- * the token from bouncing around too often and to let the task make
- * some progress in its execution.
- */
-
-#include <linux/jiffies.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/swap.h>
-#include <linux/memcontrol.h>
-
-#include <trace/events/vmscan.h>
-
-#define TOKEN_AGING_INTERVAL	(0xFF)
-
-static DEFINE_SPINLOCK(swap_token_lock);
-struct mm_struct *swap_token_mm;
-static struct mem_cgroup *swap_token_memcg;
-
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
-{
-	struct mem_cgroup *memcg;
-
-	memcg = try_get_mem_cgroup_from_mm(mm);
-	if (memcg)
-		css_put(mem_cgroup_css(memcg));
-
-	return memcg;
-}
-#else
-static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
-{
-	return NULL;
-}
-#endif
-
-void grab_swap_token(struct mm_struct *mm)
-{
-	int current_interval;
-	unsigned int old_prio = mm->token_priority;
-	static unsigned int global_faults;
-	static unsigned int last_aging;
-
-	global_faults++;
-
-	current_interval = global_faults - mm->faultstamp;
-
-	if (!spin_trylock(&swap_token_lock))
-		return;
-
-	/* First come first served */
-	if (!swap_token_mm)
-		goto replace_token;
-
-	/*
-	 * Usually, we don't need priority aging because long interval faults
-	 * makes priority decrease quickly. But there is one exception. If the
-	 * token owner task is sleeping, it never make long interval faults.
-	 * Thus, we need a priority aging mechanism instead. The requirements
-	 * of priority aging are
-	 *  1) An aging interval is reasonable enough long. Too short aging
-	 *     interval makes quick swap token lost and decrease performance.
-	 *  2) The swap token owner task have to get priority aging even if
-	 *     it's under sleep.
-	 */
-	if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
-		swap_token_mm->token_priority /= 2;
-		last_aging = global_faults;
-	}
-
-	if (mm == swap_token_mm) {
-		mm->token_priority += 2;
-		goto update_priority;
-	}
-
-	if (current_interval < mm->last_interval)
-		mm->token_priority++;
-	else {
-		if (likely(mm->token_priority > 0))
-			mm->token_priority--;
-	}
-
-	/* Check if we deserve the token */
-	if (mm->token_priority > swap_token_mm->token_priority)
-		goto replace_token;
-
-update_priority:
-	trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
-
-out:
-	mm->faultstamp = global_faults;
-	mm->last_interval = current_interval;
-	spin_unlock(&swap_token_lock);
-	return;
-
-replace_token:
-	mm->token_priority += 2;
-	trace_replace_swap_token(swap_token_mm, mm);
-	swap_token_mm = mm;
-	swap_token_memcg = swap_token_memcg_from_mm(mm);
-	last_aging = global_faults;
-	goto out;
-}
-
-/* Called on process exit. */
-void __put_swap_token(struct mm_struct *mm)
-{
-	spin_lock(&swap_token_lock);
-	if (likely(mm == swap_token_mm)) {
-		trace_put_swap_token(swap_token_mm);
-		swap_token_mm = NULL;
-		swap_token_memcg = NULL;
-	}
-	spin_unlock(&swap_token_lock);
-}
-
-static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
-{
-	if (!a)
-		return true;
-	if (!b)
-		return true;
-	if (a == b)
-		return true;
-	return false;
-}
-
-void disable_swap_token(struct mem_cgroup *memcg)
-{
-	/* memcg reclaim don't disable unrelated mm token. */
-	if (match_memcg(memcg, swap_token_memcg)) {
-		spin_lock(&swap_token_lock);
-		if (match_memcg(memcg, swap_token_memcg)) {
-			trace_disable_swap_token(swap_token_mm);
-			swap_token_mm = NULL;
-			swap_token_memcg = NULL;
-		}
-		spin_unlock(&swap_token_lock);
-	}
-}
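As a companion to the deletion above, here is a self-contained user-space
approximation of the token-passing priority heuristic from grab_swap_token().
The struct task type, the main() driver, and the omission of locking and
tracepoints are my simplifications, not part of the removed kernel code:

	#include <stdio.h>

	#define TOKEN_AGING_INTERVAL 0xFF

	struct task {
		unsigned int token_priority;
		unsigned int faultstamp;
		unsigned int last_interval;
	};

	static struct task *token_owner;	/* plays the role of swap_token_mm */
	static unsigned int global_faults;
	static unsigned int last_aging;

	/* Approximates the priority logic of the removed grab_swap_token(). */
	static void grab_token(struct task *t)
	{
		unsigned int interval;

		global_faults++;
		interval = global_faults - t->faultstamp;

		/* First come first served. */
		if (!token_owner)
			goto replace;

		/* Age the owner so a sleeping holder cannot keep the token forever. */
		if (global_faults - last_aging > TOKEN_AGING_INTERVAL) {
			token_owner->token_priority /= 2;
			last_aging = global_faults;
		}

		/* The current owner gets a boost so the token does not bounce around. */
		if (t == token_owner) {
			t->token_priority += 2;
			goto out;
		}

		/* Faulting more often than last time raises priority, else lowers it. */
		if (interval < t->last_interval)
			t->token_priority++;
		else if (t->token_priority > 0)
			t->token_priority--;

		if (t->token_priority > token_owner->token_priority)
			goto replace;
		goto out;

	replace:
		t->token_priority += 2;
		token_owner = t;
		last_aging = global_faults;
		/* fall through */
	out:
		t->faultstamp = global_faults;
		t->last_interval = interval;
	}

	int main(void)
	{
		struct task a = { 0 }, b = { 0 };

		grab_token(&a);	/* empty slot: a takes the token immediately */
		grab_token(&b);	/* b has built up no priority yet, so a keeps it */
		printf("token owner is %s\n", token_owner == &a ? "a" : "b");
		return 0;
	}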
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33dc256033b5..ca46080bb074 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2352,8 +2352,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
-		if (!priority)
-			disable_swap_token(sc->target_mem_cgroup);
 		aborted_reclaim = shrink_zones(priority, zonelist, sc);
 
 		/*
@@ -2704,10 +2702,6 @@ loop_again:
 		unsigned long lru_pages = 0;
 		int has_under_min_watermark_zone = 0;
 
-		/* The swap token gets in the way of swapout... */
-		if (!priority)
-			disable_swap_token(NULL);
-
 		all_zones_ok = 1;
 		balanced = 0;
 