author    Rik van Riel <riel@redhat.com>    2012-05-29 18:06:18 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2012-05-29 19:22:19 -0400
commit    e709ffd6169ccd259eb5874e853303e91e94e829 (patch)
tree      796b56c2507b8581492da73e354d651c9dd7076b
parent    edad9d2c337d43278a9d5aeb0ed531c2e838f8a6 (diff)
mm: remove swap token code
The swap token code no longer fits in with the current VM model.  It
does not play well with cgroups or the better NUMA placement code in
development, since we have only one swap token globally.

It also has the potential to mess with scalability of the system, by
increasing the number of non-reclaimable pages on the active and
inactive anon LRU lists.

Last but not least, the swap token code has been broken for a year
without complaints, as reported by Konstantin Khlebnikov.  This suggests
we no longer have much use for it.

The days of sub-1G memory systems with heavy use of swap are over.  If
we ever need thrashing reducing code in the future, we will have to
implement something that does scale.

Signed-off-by: Rik van Riel <riel@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hughd@google.com>
Acked-by: Bob Picco <bpicco@meloft.net>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
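[Editor's note: for readers unfamiliar with the mechanism, the program below is
a minimal userspace sketch of the policy being deleted -- not kernel code.  The
names mirror mm/thrash.c (see the full deletion further down), but the
swap_token_lock, priority aging, memcg filtering, and tracepoints are all
stripped out.  It shows the core design point the commit message objects to:
there is exactly one swap_token_mm for the whole machine, so at most one
address space can ever be shielded from reclaim at a time.]

#include <stdio.h>

struct mm {
	const char *name;
	unsigned int token_priority;
	unsigned int faultstamp;
	unsigned int last_interval;
};

/* One token for the whole machine -- the scalability problem in a nutshell. */
static struct mm *swap_token_mm;
static unsigned int global_faults;

static void grab_swap_token(struct mm *mm)
{
	unsigned int interval;

	global_faults++;
	interval = global_faults - mm->faultstamp;

	if (!swap_token_mm) {
		swap_token_mm = mm;		/* first come, first served */
	} else if (mm == swap_token_mm) {
		mm->token_priority += 2;	/* holder keeps a head start */
	} else if (interval < mm->last_interval) {
		mm->token_priority++;		/* faulting harder than last time */
	} else if (mm->token_priority > 0) {
		mm->token_priority--;
	}

	/* Steal the token if we now outrank the current holder. */
	if (mm->token_priority > swap_token_mm->token_priority)
		swap_token_mm = mm;

	mm->faultstamp = global_faults;
	mm->last_interval = interval;
}

int main(void)
{
	struct mm a = { "a", 0, 0, 0 }, b = { "b", 0, 0, 0 };

	grab_swap_token(&a);	/* token is free, a takes it */
	grab_swap_token(&b);
	grab_swap_token(&b);	/* b refaults quickly, outranks a, steals token */
	printf("token holder: %s\n", swap_token_mm->name);
	return 0;
}

[Because the token is a single global, per-cgroup reclaim cannot protect one
thrashing task per group -- which is the cgroups complaint above.]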
-rw-r--r--	include/linux/mm_types.h	11
-rw-r--r--	include/linux/swap.h	35
-rw-r--r--	include/trace/events/vmscan.h	82
-rw-r--r--	kernel/fork.c	9
-rw-r--r--	mm/Makefile	2
-rw-r--r--	mm/memcontrol.c	1
-rw-r--r--	mm/memory.c	2
-rw-r--r--	mm/rmap.c	6
-rw-r--r--	mm/thrash.c	155
-rw-r--r--	mm/vmscan.c	6
10 files changed, 2 insertions(+), 307 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 26574c726121..dad95bdd06d7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -345,17 +345,6 @@ struct mm_struct {
 	/* Architecture-specific MM context */
 	mm_context_t context;
 
-	/* Swap token stuff */
-	/*
-	 * Last value of global fault stamp as seen by this process.
-	 * In other words, this value gives an indication of how long
-	 * it has been since this task got the token.
-	 * Look at mm/thrash.c
-	 */
-	unsigned int faultstamp;
-	unsigned int token_priority;
-	unsigned int last_interval;
-
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	struct core_state *core_state; /* coredumping support */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b1fd5c7925fe..bc3073ce95cc 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -355,23 +355,6 @@ extern int reuse_swap_page(struct page *);
 extern int try_to_free_swap(struct page *);
 struct backing_dev_info;
 
-/* linux/mm/thrash.c */
-extern struct mm_struct *swap_token_mm;
-extern void grab_swap_token(struct mm_struct *);
-extern void __put_swap_token(struct mm_struct *);
-extern void disable_swap_token(struct mem_cgroup *memcg);
-
-static inline int has_swap_token(struct mm_struct *mm)
-{
-	return (mm == swap_token_mm);
-}
-
-static inline void put_swap_token(struct mm_struct *mm)
-{
-	if (has_swap_token(mm))
-		__put_swap_token(mm);
-}
-
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 extern void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
@@ -476,24 +459,6 @@ static inline swp_entry_t get_swap_page(void)
 	return entry;
 }
 
-/* linux/mm/thrash.c */
-static inline void put_swap_token(struct mm_struct *mm)
-{
-}
-
-static inline void grab_swap_token(struct mm_struct *mm)
-{
-}
-
-static inline int has_swap_token(struct mm_struct *mm)
-{
-	return 0;
-}
-
-static inline void disable_swap_token(struct mem_cgroup *memcg)
-{
-}
-
 static inline void
 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
 {
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index f64560e204bc..572195459d58 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -395,88 +395,6 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 		show_reclaim_flags(__entry->reclaim_flags))
 );
 
-TRACE_EVENT(replace_swap_token,
-	TP_PROTO(struct mm_struct *old_mm,
-		 struct mm_struct *new_mm),
-
-	TP_ARGS(old_mm, new_mm),
-
-	TP_STRUCT__entry(
-		__field(struct mm_struct*, old_mm)
-		__field(unsigned int, old_prio)
-		__field(struct mm_struct*, new_mm)
-		__field(unsigned int, new_prio)
-	),
-
-	TP_fast_assign(
-		__entry->old_mm = old_mm;
-		__entry->old_prio = old_mm ? old_mm->token_priority : 0;
-		__entry->new_mm = new_mm;
-		__entry->new_prio = new_mm->token_priority;
-	),
-
-	TP_printk("old_token_mm=%p old_prio=%u new_token_mm=%p new_prio=%u",
-		  __entry->old_mm, __entry->old_prio,
-		  __entry->new_mm, __entry->new_prio)
-);
-
-DECLARE_EVENT_CLASS(put_swap_token_template,
-	TP_PROTO(struct mm_struct *swap_token_mm),
-
-	TP_ARGS(swap_token_mm),
-
-	TP_STRUCT__entry(
-		__field(struct mm_struct*, swap_token_mm)
-	),
-
-	TP_fast_assign(
-		__entry->swap_token_mm = swap_token_mm;
-	),
-
-	TP_printk("token_mm=%p", __entry->swap_token_mm)
-);
-
-DEFINE_EVENT(put_swap_token_template, put_swap_token,
-	TP_PROTO(struct mm_struct *swap_token_mm),
-	TP_ARGS(swap_token_mm)
-);
-
-DEFINE_EVENT_CONDITION(put_swap_token_template, disable_swap_token,
-	TP_PROTO(struct mm_struct *swap_token_mm),
-	TP_ARGS(swap_token_mm),
-	TP_CONDITION(swap_token_mm != NULL)
-);
-
-TRACE_EVENT_CONDITION(update_swap_token_priority,
-	TP_PROTO(struct mm_struct *mm,
-		 unsigned int old_prio,
-		 struct mm_struct *swap_token_mm),
-
-	TP_ARGS(mm, old_prio, swap_token_mm),
-
-	TP_CONDITION(mm->token_priority != old_prio),
-
-	TP_STRUCT__entry(
-		__field(struct mm_struct*, mm)
-		__field(unsigned int, old_prio)
-		__field(unsigned int, new_prio)
-		__field(struct mm_struct*, swap_token_mm)
-		__field(unsigned int, swap_token_prio)
-	),
-
-	TP_fast_assign(
-		__entry->mm = mm;
-		__entry->old_prio = old_prio;
-		__entry->new_prio = mm->token_priority;
-		__entry->swap_token_mm = swap_token_mm;
-		__entry->swap_token_prio = swap_token_mm ? swap_token_mm->token_priority : 0;
-	),
-
-	TP_printk("mm=%p old_prio=%u new_prio=%u swap_token_mm=%p token_prio=%u",
-		  __entry->mm, __entry->old_prio, __entry->new_prio,
-		  __entry->swap_token_mm, __entry->swap_token_prio)
-);
-
 #endif /* _TRACE_VMSCAN_H */
 
 /* This part must be outside protection */
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b4e4f379f9..5b13eea2e757 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -614,7 +614,6 @@ void mmput(struct mm_struct *mm)
 		list_del(&mm->mmlist);
 		spin_unlock(&mmlist_lock);
 	}
-	put_swap_token(mm);
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
 	mmdrop(mm);
@@ -831,10 +830,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	memcpy(mm, oldmm, sizeof(*mm));
 	mm_init_cpumask(mm);
 
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
 #endif
@@ -913,10 +908,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 		goto fail_nomem;
 
 good_mm:
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
diff --git a/mm/Makefile b/mm/Makefile
index 8aada89efbbb..ccecbf9818f5 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,7 +25,7 @@ endif
 obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
 
 obj-$(CONFIG_BOUNCE)	+= bounce.o
-obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
+obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
 obj-$(CONFIG_HAS_DMA)	+= dmapool.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA)	+= mempolicy.o
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f342778a0c0a..92675fe8a2ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5598,7 +5598,6 @@ static void mem_cgroup_move_task(struct cgroup *cont,
 	if (mm) {
 		if (mc.to)
 			mem_cgroup_move_charge(mm);
-		put_swap_token(mm);
 		mmput(mm);
 	}
 	if (mc.to)
diff --git a/mm/memory.c b/mm/memory.c
index e40f6759ba98..2bf9e110437c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2908,7 +2908,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry);
 	if (!page) {
-		grab_swap_token(mm); /* Contend for token _before_ read-in */
 		page = swapin_readahead(entry,
 					GFP_HIGHUSER_MOVABLE, vma, address);
 		if (!page) {
@@ -2938,6 +2937,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	locked = lock_page_or_retry(page, mm, flags);
+
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 	if (!locked) {
 		ret |= VM_FAULT_RETRY;
diff --git a/mm/rmap.c b/mm/rmap.c
index 5b5ad584ffb7..0f3b7cda2a24 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -755,12 +755,6 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		pte_unmap_unlock(pte, ptl);
 	}
 
-	/* Pretend the page is referenced if the task has the
-	   swap token and is in the middle of a page fault. */
-	if (mm != current->mm && has_swap_token(mm) &&
-			rwsem_is_locked(&mm->mmap_sem))
-		referenced++;
-
 	(*mapcount)--;
 
 	if (referenced)
diff --git a/mm/thrash.c b/mm/thrash.c
deleted file mode 100644
index 57ad495dbd54..000000000000
--- a/mm/thrash.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * mm/thrash.c
- *
- * Copyright (C) 2004, Red Hat, Inc.
- * Copyright (C) 2004, Rik van Riel <riel@redhat.com>
- * Released under the GPL, see the file COPYING for details.
- *
- * Simple token based thrashing protection, using the algorithm
- * described in: http://www.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-1.html
- *
- * Sep 2006, Ashwin Chaugule <ashwin.chaugule@celunite.com>
- * Improved algorithm to pass token:
- * Each task has a priority which is incremented if it contended
- * for the token in an interval less than its previous attempt.
- * If the token is acquired, that task's priority is boosted to prevent
- * the token from bouncing around too often and to let the task make
- * some progress in its execution.
- */
-
-#include <linux/jiffies.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/swap.h>
-#include <linux/memcontrol.h>
-
-#include <trace/events/vmscan.h>
-
-#define TOKEN_AGING_INTERVAL	(0xFF)
-
-static DEFINE_SPINLOCK(swap_token_lock);
-struct mm_struct *swap_token_mm;
-static struct mem_cgroup *swap_token_memcg;
-
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
-static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
-{
-	struct mem_cgroup *memcg;
-
-	memcg = try_get_mem_cgroup_from_mm(mm);
-	if (memcg)
-		css_put(mem_cgroup_css(memcg));
-
-	return memcg;
-}
-#else
-static struct mem_cgroup *swap_token_memcg_from_mm(struct mm_struct *mm)
-{
-	return NULL;
-}
-#endif
-
-void grab_swap_token(struct mm_struct *mm)
-{
-	int current_interval;
-	unsigned int old_prio = mm->token_priority;
-	static unsigned int global_faults;
-	static unsigned int last_aging;
-
-	global_faults++;
-
-	current_interval = global_faults - mm->faultstamp;
-
-	if (!spin_trylock(&swap_token_lock))
-		return;
-
-	/* First come first served */
-	if (!swap_token_mm)
-		goto replace_token;
-
-	/*
-	 * Usually, we don't need priority aging because long interval faults
-	 * makes priority decrease quickly. But there is one exception. If the
-	 * token owner task is sleeping, it never make long interval faults.
-	 * Thus, we need a priority aging mechanism instead. The requirements
-	 * of priority aging are
-	 *  1) An aging interval is reasonable enough long. Too short aging
-	 *     interval makes quick swap token lost and decrease performance.
-	 *  2) The swap token owner task have to get priority aging even if
-	 *     it's under sleep.
-	 */
-	if ((global_faults - last_aging) > TOKEN_AGING_INTERVAL) {
-		swap_token_mm->token_priority /= 2;
-		last_aging = global_faults;
-	}
-
-	if (mm == swap_token_mm) {
-		mm->token_priority += 2;
-		goto update_priority;
-	}
-
-	if (current_interval < mm->last_interval)
-		mm->token_priority++;
-	else {
-		if (likely(mm->token_priority > 0))
-			mm->token_priority--;
-	}
-
-	/* Check if we deserve the token */
-	if (mm->token_priority > swap_token_mm->token_priority)
-		goto replace_token;
-
-update_priority:
-	trace_update_swap_token_priority(mm, old_prio, swap_token_mm);
-
-out:
-	mm->faultstamp = global_faults;
-	mm->last_interval = current_interval;
-	spin_unlock(&swap_token_lock);
-	return;
-
-replace_token:
-	mm->token_priority += 2;
-	trace_replace_swap_token(swap_token_mm, mm);
-	swap_token_mm = mm;
-	swap_token_memcg = swap_token_memcg_from_mm(mm);
-	last_aging = global_faults;
-	goto out;
-}
-
-/* Called on process exit. */
-void __put_swap_token(struct mm_struct *mm)
-{
-	spin_lock(&swap_token_lock);
-	if (likely(mm == swap_token_mm)) {
-		trace_put_swap_token(swap_token_mm);
-		swap_token_mm = NULL;
-		swap_token_memcg = NULL;
-	}
-	spin_unlock(&swap_token_lock);
-}
-
-static bool match_memcg(struct mem_cgroup *a, struct mem_cgroup *b)
-{
-	if (!a)
-		return true;
-	if (!b)
-		return true;
-	if (a == b)
-		return true;
-	return false;
-}
-
-void disable_swap_token(struct mem_cgroup *memcg)
-{
-	/* memcg reclaim don't disable unrelated mm token. */
-	if (match_memcg(memcg, swap_token_memcg)) {
-		spin_lock(&swap_token_lock);
-		if (match_memcg(memcg, swap_token_memcg)) {
-			trace_disable_swap_token(swap_token_mm);
-			swap_token_mm = NULL;
-			swap_token_memcg = NULL;
-		}
-		spin_unlock(&swap_token_lock);
-	}
-}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33dc256033b5..ca46080bb074 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2352,8 +2352,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
-		if (!priority)
-			disable_swap_token(sc->target_mem_cgroup);
 		aborted_reclaim = shrink_zones(priority, zonelist, sc);
 
 		/*
@@ -2704,10 +2702,6 @@ loop_again:
 		unsigned long lru_pages = 0;
 		int has_under_min_watermark_zone = 0;
 
-		/* The swap token gets in the way of swapout... */
-		if (!priority)
-			disable_swap_token(NULL);
-
 		all_zones_ok = 1;
 		balanced = 0;
 