Diffstat (limited to 'mm/swap.c')
-rw-r--r--  mm/swap.c  111
1 file changed, 79 insertions(+), 32 deletions(-)
diff --git a/mm/swap.c b/mm/swap.c
index d3cb966fe992..a65eff8a517a 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -24,16 +24,19 @@
 #include <linux/module.h>
 #include <linux/mm_inline.h>
 #include <linux/buffer_head.h>	/* for try_to_release_page() */
-#include <linux/module.h>
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
-#include <linux/init.h>
+#include <linux/backing-dev.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
+static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs) = { 0, };
+
 /*
  * This path almost never happens for VM activity - pages are normally
  * freed via pagevecs.  But it gets used by networking.
@@ -94,23 +97,47 @@ void put_pages_list(struct list_head *pages)
 EXPORT_SYMBOL(put_pages_list);
 
 /*
+ * pagevec_move_tail() must be called with IRQ disabled.
+ * Otherwise this may cause nasty races.
+ */
+static void pagevec_move_tail(struct pagevec *pvec)
+{
+	int i;
+	int pgmoved = 0;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock(&zone->lru_lock);
+		}
+		if (PageLRU(page) && !PageActive(page)) {
+			list_move_tail(&page->lru, &zone->inactive_list);
+			pgmoved++;
+		}
+	}
+	if (zone)
+		spin_unlock(&zone->lru_lock);
+	__count_vm_events(PGROTATED, pgmoved);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
+/*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
- * inactive list.  The page still has PageWriteback set, which will pin it.
- *
- * We don't expect many pages to come through here, so don't bother batching
- * things up.
- *
- * To avoid placing the page at the tail of the LRU while PG_writeback is still
- * set, this function will clear PG_writeback before performing the page
- * motion.  Do that inside the lru lock because once PG_writeback is cleared
- * we may not touch the page.
+ * inactive list.
  *
  * Returns zero if it cleared PG_writeback.
  */
 int rotate_reclaimable_page(struct page *page)
 {
-	struct zone *zone;
+	struct pagevec *pvec;
 	unsigned long flags;
 
 	if (PageLocked(page))
@@ -122,15 +149,16 @@ int rotate_reclaimable_page(struct page *page)
 	if (!PageLRU(page))
 		return 1;
 
-	zone = page_zone(page);
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	if (PageLRU(page) && !PageActive(page)) {
-		list_move_tail(&page->lru, &zone->inactive_list);
-		__count_vm_event(PGROTATED);
-	}
+	page_cache_get(page);
+	local_irq_save(flags);
+	pvec = &__get_cpu_var(lru_rotate_pvecs);
+	if (!pagevec_add(pvec, page))
+		pagevec_move_tail(pvec);
+	local_irq_restore(flags);
+
 	if (!test_clear_page_writeback(page))
 		BUG();
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
+
 	return 0;
 }
 
@@ -174,9 +202,6 @@ EXPORT_SYMBOL(mark_page_accessed);
  * lru_cache_add: add a page to the page lists
  * @page: the page to add
  */
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
-
 void fastcall lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
@@ -197,21 +222,37 @@ void fastcall lru_cache_add_active(struct page *page)
 	put_cpu_var(lru_add_active_pvecs);
 }
 
-static void __lru_add_drain(int cpu)
+/*
+ * Drain pages out of the cpu's pagevecs.
+ * Either "cpu" is the current CPU, and preemption has already been
+ * disabled; or "cpu" is being hot-unplugged, and is already dead.
+ */
+static void drain_cpu_pagevecs(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
+	struct pagevec *pvec;
 
-	/* CPU is dead, so no locking needed. */
+	pvec = &per_cpu(lru_add_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add(pvec);
+
 	pvec = &per_cpu(lru_add_active_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add_active(pvec);
+
+	pvec = &per_cpu(lru_rotate_pvecs, cpu);
+	if (pagevec_count(pvec)) {
+		unsigned long flags;
+
+		/* No harm done if a racing interrupt already did this */
+		local_irq_save(flags);
+		pagevec_move_tail(pvec);
+		local_irq_restore(flags);
+	}
 }
 
 void lru_add_drain(void)
 {
-	__lru_add_drain(get_cpu());
+	drain_cpu_pagevecs(get_cpu());
 	put_cpu();
 }
 
@@ -258,6 +299,7 @@ void release_pages(struct page **pages, int nr, int cold)
 	int i;
 	struct pagevec pages_to_free;
 	struct zone *zone = NULL;
+	unsigned long uninitialized_var(flags);
 
 	pagevec_init(&pages_to_free, cold);
 	for (i = 0; i < nr; i++) {
@@ -265,7 +307,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
 		if (unlikely(PageCompound(page))) {
 			if (zone) {
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock_irqrestore(&zone->lru_lock, flags);
 				zone = NULL;
 			}
 			put_compound_page(page);
@@ -279,9 +321,10 @@ void release_pages(struct page **pages, int nr, int cold)
 			struct zone *pagezone = page_zone(page);
 			if (pagezone != zone) {
 				if (zone)
-					spin_unlock_irq(&zone->lru_lock);
+					spin_unlock_irqrestore(&zone->lru_lock,
+									flags);
 				zone = pagezone;
-				spin_lock_irq(&zone->lru_lock);
+				spin_lock_irqsave(&zone->lru_lock, flags);
 			}
 			VM_BUG_ON(!PageLRU(page));
 			__ClearPageLRU(page);
@@ -290,7 +333,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
 		if (!pagevec_add(&pages_to_free, page)) {
 			if (zone) {
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock_irqrestore(&zone->lru_lock, flags);
 				zone = NULL;
 			}
 			__pagevec_free(&pages_to_free);
@@ -298,7 +341,7 @@ void release_pages(struct page **pages, int nr, int cold)
 		}
 	}
 	if (zone)
-		spin_unlock_irq(&zone->lru_lock);
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
 	pagevec_free(&pages_to_free);
 }
@@ -491,7 +534,7 @@ static int cpu_swap_callback(struct notifier_block *nfb,
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
 		atomic_add(*committed, &vm_committed_space);
 		*committed = 0;
-		__lru_add_drain((long)hcpu);
+		drain_cpu_pagevecs((long)hcpu);
 	}
 	return NOTIFY_OK;
 }
@@ -505,6 +548,10 @@ void __init swap_setup(void)
 {
 	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
 
+#ifdef CONFIG_SWAP
+	bdi_init(swapper_space.backing_dev_info);
+#endif
+
 	/* Use a smaller cluster for small-memory machines */
 	if (megs < 16)
 		page_cluster = 2;
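
The pattern this patch applies to rotate_reclaimable_page() is: queue each page into a fixed-size per-CPU pagevec under local_irq_save(), and only take zone->lru_lock once per full batch in pagevec_move_tail(). The snippet below is not part of the commit; it is a minimal standalone C sketch of that batch-and-flush idea. PVEC_SIZE, pvec_add(), flush_batch() and rotate_page() are illustrative stand-ins for PAGEVEC_SIZE, pagevec_add(), pagevec_move_tail() and rotate_reclaimable_page(), and the assumed batch size of 14 mirrors the pagevec size of this kernel era.

/* Illustrative userspace model of the batch-and-flush pattern; names and
 * the batch size are assumptions, not the kernel API. */
#include <stdio.h>

#define PVEC_SIZE 14			/* assumed, like PAGEVEC_SIZE */

struct pvec_model {
	unsigned int nr;
	int pages[PVEC_SIZE];		/* stand-ins for struct page pointers */
};

/* Flush the whole batch at once: one "lock round trip" per flush instead
 * of one per page, which is what pagevec_move_tail() buys the kernel. */
static void flush_batch(struct pvec_model *pvec)
{
	unsigned int i;

	for (i = 0; i < pvec->nr; i++)
		printf("move page %d to tail of inactive list\n",
		       pvec->pages[i]);
	pvec->nr = 0;
}

/* Analogue of pagevec_add(): returns the space left after the add. */
static int pvec_add(struct pvec_model *pvec, int page)
{
	pvec->pages[pvec->nr++] = page;
	return PVEC_SIZE - pvec->nr;
}

/* Analogue of rotate_reclaimable_page(): queue, flush only when full. */
static void rotate_page(struct pvec_model *pvec, int page)
{
	if (!pvec_add(pvec, page))
		flush_batch(pvec);
}

int main(void)
{
	struct pvec_model pvec = { 0 };
	int page;

	for (page = 0; page < 40; page++)
		rotate_page(&pvec, page);
	flush_batch(&pvec);		/* drain leftovers, like drain_cpu_pagevecs() */
	return 0;
}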