Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            2
-rw-r--r--  mm/backing-dev.c      7
-rw-r--r--  mm/bounce.c           2
-rw-r--r--  mm/compaction.c       7
-rw-r--r--  mm/ksm.c              3
-rw-r--r--  mm/memory.c          39
-rw-r--r--  mm/memory_hotplug.c  16
-rw-r--r--  mm/mlock.c            6
-rw-r--r--  mm/mmzone.c          21
-rw-r--r--  mm/page_alloc.c      33
-rw-r--r--  mm/percpu.c           6
-rw-r--r--  mm/percpu_up.c        4
-rw-r--r--  mm/swapfile.c       129
-rw-r--r--  mm/vmstat.c          16
14 files changed, 164 insertions, 127 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index f4e516e9c37c..f0fb9124e410 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -189,7 +189,7 @@ config COMPACTION
 config MIGRATION
 	bool "Page migration"
 	def_bool y
-	depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE
+	depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION
 	help
 	  Allows the migration of the physical location of pages of processes
 	  while the virtual addresses are not changed. This is useful in
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index eaa4a5bbe063..c2bf86f470ed 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -445,8 +445,8 @@ static int bdi_forker_thread(void *ptr)
 		switch (action) {
 		case FORK_THREAD:
 			__set_current_state(TASK_RUNNING);
-			task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s",
-					   dev_name(bdi->dev));
+			task = kthread_create(bdi_writeback_thread, &bdi->wb,
+					      "flush-%s", dev_name(bdi->dev));
 			if (IS_ERR(task)) {
 				/*
 				 * If thread creation fails, force writeout of
@@ -457,10 +457,13 @@ static int bdi_forker_thread(void *ptr)
 				/*
 				 * The spinlock makes sure we do not lose
 				 * wake-ups when racing with 'bdi_queue_work()'.
+				 * And as soon as the bdi thread is visible, we
+				 * can start it.
 				 */
 				spin_lock_bh(&bdi->wb_lock);
 				bdi->wb.task = task;
 				spin_unlock_bh(&bdi->wb_lock);
+				wake_up_process(task);
 			}
 			break;
 
diff --git a/mm/bounce.c b/mm/bounce.c
index 13b6dad1eed2..1481de68184b 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -116,8 +116,8 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 		 */
 		vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
 
-		flush_dcache_page(tovec->bv_page);
 		bounce_copy_vec(tovec, vfrom);
+		flush_dcache_page(tovec->bv_page);
 	}
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 94cce51b0b35..4d709ee59013 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -214,15 +214,16 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
 /* Similar to reclaim, but different enough that they don't share logic */
 static bool too_many_isolated(struct zone *zone)
 {
-
-	unsigned long inactive, isolated;
+	unsigned long active, inactive, isolated;
 
 	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
 					zone_page_state(zone, NR_INACTIVE_ANON);
+	active = zone_page_state(zone, NR_ACTIVE_FILE) +
+					zone_page_state(zone, NR_ACTIVE_ANON);
 	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
 					zone_page_state(zone, NR_ISOLATED_ANON);
 
-	return isolated > inactive;
+	return isolated > (inactive + active) / 2;
 }
 
 /*
diff --git a/mm/ksm.c b/mm/ksm.c
index e2ae00458320..b1873cf03ed9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 {
 	struct page *new_page;
 
-	unlock_page(page);	/* any racers will COW it, not modify it */
-
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
 		copy_user_highpage(new_page, page, address, vma);
@@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
 			add_page_to_unevictable_list(new_page);
 	}
 
-	page_cache_release(page);
 	return new_page;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 6b2ab1051851..71b161b73bb5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned int flags, pte_t orig_pte)
 {
 	spinlock_t *ptl;
-	struct page *page;
+	struct page *page, *swapcache = NULL;
 	swp_entry_t entry;
 	pte_t pte;
 	struct mem_cgroup *ptr = NULL;
@@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
-	page = ksm_might_need_to_copy(page, vma, address);
-	if (!page) {
-		ret = VM_FAULT_OOM;
-		goto out;
+	/*
+	 * Make sure try_to_free_swap didn't release the swapcache
+	 * from under us. The page pin isn't enough to prevent that.
+	 */
+	if (unlikely(!PageSwapCache(page)))
+		goto out_page;
+
+	if (ksm_might_need_to_copy(page, vma, address)) {
+		swapcache = page;
+		page = ksm_does_need_to_copy(page, vma, address);
+
+		if (unlikely(!page)) {
+			ret = VM_FAULT_OOM;
+			page = swapcache;
+			swapcache = NULL;
+			goto out_page;
+		}
 	}
 
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
@@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		try_to_free_swap(page);
 	unlock_page(page);
+	if (swapcache) {
+		/*
+		 * Hold the lock to avoid the swap entry to be reused
+		 * until we take the PT lock for the pte_same() check
+		 * (to avoid false positives from pte_same). For
+		 * further safety release the lock after the swap_free
+		 * so that the swap count won't change under a
+		 * parallel locked swapcache.
+		 */
+		unlock_page(swapcache);
+		page_cache_release(swapcache);
+	}
 
 	if (flags & FAULT_FLAG_WRITE) {
 		ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
@@ -2756,6 +2781,10 @@ out_page:
 	unlock_page(page);
 out_release:
 	page_cache_release(page);
+	if (swapcache) {
+		unlock_page(swapcache);
+		page_cache_release(swapcache);
+	}
 	return ret;
 }
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a4cfcdc00455..dd186c1a5d53 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -584,19 +584,19 @@ static inline int pageblock_free(struct page *page)
 /* Return the start of the next active pageblock after a given page */
 static struct page *next_active_pageblock(struct page *page)
 {
-	int pageblocks_stride;
-
 	/* Ensure the starting page is pageblock-aligned */
 	BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
 
-	/* Move forward by at least 1 * pageblock_nr_pages */
-	pageblocks_stride = 1;
-
 	/* If the entire pageblock is free, move to the end of free page */
-	if (pageblock_free(page))
-		pageblocks_stride += page_order(page) - pageblock_order;
+	if (pageblock_free(page)) {
+		int order;
+		/* be careful. we don't have locks, page_order can be changed.*/
+		order = page_order(page);
+		if ((order < MAX_ORDER) && (order >= pageblock_order))
+			return page + (1 << order);
+	}
 
-	return page + (pageblocks_stride * pageblock_nr_pages);
+	return page + pageblock_nr_pages;
 }
 
 /* Checks if this range of memory is likely to be hot-removable. */
diff --git a/mm/mlock.c b/mm/mlock.c
index cbae7c5b9568..b70919ce4f72 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,12 +135,6 @@ void munlock_vma_page(struct page *page)
 	}
 }
 
-/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr)
-{
-	return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
-}
-
 static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	return (vma->vm_flags & VM_GROWSDOWN) &&
diff --git a/mm/mmzone.c b/mm/mmzone.c
index f5b7d1760213..e35bfb82c855 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn,
 	return 1;
 }
 #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
+
+#ifdef CONFIG_SMP
+/* Called when a more accurate view of NR_FREE_PAGES is needed */
+unsigned long zone_nr_free_pages(struct zone *zone)
+{
+	unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES);
+
+	/*
+	 * While kswapd is awake, it is considered the zone is under some
+	 * memory pressure. Under pressure, there is a risk that
+	 * per-cpu-counter-drift will allow the min watermark to be breached
+	 * potentially causing a live-lock. While kswapd is awake and
+	 * free pages are low, get a better estimate for free pages
+	 */
+	if (nr_free_pages < zone->percpu_drift_mark &&
+			!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
+		return zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+	return nr_free_pages;
+}
+#endif /* CONFIG_SMP */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a9649f4b261e..a8cfa9cc6e86 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -588,13 +588,13 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
 	int migratetype = 0;
 	int batch_free = 0;
+	int to_free = count;
 
 	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
-	while (count) {
+	while (to_free) {
 		struct page *page;
 		struct list_head *list;
 
@@ -619,8 +619,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
 			__free_one_page(page, zone, 0, page_private(page));
 			trace_mm_page_pcpu_drain(page, 0, page_private(page));
-		} while (--count && --batch_free && !list_empty(list));
+		} while (--to_free && --batch_free && !list_empty(list));
 	}
+	__mod_zone_page_state(zone, NR_FREE_PAGES, count);
 	spin_unlock(&zone->lock);
 }
 
@@ -631,8 +632,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 
-	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
 	__free_one_page(page, zone, order, migratetype);
+	__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
 	spin_unlock(&zone->lock);
 }
 
@@ -1461,7 +1462,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1;
+	long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
 	int o;
 
 	if (alloc_flags & ALLOC_HIGH)
@@ -1846,6 +1847,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
+	bool drained = false;
 
 	cond_resched();
 
@@ -1864,14 +1866,25 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
 	cond_resched();
 
-	if (order != 0)
-		drain_all_pages();
+	if (unlikely(!(*did_some_progress)))
+		return NULL;
 
-	if (likely(*did_some_progress))
-		page = get_page_from_freelist(gfp_mask, nodemask, order,
+retry:
+	page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx,
 					alloc_flags, preferred_zone,
 					migratetype);
+
+	/*
+	 * If an allocation failed after direct reclaim, it could be because
+	 * pages are pinned on the per-cpu lists. Drain them and try again
+	 */
+	if (!page && !drained) {
+		drain_all_pages();
+		drained = true;
+		goto retry;
+	}
+
 	return page;
 }
 
@@ -2423,7 +2436,7 @@ void show_free_areas(void)
2423 " all_unreclaimable? %s" 2436 " all_unreclaimable? %s"
2424 "\n", 2437 "\n",
2425 zone->name, 2438 zone->name,
2426 K(zone_page_state(zone, NR_FREE_PAGES)), 2439 K(zone_nr_free_pages(zone)),
2427 K(min_wmark_pages(zone)), 2440 K(min_wmark_pages(zone)),
2428 K(low_wmark_pages(zone)), 2441 K(low_wmark_pages(zone)),
2429 K(high_wmark_pages(zone)), 2442 K(high_wmark_pages(zone)),
diff --git a/mm/percpu.c b/mm/percpu.c
index e61dc2cc5873..58c572b18b07 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -393,7 +393,9 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 		goto out_unlock;
 
 	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
-	memcpy(new, chunk->map, old_size);
+	old = chunk->map;
+
+	memcpy(new, old, old_size);
 
 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
@@ -1162,7 +1164,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 		}
 
 		/*
-		 * Don't accept if wastage is over 25%. The
+		 * Don't accept if wastage is over 1/3. The
 		 * greater-than comparison ensures upa==1 always
 		 * passes the following check.
 		 */
diff --git a/mm/percpu_up.c b/mm/percpu_up.c
index c4351c7f57d2..db884fae5721 100644
--- a/mm/percpu_up.c
+++ b/mm/percpu_up.c
@@ -14,13 +14,13 @@ void __percpu *__alloc_percpu(size_t size, size_t align)
 	 * percpu sections on SMP for which this path isn't used.
 	 */
 	WARN_ON_ONCE(align > SMP_CACHE_BYTES);
-	return kzalloc(size, GFP_KERNEL);
+	return (void __percpu __force *)kzalloc(size, GFP_KERNEL);
 }
 EXPORT_SYMBOL_GPL(__alloc_percpu);
 
 void free_percpu(void __percpu *p)
 {
-	kfree(p);
+	kfree(this_cpu_ptr(p));
 }
 EXPORT_SYMBOL_GPL(free_percpu);
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1f3f9c59a73a..7c703ff2f36f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -47,8 +47,6 @@ long nr_swap_pages;
 long total_swap_pages;
 static int least_priority;
 
-static bool swap_for_hibernation;
-
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
@@ -141,8 +139,7 @@ static int discard_swap(struct swap_info_struct *si)
 		nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9);
 		if (nr_blocks) {
 			err = blkdev_issue_discard(si->bdev, start_block,
-				nr_blocks, GFP_KERNEL,
-				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT);
 			if (err)
 				return err;
 			cond_resched();
@@ -153,8 +150,7 @@ static int discard_swap(struct swap_info_struct *si)
 		nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9);
 
 		err = blkdev_issue_discard(si->bdev, start_block,
-				nr_blocks, GFP_KERNEL,
-				BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+				nr_blocks, GFP_KERNEL, BLKDEV_IFL_WAIT);
 		if (err)
 			break;
 
@@ -193,8 +189,7 @@ static void discard_swap_cluster(struct swap_info_struct *si,
 			start_block <<= PAGE_SHIFT - 9;
 			nr_blocks <<= PAGE_SHIFT - 9;
 			if (blkdev_issue_discard(si->bdev, start_block,
-				    nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT |
-							 BLKDEV_IFL_BARRIER))
+				    nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT))
 				break;
 		}
 
@@ -320,10 +315,8 @@ checks:
 	if (offset > si->highest_bit)
 		scan_base = offset = si->lowest_bit;
 
-	/* reuse swap entry of cache-only swap if not hibernation. */
-	if (vm_swap_full()
-		&& usage == SWAP_HAS_CACHE
-		&& si->swap_map[offset] == SWAP_HAS_CACHE) {
+	/* reuse swap entry of cache-only swap if not busy. */
+	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
 		int swap_was_freed;
 		spin_unlock(&swap_lock);
 		swap_was_freed = __try_to_reclaim_swap(si, offset);
@@ -453,8 +446,6 @@ swp_entry_t get_swap_page(void)
 	spin_lock(&swap_lock);
 	if (nr_swap_pages <= 0)
 		goto noswap;
-	if (swap_for_hibernation)
-		goto noswap;
 	nr_swap_pages--;
 
 	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
@@ -487,6 +478,28 @@ noswap:
 	return (swp_entry_t) {0};
 }
 
+/* The only caller of this function is now susupend routine */
+swp_entry_t get_swap_page_of_type(int type)
+{
+	struct swap_info_struct *si;
+	pgoff_t offset;
+
+	spin_lock(&swap_lock);
+	si = swap_info[type];
+	if (si && (si->flags & SWP_WRITEOK)) {
+		nr_swap_pages--;
+		/* This is called for allocating swap entry, not cache */
+		offset = scan_swap_map(si, 1);
+		if (offset) {
+			spin_unlock(&swap_lock);
+			return swp_entry(type, offset);
+		}
+		nr_swap_pages++;
+	}
+	spin_unlock(&swap_lock);
+	return (swp_entry_t) {0};
+}
+
 static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
@@ -670,6 +683,24 @@ int try_to_free_swap(struct page *page)
 	if (page_swapcount(page))
 		return 0;
 
+	/*
+	 * Once hibernation has begun to create its image of memory,
+	 * there's a danger that one of the calls to try_to_free_swap()
+	 * - most probably a call from __try_to_reclaim_swap() while
+	 * hibernation is allocating its own swap pages for the image,
+	 * but conceivably even a call from memory reclaim - will free
+	 * the swap from a page which has already been recorded in the
+	 * image as a clean swapcache page, and then reuse its swap for
+	 * another page of the image.  On waking from hibernation, the
+	 * original page might be freed under memory pressure, then
+	 * later read back in from swap, now with the wrong data.
+	 *
+	 * Hibernation clears bits from gfp_allowed_mask to prevent
+	 * memory reclaim from writing to disk, so check that here.
+	 */
+	if (!(gfp_allowed_mask & __GFP_IO))
+		return 0;
+
 	delete_from_swap_cache(page);
 	SetPageDirty(page);
 	return 1;
@@ -746,74 +777,6 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
 #endif
 
 #ifdef CONFIG_HIBERNATION
-
-static pgoff_t hibernation_offset[MAX_SWAPFILES];
-/*
- * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise,
- * saved swap_map[] image to the disk will be an incomplete because it's
- * changing without synchronization with hibernation snap shot.
- * At resume, we just make swap_for_hibernation=false. We can forget
- * used maps easily.
- */
-void hibernation_freeze_swap(void)
-{
-	int i;
-
-	spin_lock(&swap_lock);
-
-	printk(KERN_INFO "PM: Freeze Swap\n");
-	swap_for_hibernation = true;
-	for (i = 0; i < MAX_SWAPFILES; i++)
-		hibernation_offset[i] = 1;
-	spin_unlock(&swap_lock);
-}
-
-void hibernation_thaw_swap(void)
-{
-	spin_lock(&swap_lock);
-	if (swap_for_hibernation) {
-		printk(KERN_INFO "PM: Thaw Swap\n");
-		swap_for_hibernation = false;
-	}
-	spin_unlock(&swap_lock);
-}
-
-/*
- * Because updateing swap_map[] can make not-saved-status-change,
- * we use our own easy allocator.
- * Please see kernel/power/swap.c, Used swaps are recorded into
- * RB-tree.
- */
-swp_entry_t get_swap_for_hibernation(int type)
-{
-	pgoff_t off;
-	swp_entry_t val = {0};
-	struct swap_info_struct *si;
-
-	spin_lock(&swap_lock);
-
-	si = swap_info[type];
-	if (!si || !(si->flags & SWP_WRITEOK))
-		goto done;
-
-	for (off = hibernation_offset[type]; off < si->max; ++off) {
-		if (!si->swap_map[off])
-			break;
-	}
-	if (off < si->max) {
-		val = swp_entry(type, off);
-		hibernation_offset[type] = off + 1;
-	}
-done:
-	spin_unlock(&swap_lock);
-	return val;
-}
-
-void swap_free_for_hibernation(swp_entry_t ent)
-{
-	/* Nothing to do */
-}
-
 /*
  * Find the swap type that corresponds to given device (if any).
  *
@@ -2084,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 			p->flags |= SWP_SOLIDSTATE;
 			p->cluster_next = 1 + (random32() % p->highest_bit);
 		}
-		if (discard_swap(p) == 0)
+		if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD))
 			p->flags |= SWP_DISCARDABLE;
 	}
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f389168f9a83..355a9e669aaa 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void)
 	int threshold;
 
 	for_each_populated_zone(zone) {
+		unsigned long max_drift, tolerate_drift;
+
 		threshold = calculate_threshold(zone);
 
 		for_each_online_cpu(cpu)
 			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
 							= threshold;
+
+		/*
+		 * Only set percpu_drift_mark if there is a danger that
+		 * NR_FREE_PAGES reports the low watermark is ok when in fact
+		 * the min watermark could be breached by an allocation
+		 */
+		tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
+		max_drift = num_online_cpus() * threshold;
+		if (max_drift > tolerate_drift)
+			zone->percpu_drift_mark = high_wmark_pages(zone) +
+					max_drift;
 	}
 }
 
@@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
813 "\n scanned %lu" 826 "\n scanned %lu"
814 "\n spanned %lu" 827 "\n spanned %lu"
815 "\n present %lu", 828 "\n present %lu",
816 zone_page_state(zone, NR_FREE_PAGES), 829 zone_nr_free_pages(zone),
817 min_wmark_pages(zone), 830 min_wmark_pages(zone),
818 low_wmark_pages(zone), 831 low_wmark_pages(zone),
819 high_wmark_pages(zone), 832 high_wmark_pages(zone),
@@ -998,6 +1011,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
+		refresh_zone_stat_thresholds();
 		start_cpu_timer(cpu);
 		node_set_state(cpu_to_node(cpu), N_CPU);
 		break;