Diffstat (limited to 'mm')
-rw-r--r-- | mm/compaction.c | 1
-rw-r--r-- | mm/filemap.c | 37
-rw-r--r-- | mm/hugetlb.c | 3
-rw-r--r-- | mm/ksm.c | 7
-rw-r--r-- | mm/memcontrol.c | 101
-rw-r--r-- | mm/memory-failure.c | 8
-rw-r--r-- | mm/memory_hotplug.c | 31
-rw-r--r-- | mm/mempolicy.c | 3
-rw-r--r-- | mm/migrate.c | 2
-rw-r--r-- | mm/mmap.c | 16
-rw-r--r-- | mm/nommu.c | 29
-rw-r--r-- | mm/page-writeback.c | 2
-rw-r--r-- | mm/page_alloc.c | 33
-rw-r--r-- | mm/pagewalk.c | 5
-rw-r--r-- | mm/percpu.c | 10
-rw-r--r-- | mm/shmem.c | 9
-rw-r--r-- | mm/slab.c | 76
-rw-r--r-- | mm/slob.c | 5
-rw-r--r-- | mm/slub.c | 77
-rw-r--r-- | mm/truncate.c | 4
-rw-r--r-- | mm/util.c | 21
-rw-r--r-- | mm/vmalloc.c | 28
-rw-r--r-- | mm/vmscan.c | 9
-rw-r--r-- | mm/vmstat.c | 155
24 files changed, 394 insertions, 278 deletions
diff --git a/mm/compaction.c b/mm/compaction.c index 4d709ee59013..1a8894eadf72 100644 --- a/mm/compaction.c +++ b/mm/compaction.c | |||
@@ -279,7 +279,6 @@ static unsigned long isolate_migratepages(struct zone *zone, | |||
279 | /* Successfully isolated */ | 279 | /* Successfully isolated */ |
280 | del_page_from_lru_list(zone, page, page_lru(page)); | 280 | del_page_from_lru_list(zone, page, page_lru(page)); |
281 | list_add(&page->lru, migratelist); | 281 | list_add(&page->lru, migratelist); |
282 | mem_cgroup_del_lru(page); | ||
283 | cc->nr_migratepages++; | 282 | cc->nr_migratepages++; |
284 | 283 | ||
285 | /* Avoid isolating too much */ | 284 | /* Avoid isolating too much */ |
diff --git a/mm/filemap.c b/mm/filemap.c index 61ba5e405791..ca389394fa2a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -102,9 +102,6 @@ | |||
102 | * ->inode_lock (zap_pte_range->set_page_dirty) | 102 | * ->inode_lock (zap_pte_range->set_page_dirty) |
103 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) | 103 | * ->private_lock (zap_pte_range->__set_page_dirty_buffers) |
104 | * | 104 | * |
105 | * ->task->proc_lock | ||
106 | * ->dcache_lock (proc_pid_lookup) | ||
107 | * | ||
108 | * (code doesn't rely on that order, so you could switch it around) | 105 | * (code doesn't rely on that order, so you could switch it around) |
109 | * ->tasklist_lock (memory_failure, collect_procs_ao) | 106 | * ->tasklist_lock (memory_failure, collect_procs_ao) |
110 | * ->i_mmap_lock | 107 | * ->i_mmap_lock |
@@ -143,13 +140,18 @@ void __remove_from_page_cache(struct page *page) | |||
143 | void remove_from_page_cache(struct page *page) | 140 | void remove_from_page_cache(struct page *page) |
144 | { | 141 | { |
145 | struct address_space *mapping = page->mapping; | 142 | struct address_space *mapping = page->mapping; |
143 | void (*freepage)(struct page *); | ||
146 | 144 | ||
147 | BUG_ON(!PageLocked(page)); | 145 | BUG_ON(!PageLocked(page)); |
148 | 146 | ||
147 | freepage = mapping->a_ops->freepage; | ||
149 | spin_lock_irq(&mapping->tree_lock); | 148 | spin_lock_irq(&mapping->tree_lock); |
150 | __remove_from_page_cache(page); | 149 | __remove_from_page_cache(page); |
151 | spin_unlock_irq(&mapping->tree_lock); | 150 | spin_unlock_irq(&mapping->tree_lock); |
152 | mem_cgroup_uncharge_cache_page(page); | 151 | mem_cgroup_uncharge_cache_page(page); |
152 | |||
153 | if (freepage) | ||
154 | freepage(page); | ||
153 | } | 155 | } |
154 | EXPORT_SYMBOL(remove_from_page_cache); | 156 | EXPORT_SYMBOL(remove_from_page_cache); |
155 | 157 | ||
@@ -644,7 +646,9 @@ repeat: | |||
644 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); | 646 | pagep = radix_tree_lookup_slot(&mapping->page_tree, offset); |
645 | if (pagep) { | 647 | if (pagep) { |
646 | page = radix_tree_deref_slot(pagep); | 648 | page = radix_tree_deref_slot(pagep); |
647 | if (unlikely(!page || page == RADIX_TREE_RETRY)) | 649 | if (unlikely(!page)) |
650 | goto out; | ||
651 | if (radix_tree_deref_retry(page)) | ||
648 | goto repeat; | 652 | goto repeat; |
649 | 653 | ||
650 | if (!page_cache_get_speculative(page)) | 654 | if (!page_cache_get_speculative(page)) |
@@ -660,6 +664,7 @@ repeat: | |||
660 | goto repeat; | 664 | goto repeat; |
661 | } | 665 | } |
662 | } | 666 | } |
667 | out: | ||
663 | rcu_read_unlock(); | 668 | rcu_read_unlock(); |
664 | 669 | ||
665 | return page; | 670 | return page; |
@@ -777,12 +782,11 @@ repeat: | |||
777 | page = radix_tree_deref_slot((void **)pages[i]); | 782 | page = radix_tree_deref_slot((void **)pages[i]); |
778 | if (unlikely(!page)) | 783 | if (unlikely(!page)) |
779 | continue; | 784 | continue; |
780 | /* | 785 | if (radix_tree_deref_retry(page)) { |
781 | * this can only trigger if nr_found == 1, making livelock | 786 | if (ret) |
782 | * a non issue. | 787 | start = pages[ret-1]->index; |
783 | */ | ||
784 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
785 | goto restart; | 788 | goto restart; |
789 | } | ||
786 | 790 | ||
787 | if (!page_cache_get_speculative(page)) | 791 | if (!page_cache_get_speculative(page)) |
788 | goto repeat; | 792 | goto repeat; |
@@ -830,11 +834,7 @@ repeat: | |||
830 | page = radix_tree_deref_slot((void **)pages[i]); | 834 | page = radix_tree_deref_slot((void **)pages[i]); |
831 | if (unlikely(!page)) | 835 | if (unlikely(!page)) |
832 | continue; | 836 | continue; |
833 | /* | 837 | if (radix_tree_deref_retry(page)) |
834 | * this can only trigger if nr_found == 1, making livelock | ||
835 | * a non issue. | ||
836 | */ | ||
837 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
838 | goto restart; | 838 | goto restart; |
839 | 839 | ||
840 | if (page->mapping == NULL || page->index != index) | 840 | if (page->mapping == NULL || page->index != index) |
@@ -887,11 +887,7 @@ repeat: | |||
887 | page = radix_tree_deref_slot((void **)pages[i]); | 887 | page = radix_tree_deref_slot((void **)pages[i]); |
888 | if (unlikely(!page)) | 888 | if (unlikely(!page)) |
889 | continue; | 889 | continue; |
890 | /* | 890 | if (radix_tree_deref_retry(page)) |
891 | * this can only trigger if nr_found == 1, making livelock | ||
892 | * a non issue. | ||
893 | */ | ||
894 | if (unlikely(page == RADIX_TREE_RETRY)) | ||
895 | goto restart; | 891 | goto restart; |
896 | 892 | ||
897 | if (!page_cache_get_speculative(page)) | 893 | if (!page_cache_get_speculative(page)) |
@@ -1029,6 +1025,9 @@ find_page: | |||
1029 | goto page_not_up_to_date; | 1025 | goto page_not_up_to_date; |
1030 | if (!trylock_page(page)) | 1026 | if (!trylock_page(page)) |
1031 | goto page_not_up_to_date; | 1027 | goto page_not_up_to_date; |
1028 | /* Did it get truncated before we got the lock? */ | ||
1029 | if (!page->mapping) | ||
1030 | goto page_not_up_to_date_locked; | ||
1032 | if (!mapping->a_ops->is_partially_uptodate(page, | 1031 | if (!mapping->a_ops->is_partially_uptodate(page, |
1033 | desc, offset)) | 1032 | desc, offset)) |
1034 | goto page_not_up_to_date_locked; | 1033 | goto page_not_up_to_date_locked; |
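The filemap.c lookup hunks above replace the open-coded RADIX_TREE_RETRY comparison with radix_tree_deref_retry() and restart the walk when it fires. A minimal sketch of the resulting lockless single-page lookup, built only from the calls visible in the hunks (illustrative, not the exact find_get_page() body):

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/radix-tree.h>

static struct page *lockless_lookup_sketch(struct address_space *mapping,
                                           pgoff_t offset)
{
        void **pagep;
        struct page *page;

        rcu_read_lock();
repeat:
        page = NULL;
        pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
        if (pagep) {
                page = radix_tree_deref_slot(pagep);
                if (unlikely(!page))
                        goto out;
                if (radix_tree_deref_retry(page))
                        goto repeat;            /* slot is being moved; retry */
                if (!page_cache_get_speculative(page))
                        goto repeat;
                /* recheck the slot now that we hold a reference */
                if (unlikely(page != *pagep)) {
                        page_cache_release(page);
                        goto repeat;
                }
        }
out:
        rcu_read_unlock();
        return page;
}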
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c4a3558589ab..85855240933d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2738,7 +2738,8 @@ out_page_table_lock: | |||
2738 | unlock_page(pagecache_page); | 2738 | unlock_page(pagecache_page); |
2739 | put_page(pagecache_page); | 2739 | put_page(pagecache_page); |
2740 | } | 2740 | } |
2741 | unlock_page(page); | 2741 | if (page != pagecache_page) |
2742 | unlock_page(page); | ||
2742 | 2743 | ||
2743 | out_mutex: | 2744 | out_mutex: |
2744 | mutex_unlock(&hugetlb_instantiation_mutex); | 2745 | mutex_unlock(&hugetlb_instantiation_mutex); |
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c
@@ -1724,8 +1724,13 @@ static int ksm_memory_callback(struct notifier_block *self, | |||
1724 | /* | 1724 | /* |
1725 | * Keep it very simple for now: just lock out ksmd and | 1725 | * Keep it very simple for now: just lock out ksmd and |
1726 | * MADV_UNMERGEABLE while any memory is going offline. | 1726 | * MADV_UNMERGEABLE while any memory is going offline. |
1727 | * mutex_lock_nested() is necessary because lockdep was alarmed | ||
1728 | * that here we take ksm_thread_mutex inside notifier chain | ||
1729 | * mutex, and later take notifier chain mutex inside | ||
1730 | * ksm_thread_mutex to unlock it. But that's safe because both | ||
1731 | * are inside mem_hotplug_mutex. | ||
1727 | */ | 1732 | */ |
1728 | mutex_lock(&ksm_thread_mutex); | 1733 | mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING); |
1729 | break; | 1734 | break; |
1730 | 1735 | ||
1731 | case MEM_OFFLINE: | 1736 | case MEM_OFFLINE: |
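For reference, mutex_lock_nested() with SINGLE_DEPTH_NESTING is the stock lockdep annotation for exactly this situation: the acquisition goes into a separate subclass so the apparent inversion described in the comment is not reported. A tiny sketch with hypothetical lock names (outer_mutex standing in for mem_hotplug_mutex, inner_mutex for ksm_thread_mutex):

#include <linux/mutex.h>

static DEFINE_MUTEX(outer_mutex);       /* hypothetical: plays the role of mem_hotplug_mutex */
static DEFINE_MUTEX(inner_mutex);       /* hypothetical: plays the role of ksm_thread_mutex */

static void take_both_orders_safely(void)
{
        mutex_lock(&outer_mutex);
        /* annotate as intentionally nested (a separate lockdep subclass),
         * which suppresses the false-positive ordering report */
        mutex_lock_nested(&inner_mutex, SINGLE_DEPTH_NESTING);

        /* ... work that is serialized by outer_mutex anyway ... */

        mutex_unlock(&inner_mutex);
        mutex_unlock(&outer_mutex);
}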
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9a99cfaf0a19..00bb8a64d028 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -61,7 +61,14 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; | |||
61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ | 62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ |
63 | int do_swap_account __read_mostly; | 63 | int do_swap_account __read_mostly; |
64 | static int really_do_swap_account __initdata = 1; /* for remember boot option*/ | 64 | |
65 | /* for remember boot option*/ | ||
66 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED | ||
67 | static int really_do_swap_account __initdata = 1; | ||
68 | #else | ||
69 | static int really_do_swap_account __initdata = 0; | ||
70 | #endif | ||
71 | |||
65 | #else | 72 | #else |
66 | #define do_swap_account (0) | 73 | #define do_swap_account (0) |
67 | #endif | 74 | #endif |
@@ -278,13 +285,14 @@ enum move_type { | |||
278 | 285 | ||
279 | /* "mc" and its members are protected by cgroup_mutex */ | 286 | /* "mc" and its members are protected by cgroup_mutex */ |
280 | static struct move_charge_struct { | 287 | static struct move_charge_struct { |
281 | spinlock_t lock; /* for from, to, moving_task */ | 288 | spinlock_t lock; /* for from, to */ |
282 | struct mem_cgroup *from; | 289 | struct mem_cgroup *from; |
283 | struct mem_cgroup *to; | 290 | struct mem_cgroup *to; |
284 | unsigned long precharge; | 291 | unsigned long precharge; |
285 | unsigned long moved_charge; | 292 | unsigned long moved_charge; |
286 | unsigned long moved_swap; | 293 | unsigned long moved_swap; |
287 | struct task_struct *moving_task; /* a task moving charges */ | 294 | struct task_struct *moving_task; /* a task moving charges */ |
295 | struct mm_struct *mm; | ||
288 | wait_queue_head_t waitq; /* a waitq for other context */ | 296 | wait_queue_head_t waitq; /* a waitq for other context */ |
289 | } mc = { | 297 | } mc = { |
290 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), | 298 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), |
@@ -1917,19 +1925,18 @@ again: | |||
1917 | 1925 | ||
1918 | rcu_read_lock(); | 1926 | rcu_read_lock(); |
1919 | p = rcu_dereference(mm->owner); | 1927 | p = rcu_dereference(mm->owner); |
1920 | VM_BUG_ON(!p); | ||
1921 | /* | 1928 | /* |
1922 | * because we don't have task_lock(), "p" can exit while | 1929 | * Because we don't have task_lock(), "p" can exit. |
1923 | * we're here. In that case, "mem" can point to root | 1930 | * In that case, "mem" can point to root or p can be NULL with |
1924 | * cgroup but never be NULL. (and task_struct itself is freed | 1931 | * race with swapoff. Then, we have small risk of mis-accouning. |
1925 | * by RCU, cgroup itself is RCU safe.) Then, we have small | 1932 | * But such kind of mis-account by race always happens because |
1926 | * risk here to get wrong cgroup. But such kind of mis-account | 1933 | * we don't have cgroup_mutex(). It's overkill and we allo that |
1927 | * by race always happens because we don't have cgroup_mutex(). | 1934 | * small race, here. |
1928 | * It's overkill and we allow that small race, here. | 1935 | * (*) swapoff at el will charge against mm-struct not against |
1936 | * task-struct. So, mm->owner can be NULL. | ||
1929 | */ | 1937 | */ |
1930 | mem = mem_cgroup_from_task(p); | 1938 | mem = mem_cgroup_from_task(p); |
1931 | VM_BUG_ON(!mem); | 1939 | if (!mem || mem_cgroup_is_root(mem)) { |
1932 | if (mem_cgroup_is_root(mem)) { | ||
1933 | rcu_read_unlock(); | 1940 | rcu_read_unlock(); |
1934 | goto done; | 1941 | goto done; |
1935 | } | 1942 | } |
@@ -2152,7 +2159,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
2152 | { | 2159 | { |
2153 | VM_BUG_ON(from == to); | 2160 | VM_BUG_ON(from == to); |
2154 | VM_BUG_ON(PageLRU(pc->page)); | 2161 | VM_BUG_ON(PageLRU(pc->page)); |
2155 | VM_BUG_ON(!PageCgroupLocked(pc)); | 2162 | VM_BUG_ON(!page_is_cgroup_locked(pc)); |
2156 | VM_BUG_ON(!PageCgroupUsed(pc)); | 2163 | VM_BUG_ON(!PageCgroupUsed(pc)); |
2157 | VM_BUG_ON(pc->mem_cgroup != from); | 2164 | VM_BUG_ON(pc->mem_cgroup != from); |
2158 | 2165 | ||
@@ -4208,15 +4215,17 @@ static struct mem_cgroup *mem_cgroup_alloc(void) | |||
4208 | 4215 | ||
4209 | memset(mem, 0, size); | 4216 | memset(mem, 0, size); |
4210 | mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu); | 4217 | mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu); |
4211 | if (!mem->stat) { | 4218 | if (!mem->stat) |
4212 | if (size < PAGE_SIZE) | 4219 | goto out_free; |
4213 | kfree(mem); | ||
4214 | else | ||
4215 | vfree(mem); | ||
4216 | mem = NULL; | ||
4217 | } | ||
4218 | spin_lock_init(&mem->pcp_counter_lock); | 4220 | spin_lock_init(&mem->pcp_counter_lock); |
4219 | return mem; | 4221 | return mem; |
4222 | |||
4223 | out_free: | ||
4224 | if (size < PAGE_SIZE) | ||
4225 | kfree(mem); | ||
4226 | else | ||
4227 | vfree(mem); | ||
4228 | return NULL; | ||
4220 | } | 4229 | } |
4221 | 4230 | ||
4222 | /* | 4231 | /* |
@@ -4629,7 +4638,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4629 | unsigned long precharge; | 4638 | unsigned long precharge; |
4630 | struct vm_area_struct *vma; | 4639 | struct vm_area_struct *vma; |
4631 | 4640 | ||
4632 | down_read(&mm->mmap_sem); | 4641 | /* We've already held the mmap_sem */ |
4633 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4642 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4634 | struct mm_walk mem_cgroup_count_precharge_walk = { | 4643 | struct mm_walk mem_cgroup_count_precharge_walk = { |
4635 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | 4644 | .pmd_entry = mem_cgroup_count_precharge_pte_range, |
@@ -4641,7 +4650,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4641 | walk_page_range(vma->vm_start, vma->vm_end, | 4650 | walk_page_range(vma->vm_start, vma->vm_end, |
4642 | &mem_cgroup_count_precharge_walk); | 4651 | &mem_cgroup_count_precharge_walk); |
4643 | } | 4652 | } |
4644 | up_read(&mm->mmap_sem); | ||
4645 | 4653 | ||
4646 | precharge = mc.precharge; | 4654 | precharge = mc.precharge; |
4647 | mc.precharge = 0; | 4655 | mc.precharge = 0; |
@@ -4692,11 +4700,16 @@ static void mem_cgroup_clear_mc(void) | |||
4692 | 4700 | ||
4693 | mc.moved_swap = 0; | 4701 | mc.moved_swap = 0; |
4694 | } | 4702 | } |
4703 | if (mc.mm) { | ||
4704 | up_read(&mc.mm->mmap_sem); | ||
4705 | mmput(mc.mm); | ||
4706 | } | ||
4695 | spin_lock(&mc.lock); | 4707 | spin_lock(&mc.lock); |
4696 | mc.from = NULL; | 4708 | mc.from = NULL; |
4697 | mc.to = NULL; | 4709 | mc.to = NULL; |
4698 | mc.moving_task = NULL; | ||
4699 | spin_unlock(&mc.lock); | 4710 | spin_unlock(&mc.lock); |
4711 | mc.moving_task = NULL; | ||
4712 | mc.mm = NULL; | ||
4700 | mem_cgroup_end_move(from); | 4713 | mem_cgroup_end_move(from); |
4701 | memcg_oom_recover(from); | 4714 | memcg_oom_recover(from); |
4702 | memcg_oom_recover(to); | 4715 | memcg_oom_recover(to); |
@@ -4722,12 +4735,21 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
4722 | return 0; | 4735 | return 0; |
4723 | /* We move charges only when we move a owner of the mm */ | 4736 | /* We move charges only when we move a owner of the mm */ |
4724 | if (mm->owner == p) { | 4737 | if (mm->owner == p) { |
4738 | /* | ||
4739 | * We do all the move charge works under one mmap_sem to | ||
4740 | * avoid deadlock with down_write(&mmap_sem) | ||
4741 | * -> try_charge() -> if (mc.moving_task) -> sleep. | ||
4742 | */ | ||
4743 | down_read(&mm->mmap_sem); | ||
4744 | |||
4725 | VM_BUG_ON(mc.from); | 4745 | VM_BUG_ON(mc.from); |
4726 | VM_BUG_ON(mc.to); | 4746 | VM_BUG_ON(mc.to); |
4727 | VM_BUG_ON(mc.precharge); | 4747 | VM_BUG_ON(mc.precharge); |
4728 | VM_BUG_ON(mc.moved_charge); | 4748 | VM_BUG_ON(mc.moved_charge); |
4729 | VM_BUG_ON(mc.moved_swap); | 4749 | VM_BUG_ON(mc.moved_swap); |
4730 | VM_BUG_ON(mc.moving_task); | 4750 | VM_BUG_ON(mc.moving_task); |
4751 | VM_BUG_ON(mc.mm); | ||
4752 | |||
4731 | mem_cgroup_start_move(from); | 4753 | mem_cgroup_start_move(from); |
4732 | spin_lock(&mc.lock); | 4754 | spin_lock(&mc.lock); |
4733 | mc.from = from; | 4755 | mc.from = from; |
@@ -4735,14 +4757,16 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
4735 | mc.precharge = 0; | 4757 | mc.precharge = 0; |
4736 | mc.moved_charge = 0; | 4758 | mc.moved_charge = 0; |
4737 | mc.moved_swap = 0; | 4759 | mc.moved_swap = 0; |
4738 | mc.moving_task = current; | ||
4739 | spin_unlock(&mc.lock); | 4760 | spin_unlock(&mc.lock); |
4761 | mc.moving_task = current; | ||
4762 | mc.mm = mm; | ||
4740 | 4763 | ||
4741 | ret = mem_cgroup_precharge_mc(mm); | 4764 | ret = mem_cgroup_precharge_mc(mm); |
4742 | if (ret) | 4765 | if (ret) |
4743 | mem_cgroup_clear_mc(); | 4766 | mem_cgroup_clear_mc(); |
4744 | } | 4767 | /* We call up_read() and mmput() in clear_mc(). */ |
4745 | mmput(mm); | 4768 | } else |
4769 | mmput(mm); | ||
4746 | } | 4770 | } |
4747 | return ret; | 4771 | return ret; |
4748 | } | 4772 | } |
@@ -4830,7 +4854,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4830 | struct vm_area_struct *vma; | 4854 | struct vm_area_struct *vma; |
4831 | 4855 | ||
4832 | lru_add_drain_all(); | 4856 | lru_add_drain_all(); |
4833 | down_read(&mm->mmap_sem); | 4857 | /* We've already held the mmap_sem */ |
4834 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4858 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4835 | int ret; | 4859 | int ret; |
4836 | struct mm_walk mem_cgroup_move_charge_walk = { | 4860 | struct mm_walk mem_cgroup_move_charge_walk = { |
@@ -4849,7 +4873,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4849 | */ | 4873 | */ |
4850 | break; | 4874 | break; |
4851 | } | 4875 | } |
4852 | up_read(&mm->mmap_sem); | ||
4853 | } | 4876 | } |
4854 | 4877 | ||
4855 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 4878 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
@@ -4858,17 +4881,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
4858 | struct task_struct *p, | 4881 | struct task_struct *p, |
4859 | bool threadgroup) | 4882 | bool threadgroup) |
4860 | { | 4883 | { |
4861 | struct mm_struct *mm; | 4884 | if (!mc.mm) |
4862 | |||
4863 | if (!mc.to) | ||
4864 | /* no need to move charge */ | 4885 | /* no need to move charge */ |
4865 | return; | 4886 | return; |
4866 | 4887 | ||
4867 | mm = get_task_mm(p); | 4888 | mem_cgroup_move_charge(mc.mm); |
4868 | if (mm) { | ||
4869 | mem_cgroup_move_charge(mm); | ||
4870 | mmput(mm); | ||
4871 | } | ||
4872 | mem_cgroup_clear_mc(); | 4889 | mem_cgroup_clear_mc(); |
4873 | } | 4890 | } |
4874 | #else /* !CONFIG_MMU */ | 4891 | #else /* !CONFIG_MMU */ |
@@ -4909,10 +4926,20 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
4909 | }; | 4926 | }; |
4910 | 4927 | ||
4911 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4928 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
4929 | static int __init enable_swap_account(char *s) | ||
4930 | { | ||
4931 | /* consider enabled if no parameter or 1 is given */ | ||
4932 | if (!s || !strcmp(s, "1")) | ||
4933 | really_do_swap_account = 1; | ||
4934 | else if (!strcmp(s, "0")) | ||
4935 | really_do_swap_account = 0; | ||
4936 | return 1; | ||
4937 | } | ||
4938 | __setup("swapaccount", enable_swap_account); | ||
4912 | 4939 | ||
4913 | static int __init disable_swap_account(char *s) | 4940 | static int __init disable_swap_account(char *s) |
4914 | { | 4941 | { |
4915 | really_do_swap_account = 0; | 4942 | enable_swap_account("0"); |
4916 | return 1; | 4943 | return 1; |
4917 | } | 4944 | } |
4918 | __setup("noswapaccount", disable_swap_account); | 4945 | __setup("noswapaccount", disable_swap_account); |
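This adds a swapaccount= boot option (noswapaccount stays as a legacy alias for disabling it) and lets CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED pick the compiled-in default. For reference, a generic sketch of the __setup() idiom used here, with a hypothetical option name "myopt" (the handler receives whatever follows the registered prefix on the kernel command line):

#include <linux/init.h>
#include <linux/string.h>

static int myopt_enabled __initdata = 1;        /* compiled-in default */

static int __init myopt_setup(char *s)
{
        /* s is the text after "myopt=" on the command line */
        if (s && !strcmp(s, "0"))
                myopt_enabled = 0;
        else if (s && !strcmp(s, "1"))
                myopt_enabled = 1;
        return 1;       /* option consumed */
}
__setup("myopt=", myopt_setup);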
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 124324134ff6..46ab2c044b0e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <linux/swapops.h> | 52 | #include <linux/swapops.h> |
53 | #include <linux/hugetlb.h> | 53 | #include <linux/hugetlb.h> |
54 | #include <linux/memory_hotplug.h> | ||
54 | #include "internal.h" | 55 | #include "internal.h" |
55 | 56 | ||
56 | int sysctl_memory_failure_early_kill __read_mostly = 0; | 57 | int sysctl_memory_failure_early_kill __read_mostly = 0; |
@@ -1230,11 +1231,10 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1230 | return 1; | 1231 | return 1; |
1231 | 1232 | ||
1232 | /* | 1233 | /* |
1233 | * The lock_system_sleep prevents a race with memory hotplug, | 1234 | * The lock_memory_hotplug prevents a race with memory hotplug. |
1234 | * because the isolation assumes there's only a single user. | ||
1235 | * This is a big hammer, a better would be nicer. | 1235 | * This is a big hammer, a better would be nicer. |
1236 | */ | 1236 | */ |
1237 | lock_system_sleep(); | 1237 | lock_memory_hotplug(); |
1238 | 1238 | ||
1239 | /* | 1239 | /* |
1240 | * Isolate the page, so that it doesn't get reallocated if it | 1240 | * Isolate the page, so that it doesn't get reallocated if it |
@@ -1264,7 +1264,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1264 | ret = 1; | 1264 | ret = 1; |
1265 | } | 1265 | } |
1266 | unset_migratetype_isolate(p); | 1266 | unset_migratetype_isolate(p); |
1267 | unlock_system_sleep(); | 1267 | unlock_memory_hotplug(); |
1268 | return ret; | 1268 | return ret; |
1269 | } | 1269 | } |
1270 | 1270 | ||
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9260314a221e..2c6523af5473 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -34,6 +34,23 @@ | |||
34 | 34 | ||
35 | #include "internal.h" | 35 | #include "internal.h" |
36 | 36 | ||
37 | DEFINE_MUTEX(mem_hotplug_mutex); | ||
38 | |||
39 | void lock_memory_hotplug(void) | ||
40 | { | ||
41 | mutex_lock(&mem_hotplug_mutex); | ||
42 | |||
43 | /* for exclusive hibernation if CONFIG_HIBERNATION=y */ | ||
44 | lock_system_sleep(); | ||
45 | } | ||
46 | |||
47 | void unlock_memory_hotplug(void) | ||
48 | { | ||
49 | unlock_system_sleep(); | ||
50 | mutex_unlock(&mem_hotplug_mutex); | ||
51 | } | ||
52 | |||
53 | |||
37 | /* add this memory to iomem resource */ | 54 | /* add this memory to iomem resource */ |
38 | static struct resource *register_memory_resource(u64 start, u64 size) | 55 | static struct resource *register_memory_resource(u64 start, u64 size) |
39 | { | 56 | { |
@@ -493,7 +510,7 @@ int mem_online_node(int nid) | |||
493 | pg_data_t *pgdat; | 510 | pg_data_t *pgdat; |
494 | int ret; | 511 | int ret; |
495 | 512 | ||
496 | lock_system_sleep(); | 513 | lock_memory_hotplug(); |
497 | pgdat = hotadd_new_pgdat(nid, 0); | 514 | pgdat = hotadd_new_pgdat(nid, 0); |
498 | if (pgdat) { | 515 | if (pgdat) { |
499 | ret = -ENOMEM; | 516 | ret = -ENOMEM; |
@@ -504,7 +521,7 @@ int mem_online_node(int nid) | |||
504 | BUG_ON(ret); | 521 | BUG_ON(ret); |
505 | 522 | ||
506 | out: | 523 | out: |
507 | unlock_system_sleep(); | 524 | unlock_memory_hotplug(); |
508 | return ret; | 525 | return ret; |
509 | } | 526 | } |
510 | 527 | ||
@@ -516,7 +533,7 @@ int __ref add_memory(int nid, u64 start, u64 size) | |||
516 | struct resource *res; | 533 | struct resource *res; |
517 | int ret; | 534 | int ret; |
518 | 535 | ||
519 | lock_system_sleep(); | 536 | lock_memory_hotplug(); |
520 | 537 | ||
521 | res = register_memory_resource(start, size); | 538 | res = register_memory_resource(start, size); |
522 | ret = -EEXIST; | 539 | ret = -EEXIST; |
@@ -563,7 +580,7 @@ error: | |||
563 | release_memory_resource(res); | 580 | release_memory_resource(res); |
564 | 581 | ||
565 | out: | 582 | out: |
566 | unlock_system_sleep(); | 583 | unlock_memory_hotplug(); |
567 | return ret; | 584 | return ret; |
568 | } | 585 | } |
569 | EXPORT_SYMBOL_GPL(add_memory); | 586 | EXPORT_SYMBOL_GPL(add_memory); |
@@ -791,7 +808,7 @@ static int offline_pages(unsigned long start_pfn, | |||
791 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | 808 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) |
792 | return -EINVAL; | 809 | return -EINVAL; |
793 | 810 | ||
794 | lock_system_sleep(); | 811 | lock_memory_hotplug(); |
795 | 812 | ||
796 | zone = page_zone(pfn_to_page(start_pfn)); | 813 | zone = page_zone(pfn_to_page(start_pfn)); |
797 | node = zone_to_nid(zone); | 814 | node = zone_to_nid(zone); |
@@ -880,7 +897,7 @@ repeat: | |||
880 | writeback_set_ratelimit(); | 897 | writeback_set_ratelimit(); |
881 | 898 | ||
882 | memory_notify(MEM_OFFLINE, &arg); | 899 | memory_notify(MEM_OFFLINE, &arg); |
883 | unlock_system_sleep(); | 900 | unlock_memory_hotplug(); |
884 | return 0; | 901 | return 0; |
885 | 902 | ||
886 | failed_removal: | 903 | failed_removal: |
@@ -891,7 +908,7 @@ failed_removal: | |||
891 | undo_isolate_page_range(start_pfn, end_pfn); | 908 | undo_isolate_page_range(start_pfn, end_pfn); |
892 | 909 | ||
893 | out: | 910 | out: |
894 | unlock_system_sleep(); | 911 | unlock_memory_hotplug(); |
895 | return ret; | 912 | return ret; |
896 | } | 913 | } |
897 | 914 | ||
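lock_memory_hotplug()/unlock_memory_hotplug() become the single entry point for code that must not race with memory going online or offline (they still take the system-sleep lock for the hibernation case, as the comment above notes). A hypothetical caller, assuming the declarations are exported from <linux/memory_hotplug.h> like the other hotplug helpers:

#include <linux/memory_hotplug.h>
#include <linux/mmzone.h>

/* Hypothetical example: pfn_valid() stands in for real work whose
 * result must stay stable against hotplug. */
static int hotplug_stable_check(unsigned long pfn)
{
        int ret;

        lock_memory_hotplug();
        ret = pfn_valid(pfn);
        unlock_memory_hotplug();

        return ret;
}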
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4a57f135b76e..11ff260fb282 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1307,15 +1307,18 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, | |||
1307 | goto out; | 1307 | goto out; |
1308 | 1308 | ||
1309 | /* Find the mm_struct */ | 1309 | /* Find the mm_struct */ |
1310 | rcu_read_lock(); | ||
1310 | read_lock(&tasklist_lock); | 1311 | read_lock(&tasklist_lock); |
1311 | task = pid ? find_task_by_vpid(pid) : current; | 1312 | task = pid ? find_task_by_vpid(pid) : current; |
1312 | if (!task) { | 1313 | if (!task) { |
1313 | read_unlock(&tasklist_lock); | 1314 | read_unlock(&tasklist_lock); |
1315 | rcu_read_unlock(); | ||
1314 | err = -ESRCH; | 1316 | err = -ESRCH; |
1315 | goto out; | 1317 | goto out; |
1316 | } | 1318 | } |
1317 | mm = get_task_mm(task); | 1319 | mm = get_task_mm(task); |
1318 | read_unlock(&tasklist_lock); | 1320 | read_unlock(&tasklist_lock); |
1321 | rcu_read_unlock(); | ||
1319 | 1322 | ||
1320 | err = -EINVAL; | 1323 | err = -EINVAL; |
1321 | if (!mm) | 1324 | if (!mm) |
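The mempolicy.c hunk wraps the task lookup in rcu_read_lock() because find_task_by_vpid() walks RCU-protected pid structures; tasklist_lock alone no longer covers that. Reduced to an illustrative sketch:

#include <linux/sched.h>
#include <linux/rcupdate.h>

/* Look up a task by pid and pin its mm.  Returns NULL if the task is
 * gone or has no mm; the caller mmput()s the result. */
static struct mm_struct *get_mm_by_vpid(pid_t pid)
{
        struct task_struct *task;
        struct mm_struct *mm = NULL;

        rcu_read_lock();
        task = pid ? find_task_by_vpid(pid) : current;
        if (task)
                mm = get_task_mm(task); /* takes its own reference */
        rcu_read_unlock();

        return mm;
}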
diff --git a/mm/migrate.c b/mm/migrate.c index fe5a3c6a5426..6ae8a66a7045 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include <linux/hugetlb.h> | 35 | #include <linux/hugetlb.h> |
36 | #include <linux/gfp.h> | 36 | #include <linux/gfp.h> |
37 | 37 | ||
38 | #include <asm/tlbflush.h> | ||
39 | |||
38 | #include "internal.h" | 40 | #include "internal.h" |
39 | 41 | ||
40 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | 42 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) |
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c
@@ -2462,6 +2462,7 @@ int install_special_mapping(struct mm_struct *mm, | |||
2462 | unsigned long addr, unsigned long len, | 2462 | unsigned long addr, unsigned long len, |
2463 | unsigned long vm_flags, struct page **pages) | 2463 | unsigned long vm_flags, struct page **pages) |
2464 | { | 2464 | { |
2465 | int ret; | ||
2465 | struct vm_area_struct *vma; | 2466 | struct vm_area_struct *vma; |
2466 | 2467 | ||
2467 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); | 2468 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); |
@@ -2479,16 +2480,23 @@ int install_special_mapping(struct mm_struct *mm, | |||
2479 | vma->vm_ops = &special_mapping_vmops; | 2480 | vma->vm_ops = &special_mapping_vmops; |
2480 | vma->vm_private_data = pages; | 2481 | vma->vm_private_data = pages; |
2481 | 2482 | ||
2482 | if (unlikely(insert_vm_struct(mm, vma))) { | 2483 | ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); |
2483 | kmem_cache_free(vm_area_cachep, vma); | 2484 | if (ret) |
2484 | return -ENOMEM; | 2485 | goto out; |
2485 | } | 2486 | |
2487 | ret = insert_vm_struct(mm, vma); | ||
2488 | if (ret) | ||
2489 | goto out; | ||
2486 | 2490 | ||
2487 | mm->total_vm += len >> PAGE_SHIFT; | 2491 | mm->total_vm += len >> PAGE_SHIFT; |
2488 | 2492 | ||
2489 | perf_event_mmap(vma); | 2493 | perf_event_mmap(vma); |
2490 | 2494 | ||
2491 | return 0; | 2495 | return 0; |
2496 | |||
2497 | out: | ||
2498 | kmem_cache_free(vm_area_cachep, vma); | ||
2499 | return ret; | ||
2492 | } | 2500 | } |
2493 | 2501 | ||
2494 | static DEFINE_MUTEX(mm_all_locks_mutex); | 2502 | static DEFINE_MUTEX(mm_all_locks_mutex); |
diff --git a/mm/nommu.c b/mm/nommu.c index 3613517c7592..ef4045d010d5 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> | 10 | * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> |
11 | * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> | 11 | * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> |
12 | * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> | 12 | * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> |
13 | * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org> | 13 | * Copyright (c) 2007-2010 Paul Mundt <lethal@linux-sh.org> |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
@@ -328,6 +328,7 @@ void *vmalloc_node(unsigned long size, int node) | |||
328 | { | 328 | { |
329 | return vmalloc(size); | 329 | return vmalloc(size); |
330 | } | 330 | } |
331 | EXPORT_SYMBOL(vmalloc_node); | ||
331 | 332 | ||
332 | /** | 333 | /** |
333 | * vzalloc_node - allocate memory on a specific node with zero fill | 334 | * vzalloc_node - allocate memory on a specific node with zero fill |
@@ -440,6 +441,31 @@ void __attribute__((weak)) vmalloc_sync_all(void) | |||
440 | { | 441 | { |
441 | } | 442 | } |
442 | 443 | ||
444 | /** | ||
445 | * alloc_vm_area - allocate a range of kernel address space | ||
446 | * @size: size of the area | ||
447 | * | ||
448 | * Returns: NULL on failure, vm_struct on success | ||
449 | * | ||
450 | * This function reserves a range of kernel address space, and | ||
451 | * allocates pagetables to map that range. No actual mappings | ||
452 | * are created. If the kernel address space is not shared | ||
453 | * between processes, it syncs the pagetable across all | ||
454 | * processes. | ||
455 | */ | ||
456 | struct vm_struct *alloc_vm_area(size_t size) | ||
457 | { | ||
458 | BUG(); | ||
459 | return NULL; | ||
460 | } | ||
461 | EXPORT_SYMBOL_GPL(alloc_vm_area); | ||
462 | |||
463 | void free_vm_area(struct vm_struct *area) | ||
464 | { | ||
465 | BUG(); | ||
466 | } | ||
467 | EXPORT_SYMBOL_GPL(free_vm_area); | ||
468 | |||
443 | int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, | 469 | int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, |
444 | struct page *page) | 470 | struct page *page) |
445 | { | 471 | { |
@@ -1717,6 +1743,7 @@ void exit_mmap(struct mm_struct *mm) | |||
1717 | mm->mmap = vma->vm_next; | 1743 | mm->mmap = vma->vm_next; |
1718 | delete_vma_from_mm(vma); | 1744 | delete_vma_from_mm(vma); |
1719 | delete_vma(mm, vma); | 1745 | delete_vma(mm, vma); |
1746 | cond_resched(); | ||
1720 | } | 1747 | } |
1721 | 1748 | ||
1722 | kleave(""); | 1749 | kleave(""); |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b840afa89761..b4edfe7ce06c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -563,7 +563,7 @@ static void balance_dirty_pages(struct address_space *mapping, | |||
563 | break; /* We've done our duty */ | 563 | break; /* We've done our duty */ |
564 | } | 564 | } |
565 | trace_wbc_balance_dirty_wait(&wbc, bdi); | 565 | trace_wbc_balance_dirty_wait(&wbc, bdi); |
566 | __set_current_state(TASK_INTERRUPTIBLE); | 566 | __set_current_state(TASK_UNINTERRUPTIBLE); |
567 | io_schedule_timeout(pause); | 567 | io_schedule_timeout(pause); |
568 | 568 | ||
569 | /* | 569 | /* |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07a654486f75..ff7e15872398 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -104,19 +104,24 @@ gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; | |||
104 | * only be modified with pm_mutex held, unless the suspend/hibernate code is | 104 | * only be modified with pm_mutex held, unless the suspend/hibernate code is |
105 | * guaranteed not to run in parallel with that modification). | 105 | * guaranteed not to run in parallel with that modification). |
106 | */ | 106 | */ |
107 | void set_gfp_allowed_mask(gfp_t mask) | 107 | |
108 | static gfp_t saved_gfp_mask; | ||
109 | |||
110 | void pm_restore_gfp_mask(void) | ||
108 | { | 111 | { |
109 | WARN_ON(!mutex_is_locked(&pm_mutex)); | 112 | WARN_ON(!mutex_is_locked(&pm_mutex)); |
110 | gfp_allowed_mask = mask; | 113 | if (saved_gfp_mask) { |
114 | gfp_allowed_mask = saved_gfp_mask; | ||
115 | saved_gfp_mask = 0; | ||
116 | } | ||
111 | } | 117 | } |
112 | 118 | ||
113 | gfp_t clear_gfp_allowed_mask(gfp_t mask) | 119 | void pm_restrict_gfp_mask(void) |
114 | { | 120 | { |
115 | gfp_t ret = gfp_allowed_mask; | ||
116 | |||
117 | WARN_ON(!mutex_is_locked(&pm_mutex)); | 121 | WARN_ON(!mutex_is_locked(&pm_mutex)); |
118 | gfp_allowed_mask &= ~mask; | 122 | WARN_ON(saved_gfp_mask); |
119 | return ret; | 123 | saved_gfp_mask = gfp_allowed_mask; |
124 | gfp_allowed_mask &= ~GFP_IOFS; | ||
120 | } | 125 | } |
121 | #endif /* CONFIG_PM_SLEEP */ | 126 | #endif /* CONFIG_PM_SLEEP */ |
122 | 127 | ||
@@ -3008,14 +3013,6 @@ static __init_refok int __build_all_zonelists(void *data) | |||
3008 | build_zonelist_cache(pgdat); | 3013 | build_zonelist_cache(pgdat); |
3009 | } | 3014 | } |
3010 | 3015 | ||
3011 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
3012 | /* Setup real pagesets for the new zone */ | ||
3013 | if (data) { | ||
3014 | struct zone *zone = data; | ||
3015 | setup_zone_pageset(zone); | ||
3016 | } | ||
3017 | #endif | ||
3018 | |||
3019 | /* | 3016 | /* |
3020 | * Initialize the boot_pagesets that are going to be used | 3017 | * Initialize the boot_pagesets that are going to be used |
3021 | * for bootstrapping processors. The real pagesets for | 3018 | * for bootstrapping processors. The real pagesets for |
@@ -3064,7 +3061,11 @@ void build_all_zonelists(void *data) | |||
3064 | } else { | 3061 | } else { |
3065 | /* we have to stop all cpus to guarantee there is no user | 3062 | /* we have to stop all cpus to guarantee there is no user |
3066 | of zonelist */ | 3063 | of zonelist */ |
3067 | stop_machine(__build_all_zonelists, data, NULL); | 3064 | #ifdef CONFIG_MEMORY_HOTPLUG |
3065 | if (data) | ||
3066 | setup_zone_pageset((struct zone *)data); | ||
3067 | #endif | ||
3068 | stop_machine(__build_all_zonelists, NULL, NULL); | ||
3068 | /* cpuset refresh routine should be here */ | 3069 | /* cpuset refresh routine should be here */ |
3069 | } | 3070 | } |
3070 | vm_total_pages = nr_free_pagecache_pages(); | 3071 | vm_total_pages = nr_free_pagecache_pages(); |
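set_gfp_allowed_mask()/clear_gfp_allowed_mask() are replaced by a save/restore pair that no longer takes a mask argument; the callers (in the suspend/hibernation core, outside this diff) are expected to bracket the sleep transition with them. A hypothetical sketch of that pairing, assuming the new pair is declared in <linux/gfp.h> like the helpers it replaces:

#include <linux/gfp.h>

/* Hypothetical caller; the real call sites live in kernel/power/. */
static int sleep_transition_sketch(void)
{
        int error = 0;

        pm_restrict_gfp_mask();         /* saves gfp_allowed_mask, masks out GFP_IOFS */

        /* ... freeze devices, write the image, etc. ... */

        pm_restore_gfp_mask();          /* restores the saved mask, if one was saved */
        return error;
}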
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 8b1a2ce21ee5..38cc58b8b2b0 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -139,7 +139,6 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
139 | pgd_t *pgd; | 139 | pgd_t *pgd; |
140 | unsigned long next; | 140 | unsigned long next; |
141 | int err = 0; | 141 | int err = 0; |
142 | struct vm_area_struct *vma; | ||
143 | 142 | ||
144 | if (addr >= end) | 143 | if (addr >= end) |
145 | return err; | 144 | return err; |
@@ -149,15 +148,17 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
149 | 148 | ||
150 | pgd = pgd_offset(walk->mm, addr); | 149 | pgd = pgd_offset(walk->mm, addr); |
151 | do { | 150 | do { |
151 | struct vm_area_struct *uninitialized_var(vma); | ||
152 | |||
152 | next = pgd_addr_end(addr, end); | 153 | next = pgd_addr_end(addr, end); |
153 | 154 | ||
155 | #ifdef CONFIG_HUGETLB_PAGE | ||
154 | /* | 156 | /* |
155 | * handle hugetlb vma individually because pagetable walk for | 157 | * handle hugetlb vma individually because pagetable walk for |
156 | * the hugetlb page is dependent on the architecture and | 158 | * the hugetlb page is dependent on the architecture and |
157 | * we can't handled it in the same manner as non-huge pages. | 159 | * we can't handled it in the same manner as non-huge pages. |
158 | */ | 160 | */ |
159 | vma = find_vma(walk->mm, addr); | 161 | vma = find_vma(walk->mm, addr); |
160 | #ifdef CONFIG_HUGETLB_PAGE | ||
161 | if (vma && is_vm_hugetlb_page(vma)) { | 162 | if (vma && is_vm_hugetlb_page(vma)) { |
162 | if (vma->vm_end < next) | 163 | if (vma->vm_end < next) |
163 | next = vma->vm_end; | 164 | next = vma->vm_end; |
diff --git a/mm/percpu.c b/mm/percpu.c index efe816856a9d..3dd4984bdef8 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -293,12 +293,8 @@ static void *pcpu_mem_alloc(size_t size) | |||
293 | 293 | ||
294 | if (size <= PAGE_SIZE) | 294 | if (size <= PAGE_SIZE) |
295 | return kzalloc(size, GFP_KERNEL); | 295 | return kzalloc(size, GFP_KERNEL); |
296 | else { | 296 | else |
297 | void *ptr = vmalloc(size); | 297 | return vzalloc(size); |
298 | if (ptr) | ||
299 | memset(ptr, 0, size); | ||
300 | return ptr; | ||
301 | } | ||
302 | } | 298 | } |
303 | 299 | ||
304 | /** | 300 | /** |
@@ -1268,7 +1264,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, | |||
1268 | 1264 | ||
1269 | /* we're done parsing the input, undefine BUG macro and dump config */ | 1265 | /* we're done parsing the input, undefine BUG macro and dump config */ |
1270 | #undef PCPU_SETUP_BUG_ON | 1266 | #undef PCPU_SETUP_BUG_ON |
1271 | pcpu_dump_alloc_info(KERN_INFO, ai); | 1267 | pcpu_dump_alloc_info(KERN_DEBUG, ai); |
1272 | 1268 | ||
1273 | pcpu_nr_groups = ai->nr_groups; | 1269 | pcpu_nr_groups = ai->nr_groups; |
1274 | pcpu_group_offsets = group_offsets; | 1270 | pcpu_group_offsets = group_offsets; |
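vzalloc() returns zeroed, virtually contiguous memory, so the vmalloc()+memset() pair in pcpu_mem_alloc() collapses into one call. The same shape as a standalone sketch:

#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Generic helper sketch: small requests stay physically contiguous,
 * large ones fall back to zeroed vmalloc space. */
static void *zalloc_small_or_vmalloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_KERNEL);
        return vzalloc(size);
}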
diff --git a/mm/shmem.c b/mm/shmem.c index 47fdeeb9d636..5ee67c990602 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2415,13 +2415,20 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) | |||
2415 | return &p->vfs_inode; | 2415 | return &p->vfs_inode; |
2416 | } | 2416 | } |
2417 | 2417 | ||
2418 | static void shmem_i_callback(struct rcu_head *head) | ||
2419 | { | ||
2420 | struct inode *inode = container_of(head, struct inode, i_rcu); | ||
2421 | INIT_LIST_HEAD(&inode->i_dentry); | ||
2422 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); | ||
2423 | } | ||
2424 | |||
2418 | static void shmem_destroy_inode(struct inode *inode) | 2425 | static void shmem_destroy_inode(struct inode *inode) |
2419 | { | 2426 | { |
2420 | if ((inode->i_mode & S_IFMT) == S_IFREG) { | 2427 | if ((inode->i_mode & S_IFMT) == S_IFREG) { |
2421 | /* only struct inode is valid if it's an inline symlink */ | 2428 | /* only struct inode is valid if it's an inline symlink */ |
2422 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); | 2429 | mpol_free_shared_policy(&SHMEM_I(inode)->policy); |
2423 | } | 2430 | } |
2424 | kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); | 2431 | call_rcu(&inode->i_rcu, shmem_i_callback); |
2425 | } | 2432 | } |
2426 | 2433 | ||
2427 | static void init_once(void *foo) | 2434 | static void init_once(void *foo) |
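shmem now frees its inodes through call_rcu(), the pattern filesystems adopt so lockless (RCU) walkers never dereference a freed inode. A sketch with hypothetical names, my_inode_cachep and MY_I() standing in for shmem_inode_cachep and SHMEM_I():

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

static void my_i_callback(struct rcu_head *head)
{
        struct inode *inode = container_of(head, struct inode, i_rcu);

        INIT_LIST_HEAD(&inode->i_dentry);
        /* my_inode_cachep / MY_I() are the filesystem's own inode cache
         * and container accessor (hypothetical names) */
        kmem_cache_free(my_inode_cachep, MY_I(inode));
}

static void my_destroy_inode(struct inode *inode)
{
        /* per-inode teardown first, then defer the actual free to RCU */
        call_rcu(&inode->i_rcu, my_i_callback);
}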
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c
@@ -829,12 +829,12 @@ static void init_reap_node(int cpu) | |||
829 | 829 | ||
830 | static void next_reap_node(void) | 830 | static void next_reap_node(void) |
831 | { | 831 | { |
832 | int node = __get_cpu_var(slab_reap_node); | 832 | int node = __this_cpu_read(slab_reap_node); |
833 | 833 | ||
834 | node = next_node(node, node_online_map); | 834 | node = next_node(node, node_online_map); |
835 | if (unlikely(node >= MAX_NUMNODES)) | 835 | if (unlikely(node >= MAX_NUMNODES)) |
836 | node = first_node(node_online_map); | 836 | node = first_node(node_online_map); |
837 | __get_cpu_var(slab_reap_node) = node; | 837 | __this_cpu_write(slab_reap_node, node); |
838 | } | 838 | } |
839 | 839 | ||
840 | #else | 840 | #else |
@@ -1012,7 +1012,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep, | |||
1012 | */ | 1012 | */ |
1013 | static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | 1013 | static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) |
1014 | { | 1014 | { |
1015 | int node = __get_cpu_var(slab_reap_node); | 1015 | int node = __this_cpu_read(slab_reap_node); |
1016 | 1016 | ||
1017 | if (l3->alien) { | 1017 | if (l3->alien) { |
1018 | struct array_cache *ac = l3->alien[node]; | 1018 | struct array_cache *ac = l3->alien[node]; |
@@ -1293,7 +1293,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, | |||
1293 | * anything expensive but will only modify reap_work | 1293 | * anything expensive but will only modify reap_work |
1294 | * and reschedule the timer. | 1294 | * and reschedule the timer. |
1295 | */ | 1295 | */ |
1296 | cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu)); | 1296 | cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu)); |
1297 | /* Now the cache_reaper is guaranteed to be not running. */ | 1297 | /* Now the cache_reaper is guaranteed to be not running. */ |
1298 | per_cpu(slab_reap_work, cpu).work.func = NULL; | 1298 | per_cpu(slab_reap_work, cpu).work.func = NULL; |
1299 | break; | 1299 | break; |
@@ -2781,7 +2781,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, | |||
2781 | /* | 2781 | /* |
2782 | * Map pages beginning at addr to the given cache and slab. This is required | 2782 | * Map pages beginning at addr to the given cache and slab. This is required |
2783 | * for the slab allocator to be able to lookup the cache and slab of a | 2783 | * for the slab allocator to be able to lookup the cache and slab of a |
2784 | * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging. | 2784 | * virtual address for kfree, ksize, and slab debugging. |
2785 | */ | 2785 | */ |
2786 | static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, | 2786 | static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, |
2787 | void *addr) | 2787 | void *addr) |
@@ -3653,42 +3653,19 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3653 | EXPORT_SYMBOL(kmem_cache_alloc); | 3653 | EXPORT_SYMBOL(kmem_cache_alloc); |
3654 | 3654 | ||
3655 | #ifdef CONFIG_TRACING | 3655 | #ifdef CONFIG_TRACING |
3656 | void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) | 3656 | void * |
3657 | kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags) | ||
3657 | { | 3658 | { |
3658 | return __cache_alloc(cachep, flags, __builtin_return_address(0)); | 3659 | void *ret; |
3659 | } | ||
3660 | EXPORT_SYMBOL(kmem_cache_alloc_notrace); | ||
3661 | #endif | ||
3662 | 3660 | ||
3663 | /** | 3661 | ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); |
3664 | * kmem_ptr_validate - check if an untrusted pointer might be a slab entry. | ||
3665 | * @cachep: the cache we're checking against | ||
3666 | * @ptr: pointer to validate | ||
3667 | * | ||
3668 | * This verifies that the untrusted pointer looks sane; | ||
3669 | * it is _not_ a guarantee that the pointer is actually | ||
3670 | * part of the slab cache in question, but it at least | ||
3671 | * validates that the pointer can be dereferenced and | ||
3672 | * looks half-way sane. | ||
3673 | * | ||
3674 | * Currently only used for dentry validation. | ||
3675 | */ | ||
3676 | int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr) | ||
3677 | { | ||
3678 | unsigned long size = cachep->buffer_size; | ||
3679 | struct page *page; | ||
3680 | 3662 | ||
3681 | if (unlikely(!kern_ptr_validate(ptr, size))) | 3663 | trace_kmalloc(_RET_IP_, ret, |
3682 | goto out; | 3664 | size, slab_buffer_size(cachep), flags); |
3683 | page = virt_to_page(ptr); | 3665 | return ret; |
3684 | if (unlikely(!PageSlab(page))) | ||
3685 | goto out; | ||
3686 | if (unlikely(page_get_cache(page) != cachep)) | ||
3687 | goto out; | ||
3688 | return 1; | ||
3689 | out: | ||
3690 | return 0; | ||
3691 | } | 3666 | } |
3667 | EXPORT_SYMBOL(kmem_cache_alloc_trace); | ||
3668 | #endif | ||
3692 | 3669 | ||
3693 | #ifdef CONFIG_NUMA | 3670 | #ifdef CONFIG_NUMA |
3694 | void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 3671 | void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) |
@@ -3705,31 +3682,32 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
3705 | EXPORT_SYMBOL(kmem_cache_alloc_node); | 3682 | EXPORT_SYMBOL(kmem_cache_alloc_node); |
3706 | 3683 | ||
3707 | #ifdef CONFIG_TRACING | 3684 | #ifdef CONFIG_TRACING |
3708 | void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, | 3685 | void *kmem_cache_alloc_node_trace(size_t size, |
3709 | gfp_t flags, | 3686 | struct kmem_cache *cachep, |
3710 | int nodeid) | 3687 | gfp_t flags, |
3688 | int nodeid) | ||
3711 | { | 3689 | { |
3712 | return __cache_alloc_node(cachep, flags, nodeid, | 3690 | void *ret; |
3691 | |||
3692 | ret = __cache_alloc_node(cachep, flags, nodeid, | ||
3713 | __builtin_return_address(0)); | 3693 | __builtin_return_address(0)); |
3694 | trace_kmalloc_node(_RET_IP_, ret, | ||
3695 | size, slab_buffer_size(cachep), | ||
3696 | flags, nodeid); | ||
3697 | return ret; | ||
3714 | } | 3698 | } |
3715 | EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); | 3699 | EXPORT_SYMBOL(kmem_cache_alloc_node_trace); |
3716 | #endif | 3700 | #endif |
3717 | 3701 | ||
3718 | static __always_inline void * | 3702 | static __always_inline void * |
3719 | __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) | 3703 | __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) |
3720 | { | 3704 | { |
3721 | struct kmem_cache *cachep; | 3705 | struct kmem_cache *cachep; |
3722 | void *ret; | ||
3723 | 3706 | ||
3724 | cachep = kmem_find_general_cachep(size, flags); | 3707 | cachep = kmem_find_general_cachep(size, flags); |
3725 | if (unlikely(ZERO_OR_NULL_PTR(cachep))) | 3708 | if (unlikely(ZERO_OR_NULL_PTR(cachep))) |
3726 | return cachep; | 3709 | return cachep; |
3727 | ret = kmem_cache_alloc_node_notrace(cachep, flags, node); | 3710 | return kmem_cache_alloc_node_trace(size, cachep, flags, node); |
3728 | |||
3729 | trace_kmalloc_node((unsigned long) caller, ret, | ||
3730 | size, cachep->buffer_size, flags, node); | ||
3731 | |||
3732 | return ret; | ||
3733 | } | 3711 | } |
3734 | 3712 | ||
3735 | #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING) | 3713 | #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING) |
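With the _notrace variants gone, the requested size is passed down to kmem_cache_alloc_trace()/kmem_cache_alloc_node_trace() and the kmalloc tracepoints fire in one place rather than at every inlined call site. A condensed sketch of the resulting shape, mirroring __do_kmalloc_node() above (not the header's exact inline):

static __always_inline void *kmalloc_node_sketch(size_t size, gfp_t flags, int node)
{
        struct kmem_cache *cachep;

        cachep = kmem_find_general_cachep(size, flags);
        if (unlikely(ZERO_OR_NULL_PTR(cachep)))
                return cachep;
        /* the trace variant emits trace_kmalloc_node() with the requested
         * size and the cache's buffer size, then returns the object */
        return kmem_cache_alloc_node_trace(size, cachep, flags, node);
}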
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c
@@ -678,11 +678,6 @@ int kmem_cache_shrink(struct kmem_cache *d) | |||
678 | } | 678 | } |
679 | EXPORT_SYMBOL(kmem_cache_shrink); | 679 | EXPORT_SYMBOL(kmem_cache_shrink); |
680 | 680 | ||
681 | int kmem_ptr_validate(struct kmem_cache *a, const void *b) | ||
682 | { | ||
683 | return 0; | ||
684 | } | ||
685 | |||
686 | static unsigned int slob_ready __read_mostly; | 681 | static unsigned int slob_ready __read_mostly; |
687 | 682 | ||
688 | int slab_is_available(void) | 683 | int slab_is_available(void) |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c
@@ -28,6 +28,8 @@ | |||
28 | #include <linux/math64.h> | 28 | #include <linux/math64.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | 30 | ||
31 | #include <trace/events/kmem.h> | ||
32 | |||
31 | /* | 33 | /* |
32 | * Lock order: | 34 | * Lock order: |
33 | * 1. slab_lock(page) | 35 | * 1. slab_lock(page) |
@@ -1774,11 +1776,21 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) | |||
1774 | EXPORT_SYMBOL(kmem_cache_alloc); | 1776 | EXPORT_SYMBOL(kmem_cache_alloc); |
1775 | 1777 | ||
1776 | #ifdef CONFIG_TRACING | 1778 | #ifdef CONFIG_TRACING |
1777 | void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) | 1779 | void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) |
1780 | { | ||
1781 | void *ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); | ||
1782 | trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); | ||
1783 | return ret; | ||
1784 | } | ||
1785 | EXPORT_SYMBOL(kmem_cache_alloc_trace); | ||
1786 | |||
1787 | void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) | ||
1778 | { | 1788 | { |
1779 | return slab_alloc(s, gfpflags, NUMA_NO_NODE, _RET_IP_); | 1789 | void *ret = kmalloc_order(size, flags, order); |
1790 | trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); | ||
1791 | return ret; | ||
1780 | } | 1792 | } |
1781 | EXPORT_SYMBOL(kmem_cache_alloc_notrace); | 1793 | EXPORT_SYMBOL(kmalloc_order_trace); |
1782 | #endif | 1794 | #endif |
1783 | 1795 | ||
1784 | #ifdef CONFIG_NUMA | 1796 | #ifdef CONFIG_NUMA |
@@ -1794,13 +1806,17 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) | |||
1794 | EXPORT_SYMBOL(kmem_cache_alloc_node); | 1806 | EXPORT_SYMBOL(kmem_cache_alloc_node); |
1795 | 1807 | ||
1796 | #ifdef CONFIG_TRACING | 1808 | #ifdef CONFIG_TRACING |
1797 | void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, | 1809 | void *kmem_cache_alloc_node_trace(struct kmem_cache *s, |
1798 | gfp_t gfpflags, | 1810 | gfp_t gfpflags, |
1799 | int node) | 1811 | int node, size_t size) |
1800 | { | 1812 | { |
1801 | return slab_alloc(s, gfpflags, node, _RET_IP_); | 1813 | void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); |
1814 | |||
1815 | trace_kmalloc_node(_RET_IP_, ret, | ||
1816 | size, s->size, gfpflags, node); | ||
1817 | return ret; | ||
1802 | } | 1818 | } |
1803 | EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); | 1819 | EXPORT_SYMBOL(kmem_cache_alloc_node_trace); |
1804 | #endif | 1820 | #endif |
1805 | #endif | 1821 | #endif |
1806 | 1822 | ||
@@ -1917,17 +1933,6 @@ void kmem_cache_free(struct kmem_cache *s, void *x) | |||
1917 | } | 1933 | } |
1918 | EXPORT_SYMBOL(kmem_cache_free); | 1934 | EXPORT_SYMBOL(kmem_cache_free); |
1919 | 1935 | ||
1920 | /* Figure out on which slab page the object resides */ | ||
1921 | static struct page *get_object_page(const void *x) | ||
1922 | { | ||
1923 | struct page *page = virt_to_head_page(x); | ||
1924 | |||
1925 | if (!PageSlab(page)) | ||
1926 | return NULL; | ||
1927 | |||
1928 | return page; | ||
1929 | } | ||
1930 | |||
1931 | /* | 1936 | /* |
1932 | * Object placement in a slab is made very easy because we always start at | 1937 | * Object placement in a slab is made very easy because we always start at |
1933 | * offset 0. If we tune the size of the object to the alignment then we can | 1938 | * offset 0. If we tune the size of the object to the alignment then we can |
@@ -2386,35 +2391,6 @@ error: | |||
2386 | } | 2391 | } |
2387 | 2392 | ||
2388 | /* | 2393 | /* |
2389 | * Check if a given pointer is valid | ||
2390 | */ | ||
2391 | int kmem_ptr_validate(struct kmem_cache *s, const void *object) | ||
2392 | { | ||
2393 | struct page *page; | ||
2394 | |||
2395 | if (!kern_ptr_validate(object, s->size)) | ||
2396 | return 0; | ||
2397 | |||
2398 | page = get_object_page(object); | ||
2399 | |||
2400 | if (!page || s != page->slab) | ||
2401 | /* No slab or wrong slab */ | ||
2402 | return 0; | ||
2403 | |||
2404 | if (!check_valid_pointer(s, page, object)) | ||
2405 | return 0; | ||
2406 | |||
2407 | /* | ||
2408 | * We could also check if the object is on the slabs freelist. | ||
2409 | * But this would be too expensive and it seems that the main | ||
2410 | * purpose of kmem_ptr_valid() is to check if the object belongs | ||
2411 | * to a certain slab. | ||
2412 | */ | ||
2413 | return 1; | ||
2414 | } | ||
2415 | EXPORT_SYMBOL(kmem_ptr_validate); | ||
2416 | |||
2417 | /* | ||
2418 | * Determine the size of a slab object | 2394 | * Determine the size of a slab object |
2419 | */ | 2395 | */ |
2420 | unsigned int kmem_cache_size(struct kmem_cache *s) | 2396 | unsigned int kmem_cache_size(struct kmem_cache *s) |
@@ -3273,9 +3249,9 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3273 | kfree(n); | 3249 | kfree(n); |
3274 | kfree(s); | 3250 | kfree(s); |
3275 | } | 3251 | } |
3252 | err: | ||
3276 | up_write(&slub_lock); | 3253 | up_write(&slub_lock); |
3277 | 3254 | ||
3278 | err: | ||
3279 | if (flags & SLAB_PANIC) | 3255 | if (flags & SLAB_PANIC) |
3280 | panic("Cannot create slabcache %s\n", name); | 3256 | panic("Cannot create slabcache %s\n", name); |
3281 | else | 3257 | else |
@@ -3401,13 +3377,13 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
3401 | 3377 | ||
3402 | for_each_free_object(p, s, page->freelist) { | 3378 | for_each_free_object(p, s, page->freelist) { |
3403 | set_bit(slab_index(p, s, addr), map); | 3379 | set_bit(slab_index(p, s, addr), map); |
3404 | if (!check_object(s, page, p, 0)) | 3380 | if (!check_object(s, page, p, SLUB_RED_INACTIVE)) |
3405 | return 0; | 3381 | return 0; |
3406 | } | 3382 | } |
3407 | 3383 | ||
3408 | for_each_object(p, s, addr, page->objects) | 3384 | for_each_object(p, s, addr, page->objects) |
3409 | if (!test_bit(slab_index(p, s, addr), map)) | 3385 | if (!test_bit(slab_index(p, s, addr), map)) |
3410 | if (!check_object(s, page, p, 1)) | 3386 | if (!check_object(s, page, p, SLUB_RED_ACTIVE)) |
3411 | return 0; | 3387 | return 0; |
3412 | return 1; | 3388 | return 1; |
3413 | } | 3389 | } |
@@ -3862,6 +3838,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
3862 | x += sprintf(buf + x, " N%d=%lu", | 3838 | x += sprintf(buf + x, " N%d=%lu", |
3863 | node, nodes[node]); | 3839 | node, nodes[node]); |
3864 | #endif | 3840 | #endif |
3841 | up_read(&slub_lock); | ||
3865 | kfree(nodes); | 3842 | kfree(nodes); |
3866 | return x + sprintf(buf + x, "\n"); | 3843 | return x + sprintf(buf + x, "\n"); |
3867 | } | 3844 | } |
diff --git a/mm/truncate.c b/mm/truncate.c index ba887bff48c5..3c2d5ddfa0d4 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -390,6 +390,10 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
390 | __remove_from_page_cache(page); | 390 | __remove_from_page_cache(page); |
391 | spin_unlock_irq(&mapping->tree_lock); | 391 | spin_unlock_irq(&mapping->tree_lock); |
392 | mem_cgroup_uncharge_cache_page(page); | 392 | mem_cgroup_uncharge_cache_page(page); |
393 | |||
394 | if (mapping->a_ops->freepage) | ||
395 | mapping->a_ops->freepage(page); | ||
396 | |||
393 | page_cache_release(page); /* pagecache ref */ | 397 | page_cache_release(page); /* pagecache ref */ |
394 | return 1; | 398 | return 1; |
395 | failed: | 399 | failed: |
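Together with the filemap.c hunk, this gives address spaces a ->freepage() callback that runs after the page has been removed from the radix tree and tree_lock has been dropped. From a filesystem's point of view, with hypothetical myfs_* names:

#include <linux/fs.h>

static void myfs_freepage(struct page *page)
{
        /* release per-page filesystem state; the page has already been
         * removed from the page cache when this runs */
}

static const struct address_space_operations myfs_aops = {
        /* .readpage, .writepage, ... as before */
        .freepage       = myfs_freepage,
};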
diff --git a/mm/util.c b/mm/util.c --- a/mm/util.c +++ b/mm/util.c
@@ -186,27 +186,6 @@ void kzfree(const void *p) | |||
186 | } | 186 | } |
187 | EXPORT_SYMBOL(kzfree); | 187 | EXPORT_SYMBOL(kzfree); |
188 | 188 | ||
189 | int kern_ptr_validate(const void *ptr, unsigned long size) | ||
190 | { | ||
191 | unsigned long addr = (unsigned long)ptr; | ||
192 | unsigned long min_addr = PAGE_OFFSET; | ||
193 | unsigned long align_mask = sizeof(void *) - 1; | ||
194 | |||
195 | if (unlikely(addr < min_addr)) | ||
196 | goto out; | ||
197 | if (unlikely(addr > (unsigned long)high_memory - size)) | ||
198 | goto out; | ||
199 | if (unlikely(addr & align_mask)) | ||
200 | goto out; | ||
201 | if (unlikely(!kern_addr_valid(addr))) | ||
202 | goto out; | ||
203 | if (unlikely(!kern_addr_valid(addr + size - 1))) | ||
204 | goto out; | ||
205 | return 1; | ||
206 | out: | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | /* | 189 | /* |
211 | * strndup_user - duplicate an existing string from user space | 190 | * strndup_user - duplicate an existing string from user space |
212 | * @s: The string to duplicate | 191 | * @s: The string to duplicate |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a3d66b3dc5cb..eb5cc7d00c5a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -31,8 +31,6 @@ | |||
31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
32 | #include <asm/shmparam.h> | 32 | #include <asm/shmparam.h> |
33 | 33 | ||
34 | bool vmap_lazy_unmap __read_mostly = true; | ||
35 | |||
36 | /*** Page table manipulation functions ***/ | 34 | /*** Page table manipulation functions ***/ |
37 | 35 | ||
38 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) | 36 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) |
@@ -503,9 +501,6 @@ static unsigned long lazy_max_pages(void) | |||
503 | { | 501 | { |
504 | unsigned int log; | 502 | unsigned int log; |
505 | 503 | ||
506 | if (!vmap_lazy_unmap) | ||
507 | return 0; | ||
508 | |||
509 | log = fls(num_online_cpus()); | 504 | log = fls(num_online_cpus()); |
510 | 505 | ||
511 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); | 506 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); |
@@ -566,7 +561,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
566 | if (va->va_end > *end) | 561 | if (va->va_end > *end) |
567 | *end = va->va_end; | 562 | *end = va->va_end; |
568 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; | 563 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; |
569 | unmap_vmap_area(va); | ||
570 | list_add_tail(&va->purge_list, &valist); | 564 | list_add_tail(&va->purge_list, &valist); |
571 | va->flags |= VM_LAZY_FREEING; | 565 | va->flags |= VM_LAZY_FREEING; |
572 | va->flags &= ~VM_LAZY_FREE; | 566 | va->flags &= ~VM_LAZY_FREE; |
@@ -611,10 +605,11 @@ static void purge_vmap_area_lazy(void) | |||
611 | } | 605 | } |
612 | 606 | ||
613 | /* | 607 | /* |
614 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been | 608 | * Free a vmap area, caller ensuring that the area has been unmapped |
615 | * called for the correct range previously. | 609 | * and flush_cache_vunmap had been called for the correct range |
610 | * previously. | ||
616 | */ | 611 | */ |
617 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | 612 | static void free_vmap_area_noflush(struct vmap_area *va) |
618 | { | 613 | { |
619 | va->flags |= VM_LAZY_FREE; | 614 | va->flags |= VM_LAZY_FREE; |
620 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); | 615 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); |
@@ -623,6 +618,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va) | |||
623 | } | 618 | } |
624 | 619 | ||
625 | /* | 620 | /* |
621 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been | ||
622 | * called for the correct range previously. | ||
623 | */ | ||
624 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | ||
625 | { | ||
626 | unmap_vmap_area(va); | ||
627 | free_vmap_area_noflush(va); | ||
628 | } | ||
629 | |||
630 | /* | ||
626 | * Free and unmap a vmap area | 631 | * Free and unmap a vmap area |
627 | */ | 632 | */ |
628 | static void free_unmap_vmap_area(struct vmap_area *va) | 633 | static void free_unmap_vmap_area(struct vmap_area *va) |
@@ -798,7 +803,7 @@ static void free_vmap_block(struct vmap_block *vb) | |||
798 | spin_unlock(&vmap_block_tree_lock); | 803 | spin_unlock(&vmap_block_tree_lock); |
799 | BUG_ON(tmp != vb); | 804 | BUG_ON(tmp != vb); |
800 | 805 | ||
801 | free_unmap_vmap_area_noflush(vb->va); | 806 | free_vmap_area_noflush(vb->va); |
802 | call_rcu(&vb->rcu_head, rcu_free_vb); | 807 | call_rcu(&vb->rcu_head, rcu_free_vb); |
803 | } | 808 | } |
804 | 809 | ||
@@ -936,6 +941,8 @@ static void vb_free(const void *addr, unsigned long size) | |||
936 | rcu_read_unlock(); | 941 | rcu_read_unlock(); |
937 | BUG_ON(!vb); | 942 | BUG_ON(!vb); |
938 | 943 | ||
944 | vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); | ||
945 | |||
939 | spin_lock(&vb->lock); | 946 | spin_lock(&vb->lock); |
940 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); | 947 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); |
941 | 948 | ||
@@ -988,7 +995,6 @@ void vm_unmap_aliases(void) | |||
988 | 995 | ||
989 | s = vb->va->va_start + (i << PAGE_SHIFT); | 996 | s = vb->va->va_start + (i << PAGE_SHIFT); |
990 | e = vb->va->va_start + (j << PAGE_SHIFT); | 997 | e = vb->va->va_start + (j << PAGE_SHIFT); |
991 | vunmap_page_range(s, e); | ||
992 | flush = 1; | 998 | flush = 1; |
993 | 999 | ||
994 | if (s < start) | 1000 | if (s < start) |
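
Taken together, the vmalloc.c hunks move page-table teardown out of the deferred purge: vb_free() and free_unmap_vmap_area() now clear the mappings immediately, while __purge_vmap_area_lazy() and free_vmap_block() only flush the TLB and recycle address space. Callers of the public API are unaffected; a hypothetical driver-style user of vm_map_ram()/vm_unmap_ram(), which sits on top of these paths:

/* Hypothetical user of the per-cpu vmap block interface. After this patch
 * the kernel page tables for the mapping are cleared inside vm_unmap_ram()
 * itself; only the TLB flush and reuse of the address range stay lazy. */
#include <linux/vmalloc.h>

static void *map_pages_tmp(struct page **pages, unsigned int count)
{
	/* node = -1: no NUMA preference; returns NULL on failure */
	return vm_map_ram(pages, count, -1, PAGE_KERNEL);
}

static void unmap_pages_tmp(void *addr, unsigned int count)
{
	vm_unmap_ram(addr, count);
}
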
diff --git a/mm/vmscan.c b/mm/vmscan.c index b8a6fdc21312..9ca587c69274 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -494,9 +494,16 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) | |||
494 | spin_unlock_irq(&mapping->tree_lock); | 494 | spin_unlock_irq(&mapping->tree_lock); |
495 | swapcache_free(swap, page); | 495 | swapcache_free(swap, page); |
496 | } else { | 496 | } else { |
497 | void (*freepage)(struct page *); | ||
498 | |||
499 | freepage = mapping->a_ops->freepage; | ||
500 | |||
497 | __remove_from_page_cache(page); | 501 | __remove_from_page_cache(page); |
498 | spin_unlock_irq(&mapping->tree_lock); | 502 | spin_unlock_irq(&mapping->tree_lock); |
499 | mem_cgroup_uncharge_cache_page(page); | 503 | mem_cgroup_uncharge_cache_page(page); |
504 | |||
505 | if (freepage != NULL) | ||
506 | freepage(page); | ||
500 | } | 507 | } |
501 | 508 | ||
502 | return 1; | 509 | return 1; |
@@ -913,7 +920,7 @@ keep_lumpy: | |||
913 | * back off and wait for congestion to clear because further reclaim | 920 | * back off and wait for congestion to clear because further reclaim |
914 | * will encounter the same problem | 921 | * will encounter the same problem |
915 | */ | 922 | */ |
916 | if (nr_dirty == nr_congested) | 923 | if (nr_dirty == nr_congested && nr_dirty != 0) |
917 | zone_set_flag(zone, ZONE_CONGESTED); | 924 | zone_set_flag(zone, ZONE_CONGESTED); |
918 | 925 | ||
919 | free_page_list(&free_pages); | 926 | free_page_list(&free_pages); |
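
The added nr_dirty != 0 check closes a degenerate case: when shrink_page_list() encountered no dirty pages at all, nr_dirty == nr_congested == 0 satisfied the old test and the zone was flagged ZONE_CONGESTED for no reason, making reclaim back off needlessly. A standalone restatement of the corrected condition (illustrative, not kernel code):

#include <stdbool.h>

/* Mark the zone congested only if there was at least one dirty page and
 * every dirty page also sat behind a congested backing device. */
static bool should_mark_congested(unsigned long nr_dirty,
				  unsigned long nr_congested)
{
	return nr_dirty != 0 && nr_dirty == nr_congested;
}
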
diff --git a/mm/vmstat.c b/mm/vmstat.c index 42eac4d33216..312d728976f1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -167,36 +167,24 @@ static void refresh_zone_stat_thresholds(void) | |||
167 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | 167 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
168 | int delta) | 168 | int delta) |
169 | { | 169 | { |
170 | struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); | 170 | struct per_cpu_pageset __percpu *pcp = zone->pageset; |
171 | 171 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
172 | s8 *p = pcp->vm_stat_diff + item; | ||
173 | long x; | 172 | long x; |
173 | long t; | ||
174 | |||
175 | x = delta + __this_cpu_read(*p); | ||
174 | 176 | ||
175 | x = delta + *p; | 177 | t = __this_cpu_read(pcp->stat_threshold); |
176 | 178 | ||
177 | if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { | 179 | if (unlikely(x > t || x < -t)) { |
178 | zone_page_state_add(x, zone, item); | 180 | zone_page_state_add(x, zone, item); |
179 | x = 0; | 181 | x = 0; |
180 | } | 182 | } |
181 | *p = x; | 183 | __this_cpu_write(*p, x); |
182 | } | 184 | } |
183 | EXPORT_SYMBOL(__mod_zone_page_state); | 185 | EXPORT_SYMBOL(__mod_zone_page_state); |
184 | 186 | ||
185 | /* | 187 | /* |
186 | * For an unknown interrupt state | ||
187 | */ | ||
188 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | ||
189 | int delta) | ||
190 | { | ||
191 | unsigned long flags; | ||
192 | |||
193 | local_irq_save(flags); | ||
194 | __mod_zone_page_state(zone, item, delta); | ||
195 | local_irq_restore(flags); | ||
196 | } | ||
197 | EXPORT_SYMBOL(mod_zone_page_state); | ||
198 | |||
199 | /* | ||
200 | * Optimized increment and decrement functions. | 188 | * Optimized increment and decrement functions. |
201 | * | 189 | * |
202 | * These are only for a single page and therefore can take a struct page * | 190 | * These are only for a single page and therefore can take a struct page * |
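
The rewritten __mod_zone_page_state() keeps the same differential-counter scheme but manipulates the per-cpu delta with __this_cpu_read()/__this_cpu_write() instead of dereferencing a this_cpu_ptr(), and the irq-disabling mod_zone_page_state() wrapper moves further down (it survives only in the !CONFIG_CMPXCHG_LOCAL branch). A standalone C11 sketch of the scheme itself, with a thread-local delta standing in for the per-cpu vm_stat_diff entry; THRESHOLD and all names are illustrative:

#include <stdatomic.h>

#define STAT_THRESHOLD	32

static _Atomic long global_count;		/* "zone" counter analogue */
static _Thread_local long local_diff;		/* per-cpu delta analogue */

static void mod_stat_local(long delta)
{
	long x = local_diff + delta;

	if (x > STAT_THRESHOLD || x < -STAT_THRESHOLD) {
		atomic_fetch_add(&global_count, x);	/* fold into shared counter */
		x = 0;
	}
	local_diff = x;
}
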
@@ -221,16 +209,17 @@ EXPORT_SYMBOL(mod_zone_page_state); | |||
221 | */ | 209 | */ |
222 | void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | 210 | void __inc_zone_state(struct zone *zone, enum zone_stat_item item) |
223 | { | 211 | { |
224 | struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); | 212 | struct per_cpu_pageset __percpu *pcp = zone->pageset; |
225 | s8 *p = pcp->vm_stat_diff + item; | 213 | s8 __percpu *p = pcp->vm_stat_diff + item; |
226 | 214 | s8 v, t; | |
227 | (*p)++; | ||
228 | 215 | ||
229 | if (unlikely(*p > pcp->stat_threshold)) { | 216 | v = __this_cpu_inc_return(*p); |
230 | int overstep = pcp->stat_threshold / 2; | 217 | t = __this_cpu_read(pcp->stat_threshold); |
218 | if (unlikely(v > t)) { | ||
219 | s8 overstep = t >> 1; | ||
231 | 220 | ||
232 | zone_page_state_add(*p + overstep, zone, item); | 221 | zone_page_state_add(v + overstep, zone, item); |
233 | *p = -overstep; | 222 | __this_cpu_write(*p, -overstep); |
234 | } | 223 | } |
235 | } | 224 | } |
236 | 225 | ||
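
__inc_zone_state() keeps the overstep trick but expresses it with this_cpu operations: once the per-cpu delta climbs past the threshold, an extra half threshold is pushed into the zone counter and the delta restarts below zero. A worked example, assuming a stat_threshold of 32 (the value is purely illustrative):

/* Worked example with t = 32:
 *
 *	v        = __this_cpu_inc_return(*p)	->  33  (first value above t)
 *	overstep = t >> 1			->  16
 *	zone_page_state_add(v + overstep, ...)	->  zone counter += 49
 *	__this_cpu_write(*p, -overstep)		->  local delta restarts at -16
 *
 * The sum of zone counter and local delta stays exact (49 - 16 = 33
 * increments accounted for), but the next fold is now 49 increments away
 * rather than 33, so a steadily growing statistic touches the shared zone
 * counter roughly a third less often. __dec_zone_state() mirrors this on
 * the negative side. */
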
@@ -242,16 +231,17 @@ EXPORT_SYMBOL(__inc_zone_page_state); | |||
242 | 231 | ||
243 | void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | 232 | void __dec_zone_state(struct zone *zone, enum zone_stat_item item) |
244 | { | 233 | { |
245 | struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); | 234 | struct per_cpu_pageset __percpu *pcp = zone->pageset; |
246 | s8 *p = pcp->vm_stat_diff + item; | 235 | s8 __percpu *p = pcp->vm_stat_diff + item; |
236 | s8 v, t; | ||
247 | 237 | ||
248 | (*p)--; | 238 | v = __this_cpu_dec_return(*p); |
239 | t = __this_cpu_read(pcp->stat_threshold); | ||
240 | if (unlikely(v < - t)) { | ||
241 | s8 overstep = t >> 1; | ||
249 | 242 | ||
250 | if (unlikely(*p < - pcp->stat_threshold)) { | 243 | zone_page_state_add(v - overstep, zone, item); |
251 | int overstep = pcp->stat_threshold / 2; | 244 | __this_cpu_write(*p, overstep); |
252 | |||
253 | zone_page_state_add(*p - overstep, zone, item); | ||
254 | *p = overstep; | ||
255 | } | 245 | } |
256 | } | 246 | } |
257 | 247 | ||
@@ -261,6 +251,92 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |||
261 | } | 251 | } |
262 | EXPORT_SYMBOL(__dec_zone_page_state); | 252 | EXPORT_SYMBOL(__dec_zone_page_state); |
263 | 253 | ||
254 | #ifdef CONFIG_CMPXCHG_LOCAL | ||
255 | /* | ||
256 | * If we have cmpxchg_local support then we do not need to incur the overhead | ||
257 | * that comes with local_irq_save/restore if we use this_cpu_cmpxchg. | ||
258 | * | ||
259 | * mod_state() modifies the zone counter state through atomic per cpu | ||
260 | * operations. | ||
261 | * | ||
262 | * Overstep mode specifies how overstep should handled: | ||
263 | * 0 No overstepping | ||
264 | * 1 Overstepping half of threshold | ||
265 | * -1 Overstepping minus half of threshold | ||
266 | */ | ||
267 | static inline void mod_state(struct zone *zone, | ||
268 | enum zone_stat_item item, int delta, int overstep_mode) | ||
269 | { | ||
270 | struct per_cpu_pageset __percpu *pcp = zone->pageset; | ||
271 | s8 __percpu *p = pcp->vm_stat_diff + item; | ||
272 | long o, n, t, z; | ||
273 | |||
274 | do { | ||
275 | z = 0; /* overflow to zone counters */ | ||
276 | |||
277 | /* | ||
278 | * The fetching of the stat_threshold is racy. We may apply | ||
279 | * a counter threshold to the wrong the cpu if we get | ||
280 | * rescheduled while executing here. However, the following | ||
281 | * will apply the threshold again and therefore bring the | ||
282 | * counter under the threshold. | ||
283 | */ | ||
284 | t = this_cpu_read(pcp->stat_threshold); | ||
285 | |||
286 | o = this_cpu_read(*p); | ||
287 | n = delta + o; | ||
288 | |||
289 | if (n > t || n < -t) { | ||
290 | int os = overstep_mode * (t >> 1) ; | ||
291 | |||
292 | /* Overflow must be added to zone counters */ | ||
293 | z = n + os; | ||
294 | n = -os; | ||
295 | } | ||
296 | } while (this_cpu_cmpxchg(*p, o, n) != o); | ||
297 | |||
298 | if (z) | ||
299 | zone_page_state_add(z, zone, item); | ||
300 | } | ||
301 | |||
302 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | ||
303 | int delta) | ||
304 | { | ||
305 | mod_state(zone, item, delta, 0); | ||
306 | } | ||
307 | EXPORT_SYMBOL(mod_zone_page_state); | ||
308 | |||
309 | void inc_zone_state(struct zone *zone, enum zone_stat_item item) | ||
310 | { | ||
311 | mod_state(zone, item, 1, 1); | ||
312 | } | ||
313 | |||
314 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) | ||
315 | { | ||
316 | mod_state(page_zone(page), item, 1, 1); | ||
317 | } | ||
318 | EXPORT_SYMBOL(inc_zone_page_state); | ||
319 | |||
320 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) | ||
321 | { | ||
322 | mod_state(page_zone(page), item, -1, -1); | ||
323 | } | ||
324 | EXPORT_SYMBOL(dec_zone_page_state); | ||
325 | #else | ||
326 | /* | ||
327 | * Use interrupt disable to serialize counter updates | ||
328 | */ | ||
329 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | ||
330 | int delta) | ||
331 | { | ||
332 | unsigned long flags; | ||
333 | |||
334 | local_irq_save(flags); | ||
335 | __mod_zone_page_state(zone, item, delta); | ||
336 | local_irq_restore(flags); | ||
337 | } | ||
338 | EXPORT_SYMBOL(mod_zone_page_state); | ||
339 | |||
264 | void inc_zone_state(struct zone *zone, enum zone_stat_item item) | 340 | void inc_zone_state(struct zone *zone, enum zone_stat_item item) |
265 | { | 341 | { |
266 | unsigned long flags; | 342 | unsigned long flags; |
@@ -291,6 +367,7 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |||
291 | local_irq_restore(flags); | 367 | local_irq_restore(flags); |
292 | } | 368 | } |
293 | EXPORT_SYMBOL(dec_zone_page_state); | 369 | EXPORT_SYMBOL(dec_zone_page_state); |
370 | #endif | ||
294 | 371 | ||
295 | /* | 372 | /* |
296 | * Update the zone counters for one cpu. | 373 | * Update the zone counters for one cpu. |
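
On architectures with cmpxchg_local, the new mod_state() above drops local_irq_save()/restore() entirely: it speculatively computes the new per-cpu delta plus any spill-over into the zone counter and commits it with this_cpu_cmpxchg(), retrying whenever the per-cpu slot changed underneath it (for instance because an interrupt or preemption modified the counter mid-way). A standalone C11 analogue of that loop; THRESHOLD, the single atomic slot and all names are illustrative stand-ins for the per-cpu data:

#include <stdatomic.h>

#define THRESHOLD	32

static _Atomic long shared_count;	/* zone counter analogue */
static _Atomic long slot;		/* per-cpu vm_stat_diff analogue */

static void mod_stat_cmpxchg(long delta, int overstep_mode)
{
	long o, n, z, t = THRESHOLD;

	do {
		z = 0;				/* amount to spill this round */
		o = atomic_load(&slot);
		n = delta + o;
		if (n > t || n < -t) {
			long os = overstep_mode * (t >> 1);

			z = n + os;		/* goes to the shared counter */
			n = -os;
		}
		/* commit only if nobody changed the slot in the meantime */
	} while (!atomic_compare_exchange_weak(&slot, &o, n));

	if (z)
		atomic_fetch_add(&shared_count, z);
}
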
@@ -750,8 +827,6 @@ static const char * const vmstat_text[] = { | |||
750 | "nr_shmem", | 827 | "nr_shmem", |
751 | "nr_dirtied", | 828 | "nr_dirtied", |
752 | "nr_written", | 829 | "nr_written", |
753 | "nr_dirty_threshold", | ||
754 | "nr_dirty_background_threshold", | ||
755 | 830 | ||
756 | #ifdef CONFIG_NUMA | 831 | #ifdef CONFIG_NUMA |
757 | "numa_hit", | 832 | "numa_hit", |
@@ -761,6 +836,8 @@ static const char * const vmstat_text[] = { | |||
761 | "numa_local", | 836 | "numa_local", |
762 | "numa_other", | 837 | "numa_other", |
763 | #endif | 838 | #endif |
839 | "nr_dirty_threshold", | ||
840 | "nr_dirty_background_threshold", | ||
764 | 841 | ||
765 | #ifdef CONFIG_VM_EVENT_COUNTERS | 842 | #ifdef CONFIG_VM_EVENT_COUNTERS |
766 | "pgpgin", | 843 | "pgpgin", |
@@ -1033,7 +1110,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, | |||
1033 | break; | 1110 | break; |
1034 | case CPU_DOWN_PREPARE: | 1111 | case CPU_DOWN_PREPARE: |
1035 | case CPU_DOWN_PREPARE_FROZEN: | 1112 | case CPU_DOWN_PREPARE_FROZEN: |
1036 | cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu)); | 1113 | cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); |
1037 | per_cpu(vmstat_work, cpu).work.func = NULL; | 1114 | per_cpu(vmstat_work, cpu).work.func = NULL; |
1038 | break; | 1115 | break; |
1039 | case CPU_DOWN_FAILED: | 1116 | case CPU_DOWN_FAILED: |