aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: H. Peter Anvin <hpa@linux.intel.com> 2014-05-21 20:36:33 -0400
committer: H. Peter Anvin <hpa@linux.intel.com> 2014-05-21 20:36:33 -0400
commit: 03c1b4e8e560455a2634a76998883a22f1a01207 (patch)
tree: 30ca1237d094d66df1dc1533f7bf39b3877b5932 /mm
parent: ac49b9a9f26b6c42585f87857722085ef4b19c13 (diff)
parent: e6ab9a20e73e790d47e6aa231fcf66f27b6ce3d4 (diff)
Merge remote-tracking branch 'origin/x86/espfix' into x86/vdso
Merge x86/espfix into x86/vdso, due to changes in the vdso setup code that otherwise cause conflicts.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'mm')
-rw-r--r-- mm/compaction.c 22
-rw-r--r-- mm/filemap.c 49
-rw-r--r-- mm/hugetlb.c 19
-rw-r--r-- mm/kmemleak.c 4
-rw-r--r-- mm/memcontrol.c 20
-rw-r--r-- mm/mremap.c 9
-rw-r--r-- mm/page-writeback.c 6
-rw-r--r-- mm/percpu.c 2
-rw-r--r-- mm/slab.c 6
-rw-r--r-- mm/slab.h 1
-rw-r--r-- mm/slab_common.c 13
-rw-r--r-- mm/slub.c 41
-rw-r--r-- mm/truncate.c 8
-rw-r--r-- mm/util.c 10
-rw-r--r-- mm/vmscan.c 18
15 files changed, 138 insertions, 90 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 37f976287068..627dc2e4320f 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -671,16 +671,20 @@ static void isolate_freepages(struct zone *zone,
671 struct compact_control *cc) 671 struct compact_control *cc)
672{ 672{
673 struct page *page; 673 struct page *page;
674 unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn; 674 unsigned long high_pfn, low_pfn, pfn, z_end_pfn;
675 int nr_freepages = cc->nr_freepages; 675 int nr_freepages = cc->nr_freepages;
676 struct list_head *freelist = &cc->freepages; 676 struct list_head *freelist = &cc->freepages;
677 677
678 /* 678 /*
679 * Initialise the free scanner. The starting point is where we last 679 * Initialise the free scanner. The starting point is where we last
680 * scanned from (or the end of the zone if starting). The low point 680 * successfully isolated from, zone-cached value, or the end of the
681 * is the end of the pageblock the migration scanner is using. 681 * zone when isolating for the first time. We need this aligned to
682 * the pageblock boundary, because we do pfn -= pageblock_nr_pages
683 * in the for loop.
684 * The low boundary is the end of the pageblock the migration scanner
685 * is using.
682 */ 686 */
683 pfn = cc->free_pfn; 687 pfn = cc->free_pfn & ~(pageblock_nr_pages-1);
684 low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages); 688 low_pfn = ALIGN(cc->migrate_pfn + 1, pageblock_nr_pages);
685 689
686 /* 690 /*
@@ -700,6 +704,7 @@ static void isolate_freepages(struct zone *zone,
700 for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages; 704 for (; pfn >= low_pfn && cc->nr_migratepages > nr_freepages;
701 pfn -= pageblock_nr_pages) { 705 pfn -= pageblock_nr_pages) {
702 unsigned long isolated; 706 unsigned long isolated;
707 unsigned long end_pfn;
703 708
704 /* 709 /*
705 * This can iterate a massively long zone without finding any 710 * This can iterate a massively long zone without finding any
@@ -734,13 +739,10 @@ static void isolate_freepages(struct zone *zone,
734 isolated = 0; 739 isolated = 0;
735 740
736 /* 741 /*
737 * As pfn may not start aligned, pfn+pageblock_nr_page 742 * Take care when isolating in last pageblock of a zone which
738 * may cross a MAX_ORDER_NR_PAGES boundary and miss 743 * ends in the middle of a pageblock.
739 * a pfn_valid check. Ensure isolate_freepages_block()
740 * only scans within a pageblock
741 */ 744 */
742 end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); 745 end_pfn = min(pfn + pageblock_nr_pages, z_end_pfn);
743 end_pfn = min(end_pfn, z_end_pfn);
744 isolated = isolate_freepages_block(cc, pfn, end_pfn, 746 isolated = isolate_freepages_block(cc, pfn, end_pfn,
745 freelist, false); 747 freelist, false);
746 nr_freepages += isolated; 748 nr_freepages += isolated;
diff --git a/mm/filemap.c b/mm/filemap.c
index 5020b280a771..000a220e2a41 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -906,8 +906,8 @@ EXPORT_SYMBOL(page_cache_prev_hole);
906 * Looks up the page cache slot at @mapping & @offset. If there is a 906 * Looks up the page cache slot at @mapping & @offset. If there is a
907 * page cache page, it is returned with an increased refcount. 907 * page cache page, it is returned with an increased refcount.
908 * 908 *
909 * If the slot holds a shadow entry of a previously evicted page, it 909 * If the slot holds a shadow entry of a previously evicted page, or a
910 * is returned. 910 * swap entry from shmem/tmpfs, it is returned.
911 * 911 *
912 * Otherwise, %NULL is returned. 912 * Otherwise, %NULL is returned.
913 */ 913 */
@@ -928,9 +928,9 @@ repeat:
928 if (radix_tree_deref_retry(page)) 928 if (radix_tree_deref_retry(page))
929 goto repeat; 929 goto repeat;
930 /* 930 /*
931 * Otherwise, shmem/tmpfs must be storing a swap entry 931 * A shadow entry of a recently evicted page,
932 * here as an exceptional entry: so return it without 932 * or a swap entry from shmem/tmpfs. Return
933 * attempting to raise page count. 933 * it without attempting to raise page count.
934 */ 934 */
935 goto out; 935 goto out;
936 } 936 }
@@ -983,8 +983,8 @@ EXPORT_SYMBOL(find_get_page);
983 * page cache page, it is returned locked and with an increased 983 * page cache page, it is returned locked and with an increased
984 * refcount. 984 * refcount.
985 * 985 *
986 * If the slot holds a shadow entry of a previously evicted page, it 986 * If the slot holds a shadow entry of a previously evicted page, or a
987 * is returned. 987 * swap entry from shmem/tmpfs, it is returned.
988 * 988 *
989 * Otherwise, %NULL is returned. 989 * Otherwise, %NULL is returned.
990 * 990 *
@@ -1099,8 +1099,8 @@ EXPORT_SYMBOL(find_or_create_page);
1099 * with ascending indexes. There may be holes in the indices due to 1099 * with ascending indexes. There may be holes in the indices due to
1100 * not-present pages. 1100 * not-present pages.
1101 * 1101 *
1102 * Any shadow entries of evicted pages are included in the returned 1102 * Any shadow entries of evicted pages, or swap entries from
1103 * array. 1103 * shmem/tmpfs, are included in the returned array.
1104 * 1104 *
1105 * find_get_entries() returns the number of pages and shadow entries 1105 * find_get_entries() returns the number of pages and shadow entries
1106 * which were found. 1106 * which were found.
@@ -1128,9 +1128,9 @@ repeat:
1128 if (radix_tree_deref_retry(page)) 1128 if (radix_tree_deref_retry(page))
1129 goto restart; 1129 goto restart;
1130 /* 1130 /*
1131 * Otherwise, we must be storing a swap entry 1131 * A shadow entry of a recently evicted page,
1132 * here as an exceptional entry: so return it 1132 * or a swap entry from shmem/tmpfs. Return
1133 * without attempting to raise page count. 1133 * it without attempting to raise page count.
1134 */ 1134 */
1135 goto export; 1135 goto export;
1136 } 1136 }
@@ -1198,9 +1198,9 @@ repeat:
1198 goto restart; 1198 goto restart;
1199 } 1199 }
1200 /* 1200 /*
1201 * Otherwise, shmem/tmpfs must be storing a swap entry 1201 * A shadow entry of a recently evicted page,
1202 * here as an exceptional entry: so skip over it - 1202 * or a swap entry from shmem/tmpfs. Skip
1203 * we only reach this from invalidate_mapping_pages(). 1203 * over it.
1204 */ 1204 */
1205 continue; 1205 continue;
1206 } 1206 }
@@ -1265,9 +1265,9 @@ repeat:
1265 goto restart; 1265 goto restart;
1266 } 1266 }
1267 /* 1267 /*
1268 * Otherwise, shmem/tmpfs must be storing a swap entry 1268 * A shadow entry of a recently evicted page,
1269 * here as an exceptional entry: so stop looking for 1269 * or a swap entry from shmem/tmpfs. Stop
1270 * contiguous pages. 1270 * looking for contiguous pages.
1271 */ 1271 */
1272 break; 1272 break;
1273 } 1273 }
@@ -1341,10 +1341,17 @@ repeat:
1341 goto restart; 1341 goto restart;
1342 } 1342 }
1343 /* 1343 /*
1344 * This function is never used on a shmem/tmpfs 1344 * A shadow entry of a recently evicted page.
1345 * mapping, so a swap entry won't be found here. 1345 *
1346 * Those entries should never be tagged, but
1347 * this tree walk is lockless and the tags are
1348 * looked up in bulk, one radix tree node at a
1349 * time, so there is a sizable window for page
1350 * reclaim to evict a page we saw tagged.
1351 *
1352 * Skip over it.
1346 */ 1353 */
1347 BUG(); 1354 continue;
1348 } 1355 }
1349 1356
1350 if (!page_cache_get_speculative(page)) 1357 if (!page_cache_get_speculative(page))
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 246192929a2d..c82290b9c1fc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1981,11 +1981,7 @@ static int __init hugetlb_init(void)
1981{ 1981{
1982 int i; 1982 int i;
1983 1983
1984 /* Some platform decide whether they support huge pages at boot 1984 if (!hugepages_supported())
1985 * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when
1986 * there is no such support
1987 */
1988 if (HPAGE_SHIFT == 0)
1989 return 0; 1985 return 0;
1990 1986
1991 if (!size_to_hstate(default_hstate_size)) { 1987 if (!size_to_hstate(default_hstate_size)) {
@@ -2112,6 +2108,9 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
2112 unsigned long tmp; 2108 unsigned long tmp;
2113 int ret; 2109 int ret;
2114 2110
2111 if (!hugepages_supported())
2112 return -ENOTSUPP;
2113
2115 tmp = h->max_huge_pages; 2114 tmp = h->max_huge_pages;
2116 2115
2117 if (write && h->order >= MAX_ORDER) 2116 if (write && h->order >= MAX_ORDER)
@@ -2165,6 +2164,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
2165 unsigned long tmp; 2164 unsigned long tmp;
2166 int ret; 2165 int ret;
2167 2166
2167 if (!hugepages_supported())
2168 return -ENOTSUPP;
2169
2168 tmp = h->nr_overcommit_huge_pages; 2170 tmp = h->nr_overcommit_huge_pages;
2169 2171
2170 if (write && h->order >= MAX_ORDER) 2172 if (write && h->order >= MAX_ORDER)
@@ -2190,6 +2192,8 @@ out:
2190void hugetlb_report_meminfo(struct seq_file *m) 2192void hugetlb_report_meminfo(struct seq_file *m)
2191{ 2193{
2192 struct hstate *h = &default_hstate; 2194 struct hstate *h = &default_hstate;
2195 if (!hugepages_supported())
2196 return;
2193 seq_printf(m, 2197 seq_printf(m,
2194 "HugePages_Total: %5lu\n" 2198 "HugePages_Total: %5lu\n"
2195 "HugePages_Free: %5lu\n" 2199 "HugePages_Free: %5lu\n"
@@ -2206,6 +2210,8 @@ void hugetlb_report_meminfo(struct seq_file *m)
2206int hugetlb_report_node_meminfo(int nid, char *buf) 2210int hugetlb_report_node_meminfo(int nid, char *buf)
2207{ 2211{
2208 struct hstate *h = &default_hstate; 2212 struct hstate *h = &default_hstate;
2213 if (!hugepages_supported())
2214 return 0;
2209 return sprintf(buf, 2215 return sprintf(buf,
2210 "Node %d HugePages_Total: %5u\n" 2216 "Node %d HugePages_Total: %5u\n"
2211 "Node %d HugePages_Free: %5u\n" 2217 "Node %d HugePages_Free: %5u\n"
@@ -2220,6 +2226,9 @@ void hugetlb_show_meminfo(void)
2220 struct hstate *h; 2226 struct hstate *h;
2221 int nid; 2227 int nid;
2222 2228
2229 if (!hugepages_supported())
2230 return;
2231
2223 for_each_node_state(nid, N_MEMORY) 2232 for_each_node_state(nid, N_MEMORY)
2224 for_each_hstate(h) 2233 for_each_hstate(h)
2225 pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n", 2234 pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 91d67eaee050..8d2fcdfeff7f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1775,10 +1775,9 @@ void __init kmemleak_init(void)
1775 int i; 1775 int i;
1776 unsigned long flags; 1776 unsigned long flags;
1777 1777
1778 kmemleak_early_log = 0;
1779
1780#ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF 1778#ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
1781 if (!kmemleak_skip_disable) { 1779 if (!kmemleak_skip_disable) {
1780 kmemleak_early_log = 0;
1782 kmemleak_disable(); 1781 kmemleak_disable();
1783 return; 1782 return;
1784 } 1783 }
@@ -1796,6 +1795,7 @@ void __init kmemleak_init(void)
1796 1795
1797 /* the kernel is still in UP mode, so disabling the IRQs is enough */ 1796 /* the kernel is still in UP mode, so disabling the IRQs is enough */
1798 local_irq_save(flags); 1797 local_irq_save(flags);
1798 kmemleak_early_log = 0;
1799 if (kmemleak_error) { 1799 if (kmemleak_error) {
1800 local_irq_restore(flags); 1800 local_irq_restore(flags);
1801 return; 1801 return;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 29501f040568..c47dffdcb246 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6686,16 +6686,20 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
6686 pgoff = pte_to_pgoff(ptent); 6686 pgoff = pte_to_pgoff(ptent);
6687 6687
6688 /* page is moved even if it's not RSS of this task(page-faulted). */ 6688 /* page is moved even if it's not RSS of this task(page-faulted). */
6689 page = find_get_page(mapping, pgoff);
6690
6691#ifdef CONFIG_SWAP 6689#ifdef CONFIG_SWAP
6692 /* shmem/tmpfs may report page out on swap: account for that too. */ 6690 /* shmem/tmpfs may report page out on swap: account for that too. */
6693 if (radix_tree_exceptional_entry(page)) { 6691 if (shmem_mapping(mapping)) {
6694 swp_entry_t swap = radix_to_swp_entry(page); 6692 page = find_get_entry(mapping, pgoff);
6695 if (do_swap_account) 6693 if (radix_tree_exceptional_entry(page)) {
6696 *entry = swap; 6694 swp_entry_t swp = radix_to_swp_entry(page);
6697 page = find_get_page(swap_address_space(swap), swap.val); 6695 if (do_swap_account)
6698 } 6696 *entry = swp;
6697 page = find_get_page(swap_address_space(swp), swp.val);
6698 }
6699 } else
6700 page = find_get_page(mapping, pgoff);
6701#else
6702 page = find_get_page(mapping, pgoff);
6699#endif 6703#endif
6700 return page; 6704 return page;
6701} 6705}
diff --git a/mm/mremap.c b/mm/mremap.c
index 0843feb66f3d..05f1180e9f21 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -194,10 +194,17 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
194 break; 194 break;
195 if (pmd_trans_huge(*old_pmd)) { 195 if (pmd_trans_huge(*old_pmd)) {
196 int err = 0; 196 int err = 0;
197 if (extent == HPAGE_PMD_SIZE) 197 if (extent == HPAGE_PMD_SIZE) {
198 VM_BUG_ON(vma->vm_file || !vma->anon_vma);
199 /* See comment in move_ptes() */
200 if (need_rmap_locks)
201 anon_vma_lock_write(vma->anon_vma);
198 err = move_huge_pmd(vma, new_vma, old_addr, 202 err = move_huge_pmd(vma, new_vma, old_addr,
199 new_addr, old_end, 203 new_addr, old_end,
200 old_pmd, new_pmd); 204 old_pmd, new_pmd);
205 if (need_rmap_locks)
206 anon_vma_unlock_write(vma->anon_vma);
207 }
201 if (err > 0) { 208 if (err > 0) {
202 need_flush = true; 209 need_flush = true;
203 continue; 210 continue;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ef413492a149..a4317da60532 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -593,14 +593,14 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
593 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse) 593 * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
594 * => fast response on large errors; small oscillation near setpoint 594 * => fast response on large errors; small oscillation near setpoint
595 */ 595 */
596static inline long long pos_ratio_polynom(unsigned long setpoint, 596static long long pos_ratio_polynom(unsigned long setpoint,
597 unsigned long dirty, 597 unsigned long dirty,
598 unsigned long limit) 598 unsigned long limit)
599{ 599{
600 long long pos_ratio; 600 long long pos_ratio;
601 long x; 601 long x;
602 602
603 x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, 603 x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
604 limit - setpoint + 1); 604 limit - setpoint + 1);
605 pos_ratio = x; 605 pos_ratio = x;
606 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; 606 pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -842,7 +842,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
842 x_intercept = bdi_setpoint + span; 842 x_intercept = bdi_setpoint + span;
843 843
844 if (bdi_dirty < x_intercept - span / 4) { 844 if (bdi_dirty < x_intercept - span / 4) {
845 pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty), 845 pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
846 x_intercept - bdi_setpoint + 1); 846 x_intercept - bdi_setpoint + 1);
847 } else 847 } else
848 pos_ratio /= 4; 848 pos_ratio /= 4;
diff --git a/mm/percpu.c b/mm/percpu.c
index 63e24fb4387b..2ddf9a990dbd 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -610,7 +610,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
610 chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * 610 chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
611 sizeof(chunk->map[0])); 611 sizeof(chunk->map[0]));
612 if (!chunk->map) { 612 if (!chunk->map) {
613 kfree(chunk); 613 pcpu_mem_free(chunk, pcpu_chunk_struct_size);
614 return NULL; 614 return NULL;
615 } 615 }
616 616
diff --git a/mm/slab.c b/mm/slab.c
index 388cb1ae6fbc..19d92181ce24 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -166,7 +166,7 @@ typedef unsigned char freelist_idx_t;
166typedef unsigned short freelist_idx_t; 166typedef unsigned short freelist_idx_t;
167#endif 167#endif
168 168
169#define SLAB_OBJ_MAX_NUM (1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) 169#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
170 170
171/* 171/*
172 * true if a page was allocated from pfmemalloc reserves for network-based 172 * true if a page was allocated from pfmemalloc reserves for network-based
@@ -2572,13 +2572,13 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
2572 return freelist; 2572 return freelist;
2573} 2573}
2574 2574
2575static inline freelist_idx_t get_free_obj(struct page *page, unsigned char idx) 2575static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
2576{ 2576{
2577 return ((freelist_idx_t *)page->freelist)[idx]; 2577 return ((freelist_idx_t *)page->freelist)[idx];
2578} 2578}
2579 2579
2580static inline void set_free_obj(struct page *page, 2580static inline void set_free_obj(struct page *page,
2581 unsigned char idx, freelist_idx_t val) 2581 unsigned int idx, freelist_idx_t val)
2582{ 2582{
2583 ((freelist_idx_t *)(page->freelist))[idx] = val; 2583 ((freelist_idx_t *)(page->freelist))[idx] = val;
2584} 2584}
diff --git a/mm/slab.h b/mm/slab.h
index 3045316b7c9d..6bd4c353704f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
91#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) 91#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
92 92
93int __kmem_cache_shutdown(struct kmem_cache *); 93int __kmem_cache_shutdown(struct kmem_cache *);
94void slab_kmem_cache_release(struct kmem_cache *);
94 95
95struct seq_file; 96struct seq_file;
96struct file; 97struct file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f3cfccf76dda..102cc6fca3d3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -323,6 +323,12 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
323} 323}
324#endif /* CONFIG_MEMCG_KMEM */ 324#endif /* CONFIG_MEMCG_KMEM */
325 325
326void slab_kmem_cache_release(struct kmem_cache *s)
327{
328 kfree(s->name);
329 kmem_cache_free(kmem_cache, s);
330}
331
326void kmem_cache_destroy(struct kmem_cache *s) 332void kmem_cache_destroy(struct kmem_cache *s)
327{ 333{
328 get_online_cpus(); 334 get_online_cpus();
@@ -352,8 +358,11 @@ void kmem_cache_destroy(struct kmem_cache *s)
352 rcu_barrier(); 358 rcu_barrier();
353 359
354 memcg_free_cache_params(s); 360 memcg_free_cache_params(s);
355 kfree(s->name); 361#ifdef SLAB_SUPPORTS_SYSFS
356 kmem_cache_free(kmem_cache, s); 362 sysfs_slab_remove(s);
363#else
364 slab_kmem_cache_release(s);
365#endif
357 goto out_put_cpus; 366 goto out_put_cpus;
358 367
359out_unlock: 368out_unlock:
diff --git a/mm/slub.c b/mm/slub.c
index 5e234f1f8853..2b1ce697fc4b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -210,14 +210,11 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
210#ifdef CONFIG_SYSFS 210#ifdef CONFIG_SYSFS
211static int sysfs_slab_add(struct kmem_cache *); 211static int sysfs_slab_add(struct kmem_cache *);
212static int sysfs_slab_alias(struct kmem_cache *, const char *); 212static int sysfs_slab_alias(struct kmem_cache *, const char *);
213static void sysfs_slab_remove(struct kmem_cache *);
214static void memcg_propagate_slab_attrs(struct kmem_cache *s); 213static void memcg_propagate_slab_attrs(struct kmem_cache *s);
215#else 214#else
216static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } 215static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
217static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) 216static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
218 { return 0; } 217 { return 0; }
219static inline void sysfs_slab_remove(struct kmem_cache *s) { }
220
221static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } 218static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
222#endif 219#endif
223 220
@@ -3238,24 +3235,7 @@ static inline int kmem_cache_close(struct kmem_cache *s)
3238 3235
3239int __kmem_cache_shutdown(struct kmem_cache *s) 3236int __kmem_cache_shutdown(struct kmem_cache *s)
3240{ 3237{
3241 int rc = kmem_cache_close(s); 3238 return kmem_cache_close(s);
3242
3243 if (!rc) {
3244 /*
3245 * Since slab_attr_store may take the slab_mutex, we should
3246 * release the lock while removing the sysfs entry in order to
3247 * avoid a deadlock. Because this is pretty much the last
3248 * operation we do and the lock will be released shortly after
3249 * that in slab_common.c, we could just move sysfs_slab_remove
3250 * to a later point in common code. We should do that when we
3251 * have a common sysfs framework for all allocators.
3252 */
3253 mutex_unlock(&slab_mutex);
3254 sysfs_slab_remove(s);
3255 mutex_lock(&slab_mutex);
3256 }
3257
3258 return rc;
3259} 3239}
3260 3240
3261/******************************************************************** 3241/********************************************************************
@@ -5071,15 +5051,18 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5071#ifdef CONFIG_MEMCG_KMEM 5051#ifdef CONFIG_MEMCG_KMEM
5072 int i; 5052 int i;
5073 char *buffer = NULL; 5053 char *buffer = NULL;
5054 struct kmem_cache *root_cache;
5074 5055
5075 if (!is_root_cache(s)) 5056 if (is_root_cache(s))
5076 return; 5057 return;
5077 5058
5059 root_cache = s->memcg_params->root_cache;
5060
5078 /* 5061 /*
5079 * This mean this cache had no attribute written. Therefore, no point 5062 * This mean this cache had no attribute written. Therefore, no point
5080 * in copying default values around 5063 * in copying default values around
5081 */ 5064 */
5082 if (!s->max_attr_size) 5065 if (!root_cache->max_attr_size)
5083 return; 5066 return;
5084 5067
5085 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { 5068 for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
@@ -5101,7 +5084,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5101 */ 5084 */
5102 if (buffer) 5085 if (buffer)
5103 buf = buffer; 5086 buf = buffer;
5104 else if (s->max_attr_size < ARRAY_SIZE(mbuf)) 5087 else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf))
5105 buf = mbuf; 5088 buf = mbuf;
5106 else { 5089 else {
5107 buffer = (char *) get_zeroed_page(GFP_KERNEL); 5090 buffer = (char *) get_zeroed_page(GFP_KERNEL);
@@ -5110,7 +5093,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5110 buf = buffer; 5093 buf = buffer;
5111 } 5094 }
5112 5095
5113 attr->show(s->memcg_params->root_cache, buf); 5096 attr->show(root_cache, buf);
5114 attr->store(s, buf, strlen(buf)); 5097 attr->store(s, buf, strlen(buf));
5115 } 5098 }
5116 5099
@@ -5119,6 +5102,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s)
5119#endif 5102#endif
5120} 5103}
5121 5104
5105static void kmem_cache_release(struct kobject *k)
5106{
5107 slab_kmem_cache_release(to_slab(k));
5108}
5109
5122static const struct sysfs_ops slab_sysfs_ops = { 5110static const struct sysfs_ops slab_sysfs_ops = {
5123 .show = slab_attr_show, 5111 .show = slab_attr_show,
5124 .store = slab_attr_store, 5112 .store = slab_attr_store,
@@ -5126,6 +5114,7 @@ static const struct sysfs_ops slab_sysfs_ops = {
5126 5114
5127static struct kobj_type slab_ktype = { 5115static struct kobj_type slab_ktype = {
5128 .sysfs_ops = &slab_sysfs_ops, 5116 .sysfs_ops = &slab_sysfs_ops,
5117 .release = kmem_cache_release,
5129}; 5118};
5130 5119
5131static int uevent_filter(struct kset *kset, struct kobject *kobj) 5120static int uevent_filter(struct kset *kset, struct kobject *kobj)
@@ -5252,7 +5241,7 @@ out_put_kobj:
5252 goto out; 5241 goto out;
5253} 5242}
5254 5243
5255static void sysfs_slab_remove(struct kmem_cache *s) 5244void sysfs_slab_remove(struct kmem_cache *s)
5256{ 5245{
5257 if (slab_state < FULL) 5246 if (slab_state < FULL)
5258 /* 5247 /*
diff --git a/mm/truncate.c b/mm/truncate.c
index e5cc39ab0751..6a78c814bebf 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -484,14 +484,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
484 unsigned long count = 0; 484 unsigned long count = 0;
485 int i; 485 int i;
486 486
487 /*
488 * Note: this function may get called on a shmem/tmpfs mapping:
489 * pagevec_lookup() might then return 0 prematurely (because it
490 * got a gangful of swap entries); but it's hardly worth worrying
491 * about - it can rarely have anything to free from such a mapping
492 * (most pages are dirty), and already skips over any difficulties.
493 */
494
495 pagevec_init(&pvec, 0); 487 pagevec_init(&pvec, 0);
496 while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, 488 while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
497 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 489 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
diff --git a/mm/util.c b/mm/util.c
index f380af7ea779..d5ea733c5082 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -10,6 +10,7 @@
10#include <linux/swapops.h> 10#include <linux/swapops.h>
11#include <linux/mman.h> 11#include <linux/mman.h>
12#include <linux/hugetlb.h> 12#include <linux/hugetlb.h>
13#include <linux/vmalloc.h>
13 14
14#include <asm/uaccess.h> 15#include <asm/uaccess.h>
15 16
@@ -387,6 +388,15 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
387} 388}
388EXPORT_SYMBOL(vm_mmap); 389EXPORT_SYMBOL(vm_mmap);
389 390
391void kvfree(const void *addr)
392{
393 if (is_vmalloc_addr(addr))
394 vfree(addr);
395 else
396 kfree(addr);
397}
398EXPORT_SYMBOL(kvfree);
399
390struct address_space *page_mapping(struct page *page) 400struct address_space *page_mapping(struct page *page)
391{ 401{
392 struct address_space *mapping = page->mapping; 402 struct address_space *mapping = page->mapping;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3f56c8deb3c0..32c661d66a45 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1916,6 +1916,24 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
1916 get_lru_size(lruvec, LRU_INACTIVE_FILE); 1916 get_lru_size(lruvec, LRU_INACTIVE_FILE);
1917 1917
1918 /* 1918 /*
1919 * Prevent the reclaimer from falling into the cache trap: as
1920 * cache pages start out inactive, every cache fault will tip
1921 * the scan balance towards the file LRU. And as the file LRU
1922 * shrinks, so does the window for rotation from references.
1923 * This means we have a runaway feedback loop where a tiny
1924 * thrashing file LRU becomes infinitely more attractive than
1925 * anon pages. Try to detect this based on file LRU size.
1926 */
1927 if (global_reclaim(sc)) {
1928 unsigned long free = zone_page_state(zone, NR_FREE_PAGES);
1929
1930 if (unlikely(file + free <= high_wmark_pages(zone))) {
1931 scan_balance = SCAN_ANON;
1932 goto out;
1933 }
1934 }
1935
1936 /*
1919 * There is enough inactive page cache, do not reclaim 1937 * There is enough inactive page cache, do not reclaim
1920 * anything from the anonymous working set right now. 1938 * anything from the anonymous working set right now.
1921 */ 1939 */