aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c3
-rw-r--r--mm/filemap.c242
-rw-r--r--mm/hugetlb.c59
-rw-r--r--mm/internal.h29
-rw-r--r--mm/memcontrol.c1
-rw-r--r--mm/memory.c70
-rw-r--r--mm/memory_hotplug.c12
-rw-r--r--mm/mempolicy.c18
-rw-r--r--mm/migrate.c66
-rw-r--r--mm/mlock.c18
-rw-r--r--mm/mmap.c5
-rw-r--r--mm/nommu.c3
-rw-r--r--mm/oom_kill.c3
-rw-r--r--mm/page_alloc.c32
-rw-r--r--mm/page_cgroup.c84
-rw-r--r--mm/page_isolation.c5
-rw-r--r--mm/shmem.c8
-rw-r--r--mm/slab.c52
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c37
-rw-r--r--mm/sparse-vmemmap.c2
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/swap.c20
-rw-r--r--mm/swapfile.c9
-rw-r--r--mm/vmalloc.c88
-rw-r--r--mm/vmscan.c46
-rw-r--r--mm/vmstat.c69
27 files changed, 545 insertions, 440 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f2e574dbc300..801c08b046e6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -176,6 +176,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
176 int ret = 0; 176 int ret = 0;
177 struct device *dev; 177 struct device *dev;
178 178
179 if (bdi->dev) /* The driver needs to use separate queues per device */
180 goto exit;
181
179 va_start(args, fmt); 182 va_start(args, fmt);
180 dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args); 183 dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
181 va_end(args); 184 va_end(args);
diff --git a/mm/filemap.c b/mm/filemap.c
index ab8553658af3..f3e5f8944d17 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping,
2029{ 2029{
2030 const struct address_space_operations *aops = mapping->a_ops; 2030 const struct address_space_operations *aops = mapping->a_ops;
2031 2031
2032 if (aops->write_begin) { 2032 return aops->write_begin(file, mapping, pos, len, flags,
2033 return aops->write_begin(file, mapping, pos, len, flags,
2034 pagep, fsdata); 2033 pagep, fsdata);
2035 } else {
2036 int ret;
2037 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2038 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2039 struct inode *inode = mapping->host;
2040 struct page *page;
2041again:
2042 page = __grab_cache_page(mapping, index);
2043 *pagep = page;
2044 if (!page)
2045 return -ENOMEM;
2046
2047 if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
2048 /*
2049 * There is no way to resolve a short write situation
2050 * for a !Uptodate page (except by double copying in
2051 * the caller done by generic_perform_write_2copy).
2052 *
2053 * Instead, we have to bring it uptodate here.
2054 */
2055 ret = aops->readpage(file, page);
2056 page_cache_release(page);
2057 if (ret) {
2058 if (ret == AOP_TRUNCATED_PAGE)
2059 goto again;
2060 return ret;
2061 }
2062 goto again;
2063 }
2064
2065 ret = aops->prepare_write(file, page, offset, offset+len);
2066 if (ret) {
2067 unlock_page(page);
2068 page_cache_release(page);
2069 if (pos + len > inode->i_size)
2070 vmtruncate(inode, inode->i_size);
2071 }
2072 return ret;
2073 }
2074} 2034}
2075EXPORT_SYMBOL(pagecache_write_begin); 2035EXPORT_SYMBOL(pagecache_write_begin);
2076 2036
@@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
2079 struct page *page, void *fsdata) 2039 struct page *page, void *fsdata)
2080{ 2040{
2081 const struct address_space_operations *aops = mapping->a_ops; 2041 const struct address_space_operations *aops = mapping->a_ops;
2082 int ret;
2083
2084 if (aops->write_end) {
2085 mark_page_accessed(page);
2086 ret = aops->write_end(file, mapping, pos, len, copied,
2087 page, fsdata);
2088 } else {
2089 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2090 struct inode *inode = mapping->host;
2091
2092 flush_dcache_page(page);
2093 ret = aops->commit_write(file, page, offset, offset+len);
2094 unlock_page(page);
2095 mark_page_accessed(page);
2096 page_cache_release(page);
2097
2098 if (ret < 0) {
2099 if (pos + len > inode->i_size)
2100 vmtruncate(inode, inode->i_size);
2101 } else if (ret > 0)
2102 ret = min_t(size_t, copied, ret);
2103 else
2104 ret = copied;
2105 }
2106 2042
2107 return ret; 2043 mark_page_accessed(page);
2044 return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
2108} 2045}
2109EXPORT_SYMBOL(pagecache_write_end); 2046EXPORT_SYMBOL(pagecache_write_end);
2110 2047
@@ -2226,174 +2163,6 @@ repeat:
2226} 2163}
2227EXPORT_SYMBOL(__grab_cache_page); 2164EXPORT_SYMBOL(__grab_cache_page);
2228 2165
2229static ssize_t generic_perform_write_2copy(struct file *file,
2230 struct iov_iter *i, loff_t pos)
2231{
2232 struct address_space *mapping = file->f_mapping;
2233 const struct address_space_operations *a_ops = mapping->a_ops;
2234 struct inode *inode = mapping->host;
2235 long status = 0;
2236 ssize_t written = 0;
2237
2238 do {
2239 struct page *src_page;
2240 struct page *page;
2241 pgoff_t index; /* Pagecache index for current page */
2242 unsigned long offset; /* Offset into pagecache page */
2243 unsigned long bytes; /* Bytes to write to page */
2244 size_t copied; /* Bytes copied from user */
2245
2246 offset = (pos & (PAGE_CACHE_SIZE - 1));
2247 index = pos >> PAGE_CACHE_SHIFT;
2248 bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
2249 iov_iter_count(i));
2250
2251 /*
2252 * a non-NULL src_page indicates that we're doing the
2253 * copy via get_user_pages and kmap.
2254 */
2255 src_page = NULL;
2256
2257 /*
2258 * Bring in the user page that we will copy from _first_.
2259 * Otherwise there's a nasty deadlock on copying from the
2260 * same page as we're writing to, without it being marked
2261 * up-to-date.
2262 *
2263 * Not only is this an optimisation, but it is also required
2264 * to check that the address is actually valid, when atomic
2265 * usercopies are used, below.
2266 */
2267 if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
2268 status = -EFAULT;
2269 break;
2270 }
2271
2272 page = __grab_cache_page(mapping, index);
2273 if (!page) {
2274 status = -ENOMEM;
2275 break;
2276 }
2277
2278 /*
2279 * non-uptodate pages cannot cope with short copies, and we
2280 * cannot take a pagefault with the destination page locked.
2281 * So pin the source page to copy it.
2282 */
2283 if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
2284 unlock_page(page);
2285
2286 src_page = alloc_page(GFP_KERNEL);
2287 if (!src_page) {
2288 page_cache_release(page);
2289 status = -ENOMEM;
2290 break;
2291 }
2292
2293 /*
2294 * Cannot get_user_pages with a page locked for the
2295 * same reason as we can't take a page fault with a
2296 * page locked (as explained below).
2297 */
2298 copied = iov_iter_copy_from_user(src_page, i,
2299 offset, bytes);
2300 if (unlikely(copied == 0)) {
2301 status = -EFAULT;
2302 page_cache_release(page);
2303 page_cache_release(src_page);
2304 break;
2305 }
2306 bytes = copied;
2307
2308 lock_page(page);
2309 /*
2310 * Can't handle the page going uptodate here, because
2311 * that means we would use non-atomic usercopies, which
2312 * zero out the tail of the page, which can cause
2313 * zeroes to become transiently visible. We could just
2314 * use a non-zeroing copy, but the APIs aren't too
2315 * consistent.
2316 */
2317 if (unlikely(!page->mapping || PageUptodate(page))) {
2318 unlock_page(page);
2319 page_cache_release(page);
2320 page_cache_release(src_page);
2321 continue;
2322 }
2323 }
2324
2325 status = a_ops->prepare_write(file, page, offset, offset+bytes);
2326 if (unlikely(status))
2327 goto fs_write_aop_error;
2328
2329 if (!src_page) {
2330 /*
2331 * Must not enter the pagefault handler here, because
2332 * we hold the page lock, so we might recursively
2333 * deadlock on the same lock, or get an ABBA deadlock
2334 * against a different lock, or against the mmap_sem
2335 * (which nests outside the page lock). So increment
2336 * preempt count, and use _atomic usercopies.
2337 *
2338 * The page is uptodate so we are OK to encounter a
2339 * short copy: if unmodified parts of the page are
2340 * marked dirty and written out to disk, it doesn't
2341 * really matter.
2342 */
2343 pagefault_disable();
2344 copied = iov_iter_copy_from_user_atomic(page, i,
2345 offset, bytes);
2346 pagefault_enable();
2347 } else {
2348 void *src, *dst;
2349 src = kmap_atomic(src_page, KM_USER0);
2350 dst = kmap_atomic(page, KM_USER1);
2351 memcpy(dst + offset, src + offset, bytes);
2352 kunmap_atomic(dst, KM_USER1);
2353 kunmap_atomic(src, KM_USER0);
2354 copied = bytes;
2355 }
2356 flush_dcache_page(page);
2357
2358 status = a_ops->commit_write(file, page, offset, offset+bytes);
2359 if (unlikely(status < 0))
2360 goto fs_write_aop_error;
2361 if (unlikely(status > 0)) /* filesystem did partial write */
2362 copied = min_t(size_t, copied, status);
2363
2364 unlock_page(page);
2365 mark_page_accessed(page);
2366 page_cache_release(page);
2367 if (src_page)
2368 page_cache_release(src_page);
2369
2370 iov_iter_advance(i, copied);
2371 pos += copied;
2372 written += copied;
2373
2374 balance_dirty_pages_ratelimited(mapping);
2375 cond_resched();
2376 continue;
2377
2378fs_write_aop_error:
2379 unlock_page(page);
2380 page_cache_release(page);
2381 if (src_page)
2382 page_cache_release(src_page);
2383
2384 /*
2385 * prepare_write() may have instantiated a few blocks
2386 * outside i_size. Trim these off again. Don't need
2387 * i_size_read because we hold i_mutex.
2388 */
2389 if (pos + bytes > inode->i_size)
2390 vmtruncate(inode, inode->i_size);
2391 break;
2392 } while (iov_iter_count(i));
2393
2394 return written ? written : status;
2395}
2396
2397static ssize_t generic_perform_write(struct file *file, 2166static ssize_t generic_perform_write(struct file *file,
2398 struct iov_iter *i, loff_t pos) 2167 struct iov_iter *i, loff_t pos)
2399{ 2168{
@@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2494 struct iov_iter i; 2263 struct iov_iter i;
2495 2264
2496 iov_iter_init(&i, iov, nr_segs, count, written); 2265 iov_iter_init(&i, iov, nr_segs, count, written);
2497 if (a_ops->write_begin) 2266 status = generic_perform_write(file, &i, pos);
2498 status = generic_perform_write(file, &i, pos);
2499 else
2500 status = generic_perform_write_2copy(file, &i, pos);
2501 2267
2502 if (likely(status >= 0)) { 2268 if (likely(status >= 0)) {
2503 written += status; 2269 written += status;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ce8cbb29860b..6058b53dcb89 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7,6 +7,7 @@
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/seq_file.h>
10#include <linux/sysctl.h> 11#include <linux/sysctl.h>
11#include <linux/highmem.h> 12#include <linux/highmem.h>
12#include <linux/mmu_notifier.h> 13#include <linux/mmu_notifier.h>
@@ -353,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
353 return 0; 354 return 0;
354} 355}
355 356
357static void clear_gigantic_page(struct page *page,
358 unsigned long addr, unsigned long sz)
359{
360 int i;
361 struct page *p = page;
362
363 might_sleep();
364 for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
365 cond_resched();
366 clear_user_highpage(p, addr + i * PAGE_SIZE);
367 }
368}
356static void clear_huge_page(struct page *page, 369static void clear_huge_page(struct page *page,
357 unsigned long addr, unsigned long sz) 370 unsigned long addr, unsigned long sz)
358{ 371{
359 int i; 372 int i;
360 373
374 if (unlikely(sz > MAX_ORDER_NR_PAGES))
375 return clear_gigantic_page(page, addr, sz);
376
361 might_sleep(); 377 might_sleep();
362 for (i = 0; i < sz/PAGE_SIZE; i++) { 378 for (i = 0; i < sz/PAGE_SIZE; i++) {
363 cond_resched(); 379 cond_resched();
@@ -365,12 +381,32 @@ static void clear_huge_page(struct page *page,
365 } 381 }
366} 382}
367 383
384static void copy_gigantic_page(struct page *dst, struct page *src,
385 unsigned long addr, struct vm_area_struct *vma)
386{
387 int i;
388 struct hstate *h = hstate_vma(vma);
389 struct page *dst_base = dst;
390 struct page *src_base = src;
391 might_sleep();
392 for (i = 0; i < pages_per_huge_page(h); ) {
393 cond_resched();
394 copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
395
396 i++;
397 dst = mem_map_next(dst, dst_base, i);
398 src = mem_map_next(src, src_base, i);
399 }
400}
368static void copy_huge_page(struct page *dst, struct page *src, 401static void copy_huge_page(struct page *dst, struct page *src,
369 unsigned long addr, struct vm_area_struct *vma) 402 unsigned long addr, struct vm_area_struct *vma)
370{ 403{
371 int i; 404 int i;
372 struct hstate *h = hstate_vma(vma); 405 struct hstate *h = hstate_vma(vma);
373 406
407 if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
408 return copy_gigantic_page(dst, src, addr, vma);
409
374 might_sleep(); 410 might_sleep();
375 for (i = 0; i < pages_per_huge_page(h); i++) { 411 for (i = 0; i < pages_per_huge_page(h); i++) {
376 cond_resched(); 412 cond_resched();
@@ -455,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page)
455{ 491{
456 int i; 492 int i;
457 493
494 VM_BUG_ON(h->order >= MAX_ORDER);
495
458 h->nr_huge_pages--; 496 h->nr_huge_pages--;
459 h->nr_huge_pages_node[page_to_nid(page)]--; 497 h->nr_huge_pages_node[page_to_nid(page)]--;
460 for (i = 0; i < pages_per_huge_page(h); i++) { 498 for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -969,6 +1007,14 @@ found:
969 return 1; 1007 return 1;
970} 1008}
971 1009
1010static void prep_compound_huge_page(struct page *page, int order)
1011{
1012 if (unlikely(order > (MAX_ORDER - 1)))
1013 prep_compound_gigantic_page(page, order);
1014 else
1015 prep_compound_page(page, order);
1016}
1017
972/* Put bootmem huge pages into the standard lists after mem_map is up */ 1018/* Put bootmem huge pages into the standard lists after mem_map is up */
973static void __init gather_bootmem_prealloc(void) 1019static void __init gather_bootmem_prealloc(void)
974{ 1020{
@@ -979,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void)
979 struct hstate *h = m->hstate; 1025 struct hstate *h = m->hstate;
980 __ClearPageReserved(page); 1026 __ClearPageReserved(page);
981 WARN_ON(page_count(page) != 1); 1027 WARN_ON(page_count(page) != 1);
982 prep_compound_page(page, h->order); 1028 prep_compound_huge_page(page, h->order);
983 prep_new_huge_page(h, page, page_to_nid(page)); 1029 prep_new_huge_page(h, page, page_to_nid(page));
984 } 1030 }
985} 1031}
@@ -1455,10 +1501,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
1455 1501
1456#endif /* CONFIG_SYSCTL */ 1502#endif /* CONFIG_SYSCTL */
1457 1503
1458int hugetlb_report_meminfo(char *buf) 1504void hugetlb_report_meminfo(struct seq_file *m)
1459{ 1505{
1460 struct hstate *h = &default_hstate; 1506 struct hstate *h = &default_hstate;
1461 return sprintf(buf, 1507 seq_printf(m,
1462 "HugePages_Total: %5lu\n" 1508 "HugePages_Total: %5lu\n"
1463 "HugePages_Free: %5lu\n" 1509 "HugePages_Free: %5lu\n"
1464 "HugePages_Rsvd: %5lu\n" 1510 "HugePages_Rsvd: %5lu\n"
@@ -1750,6 +1796,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
1750static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, 1796static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
1751 struct page *page, unsigned long address) 1797 struct page *page, unsigned long address)
1752{ 1798{
1799 struct hstate *h = hstate_vma(vma);
1753 struct vm_area_struct *iter_vma; 1800 struct vm_area_struct *iter_vma;
1754 struct address_space *mapping; 1801 struct address_space *mapping;
1755 struct prio_tree_iter iter; 1802 struct prio_tree_iter iter;
@@ -1759,7 +1806,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
1759 * vm_pgoff is in PAGE_SIZE units, hence the different calculation 1806 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
1760 * from page cache lookup which is in HPAGE_SIZE units. 1807 * from page cache lookup which is in HPAGE_SIZE units.
1761 */ 1808 */
1762 address = address & huge_page_mask(hstate_vma(vma)); 1809 address = address & huge_page_mask(h);
1763 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) 1810 pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
1764 + (vma->vm_pgoff >> PAGE_SHIFT); 1811 + (vma->vm_pgoff >> PAGE_SHIFT);
1765 mapping = (struct address_space *)page_private(page); 1812 mapping = (struct address_space *)page_private(page);
@@ -1778,7 +1825,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
1778 */ 1825 */
1779 if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) 1826 if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
1780 unmap_hugepage_range(iter_vma, 1827 unmap_hugepage_range(iter_vma,
1781 address, address + HPAGE_SIZE, 1828 address, address + huge_page_size(h),
1782 page); 1829 page);
1783 } 1830 }
1784 1831
@@ -2129,7 +2176,7 @@ same_page:
2129 if (zeropage_ok) 2176 if (zeropage_ok)
2130 pages[i] = ZERO_PAGE(0); 2177 pages[i] = ZERO_PAGE(0);
2131 else 2178 else
2132 pages[i] = page + pfn_offset; 2179 pages[i] = mem_map_offset(page, pfn_offset);
2133 get_page(pages[i]); 2180 get_page(pages[i]);
2134 } 2181 }
2135 2182
diff --git a/mm/internal.h b/mm/internal.h
index e4e728bdf324..13333bc2eb68 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
17 unsigned long floor, unsigned long ceiling); 17 unsigned long floor, unsigned long ceiling);
18 18
19extern void prep_compound_page(struct page *page, unsigned long order); 19extern void prep_compound_page(struct page *page, unsigned long order);
20extern void prep_compound_gigantic_page(struct page *page, unsigned long order);
20 21
21static inline void set_page_count(struct page *page, int v) 22static inline void set_page_count(struct page *page, int v)
22{ 23{
@@ -176,6 +177,34 @@ static inline void free_page_mlock(struct page *page) { }
176#endif /* CONFIG_UNEVICTABLE_LRU */ 177#endif /* CONFIG_UNEVICTABLE_LRU */
177 178
178/* 179/*
180 * Return the mem_map entry representing the 'offset' subpage within
181 * the maximally aligned gigantic page 'base'. Handle any discontiguity
182 * in the mem_map at MAX_ORDER_NR_PAGES boundaries.
183 */
184static inline struct page *mem_map_offset(struct page *base, int offset)
185{
186 if (unlikely(offset >= MAX_ORDER_NR_PAGES))
187 return pfn_to_page(page_to_pfn(base) + offset);
188 return base + offset;
189}
190
191/*
192 * Iterator over all subpages withing the maximally aligned gigantic
193 * page 'base'. Handle any discontiguity in the mem_map.
194 */
195static inline struct page *mem_map_next(struct page *iter,
196 struct page *base, int offset)
197{
198 if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
199 unsigned long pfn = page_to_pfn(base) + offset;
200 if (!pfn_valid(pfn))
201 return NULL;
202 return pfn_to_page(pfn);
203 }
204 return iter + 1;
205}
206
207/*
179 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, 208 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
180 * so all functions starting at paging_init should be marked __init 209 * so all functions starting at paging_init should be marked __init
181 * in those cases. SPARSEMEM, however, allows for memory hotplug, 210 * in those cases. SPARSEMEM, however, allows for memory hotplug,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d4a92b63e98e..866dcc7eeb0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1088,7 +1088,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1088 int node; 1088 int node;
1089 1089
1090 if (unlikely((cont->parent) == NULL)) { 1090 if (unlikely((cont->parent) == NULL)) {
1091 page_cgroup_init();
1092 mem = &init_mem_cgroup; 1091 mem = &init_mem_cgroup;
1093 } else { 1092 } else {
1094 mem = mem_cgroup_alloc(); 1093 mem = mem_cgroup_alloc();
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..f01b7eed6e16 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -669,6 +669,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
669 if (is_vm_hugetlb_page(vma)) 669 if (is_vm_hugetlb_page(vma))
670 return copy_hugetlb_page_range(dst_mm, src_mm, vma); 670 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
671 671
672 if (unlikely(is_pfn_mapping(vma))) {
673 /*
674 * We do not free on error cases below as remove_vma
675 * gets called on error from higher level routine
676 */
677 ret = track_pfn_vma_copy(vma);
678 if (ret)
679 return ret;
680 }
681
672 /* 682 /*
673 * We need to invalidate the secondary MMU mappings only when 683 * We need to invalidate the secondary MMU mappings only when
674 * there could be a permission downgrade on the ptes of the 684 * there could be a permission downgrade on the ptes of the
@@ -915,6 +925,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
915 if (vma->vm_flags & VM_ACCOUNT) 925 if (vma->vm_flags & VM_ACCOUNT)
916 *nr_accounted += (end - start) >> PAGE_SHIFT; 926 *nr_accounted += (end - start) >> PAGE_SHIFT;
917 927
928 if (unlikely(is_pfn_mapping(vma)))
929 untrack_pfn_vma(vma, 0, 0);
930
918 while (start != end) { 931 while (start != end) {
919 if (!tlb_start_valid) { 932 if (!tlb_start_valid) {
920 tlb_start = start; 933 tlb_start = start;
@@ -1430,6 +1443,7 @@ out:
1430int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, 1443int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1431 unsigned long pfn) 1444 unsigned long pfn)
1432{ 1445{
1446 int ret;
1433 /* 1447 /*
1434 * Technically, architectures with pte_special can avoid all these 1448 * Technically, architectures with pte_special can avoid all these
1435 * restrictions (same for remap_pfn_range). However we would like 1449 * restrictions (same for remap_pfn_range). However we would like
@@ -1444,7 +1458,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
1444 1458
1445 if (addr < vma->vm_start || addr >= vma->vm_end) 1459 if (addr < vma->vm_start || addr >= vma->vm_end)
1446 return -EFAULT; 1460 return -EFAULT;
1447 return insert_pfn(vma, addr, pfn, vma->vm_page_prot); 1461 if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
1462 return -EINVAL;
1463
1464 ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
1465
1466 if (ret)
1467 untrack_pfn_vma(vma, pfn, PAGE_SIZE);
1468
1469 return ret;
1448} 1470}
1449EXPORT_SYMBOL(vm_insert_pfn); 1471EXPORT_SYMBOL(vm_insert_pfn);
1450 1472
@@ -1575,14 +1597,17 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1575 * behaviour that some programs depend on. We mark the "original" 1597 * behaviour that some programs depend on. We mark the "original"
1576 * un-COW'ed pages by matching them up with "vma->vm_pgoff". 1598 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
1577 */ 1599 */
1578 if (is_cow_mapping(vma->vm_flags)) { 1600 if (addr == vma->vm_start && end == vma->vm_end)
1579 if (addr != vma->vm_start || end != vma->vm_end)
1580 return -EINVAL;
1581 vma->vm_pgoff = pfn; 1601 vma->vm_pgoff = pfn;
1582 } 1602 else if (is_cow_mapping(vma->vm_flags))
1603 return -EINVAL;
1583 1604
1584 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; 1605 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
1585 1606
1607 err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
1608 if (err)
1609 return -EINVAL;
1610
1586 BUG_ON(addr >= end); 1611 BUG_ON(addr >= end);
1587 pfn -= addr >> PAGE_SHIFT; 1612 pfn -= addr >> PAGE_SHIFT;
1588 pgd = pgd_offset(mm, addr); 1613 pgd = pgd_offset(mm, addr);
@@ -1594,6 +1619,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1594 if (err) 1619 if (err)
1595 break; 1620 break;
1596 } while (pgd++, addr = next, addr != end); 1621 } while (pgd++, addr = next, addr != end);
1622
1623 if (err)
1624 untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
1625
1597 return err; 1626 return err;
1598} 1627}
1599EXPORT_SYMBOL(remap_pfn_range); 1628EXPORT_SYMBOL(remap_pfn_range);
@@ -2865,9 +2894,9 @@ int in_gate_area_no_task(unsigned long addr)
2865#endif /* __HAVE_ARCH_GATE_AREA */ 2894#endif /* __HAVE_ARCH_GATE_AREA */
2866 2895
2867#ifdef CONFIG_HAVE_IOREMAP_PROT 2896#ifdef CONFIG_HAVE_IOREMAP_PROT
2868static resource_size_t follow_phys(struct vm_area_struct *vma, 2897int follow_phys(struct vm_area_struct *vma,
2869 unsigned long address, unsigned int flags, 2898 unsigned long address, unsigned int flags,
2870 unsigned long *prot) 2899 unsigned long *prot, resource_size_t *phys)
2871{ 2900{
2872 pgd_t *pgd; 2901 pgd_t *pgd;
2873 pud_t *pud; 2902 pud_t *pud;
@@ -2876,24 +2905,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
2876 spinlock_t *ptl; 2905 spinlock_t *ptl;
2877 resource_size_t phys_addr = 0; 2906 resource_size_t phys_addr = 0;
2878 struct mm_struct *mm = vma->vm_mm; 2907 struct mm_struct *mm = vma->vm_mm;
2908 int ret = -EINVAL;
2879 2909
2880 VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP))); 2910 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
2911 goto out;
2881 2912
2882 pgd = pgd_offset(mm, address); 2913 pgd = pgd_offset(mm, address);
2883 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) 2914 if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
2884 goto no_page_table; 2915 goto out;
2885 2916
2886 pud = pud_offset(pgd, address); 2917 pud = pud_offset(pgd, address);
2887 if (pud_none(*pud) || unlikely(pud_bad(*pud))) 2918 if (pud_none(*pud) || unlikely(pud_bad(*pud)))
2888 goto no_page_table; 2919 goto out;
2889 2920
2890 pmd = pmd_offset(pud, address); 2921 pmd = pmd_offset(pud, address);
2891 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) 2922 if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
2892 goto no_page_table; 2923 goto out;
2893 2924
2894 /* We cannot handle huge page PFN maps. Luckily they don't exist. */ 2925 /* We cannot handle huge page PFN maps. Luckily they don't exist. */
2895 if (pmd_huge(*pmd)) 2926 if (pmd_huge(*pmd))
2896 goto no_page_table; 2927 goto out;
2897 2928
2898 ptep = pte_offset_map_lock(mm, pmd, address, &ptl); 2929 ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
2899 if (!ptep) 2930 if (!ptep)
@@ -2908,13 +2939,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
2908 phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */ 2939 phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
2909 2940
2910 *prot = pgprot_val(pte_pgprot(pte)); 2941 *prot = pgprot_val(pte_pgprot(pte));
2942 *phys = phys_addr;
2943 ret = 0;
2911 2944
2912unlock: 2945unlock:
2913 pte_unmap_unlock(ptep, ptl); 2946 pte_unmap_unlock(ptep, ptl);
2914out: 2947out:
2915 return phys_addr; 2948 return ret;
2916no_page_table:
2917 return 0;
2918} 2949}
2919 2950
2920int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, 2951int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
@@ -2925,12 +2956,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
2925 void *maddr; 2956 void *maddr;
2926 int offset = addr & (PAGE_SIZE-1); 2957 int offset = addr & (PAGE_SIZE-1);
2927 2958
2928 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) 2959 if (follow_phys(vma, addr, write, &prot, &phys_addr))
2929 return -EINVAL;
2930
2931 phys_addr = follow_phys(vma, addr, write, &prot);
2932
2933 if (!phys_addr)
2934 return -EINVAL; 2960 return -EINVAL;
2935 2961
2936 maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); 2962 maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6837a1014372..b17371185468 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -22,7 +22,6 @@
22#include <linux/highmem.h> 22#include <linux/highmem.h>
23#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
24#include <linux/ioport.h> 24#include <linux/ioport.h>
25#include <linux/cpuset.h>
26#include <linux/delay.h> 25#include <linux/delay.h>
27#include <linux/migrate.h> 26#include <linux/migrate.h>
28#include <linux/page-isolation.h> 27#include <linux/page-isolation.h>
@@ -190,7 +189,7 @@ static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
190 pgdat->node_start_pfn; 189 pgdat->node_start_pfn;
191} 190}
192 191
193static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) 192static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
194{ 193{
195 struct pglist_data *pgdat = zone->zone_pgdat; 194 struct pglist_data *pgdat = zone->zone_pgdat;
196 int nr_pages = PAGES_PER_SECTION; 195 int nr_pages = PAGES_PER_SECTION;
@@ -217,7 +216,7 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
217 return 0; 216 return 0;
218} 217}
219 218
220static int __add_section(struct zone *zone, unsigned long phys_start_pfn) 219static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn)
221{ 220{
222 int nr_pages = PAGES_PER_SECTION; 221 int nr_pages = PAGES_PER_SECTION;
223 int ret; 222 int ret;
@@ -274,7 +273,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
274 * call this function after deciding the zone to which to 273 * call this function after deciding the zone to which to
275 * add the new pages. 274 * add the new pages.
276 */ 275 */
277int __add_pages(struct zone *zone, unsigned long phys_start_pfn, 276int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
278 unsigned long nr_pages) 277 unsigned long nr_pages)
279{ 278{
280 unsigned long i; 279 unsigned long i;
@@ -471,7 +470,8 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
471} 470}
472 471
473 472
474int add_memory(int nid, u64 start, u64 size) 473/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
474int __ref add_memory(int nid, u64 start, u64 size)
475{ 475{
476 pg_data_t *pgdat = NULL; 476 pg_data_t *pgdat = NULL;
477 int new_pgdat = 0; 477 int new_pgdat = 0;
@@ -498,8 +498,6 @@ int add_memory(int nid, u64 start, u64 size)
498 /* we online node here. we can't roll back from here. */ 498 /* we online node here. we can't roll back from here. */
499 node_set_online(nid); 499 node_set_online(nid);
500 500
501 cpuset_track_online_nodes();
502
503 if (new_pgdat) { 501 if (new_pgdat) {
504 ret = register_one_node(nid); 502 ret = register_one_node(nid);
505 /* 503 /*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 36f42573a335..e9493b1c1117 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
489 int err; 489 int err;
490 struct vm_area_struct *first, *vma, *prev; 490 struct vm_area_struct *first, *vma, *prev;
491 491
492 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
493
494 err = migrate_prep();
495 if (err)
496 return ERR_PTR(err);
497 }
498 492
499 first = find_vma(mm, start); 493 first = find_vma(mm, start);
500 if (!first) 494 if (!first)
@@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm,
809 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) 803 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
810{ 804{
811 int busy = 0; 805 int busy = 0;
812 int err = 0; 806 int err;
813 nodemask_t tmp; 807 nodemask_t tmp;
814 808
809 err = migrate_prep();
810 if (err)
811 return err;
812
815 down_read(&mm->mmap_sem); 813 down_read(&mm->mmap_sem);
816 814
817 err = migrate_vmas(mm, from_nodes, to_nodes, flags); 815 err = migrate_vmas(mm, from_nodes, to_nodes, flags);
@@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len,
974 start, start + len, mode, mode_flags, 972 start, start + len, mode, mode_flags,
975 nmask ? nodes_addr(*nmask)[0] : -1); 973 nmask ? nodes_addr(*nmask)[0] : -1);
976 974
975 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
976
977 err = migrate_prep();
978 if (err)
979 return err;
980 }
977 down_write(&mm->mmap_sem); 981 down_write(&mm->mmap_sem);
978 vma = check_range(mm, start, end, nmask, 982 vma = check_range(mm, start, end, nmask,
979 flags | MPOL_MF_INVERT, &pagelist); 983 flags | MPOL_MF_INVERT, &pagelist);
diff --git a/mm/migrate.c b/mm/migrate.c
index 6602941bfab0..037b0967c1e3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -522,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page)
522 remove_migration_ptes(page, page); 522 remove_migration_ptes(page, page);
523 523
524 rc = mapping->a_ops->writepage(page, &wbc); 524 rc = mapping->a_ops->writepage(page, &wbc);
525 if (rc < 0)
526 /* I/O Error writing */
527 return -EIO;
528 525
529 if (rc != AOP_WRITEPAGE_ACTIVATE) 526 if (rc != AOP_WRITEPAGE_ACTIVATE)
530 /* unlocked. Relock */ 527 /* unlocked. Relock */
531 lock_page(page); 528 lock_page(page);
532 529
533 return -EAGAIN; 530 return (rc < 0) ? -EIO : -EAGAIN;
534} 531}
535 532
536/* 533/*
@@ -841,12 +838,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
841 struct page_to_node *pp; 838 struct page_to_node *pp;
842 LIST_HEAD(pagelist); 839 LIST_HEAD(pagelist);
843 840
841 migrate_prep();
844 down_read(&mm->mmap_sem); 842 down_read(&mm->mmap_sem);
845 843
846 /* 844 /*
847 * Build a list of pages to migrate 845 * Build a list of pages to migrate
848 */ 846 */
849 migrate_prep();
850 for (pp = pm; pp->node != MAX_NUMNODES; pp++) { 847 for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
851 struct vm_area_struct *vma; 848 struct vm_area_struct *vma;
852 struct page *page; 849 struct page *page;
@@ -990,25 +987,18 @@ out:
990/* 987/*
991 * Determine the nodes of an array of pages and store it in an array of status. 988 * Determine the nodes of an array of pages and store it in an array of status.
992 */ 989 */
993static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, 990static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
994 const void __user * __user *pages, 991 const void __user **pages, int *status)
995 int __user *status)
996{ 992{
997 unsigned long i; 993 unsigned long i;
998 int err;
999 994
1000 down_read(&mm->mmap_sem); 995 down_read(&mm->mmap_sem);
1001 996
1002 for (i = 0; i < nr_pages; i++) { 997 for (i = 0; i < nr_pages; i++) {
1003 const void __user *p; 998 unsigned long addr = (unsigned long)(*pages);
1004 unsigned long addr;
1005 struct vm_area_struct *vma; 999 struct vm_area_struct *vma;
1006 struct page *page; 1000 struct page *page;
1007 1001 int err = -EFAULT;
1008 err = -EFAULT;
1009 if (get_user(p, pages+i))
1010 goto out;
1011 addr = (unsigned long) p;
1012 1002
1013 vma = find_vma(mm, addr); 1003 vma = find_vma(mm, addr);
1014 if (!vma) 1004 if (!vma)
@@ -1027,12 +1017,52 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
1027 1017
1028 err = page_to_nid(page); 1018 err = page_to_nid(page);
1029set_status: 1019set_status:
1030 put_user(err, status+i); 1020 *status = err;
1021
1022 pages++;
1023 status++;
1024 }
1025
1026 up_read(&mm->mmap_sem);
1027}
1028
1029/*
1030 * Determine the nodes of a user array of pages and store it in
1031 * a user array of status.
1032 */
1033static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
1034 const void __user * __user *pages,
1035 int __user *status)
1036{
1037#define DO_PAGES_STAT_CHUNK_NR 16
1038 const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
1039 int chunk_status[DO_PAGES_STAT_CHUNK_NR];
1040 unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
1041 int err;
1042
1043 for (i = 0; i < nr_pages; i += chunk_nr) {
1044 if (chunk_nr + i > nr_pages)
1045 chunk_nr = nr_pages - i;
1046
1047 err = copy_from_user(chunk_pages, &pages[i],
1048 chunk_nr * sizeof(*chunk_pages));
1049 if (err) {
1050 err = -EFAULT;
1051 goto out;
1052 }
1053
1054 do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
1055
1056 err = copy_to_user(&status[i], chunk_status,
1057 chunk_nr * sizeof(*chunk_status));
1058 if (err) {
1059 err = -EFAULT;
1060 goto out;
1061 }
1031 } 1062 }
1032 err = 0; 1063 err = 0;
1033 1064
1034out: 1065out:
1035 up_read(&mm->mmap_sem);
1036 return err; 1066 return err;
1037} 1067}
1038 1068
diff --git a/mm/mlock.c b/mm/mlock.c
index 008ea70b7afa..1ada366570cb 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -66,14 +66,10 @@ void __clear_page_mlock(struct page *page)
66 putback_lru_page(page); 66 putback_lru_page(page);
67 } else { 67 } else {
68 /* 68 /*
69 * Page not on the LRU yet. Flush all pagevecs and retry. 69 * We lost the race. the page already moved to evictable list.
70 */ 70 */
71 lru_add_drain_all(); 71 if (PageUnevictable(page))
72 if (!isolate_lru_page(page))
73 putback_lru_page(page);
74 else if (PageUnevictable(page))
75 count_vm_event(UNEVICTABLE_PGSTRANDED); 72 count_vm_event(UNEVICTABLE_PGSTRANDED);
76
77 } 73 }
78} 74}
79 75
@@ -166,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
166 unsigned long addr = start; 162 unsigned long addr = start;
167 struct page *pages[16]; /* 16 gives a reasonable batch */ 163 struct page *pages[16]; /* 16 gives a reasonable batch */
168 int nr_pages = (end - start) / PAGE_SIZE; 164 int nr_pages = (end - start) / PAGE_SIZE;
169 int ret; 165 int ret = 0;
170 int gup_flags = 0; 166 int gup_flags = 0;
171 167
172 VM_BUG_ON(start & ~PAGE_MASK); 168 VM_BUG_ON(start & ~PAGE_MASK);
@@ -187,8 +183,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
187 if (vma->vm_flags & VM_WRITE) 183 if (vma->vm_flags & VM_WRITE)
188 gup_flags |= GUP_FLAGS_WRITE; 184 gup_flags |= GUP_FLAGS_WRITE;
189 185
190 lru_add_drain_all(); /* push cached pages to LRU */
191
192 while (nr_pages > 0) { 186 while (nr_pages > 0) {
193 int i; 187 int i;
194 188
@@ -251,8 +245,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
251 ret = 0; 245 ret = 0;
252 } 246 }
253 247
254 lru_add_drain_all(); /* to update stats */
255
256 return ret; /* count entire vma as locked_vm */ 248 return ret; /* count entire vma as locked_vm */
257} 249}
258 250
@@ -546,6 +538,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
546 if (!can_do_mlock()) 538 if (!can_do_mlock())
547 return -EPERM; 539 return -EPERM;
548 540
541 lru_add_drain_all(); /* flush pagevec */
542
549 down_write(&current->mm->mmap_sem); 543 down_write(&current->mm->mmap_sem);
550 len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); 544 len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
551 start &= PAGE_MASK; 545 start &= PAGE_MASK;
@@ -612,6 +606,8 @@ asmlinkage long sys_mlockall(int flags)
612 if (!can_do_mlock()) 606 if (!can_do_mlock())
613 goto out; 607 goto out;
614 608
609 lru_add_drain_all(); /* flush pagevec */
610
615 down_write(&current->mm->mmap_sem); 611 down_write(&current->mm->mmap_sem);
616 612
617 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; 613 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
diff --git a/mm/mmap.c b/mm/mmap.c
index 74f4d158022e..d4855a682ab6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -175,7 +175,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
175 175
176 /* Don't let a single process grow too big: 176 /* Don't let a single process grow too big:
177 leave 3% of the size of this process for other processes */ 177 leave 3% of the size of this process for other processes */
178 allowed -= mm->total_vm / 32; 178 if (mm)
179 allowed -= mm->total_vm / 32;
179 180
180 /* 181 /*
181 * cast `allowed' as a signed long because vm_committed_space 182 * cast `allowed' as a signed long because vm_committed_space
@@ -1703,7 +1704,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
1703 vma = find_vma_prev(mm, addr, &prev); 1704 vma = find_vma_prev(mm, addr, &prev);
1704 if (vma && (vma->vm_start <= addr)) 1705 if (vma && (vma->vm_start <= addr))
1705 return vma; 1706 return vma;
1706 if (expand_stack(prev, addr)) 1707 if (!prev || expand_stack(prev, addr))
1707 return NULL; 1708 return NULL;
1708 if (prev->vm_flags & VM_LOCKED) { 1709 if (prev->vm_flags & VM_LOCKED) {
1709 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0) 1710 if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
diff --git a/mm/nommu.c b/mm/nommu.c
index 2696b24f2bb3..7695dc850785 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1454,7 +1454,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
1454 1454
1455 /* Don't let a single process grow too big: 1455 /* Don't let a single process grow too big:
1456 leave 3% of the size of this process for other processes */ 1456 leave 3% of the size of this process for other processes */
1457 allowed -= current->mm->total_vm / 32; 1457 if (mm)
1458 allowed -= mm->total_vm / 32;
1458 1459
1459 /* 1460 /*
1460 * cast `allowed' as a signed long because vm_committed_space 1461 * cast `allowed' as a signed long because vm_committed_space
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64e5b4bcd964..a0a01902f551 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex);
38 * badness - calculate a numeric value for how bad this task has been 38 * badness - calculate a numeric value for how bad this task has been
39 * @p: task struct of which task we should calculate 39 * @p: task struct of which task we should calculate
40 * @uptime: current uptime in seconds 40 * @uptime: current uptime in seconds
41 * @mem: target memory controller
42 * 41 *
43 * The formula used is relatively simple and documented inline in the 42 * The formula used is relatively simple and documented inline in the
44 * function. The main rationale is that we want to select a good task 43 * function. The main rationale is that we want to select a good task
@@ -295,6 +294,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
295 continue; 294 continue;
296 if (mem && !task_in_mem_cgroup(p, mem)) 295 if (mem && !task_in_mem_cgroup(p, mem))
297 continue; 296 continue;
297 if (!thread_group_leader(p))
298 continue;
298 299
299 task_lock(p); 300 task_lock(p);
300 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", 301 printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d0a240fbb8bf..d8ac01474563 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -263,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order)
263{ 263{
264 int i; 264 int i;
265 int nr_pages = 1 << order; 265 int nr_pages = 1 << order;
266
267 set_compound_page_dtor(page, free_compound_page);
268 set_compound_order(page, order);
269 __SetPageHead(page);
270 for (i = 1; i < nr_pages; i++) {
271 struct page *p = page + i;
272
273 __SetPageTail(p);
274 p->first_page = page;
275 }
276}
277
278#ifdef CONFIG_HUGETLBFS
279void prep_compound_gigantic_page(struct page *page, unsigned long order)
280{
281 int i;
282 int nr_pages = 1 << order;
266 struct page *p = page + 1; 283 struct page *p = page + 1;
267 284
268 set_compound_page_dtor(page, free_compound_page); 285 set_compound_page_dtor(page, free_compound_page);
269 set_compound_order(page, order); 286 set_compound_order(page, order);
270 __SetPageHead(page); 287 __SetPageHead(page);
271 for (i = 1; i < nr_pages; i++, p++) { 288 for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
272 if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
273 p = pfn_to_page(page_to_pfn(page) + i);
274 __SetPageTail(p); 289 __SetPageTail(p);
275 p->first_page = page; 290 p->first_page = page;
276 } 291 }
277} 292}
293#endif
278 294
279static void destroy_compound_page(struct page *page, unsigned long order) 295static void destroy_compound_page(struct page *page, unsigned long order)
280{ 296{
281 int i; 297 int i;
282 int nr_pages = 1 << order; 298 int nr_pages = 1 << order;
283 struct page *p = page + 1;
284 299
285 if (unlikely(compound_order(page) != order)) 300 if (unlikely(compound_order(page) != order))
286 bad_page(page); 301 bad_page(page);
@@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order)
288 if (unlikely(!PageHead(page))) 303 if (unlikely(!PageHead(page)))
289 bad_page(page); 304 bad_page(page);
290 __ClearPageHead(page); 305 __ClearPageHead(page);
291 for (i = 1; i < nr_pages; i++, p++) { 306 for (i = 1; i < nr_pages; i++) {
292 if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) 307 struct page *p = page + i;
293 p = pfn_to_page(page_to_pfn(page) + i);
294 308
295 if (unlikely(!PageTail(p) | 309 if (unlikely(!PageTail(p) |
296 (p->first_page != page))) 310 (p->first_page != page)))
@@ -1547,6 +1561,10 @@ nofail_alloc:
1547 1561
1548 /* We now go into synchronous reclaim */ 1562 /* We now go into synchronous reclaim */
1549 cpuset_memory_pressure_bump(); 1563 cpuset_memory_pressure_bump();
1564 /*
1565 * The task's cpuset might have expanded its set of allowable nodes
1566 */
1567 cpuset_update_task_memory_state();
1550 p->flags |= PF_MEMALLOC; 1568 p->flags |= PF_MEMALLOC;
1551 reclaim_state.reclaimed_slab = 0; 1569 reclaim_state.reclaimed_slab = 0;
1552 p->reclaim_state = &reclaim_state; 1570 p->reclaim_state = &reclaim_state;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 5d86550701f2..ab27ff750519 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -4,7 +4,10 @@
4#include <linux/bit_spinlock.h> 4#include <linux/bit_spinlock.h>
5#include <linux/page_cgroup.h> 5#include <linux/page_cgroup.h>
6#include <linux/hash.h> 6#include <linux/hash.h>
7#include <linux/slab.h>
7#include <linux/memory.h> 8#include <linux/memory.h>
9#include <linux/vmalloc.h>
10#include <linux/cgroup.h>
8 11
9static void __meminit 12static void __meminit
10__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) 13__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -18,7 +21,7 @@ static unsigned long total_usage;
18#if !defined(CONFIG_SPARSEMEM) 21#if !defined(CONFIG_SPARSEMEM)
19 22
20 23
21void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) 24void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
22{ 25{
23 pgdat->node_page_cgroup = NULL; 26 pgdat->node_page_cgroup = NULL;
24} 27}
@@ -46,6 +49,9 @@ static int __init alloc_node_page_cgroup(int nid)
46 start_pfn = NODE_DATA(nid)->node_start_pfn; 49 start_pfn = NODE_DATA(nid)->node_start_pfn;
47 nr_pages = NODE_DATA(nid)->node_spanned_pages; 50 nr_pages = NODE_DATA(nid)->node_spanned_pages;
48 51
52 if (!nr_pages)
53 return 0;
54
49 table_size = sizeof(struct page_cgroup) * nr_pages; 55 table_size = sizeof(struct page_cgroup) * nr_pages;
50 56
51 base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), 57 base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
@@ -66,6 +72,9 @@ void __init page_cgroup_init(void)
66 72
67 int nid, fail; 73 int nid, fail;
68 74
75 if (mem_cgroup_subsys.disabled)
76 return;
77
69 for_each_online_node(nid) { 78 for_each_online_node(nid) {
70 fail = alloc_node_page_cgroup(nid); 79 fail = alloc_node_page_cgroup(nid);
71 if (fail) 80 if (fail)
@@ -91,7 +100,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
91 return section->page_cgroup + pfn; 100 return section->page_cgroup + pfn;
92} 101}
93 102
94int __meminit init_section_page_cgroup(unsigned long pfn) 103/* __alloc_bootmem...() is protected by !slab_available() */
104int __init_refok init_section_page_cgroup(unsigned long pfn)
95{ 105{
96 struct mem_section *section; 106 struct mem_section *section;
97 struct page_cgroup *base, *pc; 107 struct page_cgroup *base, *pc;
@@ -100,15 +110,30 @@ int __meminit init_section_page_cgroup(unsigned long pfn)
100 110
101 section = __pfn_to_section(pfn); 111 section = __pfn_to_section(pfn);
102 112
103 if (section->page_cgroup) 113 if (!section->page_cgroup) {
104 return 0; 114 nid = page_to_nid(pfn_to_page(pfn));
105 115 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
106 nid = page_to_nid(pfn_to_page(pfn)); 116 if (slab_is_available()) {
107 117 base = kmalloc_node(table_size, GFP_KERNEL, nid);
108 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 118 if (!base)
109 base = kmalloc_node(table_size, GFP_KERNEL, nid); 119 base = vmalloc_node(table_size, nid);
110 if (!base) 120 } else {
111 base = vmalloc_node(table_size, nid); 121 base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
122 table_size,
123 PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
124 }
125 } else {
126 /*
127 * We don't have to allocate page_cgroup again, but
128 * address of memmap may be changed. So, we have to initialize
129 * again.
130 */
131 base = section->page_cgroup + pfn;
132 table_size = 0;
133 /* check address of memmap is changed or not. */
134 if (base->page == pfn_to_page(pfn))
135 return 0;
136 }
112 137
113 if (!base) { 138 if (!base) {
114 printk(KERN_ERR "page cgroup allocation failure\n"); 139 printk(KERN_ERR "page cgroup allocation failure\n");
@@ -135,21 +160,26 @@ void __free_page_cgroup(unsigned long pfn)
135 if (!ms || !ms->page_cgroup) 160 if (!ms || !ms->page_cgroup)
136 return; 161 return;
137 base = ms->page_cgroup + pfn; 162 base = ms->page_cgroup + pfn;
138 ms->page_cgroup = NULL; 163 if (is_vmalloc_addr(base)) {
139 if (is_vmalloc_addr(base))
140 vfree(base); 164 vfree(base);
141 else 165 ms->page_cgroup = NULL;
142 kfree(base); 166 } else {
167 struct page *page = virt_to_page(base);
168 if (!PageReserved(page)) { /* Is bootmem ? */
169 kfree(base);
170 ms->page_cgroup = NULL;
171 }
172 }
143} 173}
144 174
145int online_page_cgroup(unsigned long start_pfn, 175int __meminit online_page_cgroup(unsigned long start_pfn,
146 unsigned long nr_pages, 176 unsigned long nr_pages,
147 int nid) 177 int nid)
148{ 178{
149 unsigned long start, end, pfn; 179 unsigned long start, end, pfn;
150 int fail = 0; 180 int fail = 0;
151 181
152 start = start_pfn & (PAGES_PER_SECTION - 1); 182 start = start_pfn & ~(PAGES_PER_SECTION - 1);
153 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); 183 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
154 184
155 for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { 185 for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
@@ -167,12 +197,12 @@ int online_page_cgroup(unsigned long start_pfn,
167 return -ENOMEM; 197 return -ENOMEM;
168} 198}
169 199
170int offline_page_cgroup(unsigned long start_pfn, 200int __meminit offline_page_cgroup(unsigned long start_pfn,
171 unsigned long nr_pages, int nid) 201 unsigned long nr_pages, int nid)
172{ 202{
173 unsigned long start, end, pfn; 203 unsigned long start, end, pfn;
174 204
175 start = start_pfn & (PAGES_PER_SECTION - 1); 205 start = start_pfn & ~(PAGES_PER_SECTION - 1);
176 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); 206 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
177 207
178 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) 208 for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
@@ -181,7 +211,7 @@ int offline_page_cgroup(unsigned long start_pfn,
181 211
182} 212}
183 213
184static int page_cgroup_callback(struct notifier_block *self, 214static int __meminit page_cgroup_callback(struct notifier_block *self,
185 unsigned long action, void *arg) 215 unsigned long action, void *arg)
186{ 216{
187 struct memory_notify *mn = arg; 217 struct memory_notify *mn = arg;
@@ -191,18 +221,23 @@ static int page_cgroup_callback(struct notifier_block *self,
191 ret = online_page_cgroup(mn->start_pfn, 221 ret = online_page_cgroup(mn->start_pfn,
192 mn->nr_pages, mn->status_change_nid); 222 mn->nr_pages, mn->status_change_nid);
193 break; 223 break;
194 case MEM_CANCEL_ONLINE:
195 case MEM_OFFLINE: 224 case MEM_OFFLINE:
196 offline_page_cgroup(mn->start_pfn, 225 offline_page_cgroup(mn->start_pfn,
197 mn->nr_pages, mn->status_change_nid); 226 mn->nr_pages, mn->status_change_nid);
198 break; 227 break;
228 case MEM_CANCEL_ONLINE:
199 case MEM_GOING_OFFLINE: 229 case MEM_GOING_OFFLINE:
200 break; 230 break;
201 case MEM_ONLINE: 231 case MEM_ONLINE:
202 case MEM_CANCEL_OFFLINE: 232 case MEM_CANCEL_OFFLINE:
203 break; 233 break;
204 } 234 }
205 ret = notifier_from_errno(ret); 235
236 if (ret)
237 ret = notifier_from_errno(ret);
238 else
239 ret = NOTIFY_OK;
240
206 return ret; 241 return ret;
207} 242}
208 243
@@ -213,6 +248,9 @@ void __init page_cgroup_init(void)
213 unsigned long pfn; 248 unsigned long pfn;
214 int fail = 0; 249 int fail = 0;
215 250
251 if (mem_cgroup_subsys.disabled)
252 return;
253
216 for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { 254 for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) {
217 if (!pfn_present(pfn)) 255 if (!pfn_present(pfn))
218 continue; 256 continue;
@@ -229,7 +267,7 @@ void __init page_cgroup_init(void)
229 " want\n"); 267 " want\n");
230} 268}
231 269
232void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) 270void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
233{ 271{
234 return; 272 return;
235} 273}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b70a7fec1ff6..5e0ffd967452 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
130 if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) 130 if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
131 break; 131 break;
132 } 132 }
133 if (pfn < end_pfn) 133 page = __first_valid_page(start_pfn, end_pfn - start_pfn);
134 if ((pfn < end_pfn) || !page)
134 return -EBUSY; 135 return -EBUSY;
135 /* Check all pages are free or Marked as ISOLATED */ 136 /* Check all pages are free or Marked as ISOLATED */
136 zone = page_zone(pfn_to_page(pfn)); 137 zone = page_zone(page);
137 spin_lock_irqsave(&zone->lock, flags); 138 spin_lock_irqsave(&zone->lock, flags);
138 ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); 139 ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn);
139 spin_unlock_irqrestore(&zone->lock, flags); 140 spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/mm/shmem.c b/mm/shmem.c
index d38d7e61fcd0..0ed075215e5f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -161,8 +161,8 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
161 */ 161 */
162static inline int shmem_acct_size(unsigned long flags, loff_t size) 162static inline int shmem_acct_size(unsigned long flags, loff_t size)
163{ 163{
164 return (flags & VM_ACCOUNT)? 164 return (flags & VM_ACCOUNT) ?
165 security_vm_enough_memory(VM_ACCT(size)): 0; 165 security_vm_enough_memory_kern(VM_ACCT(size)) : 0;
166} 166}
167 167
168static inline void shmem_unacct_size(unsigned long flags, loff_t size) 168static inline void shmem_unacct_size(unsigned long flags, loff_t size)
@@ -179,8 +179,8 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
179 */ 179 */
180static inline int shmem_acct_block(unsigned long flags) 180static inline int shmem_acct_block(unsigned long flags)
181{ 181{
182 return (flags & VM_ACCOUNT)? 182 return (flags & VM_ACCOUNT) ?
183 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE)); 183 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE));
184} 184}
185 185
186static inline void shmem_unacct_blocks(unsigned long flags, long pages) 186static inline void shmem_unacct_blocks(unsigned long flags, long pages)
diff --git a/mm/slab.c b/mm/slab.c
index e76eee466886..09187517f9dc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -95,6 +95,7 @@
95#include <linux/init.h> 95#include <linux/init.h>
96#include <linux/compiler.h> 96#include <linux/compiler.h>
97#include <linux/cpuset.h> 97#include <linux/cpuset.h>
98#include <linux/proc_fs.h>
98#include <linux/seq_file.h> 99#include <linux/seq_file.h>
99#include <linux/notifier.h> 100#include <linux/notifier.h>
100#include <linux/kallsyms.h> 101#include <linux/kallsyms.h>
@@ -4258,7 +4259,7 @@ static int s_show(struct seq_file *m, void *p)
4258 * + further values on SMP and with statistics enabled 4259 * + further values on SMP and with statistics enabled
4259 */ 4260 */
4260 4261
4261const struct seq_operations slabinfo_op = { 4262static const struct seq_operations slabinfo_op = {
4262 .start = s_start, 4263 .start = s_start,
4263 .next = s_next, 4264 .next = s_next,
4264 .stop = s_stop, 4265 .stop = s_stop,
@@ -4315,6 +4316,19 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
4315 return res; 4316 return res;
4316} 4317}
4317 4318
4319static int slabinfo_open(struct inode *inode, struct file *file)
4320{
4321 return seq_open(file, &slabinfo_op);
4322}
4323
4324static const struct file_operations proc_slabinfo_operations = {
4325 .open = slabinfo_open,
4326 .read = seq_read,
4327 .write = slabinfo_write,
4328 .llseek = seq_lseek,
4329 .release = seq_release,
4330};
4331
4318#ifdef CONFIG_DEBUG_SLAB_LEAK 4332#ifdef CONFIG_DEBUG_SLAB_LEAK
4319 4333
4320static void *leaks_start(struct seq_file *m, loff_t *pos) 4334static void *leaks_start(struct seq_file *m, loff_t *pos)
@@ -4443,13 +4457,47 @@ static int leaks_show(struct seq_file *m, void *p)
4443 return 0; 4457 return 0;
4444} 4458}
4445 4459
4446const struct seq_operations slabstats_op = { 4460static const struct seq_operations slabstats_op = {
4447 .start = leaks_start, 4461 .start = leaks_start,
4448 .next = s_next, 4462 .next = s_next,
4449 .stop = s_stop, 4463 .stop = s_stop,
4450 .show = leaks_show, 4464 .show = leaks_show,
4451}; 4465};
4466
4467static int slabstats_open(struct inode *inode, struct file *file)
4468{
4469 unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL);
4470 int ret = -ENOMEM;
4471 if (n) {
4472 ret = seq_open(file, &slabstats_op);
4473 if (!ret) {
4474 struct seq_file *m = file->private_data;
4475 *n = PAGE_SIZE / (2 * sizeof(unsigned long));
4476 m->private = n;
4477 n = NULL;
4478 }
4479 kfree(n);
4480 }
4481 return ret;
4482}
4483
4484static const struct file_operations proc_slabstats_operations = {
4485 .open = slabstats_open,
4486 .read = seq_read,
4487 .llseek = seq_lseek,
4488 .release = seq_release_private,
4489};
4490#endif
4491
4492static int __init slab_proc_init(void)
4493{
4494 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4495#ifdef CONFIG_DEBUG_SLAB_LEAK
4496 proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
4452#endif 4497#endif
4498 return 0;
4499}
4500module_init(slab_proc_init);
4453#endif 4501#endif
4454 4502
4455/** 4503/**
diff --git a/mm/slob.c b/mm/slob.c
index cb675d126791..bf7e8fc3aed8 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -535,7 +535,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
535 struct kmem_cache *c; 535 struct kmem_cache *c;
536 536
537 c = slob_alloc(sizeof(struct kmem_cache), 537 c = slob_alloc(sizeof(struct kmem_cache),
538 flags, ARCH_KMALLOC_MINALIGN, -1); 538 GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1);
539 539
540 if (c) { 540 if (c) {
541 c->name = name; 541 c->name = name;
diff --git a/mm/slub.c b/mm/slub.c
index 0c83e6afe7b2..a2cd47d89e0a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -14,6 +14,7 @@
14#include <linux/interrupt.h> 14#include <linux/interrupt.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/proc_fs.h>
17#include <linux/seq_file.h> 18#include <linux/seq_file.h>
18#include <linux/cpu.h> 19#include <linux/cpu.h>
19#include <linux/cpuset.h> 20#include <linux/cpuset.h>
@@ -2930,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self,
2930 case MEM_CANCEL_OFFLINE: 2931 case MEM_CANCEL_OFFLINE:
2931 break; 2932 break;
2932 } 2933 }
2933 2934 if (ret)
2934 ret = notifier_from_errno(ret); 2935 ret = notifier_from_errno(ret);
2936 else
2937 ret = NOTIFY_OK;
2935 return ret; 2938 return ret;
2936} 2939}
2937 2940
@@ -3594,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
3594 for (i = 0; i < t.count; i++) { 3597 for (i = 0; i < t.count; i++) {
3595 struct location *l = &t.loc[i]; 3598 struct location *l = &t.loc[i];
3596 3599
3597 if (len > PAGE_SIZE - 100) 3600 if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
3598 break; 3601 break;
3599 len += sprintf(buf + len, "%7ld ", l->count); 3602 len += sprintf(buf + len, "%7ld ", l->count);
3600 3603
@@ -4417,14 +4420,6 @@ __initcall(slab_sysfs_init);
4417 * The /proc/slabinfo ABI 4420 * The /proc/slabinfo ABI
4418 */ 4421 */
4419#ifdef CONFIG_SLABINFO 4422#ifdef CONFIG_SLABINFO
4420
4421ssize_t slabinfo_write(struct file *file, const char __user *buffer,
4422 size_t count, loff_t *ppos)
4423{
4424 return -EINVAL;
4425}
4426
4427
4428static void print_slabinfo_header(struct seq_file *m) 4423static void print_slabinfo_header(struct seq_file *m)
4429{ 4424{
4430 seq_puts(m, "slabinfo - version: 2.1\n"); 4425 seq_puts(m, "slabinfo - version: 2.1\n");
@@ -4492,11 +4487,29 @@ static int s_show(struct seq_file *m, void *p)
4492 return 0; 4487 return 0;
4493} 4488}
4494 4489
4495const struct seq_operations slabinfo_op = { 4490static const struct seq_operations slabinfo_op = {
4496 .start = s_start, 4491 .start = s_start,
4497 .next = s_next, 4492 .next = s_next,
4498 .stop = s_stop, 4493 .stop = s_stop,
4499 .show = s_show, 4494 .show = s_show,
4500}; 4495};
4501 4496
4497static int slabinfo_open(struct inode *inode, struct file *file)
4498{
4499 return seq_open(file, &slabinfo_op);
4500}
4501
4502static const struct file_operations proc_slabinfo_operations = {
4503 .open = slabinfo_open,
4504 .read = seq_read,
4505 .llseek = seq_lseek,
4506 .release = seq_release,
4507};
4508
4509static int __init slab_proc_init(void)
4510{
4511 proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
4512 return 0;
4513}
4514module_init(slab_proc_init);
4502#endif /* CONFIG_SLABINFO */ 4515#endif /* CONFIG_SLABINFO */
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a91b5f8fcaf6..a13ea6401ae7 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
64 unsigned long pfn = pte_pfn(*pte); 64 unsigned long pfn = pte_pfn(*pte);
65 int actual_node = early_pfn_to_nid(pfn); 65 int actual_node = early_pfn_to_nid(pfn);
66 66
67 if (actual_node != node) 67 if (node_distance(actual_node, node) > LOCAL_DISTANCE)
68 printk(KERN_WARNING "[%lx-%lx] potential offnode " 68 printk(KERN_WARNING "[%lx-%lx] potential offnode "
69 "page_structs\n", start, end - 1); 69 "page_structs\n", start, end - 1);
70} 70}
diff --git a/mm/sparse.c b/mm/sparse.c
index 39db301b920d..083f5b63e7a8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -570,7 +570,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
570 * set. If this is <=0, then that means that the passed-in 570 * set. If this is <=0, then that means that the passed-in
571 * map was not consumed and must be freed. 571 * map was not consumed and must be freed.
572 */ 572 */
573int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, 573int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
574 int nr_pages) 574 int nr_pages)
575{ 575{
576 unsigned long section_nr = pfn_to_section_nr(start_pfn); 576 unsigned long section_nr = pfn_to_section_nr(start_pfn);
diff --git a/mm/swap.c b/mm/swap.c
index 2152e48a7b8f..b135ec90cdeb 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -299,7 +299,6 @@ void lru_add_drain(void)
299 put_cpu(); 299 put_cpu();
300} 300}
301 301
302#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
303static void lru_add_drain_per_cpu(struct work_struct *dummy) 302static void lru_add_drain_per_cpu(struct work_struct *dummy)
304{ 303{
305 lru_add_drain(); 304 lru_add_drain();
@@ -313,18 +312,6 @@ int lru_add_drain_all(void)
313 return schedule_on_each_cpu(lru_add_drain_per_cpu); 312 return schedule_on_each_cpu(lru_add_drain_per_cpu);
314} 313}
315 314
316#else
317
318/*
319 * Returns 0 for success
320 */
321int lru_add_drain_all(void)
322{
323 lru_add_drain();
324 return 0;
325}
326#endif
327
328/* 315/*
329 * Batched page_cache_release(). Decrement the reference count on all the 316 * Batched page_cache_release(). Decrement the reference count on all the
330 * passed pages. If it fell to zero then remove the page from the LRU and 317 * passed pages. If it fell to zero then remove the page from the LRU and
@@ -445,6 +432,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
445 for (i = 0; i < pagevec_count(pvec); i++) { 432 for (i = 0; i < pagevec_count(pvec); i++) {
446 struct page *page = pvec->pages[i]; 433 struct page *page = pvec->pages[i];
447 struct zone *pagezone = page_zone(page); 434 struct zone *pagezone = page_zone(page);
435 int file;
448 436
449 if (pagezone != zone) { 437 if (pagezone != zone) {
450 if (zone) 438 if (zone)
@@ -456,8 +444,12 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
456 VM_BUG_ON(PageUnevictable(page)); 444 VM_BUG_ON(PageUnevictable(page));
457 VM_BUG_ON(PageLRU(page)); 445 VM_BUG_ON(PageLRU(page));
458 SetPageLRU(page); 446 SetPageLRU(page);
459 if (is_active_lru(lru)) 447 file = is_file_lru(lru);
448 zone->recent_scanned[file]++;
449 if (is_active_lru(lru)) {
460 SetPageActive(page); 450 SetPageActive(page);
451 zone->recent_rotated[file]++;
452 }
461 add_page_to_lru_list(zone, page, lru); 453 add_page_to_lru_list(zone, page, lru);
462 } 454 }
463 if (zone) 455 if (zone)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 90cb67a5417c..54a9f87e5162 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1462,6 +1462,15 @@ static int __init procswaps_init(void)
1462__initcall(procswaps_init); 1462__initcall(procswaps_init);
1463#endif /* CONFIG_PROC_FS */ 1463#endif /* CONFIG_PROC_FS */
1464 1464
1465#ifdef MAX_SWAPFILES_CHECK
1466static int __init max_swapfiles_check(void)
1467{
1468 MAX_SWAPFILES_CHECK();
1469 return 0;
1470}
1471late_initcall(max_swapfiles_check);
1472#endif
1473
1465/* 1474/*
1466 * Written 01/25/92 by Simmule Turner, heavily changed by Linus. 1475 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1467 * 1476 *
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 65ae576030da..1ddb77ba3995 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -15,6 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/proc_fs.h>
18#include <linux/seq_file.h> 19#include <linux/seq_file.h>
19#include <linux/debugobjects.h> 20#include <linux/debugobjects.h>
20#include <linux/kallsyms.h> 21#include <linux/kallsyms.h>
@@ -76,7 +77,6 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
76 77
77 BUG_ON(addr >= end); 78 BUG_ON(addr >= end);
78 pgd = pgd_offset_k(addr); 79 pgd = pgd_offset_k(addr);
79 flush_cache_vunmap(addr, end);
80 do { 80 do {
81 next = pgd_addr_end(addr, end); 81 next = pgd_addr_end(addr, end);
82 if (pgd_none_or_clear_bad(pgd)) 82 if (pgd_none_or_clear_bad(pgd))
@@ -177,7 +177,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
177static inline int is_vmalloc_or_module_addr(const void *x) 177static inline int is_vmalloc_or_module_addr(const void *x)
178{ 178{
179 /* 179 /*
180 * x86-64 and sparc64 put modules in a special place, 180 * ARM, x86-64 and sparc64 put modules in a special place,
181 * and fall back on vmalloc() if that fails. Others 181 * and fall back on vmalloc() if that fails. Others
182 * just put it in the vmalloc space. 182 * just put it in the vmalloc space.
183 */ 183 */
@@ -323,14 +323,14 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
323 323
324 BUG_ON(size & ~PAGE_MASK); 324 BUG_ON(size & ~PAGE_MASK);
325 325
326 addr = ALIGN(vstart, align);
327
328 va = kmalloc_node(sizeof(struct vmap_area), 326 va = kmalloc_node(sizeof(struct vmap_area),
329 gfp_mask & GFP_RECLAIM_MASK, node); 327 gfp_mask & GFP_RECLAIM_MASK, node);
330 if (unlikely(!va)) 328 if (unlikely(!va))
331 return ERR_PTR(-ENOMEM); 329 return ERR_PTR(-ENOMEM);
332 330
333retry: 331retry:
332 addr = ALIGN(vstart, align);
333
334 spin_lock(&vmap_area_lock); 334 spin_lock(&vmap_area_lock);
335 /* XXX: could have a last_hole cache */ 335 /* XXX: could have a last_hole cache */
336 n = vmap_area_root.rb_node; 336 n = vmap_area_root.rb_node;
@@ -361,7 +361,7 @@ retry:
361 goto found; 361 goto found;
362 } 362 }
363 363
364 while (addr + size >= first->va_start && addr + size <= vend) { 364 while (addr + size > first->va_start && addr + size <= vend) {
365 addr = ALIGN(first->va_end + PAGE_SIZE, align); 365 addr = ALIGN(first->va_end + PAGE_SIZE, align);
366 366
367 n = rb_next(&first->rb_node); 367 n = rb_next(&first->rb_node);
@@ -521,24 +521,45 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
521} 521}
522 522
523/* 523/*
524 * Kick off a purge of the outstanding lazy areas. Don't bother if somebody
525 * is already purging.
526 */
527static void try_purge_vmap_area_lazy(void)
528{
529 unsigned long start = ULONG_MAX, end = 0;
530
531 __purge_vmap_area_lazy(&start, &end, 0, 0);
532}
533
534/*
524 * Kick off a purge of the outstanding lazy areas. 535 * Kick off a purge of the outstanding lazy areas.
525 */ 536 */
526static void purge_vmap_area_lazy(void) 537static void purge_vmap_area_lazy(void)
527{ 538{
528 unsigned long start = ULONG_MAX, end = 0; 539 unsigned long start = ULONG_MAX, end = 0;
529 540
530 __purge_vmap_area_lazy(&start, &end, 0, 0); 541 __purge_vmap_area_lazy(&start, &end, 1, 0);
531} 542}
532 543
533/* 544/*
534 * Free and unmap a vmap area 545 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
546 * called for the correct range previously.
535 */ 547 */
536static void free_unmap_vmap_area(struct vmap_area *va) 548static void free_unmap_vmap_area_noflush(struct vmap_area *va)
537{ 549{
538 va->flags |= VM_LAZY_FREE; 550 va->flags |= VM_LAZY_FREE;
539 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); 551 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
540 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) 552 if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
541 purge_vmap_area_lazy(); 553 try_purge_vmap_area_lazy();
554}
555
556/*
557 * Free and unmap a vmap area
558 */
559static void free_unmap_vmap_area(struct vmap_area *va)
560{
561 flush_cache_vunmap(va->va_start, va->va_end);
562 free_unmap_vmap_area_noflush(va);
542} 563}
543 564
544static struct vmap_area *find_vmap_area(unsigned long addr) 565static struct vmap_area *find_vmap_area(unsigned long addr)
@@ -591,6 +612,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr)
591 612
592#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) 613#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
593 614
615static bool vmap_initialized __read_mostly = false;
616
594struct vmap_block_queue { 617struct vmap_block_queue {
595 spinlock_t lock; 618 spinlock_t lock;
596 struct list_head free; 619 struct list_head free;
@@ -720,7 +743,7 @@ static void free_vmap_block(struct vmap_block *vb)
720 spin_unlock(&vmap_block_tree_lock); 743 spin_unlock(&vmap_block_tree_lock);
721 BUG_ON(tmp != vb); 744 BUG_ON(tmp != vb);
722 745
723 free_unmap_vmap_area(vb->va); 746 free_unmap_vmap_area_noflush(vb->va);
724 call_rcu(&vb->rcu_head, rcu_free_vb); 747 call_rcu(&vb->rcu_head, rcu_free_vb);
725} 748}
726 749
@@ -782,6 +805,9 @@ static void vb_free(const void *addr, unsigned long size)
782 805
783 BUG_ON(size & ~PAGE_MASK); 806 BUG_ON(size & ~PAGE_MASK);
784 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); 807 BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
808
809 flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
810
785 order = get_order(size); 811 order = get_order(size);
786 812
787 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); 813 offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
@@ -827,6 +853,9 @@ void vm_unmap_aliases(void)
827 int cpu; 853 int cpu;
828 int flush = 0; 854 int flush = 0;
829 855
856 if (unlikely(!vmap_initialized))
857 return;
858
830 for_each_possible_cpu(cpu) { 859 for_each_possible_cpu(cpu) {
831 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); 860 struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
832 struct vmap_block *vb; 861 struct vmap_block *vb;
@@ -896,7 +925,8 @@ EXPORT_SYMBOL(vm_unmap_ram);
896 * @count: number of pages 925 * @count: number of pages
897 * @node: prefer to allocate data structures on this node 926 * @node: prefer to allocate data structures on this node
898 * @prot: memory protection to use. PAGE_KERNEL for regular RAM 927 * @prot: memory protection to use. PAGE_KERNEL for regular RAM
899 * @returns: a pointer to the address that has been mapped, or NULL on failure 928 *
929 * Returns: a pointer to the address that has been mapped, or %NULL on failure
900 */ 930 */
901void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) 931void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
902{ 932{
@@ -940,6 +970,8 @@ void __init vmalloc_init(void)
940 INIT_LIST_HEAD(&vbq->dirty); 970 INIT_LIST_HEAD(&vbq->dirty);
941 vbq->nr_dirty = 0; 971 vbq->nr_dirty = 0;
942 } 972 }
973
974 vmap_initialized = true;
943} 975}
944 976
945void unmap_kernel_range(unsigned long addr, unsigned long size) 977void unmap_kernel_range(unsigned long addr, unsigned long size)
@@ -1685,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p)
1685 v->addr, v->addr + v->size, v->size); 1717 v->addr, v->addr + v->size, v->size);
1686 1718
1687 if (v->caller) { 1719 if (v->caller) {
1688 char buff[2 * KSYM_NAME_LEN]; 1720 char buff[KSYM_SYMBOL_LEN];
1689 1721
1690 seq_putc(m, ' '); 1722 seq_putc(m, ' ');
1691 sprint_symbol(buff, (unsigned long)v->caller); 1723 sprint_symbol(buff, (unsigned long)v->caller);
@@ -1718,11 +1750,41 @@ static int s_show(struct seq_file *m, void *p)
1718 return 0; 1750 return 0;
1719} 1751}
1720 1752
1721const struct seq_operations vmalloc_op = { 1753static const struct seq_operations vmalloc_op = {
1722 .start = s_start, 1754 .start = s_start,
1723 .next = s_next, 1755 .next = s_next,
1724 .stop = s_stop, 1756 .stop = s_stop,
1725 .show = s_show, 1757 .show = s_show,
1726}; 1758};
1759
1760static int vmalloc_open(struct inode *inode, struct file *file)
1761{
1762 unsigned int *ptr = NULL;
1763 int ret;
1764
1765 if (NUMA_BUILD)
1766 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
1767 ret = seq_open(file, &vmalloc_op);
1768 if (!ret) {
1769 struct seq_file *m = file->private_data;
1770 m->private = ptr;
1771 } else
1772 kfree(ptr);
1773 return ret;
1774}
1775
1776static const struct file_operations proc_vmalloc_operations = {
1777 .open = vmalloc_open,
1778 .read = seq_read,
1779 .llseek = seq_lseek,
1780 .release = seq_release_private,
1781};
1782
1783static int __init proc_vmalloc_init(void)
1784{
1785 proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
1786 return 0;
1787}
1788module_init(proc_vmalloc_init);
1727#endif 1789#endif
1728 1790
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3b5860294bb6..62e7f62fb559 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -623,6 +623,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
623 * Try to allocate it some swap space here. 623 * Try to allocate it some swap space here.
624 */ 624 */
625 if (PageAnon(page) && !PageSwapCache(page)) { 625 if (PageAnon(page) && !PageSwapCache(page)) {
626 if (!(sc->gfp_mask & __GFP_IO))
627 goto keep_locked;
626 switch (try_to_munlock(page)) { 628 switch (try_to_munlock(page)) {
627 case SWAP_FAIL: /* shouldn't happen */ 629 case SWAP_FAIL: /* shouldn't happen */
628 case SWAP_AGAIN: 630 case SWAP_AGAIN:
@@ -634,6 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
634 } 636 }
635 if (!add_to_swap(page, GFP_ATOMIC)) 637 if (!add_to_swap(page, GFP_ATOMIC))
636 goto activate_locked; 638 goto activate_locked;
639 may_enter_fs = 1;
637 } 640 }
638#endif /* CONFIG_SWAP */ 641#endif /* CONFIG_SWAP */
639 642
@@ -1245,6 +1248,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1245 list_add(&page->lru, &l_inactive); 1248 list_add(&page->lru, &l_inactive);
1246 } 1249 }
1247 1250
1251 spin_lock_irq(&zone->lru_lock);
1248 /* 1252 /*
1249 * Count referenced pages from currently used mappings as 1253 * Count referenced pages from currently used mappings as
1250 * rotated, even though they are moved to the inactive list. 1254 * rotated, even though they are moved to the inactive list.
@@ -1260,7 +1264,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1260 1264
1261 pgmoved = 0; 1265 pgmoved = 0;
1262 lru = LRU_BASE + file * LRU_FILE; 1266 lru = LRU_BASE + file * LRU_FILE;
1263 spin_lock_irq(&zone->lru_lock);
1264 while (!list_empty(&l_inactive)) { 1267 while (!list_empty(&l_inactive)) {
1265 page = lru_to_page(&l_inactive); 1268 page = lru_to_page(&l_inactive);
1266 prefetchw_prev_lru_page(page, &l_inactive, flags); 1269 prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1386,9 +1389,9 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
1386 file_prio = 200 - sc->swappiness; 1389 file_prio = 200 - sc->swappiness;
1387 1390
1388 /* 1391 /*
1389 * anon recent_rotated[0] 1392 * The amount of pressure on anon vs file pages is inversely
1390 * %anon = 100 * ----------- / ----------------- * IO cost 1393 * proportional to the fraction of recently scanned pages on
1391 * anon + file rotate_sum 1394 * each list that were recently referenced and in active use.
1392 */ 1395 */
1393 ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); 1396 ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
1394 ap /= zone->recent_rotated[0] + 1; 1397 ap /= zone->recent_rotated[0] + 1;
@@ -2368,39 +2371,6 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
2368 return 1; 2371 return 1;
2369} 2372}
2370 2373
2371static void show_page_path(struct page *page)
2372{
2373 char buf[256];
2374 if (page_is_file_cache(page)) {
2375 struct address_space *mapping = page->mapping;
2376 struct dentry *dentry;
2377 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
2378
2379 spin_lock(&mapping->i_mmap_lock);
2380 dentry = d_find_alias(mapping->host);
2381 printk(KERN_INFO "rescued: %s %lu\n",
2382 dentry_path(dentry, buf, 256), pgoff);
2383 spin_unlock(&mapping->i_mmap_lock);
2384 } else {
2385#if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU)
2386 struct anon_vma *anon_vma;
2387 struct vm_area_struct *vma;
2388
2389 anon_vma = page_lock_anon_vma(page);
2390 if (!anon_vma)
2391 return;
2392
2393 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
2394 printk(KERN_INFO "rescued: anon %s\n",
2395 vma->vm_mm->owner->comm);
2396 break;
2397 }
2398 page_unlock_anon_vma(anon_vma);
2399#endif
2400 }
2401}
2402
2403
2404/** 2374/**
2405 * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list 2375 * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
2406 * @page: page to check evictability and move to appropriate lru list 2376 * @page: page to check evictability and move to appropriate lru list
@@ -2421,8 +2391,6 @@ retry:
2421 if (page_evictable(page, NULL)) { 2391 if (page_evictable(page, NULL)) {
2422 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); 2392 enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page);
2423 2393
2424 show_page_path(page);
2425
2426 __dec_zone_state(zone, NR_UNEVICTABLE); 2394 __dec_zone_state(zone, NR_UNEVICTABLE);
2427 list_move(&page->lru, &zone->lru[l].list); 2395 list_move(&page->lru, &zone->lru[l].list);
2428 __inc_zone_state(zone, NR_INACTIVE_ANON + l); 2396 __inc_zone_state(zone, NR_INACTIVE_ANON + l);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 9343227c5c60..c3ccfda23adc 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -8,7 +8,7 @@
8 * Copyright (C) 2006 Silicon Graphics, Inc., 8 * Copyright (C) 2006 Silicon Graphics, Inc.,
9 * Christoph Lameter <christoph@lameter.com> 9 * Christoph Lameter <christoph@lameter.com>
10 */ 10 */
11 11#include <linux/fs.h>
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/err.h> 13#include <linux/err.h>
14#include <linux/module.h> 14#include <linux/module.h>
@@ -384,7 +384,7 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z)
384#endif 384#endif
385 385
386#ifdef CONFIG_PROC_FS 386#ifdef CONFIG_PROC_FS
387 387#include <linux/proc_fs.h>
388#include <linux/seq_file.h> 388#include <linux/seq_file.h>
389 389
390static char * const migratetype_names[MIGRATE_TYPES] = { 390static char * const migratetype_names[MIGRATE_TYPES] = {
@@ -581,20 +581,44 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg)
581 return 0; 581 return 0;
582} 582}
583 583
584const struct seq_operations fragmentation_op = { 584static const struct seq_operations fragmentation_op = {
585 .start = frag_start, 585 .start = frag_start,
586 .next = frag_next, 586 .next = frag_next,
587 .stop = frag_stop, 587 .stop = frag_stop,
588 .show = frag_show, 588 .show = frag_show,
589}; 589};
590 590
591const struct seq_operations pagetypeinfo_op = { 591static int fragmentation_open(struct inode *inode, struct file *file)
592{
593 return seq_open(file, &fragmentation_op);
594}
595
596static const struct file_operations fragmentation_file_operations = {
597 .open = fragmentation_open,
598 .read = seq_read,
599 .llseek = seq_lseek,
600 .release = seq_release,
601};
602
603static const struct seq_operations pagetypeinfo_op = {
592 .start = frag_start, 604 .start = frag_start,
593 .next = frag_next, 605 .next = frag_next,
594 .stop = frag_stop, 606 .stop = frag_stop,
595 .show = pagetypeinfo_show, 607 .show = pagetypeinfo_show,
596}; 608};
597 609
610static int pagetypeinfo_open(struct inode *inode, struct file *file)
611{
612 return seq_open(file, &pagetypeinfo_op);
613}
614
615static const struct file_operations pagetypeinfo_file_ops = {
616 .open = pagetypeinfo_open,
617 .read = seq_read,
618 .llseek = seq_lseek,
619 .release = seq_release,
620};
621
598#ifdef CONFIG_ZONE_DMA 622#ifdef CONFIG_ZONE_DMA
599#define TEXT_FOR_DMA(xx) xx "_dma", 623#define TEXT_FOR_DMA(xx) xx "_dma",
600#else 624#else
@@ -771,7 +795,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
771 return 0; 795 return 0;
772} 796}
773 797
774const struct seq_operations zoneinfo_op = { 798static const struct seq_operations zoneinfo_op = {
775 .start = frag_start, /* iterate over all zones. The same as in 799 .start = frag_start, /* iterate over all zones. The same as in
776 * fragmentation. */ 800 * fragmentation. */
777 .next = frag_next, 801 .next = frag_next,
@@ -779,6 +803,18 @@ const struct seq_operations zoneinfo_op = {
779 .show = zoneinfo_show, 803 .show = zoneinfo_show,
780}; 804};
781 805
806static int zoneinfo_open(struct inode *inode, struct file *file)
807{
808 return seq_open(file, &zoneinfo_op);
809}
810
811static const struct file_operations proc_zoneinfo_file_operations = {
812 .open = zoneinfo_open,
813 .read = seq_read,
814 .llseek = seq_lseek,
815 .release = seq_release,
816};
817
782static void *vmstat_start(struct seq_file *m, loff_t *pos) 818static void *vmstat_start(struct seq_file *m, loff_t *pos)
783{ 819{
784 unsigned long *v; 820 unsigned long *v;
@@ -834,13 +870,24 @@ static void vmstat_stop(struct seq_file *m, void *arg)
834 m->private = NULL; 870 m->private = NULL;
835} 871}
836 872
837const struct seq_operations vmstat_op = { 873static const struct seq_operations vmstat_op = {
838 .start = vmstat_start, 874 .start = vmstat_start,
839 .next = vmstat_next, 875 .next = vmstat_next,
840 .stop = vmstat_stop, 876 .stop = vmstat_stop,
841 .show = vmstat_show, 877 .show = vmstat_show,
842}; 878};
843 879
880static int vmstat_open(struct inode *inode, struct file *file)
881{
882 return seq_open(file, &vmstat_op);
883}
884
885static const struct file_operations proc_vmstat_file_operations = {
886 .open = vmstat_open,
887 .read = seq_read,
888 .llseek = seq_lseek,
889 .release = seq_release,
890};
844#endif /* CONFIG_PROC_FS */ 891#endif /* CONFIG_PROC_FS */
845 892
846#ifdef CONFIG_SMP 893#ifdef CONFIG_SMP
@@ -898,9 +945,11 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
898 945
899static struct notifier_block __cpuinitdata vmstat_notifier = 946static struct notifier_block __cpuinitdata vmstat_notifier =
900 { &vmstat_cpuup_callback, NULL, 0 }; 947 { &vmstat_cpuup_callback, NULL, 0 };
948#endif
901 949
902static int __init setup_vmstat(void) 950static int __init setup_vmstat(void)
903{ 951{
952#ifdef CONFIG_SMP
904 int cpu; 953 int cpu;
905 954
906 refresh_zone_stat_thresholds(); 955 refresh_zone_stat_thresholds();
@@ -908,7 +957,13 @@ static int __init setup_vmstat(void)
908 957
909 for_each_online_cpu(cpu) 958 for_each_online_cpu(cpu)
910 start_cpu_timer(cpu); 959 start_cpu_timer(cpu);
960#endif
961#ifdef CONFIG_PROC_FS
962 proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
963 proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
964 proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
965 proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
966#endif
911 return 0; 967 return 0;
912} 968}
913module_init(setup_vmstat) 969module_init(setup_vmstat)
914#endif