Diffstat (limited to 'mm')
 mm/backing-dev.c     |   3
 mm/filemap.c         | 242
 mm/hugetlb.c         |  59
 mm/internal.h        |  29
 mm/memcontrol.c      |   1
 mm/memory.c          |  70
 mm/memory_hotplug.c  |  12
 mm/mempolicy.c       |  18
 mm/migrate.c         |  66
 mm/mlock.c           |  18
 mm/mmap.c            |   5
 mm/nommu.c           |   3
 mm/oom_kill.c        |   3
 mm/page_alloc.c      |  32
 mm/page_cgroup.c     |  84
 mm/page_isolation.c  |   5
 mm/shmem.c           |   8
 mm/slab.c            |  52
 mm/slob.c            |   2
 mm/slub.c            |  37
 mm/sparse-vmemmap.c  |   2
 mm/sparse.c          |   2
 mm/swap.c            |  20
 mm/swapfile.c        |   9
 mm/vmalloc.c         |  88
 mm/vmscan.c          |  46
 mm/vmstat.c          |  69
 27 files changed, 545 insertions(+), 440 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f2e574dbc300..801c08b046e6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -176,6 +176,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 	int ret = 0;
 	struct device *dev;
 
+	if (bdi->dev)	/* The driver needs to use separate queues per device */
+		goto exit;
+
 	va_start(args, fmt);
 	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
 	va_end(args);
diff --git a/mm/filemap.c b/mm/filemap.c
index ab8553658af3..f3e5f8944d17 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping,
 {
 	const struct address_space_operations *aops = mapping->a_ops;
 
-	if (aops->write_begin) {
-		return aops->write_begin(file, mapping, pos, len, flags,
-							pagep, fsdata);
-	} else {
-		int ret;
-		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-		struct inode *inode = mapping->host;
-		struct page *page;
-again:
-		page = __grab_cache_page(mapping, index);
-		*pagep = page;
-		if (!page)
-			return -ENOMEM;
-
-		if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
-			/*
-			 * There is no way to resolve a short write situation
-			 * for a !Uptodate page (except by double copying in
-			 * the caller done by generic_perform_write_2copy).
-			 *
-			 * Instead, we have to bring it uptodate here.
-			 */
-			ret = aops->readpage(file, page);
-			page_cache_release(page);
-			if (ret) {
-				if (ret == AOP_TRUNCATED_PAGE)
-					goto again;
-				return ret;
-			}
-			goto again;
-		}
-
-		ret = aops->prepare_write(file, page, offset, offset+len);
-		if (ret) {
-			unlock_page(page);
-			page_cache_release(page);
-			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
-		}
-		return ret;
-	}
+	return aops->write_begin(file, mapping, pos, len, flags,
+							pagep, fsdata);
 }
 EXPORT_SYMBOL(pagecache_write_begin);
 
@@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 			struct page *page, void *fsdata)
 {
 	const struct address_space_operations *aops = mapping->a_ops;
-	int ret;
-
-	if (aops->write_end) {
-		mark_page_accessed(page);
-		ret = aops->write_end(file, mapping, pos, len, copied,
-					page, fsdata);
-	} else {
-		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-		struct inode *inode = mapping->host;
-
-		flush_dcache_page(page);
-		ret = aops->commit_write(file, page, offset, offset+len);
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		if (ret < 0) {
-			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
-		} else if (ret > 0)
-			ret = min_t(size_t, copied, ret);
-		else
-			ret = copied;
-	}
 
-	return ret;
+	mark_page_accessed(page);
+	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 EXPORT_SYMBOL(pagecache_write_end);
 
@@ -2226,174 +2163,6 @@ repeat:
 }
 EXPORT_SYMBOL(__grab_cache_page);
 
-static ssize_t generic_perform_write_2copy(struct file *file,
-				struct iov_iter *i, loff_t pos)
-{
-	struct address_space *mapping = file->f_mapping;
-	const struct address_space_operations *a_ops = mapping->a_ops;
-	struct inode *inode = mapping->host;
-	long status = 0;
-	ssize_t written = 0;
-
-	do {
-		struct page *src_page;
-		struct page *page;
-		pgoff_t index;		/* Pagecache index for current page */
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
-		size_t copied;		/* Bytes copied from user */
-
-		offset = (pos & (PAGE_CACHE_SIZE - 1));
-		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
-						iov_iter_count(i));
-
-		/*
-		 * a non-NULL src_page indicates that we're doing the
-		 * copy via get_user_pages and kmap.
-		 */
-		src_page = NULL;
-
-		/*
-		 * Bring in the user page that we will copy from _first_.
-		 * Otherwise there's a nasty deadlock on copying from the
-		 * same page as we're writing to, without it being marked
-		 * up-to-date.
-		 *
-		 * Not only is this an optimisation, but it is also required
-		 * to check that the address is actually valid, when atomic
-		 * usercopies are used, below.
-		 */
-		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
-			status = -EFAULT;
-			break;
-		}
-
-		page = __grab_cache_page(mapping, index);
-		if (!page) {
-			status = -ENOMEM;
-			break;
-		}
-
-		/*
-		 * non-uptodate pages cannot cope with short copies, and we
-		 * cannot take a pagefault with the destination page locked.
-		 * So pin the source page to copy it.
-		 */
-		if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
-			unlock_page(page);
-
-			src_page = alloc_page(GFP_KERNEL);
-			if (!src_page) {
-				page_cache_release(page);
-				status = -ENOMEM;
-				break;
-			}
-
-			/*
-			 * Cannot get_user_pages with a page locked for the
-			 * same reason as we can't take a page fault with a
-			 * page locked (as explained below).
-			 */
-			copied = iov_iter_copy_from_user(src_page, i,
-								offset, bytes);
-			if (unlikely(copied == 0)) {
-				status = -EFAULT;
-				page_cache_release(page);
-				page_cache_release(src_page);
-				break;
-			}
-			bytes = copied;
-
-			lock_page(page);
-			/*
-			 * Can't handle the page going uptodate here, because
-			 * that means we would use non-atomic usercopies, which
-			 * zero out the tail of the page, which can cause
-			 * zeroes to become transiently visible. We could just
-			 * use a non-zeroing copy, but the APIs aren't too
-			 * consistent.
-			 */
-			if (unlikely(!page->mapping || PageUptodate(page))) {
-				unlock_page(page);
-				page_cache_release(page);
-				page_cache_release(src_page);
-				continue;
-			}
-		}
-
-		status = a_ops->prepare_write(file, page, offset, offset+bytes);
-		if (unlikely(status))
-			goto fs_write_aop_error;
-
-		if (!src_page) {
-			/*
-			 * Must not enter the pagefault handler here, because
-			 * we hold the page lock, so we might recursively
-			 * deadlock on the same lock, or get an ABBA deadlock
-			 * against a different lock, or against the mmap_sem
-			 * (which nests outside the page lock). So increment
-			 * preempt count, and use _atomic usercopies.
-			 *
-			 * The page is uptodate so we are OK to encounter a
-			 * short copy: if unmodified parts of the page are
-			 * marked dirty and written out to disk, it doesn't
-			 * really matter.
-			 */
-			pagefault_disable();
-			copied = iov_iter_copy_from_user_atomic(page, i,
-								offset, bytes);
-			pagefault_enable();
-		} else {
-			void *src, *dst;
-			src = kmap_atomic(src_page, KM_USER0);
-			dst = kmap_atomic(page, KM_USER1);
-			memcpy(dst + offset, src + offset, bytes);
-			kunmap_atomic(dst, KM_USER1);
-			kunmap_atomic(src, KM_USER0);
-			copied = bytes;
-		}
-		flush_dcache_page(page);
-
-		status = a_ops->commit_write(file, page, offset, offset+bytes);
-		if (unlikely(status < 0))
-			goto fs_write_aop_error;
-		if (unlikely(status > 0)) /* filesystem did partial write */
-			copied = min_t(size_t, copied, status);
-
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
-		if (src_page)
-			page_cache_release(src_page);
-
-		iov_iter_advance(i, copied);
-		pos += copied;
-		written += copied;
-
-		balance_dirty_pages_ratelimited(mapping);
-		cond_resched();
-		continue;
-
-fs_write_aop_error:
-		unlock_page(page);
-		page_cache_release(page);
-		if (src_page)
-			page_cache_release(src_page);
-
-		/*
-		 * prepare_write() may have instantiated a few blocks
-		 * outside i_size. Trim these off again. Don't need
-		 * i_size_read because we hold i_mutex.
-		 */
-		if (pos + bytes > inode->i_size)
-			vmtruncate(inode, inode->i_size);
-		break;
-	} while (iov_iter_count(i));
-
-	return written ? written : status;
-}
-
 static ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, loff_t pos)
 {
@@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	struct iov_iter i;
 
 	iov_iter_init(&i, iov, nr_segs, count, written);
-	if (a_ops->write_begin)
-		status = generic_perform_write(file, &i, pos);
-	else
-		status = generic_perform_write_2copy(file, &i, pos);
+	status = generic_perform_write(file, &i, pos);
 
 	if (likely(status >= 0)) {
 		written += status;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ce8cbb29860b..6058b53dcb89 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/seq_file.h>
 #include <linux/sysctl.h>
 #include <linux/highmem.h>
 #include <linux/mmu_notifier.h>
@@ -353,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
 	return 0;
 }
 
+static void clear_gigantic_page(struct page *page,
+			unsigned long addr, unsigned long sz)
+{
+	int i;
+	struct page *p = page;
+
+	might_sleep();
+	for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
+		cond_resched();
+		clear_user_highpage(p, addr + i * PAGE_SIZE);
+	}
+}
 static void clear_huge_page(struct page *page,
 			unsigned long addr, unsigned long sz)
 {
 	int i;
 
+	if (unlikely(sz > MAX_ORDER_NR_PAGES))
+		return clear_gigantic_page(page, addr, sz);
+
 	might_sleep();
 	for (i = 0; i < sz/PAGE_SIZE; i++) {
 		cond_resched();
@@ -365,12 +381,32 @@ static void clear_huge_page(struct page *page,
 	}
 }
 
+static void copy_gigantic_page(struct page *dst, struct page *src,
+			   unsigned long addr, struct vm_area_struct *vma)
+{
+	int i;
+	struct hstate *h = hstate_vma(vma);
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+	might_sleep();
+	for (i = 0; i < pages_per_huge_page(h); ) {
+		cond_resched();
+		copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
 static void copy_huge_page(struct page *dst, struct page *src,
 			   unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
 	struct hstate *h = hstate_vma(vma);
 
+	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
+		return copy_gigantic_page(dst, src, addr, vma);
+
 	might_sleep();
 	for (i = 0; i < pages_per_huge_page(h); i++) {
 		cond_resched();
@@ -455,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 {
 	int i;
 
+	VM_BUG_ON(h->order >= MAX_ORDER);
+
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[page_to_nid(page)]--;
 	for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -969,6 +1007,14 @@ found:
 	return 1;
 }
 
+static void prep_compound_huge_page(struct page *page, int order)
+{
+	if (unlikely(order > (MAX_ORDER - 1)))
+		prep_compound_gigantic_page(page, order);
+	else
+		prep_compound_page(page, order);
+}
+
 /* Put bootmem huge pages into the standard lists after mem_map is up */
 static void __init gather_bootmem_prealloc(void)
 {
@@ -979,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void)
 		struct hstate *h = m->hstate;
 		__ClearPageReserved(page);
 		WARN_ON(page_count(page) != 1);
-		prep_compound_page(page, h->order);
+		prep_compound_huge_page(page, h->order);
 		prep_new_huge_page(h, page, page_to_nid(page));
 	}
 }
@@ -1455,10 +1501,10 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
 #endif /* CONFIG_SYSCTL */
 
-int hugetlb_report_meminfo(char *buf)
+void hugetlb_report_meminfo(struct seq_file *m)
 {
 	struct hstate *h = &default_hstate;
-	return sprintf(buf,
+	seq_printf(m,
 			"HugePages_Total: %5lu\n"
 			"HugePages_Free: %5lu\n"
 			"HugePages_Rsvd: %5lu\n"
@@ -1750,6 +1796,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 				struct page *page, unsigned long address)
 {
+	struct hstate *h = hstate_vma(vma);
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
 	struct prio_tree_iter iter;
@@ -1759,7 +1806,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * vm_pgoff is in PAGE_SIZE units, hence the different calculation
 	 * from page cache lookup which is in HPAGE_SIZE units.
 	 */
-	address = address & huge_page_mask(hstate_vma(vma));
+	address = address & huge_page_mask(h);
 	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
 		+ (vma->vm_pgoff >> PAGE_SHIFT);
 	mapping = (struct address_space *)page_private(page);
@@ -1778,7 +1825,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
 			unmap_hugepage_range(iter_vma,
-				address, address + HPAGE_SIZE,
+				address, address + huge_page_size(h),
 				page);
 	}
 
@@ -2129,7 +2176,7 @@ same_page:
 		if (zeropage_ok)
 			pages[i] = ZERO_PAGE(0);
 		else
-			pages[i] = page + pfn_offset;
+			pages[i] = mem_map_offset(page, pfn_offset);
 		get_page(pages[i]);
 	}
 
diff --git a/mm/internal.h b/mm/internal.h
index e4e728bdf324..13333bc2eb68 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
 extern void prep_compound_page(struct page *page, unsigned long order);
+extern void prep_compound_gigantic_page(struct page *page, unsigned long order);
 
 static inline void set_page_count(struct page *page, int v)
 {
@@ -176,6 +177,34 @@ static inline void free_page_mlock(struct page *page) { }
 #endif /* CONFIG_UNEVICTABLE_LRU */
 
 /*
+ * Return the mem_map entry representing the 'offset' subpage within
+ * the maximally aligned gigantic page 'base'. Handle any discontiguity
+ * in the mem_map at MAX_ORDER_NR_PAGES boundaries.
+ */
+static inline struct page *mem_map_offset(struct page *base, int offset)
+{
+	if (unlikely(offset >= MAX_ORDER_NR_PAGES))
+		return pfn_to_page(page_to_pfn(base) + offset);
+	return base + offset;
+}
+
+/*
+ * Iterator over all subpages withing the maximally aligned gigantic
+ * page 'base'. Handle any discontiguity in the mem_map.
+ */
+static inline struct page *mem_map_next(struct page *iter,
+						struct page *base, int offset)
+{
+	if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
+		unsigned long pfn = page_to_pfn(base) + offset;
+		if (!pfn_valid(pfn))
+			return NULL;
+		return pfn_to_page(pfn);
+	}
+	return iter + 1;
+}
+
+/*
  * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
  * so all functions starting at paging_init should be marked __init
  * in those cases. SPARSEMEM, however, allows for memory hotplug,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d4a92b63e98e..866dcc7eeb0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1088,7 +1088,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	int node;
 
 	if (unlikely((cont->parent) == NULL)) {
-		page_cgroup_init();
 		mem = &init_mem_cgroup;
 	} else {
 		mem = mem_cgroup_alloc();
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..f01b7eed6e16 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -669,6 +669,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
+	if (unlikely(is_pfn_mapping(vma))) {
+		/*
+		 * We do not free on error cases below as remove_vma
+		 * gets called on error from higher level routine
+		 */
+		ret = track_pfn_vma_copy(vma);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * We need to invalidate the secondary MMU mappings only when
 	 * there could be a permission downgrade on the ptes of the
@@ -915,6 +925,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		if (vma->vm_flags & VM_ACCOUNT)
 			*nr_accounted += (end - start) >> PAGE_SHIFT;
 
+		if (unlikely(is_pfn_mapping(vma)))
+			untrack_pfn_vma(vma, 0, 0);
+
 		while (start != end) {
 			if (!tlb_start_valid) {
 				tlb_start = start;
@@ -1430,6 +1443,7 @@ out:
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
 {
+	int ret;
 	/*
 	 * Technically, architectures with pte_special can avoid all these
 	 * restrictions (same for remap_pfn_range). However we would like
@@ -1444,7 +1458,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
-	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+	if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
+		return -EINVAL;
+
+	ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+
+	if (ret)
+		untrack_pfn_vma(vma, pfn, PAGE_SIZE);
+
+	return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
 
@@ -1575,14 +1597,17 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * behaviour that some programs depend on. We mark the "original"
 	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
 	 */
-	if (is_cow_mapping(vma->vm_flags)) {
-		if (addr != vma->vm_start || end != vma->vm_end)
-			return -EINVAL;
+	if (addr == vma->vm_start && end == vma->vm_end)
 		vma->vm_pgoff = pfn;
-	}
+	else if (is_cow_mapping(vma->vm_flags))
+		return -EINVAL;
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
+	err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
+	if (err)
+		return -EINVAL;
+
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);
@@ -1594,6 +1619,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+
+	if (err)
+		untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+
 	return err;
 }
 EXPORT_SYMBOL(remap_pfn_range);
@@ -2865,9 +2894,9 @@ int in_gate_area_no_task(unsigned long addr)
 #endif /* __HAVE_ARCH_GATE_AREA */
 
 #ifdef CONFIG_HAVE_IOREMAP_PROT
-static resource_size_t follow_phys(struct vm_area_struct *vma,
-			unsigned long address, unsigned int flags,
-			unsigned long *prot)
+int follow_phys(struct vm_area_struct *vma,
+		unsigned long address, unsigned int flags,
+		unsigned long *prot, resource_size_t *phys)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -2876,24 +2905,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	resource_size_t phys_addr = 0;
 	struct mm_struct *mm = vma->vm_mm;
+	int ret = -EINVAL;
 
-	VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		goto out;
 
 	pgd = pgd_offset(mm, address);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
-		goto no_page_table;
+		goto out;
 
 	pud = pud_offset(pgd, address);
 	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
-		goto no_page_table;
+		goto out;
 
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
-		goto no_page_table;
+		goto out;
 
 	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
 	if (pmd_huge(*pmd))
-		goto no_page_table;
+		goto out;
 
 	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (!ptep)
@@ -2908,13 +2939,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
 	phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
 
 	*prot = pgprot_val(pte_pgprot(pte));
+	*phys = phys_addr;
+	ret = 0;
 
 unlock:
 	pte_unmap_unlock(ptep, ptl);
 out:
-	return phys_addr;
-no_page_table:
-	return 0;
+	return ret;
 }
 
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
@@ -2925,12 +2956,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 	void *maddr;
 	int offset = addr & (PAGE_SIZE-1);
 
-	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
-		return -EINVAL;
-
-	phys_addr = follow_phys(vma, addr, write, &prot);
-
-	if (!phys_addr)
+	if (follow_phys(vma, addr, write, &prot, &phys_addr))
 		return -EINVAL;
 
 	maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 6837a1014372..b17371185468 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -22,7 +22,6 @@
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
 #include <linux/ioport.h>
-#include <linux/cpuset.h>
 #include <linux/delay.h>
 #include <linux/migrate.h>
 #include <linux/page-isolation.h>
@@ -190,7 +189,7 @@ static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
 					pgdat->node_start_pfn;
 }
 
-static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	int nr_pages = PAGES_PER_SECTION;
@@ -217,7 +216,7 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	return 0;
 }
 
-static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn)
 {
 	int nr_pages = PAGES_PER_SECTION;
 	int ret;
@@ -274,7 +273,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
  * call this function after deciding the zone to which to
  * add the new pages.
  */
-int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
+int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
 		unsigned long nr_pages)
 {
 	unsigned long i;
@@ -471,7 +470,8 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
 }
 
 
-int add_memory(int nid, u64 start, u64 size)
+/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
+int __ref add_memory(int nid, u64 start, u64 size)
 {
 	pg_data_t *pgdat = NULL;
 	int new_pgdat = 0;
@@ -498,8 +498,6 @@ int add_memory(int nid, u64 start, u64 size)
 	/* we online node here. we can't roll back from here. */
 	node_set_online(nid);
 
-	cpuset_track_online_nodes();
-
 	if (new_pgdat) {
 		ret = register_one_node(nid);
 		/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 36f42573a335..e9493b1c1117 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	int err;
 	struct vm_area_struct *first, *vma, *prev;
 
-	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-
-		err = migrate_prep();
-		if (err)
-			return ERR_PTR(err);
-	}
 
 	first = find_vma(mm, start);
 	if (!first)
@@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm,
 	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
 {
 	int busy = 0;
-	int err = 0;
+	int err;
 	nodemask_t tmp;
 
+	err = migrate_prep();
+	if (err)
+		return err;
+
 	down_read(&mm->mmap_sem);
 
 	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
@@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len,
 		 start, start + len, mode, mode_flags,
 		 nmask ? nodes_addr(*nmask)[0] : -1);
 
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+
+		err = migrate_prep();
+		if (err)
+			return err;
+	}
 	down_write(&mm->mmap_sem);
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
diff --git a/mm/migrate.c b/mm/migrate.c
index 6602941bfab0..037b0967c1e3 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -522,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page)
 	remove_migration_ptes(page, page);
 
 	rc = mapping->a_ops->writepage(page, &wbc);
-	if (rc < 0)
-		/* I/O Error writing */
-		return -EIO;
 
 	if (rc != AOP_WRITEPAGE_ACTIVATE)
 		/* unlocked. Relock */
 		lock_page(page);
 
-	return -EAGAIN;
+	return (rc < 0) ? -EIO : -EAGAIN;
 }
 
 /*
@@ -841,12 +838,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 	struct page_to_node *pp;
 	LIST_HEAD(pagelist);
 
+	migrate_prep();
 	down_read(&mm->mmap_sem);
 
 	/*
 	 * Build a list of pages to migrate
 	 */
-	migrate_prep();
 	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
 		struct vm_area_struct *vma;
 		struct page *page;
@@ -990,25 +987,18 @@ out:
 /*
  * Determine the nodes of an array of pages and store it in an array of status.
  */
-static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
-			 const void __user * __user *pages,
-			 int __user *status)
+static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
+				const void __user **pages, int *status)
 {
 	unsigned long i;
-	int err;
 
 	down_read(&mm->mmap_sem);
 
 	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
-		unsigned long addr;
+		unsigned long addr = (unsigned long)(*pages);
 		struct vm_area_struct *vma;
 		struct page *page;
-
-		err = -EFAULT;
-		if (get_user(p, pages+i))
-			goto out;
-		addr = (unsigned long) p;
+		int err = -EFAULT;
 
 		vma = find_vma(mm, addr);
 		if (!vma)
@@ -1027,12 +1017,52 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
 
 		err = page_to_nid(page);
 set_status:
-		put_user(err, status+i);
+		*status = err;
+
+		pages++;
+		status++;
+	}
+
+	up_read(&mm->mmap_sem);
+}
+
+/*
+ * Determine the nodes of a user array of pages and store it in
+ * a user array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 int __user *status)
+{
+#define DO_PAGES_STAT_CHUNK_NR 16
+	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
+	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
+	unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+	int err;
+
+	for (i = 0; i < nr_pages; i += chunk_nr) {
+		if (chunk_nr + i > nr_pages)
+			chunk_nr = nr_pages - i;
+
+		err = copy_from_user(chunk_pages, &pages[i],
+				     chunk_nr * sizeof(*chunk_pages));
+		if (err) {
+			err = -EFAULT;
+			goto out;
+		}
+
+		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
+
+		err = copy_to_user(&status[i], chunk_status,
+				   chunk_nr * sizeof(*chunk_status));
+		if (err) {
+			err = -EFAULT;
+			goto out;
+		}
 	}
 	err = 0;
 
 out:
-	up_read(&mm->mmap_sem);
 	return err;
 }
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 008ea70b7afa..1ada366570cb 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -66,14 +66,10 @@ void __clear_page_mlock(struct page *page)
 		putback_lru_page(page);
 	} else {
 		/*
-		 * Page not on the LRU yet. Flush all pagevecs and retry.
+		 * We lost the race. the page already moved to evictable list.
 		 */
-		lru_add_drain_all();
-		if (!isolate_lru_page(page))
-			putback_lru_page(page);
-		else if (PageUnevictable(page))
+		if (PageUnevictable(page))
 			count_vm_event(UNEVICTABLE_PGSTRANDED);
-
 	}
 }
 
@@ -166,7 +162,7 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	unsigned long addr = start;
 	struct page *pages[16]; /* 16 gives a reasonable batch */
 	int nr_pages = (end - start) / PAGE_SIZE;
-	int ret;
+	int ret = 0;
 	int gup_flags = 0;
 
 	VM_BUG_ON(start & ~PAGE_MASK);
@@ -187,8 +183,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & VM_WRITE)
 		gup_flags |= GUP_FLAGS_WRITE;
 
-	lru_add_drain_all();	/* push cached pages to LRU */
-
 	while (nr_pages > 0) {
 		int i;
 
@@ -251,8 +245,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 		ret = 0;
 	}
 
-	lru_add_drain_all();	/* to update stats */
-
 	return ret;	/* count entire vma as locked_vm */
 }
 
@@ -546,6 +538,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
 	if (!can_do_mlock())
 		return -EPERM;
 
+	lru_add_drain_all();	/* flush pagevec */
+
 	down_write(&current->mm->mmap_sem);
 	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
 	start &= PAGE_MASK;
@@ -612,6 +606,8 @@ asmlinkage long sys_mlockall(int flags)
 	if (!can_do_mlock())
 		goto out;
 
+	lru_add_drain_all();	/* flush pagevec */
+
 	down_write(&current->mm->mmap_sem);
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -175,7 +175,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 
 	/* Don't let a single process grow too big:
 	   leave 3% of the size of this process for other processes */
-	allowed -= mm->total_vm / 32;
+	if (mm)
+		allowed -= mm->total_vm / 32;
 
 	/*
 	 * cast `allowed' as a signed long because vm_committed_space
@@ -1703,7 +1704,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	vma = find_vma_prev(mm, addr, &prev);
 	if (vma && (vma->vm_start <= addr))
 		return vma;
-	if (expand_stack(prev, addr))
+	if (!prev || expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED) {
 		if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
diff --git a/mm/nommu.c b/mm/nommu.c
index 2696b24f2bb3..7695dc850785 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1454,7 +1454,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 
 	/* Don't let a single process grow too big:
 	   leave 3% of the size of this process for other processes */
-	allowed -= current->mm->total_vm / 32;
+	if (mm)
+		allowed -= mm->total_vm / 32;
 
 	/*
 	 * cast `allowed' as a signed long because vm_committed_space
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64e5b4bcd964..a0a01902f551 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex);
  * badness - calculate a numeric value for how bad this task has been
  * @p: task struct of which task we should calculate
  * @uptime: current uptime in seconds
- * @mem: target memory controller
  *
  * The formula used is relatively simple and documented inline in the
  * function. The main rationale is that we want to select a good task
@@ -295,6 +294,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
 			continue;
 		if (mem && !task_in_mem_cgroup(p, mem))
 			continue;
+		if (!thread_group_leader(p))
+			continue;
 
 		task_lock(p);
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d0a240fbb8bf..d8ac01474563 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -263,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
+
+	set_compound_page_dtor(page, free_compound_page);
+	set_compound_order(page, order);
+	__SetPageHead(page);
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
+
+		__SetPageTail(p);
+		p->first_page = page;
+	}
+}
+
+#ifdef CONFIG_HUGETLBFS
+void prep_compound_gigantic_page(struct page *page, unsigned long order)
+{
+	int i;
+	int nr_pages = 1 << order;
 	struct page *p = page + 1;
 
 	set_compound_page_dtor(page, free_compound_page);
 	set_compound_order(page, order);
 	__SetPageHead(page);
-	for (i = 1; i < nr_pages; i++, p++) {
-		if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-			p = pfn_to_page(page_to_pfn(page) + i);
+	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		__SetPageTail(p);
 		p->first_page = page;
 	}
 }
+#endif
 
 static void destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
-	struct page *p = page + 1;
 
 	if (unlikely(compound_order(page) != order))
 		bad_page(page);
@@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 	if (unlikely(!PageHead(page)))
 		bad_page(page);
 	__ClearPageHead(page);
-	for (i = 1; i < nr_pages; i++, p++) {
-		if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-			p = pfn_to_page(page_to_pfn(page) + i);
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
 
 		if (unlikely(!PageTail(p) |
 				(p->first_page != page)))
@@ -1547,6 +1561,10 @@ nofail_alloc:
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
+	/*
+	 * The task's cpuset might have expanded its set of allowable nodes
+	 */
+	cpuset_update_task_memory_state();
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 5d86550701f2..ab27ff750519 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -4,7 +4,10 @@ | |||
4 | #include <linux/bit_spinlock.h> | 4 | #include <linux/bit_spinlock.h> |
5 | #include <linux/page_cgroup.h> | 5 | #include <linux/page_cgroup.h> |
6 | #include <linux/hash.h> | 6 | #include <linux/hash.h> |
7 | #include <linux/slab.h> | ||
7 | #include <linux/memory.h> | 8 | #include <linux/memory.h> |
9 | #include <linux/vmalloc.h> | ||
10 | #include <linux/cgroup.h> | ||
8 | 11 | ||
9 | static void __meminit | 12 | static void __meminit |
10 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) | 13 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) |
@@ -18,7 +21,7 @@ static unsigned long total_usage; | |||
18 | #if !defined(CONFIG_SPARSEMEM) | 21 | #if !defined(CONFIG_SPARSEMEM) |
19 | 22 | ||
20 | 23 | ||
21 | void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) | 24 | void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) |
22 | { | 25 | { |
23 | pgdat->node_page_cgroup = NULL; | 26 | pgdat->node_page_cgroup = NULL; |
24 | } | 27 | } |
@@ -46,6 +49,9 @@ static int __init alloc_node_page_cgroup(int nid) | |||
46 | start_pfn = NODE_DATA(nid)->node_start_pfn; | 49 | start_pfn = NODE_DATA(nid)->node_start_pfn; |
47 | nr_pages = NODE_DATA(nid)->node_spanned_pages; | 50 | nr_pages = NODE_DATA(nid)->node_spanned_pages; |
48 | 51 | ||
52 | if (!nr_pages) | ||
53 | return 0; | ||
54 | |||
49 | table_size = sizeof(struct page_cgroup) * nr_pages; | 55 | table_size = sizeof(struct page_cgroup) * nr_pages; |
50 | 56 | ||
51 | base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), | 57 | base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), |
@@ -66,6 +72,9 @@ void __init page_cgroup_init(void) | |||
66 | 72 | ||
67 | int nid, fail; | 73 | int nid, fail; |
68 | 74 | ||
75 | if (mem_cgroup_subsys.disabled) | ||
76 | return; | ||
77 | |||
69 | for_each_online_node(nid) { | 78 | for_each_online_node(nid) { |
70 | fail = alloc_node_page_cgroup(nid); | 79 | fail = alloc_node_page_cgroup(nid); |
71 | if (fail) | 80 | if (fail) |
@@ -91,7 +100,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) | |||
91 | return section->page_cgroup + pfn; | 100 | return section->page_cgroup + pfn; |
92 | } | 101 | } |
93 | 102 | ||
94 | int __meminit init_section_page_cgroup(unsigned long pfn) | 103 | /* __alloc_bootmem...() is protected by !slab_available() */ |
104 | int __init_refok init_section_page_cgroup(unsigned long pfn) | ||
95 | { | 105 | { |
96 | struct mem_section *section; | 106 | struct mem_section *section; |
97 | struct page_cgroup *base, *pc; | 107 | struct page_cgroup *base, *pc; |
@@ -100,15 +110,30 @@ int __meminit init_section_page_cgroup(unsigned long pfn) | |||
100 | 110 | ||
101 | section = __pfn_to_section(pfn); | 111 | section = __pfn_to_section(pfn); |
102 | 112 | ||
103 | if (section->page_cgroup) | 113 | if (!section->page_cgroup) { |
104 | return 0; | 114 | nid = page_to_nid(pfn_to_page(pfn)); |
105 | 115 | table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; | |
106 | nid = page_to_nid(pfn_to_page(pfn)); | 116 | if (slab_is_available()) { |
107 | 117 | base = kmalloc_node(table_size, GFP_KERNEL, nid); | |
108 | table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; | 118 | if (!base) |
109 | base = kmalloc_node(table_size, GFP_KERNEL, nid); | 119 | base = vmalloc_node(table_size, nid); |
110 | if (!base) | 120 | } else { |
111 | base = vmalloc_node(table_size, nid); | 121 | base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), |
122 | table_size, | ||
123 | PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | ||
124 | } | ||
125 | } else { | ||
126 | /* | ||
127 | * We don't have to allocate page_cgroup again, but | ||
128 | * address of memmap may be changed. So, we have to initialize | ||
129 | * again. | ||
130 | */ | ||
131 | base = section->page_cgroup + pfn; | ||
132 | table_size = 0; | ||
133 | /* check address of memmap is changed or not. */ | ||
134 | if (base->page == pfn_to_page(pfn)) | ||
135 | return 0; | ||
136 | } | ||
112 | 137 | ||
113 | if (!base) { | 138 | if (!base) { |
114 | printk(KERN_ERR "page cgroup allocation failure\n"); | 139 | printk(KERN_ERR "page cgroup allocation failure\n"); |
@@ -135,21 +160,26 @@ void __free_page_cgroup(unsigned long pfn) | |||
135 | if (!ms || !ms->page_cgroup) | 160 | if (!ms || !ms->page_cgroup) |
136 | return; | 161 | return; |
137 | base = ms->page_cgroup + pfn; | 162 | base = ms->page_cgroup + pfn; |
138 | ms->page_cgroup = NULL; | 163 | if (is_vmalloc_addr(base)) { |
139 | if (is_vmalloc_addr(base)) | ||
140 | vfree(base); | 164 | vfree(base); |
141 | else | 165 | ms->page_cgroup = NULL; |
142 | kfree(base); | 166 | } else { |
167 | struct page *page = virt_to_page(base); | ||
168 | if (!PageReserved(page)) { /* Is bootmem ? */ | ||
169 | kfree(base); | ||
170 | ms->page_cgroup = NULL; | ||
171 | } | ||
172 | } | ||
143 | } | 173 | } |
144 | 174 | ||
145 | int online_page_cgroup(unsigned long start_pfn, | 175 | int __meminit online_page_cgroup(unsigned long start_pfn, |
146 | unsigned long nr_pages, | 176 | unsigned long nr_pages, |
147 | int nid) | 177 | int nid) |
148 | { | 178 | { |
149 | unsigned long start, end, pfn; | 179 | unsigned long start, end, pfn; |
150 | int fail = 0; | 180 | int fail = 0; |
151 | 181 | ||
152 | start = start_pfn & (PAGES_PER_SECTION - 1); | 182 | start = start_pfn & ~(PAGES_PER_SECTION - 1); |
153 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); | 183 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); |
154 | 184 | ||
155 | for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { | 185 | for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { |
@@ -167,12 +197,12 @@ int online_page_cgroup(unsigned long start_pfn, | |||
167 | return -ENOMEM; | 197 | return -ENOMEM; |
168 | } | 198 | } |
169 | 199 | ||
170 | int offline_page_cgroup(unsigned long start_pfn, | 200 | int __meminit offline_page_cgroup(unsigned long start_pfn, |
171 | unsigned long nr_pages, int nid) | 201 | unsigned long nr_pages, int nid) |
172 | { | 202 | { |
173 | unsigned long start, end, pfn; | 203 | unsigned long start, end, pfn; |
174 | 204 | ||
175 | start = start_pfn & (PAGES_PER_SECTION - 1); | 205 | start = start_pfn & ~(PAGES_PER_SECTION - 1); |
176 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); | 206 | end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); |
177 | 207 | ||
178 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) | 208 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) |
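The two hunks above fix the same operator bug: masking with (PAGES_PER_SECTION - 1) yields the offset inside a section, while the intended value is the pfn rounded down to a section boundary, which needs the complemented mask. A small standalone illustration (the section size and pfn are arbitrary example values):

#include <stdio.h>

#define PAGES_PER_SECTION 0x1000UL   /* stand-in value; a power of two, as in the kernel */

int main(void)
{
	unsigned long start_pfn = 0x12345;

	unsigned long offset_in_section = start_pfn & (PAGES_PER_SECTION - 1);   /* old expression */
	unsigned long section_start     = start_pfn & ~(PAGES_PER_SECTION - 1);  /* fixed expression */

	printf("start_pfn        = %#lx\n", start_pfn);
	printf("old expression   = %#lx (offset inside the section)\n", offset_in_section);
	printf("fixed expression = %#lx (pfn rounded down to a section boundary)\n", section_start);
	return 0;
}

With the old expression the online/offline loops started from a small offset near pfn 0 instead of from the first section covered by the hotplugged range.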
@@ -181,7 +211,7 @@ int offline_page_cgroup(unsigned long start_pfn, | |||
181 | 211 | ||
182 | } | 212 | } |
183 | 213 | ||
184 | static int page_cgroup_callback(struct notifier_block *self, | 214 | static int __meminit page_cgroup_callback(struct notifier_block *self, |
185 | unsigned long action, void *arg) | 215 | unsigned long action, void *arg) |
186 | { | 216 | { |
187 | struct memory_notify *mn = arg; | 217 | struct memory_notify *mn = arg; |
@@ -191,18 +221,23 @@ static int page_cgroup_callback(struct notifier_block *self, | |||
191 | ret = online_page_cgroup(mn->start_pfn, | 221 | ret = online_page_cgroup(mn->start_pfn, |
192 | mn->nr_pages, mn->status_change_nid); | 222 | mn->nr_pages, mn->status_change_nid); |
193 | break; | 223 | break; |
194 | case MEM_CANCEL_ONLINE: | ||
195 | case MEM_OFFLINE: | 224 | case MEM_OFFLINE: |
196 | offline_page_cgroup(mn->start_pfn, | 225 | offline_page_cgroup(mn->start_pfn, |
197 | mn->nr_pages, mn->status_change_nid); | 226 | mn->nr_pages, mn->status_change_nid); |
198 | break; | 227 | break; |
228 | case MEM_CANCEL_ONLINE: | ||
199 | case MEM_GOING_OFFLINE: | 229 | case MEM_GOING_OFFLINE: |
200 | break; | 230 | break; |
201 | case MEM_ONLINE: | 231 | case MEM_ONLINE: |
202 | case MEM_CANCEL_OFFLINE: | 232 | case MEM_CANCEL_OFFLINE: |
203 | break; | 233 | break; |
204 | } | 234 | } |
205 | ret = notifier_from_errno(ret); | 235 | |
236 | if (ret) | ||
237 | ret = notifier_from_errno(ret); | ||
238 | else | ||
239 | ret = NOTIFY_OK; | ||
240 | |||
206 | return ret; | 241 | return ret; |
207 | } | 242 | } |
208 | 243 | ||
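The reordered switch above also changes how the callback's result is reported: a zero return is now mapped to NOTIFY_OK explicitly, and only a real error is passed through notifier_from_errno(). A rough standalone sketch of that shape, with stand-in constants and a stub in place of the real helper from <linux/notifier.h>:

#include <errno.h>
#include <stdio.h>

/* Stand-in values; the real codes and helper live in <linux/notifier.h>. */
enum { SKETCH_NOTIFY_OK = 1, SKETCH_NOTIFY_STOP = 2 };

/* Stub: the kernel helper also encodes the errno into the reply. */
static int sketch_notifier_from_errno(int err)
{
	(void)err;
	return SKETCH_NOTIFY_STOP;
}

/* Same shape as the fixed callback tail: success -> OK, error -> stop. */
static int callback_result(int err)
{
	if (err)
		return sketch_notifier_from_errno(err);
	return SKETCH_NOTIFY_OK;
}

int main(void)
{
	printf("success gives %d, -ENOMEM gives %d\n",
	       callback_result(0), callback_result(-ENOMEM));
	return 0;
}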
@@ -213,6 +248,9 @@ void __init page_cgroup_init(void) | |||
213 | unsigned long pfn; | 248 | unsigned long pfn; |
214 | int fail = 0; | 249 | int fail = 0; |
215 | 250 | ||
251 | if (mem_cgroup_subsys.disabled) | ||
252 | return; | ||
253 | |||
216 | for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { | 254 | for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { |
217 | if (!pfn_present(pfn)) | 255 | if (!pfn_present(pfn)) |
218 | continue; | 256 | continue; |
@@ -229,7 +267,7 @@ void __init page_cgroup_init(void) | |||
229 | " want\n"); | 267 | " want\n"); |
230 | } | 268 | } |
231 | 269 | ||
232 | void __init pgdat_page_cgroup_init(struct pglist_data *pgdat) | 270 | void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) |
233 | { | 271 | { |
234 | return; | 272 | return; |
235 | } | 273 | } |
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index b70a7fec1ff6..5e0ffd967452 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c | |||
@@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) | |||
130 | if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) | 130 | if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) |
131 | break; | 131 | break; |
132 | } | 132 | } |
133 | if (pfn < end_pfn) | 133 | page = __first_valid_page(start_pfn, end_pfn - start_pfn); |
134 | if ((pfn < end_pfn) || !page) | ||
134 | return -EBUSY; | 135 | return -EBUSY; |
135 | /* Check all pages are free or Marked as ISOLATED */ | 136 | /* Check all pages are free or Marked as ISOLATED */ |
136 | zone = page_zone(pfn_to_page(pfn)); | 137 | zone = page_zone(page); |
137 | spin_lock_irqsave(&zone->lock, flags); | 138 | spin_lock_irqsave(&zone->lock, flags); |
138 | ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); | 139 | ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); |
139 | spin_unlock_irqrestore(&zone->lock, flags); | 140 | spin_unlock_irqrestore(&zone->lock, flags); |
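The fix above stops test_pages_isolated() from dereferencing a pfn that has no struct page: it now asks for the first valid page in the range and returns -EBUSY when there is none, instead of blindly using the loop's final pfn. The guard reduces to a "find the first valid element or bail out" scan; a standalone sketch with a hypothetical validity test standing in for pfn_valid()/__first_valid_page():

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical validity test standing in for the pfn checks. */
static bool slot_valid(const int *slots, unsigned long i)
{
	return slots[i] != 0;
}

/* Return the index of the first valid slot in [start, end), or -1 if none. */
static long first_valid(const int *slots, unsigned long start, unsigned long end)
{
	for (unsigned long i = start; i < end; i++)
		if (slot_valid(slots, i))
			return (long)i;
	return -1;
}

int main(void)
{
	int slots[8] = { 0, 0, 0, 5, 7, 0, 0, 0 };
	long idx = first_valid(slots, 0, 8);

	if (idx < 0) {
		fprintf(stderr, "no valid slot: bail out with -EBUSY (%d)\n", -EBUSY);
		return 1;
	}
	printf("first valid slot at index %ld\n", idx);
	return 0;
}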
diff --git a/mm/shmem.c b/mm/shmem.c index d38d7e61fcd0..0ed075215e5f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -161,8 +161,8 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) | |||
161 | */ | 161 | */ |
162 | static inline int shmem_acct_size(unsigned long flags, loff_t size) | 162 | static inline int shmem_acct_size(unsigned long flags, loff_t size) |
163 | { | 163 | { |
164 | return (flags & VM_ACCOUNT)? | 164 | return (flags & VM_ACCOUNT) ? |
165 | security_vm_enough_memory(VM_ACCT(size)): 0; | 165 | security_vm_enough_memory_kern(VM_ACCT(size)) : 0; |
166 | } | 166 | } |
167 | 167 | ||
168 | static inline void shmem_unacct_size(unsigned long flags, loff_t size) | 168 | static inline void shmem_unacct_size(unsigned long flags, loff_t size) |
@@ -179,8 +179,8 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size) | |||
179 | */ | 179 | */ |
180 | static inline int shmem_acct_block(unsigned long flags) | 180 | static inline int shmem_acct_block(unsigned long flags) |
181 | { | 181 | { |
182 | return (flags & VM_ACCOUNT)? | 182 | return (flags & VM_ACCOUNT) ? |
183 | 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE)); | 183 | 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)); |
184 | } | 184 | } |
185 | 185 | ||
186 | static inline void shmem_unacct_blocks(unsigned long flags, long pages) | 186 | static inline void shmem_unacct_blocks(unsigned long flags, long pages) |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -95,6 +95,7 @@ | |||
95 | #include <linux/init.h> | 95 | #include <linux/init.h> |
96 | #include <linux/compiler.h> | 96 | #include <linux/compiler.h> |
97 | #include <linux/cpuset.h> | 97 | #include <linux/cpuset.h> |
98 | #include <linux/proc_fs.h> | ||
98 | #include <linux/seq_file.h> | 99 | #include <linux/seq_file.h> |
99 | #include <linux/notifier.h> | 100 | #include <linux/notifier.h> |
100 | #include <linux/kallsyms.h> | 101 | #include <linux/kallsyms.h> |
@@ -4258,7 +4259,7 @@ static int s_show(struct seq_file *m, void *p) | |||
4258 | * + further values on SMP and with statistics enabled | 4259 | * + further values on SMP and with statistics enabled |
4259 | */ | 4260 | */ |
4260 | 4261 | ||
4261 | const struct seq_operations slabinfo_op = { | 4262 | static const struct seq_operations slabinfo_op = { |
4262 | .start = s_start, | 4263 | .start = s_start, |
4263 | .next = s_next, | 4264 | .next = s_next, |
4264 | .stop = s_stop, | 4265 | .stop = s_stop, |
@@ -4315,6 +4316,19 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
4315 | return res; | 4316 | return res; |
4316 | } | 4317 | } |
4317 | 4318 | ||
4319 | static int slabinfo_open(struct inode *inode, struct file *file) | ||
4320 | { | ||
4321 | return seq_open(file, &slabinfo_op); | ||
4322 | } | ||
4323 | |||
4324 | static const struct file_operations proc_slabinfo_operations = { | ||
4325 | .open = slabinfo_open, | ||
4326 | .read = seq_read, | ||
4327 | .write = slabinfo_write, | ||
4328 | .llseek = seq_lseek, | ||
4329 | .release = seq_release, | ||
4330 | }; | ||
4331 | |||
4318 | #ifdef CONFIG_DEBUG_SLAB_LEAK | 4332 | #ifdef CONFIG_DEBUG_SLAB_LEAK |
4319 | 4333 | ||
4320 | static void *leaks_start(struct seq_file *m, loff_t *pos) | 4334 | static void *leaks_start(struct seq_file *m, loff_t *pos) |
@@ -4443,13 +4457,47 @@ static int leaks_show(struct seq_file *m, void *p) | |||
4443 | return 0; | 4457 | return 0; |
4444 | } | 4458 | } |
4445 | 4459 | ||
4446 | const struct seq_operations slabstats_op = { | 4460 | static const struct seq_operations slabstats_op = { |
4447 | .start = leaks_start, | 4461 | .start = leaks_start, |
4448 | .next = s_next, | 4462 | .next = s_next, |
4449 | .stop = s_stop, | 4463 | .stop = s_stop, |
4450 | .show = leaks_show, | 4464 | .show = leaks_show, |
4451 | }; | 4465 | }; |
4466 | |||
4467 | static int slabstats_open(struct inode *inode, struct file *file) | ||
4468 | { | ||
4469 | unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
4470 | int ret = -ENOMEM; | ||
4471 | if (n) { | ||
4472 | ret = seq_open(file, &slabstats_op); | ||
4473 | if (!ret) { | ||
4474 | struct seq_file *m = file->private_data; | ||
4475 | *n = PAGE_SIZE / (2 * sizeof(unsigned long)); | ||
4476 | m->private = n; | ||
4477 | n = NULL; | ||
4478 | } | ||
4479 | kfree(n); | ||
4480 | } | ||
4481 | return ret; | ||
4482 | } | ||
4483 | |||
4484 | static const struct file_operations proc_slabstats_operations = { | ||
4485 | .open = slabstats_open, | ||
4486 | .read = seq_read, | ||
4487 | .llseek = seq_lseek, | ||
4488 | .release = seq_release_private, | ||
4489 | }; | ||
4490 | #endif | ||
4491 | |||
4492 | static int __init slab_proc_init(void) | ||
4493 | { | ||
4494 | proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); | ||
4495 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
4496 | proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); | ||
4452 | #endif | 4497 | #endif |
4498 | return 0; | ||
4499 | } | ||
4500 | module_init(slab_proc_init); | ||
4453 | #endif | 4501 | #endif |
4454 | 4502 | ||
4455 | /** | 4503 | /** |
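The slab.c hunks above (and the slub.c, vmalloc.c and vmstat.c hunks below) follow one pattern: the seq_operations table becomes static, a small open() helper wraps seq_open(), and the file is registered from the subsystem itself with proc_create(), so fs/proc no longer needs to reference the table. A minimal sketch of that pattern as a standalone module, matching the file_operations-based proc_create() used in this series (later kernels switched proc_create() to struct proc_ops); the file name "example_info" and the data it prints are invented for illustration:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_data[] = { 1, 2, 3, 5, 8 };

static void *example_start(struct seq_file *m, loff_t *pos)
{
	if (*pos >= (loff_t)ARRAY_SIZE(example_data))
		return NULL;
	return &example_data[*pos];
}

static void *example_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;
	return example_start(m, pos);
}

static void example_stop(struct seq_file *m, void *v)
{
	/* Nothing to unlock in this sketch. */
}

static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", *(int *)v);
	return 0;
}

static const struct seq_operations example_op = {
	.start	= example_start,
	.next	= example_next,
	.stop	= example_stop,
	.show	= example_show,
};

static int example_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &example_op);
}

static const struct file_operations example_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static int __init example_proc_init(void)
{
	proc_create("example_info", 0444, NULL, &example_proc_fops);
	return 0;
}

static void __exit example_proc_exit(void)
{
	remove_proc_entry("example_info", NULL);
}

module_init(example_proc_init);
module_exit(example_proc_exit);
MODULE_LICENSE("GPL");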
diff --git a/mm/slob.c b/mm/slob.c --- a/mm/slob.c +++ b/mm/slob.c | |||
@@ -535,7 +535,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
535 | struct kmem_cache *c; | 535 | struct kmem_cache *c; |
536 | 536 | ||
537 | c = slob_alloc(sizeof(struct kmem_cache), | 537 | c = slob_alloc(sizeof(struct kmem_cache), |
538 | flags, ARCH_KMALLOC_MINALIGN, -1); | 538 | GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1); |
539 | 539 | ||
540 | if (c) { | 540 | if (c) { |
541 | c->name = name; | 541 | c->name = name; |
diff --git a/mm/slub.c b/mm/slub.c --- a/mm/slub.c +++ b/mm/slub.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/interrupt.h> | 14 | #include <linux/interrupt.h> |
15 | #include <linux/bitops.h> | 15 | #include <linux/bitops.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/proc_fs.h> | ||
17 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
18 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
19 | #include <linux/cpuset.h> | 20 | #include <linux/cpuset.h> |
@@ -2930,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self, | |||
2930 | case MEM_CANCEL_OFFLINE: | 2931 | case MEM_CANCEL_OFFLINE: |
2931 | break; | 2932 | break; |
2932 | } | 2933 | } |
2933 | 2934 | if (ret) | |
2934 | ret = notifier_from_errno(ret); | 2935 | ret = notifier_from_errno(ret); |
2936 | else | ||
2937 | ret = NOTIFY_OK; | ||
2935 | return ret; | 2938 | return ret; |
2936 | } | 2939 | } |
2937 | 2940 | ||
@@ -3594,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf, | |||
3594 | for (i = 0; i < t.count; i++) { | 3597 | for (i = 0; i < t.count; i++) { |
3595 | struct location *l = &t.loc[i]; | 3598 | struct location *l = &t.loc[i]; |
3596 | 3599 | ||
3597 | if (len > PAGE_SIZE - 100) | 3600 | if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) |
3598 | break; | 3601 | break; |
3599 | len += sprintf(buf + len, "%7ld ", l->count); | 3602 | len += sprintf(buf + len, "%7ld ", l->count); |
3600 | 3603 | ||
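The one-line change above reserves room for a worst-case sprint_symbol() result (KSYM_SYMBOL_LEN bytes) before appending another entry to the page-sized buffer, rather than assuming 100 bytes of slack is always enough. The general "stop while the worst-case next item still fits" shape, in a standalone sketch with invented sizes:

#include <stdio.h>

#define BUF_SIZE      4096   /* stand-in for PAGE_SIZE */
#define MAX_ITEM_LEN   128   /* stand-in for KSYM_SYMBOL_LEN: worst case per entry */
#define SLACK           16   /* extra headroom, like the kernel's "- 100" */

int main(void)
{
	char buf[BUF_SIZE] = "";
	size_t len = 0;
	const char *items[] = { "alpha", "bravo", "charlie" };

	for (size_t i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
		/* Stop while the worst-case next entry still fits. */
		if (len > BUF_SIZE - MAX_ITEM_LEN - SLACK)
			break;
		len += (size_t)snprintf(buf + len, sizeof(buf) - len,
					"%zu: %s\n", i, items[i]);
	}
	fputs(buf, stdout);
	printf("used %zu of %d bytes\n", len, BUF_SIZE);
	return 0;
}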
@@ -4417,14 +4420,6 @@ __initcall(slab_sysfs_init); | |||
4417 | * The /proc/slabinfo ABI | 4420 | * The /proc/slabinfo ABI |
4418 | */ | 4421 | */ |
4419 | #ifdef CONFIG_SLABINFO | 4422 | #ifdef CONFIG_SLABINFO |
4420 | |||
4421 | ssize_t slabinfo_write(struct file *file, const char __user *buffer, | ||
4422 | size_t count, loff_t *ppos) | ||
4423 | { | ||
4424 | return -EINVAL; | ||
4425 | } | ||
4426 | |||
4427 | |||
4428 | static void print_slabinfo_header(struct seq_file *m) | 4423 | static void print_slabinfo_header(struct seq_file *m) |
4429 | { | 4424 | { |
4430 | seq_puts(m, "slabinfo - version: 2.1\n"); | 4425 | seq_puts(m, "slabinfo - version: 2.1\n"); |
@@ -4492,11 +4487,29 @@ static int s_show(struct seq_file *m, void *p) | |||
4492 | return 0; | 4487 | return 0; |
4493 | } | 4488 | } |
4494 | 4489 | ||
4495 | const struct seq_operations slabinfo_op = { | 4490 | static const struct seq_operations slabinfo_op = { |
4496 | .start = s_start, | 4491 | .start = s_start, |
4497 | .next = s_next, | 4492 | .next = s_next, |
4498 | .stop = s_stop, | 4493 | .stop = s_stop, |
4499 | .show = s_show, | 4494 | .show = s_show, |
4500 | }; | 4495 | }; |
4501 | 4496 | ||
4497 | static int slabinfo_open(struct inode *inode, struct file *file) | ||
4498 | { | ||
4499 | return seq_open(file, &slabinfo_op); | ||
4500 | } | ||
4501 | |||
4502 | static const struct file_operations proc_slabinfo_operations = { | ||
4503 | .open = slabinfo_open, | ||
4504 | .read = seq_read, | ||
4505 | .llseek = seq_lseek, | ||
4506 | .release = seq_release, | ||
4507 | }; | ||
4508 | |||
4509 | static int __init slab_proc_init(void) | ||
4510 | { | ||
4511 | proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); | ||
4512 | return 0; | ||
4513 | } | ||
4514 | module_init(slab_proc_init); | ||
4502 | #endif /* CONFIG_SLABINFO */ | 4515 | #endif /* CONFIG_SLABINFO */ |
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a91b5f8fcaf6..a13ea6401ae7 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c | |||
@@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, | |||
64 | unsigned long pfn = pte_pfn(*pte); | 64 | unsigned long pfn = pte_pfn(*pte); |
65 | int actual_node = early_pfn_to_nid(pfn); | 65 | int actual_node = early_pfn_to_nid(pfn); |
66 | 66 | ||
67 | if (actual_node != node) | 67 | if (node_distance(actual_node, node) > LOCAL_DISTANCE) |
68 | printk(KERN_WARNING "[%lx-%lx] potential offnode " | 68 | printk(KERN_WARNING "[%lx-%lx] potential offnode " |
69 | "page_structs\n", start, end - 1); | 69 | "page_structs\n", start, end - 1); |
70 | } | 70 | } |
diff --git a/mm/sparse.c b/mm/sparse.c index 39db301b920d..083f5b63e7a8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -570,7 +570,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) | |||
570 | * set. If this is <=0, then that means that the passed-in | 570 | * set. If this is <=0, then that means that the passed-in |
571 | * map was not consumed and must be freed. | 571 | * map was not consumed and must be freed. |
572 | */ | 572 | */ |
573 | int sparse_add_one_section(struct zone *zone, unsigned long start_pfn, | 573 | int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn, |
574 | int nr_pages) | 574 | int nr_pages) |
575 | { | 575 | { |
576 | unsigned long section_nr = pfn_to_section_nr(start_pfn); | 576 | unsigned long section_nr = pfn_to_section_nr(start_pfn); |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -299,7 +299,6 @@ void lru_add_drain(void) | |||
299 | put_cpu(); | 299 | put_cpu(); |
300 | } | 300 | } |
301 | 301 | ||
302 | #if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU) | ||
303 | static void lru_add_drain_per_cpu(struct work_struct *dummy) | 302 | static void lru_add_drain_per_cpu(struct work_struct *dummy) |
304 | { | 303 | { |
305 | lru_add_drain(); | 304 | lru_add_drain(); |
@@ -313,18 +312,6 @@ int lru_add_drain_all(void) | |||
313 | return schedule_on_each_cpu(lru_add_drain_per_cpu); | 312 | return schedule_on_each_cpu(lru_add_drain_per_cpu); |
314 | } | 313 | } |
315 | 314 | ||
316 | #else | ||
317 | |||
318 | /* | ||
319 | * Returns 0 for success | ||
320 | */ | ||
321 | int lru_add_drain_all(void) | ||
322 | { | ||
323 | lru_add_drain(); | ||
324 | return 0; | ||
325 | } | ||
326 | #endif | ||
327 | |||
328 | /* | 315 | /* |
329 | * Batched page_cache_release(). Decrement the reference count on all the | 316 | * Batched page_cache_release(). Decrement the reference count on all the |
330 | * passed pages. If it fell to zero then remove the page from the LRU and | 317 | * passed pages. If it fell to zero then remove the page from the LRU and |
@@ -445,6 +432,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) | |||
445 | for (i = 0; i < pagevec_count(pvec); i++) { | 432 | for (i = 0; i < pagevec_count(pvec); i++) { |
446 | struct page *page = pvec->pages[i]; | 433 | struct page *page = pvec->pages[i]; |
447 | struct zone *pagezone = page_zone(page); | 434 | struct zone *pagezone = page_zone(page); |
435 | int file; | ||
448 | 436 | ||
449 | if (pagezone != zone) { | 437 | if (pagezone != zone) { |
450 | if (zone) | 438 | if (zone) |
@@ -456,8 +444,12 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru) | |||
456 | VM_BUG_ON(PageUnevictable(page)); | 444 | VM_BUG_ON(PageUnevictable(page)); |
457 | VM_BUG_ON(PageLRU(page)); | 445 | VM_BUG_ON(PageLRU(page)); |
458 | SetPageLRU(page); | 446 | SetPageLRU(page); |
459 | if (is_active_lru(lru)) | 447 | file = is_file_lru(lru); |
448 | zone->recent_scanned[file]++; | ||
449 | if (is_active_lru(lru)) { | ||
460 | SetPageActive(page); | 450 | SetPageActive(page); |
451 | zone->recent_rotated[file]++; | ||
452 | } | ||
461 | add_page_to_lru_list(zone, page, lru); | 453 | add_page_to_lru_list(zone, page, lru); |
462 | } | 454 | } |
463 | if (zone) | 455 | if (zone) |
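____pagevec_lru_add() now keeps the per-zone reclaim statistics in step with the list a page lands on: every insertion bumps recent_scanned for the page's class (anon or file), and insertions onto an active list also bump recent_rotated. The indexing relies on the split-LRU layout this series uses; a standalone sketch with a local copy of that layout (the values are reproduced here purely for illustration):

#include <stdbool.h>
#include <stdio.h>

/*
 * Local copy of the LRU list layout relied on above (anon lists first,
 * then file lists, inactive before active); illustration only.
 */
enum lru_list_sketch {
	LRU_INACTIVE_ANON,
	LRU_ACTIVE_ANON,
	LRU_INACTIVE_FILE,
	LRU_ACTIVE_FILE,
};
#define LRU_BASE   LRU_INACTIVE_ANON
#define LRU_ACTIVE 1
#define LRU_FILE   2

static bool is_file_lru(enum lru_list_sketch lru)
{
	return lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE;
}

static bool is_active_lru(enum lru_list_sketch lru)
{
	return lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE;
}

int main(void)
{
	unsigned long recent_scanned[2] = { 0, 0 };  /* [0] = anon, [1] = file */
	unsigned long recent_rotated[2] = { 0, 0 };

	enum lru_list_sketch lru = LRU_ACTIVE_FILE;
	int file = is_file_lru(lru);

	/* The same bookkeeping the patched ____pagevec_lru_add() does per page. */
	recent_scanned[file]++;
	if (is_active_lru(lru))
		recent_rotated[file]++;

	printf("lru index %d == LRU_BASE + active*LRU_ACTIVE + file*LRU_FILE = %d\n",
	       (int)lru, LRU_BASE + is_active_lru(lru) * LRU_ACTIVE + file * LRU_FILE);
	printf("recent_scanned[file]=%lu recent_rotated[file]=%lu\n",
	       recent_scanned[file], recent_rotated[file]);
	return 0;
}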
diff --git a/mm/swapfile.c b/mm/swapfile.c index 90cb67a5417c..54a9f87e5162 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1462,6 +1462,15 @@ static int __init procswaps_init(void) | |||
1462 | __initcall(procswaps_init); | 1462 | __initcall(procswaps_init); |
1463 | #endif /* CONFIG_PROC_FS */ | 1463 | #endif /* CONFIG_PROC_FS */ |
1464 | 1464 | ||
1465 | #ifdef MAX_SWAPFILES_CHECK | ||
1466 | static int __init max_swapfiles_check(void) | ||
1467 | { | ||
1468 | MAX_SWAPFILES_CHECK(); | ||
1469 | return 0; | ||
1470 | } | ||
1471 | late_initcall(max_swapfiles_check); | ||
1472 | #endif | ||
1473 | |||
1465 | /* | 1474 | /* |
1466 | * Written 01/25/92 by Simmule Turner, heavily changed by Linus. | 1475 | * Written 01/25/92 by Simmule Turner, heavily changed by Linus. |
1467 | * | 1476 | * |
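The new late_initcall above exists only to evaluate MAX_SWAPFILES_CHECK() somewhere, so the build-time sanity check behind that macro is actually instantiated. The underlying technique is the classic negative-array-size compile-time assertion; a standalone sketch of the idea with a locally defined macro and an invented constraint (this is not the kernel's macro or its actual condition):

#include <stdio.h>

/*
 * Generic compile-time assertion in the BUILD_BUG_ON style: the array type
 * gets a negative size, and the build fails, whenever "cond" is true.
 * Illustration of the idea only, not the kernel's implementation.
 */
#define SKETCH_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

#define MAX_SLOTS       32
#define SLOT_TYPE_BITS   5   /* hypothetical encoding width */

static void check_slot_encoding(void)
{
	/* Fails to compile if MAX_SLOTS no longer fits in SLOT_TYPE_BITS. */
	SKETCH_BUILD_BUG_ON(MAX_SLOTS > (1 << SLOT_TYPE_BITS));
}

int main(void)
{
	check_slot_encoding();
	printf("encoding constraints hold at build time\n");
	return 0;
}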
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 65ae576030da..1ddb77ba3995 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/spinlock.h> | 16 | #include <linux/spinlock.h> |
17 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
18 | #include <linux/proc_fs.h> | ||
18 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
19 | #include <linux/debugobjects.h> | 20 | #include <linux/debugobjects.h> |
20 | #include <linux/kallsyms.h> | 21 | #include <linux/kallsyms.h> |
@@ -76,7 +77,6 @@ static void vunmap_page_range(unsigned long addr, unsigned long end) | |||
76 | 77 | ||
77 | BUG_ON(addr >= end); | 78 | BUG_ON(addr >= end); |
78 | pgd = pgd_offset_k(addr); | 79 | pgd = pgd_offset_k(addr); |
79 | flush_cache_vunmap(addr, end); | ||
80 | do { | 80 | do { |
81 | next = pgd_addr_end(addr, end); | 81 | next = pgd_addr_end(addr, end); |
82 | if (pgd_none_or_clear_bad(pgd)) | 82 | if (pgd_none_or_clear_bad(pgd)) |
@@ -177,7 +177,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end, | |||
177 | static inline int is_vmalloc_or_module_addr(const void *x) | 177 | static inline int is_vmalloc_or_module_addr(const void *x) |
178 | { | 178 | { |
179 | /* | 179 | /* |
180 | * x86-64 and sparc64 put modules in a special place, | 180 | * ARM, x86-64 and sparc64 put modules in a special place, |
181 | * and fall back on vmalloc() if that fails. Others | 181 | * and fall back on vmalloc() if that fails. Others |
182 | * just put it in the vmalloc space. | 182 | * just put it in the vmalloc space. |
183 | */ | 183 | */ |
@@ -323,14 +323,14 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, | |||
323 | 323 | ||
324 | BUG_ON(size & ~PAGE_MASK); | 324 | BUG_ON(size & ~PAGE_MASK); |
325 | 325 | ||
326 | addr = ALIGN(vstart, align); | ||
327 | |||
328 | va = kmalloc_node(sizeof(struct vmap_area), | 326 | va = kmalloc_node(sizeof(struct vmap_area), |
329 | gfp_mask & GFP_RECLAIM_MASK, node); | 327 | gfp_mask & GFP_RECLAIM_MASK, node); |
330 | if (unlikely(!va)) | 328 | if (unlikely(!va)) |
331 | return ERR_PTR(-ENOMEM); | 329 | return ERR_PTR(-ENOMEM); |
332 | 330 | ||
333 | retry: | 331 | retry: |
332 | addr = ALIGN(vstart, align); | ||
333 | |||
334 | spin_lock(&vmap_area_lock); | 334 | spin_lock(&vmap_area_lock); |
335 | /* XXX: could have a last_hole cache */ | 335 | /* XXX: could have a last_hole cache */ |
336 | n = vmap_area_root.rb_node; | 336 | n = vmap_area_root.rb_node; |
@@ -361,7 +361,7 @@ retry: | |||
361 | goto found; | 361 | goto found; |
362 | } | 362 | } |
363 | 363 | ||
364 | while (addr + size >= first->va_start && addr + size <= vend) { | 364 | while (addr + size > first->va_start && addr + size <= vend) { |
365 | addr = ALIGN(first->va_end + PAGE_SIZE, align); | 365 | addr = ALIGN(first->va_end + PAGE_SIZE, align); |
366 | 366 | ||
367 | n = rb_next(&first->rb_node); | 367 | n = rb_next(&first->rb_node); |
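The comparison change above is an off-by-one fix for half-open ranges: a candidate mapping that ends exactly where an existing vmap_area starts does not overlap it, so the search should only skip ahead when addr + size is strictly greater than va_start. A standalone illustration with made-up addresses:

#include <stdbool.h>
#include <stdio.h>

/* Half-open intervals [a_start, a_end) and [b_start, b_end). */
static bool ranges_overlap(unsigned long a_start, unsigned long a_end,
			   unsigned long b_start, unsigned long b_end)
{
	return a_start < b_end && b_start < a_end;
}

int main(void)
{
	unsigned long addr = 0x1000, size = 0x1000;       /* candidate [0x1000, 0x2000) */
	unsigned long va_start = 0x2000, va_end = 0x3000; /* existing  [0x2000, 0x3000) */

	/* Touching at 0x2000 is not a collision for half-open ranges... */
	printf("touching ranges overlap? %s\n",
	       ranges_overlap(addr, addr + size, va_start, va_end) ? "yes" : "no");

	/* ...which is why the search condition uses '>' rather than '>='. */
	printf("old test (addr+size >= va_start): %s\n",
	       addr + size >= va_start ? "collides" : "fits");
	printf("new test (addr+size >  va_start): %s\n",
	       addr + size > va_start ? "collides" : "fits");
	return 0;
}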
@@ -521,24 +521,45 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
521 | } | 521 | } |
522 | 522 | ||
523 | /* | 523 | /* |
524 | * Kick off a purge of the outstanding lazy areas. Don't bother if somebody | ||
525 | * is already purging. | ||
526 | */ | ||
527 | static void try_purge_vmap_area_lazy(void) | ||
528 | { | ||
529 | unsigned long start = ULONG_MAX, end = 0; | ||
530 | |||
531 | __purge_vmap_area_lazy(&start, &end, 0, 0); | ||
532 | } | ||
533 | |||
534 | /* | ||
524 | * Kick off a purge of the outstanding lazy areas. | 535 | * Kick off a purge of the outstanding lazy areas. |
525 | */ | 536 | */ |
526 | static void purge_vmap_area_lazy(void) | 537 | static void purge_vmap_area_lazy(void) |
527 | { | 538 | { |
528 | unsigned long start = ULONG_MAX, end = 0; | 539 | unsigned long start = ULONG_MAX, end = 0; |
529 | 540 | ||
530 | __purge_vmap_area_lazy(&start, &end, 0, 0); | 541 | __purge_vmap_area_lazy(&start, &end, 1, 0); |
531 | } | 542 | } |
532 | 543 | ||
533 | /* | 544 | /* |
534 | * Free and unmap a vmap area | 545 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been |
546 | * called for the correct range previously. | ||
535 | */ | 547 | */ |
536 | static void free_unmap_vmap_area(struct vmap_area *va) | 548 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) |
537 | { | 549 | { |
538 | va->flags |= VM_LAZY_FREE; | 550 | va->flags |= VM_LAZY_FREE; |
539 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); | 551 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); |
540 | if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) | 552 | if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages())) |
541 | purge_vmap_area_lazy(); | 553 | try_purge_vmap_area_lazy(); |
554 | } | ||
555 | |||
556 | /* | ||
557 | * Free and unmap a vmap area | ||
558 | */ | ||
559 | static void free_unmap_vmap_area(struct vmap_area *va) | ||
560 | { | ||
561 | flush_cache_vunmap(va->va_start, va->va_end); | ||
562 | free_unmap_vmap_area_noflush(va); | ||
542 | } | 563 | } |
543 | 564 | ||
544 | static struct vmap_area *find_vmap_area(unsigned long addr) | 565 | static struct vmap_area *find_vmap_area(unsigned long addr) |
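The split above introduces the usual wrapper/_noflush pairing: free_unmap_vmap_area() performs flush_cache_vunmap() itself and then defers to free_unmap_vmap_area_noflush(), while callers that already flushed the range (vb_free() and free_vmap_block() in the hunks below) use the _noflush variant directly. A standalone sketch of that calling convention, with stub flush/unmap primitives and invented address ranges:

#include <stdio.h>

/* Stubs standing in for the real cache-flush and unmap primitives. */
static void flush_range(unsigned long start, unsigned long end)
{
	printf("flush   [%#lx, %#lx)\n", start, end);
}

static void unmap_range(unsigned long start, unsigned long end)
{
	printf("unmap   [%#lx, %#lx)\n", start, end);
}

/* Callers of this variant promise the range was already flushed. */
static void free_area_noflush(unsigned long start, unsigned long end)
{
	unmap_range(start, end);
}

/* Convenience wrapper: flush, then defer to the _noflush variant. */
static void free_area(unsigned long start, unsigned long end)
{
	flush_range(start, end);
	free_area_noflush(start, end);
}

int main(void)
{
	/* A caller that batches its own flush uses the _noflush variant. */
	flush_range(0x1000, 0x3000);
	free_area_noflush(0x1000, 0x2000);
	free_area_noflush(0x2000, 0x3000);

	/* A one-off caller takes the wrapper and gets the flush for free. */
	free_area(0x8000, 0x9000);
	return 0;
}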
@@ -591,6 +612,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr) | |||
591 | 612 | ||
592 | #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) | 613 | #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) |
593 | 614 | ||
615 | static bool vmap_initialized __read_mostly = false; | ||
616 | |||
594 | struct vmap_block_queue { | 617 | struct vmap_block_queue { |
595 | spinlock_t lock; | 618 | spinlock_t lock; |
596 | struct list_head free; | 619 | struct list_head free; |
@@ -720,7 +743,7 @@ static void free_vmap_block(struct vmap_block *vb) | |||
720 | spin_unlock(&vmap_block_tree_lock); | 743 | spin_unlock(&vmap_block_tree_lock); |
721 | BUG_ON(tmp != vb); | 744 | BUG_ON(tmp != vb); |
722 | 745 | ||
723 | free_unmap_vmap_area(vb->va); | 746 | free_unmap_vmap_area_noflush(vb->va); |
724 | call_rcu(&vb->rcu_head, rcu_free_vb); | 747 | call_rcu(&vb->rcu_head, rcu_free_vb); |
725 | } | 748 | } |
726 | 749 | ||
@@ -782,6 +805,9 @@ static void vb_free(const void *addr, unsigned long size) | |||
782 | 805 | ||
783 | BUG_ON(size & ~PAGE_MASK); | 806 | BUG_ON(size & ~PAGE_MASK); |
784 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | 807 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); |
808 | |||
809 | flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); | ||
810 | |||
785 | order = get_order(size); | 811 | order = get_order(size); |
786 | 812 | ||
787 | offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); | 813 | offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); |
@@ -827,6 +853,9 @@ void vm_unmap_aliases(void) | |||
827 | int cpu; | 853 | int cpu; |
828 | int flush = 0; | 854 | int flush = 0; |
829 | 855 | ||
856 | if (unlikely(!vmap_initialized)) | ||
857 | return; | ||
858 | |||
830 | for_each_possible_cpu(cpu) { | 859 | for_each_possible_cpu(cpu) { |
831 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | 860 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); |
832 | struct vmap_block *vb; | 861 | struct vmap_block *vb; |
@@ -896,7 +925,8 @@ EXPORT_SYMBOL(vm_unmap_ram); | |||
896 | * @count: number of pages | 925 | * @count: number of pages |
897 | * @node: prefer to allocate data structures on this node | 926 | * @node: prefer to allocate data structures on this node |
898 | * @prot: memory protection to use. PAGE_KERNEL for regular RAM | 927 | * @prot: memory protection to use. PAGE_KERNEL for regular RAM |
899 | * @returns: a pointer to the address that has been mapped, or NULL on failure | 928 | * |
929 | * Returns: a pointer to the address that has been mapped, or %NULL on failure | ||
900 | */ | 930 | */ |
901 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) | 931 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) |
902 | { | 932 | { |
@@ -940,6 +970,8 @@ void __init vmalloc_init(void) | |||
940 | INIT_LIST_HEAD(&vbq->dirty); | 970 | INIT_LIST_HEAD(&vbq->dirty); |
941 | vbq->nr_dirty = 0; | 971 | vbq->nr_dirty = 0; |
942 | } | 972 | } |
973 | |||
974 | vmap_initialized = true; | ||
943 | } | 975 | } |
944 | 976 | ||
945 | void unmap_kernel_range(unsigned long addr, unsigned long size) | 977 | void unmap_kernel_range(unsigned long addr, unsigned long size) |
@@ -1685,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p) | |||
1685 | v->addr, v->addr + v->size, v->size); | 1717 | v->addr, v->addr + v->size, v->size); |
1686 | 1718 | ||
1687 | if (v->caller) { | 1719 | if (v->caller) { |
1688 | char buff[2 * KSYM_NAME_LEN]; | 1720 | char buff[KSYM_SYMBOL_LEN]; |
1689 | 1721 | ||
1690 | seq_putc(m, ' '); | 1722 | seq_putc(m, ' '); |
1691 | sprint_symbol(buff, (unsigned long)v->caller); | 1723 | sprint_symbol(buff, (unsigned long)v->caller); |
@@ -1718,11 +1750,41 @@ static int s_show(struct seq_file *m, void *p) | |||
1718 | return 0; | 1750 | return 0; |
1719 | } | 1751 | } |
1720 | 1752 | ||
1721 | const struct seq_operations vmalloc_op = { | 1753 | static const struct seq_operations vmalloc_op = { |
1722 | .start = s_start, | 1754 | .start = s_start, |
1723 | .next = s_next, | 1755 | .next = s_next, |
1724 | .stop = s_stop, | 1756 | .stop = s_stop, |
1725 | .show = s_show, | 1757 | .show = s_show, |
1726 | }; | 1758 | }; |
1759 | |||
1760 | static int vmalloc_open(struct inode *inode, struct file *file) | ||
1761 | { | ||
1762 | unsigned int *ptr = NULL; | ||
1763 | int ret; | ||
1764 | |||
1765 | if (NUMA_BUILD) | ||
1766 | ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); | ||
1767 | ret = seq_open(file, &vmalloc_op); | ||
1768 | if (!ret) { | ||
1769 | struct seq_file *m = file->private_data; | ||
1770 | m->private = ptr; | ||
1771 | } else | ||
1772 | kfree(ptr); | ||
1773 | return ret; | ||
1774 | } | ||
1775 | |||
1776 | static const struct file_operations proc_vmalloc_operations = { | ||
1777 | .open = vmalloc_open, | ||
1778 | .read = seq_read, | ||
1779 | .llseek = seq_lseek, | ||
1780 | .release = seq_release_private, | ||
1781 | }; | ||
1782 | |||
1783 | static int __init proc_vmalloc_init(void) | ||
1784 | { | ||
1785 | proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); | ||
1786 | return 0; | ||
1787 | } | ||
1788 | module_init(proc_vmalloc_init); | ||
1727 | #endif | 1789 | #endif |
1728 | 1790 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index 3b5860294bb6..62e7f62fb559 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -623,6 +623,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
623 | * Try to allocate it some swap space here. | 623 | * Try to allocate it some swap space here. |
624 | */ | 624 | */ |
625 | if (PageAnon(page) && !PageSwapCache(page)) { | 625 | if (PageAnon(page) && !PageSwapCache(page)) { |
626 | if (!(sc->gfp_mask & __GFP_IO)) | ||
627 | goto keep_locked; | ||
626 | switch (try_to_munlock(page)) { | 628 | switch (try_to_munlock(page)) { |
627 | case SWAP_FAIL: /* shouldn't happen */ | 629 | case SWAP_FAIL: /* shouldn't happen */ |
628 | case SWAP_AGAIN: | 630 | case SWAP_AGAIN: |
@@ -634,6 +636,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
634 | } | 636 | } |
635 | if (!add_to_swap(page, GFP_ATOMIC)) | 637 | if (!add_to_swap(page, GFP_ATOMIC)) |
636 | goto activate_locked; | 638 | goto activate_locked; |
639 | may_enter_fs = 1; | ||
637 | } | 640 | } |
638 | #endif /* CONFIG_SWAP */ | 641 | #endif /* CONFIG_SWAP */ |
639 | 642 | ||
@@ -1245,6 +1248,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1245 | list_add(&page->lru, &l_inactive); | 1248 | list_add(&page->lru, &l_inactive); |
1246 | } | 1249 | } |
1247 | 1250 | ||
1251 | spin_lock_irq(&zone->lru_lock); | ||
1248 | /* | 1252 | /* |
1249 | * Count referenced pages from currently used mappings as | 1253 | * Count referenced pages from currently used mappings as |
1250 | * rotated, even though they are moved to the inactive list. | 1254 | * rotated, even though they are moved to the inactive list. |
@@ -1260,7 +1264,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1260 | 1264 | ||
1261 | pgmoved = 0; | 1265 | pgmoved = 0; |
1262 | lru = LRU_BASE + file * LRU_FILE; | 1266 | lru = LRU_BASE + file * LRU_FILE; |
1263 | spin_lock_irq(&zone->lru_lock); | ||
1264 | while (!list_empty(&l_inactive)) { | 1267 | while (!list_empty(&l_inactive)) { |
1265 | page = lru_to_page(&l_inactive); | 1268 | page = lru_to_page(&l_inactive); |
1266 | prefetchw_prev_lru_page(page, &l_inactive, flags); | 1269 | prefetchw_prev_lru_page(page, &l_inactive, flags); |
@@ -1386,9 +1389,9 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc, | |||
1386 | file_prio = 200 - sc->swappiness; | 1389 | file_prio = 200 - sc->swappiness; |
1387 | 1390 | ||
1388 | /* | 1391 | /* |
1389 | * anon recent_rotated[0] | 1392 | * The amount of pressure on anon vs file pages is inversely |
1390 | * %anon = 100 * ----------- / ----------------- * IO cost | 1393 | * proportional to the fraction of recently scanned pages on |
1391 | * anon + file rotate_sum | 1394 | * each list that were recently referenced and in active use. |
1392 | */ | 1395 | */ |
1393 | ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); | 1396 | ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); |
1394 | ap /= zone->recent_rotated[0] + 1; | 1397 | ap /= zone->recent_rotated[0] + 1; |
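The rewritten comment above describes the same arithmetic the code performs: each list's share of reclaim pressure scales with its priority and shrinks as its recently-rotated (recently-referenced) fraction grows. A worked standalone example with made-up counters; the file-side computation is assumed to mirror the visible anon-side one:

#include <stdio.h>

int main(void)
{
	/* Sample numbers, invented for illustration. */
	unsigned long swappiness = 60;
	unsigned long anon_prio = swappiness;        /* as in get_scan_ratio() */
	unsigned long file_prio = 200 - swappiness;

	unsigned long anon_scanned = 1000, anon_rotated = 800;  /* mostly re-referenced */
	unsigned long file_scanned = 1000, file_rotated = 100;  /* mostly unreferenced */

	/* Pressure shrinks as the recently-referenced share grows. */
	unsigned long ap = (anon_prio + 1) * (anon_scanned + 1) / (anon_rotated + 1);
	unsigned long fp = (file_prio + 1) * (file_scanned + 1) / (file_rotated + 1);

	printf("ap = %lu, fp = %lu\n", ap, fp);
	printf("anon share of scanning = %lu%%\n", 100 * ap / (ap + fp));
	return 0;
}

The heavily re-referenced anon list ends up with only a small share of the scanning, which is exactly what the comment promises.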
@@ -2368,39 +2371,6 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) | |||
2368 | return 1; | 2371 | return 1; |
2369 | } | 2372 | } |
2370 | 2373 | ||
2371 | static void show_page_path(struct page *page) | ||
2372 | { | ||
2373 | char buf[256]; | ||
2374 | if (page_is_file_cache(page)) { | ||
2375 | struct address_space *mapping = page->mapping; | ||
2376 | struct dentry *dentry; | ||
2377 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
2378 | |||
2379 | spin_lock(&mapping->i_mmap_lock); | ||
2380 | dentry = d_find_alias(mapping->host); | ||
2381 | printk(KERN_INFO "rescued: %s %lu\n", | ||
2382 | dentry_path(dentry, buf, 256), pgoff); | ||
2383 | spin_unlock(&mapping->i_mmap_lock); | ||
2384 | } else { | ||
2385 | #if defined(CONFIG_MM_OWNER) && defined(CONFIG_MMU) | ||
2386 | struct anon_vma *anon_vma; | ||
2387 | struct vm_area_struct *vma; | ||
2388 | |||
2389 | anon_vma = page_lock_anon_vma(page); | ||
2390 | if (!anon_vma) | ||
2391 | return; | ||
2392 | |||
2393 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | ||
2394 | printk(KERN_INFO "rescued: anon %s\n", | ||
2395 | vma->vm_mm->owner->comm); | ||
2396 | break; | ||
2397 | } | ||
2398 | page_unlock_anon_vma(anon_vma); | ||
2399 | #endif | ||
2400 | } | ||
2401 | } | ||
2402 | |||
2403 | |||
2404 | /** | 2374 | /** |
2405 | * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list | 2375 | * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list |
2406 | * @page: page to check evictability and move to appropriate lru list | 2376 | * @page: page to check evictability and move to appropriate lru list |
@@ -2421,8 +2391,6 @@ retry: | |||
2421 | if (page_evictable(page, NULL)) { | 2391 | if (page_evictable(page, NULL)) { |
2422 | enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); | 2392 | enum lru_list l = LRU_INACTIVE_ANON + page_is_file_cache(page); |
2423 | 2393 | ||
2424 | show_page_path(page); | ||
2425 | |||
2426 | __dec_zone_state(zone, NR_UNEVICTABLE); | 2394 | __dec_zone_state(zone, NR_UNEVICTABLE); |
2427 | list_move(&page->lru, &zone->lru[l].list); | 2395 | list_move(&page->lru, &zone->lru[l].list); |
2428 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); | 2396 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 9343227c5c60..c3ccfda23adc 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * Copyright (C) 2006 Silicon Graphics, Inc., | 8 | * Copyright (C) 2006 Silicon Graphics, Inc., |
9 | * Christoph Lameter <christoph@lameter.com> | 9 | * Christoph Lameter <christoph@lameter.com> |
10 | */ | 10 | */ |
11 | 11 | #include <linux/fs.h> | |
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/err.h> | 13 | #include <linux/err.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
@@ -384,7 +384,7 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z) | |||
384 | #endif | 384 | #endif |
385 | 385 | ||
386 | #ifdef CONFIG_PROC_FS | 386 | #ifdef CONFIG_PROC_FS |
387 | 387 | #include <linux/proc_fs.h> | |
388 | #include <linux/seq_file.h> | 388 | #include <linux/seq_file.h> |
389 | 389 | ||
390 | static char * const migratetype_names[MIGRATE_TYPES] = { | 390 | static char * const migratetype_names[MIGRATE_TYPES] = { |
@@ -581,20 +581,44 @@ static int pagetypeinfo_show(struct seq_file *m, void *arg) | |||
581 | return 0; | 581 | return 0; |
582 | } | 582 | } |
583 | 583 | ||
584 | const struct seq_operations fragmentation_op = { | 584 | static const struct seq_operations fragmentation_op = { |
585 | .start = frag_start, | 585 | .start = frag_start, |
586 | .next = frag_next, | 586 | .next = frag_next, |
587 | .stop = frag_stop, | 587 | .stop = frag_stop, |
588 | .show = frag_show, | 588 | .show = frag_show, |
589 | }; | 589 | }; |
590 | 590 | ||
591 | const struct seq_operations pagetypeinfo_op = { | 591 | static int fragmentation_open(struct inode *inode, struct file *file) |
592 | { | ||
593 | return seq_open(file, &fragmentation_op); | ||
594 | } | ||
595 | |||
596 | static const struct file_operations fragmentation_file_operations = { | ||
597 | .open = fragmentation_open, | ||
598 | .read = seq_read, | ||
599 | .llseek = seq_lseek, | ||
600 | .release = seq_release, | ||
601 | }; | ||
602 | |||
603 | static const struct seq_operations pagetypeinfo_op = { | ||
592 | .start = frag_start, | 604 | .start = frag_start, |
593 | .next = frag_next, | 605 | .next = frag_next, |
594 | .stop = frag_stop, | 606 | .stop = frag_stop, |
595 | .show = pagetypeinfo_show, | 607 | .show = pagetypeinfo_show, |
596 | }; | 608 | }; |
597 | 609 | ||
610 | static int pagetypeinfo_open(struct inode *inode, struct file *file) | ||
611 | { | ||
612 | return seq_open(file, &pagetypeinfo_op); | ||
613 | } | ||
614 | |||
615 | static const struct file_operations pagetypeinfo_file_ops = { | ||
616 | .open = pagetypeinfo_open, | ||
617 | .read = seq_read, | ||
618 | .llseek = seq_lseek, | ||
619 | .release = seq_release, | ||
620 | }; | ||
621 | |||
598 | #ifdef CONFIG_ZONE_DMA | 622 | #ifdef CONFIG_ZONE_DMA |
599 | #define TEXT_FOR_DMA(xx) xx "_dma", | 623 | #define TEXT_FOR_DMA(xx) xx "_dma", |
600 | #else | 624 | #else |
@@ -771,7 +795,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) | |||
771 | return 0; | 795 | return 0; |
772 | } | 796 | } |
773 | 797 | ||
774 | const struct seq_operations zoneinfo_op = { | 798 | static const struct seq_operations zoneinfo_op = { |
775 | .start = frag_start, /* iterate over all zones. The same as in | 799 | .start = frag_start, /* iterate over all zones. The same as in |
776 | * fragmentation. */ | 800 | * fragmentation. */ |
777 | .next = frag_next, | 801 | .next = frag_next, |
@@ -779,6 +803,18 @@ const struct seq_operations zoneinfo_op = { | |||
779 | .show = zoneinfo_show, | 803 | .show = zoneinfo_show, |
780 | }; | 804 | }; |
781 | 805 | ||
806 | static int zoneinfo_open(struct inode *inode, struct file *file) | ||
807 | { | ||
808 | return seq_open(file, &zoneinfo_op); | ||
809 | } | ||
810 | |||
811 | static const struct file_operations proc_zoneinfo_file_operations = { | ||
812 | .open = zoneinfo_open, | ||
813 | .read = seq_read, | ||
814 | .llseek = seq_lseek, | ||
815 | .release = seq_release, | ||
816 | }; | ||
817 | |||
782 | static void *vmstat_start(struct seq_file *m, loff_t *pos) | 818 | static void *vmstat_start(struct seq_file *m, loff_t *pos) |
783 | { | 819 | { |
784 | unsigned long *v; | 820 | unsigned long *v; |
@@ -834,13 +870,24 @@ static void vmstat_stop(struct seq_file *m, void *arg) | |||
834 | m->private = NULL; | 870 | m->private = NULL; |
835 | } | 871 | } |
836 | 872 | ||
837 | const struct seq_operations vmstat_op = { | 873 | static const struct seq_operations vmstat_op = { |
838 | .start = vmstat_start, | 874 | .start = vmstat_start, |
839 | .next = vmstat_next, | 875 | .next = vmstat_next, |
840 | .stop = vmstat_stop, | 876 | .stop = vmstat_stop, |
841 | .show = vmstat_show, | 877 | .show = vmstat_show, |
842 | }; | 878 | }; |
843 | 879 | ||
880 | static int vmstat_open(struct inode *inode, struct file *file) | ||
881 | { | ||
882 | return seq_open(file, &vmstat_op); | ||
883 | } | ||
884 | |||
885 | static const struct file_operations proc_vmstat_file_operations = { | ||
886 | .open = vmstat_open, | ||
887 | .read = seq_read, | ||
888 | .llseek = seq_lseek, | ||
889 | .release = seq_release, | ||
890 | }; | ||
844 | #endif /* CONFIG_PROC_FS */ | 891 | #endif /* CONFIG_PROC_FS */ |
845 | 892 | ||
846 | #ifdef CONFIG_SMP | 893 | #ifdef CONFIG_SMP |
@@ -898,9 +945,11 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, | |||
898 | 945 | ||
899 | static struct notifier_block __cpuinitdata vmstat_notifier = | 946 | static struct notifier_block __cpuinitdata vmstat_notifier = |
900 | { &vmstat_cpuup_callback, NULL, 0 }; | 947 | { &vmstat_cpuup_callback, NULL, 0 }; |
948 | #endif | ||
901 | 949 | ||
902 | static int __init setup_vmstat(void) | 950 | static int __init setup_vmstat(void) |
903 | { | 951 | { |
952 | #ifdef CONFIG_SMP | ||
904 | int cpu; | 953 | int cpu; |
905 | 954 | ||
906 | refresh_zone_stat_thresholds(); | 955 | refresh_zone_stat_thresholds(); |
@@ -908,7 +957,13 @@ static int __init setup_vmstat(void) | |||
908 | 957 | ||
909 | for_each_online_cpu(cpu) | 958 | for_each_online_cpu(cpu) |
910 | start_cpu_timer(cpu); | 959 | start_cpu_timer(cpu); |
960 | #endif | ||
961 | #ifdef CONFIG_PROC_FS | ||
962 | proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); | ||
963 | proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); | ||
964 | proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); | ||
965 | proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); | ||
966 | #endif | ||
911 | return 0; | 967 | return 0; |
912 | } | 968 | } |
913 | module_init(setup_vmstat) | 969 | module_init(setup_vmstat) |
914 | #endif | ||