author     Ingo Molnar <mingo@elte.hu>   2008-11-12 06:39:21 -0500
committer  Ingo Molnar <mingo@elte.hu>   2008-11-12 06:39:21 -0500
commit     708b8eae0fd532af73ea8350e6dcc10255ff7376 (patch)
tree       f336436934fd79bc91aff7112a9beb10bc4e839f /mm
parent     d98d38f2014ab79f28c126ff175d034891f7aefc (diff)
parent     f21f237cf55494c3a4209de323281a3b0528da10 (diff)
Merge branch 'linus' into core/locking
Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c          242
-rw-r--r--  mm/hugetlb.c           49
-rw-r--r--  mm/internal.h          29
-rw-r--r--  mm/mempolicy.c         18
-rw-r--r--  mm/migrate.c            2
-rw-r--r--  mm/mmap.c               3
-rw-r--r--  mm/nommu.c              3
-rw-r--r--  mm/oom_kill.c           3
-rw-r--r--  mm/page_alloc.c        28
-rw-r--r--  mm/page_isolation.c     5
-rw-r--r--  mm/shmem.c              8
-rw-r--r--  mm/sparse-vmemmap.c     2
-rw-r--r--  mm/vmalloc.c           12
13 files changed, 137 insertions, 267 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ab8553658af3..f3e5f8944d17 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping,
 {
 	const struct address_space_operations *aops = mapping->a_ops;
 
-	if (aops->write_begin) {
-		return aops->write_begin(file, mapping, pos, len, flags,
+	return aops->write_begin(file, mapping, pos, len, flags,
 							pagep, fsdata);
-	} else {
-		int ret;
-		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-		struct inode *inode = mapping->host;
-		struct page *page;
-again:
-		page = __grab_cache_page(mapping, index);
-		*pagep = page;
-		if (!page)
-			return -ENOMEM;
-
-		if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
-			/*
-			 * There is no way to resolve a short write situation
-			 * for a !Uptodate page (except by double copying in
-			 * the caller done by generic_perform_write_2copy).
-			 *
-			 * Instead, we have to bring it uptodate here.
-			 */
-			ret = aops->readpage(file, page);
-			page_cache_release(page);
-			if (ret) {
-				if (ret == AOP_TRUNCATED_PAGE)
-					goto again;
-				return ret;
-			}
-			goto again;
-		}
-
-		ret = aops->prepare_write(file, page, offset, offset+len);
-		if (ret) {
-			unlock_page(page);
-			page_cache_release(page);
-			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
-		}
-		return ret;
-	}
 }
 EXPORT_SYMBOL(pagecache_write_begin);
 
@@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 			struct page *page, void *fsdata)
 {
 	const struct address_space_operations *aops = mapping->a_ops;
-	int ret;
-
-	if (aops->write_end) {
-		mark_page_accessed(page);
-		ret = aops->write_end(file, mapping, pos, len, copied,
-							page, fsdata);
-	} else {
-		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-		struct inode *inode = mapping->host;
-
-		flush_dcache_page(page);
-		ret = aops->commit_write(file, page, offset, offset+len);
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		if (ret < 0) {
-			if (pos + len > inode->i_size)
-				vmtruncate(inode, inode->i_size);
-		} else if (ret > 0)
-			ret = min_t(size_t, copied, ret);
-		else
-			ret = copied;
-	}
 
-	return ret;
+	mark_page_accessed(page);
+	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 EXPORT_SYMBOL(pagecache_write_end);
 
@@ -2226,174 +2163,6 @@ repeat:
 }
 EXPORT_SYMBOL(__grab_cache_page);
 
-static ssize_t generic_perform_write_2copy(struct file *file,
-				struct iov_iter *i, loff_t pos)
-{
-	struct address_space *mapping = file->f_mapping;
-	const struct address_space_operations *a_ops = mapping->a_ops;
-	struct inode *inode = mapping->host;
-	long status = 0;
-	ssize_t written = 0;
-
-	do {
-		struct page *src_page;
-		struct page *page;
-		pgoff_t index;		/* Pagecache index for current page */
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
-		size_t copied;		/* Bytes copied from user */
-
-		offset = (pos & (PAGE_CACHE_SIZE - 1));
-		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
-						iov_iter_count(i));
-
-		/*
-		 * a non-NULL src_page indicates that we're doing the
-		 * copy via get_user_pages and kmap.
-		 */
-		src_page = NULL;
-
-		/*
-		 * Bring in the user page that we will copy from _first_.
-		 * Otherwise there's a nasty deadlock on copying from the
-		 * same page as we're writing to, without it being marked
-		 * up-to-date.
-		 *
-		 * Not only is this an optimisation, but it is also required
-		 * to check that the address is actually valid, when atomic
-		 * usercopies are used, below.
-		 */
-		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
-			status = -EFAULT;
-			break;
-		}
-
-		page = __grab_cache_page(mapping, index);
-		if (!page) {
-			status = -ENOMEM;
-			break;
-		}
-
-		/*
-		 * non-uptodate pages cannot cope with short copies, and we
-		 * cannot take a pagefault with the destination page locked.
-		 * So pin the source page to copy it.
-		 */
-		if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
-			unlock_page(page);
-
-			src_page = alloc_page(GFP_KERNEL);
-			if (!src_page) {
-				page_cache_release(page);
-				status = -ENOMEM;
-				break;
-			}
-
-			/*
-			 * Cannot get_user_pages with a page locked for the
-			 * same reason as we can't take a page fault with a
-			 * page locked (as explained below).
-			 */
-			copied = iov_iter_copy_from_user(src_page, i,
-								offset, bytes);
-			if (unlikely(copied == 0)) {
-				status = -EFAULT;
-				page_cache_release(page);
-				page_cache_release(src_page);
-				break;
-			}
-			bytes = copied;
-
-			lock_page(page);
-			/*
-			 * Can't handle the page going uptodate here, because
-			 * that means we would use non-atomic usercopies, which
-			 * zero out the tail of the page, which can cause
-			 * zeroes to become transiently visible. We could just
-			 * use a non-zeroing copy, but the APIs aren't too
-			 * consistent.
-			 */
-			if (unlikely(!page->mapping || PageUptodate(page))) {
-				unlock_page(page);
-				page_cache_release(page);
-				page_cache_release(src_page);
-				continue;
-			}
-		}
-
-		status = a_ops->prepare_write(file, page, offset, offset+bytes);
-		if (unlikely(status))
-			goto fs_write_aop_error;
-
-		if (!src_page) {
-			/*
-			 * Must not enter the pagefault handler here, because
-			 * we hold the page lock, so we might recursively
-			 * deadlock on the same lock, or get an ABBA deadlock
-			 * against a different lock, or against the mmap_sem
-			 * (which nests outside the page lock). So increment
-			 * preempt count, and use _atomic usercopies.
-			 *
-			 * The page is uptodate so we are OK to encounter a
-			 * short copy: if unmodified parts of the page are
-			 * marked dirty and written out to disk, it doesn't
-			 * really matter.
-			 */
-			pagefault_disable();
-			copied = iov_iter_copy_from_user_atomic(page, i,
-								offset, bytes);
-			pagefault_enable();
-		} else {
-			void *src, *dst;
-			src = kmap_atomic(src_page, KM_USER0);
-			dst = kmap_atomic(page, KM_USER1);
-			memcpy(dst + offset, src + offset, bytes);
-			kunmap_atomic(dst, KM_USER1);
-			kunmap_atomic(src, KM_USER0);
-			copied = bytes;
-		}
-		flush_dcache_page(page);
-
-		status = a_ops->commit_write(file, page, offset, offset+bytes);
-		if (unlikely(status < 0))
-			goto fs_write_aop_error;
-		if (unlikely(status > 0)) /* filesystem did partial write */
-			copied = min_t(size_t, copied, status);
-
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
-		if (src_page)
-			page_cache_release(src_page);
-
-		iov_iter_advance(i, copied);
-		pos += copied;
-		written += copied;
-
-		balance_dirty_pages_ratelimited(mapping);
-		cond_resched();
-		continue;
-
-fs_write_aop_error:
-		unlock_page(page);
-		page_cache_release(page);
-		if (src_page)
-			page_cache_release(src_page);
-
-		/*
-		 * prepare_write() may have instantiated a few blocks
-		 * outside i_size. Trim these off again. Don't need
-		 * i_size_read because we hold i_mutex.
-		 */
-		if (pos + bytes > inode->i_size)
-			vmtruncate(inode, inode->i_size);
-		break;
-	} while (iov_iter_count(i));
-
-	return written ? written : status;
-}
-
 static ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, loff_t pos)
 {
@@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	struct iov_iter i;
 
 	iov_iter_init(&i, iov, nr_segs, count, written);
-	if (a_ops->write_begin)
-		status = generic_perform_write(file, &i, pos);
-	else
-		status = generic_perform_write_2copy(file, &i, pos);
+	status = generic_perform_write(file, &i, pos);
 
 	if (likely(status >= 0)) {
 		written += status;
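Aside on the mm/filemap.c change above: pagecache_write_begin() and pagecache_write_end() are now unconditional wrappers around ->write_begin/->write_end, so any mapping passed to them must implement both hooks. Below is a minimal sketch, not part of the commit, of how a caller drives the pair with the 2.6.28-era signatures shown in this diff; the helper name, the kernel-buffer source and the single-page assumption are illustrative only.

/*
 * Illustrative sketch only: write 'len' bytes from a kernel buffer at 'pos',
 * assuming offset + len does not cross a PAGE_CACHE_SIZE boundary.
 */
static int write_one_chunk(struct file *file, struct address_space *mapping,
			   loff_t pos, unsigned len, const void *buf)
{
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	struct page *page;
	void *fsdata;
	void *kaddr;
	int status;

	/* ->write_begin() returns the locked pagecache page covering 'pos' */
	status = pagecache_write_begin(file, mapping, pos, len,
					AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
	if (status)
		return status;

	/* copying from a kernel buffer cannot fault under the page lock */
	kaddr = kmap_atomic(page, KM_USER0);
	memcpy(kaddr + offset, buf, len);
	kunmap_atomic(kaddr, KM_USER0);
	flush_dcache_page(page);

	/* ->write_end() unlocks and releases the page (and typically dirties it) */
	return pagecache_write_end(file, mapping, pos, len, len, page, fsdata);
}

pagecache_write_end() returns the number of bytes the filesystem accepted, or a negative errno, so a short positive return here means less than 'len' was taken.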
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 421aee99b84a..d143ab67be44 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma)
 	return 0;
 }
 
+static void clear_gigantic_page(struct page *page,
+			unsigned long addr, unsigned long sz)
+{
+	int i;
+	struct page *p = page;
+
+	might_sleep();
+	for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) {
+		cond_resched();
+		clear_user_highpage(p, addr + i * PAGE_SIZE);
+	}
+}
 static void clear_huge_page(struct page *page,
 			unsigned long addr, unsigned long sz)
 {
 	int i;
 
+	if (unlikely(sz > MAX_ORDER_NR_PAGES))
+		return clear_gigantic_page(page, addr, sz);
+
 	might_sleep();
 	for (i = 0; i < sz/PAGE_SIZE; i++) {
 		cond_resched();
@@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page,
 	}
 }
 
+static void copy_gigantic_page(struct page *dst, struct page *src,
+			unsigned long addr, struct vm_area_struct *vma)
+{
+	int i;
+	struct hstate *h = hstate_vma(vma);
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+	might_sleep();
+	for (i = 0; i < pages_per_huge_page(h); ) {
+		cond_resched();
+		copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
 static void copy_huge_page(struct page *dst, struct page *src,
 			unsigned long addr, struct vm_area_struct *vma)
 {
 	int i;
 	struct hstate *h = hstate_vma(vma);
 
+	if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES))
+		return copy_gigantic_page(dst, src, addr, vma);
+
 	might_sleep();
 	for (i = 0; i < pages_per_huge_page(h); i++) {
 		cond_resched();
@@ -456,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 {
 	int i;
 
+	VM_BUG_ON(h->order >= MAX_ORDER);
+
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[page_to_nid(page)]--;
 	for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -970,6 +1007,14 @@ found:
 	return 1;
 }
 
+static void prep_compound_huge_page(struct page *page, int order)
+{
+	if (unlikely(order > (MAX_ORDER - 1)))
+		prep_compound_gigantic_page(page, order);
+	else
+		prep_compound_page(page, order);
+}
+
 /* Put bootmem huge pages into the standard lists after mem_map is up */
 static void __init gather_bootmem_prealloc(void)
 {
@@ -980,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void)
 		struct hstate *h = m->hstate;
 		__ClearPageReserved(page);
 		WARN_ON(page_count(page) != 1);
-		prep_compound_page(page, h->order);
+		prep_compound_huge_page(page, h->order);
 		prep_new_huge_page(h, page, page_to_nid(page));
 	}
 }
@@ -2130,7 +2175,7 @@ same_page:
 		if (zeropage_ok)
 			pages[i] = ZERO_PAGE(0);
 		else
-			pages[i] = page + pfn_offset;
+			pages[i] = mem_map_offset(page, pfn_offset);
 		get_page(pages[i]);
 	}
 
diff --git a/mm/internal.h b/mm/internal.h
index e4e728bdf324..13333bc2eb68 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
 extern void prep_compound_page(struct page *page, unsigned long order);
+extern void prep_compound_gigantic_page(struct page *page, unsigned long order);
 
 static inline void set_page_count(struct page *page, int v)
 {
@@ -176,6 +177,34 @@ static inline void free_page_mlock(struct page *page) { }
 #endif /* CONFIG_UNEVICTABLE_LRU */
 
 /*
+ * Return the mem_map entry representing the 'offset' subpage within
+ * the maximally aligned gigantic page 'base'. Handle any discontiguity
+ * in the mem_map at MAX_ORDER_NR_PAGES boundaries.
+ */
+static inline struct page *mem_map_offset(struct page *base, int offset)
+{
+	if (unlikely(offset >= MAX_ORDER_NR_PAGES))
+		return pfn_to_page(page_to_pfn(base) + offset);
+	return base + offset;
+}
+
+/*
+ * Iterator over all subpages withing the maximally aligned gigantic
+ * page 'base'. Handle any discontiguity in the mem_map.
+ */
+static inline struct page *mem_map_next(struct page *iter,
+						struct page *base, int offset)
+{
+	if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
+		unsigned long pfn = page_to_pfn(base) + offset;
+		if (!pfn_valid(pfn))
+			return NULL;
+		return pfn_to_page(pfn);
+	}
+	return iter + 1;
+}
+
+/*
  * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
  * so all functions starting at paging_init should be marked __init
 * in those cases. SPARSEMEM, however, allows for memory hotplug,
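Aside on the two helpers added above: for gigantic pages (order >= MAX_ORDER), plain 'base + offset' arithmetic on struct page is only guaranteed to stay valid within one MAX_ORDER block of the mem_map. With 4 KiB pages and the default MAX_ORDER of 11, MAX_ORDER_NR_PAGES is 1 << 10 = 1024 pages (4 MiB), so a 1 GiB gigantic page spans 256 such blocks whose mem_map need not be contiguous. A small sketch, not from the tree, of the invariant the iterator and the random-access helper maintain, mirroring clear_gigantic_page() in the mm/hugetlb.c hunks above:

/* Illustrative sketch only; relies on the mm-internal helpers in "internal.h". */
static void assert_gigantic_walk_consistent(struct page *base,
					    unsigned long nr_pages)
{
	unsigned long i;
	struct page *p = base;

	for (i = 0; i < nr_pages; i++, p = mem_map_next(p, base, i)) {
		/* 'p' stays usable even where the mem_map breaks at a
		 * MAX_ORDER_NR_PAGES boundary; the two helpers must agree. */
		VM_BUG_ON(p != mem_map_offset(base, i));
	}
}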
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 36f42573a335..e9493b1c1117 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	int err;
 	struct vm_area_struct *first, *vma, *prev;
 
-	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-
-		err = migrate_prep();
-		if (err)
-			return ERR_PTR(err);
-	}
 
 	first = find_vma(mm, start);
 	if (!first)
@@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm,
 	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
 {
 	int busy = 0;
-	int err = 0;
+	int err;
 	nodemask_t tmp;
 
+	err = migrate_prep();
+	if (err)
+		return err;
+
 	down_read(&mm->mmap_sem);
 
 	err = migrate_vmas(mm, from_nodes, to_nodes, flags);
@@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len,
 		 start, start + len, mode, mode_flags,
 		 nmask ? nodes_addr(*nmask)[0] : -1);
 
+	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+
+		err = migrate_prep();
+		if (err)
+			return err;
+	}
 	down_write(&mm->mmap_sem);
 	vma = check_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
diff --git a/mm/migrate.c b/mm/migrate.c
index 6602941bfab0..385db89f0c33 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -841,12 +841,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 	struct page_to_node *pp;
 	LIST_HEAD(pagelist);
 
+	migrate_prep();
 	down_read(&mm->mmap_sem);
 
 	/*
 	 * Build a list of pages to migrate
 	 */
-	migrate_prep();
 	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
 		struct vm_area_struct *vma;
 		struct page *page;
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -175,7 +175,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 
 	/* Don't let a single process grow too big:
 	   leave 3% of the size of this process for other processes */
-	allowed -= mm->total_vm / 32;
+	if (mm)
+		allowed -= mm->total_vm / 32;
 
 	/*
 	 * cast `allowed' as a signed long because vm_committed_space
diff --git a/mm/nommu.c b/mm/nommu.c
index 2696b24f2bb3..7695dc850785 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1454,7 +1454,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 
 	/* Don't let a single process grow too big:
 	   leave 3% of the size of this process for other processes */
-	allowed -= current->mm->total_vm / 32;
+	if (mm)
+		allowed -= mm->total_vm / 32;
 
 	/*
 	 * cast `allowed' as a signed long because vm_committed_space
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 64e5b4bcd964..a0a01902f551 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex);
  * badness - calculate a numeric value for how bad this task has been
  * @p: task struct of which task we should calculate
  * @uptime: current uptime in seconds
- * @mem: target memory controller
  *
  * The formula used is relatively simple and documented inline in the
  * function. The main rationale is that we want to select a good task
@@ -295,6 +294,8 @@ static void dump_tasks(const struct mem_cgroup *mem)
 			continue;
 		if (mem && !task_in_mem_cgroup(p, mem))
 			continue;
+		if (!thread_group_leader(p))
+			continue;
 
 		task_lock(p);
 		printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d0a240fbb8bf..54069e64e3a8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -263,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
+
+	set_compound_page_dtor(page, free_compound_page);
+	set_compound_order(page, order);
+	__SetPageHead(page);
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
+
+		__SetPageTail(p);
+		p->first_page = page;
+	}
+}
+
+#ifdef CONFIG_HUGETLBFS
+void prep_compound_gigantic_page(struct page *page, unsigned long order)
+{
+	int i;
+	int nr_pages = 1 << order;
 	struct page *p = page + 1;
 
 	set_compound_page_dtor(page, free_compound_page);
 	set_compound_order(page, order);
 	__SetPageHead(page);
-	for (i = 1; i < nr_pages; i++, p++) {
-		if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-			p = pfn_to_page(page_to_pfn(page) + i);
+	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		__SetPageTail(p);
 		p->first_page = page;
 	}
 }
+#endif
 
 static void destroy_compound_page(struct page *page, unsigned long order)
 {
 	int i;
 	int nr_pages = 1 << order;
-	struct page *p = page + 1;
 
 	if (unlikely(compound_order(page) != order))
 		bad_page(page);
@@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 	if (unlikely(!PageHead(page)))
 		bad_page(page);
 	__ClearPageHead(page);
-	for (i = 1; i < nr_pages; i++, p++) {
-		if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0))
-			p = pfn_to_page(page_to_pfn(page) + i);
+	for (i = 1; i < nr_pages; i++) {
+		struct page *p = page + i;
 
 		if (unlikely(!PageTail(p) |
 				(p->first_page != page)))
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b70a7fec1ff6..5e0ffd967452 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 		if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
 			break;
 	}
-	if (pfn < end_pfn)
+	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
+	if ((pfn < end_pfn) || !page)
 		return -EBUSY;
 	/* Check all pages are free or Marked as ISOLATED */
-	zone = page_zone(pfn_to_page(pfn));
+	zone = page_zone(page);
 	spin_lock_irqsave(&zone->lock, flags);
 	ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn);
 	spin_unlock_irqrestore(&zone->lock, flags);
diff --git a/mm/shmem.c b/mm/shmem.c
index d38d7e61fcd0..0ed075215e5f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -161,8 +161,8 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
  */
 static inline int shmem_acct_size(unsigned long flags, loff_t size)
 {
-	return (flags & VM_ACCOUNT)?
-		security_vm_enough_memory(VM_ACCT(size)): 0;
+	return (flags & VM_ACCOUNT) ?
+		security_vm_enough_memory_kern(VM_ACCT(size)) : 0;
 }
 
 static inline void shmem_unacct_size(unsigned long flags, loff_t size)
@@ -179,8 +179,8 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size)
  */
 static inline int shmem_acct_block(unsigned long flags)
 {
-	return (flags & VM_ACCOUNT)?
-		0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE));
+	return (flags & VM_ACCOUNT) ?
+		0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE));
 }
 
 static inline void shmem_unacct_blocks(unsigned long flags, long pages)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a91b5f8fcaf6..a13ea6401ae7 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
 	unsigned long pfn = pte_pfn(*pte);
 	int actual_node = early_pfn_to_nid(pfn);
 
-	if (actual_node != node)
+	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
 		printk(KERN_WARNING "[%lx-%lx] potential offnode "
 			"page_structs\n", start, end - 1);
 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 036536945dd9..ba6b0f5f7fac 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -178,7 +178,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
 static inline int is_vmalloc_or_module_addr(const void *x)
 {
 	/*
-	 * x86-64 and sparc64 put modules in a special place,
+	 * ARM, x86-64 and sparc64 put modules in a special place,
 	 * and fall back on vmalloc() if that fails. Others
 	 * just put it in the vmalloc space.
 	 */
@@ -592,6 +592,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr)
 
 #define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
 
+static bool vmap_initialized __read_mostly = false;
+
 struct vmap_block_queue {
 	spinlock_t lock;
 	struct list_head free;
@@ -828,6 +830,9 @@ void vm_unmap_aliases(void)
 	int cpu;
 	int flush = 0;
 
+	if (unlikely(!vmap_initialized))
+		return;
+
 	for_each_possible_cpu(cpu) {
 		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
 		struct vmap_block *vb;
@@ -897,7 +902,8 @@ EXPORT_SYMBOL(vm_unmap_ram);
  * @count: number of pages
  * @node: prefer to allocate data structures on this node
  * @prot: memory protection to use. PAGE_KERNEL for regular RAM
- * @returns: a pointer to the address that has been mapped, or NULL on failure
+ *
+ * Returns: a pointer to the address that has been mapped, or %NULL on failure
  */
 void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
 {
@@ -941,6 +947,8 @@ void __init vmalloc_init(void)
 		INIT_LIST_HEAD(&vbq->dirty);
 		vbq->nr_dirty = 0;
 	}
+
+	vmap_initialized = true;
 }
 
 void unmap_kernel_range(unsigned long addr, unsigned long size)