Diffstat (limited to 'mm')
 mm/bootmem.c | 31
 mm/hugetlb.c | 57
 mm/memory.c  |  4
 mm/vmscan.c  | 13
 4 files changed, 75 insertions(+), 30 deletions(-)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index c1330cc19783..a58699b6579e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -154,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
  */
 static void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
-                unsigned long align, unsigned long goal)
+                unsigned long align, unsigned long goal, unsigned long limit)
 {
         unsigned long offset, remaining_size, areasize, preferred;
-        unsigned long i, start = 0, incr, eidx;
+        unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
         void *ret;
 
         if(!size) {
@@ -166,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
         }
         BUG_ON(align & (align-1));
 
-        eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+        if (limit && bdata->node_boot_start >= limit)
+                return NULL;
+
+        limit >>= PAGE_SHIFT;
+        if (limit && end_pfn > limit)
+                end_pfn = limit;
+
+        eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
         offset = 0;
         if (align &&
             (bdata->node_boot_start & (align - 1UL)) != 0)
@@ -178,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
          * first, then we try to allocate lower pages.
          */
         if (goal && (goal >= bdata->node_boot_start) &&
-            ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+            ((goal >> PAGE_SHIFT) < end_pfn)) {
                 preferred = goal - bdata->node_boot_start;
 
                 if (bdata->last_success >= preferred)
-                        preferred = bdata->last_success;
+                        if (!limit || (limit && limit > bdata->last_success))
+                                preferred = bdata->last_success;
         } else
                 preferred = 0;
 
@@ -382,14 +390,15 @@ unsigned long __init free_all_bootmem (void)
         return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal,
+                                     unsigned long limit)
 {
         pg_data_t *pgdat = pgdat_list;
         void *ptr;
 
         for_each_pgdat(pgdat)
                 if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
-                                                align, goal)))
+                                                align, goal, limit)))
                         return(ptr);
 
         /*
@@ -400,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned
         return NULL;
 }
 
-void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal)
+
+void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align,
+                                          unsigned long goal, unsigned long limit)
 {
         void *ptr;
 
-        ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
+        ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit);
         if (ptr)
                 return (ptr);
 
-        return __alloc_bootmem_limit(size, align, goal, limit);
+        return __alloc_bootmem_limit(size, align, goal, limit);
 }
 
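Note on the old entry points: __alloc_bootmem() and __alloc_bootmem_node() disappear from this file, so existing callers presumably survive as thin wrappers over the new *_limit variants; the matching header change lives outside mm/ and is not shown here. A minimal sketch, assuming limit == 0 means "no limit", which is how __alloc_bootmem_core() treats it above:

/* Hypothetical compatibility wrappers (e.g. in include/linux/bootmem.h):
 * a limit of 0 disables the new upper-bound check entirely.
 */
#define __alloc_bootmem(x, y, z) \
        __alloc_bootmem_limit(x, y, z, 0)
#define __alloc_bootmem_node(pgdat, x, y, z) \
        __alloc_bootmem_node_limit(pgdat, x, y, z, 0)
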
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 901ac523a1c3..61d380678030 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -274,21 +274,22 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 {
         pte_t *src_pte, *dst_pte, entry;
         struct page *ptepage;
-        unsigned long addr = vma->vm_start;
-        unsigned long end = vma->vm_end;
+        unsigned long addr;
 
-        while (addr < end) {
+        for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
                 dst_pte = huge_pte_alloc(dst, addr);
                 if (!dst_pte)
                         goto nomem;
+                spin_lock(&src->page_table_lock);
                 src_pte = huge_pte_offset(src, addr);
-                BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */
-                entry = *src_pte;
-                ptepage = pte_page(entry);
-                get_page(ptepage);
-                add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
-                set_huge_pte_at(dst, addr, dst_pte, entry);
-                addr += HPAGE_SIZE;
+                if (src_pte && !pte_none(*src_pte)) {
+                        entry = *src_pte;
+                        ptepage = pte_page(entry);
+                        get_page(ptepage);
+                        add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+                        set_huge_pte_at(dst, addr, dst_pte, entry);
+                }
+                spin_unlock(&src->page_table_lock);
         }
         return 0;
 
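For context: fork() reaches copy_hugetlb_page_range() via copy_page_range() in mm/memory.c, which hands hugetlb VMAs off wholesale, so the hole-tolerant loop above is the only copier for huge pages. A minimal sketch of that dispatch (the surrounding mm/memory.c code is not part of this diff, and everything past the hugetlb check is elided):

/* Sketch of the caller side; regular-VMA copying elided. */
int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                    struct vm_area_struct *vma)
{
        if (is_vm_hugetlb_page(vma))
                return copy_hugetlb_page_range(dst_mm, src_mm, vma);

        /* ... normal multi-level page-table walk for regular VMAs ... */
        return 0;
}
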
@@ -323,8 +324,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 
                 page = pte_page(pte);
                 put_page(page);
+                add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE));
         }
-        add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
         flush_tlb_range(vma, start, end);
 }
 
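Moving the rss decrement inside the loop matters once huge pages are demand-faulted: the range may now contain holes, and the old blanket -((end - start) >> PAGE_SHIFT) would over-subtract for every unpopulated slot. A worked example with assumed sizes (2 MB huge pages over 4 KB base pages):

#include <stdio.h>

/* Standalone arithmetic sketch, not kernel code: unmap a 3-slot
 * (6 MB) hugetlb range of which only 2 slots are populated.
 */
int main(void)
{
        long hpage_size = 2L << 20;     /* assumed HPAGE_SIZE: 2 MB */
        long page_size = 4L << 10;      /* assumed PAGE_SIZE: 4 KB */
        long slots = 3, populated = 2;

        long old_delta = slots * hpage_size / page_size;        /* 1536 */
        long new_delta = populated * (hpage_size / page_size);  /* 1024 */

        printf("old rss decrement: %ld (over by %ld)\n",
               old_delta, old_delta - new_delta);
        printf("new rss decrement: %ld\n", new_delta);
        return 0;
}
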
@@ -393,6 +394,28 @@ out:
         return ret;
 }
 
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully.  Whereas the "normal" case is that this is
+ * an access to a hugetlb page which has been truncated off since mmap.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                        unsigned long address, int write_access)
+{
+        int ret = VM_FAULT_SIGBUS;
+        pte_t *pte;
+
+        spin_lock(&mm->page_table_lock);
+        pte = huge_pte_offset(mm, address);
+        if (pte && !pte_none(*pte))
+                ret = VM_FAULT_MINOR;
+        spin_unlock(&mm->page_table_lock);
+        return ret;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page **pages, struct vm_area_struct **vmas,
                         unsigned long *position, int *length, int i)
@@ -403,6 +426,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
         BUG_ON(!is_vm_hugetlb_page(vma));
 
         vpfn = vaddr/PAGE_SIZE;
+        spin_lock(&mm->page_table_lock);
         while (vaddr < vma->vm_end && remainder) {
 
                 if (pages) {
@@ -415,8 +439,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                          * indexing below to work. */
                         pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
 
-                        /* hugetlb should be locked, and hence, prefaulted */
-                        WARN_ON(!pte || pte_none(*pte));
+                        /* the hugetlb file might have been truncated */
+                        if (!pte || pte_none(*pte)) {
+                                remainder = 0;
+                                if (!i)
+                                        i = -EFAULT;
+                                break;
+                        }
 
                         page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
 
@@ -434,7 +463,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 --remainder;
                 ++i;
         }
-
+        spin_unlock(&mm->page_table_lock);
         *length = remainder;
         *position = vaddr;
 
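The new failure convention in follow_hugetlb_page() mirrors what get_user_pages() expects of its helpers: return however many pages were gathered, or -EFAULT if the very first lookup failed. An illustrative caller-side sketch under that assumption (pin_user_range and its logic are hypothetical, not the actual mm/memory.c code):

/* Hypothetical caller: pin nr_pages starting at 'start' in a hugetlb VMA. */
long pin_user_range(struct mm_struct *mm, struct vm_area_struct *vma,
                    unsigned long start, int nr_pages, struct page **pages)
{
        unsigned long vaddr = start;
        int remainder = nr_pages;
        int i = 0;

        i = follow_hugetlb_page(mm, vma, pages, NULL, &vaddr, &remainder, i);

        /* i is now a count of pages pinned so far, or -EFAULT if the
         * first page was already gone (e.g. the file was truncated). */
        return i;
}
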
diff --git a/mm/memory.c b/mm/memory.c
index ae8161f1f459..1db40e935e55 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2045,8 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 
         inc_page_state(pgfault);
 
-        if (is_vm_hugetlb_page(vma))
-                return VM_FAULT_SIGBUS; /* mapping truncation does this. */
+        if (unlikely(is_vm_hugetlb_page(vma)))
+                return hugetlb_fault(mm, vma, address, write_access);
 
         /*
          * We need the page table lock to synchronize with kswapd
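mm/memory.c now calls hugetlb_fault(), defined in mm/hugetlb.c above; it presumably picks the declaration up from a header, but since this view is limited to mm/ that hunk is not shown. The prototype would simply match the new definition:

/* Assumed declaration (the header change is outside mm/ and not shown): */
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                  unsigned long address, int write_access);
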
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0ea71e887bb6..64f9570cff56 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                  * PageDirty _after_ making sure that the page is freeable and
                  * not in use by anybody. (pagecache + us == 2)
                  */
-                if (page_count(page) != 2 || PageDirty(page)) {
-                        write_unlock_irq(&mapping->tree_lock);
-                        goto keep_locked;
-                }
+                if (unlikely(page_count(page) != 2))
+                        goto cannot_free;
+                smp_rmb();
+                if (unlikely(PageDirty(page)))
+                        goto cannot_free;
 
 #ifdef CONFIG_SWAP
                 if (PageSwapCache(page)) {
@@ -538,6 +539,10 @@ free_it:
                 __pagevec_release_nonlru(&freed_pvec);
                 continue;
 
+cannot_free:
+                write_unlock_irq(&mapping->tree_lock);
+                goto keep_locked;
+
 activate_locked:
                 SetPageActive(page);
                 pgactivate++;
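Splitting the old combined test orders the page_count() read before the PageDirty() read: with the smp_rmb() between them, a racing path that redirties the page and only then drops its reference cannot slip past both checks. A standalone sketch of the pairing, where the writer side is hypothetical (standing in for whichever path redirties a page before releasing it):

/* Writer: the dirty store must become visible before the count drops
 * (via smp_wmb() here, or an implied barrier in the real kernel path).
 */
static void redirty_and_release(struct page *page)
{
        SetPageDirty(page);
        smp_wmb();
        put_page(page);
}

/* Reader: mirror of the shrink_list() logic above. */
static int can_free(struct page *page)
{
        if (page_count(page) != 2)      /* someone else still holds a ref */
                return 0;
        smp_rmb();                      /* count read before dirty read */
        if (PageDirty(page))            /* redirtied before the release? */
                return 0;
        return 1;                       /* only pagecache + us, and clean */
}
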