Diffstat (limited to 'mm')
-rw-r--r--  mm/bootmem.c   31
-rw-r--r--  mm/hugetlb.c   57
-rw-r--r--  mm/memory.c     4
-rw-r--r--  mm/vmscan.c    13
4 files changed, 75 insertions, 30 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index c1330cc19783..a58699b6579e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -154,10 +154,10 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
  */
 static void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
-                unsigned long align, unsigned long goal)
+                unsigned long align, unsigned long goal, unsigned long limit)
 {
         unsigned long offset, remaining_size, areasize, preferred;
-        unsigned long i, start = 0, incr, eidx;
+        unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn;
         void *ret;
 
         if(!size) {
@@ -166,7 +166,14 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
         }
         BUG_ON(align & (align-1));
 
-        eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+        if (limit && bdata->node_boot_start >= limit)
+                return NULL;
+
+        limit >>=PAGE_SHIFT;
+        if (limit && end_pfn > limit)
+                end_pfn = limit;
+
+        eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
         offset = 0;
         if (align &&
             (bdata->node_boot_start & (align - 1UL)) != 0)
@@ -178,11 +185,12 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
          * first, then we try to allocate lower pages.
          */
         if (goal && (goal >= bdata->node_boot_start) &&
-            ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+            ((goal >> PAGE_SHIFT) < end_pfn)) {
                 preferred = goal - bdata->node_boot_start;
 
                 if (bdata->last_success >= preferred)
-                        preferred = bdata->last_success;
+                        if (!limit || (limit && limit > bdata->last_success))
+                                preferred = bdata->last_success;
         } else
                 preferred = 0;
 
@@ -382,14 +390,15 @@ unsigned long __init free_all_bootmem (void)
         return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
+void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal,
+                                     unsigned long limit)
 {
         pg_data_t *pgdat = pgdat_list;
         void *ptr;
 
         for_each_pgdat(pgdat)
                 if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
-                                                align, goal)))
+                                                align, goal, limit)))
                         return(ptr);
 
         /*
@@ -400,14 +409,16 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned
         return NULL;
 }
 
-void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal)
+
+void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align,
+                                          unsigned long goal, unsigned long limit)
 {
         void *ptr;
 
-        ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal);
+        ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit);
         if (ptr)
                 return (ptr);
 
-        return __alloc_bootmem(size, align, goal);
+        return __alloc_bootmem_limit(size, align, goal, limit);
 }
 
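The new __alloc_bootmem_limit()/__alloc_bootmem_node_limit() variants behave like the existing allocators but only hand out memory below the physical address passed as limit (limit == 0 keeps the old unbounded behaviour). A minimal caller sketch, not part of this patch; the function name, the 4 GiB figure and the panic message are illustrative assumptions:

void __init example_table_init(void)
{
        /* one page, page-aligned, no preferred goal, stay below 4 GiB */
        void *tbl = __alloc_bootmem_limit(PAGE_SIZE, PAGE_SIZE, 0,
                                          0x100000000UL /* assumes 64-bit unsigned long */);
        if (!tbl)
                panic("example_table_init: no bootmem below 4GiB");
}
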
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 901ac523a1c3..61d380678030 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -274,21 +274,22 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 {
         pte_t *src_pte, *dst_pte, entry;
         struct page *ptepage;
-        unsigned long addr = vma->vm_start;
-        unsigned long end = vma->vm_end;
+        unsigned long addr;
 
-        while (addr < end) {
+        for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
                 dst_pte = huge_pte_alloc(dst, addr);
                 if (!dst_pte)
                         goto nomem;
+                spin_lock(&src->page_table_lock);
                 src_pte = huge_pte_offset(src, addr);
-                BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */
-                entry = *src_pte;
-                ptepage = pte_page(entry);
-                get_page(ptepage);
-                add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
-                set_huge_pte_at(dst, addr, dst_pte, entry);
-                addr += HPAGE_SIZE;
+                if (src_pte && !pte_none(*src_pte)) {
+                        entry = *src_pte;
+                        ptepage = pte_page(entry);
+                        get_page(ptepage);
+                        add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
+                        set_huge_pte_at(dst, addr, dst_pte, entry);
+                }
+                spin_unlock(&src->page_table_lock);
         }
         return 0;
 
@@ -323,8 +324,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 
                 page = pte_page(pte);
                 put_page(page);
+                add_mm_counter(mm, rss, - (HPAGE_SIZE / PAGE_SIZE));
         }
-        add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
         flush_tlb_range(vma, start, end);
 }
 
@@ -393,6 +394,28 @@ out:
         return ret;
 }
 
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully. Whereas the "normal" case is that this is
+ * an access to a hugetlb page which has been truncated off since mmap.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                        unsigned long address, int write_access)
+{
+        int ret = VM_FAULT_SIGBUS;
+        pte_t *pte;
+
+        spin_lock(&mm->page_table_lock);
+        pte = huge_pte_offset(mm, address);
+        if (pte && !pte_none(*pte))
+                ret = VM_FAULT_MINOR;
+        spin_unlock(&mm->page_table_lock);
+        return ret;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 struct page **pages, struct vm_area_struct **vmas,
                 unsigned long *position, int *length, int i)
@@ -403,6 +426,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
         BUG_ON(!is_vm_hugetlb_page(vma));
 
         vpfn = vaddr/PAGE_SIZE;
+        spin_lock(&mm->page_table_lock);
         while (vaddr < vma->vm_end && remainder) {
 
                 if (pages) {
@@ -415,8 +439,13 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                          * indexing below to work. */
                         pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
 
-                        /* hugetlb should be locked, and hence, prefaulted */
-                        WARN_ON(!pte || pte_none(*pte));
+                        /* the hugetlb file might have been truncated */
+                        if (!pte || pte_none(*pte)) {
+                                remainder = 0;
+                                if (!i)
+                                        i = -EFAULT;
+                                break;
+                        }
 
                         page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
 
@@ -434,7 +463,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 --remainder;
                 ++i;
         }
-
+        spin_unlock(&mm->page_table_lock);
         *length = remainder;
         *position = vaddr;
 
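The hugetlb changes drop the old assumption that huge pages are always prefaulted: copy_hugetlb_page_range(), follow_hugetlb_page() and the new hugetlb_fault() now tolerate a missing PTE under the page_table_lock, and an access to a region that has been truncated off since mmap reports SIGBUS/-EFAULT instead of hitting a BUG/WARN. A hedged userspace sketch of that truncation case, assuming a hugetlbfs mount at /mnt/huge and a 2 MiB huge page size:

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SZ (2UL * 1024 * 1024)    /* assumed huge page size */

int main(void)
{
        int fd = open("/mnt/huge/f", O_CREAT | O_RDWR, 0600);
        if (fd < 0) { perror("open"); return 1; }

        char *p = mmap(NULL, HPAGE_SZ, PROT_READ | PROT_WRITE,
                       MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        p[0] = 1;               /* faults the huge page in */
        ftruncate(fd, 0);       /* truncate the file under the live mapping */
        p[0] = 2;               /* access past EOF: expect SIGBUS here */
        return 0;
}
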
diff --git a/mm/memory.c b/mm/memory.c
index ae8161f1f459..1db40e935e55 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2045,8 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 
         inc_page_state(pgfault);
 
-        if (is_vm_hugetlb_page(vma))
-                return VM_FAULT_SIGBUS; /* mapping truncation does this. */
+        if (unlikely(is_vm_hugetlb_page(vma)))
+                return hugetlb_fault(mm, vma, address, write_access);
 
         /*
          * We need the page table lock to synchronize with kswapd
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0ea71e887bb6..64f9570cff56 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -511,10 +511,11 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                  * PageDirty _after_ making sure that the page is freeable and
                  * not in use by anybody. (pagecache + us == 2)
                  */
-                if (page_count(page) != 2 || PageDirty(page)) {
-                        write_unlock_irq(&mapping->tree_lock);
-                        goto keep_locked;
-                }
+                if (unlikely(page_count(page) != 2))
+                        goto cannot_free;
+                smp_rmb();
+                if (unlikely(PageDirty(page)))
+                        goto cannot_free;
 
 #ifdef CONFIG_SWAP
                 if (PageSwapCache(page)) {
@@ -538,6 +539,10 @@ free_it:
                 __pagevec_release_nonlru(&freed_pvec);
                 continue;
 
+cannot_free:
+                write_unlock_irq(&mapping->tree_lock);
+                goto keep_locked;
+
 activate_locked:
                 SetPageActive(page);
                 pgactivate++;
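In shrink_list() the combined page_count()/PageDirty() test is split so the reference count is checked before the dirty bit, with an smp_rmb() in between, and both failure cases funnel through the new cannot_free label that drops tree_lock. A plausible writer-side sketch of the ordering this guards against, illustrative only and not taken from this patch: the intent is that a user who takes a reference and then dirties the page will be seen by reclaim through at least one of the two checks.

static void writer_side_sketch(struct page *page)
{
        get_page(page);                 /* page_count() goes above 2 */
        /* ... modify the page's contents ... */
        set_page_dirty(page);           /* dirty bit set while the ref is held */
        put_page(page);
}
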