Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	180
1 file changed, 95 insertions, 85 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f29b7dc02c39..c9b43360fd33 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -321,10 +321,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 
 	for (address = start; address < end; address += HPAGE_SIZE) {
 		ptep = huge_pte_offset(mm, address);
-		if (! ptep)
-			/* This can happen on truncate, or if an
-			 * mmap() is aborted due to an error before
-			 * the prefault */
+		if (!ptep)
 			continue;
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
@@ -340,81 +337,92 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 }
 
-int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
+static struct page *find_lock_huge_page(struct address_space *mapping,
+			unsigned long idx)
 {
-	struct mm_struct *mm = current->mm;
-	unsigned long addr;
-	int ret = 0;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-	BUG_ON(vma->vm_start & ~HPAGE_MASK);
-	BUG_ON(vma->vm_end & ~HPAGE_MASK);
-
-	hugetlb_prefault_arch_hook(mm);
-
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
-		unsigned long idx;
-		pte_t *pte = huge_pte_alloc(mm, addr);
-		struct page *page;
-
-		if (!pte) {
-			ret = -ENOMEM;
-			goto out;
-		}
+	struct page *page;
+	int err;
+	struct inode *inode = mapping->host;
+	unsigned long size;
+
+retry:
+	page = find_lock_page(mapping, idx);
+	if (page)
+		goto out;
+
+	/* Check to make sure the mapping hasn't been truncated */
+	size = i_size_read(inode) >> HPAGE_SHIFT;
+	if (idx >= size)
+		goto out;
+
+	if (hugetlb_get_quota(mapping))
+		goto out;
+	page = alloc_huge_page();
+	if (!page) {
+		hugetlb_put_quota(mapping);
+		goto out;
+	}
 
-		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
-			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
-		page = find_get_page(mapping, idx);
-		if (!page) {
-			/* charge the fs quota first */
-			if (hugetlb_get_quota(mapping)) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			page = alloc_huge_page();
-			if (!page) {
-				hugetlb_put_quota(mapping);
-				ret = -ENOMEM;
-				goto out;
-			}
-			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
-			if (! ret) {
-				unlock_page(page);
-			} else {
-				hugetlb_put_quota(mapping);
-				free_huge_page(page);
-				goto out;
-			}
-		}
-		spin_lock(&mm->page_table_lock);
-		add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
-		set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page));
-		spin_unlock(&mm->page_table_lock);
+	err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
+	if (err) {
+		put_page(page);
+		hugetlb_put_quota(mapping);
+		if (err == -EEXIST)
+			goto retry;
+		page = NULL;
 	}
 out:
-	return ret;
+	return page;
 }
 
-/*
- * On ia64 at least, it is possible to receive a hugetlb fault from a
- * stale zero entry left in the TLB from earlier hardware prefetching.
- * Low-level arch code should already have flushed the stale entry as
- * part of its fault handling, but we do need to accept this minor fault
- * and return successfully. Whereas the "normal" case is that this is
- * an access to a hugetlb page which has been truncated off since mmap.
- */
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, int write_access)
 {
 	int ret = VM_FAULT_SIGBUS;
+	unsigned long idx;
+	unsigned long size;
 	pte_t *pte;
+	struct page *page;
+	struct address_space *mapping;
+
+	pte = huge_pte_alloc(mm, address);
+	if (!pte)
+		goto out;
+
+	mapping = vma->vm_file->f_mapping;
+	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
+		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+
+	/*
+	 * Use page lock to guard against racing truncation
+	 * before we get page_table_lock.
+	 */
+	page = find_lock_huge_page(mapping, idx);
+	if (!page)
+		goto out;
 
 	spin_lock(&mm->page_table_lock);
-	pte = huge_pte_offset(mm, address);
-	if (pte && !pte_none(*pte))
-		ret = VM_FAULT_MINOR;
+	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+	if (idx >= size)
+		goto backout;
+
+	ret = VM_FAULT_MINOR;
+	if (!pte_none(*pte))
+		goto backout;
+
+	add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
+	set_huge_pte_at(mm, address, pte, make_huge_pte(vma, page));
 	spin_unlock(&mm->page_table_lock);
+	unlock_page(page);
+out:
 	return ret;
+
+backout:
+	spin_unlock(&mm->page_table_lock);
+	hugetlb_put_quota(mapping);
+	unlock_page(page);
+	put_page(page);
+	goto out;
 }
 
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -424,34 +432,36 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long vpfn, vaddr = *position;
 	int remainder = *length;
 
-	BUG_ON(!is_vm_hugetlb_page(vma));
-
 	vpfn = vaddr/PAGE_SIZE;
 	spin_lock(&mm->page_table_lock);
 	while (vaddr < vma->vm_end && remainder) {
+		pte_t *pte;
+		struct page *page;
 
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			/* Some archs (sparc64, sh*) have multiple
-			 * pte_ts to each hugepage. We have to make
-			 * sure we get the first, for the page
-			 * indexing below to work. */
-			pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
-
-			/* the hugetlb file might have been truncated */
-			if (!pte || pte_none(*pte)) {
-				remainder = 0;
-				if (!i)
-					i = -EFAULT;
-				break;
-			}
+		/*
+		 * Some archs (sparc64, sh*) have multiple pte_ts to
+		 * each hugepage. We have to make sure we get the
+		 * first, for the page indexing below to work.
+		 */
+		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
 
-			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+		if (!pte || pte_none(*pte)) {
+			int ret;
 
-			WARN_ON(!PageCompound(page));
+			spin_unlock(&mm->page_table_lock);
+			ret = hugetlb_fault(mm, vma, vaddr, 0);
+			spin_lock(&mm->page_table_lock);
+			if (ret == VM_FAULT_MINOR)
+				continue;
 
+			remainder = 0;
+			if (!i)
+				i = -EFAULT;
+			break;
+		}
+
+		if (pages) {
+			page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
 			get_page(page);
 			pages[i] = page;
 		}