author     Hugh Dickins <hugh@veritas.com>        2005-10-20 11:24:28 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>  2005-10-20 12:02:07 -0400
commit     ac9b9c667c2e1194e22ebe0a441ae1c37aaa9b90 (patch)
tree       3903d87d0b56a49ead39c0460b5bc0b86b040775
parent     93918e9afc76717176e9e114e79cdbb602a45ae8 (diff)
[PATCH] Fix handling spurious page fault for hugetlb region
This reverts commit 3359b54c8c07338f3a863d1109b42eebccdcf379 and
replaces it with a cleaner version that is purely based on page table
operations, so that the synchronization between inode size and hugetlb
mappings becomes moot.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
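For context, here is a minimal userspace sketch (not part of the patch) of the "normal" case the new hugetlb_fault() handles: touching a hugetlb page that has been truncated away since mmap should raise SIGBUS. It is an illustration under stated assumptions, not kernel code: it assumes a kernel and glibc (2.27+) with memfd_create(MFD_HUGETLB) support, at least one free huge page, and the default 2MB huge page size.

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define HPAGE_SIZE (2UL << 20)	/* assumes the default 2MB huge page size */

static void on_sigbus(int sig)
{
	/* hugetlb_fault() returned VM_FAULT_SIGBUS; delivered as SIGBUS */
	write(STDOUT_FILENO, "SIGBUS, as expected\n", 20);
	_exit(0);
}

int main(void)
{
	signal(SIGBUS, on_sigbus);

	int fd = memfd_create("huge", MFD_HUGETLB);
	if (fd < 0 || ftruncate(fd, HPAGE_SIZE) < 0) {
		perror("setup");
		return 1;
	}

	char *p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	p[0] = 1;		/* fault the huge page in: succeeds */
	ftruncate(fd, 0);	/* truncate it away under the mapping */
	p[0] = 2;		/* no huge pte left: expect SIGBUS */

	fprintf(stderr, "unexpected: write after truncation succeeded\n");
	return 1;
}

Under the reverted commit, whether that last write produced SIGBUS depended on comparing the fault offset against the inode size; with this patch it depends only on whether a huge pte is still present in the page tables.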
-rw-r--r--  include/linux/hugetlb.h  16
-rw-r--r--  mm/hugetlb.c             22
-rw-r--r--  mm/memory.c              14
3 files changed, 27 insertions(+), 25 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 42cb7d70f9ac..d664330d900e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -25,6 +25,8 @@ int is_hugepage_mem_enough(size_t);
 unsigned long hugetlb_total_pages(void);
 struct page *alloc_huge_page(void);
 void free_huge_page(struct page *);
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, int write_access);
 
 extern unsigned long max_huge_pages;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
@@ -99,6 +101,7 @@ static inline unsigned long hugetlb_total_pages(void)
 	do { } while (0)
 #define alloc_huge_page()			({ NULL; })
 #define free_huge_page(p)			({ (void)(p); BUG(); })
+#define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
 
 #ifndef HPAGE_MASK
 #define HPAGE_MASK	0	/* Keep the compiler happy */
@@ -155,24 +158,11 @@ static inline void set_file_hugepages(struct file *file)
 {
 	file->f_op = &hugetlbfs_file_operations;
 }
-
-static inline int valid_hugetlb_file_off(struct vm_area_struct *vma,
-					unsigned long address)
-{
-	struct inode *inode = vma->vm_file->f_dentry->d_inode;
-	loff_t file_off = address - vma->vm_start;
-
-	file_off += (vma->vm_pgoff << PAGE_SHIFT);
-
-	return (file_off < inode->i_size);
-}
-
 #else /* !CONFIG_HUGETLBFS */
 
 #define is_file_hugepages(file)		0
 #define set_file_hugepages(file)	BUG()
 #define hugetlb_zero_setup(size)	ERR_PTR(-ENOSYS)
-#define valid_hugetlb_file_off(vma, address)	0
 
 #endif /* !CONFIG_HUGETLBFS */
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a1b30d45459e..61d380678030 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -394,6 +394,28 @@ out:
 	return ret;
 }
 
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully.  Whereas the "normal" case is that this is
+ * an access to a hugetlb page which has been truncated off since mmap.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, int write_access)
+{
+	int ret = VM_FAULT_SIGBUS;
+	pte_t *pte;
+
+	spin_lock(&mm->page_table_lock);
+	pte = huge_pte_offset(mm, address);
+	if (pte && !pte_none(*pte))
+		ret = VM_FAULT_MINOR;
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i)
diff --git a/mm/memory.c b/mm/memory.c
index 8c88b973abc5..1db40e935e55 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2045,18 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 
 	inc_page_state(pgfault);
 
-	if (unlikely(is_vm_hugetlb_page(vma))) {
-		if (valid_hugetlb_file_off(vma, address))
-			/* We get here only if there was a stale(zero) TLB entry
-			 * (because of HW prefetching).
-			 * Low-level arch code (if needed) should have already
-			 * purged the stale entry as part of this fault handling.
-			 * Here we just return.
-			 */
-			return VM_FAULT_MINOR;
-		else
-			return VM_FAULT_SIGBUS; /* mapping truncation does this. */
-	}
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		return hugetlb_fault(mm, vma, address, write_access);
 
 	/*
 	 * We need the page table lock to synchronize with kswapd
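To make the reverted approach concrete, here is a standalone model (illustrative only, with simplified stand-ins for vm_area_struct and inode) of the computation the removed valid_hugetlb_file_off() performed: map the faulting address back to a file offset and compare it against the file size. Its weakness is that i_size can change between this check and the fault completing, whereas the new hugetlb_fault() inspects the huge pte under mm->page_table_lock, which truncation clears when it unmaps the page, so the pte test needs no synchronization with the inode size.

/* Illustrative model of the removed check, not kernel code */
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT 12			/* assumed 4K base page size */

struct mapping {
	uintptr_t vm_start;		/* start address of the mapping */
	uint64_t vm_pgoff;		/* file offset of the mapping, in pages */
};

/* The reverted valid_hugetlb_file_off() computed exactly this:
 * the file offset backing 'address', compared against i_size. */
static bool fault_within_file(const struct mapping *m,
			      uintptr_t address, uint64_t i_size)
{
	uint64_t file_off = (address - m->vm_start)
			  + (m->vm_pgoff << PAGE_SHIFT);
	return file_off < i_size;
}

int main(void)
{
	struct mapping m = { .vm_start = 0x60000000, .vm_pgoff = 0 };

	/* Before truncation (i_size = 2MB) the offset is valid ... */
	bool before = fault_within_file(&m, 0x60000000, 2UL << 20);
	/* ... after truncation to zero it no longer is. */
	bool after = fault_within_file(&m, 0x60000000, 0);

	return (before && !after) ? 0 : 1;
}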