author		Anton Altaparmakov <aia21@cantab.net>	2005-12-05 10:48:41 -0500
committer	Anton Altaparmakov <aia21@cantab.net>	2005-12-05 10:48:41 -0500
commit		292d4ed32e35df4755052b5002e533348d1648fd (patch)
tree		8522e6bab962696bd25a6c02fb068c674a09b7ee /mm/memory.c
parent		3c6af7fa787f21f8873a050568ed892312899eb5 (diff)
parent		e4f5c82a92c2a546a16af1614114eec19120e40a (diff)
Merge branch 'master' of /usr/src/ntfs-2.6/
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	341
1 file changed, 249 insertions, 92 deletions
diff --git a/mm/memory.c b/mm/memory.c
index d1f46f4e4c8a..aa8af0e20269 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -333,9 +333,9 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 }
 
 /*
- * This function is called to print an error when a pte in a
- * !VM_UNPAGED region is found pointing to an invalid pfn (which
- * is an error.
+ * This function is called to print an error when a bad pte
+ * is found. For example, we might have a PFN-mapped pte in
+ * a region that doesn't allow it.
  *
  * The calling function must still handle the error.
  */
@@ -350,19 +350,56 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
 }
 
 /*
- * page_is_anon applies strict checks for an anonymous page belonging to
- * this vma at this address. It is used on VM_UNPAGED vmas, which are
- * usually populated with shared originals (which must not be counted),
- * but occasionally contain private COWed copies (when !VM_SHARED, or
- * perhaps via ptrace when VM_SHARED). An mmap of /dev/mem might window
- * free pages, pages from other processes, or from other parts of this:
- * it's tricky, but try not to be deceived by foreign anonymous pages.
+ * This function gets the "struct page" associated with a pte.
+ *
+ * NOTE! Some mappings do not have "struct pages". A raw PFN mapping
+ * will have each page table entry just pointing to a raw page frame
+ * number, and as far as the VM layer is concerned, those do not have
+ * pages associated with them - even if the PFN might point to memory
+ * that otherwise is perfectly fine and has a "struct page".
+ *
+ * The way we recognize those mappings is through the rules set up
+ * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set,
+ * and the vm_pgoff will point to the first PFN mapped: thus every
+ * page that is a raw mapping will always honor the rule
+ *
+ *	pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
+ *
+ * and if that isn't true, the page has been COW'ed (in which case it
+ * _does_ have a "struct page" associated with it even if it is in a
+ * VM_PFNMAP range).
  */
-static inline int page_is_anon(struct page *page,
-			struct vm_area_struct *vma, unsigned long addr)
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
-	return page && PageAnon(page) && page_mapped(page) &&
-		page_address_in_vma(page, vma) == addr;
+	unsigned long pfn = pte_pfn(pte);
+
+	if (vma->vm_flags & VM_PFNMAP) {
+		unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
+		if (pfn == vma->vm_pgoff + off)
+			return NULL;
+	}
+
+	/*
+	 * Add some anal sanity checks for now. Eventually,
+	 * we should just do "return pfn_to_page(pfn)", but
+	 * in the meantime we check that we get a valid pfn,
+	 * and that the resulting page looks ok.
+	 *
+	 * Remove this test eventually!
+	 */
+	if (unlikely(!pfn_valid(pfn))) {
+		print_bad_pte(vma, pte, addr);
+		return NULL;
+	}
+
+	/*
+	 * NOTE! We still have PageReserved() pages in the page
+	 * tables.
+	 *
+	 * The PAGE_ZERO() pages and various VDSO mappings can
+	 * cause them to exist.
+	 */
+	return pfn_to_page(pfn);
 }
 
 /*
@@ -379,7 +416,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
 	struct page *page;
-	unsigned long pfn;
 
 	/* pte contains position in swap or file, so copy. */
 	if (unlikely(!pte_present(pte))) {
@@ -397,22 +433,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_set_pte;
 	}
 
-	pfn = pte_pfn(pte);
-	page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
-
-	if (unlikely(vm_flags & VM_UNPAGED))
-		if (!page_is_anon(page, vma, addr))
-			goto out_set_pte;
-
-	/*
-	 * If the pte points outside of valid memory but
-	 * the region is not VM_UNPAGED, we have a problem.
-	 */
-	if (unlikely(!page)) {
-		print_bad_pte(vma, pte, addr);
-		goto out_set_pte; /* try to do something sane */
-	}
-
 	/*
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
@@ -429,9 +449,13 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (vm_flags & VM_SHARED)
 		pte = pte_mkclean(pte);
 	pte = pte_mkold(pte);
-	get_page(page);
-	page_dup_rmap(page);
-	rss[!!PageAnon(page)]++;
+
+	page = vm_normal_page(vma, addr, pte);
+	if (page) {
+		get_page(page);
+		page_dup_rmap(page);
+		rss[!!PageAnon(page)]++;
+	}
 
 out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
@@ -543,7 +567,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) {
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -584,19 +608,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		}
 		if (pte_present(ptent)) {
 			struct page *page;
-			unsigned long pfn;
 
 			(*zap_work) -= PAGE_SIZE;
 
-			pfn = pte_pfn(ptent);
-			page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
-
-			if (unlikely(vma->vm_flags & VM_UNPAGED)) {
-				if (!page_is_anon(page, vma, addr))
-					page = NULL;
-			} else if (unlikely(!page))
-				print_bad_pte(vma, ptent, addr);
-
+			page = vm_normal_page(vma, addr, ptent);
 			if (unlikely(details) && page) {
 				/*
 				 * unmap_shared_mapping_pages() wants to
@@ -852,7 +867,7 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 /*
  * Do a quick page-table lookup for a single page.
  */
-struct page *follow_page(struct mm_struct *mm, unsigned long address,
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 			unsigned int flags)
 {
 	pgd_t *pgd;
@@ -860,8 +875,8 @@ struct page *follow_page(struct mm_struct *mm, unsigned long address,
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
-	unsigned long pfn;
 	struct page *page;
+	struct mm_struct *mm = vma->vm_mm;
 
 	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
 	if (!IS_ERR(page)) {
@@ -897,11 +912,10 @@ struct page *follow_page(struct mm_struct *mm, unsigned long address,
 		goto unlock;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
-	pfn = pte_pfn(pte);
-	if (!pfn_valid(pfn))
+	page = vm_normal_page(vma, address, pte);
+	if (unlikely(!page))
 		goto unlock;
 
-	page = pfn_to_page(pfn);
 	if (flags & FOLL_GET)
 		get_page(page);
 	if (flags & FOLL_TOUCH) {
@@ -974,8 +988,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			return i ? : -EFAULT;
 		}
 		if (pages) {
-			pages[i] = pte_page(*pte);
-			get_page(pages[i]);
+			struct page *page = vm_normal_page(gate_vma, start, *pte);
+			pages[i] = page;
+			if (page)
+				get_page(page);
 		}
 		pte_unmap(pte);
 		if (vmas)
@@ -1010,7 +1026,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			foll_flags |= FOLL_WRITE;
 
 		cond_resched();
-		while (!(page = follow_page(mm, start, foll_flags))) {
+		while (!(page = follow_page(vma, start, foll_flags))) {
 			int ret;
 			ret = __handle_mm_fault(mm, vma, start,
 					foll_flags & FOLL_WRITE);
@@ -1130,6 +1146,129 @@ int zeromap_page_range(struct vm_area_struct *vma,
 	return err;
 }
 
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+{
+	pgd_t * pgd = pgd_offset(mm, addr);
+	pud_t * pud = pud_alloc(mm, pgd, addr);
+	if (pud) {
+		pmd_t * pmd = pmd_alloc(mm, pud, addr);
+		if (pmd)
+			return pte_alloc_map_lock(mm, pmd, addr, ptl);
+	}
+	return NULL;
+}
+
+/*
+ * This is the old fallback for page remapping.
+ *
+ * For historical reasons, it only allows reserved pages. Only
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+{
+	int retval;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	retval = -EINVAL;
+	if (PageAnon(page))
+		goto out;
+	retval = -ENOMEM;
+	flush_dcache_page(page);
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		goto out;
+	retval = -EBUSY;
+	if (!pte_none(*pte))
+		goto out_unlock;
+
+	/* Ok, finally just insert the thing.. */
+	get_page(page);
+	inc_mm_counter(mm, file_rss);
+	page_add_file_rmap(page);
+	set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+	retval = 0;
+out_unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return retval;
+}
+
+/*
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
+ */
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
+{
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return -EFAULT;
+	if (!page_count(page))
+		return -EINVAL;
+	return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_page);
+
+/*
+ * Somebody does a pfn remapping that doesn't actually work as a vma.
+ *
+ * Do it as individual pages instead, and warn about it. It's bad form,
+ * and very inefficient.
+ */
+static int incomplete_pfn_remap(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end,
+		unsigned long pfn, pgprot_t prot)
+{
+	static int warn = 10;
+	struct page *page;
+	int retval;
+
+	if (!(vma->vm_flags & VM_INCOMPLETE)) {
+		if (warn) {
+			warn--;
+			printk("%s does an incomplete pfn remapping", current->comm);
+			dump_stack();
+		}
+	}
+	vma->vm_flags |= VM_INCOMPLETE | VM_IO | VM_RESERVED;
+
+	if (start < vma->vm_start || end > vma->vm_end)
+		return -EINVAL;
+
+	if (!pfn_valid(pfn))
+		return -EINVAL;
+
+	page = pfn_to_page(pfn);
+	if (!PageReserved(page))
+		return -EINVAL;
+
+	retval = 0;
+	while (start < end) {
+		retval = insert_page(vma->vm_mm, start, page, prot);
+		if (retval < 0)
+			break;
+		start += PAGE_SIZE;
+		page++;
+	}
+	return retval;
+}
+
 /*
  * maps a range of physical memory into the requested pages. the old
  * mappings are removed. any references to nonexistent pages results
@@ -1204,6 +1343,9 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	struct mm_struct *mm = vma->vm_mm;
 	int err;
 
+	if (addr != vma->vm_start || end != vma->vm_end)
+		return incomplete_pfn_remap(vma, addr, end, pfn, prot);
+
 	/*
 	 * Physically remapped pages are special. Tell the
 	 * rest of the world about it:
@@ -1214,11 +1356,12 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * in 2.6 the LRU scan won't even find its pages, so this
 	 * flag means no more than count its pages in reserved_vm,
 	 * and omit it from core dump, even when VM_IO turned off.
-	 * VM_UNPAGED tells the core MM not to "manage" these pages
-	 * (e.g. refcount, mapcount, try to swap them out): in
-	 * particular, zap_pte_range does not try to free them.
+	 * VM_PFNMAP tells the core MM that the base pages are just
+	 * raw PFN mappings, and do not have a "struct page" associated
+	 * with them.
 	 */
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	vma->vm_pgoff = pfn;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
@@ -1273,6 +1416,33 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
+{
+	/*
+	 * If the source page was a PFN mapping, we don't have
+	 * a "struct page" for it. We do a best-effort copy by
+	 * just copying from the original user address. If that
+	 * fails, we just zero-fill it. Live with it.
+	 */
+	if (unlikely(!src)) {
+		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+			memset(kaddr, 0, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		return;
+
+	}
+	copy_user_highpage(dst, src, va);
+}
+
 /*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
@@ -1295,35 +1465,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		spinlock_t *ptl, pte_t orig_pte)
 {
-	struct page *old_page, *src_page, *new_page;
-	unsigned long pfn = pte_pfn(orig_pte);
+	struct page *old_page, *new_page;
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
-	if (unlikely(!pfn_valid(pfn))) {
-		/*
-		 * Page table corrupted: show pte and kill process.
-		 * Or it's an attempt to COW an out-of-map VM_UNPAGED
-		 * entry, which copy_user_highpage does not support.
-		 */
-		print_bad_pte(vma, orig_pte, address);
-		ret = VM_FAULT_OOM;
-		goto unlock;
-	}
-	old_page = pfn_to_page(pfn);
-	src_page = old_page;
-
-	if (unlikely(vma->vm_flags & VM_UNPAGED))
-		if (!page_is_anon(old_page, vma, address)) {
-			old_page = NULL;
-			goto gotten;
-		}
+	old_page = vm_normal_page(vma, address, orig_pte);
+	if (!old_page)
+		goto gotten;
 
 	if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		int reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
 		if (reuse) {
-			flush_cache_page(vma, address, pfn);
+			flush_cache_page(vma, address, pte_pfn(orig_pte));
 			entry = pte_mkyoung(orig_pte);
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 			ptep_set_access_flags(vma, address, page_table, entry, 1);
@@ -1343,7 +1497,7 @@ gotten:
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	if (src_page == ZERO_PAGE(address)) {
+	if (old_page == ZERO_PAGE(address)) {
 		new_page = alloc_zeroed_user_highpage(vma, address);
 		if (!new_page)
 			goto oom;
@@ -1351,7 +1505,7 @@ gotten:
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
 			goto oom;
-		copy_user_highpage(new_page, src_page, address);
+		cow_user_page(new_page, old_page, address);
 	}
 
 	/*
@@ -1367,7 +1521,7 @@ gotten:
 		}
 	} else
 		inc_mm_counter(mm, anon_rss);
-	flush_cache_page(vma, address, pfn);
+	flush_cache_page(vma, address, pte_pfn(orig_pte));
 	entry = mk_pte(new_page, vma->vm_page_prot);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	ptep_establish(vma, address, page_table, entry);
@@ -1812,16 +1966,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	/*
-	 * A VM_UNPAGED vma will normally be filled with present ptes
-	 * by remap_pfn_range, and never arrive here; but it might have
-	 * holes, or if !VM_DONTEXPAND, mremap might have expanded it.
-	 * It's weird enough handling anon pages in unpaged vmas, we do
-	 * not want to worry about ZERO_PAGEs too (it may or may not
-	 * matter if their counts wrap): just give them anon pages.
-	 */
-
-	if (write_access || (vma->vm_flags & VM_UNPAGED)) {
+	if (write_access) {
 		/* Allocate our own private page. */
 		pte_unmap(page_table);
 
@@ -1896,7 +2041,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int anon = 0;
 
 	pte_unmap(page_table);
-	BUG_ON(vma->vm_flags & VM_UNPAGED);
+	BUG_ON(vma->vm_flags & VM_PFNMAP);
 
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
@@ -2149,6 +2294,12 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
+#else
+/* Workaround for gcc 2.96 */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+	return 0;
+}
 #endif /* __PAGETABLE_PUD_FOLDED */
 
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -2177,6 +2328,12 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 	spin_unlock(&mm->page_table_lock);
 	return 0;
 }
+#else
+/* Workaround for gcc 2.96 */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+	return 0;
+}
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 int make_pages_present(unsigned long addr, unsigned long end)
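
For illustration, a minimal sketch of how a driver's ->mmap() handler might use
the vm_insert_page() interface added above, assuming the usual <linux/mm.h> and
<linux/fs.h> context of this kernel era. The driver name (mydrv_mmap) and the
single-page layout are assumptions made for the example, not part of this
commit; note that vm_insert_page() takes no pgprot argument, so the vma
protections must already be what the driver wants.

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* One individual, refcounted kernel page (must not be PageAnon). */
	struct page *page = alloc_page(GFP_KERNEL);
	int err;

	if (!page)
		return -ENOMEM;

	/* Map it at the start of the vma; no PageReserved games needed. */
	err = vm_insert_page(vma, vma->vm_start, page);
	if (err) {
		__free_page(page);
		return err;
	}

	/*
	 * insert_page() took its own reference for the pte, so the
	 * allocation reference can be dropped here; the page goes away
	 * when the mapping is torn down.
	 */
	__free_page(page);
	return 0;
}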