author		Hugh Dickins <hugh@veritas.com>	2005-10-29 21:15:59 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-10-30 00:40:37 -0400
commit		65500d234e74fc4e8f18e1a429bc24e51e75de4a (patch)
tree		2bae8c3622b6537dbd142ba2744c7cc9430d3b69 /mm/memory.c
parent		7c1fd6b964860cdcf44b6b98d7dcd8cc16a0a26d (diff)
[PATCH] mm: page fault handlers tidyup
Impose a little more consistency on the page fault handlers do_wp_page,
do_swap_page, do_anonymous_page, do_no_page, do_file_page: why not pass their
arguments in the same order, called the same names?
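For comparison, the common shape the handlers converge on can be read off the new
prototypes in the diff below (shown here as declarations, side by side): mm, vma,
address, page_table, pmd, then write_access and/or the original pte value, now
always named orig_pte:

static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		pte_t orig_pte);
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access, pte_t orig_pte);
static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access);
static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access);
static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
		unsigned long address, pte_t *page_table, pmd_t *pmd,
		int write_access, pte_t orig_pte);

handle_pte_fault and __handle_mm_fault change in step, passing pte, pmd,
write_access in that order.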
break_cow is all very well, but what it did was inlined elsewhere: easier to
compare if it's brought back into do_wp_page.
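For readers comparing the two versions: the work break_cow did, now written out
inline in do_wp_page at the point where the copied page is installed (see the
third hunk below), is:

	entry = mk_pte(new_page, vma->vm_page_prot);
	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
	ptep_establish(vma, address, page_table, entry);
	update_mmu_cache(vma, address, entry);
	lazy_mmu_prot_update(entry);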
do_file_page's fallback to do_no_page dates from a time when we were testing
pte_file by using it wherever possible: currently it's peculiar to nonlinear
vmas, so just check that. BUG_ON if not? Better not, it's probably page
table corruption, so just show the pte: hmm, there's a pte_ERROR macro, let's
use that for do_wp_page's invalid pfn too.
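Concretely, do_file_page now drops the lock and then checks that the vma really
is nonlinear, reporting the suspect pte instead of falling back or BUGging (see
the do_file_page hunk below):

	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
		/*
		 * Page table corrupted: show pte and kill process.
		 */
		pte_ERROR(orig_pte);
		return VM_FAULT_OOM;
	}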
Hah! Someone in the ppc64 world noticed pte_ERROR was unused so removed it:
restored (and say "pud" not "pmd" in its pud_ERROR).
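(An aside: pte_ERROR and friends are per-arch printk wrappers, roughly of this
shape -- an illustrative sketch, not quoted from the ppc64 header:

#define pte_ERROR(e) \
	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
#define pud_ERROR(e) \
	printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))

so the handlers above can dump the offending pte value, with file and line,
before giving up.)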
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	220
1 file changed, 97 insertions, 123 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 13667681cd16..eaf79031f573 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1213,28 +1213,10 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 
 /*
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
- */
-static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address,
-		pte_t *page_table)
-{
-	pte_t entry;
-
-	entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
-			      vma);
-	ptep_establish(vma, address, page_table, entry);
-	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
-}
-
-/*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
  * and decrementing the shared-page counter for the old page.
  *
- * Goto-purists beware: the only reason for goto's here is that it results
- * in better assembly code.. The "default" path will see no jumps at all.
- *
  * Note that this routine assumes that the protection checks have been
  * done by the caller (the low-level page fault routine in most cases).
  * Thus we can safely just mark it writable once we've done any necessary
@@ -1247,25 +1229,22 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page
  * We hold the mm semaphore and the page_table_lock on entry and exit
  * with the page_table_lock released.
  */
-static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
-	unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		pte_t orig_pte)
 {
 	struct page *old_page, *new_page;
-	unsigned long pfn = pte_pfn(pte);
+	unsigned long pfn = pte_pfn(orig_pte);
 	pte_t entry;
-	int ret;
+	int ret = VM_FAULT_MINOR;
 
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
-		 * This should really halt the system so it can be debugged or
-		 * at least the kernel stops what it's doing before it corrupts
-		 * data, but for the moment just pretend this is OOM.
+		 * Page table corrupted: show pte and kill process.
 		 */
-		pte_unmap(page_table);
-		printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
-				address);
-		spin_unlock(&mm->page_table_lock);
-		return VM_FAULT_OOM;
+		pte_ERROR(orig_pte);
+		ret = VM_FAULT_OOM;
+		goto unlock;
 	}
 	old_page = pfn_to_page(pfn);
 
@@ -1274,52 +1253,57 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 		unlock_page(old_page);
 		if (reuse) {
 			flush_cache_page(vma, address, pfn);
-			entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
-					      vma);
+			entry = pte_mkyoung(orig_pte);
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 			ptep_set_access_flags(vma, address, page_table, entry, 1);
 			update_mmu_cache(vma, address, entry);
 			lazy_mmu_prot_update(entry);
-			pte_unmap(page_table);
-			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR|VM_FAULT_WRITE;
+			ret |= VM_FAULT_WRITE;
+			goto unlock;
 		}
 	}
-	pte_unmap(page_table);
 
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
 	if (!PageReserved(old_page))
 		page_cache_get(old_page);
+	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 
 	if (unlikely(anon_vma_prepare(vma)))
-		goto no_new_page;
+		goto oom;
 	if (old_page == ZERO_PAGE(address)) {
 		new_page = alloc_zeroed_user_highpage(vma, address);
 		if (!new_page)
-			goto no_new_page;
+			goto oom;
 	} else {
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
-			goto no_new_page;
+			goto oom;
 		copy_user_highpage(new_page, old_page, address);
 	}
+
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
-	ret = VM_FAULT_MINOR;
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
-	if (likely(pte_same(*page_table, pte))) {
+	if (likely(pte_same(*page_table, orig_pte))) {
 		if (PageAnon(old_page))
 			dec_mm_counter(mm, anon_rss);
 		if (PageReserved(old_page))
 			inc_mm_counter(mm, rss);
 		else
 			page_remove_rmap(old_page);
+
 		flush_cache_page(vma, address, pfn);
-		break_cow(vma, new_page, address, page_table);
+		entry = mk_pte(new_page, vma->vm_page_prot);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		ptep_establish(vma, address, page_table, entry);
+		update_mmu_cache(vma, address, entry);
+		lazy_mmu_prot_update(entry);
+
 		lru_cache_add_active(new_page);
 		page_add_anon_rmap(new_page, vma, address);
 
@@ -1327,13 +1311,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
 	}
-	pte_unmap(page_table);
 	page_cache_release(new_page);
 	page_cache_release(old_page);
+unlock:
+	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 	return ret;
-
-no_new_page:
+oom:
 	page_cache_release(old_page);
 	return VM_FAULT_OOM;
 }
@@ -1661,17 +1645,19 @@ void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struc
  * We hold the mm semaphore and the page_table_lock on entry and
  * should release the pagetable lock on exit..
  */
-static int do_swap_page(struct mm_struct * mm,
-	struct vm_area_struct * vma, unsigned long address,
-	pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
+static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access, pte_t orig_pte)
 {
 	struct page *page;
-	swp_entry_t entry = pte_to_swp_entry(orig_pte);
+	swp_entry_t entry;
 	pte_t pte;
 	int ret = VM_FAULT_MINOR;
 
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
+
+	entry = pte_to_swp_entry(orig_pte);
 	page = lookup_swap_cache(entry);
 	if (!page) {
 		swapin_readahead(entry, address, vma);
@@ -1685,11 +1671,7 @@ static int do_swap_page(struct mm_struct * mm,
 			page_table = pte_offset_map(pmd, address);
 			if (likely(pte_same(*page_table, orig_pte)))
 				ret = VM_FAULT_OOM;
-			else
-				ret = VM_FAULT_MINOR;
-			pte_unmap(page_table);
-			spin_unlock(&mm->page_table_lock);
-			goto out;
+			goto unlock;
 		}
 
 	/* Had to read the page from swap area: Major fault */
@@ -1745,6 +1727,7 @@ static int do_swap_page(struct mm_struct * mm,
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, pte);
 	lazy_mmu_prot_update(pte);
+unlock:
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 out:
@@ -1754,7 +1737,7 @@ out_nomap:
 	spin_unlock(&mm->page_table_lock);
 	unlock_page(page);
 	page_cache_release(page);
-	goto out;
+	return ret;
 }
 
 /*
@@ -1762,17 +1745,15 @@ out_nomap:
  * spinlock held to protect against concurrent faults in
  * multithreaded programs.
  */
-static int
-do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		pte_t *page_table, pmd_t *pmd, int write_access,
-		unsigned long addr)
+static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access)
 {
 	pte_t entry;
 
 	/* Mapping of ZERO_PAGE - vm_page_prot is readonly */
 	entry = mk_pte(ZERO_PAGE(addr), vma->vm_page_prot);
 
-	/* ..except if it's a write access */
 	if (write_access) {
 		struct page *page;
 
@@ -1781,39 +1762,36 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		spin_unlock(&mm->page_table_lock);
 
 		if (unlikely(anon_vma_prepare(vma)))
-			goto no_mem;
-		page = alloc_zeroed_user_highpage(vma, addr);
+			goto oom;
+		page = alloc_zeroed_user_highpage(vma, address);
 		if (!page)
-			goto no_mem;
+			goto oom;
 
 		spin_lock(&mm->page_table_lock);
-		page_table = pte_offset_map(pmd, addr);
+		page_table = pte_offset_map(pmd, address);
 
 		if (!pte_none(*page_table)) {
-			pte_unmap(page_table);
 			page_cache_release(page);
-			spin_unlock(&mm->page_table_lock);
-			goto out;
+			goto unlock;
 		}
 		inc_mm_counter(mm, rss);
-		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
-							 vma->vm_page_prot)),
-				      vma);
+		entry = mk_pte(page, vma->vm_page_prot);
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		lru_cache_add_active(page);
 		SetPageReferenced(page);
-		page_add_anon_rmap(page, vma, addr);
+		page_add_anon_rmap(page, vma, address);
 	}
 
-	set_pte_at(mm, addr, page_table, entry);
-	pte_unmap(page_table);
+	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, addr, entry);
+	update_mmu_cache(vma, address, entry);
 	lazy_mmu_prot_update(entry);
+unlock:
+	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
-out:
 	return VM_FAULT_MINOR;
-no_mem:
+oom:
 	return VM_FAULT_OOM;
 }
 
@@ -1829,20 +1807,17 @@ no_mem:
  * This is called with the MM semaphore held and the page table
  * spinlock held. Exit with the spinlock released.
  */
-static int
-do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
-	unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
+static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access)
 {
-	struct page * new_page;
+	struct page *new_page;
 	struct address_space *mapping = NULL;
 	pte_t entry;
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
 
-	if (!vma->vm_ops || !vma->vm_ops->nopage)
-		return do_anonymous_page(mm, vma, page_table,
-					pmd, write_access, address);
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 
@@ -1852,7 +1827,6 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		smp_rmb(); /* serializes i_size against truncate_count */
 	}
 retry:
-	cond_resched();
 	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
 	/*
 	 * No smp_rmb is needed here as long as there's a full
@@ -1892,9 +1866,11 @@ retry:
 	 * retry getting the page.
 	 */
 	if (mapping && unlikely(sequence != mapping->truncate_count)) {
-		sequence = mapping->truncate_count;
 		spin_unlock(&mm->page_table_lock);
 		page_cache_release(new_page);
+		cond_resched();
+		sequence = mapping->truncate_count;
+		smp_rmb();
 		goto retry;
 	}
 	page_table = pte_offset_map(pmd, address);
@@ -1924,25 +1900,22 @@ retry:
 			page_add_anon_rmap(new_page, vma, address);
 		} else
 			page_add_file_rmap(new_page);
-		pte_unmap(page_table);
 	} else {
 		/* One of our sibling threads was faster, back out. */
-		pte_unmap(page_table);
 		page_cache_release(new_page);
-		spin_unlock(&mm->page_table_lock);
-		goto out;
+		goto unlock;
 	}
 
 	/* no need to invalidate: a not-present page shouldn't be cached */
 	update_mmu_cache(vma, address, entry);
 	lazy_mmu_prot_update(entry);
+unlock:
+	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
-out:
 	return ret;
 oom:
 	page_cache_release(new_page);
-	ret = VM_FAULT_OOM;
-	goto out;
+	return VM_FAULT_OOM;
 }
 
 /*
@@ -1950,29 +1923,28 @@ oom:
  * from the encoded file_pte if possible. This enables swappable
  * nonlinear vmas.
 */
-static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
-	unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
+static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long address, pte_t *page_table, pmd_t *pmd,
+		int write_access, pte_t orig_pte)
 {
-	unsigned long pgoff;
+	pgoff_t pgoff;
 	int err;
 
-	BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
-	/*
-	 * Fall back to the linear mapping if the fs does not support
-	 * ->populate:
-	 */
-	if (!vma->vm_ops->populate ||
-			(write_access && !(vma->vm_flags & VM_SHARED))) {
-		pte_clear(mm, address, pte);
-		return do_no_page(mm, vma, address, write_access, pte, pmd);
-	}
-
-	pgoff = pte_to_pgoff(*pte);
-
-	pte_unmap(pte);
+	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 
-	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
+	if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
+		/*
+		 * Page table corrupted: show pte and kill process.
+		 */
+		pte_ERROR(orig_pte);
+		return VM_FAULT_OOM;
+	}
+	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
+
+	pgoff = pte_to_pgoff(orig_pte);
+	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+					vma->vm_page_prot, pgoff, 0);
 	if (err == -ENOMEM)
 		return VM_FAULT_OOM;
 	if (err)
@@ -2002,23 +1974,25 @@ static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
  * release it when done.
  */
 static inline int handle_pte_fault(struct mm_struct *mm,
-	struct vm_area_struct * vma, unsigned long address,
-	int write_access, pte_t *pte, pmd_t *pmd)
+		struct vm_area_struct *vma, unsigned long address,
+		pte_t *pte, pmd_t *pmd, int write_access)
 {
 	pte_t entry;
 
 	entry = *pte;
 	if (!pte_present(entry)) {
-		/*
-		 * If it truly wasn't present, we know that kswapd
-		 * and the PTE updates will not touch it later. So
-		 * drop the lock.
-		 */
-		if (pte_none(entry))
-			return do_no_page(mm, vma, address, write_access, pte, pmd);
+		if (pte_none(entry)) {
+			if (!vma->vm_ops || !vma->vm_ops->nopage)
+				return do_anonymous_page(mm, vma, address,
+					pte, pmd, write_access);
+			return do_no_page(mm, vma, address,
+					pte, pmd, write_access);
+		}
 		if (pte_file(entry))
-			return do_file_page(mm, vma, address, write_access, pte, pmd);
-		return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
+			return do_file_page(mm, vma, address,
+					pte, pmd, write_access, entry);
+		return do_swap_page(mm, vma, address,
+					pte, pmd, write_access, entry);
 	}
 
 	if (write_access) {
@@ -2038,7 +2012,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, int write_access)
 {
 	pgd_t *pgd;
@@ -2072,7 +2046,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 	if (!pte)
 		goto oom;
 
-	return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
 
 oom:
 	spin_unlock(&mm->page_table_lock);