path: root/mm/shmem.c
author	Vineeth Remanan Pillai <vpillai@digitalocean.com>	2019-03-05 18:46:58 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-03-06 00:07:18 -0500
commit	c5bf121e4350a933bd431385e6fcb72a898ecc68 (patch)
tree	83cc8c0050d70e9ae073030674e72d2af7f5d5e8 /mm/shmem.c
parent	a9e7c39fa9fd908bc914d691045c96fdc97da7cd (diff)
mm: refactor swap-in logic out of shmem_getpage_gfp
The swap-in logic can be reused independently, without the rest of the
logic in shmem_getpage_gfp(), so let's refactor it out as an independent
function.

Link: http://lkml.kernel.org/r/20190114153129.4852-1-vpillai@digitalocean.com
Signed-off-by: Vineeth Remanan Pillai <vpillai@digitalocean.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kelley Nielsen <kelleynnn@gmail.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
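After the refactor, the swap branch of shmem_getpage_gfp() collapses to a
single call into the new helper. A minimal sketch of the resulting caller,
condensed from the shmem_getpage_gfp() hunk in the patch below (locking and
the allocation path omitted):

	/* Sketch only: condensed from the patch below, not the full caller. */
	page = find_lock_entry(mapping, index);
	if (xa_is_value(page)) {
		/* The entry is a swap entry: delegate swap-in to the helper. */
		error = shmem_swapin_page(inode, index, &page,
					  sgp, gfp, vma, fault_type);
		if (error == -EEXIST)	/* entry changed under us: retry */
			goto repeat;

		*pagep = page;
		return error;
	}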
Diffstat (limited to 'mm/shmem.c')
-rw-r--r--	mm/shmem.c	449
1 file changed, 244 insertions, 205 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 2c012eee133d..b4d27ef87496 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -123,6 +123,10 @@ static unsigned long shmem_default_max_inodes(void)
 static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
 static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 				struct shmem_inode_info *info, pgoff_t index);
+static int shmem_swapin_page(struct inode *inode, pgoff_t index,
+			     struct page **pagep, enum sgp_type sgp,
+			     gfp_t gfp, struct vm_area_struct *vma,
+			     vm_fault_t *fault_type);
 static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		struct page **pagep, enum sgp_type sgp,
 		gfp_t gfp, struct vm_area_struct *vma,
@@ -1576,6 +1580,116 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 }
 
 /*
+ * Swap in the page pointed to by *pagep.
+ * Caller has to make sure that *pagep contains a valid swapped page.
+ * Returns 0 and the page in pagep if success. On failure, returns the
+ * the error code and NULL in *pagep.
+ */
+static int shmem_swapin_page(struct inode *inode, pgoff_t index,
+			     struct page **pagep, enum sgp_type sgp,
+			     gfp_t gfp, struct vm_area_struct *vma,
+			     vm_fault_t *fault_type)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm;
+	struct mem_cgroup *memcg;
+	struct page *page;
+	swp_entry_t swap;
+	int error;
+
+	VM_BUG_ON(!*pagep || !xa_is_value(*pagep));
+	swap = radix_to_swp_entry(*pagep);
+	*pagep = NULL;
+
+	/* Look it up and read it in.. */
+	page = lookup_swap_cache(swap, NULL, 0);
+	if (!page) {
+		/* Or update major stats only when swapin succeeds?? */
+		if (fault_type) {
+			*fault_type |= VM_FAULT_MAJOR;
+			count_vm_event(PGMAJFAULT);
+			count_memcg_event_mm(charge_mm, PGMAJFAULT);
+		}
+		/* Here we actually start the io */
+		page = shmem_swapin(swap, gfp, info, index);
+		if (!page) {
+			error = -ENOMEM;
+			goto failed;
+		}
+	}
+
+	/* We have to do this with page locked to prevent races */
+	lock_page(page);
+	if (!PageSwapCache(page) || page_private(page) != swap.val ||
+	    !shmem_confirm_swap(mapping, index, swap)) {
+		error = -EEXIST;
+		goto unlock;
+	}
+	if (!PageUptodate(page)) {
+		error = -EIO;
+		goto failed;
+	}
+	wait_on_page_writeback(page);
+
+	if (shmem_should_replace_page(page, gfp)) {
+		error = shmem_replace_page(&page, gfp, info, index);
+		if (error)
+			goto failed;
+	}
+
+	error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
+					    false);
+	if (!error) {
+		error = shmem_add_to_page_cache(page, mapping, index,
+						swp_to_radix_entry(swap), gfp);
+		/*
+		 * We already confirmed swap under page lock, and make
+		 * no memory allocation here, so usually no possibility
+		 * of error; but free_swap_and_cache() only trylocks a
+		 * page, so it is just possible that the entry has been
+		 * truncated or holepunched since swap was confirmed.
+		 * shmem_undo_range() will have done some of the
+		 * unaccounting, now delete_from_swap_cache() will do
+		 * the rest.
+		 */
+		if (error) {
+			mem_cgroup_cancel_charge(page, memcg, false);
+			delete_from_swap_cache(page);
+		}
+	}
+	if (error)
+		goto failed;
+
+	mem_cgroup_commit_charge(page, memcg, true, false);
+
+	spin_lock_irq(&info->lock);
+	info->swapped--;
+	shmem_recalc_inode(inode);
+	spin_unlock_irq(&info->lock);
+
+	if (sgp == SGP_WRITE)
+		mark_page_accessed(page);
+
+	delete_from_swap_cache(page);
+	set_page_dirty(page);
+	swap_free(swap);
+
+	*pagep = page;
+	return 0;
+failed:
+	if (!shmem_confirm_swap(mapping, index, swap))
+		error = -EEXIST;
+unlock:
+	if (page) {
+		unlock_page(page);
+		put_page(page);
+	}
+
+	return error;
+}
+
+/*
  * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
  *
  * If we allocate a new one we do not mark it dirty. That's up to the
@@ -1596,7 +1710,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	struct mm_struct *charge_mm;
 	struct mem_cgroup *memcg;
 	struct page *page;
-	swp_entry_t swap;
 	enum sgp_type sgp_huge = sgp;
 	pgoff_t hindex = index;
 	int error;
@@ -1608,17 +1721,23 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
 		sgp = SGP_CACHE;
 repeat:
-	swap.val = 0;
+	if (sgp <= SGP_CACHE &&
+	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
+		return -EINVAL;
+	}
+
+	sbinfo = SHMEM_SB(inode->i_sb);
+	charge_mm = vma ? vma->vm_mm : current->mm;
+
 	page = find_lock_entry(mapping, index);
 	if (xa_is_value(page)) {
-		swap = radix_to_swp_entry(page);
-		page = NULL;
-	}
+		error = shmem_swapin_page(inode, index, &page,
+					  sgp, gfp, vma, fault_type);
+		if (error == -EEXIST)
+			goto repeat;
 
-	if (sgp <= SGP_CACHE &&
-	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
-		error = -EINVAL;
-		goto unlock;
+		*pagep = page;
+		return error;
 	}
 
 	if (page && sgp == SGP_WRITE)
@@ -1632,7 +1751,7 @@ repeat:
 		put_page(page);
 		page = NULL;
 	}
-	if (page || (sgp == SGP_READ && !swap.val)) {
+	if (page || sgp == SGP_READ) {
 		*pagep = page;
 		return 0;
 	}
@@ -1641,215 +1760,138 @@ repeat:
 	 * Fast cache lookup did not find it:
 	 * bring it back from swap or allocate.
 	 */
-	sbinfo = SHMEM_SB(inode->i_sb);
-	charge_mm = vma ? vma->vm_mm : current->mm;
-
-	if (swap.val) {
-		/* Look it up and read it in.. */
-		page = lookup_swap_cache(swap, NULL, 0);
-		if (!page) {
-			/* Or update major stats only when swapin succeeds?? */
-			if (fault_type) {
-				*fault_type |= VM_FAULT_MAJOR;
-				count_vm_event(PGMAJFAULT);
-				count_memcg_event_mm(charge_mm, PGMAJFAULT);
-			}
-			/* Here we actually start the io */
-			page = shmem_swapin(swap, gfp, info, index);
-			if (!page) {
-				error = -ENOMEM;
-				goto failed;
-			}
-		}
-
-		/* We have to do this with page locked to prevent races */
-		lock_page(page);
-		if (!PageSwapCache(page) || page_private(page) != swap.val ||
-		    !shmem_confirm_swap(mapping, index, swap)) {
-			error = -EEXIST;	/* try again */
-			goto unlock;
-		}
-		if (!PageUptodate(page)) {
-			error = -EIO;
-			goto failed;
-		}
-		wait_on_page_writeback(page);
-
-		if (shmem_should_replace_page(page, gfp)) {
-			error = shmem_replace_page(&page, gfp, info, index);
-			if (error)
-				goto failed;
-		}
 
-		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
-						    false);
-		if (!error) {
-			error = shmem_add_to_page_cache(page, mapping, index,
-						swp_to_radix_entry(swap), gfp);
-			/*
-			 * We already confirmed swap under page lock, and make
-			 * no memory allocation here, so usually no possibility
-			 * of error; but free_swap_and_cache() only trylocks a
-			 * page, so it is just possible that the entry has been
-			 * truncated or holepunched since swap was confirmed.
-			 * shmem_undo_range() will have done some of the
-			 * unaccounting, now delete_from_swap_cache() will do
-			 * the rest.
-			 * Reset swap.val? No, leave it so "failed" goes back to
-			 * "repeat": reading a hole and writing should succeed.
-			 */
-			if (error) {
-				mem_cgroup_cancel_charge(page, memcg, false);
-				delete_from_swap_cache(page);
-			}
-		}
-		if (error)
-			goto failed;
-
-		mem_cgroup_commit_charge(page, memcg, true, false);
-
-		spin_lock_irq(&info->lock);
-		info->swapped--;
-		shmem_recalc_inode(inode);
-		spin_unlock_irq(&info->lock);
-
-		if (sgp == SGP_WRITE)
-			mark_page_accessed(page);
-
-		delete_from_swap_cache(page);
-		set_page_dirty(page);
-		swap_free(swap);
-
-	} else {
-		if (vma && userfaultfd_missing(vma)) {
-			*fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
-			return 0;
-		}
+	if (vma && userfaultfd_missing(vma)) {
+		*fault_type = handle_userfault(vmf, VM_UFFD_MISSING);
+		return 0;
+	}
 
-		/* shmem_symlink() */
-		if (mapping->a_ops != &shmem_aops)
-			goto alloc_nohuge;
-		if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
-			goto alloc_nohuge;
-		if (shmem_huge == SHMEM_HUGE_FORCE)
+	/* shmem_symlink() */
+	if (mapping->a_ops != &shmem_aops)
+		goto alloc_nohuge;
+	if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
+		goto alloc_nohuge;
+	if (shmem_huge == SHMEM_HUGE_FORCE)
+		goto alloc_huge;
+	switch (sbinfo->huge) {
+		loff_t i_size;
+		pgoff_t off;
+	case SHMEM_HUGE_NEVER:
+		goto alloc_nohuge;
+	case SHMEM_HUGE_WITHIN_SIZE:
+		off = round_up(index, HPAGE_PMD_NR);
+		i_size = round_up(i_size_read(inode), PAGE_SIZE);
+		if (i_size >= HPAGE_PMD_SIZE &&
+		    i_size >> PAGE_SHIFT >= off)
 			goto alloc_huge;
-		switch (sbinfo->huge) {
-			loff_t i_size;
-			pgoff_t off;
-		case SHMEM_HUGE_NEVER:
-			goto alloc_nohuge;
-		case SHMEM_HUGE_WITHIN_SIZE:
-			off = round_up(index, HPAGE_PMD_NR);
-			i_size = round_up(i_size_read(inode), PAGE_SIZE);
-			if (i_size >= HPAGE_PMD_SIZE &&
-			    i_size >> PAGE_SHIFT >= off)
-				goto alloc_huge;
-			/* fallthrough */
-		case SHMEM_HUGE_ADVISE:
-			if (sgp_huge == SGP_HUGE)
-				goto alloc_huge;
-			/* TODO: implement fadvise() hints */
-			goto alloc_nohuge;
-		}
+		/* fallthrough */
+	case SHMEM_HUGE_ADVISE:
+		if (sgp_huge == SGP_HUGE)
+			goto alloc_huge;
+		/* TODO: implement fadvise() hints */
+		goto alloc_nohuge;
+	}
 
 alloc_huge:
-		page = shmem_alloc_and_acct_page(gfp, inode, index, true);
-		if (IS_ERR(page)) {
-alloc_nohuge:	page = shmem_alloc_and_acct_page(gfp, inode,
-					index, false);
-		}
-		if (IS_ERR(page)) {
-			int retry = 5;
-			error = PTR_ERR(page);
-			page = NULL;
-			if (error != -ENOSPC)
-				goto failed;
-			/*
-			 * Try to reclaim some spece by splitting a huge page
-			 * beyond i_size on the filesystem.
-			 */
-			while (retry--) {
-				int ret;
-				ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
-				if (ret == SHRINK_STOP)
-					break;
-				if (ret)
-					goto alloc_nohuge;
-			}
-			goto failed;
-		}
-
-		if (PageTransHuge(page))
-			hindex = round_down(index, HPAGE_PMD_NR);
-		else
-			hindex = index;
+	page = shmem_alloc_and_acct_page(gfp, inode, index, true);
+	if (IS_ERR(page)) {
+alloc_nohuge:
+		page = shmem_alloc_and_acct_page(gfp, inode,
+						 index, false);
+	}
+	if (IS_ERR(page)) {
+		int retry = 5;
 
-		if (sgp == SGP_WRITE)
-			__SetPageReferenced(page);
+		error = PTR_ERR(page);
+		page = NULL;
+		if (error != -ENOSPC)
+			goto unlock;
+		/*
+		 * Try to reclaim some space by splitting a huge page
+		 * beyond i_size on the filesystem.
+		 */
+		while (retry--) {
+			int ret;
 
-		error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
-				PageTransHuge(page));
-		if (error)
-			goto unacct;
-		error = shmem_add_to_page_cache(page, mapping, hindex,
-						NULL, gfp & GFP_RECLAIM_MASK);
-		if (error) {
-			mem_cgroup_cancel_charge(page, memcg,
-					PageTransHuge(page));
-			goto unacct;
-		}
-		mem_cgroup_commit_charge(page, memcg, false,
-				PageTransHuge(page));
-		lru_cache_add_anon(page);
+			ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
+			if (ret == SHRINK_STOP)
+				break;
+			if (ret)
+				goto alloc_nohuge;
+		}
+		goto unlock;
+	}
 
-		spin_lock_irq(&info->lock);
-		info->alloced += 1 << compound_order(page);
-		inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
-		shmem_recalc_inode(inode);
-		spin_unlock_irq(&info->lock);
-		alloced = true;
+	if (PageTransHuge(page))
+		hindex = round_down(index, HPAGE_PMD_NR);
+	else
+		hindex = index;
 
-		if (PageTransHuge(page) &&
-				DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
-				hindex + HPAGE_PMD_NR - 1) {
-			/*
-			 * Part of the huge page is beyond i_size: subject
-			 * to shrink under memory pressure.
-			 */
-			spin_lock(&sbinfo->shrinklist_lock);
-			/*
-			 * _careful to defend against unlocked access to
-			 * ->shrink_list in shmem_unused_huge_shrink()
-			 */
-			if (list_empty_careful(&info->shrinklist)) {
-				list_add_tail(&info->shrinklist,
-						&sbinfo->shrinklist);
-				sbinfo->shrinklist_len++;
-			}
-			spin_unlock(&sbinfo->shrinklist_lock);
-		}
+	if (sgp == SGP_WRITE)
+		__SetPageReferenced(page);
+
+	error = mem_cgroup_try_charge_delay(page, charge_mm, gfp, &memcg,
+					    PageTransHuge(page));
+	if (error)
+		goto unacct;
+	error = shmem_add_to_page_cache(page, mapping, hindex,
+					NULL, gfp & GFP_RECLAIM_MASK);
+	if (error) {
+		mem_cgroup_cancel_charge(page, memcg,
+					 PageTransHuge(page));
+		goto unacct;
+	}
+	mem_cgroup_commit_charge(page, memcg, false,
+				 PageTransHuge(page));
+	lru_cache_add_anon(page);
+
+	spin_lock_irq(&info->lock);
+	info->alloced += 1 << compound_order(page);
+	inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
+	shmem_recalc_inode(inode);
+	spin_unlock_irq(&info->lock);
+	alloced = true;
 
+	if (PageTransHuge(page) &&
+	    DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
+	    hindex + HPAGE_PMD_NR - 1) {
 		/*
-		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
+		 * Part of the huge page is beyond i_size: subject
+		 * to shrink under memory pressure.
 		 */
-		if (sgp == SGP_FALLOC)
-			sgp = SGP_WRITE;
-clear:
+		spin_lock(&sbinfo->shrinklist_lock);
 		/*
-		 * Let SGP_WRITE caller clear ends if write does not fill page;
-		 * but SGP_FALLOC on a page fallocated earlier must initialize
-		 * it now, lest undo on failure cancel our earlier guarantee.
+		 * _careful to defend against unlocked access to
+		 * ->shrink_list in shmem_unused_huge_shrink()
 		 */
-		if (sgp != SGP_WRITE && !PageUptodate(page)) {
-			struct page *head = compound_head(page);
-			int i;
+		if (list_empty_careful(&info->shrinklist)) {
+			list_add_tail(&info->shrinklist,
+				      &sbinfo->shrinklist);
+			sbinfo->shrinklist_len++;
+		}
+		spin_unlock(&sbinfo->shrinklist_lock);
+	}
 
-			for (i = 0; i < (1 << compound_order(head)); i++) {
-				clear_highpage(head + i);
-				flush_dcache_page(head + i);
-			}
-			SetPageUptodate(head);
+	/*
+	 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
+	 */
+	if (sgp == SGP_FALLOC)
+		sgp = SGP_WRITE;
+clear:
+	/*
+	 * Let SGP_WRITE caller clear ends if write does not fill page;
+	 * but SGP_FALLOC on a page fallocated earlier must initialize
+	 * it now, lest undo on failure cancel our earlier guarantee.
+	 */
+	if (sgp != SGP_WRITE && !PageUptodate(page)) {
+		struct page *head = compound_head(page);
+		int i;
+
+		for (i = 0; i < (1 << compound_order(head)); i++) {
+			clear_highpage(head + i);
+			flush_dcache_page(head + i);
 		}
+		SetPageUptodate(head);
 	}
 
 	/* Perhaps the file has been truncated since we checked */
@@ -1879,9 +1921,6 @@ unacct:
 		put_page(page);
 		goto alloc_nohuge;
 	}
-failed:
-	if (swap.val && !shmem_confirm_swap(mapping, index, swap))
-		error = -EEXIST;
 unlock:
 	if (page) {
 		unlock_page(page);