Diffstat (limited to 'mm/shmem.c')
-rw-r--r--   mm/shmem.c   110
1 file changed, 109 insertions(+), 1 deletion(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4a5498795a2b..a42add14331c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1828,9 +1828,117 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
+/*
+ * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
+ * so reuse a tag which we firmly believe is never set or cleared on shmem.
+ */
+#define SHMEM_TAG_PINNED	PAGECACHE_TAG_TOWRITE
+#define LAST_SCAN		4	/* about 150ms max */
+
+static void shmem_tag_pins(struct address_space *mapping)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+	pgoff_t start;
+	struct page *page;
+
+	lru_add_drain();
+	start = 0;
+	rcu_read_lock();
+
+restart:
+	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+		page = radix_tree_deref_slot(slot);
+		if (!page || radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page))
+				goto restart;
+		} else if (page_count(page) - page_mapcount(page) > 1) {
+			spin_lock_irq(&mapping->tree_lock);
+			radix_tree_tag_set(&mapping->page_tree, iter.index,
+					   SHMEM_TAG_PINNED);
+			spin_unlock_irq(&mapping->tree_lock);
+		}
+
+		if (need_resched()) {
+			cond_resched_rcu();
+			start = iter.index + 1;
+			goto restart;
+		}
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
+ * via get_user_pages(), drivers might have some pending I/O without any active
+ * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
+ * and see whether it has an elevated ref-count. If so, we tag them and wait for
+ * them to be dropped.
+ * The caller must guarantee that no new user will acquire writable references
+ * to those pages to avoid races.
+ */
 static int shmem_wait_for_pins(struct address_space *mapping)
 {
-	return 0;
+	struct radix_tree_iter iter;
+	void **slot;
+	pgoff_t start;
+	struct page *page;
+	int error, scan;
+
+	shmem_tag_pins(mapping);
+
+	error = 0;
+	for (scan = 0; scan <= LAST_SCAN; scan++) {
+		if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
+			break;
+
+		if (!scan)
+			lru_add_drain_all();
+		else if (schedule_timeout_killable((HZ << scan) / 200))
+			scan = LAST_SCAN;
+
+		start = 0;
+		rcu_read_lock();
+restart:
+		radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
+					   start, SHMEM_TAG_PINNED) {
+
+			page = radix_tree_deref_slot(slot);
+			if (radix_tree_exception(page)) {
+				if (radix_tree_deref_retry(page))
+					goto restart;
+
+				page = NULL;
+			}
+
+			if (page &&
+			    page_count(page) - page_mapcount(page) != 1) {
+				if (scan < LAST_SCAN)
+					goto continue_resched;
+
+				/*
+				 * On the last scan, we clean up all those tags
+				 * we inserted; but make a note that we still
+				 * found pages pinned.
+				 */
+				error = -EBUSY;
+			}
+
+			spin_lock_irq(&mapping->tree_lock);
+			radix_tree_tag_clear(&mapping->page_tree,
+					     iter.index, SHMEM_TAG_PINNED);
+			spin_unlock_irq(&mapping->tree_lock);
+continue_resched:
+			if (need_resched()) {
+				cond_resched_rcu();
+				start = iter.index + 1;
+				goto restart;
+			}
+		}
+		rcu_read_unlock();
+	}
+
+	return error;
 }
 
 #define F_ALL_SEALS (F_SEAL_SEAL | \
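Note on the "about 150ms max" comment next to LAST_SCAN: shmem_wait_for_pins() sleeps schedule_timeout_killable((HZ << scan) / 200) jiffies on scans 1 through 4 (scan 0 only runs lru_add_drain_all()), so the total sleep is at most HZ * (2 + 4 + 8 + 16) / 200 = 0.15 * HZ jiffies, i.e. roughly 150 ms, before the final scan reports -EBUSY for pages whose extra references were never dropped.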
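As a hedged illustration of the user-visible effect (not part of this patch): in the memfd sealing path that this series builds, shmem_wait_for_pins() runs when F_SEAL_WRITE is added, so a pin that never drops surfaces to userspace as fcntl(F_ADD_SEALS) failing with EBUSY. A minimal sketch, assuming the memfd_create(2)/F_ADD_SEALS interface introduced elsewhere in the series (available via glibc 2.27+):

/*
 * Illustrative userspace sketch, not taken from the patch. Without any
 * pinned pages the seal is simply applied; if pages of the file are still
 * pinned (e.g. by in-flight direct-IO/AIO that took references via
 * get_user_pages()), adding F_SEAL_WRITE is expected to fail with EBUSY
 * once shmem_wait_for_pins() gives up.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        int fd = memfd_create("example", MFD_ALLOW_SEALING);
        if (fd < 0)
                return 1;
        if (ftruncate(fd, 4096) < 0)
                return 1;

        /* ... a driver-held pin on the file's pages would be taken here ... */

        if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) < 0 && errno == EBUSY)
                fprintf(stderr, "pages still pinned, sealing refused: %s\n",
                        strerror(errno));
        else
                fprintf(stderr, "F_SEAL_WRITE applied\n");

        close(fd);
        return 0;
}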