diff options
author | Nick Piggin <npiggin@suse.de> | 2007-10-16 04:24:57 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:54 -0400 |
commit | eb2be189317d031895b5ca534fbf735eb546158b (patch) | |
tree | 8f1eda7af3be7285244a6f1ad77682e90d403c7b | |
parent | 64649a58919e66ec21792dbb6c48cb3da22cbd7f (diff) |
mm: buffered write cleanup
Quite a bit of code is used in maintaining these "cached pages" that are
probably pretty unlikely to get used. It would require a narrow race where
the page is inserted concurrently while this process is allocating a page
in order to create the spare page. A multi-page write into an uncached
part of the file would then be needed to make use of it.
Next, the buffered write path (and others) uses its own LRU pagevec when it
should be just using the per-CPU LRU pagevec (which will cut down on both data
and code size cacheline footprint). Also, these private LRU pagevecs are
emptied after just a very short time, in contrast with the per-CPU pagevecs
that are persistent. Net result: 7.3 times fewer lru_lock acquisitions required
to add the pages to pagecache for a bulk write (in 4K chunks).
[this gets rid of some cond_resched() calls in readahead.c and mpage.c due
to clashes in -mm. What put them there, and why?]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/mpage.c | 10 | ||||
-rw-r--r-- | mm/filemap.c | 143 | ||||
-rw-r--r-- | mm/readahead.c | 24 |
3 files changed, 65 insertions, 112 deletions
diff --git a/fs/mpage.c b/fs/mpage.c index b1c3e5890508..d54f8f897224 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
@@ -379,31 +379,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, | |||
379 | struct bio *bio = NULL; | 379 | struct bio *bio = NULL; |
380 | unsigned page_idx; | 380 | unsigned page_idx; |
381 | sector_t last_block_in_bio = 0; | 381 | sector_t last_block_in_bio = 0; |
382 | struct pagevec lru_pvec; | ||
383 | struct buffer_head map_bh; | 382 | struct buffer_head map_bh; |
384 | unsigned long first_logical_block = 0; | 383 | unsigned long first_logical_block = 0; |
385 | 384 | ||
386 | clear_buffer_mapped(&map_bh); | 385 | clear_buffer_mapped(&map_bh); |
387 | pagevec_init(&lru_pvec, 0); | ||
388 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 386 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
389 | struct page *page = list_entry(pages->prev, struct page, lru); | 387 | struct page *page = list_entry(pages->prev, struct page, lru); |
390 | 388 | ||
391 | prefetchw(&page->flags); | 389 | prefetchw(&page->flags); |
392 | list_del(&page->lru); | 390 | list_del(&page->lru); |
393 | if (!add_to_page_cache(page, mapping, | 391 | if (!add_to_page_cache_lru(page, mapping, |
394 | page->index, GFP_KERNEL)) { | 392 | page->index, GFP_KERNEL)) { |
395 | bio = do_mpage_readpage(bio, page, | 393 | bio = do_mpage_readpage(bio, page, |
396 | nr_pages - page_idx, | 394 | nr_pages - page_idx, |
397 | &last_block_in_bio, &map_bh, | 395 | &last_block_in_bio, &map_bh, |
398 | &first_logical_block, | 396 | &first_logical_block, |
399 | get_block); | 397 | get_block); |
400 | if (!pagevec_add(&lru_pvec, page)) | ||
401 | __pagevec_lru_add(&lru_pvec); | ||
402 | } else { | ||
403 | page_cache_release(page); | ||
404 | } | 398 | } |
399 | page_cache_release(page); | ||
405 | } | 400 | } |
406 | pagevec_lru_add(&lru_pvec); | ||
407 | BUG_ON(!list_empty(pages)); | 401 | BUG_ON(!list_empty(pages)); |
408 | if (bio) | 402 | if (bio) |
409 | mpage_bio_submit(READ, bio); | 403 | mpage_bio_submit(READ, bio); |
diff --git a/mm/filemap.c b/mm/filemap.c index 73b98c6a3389..fb4c1c0792e7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -666,27 +666,22 @@ EXPORT_SYMBOL(find_lock_page); | |||
666 | struct page *find_or_create_page(struct address_space *mapping, | 666 | struct page *find_or_create_page(struct address_space *mapping, |
667 | pgoff_t index, gfp_t gfp_mask) | 667 | pgoff_t index, gfp_t gfp_mask) |
668 | { | 668 | { |
669 | struct page *page, *cached_page = NULL; | 669 | struct page *page; |
670 | int err; | 670 | int err; |
671 | repeat: | 671 | repeat: |
672 | page = find_lock_page(mapping, index); | 672 | page = find_lock_page(mapping, index); |
673 | if (!page) { | 673 | if (!page) { |
674 | if (!cached_page) { | 674 | page = __page_cache_alloc(gfp_mask); |
675 | cached_page = | 675 | if (!page) |
676 | __page_cache_alloc(gfp_mask); | 676 | return NULL; |
677 | if (!cached_page) | 677 | err = add_to_page_cache_lru(page, mapping, index, gfp_mask); |
678 | return NULL; | 678 | if (unlikely(err)) { |
679 | page_cache_release(page); | ||
680 | page = NULL; | ||
681 | if (err == -EEXIST) | ||
682 | goto repeat; | ||
679 | } | 683 | } |
680 | err = add_to_page_cache_lru(cached_page, mapping, | ||
681 | index, gfp_mask); | ||
682 | if (!err) { | ||
683 | page = cached_page; | ||
684 | cached_page = NULL; | ||
685 | } else if (err == -EEXIST) | ||
686 | goto repeat; | ||
687 | } | 684 | } |
688 | if (cached_page) | ||
689 | page_cache_release(cached_page); | ||
690 | return page; | 685 | return page; |
691 | } | 686 | } |
692 | EXPORT_SYMBOL(find_or_create_page); | 687 | EXPORT_SYMBOL(find_or_create_page); |
@@ -872,10 +867,8 @@ void do_generic_mapping_read(struct address_space *mapping, | |||
872 | pgoff_t prev_index; | 867 | pgoff_t prev_index; |
873 | unsigned long offset; /* offset into pagecache page */ | 868 | unsigned long offset; /* offset into pagecache page */ |
874 | unsigned int prev_offset; | 869 | unsigned int prev_offset; |
875 | struct page *cached_page; | ||
876 | int error; | 870 | int error; |
877 | 871 | ||
878 | cached_page = NULL; | ||
879 | index = *ppos >> PAGE_CACHE_SHIFT; | 872 | index = *ppos >> PAGE_CACHE_SHIFT; |
880 | prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; | 873 | prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; |
881 | prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); | 874 | prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); |
@@ -1031,23 +1024,20 @@ no_cached_page: | |||
1031 | * Ok, it wasn't cached, so we need to create a new | 1024 | * Ok, it wasn't cached, so we need to create a new |
1032 | * page.. | 1025 | * page.. |
1033 | */ | 1026 | */ |
1034 | if (!cached_page) { | 1027 | page = page_cache_alloc_cold(mapping); |
1035 | cached_page = page_cache_alloc_cold(mapping); | 1028 | if (!page) { |
1036 | if (!cached_page) { | 1029 | desc->error = -ENOMEM; |
1037 | desc->error = -ENOMEM; | 1030 | goto out; |
1038 | goto out; | ||
1039 | } | ||
1040 | } | 1031 | } |
1041 | error = add_to_page_cache_lru(cached_page, mapping, | 1032 | error = add_to_page_cache_lru(page, mapping, |
1042 | index, GFP_KERNEL); | 1033 | index, GFP_KERNEL); |
1043 | if (error) { | 1034 | if (error) { |
1035 | page_cache_release(page); | ||
1044 | if (error == -EEXIST) | 1036 | if (error == -EEXIST) |
1045 | goto find_page; | 1037 | goto find_page; |
1046 | desc->error = error; | 1038 | desc->error = error; |
1047 | goto out; | 1039 | goto out; |
1048 | } | 1040 | } |
1049 | page = cached_page; | ||
1050 | cached_page = NULL; | ||
1051 | goto readpage; | 1041 | goto readpage; |
1052 | } | 1042 | } |
1053 | 1043 | ||
@@ -1057,8 +1047,6 @@ out: | |||
1057 | ra->prev_pos |= prev_offset; | 1047 | ra->prev_pos |= prev_offset; |
1058 | 1048 | ||
1059 | *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; | 1049 | *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; |
1060 | if (cached_page) | ||
1061 | page_cache_release(cached_page); | ||
1062 | if (filp) | 1050 | if (filp) |
1063 | file_accessed(filp); | 1051 | file_accessed(filp); |
1064 | } | 1052 | } |
@@ -1502,35 +1490,28 @@ static struct page *__read_cache_page(struct address_space *mapping, | |||
1502 | int (*filler)(void *,struct page*), | 1490 | int (*filler)(void *,struct page*), |
1503 | void *data) | 1491 | void *data) |
1504 | { | 1492 | { |
1505 | struct page *page, *cached_page = NULL; | 1493 | struct page *page; |
1506 | int err; | 1494 | int err; |
1507 | repeat: | 1495 | repeat: |
1508 | page = find_get_page(mapping, index); | 1496 | page = find_get_page(mapping, index); |
1509 | if (!page) { | 1497 | if (!page) { |
1510 | if (!cached_page) { | 1498 | page = page_cache_alloc_cold(mapping); |
1511 | cached_page = page_cache_alloc_cold(mapping); | 1499 | if (!page) |
1512 | if (!cached_page) | 1500 | return ERR_PTR(-ENOMEM); |
1513 | return ERR_PTR(-ENOMEM); | 1501 | err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); |
1514 | } | 1502 | if (unlikely(err)) { |
1515 | err = add_to_page_cache_lru(cached_page, mapping, | 1503 | page_cache_release(page); |
1516 | index, GFP_KERNEL); | 1504 | if (err == -EEXIST) |
1517 | if (err == -EEXIST) | 1505 | goto repeat; |
1518 | goto repeat; | ||
1519 | if (err < 0) { | ||
1520 | /* Presumably ENOMEM for radix tree node */ | 1506 | /* Presumably ENOMEM for radix tree node */ |
1521 | page_cache_release(cached_page); | ||
1522 | return ERR_PTR(err); | 1507 | return ERR_PTR(err); |
1523 | } | 1508 | } |
1524 | page = cached_page; | ||
1525 | cached_page = NULL; | ||
1526 | err = filler(data, page); | 1509 | err = filler(data, page); |
1527 | if (err < 0) { | 1510 | if (err < 0) { |
1528 | page_cache_release(page); | 1511 | page_cache_release(page); |
1529 | page = ERR_PTR(err); | 1512 | page = ERR_PTR(err); |
1530 | } | 1513 | } |
1531 | } | 1514 | } |
1532 | if (cached_page) | ||
1533 | page_cache_release(cached_page); | ||
1534 | return page; | 1515 | return page; |
1535 | } | 1516 | } |
1536 | 1517 | ||
@@ -1607,40 +1588,6 @@ struct page *read_cache_page(struct address_space *mapping, | |||
1607 | EXPORT_SYMBOL(read_cache_page); | 1588 | EXPORT_SYMBOL(read_cache_page); |
1608 | 1589 | ||
1609 | /* | 1590 | /* |
1610 | * If the page was newly created, increment its refcount and add it to the | ||
1611 | * caller's lru-buffering pagevec. This function is specifically for | ||
1612 | * generic_file_write(). | ||
1613 | */ | ||
1614 | static inline struct page * | ||
1615 | __grab_cache_page(struct address_space *mapping, unsigned long index, | ||
1616 | struct page **cached_page, struct pagevec *lru_pvec) | ||
1617 | { | ||
1618 | int err; | ||
1619 | struct page *page; | ||
1620 | repeat: | ||
1621 | page = find_lock_page(mapping, index); | ||
1622 | if (!page) { | ||
1623 | if (!*cached_page) { | ||
1624 | *cached_page = page_cache_alloc(mapping); | ||
1625 | if (!*cached_page) | ||
1626 | return NULL; | ||
1627 | } | ||
1628 | err = add_to_page_cache(*cached_page, mapping, | ||
1629 | index, GFP_KERNEL); | ||
1630 | if (err == -EEXIST) | ||
1631 | goto repeat; | ||
1632 | if (err == 0) { | ||
1633 | page = *cached_page; | ||
1634 | page_cache_get(page); | ||
1635 | if (!pagevec_add(lru_pvec, page)) | ||
1636 | __pagevec_lru_add(lru_pvec); | ||
1637 | *cached_page = NULL; | ||
1638 | } | ||
1639 | } | ||
1640 | return page; | ||
1641 | } | ||
1642 | |||
1643 | /* | ||
1644 | * The logic we want is | 1591 | * The logic we want is |
1645 | * | 1592 | * |
1646 | * if suid or (sgid and xgrp) | 1593 | * if suid or (sgid and xgrp) |
@@ -1832,6 +1779,33 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
1832 | } | 1779 | } |
1833 | EXPORT_SYMBOL(generic_file_direct_write); | 1780 | EXPORT_SYMBOL(generic_file_direct_write); |
1834 | 1781 | ||
1782 | /* | ||
1783 | * Find or create a page at the given pagecache position. Return the locked | ||
1784 | * page. This function is specifically for buffered writes. | ||
1785 | */ | ||
1786 | static struct page *__grab_cache_page(struct address_space *mapping, | ||
1787 | pgoff_t index) | ||
1788 | { | ||
1789 | int status; | ||
1790 | struct page *page; | ||
1791 | repeat: | ||
1792 | page = find_lock_page(mapping, index); | ||
1793 | if (likely(page)) | ||
1794 | return page; | ||
1795 | |||
1796 | page = page_cache_alloc(mapping); | ||
1797 | if (!page) | ||
1798 | return NULL; | ||
1799 | status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); | ||
1800 | if (unlikely(status)) { | ||
1801 | page_cache_release(page); | ||
1802 | if (status == -EEXIST) | ||
1803 | goto repeat; | ||
1804 | return NULL; | ||
1805 | } | ||
1806 | return page; | ||
1807 | } | ||
1808 | |||
1835 | ssize_t | 1809 | ssize_t |
1836 | generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | 1810 | generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, |
1837 | unsigned long nr_segs, loff_t pos, loff_t *ppos, | 1811 | unsigned long nr_segs, loff_t pos, loff_t *ppos, |
@@ -1842,15 +1816,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
1842 | const struct address_space_operations *a_ops = mapping->a_ops; | 1816 | const struct address_space_operations *a_ops = mapping->a_ops; |
1843 | struct inode *inode = mapping->host; | 1817 | struct inode *inode = mapping->host; |
1844 | long status = 0; | 1818 | long status = 0; |
1845 | struct page *page; | ||
1846 | struct page *cached_page = NULL; | ||
1847 | struct pagevec lru_pvec; | ||
1848 | const struct iovec *cur_iov = iov; /* current iovec */ | 1819 | const struct iovec *cur_iov = iov; /* current iovec */ |
1849 | size_t iov_offset = 0; /* offset in the current iovec */ | 1820 | size_t iov_offset = 0; /* offset in the current iovec */ |
1850 | char __user *buf; | 1821 | char __user *buf; |
1851 | 1822 | ||
1852 | pagevec_init(&lru_pvec, 0); | ||
1853 | |||
1854 | /* | 1823 | /* |
1855 | * handle partial DIO write. Adjust cur_iov if needed. | 1824 | * handle partial DIO write. Adjust cur_iov if needed. |
1856 | */ | 1825 | */ |
@@ -1862,6 +1831,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
1862 | } | 1831 | } |
1863 | 1832 | ||
1864 | do { | 1833 | do { |
1834 | struct page *page; | ||
1865 | pgoff_t index; /* Pagecache index for current page */ | 1835 | pgoff_t index; /* Pagecache index for current page */ |
1866 | unsigned long offset; /* Offset into pagecache page */ | 1836 | unsigned long offset; /* Offset into pagecache page */ |
1867 | unsigned long maxlen; /* Bytes remaining in current iovec */ | 1837 | unsigned long maxlen; /* Bytes remaining in current iovec */ |
@@ -1888,7 +1858,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, | |||
1888 | fault_in_pages_readable(buf, maxlen); | 1858 | fault_in_pages_readable(buf, maxlen); |
1889 | #endif | 1859 | #endif |
1890 | 1860 | ||
1891 | page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); | 1861 | |
1862 | page = __grab_cache_page(mapping, index); | ||
1892 | if (!page) { | 1863 | if (!page) { |
1893 | status = -ENOMEM; | 1864 | status = -ENOMEM; |
1894 | break; | 1865 | break; |
@@ -1956,9 +1927,6 @@ fs_write_aop_error: | |||
1956 | } while (count); | 1927 | } while (count); |
1957 | *ppos = pos; | 1928 | *ppos = pos; |
1958 | 1929 | ||
1959 | if (cached_page) | ||
1960 | page_cache_release(cached_page); | ||
1961 | |||
1962 | /* | 1930 | /* |
1963 | * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC | 1931 | * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC |
1964 | */ | 1932 | */ |
@@ -1978,7 +1946,6 @@ fs_write_aop_error: | |||
1978 | if (unlikely(file->f_flags & O_DIRECT) && written) | 1946 | if (unlikely(file->f_flags & O_DIRECT) && written) |
1979 | status = filemap_write_and_wait(mapping); | 1947 | status = filemap_write_and_wait(mapping); |
1980 | 1948 | ||
1981 | pagevec_lru_add(&lru_pvec); | ||
1982 | return written ? written : status; | 1949 | return written ? written : status; |
1983 | } | 1950 | } |
1984 | EXPORT_SYMBOL(generic_file_buffered_write); | 1951 | EXPORT_SYMBOL(generic_file_buffered_write); |
diff --git a/mm/readahead.c b/mm/readahead.c index c5c8981469e7..229788884010 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -58,28 +58,25 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, | |||
58 | int (*filler)(void *, struct page *), void *data) | 58 | int (*filler)(void *, struct page *), void *data) |
59 | { | 59 | { |
60 | struct page *page; | 60 | struct page *page; |
61 | struct pagevec lru_pvec; | ||
62 | int ret = 0; | 61 | int ret = 0; |
63 | 62 | ||
64 | pagevec_init(&lru_pvec, 0); | ||
65 | |||
66 | while (!list_empty(pages)) { | 63 | while (!list_empty(pages)) { |
67 | page = list_to_page(pages); | 64 | page = list_to_page(pages); |
68 | list_del(&page->lru); | 65 | list_del(&page->lru); |
69 | if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { | 66 | if (add_to_page_cache_lru(page, mapping, |
67 | page->index, GFP_KERNEL)) { | ||
70 | page_cache_release(page); | 68 | page_cache_release(page); |
71 | continue; | 69 | continue; |
72 | } | 70 | } |
71 | page_cache_release(page); | ||
72 | |||
73 | ret = filler(data, page); | 73 | ret = filler(data, page); |
74 | if (!pagevec_add(&lru_pvec, page)) | 74 | if (unlikely(ret)) { |
75 | __pagevec_lru_add(&lru_pvec); | ||
76 | if (ret) { | ||
77 | put_pages_list(pages); | 75 | put_pages_list(pages); |
78 | break; | 76 | break; |
79 | } | 77 | } |
80 | task_io_account_read(PAGE_CACHE_SIZE); | 78 | task_io_account_read(PAGE_CACHE_SIZE); |
81 | } | 79 | } |
82 | pagevec_lru_add(&lru_pvec); | ||
83 | return ret; | 80 | return ret; |
84 | } | 81 | } |
85 | 82 | ||
@@ -89,7 +86,6 @@ static int read_pages(struct address_space *mapping, struct file *filp, | |||
89 | struct list_head *pages, unsigned nr_pages) | 86 | struct list_head *pages, unsigned nr_pages) |
90 | { | 87 | { |
91 | unsigned page_idx; | 88 | unsigned page_idx; |
92 | struct pagevec lru_pvec; | ||
93 | int ret; | 89 | int ret; |
94 | 90 | ||
95 | if (mapping->a_ops->readpages) { | 91 | if (mapping->a_ops->readpages) { |
@@ -99,19 +95,15 @@ static int read_pages(struct address_space *mapping, struct file *filp, | |||
99 | goto out; | 95 | goto out; |
100 | } | 96 | } |
101 | 97 | ||
102 | pagevec_init(&lru_pvec, 0); | ||
103 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 98 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
104 | struct page *page = list_to_page(pages); | 99 | struct page *page = list_to_page(pages); |
105 | list_del(&page->lru); | 100 | list_del(&page->lru); |
106 | if (!add_to_page_cache(page, mapping, | 101 | if (!add_to_page_cache_lru(page, mapping, |
107 | page->index, GFP_KERNEL)) { | 102 | page->index, GFP_KERNEL)) { |
108 | mapping->a_ops->readpage(filp, page); | 103 | mapping->a_ops->readpage(filp, page); |
109 | if (!pagevec_add(&lru_pvec, page)) | 104 | } |
110 | __pagevec_lru_add(&lru_pvec); | 105 | page_cache_release(page); |
111 | } else | ||
112 | page_cache_release(page); | ||
113 | } | 106 | } |
114 | pagevec_lru_add(&lru_pvec); | ||
115 | ret = 0; | 107 | ret = 0; |
116 | out: | 108 | out: |
117 | return ret; | 109 | return ret; |