author		Nick Piggin <npiggin@suse.de>	2007-10-16 04:24:57 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-10-16 12:42:54 -0400
commit		eb2be189317d031895b5ca534fbf735eb546158b (patch)
tree		8f1eda7af3be7285244a6f1ad77682e90d403c7b
parent		64649a58919e66ec21792dbb6c48cb3da22cbd7f (diff)
mm: buffered write cleanup
Quite a bit of code is used in maintaining these "cached pages" that are probably pretty unlikely to get used. It would require a narrow race where the page is inserted concurrently while this process is allocating a page in order to create the spare page. Then a multi-page write into an uncached part of the file, to make use of it.

Next, the buffered write path (and others) uses its own LRU pagevec when it should be just using the per-CPU LRU pagevec (which will cut down on both data and code size cacheline footprint). Also, these private LRU pagevecs are emptied after just a very short time, in contrast with the per-CPU pagevecs that are persistent.

Net result: 7.3 times fewer lru_lock acquisitions required to add the pages to pagecache for a bulk write (in 4K chunks).

[this gets rid of some cond_resched() calls in readahead.c and mpage.c due to clashes in -mm. What put them there, and why?]

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	fs/mpage.c	10
-rw-r--r--	mm/filemap.c	143
-rw-r--r--	mm/readahead.c	24
3 files changed, 65 insertions, 112 deletions
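
Every call site below converts to add_to_page_cache_lru(). For orientation, a minimal sketch of that existing helper's shape, assuming the add_to_page_cache()/lru_cache_add() split of this kernel (consult the tree for the verbatim body):

	/*
	 * Sketch: insert a newly allocated page into the pagecache and queue
	 * it on the per-CPU LRU pagevec in one step.  On success the
	 * pagecache and the LRU each hold their own reference, so the caller
	 * may drop its allocation reference; on failure the caller must
	 * drop it.
	 */
	int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t offset, gfp_t gfp_mask)
	{
		int ret = add_to_page_cache(page, mapping, offset, gfp_mask);

		if (ret == 0)
			lru_cache_add(page);	/* batches via the per-CPU pagevec */
		return ret;
	}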
diff --git a/fs/mpage.c b/fs/mpage.c
index b1c3e5890508..d54f8f897224 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,31 +379,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	sector_t last_block_in_bio = 0;
-	struct pagevec lru_pvec;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
 	clear_buffer_mapped(&map_bh);
-	pagevec_init(&lru_pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
 		prefetchw(&page->flags);
 		list_del(&page->lru);
-		if (!add_to_page_cache(page, mapping,
+		if (!add_to_page_cache_lru(page, mapping,
 					page->index, GFP_KERNEL)) {
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
 					&last_block_in_bio, &map_bh,
 					&first_logical_block,
 					get_block);
-			if (!pagevec_add(&lru_pvec, page))
-				__pagevec_lru_add(&lru_pvec);
-		} else {
-			page_cache_release(page);
 		}
+		page_cache_release(page);
 	}
-	pagevec_lru_add(&lru_pvec);
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		mpage_bio_submit(READ, bio);
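
With the private lru_pvec gone, the LRU batching happens inside lru_cache_add() behind add_to_page_cache_lru(). A lightly simplified sketch of that mm/swap.c path, to show where the saving in lru_lock round trips comes from (illustrative, not a verbatim copy of the tree):

	void lru_cache_add(struct page *page)
	{
		struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);

		page_cache_get(page);			/* the LRU's own reference */
		if (!pagevec_add(pvec, page))		/* full at PAGEVEC_SIZE pages? */
			__pagevec_lru_add(pvec);	/* one zone->lru_lock trip drains all */
		put_cpu_var(lru_add_pvecs);
	}

Unlike the on-stack lru_pvec it replaces, the per-CPU pagevec is not drained when mpage_readpages() returns, so pages from successive calls share lock acquisitions. This is also why page_cache_release() becomes unconditional above: the pagecache and the LRU take their own references inside add_to_page_cache_lru(), so the caller's list reference is always dropped.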
diff --git a/mm/filemap.c b/mm/filemap.c
index 73b98c6a3389..fb4c1c0792e7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -666,27 +666,22 @@ EXPORT_SYMBOL(find_lock_page);
 struct page *find_or_create_page(struct address_space *mapping,
 		pgoff_t index, gfp_t gfp_mask)
 {
-	struct page *page, *cached_page = NULL;
+	struct page *page;
 	int err;
 repeat:
 	page = find_lock_page(mapping, index);
 	if (!page) {
-		if (!cached_page) {
-			cached_page =
-				__page_cache_alloc(gfp_mask);
-			if (!cached_page)
-				return NULL;
+		page = __page_cache_alloc(gfp_mask);
+		if (!page)
+			return NULL;
+		err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+		if (unlikely(err)) {
+			page_cache_release(page);
+			page = NULL;
+			if (err == -EEXIST)
+				goto repeat;
 		}
-		err = add_to_page_cache_lru(cached_page, mapping,
-					index, gfp_mask);
-		if (!err) {
-			page = cached_page;
-			cached_page = NULL;
-		} else if (err == -EEXIST)
-			goto repeat;
 	}
-	if (cached_page)
-		page_cache_release(cached_page);
 	return page;
 }
 EXPORT_SYMBOL(find_or_create_page);
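
The function's external contract is unchanged by the rewrite: callers still get back a locked, referenced page or NULL. A hypothetical caller, for orientation (the touch_page name is invented):

	/* hypothetical: pin down and lock a pagecache page at index */
	static int touch_page(struct address_space *mapping, pgoff_t index)
	{
		struct page *page;

		page = find_or_create_page(mapping, index, GFP_KERNEL);
		if (!page)
			return -ENOMEM;
		/* ... page is locked and referenced here ... */
		unlock_page(page);
		page_cache_release(page);
		return 0;
	}

What changed is internal: on add_to_page_cache_lru() failure the new code drops the allocation reference itself and retries on -EEXIST, instead of stashing the spare page in cached_page for a next iteration that, per the changelog, rarely came.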
@@ -872,10 +867,8 @@ void do_generic_mapping_read(struct address_space *mapping,
 	pgoff_t prev_index;
 	unsigned long offset;      /* offset into pagecache page */
 	unsigned int prev_offset;
-	struct page *cached_page;
 	int error;
 
-	cached_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
 	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
@@ -1031,23 +1024,20 @@ no_cached_page:
 		 * Ok, it wasn't cached, so we need to create a new
 		 * page..
 		 */
-		if (!cached_page) {
-			cached_page = page_cache_alloc_cold(mapping);
-			if (!cached_page) {
-				desc->error = -ENOMEM;
-				goto out;
-			}
+		page = page_cache_alloc_cold(mapping);
+		if (!page) {
+			desc->error = -ENOMEM;
+			goto out;
 		}
-		error = add_to_page_cache_lru(cached_page, mapping,
+		error = add_to_page_cache_lru(page, mapping,
 						index, GFP_KERNEL);
 		if (error) {
+			page_cache_release(page);
 			if (error == -EEXIST)
 				goto find_page;
 			desc->error = error;
 			goto out;
 		}
-		page = cached_page;
-		cached_page = NULL;
 		goto readpage;
 	}
 
@@ -1057,8 +1047,6 @@ out:
 	ra->prev_pos |= prev_offset;
 
 	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
-	if (cached_page)
-		page_cache_release(cached_page);
 	if (filp)
 		file_accessed(filp);
 }
@@ -1502,35 +1490,28 @@ static struct page *__read_cache_page(struct address_space *mapping,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
-	struct page *page, *cached_page = NULL;
+	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		if (!cached_page) {
-			cached_page = page_cache_alloc_cold(mapping);
-			if (!cached_page)
-				return ERR_PTR(-ENOMEM);
-		}
-		err = add_to_page_cache_lru(cached_page, mapping,
-					index, GFP_KERNEL);
-		if (err == -EEXIST)
-			goto repeat;
-		if (err < 0) {
+		page = page_cache_alloc_cold(mapping);
+		if (!page)
+			return ERR_PTR(-ENOMEM);
+		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+		if (unlikely(err)) {
+			page_cache_release(page);
+			if (err == -EEXIST)
+				goto repeat;
 			/* Presumably ENOMEM for radix tree node */
-			page_cache_release(cached_page);
 			return ERR_PTR(err);
 		}
-		page = cached_page;
-		cached_page = NULL;
 		err = filler(data, page);
 		if (err < 0) {
 			page_cache_release(page);
 			page = ERR_PTR(err);
 		}
 	}
-	if (cached_page)
-		page_cache_release(cached_page);
 	return page;
 }
 
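__read_cache_page() is the guts of the exported read_cache_page(), whose callers supply the filler. A hypothetical filler (the zero_filler name is invented), to make the contract concrete as I read this era's code: the filler receives a locked, not-up-to-date page and must unlock it:

	/* hypothetical filler: satisfy the read by zero-filling the page */
	static int zero_filler(void *data, struct page *page)
	{
		clear_highpage(page);
		SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}

	page = read_cache_page(mapping, index, zero_filler, NULL);
	if (IS_ERR(page))
		return PTR_ERR(page);
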
@@ -1607,40 +1588,6 @@ struct page *read_cache_page(struct address_space *mapping,
 EXPORT_SYMBOL(read_cache_page);
 
 /*
- * If the page was newly created, increment its refcount and add it to the
- * caller's lru-buffering pagevec.  This function is specifically for
- * generic_file_write().
- */
-static inline struct page *
-__grab_cache_page(struct address_space *mapping, unsigned long index,
-			struct page **cached_page, struct pagevec *lru_pvec)
-{
-	int err;
-	struct page *page;
-repeat:
-	page = find_lock_page(mapping, index);
-	if (!page) {
-		if (!*cached_page) {
-			*cached_page = page_cache_alloc(mapping);
-			if (!*cached_page)
-				return NULL;
-		}
-		err = add_to_page_cache(*cached_page, mapping,
-					index, GFP_KERNEL);
-		if (err == -EEXIST)
-			goto repeat;
-		if (err == 0) {
-			page = *cached_page;
-			page_cache_get(page);
-			if (!pagevec_add(lru_pvec, page))
-				__pagevec_lru_add(lru_pvec);
-			*cached_page = NULL;
-		}
-	}
-	return page;
-}
-
-/*
  * The logic we want is
  *
  *	if suid or (sgid and xgrp)
@@ -1832,6 +1779,33 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 }
 EXPORT_SYMBOL(generic_file_direct_write);
 
+/*
+ * Find or create a page at the given pagecache position. Return the locked
+ * page. This function is specifically for buffered writes.
+ */
+static struct page *__grab_cache_page(struct address_space *mapping,
+							pgoff_t index)
+{
+	int status;
+	struct page *page;
+repeat:
+	page = find_lock_page(mapping, index);
+	if (likely(page))
+		return page;
+
+	page = page_cache_alloc(mapping);
+	if (!page)
+		return NULL;
+	status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+	if (unlikely(status)) {
+		page_cache_release(page);
+		if (status == -EEXIST)
+			goto repeat;
+		return NULL;
+	}
+	return page;
+}
+
 ssize_t
 generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos, loff_t *ppos,
@@ -1842,15 +1816,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	const struct address_space_operations *a_ops = mapping->a_ops;
 	struct inode	*inode = mapping->host;
 	long		status = 0;
-	struct page	*page;
-	struct page	*cached_page = NULL;
-	struct pagevec	lru_pvec;
 	const struct iovec *cur_iov = iov; /* current iovec */
 	size_t		iov_offset = 0;	   /* offset in the current iovec */
 	char __user	*buf;
 
-	pagevec_init(&lru_pvec, 0);
-
 	/*
 	 * handle partial DIO write.  Adjust cur_iov if needed.
 	 */
@@ -1862,6 +1831,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	}
 
 	do {
+		struct page *page;
 		pgoff_t index;		/* Pagecache index for current page */
 		unsigned long offset;	/* Offset into pagecache page */
 		unsigned long maxlen;	/* Bytes remaining in current iovec */
@@ -1888,7 +1858,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		fault_in_pages_readable(buf, maxlen);
 #endif
 
-		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
+
+		page = __grab_cache_page(mapping, index);
 		if (!page) {
 			status = -ENOMEM;
 			break;
@@ -1956,9 +1927,6 @@ fs_write_aop_error:
 	} while (count);
 	*ppos = pos;
 
-	if (cached_page)
-		page_cache_release(cached_page);
-
 	/*
 	 * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
 	 */
@@ -1978,7 +1946,6 @@ fs_write_aop_error:
 	if (unlikely(file->f_flags & O_DIRECT) && written)
 		status = filemap_write_and_wait(mapping);
 
-	pagevec_lru_add(&lru_pvec);
 	return written ? written : status;
 }
 EXPORT_SYMBOL(generic_file_buffered_write);
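
A back-of-the-envelope model of the changelog's 7.3x figure (my interpretation, not from the patch): a bulk write in 4K chunks pushes one page per write() call, and the old private pagevec was drained at the end of every call, costing roughly one lru_lock acquisition per page; the per-CPU pagevec instead drains once per PAGEVEC_SIZE (14, per this era's include/linux/pagevec.h) insertions. The toy userspace calculation below shows the ideal ceiling; the measured 7.3x is lower, presumably because other activity also drains the per-CPU pagevec:

	#include <stdio.h>

	#define PAGEVEC_SIZE 14	/* from include/linux/pagevec.h of this era */

	int main(void)
	{
		unsigned long pages = 1 << 20;		/* 4GB written in 4K chunks */
		unsigned long old_locks = pages;	/* private pagevec: drained per call */
		unsigned long new_locks =		/* per-CPU pagevec: drained when full */
			(pages + PAGEVEC_SIZE - 1) / PAGEVEC_SIZE;

		printf("old %lu vs new %lu lock trips (%.1fx fewer, ideal)\n",
			old_locks, new_locks, (double)old_locks / new_locks);
		return 0;
	}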
diff --git a/mm/readahead.c b/mm/readahead.c
index c5c8981469e7..229788884010 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -58,28 +58,25 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
 			int (*filler)(void *, struct page *), void *data)
 {
 	struct page *page;
-	struct pagevec lru_pvec;
 	int ret = 0;
 
-	pagevec_init(&lru_pvec, 0);
-
 	while (!list_empty(pages)) {
 		page = list_to_page(pages);
 		list_del(&page->lru);
-		if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
+		if (add_to_page_cache_lru(page, mapping,
+					page->index, GFP_KERNEL)) {
 			page_cache_release(page);
 			continue;
 		}
+		page_cache_release(page);
+
 		ret = filler(data, page);
-		if (!pagevec_add(&lru_pvec, page))
-			__pagevec_lru_add(&lru_pvec);
-		if (ret) {
+		if (unlikely(ret)) {
 			put_pages_list(pages);
 			break;
 		}
 		task_io_account_read(PAGE_CACHE_SIZE);
 	}
-	pagevec_lru_add(&lru_pvec);
 	return ret;
 }
 
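read_cache_pages() is the building block for simple ->readpages implementations. A hypothetical wiring (all myfs_* names invented), showing where it sits; as with read_cache_page(), the filler receives a locked page and is expected to unlock it, readpage-style:

	/* hypothetical filler: forward to the fs's own readpage, which unlocks */
	static int myfs_fill_page(void *data, struct page *page)
	{
		struct file *file = data;

		return myfs_readpage(file, page);
	}

	static int myfs_readpages(struct file *file, struct address_space *mapping,
				struct list_head *pages, unsigned nr_pages)
	{
		return read_cache_pages(mapping, pages, myfs_fill_page, file);
	}

Note the ordering subtlety the new code introduces: the caller's list reference is dropped before filler() runs, so during the fill the page is kept alive by its pagecache reference (and the one the LRU took).
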
@@ -89,7 +86,6 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		struct list_head *pages, unsigned nr_pages)
 {
 	unsigned page_idx;
-	struct pagevec lru_pvec;
 	int ret;
 
 	if (mapping->a_ops->readpages) {
@@ -99,19 +95,15 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		goto out;
 	}
 
-	pagevec_init(&lru_pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_to_page(pages);
 		list_del(&page->lru);
-		if (!add_to_page_cache(page, mapping,
+		if (!add_to_page_cache_lru(page, mapping,
 					page->index, GFP_KERNEL)) {
 			mapping->a_ops->readpage(filp, page);
-			if (!pagevec_add(&lru_pvec, page))
-				__pagevec_lru_add(&lru_pvec);
-		} else
-			page_cache_release(page);
+		}
+		page_cache_release(page);
 	}
-	pagevec_lru_add(&lru_pvec);
 	ret = 0;
 out:
 	return ret;