author		Roman Gushchin <klamm@yandex-team.ru>		2015-11-05 21:47:08 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-11-05 22:34:48 -0500
commit		600e19afc5f8a6c18ea49cee9511c5797db02391 (patch)
tree		9ac5a5559a12e64f1f7653a9a4155623d7092690
parent		b171e4093017d4d6e411f5e97823e5e4a21266a2 (diff)
mm: use only per-device readahead limit
The maximal readahead size is currently limited by two values:
 1) a global 2MB constant (MAX_READAHEAD in max_sane_readahead())
 2) a configurable per-device value (bdi->ra_pages)

There are devices which require a custom readahead limit.  For
instance, for RAIDs it's calculated as the number of devices
multiplied by the chunk size, times 2.

The readahead size can never be larger than bdi->ra_pages * 2
(POSIX_FADV_SEQUENTIAL doubles the readahead size).

If so, why do we need two limits?  I suggest completely removing
the max_sane_readahead() stuff and using the per-device readahead
limit everywhere.

Also, using the right readahead size for RAID disks can
significantly increase i/o performance:

before:
  $ dd if=/dev/md2 of=/dev/null bs=100M count=100
  100+0 records in
  100+0 records out
  10485760000 bytes (10 GB) copied, 12.9741 s, 808 MB/s

after:
  $ dd if=/dev/md2 of=/dev/null bs=100M count=100
  100+0 records in
  100+0 records out
  10485760000 bytes (10 GB) copied, 8.91317 s, 1.2 GB/s

(It's an 8-disk RAID5 array.)

This patch doesn't change the sys_readahead and madvise(MADV_WILLNEED)
behavior introduced by 6d2be915e589b58 ("mm/readahead.c: fix readahead
failure for memoryless NUMA nodes and limit readahead pages").

Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
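The POSIX_FADV_SEQUENTIAL doubling mentioned above is triggered from
userspace via posix_fadvise().  A minimal sketch of how an application
would request it (the device path is illustrative, not taken from the
patch):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* path is illustrative; any regular file or block device works */
	int fd = open("/dev/md2", O_RDONLY);
	int err;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/*
	 * len == 0 means "through the end of the file".  Per the commit
	 * message, sequential advice doubles the effective readahead
	 * window (bdi->ra_pages * 2).
	 */
	err = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
	if (err)
		fprintf(stderr, "posix_fadvise: %s\n", strerror(err));

	/* ... large sequential reads here benefit from the bigger window ... */
	close(fd);
	return 0;
}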
-rw-r--r--	include/linux/mm.h	2
-rw-r--r--	mm/filemap.c	8
-rw-r--r--	mm/readahead.c	14
3 files changed, 5 insertions(+), 19 deletions(-)
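Before the diff itself, a standalone model of what the patch changes
(values are assumptions for illustration, not kernel code): the global
2MB clamp disappears and only the per-device bdi->ra_pages clamp
remains.

#include <stdio.h>

#define MAX_READAHEAD	512	/* old global cap: 2MB in 4K pages */

/* old behavior: max_sane_readahead() clamped to a global constant */
static unsigned long old_limit(unsigned long nr)
{
	return nr < MAX_READAHEAD ? nr : MAX_READAHEAD;
}

/* new behavior: clamp only to the per-device limit, min(nr, bdi->ra_pages) */
static unsigned long new_limit(unsigned long nr, unsigned long ra_pages)
{
	return nr < ra_pages ? nr : ra_pages;
}

int main(void)
{
	/*
	 * Assumed 8-disk RAID5: 7 data disks * 128 pages/chunk * 2
	 * = 1792 pages, following the sizing rule in the commit message.
	 */
	unsigned long raid_ra_pages = 1792;
	unsigned long req = 4096;	/* application asks for 16MB */

	printf("old cap: %lu pages\n", old_limit(req));			/* 512  */
	printf("new cap: %lu pages\n", new_limit(req, raid_ra_pages));	/* 1792 */
	return 0;
}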
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80001de019ba..fa08f3cf0f22 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2036,8 +2036,6 @@ void page_cache_async_readahead(struct address_space *mapping,
 				pgoff_t offset,
 				unsigned long size);
 
-unsigned long max_sane_readahead(unsigned long nr);
-
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 327910c2400c..1fe962b49f31 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1807,7 +1807,6 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
 				   struct file *file,
 				   pgoff_t offset)
 {
-	unsigned long ra_pages;
 	struct address_space *mapping = file->f_mapping;
 
 	/* If we don't want any read-ahead, don't bother */
@@ -1836,10 +1835,9 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma,
 	/*
 	 * mmap read-around
 	 */
-	ra_pages = max_sane_readahead(ra->ra_pages);
-	ra->start = max_t(long, 0, offset - ra_pages / 2);
-	ra->size = ra_pages;
-	ra->async_size = ra_pages / 4;
+	ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
+	ra->size = ra->ra_pages;
+	ra->async_size = ra->ra_pages / 4;
 	ra_submit(ra, mapping, file);
 }
 
diff --git a/mm/readahead.c b/mm/readahead.c
index 24682f6f4cfd..998ad592f408 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -213,7 +213,7 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
 		return -EINVAL;
 
-	nr_to_read = max_sane_readahead(nr_to_read);
+	nr_to_read = min(nr_to_read, inode_to_bdi(mapping->host)->ra_pages);
 	while (nr_to_read) {
 		int err;
 
@@ -232,16 +232,6 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	return 0;
 }
 
-#define MAX_READAHEAD   ((512*4096)/PAGE_CACHE_SIZE)
-/*
- * Given a desired number of PAGE_CACHE_SIZE readahead pages, return a
- * sensible upper limit.
- */
-unsigned long max_sane_readahead(unsigned long nr)
-{
-	return min(nr, MAX_READAHEAD);
-}
-
 /*
  * Set the initial window size, round to next power of 2 and square
  * for small size, x 4 for medium, and x 2 for large
@@ -380,7 +370,7 @@ ondemand_readahead(struct address_space *mapping,
 		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
-	unsigned long max = max_sane_readahead(ra->ra_pages);
+	unsigned long max = ra->ra_pages;
 	pgoff_t prev_offset;
 
 	/*