diff options
-rw-r--r-- | include/linux/mm.h | 6 | ||||
-rw-r--r-- | mm/readahead.c | 174 |
2 files changed, 180 insertions, 0 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index f8e12b3b6110..619c0e80cf0c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1138,6 +1138,12 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, | |||
1138 | pgoff_t offset, unsigned long nr_to_read); | 1138 | pgoff_t offset, unsigned long nr_to_read); |
1139 | int force_page_cache_readahead(struct address_space *mapping, struct file *filp, | 1139 | int force_page_cache_readahead(struct address_space *mapping, struct file *filp, |
1140 | pgoff_t offset, unsigned long nr_to_read); | 1140 | pgoff_t offset, unsigned long nr_to_read); |
1141 | unsigned long page_cache_readahead_ondemand(struct address_space *mapping, | ||
1142 | struct file_ra_state *ra, | ||
1143 | struct file *filp, | ||
1144 | struct page *page, | ||
1145 | pgoff_t offset, | ||
1146 | unsigned long size); | ||
1141 | unsigned long page_cache_readahead(struct address_space *mapping, | 1147 | unsigned long page_cache_readahead(struct address_space *mapping, |
1142 | struct file_ra_state *ra, | 1148 | struct file_ra_state *ra, |
1143 | struct file *filp, | 1149 | struct file *filp, |
diff --git a/mm/readahead.c b/mm/readahead.c index 072ce8f8357d..c094e4f5a250 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -611,3 +611,177 @@ unsigned long ra_submit(struct file_ra_state *ra, | |||
611 | return actual; | 611 | return actual; |
612 | } | 612 | } |
613 | EXPORT_SYMBOL_GPL(ra_submit); | 613 | EXPORT_SYMBOL_GPL(ra_submit); |
614 | |||
615 | /* | ||
616 | * Get the previous window size, ramp it up, and | ||
617 | * return it as the new window size. | ||
618 | */ | ||
619 | static unsigned long get_next_ra_size2(struct file_ra_state *ra, | ||
620 | unsigned long max) | ||
621 | { | ||
622 | unsigned long cur = ra->readahead_index - ra->ra_index; | ||
623 | unsigned long newsize; | ||
624 | |||
625 | if (cur < max / 16) | ||
626 | newsize = cur * 4; | ||
627 | else | ||
628 | newsize = cur * 2; | ||
629 | |||
630 | return min(newsize, max); | ||
631 | } | ||
632 | |||
633 | /* | ||
634 | * On-demand readahead design. | ||
635 | * | ||
636 | * The fields in struct file_ra_state represent the most-recently-executed | ||
637 | * readahead attempt: | ||
638 | * | ||
639 | * |-------- last readahead window -------->| | ||
640 | * |-- application walking here -->| | ||
641 | * ======#============|==================#=====================| | ||
642 | * ^la_index ^ra_index ^lookahead_index ^readahead_index | ||
643 | * | ||
644 | * [ra_index, readahead_index) represents the last readahead window. | ||
645 | * | ||
646 | * [la_index, lookahead_index] is where the application would be walking(in | ||
647 | * the common case of cache-cold sequential reads): the last window was | ||
648 | * established when the application was at la_index, and the next window will | ||
649 | * be bring in when the application reaches lookahead_index. | ||
650 | * | ||
651 | * To overlap application thinking time and disk I/O time, we do | ||
652 | * `readahead pipelining': Do not wait until the application consumed all | ||
653 | * readahead pages and stalled on the missing page at readahead_index; | ||
654 | * Instead, submit an asynchronous readahead I/O as early as the application | ||
655 | * reads on the page at lookahead_index. Normally lookahead_index will be | ||
656 | * equal to ra_index, for maximum pipelining. | ||
657 | * | ||
658 | * In interleaved sequential reads, concurrent streams on the same fd can | ||
659 | * be invalidating each other's readahead state. So we flag the new readahead | ||
660 | * page at lookahead_index with PG_readahead, and use it as readahead | ||
661 | * indicator. The flag won't be set on already cached pages, to avoid the | ||
662 | * readahead-for-nothing fuss, saving pointless page cache lookups. | ||
663 | * | ||
664 | * prev_index tracks the last visited page in the _previous_ read request. | ||
665 | * It should be maintained by the caller, and will be used for detecting | ||
666 | * small random reads. Note that the readahead algorithm checks loosely | ||
667 | * for sequential patterns. Hence interleaved reads might be served as | ||
668 | * sequential ones. | ||
669 | * | ||
670 | * There is a special-case: if the first page which the application tries to | ||
671 | * read happens to be the first page of the file, it is assumed that a linear | ||
672 | * read is about to happen and the window is immediately set to the initial size | ||
673 | * based on I/O request size and the max_readahead. | ||
674 | * | ||
675 | * The code ramps up the readahead size aggressively at first, but slow down as | ||
676 | * it approaches max_readhead. | ||
677 | */ | ||
678 | |||
679 | /* | ||
680 | * A minimal readahead algorithm for trivial sequential/random reads. | ||
681 | */ | ||
682 | static unsigned long | ||
683 | ondemand_readahead(struct address_space *mapping, | ||
684 | struct file_ra_state *ra, struct file *filp, | ||
685 | struct page *page, pgoff_t offset, | ||
686 | unsigned long req_size) | ||
687 | { | ||
688 | unsigned long max; /* max readahead pages */ | ||
689 | pgoff_t ra_index; /* readahead index */ | ||
690 | unsigned long ra_size; /* readahead size */ | ||
691 | unsigned long la_size; /* lookahead size */ | ||
692 | int sequential; | ||
693 | |||
694 | max = ra->ra_pages; | ||
695 | sequential = (offset - ra->prev_index <= 1UL) || (req_size > max); | ||
696 | |||
697 | /* | ||
698 | * Lookahead/readahead hit, assume sequential access. | ||
699 | * Ramp up sizes, and push forward the readahead window. | ||
700 | */ | ||
701 | if (offset && (offset == ra->lookahead_index || | ||
702 | offset == ra->readahead_index)) { | ||
703 | ra_index = ra->readahead_index; | ||
704 | ra_size = get_next_ra_size2(ra, max); | ||
705 | la_size = ra_size; | ||
706 | goto fill_ra; | ||
707 | } | ||
708 | |||
709 | /* | ||
710 | * Standalone, small read. | ||
711 | * Read as is, and do not pollute the readahead state. | ||
712 | */ | ||
713 | if (!page && !sequential) { | ||
714 | return __do_page_cache_readahead(mapping, filp, | ||
715 | offset, req_size, 0); | ||
716 | } | ||
717 | |||
718 | /* | ||
719 | * It may be one of | ||
720 | * - first read on start of file | ||
721 | * - sequential cache miss | ||
722 | * - oversize random read | ||
723 | * Start readahead for it. | ||
724 | */ | ||
725 | ra_index = offset; | ||
726 | ra_size = get_init_ra_size(req_size, max); | ||
727 | la_size = ra_size > req_size ? ra_size - req_size : ra_size; | ||
728 | |||
729 | /* | ||
730 | * Hit on a lookahead page without valid readahead state. | ||
731 | * E.g. interleaved reads. | ||
732 | * Not knowing its readahead pos/size, bet on the minimal possible one. | ||
733 | */ | ||
734 | if (page) { | ||
735 | ra_index++; | ||
736 | ra_size = min(4 * ra_size, max); | ||
737 | } | ||
738 | |||
739 | fill_ra: | ||
740 | ra_set_index(ra, offset, ra_index); | ||
741 | ra_set_size(ra, ra_size, la_size); | ||
742 | |||
743 | return ra_submit(ra, mapping, filp); | ||
744 | } | ||
745 | |||
746 | /** | ||
747 | * page_cache_readahead_ondemand - generic file readahead | ||
748 | * @mapping: address_space which holds the pagecache and I/O vectors | ||
749 | * @ra: file_ra_state which holds the readahead state | ||
750 | * @filp: passed on to ->readpage() and ->readpages() | ||
751 | * @page: the page at @offset, or NULL if non-present | ||
752 | * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units | ||
753 | * @req_size: hint: total size of the read which the caller is performing in | ||
754 | * PAGE_CACHE_SIZE units | ||
755 | * | ||
756 | * page_cache_readahead_ondemand() is the entry point of readahead logic. | ||
757 | * This function should be called when it is time to perform readahead: | ||
758 | * 1) @page == NULL | ||
759 | * A cache miss happened, time for synchronous readahead. | ||
760 | * 2) @page != NULL && PageReadahead(@page) | ||
761 | * A look-ahead hit occured, time for asynchronous readahead. | ||
762 | */ | ||
763 | unsigned long | ||
764 | page_cache_readahead_ondemand(struct address_space *mapping, | ||
765 | struct file_ra_state *ra, struct file *filp, | ||
766 | struct page *page, pgoff_t offset, | ||
767 | unsigned long req_size) | ||
768 | { | ||
769 | /* no read-ahead */ | ||
770 | if (!ra->ra_pages) | ||
771 | return 0; | ||
772 | |||
773 | if (page) { | ||
774 | ClearPageReadahead(page); | ||
775 | |||
776 | /* | ||
777 | * Defer asynchronous read-ahead on IO congestion. | ||
778 | */ | ||
779 | if (bdi_read_congested(mapping->backing_dev_info)) | ||
780 | return 0; | ||
781 | } | ||
782 | |||
783 | /* do read-ahead */ | ||
784 | return ondemand_readahead(mapping, ra, filp, page, | ||
785 | offset, req_size); | ||
786 | } | ||
787 | EXPORT_SYMBOL_GPL(page_cache_readahead_ondemand); | ||