diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
-rw-r--r-- | fs/ext4/inode.c | 105 | ||||
-rw-r--r-- | fs/ext4/super.c | 3 |
3 files changed, 97 insertions, 12 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index e227eea23f05..a58438e18d0b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -942,6 +942,7 @@ struct ext4_sb_info { | |||
942 | unsigned int s_mb_stats; | 942 | unsigned int s_mb_stats; |
943 | unsigned int s_mb_order2_reqs; | 943 | unsigned int s_mb_order2_reqs; |
944 | unsigned int s_mb_group_prealloc; | 944 | unsigned int s_mb_group_prealloc; |
945 | unsigned int s_max_writeback_mb_bump; | ||
945 | /* where last allocation was done - for stream allocation */ | 946 | /* where last allocation was done - for stream allocation */ |
946 | unsigned long s_mb_last_group; | 947 | unsigned long s_mb_last_group; |
947 | unsigned long s_mb_last_start; | 948 | unsigned long s_mb_last_start; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5fb72a98ccbe..20e2d704dc2e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1145,6 +1145,64 @@ static int check_block_validity(struct inode *inode, const char *msg, | |||
1145 | } | 1145 | } |
1146 | 1146 | ||
1147 | /* | 1147 | /* |
1148 | * Return the number of dirty pages in the given inode starting at | ||
1149 | * page frame idx. | ||
1150 | */ | ||
1151 | static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | ||
1152 | unsigned int max_pages) | ||
1153 | { | ||
1154 | struct address_space *mapping = inode->i_mapping; | ||
1155 | pgoff_t index; | ||
1156 | struct pagevec pvec; | ||
1157 | pgoff_t num = 0; | ||
1158 | int i, nr_pages, done = 0; | ||
1159 | |||
1160 | if (max_pages == 0) | ||
1161 | return 0; | ||
1162 | pagevec_init(&pvec, 0); | ||
1163 | while (!done) { | ||
1164 | index = idx; | ||
1165 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
1166 | PAGECACHE_TAG_DIRTY, | ||
1167 | (pgoff_t)PAGEVEC_SIZE); | ||
1168 | if (nr_pages == 0) | ||
1169 | break; | ||
1170 | for (i = 0; i < nr_pages; i++) { | ||
1171 | struct page *page = pvec.pages[i]; | ||
1172 | struct buffer_head *bh, *head; | ||
1173 | |||
1174 | lock_page(page); | ||
1175 | if (unlikely(page->mapping != mapping) || | ||
1176 | !PageDirty(page) || | ||
1177 | PageWriteback(page) || | ||
1178 | page->index != idx) { | ||
1179 | done = 1; | ||
1180 | unlock_page(page); | ||
1181 | break; | ||
1182 | } | ||
1183 | head = page_buffers(page); | ||
1184 | bh = head; | ||
1185 | do { | ||
1186 | if (!buffer_delay(bh) && | ||
1187 | !buffer_unwritten(bh)) { | ||
1188 | done = 1; | ||
1189 | break; | ||
1190 | } | ||
1191 | } while ((bh = bh->b_this_page) != head); | ||
1192 | unlock_page(page); | ||
1193 | if (done) | ||
1194 | break; | ||
1195 | idx++; | ||
1196 | num++; | ||
1197 | if (num >= max_pages) | ||
1198 | break; | ||
1199 | } | ||
1200 | pagevec_release(&pvec); | ||
1201 | } | ||
1202 | return num; | ||
1203 | } | ||
1204 | |||
1205 | /* | ||
1148 | * The ext4_get_blocks() function tries to look up the requested blocks, | 1206 | * The ext4_get_blocks() function tries to look up the requested blocks, |
1149 | * and returns if the blocks are already mapped. | 1207 | * and returns if the blocks are already mapped. |
1150 | * | 1208 | * |
@@ -2743,8 +2801,10 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2743 | int no_nrwrite_index_update; | 2801 | int no_nrwrite_index_update; |
2744 | int pages_written = 0; | 2802 | int pages_written = 0; |
2745 | long pages_skipped; | 2803 | long pages_skipped; |
2804 | unsigned int max_pages; | ||
2746 | int range_cyclic, cycled = 1, io_done = 0; | 2805 | int range_cyclic, cycled = 1, io_done = 0; |
2747 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2806 | int needed_blocks, ret = 0; |
2807 | long desired_nr_to_write, nr_to_writebump = 0; | ||
2748 | loff_t range_start = wbc->range_start; | 2808 | loff_t range_start = wbc->range_start; |
2749 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2809 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2750 | 2810 | ||
@@ -2771,16 +2831,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2771 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) | 2831 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
2772 | return -EROFS; | 2832 | return -EROFS; |
2773 | 2833 | ||
2774 | /* | ||
2775 | * Make sure nr_to_write is >= sbi->s_mb_stream_request | ||
2776 | * This make sure small files blocks are allocated in | ||
2777 | * single attempt. This ensure that small files | ||
2778 | * get less fragmented. | ||
2779 | */ | ||
2780 | if (wbc->nr_to_write < sbi->s_mb_stream_request) { | ||
2781 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | ||
2782 | wbc->nr_to_write = sbi->s_mb_stream_request; | ||
2783 | } | ||
2784 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2834 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
2785 | range_whole = 1; | 2835 | range_whole = 1; |
2786 | 2836 | ||
@@ -2795,6 +2845,36 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2795 | } else | 2845 | } else |
2796 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2846 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2797 | 2847 | ||
2848 | /* | ||
2849 | * This works around two forms of stupidity. The first is in | ||
2850 | * the writeback code, which caps the maximum number of pages | ||
2851 | * written to be 1024 pages. This is wrong on multiple | ||
2852 | * levels; different architectures have a different page size, | ||
2853 | * which changes the maximum amount of data which gets | ||
2854 | * written. Secondly, 4 megabytes is way too small. XFS | ||
2855 | * forces this value to be 16 megabytes by multiplying | ||
2856 | * nr_to_write parameter by four, and then relies on its | ||
2857 | * allocator to allocate larger extents to make them | ||
2858 | * contiguous. Unfortunately this brings us to the second | ||
2859 | * stupidity, which is that ext4's mballoc code only allocates | ||
2860 | * at most 2048 blocks. So we force contiguous writes up to | ||
2861 | * the number of dirty blocks in the inode, or | ||
2862 | * sbi->s_max_writeback_mb_bump, whichever is smaller. | ||
2863 | */ | ||
2864 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | ||
2865 | if (!range_cyclic && range_whole) | ||
2866 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
2867 | else | ||
2868 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | ||
2869 | max_pages); | ||
2870 | if (desired_nr_to_write > max_pages) | ||
2871 | desired_nr_to_write = max_pages; | ||
2872 | |||
2873 | if (wbc->nr_to_write < desired_nr_to_write) { | ||
2874 | nr_to_writebump = desired_nr_to_write - wbc->nr_to_write; | ||
2875 | wbc->nr_to_write = desired_nr_to_write; | ||
2876 | } | ||
2877 | |||
2798 | mpd.wbc = wbc; | 2878 | mpd.wbc = wbc; |
2799 | mpd.inode = mapping->host; | 2879 | mpd.inode = mapping->host; |
2800 | 2880 | ||
@@ -2914,7 +2994,8 @@ retry: | |||
2914 | out_writepages: | 2994 | out_writepages: |
2915 | if (!no_nrwrite_index_update) | 2995 | if (!no_nrwrite_index_update) |
2916 | wbc->no_nrwrite_index_update = 0; | 2996 | wbc->no_nrwrite_index_update = 0; |
2917 | wbc->nr_to_write -= nr_to_writebump; | 2997 | if (wbc->nr_to_write > nr_to_writebump) |
2998 | wbc->nr_to_write -= nr_to_writebump; | ||
2918 | wbc->range_start = range_start; | 2999 | wbc->range_start = range_start; |
2919 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | 3000 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); |
2920 | return ret; | 3001 | return ret; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index df539ba27779..16817737ba52 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -2197,6 +2197,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); | |||
2197 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | 2197 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); |
2198 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2198 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
2199 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2199 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
2200 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | ||
2200 | 2201 | ||
2201 | static struct attribute *ext4_attrs[] = { | 2202 | static struct attribute *ext4_attrs[] = { |
2202 | ATTR_LIST(delayed_allocation_blocks), | 2203 | ATTR_LIST(delayed_allocation_blocks), |
@@ -2210,6 +2211,7 @@ static struct attribute *ext4_attrs[] = { | |||
2210 | ATTR_LIST(mb_order2_req), | 2211 | ATTR_LIST(mb_order2_req), |
2211 | ATTR_LIST(mb_stream_req), | 2212 | ATTR_LIST(mb_stream_req), |
2212 | ATTR_LIST(mb_group_prealloc), | 2213 | ATTR_LIST(mb_group_prealloc), |
2214 | ATTR_LIST(max_writeback_mb_bump), | ||
2213 | NULL, | 2215 | NULL, |
2214 | }; | 2216 | }; |
2215 | 2217 | ||
@@ -2679,6 +2681,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2679 | } | 2681 | } |
2680 | 2682 | ||
2681 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2683 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
2684 | sbi->s_max_writeback_mb_bump = 128; | ||
2682 | 2685 | ||
2683 | /* | 2686 | /* |
2684 | * set up enough so that it can read an inode | 2687 | * set up enough so that it can read an inode |