Diffstat (limited to 'fs')
 -rw-r--r--  fs/ext4/ext4.h  |   1
 -rw-r--r--  fs/ext4/inode.c | 105
 -rw-r--r--  fs/ext4/super.c |   3
 3 files changed, 97 insertions, 12 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e227eea23f05..a58438e18d0b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -942,6 +942,7 @@ struct ext4_sb_info {
 	unsigned int s_mb_stats;
 	unsigned int s_mb_order2_reqs;
 	unsigned int s_mb_group_prealloc;
+	unsigned int s_max_writeback_mb_bump;
 	/* where last allocation was done - for stream allocation */
 	unsigned long s_mb_last_group;
 	unsigned long s_mb_last_start;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5fb72a98ccbe..20e2d704dc2e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1145,6 +1145,64 @@ static int check_block_validity(struct inode *inode, const char *msg,
 }
 
 /*
+ * Return the number of dirty pages in the given inode starting at
+ * page frame idx.
+ */
+static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
+				    unsigned int max_pages)
+{
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t index;
+	struct pagevec pvec;
+	pgoff_t num = 0;
+	int i, nr_pages, done = 0;
+
+	if (max_pages == 0)
+		return 0;
+	pagevec_init(&pvec, 0);
+	while (!done) {
+		index = idx;
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+					      PAGECACHE_TAG_DIRTY,
+					      (pgoff_t)PAGEVEC_SIZE);
+		if (nr_pages == 0)
+			break;
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+			struct buffer_head *bh, *head;
+
+			lock_page(page);
+			if (unlikely(page->mapping != mapping) ||
+			    !PageDirty(page) ||
+			    PageWriteback(page) ||
+			    page->index != idx) {
+				done = 1;
+				unlock_page(page);
+				break;
+			}
+			head = page_buffers(page);
+			bh = head;
+			do {
+				if (!buffer_delay(bh) &&
+				    !buffer_unwritten(bh)) {
+					done = 1;
+					break;
+				}
+			} while ((bh = bh->b_this_page) != head);
+			unlock_page(page);
+			if (done)
+				break;
+			idx++;
+			num++;
+			if (num >= max_pages)
+				break;
+		}
+		pagevec_release(&pvec);
+	}
+	return num;
+}
+
+/*
  * The ext4_get_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
  *
@@ -2743,8 +2801,10 @@ static int ext4_da_writepages(struct address_space *mapping,
 	int no_nrwrite_index_update;
 	int pages_written = 0;
 	long pages_skipped;
+	unsigned int max_pages;
 	int range_cyclic, cycled = 1, io_done = 0;
-	int needed_blocks, ret = 0, nr_to_writebump = 0;
+	int needed_blocks, ret = 0;
+	long desired_nr_to_write, nr_to_writebump = 0;
 	loff_t range_start = wbc->range_start;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
@@ -2771,16 +2831,6 @@ static int ext4_da_writepages(struct address_space *mapping,
 	if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
 		return -EROFS;
 
-	/*
-	 * Make sure nr_to_write is >= sbi->s_mb_stream_request
-	 * This make sure small files blocks are allocated in
-	 * single attempt. This ensure that small files
-	 * get less fragmented.
-	 */
-	if (wbc->nr_to_write < sbi->s_mb_stream_request) {
-		nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
-		wbc->nr_to_write = sbi->s_mb_stream_request;
-	}
 	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 		range_whole = 1;
 
@@ -2795,6 +2845,36 @@ static int ext4_da_writepages(struct address_space *mapping,
 	} else
 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
 
+	/*
+	 * This works around two forms of stupidity. The first is in
+	 * the writeback code, which caps the maximum number of pages
+	 * written to be 1024 pages. This is wrong on multiple
+	 * levels; different architectues have a different page size,
+	 * which changes the maximum amount of data which gets
+	 * written. Secondly, 4 megabytes is way too small. XFS
+	 * forces this value to be 16 megabytes by multiplying
+	 * nr_to_write parameter by four, and then relies on its
+	 * allocator to allocate larger extents to make them
+	 * contiguous. Unfortunately this brings us to the second
+	 * stupidity, which is that ext4's mballoc code only allocates
+	 * at most 2048 blocks. So we force contiguous writes up to
+	 * the number of dirty blocks in the inode, or
+	 * sbi->max_writeback_mb_bump whichever is smaller.
+	 */
+	max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
+	if (!range_cyclic && range_whole)
+		desired_nr_to_write = wbc->nr_to_write * 8;
+	else
+		desired_nr_to_write = ext4_num_dirty_pages(inode, index,
+							   max_pages);
+	if (desired_nr_to_write > max_pages)
+		desired_nr_to_write = max_pages;
+
+	if (wbc->nr_to_write < desired_nr_to_write) {
+		nr_to_writebump = desired_nr_to_write - wbc->nr_to_write;
+		wbc->nr_to_write = desired_nr_to_write;
+	}
+
 	mpd.wbc = wbc;
 	mpd.inode = mapping->host;
 
@@ -2914,7 +2994,8 @@ retry:
 out_writepages:
 	if (!no_nrwrite_index_update)
 		wbc->no_nrwrite_index_update = 0;
-	wbc->nr_to_write -= nr_to_writebump;
+	if (wbc->nr_to_write > nr_to_writebump)
+		wbc->nr_to_write -= nr_to_writebump;
 	wbc->range_start = range_start;
 	trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
 	return ret;
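
The arithmetic in the ext4_da_writepages() hunks above can be sanity-checked outside the kernel. The sketch below is a standalone userspace illustration, not kernel code: it assumes a 4 KiB page size (PAGE_CACHE_SHIFT == 12) and the default s_max_writeback_mb_bump of 128 set in ext4_fill_super() below, which caps the bump at 128 << 8 = 32768 pages; the nr_to_write and dirty-page counts are hypothetical.

/*
 * Standalone sketch of the nr_to_write bump logic above.
 * Assumes 4 KiB pages and the default bump of 128 MB; not kernel code.
 */
#include <stdio.h>

int main(void)
{
	const unsigned int page_shift = 12;	/* assumed 4 KiB page size */
	const unsigned int mb_bump = 128;	/* default from ext4_fill_super() */
	unsigned int max_pages = mb_bump << (20 - page_shift);	/* 32768 pages */
	long nr_to_write = 1024;		/* typical writeback cap */
	long desired_nr_to_write = 9000;	/* e.g. dirty pages found in the inode */
	long nr_to_writebump = 0;

	/* Cap the desired amount at max_pages, then only ever bump upward. */
	if (desired_nr_to_write > max_pages)
		desired_nr_to_write = max_pages;
	if (nr_to_write < desired_nr_to_write) {
		nr_to_writebump = desired_nr_to_write - nr_to_write;
		nr_to_write = desired_nr_to_write;
	}
	printf("max_pages=%u nr_to_write=%ld bump=%ld\n",
	       max_pages, nr_to_write, nr_to_writebump);
	return 0;
}

Note that nr_to_write is only ever raised toward desired_nr_to_write, and the out_writepages hunk above subtracts the bump again only while nr_to_write still exceeds it.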
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index df539ba27779..16817737ba52 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2197,6 +2197,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
 EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
 EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
 EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
+EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
 
 static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(delayed_allocation_blocks),
@@ -2210,6 +2211,7 @@ static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(mb_order2_req),
 	ATTR_LIST(mb_stream_req),
 	ATTR_LIST(mb_group_prealloc),
+	ATTR_LIST(max_writeback_mb_bump),
 	NULL,
 };
 
@@ -2679,6 +2681,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	sbi->s_stripe = ext4_get_stripe_size(sbi);
+	sbi->s_max_writeback_mb_bump = 128;
 
 	/*
 	 * set up enough so that it can read an inode
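
Because EXT4_RW_ATTR_SBI_UI() exposes s_max_writeback_mb_bump as a read-write sysfs attribute, the 128 MB default set in ext4_fill_super() can be inspected or tuned at runtime. A minimal sketch follows, assuming the attribute appears next to the existing knobs under /sys/fs/ext4/<device>/; the "sda1" device name is a placeholder.

/*
 * Minimal sketch: read and raise max_writeback_mb_bump from userspace.
 * The device name "sda1" is a placeholder; writing requires root.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/fs/ext4/sda1/max_writeback_mb_bump";
	unsigned int val = 0;
	unsigned int new_val = 256;	/* e.g. raise the cap to 256 MB */
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%u", &val) == 1)
			printf("current bump: %u MB\n", val);
		fclose(f);
	}
	f = fopen(path, "w");
	if (f) {
		fprintf(f, "%u\n", new_val);
		fclose(f);
	}
	return 0;
}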
