Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 530
1 file changed, 453 insertions(+), 77 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c4da98a959ae..fffec40d5996 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,7 +42,6 @@ | |||
42 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
43 | #include "xattr.h" | 43 | #include "xattr.h" |
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "ext4_extents.h" | ||
46 | #include "truncate.h" | 45 | #include "truncate.h" |
47 | 46 | ||
48 | #include <trace/events/ext4.h> | 47 | #include <trace/events/ext4.h> |
@@ -121,9 +120,6 @@ void ext4_evict_inode(struct inode *inode) | |||
121 | 120 | ||
122 | trace_ext4_evict_inode(inode); | 121 | trace_ext4_evict_inode(inode); |
123 | 122 | ||
124 | mutex_lock(&inode->i_mutex); | ||
125 | ext4_flush_completed_IO(inode); | ||
126 | mutex_unlock(&inode->i_mutex); | ||
127 | ext4_ioend_wait(inode); | 123 | ext4_ioend_wait(inode); |
128 | 124 | ||
129 | if (inode->i_nlink) { | 125 | if (inode->i_nlink) { |
@@ -271,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
271 | struct ext4_inode_info *ei = EXT4_I(inode); | 267 | struct ext4_inode_info *ei = EXT4_I(inode); |
272 | 268 | ||
273 | spin_lock(&ei->i_block_reservation_lock); | 269 | spin_lock(&ei->i_block_reservation_lock); |
274 | trace_ext4_da_update_reserve_space(inode, used); | 270 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); |
275 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 271 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
276 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 272 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
277 | "with only %d reserved data blocks\n", | 273 | "with only %d reserved data blocks\n", |
@@ -284,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
284 | /* Update per-inode reservations */ | 280 | /* Update per-inode reservations */ |
285 | ei->i_reserved_data_blocks -= used; | 281 | ei->i_reserved_data_blocks -= used; |
286 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 282 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
287 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 283 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
288 | used + ei->i_allocated_meta_blocks); | 284 | used + ei->i_allocated_meta_blocks); |
289 | ei->i_allocated_meta_blocks = 0; | 285 | ei->i_allocated_meta_blocks = 0; |
290 | 286 | ||
@@ -294,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
294 | * only when we have written all of the delayed | 290 | * only when we have written all of the delayed |
295 | * allocation blocks. | 291 | * allocation blocks. |
296 | */ | 292 | */ |
297 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 293 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
298 | ei->i_reserved_meta_blocks); | 294 | ei->i_reserved_meta_blocks); |
299 | ei->i_reserved_meta_blocks = 0; | 295 | ei->i_reserved_meta_blocks = 0; |
300 | ei->i_da_metadata_calc_len = 0; | 296 | ei->i_da_metadata_calc_len = 0; |
@@ -303,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
303 | 299 | ||
304 | /* Update quota subsystem for data blocks */ | 300 | /* Update quota subsystem for data blocks */ |
305 | if (quota_claim) | 301 | if (quota_claim) |
306 | dquot_claim_block(inode, used); | 302 | dquot_claim_block(inode, EXT4_C2B(sbi, used)); |
307 | else { | 303 | else { |
308 | /* | 304 | /* |
309 | * We did fallocate with an offset that is already delayed | 305 | * We did fallocate with an offset that is already delayed |
310 | * allocated. So on delayed allocated writeback we should | 306 | * allocated. So on delayed allocated writeback we should |
311 | * not re-claim the quota for fallocated blocks. | 307 | * not re-claim the quota for fallocated blocks. |
312 | */ | 308 | */ |
313 | dquot_release_reservation_block(inode, used); | 309 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); |
314 | } | 310 | } |
315 | 311 | ||
316 | /* | 312 | /* |
@@ -402,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
402 | } | 398 | } |
403 | 399 | ||
404 | /* | 400 | /* |
401 | * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map. | ||
402 | */ | ||
403 | static void set_buffers_da_mapped(struct inode *inode, | ||
404 | struct ext4_map_blocks *map) | ||
405 | { | ||
406 | struct address_space *mapping = inode->i_mapping; | ||
407 | struct pagevec pvec; | ||
408 | int i, nr_pages; | ||
409 | pgoff_t index, end; | ||
410 | |||
411 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
412 | end = (map->m_lblk + map->m_len - 1) >> | ||
413 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
414 | |||
415 | pagevec_init(&pvec, 0); | ||
416 | while (index <= end) { | ||
417 | nr_pages = pagevec_lookup(&pvec, mapping, index, | ||
418 | min(end - index + 1, | ||
419 | (pgoff_t)PAGEVEC_SIZE)); | ||
420 | if (nr_pages == 0) | ||
421 | break; | ||
422 | for (i = 0; i < nr_pages; i++) { | ||
423 | struct page *page = pvec.pages[i]; | ||
424 | struct buffer_head *bh, *head; | ||
425 | |||
426 | if (unlikely(page->mapping != mapping) || | ||
427 | !PageDirty(page)) | ||
428 | break; | ||
429 | |||
430 | if (page_has_buffers(page)) { | ||
431 | bh = head = page_buffers(page); | ||
432 | do { | ||
433 | set_buffer_da_mapped(bh); | ||
434 | bh = bh->b_this_page; | ||
435 | } while (bh != head); | ||
436 | } | ||
437 | index++; | ||
438 | } | ||
439 | pagevec_release(&pvec); | ||
440 | } | ||
441 | } | ||
442 | |||
443 | /* | ||
405 | * The ext4_map_blocks() function tries to look up the requested blocks, | 444 | * The ext4_map_blocks() function tries to look up the requested blocks, |
406 | * and returns if the blocks are already mapped. | 445 | * and returns if the blocks are already mapped. |
407 | * | 446 | * |
@@ -419,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
419 | * the buffer head is mapped. | 458 | * the buffer head is mapped. |
420 | * | 459 | * |
421 | * It returns 0 if plain look up failed (blocks have not been allocated), in | 460 | * It returns 0 if plain look up failed (blocks have not been allocated), in |
422 | * that casem, buffer head is unmapped | 461 | * that case, buffer head is unmapped |
423 | * | 462 | * |
424 | * It returns the error in case of allocation failure. | 463 | * It returns the error in case of allocation failure. |
425 | */ | 464 | */ |
@@ -438,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
438 | */ | 477 | */ |
439 | down_read((&EXT4_I(inode)->i_data_sem)); | 478 | down_read((&EXT4_I(inode)->i_data_sem)); |
440 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 479 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
441 | retval = ext4_ext_map_blocks(handle, inode, map, 0); | 480 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
481 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
442 | } else { | 482 | } else { |
443 | retval = ext4_ind_map_blocks(handle, inode, map, 0); | 483 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
484 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
444 | } | 485 | } |
445 | up_read((&EXT4_I(inode)->i_data_sem)); | 486 | up_read((&EXT4_I(inode)->i_data_sem)); |
446 | 487 | ||
@@ -458,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
458 | * Returns if the blocks have already allocated | 499 | * Returns if the blocks have already allocated |
459 | * | 500 | * |
460 | * Note that if blocks have been preallocated | 501 | * Note that if blocks have been preallocated |
461 | * ext4_ext_get_block() returns th create = 0 | 502 | * ext4_ext_get_block() returns the create = 0 |
462 | * with buffer head unmapped. | 503 | * with buffer head unmapped. |
463 | */ | 504 | */ |
464 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 505 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
@@ -520,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
520 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 561 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
521 | ext4_da_update_reserve_space(inode, retval, 1); | 562 | ext4_da_update_reserve_space(inode, retval, 1); |
522 | } | 563 | } |
523 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 564 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
524 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 565 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
525 | 566 | ||
567 | /* If we have successfully mapped the delayed allocated blocks, | ||
568 | * set the BH_Da_Mapped bit on them. Its important to do this | ||
569 | * under the protection of i_data_sem. | ||
570 | */ | ||
571 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | ||
572 | set_buffers_da_mapped(inode, map); | ||
573 | } | ||
574 | |||
526 | up_write((&EXT4_I(inode)->i_data_sem)); | 575 | up_write((&EXT4_I(inode)->i_data_sem)); |
527 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 576 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
528 | int ret = check_block_validity(inode, map); | 577 | int ret = check_block_validity(inode, map); |
@@ -650,7 +699,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
650 | return bh; | 699 | return bh; |
651 | if (buffer_uptodate(bh)) | 700 | if (buffer_uptodate(bh)) |
652 | return bh; | 701 | return bh; |
653 | ll_rw_block(READ_META, 1, &bh); | 702 | ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); |
654 | wait_on_buffer(bh); | 703 | wait_on_buffer(bh); |
655 | if (buffer_uptodate(bh)) | 704 | if (buffer_uptodate(bh)) |
656 | return bh; | 705 | return bh; |
@@ -912,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file, | |||
912 | ext4_orphan_add(handle, inode); | 961 | ext4_orphan_add(handle, inode); |
913 | if (ret2 < 0) | 962 | if (ret2 < 0) |
914 | ret = ret2; | 963 | ret = ret2; |
964 | } else { | ||
965 | unlock_page(page); | ||
966 | page_cache_release(page); | ||
915 | } | 967 | } |
968 | |||
916 | ret2 = ext4_journal_stop(handle); | 969 | ret2 = ext4_journal_stop(handle); |
917 | if (!ret) | 970 | if (!ret) |
918 | ret = ret2; | 971 | ret = ret2; |
@@ -1040,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file, | |||
1040 | } | 1093 | } |
1041 | 1094 | ||
1042 | /* | 1095 | /* |
1043 | * Reserve a single block located at lblock | 1096 | * Reserve a single cluster located at lblock |
1044 | */ | 1097 | */ |
1045 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1098 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1046 | { | 1099 | { |
1047 | int retries = 0; | 1100 | int retries = 0; |
1048 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1101 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1049 | struct ext4_inode_info *ei = EXT4_I(inode); | 1102 | struct ext4_inode_info *ei = EXT4_I(inode); |
1050 | unsigned long md_needed; | 1103 | unsigned int md_needed; |
1051 | int ret; | 1104 | int ret; |
1052 | 1105 | ||
1053 | /* | 1106 | /* |
@@ -1057,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |||
1057 | */ | 1110 | */ |
1058 | repeat: | 1111 | repeat: |
1059 | spin_lock(&ei->i_block_reservation_lock); | 1112 | spin_lock(&ei->i_block_reservation_lock); |
1060 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1113 | md_needed = EXT4_NUM_B2C(sbi, |
1114 | ext4_calc_metadata_amount(inode, lblock)); | ||
1061 | trace_ext4_da_reserve_space(inode, md_needed); | 1115 | trace_ext4_da_reserve_space(inode, md_needed); |
1062 | spin_unlock(&ei->i_block_reservation_lock); | 1116 | spin_unlock(&ei->i_block_reservation_lock); |
1063 | 1117 | ||
@@ -1066,15 +1120,15 @@ repeat: | |||
1066 | * us from metadata over-estimation, though we may go over by | 1120 | * us from metadata over-estimation, though we may go over by |
1067 | * a small amount in the end. Here we just reserve for data. | 1121 | * a small amount in the end. Here we just reserve for data. |
1068 | */ | 1122 | */ |
1069 | ret = dquot_reserve_block(inode, 1); | 1123 | ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); |
1070 | if (ret) | 1124 | if (ret) |
1071 | return ret; | 1125 | return ret; |
1072 | /* | 1126 | /* |
1073 | * We do still charge estimated metadata to the sb though; | 1127 | * We do still charge estimated metadata to the sb though; |
1074 | * we cannot afford to run out of free blocks. | 1128 | * we cannot afford to run out of free blocks. |
1075 | */ | 1129 | */ |
1076 | if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { | 1130 | if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { |
1077 | dquot_release_reservation_block(inode, 1); | 1131 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
1078 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1132 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1079 | yield(); | 1133 | yield(); |
1080 | goto repeat; | 1134 | goto repeat; |
@@ -1121,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1121 | * We can release all of the reserved metadata blocks | 1175 | * We can release all of the reserved metadata blocks |
1122 | * only when we have written all of the delayed | 1176 | * only when we have written all of the delayed |
1123 | * allocation blocks. | 1177 | * allocation blocks. |
1178 | * Note that in case of bigalloc, i_reserved_meta_blocks, | ||
1179 | * i_reserved_data_blocks, etc. refer to number of clusters. | ||
1124 | */ | 1180 | */ |
1125 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 1181 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
1126 | ei->i_reserved_meta_blocks); | 1182 | ei->i_reserved_meta_blocks); |
1127 | ei->i_reserved_meta_blocks = 0; | 1183 | ei->i_reserved_meta_blocks = 0; |
1128 | ei->i_da_metadata_calc_len = 0; | 1184 | ei->i_da_metadata_calc_len = 0; |
1129 | } | 1185 | } |
1130 | 1186 | ||
1131 | /* update fs dirty data blocks counter */ | 1187 | /* update fs dirty data blocks counter */ |
1132 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); | 1188 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); |
1133 | 1189 | ||
1134 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1190 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1135 | 1191 | ||
1136 | dquot_release_reservation_block(inode, to_free); | 1192 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); |
1137 | } | 1193 | } |
1138 | 1194 | ||
1139 | static void ext4_da_page_release_reservation(struct page *page, | 1195 | static void ext4_da_page_release_reservation(struct page *page, |
@@ -1142,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1142 | int to_release = 0; | 1198 | int to_release = 0; |
1143 | struct buffer_head *head, *bh; | 1199 | struct buffer_head *head, *bh; |
1144 | unsigned int curr_off = 0; | 1200 | unsigned int curr_off = 0; |
1201 | struct inode *inode = page->mapping->host; | ||
1202 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1203 | int num_clusters; | ||
1145 | 1204 | ||
1146 | head = page_buffers(page); | 1205 | head = page_buffers(page); |
1147 | bh = head; | 1206 | bh = head; |
@@ -1151,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1151 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1210 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1152 | to_release++; | 1211 | to_release++; |
1153 | clear_buffer_delay(bh); | 1212 | clear_buffer_delay(bh); |
1213 | clear_buffer_da_mapped(bh); | ||
1154 | } | 1214 | } |
1155 | curr_off = next_off; | 1215 | curr_off = next_off; |
1156 | } while ((bh = bh->b_this_page) != head); | 1216 | } while ((bh = bh->b_this_page) != head); |
1157 | ext4_da_release_space(page->mapping->host, to_release); | 1217 | |
1218 | /* If we have released all the blocks belonging to a cluster, then we | ||
1219 | * need to release the reserved space for that cluster. */ | ||
1220 | num_clusters = EXT4_NUM_B2C(sbi, to_release); | ||
1221 | while (num_clusters > 0) { | ||
1222 | ext4_fsblk_t lblk; | ||
1223 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + | ||
1224 | ((num_clusters - 1) << sbi->s_cluster_bits); | ||
1225 | if (sbi->s_cluster_ratio == 1 || | ||
1226 | !ext4_find_delalloc_cluster(inode, lblk, 1)) | ||
1227 | ext4_da_release_space(inode, 1); | ||
1228 | |||
1229 | num_clusters--; | ||
1230 | } | ||
1158 | } | 1231 | } |
1159 | 1232 | ||
1160 | /* | 1233 | /* |
@@ -1256,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1256 | clear_buffer_delay(bh); | 1329 | clear_buffer_delay(bh); |
1257 | bh->b_blocknr = pblock; | 1330 | bh->b_blocknr = pblock; |
1258 | } | 1331 | } |
1332 | if (buffer_da_mapped(bh)) | ||
1333 | clear_buffer_da_mapped(bh); | ||
1259 | if (buffer_unwritten(bh) || | 1334 | if (buffer_unwritten(bh) || |
1260 | buffer_mapped(bh)) | 1335 | buffer_mapped(bh)) |
1261 | BUG_ON(bh->b_blocknr != pblock); | 1336 | BUG_ON(bh->b_blocknr != pblock); |
@@ -1349,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1349 | { | 1424 | { |
1350 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1425 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1351 | printk(KERN_CRIT "Total free blocks count %lld\n", | 1426 | printk(KERN_CRIT "Total free blocks count %lld\n", |
1352 | ext4_count_free_blocks(inode->i_sb)); | 1427 | EXT4_C2B(EXT4_SB(inode->i_sb), |
1428 | ext4_count_free_clusters(inode->i_sb))); | ||
1353 | printk(KERN_CRIT "Free/Dirty block details\n"); | 1429 | printk(KERN_CRIT "Free/Dirty block details\n"); |
1354 | printk(KERN_CRIT "free_blocks=%lld\n", | 1430 | printk(KERN_CRIT "free_blocks=%lld\n", |
1355 | (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); | 1431 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1432 | percpu_counter_sum(&sbi->s_freeclusters_counter))); | ||
1356 | printk(KERN_CRIT "dirty_blocks=%lld\n", | 1433 | printk(KERN_CRIT "dirty_blocks=%lld\n", |
1357 | (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | 1434 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1435 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | ||
1358 | printk(KERN_CRIT "Block reservation details\n"); | 1436 | printk(KERN_CRIT "Block reservation details\n"); |
1359 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", | 1437 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", |
1360 | EXT4_I(inode)->i_reserved_data_blocks); | 1438 | EXT4_I(inode)->i_reserved_data_blocks); |
@@ -1433,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
1433 | if (err == -EAGAIN) | 1511 | if (err == -EAGAIN) |
1434 | goto submit_io; | 1512 | goto submit_io; |
1435 | 1513 | ||
1436 | if (err == -ENOSPC && | 1514 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { |
1437 | ext4_count_free_blocks(sb)) { | ||
1438 | mpd->retval = err; | 1515 | mpd->retval = err; |
1439 | goto submit_io; | 1516 | goto submit_io; |
1440 | } | 1517 | } |
@@ -1474,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
1474 | 1551 | ||
1475 | for (i = 0; i < map.m_len; i++) | 1552 | for (i = 0; i < map.m_len; i++) |
1476 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 1553 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
1477 | } | ||
1478 | 1554 | ||
1479 | if (ext4_should_order_data(mpd->inode)) { | 1555 | if (ext4_should_order_data(mpd->inode)) { |
1480 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 1556 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
1481 | if (err) | 1557 | if (err) { |
1482 | /* This only happens if the journal is aborted */ | 1558 | /* Only if the journal is aborted */ |
1483 | return; | 1559 | mpd->retval = err; |
1560 | goto submit_io; | ||
1561 | } | ||
1562 | } | ||
1484 | } | 1563 | } |
1485 | 1564 | ||
1486 | /* | 1565 | /* |
@@ -1587,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
1587 | } | 1666 | } |
1588 | 1667 | ||
1589 | /* | 1668 | /* |
1669 | * This function is grabs code from the very beginning of | ||
1670 | * ext4_map_blocks, but assumes that the caller is from delayed write | ||
1671 | * time. This function looks up the requested blocks and sets the | ||
1672 | * buffer delay bit under the protection of i_data_sem. | ||
1673 | */ | ||
1674 | static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | ||
1675 | struct ext4_map_blocks *map, | ||
1676 | struct buffer_head *bh) | ||
1677 | { | ||
1678 | int retval; | ||
1679 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
1680 | |||
1681 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
1682 | invalid_block = ~0; | ||
1683 | |||
1684 | map->m_flags = 0; | ||
1685 | ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," | ||
1686 | "logical block %lu\n", inode->i_ino, map->m_len, | ||
1687 | (unsigned long) map->m_lblk); | ||
1688 | /* | ||
1689 | * Try to see if we can get the block without requesting a new | ||
1690 | * file system block. | ||
1691 | */ | ||
1692 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
1693 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
1694 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); | ||
1695 | else | ||
1696 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); | ||
1697 | |||
1698 | if (retval == 0) { | ||
1699 | /* | ||
1700 | * XXX: __block_prepare_write() unmaps passed block, | ||
1701 | * is it OK? | ||
1702 | */ | ||
1703 | /* If the block was allocated from previously allocated cluster, | ||
1704 | * then we dont need to reserve it again. */ | ||
1705 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { | ||
1706 | retval = ext4_da_reserve_space(inode, iblock); | ||
1707 | if (retval) | ||
1708 | /* not enough space to reserve */ | ||
1709 | goto out_unlock; | ||
1710 | } | ||
1711 | |||
1712 | /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served | ||
1713 | * and it should not appear on the bh->b_state. | ||
1714 | */ | ||
1715 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
1716 | |||
1717 | map_bh(bh, inode->i_sb, invalid_block); | ||
1718 | set_buffer_new(bh); | ||
1719 | set_buffer_delay(bh); | ||
1720 | } | ||
1721 | |||
1722 | out_unlock: | ||
1723 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
1724 | |||
1725 | return retval; | ||
1726 | } | ||
1727 | |||
1728 | /* | ||
1590 | * This is a special get_blocks_t callback which is used by | 1729 | * This is a special get_blocks_t callback which is used by |
1591 | * ext4_da_write_begin(). It will either return mapped block or | 1730 | * ext4_da_write_begin(). It will either return mapped block or |
1592 | * reserve space for a single block. | 1731 | * reserve space for a single block. |
@@ -1603,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1603 | { | 1742 | { |
1604 | struct ext4_map_blocks map; | 1743 | struct ext4_map_blocks map; |
1605 | int ret = 0; | 1744 | int ret = 0; |
1606 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
1607 | |||
1608 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
1609 | invalid_block = ~0; | ||
1610 | 1745 | ||
1611 | BUG_ON(create == 0); | 1746 | BUG_ON(create == 0); |
1612 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); | 1747 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
@@ -1619,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1619 | * preallocated blocks are unmapped but should treated | 1754 | * preallocated blocks are unmapped but should treated |
1620 | * the same as allocated blocks. | 1755 | * the same as allocated blocks. |
1621 | */ | 1756 | */ |
1622 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 1757 | ret = ext4_da_map_blocks(inode, iblock, &map, bh); |
1623 | if (ret < 0) | 1758 | if (ret <= 0) |
1624 | return ret; | 1759 | return ret; |
1625 | if (ret == 0) { | ||
1626 | if (buffer_delay(bh)) | ||
1627 | return 0; /* Not sure this could or should happen */ | ||
1628 | /* | ||
1629 | * XXX: __block_write_begin() unmaps passed block, is it OK? | ||
1630 | */ | ||
1631 | ret = ext4_da_reserve_space(inode, iblock); | ||
1632 | if (ret) | ||
1633 | /* not enough space to reserve */ | ||
1634 | return ret; | ||
1635 | |||
1636 | map_bh(bh, inode->i_sb, invalid_block); | ||
1637 | set_buffer_new(bh); | ||
1638 | set_buffer_delay(bh); | ||
1639 | return 0; | ||
1640 | } | ||
1641 | 1760 | ||
1642 | map_bh(bh, inode->i_sb, map.m_pblk); | 1761 | map_bh(bh, inode->i_sb, map.m_pblk); |
1643 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1762 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
@@ -1814,8 +1933,12 @@ static int ext4_writepage(struct page *page, | |||
1814 | * We don't want to do block allocation, so redirty | 1933 | * We don't want to do block allocation, so redirty |
1815 | * the page and return. We may reach here when we do | 1934 | * the page and return. We may reach here when we do |
1816 | * a journal commit via journal_submit_inode_data_buffers. | 1935 | * a journal commit via journal_submit_inode_data_buffers. |
1817 | * We can also reach here via shrink_page_list | 1936 | * We can also reach here via shrink_page_list but it |
1937 | * should never be for direct reclaim so warn if that | ||
1938 | * happens | ||
1818 | */ | 1939 | */ |
1940 | WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == | ||
1941 | PF_MEMALLOC); | ||
1819 | goto redirty_page; | 1942 | goto redirty_page; |
1820 | } | 1943 | } |
1821 | if (commit_write) | 1944 | if (commit_write) |
@@ -2049,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2049 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2172 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2050 | pgoff_t done_index = 0; | 2173 | pgoff_t done_index = 0; |
2051 | pgoff_t end; | 2174 | pgoff_t end; |
2175 | struct blk_plug plug; | ||
2052 | 2176 | ||
2053 | trace_ext4_da_writepages(inode, wbc); | 2177 | trace_ext4_da_writepages(inode, wbc); |
2054 | 2178 | ||
@@ -2127,6 +2251,7 @@ retry: | |||
2127 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2251 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2128 | tag_pages_for_writeback(mapping, index, end); | 2252 | tag_pages_for_writeback(mapping, index, end); |
2129 | 2253 | ||
2254 | blk_start_plug(&plug); | ||
2130 | while (!ret && wbc->nr_to_write > 0) { | 2255 | while (!ret && wbc->nr_to_write > 0) { |
2131 | 2256 | ||
2132 | /* | 2257 | /* |
@@ -2145,6 +2270,7 @@ retry: | |||
2145 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2270 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
2146 | "%ld pages, ino %lu; err %d", __func__, | 2271 | "%ld pages, ino %lu; err %d", __func__, |
2147 | wbc->nr_to_write, inode->i_ino, ret); | 2272 | wbc->nr_to_write, inode->i_ino, ret); |
2273 | blk_finish_plug(&plug); | ||
2148 | goto out_writepages; | 2274 | goto out_writepages; |
2149 | } | 2275 | } |
2150 | 2276 | ||
@@ -2177,11 +2303,12 @@ retry: | |||
2177 | ret = 0; | 2303 | ret = 0; |
2178 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2304 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
2179 | /* | 2305 | /* |
2180 | * got one extent now try with | 2306 | * Got one extent now try with rest of the pages. |
2181 | * rest of the pages | 2307 | * If mpd.retval is set -EIO, journal is aborted. |
2308 | * So we don't need to write any more. | ||
2182 | */ | 2309 | */ |
2183 | pages_written += mpd.pages_written; | 2310 | pages_written += mpd.pages_written; |
2184 | ret = 0; | 2311 | ret = mpd.retval; |
2185 | io_done = 1; | 2312 | io_done = 1; |
2186 | } else if (wbc->nr_to_write) | 2313 | } else if (wbc->nr_to_write) |
2187 | /* | 2314 | /* |
@@ -2191,6 +2318,7 @@ retry: | |||
2191 | */ | 2318 | */ |
2192 | break; | 2319 | break; |
2193 | } | 2320 | } |
2321 | blk_finish_plug(&plug); | ||
2194 | if (!io_done && !cycled) { | 2322 | if (!io_done && !cycled) { |
2195 | cycled = 1; | 2323 | cycled = 1; |
2196 | index = 0; | 2324 | index = 0; |
@@ -2229,10 +2357,11 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2229 | * Delalloc need an accurate free block accounting. So switch | 2357 | * Delalloc need an accurate free block accounting. So switch |
2230 | * to non delalloc when we are near to error range. | 2358 | * to non delalloc when we are near to error range. |
2231 | */ | 2359 | */ |
2232 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 2360 | free_blocks = EXT4_C2B(sbi, |
2233 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | 2361 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
2362 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | ||
2234 | if (2 * free_blocks < 3 * dirty_blocks || | 2363 | if (2 * free_blocks < 3 * dirty_blocks || |
2235 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 2364 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { |
2236 | /* | 2365 | /* |
2237 | * free block count is less than 150% of dirty blocks | 2366 | * free block count is less than 150% of dirty blocks |
2238 | * or free blocks is less than watermark | 2367 | * or free blocks is less than watermark |
@@ -2244,7 +2373,7 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2244 | * start pushing delalloc when 1/2 of free blocks are dirty. | 2373 | * start pushing delalloc when 1/2 of free blocks are dirty. |
2245 | */ | 2374 | */ |
2246 | if (free_blocks < 2 * dirty_blocks) | 2375 | if (free_blocks < 2 * dirty_blocks) |
2247 | writeback_inodes_sb_if_idle(sb); | 2376 | writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); |
2248 | 2377 | ||
2249 | return 0; | 2378 | return 0; |
2250 | } | 2379 | } |
@@ -2258,6 +2387,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2258 | pgoff_t index; | 2387 | pgoff_t index; |
2259 | struct inode *inode = mapping->host; | 2388 | struct inode *inode = mapping->host; |
2260 | handle_t *handle; | 2389 | handle_t *handle; |
2390 | loff_t page_len; | ||
2261 | 2391 | ||
2262 | index = pos >> PAGE_CACHE_SHIFT; | 2392 | index = pos >> PAGE_CACHE_SHIFT; |
2263 | 2393 | ||
@@ -2304,6 +2434,13 @@ retry: | |||
2304 | */ | 2434 | */ |
2305 | if (pos + len > inode->i_size) | 2435 | if (pos + len > inode->i_size) |
2306 | ext4_truncate_failed_write(inode); | 2436 | ext4_truncate_failed_write(inode); |
2437 | } else { | ||
2438 | page_len = pos & (PAGE_CACHE_SIZE - 1); | ||
2439 | if (page_len > 0) { | ||
2440 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
2441 | inode, page, pos - page_len, page_len, | ||
2442 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
2443 | } | ||
2307 | } | 2444 | } |
2308 | 2445 | ||
2309 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2446 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -2346,6 +2483,7 @@ static int ext4_da_write_end(struct file *file, | |||
2346 | loff_t new_i_size; | 2483 | loff_t new_i_size; |
2347 | unsigned long start, end; | 2484 | unsigned long start, end; |
2348 | int write_mode = (int)(unsigned long)fsdata; | 2485 | int write_mode = (int)(unsigned long)fsdata; |
2486 | loff_t page_len; | ||
2349 | 2487 | ||
2350 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | 2488 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { |
2351 | if (ext4_should_order_data(inode)) { | 2489 | if (ext4_should_order_data(inode)) { |
@@ -2394,6 +2532,16 @@ static int ext4_da_write_end(struct file *file, | |||
2394 | } | 2532 | } |
2395 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2533 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
2396 | page, fsdata); | 2534 | page, fsdata); |
2535 | |||
2536 | page_len = PAGE_CACHE_SIZE - | ||
2537 | ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1)); | ||
2538 | |||
2539 | if (page_len > 0) { | ||
2540 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
2541 | inode, page, pos + copied - 1, page_len, | ||
2542 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
2543 | } | ||
2544 | |||
2397 | copied = ret2; | 2545 | copied = ret2; |
2398 | if (ret2 < 0) | 2546 | if (ret2 < 0) |
2399 | ret = ret2; | 2547 | ret = ret2; |
@@ -2688,10 +2836,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2688 | * but being more careful is always safe for the future change. | 2836 | * but being more careful is always safe for the future change. |
2689 | */ | 2837 | */ |
2690 | inode = io_end->inode; | 2838 | inode = io_end->inode; |
2691 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 2839 | ext4_set_io_unwritten_flag(inode, io_end); |
2692 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
2693 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
2694 | } | ||
2695 | 2840 | ||
2696 | /* Add the io_end to per-inode completed io list*/ | 2841 | /* Add the io_end to per-inode completed io list*/ |
2697 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 2842 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
@@ -2857,6 +3002,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
2857 | struct inode *inode = file->f_mapping->host; | 3002 | struct inode *inode = file->f_mapping->host; |
2858 | ssize_t ret; | 3003 | ssize_t ret; |
2859 | 3004 | ||
3005 | /* | ||
3006 | * If we are doing data journalling we don't support O_DIRECT | ||
3007 | */ | ||
3008 | if (ext4_should_journal_data(inode)) | ||
3009 | return 0; | ||
3010 | |||
2860 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 3011 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); |
2861 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3012 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
2862 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3013 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
@@ -2926,6 +3077,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
2926 | .bmap = ext4_bmap, | 3077 | .bmap = ext4_bmap, |
2927 | .invalidatepage = ext4_invalidatepage, | 3078 | .invalidatepage = ext4_invalidatepage, |
2928 | .releasepage = ext4_releasepage, | 3079 | .releasepage = ext4_releasepage, |
3080 | .direct_IO = ext4_direct_IO, | ||
2929 | .is_partially_uptodate = block_is_partially_uptodate, | 3081 | .is_partially_uptodate = block_is_partially_uptodate, |
2930 | .error_remove_page = generic_error_remove_page, | 3082 | .error_remove_page = generic_error_remove_page, |
2931 | }; | 3083 | }; |
@@ -2962,6 +3114,227 @@ void ext4_set_aops(struct inode *inode) | |||
2962 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3114 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
2963 | } | 3115 | } |
2964 | 3116 | ||
3117 | |||
3118 | /* | ||
3119 | * ext4_discard_partial_page_buffers() | ||
3120 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | ||
3121 | * This function finds and locks the page containing the offset | ||
3122 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | ||
3123 | * Calling functions that already have the page locked should call | ||
3124 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
3125 | */ | ||
3126 | int ext4_discard_partial_page_buffers(handle_t *handle, | ||
3127 | struct address_space *mapping, loff_t from, | ||
3128 | loff_t length, int flags) | ||
3129 | { | ||
3130 | struct inode *inode = mapping->host; | ||
3131 | struct page *page; | ||
3132 | int err = 0; | ||
3133 | |||
3134 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | ||
3135 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
3136 | if (!page) | ||
3137 | return -ENOMEM; | ||
3138 | |||
3139 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
3140 | from, length, flags); | ||
3141 | |||
3142 | unlock_page(page); | ||
3143 | page_cache_release(page); | ||
3144 | return err; | ||
3145 | } | ||
3146 | |||
3147 | /* | ||
3148 | * ext4_discard_partial_page_buffers_no_lock() | ||
3149 | * Zeros a page range of length 'length' starting from offset 'from'. | ||
3150 | * Buffer heads that correspond to the block aligned regions of the | ||
3151 | * zeroed range will be unmapped. Unblock aligned regions | ||
3152 | * will have the corresponding buffer head mapped if needed so that | ||
3153 | * that region of the page can be updated with the partial zero out. | ||
3154 | * | ||
3155 | * This function assumes that the page has already been locked. The | ||
3156 | * The range to be discarded must be contained with in the given page. | ||
3157 | * If the specified range exceeds the end of the page it will be shortened | ||
3158 | * to the end of the page that corresponds to 'from'. This function is | ||
3159 | * appropriate for updating a page and it buffer heads to be unmapped and | ||
3160 | * zeroed for blocks that have been either released, or are going to be | ||
3161 | * released. | ||
3162 | * | ||
3163 | * handle: The journal handle | ||
3164 | * inode: The files inode | ||
3165 | * page: A locked page that contains the offset "from" | ||
3166 | * from: The starting byte offset (from the begining of the file) | ||
3167 | * to begin discarding | ||
3168 | * len: The length of bytes to discard | ||
3169 | * flags: Optional flags that may be used: | ||
3170 | * | ||
3171 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
3172 | * Only zero the regions of the page whose buffer heads | ||
3173 | * have already been unmapped. This flag is appropriate | ||
3174 | * for updateing the contents of a page whose blocks may | ||
3175 | * have already been released, and we only want to zero | ||
3176 | * out the regions that correspond to those released blocks. | ||
3177 | * | ||
3178 | * Returns zero on sucess or negative on failure. | ||
3179 | */ | ||
3180 | int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
3181 | struct inode *inode, struct page *page, loff_t from, | ||
3182 | loff_t length, int flags) | ||
3183 | { | ||
3184 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | ||
3185 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | ||
3186 | unsigned int blocksize, max, pos; | ||
3187 | ext4_lblk_t iblock; | ||
3188 | struct buffer_head *bh; | ||
3189 | int err = 0; | ||
3190 | |||
3191 | blocksize = inode->i_sb->s_blocksize; | ||
3192 | max = PAGE_CACHE_SIZE - offset; | ||
3193 | |||
3194 | if (index != page->index) | ||
3195 | return -EINVAL; | ||
3196 | |||
3197 | /* | ||
3198 | * correct length if it does not fall between | ||
3199 | * 'from' and the end of the page | ||
3200 | */ | ||
3201 | if (length > max || length < 0) | ||
3202 | length = max; | ||
3203 | |||
3204 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | ||
3205 | |||
3206 | if (!page_has_buffers(page)) { | ||
3207 | /* | ||
3208 | * If the range to be discarded covers a partial block | ||
3209 | * we need to get the page buffers. This is because | ||
3210 | * partial blocks cannot be released and the page needs | ||
3211 | * to be updated with the contents of the block before | ||
3212 | * we write the zeros on top of it. | ||
3213 | */ | ||
3214 | if ((from & (blocksize - 1)) || | ||
3215 | ((from + length) & (blocksize - 1))) { | ||
3216 | create_empty_buffers(page, blocksize, 0); | ||
3217 | } else { | ||
3218 | /* | ||
3219 | * If there are no partial blocks, | ||
3220 | * there is nothing to update, | ||
3221 | * so we can return now | ||
3222 | */ | ||
3223 | return 0; | ||
3224 | } | ||
3225 | } | ||
3226 | |||
3227 | /* Find the buffer that contains "offset" */ | ||
3228 | bh = page_buffers(page); | ||
3229 | pos = blocksize; | ||
3230 | while (offset >= pos) { | ||
3231 | bh = bh->b_this_page; | ||
3232 | iblock++; | ||
3233 | pos += blocksize; | ||
3234 | } | ||
3235 | |||
3236 | pos = offset; | ||
3237 | while (pos < offset + length) { | ||
3238 | unsigned int end_of_block, range_to_discard; | ||
3239 | |||
3240 | err = 0; | ||
3241 | |||
3242 | /* The length of space left to zero and unmap */ | ||
3243 | range_to_discard = offset + length - pos; | ||
3244 | |||
3245 | /* The length of space until the end of the block */ | ||
3246 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
3247 | |||
3248 | /* | ||
3249 | * Do not unmap or zero past end of block | ||
3250 | * for this buffer head | ||
3251 | */ | ||
3252 | if (range_to_discard > end_of_block) | ||
3253 | range_to_discard = end_of_block; | ||
3254 | |||
3255 | |||
3256 | /* | ||
3257 | * Skip this buffer head if we are only zeroing unampped | ||
3258 | * regions of the page | ||
3259 | */ | ||
3260 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
3261 | buffer_mapped(bh)) | ||
3262 | goto next; | ||
3263 | |||
3264 | /* If the range is block aligned, unmap */ | ||
3265 | if (range_to_discard == blocksize) { | ||
3266 | clear_buffer_dirty(bh); | ||
3267 | bh->b_bdev = NULL; | ||
3268 | clear_buffer_mapped(bh); | ||
3269 | clear_buffer_req(bh); | ||
3270 | clear_buffer_new(bh); | ||
3271 | clear_buffer_delay(bh); | ||
3272 | clear_buffer_unwritten(bh); | ||
3273 | clear_buffer_uptodate(bh); | ||
3274 | zero_user(page, pos, range_to_discard); | ||
3275 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
3276 | goto next; | ||
3277 | } | ||
3278 | |||
3279 | /* | ||
3280 | * If this block is not completely contained in the range | ||
3281 | * to be discarded, then it is not going to be released. Because | ||
3282 | * we need to keep this block, we need to make sure this part | ||
3283 | * of the page is uptodate before we modify it by writeing | ||
3284 | * partial zeros on it. | ||
3285 | */ | ||
3286 | if (!buffer_mapped(bh)) { | ||
3287 | /* | ||
3288 | * Buffer head must be mapped before we can read | ||
3289 | * from the block | ||
3290 | */ | ||
3291 | BUFFER_TRACE(bh, "unmapped"); | ||
3292 | ext4_get_block(inode, iblock, bh, 0); | ||
3293 | /* unmapped? It's a hole - nothing to do */ | ||
3294 | if (!buffer_mapped(bh)) { | ||
3295 | BUFFER_TRACE(bh, "still unmapped"); | ||
3296 | goto next; | ||
3297 | } | ||
3298 | } | ||
3299 | |||
3300 | /* Ok, it's mapped. Make sure it's up-to-date */ | ||
3301 | if (PageUptodate(page)) | ||
3302 | set_buffer_uptodate(bh); | ||
3303 | |||
3304 | if (!buffer_uptodate(bh)) { | ||
3305 | err = -EIO; | ||
3306 | ll_rw_block(READ, 1, &bh); | ||
3307 | wait_on_buffer(bh); | ||
3308 | /* Uhhuh. Read error. Complain and punt.*/ | ||
3309 | if (!buffer_uptodate(bh)) | ||
3310 | goto next; | ||
3311 | } | ||
3312 | |||
3313 | if (ext4_should_journal_data(inode)) { | ||
3314 | BUFFER_TRACE(bh, "get write access"); | ||
3315 | err = ext4_journal_get_write_access(handle, bh); | ||
3316 | if (err) | ||
3317 | goto next; | ||
3318 | } | ||
3319 | |||
3320 | zero_user(page, pos, range_to_discard); | ||
3321 | |||
3322 | err = 0; | ||
3323 | if (ext4_should_journal_data(inode)) { | ||
3324 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
3325 | } else | ||
3326 | mark_buffer_dirty(bh); | ||
3327 | |||
3328 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | ||
3329 | next: | ||
3330 | bh = bh->b_this_page; | ||
3331 | iblock++; | ||
3332 | pos += range_to_discard; | ||
3333 | } | ||
3334 | |||
3335 | return err; | ||
3336 | } | ||
3337 | |||
2965 | /* | 3338 | /* |
2966 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3339 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
2967 | * up to the end of the block which corresponds to `from'. | 3340 | * up to the end of the block which corresponds to `from'. |
@@ -3004,7 +3377,7 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
3004 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3377 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3005 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3378 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
3006 | if (!page) | 3379 | if (!page) |
3007 | return -EINVAL; | 3380 | return -ENOMEM; |
3008 | 3381 | ||
3009 | blocksize = inode->i_sb->s_blocksize; | 3382 | blocksize = inode->i_sb->s_blocksize; |
3010 | max = blocksize - (offset & (blocksize - 1)); | 3383 | max = blocksize - (offset & (blocksize - 1)); |
@@ -3073,11 +3446,8 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
3073 | err = 0; | 3446 | err = 0; |
3074 | if (ext4_should_journal_data(inode)) { | 3447 | if (ext4_should_journal_data(inode)) { |
3075 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 3448 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
3076 | } else { | 3449 | } else |
3077 | if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode) | ||
3078 | err = ext4_jbd2_file_inode(handle, inode); | ||
3079 | mark_buffer_dirty(bh); | 3450 | mark_buffer_dirty(bh); |
3080 | } | ||
3081 | 3451 | ||
3082 | unlock: | 3452 | unlock: |
3083 | unlock_page(page); | 3453 | unlock_page(page); |
@@ -3118,6 +3488,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3118 | return -ENOTSUPP; | 3488 | return -ENOTSUPP; |
3119 | } | 3489 | } |
3120 | 3490 | ||
3491 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { | ||
3492 | /* TODO: Add support for bigalloc file systems */ | ||
3493 | return -ENOTSUPP; | ||
3494 | } | ||
3495 | |||
3121 | return ext4_ext_punch_hole(file, offset, length); | 3496 | return ext4_ext_punch_hole(file, offset, length); |
3122 | } | 3497 | } |
3123 | 3498 | ||
@@ -3301,7 +3676,7 @@ make_io: | |||
3301 | trace_ext4_load_inode(inode); | 3676 | trace_ext4_load_inode(inode); |
3302 | get_bh(bh); | 3677 | get_bh(bh); |
3303 | bh->b_end_io = end_buffer_read_sync; | 3678 | bh->b_end_io = end_buffer_read_sync; |
3304 | submit_bh(READ_META, bh); | 3679 | submit_bh(READ | REQ_META | REQ_PRIO, bh); |
3305 | wait_on_buffer(bh); | 3680 | wait_on_buffer(bh); |
3306 | if (!buffer_uptodate(bh)) { | 3681 | if (!buffer_uptodate(bh)) { |
3307 | EXT4_ERROR_INODE_BLOCK(inode, block, | 3682 | EXT4_ERROR_INODE_BLOCK(inode, block, |
@@ -3417,7 +3792,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3417 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 3792 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
3418 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 3793 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
3419 | } | 3794 | } |
3420 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 3795 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); |
3421 | 3796 | ||
3422 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ | 3797 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
3423 | ei->i_dir_start_lookup = 0; | 3798 | ei->i_dir_start_lookup = 0; |
@@ -4419,6 +4794,7 @@ retry_alloc: | |||
4419 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 4794 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
4420 | unlock_page(page); | 4795 | unlock_page(page); |
4421 | ret = VM_FAULT_SIGBUS; | 4796 | ret = VM_FAULT_SIGBUS; |
4797 | ext4_journal_stop(handle); | ||
4422 | goto out; | 4798 | goto out; |
4423 | } | 4799 | } |
4424 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 4800 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |