Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--  fs/ext4/inode.c  520
1 files changed, 449 insertions, 71 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7fa73a3b212..240f6e2dc7e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -42,7 +42,6 @@ | |||
42 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
43 | #include "xattr.h" | 43 | #include "xattr.h" |
44 | #include "acl.h" | 44 | #include "acl.h" |
45 | #include "ext4_extents.h" | ||
46 | #include "truncate.h" | 45 | #include "truncate.h" |
47 | 46 | ||
48 | #include <trace/events/ext4.h> | 47 | #include <trace/events/ext4.h> |
@@ -268,7 +267,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
268 | struct ext4_inode_info *ei = EXT4_I(inode); | 267 | struct ext4_inode_info *ei = EXT4_I(inode); |
269 | 268 | ||
270 | spin_lock(&ei->i_block_reservation_lock); | 269 | spin_lock(&ei->i_block_reservation_lock); |
271 | trace_ext4_da_update_reserve_space(inode, used); | 270 | trace_ext4_da_update_reserve_space(inode, used, quota_claim); |
272 | if (unlikely(used > ei->i_reserved_data_blocks)) { | 271 | if (unlikely(used > ei->i_reserved_data_blocks)) { |
273 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " | 272 | ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " |
274 | "with only %d reserved data blocks\n", | 273 | "with only %d reserved data blocks\n", |
@@ -281,7 +280,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
281 | /* Update per-inode reservations */ | 280 | /* Update per-inode reservations */ |
282 | ei->i_reserved_data_blocks -= used; | 281 | ei->i_reserved_data_blocks -= used; |
283 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; | 282 | ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; |
284 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 283 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
285 | used + ei->i_allocated_meta_blocks); | 284 | used + ei->i_allocated_meta_blocks); |
286 | ei->i_allocated_meta_blocks = 0; | 285 | ei->i_allocated_meta_blocks = 0; |
287 | 286 | ||
@@ -291,7 +290,7 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
291 | * only when we have written all of the delayed | 290 | * only when we have written all of the delayed |
292 | * allocation blocks. | 291 | * allocation blocks. |
293 | */ | 292 | */ |
294 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 293 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
295 | ei->i_reserved_meta_blocks); | 294 | ei->i_reserved_meta_blocks); |
296 | ei->i_reserved_meta_blocks = 0; | 295 | ei->i_reserved_meta_blocks = 0; |
297 | ei->i_da_metadata_calc_len = 0; | 296 | ei->i_da_metadata_calc_len = 0; |
@@ -300,14 +299,14 @@ void ext4_da_update_reserve_space(struct inode *inode, | |||
300 | 299 | ||
301 | /* Update quota subsystem for data blocks */ | 300 | /* Update quota subsystem for data blocks */ |
302 | if (quota_claim) | 301 | if (quota_claim) |
303 | dquot_claim_block(inode, used); | 302 | dquot_claim_block(inode, EXT4_C2B(sbi, used)); |
304 | else { | 303 | else { |
305 | /* | 304 | /* |
306 | * We did fallocate with an offset that is already delayed | 305 | * We did fallocate with an offset that is already delayed |
307 | * allocated. So on delayed allocated writeback we should | 306 | * allocated. So on delayed allocated writeback we should |
308 | * not re-claim the quota for fallocated blocks. | 307 | * not re-claim the quota for fallocated blocks. |
309 | */ | 308 | */ |
310 | dquot_release_reservation_block(inode, used); | 309 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); |
311 | } | 310 | } |
312 | 311 | ||
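The reserve-space hunks above keep the per-inode counters in cluster units while the quota layer still accounts in filesystem blocks, which is why the quota calls now go through EXT4_C2B(). A minimal userspace sketch of the conversion arithmetic as I read the bigalloc series — the struct fields and helper names here are illustrative stand-ins, not the kernel's:

#include <stdio.h>

struct sb_info {
	unsigned int s_cluster_bits;   /* log2(blocks per cluster) */
	unsigned int s_cluster_ratio;  /* blocks per cluster */
};

/* roughly EXT4_C2B(): clusters -> blocks */
static unsigned long long c2b(const struct sb_info *sbi, unsigned long long clusters)
{
	return clusters << sbi->s_cluster_bits;
}

/* roughly EXT4_NUM_B2C(): blocks -> clusters, rounding up */
static unsigned long long num_b2c(const struct sb_info *sbi, unsigned long long blocks)
{
	return (blocks + sbi->s_cluster_ratio - 1) >> sbi->s_cluster_bits;
}

int main(void)
{
	struct sb_info sbi = { .s_cluster_bits = 4, .s_cluster_ratio = 16 };

	/* dquot_claim_block(inode, EXT4_C2B(sbi, used)): quota is charged in
	 * blocks even though 'used' is tracked in clusters */
	printf("17 blocks round up to %llu clusters\n", num_b2c(&sbi, 17));
	printf("2 clusters cover %llu blocks\n", c2b(&sbi, 2));
	return 0;
}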
313 | /* | 312 | /* |
@@ -399,6 +398,49 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
399 | } | 398 | } |
400 | 399 | ||
401 | /* | 400 | /* |
401 | * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map. | ||
402 | */ | ||
403 | static void set_buffers_da_mapped(struct inode *inode, | ||
404 | struct ext4_map_blocks *map) | ||
405 | { | ||
406 | struct address_space *mapping = inode->i_mapping; | ||
407 | struct pagevec pvec; | ||
408 | int i, nr_pages; | ||
409 | pgoff_t index, end; | ||
410 | |||
411 | index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
412 | end = (map->m_lblk + map->m_len - 1) >> | ||
413 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
414 | |||
415 | pagevec_init(&pvec, 0); | ||
416 | while (index <= end) { | ||
417 | nr_pages = pagevec_lookup(&pvec, mapping, index, | ||
418 | min(end - index + 1, | ||
419 | (pgoff_t)PAGEVEC_SIZE)); | ||
420 | if (nr_pages == 0) | ||
421 | break; | ||
422 | for (i = 0; i < nr_pages; i++) { | ||
423 | struct page *page = pvec.pages[i]; | ||
424 | struct buffer_head *bh, *head; | ||
425 | |||
426 | if (unlikely(page->mapping != mapping) || | ||
427 | !PageDirty(page)) | ||
428 | break; | ||
429 | |||
430 | if (page_has_buffers(page)) { | ||
431 | bh = head = page_buffers(page); | ||
432 | do { | ||
433 | set_buffer_da_mapped(bh); | ||
434 | bh = bh->b_this_page; | ||
435 | } while (bh != head); | ||
436 | } | ||
437 | index++; | ||
438 | } | ||
439 | pagevec_release(&pvec); | ||
440 | } | ||
441 | } | ||
442 | |||
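set_buffers_da_mapped() above turns the mapped block range into a page-index range and then walks those pages with pagevec_lookup(), tagging every buffer head on each dirty page. A standalone sketch of just the index arithmetic, with example shift values (the kernel uses PAGE_CACHE_SHIFT and inode->i_blkbits):

#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12;  /* 4 KiB pages */
	unsigned int blkbits = 10;     /* 1 KiB blocks, 4 blocks per page */
	unsigned long m_lblk = 13, m_len = 9;

	unsigned long index = m_lblk >> (page_shift - blkbits);
	unsigned long end = (m_lblk + m_len - 1) >> (page_shift - blkbits);

	/* pages index..end are then looked up with pagevec_lookup() and every
	 * buffer head on a dirty page gets BH_Da_Mapped set */
	printf("blocks %lu..%lu live in pages %lu..%lu\n",
	       m_lblk, m_lblk + m_len - 1, index, end);
	return 0;
}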
443 | /* | ||
402 | * The ext4_map_blocks() function tries to look up the requested blocks, | 444 | * The ext4_map_blocks() function tries to look up the requested blocks, |
403 | * and returns if the blocks are already mapped. | 445 | * and returns if the blocks are already mapped. |
404 | * | 446 | * |
@@ -416,7 +458,7 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | |||
416 | * the buffer head is mapped. | 458 | * the buffer head is mapped. |
417 | * | 459 | * |
418 | * It returns 0 if plain look up failed (blocks have not been allocated), in | 460 | * It returns 0 if plain look up failed (blocks have not been allocated), in |
419 | * that casem, buffer head is unmapped | 461 | * that case, buffer head is unmapped |
420 | * | 462 | * |
421 | * It returns the error in case of allocation failure. | 463 | * It returns the error in case of allocation failure. |
422 | */ | 464 | */ |
@@ -435,9 +477,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
435 | */ | 477 | */ |
436 | down_read((&EXT4_I(inode)->i_data_sem)); | 478 | down_read((&EXT4_I(inode)->i_data_sem)); |
437 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 479 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
438 | retval = ext4_ext_map_blocks(handle, inode, map, 0); | 480 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
481 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
439 | } else { | 482 | } else { |
440 | retval = ext4_ind_map_blocks(handle, inode, map, 0); | 483 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
484 | EXT4_GET_BLOCKS_KEEP_SIZE); | ||
441 | } | 485 | } |
442 | up_read((&EXT4_I(inode)->i_data_sem)); | 486 | up_read((&EXT4_I(inode)->i_data_sem)); |
443 | 487 | ||
@@ -455,7 +499,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
455 | * Returns if the blocks have already allocated | 499 | * Returns if the blocks have already allocated |
456 | * | 500 | * |
457 | * Note that if blocks have been preallocated | 501 | * Note that if blocks have been preallocated |
458 | * ext4_ext_get_block() returns th create = 0 | 502 | * ext4_ext_get_block() returns the create = 0 |
459 | * with buffer head unmapped. | 503 | * with buffer head unmapped. |
460 | */ | 504 | */ |
461 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 505 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
@@ -517,9 +561,17 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
517 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) | 561 | (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) |
518 | ext4_da_update_reserve_space(inode, retval, 1); | 562 | ext4_da_update_reserve_space(inode, retval, 1); |
519 | } | 563 | } |
520 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) | 564 | if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) { |
521 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 565 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
522 | 566 | ||
567 | /* If we have successfully mapped the delayed allocated blocks, | ||
568 | * set the BH_Da_Mapped bit on them. It's important to do this | ||
569 | * under the protection of i_data_sem. | ||
570 | */ | ||
571 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | ||
572 | set_buffers_da_mapped(inode, map); | ||
573 | } | ||
574 | |||
523 | up_write((&EXT4_I(inode)->i_data_sem)); | 575 | up_write((&EXT4_I(inode)->i_data_sem)); |
524 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 576 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
525 | int ret = check_block_validity(inode, map); | 577 | int ret = check_block_validity(inode, map); |
@@ -909,7 +961,11 @@ static int ext4_ordered_write_end(struct file *file, | |||
909 | ext4_orphan_add(handle, inode); | 961 | ext4_orphan_add(handle, inode); |
910 | if (ret2 < 0) | 962 | if (ret2 < 0) |
911 | ret = ret2; | 963 | ret = ret2; |
964 | } else { | ||
965 | unlock_page(page); | ||
966 | page_cache_release(page); | ||
912 | } | 967 | } |
968 | |||
913 | ret2 = ext4_journal_stop(handle); | 969 | ret2 = ext4_journal_stop(handle); |
914 | if (!ret) | 970 | if (!ret) |
915 | ret = ret2; | 971 | ret = ret2; |
@@ -1037,14 +1093,14 @@ static int ext4_journalled_write_end(struct file *file, | |||
1037 | } | 1093 | } |
1038 | 1094 | ||
1039 | /* | 1095 | /* |
1040 | * Reserve a single block located at lblock | 1096 | * Reserve a single cluster located at lblock |
1041 | */ | 1097 | */ |
1042 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | 1098 | static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) |
1043 | { | 1099 | { |
1044 | int retries = 0; | 1100 | int retries = 0; |
1045 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1101 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1046 | struct ext4_inode_info *ei = EXT4_I(inode); | 1102 | struct ext4_inode_info *ei = EXT4_I(inode); |
1047 | unsigned long md_needed; | 1103 | unsigned int md_needed; |
1048 | int ret; | 1104 | int ret; |
1049 | 1105 | ||
1050 | /* | 1106 | /* |
@@ -1054,7 +1110,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) | |||
1054 | */ | 1110 | */ |
1055 | repeat: | 1111 | repeat: |
1056 | spin_lock(&ei->i_block_reservation_lock); | 1112 | spin_lock(&ei->i_block_reservation_lock); |
1057 | md_needed = ext4_calc_metadata_amount(inode, lblock); | 1113 | md_needed = EXT4_NUM_B2C(sbi, |
1114 | ext4_calc_metadata_amount(inode, lblock)); | ||
1058 | trace_ext4_da_reserve_space(inode, md_needed); | 1115 | trace_ext4_da_reserve_space(inode, md_needed); |
1059 | spin_unlock(&ei->i_block_reservation_lock); | 1116 | spin_unlock(&ei->i_block_reservation_lock); |
1060 | 1117 | ||
@@ -1063,15 +1120,15 @@ repeat: | |||
1063 | * us from metadata over-estimation, though we may go over by | 1120 | * us from metadata over-estimation, though we may go over by |
1064 | * a small amount in the end. Here we just reserve for data. | 1121 | * a small amount in the end. Here we just reserve for data. |
1065 | */ | 1122 | */ |
1066 | ret = dquot_reserve_block(inode, 1); | 1123 | ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); |
1067 | if (ret) | 1124 | if (ret) |
1068 | return ret; | 1125 | return ret; |
1069 | /* | 1126 | /* |
1070 | * We do still charge estimated metadata to the sb though; | 1127 | * We do still charge estimated metadata to the sb though; |
1071 | * we cannot afford to run out of free blocks. | 1128 | * we cannot afford to run out of free blocks. |
1072 | */ | 1129 | */ |
1073 | if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) { | 1130 | if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) { |
1074 | dquot_release_reservation_block(inode, 1); | 1131 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); |
1075 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | 1132 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { |
1076 | yield(); | 1133 | yield(); |
1077 | goto repeat; | 1134 | goto repeat; |
@@ -1118,19 +1175,21 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1118 | * We can release all of the reserved metadata blocks | 1175 | * We can release all of the reserved metadata blocks |
1119 | * only when we have written all of the delayed | 1176 | * only when we have written all of the delayed |
1120 | * allocation blocks. | 1177 | * allocation blocks. |
1178 | * Note that in case of bigalloc, i_reserved_meta_blocks, | ||
1179 | * i_reserved_data_blocks, etc. refer to number of clusters. | ||
1121 | */ | 1180 | */ |
1122 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | 1181 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, |
1123 | ei->i_reserved_meta_blocks); | 1182 | ei->i_reserved_meta_blocks); |
1124 | ei->i_reserved_meta_blocks = 0; | 1183 | ei->i_reserved_meta_blocks = 0; |
1125 | ei->i_da_metadata_calc_len = 0; | 1184 | ei->i_da_metadata_calc_len = 0; |
1126 | } | 1185 | } |
1127 | 1186 | ||
1128 | /* update fs dirty data blocks counter */ | 1187 | /* update fs dirty data blocks counter */ |
1129 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); | 1188 | percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); |
1130 | 1189 | ||
1131 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1190 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1132 | 1191 | ||
1133 | dquot_release_reservation_block(inode, to_free); | 1192 | dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); |
1134 | } | 1193 | } |
1135 | 1194 | ||
1136 | static void ext4_da_page_release_reservation(struct page *page, | 1195 | static void ext4_da_page_release_reservation(struct page *page, |
@@ -1139,6 +1198,9 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1139 | int to_release = 0; | 1198 | int to_release = 0; |
1140 | struct buffer_head *head, *bh; | 1199 | struct buffer_head *head, *bh; |
1141 | unsigned int curr_off = 0; | 1200 | unsigned int curr_off = 0; |
1201 | struct inode *inode = page->mapping->host; | ||
1202 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
1203 | int num_clusters; | ||
1142 | 1204 | ||
1143 | head = page_buffers(page); | 1205 | head = page_buffers(page); |
1144 | bh = head; | 1206 | bh = head; |
@@ -1148,10 +1210,24 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
1148 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1210 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
1149 | to_release++; | 1211 | to_release++; |
1150 | clear_buffer_delay(bh); | 1212 | clear_buffer_delay(bh); |
1213 | clear_buffer_da_mapped(bh); | ||
1151 | } | 1214 | } |
1152 | curr_off = next_off; | 1215 | curr_off = next_off; |
1153 | } while ((bh = bh->b_this_page) != head); | 1216 | } while ((bh = bh->b_this_page) != head); |
1154 | ext4_da_release_space(page->mapping->host, to_release); | 1217 | |
1218 | /* If we have released all the blocks belonging to a cluster, then we | ||
1219 | * need to release the reserved space for that cluster. */ | ||
1220 | num_clusters = EXT4_NUM_B2C(sbi, to_release); | ||
1221 | while (num_clusters > 0) { | ||
1222 | ext4_fsblk_t lblk; | ||
1223 | lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) + | ||
1224 | ((num_clusters - 1) << sbi->s_cluster_bits); | ||
1225 | if (sbi->s_cluster_ratio == 1 || | ||
1226 | !ext4_find_delalloc_cluster(inode, lblk, 1)) | ||
1227 | ext4_da_release_space(inode, 1); | ||
1228 | |||
1229 | num_clusters--; | ||
1230 | } | ||
1155 | } | 1231 | } |
1156 | 1232 | ||
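The release path above now works per cluster: a reservation is only given back when ext4_find_delalloc_cluster() reports that no other delayed block still lives in that cluster. A plain-C sketch of the logical-block computation the loop performs, using example geometry rather than the kernel's types:

#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12, blkbits = 10;   /* 4 blocks per page */
	unsigned int cluster_bits = 2;                /* 4 blocks per cluster */
	unsigned long page_index = 7;
	int to_release = 4;                           /* delayed buffers freed on this page */
	int num_clusters = (to_release + (1 << cluster_bits) - 1) >> cluster_bits;

	while (num_clusters > 0) {
		/* first logical block of the num_clusters-th cluster on this page */
		unsigned long long lblk =
			((unsigned long long)page_index << (page_shift - blkbits)) +
			((unsigned long long)(num_clusters - 1) << cluster_bits);

		/* the kernel asks ext4_find_delalloc_cluster(inode, lblk, 1) here and
		 * releases one cluster reservation only if nothing else needs it */
		printf("cluster %d on page %lu starts at logical block %llu\n",
		       num_clusters, page_index, lblk);
		num_clusters--;
	}
	return 0;
}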
1157 | /* | 1233 | /* |
@@ -1253,6 +1329,8 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
1253 | clear_buffer_delay(bh); | 1329 | clear_buffer_delay(bh); |
1254 | bh->b_blocknr = pblock; | 1330 | bh->b_blocknr = pblock; |
1255 | } | 1331 | } |
1332 | if (buffer_da_mapped(bh)) | ||
1333 | clear_buffer_da_mapped(bh); | ||
1256 | if (buffer_unwritten(bh) || | 1334 | if (buffer_unwritten(bh) || |
1257 | buffer_mapped(bh)) | 1335 | buffer_mapped(bh)) |
1258 | BUG_ON(bh->b_blocknr != pblock); | 1336 | BUG_ON(bh->b_blocknr != pblock); |
@@ -1346,12 +1424,15 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
1346 | { | 1424 | { |
1347 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1425 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1348 | printk(KERN_CRIT "Total free blocks count %lld\n", | 1426 | printk(KERN_CRIT "Total free blocks count %lld\n", |
1349 | ext4_count_free_blocks(inode->i_sb)); | 1427 | EXT4_C2B(EXT4_SB(inode->i_sb), |
1428 | ext4_count_free_clusters(inode->i_sb))); | ||
1350 | printk(KERN_CRIT "Free/Dirty block details\n"); | 1429 | printk(KERN_CRIT "Free/Dirty block details\n"); |
1351 | printk(KERN_CRIT "free_blocks=%lld\n", | 1430 | printk(KERN_CRIT "free_blocks=%lld\n", |
1352 | (long long) percpu_counter_sum(&sbi->s_freeblocks_counter)); | 1431 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1432 | percpu_counter_sum(&sbi->s_freeclusters_counter))); | ||
1353 | printk(KERN_CRIT "dirty_blocks=%lld\n", | 1433 | printk(KERN_CRIT "dirty_blocks=%lld\n", |
1354 | (long long) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | 1434 | (long long) EXT4_C2B(EXT4_SB(inode->i_sb), |
1435 | percpu_counter_sum(&sbi->s_dirtyclusters_counter))); | ||
1355 | printk(KERN_CRIT "Block reservation details\n"); | 1436 | printk(KERN_CRIT "Block reservation details\n"); |
1356 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", | 1437 | printk(KERN_CRIT "i_reserved_data_blocks=%u\n", |
1357 | EXT4_I(inode)->i_reserved_data_blocks); | 1438 | EXT4_I(inode)->i_reserved_data_blocks); |
@@ -1430,8 +1511,7 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
1430 | if (err == -EAGAIN) | 1511 | if (err == -EAGAIN) |
1431 | goto submit_io; | 1512 | goto submit_io; |
1432 | 1513 | ||
1433 | if (err == -ENOSPC && | 1514 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { |
1434 | ext4_count_free_blocks(sb)) { | ||
1435 | mpd->retval = err; | 1515 | mpd->retval = err; |
1436 | goto submit_io; | 1516 | goto submit_io; |
1437 | } | 1517 | } |
@@ -1471,13 +1551,15 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
1471 | 1551 | ||
1472 | for (i = 0; i < map.m_len; i++) | 1552 | for (i = 0; i < map.m_len; i++) |
1473 | unmap_underlying_metadata(bdev, map.m_pblk + i); | 1553 | unmap_underlying_metadata(bdev, map.m_pblk + i); |
1474 | } | ||
1475 | 1554 | ||
1476 | if (ext4_should_order_data(mpd->inode)) { | 1555 | if (ext4_should_order_data(mpd->inode)) { |
1477 | err = ext4_jbd2_file_inode(handle, mpd->inode); | 1556 | err = ext4_jbd2_file_inode(handle, mpd->inode); |
1478 | if (err) | 1557 | if (err) { |
1479 | /* This only happens if the journal is aborted */ | 1558 | /* Only if the journal is aborted */ |
1480 | return; | 1559 | mpd->retval = err; |
1560 | goto submit_io; | ||
1561 | } | ||
1562 | } | ||
1481 | } | 1563 | } |
1482 | 1564 | ||
1483 | /* | 1565 | /* |
@@ -1584,6 +1666,66 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
1584 | } | 1666 | } |
1585 | 1667 | ||
1586 | /* | 1668 | /* |
1669 | * This function grabs code from the very beginning of | ||
1670 | * ext4_map_blocks, but assumes that the caller is from delayed write | ||
1671 | * time. This function looks up the requested blocks and sets the | ||
1672 | * buffer delay bit under the protection of i_data_sem. | ||
1673 | */ | ||
1674 | static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | ||
1675 | struct ext4_map_blocks *map, | ||
1676 | struct buffer_head *bh) | ||
1677 | { | ||
1678 | int retval; | ||
1679 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
1680 | |||
1681 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
1682 | invalid_block = ~0; | ||
1683 | |||
1684 | map->m_flags = 0; | ||
1685 | ext_debug("ext4_da_map_blocks(): inode %lu, max_blocks %u," | ||
1686 | "logical block %lu\n", inode->i_ino, map->m_len, | ||
1687 | (unsigned long) map->m_lblk); | ||
1688 | /* | ||
1689 | * Try to see if we can get the block without requesting a new | ||
1690 | * file system block. | ||
1691 | */ | ||
1692 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
1693 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | ||
1694 | retval = ext4_ext_map_blocks(NULL, inode, map, 0); | ||
1695 | else | ||
1696 | retval = ext4_ind_map_blocks(NULL, inode, map, 0); | ||
1697 | |||
1698 | if (retval == 0) { | ||
1699 | /* | ||
1700 | * XXX: __block_prepare_write() unmaps passed block, | ||
1701 | * is it OK? | ||
1702 | */ | ||
1703 | /* If the block was allocated from a previously allocated cluster, | ||
1704 | * then we don't need to reserve it again. */ | ||
1705 | if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { | ||
1706 | retval = ext4_da_reserve_space(inode, iblock); | ||
1707 | if (retval) | ||
1708 | /* not enough space to reserve */ | ||
1709 | goto out_unlock; | ||
1710 | } | ||
1711 | |||
1712 | /* Clear EXT4_MAP_FROM_CLUSTER flag since its purpose is served | ||
1713 | * and it should not appear on the bh->b_state. | ||
1714 | */ | ||
1715 | map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; | ||
1716 | |||
1717 | map_bh(bh, inode->i_sb, invalid_block); | ||
1718 | set_buffer_new(bh); | ||
1719 | set_buffer_delay(bh); | ||
1720 | } | ||
1721 | |||
1722 | out_unlock: | ||
1723 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
1724 | |||
1725 | return retval; | ||
1726 | } | ||
1727 | |||
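ext4_da_map_blocks() maps a delayed buffer to a deliberately bogus block number so it can be recognised later; the sentinel is ~0xffff unless the filesystem is large enough that this could be a real block, in which case ~0 is used instead. A small sketch of that selection (simplified types; the kernel compares against ext4_blocks_count()):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blocks_count = 1ULL << 32;            /* example filesystem size */
	uint64_t invalid_block = ~((uint64_t)0xffff);  /* preferred sentinel */

	/* if the sentinel could collide with a real block, fall back to ~0 */
	if (invalid_block < blocks_count)
		invalid_block = ~0ULL;

	printf("delayed buffers temporarily map to block %#llx\n",
	       (unsigned long long)invalid_block);
	return 0;
}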
1728 | /* | ||
1587 | * This is a special get_blocks_t callback which is used by | 1729 | * This is a special get_blocks_t callback which is used by |
1588 | * ext4_da_write_begin(). It will either return mapped block or | 1730 | * ext4_da_write_begin(). It will either return mapped block or |
1589 | * reserve space for a single block. | 1731 | * reserve space for a single block. |
@@ -1600,10 +1742,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1600 | { | 1742 | { |
1601 | struct ext4_map_blocks map; | 1743 | struct ext4_map_blocks map; |
1602 | int ret = 0; | 1744 | int ret = 0; |
1603 | sector_t invalid_block = ~((sector_t) 0xffff); | ||
1604 | |||
1605 | if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) | ||
1606 | invalid_block = ~0; | ||
1607 | 1745 | ||
1608 | BUG_ON(create == 0); | 1746 | BUG_ON(create == 0); |
1609 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); | 1747 | BUG_ON(bh->b_size != inode->i_sb->s_blocksize); |
@@ -1616,25 +1754,9 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1616 | * preallocated blocks are unmapped but should treated | 1754 | * preallocated blocks are unmapped but should treated |
1617 | * the same as allocated blocks. | 1755 | * the same as allocated blocks. |
1618 | */ | 1756 | */ |
1619 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 1757 | ret = ext4_da_map_blocks(inode, iblock, &map, bh); |
1620 | if (ret < 0) | 1758 | if (ret <= 0) |
1621 | return ret; | 1759 | return ret; |
1622 | if (ret == 0) { | ||
1623 | if (buffer_delay(bh)) | ||
1624 | return 0; /* Not sure this could or should happen */ | ||
1625 | /* | ||
1626 | * XXX: __block_write_begin() unmaps passed block, is it OK? | ||
1627 | */ | ||
1628 | ret = ext4_da_reserve_space(inode, iblock); | ||
1629 | if (ret) | ||
1630 | /* not enough space to reserve */ | ||
1631 | return ret; | ||
1632 | |||
1633 | map_bh(bh, inode->i_sb, invalid_block); | ||
1634 | set_buffer_new(bh); | ||
1635 | set_buffer_delay(bh); | ||
1636 | return 0; | ||
1637 | } | ||
1638 | 1760 | ||
1639 | map_bh(bh, inode->i_sb, map.m_pblk); | 1761 | map_bh(bh, inode->i_sb, map.m_pblk); |
1640 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1762 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; |
@@ -1811,8 +1933,12 @@ static int ext4_writepage(struct page *page, | |||
1811 | * We don't want to do block allocation, so redirty | 1933 | * We don't want to do block allocation, so redirty |
1812 | * the page and return. We may reach here when we do | 1934 | * the page and return. We may reach here when we do |
1813 | * a journal commit via journal_submit_inode_data_buffers. | 1935 | * a journal commit via journal_submit_inode_data_buffers. |
1814 | * We can also reach here via shrink_page_list | 1936 | * We can also reach here via shrink_page_list but it |
1937 | * should never be for direct reclaim so warn if that | ||
1938 | * happens | ||
1815 | */ | 1939 | */ |
1940 | WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == | ||
1941 | PF_MEMALLOC); | ||
1816 | goto redirty_page; | 1942 | goto redirty_page; |
1817 | } | 1943 | } |
1818 | if (commit_write) | 1944 | if (commit_write) |
@@ -2046,6 +2172,7 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2046 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2172 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2047 | pgoff_t done_index = 0; | 2173 | pgoff_t done_index = 0; |
2048 | pgoff_t end; | 2174 | pgoff_t end; |
2175 | struct blk_plug plug; | ||
2049 | 2176 | ||
2050 | trace_ext4_da_writepages(inode, wbc); | 2177 | trace_ext4_da_writepages(inode, wbc); |
2051 | 2178 | ||
@@ -2124,6 +2251,7 @@ retry: | |||
2124 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2251 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
2125 | tag_pages_for_writeback(mapping, index, end); | 2252 | tag_pages_for_writeback(mapping, index, end); |
2126 | 2253 | ||
2254 | blk_start_plug(&plug); | ||
2127 | while (!ret && wbc->nr_to_write > 0) { | 2255 | while (!ret && wbc->nr_to_write > 0) { |
2128 | 2256 | ||
2129 | /* | 2257 | /* |
@@ -2174,11 +2302,12 @@ retry: | |||
2174 | ret = 0; | 2302 | ret = 0; |
2175 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2303 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
2176 | /* | 2304 | /* |
2177 | * got one extent now try with | 2305 | * Got one extent now try with rest of the pages. |
2178 | * rest of the pages | 2306 | * If mpd.retval is set -EIO, journal is aborted. |
2307 | * So we don't need to write any more. | ||
2179 | */ | 2308 | */ |
2180 | pages_written += mpd.pages_written; | 2309 | pages_written += mpd.pages_written; |
2181 | ret = 0; | 2310 | ret = mpd.retval; |
2182 | io_done = 1; | 2311 | io_done = 1; |
2183 | } else if (wbc->nr_to_write) | 2312 | } else if (wbc->nr_to_write) |
2184 | /* | 2313 | /* |
@@ -2188,6 +2317,7 @@ retry: | |||
2188 | */ | 2317 | */ |
2189 | break; | 2318 | break; |
2190 | } | 2319 | } |
2320 | blk_finish_plug(&plug); | ||
2191 | if (!io_done && !cycled) { | 2321 | if (!io_done && !cycled) { |
2192 | cycled = 1; | 2322 | cycled = 1; |
2193 | index = 0; | 2323 | index = 0; |
@@ -2226,10 +2356,11 @@ static int ext4_nonda_switch(struct super_block *sb) | |||
2226 | * Delalloc need an accurate free block accounting. So switch | 2356 | * Delalloc need an accurate free block accounting. So switch |
2227 | * to non delalloc when we are near to error range. | 2357 | * to non delalloc when we are near to error range. |
2228 | */ | 2358 | */ |
2229 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 2359 | free_blocks = EXT4_C2B(sbi, |
2230 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | 2360 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
2361 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); | ||
2231 | if (2 * free_blocks < 3 * dirty_blocks || | 2362 | if (2 * free_blocks < 3 * dirty_blocks || |
2232 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | 2363 | free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { |
2233 | /* | 2364 | /* |
2234 | * free block count is less than 150% of dirty blocks | 2365 | * free block count is less than 150% of dirty blocks |
2235 | * or free blocks is less than watermark | 2366 | * or free blocks is less than watermark |
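ext4_nonda_switch() keeps the same shape after the counter rename: fall back to non-delayed allocation once free space is no longer comfortably ahead of dirty, not-yet-allocated data. A hedged userspace sketch of the heuristic — the numbers and the watermark constant are placeholders, not the kernel's values:

#include <stdio.h>

int main(void)
{
	/* example numbers; the kernel reads per-cpu cluster counters and
	 * converts free clusters to blocks with EXT4_C2B() */
	long long free_blocks = 14400;
	long long dirty_blocks = 11000;
	long long watermark = 1024;   /* stands in for EXT4_FREECLUSTERS_WATERMARK */

	if (2 * free_blocks < 3 * dirty_blocks ||
	    free_blocks < dirty_blocks + watermark)
		printf("near ENOSPC: switch to non-delayed allocation\n");
	else
		printf("plenty of room: keep delayed allocation\n");
	return 0;
}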
@@ -2255,6 +2386,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
2255 | pgoff_t index; | 2386 | pgoff_t index; |
2256 | struct inode *inode = mapping->host; | 2387 | struct inode *inode = mapping->host; |
2257 | handle_t *handle; | 2388 | handle_t *handle; |
2389 | loff_t page_len; | ||
2258 | 2390 | ||
2259 | index = pos >> PAGE_CACHE_SHIFT; | 2391 | index = pos >> PAGE_CACHE_SHIFT; |
2260 | 2392 | ||
@@ -2301,6 +2433,13 @@ retry: | |||
2301 | */ | 2433 | */ |
2302 | if (pos + len > inode->i_size) | 2434 | if (pos + len > inode->i_size) |
2303 | ext4_truncate_failed_write(inode); | 2435 | ext4_truncate_failed_write(inode); |
2436 | } else { | ||
2437 | page_len = pos & (PAGE_CACHE_SIZE - 1); | ||
2438 | if (page_len > 0) { | ||
2439 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
2440 | inode, page, pos - page_len, page_len, | ||
2441 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
2442 | } | ||
2304 | } | 2443 | } |
2305 | 2444 | ||
2306 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2445 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
@@ -2343,6 +2482,7 @@ static int ext4_da_write_end(struct file *file, | |||
2343 | loff_t new_i_size; | 2482 | loff_t new_i_size; |
2344 | unsigned long start, end; | 2483 | unsigned long start, end; |
2345 | int write_mode = (int)(unsigned long)fsdata; | 2484 | int write_mode = (int)(unsigned long)fsdata; |
2485 | loff_t page_len; | ||
2346 | 2486 | ||
2347 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | 2487 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { |
2348 | if (ext4_should_order_data(inode)) { | 2488 | if (ext4_should_order_data(inode)) { |
@@ -2391,6 +2531,16 @@ static int ext4_da_write_end(struct file *file, | |||
2391 | } | 2531 | } |
2392 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2532 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
2393 | page, fsdata); | 2533 | page, fsdata); |
2534 | |||
2535 | page_len = PAGE_CACHE_SIZE - | ||
2536 | ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1)); | ||
2537 | |||
2538 | if (page_len > 0) { | ||
2539 | ret = ext4_discard_partial_page_buffers_no_lock(handle, | ||
2540 | inode, page, pos + copied - 1, page_len, | ||
2541 | EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED); | ||
2542 | } | ||
2543 | |||
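Both delalloc write paths now zero and unmap the part of the page that the write did not cover: the write_begin error path discards everything before 'pos', and write_end discards everything past the last copied byte. A sketch of that length arithmetic with example values (PAGE_CACHE_SIZE is stood in for by a plain constant):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;
	unsigned long long pos = 5000, copied = 300;

	/* write_begin failure: bytes of the page that precede 'pos' */
	unsigned long head_len = pos & (page_size - 1);

	/* write_end: bytes of the page from the last copied byte to the end */
	unsigned long tail_len = page_size -
		((pos + copied - 1) & (page_size - 1));

	printf("discard %lu bytes before pos, %lu bytes after the copied data\n",
	       head_len, tail_len);
	return 0;
}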
2394 | copied = ret2; | 2544 | copied = ret2; |
2395 | if (ret2 < 0) | 2545 | if (ret2 < 0) |
2396 | ret = ret2; | 2546 | ret = ret2; |
@@ -2685,10 +2835,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) | |||
2685 | * but being more careful is always safe for the future change. | 2835 | * but being more careful is always safe for the future change. |
2686 | */ | 2836 | */ |
2687 | inode = io_end->inode; | 2837 | inode = io_end->inode; |
2688 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 2838 | ext4_set_io_unwritten_flag(inode, io_end); |
2689 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | ||
2690 | atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); | ||
2691 | } | ||
2692 | 2839 | ||
2693 | /* Add the io_end to per-inode completed io list*/ | 2840 | /* Add the io_end to per-inode completed io list*/ |
2694 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 2841 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
@@ -2854,6 +3001,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
2854 | struct inode *inode = file->f_mapping->host; | 3001 | struct inode *inode = file->f_mapping->host; |
2855 | ssize_t ret; | 3002 | ssize_t ret; |
2856 | 3003 | ||
3004 | /* | ||
3005 | * If we are doing data journalling we don't support O_DIRECT | ||
3006 | */ | ||
3007 | if (ext4_should_journal_data(inode)) | ||
3008 | return 0; | ||
3009 | |||
2857 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | 3010 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); |
2858 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3011 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
2859 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3012 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
@@ -2923,6 +3076,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
2923 | .bmap = ext4_bmap, | 3076 | .bmap = ext4_bmap, |
2924 | .invalidatepage = ext4_invalidatepage, | 3077 | .invalidatepage = ext4_invalidatepage, |
2925 | .releasepage = ext4_releasepage, | 3078 | .releasepage = ext4_releasepage, |
3079 | .direct_IO = ext4_direct_IO, | ||
2926 | .is_partially_uptodate = block_is_partially_uptodate, | 3080 | .is_partially_uptodate = block_is_partially_uptodate, |
2927 | .error_remove_page = generic_error_remove_page, | 3081 | .error_remove_page = generic_error_remove_page, |
2928 | }; | 3082 | }; |
@@ -2959,6 +3113,227 @@ void ext4_set_aops(struct inode *inode) | |||
2959 | inode->i_mapping->a_ops = &ext4_journalled_aops; | 3113 | inode->i_mapping->a_ops = &ext4_journalled_aops; |
2960 | } | 3114 | } |
2961 | 3115 | ||
3116 | |||
3117 | /* | ||
3118 | * ext4_discard_partial_page_buffers() | ||
3119 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | ||
3120 | * This function finds and locks the page containing the offset | ||
3121 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | ||
3122 | * Calling functions that already have the page locked should call | ||
3123 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
3124 | */ | ||
3125 | int ext4_discard_partial_page_buffers(handle_t *handle, | ||
3126 | struct address_space *mapping, loff_t from, | ||
3127 | loff_t length, int flags) | ||
3128 | { | ||
3129 | struct inode *inode = mapping->host; | ||
3130 | struct page *page; | ||
3131 | int err = 0; | ||
3132 | |||
3133 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | ||
3134 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
3135 | if (!page) | ||
3136 | return -ENOMEM; | ||
3137 | |||
3138 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
3139 | from, length, flags); | ||
3140 | |||
3141 | unlock_page(page); | ||
3142 | page_cache_release(page); | ||
3143 | return err; | ||
3144 | } | ||
3145 | |||
3146 | /* | ||
3147 | * ext4_discard_partial_page_buffers_no_lock() | ||
3148 | * Zeros a page range of length 'length' starting from offset 'from'. | ||
3149 | * Buffer heads that correspond to the block aligned regions of the | ||
3150 | * zeroed range will be unmapped. Non-block-aligned regions | ||
3151 | * will have the corresponding buffer head mapped if needed so that | ||
3152 | * that region of the page can be updated with the partial zero out. | ||
3153 | * | ||
3154 | * This function assumes that the page has already been locked. The | ||
3155 | * range to be discarded must be contained within the given page. | ||
3156 | * If the specified range exceeds the end of the page it will be shortened | ||
3157 | * to the end of the page that corresponds to 'from'. This function is | ||
3158 | * appropriate for updating a page and its buffer heads to be unmapped and | ||
3159 | * zeroed for blocks that have been either released, or are going to be | ||
3160 | * released. | ||
3161 | * | ||
3162 | * handle: The journal handle | ||
3163 | * inode: The files inode | ||
3164 | * page: A locked page that contains the offset "from" | ||
3165 | * from: The starting byte offset (from the beginning of the file) | ||
3166 | * to begin discarding | ||
3167 | * len: The length of bytes to discard | ||
3168 | * flags: Optional flags that may be used: | ||
3169 | * | ||
3170 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
3171 | * Only zero the regions of the page whose buffer heads | ||
3172 | * have already been unmapped. This flag is appropriate | ||
3173 | * for updating the contents of a page whose blocks may | ||
3174 | * have already been released, and we only want to zero | ||
3175 | * out the regions that correspond to those released blocks. | ||
3176 | * | ||
3177 | * Returns zero on success or negative on failure. | ||
3178 | */ | ||
3179 | int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
3180 | struct inode *inode, struct page *page, loff_t from, | ||
3181 | loff_t length, int flags) | ||
3182 | { | ||
3183 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | ||
3184 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | ||
3185 | unsigned int blocksize, max, pos; | ||
3186 | ext4_lblk_t iblock; | ||
3187 | struct buffer_head *bh; | ||
3188 | int err = 0; | ||
3189 | |||
3190 | blocksize = inode->i_sb->s_blocksize; | ||
3191 | max = PAGE_CACHE_SIZE - offset; | ||
3192 | |||
3193 | if (index != page->index) | ||
3194 | return -EINVAL; | ||
3195 | |||
3196 | /* | ||
3197 | * correct length if it does not fall between | ||
3198 | * 'from' and the end of the page | ||
3199 | */ | ||
3200 | if (length > max || length < 0) | ||
3201 | length = max; | ||
3202 | |||
3203 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | ||
3204 | |||
3205 | if (!page_has_buffers(page)) { | ||
3206 | /* | ||
3207 | * If the range to be discarded covers a partial block | ||
3208 | * we need to get the page buffers. This is because | ||
3209 | * partial blocks cannot be released and the page needs | ||
3210 | * to be updated with the contents of the block before | ||
3211 | * we write the zeros on top of it. | ||
3212 | */ | ||
3213 | if ((from & (blocksize - 1)) || | ||
3214 | ((from + length) & (blocksize - 1))) { | ||
3215 | create_empty_buffers(page, blocksize, 0); | ||
3216 | } else { | ||
3217 | /* | ||
3218 | * If there are no partial blocks, | ||
3219 | * there is nothing to update, | ||
3220 | * so we can return now | ||
3221 | */ | ||
3222 | return 0; | ||
3223 | } | ||
3224 | } | ||
3225 | |||
3226 | /* Find the buffer that contains "offset" */ | ||
3227 | bh = page_buffers(page); | ||
3228 | pos = blocksize; | ||
3229 | while (offset >= pos) { | ||
3230 | bh = bh->b_this_page; | ||
3231 | iblock++; | ||
3232 | pos += blocksize; | ||
3233 | } | ||
3234 | |||
3235 | pos = offset; | ||
3236 | while (pos < offset + length) { | ||
3237 | unsigned int end_of_block, range_to_discard; | ||
3238 | |||
3239 | err = 0; | ||
3240 | |||
3241 | /* The length of space left to zero and unmap */ | ||
3242 | range_to_discard = offset + length - pos; | ||
3243 | |||
3244 | /* The length of space until the end of the block */ | ||
3245 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
3246 | |||
3247 | /* | ||
3248 | * Do not unmap or zero past end of block | ||
3249 | * for this buffer head | ||
3250 | */ | ||
3251 | if (range_to_discard > end_of_block) | ||
3252 | range_to_discard = end_of_block; | ||
3253 | |||
3254 | |||
3255 | /* | ||
3256 | * Skip this buffer head if we are only zeroing unmapped | ||
3257 | * regions of the page | ||
3258 | */ | ||
3259 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
3260 | buffer_mapped(bh)) | ||
3261 | goto next; | ||
3262 | |||
3263 | /* If the range is block aligned, unmap */ | ||
3264 | if (range_to_discard == blocksize) { | ||
3265 | clear_buffer_dirty(bh); | ||
3266 | bh->b_bdev = NULL; | ||
3267 | clear_buffer_mapped(bh); | ||
3268 | clear_buffer_req(bh); | ||
3269 | clear_buffer_new(bh); | ||
3270 | clear_buffer_delay(bh); | ||
3271 | clear_buffer_unwritten(bh); | ||
3272 | clear_buffer_uptodate(bh); | ||
3273 | zero_user(page, pos, range_to_discard); | ||
3274 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
3275 | goto next; | ||
3276 | } | ||
3277 | |||
3278 | /* | ||
3279 | * If this block is not completely contained in the range | ||
3280 | * to be discarded, then it is not going to be released. Because | ||
3281 | * we need to keep this block, we need to make sure this part | ||
3282 | * of the page is uptodate before we modify it by writing | ||
3283 | * partial zeros on it. | ||
3284 | */ | ||
3285 | if (!buffer_mapped(bh)) { | ||
3286 | /* | ||
3287 | * Buffer head must be mapped before we can read | ||
3288 | * from the block | ||
3289 | */ | ||
3290 | BUFFER_TRACE(bh, "unmapped"); | ||
3291 | ext4_get_block(inode, iblock, bh, 0); | ||
3292 | /* unmapped? It's a hole - nothing to do */ | ||
3293 | if (!buffer_mapped(bh)) { | ||
3294 | BUFFER_TRACE(bh, "still unmapped"); | ||
3295 | goto next; | ||
3296 | } | ||
3297 | } | ||
3298 | |||
3299 | /* Ok, it's mapped. Make sure it's up-to-date */ | ||
3300 | if (PageUptodate(page)) | ||
3301 | set_buffer_uptodate(bh); | ||
3302 | |||
3303 | if (!buffer_uptodate(bh)) { | ||
3304 | err = -EIO; | ||
3305 | ll_rw_block(READ, 1, &bh); | ||
3306 | wait_on_buffer(bh); | ||
3307 | /* Uhhuh. Read error. Complain and punt.*/ | ||
3308 | if (!buffer_uptodate(bh)) | ||
3309 | goto next; | ||
3310 | } | ||
3311 | |||
3312 | if (ext4_should_journal_data(inode)) { | ||
3313 | BUFFER_TRACE(bh, "get write access"); | ||
3314 | err = ext4_journal_get_write_access(handle, bh); | ||
3315 | if (err) | ||
3316 | goto next; | ||
3317 | } | ||
3318 | |||
3319 | zero_user(page, pos, range_to_discard); | ||
3320 | |||
3321 | err = 0; | ||
3322 | if (ext4_should_journal_data(inode)) { | ||
3323 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
3324 | } else | ||
3325 | mark_buffer_dirty(bh); | ||
3326 | |||
3327 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | ||
3328 | next: | ||
3329 | bh = bh->b_this_page; | ||
3330 | iblock++; | ||
3331 | pos += range_to_discard; | ||
3332 | } | ||
3333 | |||
3334 | return err; | ||
3335 | } | ||
3336 | |||
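The core loop of ext4_discard_partial_page_buffers_no_lock() walks the requested byte range block by block and either unmaps a buffer head outright (when the range covers the whole block) or only zeroes the covered part. A runnable userspace sketch of that per-buffer decision, with example page and block sizes:

#include <stdio.h>

int main(void)
{
	unsigned int blocksize = 1024, page_size = 4096;
	unsigned int from_off = 700, length = 2000;   /* byte range inside the page */
	unsigned int pos = from_off;

	/* shorten the range to the end of the page, as the kernel code does */
	if (length > page_size - from_off)
		length = page_size - from_off;

	while (pos < from_off + length) {
		unsigned int range = from_off + length - pos;
		unsigned int end_of_block = blocksize - (pos & (blocksize - 1));

		/* never zero or unmap past the end of this buffer head */
		if (range > end_of_block)
			range = end_of_block;

		printf("pos %4u: zero %4u bytes%s\n", pos, range,
		       range == blocksize ? " (whole block: unmap bh)" : " (partial: keep bh mapped)");
		pos += range;
	}
	return 0;
}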
2962 | /* | 3337 | /* |
2963 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | 3338 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
2964 | * up to the end of the block which corresponds to `from'. | 3339 | * up to the end of the block which corresponds to `from'. |
@@ -3001,7 +3376,7 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
3001 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3376 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
3002 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3377 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
3003 | if (!page) | 3378 | if (!page) |
3004 | return -EINVAL; | 3379 | return -ENOMEM; |
3005 | 3380 | ||
3006 | blocksize = inode->i_sb->s_blocksize; | 3381 | blocksize = inode->i_sb->s_blocksize; |
3007 | max = blocksize - (offset & (blocksize - 1)); | 3382 | max = blocksize - (offset & (blocksize - 1)); |
@@ -3070,11 +3445,8 @@ int ext4_block_zero_page_range(handle_t *handle, | |||
3070 | err = 0; | 3445 | err = 0; |
3071 | if (ext4_should_journal_data(inode)) { | 3446 | if (ext4_should_journal_data(inode)) { |
3072 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 3447 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
3073 | } else { | 3448 | } else |
3074 | if (ext4_should_order_data(inode) && EXT4_I(inode)->jinode) | ||
3075 | err = ext4_jbd2_file_inode(handle, inode); | ||
3076 | mark_buffer_dirty(bh); | 3449 | mark_buffer_dirty(bh); |
3077 | } | ||
3078 | 3450 | ||
3079 | unlock: | 3451 | unlock: |
3080 | unlock_page(page); | 3452 | unlock_page(page); |
@@ -3115,6 +3487,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
3115 | return -ENOTSUPP; | 3487 | return -ENOTSUPP; |
3116 | } | 3488 | } |
3117 | 3489 | ||
3490 | if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) { | ||
3491 | /* TODO: Add support for bigalloc file systems */ | ||
3492 | return -ENOTSUPP; | ||
3493 | } | ||
3494 | |||
3118 | return ext4_ext_punch_hole(file, offset, length); | 3495 | return ext4_ext_punch_hole(file, offset, length); |
3119 | } | 3496 | } |
3120 | 3497 | ||
@@ -3414,7 +3791,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
3414 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 3791 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
3415 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 3792 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
3416 | } | 3793 | } |
3417 | inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); | 3794 | set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); |
3418 | 3795 | ||
3419 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ | 3796 | ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ |
3420 | ei->i_dir_start_lookup = 0; | 3797 | ei->i_dir_start_lookup = 0; |
@@ -4416,6 +4793,7 @@ retry_alloc: | |||
4416 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { | 4793 | PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) { |
4417 | unlock_page(page); | 4794 | unlock_page(page); |
4418 | ret = VM_FAULT_SIGBUS; | 4795 | ret = VM_FAULT_SIGBUS; |
4796 | ext4_journal_stop(handle); | ||
4419 | goto out; | 4797 | goto out; |
4420 | } | 4798 | } |
4421 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); | 4799 | ext4_set_inode_state(inode, EXT4_STATE_JDATA); |