Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--  fs/ext4/mballoc.c  331
1 file changed, 206 insertions(+), 125 deletions(-)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 17a5a57c415a..e2d8be8f28bf 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -70,8 +70,8 @@
  *
  * pa_lstart -> the logical start block for this prealloc space
  * pa_pstart -> the physical start block for this prealloc space
- * pa_len -> length for this prealloc space
- * pa_free -> free space available in this prealloc space
+ * pa_len -> length for this prealloc space (in clusters)
+ * pa_free -> free space available in this prealloc space (in clusters)
  *
  * The inode preallocation space is used looking at the _logical_ start
  * block. If only the logical file block falls within the range of prealloc
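
Most of the mechanical changes below go through the cluster/block conversion helpers. As a rough sketch (simplified from the definitions in fs/ext4/ext4.h, so treat the exact expressions as an approximation rather than the authoritative header):

/* Simplified sketch of the conversion helpers used throughout this
 * patch.  s_cluster_bits is log2(blocks per cluster) and
 * s_cluster_ratio is blocks per cluster; without bigalloc they are
 * 0 and 1, so every conversion collapses to the identity. */
#define EXT4_C2B(sbi, cluster)  ((cluster) << (sbi)->s_cluster_bits)
#define EXT4_B2C(sbi, block)    ((block) >> (sbi)->s_cluster_bits)
/* round a block count up to a whole number of clusters */
#define EXT4_NUM_B2C(sbi, blks) \
        (((blks) + (sbi)->s_cluster_ratio - 1) >> (sbi)->s_cluster_bits)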
@@ -126,7 +126,8 @@
  * list. In case of inode preallocation we follow a list of heuristics
  * based on file size. This can be found in ext4_mb_normalize_request. If
  * we are doing a group prealloc we try to normalize the request to
- * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
+ * sbi->s_mb_group_prealloc. The default value of s_mb_group_prealloc is
+ * dependent on the cluster size; for non-bigalloc file systems, it is
  * 512 blocks. This can be tuned via
  * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
  * terms of number of blocks. If we have mounted the file system with -O
@@ -459,7 +460,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
 		ext4_fsblk_t blocknr;
 
 		blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
-		blocknr += first + i;
+		blocknr += EXT4_C2B(EXT4_SB(sb), first + i);
 		ext4_grp_locked_error(sb, e4b->bd_group,
 				      inode ? inode->i_ino : 0,
 				      blocknr,
@@ -580,7 +581,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
 			continue;
 		}
 
-		/* both bits in buddy2 must be 0 */
+		/* both bits in buddy2 must be 1 */
 		MB_CHECK_ASSERT(mb_test_bit(i << 1, buddy2));
 		MB_CHECK_ASSERT(mb_test_bit((i << 1) + 1, buddy2));
 
@@ -653,7 +654,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
 	ext4_grpblk_t chunk;
 	unsigned short border;
 
-	BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
+	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
 
 	border = 2 << sb->s_blocksize_bits;
 
@@ -705,7 +706,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 				void *buddy, void *bitmap, ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-	ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
+	ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
 	ext4_grpblk_t i = 0;
 	ext4_grpblk_t first;
 	ext4_grpblk_t len;
@@ -734,7 +735,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 
 	if (free != grp->bb_free) {
 		ext4_grp_locked_error(sb, group, 0, 0,
-				      "%u blocks in bitmap, %u in gd",
+				      "%u clusters in bitmap, %u in gd",
 				      free, grp->bb_free);
 		/*
 		 * If we intend to continue, we consider the group descriptor
@@ -1339,7 +1340,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
 			ext4_fsblk_t blocknr;
 
 			blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
-			blocknr += block;
+			blocknr += EXT4_C2B(EXT4_SB(sb), block);
 			ext4_grp_locked_error(sb, e4b->bd_group,
 					      inode ? inode->i_ino : 0,
 					      blocknr,
@@ -1390,7 +1391,6 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
 {
 	int next = block;
 	int max;
-	int ord;
 	void *buddy;
 
 	assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
@@ -1432,9 +1432,8 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
 		if (mb_test_bit(next, EXT4_MB_BITMAP(e4b)))
 			break;
 
-		ord = mb_find_order_for_block(e4b, next);
+		order = mb_find_order_for_block(e4b, next);
 
-		order = ord;
 		block = next >> order;
 		ex->fe_len += 1 << order;
 	}
@@ -1624,8 +1623,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
 	struct ext4_free_extent *gex = &ac->ac_g_ex;
 
 	BUG_ON(ex->fe_len <= 0);
-	BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
-	BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+	BUG_ON(ex->fe_len > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+	BUG_ON(ex->fe_start >= EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
 	BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
 
 	ac->ac_found++;
@@ -1823,15 +1822,15 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 
 	while (free && ac->ac_status == AC_STATUS_CONTINUE) {
 		i = mb_find_next_zero_bit(bitmap,
-						EXT4_BLOCKS_PER_GROUP(sb), i);
-		if (i >= EXT4_BLOCKS_PER_GROUP(sb)) {
+						EXT4_CLUSTERS_PER_GROUP(sb), i);
+		if (i >= EXT4_CLUSTERS_PER_GROUP(sb)) {
 			/*
 			 * If we have a corrupt bitmap, we won't find any
 			 * free blocks even though group info says we
 			 * have free blocks
 			 */
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
-					"%d free blocks as per "
+					"%d free clusters as per "
 					"group info. But bitmap says 0",
 					free);
 			break;
@@ -1841,7 +1840,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 		BUG_ON(ex.fe_len <= 0);
 		if (free < ex.fe_len) {
 			ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
-					"%d free blocks as per "
+					"%d free clusters as per "
 					"group info. But got %d blocks",
 					free, ex.fe_len);
 			/*
@@ -1887,7 +1886,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
 	do_div(a, sbi->s_stripe);
 	i = (a * sbi->s_stripe) - first_group_block;
 
-	while (i < EXT4_BLOCKS_PER_GROUP(sb)) {
+	while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
 		if (!mb_test_bit(i, bitmap)) {
 			max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
 			if (max >= sbi->s_stripe) {
@@ -2252,10 +2251,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	 */
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		meta_group_info[i]->bb_free =
-			ext4_free_blocks_after_init(sb, group, desc);
+			ext4_free_clusters_after_init(sb, group, desc);
 	} else {
 		meta_group_info[i]->bb_free =
-			ext4_free_blks_count(sb, desc);
+			ext4_free_group_clusters(sb, desc);
 	}
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -2473,7 +2472,20 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	sbi->s_mb_stats = MB_DEFAULT_STATS;
 	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
 	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
-	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
+	/*
+	 * The default group preallocation is 512, which for 4k block
+	 * sizes translates to 2 megabytes.  However for bigalloc file
+	 * systems, this is probably too big (i.e., if the cluster size
+	 * is 1 megabyte, then group preallocation size becomes half a
+	 * gigabyte!).  As a default, we will keep a two megabyte
+	 * group prealloc size for cluster sizes up to 64k, and after
+	 * that, we will force a minimum group preallocation size of
+	 * 32 clusters.  This translates to 8 megs when the cluster
+	 * size is 256k, and 32 megs when the cluster size is 1 meg,
+	 * which seems reasonable as a default.
+	 */
+	sbi->s_mb_group_prealloc = max(MB_DEFAULT_GROUP_PREALLOC >>
+				       sbi->s_cluster_bits, 32);
 	/*
 	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
 	 * to the lowest multiple of s_stripe which is bigger than
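
To make the comment above concrete, here is a small standalone sketch of how the new default behaves (assuming 4k blocks and MB_DEFAULT_GROUP_PREALLOC == 512; the helper name is made up for illustration):

#include <stdio.h>

/* Illustration only: the new default group preallocation in clusters,
 * mirroring max(MB_DEFAULT_GROUP_PREALLOC >> s_cluster_bits, 32). */
static int default_group_prealloc(int cluster_bits)
{
        int clusters = 512 >> cluster_bits;

        return clusters > 32 ? clusters : 32;
}

int main(void)
{
        /* With 4k blocks:
         *   cluster_bits 0 (no bigalloc): 512 clusters = 2 MB
         *   cluster_bits 4 (64k cluster): 32 clusters  = 2 MB
         *   cluster_bits 8 (1M cluster):  512 >> 8 = 2, clamped to 32 = 32 MB
         */
        printf("%d %d %d\n", default_group_prealloc(0),
               default_group_prealloc(4), default_group_prealloc(8));
        return 0;
}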
@@ -2490,7 +2502,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
 	if (sbi->s_locality_groups == NULL) {
 		ret = -ENOMEM;
-		goto out;
+		goto out_free_groupinfo_slab;
 	}
 	for_each_possible_cpu(i) {
 		struct ext4_locality_group *lg;
@@ -2503,9 +2515,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
 	/* init file for buddy data */
 	ret = ext4_mb_init_backend(sb);
-	if (ret != 0) {
-		goto out;
-	}
+	if (ret != 0)
+		goto out_free_locality_groups;
 
 	if (sbi->s_proc)
 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
@@ -2513,11 +2524,19 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
 	if (sbi->s_journal)
 		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
+	return 0;
+
+out_free_locality_groups:
+	free_percpu(sbi->s_locality_groups);
+	sbi->s_locality_groups = NULL;
+out_free_groupinfo_slab:
+	ext4_groupinfo_destroy_slabs();
 out:
-	if (ret) {
-		kfree(sbi->s_mb_offsets);
-		kfree(sbi->s_mb_maxs);
-	}
+	kfree(sbi->s_mb_offsets);
+	sbi->s_mb_offsets = NULL;
+	kfree(sbi->s_mb_maxs);
+	sbi->s_mb_maxs = NULL;
 	return ret;
 }
 
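The reworked error path above is the usual kernel staged-unwind idiom: each out_* label releases only what was successfully set up before the point of failure, in reverse order. A generic, self-contained sketch of the pattern (hypothetical two-stage setup, not ext4 code):

#include <errno.h>
#include <stdlib.h>

/* Hypothetical two-stage setup mirroring the out_free_* unwind labels
 * introduced in ext4_mb_init() above. */
static int setup_two_stages(void **a, void **b)
{
        int ret = -ENOMEM;

        *a = malloc(16);
        if (*a == NULL)
                goto out;               /* nothing to unwind yet */
        *b = malloc(16);
        if (*b == NULL)
                goto out_free_a;        /* unwind only the first stage */
        return 0;

out_free_a:
        free(*a);
        *a = NULL;
out:
        return ret;
}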
@@ -2602,11 +2621,13 @@ int ext4_mb_release(struct super_block *sb)
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
-		ext4_group_t block_group, ext4_grpblk_t block, int count)
+		ext4_group_t block_group, ext4_grpblk_t cluster, int count)
 {
 	ext4_fsblk_t discard_block;
 
-	discard_block = block + ext4_group_first_block_no(sb, block_group);
+	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
+			 ext4_group_first_block_no(sb, block_group));
+	count = EXT4_C2B(EXT4_SB(sb), count);
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
 	return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
@@ -2633,7 +2654,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 
 		if (test_opt(sb, DISCARD))
 			ext4_issue_discard(sb, entry->group,
-					   entry->start_blk, entry->count);
+					   entry->start_cluster, entry->count);
 
 		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
 		/* we expect to find existing buddy because it's pinned */
@@ -2646,7 +2667,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 		ext4_lock_group(sb, entry->group);
 		/* Take it out of per group rb tree */
 		rb_erase(&entry->node, &(db->bb_free_root));
-		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+		mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count);
 
 		/*
 		 * Clear the trimmed flag for the group so that the next
@@ -2752,7 +2773,7 @@ void ext4_exit_mballoc(void)
  */
 static noinline_for_stack int
 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
-				handle_t *handle, unsigned int reserv_blks)
+				handle_t *handle, unsigned int reserv_clstrs)
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct ext4_group_desc *gdp;
@@ -2783,7 +2804,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		goto out_err;
 
 	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
-			ext4_free_blks_count(sb, gdp));
+			ext4_free_group_clusters(sb, gdp));
 
 	err = ext4_journal_get_write_access(handle, gdp_bh);
 	if (err)
@@ -2791,7 +2812,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 
 	block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
 
-	len = ac->ac_b_ex.fe_len;
+	len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
 	if (!ext4_data_block_valid(sbi, block, len)) {
 		ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
 			   "fs metadata\n", block, block+len);
@@ -2823,28 +2844,29 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		      ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-		ext4_free_blks_set(sb, gdp,
-			     ext4_free_blocks_after_init(sb,
+		ext4_free_group_clusters_set(sb, gdp,
+			     ext4_free_clusters_after_init(sb,
 					ac->ac_b_ex.fe_group, gdp));
 	}
-	len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
-	ext4_free_blks_set(sb, gdp, len);
+	len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
+	ext4_free_group_clusters_set(sb, gdp, len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 
 	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
-	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+	percpu_counter_sub(&sbi->s_freeclusters_counter, ac->ac_b_ex.fe_len);
 	/*
 	 * Now reduce the dirty block count also. Should not go negative
 	 */
 	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
 		/* release all the reserved blocks if non delalloc */
-		percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
+		percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+				   reserv_clstrs);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
 						  ac->ac_b_ex.fe_group);
 		atomic_sub(ac->ac_b_ex.fe_len,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 	}
 
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -2886,6 +2908,7 @@ static noinline_for_stack void
 ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 				struct ext4_allocation_request *ar)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	int bsbits, max;
 	ext4_lblk_t end;
 	loff_t size, orig_size, start_off;
@@ -2916,7 +2939,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
 	/* first, let's learn actual file size
 	 * given current request is allocated */
-	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
+	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
 	size = size << bsbits;
 	if (size < i_size_read(ac->ac_inode))
 		size = i_size_read(ac->ac_inode);
@@ -2988,7 +3011,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 			continue;
 		}
 
-		pa_end = pa->pa_lstart + pa->pa_len;
+		pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
+						  pa->pa_len);
 
 		/* PA must not overlap original request */
 		BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -3018,9 +3042,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	rcu_read_lock();
 	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
 		ext4_lblk_t pa_end;
+
 		spin_lock(&pa->pa_lock);
 		if (pa->pa_deleted == 0) {
-			pa_end = pa->pa_lstart + pa->pa_len;
+			pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
+							  pa->pa_len);
 			BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
 		}
 		spin_unlock(&pa->pa_lock);
@@ -3036,14 +3062,14 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	}
 	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical);
-	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
+	BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
 
 	/* now prepare goal request */
 
 	/* XXX: is it better to align blocks WRT to logical
 	 * placement or satisfy big request as is */
 	ac->ac_g_ex.fe_logical = start;
-	ac->ac_g_ex.fe_len = size;
+	ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
 
 	/* define goal start in order to merge */
 	if (ar->pright && (ar->lright == (start + size))) {
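
The goal length now has to be expressed in clusters, and EXT4_NUM_B2C() rounds the block count up so a partial cluster still reserves a whole one. A self-contained check with hypothetical numbers:

#include <assert.h>

/* Round-up block-to-cluster conversion as used for ac_g_ex.fe_len. */
static int num_b2c(int blks, int cluster_bits)
{
        int ratio = 1 << cluster_bits;

        return (blks + ratio - 1) >> cluster_bits;
}

int main(void)
{
        /* 16-block clusters (cluster_bits == 4) */
        assert(num_b2c(100, 4) == 7);   /* 100 blocks round up to 7 clusters */
        assert(num_b2c(112, 4) == 7);   /* an exact multiple stays at 7 */
        assert(num_b2c(113, 4) == 8);
        return 0;
}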
@@ -3112,14 +3138,16 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 				struct ext4_prealloc_space *pa)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	ext4_fsblk_t start;
 	ext4_fsblk_t end;
 	int len;
 
 	/* found preallocated blocks, use them */
 	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
-	end = min(pa->pa_pstart + pa->pa_len, start + ac->ac_o_ex.fe_len);
-	len = end - start;
+	end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
+		  start + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
+	len = EXT4_NUM_B2C(sbi, end - start);
 	ext4_get_group_no_and_offset(ac->ac_sb, start, &ac->ac_b_ex.fe_group,
 					&ac->ac_b_ex.fe_start);
 	ac->ac_b_ex.fe_len = len;
@@ -3127,7 +3155,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 	ac->ac_pa = pa;
 
 	BUG_ON(start < pa->pa_pstart);
-	BUG_ON(start + len > pa->pa_pstart + pa->pa_len);
+	BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
 	BUG_ON(pa->pa_free < len);
 	pa->pa_free -= len;
 
@@ -3193,6 +3221,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
 static noinline_for_stack int
 ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	int order, i;
 	struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
 	struct ext4_locality_group *lg;
@@ -3210,12 +3239,14 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 		/* all fields in this condition don't change,
 		 * so we can skip locking for them */
 		if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
-		    ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
+		    ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
+					       EXT4_C2B(sbi, pa->pa_len)))
 			continue;
 
 		/* non-extent files can't have physical blocks past 2^32 */
 		if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
-		    pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
+		    (pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len) >
+		     EXT4_MAX_BLOCK_FILE_PHYS))
 			continue;
 
 		/* found preallocated blocks, use them */
@@ -3291,7 +3322,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		ext4_set_bits(bitmap, entry->start_blk, entry->count);
+		ext4_set_bits(bitmap, entry->start_cluster, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3312,7 +3343,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 	ext4_group_t groupnr;
 	ext4_grpblk_t start;
 	int preallocated = 0;
-	int count = 0;
 	int len;
 
 	/* all form of preallocation discards first load group,
@@ -3335,7 +3365,6 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 		BUG_ON(groupnr != group);
 		ext4_set_bits(bitmap, start, len);
 		preallocated += len;
-		count++;
 	}
 	mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
 }
@@ -3412,6 +3441,7 @@ static noinline_for_stack int
 ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
 	struct super_block *sb = ac->ac_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_prealloc_space *pa;
 	struct ext4_group_info *grp;
 	struct ext4_inode_info *ei;
@@ -3443,16 +3473,18 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 		winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
 
 		/* also, we should cover whole original request */
-		wins = ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len;
+		wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
 
 		/* the smallest one defines real window */
 		win = min(winl, wins);
 
-		offs = ac->ac_o_ex.fe_logical % ac->ac_b_ex.fe_len;
+		offs = ac->ac_o_ex.fe_logical %
+			EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
 		if (offs && offs < win)
 			win = offs;
 
-		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - win;
+		ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
+			EXT4_B2C(sbi, win);
 		BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
 		BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
 	}
@@ -3477,7 +3509,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 	trace_ext4_mb_new_inode_pa(ac, pa);
 
 	ext4_mb_use_inode_pa(ac, pa);
-	atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
+	atomic_add(pa->pa_free, &sbi->s_mb_preallocated);
 
 	ei = EXT4_I(ac->ac_inode);
 	grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
@@ -3592,7 +3624,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
-	grp_blk_start = pa->pa_pstart - bit;
+	grp_blk_start = pa->pa_pstart - EXT4_C2B(sbi, bit);
 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
 	end = bit + pa->pa_len;
 
@@ -3607,7 +3639,8 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 		free += next - bit;
 
 		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
-		trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
+		trace_ext4_mb_release_inode_pa(pa, (grp_blk_start +
+						    EXT4_C2B(sbi, bit)),
 					       next - bit);
 		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
 		bit = next + 1;
@@ -3690,7 +3723,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 	}
 
 	if (needed == 0)
-		needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
+		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
 
 	INIT_LIST_HEAD(&list);
 repeat:
@@ -3958,7 +3991,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 	if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
 		return;
 
-	size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
+	size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
 	isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
 		>> bsbits;
 
@@ -3969,6 +4002,11 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
 		return;
 	}
 
+	if (sbi->s_mb_group_prealloc <= 0) {
+		ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
+		return;
+	}
+
 	/* don't use group allocation for large files */
 	size = max(size, isize);
 	if (size > sbi->s_mb_stream_request) {
@@ -4007,8 +4045,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	len = ar->len;
 
 	/* just a dirty hack to filter too big requests */
-	if (len >= EXT4_BLOCKS_PER_GROUP(sb) - 10)
-		len = EXT4_BLOCKS_PER_GROUP(sb) - 10;
+	if (len >= EXT4_CLUSTERS_PER_GROUP(sb) - 10)
+		len = EXT4_CLUSTERS_PER_GROUP(sb) - 10;
 
 	/* start searching from the goal */
 	goal = ar->goal;
@@ -4019,18 +4057,15 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 
 	/* set up allocation goals */
 	memset(ac, 0, sizeof(struct ext4_allocation_context));
-	ac->ac_b_ex.fe_logical = ar->logical;
+	ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
 	ac->ac_status = AC_STATUS_CONTINUE;
 	ac->ac_sb = sb;
 	ac->ac_inode = ar->inode;
-	ac->ac_o_ex.fe_logical = ar->logical;
+	ac->ac_o_ex.fe_logical = ac->ac_b_ex.fe_logical;
 	ac->ac_o_ex.fe_group = group;
 	ac->ac_o_ex.fe_start = block;
 	ac->ac_o_ex.fe_len = len;
-	ac->ac_g_ex.fe_logical = ar->logical;
-	ac->ac_g_ex.fe_group = group;
-	ac->ac_g_ex.fe_start = block;
-	ac->ac_g_ex.fe_len = len;
+	ac->ac_g_ex = ac->ac_o_ex;
 	ac->ac_flags = ar->flags;
 
 	/* we have to define context: will we work with a file or
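
Note the new fe_logical expression: it rounds the logical block down to a cluster boundary, and because s_cluster_ratio is a power of two the rounding is a simple mask. A tiny self-contained check (ratio of 16 is hypothetical):

#include <assert.h>

int main(void)
{
        unsigned int ratio = 16;        /* s_cluster_ratio, a power of two */

        /* logical block 37 falls in the cluster starting at block 32 */
        assert((37u & ~(ratio - 1)) == 32);
        /* an already-aligned block is unchanged */
        assert((32u & ~(ratio - 1)) == 32);
        return 0;
}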
@@ -4182,13 +4217,14 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
  */
 static int ext4_mb_release_context(struct ext4_allocation_context *ac)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
 	struct ext4_prealloc_space *pa = ac->ac_pa;
 	if (pa) {
 		if (pa->pa_type == MB_GROUP_PA) {
 			/* see comment in ext4_mb_use_group_pa() */
 			spin_lock(&pa->pa_lock);
-			pa->pa_pstart += ac->ac_b_ex.fe_len;
-			pa->pa_lstart += ac->ac_b_ex.fe_len;
+			pa->pa_pstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+			pa->pa_lstart += EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
 			pa->pa_free -= ac->ac_b_ex.fe_len;
 			pa->pa_len -= ac->ac_b_ex.fe_len;
 			spin_unlock(&pa->pa_lock);
@@ -4249,13 +4285,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	struct super_block *sb;
 	ext4_fsblk_t block = 0;
 	unsigned int inquota = 0;
-	unsigned int reserv_blks = 0;
+	unsigned int reserv_clstrs = 0;
 
 	sb = ar->inode->i_sb;
 	sbi = EXT4_SB(sb);
 
 	trace_ext4_request_blocks(ar);
 
+	/* Allow to use superuser reservation for quota file */
+	if (IS_NOQUOTA(ar->inode))
+		ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
+
 	/*
 	 * For delayed allocation, we could skip the ENOSPC and
 	 * EDQUOT check, as blocks and quotas have been already
@@ -4269,7 +4309,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	 * and verify allocation doesn't exceed the quota limits.
 	 */
 	while (ar->len &&
-			ext4_claim_free_blocks(sbi, ar->len, ar->flags)) {
+			ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
 
 		/* let others to free the space */
 		yield();
@@ -4279,12 +4319,14 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 		*errp = -ENOSPC;
 		return 0;
 	}
-	reserv_blks = ar->len;
+	reserv_clstrs = ar->len;
 	if (ar->flags & EXT4_MB_USE_ROOT_BLOCKS) {
-		dquot_alloc_block_nofail(ar->inode, ar->len);
+		dquot_alloc_block_nofail(ar->inode,
+					 EXT4_C2B(sbi, ar->len));
 	} else {
 		while (ar->len &&
-				dquot_alloc_block(ar->inode, ar->len)) {
+				dquot_alloc_block(ar->inode,
+						  EXT4_C2B(sbi, ar->len))) {
 
 			ar->flags |= EXT4_MB_HINT_NOPREALLOC;
 			ar->len--;
@@ -4328,7 +4370,7 @@ repeat:
 			ext4_mb_new_preallocation(ac);
 	}
 	if (likely(ac->ac_status == AC_STATUS_FOUND)) {
-		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
+		*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
 		if (*errp == -EAGAIN) {
 			/*
 			 * drop the reference that we took
@@ -4364,13 +4406,13 @@ out:
 	if (ac)
 		kmem_cache_free(ext4_ac_cachep, ac);
 	if (inquota && ar->len < inquota)
-		dquot_free_block(ar->inode, inquota - ar->len);
+		dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
 	if (!ar->len) {
 		if (!ext4_test_inode_state(ar->inode,
 					   EXT4_STATE_DELALLOC_RESERVED))
 			/* release all the reserved blocks if non delalloc */
-			percpu_counter_sub(&sbi->s_dirtyblocks_counter,
-						reserv_blks);
+			percpu_counter_sub(&sbi->s_dirtyclusters_counter,
+						reserv_clstrs);
 	}
 
 	trace_ext4_allocate_blocks(ar, (unsigned long long)block);
@@ -4388,7 +4430,7 @@ static int can_merge(struct ext4_free_data *entry1,
 {
 	if ((entry1->t_tid == entry2->t_tid) &&
 	    (entry1->group == entry2->group) &&
-	    ((entry1->start_blk + entry1->count) == entry2->start_blk))
+	    ((entry1->start_cluster + entry1->count) == entry2->start_cluster))
 		return 1;
 	return 0;
 }
@@ -4398,7 +4440,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 		struct ext4_free_data *new_entry)
 {
 	ext4_group_t group = e4b->bd_group;
-	ext4_grpblk_t block;
+	ext4_grpblk_t cluster;
 	struct ext4_free_data *entry;
 	struct ext4_group_info *db = e4b->bd_info;
 	struct super_block *sb = e4b->bd_sb;
@@ -4411,7 +4453,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	BUG_ON(e4b->bd_buddy_page == NULL);
 
 	new_node = &new_entry->node;
-	block = new_entry->start_blk;
+	cluster = new_entry->start_cluster;
 
 	if (!*n) {
 		/* first free block extent. We need to
@@ -4425,13 +4467,14 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	while (*n) {
 		parent = *n;
 		entry = rb_entry(parent, struct ext4_free_data, node);
-		if (block < entry->start_blk)
+		if (cluster < entry->start_cluster)
 			n = &(*n)->rb_left;
-		else if (block >= (entry->start_blk + entry->count))
+		else if (cluster >= (entry->start_cluster + entry->count))
 			n = &(*n)->rb_right;
 		else {
 			ext4_grp_locked_error(sb, group, 0,
-				ext4_group_first_block_no(sb, group) + block,
+				ext4_group_first_block_no(sb, group) +
+				EXT4_C2B(sbi, cluster),
 				"Block already on to-be-freed list");
 			return 0;
 		}
@@ -4445,7 +4488,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	if (node) {
 		entry = rb_entry(node, struct ext4_free_data, node);
 		if (can_merge(entry, new_entry)) {
-			new_entry->start_blk = entry->start_blk;
+			new_entry->start_cluster = entry->start_cluster;
 			new_entry->count += entry->count;
 			rb_erase(node, &(db->bb_free_root));
 			spin_lock(&sbi->s_md_lock);
@@ -4496,6 +4539,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	ext4_group_t block_group;
 	struct ext4_sb_info *sbi;
 	struct ext4_buddy e4b;
+	unsigned int count_clusters;
 	int err = 0;
 	int ret;
 
@@ -4544,6 +4588,38 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	if (!ext4_should_writeback_data(inode))
 		flags |= EXT4_FREE_BLOCKS_METADATA;
 
+	/*
+	 * If the extent to be freed does not begin on a cluster
+	 * boundary, we need to deal with partial clusters at the
+	 * beginning and end of the extent.  Normally we will free
+	 * blocks at the beginning or the end unless we are explicitly
+	 * requested to avoid doing so.
+	 */
+	overflow = block & (sbi->s_cluster_ratio - 1);
+	if (overflow) {
+		if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
+			overflow = sbi->s_cluster_ratio - overflow;
+			block += overflow;
+			if (count > overflow)
+				count -= overflow;
+			else
+				return;
+		} else {
+			block -= overflow;
+			count += overflow;
+		}
+	}
+	overflow = count & (sbi->s_cluster_ratio - 1);
+	if (overflow) {
+		if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
+			if (count > overflow)
+				count -= overflow;
+			else
+				return;
+		} else
+			count += sbi->s_cluster_ratio - overflow;
+	}
+
 do_more:
 	overflow = 0;
 	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
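
In the common case (neither NOFREE flag set), the code above simply widens the extent being freed to cluster boundaries so that only whole clusters reach the allocator. A self-contained sketch with hypothetical numbers:

#include <assert.h>

/* Sketch of the head/tail rounding above when neither NOFREE_* flag
 * is set.  A ratio of 16 blocks per cluster is hypothetical. */
static void widen_to_clusters(unsigned long long *block,
                              unsigned long *count, unsigned int ratio)
{
        unsigned int overflow = *block & (ratio - 1);

        if (overflow) {                 /* partial cluster at the head */
                *block -= overflow;
                *count += overflow;
        }
        overflow = *count & (ratio - 1);
        if (overflow)                   /* partial cluster at the tail */
                *count += ratio - overflow;
}

int main(void)
{
        unsigned long long block = 1000;        /* 1000 = 62 * 16 + 8 */
        unsigned long count = 20;

        widen_to_clusters(&block, &count, 16);
        assert(block == 992 && count == 32);    /* clusters 62 and 63 */
        return 0;
}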
@@ -4552,10 +4628,12 @@ do_more:
 	 * Check to see if we are freeing blocks across a group
 	 * boundary.
 	 */
-	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
-		overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
+	if (EXT4_C2B(sbi, bit) + count > EXT4_BLOCKS_PER_GROUP(sb)) {
+		overflow = EXT4_C2B(sbi, bit) + count -
+			EXT4_BLOCKS_PER_GROUP(sb);
 		count -= overflow;
 	}
+	count_clusters = EXT4_B2C(sbi, count);
 	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
 	if (!bitmap_bh) {
 		err = -EIO;
@@ -4570,9 +4648,9 @@ do_more:
 	if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
 	    in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
 	    in_range(block, ext4_inode_table(sb, gdp),
-		      EXT4_SB(sb)->s_itb_per_group) ||
+		     EXT4_SB(sb)->s_itb_per_group) ||
 	    in_range(block + count - 1, ext4_inode_table(sb, gdp),
-		      EXT4_SB(sb)->s_itb_per_group)) {
+		     EXT4_SB(sb)->s_itb_per_group)) {
 
 		ext4_error(sb, "Freeing blocks in system zone - "
 			   "Block = %llu, count = %lu", block, count);
@@ -4597,11 +4675,11 @@ do_more:
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
-		for (i = 0; i < count; i++)
+		for (i = 0; i < count_clusters; i++)
 			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
 	}
 #endif
-	trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
+	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
 
 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
 	if (err)
@@ -4618,13 +4696,13 @@ do_more:
 			err = -ENOMEM;
 			goto error_return;
 		}
-		new_entry->start_blk = bit;
+		new_entry->start_cluster = bit;
 		new_entry->group = block_group;
-		new_entry->count = count;
+		new_entry->count = count_clusters;
 		new_entry->t_tid = handle->h_transaction->t_tid;
 
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(bitmap_bh->b_data, bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
 		ext4_mb_free_metadata(handle, &e4b, new_entry);
 	} else {
 		/* need to update group_info->bb_free and bitmap
@@ -4632,25 +4710,29 @@ do_more:
 		 * them with group lock_held
 		 */
 		ext4_lock_group(sb, block_group);
-		mb_clear_bits(bitmap_bh->b_data, bit, count);
-		mb_free_blocks(inode, &e4b, bit, count);
+		mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
+		mb_free_blocks(inode, &e4b, bit, count_clusters);
 	}
 
-	ret = ext4_free_blks_count(sb, gdp) + count;
-	ext4_free_blks_set(sb, gdp, ret);
+	ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
+	ext4_free_group_clusters_set(sb, gdp, ret);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
 	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeblocks_counter, count);
+	percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
+		atomic_add(count_clusters,
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 	}
 
 	ext4_mb_unload_buddy(&e4b);
 
 	freed += count;
 
+	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
+
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -4669,8 +4751,6 @@ do_more:
 	}
 	ext4_mark_super_dirty(sb);
 error_return:
-	if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
-		dquot_free_block(inode, freed);
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
 	return;
@@ -4778,16 +4858,17 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 	ext4_lock_group(sb, block_group);
 	mb_clear_bits(bitmap_bh->b_data, bit, count);
 	mb_free_blocks(NULL, &e4b, bit, count);
-	blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
-	ext4_free_blks_set(sb, desc, blk_free_count);
+	blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
+	ext4_free_group_clusters_set(sb, desc, blk_free_count);
 	desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
 	ext4_unlock_group(sb, block_group);
-	percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
+	percpu_counter_add(&sbi->s_freeclusters_counter,
+			   EXT4_B2C(sbi, blocks_freed));
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(blocks_freed,
-			   &sbi->s_flex_groups[flex_group].free_blocks);
+		atomic_add(EXT4_B2C(sbi, blocks_freed),
+			   &sbi->s_flex_groups[flex_group].free_clusters);
 	}
 
 	ext4_mb_unload_buddy(&e4b);
@@ -4948,7 +5029,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	struct ext4_group_info *grp;
 	ext4_group_t first_group, last_group;
 	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
-	ext4_grpblk_t cnt = 0, first_block, last_block;
+	ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
 	uint64_t start, len, minlen, trimmed = 0;
 	ext4_fsblk_t first_data_blk =
 			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@@ -4958,7 +5039,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	len = range->len >> sb->s_blocksize_bits;
 	minlen = range->minlen >> sb->s_blocksize_bits;
 
-	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+	if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)))
 		return -EINVAL;
 	if (start + len <= first_data_blk)
 		goto out;
@@ -4969,11 +5050,11 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
 	/* Determine first and last group to examine based on start and len */
 	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
-				     &first_group, &first_block);
+				     &first_group, &first_cluster);
 	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
-				     &last_group, &last_block);
+				     &last_group, &last_cluster);
 	last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
-	last_block = EXT4_BLOCKS_PER_GROUP(sb);
+	last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
 
 	if (first_group > last_group)
 		return -EINVAL;
@@ -4993,20 +5074,20 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		 * change it for the last group in which case start +
 		 * len < EXT4_BLOCKS_PER_GROUP(sb).
 		 */
-		if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
-			last_block = first_block + len;
-		len -= last_block - first_block;
+		if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb))
+			last_cluster = first_cluster + len;
+		len -= last_cluster - first_cluster;
 
 		if (grp->bb_free >= minlen) {
-			cnt = ext4_trim_all_free(sb, group, first_block,
-						last_block, minlen);
+			cnt = ext4_trim_all_free(sb, group, first_cluster,
+						last_cluster, minlen);
 			if (cnt < 0) {
 				ret = cnt;
 				break;
 			}
 		}
 		trimmed += cnt;
-		first_block = 0;
+		first_cluster = 0;
 	}
 	range->len = trimmed * sb->s_blocksize;
 