aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c342
1 files changed, 144 insertions, 198 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cb990b21c69..99ab428bcfa 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,6 +21,7 @@
21 * mballoc.c contains the multiblocks allocation routines 21 * mballoc.c contains the multiblocks allocation routines
22 */ 22 */
23 23
24#include "ext4_jbd2.h"
24#include "mballoc.h" 25#include "mballoc.h"
25#include <linux/debugfs.h> 26#include <linux/debugfs.h>
26#include <linux/slab.h> 27#include <linux/slab.h>
@@ -339,7 +340,7 @@
339 */ 340 */
340static struct kmem_cache *ext4_pspace_cachep; 341static struct kmem_cache *ext4_pspace_cachep;
341static struct kmem_cache *ext4_ac_cachep; 342static struct kmem_cache *ext4_ac_cachep;
342static struct kmem_cache *ext4_free_ext_cachep; 343static struct kmem_cache *ext4_free_data_cachep;
343 344
344/* We create slab caches for groupinfo data structures based on the 345/* We create slab caches for groupinfo data structures based on the
345 * superblock block size. There will be one per mounted filesystem for 346 * superblock block size. There will be one per mounted filesystem for
@@ -357,7 +358,8 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
357 ext4_group_t group); 358 ext4_group_t group);
358static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, 359static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
359 ext4_group_t group); 360 ext4_group_t group);
360static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); 361static void ext4_free_data_callback(struct super_block *sb,
362 struct ext4_journal_cb_entry *jce, int rc);
361 363
362static inline void *mb_correct_addr_and_bit(int *bit, void *addr) 364static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
363{ 365{
@@ -425,7 +427,7 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
425{ 427{
426 char *bb; 428 char *bb;
427 429
428 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 430 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
429 BUG_ON(max == NULL); 431 BUG_ON(max == NULL);
430 432
431 if (order > e4b->bd_blkbits + 1) { 433 if (order > e4b->bd_blkbits + 1) {
@@ -436,10 +438,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
436 /* at order 0 we see each particular block */ 438 /* at order 0 we see each particular block */
437 if (order == 0) { 439 if (order == 0) {
438 *max = 1 << (e4b->bd_blkbits + 3); 440 *max = 1 << (e4b->bd_blkbits + 3);
439 return EXT4_MB_BITMAP(e4b); 441 return e4b->bd_bitmap;
440 } 442 }
441 443
442 bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; 444 bb = e4b->bd_buddy + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
443 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; 445 *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
444 446
445 return bb; 447 return bb;
@@ -588,7 +590,7 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
588 for (j = 0; j < (1 << order); j++) { 590 for (j = 0; j < (1 << order); j++) {
589 k = (i * (1 << order)) + j; 591 k = (i * (1 << order)) + j;
590 MB_CHECK_ASSERT( 592 MB_CHECK_ASSERT(
591 !mb_test_bit(k, EXT4_MB_BITMAP(e4b))); 593 !mb_test_bit(k, e4b->bd_bitmap));
592 } 594 }
593 count++; 595 count++;
594 } 596 }
@@ -782,7 +784,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
782 int groups_per_page; 784 int groups_per_page;
783 int err = 0; 785 int err = 0;
784 int i; 786 int i;
785 ext4_group_t first_group; 787 ext4_group_t first_group, group;
786 int first_block; 788 int first_block;
787 struct super_block *sb; 789 struct super_block *sb;
788 struct buffer_head *bhs; 790 struct buffer_head *bhs;
@@ -806,24 +808,23 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
806 808
807 /* allocate buffer_heads to read bitmaps */ 809 /* allocate buffer_heads to read bitmaps */
808 if (groups_per_page > 1) { 810 if (groups_per_page > 1) {
809 err = -ENOMEM;
810 i = sizeof(struct buffer_head *) * groups_per_page; 811 i = sizeof(struct buffer_head *) * groups_per_page;
811 bh = kzalloc(i, GFP_NOFS); 812 bh = kzalloc(i, GFP_NOFS);
812 if (bh == NULL) 813 if (bh == NULL) {
814 err = -ENOMEM;
813 goto out; 815 goto out;
816 }
814 } else 817 } else
815 bh = &bhs; 818 bh = &bhs;
816 819
817 first_group = page->index * blocks_per_page / 2; 820 first_group = page->index * blocks_per_page / 2;
818 821
819 /* read all groups the page covers into the cache */ 822 /* read all groups the page covers into the cache */
820 for (i = 0; i < groups_per_page; i++) { 823 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
821 struct ext4_group_desc *desc; 824 if (group >= ngroups)
822
823 if (first_group + i >= ngroups)
824 break; 825 break;
825 826
826 grinfo = ext4_get_group_info(sb, first_group + i); 827 grinfo = ext4_get_group_info(sb, group);
827 /* 828 /*
828 * If page is uptodate then we came here after online resize 829 * If page is uptodate then we came here after online resize
829 * which added some new uninitialized group info structs, so 830 * which added some new uninitialized group info structs, so
@@ -834,69 +835,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
834 bh[i] = NULL; 835 bh[i] = NULL;
835 continue; 836 continue;
836 } 837 }
837 838 if (!(bh[i] = ext4_read_block_bitmap_nowait(sb, group))) {
838 err = -EIO; 839 err = -ENOMEM;
839 desc = ext4_get_group_desc(sb, first_group + i, NULL);
840 if (desc == NULL)
841 goto out;
842
843 err = -ENOMEM;
844 bh[i] = sb_getblk(sb, ext4_block_bitmap(sb, desc));
845 if (bh[i] == NULL)
846 goto out; 840 goto out;
847
848 if (bitmap_uptodate(bh[i]))
849 continue;
850
851 lock_buffer(bh[i]);
852 if (bitmap_uptodate(bh[i])) {
853 unlock_buffer(bh[i]);
854 continue;
855 }
856 ext4_lock_group(sb, first_group + i);
857 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
858 ext4_init_block_bitmap(sb, bh[i],
859 first_group + i, desc);
860 set_bitmap_uptodate(bh[i]);
861 set_buffer_uptodate(bh[i]);
862 ext4_unlock_group(sb, first_group + i);
863 unlock_buffer(bh[i]);
864 continue;
865 } 841 }
866 ext4_unlock_group(sb, first_group + i); 842 mb_debug(1, "read bitmap for group %u\n", group);
867 if (buffer_uptodate(bh[i])) {
868 /*
869 * if not uninit if bh is uptodate,
870 * bitmap is also uptodate
871 */
872 set_bitmap_uptodate(bh[i]);
873 unlock_buffer(bh[i]);
874 continue;
875 }
876 get_bh(bh[i]);
877 /*
878 * submit the buffer_head for read. We can
879 * safely mark the bitmap as uptodate now.
880 * We do it here so the bitmap uptodate bit
881 * get set with buffer lock held.
882 */
883 set_bitmap_uptodate(bh[i]);
884 bh[i]->b_end_io = end_buffer_read_sync;
885 submit_bh(READ, bh[i]);
886 mb_debug(1, "read bitmap for group %u\n", first_group + i);
887 } 843 }
888 844
889 /* wait for I/O completion */ 845 /* wait for I/O completion */
890 for (i = 0; i < groups_per_page; i++) 846 for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
891 if (bh[i]) 847 if (bh[i] && ext4_wait_block_bitmap(sb, group, bh[i])) {
892 wait_on_buffer(bh[i]); 848 err = -EIO;
893
894 err = -EIO;
895 for (i = 0; i < groups_per_page; i++)
896 if (bh[i] && !buffer_uptodate(bh[i]))
897 goto out; 849 goto out;
850 }
851 }
898 852
899 err = 0;
900 first_block = page->index * blocks_per_page; 853 first_block = page->index * blocks_per_page;
901 for (i = 0; i < blocks_per_page; i++) { 854 for (i = 0; i < blocks_per_page; i++) {
902 int group; 855 int group;
@@ -1250,10 +1203,10 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
1250 int order = 1; 1203 int order = 1;
1251 void *bb; 1204 void *bb;
1252 1205
1253 BUG_ON(EXT4_MB_BITMAP(e4b) == EXT4_MB_BUDDY(e4b)); 1206 BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
1254 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3))); 1207 BUG_ON(block >= (1 << (e4b->bd_blkbits + 3)));
1255 1208
1256 bb = EXT4_MB_BUDDY(e4b); 1209 bb = e4b->bd_buddy;
1257 while (order <= e4b->bd_blkbits + 1) { 1210 while (order <= e4b->bd_blkbits + 1) {
1258 block = block >> 1; 1211 block = block >> 1;
1259 if (!mb_test_bit(block, bb)) { 1212 if (!mb_test_bit(block, bb)) {
@@ -1323,9 +1276,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1323 1276
1324 /* let's maintain fragments counter */ 1277 /* let's maintain fragments counter */
1325 if (first != 0) 1278 if (first != 0)
1326 block = !mb_test_bit(first - 1, EXT4_MB_BITMAP(e4b)); 1279 block = !mb_test_bit(first - 1, e4b->bd_bitmap);
1327 if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) 1280 if (first + count < EXT4_SB(sb)->s_mb_maxs[0])
1328 max = !mb_test_bit(first + count, EXT4_MB_BITMAP(e4b)); 1281 max = !mb_test_bit(first + count, e4b->bd_bitmap);
1329 if (block && max) 1282 if (block && max)
1330 e4b->bd_info->bb_fragments--; 1283 e4b->bd_info->bb_fragments--;
1331 else if (!block && !max) 1284 else if (!block && !max)
@@ -1336,7 +1289,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1336 block = first++; 1289 block = first++;
1337 order = 0; 1290 order = 0;
1338 1291
1339 if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { 1292 if (!mb_test_bit(block, e4b->bd_bitmap)) {
1340 ext4_fsblk_t blocknr; 1293 ext4_fsblk_t blocknr;
1341 1294
1342 blocknr = ext4_group_first_block_no(sb, e4b->bd_group); 1295 blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
@@ -1347,7 +1300,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1347 "freeing already freed block " 1300 "freeing already freed block "
1348 "(bit %u)", block); 1301 "(bit %u)", block);
1349 } 1302 }
1350 mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); 1303 mb_clear_bit(block, e4b->bd_bitmap);
1351 e4b->bd_info->bb_counters[order]++; 1304 e4b->bd_info->bb_counters[order]++;
1352 1305
1353 /* start of the buddy */ 1306 /* start of the buddy */
@@ -1429,7 +1382,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1429 break; 1382 break;
1430 1383
1431 next = (block + 1) * (1 << order); 1384 next = (block + 1) * (1 << order);
1432 if (mb_test_bit(next, EXT4_MB_BITMAP(e4b))) 1385 if (mb_test_bit(next, e4b->bd_bitmap))
1433 break; 1386 break;
1434 1387
1435 order = mb_find_order_for_block(e4b, next); 1388 order = mb_find_order_for_block(e4b, next);
@@ -1466,9 +1419,9 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1466 1419
1467 /* let's maintain fragments counter */ 1420 /* let's maintain fragments counter */
1468 if (start != 0) 1421 if (start != 0)
1469 mlen = !mb_test_bit(start - 1, EXT4_MB_BITMAP(e4b)); 1422 mlen = !mb_test_bit(start - 1, e4b->bd_bitmap);
1470 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0]) 1423 if (start + len < EXT4_SB(e4b->bd_sb)->s_mb_maxs[0])
1471 max = !mb_test_bit(start + len, EXT4_MB_BITMAP(e4b)); 1424 max = !mb_test_bit(start + len, e4b->bd_bitmap);
1472 if (mlen && max) 1425 if (mlen && max)
1473 e4b->bd_info->bb_fragments++; 1426 e4b->bd_info->bb_fragments++;
1474 else if (!mlen && !max) 1427 else if (!mlen && !max)
@@ -1511,7 +1464,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
1511 } 1464 }
1512 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); 1465 mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
1513 1466
1514 ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); 1467 ext4_set_bits(e4b->bd_bitmap, ex->fe_start, len0);
1515 mb_check_buddy(e4b); 1468 mb_check_buddy(e4b);
1516 1469
1517 return ret; 1470 return ret;
@@ -1810,7 +1763,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1810 struct ext4_buddy *e4b) 1763 struct ext4_buddy *e4b)
1811{ 1764{
1812 struct super_block *sb = ac->ac_sb; 1765 struct super_block *sb = ac->ac_sb;
1813 void *bitmap = EXT4_MB_BITMAP(e4b); 1766 void *bitmap = e4b->bd_bitmap;
1814 struct ext4_free_extent ex; 1767 struct ext4_free_extent ex;
1815 int i; 1768 int i;
1816 int free; 1769 int free;
@@ -1870,7 +1823,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1870{ 1823{
1871 struct super_block *sb = ac->ac_sb; 1824 struct super_block *sb = ac->ac_sb;
1872 struct ext4_sb_info *sbi = EXT4_SB(sb); 1825 struct ext4_sb_info *sbi = EXT4_SB(sb);
1873 void *bitmap = EXT4_MB_BITMAP(e4b); 1826 void *bitmap = e4b->bd_bitmap;
1874 struct ext4_free_extent ex; 1827 struct ext4_free_extent ex;
1875 ext4_fsblk_t first_group_block; 1828 ext4_fsblk_t first_group_block;
1876 ext4_fsblk_t a; 1829 ext4_fsblk_t a;
@@ -2224,7 +2177,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2224 EXT4_DESC_PER_BLOCK_BITS(sb); 2177 EXT4_DESC_PER_BLOCK_BITS(sb);
2225 meta_group_info = kmalloc(metalen, GFP_KERNEL); 2178 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2226 if (meta_group_info == NULL) { 2179 if (meta_group_info == NULL) {
2227 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem " 2180 ext4_msg(sb, KERN_ERR, "can't allocate mem "
2228 "for a buddy group"); 2181 "for a buddy group");
2229 goto exit_meta_group_info; 2182 goto exit_meta_group_info;
2230 } 2183 }
@@ -2238,7 +2191,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2238 2191
2239 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); 2192 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
2240 if (meta_group_info[i] == NULL) { 2193 if (meta_group_info[i] == NULL) {
2241 ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem"); 2194 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2242 goto exit_group_info; 2195 goto exit_group_info;
2243 } 2196 }
2244 memset(meta_group_info[i], 0, kmem_cache_size(cachep)); 2197 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
@@ -2522,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2522 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, 2475 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2523 &ext4_mb_seq_groups_fops, sb); 2476 &ext4_mb_seq_groups_fops, sb);
2524 2477
2525 if (sbi->s_journal)
2526 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2527
2528 return 0; 2478 return 0;
2529 2479
2530out_free_locality_groups: 2480out_free_locality_groups:
@@ -2637,58 +2587,55 @@ static inline int ext4_issue_discard(struct super_block *sb,
2637 * This function is called by the jbd2 layer once the commit has finished, 2587 * This function is called by the jbd2 layer once the commit has finished,
2638 * so we know we can free the blocks that were released with that commit. 2588 * so we know we can free the blocks that were released with that commit.
2639 */ 2589 */
2640static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) 2590static void ext4_free_data_callback(struct super_block *sb,
2591 struct ext4_journal_cb_entry *jce,
2592 int rc)
2641{ 2593{
2642 struct super_block *sb = journal->j_private; 2594 struct ext4_free_data *entry = (struct ext4_free_data *)jce;
2643 struct ext4_buddy e4b; 2595 struct ext4_buddy e4b;
2644 struct ext4_group_info *db; 2596 struct ext4_group_info *db;
2645 int err, count = 0, count2 = 0; 2597 int err, count = 0, count2 = 0;
2646 struct ext4_free_data *entry;
2647 struct list_head *l, *ltmp;
2648 2598
2649 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2599 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2650 entry = list_entry(l, struct ext4_free_data, list); 2600 entry->efd_count, entry->efd_group, entry);
2651 2601
2652 mb_debug(1, "gonna free %u blocks in group %u (0x%p):", 2602 if (test_opt(sb, DISCARD))
2653 entry->count, entry->group, entry); 2603 ext4_issue_discard(sb, entry->efd_group,
2604 entry->efd_start_cluster, entry->efd_count);
2654 2605
2655 if (test_opt(sb, DISCARD)) 2606 err = ext4_mb_load_buddy(sb, entry->efd_group, &e4b);
2656 ext4_issue_discard(sb, entry->group, 2607 /* we expect to find existing buddy because it's pinned */
2657 entry->start_cluster, entry->count); 2608 BUG_ON(err != 0);
2658 2609
2659 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
2660 /* we expect to find existing buddy because it's pinned */
2661 BUG_ON(err != 0);
2662 2610
2663 db = e4b.bd_info; 2611 db = e4b.bd_info;
2664 /* there are blocks to put in buddy to make them really free */ 2612 /* there are blocks to put in buddy to make them really free */
2665 count += entry->count; 2613 count += entry->efd_count;
2666 count2++; 2614 count2++;
2667 ext4_lock_group(sb, entry->group); 2615 ext4_lock_group(sb, entry->efd_group);
2668 /* Take it out of per group rb tree */ 2616 /* Take it out of per group rb tree */
2669 rb_erase(&entry->node, &(db->bb_free_root)); 2617 rb_erase(&entry->efd_node, &(db->bb_free_root));
2670 mb_free_blocks(NULL, &e4b, entry->start_cluster, entry->count); 2618 mb_free_blocks(NULL, &e4b, entry->efd_start_cluster, entry->efd_count);
2671 2619
2672 /* 2620 /*
2673 * Clear the trimmed flag for the group so that the next 2621 * Clear the trimmed flag for the group so that the next
2674 * ext4_trim_fs can trim it. 2622 * ext4_trim_fs can trim it.
2675 * If the volume is mounted with -o discard, online discard 2623 * If the volume is mounted with -o discard, online discard
2676 * is supported and the free blocks will be trimmed online. 2624 * is supported and the free blocks will be trimmed online.
2677 */ 2625 */
2678 if (!test_opt(sb, DISCARD)) 2626 if (!test_opt(sb, DISCARD))
2679 EXT4_MB_GRP_CLEAR_TRIMMED(db); 2627 EXT4_MB_GRP_CLEAR_TRIMMED(db);
2680 2628
2681 if (!db->bb_free_root.rb_node) { 2629 if (!db->bb_free_root.rb_node) {
2682 /* No more items in the per group rb tree 2630 /* No more items in the per group rb tree
2683 * balance refcounts from ext4_mb_free_metadata() 2631 * balance refcounts from ext4_mb_free_metadata()
2684 */ 2632 */
2685 page_cache_release(e4b.bd_buddy_page); 2633 page_cache_release(e4b.bd_buddy_page);
2686 page_cache_release(e4b.bd_bitmap_page); 2634 page_cache_release(e4b.bd_bitmap_page);
2687 }
2688 ext4_unlock_group(sb, entry->group);
2689 kmem_cache_free(ext4_free_ext_cachep, entry);
2690 ext4_mb_unload_buddy(&e4b);
2691 } 2635 }
2636 ext4_unlock_group(sb, entry->efd_group);
2637 kmem_cache_free(ext4_free_data_cachep, entry);
2638 ext4_mb_unload_buddy(&e4b);
2692 2639
2693 mb_debug(1, "freed %u blocks in %u structures\n", count, count2); 2640 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2694} 2641}
@@ -2741,9 +2688,9 @@ int __init ext4_init_mballoc(void)
2741 return -ENOMEM; 2688 return -ENOMEM;
2742 } 2689 }
2743 2690
2744 ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data, 2691 ext4_free_data_cachep = KMEM_CACHE(ext4_free_data,
2745 SLAB_RECLAIM_ACCOUNT); 2692 SLAB_RECLAIM_ACCOUNT);
2746 if (ext4_free_ext_cachep == NULL) { 2693 if (ext4_free_data_cachep == NULL) {
2747 kmem_cache_destroy(ext4_pspace_cachep); 2694 kmem_cache_destroy(ext4_pspace_cachep);
2748 kmem_cache_destroy(ext4_ac_cachep); 2695 kmem_cache_destroy(ext4_ac_cachep);
2749 return -ENOMEM; 2696 return -ENOMEM;
@@ -2761,7 +2708,7 @@ void ext4_exit_mballoc(void)
2761 rcu_barrier(); 2708 rcu_barrier();
2762 kmem_cache_destroy(ext4_pspace_cachep); 2709 kmem_cache_destroy(ext4_pspace_cachep);
2763 kmem_cache_destroy(ext4_ac_cachep); 2710 kmem_cache_destroy(ext4_ac_cachep);
2764 kmem_cache_destroy(ext4_free_ext_cachep); 2711 kmem_cache_destroy(ext4_free_data_cachep);
2765 ext4_groupinfo_destroy_slabs(); 2712 ext4_groupinfo_destroy_slabs();
2766 ext4_remove_debugfs_entry(); 2713 ext4_remove_debugfs_entry();
2767} 2714}
@@ -2815,7 +2762,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2815 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); 2762 len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
2816 if (!ext4_data_block_valid(sbi, block, len)) { 2763 if (!ext4_data_block_valid(sbi, block, len)) {
2817 ext4_error(sb, "Allocating blocks %llu-%llu which overlap " 2764 ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
2818 "fs metadata\n", block, block+len); 2765 "fs metadata", block, block+len);
2819 /* File system mounted not to panic on error 2766 /* File system mounted not to panic on error
2820 * Fix the bitmap and repeat the block allocation 2767 * Fix the bitmap and repeat the block allocation
2821 * We leak some of the blocks here. 2768 * We leak some of the blocks here.
@@ -2911,7 +2858,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
2911 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 2858 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2912 int bsbits, max; 2859 int bsbits, max;
2913 ext4_lblk_t end; 2860 ext4_lblk_t end;
2914 loff_t size, orig_size, start_off; 2861 loff_t size, start_off;
2862 loff_t orig_size __maybe_unused;
2915 ext4_lblk_t start; 2863 ext4_lblk_t start;
2916 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 2864 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
2917 struct ext4_prealloc_space *pa; 2865 struct ext4_prealloc_space *pa;
@@ -3321,8 +3269,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3321 n = rb_first(&(grp->bb_free_root)); 3269 n = rb_first(&(grp->bb_free_root));
3322 3270
3323 while (n) { 3271 while (n) {
3324 entry = rb_entry(n, struct ext4_free_data, node); 3272 entry = rb_entry(n, struct ext4_free_data, efd_node);
3325 ext4_set_bits(bitmap, entry->start_cluster, entry->count); 3273 ext4_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
3326 n = rb_next(n); 3274 n = rb_next(n);
3327 } 3275 }
3328 return; 3276 return;
@@ -3916,11 +3864,11 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3916 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) 3864 (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
3917 return; 3865 return;
3918 3866
3919 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:" 3867 ext4_msg(ac->ac_sb, KERN_ERR, "Can't allocate:"
3920 " Allocation context details:"); 3868 " Allocation context details:");
3921 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d", 3869 ext4_msg(ac->ac_sb, KERN_ERR, "status %d flags %d",
3922 ac->ac_status, ac->ac_flags); 3870 ac->ac_status, ac->ac_flags);
3923 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, " 3871 ext4_msg(ac->ac_sb, KERN_ERR, "orig %lu/%lu/%lu@%lu, "
3924 "goal %lu/%lu/%lu@%lu, " 3872 "goal %lu/%lu/%lu@%lu, "
3925 "best %lu/%lu/%lu@%lu cr %d", 3873 "best %lu/%lu/%lu@%lu cr %d",
3926 (unsigned long)ac->ac_o_ex.fe_group, 3874 (unsigned long)ac->ac_o_ex.fe_group,
@@ -3936,9 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
3936 (unsigned long)ac->ac_b_ex.fe_len, 3884 (unsigned long)ac->ac_b_ex.fe_len,
3937 (unsigned long)ac->ac_b_ex.fe_logical, 3885 (unsigned long)ac->ac_b_ex.fe_logical,
3938 (int)ac->ac_criteria); 3886 (int)ac->ac_criteria);
3939 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found", 3887 ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found",
3940 ac->ac_ex_scanned, ac->ac_found); 3888 ac->ac_ex_scanned, ac->ac_found);
3941 ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: "); 3889 ext4_msg(ac->ac_sb, KERN_ERR, "groups: ");
3942 ngroups = ext4_get_groups_count(sb); 3890 ngroups = ext4_get_groups_count(sb);
3943 for (i = 0; i < ngroups; i++) { 3891 for (i = 0; i < ngroups; i++) {
3944 struct ext4_group_info *grp = ext4_get_group_info(sb, i); 3892 struct ext4_group_info *grp = ext4_get_group_info(sb, i);
@@ -4428,9 +4376,9 @@ out:
4428static int can_merge(struct ext4_free_data *entry1, 4376static int can_merge(struct ext4_free_data *entry1,
4429 struct ext4_free_data *entry2) 4377 struct ext4_free_data *entry2)
4430{ 4378{
4431 if ((entry1->t_tid == entry2->t_tid) && 4379 if ((entry1->efd_tid == entry2->efd_tid) &&
4432 (entry1->group == entry2->group) && 4380 (entry1->efd_group == entry2->efd_group) &&
4433 ((entry1->start_cluster + entry1->count) == entry2->start_cluster)) 4381 ((entry1->efd_start_cluster + entry1->efd_count) == entry2->efd_start_cluster))
4434 return 1; 4382 return 1;
4435 return 0; 4383 return 0;
4436} 4384}
@@ -4452,8 +4400,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4452 BUG_ON(e4b->bd_bitmap_page == NULL); 4400 BUG_ON(e4b->bd_bitmap_page == NULL);
4453 BUG_ON(e4b->bd_buddy_page == NULL); 4401 BUG_ON(e4b->bd_buddy_page == NULL);
4454 4402
4455 new_node = &new_entry->node; 4403 new_node = &new_entry->efd_node;
4456 cluster = new_entry->start_cluster; 4404 cluster = new_entry->efd_start_cluster;
4457 4405
4458 if (!*n) { 4406 if (!*n) {
4459 /* first free block exent. We need to 4407 /* first free block exent. We need to
@@ -4466,10 +4414,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4466 } 4414 }
4467 while (*n) { 4415 while (*n) {
4468 parent = *n; 4416 parent = *n;
4469 entry = rb_entry(parent, struct ext4_free_data, node); 4417 entry = rb_entry(parent, struct ext4_free_data, efd_node);
4470 if (cluster < entry->start_cluster) 4418 if (cluster < entry->efd_start_cluster)
4471 n = &(*n)->rb_left; 4419 n = &(*n)->rb_left;
4472 else if (cluster >= (entry->start_cluster + entry->count)) 4420 else if (cluster >= (entry->efd_start_cluster + entry->efd_count))
4473 n = &(*n)->rb_right; 4421 n = &(*n)->rb_right;
4474 else { 4422 else {
4475 ext4_grp_locked_error(sb, group, 0, 4423 ext4_grp_locked_error(sb, group, 0,
@@ -4486,34 +4434,29 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4486 /* Now try to see the extent can be merged to left and right */ 4434 /* Now try to see the extent can be merged to left and right */
4487 node = rb_prev(new_node); 4435 node = rb_prev(new_node);
4488 if (node) { 4436 if (node) {
4489 entry = rb_entry(node, struct ext4_free_data, node); 4437 entry = rb_entry(node, struct ext4_free_data, efd_node);
4490 if (can_merge(entry, new_entry)) { 4438 if (can_merge(entry, new_entry)) {
4491 new_entry->start_cluster = entry->start_cluster; 4439 new_entry->efd_start_cluster = entry->efd_start_cluster;
4492 new_entry->count += entry->count; 4440 new_entry->efd_count += entry->efd_count;
4493 rb_erase(node, &(db->bb_free_root)); 4441 rb_erase(node, &(db->bb_free_root));
4494 spin_lock(&sbi->s_md_lock); 4442 ext4_journal_callback_del(handle, &entry->efd_jce);
4495 list_del(&entry->list); 4443 kmem_cache_free(ext4_free_data_cachep, entry);
4496 spin_unlock(&sbi->s_md_lock);
4497 kmem_cache_free(ext4_free_ext_cachep, entry);
4498 } 4444 }
4499 } 4445 }
4500 4446
4501 node = rb_next(new_node); 4447 node = rb_next(new_node);
4502 if (node) { 4448 if (node) {
4503 entry = rb_entry(node, struct ext4_free_data, node); 4449 entry = rb_entry(node, struct ext4_free_data, efd_node);
4504 if (can_merge(new_entry, entry)) { 4450 if (can_merge(new_entry, entry)) {
4505 new_entry->count += entry->count; 4451 new_entry->efd_count += entry->efd_count;
4506 rb_erase(node, &(db->bb_free_root)); 4452 rb_erase(node, &(db->bb_free_root));
4507 spin_lock(&sbi->s_md_lock); 4453 ext4_journal_callback_del(handle, &entry->efd_jce);
4508 list_del(&entry->list); 4454 kmem_cache_free(ext4_free_data_cachep, entry);
4509 spin_unlock(&sbi->s_md_lock);
4510 kmem_cache_free(ext4_free_ext_cachep, entry);
4511 } 4455 }
4512 } 4456 }
4513 /* Add the extent to transaction's private list */ 4457 /* Add the extent to transaction's private list */
4514 spin_lock(&sbi->s_md_lock); 4458 ext4_journal_callback_add(handle, ext4_free_data_callback,
4515 list_add(&new_entry->list, &handle->h_transaction->t_private_list); 4459 &new_entry->efd_jce);
4516 spin_unlock(&sbi->s_md_lock);
4517 return 0; 4460 return 0;
4518} 4461}
4519 4462
@@ -4691,15 +4634,15 @@ do_more:
4691 * blocks being freed are metadata. these blocks shouldn't 4634 * blocks being freed are metadata. these blocks shouldn't
4692 * be used until this transaction is committed 4635 * be used until this transaction is committed
4693 */ 4636 */
4694 new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); 4637 new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
4695 if (!new_entry) { 4638 if (!new_entry) {
4696 err = -ENOMEM; 4639 err = -ENOMEM;
4697 goto error_return; 4640 goto error_return;
4698 } 4641 }
4699 new_entry->start_cluster = bit; 4642 new_entry->efd_start_cluster = bit;
4700 new_entry->group = block_group; 4643 new_entry->efd_group = block_group;
4701 new_entry->count = count_clusters; 4644 new_entry->efd_count = count_clusters;
4702 new_entry->t_tid = handle->h_transaction->t_tid; 4645 new_entry->efd_tid = handle->h_transaction->t_tid;
4703 4646
4704 ext4_lock_group(sb, block_group); 4647 ext4_lock_group(sb, block_group);
4705 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 4648 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
@@ -4971,11 +4914,11 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
4971 start = (e4b.bd_info->bb_first_free > start) ? 4914 start = (e4b.bd_info->bb_first_free > start) ?
4972 e4b.bd_info->bb_first_free : start; 4915 e4b.bd_info->bb_first_free : start;
4973 4916
4974 while (start < max) { 4917 while (start <= max) {
4975 start = mb_find_next_zero_bit(bitmap, max, start); 4918 start = mb_find_next_zero_bit(bitmap, max + 1, start);
4976 if (start >= max) 4919 if (start > max)
4977 break; 4920 break;
4978 next = mb_find_next_bit(bitmap, max, start); 4921 next = mb_find_next_bit(bitmap, max + 1, start);
4979 4922
4980 if ((next - start) >= minblocks) { 4923 if ((next - start) >= minblocks) {
4981 ext4_trim_extent(sb, start, 4924 ext4_trim_extent(sb, start,
@@ -5027,37 +4970,36 @@ out:
5027int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) 4970int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5028{ 4971{
5029 struct ext4_group_info *grp; 4972 struct ext4_group_info *grp;
5030 ext4_group_t first_group, last_group; 4973 ext4_group_t group, first_group, last_group;
5031 ext4_group_t group, ngroups = ext4_get_groups_count(sb);
5032 ext4_grpblk_t cnt = 0, first_cluster, last_cluster; 4974 ext4_grpblk_t cnt = 0, first_cluster, last_cluster;
5033 uint64_t start, len, minlen, trimmed = 0; 4975 uint64_t start, end, minlen, trimmed = 0;
5034 ext4_fsblk_t first_data_blk = 4976 ext4_fsblk_t first_data_blk =
5035 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 4977 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
4978 ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es);
5036 int ret = 0; 4979 int ret = 0;
5037 4980
5038 start = range->start >> sb->s_blocksize_bits; 4981 start = range->start >> sb->s_blocksize_bits;
5039 len = range->len >> sb->s_blocksize_bits; 4982 end = start + (range->len >> sb->s_blocksize_bits) - 1;
5040 minlen = range->minlen >> sb->s_blocksize_bits; 4983 minlen = range->minlen >> sb->s_blocksize_bits;
5041 4984
5042 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb))) 4985 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
4986 unlikely(start >= max_blks))
5043 return -EINVAL; 4987 return -EINVAL;
5044 if (start + len <= first_data_blk) 4988 if (end >= max_blks)
4989 end = max_blks - 1;
4990 if (end <= first_data_blk)
5045 goto out; 4991 goto out;
5046 if (start < first_data_blk) { 4992 if (start < first_data_blk)
5047 len -= first_data_blk - start;
5048 start = first_data_blk; 4993 start = first_data_blk;
5049 }
5050 4994
5051 /* Determine first and last group to examine based on start and len */ 4995 /* Determine first and last group to examine based on start and end */
5052 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start, 4996 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
5053 &first_group, &first_cluster); 4997 &first_group, &first_cluster);
5054 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len), 4998 ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) end,
5055 &last_group, &last_cluster); 4999 &last_group, &last_cluster);
5056 last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
5057 last_cluster = EXT4_CLUSTERS_PER_GROUP(sb);
5058 5000
5059 if (first_group > last_group) 5001 /* end now represents the last cluster to discard in this group */
5060 return -EINVAL; 5002 end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
5061 5003
5062 for (group = first_group; group <= last_group; group++) { 5004 for (group = first_group; group <= last_group; group++) {
5063 grp = ext4_get_group_info(sb, group); 5005 grp = ext4_get_group_info(sb, group);
@@ -5069,31 +5011,35 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5069 } 5011 }
5070 5012
5071 /* 5013 /*
5072 * For all the groups except the last one, last block will 5014 * For all the groups except the last one, last cluster will
5073 * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to 5015 * always be EXT4_CLUSTERS_PER_GROUP(sb)-1, so we only need to
5074 * change it for the last group in which case start + 5016 * change it for the last group, note that last_cluster is
5075 * len < EXT4_BLOCKS_PER_GROUP(sb). 5017 * already computed earlier by ext4_get_group_no_and_offset()
5076 */ 5018 */
5077 if (first_cluster + len < EXT4_CLUSTERS_PER_GROUP(sb)) 5019 if (group == last_group)
5078 last_cluster = first_cluster + len; 5020 end = last_cluster;
5079 len -= last_cluster - first_cluster;
5080 5021
5081 if (grp->bb_free >= minlen) { 5022 if (grp->bb_free >= minlen) {
5082 cnt = ext4_trim_all_free(sb, group, first_cluster, 5023 cnt = ext4_trim_all_free(sb, group, first_cluster,
5083 last_cluster, minlen); 5024 end, minlen);
5084 if (cnt < 0) { 5025 if (cnt < 0) {
5085 ret = cnt; 5026 ret = cnt;
5086 break; 5027 break;
5087 } 5028 }
5029 trimmed += cnt;
5088 } 5030 }
5089 trimmed += cnt; 5031
5032 /*
5033 * For every group except the first one, we are sure
5034 * that the first cluster to discard will be cluster #0.
5035 */
5090 first_cluster = 0; 5036 first_cluster = 0;
5091 } 5037 }
5092 range->len = trimmed * sb->s_blocksize;
5093 5038
5094 if (!ret) 5039 if (!ret)
5095 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); 5040 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5096 5041
5097out: 5042out:
5043 range->len = trimmed * sb->s_blocksize;
5098 return ret; 5044 return ret;
5099} 5045}