diff options
author | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2009-01-14 00:29:51 -0500 |
---|---|---|
committer | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2009-01-14 00:29:51 -0500 |
commit | cb7a97d01521797cad9f63e8478403c3e51fea49 (patch) | |
tree | 84cddf20369f82f10c1c3712e6cce20dd1b9d863 /fs/ext4/mballoc.c | |
parent | 0335cb76aa3fa913a2164bc9b669e5aef9d56fa3 (diff) | |
parent | a6525042bfdfcab128bd91fad264de10fd24a55e (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 629 |
1 files changed, 462 insertions, 167 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 444ad998f72e..918aec0c8a11 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -100,7 +100,7 @@ | |||
100 | * inode as: | 100 | * inode as: |
101 | * | 101 | * |
102 | * { page } | 102 | * { page } |
103 | * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... | 103 | * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]... |
104 | * | 104 | * |
105 | * | 105 | * |
106 | * one block each for bitmap and buddy information. So for each group we | 106 | * one block each for bitmap and buddy information. So for each group we |
@@ -330,6 +330,18 @@ | |||
330 | * object | 330 | * object |
331 | * | 331 | * |
332 | */ | 332 | */ |
333 | static struct kmem_cache *ext4_pspace_cachep; | ||
334 | static struct kmem_cache *ext4_ac_cachep; | ||
335 | static struct kmem_cache *ext4_free_ext_cachep; | ||
336 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | ||
337 | ext4_group_t group); | ||
338 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | ||
339 | ext4_group_t group); | ||
340 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | ||
341 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | ||
342 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | ||
343 | |||
344 | |||
333 | 345 | ||
334 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) | 346 | static inline void *mb_correct_addr_and_bit(int *bit, void *addr) |
335 | { | 347 | { |
@@ -445,9 +457,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, | |||
445 | blocknr += first + i; | 457 | blocknr += first + i; |
446 | blocknr += | 458 | blocknr += |
447 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 459 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
448 | 460 | ext4_grp_locked_error(sb, e4b->bd_group, | |
449 | ext4_error(sb, __func__, "double-free of inode" | 461 | __func__, "double-free of inode" |
450 | " %lu's block %llu(bit %u in group %lu)\n", | 462 | " %lu's block %llu(bit %u in group %u)", |
451 | inode ? inode->i_ino : 0, blocknr, | 463 | inode ? inode->i_ino : 0, blocknr, |
452 | first + i, e4b->bd_group); | 464 | first + i, e4b->bd_group); |
453 | } | 465 | } |
@@ -477,7 +489,7 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
477 | b2 = (unsigned char *) bitmap; | 489 | b2 = (unsigned char *) bitmap; |
478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 490 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
479 | if (b1[i] != b2[i]) { | 491 | if (b1[i] != b2[i]) { |
480 | printk(KERN_ERR "corruption in group %lu " | 492 | printk(KERN_ERR "corruption in group %u " |
481 | "at byte %u(%u): %x in copy != %x " | 493 | "at byte %u(%u): %x in copy != %x " |
482 | "on disk/prealloc\n", | 494 | "on disk/prealloc\n", |
483 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 495 | e4b->bd_group, i, i * 8, b1[i], b2[i]); |
@@ -690,8 +702,8 @@ static void ext4_mb_generate_buddy(struct super_block *sb, | |||
690 | grp->bb_fragments = fragments; | 702 | grp->bb_fragments = fragments; |
691 | 703 | ||
692 | if (free != grp->bb_free) { | 704 | if (free != grp->bb_free) { |
693 | ext4_error(sb, __func__, | 705 | ext4_grp_locked_error(sb, group, __func__, |
694 | "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n", | 706 | "EXT4-fs: group %u: %u blocks in bitmap, %u in gd", |
695 | group, free, grp->bb_free); | 707 | group, free, grp->bb_free); |
696 | /* | 708 | /* |
697 | * If we intend to continue, we consider group descriptor | 709 | * If we intend to continue, we consider group descriptor |
@@ -716,7 +728,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, | |||
716 | * stored in the inode as | 728 | * stored in the inode as |
717 | * | 729 | * |
718 | * { page } | 730 | * { page } |
719 | * [ group 0 buddy][ group 0 bitmap] [group 1][ group 1]... | 731 | * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]... |
720 | * | 732 | * |
721 | * | 733 | * |
722 | * one block each for bitmap and buddy information. | 734 | * one block each for bitmap and buddy information. |
@@ -782,25 +794,45 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
782 | if (bh[i] == NULL) | 794 | if (bh[i] == NULL) |
783 | goto out; | 795 | goto out; |
784 | 796 | ||
785 | if (buffer_uptodate(bh[i]) && | 797 | if (bitmap_uptodate(bh[i])) |
786 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
787 | continue; | 798 | continue; |
788 | 799 | ||
789 | lock_buffer(bh[i]); | 800 | lock_buffer(bh[i]); |
801 | if (bitmap_uptodate(bh[i])) { | ||
802 | unlock_buffer(bh[i]); | ||
803 | continue; | ||
804 | } | ||
790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 805 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 806 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
792 | ext4_init_block_bitmap(sb, bh[i], | 807 | ext4_init_block_bitmap(sb, bh[i], |
793 | first_group + i, desc); | 808 | first_group + i, desc); |
809 | set_bitmap_uptodate(bh[i]); | ||
794 | set_buffer_uptodate(bh[i]); | 810 | set_buffer_uptodate(bh[i]); |
795 | unlock_buffer(bh[i]); | ||
796 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 811 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
812 | unlock_buffer(bh[i]); | ||
797 | continue; | 813 | continue; |
798 | } | 814 | } |
799 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 815 | spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
816 | if (buffer_uptodate(bh[i])) { | ||
817 | /* | ||
818 | * if the group is not uninit and bh is uptodate, | |
819 | * bitmap is also uptodate | ||
820 | */ | ||
821 | set_bitmap_uptodate(bh[i]); | ||
822 | unlock_buffer(bh[i]); | ||
823 | continue; | ||
824 | } | ||
800 | get_bh(bh[i]); | 825 | get_bh(bh[i]); |
826 | /* | ||
827 | * submit the buffer_head for read. We can | ||
828 | * safely mark the bitmap as uptodate now. | ||
829 | * We do it here so the bitmap uptodate bit | ||
830 | * gets set with the buffer lock held. | |
831 | */ | ||
832 | set_bitmap_uptodate(bh[i]); | ||
801 | bh[i]->b_end_io = end_buffer_read_sync; | 833 | bh[i]->b_end_io = end_buffer_read_sync; |
802 | submit_bh(READ, bh[i]); | 834 | submit_bh(READ, bh[i]); |
803 | mb_debug("read bitmap for group %lu\n", first_group + i); | 835 | mb_debug("read bitmap for group %u\n", first_group + i); |
804 | } | 836 | } |
805 | 837 | ||
806 | /* wait for I/O completion */ | 838 | /* wait for I/O completion */ |
@@ -814,6 +846,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
814 | 846 | ||
815 | err = 0; | 847 | err = 0; |
816 | first_block = page->index * blocks_per_page; | 848 | first_block = page->index * blocks_per_page; |
849 | /* init the page */ | ||
850 | memset(page_address(page), 0xff, PAGE_CACHE_SIZE); | ||
817 | for (i = 0; i < blocks_per_page; i++) { | 851 | for (i = 0; i < blocks_per_page; i++) { |
818 | int group; | 852 | int group; |
819 | struct ext4_group_info *grinfo; | 853 | struct ext4_group_info *grinfo; |
@@ -840,7 +874,6 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
840 | BUG_ON(incore == NULL); | 874 | BUG_ON(incore == NULL); |
841 | mb_debug("put buddy for group %u in page %lu/%x\n", | 875 | mb_debug("put buddy for group %u in page %lu/%x\n", |
842 | group, page->index, i * blocksize); | 876 | group, page->index, i * blocksize); |
843 | memset(data, 0xff, blocksize); | ||
844 | grinfo = ext4_get_group_info(sb, group); | 877 | grinfo = ext4_get_group_info(sb, group); |
845 | grinfo->bb_fragments = 0; | 878 | grinfo->bb_fragments = 0; |
846 | memset(grinfo->bb_counters, 0, | 879 | memset(grinfo->bb_counters, 0, |
@@ -848,7 +881,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
848 | /* | 881 | /* |
849 | * incore got set to the group block bitmap below | 882 | * incore got set to the group block bitmap below |
850 | */ | 883 | */ |
884 | ext4_lock_group(sb, group); | ||
851 | ext4_mb_generate_buddy(sb, data, incore, group); | 885 | ext4_mb_generate_buddy(sb, data, incore, group); |
886 | ext4_unlock_group(sb, group); | ||
852 | incore = NULL; | 887 | incore = NULL; |
853 | } else { | 888 | } else { |
854 | /* this is block of bitmap */ | 889 | /* this is block of bitmap */ |
@@ -862,6 +897,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
862 | 897 | ||
863 | /* mark all preallocated blks used in in-core bitmap */ | 898 | /* mark all preallocated blks used in in-core bitmap */ |
864 | ext4_mb_generate_from_pa(sb, data, group); | 899 | ext4_mb_generate_from_pa(sb, data, group); |
900 | ext4_mb_generate_from_freelist(sb, data, group); | ||
865 | ext4_unlock_group(sb, group); | 901 | ext4_unlock_group(sb, group); |
866 | 902 | ||
867 | /* set incore so that the buddy information can be | 903 | /* set incore so that the buddy information can be |
@@ -886,18 +922,20 @@ static noinline_for_stack int | |||
886 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 922 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
887 | struct ext4_buddy *e4b) | 923 | struct ext4_buddy *e4b) |
888 | { | 924 | { |
889 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
890 | struct inode *inode = sbi->s_buddy_cache; | ||
891 | int blocks_per_page; | 925 | int blocks_per_page; |
892 | int block; | 926 | int block; |
893 | int pnum; | 927 | int pnum; |
894 | int poff; | 928 | int poff; |
895 | struct page *page; | 929 | struct page *page; |
896 | int ret; | 930 | int ret; |
931 | struct ext4_group_info *grp; | ||
932 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
933 | struct inode *inode = sbi->s_buddy_cache; | ||
897 | 934 | ||
898 | mb_debug("load group %lu\n", group); | 935 | mb_debug("load group %u\n", group); |
899 | 936 | ||
900 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 937 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
938 | grp = ext4_get_group_info(sb, group); | ||
901 | 939 | ||
902 | e4b->bd_blkbits = sb->s_blocksize_bits; | 940 | e4b->bd_blkbits = sb->s_blocksize_bits; |
903 | e4b->bd_info = ext4_get_group_info(sb, group); | 941 | e4b->bd_info = ext4_get_group_info(sb, group); |
@@ -905,6 +943,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
905 | e4b->bd_group = group; | 943 | e4b->bd_group = group; |
906 | e4b->bd_buddy_page = NULL; | 944 | e4b->bd_buddy_page = NULL; |
907 | e4b->bd_bitmap_page = NULL; | 945 | e4b->bd_bitmap_page = NULL; |
946 | e4b->alloc_semp = &grp->alloc_sem; | ||
947 | |||
948 | /* Take the read lock on the group alloc | ||
949 | * sem. This would make sure a parallel | ||
950 | * ext4_mb_init_group happening on other | ||
951 | * groups mapped by the page is blocked | ||
952 | * till we are done with allocation | ||
953 | */ | ||
954 | down_read(e4b->alloc_semp); | ||
908 | 955 | ||
909 | /* | 956 | /* |
910 | * the buddy cache inode stores the block bitmap | 957 | * the buddy cache inode stores the block bitmap |
@@ -920,6 +967,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
920 | page = find_get_page(inode->i_mapping, pnum); | 967 | page = find_get_page(inode->i_mapping, pnum); |
921 | if (page == NULL || !PageUptodate(page)) { | 968 | if (page == NULL || !PageUptodate(page)) { |
922 | if (page) | 969 | if (page) |
970 | /* | ||
971 | * drop the page reference and try | ||
972 | * to get the page with lock. If we | ||
973 | * are not uptodate that implies | ||
974 | * somebody just created the page but | ||
975 | * is yet to initialize the same. So | ||
976 | * wait for it to initialize. | ||
977 | */ | ||
923 | page_cache_release(page); | 978 | page_cache_release(page); |
924 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | 979 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); |
925 | if (page) { | 980 | if (page) { |
@@ -985,6 +1040,9 @@ err: | |||
985 | page_cache_release(e4b->bd_buddy_page); | 1040 | page_cache_release(e4b->bd_buddy_page); |
986 | e4b->bd_buddy = NULL; | 1041 | e4b->bd_buddy = NULL; |
987 | e4b->bd_bitmap = NULL; | 1042 | e4b->bd_bitmap = NULL; |
1043 | |||
1044 | /* Done with the buddy cache */ | ||
1045 | up_read(e4b->alloc_semp); | ||
988 | return ret; | 1046 | return ret; |
989 | } | 1047 | } |
990 | 1048 | ||
@@ -994,6 +1052,9 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b) | |||
994 | page_cache_release(e4b->bd_bitmap_page); | 1052 | page_cache_release(e4b->bd_bitmap_page); |
995 | if (e4b->bd_buddy_page) | 1053 | if (e4b->bd_buddy_page) |
996 | page_cache_release(e4b->bd_buddy_page); | 1054 | page_cache_release(e4b->bd_buddy_page); |
1055 | /* Done with the buddy cache */ | ||
1056 | if (e4b->alloc_semp) | ||
1057 | up_read(e4b->alloc_semp); | ||
997 | } | 1058 | } |
998 | 1059 | ||
999 | 1060 | ||
@@ -1031,7 +1092,10 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
1031 | cur += 32; | 1092 | cur += 32; |
1032 | continue; | 1093 | continue; |
1033 | } | 1094 | } |
1034 | mb_clear_bit_atomic(lock, cur, bm); | 1095 | if (lock) |
1096 | mb_clear_bit_atomic(lock, cur, bm); | ||
1097 | else | ||
1098 | mb_clear_bit(cur, bm); | ||
1035 | cur++; | 1099 | cur++; |
1036 | } | 1100 | } |
1037 | } | 1101 | } |
@@ -1049,7 +1113,10 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) | |||
1049 | cur += 32; | 1113 | cur += 32; |
1050 | continue; | 1114 | continue; |
1051 | } | 1115 | } |
1052 | mb_set_bit_atomic(lock, cur, bm); | 1116 | if (lock) |
1117 | mb_set_bit_atomic(lock, cur, bm); | ||
1118 | else | ||
1119 | mb_set_bit(cur, bm); | ||
1053 | cur++; | 1120 | cur++; |
1054 | } | 1121 | } |
1055 | } | 1122 | } |
@@ -1094,12 +1161,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
1094 | blocknr += block; | 1161 | blocknr += block; |
1095 | blocknr += | 1162 | blocknr += |
1096 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 1163 | le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
1097 | ext4_unlock_group(sb, e4b->bd_group); | 1164 | ext4_grp_locked_error(sb, e4b->bd_group, |
1098 | ext4_error(sb, __func__, "double-free of inode" | 1165 | __func__, "double-free of inode" |
1099 | " %lu's block %llu(bit %u in group %lu)\n", | 1166 | " %lu's block %llu(bit %u in group %u)", |
1100 | inode ? inode->i_ino : 0, blocknr, block, | 1167 | inode ? inode->i_ino : 0, blocknr, block, |
1101 | e4b->bd_group); | 1168 | e4b->bd_group); |
1102 | ext4_lock_group(sb, e4b->bd_group); | ||
1103 | } | 1169 | } |
1104 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); | 1170 | mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); |
1105 | e4b->bd_info->bb_counters[order]++; | 1171 | e4b->bd_info->bb_counters[order]++; |
@@ -1296,13 +1362,20 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, | |||
1296 | ac->ac_tail = ret & 0xffff; | 1362 | ac->ac_tail = ret & 0xffff; |
1297 | ac->ac_buddy = ret >> 16; | 1363 | ac->ac_buddy = ret >> 16; |
1298 | 1364 | ||
1299 | /* XXXXXXX: SUCH A HORRIBLE **CK */ | 1365 | /* |
1300 | /*FIXME!! Why ? */ | 1366 | * take the page reference. We want the page to be pinned |
1367 | * so that we don't get an ext4_mb_init_cache call for this | |
1368 | * group until we update the bitmap. That would mean we | ||
1369 | * double allocate blocks. The reference is dropped | ||
1370 | * in ext4_mb_release_context | ||
1371 | */ | ||
1301 | ac->ac_bitmap_page = e4b->bd_bitmap_page; | 1372 | ac->ac_bitmap_page = e4b->bd_bitmap_page; |
1302 | get_page(ac->ac_bitmap_page); | 1373 | get_page(ac->ac_bitmap_page); |
1303 | ac->ac_buddy_page = e4b->bd_buddy_page; | 1374 | ac->ac_buddy_page = e4b->bd_buddy_page; |
1304 | get_page(ac->ac_buddy_page); | 1375 | get_page(ac->ac_buddy_page); |
1305 | 1376 | /* on allocation we use ac to track the held semaphore */ | |
1377 | ac->alloc_semp = e4b->alloc_semp; | ||
1378 | e4b->alloc_semp = NULL; | ||
1306 | /* store last allocated for subsequent stream allocation */ | 1379 | /* store last allocated for subsequent stream allocation */ |
1307 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { | 1380 | if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { |
1308 | spin_lock(&sbi->s_md_lock); | 1381 | spin_lock(&sbi->s_md_lock); |
@@ -1326,6 +1399,8 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac, | |||
1326 | struct ext4_free_extent ex; | 1399 | struct ext4_free_extent ex; |
1327 | int max; | 1400 | int max; |
1328 | 1401 | ||
1402 | if (ac->ac_status == AC_STATUS_FOUND) | ||
1403 | return; | ||
1329 | /* | 1404 | /* |
1330 | * We don't want to scan for a whole year | 1405 | * We don't want to scan for a whole year |
1331 | */ | 1406 | */ |
@@ -1575,8 +1650,9 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1575 | * free blocks even though group info says we | 1650 | * free blocks even though group info says we |
1576 | * have free blocks | 1651 | * have free blocks |
1577 | */ | 1652 | */ |
1578 | ext4_error(sb, __func__, "%d free blocks as per " | 1653 | ext4_grp_locked_error(sb, e4b->bd_group, |
1579 | "group info. But bitmap says 0\n", | 1654 | __func__, "%d free blocks as per " |
1655 | "group info. But bitmap says 0", | ||
1580 | free); | 1656 | free); |
1581 | break; | 1657 | break; |
1582 | } | 1658 | } |
@@ -1584,8 +1660,9 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1584 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); | 1660 | mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); |
1585 | BUG_ON(ex.fe_len <= 0); | 1661 | BUG_ON(ex.fe_len <= 0); |
1586 | if (free < ex.fe_len) { | 1662 | if (free < ex.fe_len) { |
1587 | ext4_error(sb, __func__, "%d free blocks as per " | 1663 | ext4_grp_locked_error(sb, e4b->bd_group, |
1588 | "group info. But got %d blocks\n", | 1664 | __func__, "%d free blocks as per " |
1665 | "group info. But got %d blocks", | ||
1589 | free, ex.fe_len); | 1666 | free, ex.fe_len); |
1590 | /* | 1667 | /* |
1591 | * The number of free blocks differs. This mostly | 1668 | * The number of free blocks differs. This mostly |
@@ -1692,6 +1769,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1692 | return 0; | 1769 | return 0; |
1693 | } | 1770 | } |
1694 | 1771 | ||
1772 | /* | ||
1773 | * lock the group_info alloc_sem of all the groups | ||
1774 | * belonging to the same buddy cache page. This | ||
1775 | * makes sure no other parallel operation on the buddy | |
1776 | * cache happens while the buddy cache lock is | |
1777 | * held. | |
1778 | */ | ||
1779 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1780 | { | ||
1781 | int i; | ||
1782 | int block, pnum; | ||
1783 | int blocks_per_page; | ||
1784 | int groups_per_page; | ||
1785 | ext4_group_t first_group; | ||
1786 | struct ext4_group_info *grp; | ||
1787 | |||
1788 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1789 | /* | ||
1790 | * the buddy cache inode stores the block bitmap | ||
1791 | * and buddy information in consecutive blocks. | ||
1792 | * So for each group we need two blocks. | ||
1793 | */ | ||
1794 | block = group * 2; | ||
1795 | pnum = block / blocks_per_page; | ||
1796 | first_group = pnum * blocks_per_page / 2; | ||
1797 | |||
1798 | groups_per_page = blocks_per_page >> 1; | ||
1799 | if (groups_per_page == 0) | ||
1800 | groups_per_page = 1; | ||
1801 | /* lock the alloc_sem of every group the page covers */ | |
1802 | for (i = 0; i < groups_per_page; i++) { | ||
1803 | |||
1804 | if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) | ||
1805 | break; | ||
1806 | grp = ext4_get_group_info(sb, first_group + i); | ||
1807 | /* take the write allocation semaphore | |
1808 | * of every group. This makes sure there is | |
1809 | * no block allocation going on in any | |
1810 | * of those groups | |
1811 | */ | ||
1812 | down_write_nested(&grp->alloc_sem, i); | ||
1813 | } | ||
1814 | return i; | ||
1815 | } | ||
1816 | |||
1817 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1818 | ext4_group_t group, int locked_group) | ||
1819 | { | ||
1820 | int i; | ||
1821 | int block, pnum; | ||
1822 | int blocks_per_page; | ||
1823 | ext4_group_t first_group; | ||
1824 | struct ext4_group_info *grp; | ||
1825 | |||
1826 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1827 | /* | ||
1828 | * the buddy cache inode stores the block bitmap | ||
1829 | * and buddy information in consecutive blocks. | ||
1830 | * So for each group we need two blocks. | ||
1831 | */ | ||
1832 | block = group * 2; | ||
1833 | pnum = block / blocks_per_page; | ||
1834 | first_group = pnum * blocks_per_page / 2; | ||
1835 | /* release locks on all the groups */ | ||
1836 | for (i = 0; i < locked_group; i++) { | ||
1837 | |||
1838 | grp = ext4_get_group_info(sb, first_group + i); | ||
1839 | /* release the write allocation | |
1840 | * semaphore of every group that was | |
1841 | * locked by | |
1842 | * ext4_mb_get_buddy_cache_lock() | |
1843 | */ | ||
1844 | up_write(&grp->alloc_sem); | ||
1845 | } | ||
1846 | |||
1847 | } | ||
1848 | |||
1849 | static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
1850 | { | ||
1851 | |||
1852 | int ret; | ||
1853 | void *bitmap; | ||
1854 | int blocks_per_page; | ||
1855 | int block, pnum, poff; | ||
1856 | int num_grp_locked = 0; | ||
1857 | struct ext4_group_info *this_grp; | ||
1858 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1859 | struct inode *inode = sbi->s_buddy_cache; | ||
1860 | struct page *page = NULL, *bitmap_page = NULL; | ||
1861 | |||
1862 | mb_debug("init group %lu\n", group); | ||
1863 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1864 | this_grp = ext4_get_group_info(sb, group); | ||
1865 | /* | ||
1866 | * This ensures we don't add group | ||
1867 | * to this buddy cache via resize | ||
1868 | */ | ||
1869 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
1870 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
1871 | /* | ||
1872 | * somebody initialized the group | ||
1873 | * return without doing anything | ||
1874 | */ | ||
1875 | ret = 0; | ||
1876 | goto err; | ||
1877 | } | ||
1878 | /* | ||
1879 | * the buddy cache inode stores the block bitmap | ||
1880 | * and buddy information in consecutive blocks. | ||
1881 | * So for each group we need two blocks. | ||
1882 | */ | ||
1883 | block = group * 2; | ||
1884 | pnum = block / blocks_per_page; | ||
1885 | poff = block % blocks_per_page; | ||
1886 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1887 | if (page) { | ||
1888 | BUG_ON(page->mapping != inode->i_mapping); | ||
1889 | ret = ext4_mb_init_cache(page, NULL); | ||
1890 | if (ret) { | ||
1891 | unlock_page(page); | ||
1892 | goto err; | ||
1893 | } | ||
1894 | unlock_page(page); | ||
1895 | } | ||
1896 | if (page == NULL || !PageUptodate(page)) { | ||
1897 | ret = -EIO; | ||
1898 | goto err; | ||
1899 | } | ||
1900 | mark_page_accessed(page); | ||
1901 | bitmap_page = page; | ||
1902 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1903 | |||
1904 | /* init buddy cache */ | ||
1905 | block++; | ||
1906 | pnum = block / blocks_per_page; | ||
1907 | poff = block % blocks_per_page; | ||
1908 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1909 | if (page == bitmap_page) { | ||
1910 | /* | ||
1911 | * If both the bitmap and buddy are in | ||
1912 | * the same page we don't need to force | ||
1913 | * init the buddy | ||
1914 | */ | ||
1915 | unlock_page(page); | ||
1916 | } else if (page) { | ||
1917 | BUG_ON(page->mapping != inode->i_mapping); | ||
1918 | ret = ext4_mb_init_cache(page, bitmap); | ||
1919 | if (ret) { | ||
1920 | unlock_page(page); | ||
1921 | goto err; | ||
1922 | } | ||
1923 | unlock_page(page); | ||
1924 | } | ||
1925 | if (page == NULL || !PageUptodate(page)) { | ||
1926 | ret = -EIO; | ||
1927 | goto err; | ||
1928 | } | ||
1929 | mark_page_accessed(page); | ||
1930 | err: | ||
1931 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1932 | if (bitmap_page) | ||
1933 | page_cache_release(bitmap_page); | ||
1934 | if (page) | ||
1935 | page_cache_release(page); | ||
1936 | return ret; | ||
1937 | } | ||
1938 | |||
1695 | static noinline_for_stack int | 1939 | static noinline_for_stack int |
1696 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1940 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1697 | { | 1941 | { |
@@ -1775,7 +2019,7 @@ repeat: | |||
1775 | group = 0; | 2019 | group = 0; |
1776 | 2020 | ||
1777 | /* quick check to skip empty groups */ | 2021 | /* quick check to skip empty groups */ |
1778 | grp = ext4_get_group_info(ac->ac_sb, group); | 2022 | grp = ext4_get_group_info(sb, group); |
1779 | if (grp->bb_free == 0) | 2023 | if (grp->bb_free == 0) |
1780 | continue; | 2024 | continue; |
1781 | 2025 | ||
@@ -1788,10 +2032,9 @@ repeat: | |||
1788 | * we need full data about the group | 2032 | * we need full data about the group |
1789 | * to make a good selection | 2033 | * to make a good selection |
1790 | */ | 2034 | */ |
1791 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2035 | err = ext4_mb_init_group(sb, group); |
1792 | if (err) | 2036 | if (err) |
1793 | goto out; | 2037 | goto out; |
1794 | ext4_mb_release_desc(&e4b); | ||
1795 | } | 2038 | } |
1796 | 2039 | ||
1797 | /* | 2040 | /* |
@@ -1932,13 +2175,13 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
1932 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { | 2175 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { |
1933 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " | 2176 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " |
1934 | "%-5u %-5s %-5u %-6u\n"; | 2177 | "%-5u %-5s %-5u %-6u\n"; |
1935 | sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, | 2178 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, |
1936 | hs->result.fe_start, hs->result.fe_len, | 2179 | hs->result.fe_start, hs->result.fe_len, |
1937 | hs->result.fe_logical); | 2180 | hs->result.fe_logical); |
1938 | sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, | 2181 | sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, |
1939 | hs->orig.fe_start, hs->orig.fe_len, | 2182 | hs->orig.fe_start, hs->orig.fe_len, |
1940 | hs->orig.fe_logical); | 2183 | hs->orig.fe_logical); |
1941 | sprintf(buf3, "%lu/%d/%u@%u", hs->goal.fe_group, | 2184 | sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group, |
1942 | hs->goal.fe_start, hs->goal.fe_len, | 2185 | hs->goal.fe_start, hs->goal.fe_len, |
1943 | hs->goal.fe_logical); | 2186 | hs->goal.fe_logical); |
1944 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, | 2187 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, |
@@ -1947,20 +2190,20 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | |||
1947 | hs->buddy ? 1 << hs->buddy : 0); | 2190 | hs->buddy ? 1 << hs->buddy : 0); |
1948 | } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { | 2191 | } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { |
1949 | fmt = "%-5u %-8u %-23s %-23s %-23s\n"; | 2192 | fmt = "%-5u %-8u %-23s %-23s %-23s\n"; |
1950 | sprintf(buf2, "%lu/%d/%u@%u", hs->result.fe_group, | 2193 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, |
1951 | hs->result.fe_start, hs->result.fe_len, | 2194 | hs->result.fe_start, hs->result.fe_len, |
1952 | hs->result.fe_logical); | 2195 | hs->result.fe_logical); |
1953 | sprintf(buf, "%lu/%d/%u@%u", hs->orig.fe_group, | 2196 | sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, |
1954 | hs->orig.fe_start, hs->orig.fe_len, | 2197 | hs->orig.fe_start, hs->orig.fe_len, |
1955 | hs->orig.fe_logical); | 2198 | hs->orig.fe_logical); |
1956 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); | 2199 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); |
1957 | } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { | 2200 | } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { |
1958 | sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, | 2201 | sprintf(buf2, "%u/%d/%u", hs->result.fe_group, |
1959 | hs->result.fe_start, hs->result.fe_len); | 2202 | hs->result.fe_start, hs->result.fe_len); |
1960 | seq_printf(seq, "%-5u %-8u %-23s discard\n", | 2203 | seq_printf(seq, "%-5u %-8u %-23s discard\n", |
1961 | hs->pid, hs->ino, buf2); | 2204 | hs->pid, hs->ino, buf2); |
1962 | } else if (hs->op == EXT4_MB_HISTORY_FREE) { | 2205 | } else if (hs->op == EXT4_MB_HISTORY_FREE) { |
1963 | sprintf(buf2, "%lu/%d/%u", hs->result.fe_group, | 2206 | sprintf(buf2, "%u/%d/%u", hs->result.fe_group, |
1964 | hs->result.fe_start, hs->result.fe_len); | 2207 | hs->result.fe_start, hs->result.fe_len); |
1965 | seq_printf(seq, "%-5u %-8u %-23s free\n", | 2208 | seq_printf(seq, "%-5u %-8u %-23s free\n", |
1966 | hs->pid, hs->ino, buf2); | 2209 | hs->pid, hs->ino, buf2); |
@@ -2073,7 +2316,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | |||
2073 | return NULL; | 2316 | return NULL; |
2074 | 2317 | ||
2075 | group = *pos + 1; | 2318 | group = *pos + 1; |
2076 | return (void *) group; | 2319 | return (void *) ((unsigned long) group); |
2077 | } | 2320 | } |
2078 | 2321 | ||
2079 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) | 2322 | static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) |
@@ -2086,13 +2329,13 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2086 | if (*pos < 0 || *pos >= sbi->s_groups_count) | 2329 | if (*pos < 0 || *pos >= sbi->s_groups_count) |
2087 | return NULL; | 2330 | return NULL; |
2088 | group = *pos + 1; | 2331 | group = *pos + 1; |
2089 | return (void *) group;; | 2332 | return (void *) ((unsigned long) group); |
2090 | } | 2333 | } |
2091 | 2334 | ||
2092 | static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | 2335 | static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) |
2093 | { | 2336 | { |
2094 | struct super_block *sb = seq->private; | 2337 | struct super_block *sb = seq->private; |
2095 | long group = (long) v; | 2338 | ext4_group_t group = (ext4_group_t) ((unsigned long) v); |
2096 | int i; | 2339 | int i; |
2097 | int err; | 2340 | int err; |
2098 | struct ext4_buddy e4b; | 2341 | struct ext4_buddy e4b; |
@@ -2114,7 +2357,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2114 | sizeof(struct ext4_group_info); | 2357 | sizeof(struct ext4_group_info); |
2115 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2358 | err = ext4_mb_load_buddy(sb, group, &e4b); |
2116 | if (err) { | 2359 | if (err) { |
2117 | seq_printf(seq, "#%-5lu: I/O error\n", group); | 2360 | seq_printf(seq, "#%-5u: I/O error\n", group); |
2118 | return 0; | 2361 | return 0; |
2119 | } | 2362 | } |
2120 | ext4_lock_group(sb, group); | 2363 | ext4_lock_group(sb, group); |
@@ -2122,7 +2365,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) | |||
2122 | ext4_unlock_group(sb, group); | 2365 | ext4_unlock_group(sb, group); |
2123 | ext4_mb_release_desc(&e4b); | 2366 | ext4_mb_release_desc(&e4b); |
2124 | 2367 | ||
2125 | seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free, | 2368 | seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, |
2126 | sg.info.bb_fragments, sg.info.bb_first_free); | 2369 | sg.info.bb_fragments, sg.info.bb_first_free); |
2127 | for (i = 0; i <= 13; i++) | 2370 | for (i = 0; i <= 13; i++) |
2128 | seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? | 2371 | seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? |
@@ -2296,10 +2539,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2296 | ext4_free_blocks_after_init(sb, group, desc); | 2539 | ext4_free_blocks_after_init(sb, group, desc); |
2297 | } else { | 2540 | } else { |
2298 | meta_group_info[i]->bb_free = | 2541 | meta_group_info[i]->bb_free = |
2299 | le16_to_cpu(desc->bg_free_blocks_count); | 2542 | ext4_free_blks_count(sb, desc); |
2300 | } | 2543 | } |
2301 | 2544 | ||
2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2545 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2546 | init_rwsem(&meta_group_info[i]->alloc_sem); | ||
2303 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | 2547 | meta_group_info[i]->bb_free_root.rb_node = NULL;; |
2304 | 2548 | ||
2305 | #ifdef DOUBLE_CHECK | 2549 | #ifdef DOUBLE_CHECK |
@@ -2327,54 +2571,6 @@ exit_meta_group_info: | |||
2327 | } /* ext4_mb_add_groupinfo */ | 2571 | } /* ext4_mb_add_groupinfo */ |
2328 | 2572 | ||
2329 | /* | 2573 | /* |
2330 | * Add a group to the existing groups. | ||
2331 | * This function is used for online resize | ||
2332 | */ | ||
2333 | int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group, | ||
2334 | struct ext4_group_desc *desc) | ||
2335 | { | ||
2336 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2337 | struct inode *inode = sbi->s_buddy_cache; | ||
2338 | int blocks_per_page; | ||
2339 | int block; | ||
2340 | int pnum; | ||
2341 | struct page *page; | ||
2342 | int err; | ||
2343 | |||
2344 | /* Add group based on group descriptor*/ | ||
2345 | err = ext4_mb_add_groupinfo(sb, group, desc); | ||
2346 | if (err) | ||
2347 | return err; | ||
2348 | |||
2349 | /* | ||
2350 | * Cache pages containing dynamic mb_alloc datas (buddy and bitmap | ||
2351 | * datas) are set not up to date so that they will be re-initilaized | ||
2352 | * during the next call to ext4_mb_load_buddy | ||
2353 | */ | ||
2354 | |||
2355 | /* Set buddy page as not up to date */ | ||
2356 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
2357 | block = group * 2; | ||
2358 | pnum = block / blocks_per_page; | ||
2359 | page = find_get_page(inode->i_mapping, pnum); | ||
2360 | if (page != NULL) { | ||
2361 | ClearPageUptodate(page); | ||
2362 | page_cache_release(page); | ||
2363 | } | ||
2364 | |||
2365 | /* Set bitmap page as not up to date */ | ||
2366 | block++; | ||
2367 | pnum = block / blocks_per_page; | ||
2368 | page = find_get_page(inode->i_mapping, pnum); | ||
2369 | if (page != NULL) { | ||
2370 | ClearPageUptodate(page); | ||
2371 | page_cache_release(page); | ||
2372 | } | ||
2373 | |||
2374 | return 0; | ||
2375 | } | ||
2376 | |||
2377 | /* | ||
2378 | * Update an existing group. | 2574 | * Update an existing group. |
2379 | * This function is used for online resize | 2575 | * This function is used for online resize |
2380 | */ | 2576 | */ |
@@ -2457,7 +2653,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2457 | desc = ext4_get_group_desc(sb, i, NULL); | 2653 | desc = ext4_get_group_desc(sb, i, NULL); |
2458 | if (desc == NULL) { | 2654 | if (desc == NULL) { |
2459 | printk(KERN_ERR | 2655 | printk(KERN_ERR |
2460 | "EXT4-fs: can't read descriptor %lu\n", i); | 2656 | "EXT4-fs: can't read descriptor %u\n", i); |
2461 | goto err_freebuddy; | 2657 | goto err_freebuddy; |
2462 | } | 2658 | } |
2463 | if (ext4_mb_add_groupinfo(sb, i, desc) != 0) | 2659 | if (ext4_mb_add_groupinfo(sb, i, desc) != 0) |
@@ -2493,6 +2689,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2493 | if (sbi->s_mb_offsets == NULL) { | 2689 | if (sbi->s_mb_offsets == NULL) { |
2494 | return -ENOMEM; | 2690 | return -ENOMEM; |
2495 | } | 2691 | } |
2692 | |||
2693 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); | ||
2496 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2694 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
2497 | if (sbi->s_mb_maxs == NULL) { | 2695 | if (sbi->s_mb_maxs == NULL) { |
2498 | kfree(sbi->s_mb_maxs); | 2696 | kfree(sbi->s_mb_maxs); |
@@ -2551,7 +2749,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2551 | ext4_mb_init_per_dev_proc(sb); | 2749 | ext4_mb_init_per_dev_proc(sb); |
2552 | ext4_mb_history_init(sb); | 2750 | ext4_mb_history_init(sb); |
2553 | 2751 | ||
2554 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2752 | if (sbi->s_journal) |
2753 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | ||
2555 | 2754 | ||
2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); | 2755 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2557 | return 0; | 2756 | return 0; |
@@ -2652,7 +2851,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2652 | list_for_each_safe(l, ltmp, &txn->t_private_list) { | 2851 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2653 | entry = list_entry(l, struct ext4_free_data, list); | 2852 | entry = list_entry(l, struct ext4_free_data, list); |
2654 | 2853 | ||
2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2854 | mb_debug("gonna free %u blocks in group %u (0x%p):", |
2656 | entry->count, entry->group, entry); | 2855 | entry->count, entry->group, entry); |
2657 | 2856 | ||
2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); | 2857 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
@@ -2679,8 +2878,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | |||
2679 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) | 2878 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) |
2680 | + entry->start_blk | 2879 | + entry->start_blk |
2681 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); | 2880 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
2682 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, | 2881 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", |
2683 | (unsigned long long) discard_block, entry->count); | 2882 | sb->s_id, (unsigned long long) discard_block, |
2883 | entry->count); | ||
2684 | sb_issue_discard(sb, discard_block, entry->count); | 2884 | sb_issue_discard(sb, discard_block, entry->count); |
2685 | 2885 | ||
2686 | kmem_cache_free(ext4_free_ext_cachep, entry); | 2886 | kmem_cache_free(ext4_free_ext_cachep, entry); |
@@ -2791,7 +2991,7 @@ void exit_ext4_mballoc(void) | |||
2791 | */ | 2991 | */ |
2792 | static noinline_for_stack int | 2992 | static noinline_for_stack int |
2793 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2993 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
2794 | handle_t *handle, unsigned long reserv_blks) | 2994 | handle_t *handle, unsigned int reserv_blks) |
2795 | { | 2995 | { |
2796 | struct buffer_head *bitmap_bh = NULL; | 2996 | struct buffer_head *bitmap_bh = NULL; |
2797 | struct ext4_super_block *es; | 2997 | struct ext4_super_block *es; |
@@ -2824,7 +3024,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2824 | if (!gdp) | 3024 | if (!gdp) |
2825 | goto out_err; | 3025 | goto out_err; |
2826 | 3026 | ||
2827 | ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group, | 3027 | ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, |
2828 | gdp->bg_free_blocks_count); | 3028 | gdp->bg_free_blocks_count); |
2829 | 3029 | ||
2830 | err = ext4_journal_get_write_access(handle, gdp_bh); | 3030 | err = ext4_journal_get_write_access(handle, gdp_bh); |
@@ -2843,8 +3043,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2843 | in_range(block + len - 1, ext4_inode_table(sb, gdp), | 3043 | in_range(block + len - 1, ext4_inode_table(sb, gdp), |
2844 | EXT4_SB(sb)->s_itb_per_group)) { | 3044 | EXT4_SB(sb)->s_itb_per_group)) { |
2845 | ext4_error(sb, __func__, | 3045 | ext4_error(sb, __func__, |
2846 | "Allocating block in system zone - block = %llu", | 3046 | "Allocating block %llu in system zone of %d group\n", |
2847 | block); | 3047 | block, ac->ac_b_ex.fe_group); |
2848 | /* File system mounted not to panic on error | 3048 | /* File system mounted not to panic on error |
2849 | * Fix the bitmap and repeat the block allocation | 3049 | * Fix the bitmap and repeat the block allocation |
2850 | * We leak some of the blocks here. | 3050 | * We leak some of the blocks here. |
@@ -2852,7 +3052,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2852 | mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), | 3052 | mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), |
2853 | bitmap_bh->b_data, ac->ac_b_ex.fe_start, | 3053 | bitmap_bh->b_data, ac->ac_b_ex.fe_start, |
2854 | ac->ac_b_ex.fe_len); | 3054 | ac->ac_b_ex.fe_len); |
2855 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | 3055 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
2856 | if (!err) | 3056 | if (!err) |
2857 | err = -EAGAIN; | 3057 | err = -EAGAIN; |
2858 | goto out_err; | 3058 | goto out_err; |
@@ -2866,18 +3066,17 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2866 | } | 3066 | } |
2867 | } | 3067 | } |
2868 | #endif | 3068 | #endif |
2869 | mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, | ||
2870 | ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); | ||
2871 | |||
2872 | spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 3069 | spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
3070 | mb_set_bits(NULL, bitmap_bh->b_data, | ||
3071 | ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); | ||
2873 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 3072 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
2874 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 3073 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
2875 | gdp->bg_free_blocks_count = | 3074 | ext4_free_blks_set(sb, gdp, |
2876 | cpu_to_le16(ext4_free_blocks_after_init(sb, | 3075 | ext4_free_blocks_after_init(sb, |
2877 | ac->ac_b_ex.fe_group, | 3076 | ac->ac_b_ex.fe_group, gdp)); |
2878 | gdp)); | ||
2879 | } | 3077 | } |
2880 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 3078 | len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; |
3079 | ext4_free_blks_set(sb, gdp, len); | ||
2881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 3080 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
2882 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 3081 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
2883 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | 3082 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); |
@@ -2899,10 +3098,10 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
2899 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | 3098 | spin_unlock(sb_bgl_lock(sbi, flex_group)); |
2900 | } | 3099 | } |
2901 | 3100 | ||
2902 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | 3101 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
2903 | if (err) | 3102 | if (err) |
2904 | goto out_err; | 3103 | goto out_err; |
2905 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | 3104 | err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); |
2906 | 3105 | ||
2907 | out_err: | 3106 | out_err: |
2908 | sb->s_dirt = 1; | 3107 | sb->s_dirt = 1; |
@@ -3031,7 +3230,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3031 | /* check we don't cross already preallocated blocks */ | 3230 | /* check we don't cross already preallocated blocks */ |
3032 | rcu_read_lock(); | 3231 | rcu_read_lock(); |
3033 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { | 3232 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { |
3034 | unsigned long pa_end; | 3233 | ext4_lblk_t pa_end; |
3035 | 3234 | ||
3036 | if (pa->pa_deleted) | 3235 | if (pa->pa_deleted) |
3037 | continue; | 3236 | continue; |
@@ -3075,7 +3274,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, | |||
3075 | /* XXX: extra loop to check we really don't overlap preallocations */ | 3274 | /* XXX: extra loop to check we really don't overlap preallocations */ |
3076 | rcu_read_lock(); | 3275 | rcu_read_lock(); |
3077 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { | 3276 | list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) { |
3078 | unsigned long pa_end; | 3277 | ext4_lblk_t pa_end; |
3079 | spin_lock(&pa->pa_lock); | 3278 | spin_lock(&pa->pa_lock); |
3080 | if (pa->pa_deleted == 0) { | 3279 | if (pa->pa_deleted == 0) { |
3081 | pa_end = pa->pa_lstart + pa->pa_len; | 3280 | pa_end = pa->pa_lstart + pa->pa_len; |
@@ -3307,6 +3506,32 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3307 | } | 3506 | } |
3308 | 3507 | ||
3309 | /* | 3508 | /* |
3509 | * the function goes through all block freed in the group | ||
3510 | * but not yet committed and marks them used in in-core bitmap. | ||
3511 | * buddy must be generated from this bitmap | ||
3512 | * Need to be called with ext4 group lock (ext4_lock_group) | ||
3513 | */ | ||
3514 | static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, | ||
3515 | ext4_group_t group) | ||
3516 | { | ||
3517 | struct rb_node *n; | ||
3518 | struct ext4_group_info *grp; | ||
3519 | struct ext4_free_data *entry; | ||
3520 | |||
3521 | grp = ext4_get_group_info(sb, group); | ||
3522 | n = rb_first(&(grp->bb_free_root)); | ||
3523 | |||
3524 | while (n) { | ||
3525 | entry = rb_entry(n, struct ext4_free_data, node); | ||
3526 | mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), | ||
3527 | bitmap, entry->start_blk, | ||
3528 | entry->count); | ||
3529 | n = rb_next(n); | ||
3530 | } | ||
3531 | return; | ||
3532 | } | ||
3533 | |||
3534 | /* | ||
3310 | * the function goes through all preallocation in this group and marks them | 3535 | * the function goes through all preallocation in this group and marks them |
3311 | * used in in-core bitmap. buddy must be generated from this bitmap | 3536 | * used in in-core bitmap. buddy must be generated from this bitmap |
3312 | * Need to be called with ext4 group lock (ext4_lock_group) | 3537 | * Need to be called with ext4 group lock (ext4_lock_group) |
@@ -3346,7 +3571,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | |||
3346 | preallocated += len; | 3571 | preallocated += len; |
3347 | count++; | 3572 | count++; |
3348 | } | 3573 | } |
3349 | mb_debug("prellocated %u for group %lu\n", preallocated, group); | 3574 | mb_debug("prellocated %u for group %u\n", preallocated, group); |
3350 | } | 3575 | } |
3351 | 3576 | ||
3352 | static void ext4_mb_pa_callback(struct rcu_head *head) | 3577 | static void ext4_mb_pa_callback(struct rcu_head *head) |
@@ -3363,7 +3588,7 @@ static void ext4_mb_pa_callback(struct rcu_head *head) | |||
3363 | static void ext4_mb_put_pa(struct ext4_allocation_context *ac, | 3588 | static void ext4_mb_put_pa(struct ext4_allocation_context *ac, |
3364 | struct super_block *sb, struct ext4_prealloc_space *pa) | 3589 | struct super_block *sb, struct ext4_prealloc_space *pa) |
3365 | { | 3590 | { |
3366 | unsigned long grp; | 3591 | ext4_group_t grp; |
3367 | 3592 | ||
3368 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) | 3593 | if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) |
3369 | return; | 3594 | return; |
@@ -3473,6 +3698,10 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) | |||
3473 | 3698 | ||
3474 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, | 3699 | mb_debug("new inode pa %p: %llu/%u for %u\n", pa, |
3475 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3700 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3701 | trace_mark(ext4_mb_new_inode_pa, | ||
3702 | "dev %s ino %lu pstart %llu len %u lstart %u", | ||
3703 | sb->s_id, ac->ac_inode->i_ino, | ||
3704 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | ||
3476 | 3705 | ||
3477 | ext4_mb_use_inode_pa(ac, pa); | 3706 | ext4_mb_use_inode_pa(ac, pa); |
3478 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | 3707 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); |
@@ -3530,7 +3759,9 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) | |||
3530 | pa->pa_linear = 1; | 3759 | pa->pa_linear = 1; |
3531 | 3760 | ||
3532 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, | 3761 | mb_debug("new group pa %p: %llu/%u for %u\n", pa, |
3533 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); | 3762 | pa->pa_pstart, pa->pa_len, pa->pa_lstart); |
3763 | trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u", | ||
3764 | sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart); | ||
3534 | 3765 | ||
3535 | ext4_mb_use_group_pa(ac, pa); | 3766 | ext4_mb_use_group_pa(ac, pa); |
3536 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); | 3767 | atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated); |
@@ -3579,16 +3810,18 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3579 | { | 3810 | { |
3580 | struct super_block *sb = e4b->bd_sb; | 3811 | struct super_block *sb = e4b->bd_sb; |
3581 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3812 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3582 | unsigned long end; | 3813 | unsigned int end; |
3583 | unsigned long next; | 3814 | unsigned int next; |
3584 | ext4_group_t group; | 3815 | ext4_group_t group; |
3585 | ext4_grpblk_t bit; | 3816 | ext4_grpblk_t bit; |
3817 | unsigned long long grp_blk_start; | ||
3586 | sector_t start; | 3818 | sector_t start; |
3587 | int err = 0; | 3819 | int err = 0; |
3588 | int free = 0; | 3820 | int free = 0; |
3589 | 3821 | ||
3590 | BUG_ON(pa->pa_deleted == 0); | 3822 | BUG_ON(pa->pa_deleted == 0); |
3591 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3823 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3824 | grp_blk_start = pa->pa_pstart - bit; | ||
3592 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3825 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
3593 | end = bit + pa->pa_len; | 3826 | end = bit + pa->pa_len; |
3594 | 3827 | ||
@@ -3618,6 +3851,10 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3618 | ext4_mb_store_history(ac); | 3851 | ext4_mb_store_history(ac); |
3619 | } | 3852 | } |
3620 | 3853 | ||
3854 | trace_mark(ext4_mb_release_inode_pa, | ||
3855 | "dev %s ino %lu block %llu count %u", | ||
3856 | sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit, | ||
3857 | next - bit); | ||
3621 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); | 3858 | mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); |
3622 | bit = next + 1; | 3859 | bit = next + 1; |
3623 | } | 3860 | } |
@@ -3626,8 +3863,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3626 | pa, (unsigned long) pa->pa_lstart, | 3863 | pa, (unsigned long) pa->pa_lstart, |
3627 | (unsigned long) pa->pa_pstart, | 3864 | (unsigned long) pa->pa_pstart, |
3628 | (unsigned long) pa->pa_len); | 3865 | (unsigned long) pa->pa_len); |
3629 | ext4_error(sb, __func__, "free %u, pa_free %u\n", | 3866 | ext4_grp_locked_error(sb, group, |
3630 | free, pa->pa_free); | 3867 | __func__, "free %u, pa_free %u", |
3868 | free, pa->pa_free); | ||
3631 | /* | 3869 | /* |
3632 | * pa is already deleted so we use the value obtained | 3870 | * pa is already deleted so we use the value obtained |
3633 | * from the bitmap and continue. | 3871 | * from the bitmap and continue. |
@@ -3650,6 +3888,8 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3650 | if (ac) | 3888 | if (ac) |
3651 | ac->ac_op = EXT4_MB_HISTORY_DISCARD; | 3889 | ac->ac_op = EXT4_MB_HISTORY_DISCARD; |
3652 | 3890 | ||
3891 | trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d", | ||
3892 | sb->s_id, pa->pa_pstart, pa->pa_len); | ||
3653 | BUG_ON(pa->pa_deleted == 0); | 3893 | BUG_ON(pa->pa_deleted == 0); |
3654 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3894 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3655 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3895 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
@@ -3692,7 +3932,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3692 | int busy = 0; | 3932 | int busy = 0; |
3693 | int free = 0; | 3933 | int free = 0; |
3694 | 3934 | ||
3695 | mb_debug("discard preallocation for group %lu\n", group); | 3935 | mb_debug("discard preallocation for group %u\n", group); |
3696 | 3936 | ||
3697 | if (list_empty(&grp->bb_prealloc_list)) | 3937 | if (list_empty(&grp->bb_prealloc_list)) |
3698 | return 0; | 3938 | return 0; |
@@ -3700,14 +3940,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, | |||
3700 | bitmap_bh = ext4_read_block_bitmap(sb, group); | 3940 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
3701 | if (bitmap_bh == NULL) { | 3941 | if (bitmap_bh == NULL) { |
3702 | ext4_error(sb, __func__, "Error in reading block " | 3942 | ext4_error(sb, __func__, "Error in reading block " |
3703 | "bitmap for %lu\n", group); | 3943 | "bitmap for %u", group); |
3704 | return 0; | 3944 | return 0; |
3705 | } | 3945 | } |
3706 | 3946 | ||
3707 | err = ext4_mb_load_buddy(sb, group, &e4b); | 3947 | err = ext4_mb_load_buddy(sb, group, &e4b); |
3708 | if (err) { | 3948 | if (err) { |
3709 | ext4_error(sb, __func__, "Error in loading buddy " | 3949 | ext4_error(sb, __func__, "Error in loading buddy " |
3710 | "information for %lu\n", group); | 3950 | "information for %u", group); |
3711 | put_bh(bitmap_bh); | 3951 | put_bh(bitmap_bh); |
3712 | return 0; | 3952 | return 0; |
3713 | } | 3953 | } |
@@ -3815,6 +4055,8 @@ void ext4_discard_preallocations(struct inode *inode) | |||
3815 | } | 4055 | } |
3816 | 4056 | ||
3817 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); | 4057 | mb_debug("discard preallocation for inode %lu\n", inode->i_ino); |
4058 | trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id, | ||
4059 | inode->i_ino); | ||
3818 | 4060 | ||
3819 | INIT_LIST_HEAD(&list); | 4061 | INIT_LIST_HEAD(&list); |
3820 | 4062 | ||
@@ -3874,14 +4116,14 @@ repeat: | |||
3874 | err = ext4_mb_load_buddy(sb, group, &e4b); | 4116 | err = ext4_mb_load_buddy(sb, group, &e4b); |
3875 | if (err) { | 4117 | if (err) { |
3876 | ext4_error(sb, __func__, "Error in loading buddy " | 4118 | ext4_error(sb, __func__, "Error in loading buddy " |
3877 | "information for %lu\n", group); | 4119 | "information for %u", group); |
3878 | continue; | 4120 | continue; |
3879 | } | 4121 | } |
3880 | 4122 | ||
3881 | bitmap_bh = ext4_read_block_bitmap(sb, group); | 4123 | bitmap_bh = ext4_read_block_bitmap(sb, group); |
3882 | if (bitmap_bh == NULL) { | 4124 | if (bitmap_bh == NULL) { |
3883 | ext4_error(sb, __func__, "Error in reading block " | 4125 | ext4_error(sb, __func__, "Error in reading block " |
3884 | "bitmap for %lu\n", group); | 4126 | "bitmap for %u", group); |
3885 | ext4_mb_release_desc(&e4b); | 4127 | ext4_mb_release_desc(&e4b); |
3886 | continue; | 4128 | continue; |
3887 | } | 4129 | } |
@@ -4024,8 +4266,8 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4024 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4266 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4025 | struct ext4_super_block *es = sbi->s_es; | 4267 | struct ext4_super_block *es = sbi->s_es; |
4026 | ext4_group_t group; | 4268 | ext4_group_t group; |
4027 | unsigned long len; | 4269 | unsigned int len; |
4028 | unsigned long goal; | 4270 | ext4_fsblk_t goal; |
4029 | ext4_grpblk_t block; | 4271 | ext4_grpblk_t block; |
4030 | 4272 | ||
4031 | /* we can't allocate > group size */ | 4273 | /* we can't allocate > group size */ |
@@ -4068,6 +4310,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, | |||
4068 | ac->ac_pa = NULL; | 4310 | ac->ac_pa = NULL; |
4069 | ac->ac_bitmap_page = NULL; | 4311 | ac->ac_bitmap_page = NULL; |
4070 | ac->ac_buddy_page = NULL; | 4312 | ac->ac_buddy_page = NULL; |
4313 | ac->alloc_semp = NULL; | ||
4071 | ac->ac_lg = NULL; | 4314 | ac->ac_lg = NULL; |
4072 | 4315 | ||
4073 | /* we have to define context: we'll we work with a file or | 4316 | /* we have to define context: we'll we work with a file or |
@@ -4146,7 +4389,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, | |||
4146 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); | 4389 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); |
4147 | if (ext4_mb_load_buddy(sb, group, &e4b)) { | 4390 | if (ext4_mb_load_buddy(sb, group, &e4b)) { |
4148 | ext4_error(sb, __func__, "Error in loading buddy " | 4391 | ext4_error(sb, __func__, "Error in loading buddy " |
4149 | "information for %lu\n", group); | 4392 | "information for %u", group); |
4150 | continue; | 4393 | continue; |
4151 | } | 4394 | } |
4152 | ext4_lock_group(sb, group); | 4395 | ext4_lock_group(sb, group); |
@@ -4248,6 +4491,8 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) | |||
4248 | } | 4491 | } |
4249 | ext4_mb_put_pa(ac, ac->ac_sb, pa); | 4492 | ext4_mb_put_pa(ac, ac->ac_sb, pa); |
4250 | } | 4493 | } |
4494 | if (ac->alloc_semp) | ||
4495 | up_read(ac->alloc_semp); | ||
4251 | if (ac->ac_bitmap_page) | 4496 | if (ac->ac_bitmap_page) |
4252 | page_cache_release(ac->ac_bitmap_page); | 4497 | page_cache_release(ac->ac_bitmap_page); |
4253 | if (ac->ac_buddy_page) | 4498 | if (ac->ac_buddy_page) |
@@ -4264,6 +4509,8 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
4264 | int ret; | 4509 | int ret; |
4265 | int freed = 0; | 4510 | int freed = 0; |
4266 | 4511 | ||
4512 | trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", | ||
4513 | sb->s_id, needed); | ||
4267 | for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { | 4514 | for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { |
4268 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); | 4515 | ret = ext4_mb_discard_group_preallocations(sb, i, needed); |
4269 | freed += ret; | 4516 | freed += ret; |
@@ -4286,12 +4533,24 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4286 | struct ext4_sb_info *sbi; | 4533 | struct ext4_sb_info *sbi; |
4287 | struct super_block *sb; | 4534 | struct super_block *sb; |
4288 | ext4_fsblk_t block = 0; | 4535 | ext4_fsblk_t block = 0; |
4289 | unsigned long inquota; | 4536 | unsigned int inquota; |
4290 | unsigned long reserv_blks = 0; | 4537 | unsigned int reserv_blks = 0; |
4291 | 4538 | ||
4292 | sb = ar->inode->i_sb; | 4539 | sb = ar->inode->i_sb; |
4293 | sbi = EXT4_SB(sb); | 4540 | sbi = EXT4_SB(sb); |
4294 | 4541 | ||
4542 | trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu " | ||
4543 | "lblk %llu goal %llu lleft %llu lright %llu " | ||
4544 | "pleft %llu pright %llu ", | ||
4545 | sb->s_id, ar->flags, ar->len, | ||
4546 | ar->inode ? ar->inode->i_ino : 0, | ||
4547 | (unsigned long long) ar->logical, | ||
4548 | (unsigned long long) ar->goal, | ||
4549 | (unsigned long long) ar->lleft, | ||
4550 | (unsigned long long) ar->lright, | ||
4551 | (unsigned long long) ar->pleft, | ||
4552 | (unsigned long long) ar->pright); | ||
4553 | |||
4295 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4554 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { |
4296 | /* | 4555 | /* |
4297 | * With delalloc we already reserved the blocks | 4556 | * With delalloc we already reserved the blocks |
@@ -4313,7 +4572,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4313 | } | 4572 | } |
4314 | if (ar->len == 0) { | 4573 | if (ar->len == 0) { |
4315 | *errp = -EDQUOT; | 4574 | *errp = -EDQUOT; |
4316 | return 0; | 4575 | goto out3; |
4317 | } | 4576 | } |
4318 | inquota = ar->len; | 4577 | inquota = ar->len; |
4319 | 4578 | ||
@@ -4348,10 +4607,14 @@ repeat: | |||
4348 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) | 4607 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
4349 | ext4_mb_new_preallocation(ac); | 4608 | ext4_mb_new_preallocation(ac); |
4350 | } | 4609 | } |
4351 | |||
4352 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4610 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
4353 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); | 4611 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
4354 | if (*errp == -EAGAIN) { | 4612 | if (*errp == -EAGAIN) { |
4613 | /* | ||
4614 | * drop the reference that we took | ||
4615 | * in ext4_mb_use_best_found | ||
4616 | */ | ||
4617 | ext4_mb_release_context(ac); | ||
4355 | ac->ac_b_ex.fe_group = 0; | 4618 | ac->ac_b_ex.fe_group = 0; |
4356 | ac->ac_b_ex.fe_start = 0; | 4619 | ac->ac_b_ex.fe_start = 0; |
4357 | ac->ac_b_ex.fe_len = 0; | 4620 | ac->ac_b_ex.fe_len = 0; |
@@ -4382,6 +4645,26 @@ out2: | |||
4382 | out1: | 4645 | out1: |
4383 | if (ar->len < inquota) | 4646 | if (ar->len < inquota) |
4384 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); | 4647 | DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len); |
4648 | out3: | ||
4649 | if (!ar->len) { | ||
4650 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) | ||
4651 | /* release all the reserved blocks if non delalloc */ | ||
4652 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
4653 | reserv_blks); | ||
4654 | } | ||
4655 | |||
4656 | trace_mark(ext4_allocate_blocks, | ||
4657 | "dev %s block %llu flags %u len %u ino %lu " | ||
4658 | "logical %llu goal %llu lleft %llu lright %llu " | ||
4659 | "pleft %llu pright %llu ", | ||
4660 | sb->s_id, (unsigned long long) block, | ||
4661 | ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0, | ||
4662 | (unsigned long long) ar->logical, | ||
4663 | (unsigned long long) ar->goal, | ||
4664 | (unsigned long long) ar->lleft, | ||
4665 | (unsigned long long) ar->lright, | ||
4666 | (unsigned long long) ar->pleft, | ||
4667 | (unsigned long long) ar->pright); | ||
4385 | 4668 | ||
4386 | return block; | 4669 | return block; |
4387 | } | 4670 | } |
@@ -4403,27 +4686,23 @@ static int can_merge(struct ext4_free_data *entry1, | |||
4403 | 4686 | ||
4404 | static noinline_for_stack int | 4687 | static noinline_for_stack int |
4405 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | 4688 | ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, |
4406 | ext4_group_t group, ext4_grpblk_t block, int count) | 4689 | struct ext4_free_data *new_entry) |
4407 | { | 4690 | { |
4691 | ext4_grpblk_t block; | ||
4692 | struct ext4_free_data *entry; | ||
4408 | struct ext4_group_info *db = e4b->bd_info; | 4693 | struct ext4_group_info *db = e4b->bd_info; |
4409 | struct super_block *sb = e4b->bd_sb; | 4694 | struct super_block *sb = e4b->bd_sb; |
4410 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4695 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4411 | struct ext4_free_data *entry, *new_entry; | ||
4412 | struct rb_node **n = &db->bb_free_root.rb_node, *node; | 4696 | struct rb_node **n = &db->bb_free_root.rb_node, *node; |
4413 | struct rb_node *parent = NULL, *new_node; | 4697 | struct rb_node *parent = NULL, *new_node; |
4414 | 4698 | ||
4415 | 4699 | BUG_ON(!ext4_handle_valid(handle)); | |
4416 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4700 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4417 | BUG_ON(e4b->bd_buddy_page == NULL); | 4701 | BUG_ON(e4b->bd_buddy_page == NULL); |
4418 | 4702 | ||
4419 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
4420 | new_entry->start_blk = block; | ||
4421 | new_entry->group = group; | ||
4422 | new_entry->count = count; | ||
4423 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
4424 | new_node = &new_entry->node; | 4703 | new_node = &new_entry->node; |
4704 | block = new_entry->start_blk; | ||
4425 | 4705 | ||
4426 | ext4_lock_group(sb, group); | ||
4427 | if (!*n) { | 4706 | if (!*n) { |
4428 | /* first free block exent. We need to | 4707 | /* first free block exent. We need to |
4429 | protect buddy cache from being freed, | 4708 | protect buddy cache from being freed, |
@@ -4441,10 +4720,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4441 | else if (block >= (entry->start_blk + entry->count)) | 4720 | else if (block >= (entry->start_blk + entry->count)) |
4442 | n = &(*n)->rb_right; | 4721 | n = &(*n)->rb_right; |
4443 | else { | 4722 | else { |
4444 | ext4_unlock_group(sb, group); | 4723 | ext4_grp_locked_error(sb, e4b->bd_group, __func__, |
4445 | ext4_error(sb, __func__, | 4724 | "Double free of blocks %d (%d %d)", |
4446 | "Double free of blocks %d (%d %d)\n", | 4725 | block, entry->start_blk, entry->count); |
4447 | block, entry->start_blk, entry->count); | ||
4448 | return 0; | 4726 | return 0; |
4449 | } | 4727 | } |
4450 | } | 4728 | } |
@@ -4483,7 +4761,6 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4483 | spin_lock(&sbi->s_md_lock); | 4761 | spin_lock(&sbi->s_md_lock); |
4484 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | 4762 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); |
4485 | spin_unlock(&sbi->s_md_lock); | 4763 | spin_unlock(&sbi->s_md_lock); |
4486 | ext4_unlock_group(sb, group); | ||
4487 | return 0; | 4764 | return 0; |
4488 | } | 4765 | } |
4489 | 4766 | ||
@@ -4499,7 +4776,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4499 | struct ext4_allocation_context *ac = NULL; | 4776 | struct ext4_allocation_context *ac = NULL; |
4500 | struct ext4_group_desc *gdp; | 4777 | struct ext4_group_desc *gdp; |
4501 | struct ext4_super_block *es; | 4778 | struct ext4_super_block *es; |
4502 | unsigned long overflow; | 4779 | unsigned int overflow; |
4503 | ext4_grpblk_t bit; | 4780 | ext4_grpblk_t bit; |
4504 | struct buffer_head *gd_bh; | 4781 | struct buffer_head *gd_bh; |
4505 | ext4_group_t block_group; | 4782 | ext4_group_t block_group; |
@@ -4522,6 +4799,10 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4522 | } | 4799 | } |
4523 | 4800 | ||
4524 | ext4_debug("freeing block %lu\n", block); | 4801 | ext4_debug("freeing block %lu\n", block); |
4802 | trace_mark(ext4_free_blocks, | ||
4803 | "dev %s block %llu count %lu metadata %d ino %lu", | ||
4804 | sb->s_id, (unsigned long long) block, count, metadata, | ||
4805 | inode ? inode->i_ino : 0); | ||
4525 | 4806 | ||
4526 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4807 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
4527 | if (ac) { | 4808 | if (ac) { |
@@ -4581,11 +4862,6 @@ do_more: | |||
4581 | err = ext4_journal_get_write_access(handle, gd_bh); | 4862 | err = ext4_journal_get_write_access(handle, gd_bh); |
4582 | if (err) | 4863 | if (err) |
4583 | goto error_return; | 4864 | goto error_return; |
4584 | |||
4585 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4586 | if (err) | ||
4587 | goto error_return; | ||
4588 | |||
4589 | #ifdef AGGRESSIVE_CHECK | 4865 | #ifdef AGGRESSIVE_CHECK |
4590 | { | 4866 | { |
4591 | int i; | 4867 | int i; |
@@ -4593,13 +4869,6 @@ do_more: | |||
4593 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); | 4869 | BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); |
4594 | } | 4870 | } |
4595 | #endif | 4871 | #endif |
4596 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | ||
4597 | bit, count); | ||
4598 | |||
4599 | /* We dirtied the bitmap block */ | ||
4600 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
4601 | err = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
4602 | |||
4603 | if (ac) { | 4872 | if (ac) { |
4604 | ac->ac_b_ex.fe_group = block_group; | 4873 | ac->ac_b_ex.fe_group = block_group; |
4605 | ac->ac_b_ex.fe_start = bit; | 4874 | ac->ac_b_ex.fe_start = bit; |
@@ -4607,19 +4876,41 @@ do_more: | |||
4607 | ext4_mb_store_history(ac); | 4876 | ext4_mb_store_history(ac); |
4608 | } | 4877 | } |
4609 | 4878 | ||
4610 | if (metadata) { | 4879 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
4611 | /* blocks being freed are metadata. these blocks shouldn't | 4880 | if (err) |
4612 | * be used until this transaction is committed */ | 4881 | goto error_return; |
4613 | ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); | 4882 | if (metadata && ext4_handle_valid(handle)) { |
4883 | struct ext4_free_data *new_entry; | ||
4884 | /* | ||
4885 | * blocks being freed are metadata. these blocks shouldn't | ||
4886 | * be used until this transaction is committed | ||
4887 | */ | ||
4888 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
4889 | new_entry->start_blk = bit; | ||
4890 | new_entry->group = block_group; | ||
4891 | new_entry->count = count; | ||
4892 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
4893 | ext4_lock_group(sb, block_group); | ||
4894 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | ||
4895 | bit, count); | ||
4896 | ext4_mb_free_metadata(handle, &e4b, new_entry); | ||
4897 | ext4_unlock_group(sb, block_group); | ||
4614 | } else { | 4898 | } else { |
4615 | ext4_lock_group(sb, block_group); | 4899 | ext4_lock_group(sb, block_group); |
4900 | /* need to update group_info->bb_free and bitmap | ||
4901 | * with group lock held. generate_buddy look at | ||
4902 | * them with group lock_held | ||
4903 | */ | ||
4904 | mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, | ||
4905 | bit, count); | ||
4616 | mb_free_blocks(inode, &e4b, bit, count); | 4906 | mb_free_blocks(inode, &e4b, bit, count); |
4617 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); | 4907 | ext4_mb_return_to_preallocation(inode, &e4b, block, count); |
4618 | ext4_unlock_group(sb, block_group); | 4908 | ext4_unlock_group(sb, block_group); |
4619 | } | 4909 | } |
4620 | 4910 | ||
4621 | spin_lock(sb_bgl_lock(sbi, block_group)); | 4911 | spin_lock(sb_bgl_lock(sbi, block_group)); |
4622 | le16_add_cpu(&gdp->bg_free_blocks_count, count); | 4912 | ret = ext4_free_blks_count(sb, gdp) + count; |
4913 | ext4_free_blks_set(sb, gdp, ret); | ||
4623 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); | 4914 | gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); |
4624 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 4915 | spin_unlock(sb_bgl_lock(sbi, block_group)); |
4625 | percpu_counter_add(&sbi->s_freeblocks_counter, count); | 4916 | percpu_counter_add(&sbi->s_freeblocks_counter, count); |
@@ -4635,9 +4926,13 @@ do_more: | |||
4635 | 4926 | ||
4636 | *freed += count; | 4927 | *freed += count; |
4637 | 4928 | ||
4929 | /* We dirtied the bitmap block */ | ||
4930 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | ||
4931 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | ||
4932 | |||
4638 | /* And the group descriptor block */ | 4933 | /* And the group descriptor block */ |
4639 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); | 4934 | BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); |
4640 | ret = ext4_journal_dirty_metadata(handle, gd_bh); | 4935 | ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); |
4641 | if (!err) | 4936 | if (!err) |
4642 | err = ret; | 4937 | err = ret; |
4643 | 4938 | ||