diff options
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- | fs/ext4/ialloc.c | 206 |
1 files changed, 35 insertions, 171 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 9c63f273b550..00beb4f9cc4f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -78,7 +78,7 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb, | |||
78 | * allocation, essentially implementing a per-group read-only flag. */ | 78 | * allocation, essentially implementing a per-group read-only flag. */ |
79 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | 79 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { |
80 | ext4_error(sb, "Checksum bad for group %u", block_group); | 80 | ext4_error(sb, "Checksum bad for group %u", block_group); |
81 | ext4_free_blks_set(sb, gdp, 0); | 81 | ext4_free_group_clusters_set(sb, gdp, 0); |
82 | ext4_free_inodes_set(sb, gdp, 0); | 82 | ext4_free_inodes_set(sb, gdp, 0); |
83 | ext4_itable_unused_set(sb, gdp, 0); | 83 | ext4_itable_unused_set(sb, gdp, 0); |
84 | memset(bh->b_data, 0xff, sb->s_blocksize); | 84 | memset(bh->b_data, 0xff, sb->s_blocksize); |
@@ -293,121 +293,9 @@ error_return: | |||
293 | ext4_std_error(sb, fatal); | 293 | ext4_std_error(sb, fatal); |
294 | } | 294 | } |
295 | 295 | ||
296 | /* | ||
297 | * There are two policies for allocating an inode. If the new inode is | ||
298 | * a directory, then a forward search is made for a block group with both | ||
299 | * free space and a low directory-to-inode ratio; if that fails, then of | ||
300 | * the groups with above-average free space, that group with the fewest | ||
301 | * directories already is chosen. | ||
302 | * | ||
303 | * For other inodes, search forward from the parent directory\'s block | ||
304 | * group to find a free inode. | ||
305 | */ | ||
306 | static int find_group_dir(struct super_block *sb, struct inode *parent, | ||
307 | ext4_group_t *best_group) | ||
308 | { | ||
309 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
310 | unsigned int freei, avefreei; | ||
311 | struct ext4_group_desc *desc, *best_desc = NULL; | ||
312 | ext4_group_t group; | ||
313 | int ret = -1; | ||
314 | |||
315 | freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); | ||
316 | avefreei = freei / ngroups; | ||
317 | |||
318 | for (group = 0; group < ngroups; group++) { | ||
319 | desc = ext4_get_group_desc(sb, group, NULL); | ||
320 | if (!desc || !ext4_free_inodes_count(sb, desc)) | ||
321 | continue; | ||
322 | if (ext4_free_inodes_count(sb, desc) < avefreei) | ||
323 | continue; | ||
324 | if (!best_desc || | ||
325 | (ext4_free_blks_count(sb, desc) > | ||
326 | ext4_free_blks_count(sb, best_desc))) { | ||
327 | *best_group = group; | ||
328 | best_desc = desc; | ||
329 | ret = 0; | ||
330 | } | ||
331 | } | ||
332 | return ret; | ||
333 | } | ||
334 | |||
335 | #define free_block_ratio 10 | ||
336 | |||
337 | static int find_group_flex(struct super_block *sb, struct inode *parent, | ||
338 | ext4_group_t *best_group) | ||
339 | { | ||
340 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
341 | struct ext4_group_desc *desc; | ||
342 | struct flex_groups *flex_group = sbi->s_flex_groups; | ||
343 | ext4_group_t parent_group = EXT4_I(parent)->i_block_group; | ||
344 | ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); | ||
345 | ext4_group_t ngroups = ext4_get_groups_count(sb); | ||
346 | int flex_size = ext4_flex_bg_size(sbi); | ||
347 | ext4_group_t best_flex = parent_fbg_group; | ||
348 | int blocks_per_flex = sbi->s_blocks_per_group * flex_size; | ||
349 | int flexbg_free_blocks; | ||
350 | int flex_freeb_ratio; | ||
351 | ext4_group_t n_fbg_groups; | ||
352 | ext4_group_t i; | ||
353 | |||
354 | n_fbg_groups = (ngroups + flex_size - 1) >> | ||
355 | sbi->s_log_groups_per_flex; | ||
356 | |||
357 | find_close_to_parent: | ||
358 | flexbg_free_blocks = atomic_read(&flex_group[best_flex].free_blocks); | ||
359 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
360 | if (atomic_read(&flex_group[best_flex].free_inodes) && | ||
361 | flex_freeb_ratio > free_block_ratio) | ||
362 | goto found_flexbg; | ||
363 | |||
364 | if (best_flex && best_flex == parent_fbg_group) { | ||
365 | best_flex--; | ||
366 | goto find_close_to_parent; | ||
367 | } | ||
368 | |||
369 | for (i = 0; i < n_fbg_groups; i++) { | ||
370 | if (i == parent_fbg_group || i == parent_fbg_group - 1) | ||
371 | continue; | ||
372 | |||
373 | flexbg_free_blocks = atomic_read(&flex_group[i].free_blocks); | ||
374 | flex_freeb_ratio = flexbg_free_blocks * 100 / blocks_per_flex; | ||
375 | |||
376 | if (flex_freeb_ratio > free_block_ratio && | ||
377 | (atomic_read(&flex_group[i].free_inodes))) { | ||
378 | best_flex = i; | ||
379 | goto found_flexbg; | ||
380 | } | ||
381 | |||
382 | if ((atomic_read(&flex_group[best_flex].free_inodes) == 0) || | ||
383 | ((atomic_read(&flex_group[i].free_blocks) > | ||
384 | atomic_read(&flex_group[best_flex].free_blocks)) && | ||
385 | atomic_read(&flex_group[i].free_inodes))) | ||
386 | best_flex = i; | ||
387 | } | ||
388 | |||
389 | if (!atomic_read(&flex_group[best_flex].free_inodes) || | ||
390 | !atomic_read(&flex_group[best_flex].free_blocks)) | ||
391 | return -1; | ||
392 | |||
393 | found_flexbg: | ||
394 | for (i = best_flex * flex_size; i < ngroups && | ||
395 | i < (best_flex + 1) * flex_size; i++) { | ||
396 | desc = ext4_get_group_desc(sb, i, NULL); | ||
397 | if (ext4_free_inodes_count(sb, desc)) { | ||
398 | *best_group = i; | ||
399 | goto out; | ||
400 | } | ||
401 | } | ||
402 | |||
403 | return -1; | ||
404 | out: | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | struct orlov_stats { | 296 | struct orlov_stats { |
409 | __u32 free_inodes; | 297 | __u32 free_inodes; |
410 | __u32 free_blocks; | 298 | __u32 free_clusters; |
411 | __u32 used_dirs; | 299 | __u32 used_dirs; |
412 | }; | 300 | }; |
413 | 301 | ||
@@ -424,7 +312,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
424 | 312 | ||
425 | if (flex_size > 1) { | 313 | if (flex_size > 1) { |
426 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); | 314 | stats->free_inodes = atomic_read(&flex_group[g].free_inodes); |
427 | stats->free_blocks = atomic_read(&flex_group[g].free_blocks); | 315 | stats->free_clusters = atomic_read(&flex_group[g].free_clusters); |
428 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); | 316 | stats->used_dirs = atomic_read(&flex_group[g].used_dirs); |
429 | return; | 317 | return; |
430 | } | 318 | } |
@@ -432,11 +320,11 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, | |||
432 | desc = ext4_get_group_desc(sb, g, NULL); | 320 | desc = ext4_get_group_desc(sb, g, NULL); |
433 | if (desc) { | 321 | if (desc) { |
434 | stats->free_inodes = ext4_free_inodes_count(sb, desc); | 322 | stats->free_inodes = ext4_free_inodes_count(sb, desc); |
435 | stats->free_blocks = ext4_free_blks_count(sb, desc); | 323 | stats->free_clusters = ext4_free_group_clusters(sb, desc); |
436 | stats->used_dirs = ext4_used_dirs_count(sb, desc); | 324 | stats->used_dirs = ext4_used_dirs_count(sb, desc); |
437 | } else { | 325 | } else { |
438 | stats->free_inodes = 0; | 326 | stats->free_inodes = 0; |
439 | stats->free_blocks = 0; | 327 | stats->free_clusters = 0; |
440 | stats->used_dirs = 0; | 328 | stats->used_dirs = 0; |
441 | } | 329 | } |
442 | } | 330 | } |
@@ -471,10 +359,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
471 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); | 359 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
472 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 360 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
473 | unsigned int freei, avefreei; | 361 | unsigned int freei, avefreei; |
474 | ext4_fsblk_t freeb, avefreeb; | 362 | ext4_fsblk_t freeb, avefreec; |
475 | unsigned int ndirs; | 363 | unsigned int ndirs; |
476 | int max_dirs, min_inodes; | 364 | int max_dirs, min_inodes; |
477 | ext4_grpblk_t min_blocks; | 365 | ext4_grpblk_t min_clusters; |
478 | ext4_group_t i, grp, g, ngroups; | 366 | ext4_group_t i, grp, g, ngroups; |
479 | struct ext4_group_desc *desc; | 367 | struct ext4_group_desc *desc; |
480 | struct orlov_stats stats; | 368 | struct orlov_stats stats; |
@@ -490,9 +378,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
490 | 378 | ||
491 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); | 379 | freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); |
492 | avefreei = freei / ngroups; | 380 | avefreei = freei / ngroups; |
493 | freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 381 | freeb = EXT4_C2B(sbi, |
494 | avefreeb = freeb; | 382 | percpu_counter_read_positive(&sbi->s_freeclusters_counter)); |
495 | do_div(avefreeb, ngroups); | 383 | avefreec = freeb; |
384 | do_div(avefreec, ngroups); | ||
496 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); | 385 | ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); |
497 | 386 | ||
498 | if (S_ISDIR(mode) && | 387 | if (S_ISDIR(mode) && |
@@ -518,7 +407,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
518 | continue; | 407 | continue; |
519 | if (stats.free_inodes < avefreei) | 408 | if (stats.free_inodes < avefreei) |
520 | continue; | 409 | continue; |
521 | if (stats.free_blocks < avefreeb) | 410 | if (stats.free_clusters < avefreec) |
522 | continue; | 411 | continue; |
523 | grp = g; | 412 | grp = g; |
524 | ret = 0; | 413 | ret = 0; |
@@ -556,7 +445,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
556 | min_inodes = avefreei - inodes_per_group*flex_size / 4; | 445 | min_inodes = avefreei - inodes_per_group*flex_size / 4; |
557 | if (min_inodes < 1) | 446 | if (min_inodes < 1) |
558 | min_inodes = 1; | 447 | min_inodes = 1; |
559 | min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb)*flex_size / 4; | 448 | min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4; |
560 | 449 | ||
561 | /* | 450 | /* |
562 | * Start looking in the flex group where we last allocated an | 451 | * Start looking in the flex group where we last allocated an |
@@ -575,7 +464,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
575 | continue; | 464 | continue; |
576 | if (stats.free_inodes < min_inodes) | 465 | if (stats.free_inodes < min_inodes) |
577 | continue; | 466 | continue; |
578 | if (stats.free_blocks < min_blocks) | 467 | if (stats.free_clusters < min_clusters) |
579 | continue; | 468 | continue; |
580 | goto found_flex_bg; | 469 | goto found_flex_bg; |
581 | } | 470 | } |
@@ -659,7 +548,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
659 | *group = parent_group; | 548 | *group = parent_group; |
660 | desc = ext4_get_group_desc(sb, *group, NULL); | 549 | desc = ext4_get_group_desc(sb, *group, NULL); |
661 | if (desc && ext4_free_inodes_count(sb, desc) && | 550 | if (desc && ext4_free_inodes_count(sb, desc) && |
662 | ext4_free_blks_count(sb, desc)) | 551 | ext4_free_group_clusters(sb, desc)) |
663 | return 0; | 552 | return 0; |
664 | 553 | ||
665 | /* | 554 | /* |
@@ -683,7 +572,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
683 | *group -= ngroups; | 572 | *group -= ngroups; |
684 | desc = ext4_get_group_desc(sb, *group, NULL); | 573 | desc = ext4_get_group_desc(sb, *group, NULL); |
685 | if (desc && ext4_free_inodes_count(sb, desc) && | 574 | if (desc && ext4_free_inodes_count(sb, desc) && |
686 | ext4_free_blks_count(sb, desc)) | 575 | ext4_free_group_clusters(sb, desc)) |
687 | return 0; | 576 | return 0; |
688 | } | 577 | } |
689 | 578 | ||
@@ -802,7 +691,7 @@ err_ret: | |||
802 | * group to find a free inode. | 691 | * group to find a free inode. |
803 | */ | 692 | */ |
804 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | 693 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, |
805 | const struct qstr *qstr, __u32 goal) | 694 | const struct qstr *qstr, __u32 goal, uid_t *owner) |
806 | { | 695 | { |
807 | struct super_block *sb; | 696 | struct super_block *sb; |
808 | struct buffer_head *inode_bitmap_bh = NULL; | 697 | struct buffer_head *inode_bitmap_bh = NULL; |
@@ -816,8 +705,6 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | |||
816 | int ret2, err = 0; | 705 | int ret2, err = 0; |
817 | struct inode *ret; | 706 | struct inode *ret; |
818 | ext4_group_t i; | 707 | ext4_group_t i; |
819 | int free = 0; | ||
820 | static int once = 1; | ||
821 | ext4_group_t flex_group; | 708 | ext4_group_t flex_group; |
822 | 709 | ||
823 | /* Cannot create files in a deleted directory */ | 710 | /* Cannot create files in a deleted directory */ |
@@ -843,26 +730,9 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, | |||
843 | goto got_group; | 730 | goto got_group; |
844 | } | 731 | } |
845 | 732 | ||
846 | if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { | 733 | if (S_ISDIR(mode)) |
847 | ret2 = find_group_flex(sb, dir, &group); | 734 | ret2 = find_group_orlov(sb, dir, &group, mode, qstr); |
848 | if (ret2 == -1) { | 735 | else |
849 | ret2 = find_group_other(sb, dir, &group, mode); | ||
850 | if (ret2 == 0 && once) { | ||
851 | once = 0; | ||
852 | printk(KERN_NOTICE "ext4: find_group_flex " | ||
853 | "failed, fallback succeeded dir %lu\n", | ||
854 | dir->i_ino); | ||
855 | } | ||
856 | } | ||
857 | goto got_group; | ||
858 | } | ||
859 | |||
860 | if (S_ISDIR(mode)) { | ||
861 | if (test_opt(sb, OLDALLOC)) | ||
862 | ret2 = find_group_dir(sb, dir, &group); | ||
863 | else | ||
864 | ret2 = find_group_orlov(sb, dir, &group, mode, qstr); | ||
865 | } else | ||
866 | ret2 = find_group_other(sb, dir, &group, mode); | 736 | ret2 = find_group_other(sb, dir, &group, mode); |
867 | 737 | ||
868 | got_group: | 738 | got_group: |
@@ -950,26 +820,21 @@ got: | |||
950 | goto fail; | 820 | goto fail; |
951 | } | 821 | } |
952 | 822 | ||
953 | free = 0; | 823 | BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); |
954 | ext4_lock_group(sb, group); | 824 | err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); |
825 | brelse(block_bitmap_bh); | ||
826 | |||
955 | /* recheck and clear flag under lock if we still need to */ | 827 | /* recheck and clear flag under lock if we still need to */ |
828 | ext4_lock_group(sb, group); | ||
956 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 829 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
957 | free = ext4_free_blocks_after_init(sb, group, gdp); | ||
958 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | 830 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); |
959 | ext4_free_blks_set(sb, gdp, free); | 831 | ext4_free_group_clusters_set(sb, gdp, |
832 | ext4_free_clusters_after_init(sb, group, gdp)); | ||
960 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, | 833 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, |
961 | gdp); | 834 | gdp); |
962 | } | 835 | } |
963 | ext4_unlock_group(sb, group); | 836 | ext4_unlock_group(sb, group); |
964 | 837 | ||
965 | /* Don't need to dirty bitmap block if we didn't change it */ | ||
966 | if (free) { | ||
967 | BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); | ||
968 | err = ext4_handle_dirty_metadata(handle, | ||
969 | NULL, block_bitmap_bh); | ||
970 | } | ||
971 | |||
972 | brelse(block_bitmap_bh); | ||
973 | if (err) | 838 | if (err) |
974 | goto fail; | 839 | goto fail; |
975 | } | 840 | } |
@@ -987,8 +852,11 @@ got: | |||
987 | flex_group = ext4_flex_group(sbi, group); | 852 | flex_group = ext4_flex_group(sbi, group); |
988 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); | 853 | atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); |
989 | } | 854 | } |
990 | 855 | if (owner) { | |
991 | if (test_opt(sb, GRPID)) { | 856 | inode->i_mode = mode; |
857 | inode->i_uid = owner[0]; | ||
858 | inode->i_gid = owner[1]; | ||
859 | } else if (test_opt(sb, GRPID)) { | ||
992 | inode->i_mode = mode; | 860 | inode->i_mode = mode; |
993 | inode->i_uid = current_fsuid(); | 861 | inode->i_uid = current_fsuid(); |
994 | inode->i_gid = dir->i_gid; | 862 | inode->i_gid = dir->i_gid; |
@@ -1005,11 +873,7 @@ got: | |||
1005 | ei->i_dir_start_lookup = 0; | 873 | ei->i_dir_start_lookup = 0; |
1006 | ei->i_disksize = 0; | 874 | ei->i_disksize = 0; |
1007 | 875 | ||
1008 | /* | 876 | /* Don't inherit extent flag from directory, amongst others. */ |
1009 | * Don't inherit extent flag from directory, amongst others. We set | ||
1010 | * extent flag on newly created directory and file only if -o extent | ||
1011 | * mount option is specified | ||
1012 | */ | ||
1013 | ei->i_flags = | 877 | ei->i_flags = |
1014 | ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); | 878 | ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED); |
1015 | ei->i_file_acl = 0; | 879 | ei->i_file_acl = 0; |
@@ -1084,7 +948,7 @@ fail_free_drop: | |||
1084 | fail_drop: | 948 | fail_drop: |
1085 | dquot_drop(inode); | 949 | dquot_drop(inode); |
1086 | inode->i_flags |= S_NOQUOTA; | 950 | inode->i_flags |= S_NOQUOTA; |
1087 | inode->i_nlink = 0; | 951 | clear_nlink(inode); |
1088 | unlock_new_inode(inode); | 952 | unlock_new_inode(inode); |
1089 | iput(inode); | 953 | iput(inode); |
1090 | brelse(inode_bitmap_bh); | 954 | brelse(inode_bitmap_bh); |
@@ -1235,7 +1099,7 @@ unsigned long ext4_count_dirs(struct super_block * sb) | |||
1235 | * inode allocation from the current group, so we take alloc_sem lock, to | 1099 | * inode allocation from the current group, so we take alloc_sem lock, to |
1236 | * block ext4_claim_inode until we are finished. | 1100 | * block ext4_claim_inode until we are finished. |
1237 | */ | 1101 | */ |
1238 | extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, | 1102 | int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, |
1239 | int barrier) | 1103 | int barrier) |
1240 | { | 1104 | { |
1241 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); | 1105 | struct ext4_group_info *grp = ext4_get_group_info(sb, group); |