aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/balloc.c
diff options
context:
space:
mode:
authorLukas Czerner <lczerner@redhat.com>2013-04-09 22:11:22 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-04-09 22:11:22 -0400
commit27dd43854227bb0e6ab70129bd21b60d396db2e7 (patch)
tree14e490b9d0ac63583849c4cad4b3ad0123902c3a /fs/ext4/balloc.c
parentf45a5ef91bef7e02149a216ed6dc3fcdd8b38268 (diff)
ext4: introduce reserved space
Currently in ENOSPC condition when writing into unwritten space, or punching a hole, we might need to split the extent and grow extent tree. However since we can not allocate any new metadata blocks we'll have to zero out unwritten part of extent or punched out part of extent, or in the worst case return ENOSPC even though the user actually does not allocate any space. Also in delalloc path we do reserve metadata and data blocks for the time we're going to write out, however metadata block reservation is very tricky especially since we expect that logical connectivity implies physical connectivity, however that might not be the case and hence we might end up allocating more metadata blocks than previously reserved. So in future, metadata reservation checks should be removed since we can not assure that we do not under reserve. And this is where reserved space comes into the picture. When mounting the file system we slice off a little bit of the file system space (2% or 4096 clusters, whichever is smaller) which can then be used for the cases mentioned above to prevent costly zeroout, or unexpected ENOSPC. The number of reserved clusters can be set via sysfs, however it can never be bigger than number of free clusters in the file system. Note that this patch fixes the failure of xfstest 274 as expected. Signed-off-by: Lukas Czerner <lczerner@redhat.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Diffstat (limited to 'fs/ext4/balloc.c')
-rw-r--r--fs/ext4/balloc.c18
1 files changed, 13 insertions, 5 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 9e8d8ffb063f..8dcaea69e37f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -499,20 +499,22 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
499static int ext4_has_free_clusters(struct ext4_sb_info *sbi, 499static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
500 s64 nclusters, unsigned int flags) 500 s64 nclusters, unsigned int flags)
501{ 501{
502 s64 free_clusters, dirty_clusters, root_clusters; 502 s64 free_clusters, dirty_clusters, rsv, resv_clusters;
503 struct percpu_counter *fcc = &sbi->s_freeclusters_counter; 503 struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
504 struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter; 504 struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
505 505
506 free_clusters = percpu_counter_read_positive(fcc); 506 free_clusters = percpu_counter_read_positive(fcc);
507 dirty_clusters = percpu_counter_read_positive(dcc); 507 dirty_clusters = percpu_counter_read_positive(dcc);
508 resv_clusters = atomic64_read(&sbi->s_resv_clusters);
508 509
509 /* 510 /*
510 * r_blocks_count should always be multiple of the cluster ratio so 511 * r_blocks_count should always be multiple of the cluster ratio so
511 * we are safe to do a plane bit shift only. 512 * we are safe to do a plane bit shift only.
512 */ 513 */
513 root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; 514 rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
515 resv_clusters;
514 516
515 if (free_clusters - (nclusters + root_clusters + dirty_clusters) < 517 if (free_clusters - (nclusters + rsv + dirty_clusters) <
516 EXT4_FREECLUSTERS_WATERMARK) { 518 EXT4_FREECLUSTERS_WATERMARK) {
517 free_clusters = percpu_counter_sum_positive(fcc); 519 free_clusters = percpu_counter_sum_positive(fcc);
518 dirty_clusters = percpu_counter_sum_positive(dcc); 520 dirty_clusters = percpu_counter_sum_positive(dcc);
@@ -520,15 +522,21 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
520 /* Check whether we have space after accounting for current 522 /* Check whether we have space after accounting for current
521 * dirty clusters & root reserved clusters. 523 * dirty clusters & root reserved clusters.
522 */ 524 */
523 if (free_clusters >= ((root_clusters + nclusters) + dirty_clusters)) 525 if (free_clusters >= (rsv + nclusters + dirty_clusters))
524 return 1; 526 return 1;
525 527
526 /* Hm, nope. Are (enough) root reserved clusters available? */ 528 /* Hm, nope. Are (enough) root reserved clusters available? */
527 if (uid_eq(sbi->s_resuid, current_fsuid()) || 529 if (uid_eq(sbi->s_resuid, current_fsuid()) ||
528 (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) || 530 (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
529 capable(CAP_SYS_RESOURCE) || 531 capable(CAP_SYS_RESOURCE) ||
530 (flags & EXT4_MB_USE_ROOT_BLOCKS)) { 532 (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
531 533
534 if (free_clusters >= (nclusters + dirty_clusters +
535 resv_clusters))
536 return 1;
537 }
538 /* No free blocks. Let's see if we can dip into reserved pool */
539 if (flags & EXT4_MB_USE_RESERVED) {
532 if (free_clusters >= (nclusters + dirty_clusters)) 540 if (free_clusters >= (nclusters + dirty_clusters))
533 return 1; 541 return 1;
534 } 542 }