aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJoel Becker <joel.becker@oracle.com>2008-09-03 23:03:39 -0400
committerMark Fasheh <mfasheh@suse.com>2008-10-13 19:57:07 -0400
commit1187c968852e3c668f3b9376083851f81f6eee22 (patch)
tree23a34ab89f724cc015f9e4f6e8bc6ed0fc20ce51 /fs
parent08413899db89d8d636c2a2d4ba5c356ab587d7ef (diff)
ocfs2: Limit inode allocation to 32bits.
ocfs2 inode numbers are block numbers. For any filesystem with fewer than 2^32 blocks, this is not a problem. However, when ocfs2 starts using JBD2, it will be able to support filesystems with more than 2^32 blocks. This would result in inode numbers higher than 2^32. The problem is that stat(2) can't handle those numbers on 32-bit machines. The simple solution is to have ocfs2 allocate all inodes below that boundary. The suballoc code is changed to honor an optional block limit. Only the inode suballocator sets that limit - all other allocations stay unlimited. The biggest trick is to grow the inode suballocator beneath that limit. There's no point in allocating block groups that are above the limit, then rejecting their elements later on. We want to prevent the inode allocator from ever having block groups above the limit. This involves a little gyration with the local alloc code. If the local alloc window is above the limit, the local alloc code signals the caller to try the global bitmap but does not disable the local alloc file (which can be used for other allocations). [ Minor cleanup - removed an ML_NOTICE comment. --Mark ] Signed-off-by: Joel Becker <joel.becker@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/ocfs2/localalloc.c55
-rw-r--r--fs/ocfs2/suballoc.c83
-rw-r--r--fs/ocfs2/suballoc.h11
3 files changed, 130 insertions, 19 deletions
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index b889f10d809..02227c39251 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -570,6 +570,46 @@ out:
570 return status; 570 return status;
571} 571}
572 572
573/* Check to see if the local alloc window is within ac->ac_max_block. Returns 1 when no limit is set or when the blkno just past the candidate allocation does not exceed ac->ac_max_block; returns 0 when no start bit is found or the candidate allocation would end past the limit. */
574static int ocfs2_local_alloc_in_range(struct inode *inode,
575 struct ocfs2_alloc_context *ac,
576 u32 bits_wanted)
577{
578 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
579 struct ocfs2_dinode *alloc;
580 struct ocfs2_local_alloc *la;
581 int start;
582 u64 block_off;
583
584 if (!ac->ac_max_block) /* a limit of 0 means "unlimited", so any window is acceptable */
585 return 1;
586
587 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
588 la = OCFS2_LOCAL_ALLOC(alloc);
589
590 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
591 if (start == -1) { /* the search reported failure; it is logged as -ENOSPC and treated as "not in range" */
592 mlog_errno(-ENOSPC);
593 return 0;
594 }
595
596 /*
597 * Converting (bm_off + start + bits_wanted) to blocks gives us
598 * the blkno just past our actual allocation. This is perfect
599 * to compare with ac_max_block.
600 */
601 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
602 le32_to_cpu(la->la_bm_off) +
603 start + bits_wanted);
604 mlog(0, "Checking %llu against %llu\n",
605 (unsigned long long)block_off,
606 (unsigned long long)ac->ac_max_block);
607 if (block_off > ac->ac_max_block) /* the candidate run would end beyond the limit */
608 return 0;
609
610 return 1;
611}
612
573/* 613/*
574 * make sure we've got at least bits_wanted contiguous bits in the 614 * make sure we've got at least bits_wanted contiguous bits in the
575 * local alloc. You lose them when you drop i_mutex. 615 * local alloc. You lose them when you drop i_mutex.
@@ -658,6 +698,21 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
658 goto bail; 698 goto bail;
659 } 699 }
660 700
701 if (ac->ac_max_block)
702 mlog(0, "Calling in_range for max block %llu\n",
703 (unsigned long long)ac->ac_max_block);
704
705 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
706 bits_wanted)) {
707 /*
708 * The window is outside ac->ac_max_block.
709 * This errno tells the caller to keep localalloc enabled
710 * but to get the allocation from the main bitmap.
711 */
712 status = -EFBIG;
713 goto bail;
714 }
715
661 ac->ac_inode = local_alloc_inode; 716 ac->ac_inode = local_alloc_inode;
662 /* We should never use localalloc from another slot */ 717 /* We should never use localalloc from another slot */
663 ac->ac_alloc_slot = osb->slot_num; 718 ac->ac_alloc_slot = osb->slot_num;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 8d3947e94a2..213bdca16fe 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle,
62 struct ocfs2_chain_list *cl); 62 struct ocfs2_chain_list *cl);
63static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 63static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
64 struct inode *alloc_inode, 64 struct inode *alloc_inode,
65 struct buffer_head *bh); 65 struct buffer_head *bh,
66 u64 max_block);
66 67
67static int ocfs2_cluster_group_search(struct inode *inode, 68static int ocfs2_cluster_group_search(struct inode *inode,
68 struct buffer_head *group_bh, 69 struct buffer_head *group_bh,
69 u32 bits_wanted, u32 min_bits, 70 u32 bits_wanted, u32 min_bits,
71 u64 max_block,
70 u16 *bit_off, u16 *bits_found); 72 u16 *bit_off, u16 *bits_found);
71static int ocfs2_block_group_search(struct inode *inode, 73static int ocfs2_block_group_search(struct inode *inode,
72 struct buffer_head *group_bh, 74 struct buffer_head *group_bh,
73 u32 bits_wanted, u32 min_bits, 75 u32 bits_wanted, u32 min_bits,
76 u64 max_block,
74 u16 *bit_off, u16 *bits_found); 77 u16 *bit_off, u16 *bits_found);
75static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 78static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
76 struct ocfs2_alloc_context *ac, 79 struct ocfs2_alloc_context *ac,
@@ -110,6 +113,9 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
110 u64 data_blkno, 113 u64 data_blkno,
111 u64 *bg_blkno, 114 u64 *bg_blkno,
112 u16 *bg_bit_off); 115 u16 *bg_bit_off);
116static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
117 u32 bits_wanted, u64 max_block,
118 struct ocfs2_alloc_context **ac);
113 119
114void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) 120void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
115{ 121{
@@ -276,7 +282,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
276 */ 282 */
277static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 283static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
278 struct inode *alloc_inode, 284 struct inode *alloc_inode,
279 struct buffer_head *bh) 285 struct buffer_head *bh,
286 u64 max_block)
280{ 287{
281 int status, credits; 288 int status, credits;
282 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 289 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
@@ -294,9 +301,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
294 mlog_entry_void(); 301 mlog_entry_void();
295 302
296 cl = &fe->id2.i_chain; 303 cl = &fe->id2.i_chain;
297 status = ocfs2_reserve_clusters(osb, 304 status = ocfs2_reserve_clusters_with_limit(osb,
298 le16_to_cpu(cl->cl_cpg), 305 le16_to_cpu(cl->cl_cpg),
299 &ac); 306 max_block, &ac);
300 if (status < 0) { 307 if (status < 0) {
301 if (status != -ENOSPC) 308 if (status != -ENOSPC)
302 mlog_errno(status); 309 mlog_errno(status);
@@ -469,7 +476,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
469 goto bail; 476 goto bail;
470 } 477 }
471 478
472 status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 479 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
480 ac->ac_max_block);
473 if (status < 0) { 481 if (status < 0) {
474 if (status != -ENOSPC) 482 if (status != -ENOSPC)
475 mlog_errno(status); 483 mlog_errno(status);
@@ -591,6 +599,13 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
591 (*ac)->ac_group_search = ocfs2_block_group_search; 599 (*ac)->ac_group_search = ocfs2_block_group_search;
592 600
593 /* 601 /*
602 * stat(2) can't handle i_ino > 32bits, so we tell the
603 * lower levels not to allocate us a block group past that
604 * limit.
605 */
606 (*ac)->ac_max_block = (u32)~0U;
607
608 /*
594 * slot is set when we successfully steal inode from other nodes. 609 * slot is set when we successfully steal inode from other nodes.
595 * It is reset in 3 places: 610 * It is reset in 3 places:
596 * 1. when we flush the truncate log 611 * 1. when we flush the truncate log
@@ -670,9 +685,9 @@ bail:
670/* Callers don't need to care which bitmap (local alloc or main) to 685/* Callers don't need to care which bitmap (local alloc or main) to
671 * use so we figure it out for them, but unfortunately this clutters 686 * use so we figure it out for them, but unfortunately this clutters
672 * things a bit. */ 687 * things a bit. */
673int ocfs2_reserve_clusters(struct ocfs2_super *osb, 688static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
674 u32 bits_wanted, 689 u32 bits_wanted, u64 max_block,
675 struct ocfs2_alloc_context **ac) 690 struct ocfs2_alloc_context **ac)
676{ 691{
677 int status; 692 int status;
678 693
@@ -686,13 +701,18 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
686 } 701 }
687 702
688 (*ac)->ac_bits_wanted = bits_wanted; 703 (*ac)->ac_bits_wanted = bits_wanted;
704 (*ac)->ac_max_block = max_block;
689 705
690 status = -ENOSPC; 706 status = -ENOSPC;
691 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { 707 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
692 status = ocfs2_reserve_local_alloc_bits(osb, 708 status = ocfs2_reserve_local_alloc_bits(osb,
693 bits_wanted, 709 bits_wanted,
694 *ac); 710 *ac);
695 if ((status < 0) && (status != -ENOSPC)) { 711 if (status == -EFBIG) {
712 /* The local alloc window is outside ac_max_block.
713 * use the main bitmap. */
714 status = -ENOSPC;
715 } else if ((status < 0) && (status != -ENOSPC)) {
696 mlog_errno(status); 716 mlog_errno(status);
697 goto bail; 717 goto bail;
698 } 718 }
@@ -718,6 +738,13 @@ bail:
718 return status; 738 return status;
719} 739}
720 740
741int ocfs2_reserve_clusters(struct ocfs2_super *osb,
742 u32 bits_wanted,
743 struct ocfs2_alloc_context **ac)
744{
745 return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); /* max_block of 0 means no upper block limit is applied */
746}
747
721/* 748/*
722 * More or less lifted from ext3. I'll leave their description below: 749 * More or less lifted from ext3. I'll leave their description below:
723 * 750 *
@@ -1000,10 +1027,12 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
1000static int ocfs2_cluster_group_search(struct inode *inode, 1027static int ocfs2_cluster_group_search(struct inode *inode,
1001 struct buffer_head *group_bh, 1028 struct buffer_head *group_bh,
1002 u32 bits_wanted, u32 min_bits, 1029 u32 bits_wanted, u32 min_bits,
1030 u64 max_block,
1003 u16 *bit_off, u16 *bits_found) 1031 u16 *bit_off, u16 *bits_found)
1004{ 1032{
1005 int search = -ENOSPC; 1033 int search = -ENOSPC;
1006 int ret; 1034 int ret;
1035 u64 blkoff;
1007 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; 1036 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1008 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1037 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1009 u16 tmp_off, tmp_found; 1038 u16 tmp_off, tmp_found;
@@ -1038,6 +1067,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1038 if (ret) 1067 if (ret)
1039 return ret; 1068 return ret;
1040 1069
1070 if (max_block) {
1071 blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1072 gd_cluster_off +
1073 tmp_off + tmp_found);
1074 mlog(0, "Checking %llu against %llu\n",
1075 (unsigned long long)blkoff,
1076 (unsigned long long)max_block);
1077 if (blkoff > max_block)
1078 return -ENOSPC;
1079 }
1080
1041 /* ocfs2_block_group_find_clear_bits() might 1081 /* ocfs2_block_group_find_clear_bits() might
1042 * return success, but we still want to return 1082 * return success, but we still want to return
1043 * -ENOSPC unless it found the minimum number 1083 * -ENOSPC unless it found the minimum number
@@ -1061,19 +1101,31 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1061static int ocfs2_block_group_search(struct inode *inode, 1101static int ocfs2_block_group_search(struct inode *inode,
1062 struct buffer_head *group_bh, 1102 struct buffer_head *group_bh,
1063 u32 bits_wanted, u32 min_bits, 1103 u32 bits_wanted, u32 min_bits,
1104 u64 max_block,
1064 u16 *bit_off, u16 *bits_found) 1105 u16 *bit_off, u16 *bits_found)
1065{ 1106{
1066 int ret = -ENOSPC; 1107 int ret = -ENOSPC;
1108 u64 blkoff;
1067 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 1109 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
1068 1110
1069 BUG_ON(min_bits != 1); 1111 BUG_ON(min_bits != 1);
1070 BUG_ON(ocfs2_is_cluster_bitmap(inode)); 1112 BUG_ON(ocfs2_is_cluster_bitmap(inode));
1071 1113
1072 if (bg->bg_free_bits_count) 1114 if (bg->bg_free_bits_count) {
1073 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1115 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1074 group_bh, bits_wanted, 1116 group_bh, bits_wanted,
1075 le16_to_cpu(bg->bg_bits), 1117 le16_to_cpu(bg->bg_bits),
1076 bit_off, bits_found); 1118 bit_off, bits_found);
1119 if (!ret && max_block) {
1120 blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
1121 *bits_found;
1122 mlog(0, "Checking %llu against %llu\n",
1123 (unsigned long long)blkoff,
1124 (unsigned long long)max_block);
1125 if (blkoff > max_block)
1126 ret = -ENOSPC;
1127 }
1128 }
1077 1129
1078 return ret; 1130 return ret;
1079} 1131}
@@ -1138,7 +1190,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1138 } 1190 }
1139 1191
1140 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1192 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1141 bit_off, &found); 1193 ac->ac_max_block, bit_off, &found);
1142 if (ret < 0) { 1194 if (ret < 0) {
1143 if (ret != -ENOSPC) 1195 if (ret != -ENOSPC)
1144 mlog_errno(ret); 1196 mlog_errno(ret);
@@ -1210,11 +1262,12 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1210 status = -ENOSPC; 1262 status = -ENOSPC;
1211 /* for now, the chain search is a bit simplistic. We just use 1263 /* for now, the chain search is a bit simplistic. We just use
1212 * the 1st group with any empty bits. */ 1264 * the 1st group with any empty bits. */
1213 while ((status = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, 1265 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1214 min_bits, bit_off, &tmp_bits)) == -ENOSPC) { 1266 bits_wanted, min_bits,
1267 ac->ac_max_block, bit_off,
1268 &tmp_bits)) == -ENOSPC) {
1215 if (!bg->bg_next_group) 1269 if (!bg->bg_next_group)
1216 break; 1270 break;
1217
1218 if (prev_group_bh) { 1271 if (prev_group_bh) {
1219 brelse(prev_group_bh); 1272 brelse(prev_group_bh);
1220 prev_group_bh = NULL; 1273 prev_group_bh = NULL;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index dd0963695ed..4df159d8f45 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -28,10 +28,11 @@
28 28
29typedef int (group_search_t)(struct inode *, 29typedef int (group_search_t)(struct inode *,
30 struct buffer_head *, 30 struct buffer_head *,
31 u32, 31 u32, /* bits_wanted */
32 u32, 32 u32, /* min_bits */
33 u16 *, 33 u64, /* max_block */
34 u16 *); 34 u16 *, /* *bit_off */
35 u16 *); /* *bits_found */
35 36
36struct ocfs2_alloc_context { 37struct ocfs2_alloc_context {
37 struct inode *ac_inode; /* which bitmap are we allocating from? */ 38 struct inode *ac_inode; /* which bitmap are we allocating from? */
@@ -51,6 +52,8 @@ struct ocfs2_alloc_context {
51 group_search_t *ac_group_search; 52 group_search_t *ac_group_search;
52 53
53 u64 ac_last_group; 54 u64 ac_last_group;
55 u64 ac_max_block; /* Highest block number to allocate. 0 is
56 the same as ~0 - unlimited */
54}; 57};
55 58
56void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); 59void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);