aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3/resize.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-03 15:28:30 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-03 15:28:30 -0400
commite31fb9e00543e5d3c5b686747d3c862bc09b59f3 (patch)
tree4300b111471a858b542d55d47d587fb8ef52513a /fs/ext3/resize.c
parent824b005c86f91fe02eb2743a4526361f11786f70 (diff)
parent9181f8bf5abf4b9d59b12e878895375b84fe32ba (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull ext3 removal, quota & udf fixes from Jan Kara: "The biggest change in the pull is the removal of ext3 filesystem driver (~28k lines removed). Ext4 driver is a full featured replacement these days and both RH and SUSE use it for several years without issues. Also there are some workarounds in VM & block layer mainly for ext3 which we could eventually get rid of. Other larger change is addition of proper error handling for dquot_initialize(). The rest is small fixes and cleanups" [ I wasn't convinced about the ext3 removal and worried about things falling through the cracks for legacy users, but ext4 maintainers piped up and were all unanimously in favor of removal, and maintaining all legacy ext3 support inside ext4. - Linus ] * 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: udf: Don't modify filesystem for read-only mounts quota: remove an unneeded condition ext4: memory leak on error in ext4_symlink() mm/Kconfig: NEED_BOUNCE_POOL: clean-up condition ext4: Improve ext4 Kconfig test block: Remove forced page bouncing under IO fs: Remove ext3 filesystem driver doc: Update doc about journalling layer jfs: Handle error from dquot_initialize() reiserfs: Handle error from dquot_initialize() ocfs2: Handle error from dquot_initialize() ext4: Handle error from dquot_initialize() ext2: Handle error from dquot_initalize() quota: Propagate error from ->acquire_dquot()
Diffstat (limited to 'fs/ext3/resize.c')
-rw-r--r--fs/ext3/resize.c1117
1 files changed, 0 insertions, 1117 deletions
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
deleted file mode 100644
index 27105655502c..000000000000
--- a/fs/ext3/resize.c
+++ /dev/null
@@ -1,1117 +0,0 @@
1/*
2 * linux/fs/ext3/resize.c
3 *
4 * Support for resizing an ext3 filesystem while it is mounted.
5 *
6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
7 *
8 * This could probably be made into a module, because it is not often in use.
9 */
10
11
12#define EXT3FS_DEBUG
13
14#include "ext3.h"
15
16
/*
 * Half-open range tests: inside() is true when first <= b < last, and
 * outside() is its exact negation.  Both macros may evaluate their
 * arguments more than once, so do not pass expressions with side effects.
 */
#define inside(b, first, last)	((b) >= (first) && (b) < (last))
#define outside(b, first, last)	(!inside(b, first, last))
19
20static int verify_group_input(struct super_block *sb,
21 struct ext3_new_group_data *input)
22{
23 struct ext3_sb_info *sbi = EXT3_SB(sb);
24 struct ext3_super_block *es = sbi->s_es;
25 ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
26 ext3_fsblk_t end = start + input->blocks_count;
27 unsigned group = input->group;
28 ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
29 unsigned overhead = ext3_bg_has_super(sb, group) ?
30 (1 + ext3_bg_num_gdb(sb, group) +
31 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
32 ext3_fsblk_t metaend = start + overhead;
33 struct buffer_head *bh = NULL;
34 ext3_grpblk_t free_blocks_count;
35 int err = -EINVAL;
36
37 input->free_blocks_count = free_blocks_count =
38 input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
39
40 if (test_opt(sb, DEBUG))
41 printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
42 "(%d free, %u reserved)\n",
43 ext3_bg_has_super(sb, input->group) ? "normal" :
44 "no-super", input->group, input->blocks_count,
45 free_blocks_count, input->reserved_blocks);
46
47 if (group != sbi->s_groups_count)
48 ext3_warning(sb, __func__,
49 "Cannot add at group %u (only %lu groups)",
50 input->group, sbi->s_groups_count);
51 else if ((start - le32_to_cpu(es->s_first_data_block)) %
52 EXT3_BLOCKS_PER_GROUP(sb))
53 ext3_warning(sb, __func__, "Last group not full");
54 else if (input->reserved_blocks > input->blocks_count / 5)
55 ext3_warning(sb, __func__, "Reserved blocks too high (%u)",
56 input->reserved_blocks);
57 else if (free_blocks_count < 0)
58 ext3_warning(sb, __func__, "Bad blocks count %u",
59 input->blocks_count);
60 else if (!(bh = sb_bread(sb, end - 1)))
61 ext3_warning(sb, __func__,
62 "Cannot read last block ("E3FSBLK")",
63 end - 1);
64 else if (outside(input->block_bitmap, start, end))
65 ext3_warning(sb, __func__,
66 "Block bitmap not in group (block %u)",
67 input->block_bitmap);
68 else if (outside(input->inode_bitmap, start, end))
69 ext3_warning(sb, __func__,
70 "Inode bitmap not in group (block %u)",
71 input->inode_bitmap);
72 else if (outside(input->inode_table, start, end) ||
73 outside(itend - 1, start, end))
74 ext3_warning(sb, __func__,
75 "Inode table not in group (blocks %u-"E3FSBLK")",
76 input->inode_table, itend - 1);
77 else if (input->inode_bitmap == input->block_bitmap)
78 ext3_warning(sb, __func__,
79 "Block bitmap same as inode bitmap (%u)",
80 input->block_bitmap);
81 else if (inside(input->block_bitmap, input->inode_table, itend))
82 ext3_warning(sb, __func__,
83 "Block bitmap (%u) in inode table (%u-"E3FSBLK")",
84 input->block_bitmap, input->inode_table, itend-1);
85 else if (inside(input->inode_bitmap, input->inode_table, itend))
86 ext3_warning(sb, __func__,
87 "Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
88 input->inode_bitmap, input->inode_table, itend-1);
89 else if (inside(input->block_bitmap, start, metaend))
90 ext3_warning(sb, __func__,
91 "Block bitmap (%u) in GDT table"
92 " ("E3FSBLK"-"E3FSBLK")",
93 input->block_bitmap, start, metaend - 1);
94 else if (inside(input->inode_bitmap, start, metaend))
95 ext3_warning(sb, __func__,
96 "Inode bitmap (%u) in GDT table"
97 " ("E3FSBLK"-"E3FSBLK")",
98 input->inode_bitmap, start, metaend - 1);
99 else if (inside(input->inode_table, start, metaend) ||
100 inside(itend - 1, start, metaend))
101 ext3_warning(sb, __func__,
102 "Inode table (%u-"E3FSBLK") overlaps"
103 "GDT table ("E3FSBLK"-"E3FSBLK")",
104 input->inode_table, itend - 1, start, metaend - 1);
105 else
106 err = 0;
107 brelse(bh);
108
109 return err;
110}
111
112static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
113 ext3_fsblk_t blk)
114{
115 struct buffer_head *bh;
116 int err;
117
118 bh = sb_getblk(sb, blk);
119 if (unlikely(!bh))
120 return ERR_PTR(-ENOMEM);
121 if ((err = ext3_journal_get_write_access(handle, bh))) {
122 brelse(bh);
123 bh = ERR_PTR(err);
124 } else {
125 lock_buffer(bh);
126 memset(bh->b_data, 0, sb->s_blocksize);
127 set_buffer_uptodate(bh);
128 unlock_buffer(bh);
129 }
130
131 return bh;
132}
133
/*
 * Mark the tail of a bitmap, from start_bit up to end_bit, as in use.
 *
 * To avoid calling the atomic setbit hundreds or thousands of times, it is
 * only used for the bits up to the next byte boundary (which keeps the
 * endianness right).  The remaining whole bytes are filled with memset(),
 * which is fine because there are no other users of the bitmap at this point.
 *
 * Nothing is done when start_bit >= end_bit.
 */
static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
	/* first bit index at or after start_bit that is a multiple of 8 */
	const unsigned long byte_aligned = (start_bit + 7) & ~7UL;
	int bit = start_bit;

	if (start_bit >= end_bit)
		return;

	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);

	/* bit by bit up to the byte boundary ... */
	while (bit < byte_aligned) {
		ext3_set_bit(bit, bitmap);
		bit++;
	}

	/* ... then whole bytes for the rest */
	if (bit < end_bit)
		memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
}
152
153/*
154 * If we have fewer than thresh credits, extend by EXT3_MAX_TRANS_DATA.
155 * If that fails, restart the transaction & regain write access for the
156 * buffer head which is used for block_bitmap modifications.
157 */
158static int extend_or_restart_transaction(handle_t *handle, int thresh,
159 struct buffer_head *bh)
160{
161 int err;
162
163 if (handle->h_buffer_credits >= thresh)
164 return 0;
165
166 err = ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA);
167 if (err < 0)
168 return err;
169 if (err) {
170 err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA);
171 if (err)
172 return err;
173 err = ext3_journal_get_write_access(handle, bh);
174 if (err)
175 return err;
176 }
177
178 return 0;
179}
180
181/*
182 * Set up the block and inode bitmaps, and the inode table for the new group.
183 * This doesn't need to be part of the main transaction, since we are only
184 * changing blocks outside the actual filesystem. We still do journaling to
185 * ensure the recovery is correct in case of a failure just after resize.
186 * If any part of this fails, we simply abort the resize.
187 */
188static int setup_new_group_blocks(struct super_block *sb,
189 struct ext3_new_group_data *input)
190{
191 struct ext3_sb_info *sbi = EXT3_SB(sb);
192 ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group);
193 int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
194 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
195 unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
196 struct buffer_head *bh;
197 handle_t *handle;
198 ext3_fsblk_t block;
199 ext3_grpblk_t bit;
200 int i;
201 int err = 0, err2;
202
203 /* This transaction may be extended/restarted along the way */
204 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
205
206 if (IS_ERR(handle))
207 return PTR_ERR(handle);
208
209 mutex_lock(&sbi->s_resize_lock);
210 if (input->group != sbi->s_groups_count) {
211 err = -EBUSY;
212 goto exit_journal;
213 }
214
215 if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
216 err = PTR_ERR(bh);
217 goto exit_journal;
218 }
219
220 if (ext3_bg_has_super(sb, input->group)) {
221 ext3_debug("mark backup superblock %#04lx (+0)\n", start);
222 ext3_set_bit(0, bh->b_data);
223 }
224
225 /* Copy all of the GDT blocks into the backup in this group */
226 for (i = 0, bit = 1, block = start + 1;
227 i < gdblocks; i++, block++, bit++) {
228 struct buffer_head *gdb;
229
230 ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
231
232 err = extend_or_restart_transaction(handle, 1, bh);
233 if (err)
234 goto exit_bh;
235
236 gdb = sb_getblk(sb, block);
237 if (unlikely(!gdb)) {
238 err = -ENOMEM;
239 goto exit_bh;
240 }
241 if ((err = ext3_journal_get_write_access(handle, gdb))) {
242 brelse(gdb);
243 goto exit_bh;
244 }
245 lock_buffer(gdb);
246 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
247 set_buffer_uptodate(gdb);
248 unlock_buffer(gdb);
249 err = ext3_journal_dirty_metadata(handle, gdb);
250 if (err) {
251 brelse(gdb);
252 goto exit_bh;
253 }
254 ext3_set_bit(bit, bh->b_data);
255 brelse(gdb);
256 }
257
258 /* Zero out all of the reserved backup group descriptor table blocks */
259 for (i = 0, bit = gdblocks + 1, block = start + bit;
260 i < reserved_gdb; i++, block++, bit++) {
261 struct buffer_head *gdb;
262
263 ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
264
265 err = extend_or_restart_transaction(handle, 1, bh);
266 if (err)
267 goto exit_bh;
268
269 if (IS_ERR(gdb = bclean(handle, sb, block))) {
270 err = PTR_ERR(gdb);
271 goto exit_bh;
272 }
273 err = ext3_journal_dirty_metadata(handle, gdb);
274 if (err) {
275 brelse(gdb);
276 goto exit_bh;
277 }
278 ext3_set_bit(bit, bh->b_data);
279 brelse(gdb);
280 }
281 ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
282 input->block_bitmap - start);
283 ext3_set_bit(input->block_bitmap - start, bh->b_data);
284 ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
285 input->inode_bitmap - start);
286 ext3_set_bit(input->inode_bitmap - start, bh->b_data);
287
288 /* Zero out all of the inode table blocks */
289 for (i = 0, block = input->inode_table, bit = block - start;
290 i < sbi->s_itb_per_group; i++, bit++, block++) {
291 struct buffer_head *it;
292
293 ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
294
295 err = extend_or_restart_transaction(handle, 1, bh);
296 if (err)
297 goto exit_bh;
298
299 if (IS_ERR(it = bclean(handle, sb, block))) {
300 err = PTR_ERR(it);
301 goto exit_bh;
302 }
303 err = ext3_journal_dirty_metadata(handle, it);
304 if (err) {
305 brelse(it);
306 goto exit_bh;
307 }
308 brelse(it);
309 ext3_set_bit(bit, bh->b_data);
310 }
311
312 err = extend_or_restart_transaction(handle, 2, bh);
313 if (err)
314 goto exit_bh;
315
316 mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
317 bh->b_data);
318 err = ext3_journal_dirty_metadata(handle, bh);
319 if (err)
320 goto exit_bh;
321 brelse(bh);
322
323 /* Mark unused entries in inode bitmap used */
324 ext3_debug("clear inode bitmap %#04x (+%ld)\n",
325 input->inode_bitmap, input->inode_bitmap - start);
326 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
327 err = PTR_ERR(bh);
328 goto exit_journal;
329 }
330
331 mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
332 bh->b_data);
333 err = ext3_journal_dirty_metadata(handle, bh);
334exit_bh:
335 brelse(bh);
336
337exit_journal:
338 mutex_unlock(&sbi->s_resize_lock);
339 if ((err2 = ext3_journal_stop(handle)) && !err)
340 err = err2;
341
342 return err;
343}
344
345/*
346 * Iterate through the groups which hold BACKUP superblock/GDT copies in an
347 * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before
348 * calling this for the first time. In a sparse filesystem it will be the
349 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
350 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
351 */
352static unsigned ext3_list_backups(struct super_block *sb, unsigned *three,
353 unsigned *five, unsigned *seven)
354{
355 unsigned *min = three;
356 int mult = 3;
357 unsigned ret;
358
359 if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
360 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
361 ret = *min;
362 *min += 1;
363 return ret;
364 }
365
366 if (*five < *min) {
367 min = five;
368 mult = 5;
369 }
370 if (*seven < *min) {
371 min = seven;
372 mult = 7;
373 }
374
375 ret = *min;
376 *min *= mult;
377
378 return ret;
379}
380
381/*
382 * Check that all of the backup GDT blocks are held in the primary GDT block.
383 * It is assumed that they are stored in group order. Returns the number of
384 * groups in current filesystem that have BACKUPS, or -ve error code.
385 */
386static int verify_reserved_gdb(struct super_block *sb,
387 struct buffer_head *primary)
388{
389 const ext3_fsblk_t blk = primary->b_blocknr;
390 const unsigned long end = EXT3_SB(sb)->s_groups_count;
391 unsigned three = 1;
392 unsigned five = 5;
393 unsigned seven = 7;
394 unsigned grp;
395 __le32 *p = (__le32 *)primary->b_data;
396 int gdbackups = 0;
397
398 while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
399 if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
400 ext3_warning(sb, __func__,
401 "reserved GDT "E3FSBLK
402 " missing grp %d ("E3FSBLK")",
403 blk, grp,
404 grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
405 return -EINVAL;
406 }
407 if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
408 return -EFBIG;
409 }
410
411 return gdbackups;
412}
413
414/*
415 * Called when we need to bring a reserved group descriptor table block into
416 * use from the resize inode. The primary copy of the new GDT block currently
417 * is an indirect block (under the double indirect block in the resize inode).
418 * The new backup GDT blocks will be stored as leaf blocks in this indirect
419 * block, in group order. Even though we know all the block numbers we need,
420 * we check to ensure that the resize inode has actually reserved these blocks.
421 *
422 * Don't need to update the block bitmaps because the blocks are still in use.
423 *
424 * We get all of the error cases out of the way, so that we are sure to not
425 * fail once we start modifying the data on disk, because JBD has no rollback.
426 */
427static int add_new_gdb(handle_t *handle, struct inode *inode,
428 struct ext3_new_group_data *input,
429 struct buffer_head **primary)
430{
431 struct super_block *sb = inode->i_sb;
432 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
433 unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
434 ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
435 struct buffer_head **o_group_desc, **n_group_desc;
436 struct buffer_head *dind;
437 int gdbackups;
438 struct ext3_iloc iloc;
439 __le32 *data;
440 int err;
441
442 if (test_opt(sb, DEBUG))
443 printk(KERN_DEBUG
444 "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
445 gdb_num);
446
447 /*
448 * If we are not using the primary superblock/GDT copy don't resize,
449 * because the user tools have no way of handling this. Probably a
450 * bad time to do it anyways.
451 */
452 if (EXT3_SB(sb)->s_sbh->b_blocknr !=
453 le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
454 ext3_warning(sb, __func__,
455 "won't resize using backup superblock at %llu",
456 (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
457 return -EPERM;
458 }
459
460 *primary = sb_bread(sb, gdblock);
461 if (!*primary)
462 return -EIO;
463
464 if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
465 err = gdbackups;
466 goto exit_bh;
467 }
468
469 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
470 dind = sb_bread(sb, le32_to_cpu(*data));
471 if (!dind) {
472 err = -EIO;
473 goto exit_bh;
474 }
475
476 data = (__le32 *)dind->b_data;
477 if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
478 ext3_warning(sb, __func__,
479 "new group %u GDT block "E3FSBLK" not reserved",
480 input->group, gdblock);
481 err = -EINVAL;
482 goto exit_dind;
483 }
484
485 if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
486 goto exit_dind;
487
488 if ((err = ext3_journal_get_write_access(handle, *primary)))
489 goto exit_sbh;
490
491 if ((err = ext3_journal_get_write_access(handle, dind)))
492 goto exit_primary;
493
494 /* ext3_reserve_inode_write() gets a reference on the iloc */
495 if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
496 goto exit_dindj;
497
498 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
499 GFP_NOFS);
500 if (!n_group_desc) {
501 err = -ENOMEM;
502 ext3_warning (sb, __func__,
503 "not enough memory for %lu groups", gdb_num + 1);
504 goto exit_inode;
505 }
506
507 /*
508 * Finally, we have all of the possible failures behind us...
509 *
510 * Remove new GDT block from inode double-indirect block and clear out
511 * the new GDT block for use (which also "frees" the backup GDT blocks
512 * from the reserved inode). We don't need to change the bitmaps for
513 * these blocks, because they are marked as in-use from being in the
514 * reserved inode, and will become GDT blocks (primary and backup).
515 */
516 data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
517 err = ext3_journal_dirty_metadata(handle, dind);
518 if (err)
519 goto exit_group_desc;
520 brelse(dind);
521 dind = NULL;
522 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
523 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
524 if (err)
525 goto exit_group_desc;
526 memset((*primary)->b_data, 0, sb->s_blocksize);
527 err = ext3_journal_dirty_metadata(handle, *primary);
528 if (err)
529 goto exit_group_desc;
530
531 o_group_desc = EXT3_SB(sb)->s_group_desc;
532 memcpy(n_group_desc, o_group_desc,
533 EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
534 n_group_desc[gdb_num] = *primary;
535 EXT3_SB(sb)->s_group_desc = n_group_desc;
536 EXT3_SB(sb)->s_gdb_count++;
537 kfree(o_group_desc);
538
539 le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
540 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
541 if (err)
542 goto exit_inode;
543
544 return 0;
545
546exit_group_desc:
547 kfree(n_group_desc);
548exit_inode:
549 //ext3_journal_release_buffer(handle, iloc.bh);
550 brelse(iloc.bh);
551exit_dindj:
552 //ext3_journal_release_buffer(handle, dind);
553exit_primary:
554 //ext3_journal_release_buffer(handle, *primary);
555exit_sbh:
556 //ext3_journal_release_buffer(handle, *primary);
557exit_dind:
558 brelse(dind);
559exit_bh:
560 brelse(*primary);
561
562 ext3_debug("leaving with error %d\n", err);
563 return err;
564}
565
566/*
567 * Called when we are adding a new group which has a backup copy of each of
568 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
569 * We need to add these reserved backup GDT blocks to the resize inode, so
570 * that they are kept for future resizing and not allocated to files.
571 *
572 * Each reserved backup GDT block will go into a different indirect block.
573 * The indirect blocks are actually the primary reserved GDT blocks,
574 * so we know in advance what their block numbers are. We only get the
575 * double-indirect block to verify it is pointing to the primary reserved
576 * GDT blocks so we don't overwrite a data block by accident. The reserved
577 * backup GDT blocks are stored in their reserved primary GDT block.
578 */
579static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
580 struct ext3_new_group_data *input)
581{
582 struct super_block *sb = inode->i_sb;
583 int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
584 struct buffer_head **primary;
585 struct buffer_head *dind;
586 struct ext3_iloc iloc;
587 ext3_fsblk_t blk;
588 __le32 *data, *end;
589 int gdbackups = 0;
590 int res, i;
591 int err;
592
593 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
594 if (!primary)
595 return -ENOMEM;
596
597 data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
598 dind = sb_bread(sb, le32_to_cpu(*data));
599 if (!dind) {
600 err = -EIO;
601 goto exit_free;
602 }
603
604 blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
605 data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count %
606 EXT3_ADDR_PER_BLOCK(sb));
607 end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
608
609 /* Get each reserved primary GDT block and verify it holds backups */
610 for (res = 0; res < reserved_gdb; res++, blk++) {
611 if (le32_to_cpu(*data) != blk) {
612 ext3_warning(sb, __func__,
613 "reserved block "E3FSBLK
614 " not at offset %ld",
615 blk,
616 (long)(data - (__le32 *)dind->b_data));
617 err = -EINVAL;
618 goto exit_bh;
619 }
620 primary[res] = sb_bread(sb, blk);
621 if (!primary[res]) {
622 err = -EIO;
623 goto exit_bh;
624 }
625 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
626 brelse(primary[res]);
627 err = gdbackups;
628 goto exit_bh;
629 }
630 if (++data >= end)
631 data = (__le32 *)dind->b_data;
632 }
633
634 for (i = 0; i < reserved_gdb; i++) {
635 if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
636 /*
637 int j;
638 for (j = 0; j < i; j++)
639 ext3_journal_release_buffer(handle, primary[j]);
640 */
641 goto exit_bh;
642 }
643 }
644
645 if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
646 goto exit_bh;
647
648 /*
649 * Finally we can add each of the reserved backup GDT blocks from
650 * the new group to its reserved primary GDT block.
651 */
652 blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
653 for (i = 0; i < reserved_gdb; i++) {
654 int err2;
655 data = (__le32 *)primary[i]->b_data;
656 /* printk("reserving backup %lu[%u] = %lu\n",
657 primary[i]->b_blocknr, gdbackups,
658 blk + primary[i]->b_blocknr); */
659 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
660 err2 = ext3_journal_dirty_metadata(handle, primary[i]);
661 if (!err)
662 err = err2;
663 }
664 inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
665 ext3_mark_iloc_dirty(handle, inode, &iloc);
666
667exit_bh:
668 while (--res >= 0)
669 brelse(primary[res]);
670 brelse(dind);
671
672exit_free:
673 kfree(primary);
674
675 return err;
676}
677
678/*
679 * Update the backup copies of the ext3 metadata. These don't need to be part
680 * of the main resize transaction, because e2fsck will re-write them if there
681 * is a problem (basically only OOM will cause a problem). However, we
682 * _should_ update the backups if possible, in case the primary gets trashed
683 * for some reason and we need to run e2fsck from a backup superblock. The
684 * important part is that the new block and inode counts are in the backup
685 * superblocks, and the location of the new group metadata in the GDT backups.
686 *
687 * We do not need take the s_resize_lock for this, because these
688 * blocks are not otherwise touched by the filesystem code when it is
689 * mounted. We don't need to worry about last changing from
690 * sbi->s_groups_count, because the worst that can happen is that we
691 * do not copy the full number of backups at this time. The resize
692 * which changed s_groups_count will backup again.
693 */
694static void update_backups(struct super_block *sb,
695 int blk_off, char *data, int size)
696{
697 struct ext3_sb_info *sbi = EXT3_SB(sb);
698 const unsigned long last = sbi->s_groups_count;
699 const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
700 unsigned three = 1;
701 unsigned five = 5;
702 unsigned seven = 7;
703 unsigned group;
704 int rest = sb->s_blocksize - size;
705 handle_t *handle;
706 int err = 0, err2;
707
708 handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
709 if (IS_ERR(handle)) {
710 group = 1;
711 err = PTR_ERR(handle);
712 goto exit_err;
713 }
714
715 while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
716 struct buffer_head *bh;
717
718 /* Out of journal space, and can't get more - abort - so sad */
719 if (handle->h_buffer_credits == 0 &&
720 ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
721 (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
722 break;
723
724 bh = sb_getblk(sb, group * bpg + blk_off);
725 if (unlikely(!bh)) {
726 err = -ENOMEM;
727 break;
728 }
729 ext3_debug("update metadata backup %#04lx\n",
730 (unsigned long)bh->b_blocknr);
731 if ((err = ext3_journal_get_write_access(handle, bh))) {
732 brelse(bh);
733 break;
734 }
735 lock_buffer(bh);
736 memcpy(bh->b_data, data, size);
737 if (rest)
738 memset(bh->b_data + size, 0, rest);
739 set_buffer_uptodate(bh);
740 unlock_buffer(bh);
741 err = ext3_journal_dirty_metadata(handle, bh);
742 brelse(bh);
743 if (err)
744 break;
745 }
746 if ((err2 = ext3_journal_stop(handle)) && !err)
747 err = err2;
748
749 /*
750 * Ugh! Need to have e2fsck write the backup copies. It is too
751 * late to revert the resize, we shouldn't fail just because of
752 * the backup copies (they are only needed in case of corruption).
753 *
754 * However, if we got here we have a journal problem too, so we
755 * can't really start a transaction to mark the superblock.
756 * Chicken out and just set the flag on the hope it will be written
757 * to disk, and if not - we will simply wait until next fsck.
758 */
759exit_err:
760 if (err) {
761 ext3_warning(sb, __func__,
762 "can't update backup for group %d (err %d), "
763 "forcing fsck on next reboot", group, err);
764 sbi->s_mount_state &= ~EXT3_VALID_FS;
765 sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
766 mark_buffer_dirty(sbi->s_sbh);
767 }
768}
769
770/* Add group descriptor data to an existing or new group descriptor block.
771 * Ensure we handle all possible error conditions _before_ we start modifying
772 * the filesystem, because we cannot abort the transaction and not have it
773 * write the data to disk.
774 *
775 * If we are on a GDT block boundary, we need to get the reserved GDT block.
776 * Otherwise, we may need to add backup GDT blocks for a sparse group.
777 *
778 * We only need to hold the superblock lock while we are actually adding
779 * in the new group's counts to the superblock. Prior to that we have
780 * not really "added" the group at all. We re-check that we are still
781 * adding in the last group in case things have changed since verifying.
782 */
783int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
784{
785 struct ext3_sb_info *sbi = EXT3_SB(sb);
786 struct ext3_super_block *es = sbi->s_es;
787 int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
788 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
789 struct buffer_head *primary = NULL;
790 struct ext3_group_desc *gdp;
791 struct inode *inode = NULL;
792 handle_t *handle;
793 int gdb_off, gdb_num;
794 int err, err2;
795
796 gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
797 gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
798
799 if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
800 EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
801 ext3_warning(sb, __func__,
802 "Can't resize non-sparse filesystem further");
803 return -EPERM;
804 }
805
806 if (le32_to_cpu(es->s_blocks_count) + input->blocks_count <
807 le32_to_cpu(es->s_blocks_count)) {
808 ext3_warning(sb, __func__, "blocks_count overflow\n");
809 return -EINVAL;
810 }
811
812 if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) <
813 le32_to_cpu(es->s_inodes_count)) {
814 ext3_warning(sb, __func__, "inodes_count overflow\n");
815 return -EINVAL;
816 }
817
818 if (reserved_gdb || gdb_off == 0) {
819 if (!EXT3_HAS_COMPAT_FEATURE(sb,
820 EXT3_FEATURE_COMPAT_RESIZE_INODE)
821 || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
822 ext3_warning(sb, __func__,
823 "No reserved GDT blocks, can't resize");
824 return -EPERM;
825 }
826 inode = ext3_iget(sb, EXT3_RESIZE_INO);
827 if (IS_ERR(inode)) {
828 ext3_warning(sb, __func__,
829 "Error opening resize inode");
830 return PTR_ERR(inode);
831 }
832 }
833
834 if ((err = verify_group_input(sb, input)))
835 goto exit_put;
836
837 if ((err = setup_new_group_blocks(sb, input)))
838 goto exit_put;
839
840 /*
841 * We will always be modifying at least the superblock and a GDT
842 * block. If we are adding a group past the last current GDT block,
843 * we will also modify the inode and the dindirect block. If we
844 * are adding a group with superblock/GDT backups we will also
845 * modify each of the reserved GDT dindirect blocks.
846 */
847 handle = ext3_journal_start_sb(sb,
848 ext3_bg_has_super(sb, input->group) ?
849 3 + reserved_gdb : 4);
850 if (IS_ERR(handle)) {
851 err = PTR_ERR(handle);
852 goto exit_put;
853 }
854
855 mutex_lock(&sbi->s_resize_lock);
856 if (input->group != sbi->s_groups_count) {
857 ext3_warning(sb, __func__,
858 "multiple resizers run on filesystem!");
859 err = -EBUSY;
860 goto exit_journal;
861 }
862
863 if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
864 goto exit_journal;
865
866 /*
867 * We will only either add reserved group blocks to a backup group
868 * or remove reserved blocks for the first group in a new group block.
869 * Doing both would mean more complex code, and sane people don't
870 * use non-sparse filesystems anymore. This is already checked above.
871 */
872 if (gdb_off) {
873 primary = sbi->s_group_desc[gdb_num];
874 if ((err = ext3_journal_get_write_access(handle, primary)))
875 goto exit_journal;
876
877 if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
878 (err = reserve_backup_gdb(handle, inode, input)))
879 goto exit_journal;
880 } else if ((err = add_new_gdb(handle, inode, input, &primary)))
881 goto exit_journal;
882
883 /*
884 * OK, now we've set up the new group. Time to make it active.
885 *
886 * We do not lock all allocations via s_resize_lock
887 * so we have to be safe wrt. concurrent accesses to the group
888 * data. So we need to be careful to set all of the relevant
889 * group descriptor data etc. *before* we enable the group.
890 *
891 * The key field here is sbi->s_groups_count: as long as
892 * that retains its old value, nobody is going to access the new
893 * group.
894 *
895 * So first we update all the descriptor metadata for the new
896 * group; then we update the total disk blocks count; then we
897 * update the groups count to enable the group; then finally we
898 * update the free space counts so that the system can start
899 * using the new disk blocks.
900 */
901
902 /* Update group descriptor block for new group */
903 gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
904
905 gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
906 gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
907 gdp->bg_inode_table = cpu_to_le32(input->inode_table);
908 gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
909 gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
910
911 /*
912 * Make the new blocks and inodes valid next. We do this before
913 * increasing the group count so that once the group is enabled,
914 * all of its blocks and inodes are already valid.
915 *
916 * We always allocate group-by-group, then block-by-block or
917 * inode-by-inode within a group, so enabling these
918 * blocks/inodes before the group is live won't actually let us
919 * allocate the new space yet.
920 */
921 le32_add_cpu(&es->s_blocks_count, input->blocks_count);
922 le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb));
923
924 /*
925 * We need to protect s_groups_count against other CPUs seeing
926 * inconsistent state in the superblock.
927 *
928 * The precise rules we use are:
929 *
930 * * Writers of s_groups_count *must* hold s_resize_lock
931 * AND
932 * * Writers must perform a smp_wmb() after updating all dependent
933 * data and before modifying the groups count
934 *
935 * * Readers must hold s_resize_lock over the access
936 * OR
937 * * Readers must perform an smp_rmb() after reading the groups count
938 * and before reading any dependent data.
939 *
940 * NB. These rules can be relaxed when checking the group count
941 * while freeing data, as we can only allocate from a block
942 * group after serialising against the group count, and we can
943 * only then free after serialising in turn against that
944 * allocation.
945 */
946 smp_wmb();
947
948 /* Update the global fs size fields */
949 sbi->s_groups_count++;
950
951 err = ext3_journal_dirty_metadata(handle, primary);
952 if (err)
953 goto exit_journal;
954
955 /* Update the reserved block counts only once the new group is
956 * active. */
957 le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks);
958
959 /* Update the free space counts */
960 percpu_counter_add(&sbi->s_freeblocks_counter,
961 input->free_blocks_count);
962 percpu_counter_add(&sbi->s_freeinodes_counter,
963 EXT3_INODES_PER_GROUP(sb));
964
965 err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
966
967exit_journal:
968 mutex_unlock(&sbi->s_resize_lock);
969 if ((err2 = ext3_journal_stop(handle)) && !err)
970 err = err2;
971 if (!err) {
972 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
973 sizeof(struct ext3_super_block));
974 update_backups(sb, primary->b_blocknr, primary->b_data,
975 primary->b_size);
976 }
977exit_put:
978 iput(inode);
979 return err;
980} /* ext3_group_add */
981
982/* Extend the filesystem to the new number of blocks specified. This entry
983 * point is only used to extend the current filesystem to the end of the last
984 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
985 * for emergencies (because it has no dependencies on reserved blocks).
986 *
987 * If we _really_ wanted, we could use default values to call ext3_group_add()
988 * allow the "remount" trick to work for arbitrary resizing, assuming enough
989 * GDT blocks are reserved to grow to the desired size.
990 */
991int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
992 ext3_fsblk_t n_blocks_count)
993{
994 ext3_fsblk_t o_blocks_count;
995 ext3_grpblk_t last;
996 ext3_grpblk_t add;
997 struct buffer_head * bh;
998 handle_t *handle;
999 int err;
1000 unsigned long freed_blocks;
1001
1002 /* We don't need to worry about locking wrt other resizers just
1003 * yet: we're going to revalidate es->s_blocks_count after
1004 * taking the s_resize_lock below. */
1005 o_blocks_count = le32_to_cpu(es->s_blocks_count);
1006
1007 if (test_opt(sb, DEBUG))
1008 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
1009 " up to "E3FSBLK" blocks\n",
1010 o_blocks_count, n_blocks_count);
1011
1012 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
1013 return 0;
1014
1015 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
1016 printk(KERN_ERR "EXT3-fs: filesystem on %s:"
1017 " too large to resize to "E3FSBLK" blocks safely\n",
1018 sb->s_id, n_blocks_count);
1019 if (sizeof(sector_t) < 8)
1020 ext3_warning(sb, __func__,
1021 "CONFIG_LBDAF not enabled\n");
1022 return -EINVAL;
1023 }
1024
1025 if (n_blocks_count < o_blocks_count) {
1026 ext3_warning(sb, __func__,
1027 "can't shrink FS - resize aborted");
1028 return -EBUSY;
1029 }
1030
1031 /* Handle the remaining blocks in the last group only. */
1032 last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
1033 EXT3_BLOCKS_PER_GROUP(sb);
1034
1035 if (last == 0) {
1036 ext3_warning(sb, __func__,
1037 "need to use ext2online to resize further");
1038 return -EPERM;
1039 }
1040
1041 add = EXT3_BLOCKS_PER_GROUP(sb) - last;
1042
1043 if (o_blocks_count + add < o_blocks_count) {
1044 ext3_warning(sb, __func__, "blocks_count overflow");
1045 return -EINVAL;
1046 }
1047
1048 if (o_blocks_count + add > n_blocks_count)
1049 add = n_blocks_count - o_blocks_count;
1050
1051 if (o_blocks_count + add < n_blocks_count)
1052 ext3_warning(sb, __func__,
1053 "will only finish group ("E3FSBLK
1054 " blocks, %u new)",
1055 o_blocks_count + add, add);
1056
1057 /* See if the device is actually as big as what was requested */
1058 bh = sb_bread(sb, o_blocks_count + add -1);
1059 if (!bh) {
1060 ext3_warning(sb, __func__,
1061 "can't read last block, resize aborted");
1062 return -ENOSPC;
1063 }
1064 brelse(bh);
1065
1066 /* We will update the superblock, one block bitmap, and
1067 * one group descriptor via ext3_free_blocks().
1068 */
1069 handle = ext3_journal_start_sb(sb, 3);
1070 if (IS_ERR(handle)) {
1071 err = PTR_ERR(handle);
1072 ext3_warning(sb, __func__, "error %d on journal start",err);
1073 goto exit_put;
1074 }
1075
1076 mutex_lock(&EXT3_SB(sb)->s_resize_lock);
1077 if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
1078 ext3_warning(sb, __func__,
1079 "multiple resizers run on filesystem!");
1080 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1081 ext3_journal_stop(handle);
1082 err = -EBUSY;
1083 goto exit_put;
1084 }
1085
1086 if ((err = ext3_journal_get_write_access(handle,
1087 EXT3_SB(sb)->s_sbh))) {
1088 ext3_warning(sb, __func__,
1089 "error %d on journal write access", err);
1090 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1091 ext3_journal_stop(handle);
1092 goto exit_put;
1093 }
1094 es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
1095 err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
1096 mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
1097 if (err) {
1098 ext3_warning(sb, __func__,
1099 "error %d on journal dirty metadata", err);
1100 ext3_journal_stop(handle);
1101 goto exit_put;
1102 }
1103 ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
1104 o_blocks_count, o_blocks_count + add);
1105 ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
1106 ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
1107 o_blocks_count, o_blocks_count + add);
1108 if ((err = ext3_journal_stop(handle)))
1109 goto exit_put;
1110 if (test_opt(sb, DEBUG))
1111 printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
1112 le32_to_cpu(es->s_blocks_count));
1113 update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
1114 sizeof(struct ext3_super_block));
1115exit_put:
1116 return err;
1117} /* ext3_group_extend */