diff options
author | Tao Ma <tao.ma@oracle.com> | 2007-12-18 02:47:03 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2008-01-25 17:53:35 -0500 |
commit | d659072f736837e56b6433d58e5315ad1d4d5ccf (patch) | |
tree | 56882b7b36c6b60a8208f6ed5bee9904adc7b649 /fs/ocfs2/resize.c | |
parent | 7f68fc28219be3b44ef4132f95c6506ff3e806b5 (diff) |
[PATCH 1/2] ocfs2: Add group extend for online resize
This patch adds the ability for a userspace program to request an extension of
the last cluster group on an Ocfs2 file system. The request is made via ioctl,
OCFS2_IOC_GROUP_EXTEND. This is derived from EXT3_IOC_GROUP_EXTEND, but is
obviously Ocfs2 specific.
tunefs.ocfs2 would call this for an online-resize operation if the last
cluster group isn't full.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/resize.c')
-rw-r--r-- | fs/ocfs2/resize.c | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c new file mode 100644 index 000000000000..848f7293f4fc --- /dev/null +++ b/fs/ocfs2/resize.c | |||
@@ -0,0 +1,398 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * resize.c | ||
5 | * | ||
6 | * volume resize. | ||
7 | * Inspired by ext3/resize.c. | ||
8 | * | ||
9 | * Copyright (C) 2007 Oracle. All rights reserved. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or | ||
12 | * modify it under the terms of the GNU General Public | ||
13 | * License as published by the Free Software Foundation; either | ||
14 | * version 2 of the License, or (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
19 | * General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public | ||
22 | * License along with this program; if not, write to the | ||
23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
24 | * Boston, MA 02111-1307, USA. | ||
25 | */ | ||
26 | |||
27 | #include <linux/fs.h> | ||
28 | #include <linux/types.h> | ||
29 | |||
30 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | ||
31 | #include <cluster/masklog.h> | ||
32 | |||
33 | #include "ocfs2.h" | ||
34 | |||
35 | #include "alloc.h" | ||
36 | #include "dlmglue.h" | ||
37 | #include "inode.h" | ||
38 | #include "journal.h" | ||
39 | #include "super.h" | ||
40 | #include "sysfile.h" | ||
41 | #include "uptodate.h" | ||
42 | |||
43 | #include "buffer_head_io.h" | ||
44 | #include "suballoc.h" | ||
45 | #include "resize.h" | ||
46 | |||
47 | /* | ||
48 | * Check whether there are new backup superblocks exist | ||
49 | * in the last group. If there are some, mark them or clear | ||
50 | * them in the bitmap. | ||
51 | * | ||
52 | * Return how many backups we find in the last group. | ||
53 | */ | ||
54 | static u16 ocfs2_calc_new_backup_super(struct inode *inode, | ||
55 | struct ocfs2_group_desc *gd, | ||
56 | int new_clusters, | ||
57 | u32 first_new_cluster, | ||
58 | u16 cl_cpg, | ||
59 | int set) | ||
60 | { | ||
61 | int i; | ||
62 | u16 backups = 0; | ||
63 | u32 cluster; | ||
64 | u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno); | ||
65 | |||
66 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
67 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
68 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
69 | |||
70 | gd_blkno = ocfs2_which_cluster_group(inode, cluster); | ||
71 | if (gd_blkno < lgd_blkno) | ||
72 | continue; | ||
73 | else if (gd_blkno > lgd_blkno) | ||
74 | break; | ||
75 | |||
76 | if (set) | ||
77 | ocfs2_set_bit(cluster % cl_cpg, | ||
78 | (unsigned long *)gd->bg_bitmap); | ||
79 | else | ||
80 | ocfs2_clear_bit(cluster % cl_cpg, | ||
81 | (unsigned long *)gd->bg_bitmap); | ||
82 | backups++; | ||
83 | } | ||
84 | |||
85 | mlog_exit_void(); | ||
86 | return backups; | ||
87 | } | ||
88 | |||
89 | static int ocfs2_update_last_group_and_inode(handle_t *handle, | ||
90 | struct inode *bm_inode, | ||
91 | struct buffer_head *bm_bh, | ||
92 | struct buffer_head *group_bh, | ||
93 | u32 first_new_cluster, | ||
94 | int new_clusters) | ||
95 | { | ||
96 | int ret = 0; | ||
97 | struct ocfs2_super *osb = OCFS2_SB(bm_inode->i_sb); | ||
98 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bm_bh->b_data; | ||
99 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | ||
100 | struct ocfs2_chain_rec *cr; | ||
101 | struct ocfs2_group_desc *group; | ||
102 | u16 chain, num_bits, backups = 0; | ||
103 | u16 cl_bpc = le16_to_cpu(cl->cl_bpc); | ||
104 | u16 cl_cpg = le16_to_cpu(cl->cl_cpg); | ||
105 | |||
106 | mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", | ||
107 | new_clusters, first_new_cluster); | ||
108 | |||
109 | ret = ocfs2_journal_access(handle, bm_inode, group_bh, | ||
110 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
111 | if (ret < 0) { | ||
112 | mlog_errno(ret); | ||
113 | goto out; | ||
114 | } | ||
115 | |||
116 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
117 | |||
118 | /* update the group first. */ | ||
119 | num_bits = new_clusters * cl_bpc; | ||
120 | le16_add_cpu(&group->bg_bits, num_bits); | ||
121 | le16_add_cpu(&group->bg_free_bits_count, num_bits); | ||
122 | |||
123 | /* | ||
124 | * check whether there are some new backup superblocks exist in | ||
125 | * this group and update the group bitmap accordingly. | ||
126 | */ | ||
127 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
128 | OCFS2_FEATURE_COMPAT_BACKUP_SB)) { | ||
129 | backups = ocfs2_calc_new_backup_super(bm_inode, | ||
130 | group, | ||
131 | new_clusters, | ||
132 | first_new_cluster, | ||
133 | cl_cpg, 1); | ||
134 | le16_add_cpu(&group->bg_free_bits_count, -1 * backups); | ||
135 | } | ||
136 | |||
137 | ret = ocfs2_journal_dirty(handle, group_bh); | ||
138 | if (ret < 0) { | ||
139 | mlog_errno(ret); | ||
140 | goto out_rollback; | ||
141 | } | ||
142 | |||
143 | /* update the inode accordingly. */ | ||
144 | ret = ocfs2_journal_access(handle, bm_inode, bm_bh, | ||
145 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
146 | if (ret < 0) { | ||
147 | mlog_errno(ret); | ||
148 | goto out_rollback; | ||
149 | } | ||
150 | |||
151 | chain = le16_to_cpu(group->bg_chain); | ||
152 | cr = (&cl->cl_recs[chain]); | ||
153 | le32_add_cpu(&cr->c_total, num_bits); | ||
154 | le32_add_cpu(&cr->c_free, num_bits); | ||
155 | le32_add_cpu(&fe->id1.bitmap1.i_total, num_bits); | ||
156 | le32_add_cpu(&fe->i_clusters, new_clusters); | ||
157 | |||
158 | if (backups) { | ||
159 | le32_add_cpu(&cr->c_free, -1 * backups); | ||
160 | le32_add_cpu(&fe->id1.bitmap1.i_used, backups); | ||
161 | } | ||
162 | |||
163 | spin_lock(&OCFS2_I(bm_inode)->ip_lock); | ||
164 | OCFS2_I(bm_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | ||
165 | le64_add_cpu(&fe->i_size, new_clusters << osb->s_clustersize_bits); | ||
166 | spin_unlock(&OCFS2_I(bm_inode)->ip_lock); | ||
167 | i_size_write(bm_inode, le64_to_cpu(fe->i_size)); | ||
168 | |||
169 | ocfs2_journal_dirty(handle, bm_bh); | ||
170 | |||
171 | out_rollback: | ||
172 | if (ret < 0) { | ||
173 | ocfs2_calc_new_backup_super(bm_inode, | ||
174 | group, | ||
175 | new_clusters, | ||
176 | first_new_cluster, | ||
177 | cl_cpg, 0); | ||
178 | le16_add_cpu(&group->bg_free_bits_count, backups); | ||
179 | le16_add_cpu(&group->bg_bits, -1 * num_bits); | ||
180 | le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); | ||
181 | } | ||
182 | out: | ||
183 | mlog_exit(ret); | ||
184 | return ret; | ||
185 | } | ||
186 | |||
187 | static int update_backups(struct inode * inode, u32 clusters, char *data) | ||
188 | { | ||
189 | int i, ret = 0; | ||
190 | u32 cluster; | ||
191 | u64 blkno; | ||
192 | struct buffer_head *backup = NULL; | ||
193 | struct ocfs2_dinode *backup_di = NULL; | ||
194 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
195 | |||
196 | /* calculate the real backups we need to update. */ | ||
197 | for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { | ||
198 | blkno = ocfs2_backup_super_blkno(inode->i_sb, i); | ||
199 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
200 | if (cluster > clusters) | ||
201 | break; | ||
202 | |||
203 | ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); | ||
204 | if (ret < 0) { | ||
205 | mlog_errno(ret); | ||
206 | break; | ||
207 | } | ||
208 | |||
209 | memcpy(backup->b_data, data, inode->i_sb->s_blocksize); | ||
210 | |||
211 | backup_di = (struct ocfs2_dinode *)backup->b_data; | ||
212 | backup_di->i_blkno = cpu_to_le64(blkno); | ||
213 | |||
214 | ret = ocfs2_write_super_or_backup(osb, backup); | ||
215 | brelse(backup); | ||
216 | backup = NULL; | ||
217 | if (ret < 0) { | ||
218 | mlog_errno(ret); | ||
219 | break; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | static void ocfs2_update_super_and_backups(struct inode *inode, | ||
227 | int new_clusters) | ||
228 | { | ||
229 | int ret; | ||
230 | u32 clusters = 0; | ||
231 | struct buffer_head *super_bh = NULL; | ||
232 | struct ocfs2_dinode *super_di = NULL; | ||
233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
234 | |||
235 | /* | ||
236 | * update the superblock last. | ||
237 | * It doesn't matter if the write failed. | ||
238 | */ | ||
239 | ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, | ||
240 | &super_bh, 0, NULL); | ||
241 | if (ret < 0) { | ||
242 | mlog_errno(ret); | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | super_di = (struct ocfs2_dinode *)super_bh->b_data; | ||
247 | le32_add_cpu(&super_di->i_clusters, new_clusters); | ||
248 | clusters = le32_to_cpu(super_di->i_clusters); | ||
249 | |||
250 | ret = ocfs2_write_super_or_backup(osb, super_bh); | ||
251 | if (ret < 0) { | ||
252 | mlog_errno(ret); | ||
253 | goto out; | ||
254 | } | ||
255 | |||
256 | if (OCFS2_HAS_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_COMPAT_BACKUP_SB)) | ||
257 | ret = update_backups(inode, clusters, super_bh->b_data); | ||
258 | |||
259 | out: | ||
260 | if (super_bh) | ||
261 | brelse(super_bh); | ||
262 | if (ret) | ||
263 | printk(KERN_WARNING "ocfs2: Failed to update super blocks on %s" | ||
264 | " during fs resize. This condition is not fatal," | ||
265 | " but fsck.ocfs2 should be run to fix it\n", | ||
266 | osb->dev_str); | ||
267 | return; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Extend the filesystem to the new number of clusters specified. This entry | ||
272 | * point is only used to extend the current filesystem to the end of the last | ||
273 | * existing group. | ||
274 | */ | ||
275 | int ocfs2_group_extend(struct inode * inode, int new_clusters) | ||
276 | { | ||
277 | int ret; | ||
278 | handle_t *handle; | ||
279 | struct buffer_head *main_bm_bh = NULL; | ||
280 | struct buffer_head *group_bh = NULL; | ||
281 | struct inode *main_bm_inode = NULL; | ||
282 | struct ocfs2_dinode *fe = NULL; | ||
283 | struct ocfs2_group_desc *group = NULL; | ||
284 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
285 | u16 cl_bpc; | ||
286 | u32 first_new_cluster; | ||
287 | u64 lgd_blkno; | ||
288 | |||
289 | mlog_entry_void(); | ||
290 | |||
291 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
292 | return -EROFS; | ||
293 | |||
294 | if (new_clusters < 0) | ||
295 | return -EINVAL; | ||
296 | else if (new_clusters == 0) | ||
297 | return 0; | ||
298 | |||
299 | main_bm_inode = ocfs2_get_system_file_inode(osb, | ||
300 | GLOBAL_BITMAP_SYSTEM_INODE, | ||
301 | OCFS2_INVALID_SLOT); | ||
302 | if (!main_bm_inode) { | ||
303 | ret = -EINVAL; | ||
304 | mlog_errno(ret); | ||
305 | goto out; | ||
306 | } | ||
307 | |||
308 | mutex_lock(&main_bm_inode->i_mutex); | ||
309 | |||
310 | ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); | ||
311 | if (ret < 0) { | ||
312 | mlog_errno(ret); | ||
313 | goto out_mutex; | ||
314 | } | ||
315 | |||
316 | fe = (struct ocfs2_dinode *)main_bm_bh->b_data; | ||
317 | |||
318 | if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != | ||
319 | ocfs2_group_bitmap_size(osb->sb) * 8) { | ||
320 | mlog(ML_ERROR, "The disk is too old and small. " | ||
321 | "Force to do offline resize."); | ||
322 | ret = -EINVAL; | ||
323 | goto out_unlock; | ||
324 | } | ||
325 | |||
326 | if (!OCFS2_IS_VALID_DINODE(fe)) { | ||
327 | OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); | ||
328 | ret = -EIO; | ||
329 | goto out_unlock; | ||
330 | } | ||
331 | |||
332 | first_new_cluster = le32_to_cpu(fe->i_clusters); | ||
333 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, | ||
334 | first_new_cluster - 1); | ||
335 | |||
336 | ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, | ||
337 | main_bm_inode); | ||
338 | if (ret < 0) { | ||
339 | mlog_errno(ret); | ||
340 | goto out_unlock; | ||
341 | } | ||
342 | |||
343 | group = (struct ocfs2_group_desc *)group_bh->b_data; | ||
344 | |||
345 | ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); | ||
346 | if (ret) { | ||
347 | mlog_errno(ret); | ||
348 | goto out_unlock; | ||
349 | } | ||
350 | |||
351 | cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); | ||
352 | if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > | ||
353 | le16_to_cpu(fe->id2.i_chain.cl_cpg)) { | ||
354 | ret = -EINVAL; | ||
355 | goto out_unlock; | ||
356 | } | ||
357 | |||
358 | mlog(0, "extend the last group at %llu, new clusters = %d\n", | ||
359 | le64_to_cpu(group->bg_blkno), new_clusters); | ||
360 | |||
361 | handle = ocfs2_start_trans(osb, OCFS2_GROUP_EXTEND_CREDITS); | ||
362 | if (IS_ERR(handle)) { | ||
363 | mlog_errno(PTR_ERR(handle)); | ||
364 | ret = -EINVAL; | ||
365 | goto out_unlock; | ||
366 | } | ||
367 | |||
368 | /* update the last group descriptor and inode. */ | ||
369 | ret = ocfs2_update_last_group_and_inode(handle, main_bm_inode, | ||
370 | main_bm_bh, group_bh, | ||
371 | first_new_cluster, | ||
372 | new_clusters); | ||
373 | if (ret) { | ||
374 | mlog_errno(ret); | ||
375 | goto out_commit; | ||
376 | } | ||
377 | |||
378 | ocfs2_update_super_and_backups(main_bm_inode, new_clusters); | ||
379 | |||
380 | out_commit: | ||
381 | ocfs2_commit_trans(osb, handle); | ||
382 | out_unlock: | ||
383 | if (group_bh) | ||
384 | brelse(group_bh); | ||
385 | |||
386 | if (main_bm_bh) | ||
387 | brelse(main_bm_bh); | ||
388 | |||
389 | ocfs2_inode_unlock(main_bm_inode, 1); | ||
390 | |||
391 | out_mutex: | ||
392 | mutex_unlock(&main_bm_inode->i_mutex); | ||
393 | iput(main_bm_inode); | ||
394 | |||
395 | out: | ||
396 | mlog_exit_void(); | ||
397 | return ret; | ||
398 | } | ||