diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2011-02-03 10:53:25 -0500 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2011-03-15 09:02:51 -0400 |
commit | 1cea312ad49d9cb964179a784fedb1fcfe396283 (patch) | |
tree | 27c45af006b48b1a079698605ea9007398f652b5 /fs/exofs/super.c | |
parent | 9ed96484311b89360b80a4181d856cbdb21630fd (diff) |
exofs: Write sbi->s_nextid as part of the Create command
Before, when creating a new inode, we'd set the sb->s_dirt flag,
and sometime later the system would write out s_nextid as part
of the sb_info. Also on inode sync we would force the sb sync
as well.
Define the s_nextid as a new partition attribute and set it
every time we create a new object.
At mount we read it from its new place.
We now never set sb->s_dirt anywhere in exofs. write_super
is actually never called. The call to exofs_write_super from
exofs_put_super is also removed because the VFS always calls
->sync_fs twice before calling ->put_super.
To stay backward-and-forward compatible we also write the old
s_nextid in the super_block object at unmount, and support a
zero-length attribute on mount.
This also fixes a BUG where, in layouts in which group_width was not
a divisor of EXOFS_SUPER_ID (0x10000), the s_nextid was not read
from the device it was written to. This happened because of the
sliding window layout trick: the read was always done from device 0,
but the write was done via the raid engine, which might slide
the device view. Now we both read and write through the raid engine.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs/exofs/super.c')
-rw-r--r-- | fs/exofs/super.c | 135 |
1 files changed, 119 insertions, 16 deletions
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 474989eeb7d6..5eb0851e5481 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c | |||
@@ -213,6 +213,101 @@ static void destroy_inodecache(void) | |||
213 | static const struct super_operations exofs_sops; | 213 | static const struct super_operations exofs_sops; |
214 | static const struct export_operations exofs_export_ops; | 214 | static const struct export_operations exofs_export_ops; |
215 | 215 | ||
216 | static const struct osd_attr g_attr_sb_stats = ATTR_DEF( | ||
217 | EXOFS_APAGE_SB_DATA, | ||
218 | EXOFS_ATTR_SB_STATS, | ||
219 | sizeof(struct exofs_sb_stats)); | ||
220 | |||
221 | static int __sbi_read_stats(struct exofs_sb_info *sbi) | ||
222 | { | ||
223 | struct osd_attr attrs[] = { | ||
224 | [0] = g_attr_sb_stats, | ||
225 | }; | ||
226 | struct exofs_io_state *ios; | ||
227 | int ret; | ||
228 | |||
229 | ret = exofs_get_io_state(&sbi->layout, &ios); | ||
230 | if (unlikely(ret)) { | ||
231 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | ||
232 | return ret; | ||
233 | } | ||
234 | |||
235 | ios->cred = sbi->s_cred; | ||
236 | |||
237 | ios->in_attr = attrs; | ||
238 | ios->in_attr_len = ARRAY_SIZE(attrs); | ||
239 | |||
240 | ret = exofs_sbi_read(ios); | ||
241 | if (unlikely(ret)) { | ||
242 | EXOFS_ERR("Error reading super_block stats => %d\n", ret); | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | ret = extract_attr_from_ios(ios, &attrs[0]); | ||
247 | if (ret) { | ||
248 | EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__); | ||
249 | goto out; | ||
250 | } | ||
251 | if (attrs[0].len) { | ||
252 | struct exofs_sb_stats *ess; | ||
253 | |||
254 | if (unlikely(attrs[0].len != sizeof(*ess))) { | ||
255 | EXOFS_ERR("%s: Wrong version of exofs_sb_stats " | ||
256 | "size(%d) != expected(%zd)\n", | ||
257 | __func__, attrs[0].len, sizeof(*ess)); | ||
258 | goto out; | ||
259 | } | ||
260 | |||
261 | ess = attrs[0].val_ptr; | ||
262 | sbi->s_nextid = le64_to_cpu(ess->s_nextid); | ||
263 | sbi->s_numfiles = le32_to_cpu(ess->s_numfiles); | ||
264 | } | ||
265 | |||
266 | out: | ||
267 | exofs_put_io_state(ios); | ||
268 | return ret; | ||
269 | } | ||
270 | |||
271 | static void stats_done(struct exofs_io_state *ios, void *p) | ||
272 | { | ||
273 | exofs_put_io_state(ios); | ||
274 | /* Good thanks nothing to do anymore */ | ||
275 | } | ||
276 | |||
277 | /* Asynchronously write the stats attribute */ | ||
278 | int exofs_sbi_write_stats(struct exofs_sb_info *sbi) | ||
279 | { | ||
280 | struct osd_attr attrs[] = { | ||
281 | [0] = g_attr_sb_stats, | ||
282 | }; | ||
283 | struct exofs_io_state *ios; | ||
284 | int ret; | ||
285 | |||
286 | ret = exofs_get_io_state(&sbi->layout, &ios); | ||
287 | if (unlikely(ret)) { | ||
288 | EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); | ||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | sbi->s_ess.s_nextid = cpu_to_le64(sbi->s_nextid); | ||
293 | sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); | ||
294 | attrs[0].val_ptr = &sbi->s_ess; | ||
295 | |||
296 | ios->cred = sbi->s_cred; | ||
297 | ios->done = stats_done; | ||
298 | ios->private = sbi; | ||
299 | ios->out_attr = attrs; | ||
300 | ios->out_attr_len = ARRAY_SIZE(attrs); | ||
301 | |||
302 | ret = exofs_sbi_write(ios); | ||
303 | if (unlikely(ret)) { | ||
304 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); | ||
305 | exofs_put_io_state(ios); | ||
306 | } | ||
307 | |||
308 | return ret; | ||
309 | } | ||
310 | |||
216 | /* | 311 | /* |
217 | * Write the superblock to the OSD | 312 | * Write the superblock to the OSD |
218 | */ | 313 | */ |
@@ -223,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
223 | struct exofs_io_state *ios; | 318 | struct exofs_io_state *ios; |
224 | int ret = -ENOMEM; | 319 | int ret = -ENOMEM; |
225 | 320 | ||
226 | lock_super(sb); | 321 | fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); |
322 | if (unlikely(!fscb)) | ||
323 | return -ENOMEM; | ||
324 | |||
227 | sbi = sb->s_fs_info; | 325 | sbi = sb->s_fs_info; |
228 | fscb = &sbi->s_fscb; | ||
229 | 326 | ||
327 | /* NOTE: We no longer dirty the super_block anywhere in exofs. The | ||
328 | * reason we write the fscb here on unmount is so we can stay backwards | ||
329 | * compatible with fscb->s_version == 1. (What we are not compatible | ||
330 | * with is if a new version FS crashed and then we try to mount an old | ||
331 | * version). Otherwise the exofs_fscb is read-only from mkfs time. All | ||
332 | * the writeable info is set in exofs_sbi_write_stats() above. | ||
333 | */ | ||
230 | ret = exofs_get_io_state(&sbi->layout, &ios); | 334 | ret = exofs_get_io_state(&sbi->layout, &ios); |
231 | if (ret) | 335 | if (unlikely(ret)) |
232 | goto out; | 336 | goto out; |
233 | 337 | ||
234 | /* Note: We only write the changing part of the fscb. .i.e upto the | 338 | lock_super(sb); |
235 | * the fscb->s_dev_table_oid member. There is no read-modify-write | 339 | |
236 | * here. | ||
237 | */ | ||
238 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); | 340 | ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); |
239 | memset(fscb, 0, ios->length); | 341 | memset(fscb, 0, ios->length); |
240 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); | 342 | fscb->s_nextid = cpu_to_le64(sbi->s_nextid); |
@@ -249,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait) | |||
249 | ios->cred = sbi->s_cred; | 351 | ios->cred = sbi->s_cred; |
250 | 352 | ||
251 | ret = exofs_sbi_write(ios); | 353 | ret = exofs_sbi_write(ios); |
252 | if (unlikely(ret)) { | 354 | if (unlikely(ret)) |
253 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); | 355 | EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); |
254 | goto out; | 356 | else |
255 | } | 357 | sb->s_dirt = 0; |
256 | sb->s_dirt = 0; | ||
257 | 358 | ||
359 | |||
360 | unlock_super(sb); | ||
258 | out: | 361 | out: |
259 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); | 362 | EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); |
260 | exofs_put_io_state(ios); | 363 | exofs_put_io_state(ios); |
261 | unlock_super(sb); | 364 | kfree(fscb); |
262 | return ret; | 365 | return ret; |
263 | } | 366 | } |
264 | 367 | ||
@@ -302,9 +405,6 @@ static void exofs_put_super(struct super_block *sb) | |||
302 | int num_pend; | 405 | int num_pend; |
303 | struct exofs_sb_info *sbi = sb->s_fs_info; | 406 | struct exofs_sb_info *sbi = sb->s_fs_info; |
304 | 407 | ||
305 | if (sb->s_dirt) | ||
306 | exofs_write_super(sb); | ||
307 | |||
308 | /* make sure there are no pending commands */ | 408 | /* make sure there are no pending commands */ |
309 | for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; | 409 | for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; |
310 | num_pend = atomic_read(&sbi->s_curr_pending)) { | 410 | num_pend = atomic_read(&sbi->s_curr_pending)) { |
@@ -629,6 +729,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
629 | goto free_sbi; | 729 | goto free_sbi; |
630 | 730 | ||
631 | sb->s_magic = le16_to_cpu(fscb.s_magic); | 731 | sb->s_magic = le16_to_cpu(fscb.s_magic); |
732 | /* NOTE: we read below to be backward compatible with old versions */ | ||
632 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); | 733 | sbi->s_nextid = le64_to_cpu(fscb.s_nextid); |
633 | sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); | 734 | sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); |
634 | 735 | ||
@@ -639,7 +740,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
639 | ret = -EINVAL; | 740 | ret = -EINVAL; |
640 | goto free_sbi; | 741 | goto free_sbi; |
641 | } | 742 | } |
642 | if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { | 743 | if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) { |
643 | EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", | 744 | EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", |
644 | EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); | 745 | EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); |
645 | ret = -EINVAL; | 746 | ret = -EINVAL; |
@@ -657,6 +758,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) | |||
657 | goto free_sbi; | 758 | goto free_sbi; |
658 | } | 759 | } |
659 | 760 | ||
761 | __sbi_read_stats(sbi); | ||
762 | |||
660 | /* set up operation vectors */ | 763 | /* set up operation vectors */ |
661 | sbi->bdi.ra_pages = __ra_pages(&sbi->layout); | 764 | sbi->bdi.ra_pages = __ra_pages(&sbi->layout); |
662 | sb->s_bdi = &sbi->bdi; | 765 | sb->s_bdi = &sbi->bdi; |