aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3/super.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-03 15:28:30 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-09-03 15:28:30 -0400
commit: e31fb9e00543e5d3c5b686747d3c862bc09b59f3 (patch)
tree: 4300b111471a858b542d55d47d587fb8ef52513a /fs/ext3/super.c
parent: 824b005c86f91fe02eb2743a4526361f11786f70 (diff)
parent: 9181f8bf5abf4b9d59b12e878895375b84fe32ba (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull ext3 removal, quota & udf fixes from Jan Kara: "The biggest change in the pull is the removal of ext3 filesystem driver (~28k lines removed). Ext4 driver is a full featured replacement these days and both RH and SUSE use it for several years without issues. Also there are some workarounds in VM & block layer mainly for ext3 which we could eventually get rid of. Other larger change is addition of proper error handling for dquot_initialize(). The rest is small fixes and cleanups" [ I wasn't convinced about the ext3 removal and worried about things falling through the cracks for legacy users, but ext4 maintainers piped up and were all unanimously in favor of removal, and maintaining all legacy ext3 support inside ext4. - Linus ] * 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: udf: Don't modify filesystem for read-only mounts quota: remove an unneeded condition ext4: memory leak on error in ext4_symlink() mm/Kconfig: NEED_BOUNCE_POOL: clean-up condition ext4: Improve ext4 Kconfig test block: Remove forced page bouncing under IO fs: Remove ext3 filesystem driver doc: Update doc about journalling layer jfs: Handle error from dquot_initialize() reiserfs: Handle error from dquot_initialize() ocfs2: Handle error from dquot_initialize() ext4: Handle error from dquot_initialize() ext2: Handle error from dquot_initalize() quota: Propagate error from ->acquire_dquot()
Diffstat (limited to 'fs/ext3/super.c')
-rw-r--r--fs/ext3/super.c3165
1 files changed, 0 insertions, 3165 deletions
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
deleted file mode 100644
index 5ed0044fbb37..000000000000
--- a/fs/ext3/super.c
+++ /dev/null
@@ -1,3165 +0,0 @@
1/*
2 * linux/fs/ext3/super.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/fs/minix/inode.c
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 *
15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */
18
19#include <linux/module.h>
20#include <linux/blkdev.h>
21#include <linux/parser.h>
22#include <linux/exportfs.h>
23#include <linux/statfs.h>
24#include <linux/random.h>
25#include <linux/mount.h>
26#include <linux/quotaops.h>
27#include <linux/seq_file.h>
28#include <linux/log2.h>
29#include <linux/cleancache.h>
30#include <linux/namei.h>
31
32#include <asm/uaccess.h>
33
34#define CREATE_TRACE_POINTS
35
36#include "ext3.h"
37#include "xattr.h"
38#include "acl.h"
39#include "namei.h"
40
41#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
42 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
43#else
44 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA
45#endif
46
47static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
48 unsigned long journal_devnum);
49static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
50 unsigned int);
51static int ext3_commit_super(struct super_block *sb,
52 struct ext3_super_block *es,
53 int sync);
54static void ext3_mark_recovery_complete(struct super_block * sb,
55 struct ext3_super_block * es);
56static void ext3_clear_journal_err(struct super_block * sb,
57 struct ext3_super_block * es);
58static int ext3_sync_fs(struct super_block *sb, int wait);
59static const char *ext3_decode_error(struct super_block * sb, int errno,
60 char nbuf[16]);
61static int ext3_remount (struct super_block * sb, int * flags, char * data);
62static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
63static int ext3_unfreeze(struct super_block *sb);
64static int ext3_freeze(struct super_block *sb);
65
66/*
67 * Wrappers for journal_start/end.
68 */
69handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
70{
71 journal_t *journal;
72
73 if (sb->s_flags & MS_RDONLY)
74 return ERR_PTR(-EROFS);
75
76 /* Special case here: if the journal has aborted behind our
77 * backs (eg. EIO in the commit thread), then we still need to
78 * take the FS itself readonly cleanly. */
79 journal = EXT3_SB(sb)->s_journal;
80 if (is_journal_aborted(journal)) {
81 ext3_abort(sb, __func__,
82 "Detected aborted journal");
83 return ERR_PTR(-EROFS);
84 }
85
86 return journal_start(journal, nblocks);
87}
88
89int __ext3_journal_stop(const char *where, handle_t *handle)
90{
91 struct super_block *sb;
92 int err;
93 int rc;
94
95 sb = handle->h_transaction->t_journal->j_private;
96 err = handle->h_err;
97 rc = journal_stop(handle);
98
99 if (!err)
100 err = rc;
101 if (err)
102 __ext3_std_error(sb, where, err);
103 return err;
104}
105
106void ext3_journal_abort_handle(const char *caller, const char *err_fn,
107 struct buffer_head *bh, handle_t *handle, int err)
108{
109 char nbuf[16];
110 const char *errstr = ext3_decode_error(NULL, err, nbuf);
111
112 if (bh)
113 BUFFER_TRACE(bh, "abort");
114
115 if (!handle->h_err)
116 handle->h_err = err;
117
118 if (is_handle_aborted(handle))
119 return;
120
121 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
122 caller, errstr, err_fn);
123
124 journal_abort_handle(handle);
125}
126
127void ext3_msg(struct super_block *sb, const char *prefix,
128 const char *fmt, ...)
129{
130 struct va_format vaf;
131 va_list args;
132
133 va_start(args, fmt);
134
135 vaf.fmt = fmt;
136 vaf.va = &args;
137
138 printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
139
140 va_end(args);
141}
142
143/* Deal with the reporting of failure conditions on a filesystem such as
144 * inconsistencies detected or read IO failures.
145 *
146 * On ext2, we can store the error state of the filesystem in the
147 * superblock. That is not possible on ext3, because we may have other
148 * write ordering constraints on the superblock which prevent us from
149 * writing it out straight away; and given that the journal is about to
150 * be aborted, we can't rely on the current, or future, transactions to
151 * write out the superblock safely.
152 *
153 * We'll just use the journal_abort() error code to record an error in
154 * the journal instead. On recovery, the journal will complain about
155 * that error until we've noted it down and cleared it.
156 */
157
158static void ext3_handle_error(struct super_block *sb)
159{
160 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
161
162 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
163 es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
164
165 if (sb->s_flags & MS_RDONLY)
166 return;
167
168 if (!test_opt (sb, ERRORS_CONT)) {
169 journal_t *journal = EXT3_SB(sb)->s_journal;
170
171 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
172 if (journal)
173 journal_abort(journal, -EIO);
174 }
175 if (test_opt (sb, ERRORS_RO)) {
176 ext3_msg(sb, KERN_CRIT,
177 "error: remounting filesystem read-only");
178 /*
179 * Make sure updated value of ->s_mount_state will be visible
180 * before ->s_flags update.
181 */
182 smp_wmb();
183 sb->s_flags |= MS_RDONLY;
184 }
185 ext3_commit_super(sb, es, 1);
186 if (test_opt(sb, ERRORS_PANIC))
187 panic("EXT3-fs (%s): panic forced after error\n",
188 sb->s_id);
189}
190
191void ext3_error(struct super_block *sb, const char *function,
192 const char *fmt, ...)
193{
194 struct va_format vaf;
195 va_list args;
196
197 va_start(args, fmt);
198
199 vaf.fmt = fmt;
200 vaf.va = &args;
201
202 printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n",
203 sb->s_id, function, &vaf);
204
205 va_end(args);
206
207 ext3_handle_error(sb);
208}
209
210static const char *ext3_decode_error(struct super_block * sb, int errno,
211 char nbuf[16])
212{
213 char *errstr = NULL;
214
215 switch (errno) {
216 case -EIO:
217 errstr = "IO failure";
218 break;
219 case -ENOMEM:
220 errstr = "Out of memory";
221 break;
222 case -EROFS:
223 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
224 errstr = "Journal has aborted";
225 else
226 errstr = "Readonly filesystem";
227 break;
228 default:
229 /* If the caller passed in an extra buffer for unknown
230 * errors, textualise them now. Else we just return
231 * NULL. */
232 if (nbuf) {
233 /* Check for truncated error codes... */
234 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
235 errstr = nbuf;
236 }
237 break;
238 }
239
240 return errstr;
241}
242
243/* __ext3_std_error decodes expected errors from journaling functions
244 * automatically and invokes the appropriate error response. */
245
246void __ext3_std_error (struct super_block * sb, const char * function,
247 int errno)
248{
249 char nbuf[16];
250 const char *errstr;
251
252 /* Special case: if the error is EROFS, and we're not already
253 * inside a transaction, then there's really no point in logging
254 * an error. */
255 if (errno == -EROFS && journal_current_handle() == NULL &&
256 (sb->s_flags & MS_RDONLY))
257 return;
258
259 errstr = ext3_decode_error(sb, errno, nbuf);
260 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
261
262 ext3_handle_error(sb);
263}
264
265/*
266 * ext3_abort is a much stronger failure handler than ext3_error. The
267 * abort function may be used to deal with unrecoverable failures such
268 * as journal IO errors or ENOMEM at a critical moment in log management.
269 *
270 * We unconditionally force the filesystem into an ABORT|READONLY state,
271 * unless the error response on the fs has been set to panic in which
272 * case we take the easy way out and panic immediately.
273 */
274
275void ext3_abort(struct super_block *sb, const char *function,
276 const char *fmt, ...)
277{
278 struct va_format vaf;
279 va_list args;
280
281 va_start(args, fmt);
282
283 vaf.fmt = fmt;
284 vaf.va = &args;
285
286 printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n",
287 sb->s_id, function, &vaf);
288
289 va_end(args);
290
291 if (test_opt(sb, ERRORS_PANIC))
292 panic("EXT3-fs: panic from previous error\n");
293
294 if (sb->s_flags & MS_RDONLY)
295 return;
296
297 ext3_msg(sb, KERN_CRIT,
298 "error: remounting filesystem read-only");
299 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
300 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
301 /*
302 * Make sure updated value of ->s_mount_state will be visible
303 * before ->s_flags update.
304 */
305 smp_wmb();
306 sb->s_flags |= MS_RDONLY;
307
308 if (EXT3_SB(sb)->s_journal)
309 journal_abort(EXT3_SB(sb)->s_journal, -EIO);
310}
311
312void ext3_warning(struct super_block *sb, const char *function,
313 const char *fmt, ...)
314{
315 struct va_format vaf;
316 va_list args;
317
318 va_start(args, fmt);
319
320 vaf.fmt = fmt;
321 vaf.va = &args;
322
323 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n",
324 sb->s_id, function, &vaf);
325
326 va_end(args);
327}
328
329void ext3_update_dynamic_rev(struct super_block *sb)
330{
331 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
332
333 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
334 return;
335
336 ext3_msg(sb, KERN_WARNING,
337 "warning: updating to rev %d because of "
338 "new feature flag, running e2fsck is recommended",
339 EXT3_DYNAMIC_REV);
340
341 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
342 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
343 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
344 /* leave es->s_feature_*compat flags alone */
345 /* es->s_uuid will be set by e2fsck if empty */
346
347 /*
348 * The rest of the superblock fields should be zero, and if not it
349 * means they are likely already in use, so leave them alone. We
350 * can leave it up to e2fsck to clean up any inconsistencies there.
351 */
352}
353
354/*
355 * Open the external journal device
356 */
357static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
358{
359 struct block_device *bdev;
360 char b[BDEVNAME_SIZE];
361
362 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
363 if (IS_ERR(bdev))
364 goto fail;
365 return bdev;
366
367fail:
368 ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
369 __bdevname(dev, b), PTR_ERR(bdev));
370
371 return NULL;
372}
373
374/*
375 * Release the journal device
376 */
377static void ext3_blkdev_put(struct block_device *bdev)
378{
379 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
380}
381
382static void ext3_blkdev_remove(struct ext3_sb_info *sbi)
383{
384 struct block_device *bdev;
385 bdev = sbi->journal_bdev;
386 if (bdev) {
387 ext3_blkdev_put(bdev);
388 sbi->journal_bdev = NULL;
389 }
390}
391
392static inline struct inode *orphan_list_entry(struct list_head *l)
393{
394 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
395}
396
397static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
398{
399 struct list_head *l;
400
401 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
402 le32_to_cpu(sbi->s_es->s_last_orphan));
403
404 ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
405 list_for_each(l, &sbi->s_orphan) {
406 struct inode *inode = orphan_list_entry(l);
407 ext3_msg(sb, KERN_ERR, " "
408 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
409 inode->i_sb->s_id, inode->i_ino, inode,
410 inode->i_mode, inode->i_nlink,
411 NEXT_ORPHAN(inode));
412 }
413}
414
415static void ext3_put_super (struct super_block * sb)
416{
417 struct ext3_sb_info *sbi = EXT3_SB(sb);
418 struct ext3_super_block *es = sbi->s_es;
419 int i, err;
420
421 dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
422 ext3_xattr_put_super(sb);
423 err = journal_destroy(sbi->s_journal);
424 sbi->s_journal = NULL;
425 if (err < 0)
426 ext3_abort(sb, __func__, "Couldn't clean up the journal");
427
428 if (!(sb->s_flags & MS_RDONLY)) {
429 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
430 es->s_state = cpu_to_le16(sbi->s_mount_state);
431 BUFFER_TRACE(sbi->s_sbh, "marking dirty");
432 mark_buffer_dirty(sbi->s_sbh);
433 ext3_commit_super(sb, es, 1);
434 }
435
436 for (i = 0; i < sbi->s_gdb_count; i++)
437 brelse(sbi->s_group_desc[i]);
438 kfree(sbi->s_group_desc);
439 percpu_counter_destroy(&sbi->s_freeblocks_counter);
440 percpu_counter_destroy(&sbi->s_freeinodes_counter);
441 percpu_counter_destroy(&sbi->s_dirs_counter);
442 brelse(sbi->s_sbh);
443#ifdef CONFIG_QUOTA
444 for (i = 0; i < EXT3_MAXQUOTAS; i++)
445 kfree(sbi->s_qf_names[i]);
446#endif
447
448 /* Debugging code just in case the in-memory inode orphan list
449 * isn't empty. The on-disk one can be non-empty if we've
450 * detected an error and taken the fs readonly, but the
451 * in-memory list had better be clean by this point. */
452 if (!list_empty(&sbi->s_orphan))
453 dump_orphan_list(sb, sbi);
454 J_ASSERT(list_empty(&sbi->s_orphan));
455
456 invalidate_bdev(sb->s_bdev);
457 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
458 /*
459 * Invalidate the journal device's buffers. We don't want them
460 * floating about in memory - the physical journal device may
461 * hotswapped, and it breaks the `ro-after' testing code.
462 */
463 sync_blockdev(sbi->journal_bdev);
464 invalidate_bdev(sbi->journal_bdev);
465 ext3_blkdev_remove(sbi);
466 }
467 sb->s_fs_info = NULL;
468 kfree(sbi->s_blockgroup_lock);
469 mutex_destroy(&sbi->s_orphan_lock);
470 mutex_destroy(&sbi->s_resize_lock);
471 kfree(sbi);
472}
473
474static struct kmem_cache *ext3_inode_cachep;
475
476/*
477 * Called inside transaction, so use GFP_NOFS
478 */
479static struct inode *ext3_alloc_inode(struct super_block *sb)
480{
481 struct ext3_inode_info *ei;
482
483 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
484 if (!ei)
485 return NULL;
486 ei->i_block_alloc_info = NULL;
487 ei->vfs_inode.i_version = 1;
488 atomic_set(&ei->i_datasync_tid, 0);
489 atomic_set(&ei->i_sync_tid, 0);
490#ifdef CONFIG_QUOTA
491 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
492#endif
493
494 return &ei->vfs_inode;
495}
496
/*
 * Ask generic_drop_inode() for the drop decision, emit the tracepoint with
 * that decision and return it unchanged.
 */
static int ext3_drop_inode(struct inode *inode)
{
	int verdict;

	verdict = generic_drop_inode(inode);
	trace_ext3_drop_inode(inode, verdict);

	return verdict;
}
504
505static void ext3_i_callback(struct rcu_head *head)
506{
507 struct inode *inode = container_of(head, struct inode, i_rcu);
508 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
509}
510
511static void ext3_destroy_inode(struct inode *inode)
512{
513 if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
514 printk("EXT3 Inode %p: orphan list check failed!\n",
515 EXT3_I(inode));
516 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
517 EXT3_I(inode), sizeof(struct ext3_inode_info),
518 false);
519 dump_stack();
520 }
521 call_rcu(&inode->i_rcu, ext3_i_callback);
522}
523
524static void init_once(void *foo)
525{
526 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
527
528 INIT_LIST_HEAD(&ei->i_orphan);
529#ifdef CONFIG_EXT3_FS_XATTR
530 init_rwsem(&ei->xattr_sem);
531#endif
532 mutex_init(&ei->truncate_mutex);
533 inode_init_once(&ei->vfs_inode);
534}
535
536static int __init init_inodecache(void)
537{
538 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
539 sizeof(struct ext3_inode_info),
540 0, (SLAB_RECLAIM_ACCOUNT|
541 SLAB_MEM_SPREAD),
542 init_once);
543 if (ext3_inode_cachep == NULL)
544 return -ENOMEM;
545 return 0;
546}
547
548static void destroy_inodecache(void)
549{
550 /*
551 * Make sure all delayed rcu free inodes are flushed before we
552 * destroy cache.
553 */
554 rcu_barrier();
555 kmem_cache_destroy(ext3_inode_cachep);
556}
557
/*
 * Append the quota-related mount options of @sb to @seq.  Emits nothing when
 * the kernel is built without CONFIG_QUOTA.
 */
static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext3_sb_info *sbi = EXT3_SB(sb);

	if (sbi->s_jquota_fmt) {
		const char *name;

		/* Formats not listed here are shown with an empty name. */
		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			name = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			name = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			name = "vfsv1";
		else
			name = "";

		seq_printf(seq, ",jqfmt=%s", name);
	}

	/* Journaled quota file names, when configured. */
	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

	/* Plain quota flags. */
	if (test_opt(sb, USRQUOTA))
		seq_puts(seq, ",usrquota");
	if (test_opt(sb, GRPQUOTA))
		seq_puts(seq, ",grpquota");
#endif
}
593
594static char *data_mode_string(unsigned long mode)
595{
596 switch (mode) {
597 case EXT3_MOUNT_JOURNAL_DATA:
598 return "journal";
599 case EXT3_MOUNT_ORDERED_DATA:
600 return "ordered";
601 case EXT3_MOUNT_WRITEBACK_DATA:
602 return "writeback";
603 }
604 return "unknown";
605}
606
607/*
608 * Show an option if
609 * - it's set to a non-default value OR
610 * - if the per-sb default is different from the global default
611 */
612static int ext3_show_options(struct seq_file *seq, struct dentry *root)
613{
614 struct super_block *sb = root->d_sb;
615 struct ext3_sb_info *sbi = EXT3_SB(sb);
616 struct ext3_super_block *es = sbi->s_es;
617 unsigned long def_mount_opts;
618
619 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
620
621 if (sbi->s_sb_block != 1)
622 seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
623 if (test_opt(sb, MINIX_DF))
624 seq_puts(seq, ",minixdf");
625 if (test_opt(sb, GRPID))
626 seq_puts(seq, ",grpid");
627 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
628 seq_puts(seq, ",nogrpid");
629 if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) ||
630 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
631 seq_printf(seq, ",resuid=%u",
632 from_kuid_munged(&init_user_ns, sbi->s_resuid));
633 }
634 if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) ||
635 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
636 seq_printf(seq, ",resgid=%u",
637 from_kgid_munged(&init_user_ns, sbi->s_resgid));
638 }
639 if (test_opt(sb, ERRORS_RO)) {
640 int def_errors = le16_to_cpu(es->s_errors);
641
642 if (def_errors == EXT3_ERRORS_PANIC ||
643 def_errors == EXT3_ERRORS_CONTINUE) {
644 seq_puts(seq, ",errors=remount-ro");
645 }
646 }
647 if (test_opt(sb, ERRORS_CONT))
648 seq_puts(seq, ",errors=continue");
649 if (test_opt(sb, ERRORS_PANIC))
650 seq_puts(seq, ",errors=panic");
651 if (test_opt(sb, NO_UID32))
652 seq_puts(seq, ",nouid32");
653 if (test_opt(sb, DEBUG))
654 seq_puts(seq, ",debug");
655#ifdef CONFIG_EXT3_FS_XATTR
656 if (test_opt(sb, XATTR_USER))
657 seq_puts(seq, ",user_xattr");
658 if (!test_opt(sb, XATTR_USER) &&
659 (def_mount_opts & EXT3_DEFM_XATTR_USER)) {
660 seq_puts(seq, ",nouser_xattr");
661 }
662#endif
663#ifdef CONFIG_EXT3_FS_POSIX_ACL
664 if (test_opt(sb, POSIX_ACL))
665 seq_puts(seq, ",acl");
666 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
667 seq_puts(seq, ",noacl");
668#endif
669 if (!test_opt(sb, RESERVATION))
670 seq_puts(seq, ",noreservation");
671 if (sbi->s_commit_interval) {
672 seq_printf(seq, ",commit=%u",
673 (unsigned) (sbi->s_commit_interval / HZ));
674 }
675
676 /*
677 * Always display barrier state so it's clear what the status is.
678 */
679 seq_puts(seq, ",barrier=");
680 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
681 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
682 if (test_opt(sb, DATA_ERR_ABORT))
683 seq_puts(seq, ",data_err=abort");
684
685 if (test_opt(sb, NOLOAD))
686 seq_puts(seq, ",norecovery");
687
688 ext3_show_quota_options(seq, sb);
689
690 return 0;
691}
692
693
694static struct inode *ext3_nfs_get_inode(struct super_block *sb,
695 u64 ino, u32 generation)
696{
697 struct inode *inode;
698
699 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
700 return ERR_PTR(-ESTALE);
701 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
702 return ERR_PTR(-ESTALE);
703
704 /* iget isn't really right if the inode is currently unallocated!!
705 *
706 * ext3_read_inode will return a bad_inode if the inode had been
707 * deleted, so we should be safe.
708 *
709 * Currently we don't know the generation for parent directory, so
710 * a generation of 0 means "accept any"
711 */
712 inode = ext3_iget(sb, ino);
713 if (IS_ERR(inode))
714 return ERR_CAST(inode);
715 if (generation && inode->i_generation != generation) {
716 iput(inode);
717 return ERR_PTR(-ESTALE);
718 }
719
720 return inode;
721}
722
723static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
724 int fh_len, int fh_type)
725{
726 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
727 ext3_nfs_get_inode);
728}
729
730static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
731 int fh_len, int fh_type)
732{
733 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
734 ext3_nfs_get_inode);
735}
736
737/*
738 * Try to release metadata pages (indirect blocks, directories) which are
739 * mapped via the block device. Since these pages could have journal heads
740 * which would prevent try_to_free_buffers() from freeing them, we must use
741 * jbd layer's try_to_free_buffers() function to release them.
742 */
743static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
744 gfp_t wait)
745{
746 journal_t *journal = EXT3_SB(sb)->s_journal;
747
748 WARN_ON(PageChecked(page));
749 if (!page_has_buffers(page))
750 return 0;
751 if (journal)
752 return journal_try_to_free_buffers(journal, page,
753 wait & ~__GFP_WAIT);
754 return try_to_free_buffers(page);
755}
756
757#ifdef CONFIG_QUOTA
/* Human-readable name of a quota type: "user" for USRQUOTA, else "group". */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Select the journaled-quota identifier for quota type t by pasting the
 * prefix `on` onto USRJQUOTA or GRPJQUOTA.  The prefix is deliberately not
 * parenthesised: pasting ")" onto an identifier does not form a valid token,
 * so "(on)##USRJQUOTA" cannot be expanded.
 */
#define QTYPE2MOPT(on, t) ((t) == USRQUOTA ? on##USRJQUOTA : on##GRPJQUOTA)
760
761static int ext3_write_dquot(struct dquot *dquot);
762static int ext3_acquire_dquot(struct dquot *dquot);
763static int ext3_release_dquot(struct dquot *dquot);
764static int ext3_mark_dquot_dirty(struct dquot *dquot);
765static int ext3_write_info(struct super_block *sb, int type);
766static int ext3_quota_on(struct super_block *sb, int type, int format_id,
767 struct path *path);
768static int ext3_quota_on_mount(struct super_block *sb, int type);
769static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
770 size_t len, loff_t off);
771static ssize_t ext3_quota_write(struct super_block *sb, int type,
772 const char *data, size_t len, loff_t off);
773static struct dquot **ext3_get_dquots(struct inode *inode)
774{
775 return EXT3_I(inode)->i_dquot;
776}
777
778static const struct dquot_operations ext3_quota_operations = {
779 .write_dquot = ext3_write_dquot,
780 .acquire_dquot = ext3_acquire_dquot,
781 .release_dquot = ext3_release_dquot,
782 .mark_dirty = ext3_mark_dquot_dirty,
783 .write_info = ext3_write_info,
784 .alloc_dquot = dquot_alloc,
785 .destroy_dquot = dquot_destroy,
786};
787
788static const struct quotactl_ops ext3_qctl_operations = {
789 .quota_on = ext3_quota_on,
790 .quota_off = dquot_quota_off,
791 .quota_sync = dquot_quota_sync,
792 .get_state = dquot_get_state,
793 .set_info = dquot_set_dqinfo,
794 .get_dqblk = dquot_get_dqblk,
795 .set_dqblk = dquot_set_dqblk
796};
797#endif
798
799static const struct super_operations ext3_sops = {
800 .alloc_inode = ext3_alloc_inode,
801 .destroy_inode = ext3_destroy_inode,
802 .write_inode = ext3_write_inode,
803 .dirty_inode = ext3_dirty_inode,
804 .drop_inode = ext3_drop_inode,
805 .evict_inode = ext3_evict_inode,
806 .put_super = ext3_put_super,
807 .sync_fs = ext3_sync_fs,
808 .freeze_fs = ext3_freeze,
809 .unfreeze_fs = ext3_unfreeze,
810 .statfs = ext3_statfs,
811 .remount_fs = ext3_remount,
812 .show_options = ext3_show_options,
813#ifdef CONFIG_QUOTA
814 .quota_read = ext3_quota_read,
815 .quota_write = ext3_quota_write,
816 .get_dquots = ext3_get_dquots,
817#endif
818 .bdev_try_to_free_page = bdev_try_to_free_page,
819};
820
821static const struct export_operations ext3_export_ops = {
822 .fh_to_dentry = ext3_fh_to_dentry,
823 .fh_to_parent = ext3_fh_to_parent,
824 .get_parent = ext3_get_parent,
825};
826
/*
 * Mount option token identifiers used by the option table and parser.
 * The enumerators keep their original order, so their values are unchanged.
 */
enum {
	/* df style and group id inheritance */
	Opt_bsd_df,
	Opt_minix_df,
	Opt_grpid,
	Opt_nogrpid,

	/* reserved blocks owner and superblock location */
	Opt_resgid,
	Opt_resuid,
	Opt_sb,

	/* error behaviour */
	Opt_err_cont,
	Opt_err_panic,
	Opt_err_ro,

	/* miscellaneous flags */
	Opt_nouid32,
	Opt_nocheck,
	Opt_debug,
	Opt_oldalloc,
	Opt_orlov,

	/* xattr and ACL */
	Opt_user_xattr,
	Opt_nouser_xattr,
	Opt_acl,
	Opt_noacl,

	/* allocation and buffer heads */
	Opt_reservation,
	Opt_noreservation,
	Opt_noload,
	Opt_nobh,
	Opt_bh,

	/* journal */
	Opt_commit,
	Opt_journal_update,
	Opt_journal_inum,
	Opt_journal_dev,
	Opt_journal_path,
	Opt_abort,

	/* data mode */
	Opt_data_journal,
	Opt_data_ordered,
	Opt_data_writeback,
	Opt_data_err_abort,
	Opt_data_err_ignore,

	/* quota */
	Opt_usrjquota,
	Opt_grpjquota,
	Opt_offusrjquota,
	Opt_offgrpjquota,
	Opt_jqfmt_vfsold,
	Opt_jqfmt_vfsv0,
	Opt_jqfmt_vfsv1,
	Opt_quota,
	Opt_noquota,

	/* the rest */
	Opt_ignore,
	Opt_barrier,
	Opt_nobarrier,
	Opt_err,
	Opt_resize,
	Opt_usrquota,
	Opt_grpquota
};
842
843static const match_table_t tokens = {
844 {Opt_bsd_df, "bsddf"},
845 {Opt_minix_df, "minixdf"},
846 {Opt_grpid, "grpid"},
847 {Opt_grpid, "bsdgroups"},
848 {Opt_nogrpid, "nogrpid"},
849 {Opt_nogrpid, "sysvgroups"},
850 {Opt_resgid, "resgid=%u"},
851 {Opt_resuid, "resuid=%u"},
852 {Opt_sb, "sb=%u"},
853 {Opt_err_cont, "errors=continue"},
854 {Opt_err_panic, "errors=panic"},
855 {Opt_err_ro, "errors=remount-ro"},
856 {Opt_nouid32, "nouid32"},
857 {Opt_nocheck, "nocheck"},
858 {Opt_nocheck, "check=none"},
859 {Opt_debug, "debug"},
860 {Opt_oldalloc, "oldalloc"},
861 {Opt_orlov, "orlov"},
862 {Opt_user_xattr, "user_xattr"},
863 {Opt_nouser_xattr, "nouser_xattr"},
864 {Opt_acl, "acl"},
865 {Opt_noacl, "noacl"},
866 {Opt_reservation, "reservation"},
867 {Opt_noreservation, "noreservation"},
868 {Opt_noload, "noload"},
869 {Opt_noload, "norecovery"},
870 {Opt_nobh, "nobh"},
871 {Opt_bh, "bh"},
872 {Opt_commit, "commit=%u"},
873 {Opt_journal_update, "journal=update"},
874 {Opt_journal_inum, "journal=%u"},
875 {Opt_journal_dev, "journal_dev=%u"},
876 {Opt_journal_path, "journal_path=%s"},
877 {Opt_abort, "abort"},
878 {Opt_data_journal, "data=journal"},
879 {Opt_data_ordered, "data=ordered"},
880 {Opt_data_writeback, "data=writeback"},
881 {Opt_data_err_abort, "data_err=abort"},
882 {Opt_data_err_ignore, "data_err=ignore"},
883 {Opt_offusrjquota, "usrjquota="},
884 {Opt_usrjquota, "usrjquota=%s"},
885 {Opt_offgrpjquota, "grpjquota="},
886 {Opt_grpjquota, "grpjquota=%s"},
887 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
888 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
889 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
890 {Opt_grpquota, "grpquota"},
891 {Opt_noquota, "noquota"},
892 {Opt_quota, "quota"},
893 {Opt_usrquota, "usrquota"},
894 {Opt_barrier, "barrier=%u"},
895 {Opt_barrier, "barrier"},
896 {Opt_nobarrier, "nobarrier"},
897 {Opt_resize, "resize"},
898 {Opt_err, NULL},
899};
900
901static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
902{
903 ext3_fsblk_t sb_block;
904 char *options = (char *) *data;
905
906 if (!options || strncmp(options, "sb=", 3) != 0)
907 return 1; /* Default location */
908 options += 3;
909 /*todo: use simple_strtoll with >32bit ext3 */
910 sb_block = simple_strtoul(options, &options, 0);
911 if (*options && *options != ',') {
912 ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
913 (char *) *data);
914 return 1;
915 }
916 if (*options == ',')
917 options++;
918 *data = (void *) options;
919 return sb_block;
920}
921
922#ifdef CONFIG_QUOTA
923static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
924{
925 struct ext3_sb_info *sbi = EXT3_SB(sb);
926 char *qname;
927
928 if (sb_any_quota_loaded(sb) &&
929 !sbi->s_qf_names[qtype]) {
930 ext3_msg(sb, KERN_ERR,
931 "Cannot change journaled "
932 "quota options when quota turned on");
933 return 0;
934 }
935 qname = match_strdup(args);
936 if (!qname) {
937 ext3_msg(sb, KERN_ERR,
938 "Not enough memory for storing quotafile name");
939 return 0;
940 }
941 if (sbi->s_qf_names[qtype]) {
942 int same = !strcmp(sbi->s_qf_names[qtype], qname);
943
944 kfree(qname);
945 if (!same) {
946 ext3_msg(sb, KERN_ERR,
947 "%s quota file already specified",
948 QTYPE2NAME(qtype));
949 }
950 return same;
951 }
952 if (strchr(qname, '/')) {
953 ext3_msg(sb, KERN_ERR,
954 "quotafile must be on filesystem root");
955 kfree(qname);
956 return 0;
957 }
958 sbi->s_qf_names[qtype] = qname;
959 set_opt(sbi->s_mount_opt, QUOTA);
960 return 1;
961}
962
963static int clear_qf_name(struct super_block *sb, int qtype) {
964
965 struct ext3_sb_info *sbi = EXT3_SB(sb);
966
967 if (sb_any_quota_loaded(sb) &&
968 sbi->s_qf_names[qtype]) {
969 ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options"
970 " when quota turned on");
971 return 0;
972 }
973 if (sbi->s_qf_names[qtype]) {
974 kfree(sbi->s_qf_names[qtype]);
975 sbi->s_qf_names[qtype] = NULL;
976 }
977 return 1;
978}
979#endif
980
981static int parse_options (char *options, struct super_block *sb,
982 unsigned int *inum, unsigned long *journal_devnum,
983 ext3_fsblk_t *n_blocks_count, int is_remount)
984{
985 struct ext3_sb_info *sbi = EXT3_SB(sb);
986 char * p;
987 substring_t args[MAX_OPT_ARGS];
988 int data_opt = 0;
989 int option;
990 kuid_t uid;
991 kgid_t gid;
992 char *journal_path;
993 struct inode *journal_inode;
994 struct path path;
995 int error;
996
997#ifdef CONFIG_QUOTA
998 int qfmt;
999#endif
1000
1001 if (!options)
1002 return 1;
1003
1004 while ((p = strsep (&options, ",")) != NULL) {
1005 int token;
1006 if (!*p)
1007 continue;
1008 /*
1009 * Initialize args struct so we know whether arg was
1010 * found; some options take optional arguments.
1011 */
1012 args[0].to = args[0].from = NULL;
1013 token = match_token(p, tokens, args);
1014 switch (token) {
1015 case Opt_bsd_df:
1016 clear_opt (sbi->s_mount_opt, MINIX_DF);
1017 break;
1018 case Opt_minix_df:
1019 set_opt (sbi->s_mount_opt, MINIX_DF);
1020 break;
1021 case Opt_grpid:
1022 set_opt (sbi->s_mount_opt, GRPID);
1023 break;
1024 case Opt_nogrpid:
1025 clear_opt (sbi->s_mount_opt, GRPID);
1026 break;
1027 case Opt_resuid:
1028 if (match_int(&args[0], &option))
1029 return 0;
1030 uid = make_kuid(current_user_ns(), option);
1031 if (!uid_valid(uid)) {
1032 ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
1033 return 0;
1034
1035 }
1036 sbi->s_resuid = uid;
1037 break;
1038 case Opt_resgid:
1039 if (match_int(&args[0], &option))
1040 return 0;
1041 gid = make_kgid(current_user_ns(), option);
1042 if (!gid_valid(gid)) {
1043 ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
1044 return 0;
1045 }
1046 sbi->s_resgid = gid;
1047 break;
1048 case Opt_sb:
1049 /* handled by get_sb_block() instead of here */
1050 /* *sb_block = match_int(&args[0]); */
1051 break;
1052 case Opt_err_panic:
1053 clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1054 clear_opt (sbi->s_mount_opt, ERRORS_RO);
1055 set_opt (sbi->s_mount_opt, ERRORS_PANIC);
1056 break;
1057 case Opt_err_ro:
1058 clear_opt (sbi->s_mount_opt, ERRORS_CONT);
1059 clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1060 set_opt (sbi->s_mount_opt, ERRORS_RO);
1061 break;
1062 case Opt_err_cont:
1063 clear_opt (sbi->s_mount_opt, ERRORS_RO);
1064 clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
1065 set_opt (sbi->s_mount_opt, ERRORS_CONT);
1066 break;
1067 case Opt_nouid32:
1068 set_opt (sbi->s_mount_opt, NO_UID32);
1069 break;
1070 case Opt_nocheck:
1071 clear_opt (sbi->s_mount_opt, CHECK);
1072 break;
1073 case Opt_debug:
1074 set_opt (sbi->s_mount_opt, DEBUG);
1075 break;
1076 case Opt_oldalloc:
1077 ext3_msg(sb, KERN_WARNING,
1078 "Ignoring deprecated oldalloc option");
1079 break;
1080 case Opt_orlov:
1081 ext3_msg(sb, KERN_WARNING,
1082 "Ignoring deprecated orlov option");
1083 break;
1084#ifdef CONFIG_EXT3_FS_XATTR
1085 case Opt_user_xattr:
1086 set_opt (sbi->s_mount_opt, XATTR_USER);
1087 break;
1088 case Opt_nouser_xattr:
1089 clear_opt (sbi->s_mount_opt, XATTR_USER);
1090 break;
1091#else
1092 case Opt_user_xattr:
1093 case Opt_nouser_xattr:
1094 ext3_msg(sb, KERN_INFO,
1095 "(no)user_xattr options not supported");
1096 break;
1097#endif
1098#ifdef CONFIG_EXT3_FS_POSIX_ACL
1099 case Opt_acl:
1100 set_opt(sbi->s_mount_opt, POSIX_ACL);
1101 break;
1102 case Opt_noacl:
1103 clear_opt(sbi->s_mount_opt, POSIX_ACL);
1104 break;
1105#else
1106 case Opt_acl:
1107 case Opt_noacl:
1108 ext3_msg(sb, KERN_INFO,
1109 "(no)acl options not supported");
1110 break;
1111#endif
1112 case Opt_reservation:
1113 set_opt(sbi->s_mount_opt, RESERVATION);
1114 break;
1115 case Opt_noreservation:
1116 clear_opt(sbi->s_mount_opt, RESERVATION);
1117 break;
1118 case Opt_journal_update:
1119 /* @@@ FIXME */
1120 /* Eventually we will want to be able to create
1121 a journal file here. For now, only allow the
1122 user to specify an existing inode to be the
1123 journal file. */
1124 if (is_remount) {
1125 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1126 "journal on remount");
1127 return 0;
1128 }
1129 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
1130 break;
1131 case Opt_journal_inum:
1132 if (is_remount) {
1133 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1134 "journal on remount");
1135 return 0;
1136 }
1137 if (match_int(&args[0], &option))
1138 return 0;
1139 *inum = option;
1140 break;
1141 case Opt_journal_dev:
1142 if (is_remount) {
1143 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1144 "journal on remount");
1145 return 0;
1146 }
1147 if (match_int(&args[0], &option))
1148 return 0;
1149 *journal_devnum = option;
1150 break;
1151 case Opt_journal_path:
1152 if (is_remount) {
1153 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1154 "journal on remount");
1155 return 0;
1156 }
1157
1158 journal_path = match_strdup(&args[0]);
1159 if (!journal_path) {
1160 ext3_msg(sb, KERN_ERR, "error: could not dup "
1161 "journal device string");
1162 return 0;
1163 }
1164
1165 error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
1166 if (error) {
1167 ext3_msg(sb, KERN_ERR, "error: could not find "
1168 "journal device path: error %d", error);
1169 kfree(journal_path);
1170 return 0;
1171 }
1172
1173 journal_inode = d_inode(path.dentry);
1174 if (!S_ISBLK(journal_inode->i_mode)) {
1175 ext3_msg(sb, KERN_ERR, "error: journal path %s "
1176 "is not a block device", journal_path);
1177 path_put(&path);
1178 kfree(journal_path);
1179 return 0;
1180 }
1181
1182 *journal_devnum = new_encode_dev(journal_inode->i_rdev);
1183 path_put(&path);
1184 kfree(journal_path);
1185 break;
1186 case Opt_noload:
1187 set_opt (sbi->s_mount_opt, NOLOAD);
1188 break;
1189 case Opt_commit:
1190 if (match_int(&args[0], &option))
1191 return 0;
1192 if (option < 0)
1193 return 0;
1194 if (option == 0)
1195 option = JBD_DEFAULT_MAX_COMMIT_AGE;
1196 sbi->s_commit_interval = HZ * option;
1197 break;
1198 case Opt_data_journal:
1199 data_opt = EXT3_MOUNT_JOURNAL_DATA;
1200 goto datacheck;
1201 case Opt_data_ordered:
1202 data_opt = EXT3_MOUNT_ORDERED_DATA;
1203 goto datacheck;
1204 case Opt_data_writeback:
1205 data_opt = EXT3_MOUNT_WRITEBACK_DATA;
1206 datacheck:
1207 if (is_remount) {
1208 if (test_opt(sb, DATA_FLAGS) == data_opt)
1209 break;
1210 ext3_msg(sb, KERN_ERR,
1211 "error: cannot change "
1212 "data mode on remount. The filesystem "
1213 "is mounted in data=%s mode and you "
1214 "try to remount it in data=%s mode.",
1215 data_mode_string(test_opt(sb,
1216 DATA_FLAGS)),
1217 data_mode_string(data_opt));
1218 return 0;
1219 } else {
1220 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1221 sbi->s_mount_opt |= data_opt;
1222 }
1223 break;
1224 case Opt_data_err_abort:
1225 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1226 break;
1227 case Opt_data_err_ignore:
1228 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1229 break;
1230#ifdef CONFIG_QUOTA
1231 case Opt_usrjquota:
1232 if (!set_qf_name(sb, USRQUOTA, &args[0]))
1233 return 0;
1234 break;
1235 case Opt_grpjquota:
1236 if (!set_qf_name(sb, GRPQUOTA, &args[0]))
1237 return 0;
1238 break;
1239 case Opt_offusrjquota:
1240 if (!clear_qf_name(sb, USRQUOTA))
1241 return 0;
1242 break;
1243 case Opt_offgrpjquota:
1244 if (!clear_qf_name(sb, GRPQUOTA))
1245 return 0;
1246 break;
1247 case Opt_jqfmt_vfsold:
1248 qfmt = QFMT_VFS_OLD;
1249 goto set_qf_format;
1250 case Opt_jqfmt_vfsv0:
1251 qfmt = QFMT_VFS_V0;
1252 goto set_qf_format;
1253 case Opt_jqfmt_vfsv1:
1254 qfmt = QFMT_VFS_V1;
1255set_qf_format:
1256 if (sb_any_quota_loaded(sb) &&
1257 sbi->s_jquota_fmt != qfmt) {
1258 ext3_msg(sb, KERN_ERR, "error: cannot change "
1259 "journaled quota options when "
1260 "quota turned on.");
1261 return 0;
1262 }
1263 sbi->s_jquota_fmt = qfmt;
1264 break;
1265 case Opt_quota:
1266 case Opt_usrquota:
1267 set_opt(sbi->s_mount_opt, QUOTA);
1268 set_opt(sbi->s_mount_opt, USRQUOTA);
1269 break;
1270 case Opt_grpquota:
1271 set_opt(sbi->s_mount_opt, QUOTA);
1272 set_opt(sbi->s_mount_opt, GRPQUOTA);
1273 break;
1274 case Opt_noquota:
1275 if (sb_any_quota_loaded(sb)) {
1276 ext3_msg(sb, KERN_ERR, "error: cannot change "
1277 "quota options when quota turned on.");
1278 return 0;
1279 }
1280 clear_opt(sbi->s_mount_opt, QUOTA);
1281 clear_opt(sbi->s_mount_opt, USRQUOTA);
1282 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1283 break;
1284#else
1285 case Opt_quota:
1286 case Opt_usrquota:
1287 case Opt_grpquota:
1288 ext3_msg(sb, KERN_ERR,
1289 "error: quota options not supported.");
1290 break;
1291 case Opt_usrjquota:
1292 case Opt_grpjquota:
1293 case Opt_offusrjquota:
1294 case Opt_offgrpjquota:
1295 case Opt_jqfmt_vfsold:
1296 case Opt_jqfmt_vfsv0:
1297 case Opt_jqfmt_vfsv1:
1298 ext3_msg(sb, KERN_ERR,
1299 "error: journaled quota options not "
1300 "supported.");
1301 break;
1302 case Opt_noquota:
1303 break;
1304#endif
1305 case Opt_abort:
1306 set_opt(sbi->s_mount_opt, ABORT);
1307 break;
1308 case Opt_nobarrier:
1309 clear_opt(sbi->s_mount_opt, BARRIER);
1310 break;
1311 case Opt_barrier:
1312 if (args[0].from) {
1313 if (match_int(&args[0], &option))
1314 return 0;
1315 } else
1316 option = 1; /* No argument, default to 1 */
1317 if (option)
1318 set_opt(sbi->s_mount_opt, BARRIER);
1319 else
1320 clear_opt(sbi->s_mount_opt, BARRIER);
1321 break;
1322 case Opt_ignore:
1323 break;
1324 case Opt_resize:
1325 if (!is_remount) {
1326 ext3_msg(sb, KERN_ERR,
1327 "error: resize option only available "
1328 "for remount");
1329 return 0;
1330 }
1331 if (match_int(&args[0], &option) != 0)
1332 return 0;
1333 *n_blocks_count = option;
1334 break;
1335 case Opt_nobh:
1336 ext3_msg(sb, KERN_WARNING,
1337 "warning: ignoring deprecated nobh option");
1338 break;
1339 case Opt_bh:
1340 ext3_msg(sb, KERN_WARNING,
1341 "warning: ignoring deprecated bh option");
1342 break;
1343 default:
1344 ext3_msg(sb, KERN_ERR,
1345 "error: unrecognized mount option \"%s\" "
1346 "or missing value", p);
1347 return 0;
1348 }
1349 }
1350#ifdef CONFIG_QUOTA
1351 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
1352 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
1353 clear_opt(sbi->s_mount_opt, USRQUOTA);
1354 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
1355 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1356
1357 if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
1358 ext3_msg(sb, KERN_ERR, "error: old and new quota "
1359 "format mixing.");
1360 return 0;
1361 }
1362
1363 if (!sbi->s_jquota_fmt) {
1364 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1365 "not specified.");
1366 return 0;
1367 }
1368 }
1369#endif
1370 return 1;
1371}
1372
1373static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1374 int read_only)
1375{
1376 struct ext3_sb_info *sbi = EXT3_SB(sb);
1377 int res = 0;
1378
1379 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
1380 ext3_msg(sb, KERN_ERR,
1381 "error: revision level too high, "
1382 "forcing read-only mode");
1383 res = MS_RDONLY;
1384 }
1385 if (read_only)
1386 return res;
1387 if (!(sbi->s_mount_state & EXT3_VALID_FS))
1388 ext3_msg(sb, KERN_WARNING,
1389 "warning: mounting unchecked fs, "
1390 "running e2fsck is recommended");
1391 else if ((sbi->s_mount_state & EXT3_ERROR_FS))
1392 ext3_msg(sb, KERN_WARNING,
1393 "warning: mounting fs with errors, "
1394 "running e2fsck is recommended");
1395 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
1396 le16_to_cpu(es->s_mnt_count) >=
1397 le16_to_cpu(es->s_max_mnt_count))
1398 ext3_msg(sb, KERN_WARNING,
1399 "warning: maximal mount count reached, "
1400 "running e2fsck is recommended");
1401 else if (le32_to_cpu(es->s_checkinterval) &&
1402 (le32_to_cpu(es->s_lastcheck) +
1403 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1404 ext3_msg(sb, KERN_WARNING,
1405 "warning: checktime reached, "
1406 "running e2fsck is recommended");
1407#if 0
1408 /* @@@ We _will_ want to clear the valid bit if we find
1409 inconsistencies, to force a fsck at reboot. But for
1410 a plain journaled filesystem we can keep it set as
1411 valid forever! :) */
1412 es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1413#endif
1414 if (!le16_to_cpu(es->s_max_mnt_count))
1415 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
1416 le16_add_cpu(&es->s_mnt_count, 1);
1417 es->s_mtime = cpu_to_le32(get_seconds());
1418 ext3_update_dynamic_rev(sb);
1419 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
1420
1421 ext3_commit_super(sb, es, 1);
1422 if (test_opt(sb, DEBUG))
1423 ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
1424 "bpg=%lu, ipg=%lu, mo=%04lx]",
1425 sb->s_blocksize,
1426 sbi->s_groups_count,
1427 EXT3_BLOCKS_PER_GROUP(sb),
1428 EXT3_INODES_PER_GROUP(sb),
1429 sbi->s_mount_opt);
1430
1431 if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
1432 char b[BDEVNAME_SIZE];
1433 ext3_msg(sb, KERN_INFO, "using external journal on %s",
1434 bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
1435 } else {
1436 ext3_msg(sb, KERN_INFO, "using internal journal");
1437 }
1438 cleancache_init_fs(sb);
1439 return res;
1440}
1441
1442/* Called at mount-time, super-block is locked */
1443static int ext3_check_descriptors(struct super_block *sb)
1444{
1445 struct ext3_sb_info *sbi = EXT3_SB(sb);
1446 int i;
1447
1448 ext3_debug ("Checking group descriptors");
1449
1450 for (i = 0; i < sbi->s_groups_count; i++) {
1451 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
1452 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
1453 ext3_fsblk_t last_block;
1454
1455 if (i == sbi->s_groups_count - 1)
1456 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
1457 else
1458 last_block = first_block +
1459 (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1460
1461 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
1462 le32_to_cpu(gdp->bg_block_bitmap) > last_block)
1463 {
1464 ext3_error (sb, "ext3_check_descriptors",
1465 "Block bitmap for group %d"
1466 " not in group (block %lu)!",
1467 i, (unsigned long)
1468 le32_to_cpu(gdp->bg_block_bitmap));
1469 return 0;
1470 }
1471 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
1472 le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
1473 {
1474 ext3_error (sb, "ext3_check_descriptors",
1475 "Inode bitmap for group %d"
1476 " not in group (block %lu)!",
1477 i, (unsigned long)
1478 le32_to_cpu(gdp->bg_inode_bitmap));
1479 return 0;
1480 }
1481 if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
1482 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
1483 last_block)
1484 {
1485 ext3_error (sb, "ext3_check_descriptors",
1486 "Inode table for group %d"
1487 " not in group (block %lu)!",
1488 i, (unsigned long)
1489 le32_to_cpu(gdp->bg_inode_table));
1490 return 0;
1491 }
1492 }
1493
1494 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
1495 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
1496 return 1;
1497}
1498
1499
1500/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
1501 * the superblock) which were deleted from all directories, but held open by
1502 * a process at the time of a crash. We walk the list and try to delete these
1503 * inodes at recovery time (only with a read-write filesystem).
1504 *
1505 * In order to keep the orphan inode chain consistent during traversal (in
1506 * case of crash during recovery), we link each inode into the superblock
1507 * orphan list_head and handle it the same way as an inode deletion during
1508 * normal operation (which journals the operations for us).
1509 *
1510 * We only do an iget() and an iput() on each inode, which is very safe if we
1511 * accidentally point at an in-use or already deleted inode. The worst that
1512 * can happen in this case is that we get a "bit already cleared" message from
1513 * ext3_free_inode(). The only reason we would point at a wrong inode is if
1514 * e2fsck was run on this filesystem, and it must have already done the orphan
1515 * inode cleanup for us, so we can safely abort without any further action.
1516 */
1517static void ext3_orphan_cleanup (struct super_block * sb,
1518 struct ext3_super_block * es)
1519{
1520 unsigned int s_flags = sb->s_flags;
1521 int nr_orphans = 0, nr_truncates = 0;
1522#ifdef CONFIG_QUOTA
1523 int i;
1524#endif
1525 if (!es->s_last_orphan) {
1526 jbd_debug(4, "no orphan inodes to clean up\n");
1527 return;
1528 }
1529
1530 if (bdev_read_only(sb->s_bdev)) {
1531 ext3_msg(sb, KERN_ERR, "error: write access "
1532 "unavailable, skipping orphan cleanup.");
1533 return;
1534 }
1535
1536 /* Check if feature set allows readwrite operations */
1537 if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
1538 ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
1539 "unknown ROCOMPAT features");
1540 return;
1541 }
1542
1543 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1544 /* don't clear list on RO mount w/ errors */
1545 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
1546 jbd_debug(1, "Errors on filesystem, "
1547 "clearing orphan list.\n");
1548 es->s_last_orphan = 0;
1549 }
1550 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1551 return;
1552 }
1553
1554 if (s_flags & MS_RDONLY) {
1555 ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1556 sb->s_flags &= ~MS_RDONLY;
1557 }
1558#ifdef CONFIG_QUOTA
1559 /* Needed for iput() to work correctly and not trash data */
1560 sb->s_flags |= MS_ACTIVE;
1561 /* Turn on quotas so that they are updated correctly */
1562 for (i = 0; i < EXT3_MAXQUOTAS; i++) {
1563 if (EXT3_SB(sb)->s_qf_names[i]) {
1564 int ret = ext3_quota_on_mount(sb, i);
1565 if (ret < 0)
1566 ext3_msg(sb, KERN_ERR,
1567 "error: cannot turn on journaled "
1568 "quota: %d", ret);
1569 }
1570 }
1571#endif
1572
1573 while (es->s_last_orphan) {
1574 struct inode *inode;
1575
1576 inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1577 if (IS_ERR(inode)) {
1578 es->s_last_orphan = 0;
1579 break;
1580 }
1581
1582 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
1583 dquot_initialize(inode);
1584 if (inode->i_nlink) {
1585 printk(KERN_DEBUG
1586 "%s: truncating inode %lu to %Ld bytes\n",
1587 __func__, inode->i_ino, inode->i_size);
1588 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1589 inode->i_ino, inode->i_size);
1590 ext3_truncate(inode);
1591 nr_truncates++;
1592 } else {
1593 printk(KERN_DEBUG
1594 "%s: deleting unreferenced inode %lu\n",
1595 __func__, inode->i_ino);
1596 jbd_debug(2, "deleting unreferenced inode %lu\n",
1597 inode->i_ino);
1598 nr_orphans++;
1599 }
1600 iput(inode); /* The delete magic happens here! */
1601 }
1602
1603#define PLURAL(x) (x), ((x)==1) ? "" : "s"
1604
1605 if (nr_orphans)
1606 ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1607 PLURAL(nr_orphans));
1608 if (nr_truncates)
1609 ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1610 PLURAL(nr_truncates));
1611#ifdef CONFIG_QUOTA
1612 /* Turn quotas off */
1613 for (i = 0; i < EXT3_MAXQUOTAS; i++) {
1614 if (sb_dqopt(sb)->files[i])
1615 dquot_quota_off(sb, i);
1616 }
1617#endif
1618 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1619}
1620
1621/*
1622 * Maximal file size. There is a direct, and {,double-,triple-}indirect
1623 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
1624 * We need to be 1 filesystem block less than the 2^32 sector limit.
1625 */
1626static loff_t ext3_max_size(int bits)
1627{
1628 loff_t res = EXT3_NDIR_BLOCKS;
1629 int meta_blocks;
1630 loff_t upper_limit;
1631
1632 /* This is calculated to be the largest file size for a
1633 * dense, file such that the total number of
1634 * sectors in the file, including data and all indirect blocks,
1635 * does not exceed 2^32 -1
1636 * __u32 i_blocks representing the total number of
1637 * 512 bytes blocks of the file
1638 */
1639 upper_limit = (1LL << 32) - 1;
1640
1641 /* total blocks in file system block size */
1642 upper_limit >>= (bits - 9);
1643
1644
1645 /* indirect blocks */
1646 meta_blocks = 1;
1647 /* double indirect blocks */
1648 meta_blocks += 1 + (1LL << (bits-2));
1649 /* tripple indirect blocks */
1650 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1651
1652 upper_limit -= meta_blocks;
1653 upper_limit <<= bits;
1654
1655 res += 1LL << (bits-2);
1656 res += 1LL << (2*(bits-2));
1657 res += 1LL << (3*(bits-2));
1658 res <<= bits;
1659 if (res > upper_limit)
1660 res = upper_limit;
1661
1662 if (res > MAX_LFS_FILESIZE)
1663 res = MAX_LFS_FILESIZE;
1664
1665 return res;
1666}
1667
1668static ext3_fsblk_t descriptor_loc(struct super_block *sb,
1669 ext3_fsblk_t logic_sb_block,
1670 int nr)
1671{
1672 struct ext3_sb_info *sbi = EXT3_SB(sb);
1673 unsigned long bg, first_meta_bg;
1674 int has_super = 0;
1675
1676 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1677
1678 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
1679 nr < first_meta_bg)
1680 return (logic_sb_block + nr + 1);
1681 bg = sbi->s_desc_per_block * nr;
1682 if (ext3_bg_has_super(sb, bg))
1683 has_super = 1;
1684 return (has_super + ext3_group_first_block_no(sb, bg));
1685}
1686
1687
1688static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1689{
1690 struct buffer_head * bh;
1691 struct ext3_super_block *es = NULL;
1692 struct ext3_sb_info *sbi;
1693 ext3_fsblk_t block;
1694 ext3_fsblk_t sb_block = get_sb_block(&data, sb);
1695 ext3_fsblk_t logic_sb_block;
1696 unsigned long offset = 0;
1697 unsigned int journal_inum = 0;
1698 unsigned long journal_devnum = 0;
1699 unsigned long def_mount_opts;
1700 struct inode *root;
1701 int blocksize;
1702 int hblock;
1703 int db_count;
1704 int i;
1705 int needs_recovery;
1706 int ret = -EINVAL;
1707 __le32 features;
1708 int err;
1709
1710 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1711 if (!sbi)
1712 return -ENOMEM;
1713
1714 sbi->s_blockgroup_lock =
1715 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
1716 if (!sbi->s_blockgroup_lock) {
1717 kfree(sbi);
1718 return -ENOMEM;
1719 }
1720 sb->s_fs_info = sbi;
1721 sbi->s_sb_block = sb_block;
1722
1723 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1724 if (!blocksize) {
1725 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
1726 goto out_fail;
1727 }
1728
1729 /*
1730 * The ext3 superblock will not be buffer aligned for other than 1kB
1731 * block sizes. We need to calculate the offset from buffer start.
1732 */
1733 if (blocksize != EXT3_MIN_BLOCK_SIZE) {
1734 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1735 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1736 } else {
1737 logic_sb_block = sb_block;
1738 }
1739
1740 if (!(bh = sb_bread(sb, logic_sb_block))) {
1741 ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
1742 goto out_fail;
1743 }
1744 /*
1745 * Note: s_es must be initialized as soon as possible because
1746 * some ext3 macro-instructions depend on its value
1747 */
1748 es = (struct ext3_super_block *) (bh->b_data + offset);
1749 sbi->s_es = es;
1750 sb->s_magic = le16_to_cpu(es->s_magic);
1751 if (sb->s_magic != EXT3_SUPER_MAGIC)
1752 goto cantfind_ext3;
1753
1754 /* Set defaults before we parse the mount options */
1755 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
1756 if (def_mount_opts & EXT3_DEFM_DEBUG)
1757 set_opt(sbi->s_mount_opt, DEBUG);
1758 if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
1759 set_opt(sbi->s_mount_opt, GRPID);
1760 if (def_mount_opts & EXT3_DEFM_UID16)
1761 set_opt(sbi->s_mount_opt, NO_UID32);
1762#ifdef CONFIG_EXT3_FS_XATTR
1763 if (def_mount_opts & EXT3_DEFM_XATTR_USER)
1764 set_opt(sbi->s_mount_opt, XATTR_USER);
1765#endif
1766#ifdef CONFIG_EXT3_FS_POSIX_ACL
1767 if (def_mount_opts & EXT3_DEFM_ACL)
1768 set_opt(sbi->s_mount_opt, POSIX_ACL);
1769#endif
1770 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
1771 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1772 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
1773 set_opt(sbi->s_mount_opt, ORDERED_DATA);
1774 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
1775 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
1776
1777 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
1778 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1779 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE)
1780 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1781 else
1782 set_opt(sbi->s_mount_opt, ERRORS_RO);
1783
1784 sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
1785 sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
1786
1787 /* enable barriers by default */
1788 set_opt(sbi->s_mount_opt, BARRIER);
1789 set_opt(sbi->s_mount_opt, RESERVATION);
1790
1791 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1792 NULL, 0))
1793 goto failed_mount;
1794
1795 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
1796 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
1797
1798 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
1799 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1800 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1801 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
1802 ext3_msg(sb, KERN_WARNING,
1803 "warning: feature flags set on rev 0 fs, "
1804 "running e2fsck is recommended");
1805 /*
1806 * Check feature flags regardless of the revision level, since we
1807 * previously didn't change the revision level when setting the flags,
1808 * so there is a chance incompat flags are set on a rev 0 filesystem.
1809 */
1810 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1811 if (features) {
1812 ext3_msg(sb, KERN_ERR,
1813 "error: couldn't mount because of unsupported "
1814 "optional features (%x)", le32_to_cpu(features));
1815 goto failed_mount;
1816 }
1817 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1818 if (!(sb->s_flags & MS_RDONLY) && features) {
1819 ext3_msg(sb, KERN_ERR,
1820 "error: couldn't mount RDWR because of unsupported "
1821 "optional features (%x)", le32_to_cpu(features));
1822 goto failed_mount;
1823 }
1824 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1825
1826 if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1827 blocksize > EXT3_MAX_BLOCK_SIZE) {
1828 ext3_msg(sb, KERN_ERR,
1829 "error: couldn't mount because of unsupported "
1830 "filesystem blocksize %d", blocksize);
1831 goto failed_mount;
1832 }
1833
1834 hblock = bdev_logical_block_size(sb->s_bdev);
1835 if (sb->s_blocksize != blocksize) {
1836 /*
1837 * Make sure the blocksize for the filesystem is larger
1838 * than the hardware sectorsize for the machine.
1839 */
1840 if (blocksize < hblock) {
1841 ext3_msg(sb, KERN_ERR,
1842 "error: fsblocksize %d too small for "
1843 "hardware sectorsize %d", blocksize, hblock);
1844 goto failed_mount;
1845 }
1846
1847 brelse (bh);
1848 if (!sb_set_blocksize(sb, blocksize)) {
1849 ext3_msg(sb, KERN_ERR,
1850 "error: bad blocksize %d", blocksize);
1851 goto out_fail;
1852 }
1853 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1854 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1855 bh = sb_bread(sb, logic_sb_block);
1856 if (!bh) {
1857 ext3_msg(sb, KERN_ERR,
1858 "error: can't read superblock on 2nd try");
1859 goto failed_mount;
1860 }
1861 es = (struct ext3_super_block *)(bh->b_data + offset);
1862 sbi->s_es = es;
1863 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
1864 ext3_msg(sb, KERN_ERR,
1865 "error: magic mismatch");
1866 goto failed_mount;
1867 }
1868 }
1869
1870 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
1871
1872 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
1873 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
1874 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
1875 } else {
1876 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
1877 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
1878 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
1879 (!is_power_of_2(sbi->s_inode_size)) ||
1880 (sbi->s_inode_size > blocksize)) {
1881 ext3_msg(sb, KERN_ERR,
1882 "error: unsupported inode size: %d",
1883 sbi->s_inode_size);
1884 goto failed_mount;
1885 }
1886 }
1887 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1888 le32_to_cpu(es->s_log_frag_size);
1889 if (blocksize != sbi->s_frag_size) {
1890 ext3_msg(sb, KERN_ERR,
1891 "error: fragsize %lu != blocksize %u (unsupported)",
1892 sbi->s_frag_size, blocksize);
1893 goto failed_mount;
1894 }
1895 sbi->s_frags_per_block = 1;
1896 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1897 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1898 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
1899 if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0)
1900 goto cantfind_ext3;
1901 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
1902 if (sbi->s_inodes_per_block == 0)
1903 goto cantfind_ext3;
1904 sbi->s_itb_per_group = sbi->s_inodes_per_group /
1905 sbi->s_inodes_per_block;
1906 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
1907 sbi->s_sbh = bh;
1908 sbi->s_mount_state = le16_to_cpu(es->s_state);
1909 sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
1910 sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
1911 for (i = 0; i < 4; i++)
1912 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
1913 sbi->s_def_hash_version = es->s_def_hash_version;
1914 i = le32_to_cpu(es->s_flags);
1915 if (i & EXT2_FLAGS_UNSIGNED_HASH)
1916 sbi->s_hash_unsigned = 3;
1917 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
1918#ifdef __CHAR_UNSIGNED__
1919 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
1920 sbi->s_hash_unsigned = 3;
1921#else
1922 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
1923#endif
1924 }
1925
1926 if (sbi->s_blocks_per_group > blocksize * 8) {
1927 ext3_msg(sb, KERN_ERR,
1928 "#blocks per group too big: %lu",
1929 sbi->s_blocks_per_group);
1930 goto failed_mount;
1931 }
1932 if (sbi->s_frags_per_group > blocksize * 8) {
1933 ext3_msg(sb, KERN_ERR,
1934 "error: #fragments per group too big: %lu",
1935 sbi->s_frags_per_group);
1936 goto failed_mount;
1937 }
1938 if (sbi->s_inodes_per_group > blocksize * 8) {
1939 ext3_msg(sb, KERN_ERR,
1940 "error: #inodes per group too big: %lu",
1941 sbi->s_inodes_per_group);
1942 goto failed_mount;
1943 }
1944
1945 err = generic_check_addressable(sb->s_blocksize_bits,
1946 le32_to_cpu(es->s_blocks_count));
1947 if (err) {
1948 ext3_msg(sb, KERN_ERR,
1949 "error: filesystem is too large to mount safely");
1950 if (sizeof(sector_t) < 8)
1951 ext3_msg(sb, KERN_ERR,
1952 "error: CONFIG_LBDAF not enabled");
1953 ret = err;
1954 goto failed_mount;
1955 }
1956
1957 if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1958 goto cantfind_ext3;
1959 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1960 le32_to_cpu(es->s_first_data_block) - 1)
1961 / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1962 db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
1963 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1964 GFP_KERNEL);
1965 if (sbi->s_group_desc == NULL) {
1966 ext3_msg(sb, KERN_ERR,
1967 "error: not enough memory");
1968 ret = -ENOMEM;
1969 goto failed_mount;
1970 }
1971
1972 bgl_lock_init(sbi->s_blockgroup_lock);
1973
1974 for (i = 0; i < db_count; i++) {
1975 block = descriptor_loc(sb, logic_sb_block, i);
1976 sbi->s_group_desc[i] = sb_bread(sb, block);
1977 if (!sbi->s_group_desc[i]) {
1978 ext3_msg(sb, KERN_ERR,
1979 "error: can't read group descriptor %d", i);
1980 db_count = i;
1981 goto failed_mount2;
1982 }
1983 }
1984 if (!ext3_check_descriptors (sb)) {
1985 ext3_msg(sb, KERN_ERR,
1986 "error: group descriptors corrupted");
1987 goto failed_mount2;
1988 }
1989 sbi->s_gdb_count = db_count;
1990 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1991 spin_lock_init(&sbi->s_next_gen_lock);
1992
1993 /* per fileystem reservation list head & lock */
1994 spin_lock_init(&sbi->s_rsv_window_lock);
1995 sbi->s_rsv_window_root = RB_ROOT;
1996 /* Add a single, static dummy reservation to the start of the
1997 * reservation window list --- it gives us a placeholder for
1998 * append-at-start-of-list which makes the allocation logic
1999 * _much_ simpler. */
2000 sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
2001 sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
2002 sbi->s_rsv_window_head.rsv_alloc_hit = 0;
2003 sbi->s_rsv_window_head.rsv_goal_size = 0;
2004 ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
2005
2006 /*
2007 * set up enough so that it can read an inode
2008 */
2009 sb->s_op = &ext3_sops;
2010 sb->s_export_op = &ext3_export_ops;
2011 sb->s_xattr = ext3_xattr_handlers;
2012#ifdef CONFIG_QUOTA
2013 sb->s_qcop = &ext3_qctl_operations;
2014 sb->dq_op = &ext3_quota_operations;
2015 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
2016#endif
2017 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
2018 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2019 mutex_init(&sbi->s_orphan_lock);
2020 mutex_init(&sbi->s_resize_lock);
2021
2022 sb->s_root = NULL;
2023
2024 needs_recovery = (es->s_last_orphan != 0 ||
2025 EXT3_HAS_INCOMPAT_FEATURE(sb,
2026 EXT3_FEATURE_INCOMPAT_RECOVER));
2027
2028 /*
2029 * The first inode we look at is the journal inode. Don't try
2030 * root first: it may be modified in the journal!
2031 */
2032 if (!test_opt(sb, NOLOAD) &&
2033 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
2034 if (ext3_load_journal(sb, es, journal_devnum))
2035 goto failed_mount2;
2036 } else if (journal_inum) {
2037 if (ext3_create_journal(sb, es, journal_inum))
2038 goto failed_mount2;
2039 } else {
2040 if (!silent)
2041 ext3_msg(sb, KERN_ERR,
2042 "error: no journal found. "
2043 "mounting ext3 over ext2?");
2044 goto failed_mount2;
2045 }
2046 err = percpu_counter_init(&sbi->s_freeblocks_counter,
2047 ext3_count_free_blocks(sb), GFP_KERNEL);
2048 if (!err) {
2049 err = percpu_counter_init(&sbi->s_freeinodes_counter,
2050 ext3_count_free_inodes(sb), GFP_KERNEL);
2051 }
2052 if (!err) {
2053 err = percpu_counter_init(&sbi->s_dirs_counter,
2054 ext3_count_dirs(sb), GFP_KERNEL);
2055 }
2056 if (err) {
2057 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
2058 ret = err;
2059 goto failed_mount3;
2060 }
2061
2062 /* We have now updated the journal if required, so we can
2063 * validate the data journaling mode. */
2064 switch (test_opt(sb, DATA_FLAGS)) {
2065 case 0:
2066 /* No mode set, assume a default based on the journal
2067 capabilities: ORDERED_DATA if the journal can
2068 cope, else JOURNAL_DATA */
2069 if (journal_check_available_features
2070 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
2071 set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE);
2072 else
2073 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2074 break;
2075
2076 case EXT3_MOUNT_ORDERED_DATA:
2077 case EXT3_MOUNT_WRITEBACK_DATA:
2078 if (!journal_check_available_features
2079 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
2080 ext3_msg(sb, KERN_ERR,
2081 "error: journal does not support "
2082 "requested data journaling mode");
2083 goto failed_mount3;
2084 }
2085 default:
2086 break;
2087 }
2088
2089 /*
2090 * The journal_load will have done any necessary log recovery,
2091 * so we can safely mount the rest of the filesystem now.
2092 */
2093
2094 root = ext3_iget(sb, EXT3_ROOT_INO);
2095 if (IS_ERR(root)) {
2096 ext3_msg(sb, KERN_ERR, "error: get root inode failed");
2097 ret = PTR_ERR(root);
2098 goto failed_mount3;
2099 }
2100 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2101 iput(root);
2102 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2103 goto failed_mount3;
2104 }
2105 sb->s_root = d_make_root(root);
2106 if (!sb->s_root) {
2107 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2108 ret = -ENOMEM;
2109 goto failed_mount3;
2110 }
2111
2112 if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY))
2113 sb->s_flags |= MS_RDONLY;
2114
2115 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2116 ext3_orphan_cleanup(sb, es);
2117 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
2118 if (needs_recovery) {
2119 ext3_mark_recovery_complete(sb, es);
2120 ext3_msg(sb, KERN_INFO, "recovery complete");
2121 }
2122 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
2123 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2124 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2125 "writeback");
2126
2127 return 0;
2128
2129cantfind_ext3:
2130 if (!silent)
2131 ext3_msg(sb, KERN_INFO,
2132 "error: can't find ext3 filesystem on dev %s.",
2133 sb->s_id);
2134 goto failed_mount;
2135
2136failed_mount3:
2137 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2138 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2139 percpu_counter_destroy(&sbi->s_dirs_counter);
2140 journal_destroy(sbi->s_journal);
2141failed_mount2:
2142 for (i = 0; i < db_count; i++)
2143 brelse(sbi->s_group_desc[i]);
2144 kfree(sbi->s_group_desc);
2145failed_mount:
2146#ifdef CONFIG_QUOTA
2147 for (i = 0; i < EXT3_MAXQUOTAS; i++)
2148 kfree(sbi->s_qf_names[i]);
2149#endif
2150 ext3_blkdev_remove(sbi);
2151 brelse(bh);
2152out_fail:
2153 sb->s_fs_info = NULL;
2154 kfree(sbi->s_blockgroup_lock);
2155 kfree(sbi);
2156 return ret;
2157}
2158
2159/*
2160 * Setup any per-fs journal parameters now. We'll do this both on
2161 * initial mount, once the journal has been initialised but before we've
2162 * done any recovery; and again on any subsequent remount.
2163 */
2164static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
2165{
2166 struct ext3_sb_info *sbi = EXT3_SB(sb);
2167
2168 if (sbi->s_commit_interval)
2169 journal->j_commit_interval = sbi->s_commit_interval;
2170 /* We could also set up an ext3-specific default for the commit
2171 * interval here, but for now we'll just fall back to the jbd
2172 * default. */
2173
2174 spin_lock(&journal->j_state_lock);
2175 if (test_opt(sb, BARRIER))
2176 journal->j_flags |= JFS_BARRIER;
2177 else
2178 journal->j_flags &= ~JFS_BARRIER;
2179 if (test_opt(sb, DATA_ERR_ABORT))
2180 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
2181 else
2182 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
2183 spin_unlock(&journal->j_state_lock);
2184}
2185
2186static journal_t *ext3_get_journal(struct super_block *sb,
2187 unsigned int journal_inum)
2188{
2189 struct inode *journal_inode;
2190 journal_t *journal;
2191
2192 /* First, test for the existence of a valid inode on disk. Bad
2193 * things happen if we iget() an unused inode, as the subsequent
2194 * iput() will try to delete it. */
2195
2196 journal_inode = ext3_iget(sb, journal_inum);
2197 if (IS_ERR(journal_inode)) {
2198 ext3_msg(sb, KERN_ERR, "error: no journal found");
2199 return NULL;
2200 }
2201 if (!journal_inode->i_nlink) {
2202 make_bad_inode(journal_inode);
2203 iput(journal_inode);
2204 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
2205 return NULL;
2206 }
2207
2208 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
2209 journal_inode, journal_inode->i_size);
2210 if (!S_ISREG(journal_inode->i_mode)) {
2211 ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
2212 iput(journal_inode);
2213 return NULL;
2214 }
2215
2216 journal = journal_init_inode(journal_inode);
2217 if (!journal) {
2218 ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
2219 iput(journal_inode);
2220 return NULL;
2221 }
2222 journal->j_private = sb;
2223 ext3_init_journal_params(sb, journal);
2224 return journal;
2225}
2226
2227static journal_t *ext3_get_dev_journal(struct super_block *sb,
2228 dev_t j_dev)
2229{
2230 struct buffer_head * bh;
2231 journal_t *journal;
2232 ext3_fsblk_t start;
2233 ext3_fsblk_t len;
2234 int hblock, blocksize;
2235 ext3_fsblk_t sb_block;
2236 unsigned long offset;
2237 struct ext3_super_block * es;
2238 struct block_device *bdev;
2239
2240 bdev = ext3_blkdev_get(j_dev, sb);
2241 if (bdev == NULL)
2242 return NULL;
2243
2244 blocksize = sb->s_blocksize;
2245 hblock = bdev_logical_block_size(bdev);
2246 if (blocksize < hblock) {
2247 ext3_msg(sb, KERN_ERR,
2248 "error: blocksize too small for journal device");
2249 goto out_bdev;
2250 }
2251
2252 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
2253 offset = EXT3_MIN_BLOCK_SIZE % blocksize;
2254 set_blocksize(bdev, blocksize);
2255 if (!(bh = __bread(bdev, sb_block, blocksize))) {
2256 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
2257 "external journal");
2258 goto out_bdev;
2259 }
2260
2261 es = (struct ext3_super_block *) (bh->b_data + offset);
2262 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2263 !(le32_to_cpu(es->s_feature_incompat) &
2264 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2265 ext3_msg(sb, KERN_ERR, "error: external journal has "
2266 "bad superblock");
2267 brelse(bh);
2268 goto out_bdev;
2269 }
2270
2271 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2272 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
2273 brelse(bh);
2274 goto out_bdev;
2275 }
2276
2277 len = le32_to_cpu(es->s_blocks_count);
2278 start = sb_block + 1;
2279 brelse(bh); /* we're done with the superblock */
2280
2281 journal = journal_init_dev(bdev, sb->s_bdev,
2282 start, len, blocksize);
2283 if (!journal) {
2284 ext3_msg(sb, KERN_ERR,
2285 "error: failed to create device journal");
2286 goto out_bdev;
2287 }
2288 journal->j_private = sb;
2289 if (!bh_uptodate_or_lock(journal->j_sb_buffer)) {
2290 if (bh_submit_read(journal->j_sb_buffer)) {
2291 ext3_msg(sb, KERN_ERR, "I/O error on journal device");
2292 goto out_journal;
2293 }
2294 }
2295 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2296 ext3_msg(sb, KERN_ERR,
2297 "error: external journal has more than one "
2298 "user (unsupported) - %d",
2299 be32_to_cpu(journal->j_superblock->s_nr_users));
2300 goto out_journal;
2301 }
2302 EXT3_SB(sb)->journal_bdev = bdev;
2303 ext3_init_journal_params(sb, journal);
2304 return journal;
2305out_journal:
2306 journal_destroy(journal);
2307out_bdev:
2308 ext3_blkdev_put(bdev);
2309 return NULL;
2310}
2311
2312static int ext3_load_journal(struct super_block *sb,
2313 struct ext3_super_block *es,
2314 unsigned long journal_devnum)
2315{
2316 journal_t *journal;
2317 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
2318 dev_t journal_dev;
2319 int err = 0;
2320 int really_read_only;
2321
2322 if (journal_devnum &&
2323 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2324 ext3_msg(sb, KERN_INFO, "external journal device major/minor "
2325 "numbers have changed");
2326 journal_dev = new_decode_dev(journal_devnum);
2327 } else
2328 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
2329
2330 really_read_only = bdev_read_only(sb->s_bdev);
2331
2332 /*
2333 * Are we loading a blank journal or performing recovery after a
2334 * crash? For recovery, we need to check in advance whether we
2335 * can get read-write access to the device.
2336 */
2337
2338 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
2339 if (sb->s_flags & MS_RDONLY) {
2340 ext3_msg(sb, KERN_INFO,
2341 "recovery required on readonly filesystem");
2342 if (really_read_only) {
2343 ext3_msg(sb, KERN_ERR, "error: write access "
2344 "unavailable, cannot proceed");
2345 return -EROFS;
2346 }
2347 ext3_msg(sb, KERN_INFO,
2348 "write access will be enabled during recovery");
2349 }
2350 }
2351
2352 if (journal_inum && journal_dev) {
2353 ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
2354 "and inode journals");
2355 return -EINVAL;
2356 }
2357
2358 if (journal_inum) {
2359 if (!(journal = ext3_get_journal(sb, journal_inum)))
2360 return -EINVAL;
2361 } else {
2362 if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
2363 return -EINVAL;
2364 }
2365
2366 if (!(journal->j_flags & JFS_BARRIER))
2367 printk(KERN_INFO "EXT3-fs: barriers not enabled\n");
2368
2369 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2370 err = journal_update_format(journal);
2371 if (err) {
2372 ext3_msg(sb, KERN_ERR, "error updating journal");
2373 journal_destroy(journal);
2374 return err;
2375 }
2376 }
2377
2378 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
2379 err = journal_wipe(journal, !really_read_only);
2380 if (!err)
2381 err = journal_load(journal);
2382
2383 if (err) {
2384 ext3_msg(sb, KERN_ERR, "error loading journal");
2385 journal_destroy(journal);
2386 return err;
2387 }
2388
2389 EXT3_SB(sb)->s_journal = journal;
2390 ext3_clear_journal_err(sb, es);
2391
2392 if (!really_read_only && journal_devnum &&
2393 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2394 es->s_journal_dev = cpu_to_le32(journal_devnum);
2395
2396 /* Make sure we flush the recovery flag to disk. */
2397 ext3_commit_super(sb, es, 1);
2398 }
2399
2400 return 0;
2401}
2402
2403static int ext3_create_journal(struct super_block *sb,
2404 struct ext3_super_block *es,
2405 unsigned int journal_inum)
2406{
2407 journal_t *journal;
2408 int err;
2409
2410 if (sb->s_flags & MS_RDONLY) {
2411 ext3_msg(sb, KERN_ERR,
2412 "error: readonly filesystem when trying to "
2413 "create journal");
2414 return -EROFS;
2415 }
2416
2417 journal = ext3_get_journal(sb, journal_inum);
2418 if (!journal)
2419 return -EINVAL;
2420
2421 ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
2422 journal_inum);
2423
2424 err = journal_create(journal);
2425 if (err) {
2426 ext3_msg(sb, KERN_ERR, "error creating journal");
2427 journal_destroy(journal);
2428 return -EIO;
2429 }
2430
2431 EXT3_SB(sb)->s_journal = journal;
2432
2433 ext3_update_dynamic_rev(sb);
2434 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2435 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
2436
2437 es->s_journal_inum = cpu_to_le32(journal_inum);
2438
2439 /* Make sure we flush the recovery flag to disk. */
2440 ext3_commit_super(sb, es, 1);
2441
2442 return 0;
2443}
2444
2445static int ext3_commit_super(struct super_block *sb,
2446 struct ext3_super_block *es,
2447 int sync)
2448{
2449 struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
2450 int error = 0;
2451
2452 if (!sbh)
2453 return error;
2454
2455 if (buffer_write_io_error(sbh)) {
2456 /*
2457 * Oh, dear. A previous attempt to write the
2458 * superblock failed. This could happen because the
2459 * USB device was yanked out. Or it could happen to
2460 * be a transient write error and maybe the block will
2461 * be remapped. Nothing we can do but to retry the
2462 * write and hope for the best.
2463 */
2464 ext3_msg(sb, KERN_ERR, "previous I/O error to "
2465 "superblock detected");
2466 clear_buffer_write_io_error(sbh);
2467 set_buffer_uptodate(sbh);
2468 }
2469 /*
2470 * If the file system is mounted read-only, don't update the
2471 * superblock write time. This avoids updating the superblock
2472 * write time when we are mounting the root file system
2473 * read/only but we need to replay the journal; at that point,
2474 * for people who are east of GMT and who make their clock
2475 * tick in localtime for Windows bug-for-bug compatibility,
2476 * the clock is set in the future, and this will cause e2fsck
2477 * to complain and force a full file system check.
2478 */
2479 if (!(sb->s_flags & MS_RDONLY))
2480 es->s_wtime = cpu_to_le32(get_seconds());
2481 es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
2482 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
2483 BUFFER_TRACE(sbh, "marking dirty");
2484 mark_buffer_dirty(sbh);
2485 if (sync) {
2486 error = sync_dirty_buffer(sbh);
2487 if (buffer_write_io_error(sbh)) {
2488 ext3_msg(sb, KERN_ERR, "I/O error while writing "
2489 "superblock");
2490 clear_buffer_write_io_error(sbh);
2491 set_buffer_uptodate(sbh);
2492 }
2493 }
2494 return error;
2495}
2496
2497
2498/*
2499 * Have we just finished recovery? If so, and if we are mounting (or
2500 * remounting) the filesystem readonly, then we will end up with a
2501 * consistent fs on disk. Record that fact.
2502 */
2503static void ext3_mark_recovery_complete(struct super_block * sb,
2504 struct ext3_super_block * es)
2505{
2506 journal_t *journal = EXT3_SB(sb)->s_journal;
2507
2508 journal_lock_updates(journal);
2509 if (journal_flush(journal) < 0)
2510 goto out;
2511
2512 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2513 sb->s_flags & MS_RDONLY) {
2514 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2515 ext3_commit_super(sb, es, 1);
2516 }
2517
2518out:
2519 journal_unlock_updates(journal);
2520}
2521
2522/*
2523 * If we are mounting (or read-write remounting) a filesystem whose journal
2524 * has recorded an error from a previous lifetime, move that error to the
2525 * main filesystem now.
2526 */
2527static void ext3_clear_journal_err(struct super_block *sb,
2528 struct ext3_super_block *es)
2529{
2530 journal_t *journal;
2531 int j_errno;
2532 const char *errstr;
2533
2534 journal = EXT3_SB(sb)->s_journal;
2535
2536 /*
2537 * Now check for any error status which may have been recorded in the
2538 * journal by a prior ext3_error() or ext3_abort()
2539 */
2540
2541 j_errno = journal_errno(journal);
2542 if (j_errno) {
2543 char nbuf[16];
2544
2545 errstr = ext3_decode_error(sb, j_errno, nbuf);
2546 ext3_warning(sb, __func__, "Filesystem error recorded "
2547 "from previous mount: %s", errstr);
2548 ext3_warning(sb, __func__, "Marking fs in need of "
2549 "filesystem check.");
2550
2551 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
2552 es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
2553 ext3_commit_super (sb, es, 1);
2554
2555 journal_clear_err(journal);
2556 }
2557}
2558
2559/*
2560 * Force the running and committing transactions to commit,
2561 * and wait on the commit.
2562 */
2563int ext3_force_commit(struct super_block *sb)
2564{
2565 journal_t *journal;
2566 int ret;
2567
2568 if (sb->s_flags & MS_RDONLY)
2569 return 0;
2570
2571 journal = EXT3_SB(sb)->s_journal;
2572 ret = ext3_journal_force_commit(journal);
2573 return ret;
2574}
2575
2576static int ext3_sync_fs(struct super_block *sb, int wait)
2577{
2578 tid_t target;
2579
2580 trace_ext3_sync_fs(sb, wait);
2581 /*
2582 * Writeback quota in non-journalled quota case - journalled quota has
2583 * no dirty dquots
2584 */
2585 dquot_writeback_dquots(sb, -1);
2586 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2587 if (wait)
2588 log_wait_commit(EXT3_SB(sb)->s_journal, target);
2589 }
2590 return 0;
2591}
2592
2593/*
2594 * LVM calls this function before a (read-only) snapshot is created. This
2595 * gives us a chance to flush the journal completely and mark the fs clean.
2596 */
2597static int ext3_freeze(struct super_block *sb)
2598{
2599 int error = 0;
2600 journal_t *journal;
2601
2602 if (!(sb->s_flags & MS_RDONLY)) {
2603 journal = EXT3_SB(sb)->s_journal;
2604
2605 /* Now we set up the journal barrier. */
2606 journal_lock_updates(journal);
2607
2608 /*
2609 * We don't want to clear needs_recovery flag when we failed
2610 * to flush the journal.
2611 */
2612 error = journal_flush(journal);
2613 if (error < 0)
2614 goto out;
2615
2616 /* Journal blocked and flushed, clear needs_recovery flag. */
2617 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2618 error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2619 if (error)
2620 goto out;
2621 }
2622 return 0;
2623
2624out:
2625 journal_unlock_updates(journal);
2626 return error;
2627}
2628
2629/*
2630 * Called by LVM after the snapshot is done. We need to reset the RECOVER
2631 * flag here, even though the filesystem is not technically dirty yet.
2632 */
2633static int ext3_unfreeze(struct super_block *sb)
2634{
2635 if (!(sb->s_flags & MS_RDONLY)) {
2636 /* Reser the needs_recovery flag before the fs is unlocked. */
2637 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2638 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2639 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2640 }
2641 return 0;
2642}
2643
2644static int ext3_remount (struct super_block * sb, int * flags, char * data)
2645{
2646 struct ext3_super_block * es;
2647 struct ext3_sb_info *sbi = EXT3_SB(sb);
2648 ext3_fsblk_t n_blocks_count = 0;
2649 unsigned long old_sb_flags;
2650 struct ext3_mount_options old_opts;
2651 int enable_quota = 0;
2652 int err;
2653#ifdef CONFIG_QUOTA
2654 int i;
2655#endif
2656
2657 sync_filesystem(sb);
2658
2659 /* Store the original options */
2660 old_sb_flags = sb->s_flags;
2661 old_opts.s_mount_opt = sbi->s_mount_opt;
2662 old_opts.s_resuid = sbi->s_resuid;
2663 old_opts.s_resgid = sbi->s_resgid;
2664 old_opts.s_commit_interval = sbi->s_commit_interval;
2665#ifdef CONFIG_QUOTA
2666 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2667 for (i = 0; i < EXT3_MAXQUOTAS; i++)
2668 if (sbi->s_qf_names[i]) {
2669 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
2670 GFP_KERNEL);
2671 if (!old_opts.s_qf_names[i]) {
2672 int j;
2673
2674 for (j = 0; j < i; j++)
2675 kfree(old_opts.s_qf_names[j]);
2676 return -ENOMEM;
2677 }
2678 } else
2679 old_opts.s_qf_names[i] = NULL;
2680#endif
2681
2682 /*
2683 * Allow the "check" option to be passed as a remount option.
2684 */
2685 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
2686 err = -EINVAL;
2687 goto restore_opts;
2688 }
2689
2690 if (test_opt(sb, ABORT))
2691 ext3_abort(sb, __func__, "Abort forced by user");
2692
2693 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2694 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
2695
2696 es = sbi->s_es;
2697
2698 ext3_init_journal_params(sb, sbi->s_journal);
2699
2700 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2701 n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
2702 if (test_opt(sb, ABORT)) {
2703 err = -EROFS;
2704 goto restore_opts;
2705 }
2706
2707 if (*flags & MS_RDONLY) {
2708 err = dquot_suspend(sb, -1);
2709 if (err < 0)
2710 goto restore_opts;
2711
2712 /*
2713 * First of all, the unconditional stuff we have to do
2714 * to disable replay of the journal when we next remount
2715 */
2716 sb->s_flags |= MS_RDONLY;
2717
2718 /*
2719 * OK, test if we are remounting a valid rw partition
2720 * readonly, and if so set the rdonly flag and then
2721 * mark the partition as valid again.
2722 */
2723 if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
2724 (sbi->s_mount_state & EXT3_VALID_FS))
2725 es->s_state = cpu_to_le16(sbi->s_mount_state);
2726
2727 ext3_mark_recovery_complete(sb, es);
2728 } else {
2729 __le32 ret;
2730 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
2731 ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
2732 ext3_msg(sb, KERN_WARNING,
2733 "warning: couldn't remount RDWR "
2734 "because of unsupported optional "
2735 "features (%x)", le32_to_cpu(ret));
2736 err = -EROFS;
2737 goto restore_opts;
2738 }
2739
2740 /*
2741 * If we have an unprocessed orphan list hanging
2742 * around from a previously readonly bdev mount,
2743 * require a full umount & mount for now.
2744 */
2745 if (es->s_last_orphan) {
2746 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
2747 "remount RDWR because of unprocessed "
2748 "orphan inode list. Please "
2749 "umount & mount instead.");
2750 err = -EINVAL;
2751 goto restore_opts;
2752 }
2753
2754 /*
2755 * Mounting a RDONLY partition read-write, so reread
2756 * and store the current valid flag. (It may have
2757 * been changed by e2fsck since we originally mounted
2758 * the partition.)
2759 */
2760 ext3_clear_journal_err(sb, es);
2761 sbi->s_mount_state = le16_to_cpu(es->s_state);
2762 if ((err = ext3_group_extend(sb, es, n_blocks_count)))
2763 goto restore_opts;
2764 if (!ext3_setup_super (sb, es, 0))
2765 sb->s_flags &= ~MS_RDONLY;
2766 enable_quota = 1;
2767 }
2768 }
2769#ifdef CONFIG_QUOTA
2770 /* Release old quota file names */
2771 for (i = 0; i < EXT3_MAXQUOTAS; i++)
2772 kfree(old_opts.s_qf_names[i]);
2773#endif
2774 if (enable_quota)
2775 dquot_resume(sb, -1);
2776 return 0;
2777restore_opts:
2778 sb->s_flags = old_sb_flags;
2779 sbi->s_mount_opt = old_opts.s_mount_opt;
2780 sbi->s_resuid = old_opts.s_resuid;
2781 sbi->s_resgid = old_opts.s_resgid;
2782 sbi->s_commit_interval = old_opts.s_commit_interval;
2783#ifdef CONFIG_QUOTA
2784 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2785 for (i = 0; i < EXT3_MAXQUOTAS; i++) {
2786 kfree(sbi->s_qf_names[i]);
2787 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2788 }
2789#endif
2790 return err;
2791}
2792
2793static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
2794{
2795 struct super_block *sb = dentry->d_sb;
2796 struct ext3_sb_info *sbi = EXT3_SB(sb);
2797 struct ext3_super_block *es = sbi->s_es;
2798 u64 fsid;
2799
2800 if (test_opt(sb, MINIX_DF)) {
2801 sbi->s_overhead_last = 0;
2802 } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
2803 unsigned long ngroups = sbi->s_groups_count, i;
2804 ext3_fsblk_t overhead = 0;
2805 smp_rmb();
2806
2807 /*
2808 * Compute the overhead (FS structures). This is constant
2809 * for a given filesystem unless the number of block groups
2810 * changes so we cache the previous value until it does.
2811 */
2812
2813 /*
2814 * All of the blocks before first_data_block are
2815 * overhead
2816 */
2817 overhead = le32_to_cpu(es->s_first_data_block);
2818
2819 /*
2820 * Add the overhead attributed to the superblock and
2821 * block group descriptors. If the sparse superblocks
2822 * feature is turned on, then not all groups have this.
2823 */
2824 for (i = 0; i < ngroups; i++) {
2825 overhead += ext3_bg_has_super(sb, i) +
2826 ext3_bg_num_gdb(sb, i);
2827 cond_resched();
2828 }
2829
2830 /*
2831 * Every block group has an inode bitmap, a block
2832 * bitmap, and an inode table.
2833 */
2834 overhead += ngroups * (2 + sbi->s_itb_per_group);
2835
2836 /* Add the internal journal blocks as well */
2837 if (sbi->s_journal && !sbi->journal_bdev)
2838 overhead += sbi->s_journal->j_maxlen;
2839
2840 sbi->s_overhead_last = overhead;
2841 smp_wmb();
2842 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
2843 }
2844
2845 buf->f_type = EXT3_SUPER_MAGIC;
2846 buf->f_bsize = sb->s_blocksize;
2847 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
2848 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
2849 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2850 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2851 buf->f_bavail = 0;
2852 buf->f_files = le32_to_cpu(es->s_inodes_count);
2853 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
2854 buf->f_namelen = EXT3_NAME_LEN;
2855 fsid = le64_to_cpup((void *)es->s_uuid) ^
2856 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
2857 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
2858 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
2859 return 0;
2860}
2861
2862/* Helper function for writing quotas on sync - we need to start transaction before quota file
2863 * is locked for write. Otherwise there are possible deadlocks:
2864 * Process 1 Process 2
2865 * ext3_create() quota_sync()
2866 * journal_start() write_dquot()
2867 * dquot_initialize() down(dqio_mutex)
2868 * down(dqio_mutex) journal_start()
2869 *
2870 */
2871
2872#ifdef CONFIG_QUOTA
2873
2874static inline struct inode *dquot_to_inode(struct dquot *dquot)
2875{
2876 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
2877}
2878
2879static int ext3_write_dquot(struct dquot *dquot)
2880{
2881 int ret, err;
2882 handle_t *handle;
2883 struct inode *inode;
2884
2885 inode = dquot_to_inode(dquot);
2886 handle = ext3_journal_start(inode,
2887 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2888 if (IS_ERR(handle))
2889 return PTR_ERR(handle);
2890 ret = dquot_commit(dquot);
2891 err = ext3_journal_stop(handle);
2892 if (!ret)
2893 ret = err;
2894 return ret;
2895}
2896
2897static int ext3_acquire_dquot(struct dquot *dquot)
2898{
2899 int ret, err;
2900 handle_t *handle;
2901
2902 handle = ext3_journal_start(dquot_to_inode(dquot),
2903 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2904 if (IS_ERR(handle))
2905 return PTR_ERR(handle);
2906 ret = dquot_acquire(dquot);
2907 err = ext3_journal_stop(handle);
2908 if (!ret)
2909 ret = err;
2910 return ret;
2911}
2912
2913static int ext3_release_dquot(struct dquot *dquot)
2914{
2915 int ret, err;
2916 handle_t *handle;
2917
2918 handle = ext3_journal_start(dquot_to_inode(dquot),
2919 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2920 if (IS_ERR(handle)) {
2921 /* Release dquot anyway to avoid endless cycle in dqput() */
2922 dquot_release(dquot);
2923 return PTR_ERR(handle);
2924 }
2925 ret = dquot_release(dquot);
2926 err = ext3_journal_stop(handle);
2927 if (!ret)
2928 ret = err;
2929 return ret;
2930}
2931
2932static int ext3_mark_dquot_dirty(struct dquot *dquot)
2933{
2934 /* Are we journaling quotas? */
2935 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2936 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2937 dquot_mark_dquot_dirty(dquot);
2938 return ext3_write_dquot(dquot);
2939 } else {
2940 return dquot_mark_dquot_dirty(dquot);
2941 }
2942}
2943
2944static int ext3_write_info(struct super_block *sb, int type)
2945{
2946 int ret, err;
2947 handle_t *handle;
2948
2949 /* Data block + inode block */
2950 handle = ext3_journal_start(d_inode(sb->s_root), 2);
2951 if (IS_ERR(handle))
2952 return PTR_ERR(handle);
2953 ret = dquot_commit_info(sb, type);
2954 err = ext3_journal_stop(handle);
2955 if (!ret)
2956 ret = err;
2957 return ret;
2958}
2959
2960/*
2961 * Turn on quotas during mount time - we need to find
2962 * the quota file and such...
2963 */
2964static int ext3_quota_on_mount(struct super_block *sb, int type)
2965{
2966 return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2967 EXT3_SB(sb)->s_jquota_fmt, type);
2968}
2969
2970/*
2971 * Standard function to be called on quota_on
2972 */
2973static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2974 struct path *path)
2975{
2976 int err;
2977
2978 if (!test_opt(sb, QUOTA))
2979 return -EINVAL;
2980
2981 /* Quotafile not on the same filesystem? */
2982 if (path->dentry->d_sb != sb)
2983 return -EXDEV;
2984 /* Journaling quota? */
2985 if (EXT3_SB(sb)->s_qf_names[type]) {
2986 /* Quotafile not of fs root? */
2987 if (path->dentry->d_parent != sb->s_root)
2988 ext3_msg(sb, KERN_WARNING,
2989 "warning: Quota file not on filesystem root. "
2990 "Journaled quota will not work.");
2991 }
2992
2993 /*
2994 * When we journal data on quota file, we have to flush journal to see
2995 * all updates to the file when we bypass pagecache...
2996 */
2997 if (ext3_should_journal_data(d_inode(path->dentry))) {
2998 /*
2999 * We don't need to lock updates but journal_flush() could
3000 * otherwise be livelocked...
3001 */
3002 journal_lock_updates(EXT3_SB(sb)->s_journal);
3003 err = journal_flush(EXT3_SB(sb)->s_journal);
3004 journal_unlock_updates(EXT3_SB(sb)->s_journal);
3005 if (err)
3006 return err;
3007 }
3008
3009 return dquot_quota_on(sb, type, format_id, path);
3010}
3011
3012/* Read data from quotafile - avoid pagecache and such because we cannot afford
3013 * acquiring the locks... As quota files are never truncated and quota code
3014 * itself serializes the operations (and no one else should touch the files)
3015 * we don't have to be afraid of races */
3016static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
3017 size_t len, loff_t off)
3018{
3019 struct inode *inode = sb_dqopt(sb)->files[type];
3020 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
3021 int err = 0;
3022 int offset = off & (sb->s_blocksize - 1);
3023 int tocopy;
3024 size_t toread;
3025 struct buffer_head *bh;
3026 loff_t i_size = i_size_read(inode);
3027
3028 if (off > i_size)
3029 return 0;
3030 if (off+len > i_size)
3031 len = i_size-off;
3032 toread = len;
3033 while (toread > 0) {
3034 tocopy = sb->s_blocksize - offset < toread ?
3035 sb->s_blocksize - offset : toread;
3036 bh = ext3_bread(NULL, inode, blk, 0, &err);
3037 if (err)
3038 return err;
3039 if (!bh) /* A hole? */
3040 memset(data, 0, tocopy);
3041 else
3042 memcpy(data, bh->b_data+offset, tocopy);
3043 brelse(bh);
3044 offset = 0;
3045 toread -= tocopy;
3046 data += tocopy;
3047 blk++;
3048 }
3049 return len;
3050}
3051
3052/* Write to quotafile (we know the transaction is already started and has
3053 * enough credits) */
3054static ssize_t ext3_quota_write(struct super_block *sb, int type,
3055 const char *data, size_t len, loff_t off)
3056{
3057 struct inode *inode = sb_dqopt(sb)->files[type];
3058 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
3059 int err = 0;
3060 int offset = off & (sb->s_blocksize - 1);
3061 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
3062 struct buffer_head *bh;
3063 handle_t *handle = journal_current_handle();
3064
3065 if (!handle) {
3066 ext3_msg(sb, KERN_WARNING,
3067 "warning: quota write (off=%llu, len=%llu)"
3068 " cancelled because transaction is not started.",
3069 (unsigned long long)off, (unsigned long long)len);
3070 return -EIO;
3071 }
3072
3073 /*
3074 * Since we account only one data block in transaction credits,
3075 * then it is impossible to cross a block boundary.
3076 */
3077 if (sb->s_blocksize - offset < len) {
3078 ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
3079 " cancelled because not block aligned",
3080 (unsigned long long)off, (unsigned long long)len);
3081 return -EIO;
3082 }
3083 bh = ext3_bread(handle, inode, blk, 1, &err);
3084 if (!bh)
3085 goto out;
3086 if (journal_quota) {
3087 err = ext3_journal_get_write_access(handle, bh);
3088 if (err) {
3089 brelse(bh);
3090 goto out;
3091 }
3092 }
3093 lock_buffer(bh);
3094 memcpy(bh->b_data+offset, data, len);
3095 flush_dcache_page(bh->b_page);
3096 unlock_buffer(bh);
3097 if (journal_quota)
3098 err = ext3_journal_dirty_metadata(handle, bh);
3099 else {
3100 /* Always do at least ordered writes for quotas */
3101 err = ext3_journal_dirty_data(handle, bh);
3102 mark_buffer_dirty(bh);
3103 }
3104 brelse(bh);
3105out:
3106 if (err)
3107 return err;
3108 if (inode->i_size < off + len) {
3109 i_size_write(inode, off + len);
3110 EXT3_I(inode)->i_disksize = inode->i_size;
3111 }
3112 inode->i_version++;
3113 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3114 ext3_mark_inode_dirty(handle, inode);
3115 return len;
3116}
3117
3118#endif
3119
3120static struct dentry *ext3_mount(struct file_system_type *fs_type,
3121 int flags, const char *dev_name, void *data)
3122{
3123 return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
3124}
3125
3126static struct file_system_type ext3_fs_type = {
3127 .owner = THIS_MODULE,
3128 .name = "ext3",
3129 .mount = ext3_mount,
3130 .kill_sb = kill_block_super,
3131 .fs_flags = FS_REQUIRES_DEV,
3132};
3133MODULE_ALIAS_FS("ext3");
3134
3135static int __init init_ext3_fs(void)
3136{
3137 int err = init_ext3_xattr();
3138 if (err)
3139 return err;
3140 err = init_inodecache();
3141 if (err)
3142 goto out1;
3143 err = register_filesystem(&ext3_fs_type);
3144 if (err)
3145 goto out;
3146 return 0;
3147out:
3148 destroy_inodecache();
3149out1:
3150 exit_ext3_xattr();
3151 return err;
3152}
3153
3154static void __exit exit_ext3_fs(void)
3155{
3156 unregister_filesystem(&ext3_fs_type);
3157 destroy_inodecache();
3158 exit_ext3_xattr();
3159}
3160
3161MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3162MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
3163MODULE_LICENSE("GPL");
3164module_init(init_ext3_fs)
3165module_exit(exit_ext3_fs)