From b657c95c11088d77fc1bfc9c84d940f778bf9d12 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:11 -0800 Subject: ocfs2: Wrap inode block reads in a dedicated function. The ocfs2 code currently reads inodes off disk with a simple ocfs2_read_block() call. Each place that does this has a different set of sanity checks it performs. Some check only the signature. A couple validate the block number (the block read vs di->i_blkno). A couple others check for VALID_FL. Only one place validates i_fs_generation. A couple check nothing. Even when an error is found, they don't all do the same thing. We wrap inode reading into ocfs2_read_inode_block(). This will validate all the above fields, going readonly if they are invalid (they never should be). ocfs2_read_inode_block_full() is provided for the places that want to pass read_block flags. Every caller is passing a struct inode with a valid ip_blkno, so we don't need a separate blkno argument either. We will remove the validation checks from the rest of the code in a later commit, as they are no longer necessary. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 136 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 98 insertions(+), 38 deletions(-) (limited to 'fs/ocfs2/inode.c') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7aa00d511874..9eb701b86466 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -214,12 +214,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) return 0; } -int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, - int create_ino) +void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, + int create_ino) { struct super_block *sb; struct ocfs2_super *osb; - int status = -EINVAL; int use_plocks = 1; mlog_entry("(0x%p, size:%llu)\n", inode, @@ -232,25 +231,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) use_plocks = 0; - /* this means that read_inode cannot create a superblock inode - * today. change if needed. */ - if (!OCFS2_IS_VALID_DINODE(fe) || - !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { - mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " - "signature = %.*s, flags = 0x%x\n", - inode->i_ino, - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature, le32_to_cpu(fe->i_flags)); - goto bail; - } + /* + * These have all been checked by ocfs2_read_inode_block() or set + * by ocfs2_mknod_locked(), so a failure is a code bug. + */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode + cannot create a superblock + inode today. change if + that is needed. */ + BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))); + BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation); - if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) { - mlog(ML_ERROR, "file entry generation does not match " - "superblock! 
osb->fs_generation=%x, " - "fe->i_fs_generation=%x\n", - osb->fs_generation, le32_to_cpu(fe->i_fs_generation)); - goto bail; - } OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); @@ -354,10 +345,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_set_inode_flags(inode); - status = 0; -bail: - mlog_exit(status); - return status; + mlog_exit_void(); } static int ocfs2_read_locked_inode(struct inode *inode, @@ -460,11 +448,14 @@ static int ocfs2_read_locked_inode(struct inode *inode, } } - if (can_lock) - status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); - else + if (can_lock) { + status = ocfs2_read_inode_block_full(inode, &bh, + OCFS2_BH_IGNORE_CACHE); + } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); + if (!status) + status = ocfs2_validate_inode_block(osb->sb, bh); + } if (status < 0) { mlog_errno(status); goto bail; @@ -472,12 +463,6 @@ static int ocfs2_read_locked_inode(struct inode *inode, status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - mlog(0, "Invalid dinode #%llu: signature = %.*s\n", - (unsigned long long)args->fi_blkno, 7, - fe->i_signature); - goto bail; - } /* * This is a code bug. 
Right now the caller needs to @@ -491,10 +476,9 @@ static int ocfs2_read_locked_inode(struct inode *inode, if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) - inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); + inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); - if (ocfs2_populate_inode(inode, fe, 0) < 0) - goto bail; + ocfs2_populate_inode(inode, fe, 0); BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); @@ -1264,3 +1248,79 @@ void ocfs2_refresh_inode(struct inode *inode, spin_unlock(&OCFS2_I(inode)->ip_lock); } + +int ocfs2_validate_inode_block(struct super_block *sb, + struct buffer_head *bh) +{ + int rc = -EINVAL; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + + BUG_ON(!buffer_uptodate(bh)); + + if (!OCFS2_IS_VALID_DINODE(di)) { + ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", + (unsigned long long)bh->b_blocknr, 7, + di->i_signature); + goto bail; + } + + if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) { + ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(di->i_blkno)); + goto bail; + } + + if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { + ocfs2_error(sb, + "Invalid dinode #%llu: OCFS2_VALID_FL not set\n", + (unsigned long long)bh->b_blocknr); + goto bail; + } + + if (le32_to_cpu(di->i_fs_generation) != + OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Invalid dinode #%llu: fs_generation is %u\n", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(di->i_fs_generation)); + goto bail; + } + + rc = 0; + +bail: + return rc; +} + +int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, + int flags) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, + flags); + if (rc) + goto out; + + if (!(flags & OCFS2_BH_READAHEAD)) { + rc = ocfs2_validate_inode_block(inode->i_sb, tmp); + if (rc) { + brelse(tmp); + goto 
out; + } + } + + /* If ocfs2_read_blocks() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; +} + +int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh) +{ + return ocfs2_read_inode_block_full(inode, bh, 0); +} -- cgit v1.2.2 From 970e4936d7d15f35d00fd15a14f5343ba78b2fc8 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 13 Nov 2008 14:49:19 -0800 Subject: ocfs2: Validate metadata only when it's read from disk. Add an optional validation hook to ocfs2_read_blocks(). Now the validation function is only called when a block was actually read off of disk. It is not called when the buffer was in cache. We add a buffer state bit BH_NeedsValidate to flag these buffers. It must always be one higher than the last JBD2 buffer state bit. The dinode, dirblock, extent_block, and xattr_block validators are lifted to this scheme directly. The group_descriptor validator needs to be split into two pieces. The first part only needs the gd buffer and is passed to ocfs2_read_block(). The second part requires the dinode as well, and is called every time. It's only 3 compares, so it's tiny. This also allows us to clean up the non-fatal gd check used by resize.c. It now has no magic argument. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs/ocfs2/inode.c') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 9eb701b86466..ec3497bafda6 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1255,6 +1255,9 @@ int ocfs2_validate_inode_block(struct super_block *sb, int rc = -EINVAL; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + mlog(0, "Validating dinode %llu\n", + (unsigned long long)bh->b_blocknr); + BUG_ON(!buffer_uptodate(bh)); if (!OCFS2_IS_VALID_DINODE(di)) { @@ -1300,23 +1303,12 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, struct buffer_head *tmp = *bh; rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, - flags); - if (rc) - goto out; - - if (!(flags & OCFS2_BH_READAHEAD)) { - rc = ocfs2_validate_inode_block(inode->i_sb, tmp); - if (rc) { - brelse(tmp); - goto out; - } - } + flags, ocfs2_validate_inode_block); /* If ocfs2_read_blocks() got us a new bh, pass it up. 
*/ - if (!*bh) + if (!rc && !*bh) *bh = tmp; -out: return rc; } -- cgit v1.2.2 From 1a224ad11eeb190da4a123e156601aad1bb67f24 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 20 Aug 2008 15:43:36 +0200 Subject: ocfs2: Assign feature bits and system inodes to quota feature and quota files Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ocfs2/inode.c') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ec3497bafda6..ec25d9984192 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -283,6 +283,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; + } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) { + inode->i_flags |= S_NOQUOTA; } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) { mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino); /* we can't actually hit this as read_inode can't -- cgit v1.2.2 From bbbd0eb34bf801dee01e345785959a75258f6567 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 21 Aug 2008 18:22:30 +0200 Subject: ocfs2: Mark system files as not subject to quota accounting Mark system files as not subject to quota accounting. This prevents possible recursions into quota code and thus deadlocks. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/inode.c') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ec25d9984192..50dbc486ef71 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -275,8 +275,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_nlink = le16_to_cpu(fe->i_links_count); - if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) + if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; + inode->i_flags |= S_NOQUOTA; + } if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; -- cgit v1.2.2 From a90714c150e3ce677c57a9dac3ab1ec342c75a95 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 9 Oct 2008 19:38:40 +0200 Subject: ocfs2: Add quota calls for allocation and freeing of inodes and space Add quota calls for allocation and freeing of inodes and space, also update estimates on number of needed credits for a transaction. Move out inode allocation from ocfs2_mknod_locked() because vfs_dq_init() must be called outside of a transaction. 
Signed-off-by: Jan Kara Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2/inode.c') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 50dbc486ef71..288512c9dbc2 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -603,7 +604,8 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail; } - handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS); + handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + + ocfs2_quota_trans_credits(inode->i_sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -635,6 +637,7 @@ static int ocfs2_remove_inode(struct inode *inode, } ocfs2_remove_from_cache(inode, di_bh); + vfs_dq_free_inode(inode); status = ocfs2_free_dinode(handle, inode_alloc_inode, inode_alloc_bh, di); @@ -917,7 +920,10 @@ void ocfs2_delete_inode(struct inode *inode) mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); - if (is_bad_inode(inode)) { + /* When we fail in read_inode() we mark inode as bad. The second test + * catches the case when inode allocation fails before allocating + * a block for inode. */ + if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) { mlog(0, "Skipping delete of bad inode\n"); goto bail; } -- cgit v1.2.2 From d6b32bbb3eae3fb787f1c33bf9f767ca1ddeb208 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 17 Oct 2008 14:55:01 -0700 Subject: ocfs2: block read meta ecc. Add block check calls to the read_block validate functions. This is almost all of the read-side checking of metaecc. xattr buckets are not checked yet. Writes are also unchecked, and so a read-write mount will quickly fail. 
Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/inode.c') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 288512c9dbc2..9370b652ab94 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -38,6 +38,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "blockcheck.h" #include "dlmglue.h" #include "extent_map.h" #include "file.h" @@ -1262,7 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode, int ocfs2_validate_inode_block(struct super_block *sb, struct buffer_head *bh) { - int rc = -EINVAL; + int rc; struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; mlog(0, "Validating dinode %llu\n", @@ -1270,6 +1271,21 @@ int ocfs2_validate_inode_block(struct super_block *sb, BUG_ON(!buffer_uptodate(bh)); + /* + * If the ecc fails, we return the error but otherwise + * leave the filesystem running. We know any error is + * local to this block. + */ + rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); + if (rc) + goto bail; + + /* + * Errors after here are fatal. + */ + + rc = -EINVAL; + if (!OCFS2_IS_VALID_DINODE(di)) { ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", (unsigned long long)bh->b_blocknr, 7, -- cgit v1.2.2 From 13723d00e374c2a6d6ccb5af6de965e89c3e1b01 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Fri, 17 Oct 2008 19:25:01 -0700 Subject: ocfs2: Use metadata-specific ocfs2_journal_access_*() functions. The per-metadata-type ocfs2_journal_access_*() functions hook up jbd2 commit triggers and allow us to compute metadata ecc right before the buffers are written out. This commit provides ecc for inodes, extent blocks, group descriptors, and quota blocks. It is not safe to use extended attributes and metaecc at the same time yet. The ocfs2_extent_tree and ocfs2_path abstractions in alloc.c both hide the type of block at their root. 
Before, it didn't matter, but now the root block must use the appropriate ocfs2_journal_access_*() function. To keep this abstract, the structures now have a pointer to the matching journal_access function and a wrapper call to call it. A few places use naked ocfs2_write_block() calls instead of adding the blocks to the journal. We make sure to calculate their checksum and ecc before the write. Since we pass around the journal_access functions, let's typedef them in ocfs2.h. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/inode.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2/inode.c') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 9370b652ab94..229e707bc050 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -537,8 +537,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, goto out; } - status = ocfs2_journal_access(handle, inode, fe_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto out; @@ -621,8 +621,8 @@ static int ocfs2_remove_inode(struct inode *inode, } /* set the inodes dtime */ - status = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail_commit; @@ -1190,8 +1190,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle, mlog_entry("(inode %llu)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); - status = ocfs2_journal_access(handle, inode, bh, - OCFS2_JOURNAL_ACCESS_WRITE); + status = ocfs2_journal_access_di(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto leave; @@ -1277,8 +1277,11 @@ int ocfs2_validate_inode_block(struct super_block *sb, * local to this block. 
*/ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); - if (rc) + if (rc) { + mlog(ML_ERROR, "Checksum failed for dinode %llu\n", + (unsigned long long)bh->b_blocknr); goto bail; + } /* * Errors after here are fatal. -- cgit v1.2.2