From 115ff50bade0f93a288677745a5884def6cbf9b1 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 26 Jul 2006 14:52:13 -0500 Subject: JFS: Quota support broken, no quota_read and quota_write jfs_quota_read/write are very near duplicates of ext2_quota_read/write. Cleaned up jfs_get_block as long as I had to change it to be non-static. Signed-off-by: Dave Kleikamp --- fs/jfs/inode.c | 16 ++------ fs/jfs/jfs_inode.h | 1 + fs/jfs/super.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 43e3f566aad6..a223cf4faa9b 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -168,16 +168,15 @@ void jfs_dirty_inode(struct inode *inode) set_cflag(COMMIT_Dirty, inode); } -static int -jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) +int jfs_get_block(struct inode *ip, sector_t lblock, + struct buffer_head *bh_result, int create) { s64 lblock64 = lblock; int rc = 0; xad_t xad; s64 xaddr; int xflag; - s32 xlen = max_blocks; + s32 xlen = bh_result->b_size >> ip->i_blkbits; /* * Take appropriate lock on inode @@ -188,7 +187,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, IREAD_LOCK(ip); if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && - (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && + (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && xaddr) { if (xflag & XAD_NOTRECORDED) { if (!create) @@ -255,13 +254,6 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, return rc; } -static int jfs_get_block(struct inode *ip, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits, - bh_result, create); -} - static int jfs_writepage(struct page *page, struct writeback_control *wbc) { return nobh_writepage(page, jfs_get_block, wbc); diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index b5c7da6190dc..1fc48df670c8 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); extern void jfs_set_inode_flags(struct inode *); +extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern const struct address_space_operations jfs_aops; extern struct inode_operations jfs_dir_inode_operations; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4f6cfebc82db..90ee0de829c1 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -298,7 +299,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, break; } -#if defined(CONFIG_QUOTA) +#ifdef CONFIG_QUOTA case Opt_quota: case Opt_usrquota: *flag |= JFS_USRQUOTA; @@ -597,7 +598,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); -#if defined(CONFIG_QUOTA) +#ifdef CONFIG_QUOTA if (sbi->flag & JFS_USRQUOTA) seq_puts(seq, ",usrquota"); @@ -608,6 +609,112 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } +#ifdef CONFIG_QUOTA + +/* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... 
As quota files are never truncated and quota code + * itself serializes the operations (and noone else should touch the files) + * we don't have to be afraid of races */ +static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + struct buffer_head tmp_bh; + struct buffer_head *bh; + loff_t i_size = i_size_read(inode); + + if (off > i_size) + return 0; + if (off+len > i_size) + len = i_size-off; + toread = len; + while (toread > 0) { + tocopy = sb->s_blocksize - offset < toread ? + sb->s_blocksize - offset : toread; + + tmp_bh.b_state = 0; + tmp_bh.b_size = 1 << inode->i_blkbits; + err = jfs_get_block(inode, blk, &tmp_bh, 0); + if (err) + return err; + if (!buffer_mapped(&tmp_bh)) /* A hole? */ + memset(data, 0, tocopy); + else { + bh = sb_bread(sb, tmp_bh.b_blocknr); + if (!bh) + return -EIO; + memcpy(data, bh->b_data+offset, tocopy); + brelse(bh); + } + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile */ +static ssize_t jfs_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t towrite = len; + struct buffer_head tmp_bh; + struct buffer_head *bh; + + mutex_lock(&inode->i_mutex); + while (towrite > 0) { + tocopy = sb->s_blocksize - offset < towrite ? + sb->s_blocksize - offset : towrite; + + tmp_bh.b_state = 0; + err = jfs_get_block(inode, blk, &tmp_bh, 1); + if (err) + goto out; + if (offset || tocopy != sb->s_blocksize) + bh = sb_bread(sb, tmp_bh.b_blocknr); + else + bh = sb_getblk(sb, tmp_bh.b_blocknr); + if (!bh) { + err = -EIO; + goto out; + } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, tocopy); + flush_dcache_page(bh->b_page); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + brelse(bh); + offset = 0; + towrite -= tocopy; + data += tocopy; + blk++; + } +out: + if (len == towrite) + return err; + if (inode->i_size < off+len-towrite) + i_size_write(inode, off+len-towrite); + inode->i_version++; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + mutex_unlock(&inode->i_mutex); + return len - towrite; +} + +#endif + static struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .destroy_inode = jfs_destroy_inode, @@ -621,7 +728,11 @@ static struct super_operations jfs_super_operations = { .unlockfs = jfs_unlockfs, .statfs = jfs_statfs, .remount_fs = jfs_remount, - .show_options = jfs_show_options + .show_options = jfs_show_options, +#ifdef CONFIG_QUOTA + .quota_read = jfs_quota_read, + .quota_write = jfs_quota_write, +#endif }; static struct export_operations jfs_export_operations = { -- cgit v1.2.2 From 8bcb2839b74d605f5549962a6e69dc07768e95b6 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 28 Jul 2006 08:46:05 -0500 Subject: JFS: Fix bug in quota code. 
tmp_bh.b_size must be initialized Signed-off-by: Dave Kleikamp --- fs/jfs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 90ee0de829c1..143bcd1d5eaa 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -678,6 +678,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type, sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; + tmp_bh.b_size = 1 << inode->i_blkbits; err = jfs_get_block(inode, blk, &tmp_bh, 1); if (err) goto out; -- cgit v1.2.2 From 4b1af774451bbc8440719e3fe441934a337c3b63 Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Mon, 26 Jun 2006 15:17:47 -0700 Subject: ocfs2: Fix lvb corruption Properly ignore LVB flags during a PR downconvert. This avoids an illegal lvb update. Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmunlock.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index b0c3134f4f70..4ce3f82fb736 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -483,6 +483,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data) /* lock was found on queue */ lksb = lock->lksb; + if (flags & (LKM_VALBLK|LKM_PUT_LVB) && + lock->ml.type != LKM_EXMODE) + flags &= ~(LKM_VALBLK|LKM_PUT_LVB); + /* unlockast only called on originating node */ if (flags & LKM_PUT_LVB) { lksb->flags |= DLM_LKSB_PUT_LVB; @@ -632,6 +636,8 @@ retry: spin_lock(&res->spinlock); is_master = (res->owner == dlm->node_num); + if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE) + flags &= ~LKM_VALBLK; spin_unlock(&res->spinlock); if (is_master) { -- cgit v1.2.2 From a23eac99d4392b8b779305498d7614e41a0e16e9 Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Sun, 18 Jun 2006 21:28:01 -0700 Subject: ocfs2: do not modify lksb->status in the unlock ast This can race with other ast notification, which can cause bad status values to propagate into the unlock ast. Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmunlock.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 4ce3f82fb736..59866e471d96 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, else status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); - if (status != DLM_NORMAL) + if (status != DLM_NORMAL && status != DLM_CANCELGRANT) goto leave; /* By now this has been masked out of cancel requests. */ @@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, spin_lock(&lock->spinlock); /* if the master told us the lock was already granted, * let the ast handle all of these actions */ - if (status == DLM_NORMAL && - lksb->status == DLM_CANCELGRANT) { + if (status == DLM_CANCELGRANT) { actions &= ~(DLM_UNLOCK_REMOVE_LOCK| DLM_UNLOCK_REGRANT_LOCK| DLM_UNLOCK_CLEAR_CONVERT_TYPE); @@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, vec, veclen, owner, &status); if (tmpret >= 0) { // successfully sent and received - if (status == DLM_CANCELGRANT) - ret = DLM_NORMAL; - else if (status == DLM_FORWARD) { + if (status == DLM_FORWARD) mlog(0, "master was in-progress. 
retry\n"); - ret = DLM_FORWARD; - } else - ret = status; - lksb->status = status; + ret = status; } else { mlog_errno(tmpret); if (dlm_is_host_down(tmpret)) { @@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm, /* something bad. this will BUG in ocfs2 */ ret = dlm_err_to_dlm_status(tmpret); } - lksb->status = ret; } return ret; @@ -511,11 +504,8 @@ not_found: "cookie=%u:%llu\n", dlm_get_lock_cookie_node(unlock->cookie), dlm_get_lock_cookie_seq(unlock->cookie)); - else { - /* send the lksb->status back to the other node */ - status = lksb->status; + else dlm_lock_put(lock); - } leave: if (res) @@ -537,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm, if (dlm_lock_on_list(&res->blocked, lock)) { /* cancel this outright */ - lksb->status = DLM_NORMAL; status = DLM_NORMAL; *actions = (DLM_UNLOCK_CALL_AST | DLM_UNLOCK_REMOVE_LOCK); } else if (dlm_lock_on_list(&res->converting, lock)) { /* cancel the request, put back on granted */ - lksb->status = DLM_NORMAL; status = DLM_NORMAL; *actions = (DLM_UNLOCK_CALL_AST | DLM_UNLOCK_REMOVE_LOCK | DLM_UNLOCK_REGRANT_LOCK | DLM_UNLOCK_CLEAR_CONVERT_TYPE); } else if (dlm_lock_on_list(&res->granted, lock)) { - /* too late, already granted. DLM_CANCELGRANT */ - lksb->status = DLM_CANCELGRANT; - status = DLM_NORMAL; + /* too late, already granted. */ + status = DLM_CANCELGRANT; *actions = DLM_UNLOCK_CALL_AST; } else { mlog(ML_ERROR, "lock to cancel is not on any list!\n"); - lksb->status = DLM_IVLOCKID; status = DLM_IVLOCKID; *actions = 0; } @@ -573,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm, /* unlock request */ if (!dlm_lock_on_list(&res->granted, lock)) { - lksb->status = DLM_DENIED; status = DLM_DENIED; dlm_error(status); *actions = 0; } else { /* unlock granted lock */ - lksb->status = DLM_NORMAL; status = DLM_NORMAL; *actions = (DLM_UNLOCK_FREE_LOCK | DLM_UNLOCK_CALL_AST | @@ -671,7 +655,7 @@ retry: } if (call_ast) { - mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status); + mlog(0, "calling unlockast(%p, %d)\n", data, status); if (is_master) { /* it is possible that there is one last bast * pending. make sure it is flushed, then @@ -683,9 +667,12 @@ retry: wait_event(dlm->ast_wq, dlm_lock_basts_flushed(dlm, lock)); } - (*unlockast)(data, lksb->status); + (*unlockast)(data, status); } + if (status == DLM_CANCELGRANT) + status = DLM_NORMAL; + if (status == DLM_NORMAL) { mlog(0, "kicking the thread\n"); dlm_kick_thread(dlm, res); -- cgit v1.2.2 From 34e3d180370c44ad3ecd3a1f9099e150d3bb103f Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Sat, 15 Jul 2006 10:22:39 -0700 Subject: ocfs2: fix check for locally granted state during dlmunlock() If a process requests a lock cancel but the lock has been remotely granted already then there is no need to send the cancel message. Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmunlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 59866e471d96..37be4b2e0d4a 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, else status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions); - if (status != DLM_NORMAL && status != DLM_CANCELGRANT) + if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node)) goto leave; /* By now this has been masked out of cancel requests. 
*/ -- cgit v1.2.2 From 9acd72f4240429dfd762c9a2c7eb5c18b5d32529 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 15 Jul 2006 02:36:01 +0200 Subject: [PATCH] fs/ocfs2/dlm/dlmmaster.c: unexport dlm_migrate_lockres This patch removes the unused EXPORT_SYMBOL_GPL(dlm_migrate_lockres). Signed-off-by: Adrian Bunk Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmmaster.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 1b8346dd0572..9503240ef0e5 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2375,7 +2375,6 @@ leave: mlog(0, "returning %d\n", ret); return ret; } -EXPORT_SYMBOL_GPL(dlm_migrate_lockres); int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock) { -- cgit v1.2.2 From 101ebf256de54e78e6d3277adacf656e125a2c5a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 2 May 2006 17:54:45 -0700 Subject: ocfs2: limit cluster bitmap information saved at mount We were storing cluster count on the ocfs2_super structure, but never actually using it so remove that. Also, we don't want to populate the uptodate cache with the unlocked block read - it is technically safe as is, but we should change it for correctness. Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2.h | 1 - fs/ocfs2/super.c | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index cd4a6f253d13..d52100d49b6c 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -197,7 +197,6 @@ struct ocfs2_super struct ocfs2_node_map recovery_map; struct ocfs2_node_map umount_map; - u32 num_clusters; u64 root_blkno; u64 system_dir_blkno; u64 bitmap_blkno; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 382706a67ffd..d17e33e66a1e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; + /* We don't have a cluster lock on the bitmap here because + * we're only interested in static information and the extra + * complexity at mount time isn't worht it. Don't pass the + * inode in to the read function though as we don't want it to + * be put in the cache. */ status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0, - inode); + NULL); iput(inode); if (status < 0) { mlog_errno(status); @@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb, di = (struct ocfs2_dinode *) bitmap_bh->b_data; osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg); - osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total); brelse(bitmap_bh); mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n", (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg); -- cgit v1.2.2 From 7bf72edee614e10b8d470c40a326f47bfdd69992 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 3 May 2006 17:46:50 -0700 Subject: ocfs2: better group descriptor consistency checks Try to catch corrupted group descriptors with some stronger checks placed in a couple of strategic locations. Detect a failed resizefs and refuse to allocate past what bitmap i_clusters allows. 
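The new ocfs2_check_group_descriptor() in the diff below reduces to a handful of range comparisons against the chain list kept in the bitmap inode. As a compressed, standalone C sketch of those checks (the structs here are stripped-down stand-ins with illustrative names, not the real little-endian on-disk ocfs2_group_desc / ocfs2_dinode):

#include <stdint.h>

/* Stand-ins for the on-disk structures; the real ones carry many more
 * fields and are stored little-endian. Names here are illustrative only. */
struct gd { uint64_t blkno, parent; uint16_t bits, free_bits, chain, size; };
struct di { uint64_t blkno; uint16_t cl_cpg, cl_bpc, cl_next_free_rec; };

/* The same five sanity checks the patch adds, one comparison each. */
static int check_group_descriptor(const struct di *di, const struct gd *gd)
{
	unsigned int max_bits = di->cl_cpg * di->cl_bpc;

	if (gd->parent != di->blkno)		return -1; /* wrong parent inode */
	if (gd->bits > max_bits)		return -1; /* more bits than cpg * bpc */
	if (gd->chain >= di->cl_next_free_rec)	return -1; /* chain index out of range */
	if (gd->free_bits > gd->bits)		return -1; /* free count > total bits */
	if (gd->bits > 8u * gd->size)		return -1; /* bits exceed bitmap bytes */
	return 0;				/* descriptor looks sane */
}

Each failed comparison corresponds to one of the ocfs2_error() cases in the patch, all of which return -EIO to the caller.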
Signed-off-by: Mark Fasheh --- fs/ocfs2/suballoc.c | 114 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 195523090c87..7ac68cac041d 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -85,11 +85,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, u64 *bg_blkno); static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr); -static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, - struct buffer_head *bg_bh, - unsigned int bits_wanted, - u16 *bit_off, - u16 *bits_found); static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, @@ -143,6 +138,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } +/* somewhat more expensive than our other checks, so use sparingly. */ +static int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct ocfs2_group_desc *gd) +{ + unsigned int max_bits; + + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); + return -EIO; + } + + if (di->i_blkno != gd->bg_parent_dinode) { + ocfs2_error(sb, "Group descriptor # %llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EIO; + } + + max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); + if (le16_to_cpu(gd->bg_bits) > max_bits) { + ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_chain) >= + le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { + ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_chain)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "claims that %u are free", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EIO; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EIO; + } + + return 0; +} + static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, struct inode *alloc_inode, struct buffer_head *bg_bh, @@ -663,6 +716,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, struct buffer_head *bg_bh, unsigned int bits_wanted, + unsigned int total_bits, u16 *bit_off, u16 *bits_found) { @@ -679,10 +733,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; - while((offset = ocfs2_find_next_zero_bit(bitmap, - le16_to_cpu(bg->bg_bits), - start)) != -1) { - if (offset == le16_to_cpu(bg->bg_bits)) + while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { + if (offset == 
total_bits) break; if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { @@ -911,14 +963,35 @@ static int ocfs2_cluster_group_search(struct inode *inode, { int search = -ENOSPC; int ret; - struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; u16 tmp_off, tmp_found; + unsigned int max_bits, gd_cluster_off; BUG_ON(!ocfs2_is_cluster_bitmap(inode)); - if (bg->bg_free_bits_count) { + if (gd->bg_free_bits_count) { + max_bits = le16_to_cpu(gd->bg_bits); + + /* Tail groups in cluster bitmaps which aren't cpg + * aligned are prone to partial extention by a failed + * fs resize. If the file system resize never got to + * update the dinode cluster count, then we don't want + * to trust any clusters past it, regardless of what + * the group descriptor says. */ + gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb, + le64_to_cpu(gd->bg_blkno)); + if ((gd_cluster_off + max_bits) > + OCFS2_I(inode)->ip_clusters) { + max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; + mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", + (unsigned long long)le64_to_cpu(gd->bg_blkno), + le16_to_cpu(gd->bg_bits), + OCFS2_I(inode)->ip_clusters, max_bits); + } + ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + max_bits, &tmp_off, &tmp_found); if (ret) return ret; @@ -951,6 +1024,7 @@ static int ocfs2_block_group_search(struct inode *inode, if (bg->bg_free_bits_count) ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, + le16_to_cpu(bg->bg_bits), bit_off, bits_found); return ret; @@ -988,9 +1062,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } @@ -1018,9 +1092,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); + if (status) { + mlog_errno(status); goto bail; } } @@ -1494,9 +1568,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, } group = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(group)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); - status = -EIO; + status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); + if (status) { + mlog_errno(status); goto bail; } BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); -- cgit v1.2.2 From 883d4cae4a2b01a05193cf2665c77b7489a8b6a0 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Mon, 5 Jun 2006 16:41:00 -0400 Subject: ocfs2: allocation hints Record the most recently used allocation group on the allocation context, so that subsequent allocations can attempt to optimize for contiguousness. Local alloc especially should benefit from this as the current chain search tends to let it spew across the disk. 
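The hint mechanism in the diff below amounts to: remember the last group that satisfied an allocation, try it first next time, and drop the hint once that group is unlikely to have enough free bits. A minimal userspace sketch of that policy (simplified group array and names, no journaling or locking):

#include <stddef.h>
#include <stdint.h>

/* Simplified stand-in for an allocation group; illustrative only. */
struct group {
	uint64_t blkno;
	unsigned int free_bits;
};

static uint64_t last_group;	/* plays the role of ac_last_group / la_last_gd */

static struct group *claim_bits(struct group *groups, size_t n,
				unsigned int wanted, unsigned int min_bits)
{
	struct group *g = NULL;
	size_t i;

	/* Short-circuit the chain walk: try the hinted group first. */
	if (last_group) {
		for (i = 0; i < n; i++)
			if (groups[i].blkno == last_group &&
			    groups[i].free_bits >= wanted)
				g = &groups[i];
	}

	/* Fall back to the ordinary (expensive) search. */
	for (i = 0; !g && i < n; i++)
		if (groups[i].free_bits >= wanted)
			g = &groups[i];
	if (!g)
		return NULL;

	g->free_bits -= wanted;
	/* Keep the hint only while this group is likely to satisfy the next
	 * request; otherwise clear it so we don't waste a disk read on it. */
	last_group = (g->free_bits < min_bits) ? 0 : g->blkno;
	return g;
}

In the patch the hint lives in ac->ac_last_group and, for the local alloc window, is carried across allocations in osb->la_last_gd.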
Signed-off-by: Mark Fasheh --- fs/ocfs2/localalloc.c | 8 +++ fs/ocfs2/ocfs2.h | 1 + fs/ocfs2/suballoc.c | 147 +++++++++++++++++++++++++++++++++++++++++++++----- fs/ocfs2/suballoc.h | 2 + 4 files changed, 145 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 0d1973ea32b0..1f17a4d08287 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, mlog(0, "Allocating %u clusters for a new window.\n", ocfs2_local_alloc_window_bits(osb)); + + /* Instruct the allocation code to try the most recently used + * cluster group. We'll re-record the group used this pass + * below. */ + ac->ac_last_group = osb->la_last_gd; + /* we used the generic suballoc reserve function, but we set * everything up nicely, so there's no reason why we can't use * the more specific cluster api to claim bits. */ @@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, goto bail; } + osb->la_last_gd = ac->ac_last_group; + la->la_bm_off = cpu_to_le32(cluster_off); alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); /* just in case... In the future when we find space ourselves, diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d52100d49b6c..0462a7f4e21b 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -236,6 +236,7 @@ struct ocfs2_super enum ocfs2_local_alloc_state local_alloc_state; struct buffer_head *local_alloc_bh; + u64 la_last_gd; /* Next two fields are for local node slot recovery during * mount. */ diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 7ac68cac041d..9d91e66f51a9 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode, struct buffer_head *group_bh, u32 bits_wanted, u32 min_bits, u16 *bit_off, u16 *bits_found); -static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, - u32 bits_wanted, - u32 min_bits, - u16 *bit_off, - unsigned int *num_bits, - u64 *bg_blkno); static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, struct ocfs2_alloc_context *ac, u32 bits_wanted, @@ -1030,12 +1024,103 @@ static int ocfs2_block_group_search(struct inode *inode, return ret; } +static int ocfs2_alloc_dinode_update_counts(struct inode *inode, + struct ocfs2_journal_handle *handle, + struct buffer_head *di_bh, + u32 num_bits, + u16 chain) +{ + int ret; + u32 tmp_used; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; + struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; + + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); + di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); + le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); + + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret < 0) + mlog_errno(ret); + +out: + return ret; +} + +static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, + u32 bits_wanted, + u32 min_bits, + u16 *bit_off, + unsigned int *num_bits, + u64 gd_blkno, + u16 *bits_left) +{ + int ret; + u16 found; + struct buffer_head *group_bh = NULL; + struct ocfs2_group_desc *gd; + struct inode *alloc_inode = ac->ac_inode; + struct ocfs2_journal_handle *handle = ac->ac_handle; + + ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, + &group_bh, OCFS2_BH_CACHED, alloc_inode); + if (ret < 0) { + mlog_errno(ret); + return ret; 
+ } + + gd = (struct ocfs2_group_desc *) group_bh->b_data; + if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { + OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); + ret = -EIO; + goto out; + } + + ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, + bit_off, &found); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + + *num_bits = found; + + ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, + *num_bits, + le16_to_cpu(gd->bg_chain)); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, + *bit_off, *num_bits); + if (ret < 0) + mlog_errno(ret); + + *bits_left = le16_to_cpu(gd->bg_free_bits_count); + +out: + brelse(group_bh); + + return ret; +} + static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, u32 bits_wanted, u32 min_bits, u16 *bit_off, unsigned int *num_bits, - u64 *bg_blkno) + u64 *bg_blkno, + u16 *bits_left) { int status; u16 chain, tmp_bits; @@ -1173,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, (unsigned long long)fe->i_blkno); *bg_blkno = le64_to_cpu(bg->bg_blkno); + *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: if (group_bh) brelse(group_bh); @@ -1194,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, { int status; u16 victim, i; + u16 bits_left = 0; + u64 hint_blkno = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; @@ -1220,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } + if (hint_blkno) { + /* Attempt to short-circuit the usual search mechanism + * by jumping straight to the most recently used + * allocation group. This helps us mantain some + * contiguousness across allocations. */ + status = ocfs2_search_one_group(ac, bits_wanted, min_bits, + bit_off, num_bits, + hint_blkno, &bits_left); + if (!status) { + /* Be careful to update *bg_blkno here as the + * caller is expecting it to be filled in, and + * ocfs2_search_one_group() won't do that for + * us. 
*/ + *bg_blkno = hint_blkno; + goto set_hint; + } + if (status < 0 && status != -ENOSPC) { + mlog_errno(status); + goto bail; + } + } + cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; victim = ocfs2_find_victim_chain(cl); @@ -1227,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, - num_bits, bg_blkno); + num_bits, bg_blkno, &bits_left); if (!status) - goto bail; + goto set_hint; if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1251,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, ac->ac_chain = i; status = ocfs2_search_chain(ac, bits_wanted, min_bits, - bit_off, num_bits, - bg_blkno); + bit_off, num_bits, bg_blkno, + &bits_left); if (!status) break; if (status < 0 && status != -ENOSPC) { @@ -1260,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, goto bail; } } -bail: +set_hint: + if (status != -ENOSPC) { + /* If the next search of this group is not likely to + * yield a suitable extent, then we reset the last + * group hint so as to not waste a disk read */ + if (bits_left < min_bits) + ac->ac_last_group = 0; + else + ac->ac_last_group = *bg_blkno; + } + +bail: mlog_exit(status); return status; } @@ -1415,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, { int status; unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; - u64 bg_blkno; + u64 bg_blkno = 0; u16 bg_bit_off; mlog_entry_void(); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a76c82a7ceac..c787838d1052 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,6 +49,8 @@ struct ocfs2_alloc_context { u16 ac_chain; int ac_allow_chain_relink; group_search_t *ac_group_search; + + u64 ac_last_group; }; void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); -- cgit v1.2.2 From 0e1edbd99994270023cea5afe593f972eb09a778 Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Thu, 10 Aug 2006 14:40:41 +1000 Subject: [XFS] Fix xfs_free_extent related NULL pointer dereference. We recently fixed an out-of-space deadlock in XFS, and part of that fix involved the addition of the XFS_ALLOC_FLAG_FREEING flag to some of the space allocator calls to indicate they're freeing space, not allocating it. There was a missed xfs_alloc_fix_freelist condition test that did not correctly test "flags". The same test would also test an uninitialised structure field (args->userdata) and depending on its value either would or would not return early with a critical buffer pointer set to NULL. This fixes that up, adds asserts to several places to catch future botches of this nature, and skips sections of xfs_alloc_fix_freelist that are irrelevent for the space-freeing case. 
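Two of the fixes here are easy to get wrong in any C code: testing a flags word as a boolean when a specific bit is meant, and reading a structure field that only some callers initialise. A small generic illustration of both patterns (flag values and names are made up, not the real XFS ones):

#include <string.h>
#include <stdio.h>

#define FLAG_TRYLOCK	0x1	/* illustrative values, not the XFS flags */
#define FLAG_FREEING	0x2

struct alloc_arg {
	int agno;
	int userdata;	/* optional: callers freeing space never set it */
};

static int fix_freelist(const struct alloc_arg *args, int flags)
{
	/* Test the specific bit, not "any flag set" -- and only trust
	 * args->userdata because the caller zeroed the whole structure. */
	if (args->userdata && (flags & FLAG_TRYLOCK))
		return 1;	/* "try somewhere else" path */
	return 0;
}

int main(void)
{
	struct alloc_arg args;

	memset(&args, 0, sizeof(args));	/* the xfs_free_extent() fix */
	args.agno = 3;
	printf("%d\n", fix_freelist(&args, FLAG_FREEING));
	return 0;
}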
SGI-PV: 955303 SGI-Modid: xfs-linux-melb:xfs-kern:26743a Signed-off-by: Nathan Scott --- fs/xfs/xfs_alloc.c | 103 ++++++++++++++++++++++++++++------------------------- 1 file changed, 54 insertions(+), 49 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index eef6763f3a67..d2bbcd882a69 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1835,40 +1835,47 @@ xfs_alloc_fix_freelist( &agbp))) return error; if (!pag->pagf_init) { + ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); args->agbp = NULL; return 0; } } else agbp = NULL; - /* If this is a metadata preferred pag and we are user data + /* + * If this is a metadata preferred pag and we are user data * then try somewhere else if we are not being asked to * try harder at this point */ - if (pag->pagf_metadata && args->userdata && flags) { + if (pag->pagf_metadata && args->userdata && + (flags & XFS_ALLOC_FLAG_TRYLOCK)) { + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); args->agbp = NULL; return 0; } - need = XFS_MIN_FREELIST_PAG(pag, mp); - delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; - /* - * If it looks like there isn't a long enough extent, or enough - * total blocks, reject it. - */ - longest = (pag->pagf_longest > delta) ? - (pag->pagf_longest - delta) : - (pag->pagf_flcount > 0 || pag->pagf_longest > 0); - if (args->minlen + args->alignment + args->minalignslop - 1 > longest || - (!(flags & XFS_ALLOC_FLAG_FREEING) && - (int)(pag->pagf_freeblks + pag->pagf_flcount - - need - args->total) < - (int)args->minleft)) { - if (agbp) - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; + if (!(flags & XFS_ALLOC_FLAG_FREEING)) { + need = XFS_MIN_FREELIST_PAG(pag, mp); + delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; + /* + * If it looks like there isn't a long enough extent, or enough + * total blocks, reject it. + */ + longest = (pag->pagf_longest > delta) ? + (pag->pagf_longest - delta) : + (pag->pagf_flcount > 0 || pag->pagf_longest > 0); + if ((args->minlen + args->alignment + args->minalignslop - 1) > + longest || + ((int)(pag->pagf_freeblks + pag->pagf_flcount - + need - args->total) < (int)args->minleft)) { + if (agbp) + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } } + /* * Get the a.g. freespace buffer. * Can fail if we're not blocking on locks, and it's held. @@ -1878,6 +1885,8 @@ xfs_alloc_fix_freelist( &agbp))) return error; if (agbp == NULL) { + ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); + ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); args->agbp = NULL; return 0; } @@ -1887,22 +1896,24 @@ xfs_alloc_fix_freelist( */ agf = XFS_BUF_TO_AGF(agbp); need = XFS_MIN_FREELIST(agf, mp); - delta = need > be32_to_cpu(agf->agf_flcount) ? - (need - be32_to_cpu(agf->agf_flcount)) : 0; /* * If there isn't enough total or single-extent, reject it. */ - longest = be32_to_cpu(agf->agf_longest); - longest = (longest > delta) ? (longest - delta) : - (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); - if (args->minlen + args->alignment + args->minalignslop - 1 > longest || - (!(flags & XFS_ALLOC_FLAG_FREEING) && - (int)(be32_to_cpu(agf->agf_freeblks) + - be32_to_cpu(agf->agf_flcount) - need - args->total) < - (int)args->minleft)) { - xfs_trans_brelse(tp, agbp); - args->agbp = NULL; - return 0; + if (!(flags & XFS_ALLOC_FLAG_FREEING)) { + delta = need > be32_to_cpu(agf->agf_flcount) ? + (need - be32_to_cpu(agf->agf_flcount)) : 0; + longest = be32_to_cpu(agf->agf_longest); + longest = (longest > delta) ? 
(longest - delta) : + (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); + if ((args->minlen + args->alignment + args->minalignslop - 1) > + longest || + ((int)(be32_to_cpu(agf->agf_freeblks) + + be32_to_cpu(agf->agf_flcount) - need - args->total) < + (int)args->minleft)) { + xfs_trans_brelse(tp, agbp); + args->agbp = NULL; + return 0; + } } /* * Make the freelist shorter if it's too long. @@ -1950,12 +1961,11 @@ xfs_alloc_fix_freelist( * on a completely full ag. */ if (targs.agbno == NULLAGBLOCK) { - if (!(flags & XFS_ALLOC_FLAG_FREEING)) { - xfs_trans_brelse(tp, agflbp); - args->agbp = NULL; - return 0; - } - break; + if (flags & XFS_ALLOC_FLAG_FREEING) + break; + xfs_trans_brelse(tp, agflbp); + args->agbp = NULL; + return 0; } /* * Put each allocated block on the list. @@ -2442,31 +2452,26 @@ xfs_free_extent( xfs_fsblock_t bno, /* starting block number of extent */ xfs_extlen_t len) /* length of extent */ { -#ifdef DEBUG - xfs_agf_t *agf; /* a.g. freespace header */ -#endif - xfs_alloc_arg_t args; /* allocation argument structure */ + xfs_alloc_arg_t args; int error; ASSERT(len != 0); + memset(&args, 0, sizeof(xfs_alloc_arg_t)); args.tp = tp; args.mp = tp->t_mountp; args.agno = XFS_FSB_TO_AGNO(args.mp, bno); ASSERT(args.agno < args.mp->m_sb.sb_agcount); args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); - args.alignment = 1; - args.minlen = args.minleft = args.minalignslop = 0; down_read(&args.mp->m_peraglock); args.pag = &args.mp->m_perag[args.agno]; if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) goto error0; #ifdef DEBUG ASSERT(args.agbp != NULL); - agf = XFS_BUF_TO_AGF(args.agbp); - ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length)); + ASSERT((args.agbno + len) <= + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); #endif - error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, - len, 0); + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); error0: up_read(&args.mp->m_peraglock); return error; -- cgit v1.2.2 From 3a5ff61c18659443f76bad6cf06f60103046de5d Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 14 Jul 2006 22:37:11 +0000 Subject: [CIFS] Do not time out posix brl requests when using new posix setfileinfo request and do not time out slow requests to a server that is still responding well to other threads Suggested by jra of Samba team Signed-off-by: Steve French (cherry picked from 89b57148115479eef074b8d3f86c4c86c96ac969 commit) --- fs/cifs/CHANGES | 6 ++++++ fs/cifs/cifsfs.c | 6 +----- fs/cifs/cifsfs.h | 2 +- fs/cifs/cifsglob.h | 3 ++- fs/cifs/cifssmb.c | 11 ++++++++--- fs/cifs/connect.c | 17 ++++++++++++++++- fs/cifs/file.c | 6 ++---- fs/cifs/transport.c | 16 +++++++++++++--- 8 files changed, 49 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index a61d17ed1827..acb843b9bc3b 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,9 @@ +Version 1.45 +------------ +Do not time out lockw calls when using posix extensions. 
Do not +time out requests if server still responding reasonably fast +on requests on other threads + Version 1.44 ------------ Rewritten sessionsetup support, including support for legacy SMB diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c28ede599946..3cd750029be2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -402,7 +402,6 @@ static struct quotactl_ops cifs_quotactl_ops = { }; #endif -#ifdef CONFIG_CIFS_EXPERIMENTAL static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) { struct cifs_sb_info *cifs_sb; @@ -422,7 +421,7 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) tcon->tidStatus = CifsExiting; up(&tcon->tconSem); - /* cancel_brl_requests(tcon); */ + /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ /* cancel_notify_requests(tcon); */ if(tcon->ses && tcon->ses->server) { @@ -438,7 +437,6 @@ static void cifs_umount_begin(struct vfsmount * vfsmnt, int flags) return; } -#endif static int cifs_remount(struct super_block *sb, int *flags, char *data) { @@ -457,9 +455,7 @@ struct super_operations cifs_super_ops = { unless later we add lazy close of inodes or unless the kernel forgets to call us with the same number of releases (closes) as opens */ .show_options = cifs_show_options, -#ifdef CONFIG_CIFS_EXPERIMENTAL .umount_begin = cifs_umount_begin, -#endif .remount_fs = cifs_remount, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 8f75c6f24701..39ee8ef3bdeb 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern int cifs_ioctl (struct inode * inode, struct file * filep, unsigned int command, unsigned long arg); -#define CIFS_VERSION "1.44" +#define CIFS_VERSION "1.45" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6d7cf5f3bc0b..39b43e6a7509 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -158,7 +158,8 @@ struct TCP_Server_Info { /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[SERVER_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* needed for CIFS PDU signature */ - char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; + char mac_signing_key[CIFS_SESS_KEY_SIZE + 16]; + unsigned long lstrp; /* when we got last response from this server */ }; /* diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 19678c575dfc..c03c42ee9eb9 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1484,6 +1484,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, char *data_offset; struct cifs_posix_lock *parm_data; int rc = 0; + int timeout = 0; int bytes_returned = 0; __u16 params, param_offset, offset, byte_count, count; @@ -1503,7 +1504,6 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; - pSMB->Timeout = 0; pSMB->Reserved2 = 0; param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; offset = param_offset + params; @@ -1529,8 +1529,13 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, (((char *) &pSMB->hdr.Protocol) + offset); parm_data->lock_type = cpu_to_le16(lock_type); - if(waitFlag) + if(waitFlag) { + timeout = 3; /* blocking operation, no timeout */ parm_data->lock_flags = cpu_to_le16(1); + pSMB->Timeout = cpu_to_le32(-1); + } else + pSMB->Timeout = 0; + parm_data->pid = cpu_to_le32(current->tgid); parm_data->start = cpu_to_le64(pLockData->fl_start); parm_data->length = 
cpu_to_le64(len); /* normalize negative numbers */ @@ -1542,7 +1547,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, - (struct smb_hdr *) pSMBr, &bytes_returned, 0); + (struct smb_hdr *) pSMBr, &bytes_returned, timeout); if (rc) { cFYI(1, ("Send error in Posix Lock = %d", rc)); } else if (get_flag) { diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 876eb9ef85fe..c4aedcf5c924 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -612,6 +612,10 @@ multi_t2_fnd: #ifdef CONFIG_CIFS_STATS2 mid_entry->when_received = jiffies; #endif + /* so we do not time out requests to server + which is still responding (since server could + be busy but not dead) */ + server->lstrp = jiffies; break; } } @@ -1969,7 +1973,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } cFYI(1,("Negotiate caps 0x%x",(int)cap)); - +#ifdef CONFIG_CIFS_DEBUG2 + if(cap & CIFS_UNIX_FCNTL_CAP) + cFYI(1,("FCNTL cap")); + if(cap & CIFS_UNIX_EXTATTR_CAP) + cFYI(1,("EXTATTR cap")); + if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) + cFYI(1,("POSIX path cap")); + if(cap & CIFS_UNIX_XATTR_CAP) + cFYI(1,("XATTR cap")); + if(cap & CIFS_UNIX_POSIX_ACL_CAP) + cFYI(1,("POSIX ACL cap")); +#endif /* CIFS_DEBUG2 */ if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { cFYI(1,("setting capabilities failed")); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 944d2b9e092d..52e2e4c8794b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -644,8 +644,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) account for negative length which we can not accept over the wire */ if (IS_GETLK(cmd)) { - if(experimEnabled && - (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && + if((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { int posix_lock_type; @@ -683,8 +682,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) FreeXid(xid); return rc; } - if (experimEnabled && - (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && + if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { int posix_lock_type; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 17ba329e2b3d..95e23ca670a8 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -444,8 +444,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, if(timeout != MAX_SCHEDULE_TIMEOUT) { timeout += jiffies; wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - time_after(jiffies, timeout) || + (!(midQ->midState & MID_REQUEST_SUBMITTED)) || + (time_after(jiffies, timeout) && + time_after(jiffies, ses->server->lstrp + HZ)) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); } else { @@ -710,9 +711,18 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, /* No user interrupts in wait - wreaks havoc with performance */ if(timeout != MAX_SCHEDULE_TIMEOUT) { timeout += jiffies; + /* although we prefer not to time out if the server is still + responding - we will time out if the server takes + more than 15 (or 45 or 180) seconds to respond to this request + and has not responded to any request from other threads + on this client within a second (note that it is not worth + grabbing the GlobalMid_Lock and slowing things down in this + wait event to more accurately check 
the lstrsp field on some + arch since we are already in an error path that will retry */ wait_event(ses->server->response_q, (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - time_after(jiffies, timeout) || + (time_after(jiffies, timeout) && + time_after(jiffies, ses->server->lstrp + HZ)) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); } else { -- cgit v1.2.2 From 14a441a2b4ee1dfc00ec822d91d9fb20f401c62f Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 16 Jul 2006 04:32:51 +0000 Subject: [CIFS] spinlock protect read of last srv response time in timeout path Signed-off-by: Jeremy Allison Signed-off-by: Steve French (cherry picked from b33a3f55e54fd210fc043eafcf83728b03bc9e02 commit) --- fs/cifs/transport.c | 99 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 95e23ca670a8..d16e6032d4cf 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -3,7 +3,8 @@ * * Copyright (C) International Business Machines Corp., 2002,2005 * Author(s): Steve French (sfrench@us.ibm.com) - * + * Jeremy Allison (jra@samba.org) 2006. + * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation; either version 2.1 of the License, or @@ -442,13 +443,46 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, /* No user interrupts in wait - wreaks havoc with performance */ if(timeout != MAX_SCHEDULE_TIMEOUT) { - timeout += jiffies; - wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - (time_after(jiffies, timeout) && - time_after(jiffies, ses->server->lstrp + HZ)) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); + unsigned long curr_timeout; + + for (;;) { + curr_timeout = timeout + jiffies; + wait_event(ses->server->response_q, + (!(midQ->midState & MID_REQUEST_SUBMITTED)) || + time_after(jiffies, curr_timeout) || + ((ses->server->tcpStatus != CifsGood) && + (ses->server->tcpStatus != CifsNew))); + + if (time_after(jiffies, curr_timeout) && + (midQ->midState & MID_REQUEST_SUBMITTED) && + ((ses->server->tcpStatus == CifsGood) || + (ses->server->tcpStatus == CifsNew))) { + + unsigned long lrt; + + /* We timed out. Is the server still + sending replies ? */ + spin_lock(&GlobalMid_Lock); + lrt = ses->server->lstrp; + spin_unlock(&GlobalMid_Lock); + + /* Calculate 10 seconds past last receive time. + Although we prefer not to time out if the + server is still responding - we will time + out if the server takes more than 15 (or 45 + or 180) seconds to respond to this request + and has not responded to any request from + other threads on the client within 10 seconds */ + lrt += (10 * HZ); + if (time_after(jiffies, lrt)) { + /* No replies for 10 seconds. 
*/ + cERROR(1,("server not responding")); + break; + } + } else { + break; + } + } } else { wait_event(ses->server->response_q, (!(midQ->midState & MID_REQUEST_SUBMITTED)) || @@ -710,21 +744,40 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, /* No user interrupts in wait - wreaks havoc with performance */ if(timeout != MAX_SCHEDULE_TIMEOUT) { - timeout += jiffies; - /* although we prefer not to time out if the server is still - responding - we will time out if the server takes - more than 15 (or 45 or 180) seconds to respond to this request - and has not responded to any request from other threads - on this client within a second (note that it is not worth - grabbing the GlobalMid_Lock and slowing things down in this - wait event to more accurately check the lstrsp field on some - arch since we are already in an error path that will retry */ - wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || - (time_after(jiffies, timeout) && - time_after(jiffies, ses->server->lstrp + HZ)) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); + unsigned long curr_timeout; + + for (;;) { + curr_timeout = timeout + jiffies; + wait_event(ses->server->response_q, + (!(midQ->midState & MID_REQUEST_SUBMITTED)) || + time_after(jiffies, curr_timeout) || + ((ses->server->tcpStatus != CifsGood) && + (ses->server->tcpStatus != CifsNew))); + + if (time_after(jiffies, curr_timeout) && + (midQ->midState & MID_REQUEST_SUBMITTED) && + ((ses->server->tcpStatus == CifsGood) || + (ses->server->tcpStatus == CifsNew))) { + + unsigned long lrt; + + /* We timed out. Is the server still + sending replies ? */ + spin_lock(&GlobalMid_Lock); + lrt = ses->server->lstrp; + spin_unlock(&GlobalMid_Lock); + + /* Calculate 10 seconds past last receive time*/ + lrt += (10 * HZ); + if (time_after(jiffies, lrt)) { + /* Server sent no reply in 10 seconds */ + cERROR(1,("Server not responding")); + break; + } + } else { + break; + } + } } else { wait_event(ses->server->response_q, (!(midQ->midState & MID_REQUEST_SUBMITTED)) || -- cgit v1.2.2 From 5da07b0208066fd3544ecf2b521fc7a2e0228ad5 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 16 Jul 2006 21:33:15 +0000 Subject: [CIFS] Make midState usage more consistent Although harmless, we were sometimes treating midState like it contained flags but they are exclusive states, and this makes that more clear. 
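Both transport.c wait loops touched here and in the previous patches follow the same timeout policy: a request may block past its own deadline as long as the server has answered something recently (the lstrp timestamp). A rough userspace sketch of that policy, with time()/sleep() standing in for jiffies and the response wait queue, and illustrative names throughout:

#include <time.h>
#include <unistd.h>

/* Hypothetical stand-ins: 'replied' is set by the receive thread when our
 * reply arrives, and 'last_response' is updated whenever *any* reply
 * arrives on the socket (the role lstrp plays in the patch). */
static int wait_for_reply(volatile int *replied,
			  volatile time_t *last_response,
			  int timeout_s, int grace_s)
{
	time_t deadline = time(NULL) + timeout_s;

	while (!*replied) {
		time_t now = time(NULL);

		/* Only give up when our own deadline has passed *and* the
		 * server has been silent on every request for grace_s. */
		if (now > deadline && now > *last_response + grace_s)
			return -1;	/* server really stopped responding */
		sleep(1);		/* the kernel code sleeps on a wait queue */
	}
	return 0;			/* got our answer */
}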
Signed-off-by: Jeremy Allison Signed-off-by: Steve French (cherry picked from 586c057c3a68dd6ae0f3ba94fbf76798b1558074 commit) --- fs/cifs/transport.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index d16e6032d4cf..748b0df07a73 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -448,13 +448,13 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, for (;;) { curr_timeout = timeout + jiffies; wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || time_after(jiffies, curr_timeout) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); if (time_after(jiffies, curr_timeout) && - (midQ->midState & MID_REQUEST_SUBMITTED) && + (midQ->midState == MID_REQUEST_SUBMITTED) && ((ses->server->tcpStatus == CifsGood) || (ses->server->tcpStatus == CifsNew))) { @@ -485,7 +485,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } } else { wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); } @@ -749,13 +749,13 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, for (;;) { curr_timeout = timeout + jiffies; wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || time_after(jiffies, curr_timeout) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); if (time_after(jiffies, curr_timeout) && - (midQ->midState & MID_REQUEST_SUBMITTED) && + (midQ->midState == MID_REQUEST_SUBMITTED) && ((ses->server->tcpStatus == CifsGood) || (ses->server->tcpStatus == CifsNew))) { @@ -780,7 +780,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } } else { wait_event(ses->server->response_q, - (!(midQ->midState & MID_REQUEST_SUBMITTED)) || + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || ((ses->server->tcpStatus != CifsGood) && (ses->server->tcpStatus != CifsNew))); } -- cgit v1.2.2 From 6c3d8909d85b2c18fd7a6e64f0ca757a257b40fa Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 31 Jul 2006 22:46:20 +0000 Subject: [CIFS] Allow cifsd to suspend if connection is lost Make cifsd allow us to suspend if it has lost the connection with a server Ref: http://bugzilla.kernel.org/show_bug.cgi?id=6811 Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Steve French (cherry picked from 27bd6cd87b0ada66515ad49bc346d77d1e9d3e05 commit) --- fs/cifs/connect.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c4aedcf5c924..b706b4f48b10 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -182,6 +182,7 @@ cifs_reconnect(struct TCP_Server_Info *server) while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) { + try_to_freeze(); if(server->protocolType == IPV6) { rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket); } else { -- cgit v1.2.2 From 7ee1af765dfa3146aef958258003245e082284e5 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Wed, 2 Aug 2006 21:56:33 +0000 Subject: [CIFS] Allow Windows blocking locks to be cancelled via a CANCEL_LOCK call. TODO - restrict this to servers that support NT_STATUS codes (Win9x will probably not support this call). 
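The Windows byte-range lock bookkeeping added below is a per-open-file list of (offset, length, type) records, taken under lock_sem, so that a later unlock can find and release every record it covers. A simplified userspace sketch of that bookkeeping (plain malloc()'d singly linked list, no locking, hypothetical names):

#include <stdint.h>
#include <stdlib.h>

struct lock_rec {
	struct lock_rec *next;
	uint64_t offset, length;
	uint8_t type;
};

static struct lock_rec *locks;	/* one list per open file in the real code */

/* Remember a lock the server granted us (store_file_lock in the patch). */
static int store_lock(uint64_t offset, uint64_t length, uint8_t type)
{
	struct lock_rec *li = malloc(sizeof(*li));

	if (!li)
		return -1;
	li->offset = offset;
	li->length = length;
	li->type = type;
	li->next = locks;
	locks = li;
	return 0;
}

/* On unlock, drop every stored record the unlock range fully covers;
 * the real code also sends one unlock request per matching record. */
static void unlock_range(uint64_t offset, uint64_t length)
{
	struct lock_rec **pp = &locks;

	while (*pp) {
		struct lock_rec *li = *pp;

		if (offset <= li->offset &&
		    offset + length >= li->offset + li->length) {
			*pp = li->next;
			free(li);
		} else {
			pp = &li->next;
		}
	}
}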
Signed-off-by: Jeremy Allison Signed-off-by: Steve French (cherry picked from 570d4d2d895569825d0d017d4e76b51138f68864 commit) --- fs/cifs/cifsglob.h | 15 +- fs/cifs/cifsproto.h | 4 + fs/cifs/cifssmb.c | 15 +- fs/cifs/file.c | 95 ++++++-- fs/cifs/netmisc.c | 1 + fs/cifs/smberr.h | 1 + fs/cifs/transport.c | 677 +++++++++++++++++++++++++++++++--------------------- 7 files changed, 512 insertions(+), 296 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 39b43e6a7509..b24006c47df1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -3,6 +3,7 @@ * * Copyright (C) International Business Machines Corp., 2002,2006 * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -267,14 +268,14 @@ struct cifsTconInfo { }; /* - * This info hangs off the cifsFileInfo structure. This is used to track - * byte stream locks on the file + * This info hangs off the cifsFileInfo structure, pointed to by llist. + * This is used to track byte stream locks on the file */ struct cifsLockInfo { - struct cifsLockInfo *next; - int start; - int length; - int type; + struct list_head llist; /* pointer to next cifsLockInfo */ + __u64 offset; + __u64 length; + __u8 type; }; /* @@ -305,6 +306,8 @@ struct cifsFileInfo { /* lock scope id (0 if none) */ struct file * pfile; /* needed for writepage */ struct inode * pInode; /* needed for oplock break */ + struct semaphore lock_sem; + struct list_head llist; /* list of byte range locks we have. */ unsigned closePend:1; /* file is marked to close */ unsigned invalidHandle:1; /* file closed via session abend */ atomic_t wrtPending; /* handle in use - defer close */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a5ddc62d6fe6..b35c55c3c8bb 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -50,6 +50,10 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *, struct kvec *, int /* nvec to send */, int * /* type of buf returned */ , const int long_op); +extern int SendReceiveBlockingLock(const unsigned int /* xid */ , struct cifsTconInfo *, + struct smb_hdr * /* input */ , + struct smb_hdr * /* out */ , + int * /* bytes returned */); extern int checkSMBhdr(struct smb_hdr *smb, __u16 mid); extern int checkSMB(struct smb_hdr *smb, __u16 mid, int length); extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c03c42ee9eb9..dcbc3e075e81 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1460,8 +1460,13 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, pSMB->hdr.smb_buf_length += count; pSMB->ByteCount = cpu_to_le16(count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, + if (waitFlag) { + rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned); + } else { + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, timeout); + } cifs_stats_inc(&tcon->num_locks); if (rc) { cFYI(1, ("Send error in Lock = %d", rc)); @@ -1546,8 +1551,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon, pSMB->Reserved4 = 0; pSMB->hdr.smb_buf_length += byte_count; pSMB->ByteCount = cpu_to_le16(byte_count); - rc = SendReceive(xid, tcon->ses, (struct smb_hdr 
*) pSMB, + if (waitFlag) { + rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, + (struct smb_hdr *) pSMBr, &bytes_returned); + } else { + rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, timeout); + } + if (rc) { cFYI(1, ("Send error in Posix Lock = %d", rc)); } else if (get_flag) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 52e2e4c8794b..e9c5ba9084fc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -5,6 +5,7 @@ * * Copyright (C) International Business Machines Corp., 2002,2003 * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -47,6 +48,8 @@ static inline struct cifsFileInfo *cifs_init_private( private_data->netfid = netfid; private_data->pid = current->tgid; init_MUTEX(&private_data->fh_sem); + init_MUTEX(&private_data->lock_sem); + INIT_LIST_HEAD(&private_data->llist); private_data->pfile = file; /* needed for writepage */ private_data->pInode = inode; private_data->invalidHandle = FALSE; @@ -473,6 +476,8 @@ int cifs_close(struct inode *inode, struct file *file) cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; if (pSMBFile) { + struct cifsLockInfo *li, *tmp; + pSMBFile->closePend = TRUE; if (pTcon) { /* no sense reconnecting to close a file that is @@ -496,6 +501,16 @@ int cifs_close(struct inode *inode, struct file *file) pSMBFile->netfid); } } + + /* Delete any outstanding lock records. + We'll lose them when the file is closed anyway. */ + down(&pSMBFile->lock_sem); + list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) { + list_del(&li->llist); + kfree(li); + } + up(&pSMBFile->lock_sem); + write_lock(&GlobalSMBSeslock); list_del(&pSMBFile->flist); list_del(&pSMBFile->tlist); @@ -570,6 +585,21 @@ int cifs_closedir(struct inode *inode, struct file *file) return rc; } +static int store_file_lock(struct cifsFileInfo *fid, __u64 len, + __u64 offset, __u8 lockType) +{ + struct cifsLockInfo *li = kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); + if (li == NULL) + return -ENOMEM; + li->offset = offset; + li->length = len; + li->type = lockType; + down(&fid->lock_sem); + list_add(&li->llist, &fid->llist); + up(&fid->lock_sem); + return 0; +} + int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) { int rc, xid; @@ -581,6 +611,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) struct cifsTconInfo *pTcon; __u16 netfid; __u8 lockType = LOCKING_ANDX_LARGE_FILES; + int posix_locking; length = 1 + pfLock->fl_end - pfLock->fl_start; rc = -EACCES; @@ -639,14 +670,14 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) } netfid = ((struct cifsFileInfo *)file->private_data)->netfid; + posix_locking = (cifs_sb->tcon->ses->capabilities & CAP_UNIX) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability)); /* BB add code here to normalize offset and length to account for negative length which we can not accept over the wire */ if (IS_GETLK(cmd)) { - if((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && - (CIFS_UNIX_FCNTL_CAP & - le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { + if(posix_locking) { int posix_lock_type; if(lockType & LOCKING_ANDX_SHARED_LOCK) posix_lock_type = CIFS_RDLCK; @@ -682,9 +713,15 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) FreeXid(xid); return rc; } - if ((cifs_sb->tcon->ses->capabilities & CAP_UNIX) && - 
(CIFS_UNIX_FCNTL_CAP & - le64_to_cpu(cifs_sb->tcon->fsUnixInfo.Capability))) { + + if (!numLock && !numUnlock) { + /* if no lock or unlock then nothing + to do since we do not know what it is */ + FreeXid(xid); + return -EOPNOTSUPP; + } + + if (posix_locking) { int posix_lock_type; if(lockType & LOCKING_ANDX_SHARED_LOCK) posix_lock_type = CIFS_RDLCK; @@ -693,18 +730,46 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) if(numUnlock == 1) posix_lock_type = CIFS_UNLCK; - else if(numLock == 0) { - /* if no lock or unlock then nothing - to do since we do not know what it is */ - FreeXid(xid); - return -EOPNOTSUPP; - } + rc = CIFSSMBPosixLock(xid, pTcon, netfid, 0 /* set */, length, pfLock, posix_lock_type, wait_flag); - } else - rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, - numUnlock, numLock, lockType, wait_flag); + } else { + struct cifsFileInfo *fid = (struct cifsFileInfo *)file->private_data; + + if (numLock) { + rc = CIFSSMBLock(xid, pTcon, netfid, length, pfLock->fl_start, + 0, numLock, lockType, wait_flag); + + if (rc == 0) { + /* For Windows locks we must store them. */ + rc = store_file_lock(fid, length, + pfLock->fl_start, lockType); + } + } else if (numUnlock) { + /* For each stored lock that this unlock overlaps + completely, unlock it. */ + int stored_rc = 0; + struct cifsLockInfo *li, *tmp; + + down(&fid->lock_sem); + list_for_each_entry_safe(li, tmp, &fid->llist, llist) { + if (pfLock->fl_start <= li->offset && + length >= li->length) { + stored_rc = CIFSSMBLock(xid, pTcon, netfid, + li->length, li->offset, + 1, 0, li->type, FALSE); + if (stored_rc) + rc = stored_rc; + + list_del(&li->llist); + kfree(li); + } + } + up(&fid->lock_sem); + } + } + if (pfLock->fl_flags & FL_POSIX) posix_lock_file_wait(file, pfLock); FreeXid(xid); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index b66eff5dc624..ce87550e918f 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -72,6 +72,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRinvlevel,-EOPNOTSUPP}, {ERRdirnotempty, -ENOTEMPTY}, {ERRnotlocked, -ENOLCK}, + {ERRcancelviolation, -ENOLCK}, {ERRalreadyexists, -EEXIST}, {ERRmoredata, -EOVERFLOW}, {ERReasnotsupported,-EOPNOTSUPP}, diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h index cd41c67ff8d3..212c3c296409 100644 --- a/fs/cifs/smberr.h +++ b/fs/cifs/smberr.h @@ -95,6 +95,7 @@ #define ERRinvlevel 124 #define ERRdirnotempty 145 #define ERRnotlocked 158 +#define ERRcancelviolation 173 #define ERRalreadyexists 183 #define ERRbadpipe 230 #define ERRpipebusy 231 diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 748b0df07a73..48d47b46b1fb 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -37,7 +37,7 @@ extern mempool_t *cifs_mid_poolp; extern kmem_cache_t *cifs_oplock_cachep; static struct mid_q_entry * -AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) +AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) { struct mid_q_entry *temp; @@ -204,6 +204,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, rc = 0; } + /* Don't want to modify the buffer as a + side effect of this call. 
*/ + smb_buffer->smb_buf_length = smb_buf_length; + return rc; } @@ -218,6 +222,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int len = iov[0].iov_len; unsigned int total_len; int first_vec = 0; + unsigned int smb_buf_length = smb_buffer->smb_buf_length; if(ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -294,36 +299,15 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, } else rc = 0; + /* Don't want to modify the buffer as a + side effect of this call. */ + smb_buffer->smb_buf_length = smb_buf_length; + return rc; } -int -SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, - struct kvec *iov, int n_vec, int * pRespBufType /* ret */, - const int long_op) +static int wait_for_free_request(struct cifsSesInfo *ses, const int long_op) { - int rc = 0; - unsigned int receive_len; - unsigned long timeout; - struct mid_q_entry *midQ; - struct smb_hdr *in_buf = iov[0].iov_base; - - *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ - - if ((ses == NULL) || (ses->server == NULL)) { - cifs_small_buf_release(in_buf); - cERROR(1,("Null session")); - return -EIO; - } - - if(ses->server->tcpStatus == CifsExiting) { - cifs_small_buf_release(in_buf); - return -ENOENT; - } - - /* Ensure that we do not send more than 50 overlapping requests - to the same server. We may make this configurable later or - use ses->maxReq */ if(long_op == -1) { /* oplock breaks must not be held up */ atomic_inc(&ses->server->inFlight); @@ -346,53 +330,140 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } else { if(ses->server->tcpStatus == CifsExiting) { spin_unlock(&GlobalMid_Lock); - cifs_small_buf_release(in_buf); return -ENOENT; } - /* can not count locking commands against total since - they are allowed to block on server */ + /* can not count locking commands against total since + they are allowed to block on server */ - if(long_op < 3) { /* update # of requests on the wire to server */ + if (long_op < 3) atomic_inc(&ses->server->inFlight); - } spin_unlock(&GlobalMid_Lock); break; } } } - /* make sure that we sign in the same order that we send on this socket - and avoid races inside tcp sendmsg code that could cause corruption - of smb data */ - - down(&ses->server->tcpSem); + return 0; +} +static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf, + struct mid_q_entry **ppmidQ) +{ if (ses->server->tcpStatus == CifsExiting) { - rc = -ENOENT; - goto out_unlock2; + return -ENOENT; } else if (ses->server->tcpStatus == CifsNeedReconnect) { cFYI(1,("tcp session dead - return to caller to retry")); - rc = -EAGAIN; - goto out_unlock2; + return -EAGAIN; } else if (ses->status != CifsGood) { /* check if SMB session is bad because we are setting it up */ if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && (in_buf->Command != SMB_COM_NEGOTIATE)) { - rc = -EAGAIN; - goto out_unlock2; + return -EAGAIN; } /* else ok - we are setting up session */ } - midQ = AllocMidQEntry(in_buf, ses); - if (midQ == NULL) { + *ppmidQ = AllocMidQEntry(in_buf, ses); + if (*ppmidQ == NULL) { + return -ENOMEM; + } + return 0; +} + +static int wait_for_response(struct cifsSesInfo *ses, + struct mid_q_entry *midQ, + unsigned long timeout, + unsigned long time_to_wait) +{ + unsigned long curr_timeout; + + for (;;) { + curr_timeout = timeout + jiffies; + wait_event(ses->server->response_q, + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || + time_after(jiffies, curr_timeout) || + ((ses->server->tcpStatus != CifsGood) && + 
(ses->server->tcpStatus != CifsNew))); + + if (time_after(jiffies, curr_timeout) && + (midQ->midState == MID_REQUEST_SUBMITTED) && + ((ses->server->tcpStatus == CifsGood) || + (ses->server->tcpStatus == CifsNew))) { + + unsigned long lrt; + + /* We timed out. Is the server still + sending replies ? */ + spin_lock(&GlobalMid_Lock); + lrt = ses->server->lstrp; + spin_unlock(&GlobalMid_Lock); + + /* Calculate time_to_wait past last receive time. + Although we prefer not to time out if the + server is still responding - we will time + out if the server takes more than 15 (or 45 + or 180) seconds to respond to this request + and has not responded to any request from + other threads on the client within 10 seconds */ + lrt += time_to_wait; + if (time_after(jiffies, lrt)) { + /* No replies for time_to_wait. */ + cERROR(1,("server not responding")); + return -1; + } + } else { + return 0; + } + } +} + +int +SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, + struct kvec *iov, int n_vec, int * pRespBufType /* ret */, + const int long_op) +{ + int rc = 0; + unsigned int receive_len; + unsigned long timeout; + struct mid_q_entry *midQ; + struct smb_hdr *in_buf = iov[0].iov_base; + + *pRespBufType = CIFS_NO_BUFFER; /* no response buf yet */ + + if ((ses == NULL) || (ses->server == NULL)) { + cifs_small_buf_release(in_buf); + cERROR(1,("Null session")); + return -EIO; + } + + if(ses->server->tcpStatus == CifsExiting) { + cifs_small_buf_release(in_buf); + return -ENOENT; + } + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. We may make this configurable later or + use ses->maxReq */ + + rc = wait_for_free_request(ses, long_op); + if (rc) { + cifs_small_buf_release(in_buf); + return rc; + } + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + down(&ses->server->tcpSem); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { up(&ses->server->tcpSem); cifs_small_buf_release(in_buf); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return -ENOMEM; + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; } rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number); @@ -407,32 +478,23 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; #endif - if(rc < 0) { - DeleteMidQEntry(midQ); - up(&ses->server->tcpSem); - cifs_small_buf_release(in_buf); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return rc; - } else { - up(&ses->server->tcpSem); - cifs_small_buf_release(in_buf); - } + + up(&ses->server->tcpSem); + cifs_small_buf_release(in_buf); + + if(rc < 0) + goto out; if (long_op == -1) - goto cifs_no_response_exit2; + goto out; else if (long_op == 2) /* writes past end of file can take loong time */ timeout = 180 * HZ; else if (long_op == 1) timeout = 45 * HZ; /* should be greater than servers oplock break timeout (about 43 seconds) */ - else if (long_op > 2) { - timeout = MAX_SCHEDULE_TIMEOUT; - } else + else timeout = 15 * HZ; + /* wait for 15 seconds or until woken up due to response arriving or due to last connection to this server being unmounted */ if 
(signal_pending(current)) { @@ -442,53 +504,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } /* No user interrupts in wait - wreaks havoc with performance */ - if(timeout != MAX_SCHEDULE_TIMEOUT) { - unsigned long curr_timeout; - - for (;;) { - curr_timeout = timeout + jiffies; - wait_event(ses->server->response_q, - (!(midQ->midState == MID_REQUEST_SUBMITTED)) || - time_after(jiffies, curr_timeout) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - - if (time_after(jiffies, curr_timeout) && - (midQ->midState == MID_REQUEST_SUBMITTED) && - ((ses->server->tcpStatus == CifsGood) || - (ses->server->tcpStatus == CifsNew))) { - - unsigned long lrt; - - /* We timed out. Is the server still - sending replies ? */ - spin_lock(&GlobalMid_Lock); - lrt = ses->server->lstrp; - spin_unlock(&GlobalMid_Lock); - - /* Calculate 10 seconds past last receive time. - Although we prefer not to time out if the - server is still responding - we will time - out if the server takes more than 15 (or 45 - or 180) seconds to respond to this request - and has not responded to any request from - other threads on the client within 10 seconds */ - lrt += (10 * HZ); - if (time_after(jiffies, lrt)) { - /* No replies for 10 seconds. */ - cERROR(1,("server not responding")); - break; - } - } else { - break; - } - } - } else { - wait_event(ses->server->response_q, - (!(midQ->midState == MID_REQUEST_SUBMITTED)) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - } + wait_for_response(ses, midQ, timeout, 10 * HZ); spin_lock(&GlobalMid_Lock); if (midQ->resp_buf) { @@ -516,11 +532,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, } spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(midQ); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -571,24 +585,12 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, cFYI(1,("Bad MID state?")); } } -cifs_no_response_exit2: - DeleteMidQEntry(midQ); - - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return rc; +out: -out_unlock2: - up(&ses->server->tcpSem); - cifs_small_buf_release(in_buf); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + DeleteMidQEntry(midQ); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -618,85 +620,34 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, /* Ensure that we do not send more than 50 overlapping requests to the same server. 
We may make this configurable later or use ses->maxReq */ - if(long_op == -1) { - /* oplock breaks must not be held up */ - atomic_inc(&ses->server->inFlight); - } else { - spin_lock(&GlobalMid_Lock); - while(1) { - if(atomic_read(&ses->server->inFlight) >= - cifs_max_pending){ - spin_unlock(&GlobalMid_Lock); -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->num_waiters); -#endif - wait_event(ses->server->request_q, - atomic_read(&ses->server->inFlight) - < cifs_max_pending); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->num_waiters); -#endif - spin_lock(&GlobalMid_Lock); - } else { - if(ses->server->tcpStatus == CifsExiting) { - spin_unlock(&GlobalMid_Lock); - return -ENOENT; - } - /* can not count locking commands against total since - they are allowed to block on server */ - - if(long_op < 3) { - /* update # of requests on the wire to server */ - atomic_inc(&ses->server->inFlight); - } - spin_unlock(&GlobalMid_Lock); - break; - } - } - } + rc = wait_for_free_request(ses, long_op); + if (rc) + return rc; + /* make sure that we sign in the same order that we send on this socket and avoid races inside tcp sendmsg code that could cause corruption of smb data */ down(&ses->server->tcpSem); - if (ses->server->tcpStatus == CifsExiting) { - rc = -ENOENT; - goto out_unlock; - } else if (ses->server->tcpStatus == CifsNeedReconnect) { - cFYI(1,("tcp session dead - return to caller to retry")); - rc = -EAGAIN; - goto out_unlock; - } else if (ses->status != CifsGood) { - /* check if SMB session is bad because we are setting it up */ - if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && - (in_buf->Command != SMB_COM_NEGOTIATE)) { - rc = -EAGAIN; - goto out_unlock; - } /* else ok - we are setting up session */ - } - midQ = AllocMidQEntry(in_buf, ses); - if (midQ == NULL) { + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { up(&ses->server->tcpSem); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return -ENOMEM; + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); + return rc; } if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { - up(&ses->server->tcpSem); cERROR(1, ("Illegal length, greater than maximum frame, %d", in_buf->smb_buf_length)); DeleteMidQEntry(midQ); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + up(&ses->server->tcpSem); + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return -EIO; } @@ -712,27 +663,19 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; #endif - if(rc < 0) { - DeleteMidQEntry(midQ); - up(&ses->server->tcpSem); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } - return rc; - } else - up(&ses->server->tcpSem); + up(&ses->server->tcpSem); + + if(rc < 0) + goto out; + if (long_op == -1) - goto cifs_no_response_exit; + goto out; else if (long_op == 2) /* writes past end of file can take loong time */ timeout = 180 * HZ; else if (long_op == 1) timeout = 45 * HZ; /* should be greater than servers oplock break timeout (about 43 seconds) */ - else if (long_op > 2) { - timeout = 
MAX_SCHEDULE_TIMEOUT; - } else + else timeout = 15 * HZ; /* wait for 15 seconds or until woken up due to response arriving or due to last connection to this server being unmounted */ @@ -743,47 +686,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } /* No user interrupts in wait - wreaks havoc with performance */ - if(timeout != MAX_SCHEDULE_TIMEOUT) { - unsigned long curr_timeout; - - for (;;) { - curr_timeout = timeout + jiffies; - wait_event(ses->server->response_q, - (!(midQ->midState == MID_REQUEST_SUBMITTED)) || - time_after(jiffies, curr_timeout) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - - if (time_after(jiffies, curr_timeout) && - (midQ->midState == MID_REQUEST_SUBMITTED) && - ((ses->server->tcpStatus == CifsGood) || - (ses->server->tcpStatus == CifsNew))) { - - unsigned long lrt; - - /* We timed out. Is the server still - sending replies ? */ - spin_lock(&GlobalMid_Lock); - lrt = ses->server->lstrp; - spin_unlock(&GlobalMid_Lock); - - /* Calculate 10 seconds past last receive time*/ - lrt += (10 * HZ); - if (time_after(jiffies, lrt)) { - /* Server sent no reply in 10 seconds */ - cERROR(1,("Server not responding")); - break; - } - } else { - break; - } - } - } else { - wait_event(ses->server->response_q, - (!(midQ->midState == MID_REQUEST_SUBMITTED)) || - ((ses->server->tcpStatus != CifsGood) && - (ses->server->tcpStatus != CifsNew))); - } + wait_for_response(ses, midQ, timeout, 10 * HZ); spin_lock(&GlobalMid_Lock); if (midQ->resp_buf) { @@ -811,11 +714,9 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, } spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(midQ); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + /* Update # of requests on wire to server */ + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); return rc; } @@ -862,23 +763,253 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, cERROR(1,("Bad MID state?")); } } -cifs_no_response_exit: + +out: + DeleteMidQEntry(midQ); + atomic_dec(&ses->server->inFlight); + wake_up(&ses->server->request_q); - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); - } + return rc; +} +/* Send an NT_CANCEL SMB to cause the POSIX blocking lock to return. */ + +static int +send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, + struct mid_q_entry *midQ) +{ + int rc = 0; + struct cifsSesInfo *ses = tcon->ses; + __u16 mid = in_buf->Mid; + + header_assemble(in_buf, SMB_COM_NT_CANCEL, tcon, 0); + in_buf->Mid = mid; + down(&ses->server->tcpSem); + rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); + if (rc) { + up(&ses->server->tcpSem); + return rc; + } + rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, + (struct sockaddr *) &(ses->server->addr.sockAddr)); + up(&ses->server->tcpSem); return rc; +} + +/* We send a LOCKINGX_CANCEL_LOCK to cause the Windows + blocking lock to return. */ + +static int +send_lock_cancel(const unsigned int xid, struct cifsTconInfo *tcon, + struct smb_hdr *in_buf, + struct smb_hdr *out_buf) +{ + int bytes_returned; + struct cifsSesInfo *ses = tcon->ses; + LOCK_REQ *pSMB = (LOCK_REQ *)in_buf; + + /* We just modify the current in_buf to change + the type of lock from LOCKING_ANDX_SHARED_LOCK + or LOCKING_ANDX_EXCLUSIVE_LOCK to + LOCKING_ANDX_CANCEL_LOCK. 
*/ + + pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; + pSMB->Timeout = 0; + pSMB->hdr.Mid = GetNextMid(ses->server); + + return SendReceive(xid, ses, in_buf, out_buf, + &bytes_returned, 0); +} + +int +SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, + struct smb_hdr *in_buf, struct smb_hdr *out_buf, + int *pbytes_returned) +{ + int rc = 0; + int rstart = 0; + unsigned int receive_len; + struct mid_q_entry *midQ; + struct cifsSesInfo *ses; + + if (tcon == NULL || tcon->ses == NULL) { + cERROR(1,("Null smb session")); + return -EIO; + } + ses = tcon->ses; + + if(ses->server == NULL) { + cERROR(1,("Null tcp session")); + return -EIO; + } + + if(ses->server->tcpStatus == CifsExiting) + return -ENOENT; + + /* Ensure that we do not send more than 50 overlapping requests + to the same server. We may make this configurable later or + use ses->maxReq */ + + rc = wait_for_free_request(ses, 3); + if (rc) + return rc; + + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + + down(&ses->server->tcpSem); + + rc = allocate_mid(ses, in_buf, &midQ); + if (rc) { + up(&ses->server->tcpSem); + return rc; + } + + if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + up(&ses->server->tcpSem); + cERROR(1, ("Illegal length, greater than maximum frame, %d", + in_buf->smb_buf_length)); + DeleteMidQEntry(midQ); + return -EIO; + } + + rc = cifs_sign_smb(in_buf, ses->server, &midQ->sequence_number); -out_unlock: + midQ->midState = MID_REQUEST_SUBMITTED; +#ifdef CONFIG_CIFS_STATS2 + atomic_inc(&ses->server->inSend); +#endif + rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, + (struct sockaddr *) &(ses->server->addr.sockAddr)); +#ifdef CONFIG_CIFS_STATS2 + atomic_dec(&ses->server->inSend); + midQ->when_sent = jiffies; +#endif up(&ses->server->tcpSem); - /* If not lock req, update # of requests on wire to server */ - if(long_op < 3) { - atomic_dec(&ses->server->inFlight); - wake_up(&ses->server->request_q); + + if(rc < 0) { + DeleteMidQEntry(midQ); + return rc; + } + + /* Wait for a reply - allow signals to interrupt. */ + rc = wait_event_interruptible(ses->server->response_q, + (!(midQ->midState == MID_REQUEST_SUBMITTED)) || + ((ses->server->tcpStatus != CifsGood) && + (ses->server->tcpStatus != CifsNew))); + + /* Were we interrupted by a signal ? */ + if ((rc == -ERESTARTSYS) && + (midQ->midState == MID_REQUEST_SUBMITTED) && + ((ses->server->tcpStatus == CifsGood) || + (ses->server->tcpStatus == CifsNew))) { + + if (in_buf->Command == SMB_COM_TRANSACTION2) { + /* POSIX lock. We send a NT_CANCEL SMB to cause the + blocking lock to return. */ + + rc = send_nt_cancel(tcon, in_buf, midQ); + if (rc) { + DeleteMidQEntry(midQ); + return rc; + } + } else { + /* Windows lock. We send a LOCKINGX_CANCEL_LOCK + to cause the blocking lock to return. */ + + rc = send_lock_cancel(xid, tcon, in_buf, out_buf); + + /* If we get -ENOLCK back the lock may have + already been removed. Don't exit in this case. */ + if (rc && rc != -ENOLCK) { + DeleteMidQEntry(midQ); + return rc; + } + } + + /* Wait 5 seconds for the response. */ + if (wait_for_response(ses, midQ, 5 * HZ, 5 * HZ)==0) { + /* We got the response - restart system call. 
*/ + rstart = 1; + } + } + + spin_lock(&GlobalMid_Lock); + if (midQ->resp_buf) { + spin_unlock(&GlobalMid_Lock); + receive_len = midQ->resp_buf->smb_buf_length; + } else { + cERROR(1,("No response for cmd %d mid %d", + midQ->command, midQ->mid)); + if(midQ->midState == MID_REQUEST_SUBMITTED) { + if(ses->server->tcpStatus == CifsExiting) + rc = -EHOSTDOWN; + else { + ses->server->tcpStatus = CifsNeedReconnect; + midQ->midState = MID_RETRY_NEEDED; + } + } + + if (rc != -EHOSTDOWN) { + if(midQ->midState == MID_RETRY_NEEDED) { + rc = -EAGAIN; + cFYI(1,("marking request for retry")); + } else { + rc = -EIO; + } + } + spin_unlock(&GlobalMid_Lock); + DeleteMidQEntry(midQ); + return rc; } + + if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { + cERROR(1, ("Frame too large received. Length: %d Xid: %d", + receive_len, xid)); + rc = -EIO; + } else { /* rcvd frame is ok */ + + if (midQ->resp_buf && out_buf + && (midQ->midState == MID_RESPONSE_RECEIVED)) { + out_buf->smb_buf_length = receive_len; + memcpy((char *)out_buf + 4, + (char *)midQ->resp_buf + 4, + receive_len); + + dump_smb(out_buf, 92); + /* convert the length into a more usable form */ + if((receive_len > 24) && + (ses->server->secMode & (SECMODE_SIGN_REQUIRED | + SECMODE_SIGN_ENABLED))) { + rc = cifs_verify_signature(out_buf, + ses->server->mac_signing_key, + midQ->sequence_number+1); + if(rc) { + cERROR(1,("Unexpected SMB signature")); + /* BB FIXME add code to kill session */ + } + } + + *pbytes_returned = out_buf->smb_buf_length; + + /* BB special case reconnect tid and uid here? */ + rc = map_smb_to_linux_error(out_buf); + /* convert ByteCount if necessary */ + if (receive_len >= + sizeof (struct smb_hdr) - + 4 /* do not count RFC1001 header */ + + (2 * out_buf->WordCount) + 2 /* bcc */ ) + BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); + } else { + rc = -EIO; + cERROR(1,("Bad MID state?")); + } + } + DeleteMidQEntry(midQ); + if (rstart && rc == -EACCES) + return -ERESTARTSYS; return rc; } -- cgit v1.2.2 From 66abda5e1fa48e12e06d0b68746b0e67202a97d2 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 11 Aug 2006 16:52:09 +0000 Subject: [CIFS] Fix oops when negotiating lanman and no password specified Pointed out by Guenter Kukkukk Signed-of-by: Steve French (cherry picked from bbf33d512da608c7221fec42b56b9ef89c25a5ee commit) --- fs/cifs/cifsencrypt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index a89efaf78a26..4bc250b2d9fc 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -277,7 +277,8 @@ void calc_lanman_hash(struct cifsSesInfo * ses, char * lnm_session_key) return; memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); - strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); + if(ses->password) + strncpy(password_with_pad, ses->password, CIFS_ENCPWD_SIZE); if((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) if(extended_security & CIFSSEC_MAY_PLNTXT) { -- cgit v1.2.2 From 1725cd0ae07bb31f68803edcc5bdc99952c7d2f4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 13 Aug 2006 23:24:17 -0700 Subject: [PATCH] adfs error message fix Don't use NULL as a printf control string. Fixes bug #6889. 
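Not part of the patch, but to make the hazard concrete: assuming adfs_error() is the usual printf-style macro that prepends __FUNCTION__ and forwards the remaining arguments to __adfs_error(), the extra NULL in the old call became the format string and "map corrupted" was silently consumed as a vararg. A minimal user-space sketch of the same mistake (report_error() is a made-up stand-in, not kernel code):

	#include <stdarg.h>
	#include <stdio.h>

	/* printf-style helper: vsnprintf() has undefined behaviour (and in
	 * the kernel typically oopses) if fmt is NULL. */
	static void report_error(const char *function, const char *fmt, ...)
	{
		va_list args;
		char buf[128];

		va_start(args, fmt);
		vsnprintf(buf, sizeof(buf), fmt, args);
		va_end(args);
		fprintf(stderr, "error (%s): %s\n", function, buf);
	}

	int main(void)
	{
		report_error(__func__, "map corrupted");        /* fixed form */
		/* report_error(__func__, NULL, "map corrupted");   buggy form */
		return 0;
	}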
Cc: Ralph Corderoy Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/adfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/adfs/super.c b/fs/adfs/super.c index ba1c88af49fe..82011019494c 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -308,7 +308,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di if (adfs_checkmap(sb, dm)) return dm; - adfs_error(sb, NULL, "map corrupted"); + adfs_error(sb, "map corrupted"); error_free: while (--zone >= 0) -- cgit v1.2.2 From 95f8797f42b058333d1e6f0d1dcd8edf5dc6c244 Mon Sep 17 00:00:00 2001 From: Dan Bastone Date: Sun, 13 Aug 2006 23:24:18 -0700 Subject: [PATCH] initialize parts of udf inode earlier in create Eric says: > I saw an oops down this path when trying to create a new file on a UDF > filesystem which was internally marked as readonly, but mounted rw: > > udf_create > udf_new_inode > new_inode > alloc_inode > udf_alloc_inode > udf_new_block > returns EIO due to readonlyness > iput (on error) I ran into the same issue today, but when listing a directory with invalid/corrupt entries: udf_lookup udf_iget get_new_inode_fast alloc_inode udf_alloc_inode __udf_read_inode fails for any reason iput (on error) ... The following patch to udf_alloc_inode() should take care of both (and other similar) cases, but I've only tested it with udf_lookup(). Signed-off-by: Dan Bastone Cc: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/udf/super.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 4df822c881b6..7de172efa084 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -115,6 +115,13 @@ static struct inode *udf_alloc_inode(struct super_block *sb) ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; + + ei->i_unique = 0; + ei->i_lenExtents = 0; + ei->i_next_alloc_block = 0; + ei->i_next_alloc_goal = 0; + ei->i_strat4096 = 0; + return &ei->vfs_inode; } -- cgit v1.2.2 From 1d7ea7324ae7a59f8e17e4ba76a2707c1e6f24d2 Mon Sep 17 00:00:00 2001 From: Alexander Zarochentsev Date: Sun, 13 Aug 2006 23:24:27 -0700 Subject: [PATCH] fuse: fix error case in fuse_readpages Don't let fuse_readpages leave the @pages list not empty when exiting on error. 
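For context (schematic only, not buildable as-is, and not part of the patch): the convention the fix restores is that ->readpages() must not hand a non-empty @pages list back to its caller, so any early-error exit has to release whatever it never consumed. example_setup() and example_fill_page() below are hypothetical placeholders:

	static int example_readpages(struct file *file,
				     struct address_space *mapping,
				     struct list_head *pages, unsigned nr_pages)
	{
		int err;

		err = example_setup(file);	/* hypothetical; may fail early */
		if (err)
			goto clean_pages_up;

		/* read_cache_pages() takes pages off @pages as it inserts
		 * them into the page cache and calls the filler. */
		return read_cache_pages(mapping, pages, example_fill_page, NULL);

	clean_pages_up:
		put_pages_list(pages);	/* nothing consumed @pages; drop them */
		return err;
	}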
[akpm@osdl.org: kernel-doc fixes] Signed-off-by: Alexander Zarochentsev Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 63614ed16336..5c4fcd1dbf59 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -395,14 +395,16 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, struct fuse_readpages_data data; int err; + err = -EIO; if (is_bad_inode(inode)) - return -EIO; + goto clean_pages_up; data.file = file; data.inode = inode; data.req = fuse_get_req(fc); + err = PTR_ERR(data.req); if (IS_ERR(data.req)) - return PTR_ERR(data.req); + goto clean_pages_up; err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); if (!err) { @@ -412,6 +414,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, fuse_put_request(fc, data.req); } return err; + +clean_pages_up: + put_pages_list(pages); + return err; } static size_t fuse_send_write(struct fuse_req *req, struct file *file, -- cgit v1.2.2 From 74361cb6828398a96167b3234e186fbd731e5f30 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Aug 2006 08:54:48 -0700 Subject: [PATCH] fcntl(F_SETSIG) fix fcntl(F_SETSIG) no longer works on leases because lease_release_private_callback() gets called as the lease is copied in order to initialise it. The problem is that lease_alloc() performs an unnecessary initialisation, which sets the lease_manager_ops. Avoid the problem by allocating the target lease structure using locks_alloc_lock(). Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index b0b41a64e10b..d7c53392cac1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1421,8 +1421,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) if (!leases_enable) goto out; - error = lease_alloc(filp, arg, &fl); - if (error) + error = -ENOMEM; + fl = locks_alloc_lock(); + if (fl == NULL) goto out; locks_copy_lock(fl, lease); @@ -1430,6 +1431,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp) locks_insert_lock(before, fl); *flp = fl; + error = 0; out: return error; } -- cgit v1.2.2 From e466e4876bf39474e15d0572f2204578137ae7f5 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 15 Aug 2006 13:07:18 +0000 Subject: [CIFS] Fix oops in cifs_close due to unitialized lock sem and list in new POSIX locking code Signed-off-by: Steve French --- fs/cifs/CHANGES | 5 ++++- fs/cifs/connect.c | 14 ++++++++------ fs/cifs/dir.c | 4 ++++ 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index acb843b9bc3b..bd37727526ea 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -2,7 +2,10 @@ Version 1.45 ------------ Do not time out lockw calls when using posix extensions. Do not time out requests if server still responding reasonably fast -on requests on other threads +on requests on other threads. Improve POSIX locking emulation, +(lock cancel now works, and unlock of merged range works even +to Windows servers now). Fix oops on mount to lanman servers +(win9x, os/2 etc.) when null password. 
Version 1.44 ------------ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b706b4f48b10..5d394c726860 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1271,33 +1271,35 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) read_lock(&GlobalSMBSeslock); list_for_each(tmp, &GlobalTreeConnectionList) { - cFYI(1, ("Next tcon - ")); + cFYI(1, ("Next tcon")); tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); if (tcon->ses) { if (tcon->ses->server) { cFYI(1, - (" old ip addr: %x == new ip %x ?", + ("old ip addr: %x == new ip %x ?", tcon->ses->server->addr.sockAddr.sin_addr. s_addr, new_target_ip_addr)); if (tcon->ses->server->addr.sockAddr.sin_addr. s_addr == new_target_ip_addr) { - /* BB lock tcon and server and tcp session and increment use count here? */ + /* BB lock tcon, server and tcp session and increment use count here? */ /* found a match on the TCP session */ /* BB check if reconnection needed */ - cFYI(1,("Matched ip, old UNC: %s == new: %s ?", + cFYI(1,("IP match, old UNC: %s new: %s", tcon->treeName, uncName)); if (strncmp (tcon->treeName, uncName, MAX_TREE_SIZE) == 0) { cFYI(1, - ("Matched UNC, old user: %s == new: %s ?", + ("and old usr: %s new: %s", tcon->treeName, uncName)); if (strncmp (tcon->ses->userName, userName, MAX_USERNAME_SIZE) == 0) { read_unlock(&GlobalSMBSeslock); - return tcon;/* also matched user (smb session)*/ + /* matched smb session + (user name */ + return tcon; } } } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ba4cbe9b0684..914239d53634 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -267,6 +267,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, pCifsFile->invalidHandle = FALSE; pCifsFile->closePend = FALSE; init_MUTEX(&pCifsFile->fh_sem); + init_MUTEX(&pCifsFile->lock_sem); + INIT_LIST_HEAD(&pCifsFile->llist); + atomic_set(&pCifsFile->wrtPending,0); + /* set the following in open now pCifsFile->pfile = file; */ write_lock(&GlobalSMBSeslock); -- cgit v1.2.2 From 5ddaa683a513439081c9511b0d9ad490672c51c9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 15 Aug 2006 13:35:48 +0000 Subject: [CIFS] endian errors in lanman protocol support le16 compared to host-endian constant u8 fed to le32_to_cpu() le16 compared to host-endian constant Signed-off-by: Al Viro Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 2 +- fs/cifs/readdir.c | 2 +- fs/cifs/sess.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index dcbc3e075e81..075d8fb3d376 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -477,7 +477,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) /* BB get server time for time conversions and add code to use it and timezone since this is not UTC */ - if (rsp->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { + if (rsp->EncryptionKeyLength == cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { memcpy(server->cryptKey, rsp->EncryptionKey, CIFS_CRYPTO_KEY_SIZE); } else if (server->secMode & SECMODE_PW_ENCRYPT) { diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 03bbcb377913..105761e3ba0e 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -556,7 +556,7 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) FIND_FILE_STANDARD_INFO * pFindData = (FIND_FILE_STANDARD_INFO *)current_entry; filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); + len = pFindData->FileNameLength; } else { cFYI(1,("Unknown findfirst level 
%d",cfile->srch_inf.info_level)); } diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 7202d534ef0b..d1705ab8136e 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -372,7 +372,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, /* no capabilities flags in old lanman negotiation */ - pSMB->old_req.PasswordLength = CIFS_SESS_KEY_SIZE; + pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); /* BB calculate hash with password */ /* and copy into bcc */ -- cgit v1.2.2 From ea4c07d780a6f7b7be2d984117bd3e0a2b772e3d Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 16 Aug 2006 19:44:25 +0000 Subject: [CIFS] Do not send Query All EAs SMB when mount option nouser_xattr specified Pointed out by Bjoern Jacke Signed-off-by: Steve French --- fs/cifs/CHANGES | 3 ++- fs/cifs/README | 2 +- fs/cifs/xattr.c | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index bd37727526ea..0feb3bd49cb8 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -5,7 +5,8 @@ time out requests if server still responding reasonably fast on requests on other threads. Improve POSIX locking emulation, (lock cancel now works, and unlock of merged range works even to Windows servers now). Fix oops on mount to lanman servers -(win9x, os/2 etc.) when null password. +(win9x, os/2 etc.) when null password. Do not send listxattr +(SMB to query all EAs) if nouser_xattr specified. Version 1.44 ------------ diff --git a/fs/cifs/README b/fs/cifs/README index 7986d0d97ace..5f0e1bd64fee 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -408,7 +408,7 @@ A partial list of the supported mount options follows: user_xattr Allow getting and setting user xattrs as OS/2 EAs (extended attributes) to the server (default) e.g. via setfattr and getfattr utilities. - nouser_xattr Do not allow getfattr/setfattr to get/set xattrs + nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs mapchars Translate six of the seven reserved characters (not backslash) *?<>|: to the remap range (above 0xF000), which also diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 7754d641775e..067648b7179b 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -330,11 +330,15 @@ ssize_t cifs_listxattr(struct dentry * direntry, char * data, size_t buf_size) sb = direntry->d_inode->i_sb; if(sb == NULL) return -EIO; - xid = GetXid(); cifs_sb = CIFS_SB(sb); pTcon = cifs_sb->tcon; + if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + return -EOPNOTSUPP; + + xid = GetXid(); + full_path = build_path_from_dentry(direntry); if(full_path == NULL) { FreeXid(xid); -- cgit v1.2.2 From 78bd4d484f81a611ef6ff02f909e576cb9aac7f2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Aug 2006 08:33:23 +0200 Subject: [PATCH] sys_ioprio_set: minor do_each_thread+break fix From include/linux/sched.h: * Careful: do_each_thread/while_each_thread is a double loop so * 'break' will not work as expected - use goto instead. 
*/ Signed-off-by: Oleg Nesterov Signed-off-by: Jens Axboe --- fs/ioprio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ioprio.c b/fs/ioprio.c index 93aa5715f224..3db31038e9ab 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -111,9 +111,9 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) continue; ret = set_task_ioprio(p, ioprio); if (ret) - break; + goto free_uid; } while_each_thread(g, p); - +free_uid: if (who) free_uid(user); break; -- cgit v1.2.2 From 9f83e45eb54fc7198dc59fc63255341851ba4c48 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Aug 2006 08:34:15 +0200 Subject: [PATCH] Fix current_io_context() vs set_task_ioprio() race I know nothing about io scheduler, but I suspect set_task_ioprio() is not safe. current_io_context() initializes "struct io_context", then sets ->io_context. set_task_ioprio() running on another cpu may see the changes out of order, so ->set_ioprio(ioc) may use io_context which was not initialized properly. Signed-off-by: Oleg Nesterov Signed-off-by: Jens Axboe --- fs/ioprio.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ioprio.c b/fs/ioprio.c index 3db31038e9ab..06578311c63f 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -44,6 +44,9 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) task->ioprio = ioprio; ioc = task->io_context; + /* see wmb() in current_io_context() */ + smp_read_barrier_depends(); + if (ioc && ioc->set_ioprio) ioc->set_ioprio(ioc, ioprio); -- cgit v1.2.2 From e014ff8d4285b81f0de0719d8eee72bc50bfd4be Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 21 Aug 2006 10:02:50 +0200 Subject: [PATCH] uninline ioprio_best() Saves 376 bytes (5 callers) for me. Signed-off-by: Oleg Nesterov Signed-off-by: Jens Axboe --- fs/ioprio.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'fs') diff --git a/fs/ioprio.c b/fs/ioprio.c index 06578311c63f..78b1deae3fa2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -140,6 +140,29 @@ out: return ret; } +int ioprio_best(unsigned short aprio, unsigned short bprio) +{ + unsigned short aclass = IOPRIO_PRIO_CLASS(aprio); + unsigned short bclass = IOPRIO_PRIO_CLASS(bprio); + + if (!ioprio_valid(aprio)) + return bprio; + if (!ioprio_valid(bprio)) + return aprio; + + if (aclass == IOPRIO_CLASS_NONE) + aclass = IOPRIO_CLASS_BE; + if (bclass == IOPRIO_CLASS_NONE) + bclass = IOPRIO_CLASS_BE; + + if (aclass == bclass) + return min(aprio, bprio); + if (aclass > bclass) + return bprio; + else + return aprio; +} + asmlinkage long sys_ioprio_get(int which, int who) { struct task_struct *g, *p; -- cgit v1.2.2 From 00a2b0f6dd2372842df73de72d51621b539fea44 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Aug 2006 13:56:26 +0200 Subject: Fix possible UDF deadlock and memory corruption (CVE-2006-4145) UDF code is not really ready to handle extents larger that 1GB. This is the easy way to forbid creating those. Also truncation code did not count with the case when there are no extents in the file and we are extending the file. 
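A note on how the cap takes effect (not part of the patch): sb->s_maxbytes is honoured by the VFS before the filesystem is called, so dropping it from MAX_LFS_FILESIZE to 1<<30 stops writes and truncates from ever reaching sizes UDF cannot represent in a single extent. Roughly, and loosely based on the generic write-size check (exact code differs by kernel version):

	/* Sketch of the VFS-side enforcement of s_maxbytes. */
	if (pos >= inode->i_sb->s_maxbytes) {
		if (count || pos > inode->i_sb->s_maxbytes)
			return -EFBIG;		/* refuse to start past the limit */
	}
	if (pos + count > inode->i_sb->s_maxbytes)
		count = inode->i_sb->s_maxbytes - pos;	/* clamp the request */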
Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman --- fs/udf/super.c | 2 +- fs/udf/truncate.c | 64 +++++++++++++++++++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 7de172efa084..fcce1a21a51b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1659,7 +1659,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) iput(inode); goto error_out; } - sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_maxbytes = 1<<30; return 0; error_out: diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index e1b0e8cfecb4..0abd66ce36ea 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode) { if (offset) { - extoffset -= adsize; - etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); - if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) - { - extoffset -= adsize; - elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); - udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); + /* + * OK, there is not extent covering inode->i_size and + * no extent above inode->i_size => truncate is + * extending the file by 'offset'. + */ + if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) || + (bh && extoffset == sizeof(struct allocExtDesc))) { + /* File has no extents at all! */ + memset(&eloc, 0x00, sizeof(kernel_lb_addr)); + elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; + udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); } - else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) - { - kernel_lb_addr neloc = { 0, 0 }; + else { extoffset -= adsize; - nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | - ((elen + offset + inode->i_sb->s_blocksize - 1) & - ~(inode->i_sb->s_blocksize - 1)); - udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); - udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); - } - else - { - if (elen & (inode->i_sb->s_blocksize - 1)) + etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1); + if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30)) + { + extoffset -= adsize; + elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset); + udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0); + } + else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { + kernel_lb_addr neloc = { 0, 0 }; extoffset -= adsize; - elen = EXT_RECORDED_ALLOCATED | - ((elen + inode->i_sb->s_blocksize - 1) & + nelen = EXT_NOT_RECORDED_NOT_ALLOCATED | + ((elen + offset + inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize - 1)); - udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); + udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); + udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1); + } + else + { + if (elen & (inode->i_sb->s_blocksize - 1)) + { + extoffset -= adsize; + elen = EXT_RECORDED_ALLOCATED | + ((elen + inode->i_sb->s_blocksize - 1) & + ~(inode->i_sb->s_blocksize - 1)); + udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1); + } + memset(&eloc, 0x00, sizeof(kernel_lb_addr)); + elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; + udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); } - memset(&eloc, 0x00, sizeof(kernel_lb_addr)); - elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset; - udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1); } } } -- cgit v1.2.2 From ddeff520f02b92128132c282c350fa72afffb84a Mon Sep 17 00:00:00 2001 From: Nikita Danilov Date: Wed, 9 Aug 2006 
13:53:47 -0400 Subject: NFS: Fix a potential deadlock in nfs_release_page nfs_wb_page() waits on request completion and, as a result, is not safe to be called from nfs_release_page() invoked by VM scanner as part of GFP_NOFS allocation. Fix possible deadlock by analyzing gfp mask and refusing to release page if __GFP_FS is not set. Signed-off-by: Nikita Danilov Signed-off-by: Trond Myklebust (cherry picked from 374d969debfb290bafcb41d28918dc6f7e43ce31 commit) --- fs/nfs/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index cc2b874ad5a4..48e892880d5b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) static int nfs_release_page(struct page *page, gfp_t gfp) { - return !nfs_wb_page(page->mapping->host, page); + if (gfp & __GFP_FS) + return !nfs_wb_page(page->mapping->host, page); + else + /* + * Avoid deadlock on nfs_wait_on_request(). + */ + return 0; } const struct address_space_operations nfs_file_aops = { -- cgit v1.2.2 From a634904a7de0d3a0bc606f608007a34e8c05bfee Mon Sep 17 00:00:00 2001 From: ASANO Masahiro Date: Tue, 22 Aug 2006 20:06:02 -0400 Subject: VFS: add lookup hint for network file systems I'm trying to speeding up mkdir(2) for network file systems. A typical mkdir(2) calls two inode_operations: lookup and mkdir. The lookup operation would fail with ENOENT in common case. I think it is unnecessary because the subsequent mkdir operation can check it. In case of creat(2), lookup operation is called with the LOOKUP_CREATE flag, so individual filesystem can omit real lookup. e.g. nfs_lookup(). Here is a sample patch which uses LOOKUP_CREATE and O_EXCL on mkdir, symlink and mknod. This uses the gadget for creat(2). And here is the result of a benchmark on NFSv3. mkdir(2) 10,000 times: original 50.5 sec patched 29.0 sec Signed-off-by: ASANO Masahiro Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust (cherry picked from fab7bf44449b29f9d5572a5dd8adcf7c91d5bf0f commit) --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 55a131230f94..863166441bf3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1767,6 +1767,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) if (nd->last_type != LAST_NORM) goto fail; nd->flags &= ~LOOKUP_PARENT; + nd->flags |= LOOKUP_CREATE; + nd->intent.open.flags = O_EXCL; /* * Do the final lookup. 
-- cgit v1.2.2 From 5d67476fff2df6ff12f60b540fd0e74cf2a668f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 31 Jul 2006 14:11:48 -0700 Subject: SUNRPC: make rpc_unlink() take a dentry argument instead of a path Signe-off-by: Trond Myklebust (cherry picked from 88bf6d811b01a4be7fd507d18bf5f1c527989089 commit) --- fs/nfs/idmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b81e7ed3c902..df0be1214358 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -130,9 +130,8 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; + rpc_unlink(idmap->idmap_dentry); dput(idmap->idmap_dentry); - idmap->idmap_dentry = NULL; - rpc_unlink(idmap->idmap_path); clp->cl_idmap = NULL; kfree(idmap); } -- cgit v1.2.2 From 8f8e7a50f450fcb86a5b2ffb94543c57a14f8260 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Aug 2006 13:11:15 -0400 Subject: SUNRPC: Fix dentry refcounting issues with users of rpc_pipefs rpc_unlink() and rpc_rmdir() will dput the dentry reference for you. Signed-off-by: Trond Myklebust (cherry picked from a05a57effa71a1f67ccbfc52335c10c8b85f3f6a commit) --- fs/nfs/idmap.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index df0be1214358..07a5dd57646e 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -131,7 +131,6 @@ nfs_idmap_delete(struct nfs4_client *clp) if (!idmap) return; rpc_unlink(idmap->idmap_dentry); - dput(idmap->idmap_dentry); clp->cl_idmap = NULL; kfree(idmap); } -- cgit v1.2.2 From 01df9c5e918ae5559f2d96da0143f8bfbb9e6171 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Aug 2006 11:58:57 -0400 Subject: LOCKD: Fix a deadlock in nlm_traverse_files() nlm_traverse_files() is not allowed to hold the nlm_file_mutex while calling nlm_inspect file, since it may end up calling nlm_release_file() when releaseing the blocks. Signed-off-by: Trond Myklebust (cherry picked from e558d3cde986e04f68afe8c790ad68ef4b94587a commit) --- fs/lockd/svcsubs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 2a4df9b3779a..01b4db9e5466 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -237,19 +237,22 @@ static int nlm_traverse_files(struct nlm_host *host, int action) { struct nlm_file *file, **fp; - int i; + int i, ret = 0; mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { fp = nlm_files + i; while ((file = *fp) != NULL) { + file->f_count++; + mutex_unlock(&nlm_file_mutex); + /* Traverse locks, blocks and shares of this file * and update file->f_locks count */ - if (nlm_inspect_file(host, file, action)) { - mutex_unlock(&nlm_file_mutex); - return 1; - } + if (nlm_inspect_file(host, file, action)) + ret = 1; + mutex_lock(&nlm_file_mutex); + file->f_count--; /* No more references to this file. Let go of it. */ if (!file->f_blocks && !file->f_locks && !file->f_shares && !file->f_count) { @@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action) } } mutex_unlock(&nlm_file_mutex); - return 0; + return ret; } /* -- cgit v1.2.2 From 79558f3610efd7928e8882b2eaca3093b283630e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 13:44:32 -0400 Subject: NFS: Fix issue with EIO on NFS read The problem is that we may be caching writes that would extend the file and create a hole in the region that we are reading. 
In this case, we need to detect the eof from the server, ensure that we zero out the pages that are part of the hole and mark them as up to date. Signed-off-by: Trond Myklebust (cherry picked from 856b603b01b99146918c093969b6cb1b1b0f1c01 commit) --- fs/nfs/read.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 65c0c5b32351..da9cf11c326f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; base &= ~PAGE_CACHE_MASK; pglen = PAGE_CACHE_SIZE - base; - if (pglen < remainder) + for (;;) { + if (remainder <= pglen) { + memclear_highpage_flush(*pages, base, remainder); + break; + } memclear_highpage_flush(*pages, base, pglen); - else - memclear_highpage_flush(*pages, base, remainder); + pages++; + remainder -= pglen; + pglen = PAGE_CACHE_SIZE; + base = 0; + } } /* @@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) unsigned int base = data->args.pgbase; struct page **pages; + if (data->res.eof) + count = data->args.count; if (unlikely(count == 0)) return; pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; @@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageUptodate(*pages); - /* - * Was this an eof or a short read? If the latter, don't mark the page - * as uptodate yet. - */ - if (count > 0 && (data->res.eof || data->args.count == data->res.count)) + if (count != 0) SetPageUptodate(*pages); } @@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data) count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageError(*pages); + if (count != 0) + SetPageError(*pages); } /* -- cgit v1.2.2 From e8896495bca8490a427409e0886d63d05419ec65 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 24 Aug 2006 15:44:19 -0400 Subject: NFS: Check lengths more thoroughly in NFS4 readdir XDR decode Check the bounds of length specifiers more thoroughly in the XDR decoding of NFS4 readdir reply data. Currently, if the server returns a bitmap or attr length that causes the current decode point pointer to wrap, this could go undetected (consider a small "negative" length on a 32-bit machine). Also add a check into the main XDR decode handler to make sure that the amount of data is a multiple of four bytes (as specified by RFC-1014). This makes sure that we can do u32* pointer subtraction in the NFS client without risking an undefined result (the result is undefined if the pointers are not correctly aligned with respect to one another). 
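To make the overflow concern concrete (illustrative user-space sketch, not part of the patch): with an attacker-supplied length, a check of the form "p + xlen > end" can wrap around the top of the address space and appear to stay in bounds, whereas "end - p < xlen" compares the remaining room directly and cannot. The demo below models the wrap with uintptr_t arithmetic so it is well defined:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t buf[16];
		uint32_t *p = buf, *end = buf + 16;
		/* Huge word count as it might arrive off the wire. */
		size_t xlen = SIZE_MAX / sizeof(uint32_t);

		/* Old-style check: advancing the cursor wraps modulo the
		 * address-space size, so the bogus length looks in-bounds. */
		uintptr_t advanced = (uintptr_t)p + xlen * sizeof(uint32_t);
		if (advanced <= (uintptr_t)end)
			printf("wrapped: bogus length passes the old check\n");

		/* New-style check used by the patch: compare remaining room. */
		if ((size_t)(end - p) < xlen)
			printf("rejected: end - p < xlen catches it\n");

		return 0;
	}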
Signed-Off-By: David Howells Signed-off-by: Trond Myklebust (cherry picked from 5861fddd64a7eaf7e8b1a9997455a24e7f688092 commit) --- fs/nfs/nfs4xdr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1750d996f49f..730ec8fb31c6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n struct kvec *iov = rcvbuf->head; unsigned int nr, pglen = rcvbuf->page_len; uint32_t *end, *entry, *p, *kaddr; - uint32_t len, attrlen; + uint32_t len, attrlen, xlen; int hdrlen, recvd, status; status = decode_op_hdr(xdr, OP_READDIR); @@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); - end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); + end = p + ((pglen + readdir->pgbase) >> 2); entry = p; for (nr = 0; *p++; nr++) { - if (p + 3 > end) + if (end - p < 3) goto short_pkt; dprintk("cookie = %Lu, ", *((unsigned long long *)p)); p += 2; /* cookie */ @@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } - dprintk("filename = %*s\n", len, (char *)p); - p += XDR_QUADLEN(len); - if (p + 1 > end) + xlen = XDR_QUADLEN(len); + if (end - p < xlen + 1) goto short_pkt; + dprintk("filename = %*s\n", len, (char *)p); + p += xlen; len = ntohl(*p++); /* bitmap length */ - p += len; - if (p + 1 > end) + if (end - p < len + 1) goto short_pkt; + p += len; attrlen = XDR_QUADLEN(ntohl(*p++)); - p += attrlen; /* attributes */ - if (p + 2 > end) + if (end - p < attrlen + 2) goto short_pkt; + p += attrlen; /* attributes */ entry = p; } if (!nr && (entry[0] != 0 || entry[1] == 0)) -- cgit v1.2.2 From 16b4289c7460ba9c04af40c574949dcca9029658 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Aug 2006 12:27:15 -0400 Subject: NFSv4: Add v4 exception handling for the ACL functions. This is needed in order to handle any NFS4ERR_DELAY errors that might be returned by the server. It also ensures that we map the NFSv4 errors before they are returned to userland. 
Signed-off-by: Trond Myklebust (cherry picked from 71c12b3f0abc7501f6ed231a6d17bc9c05a238dc commit) --- fs/nfs/nfs4proc.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e6ee97f19d81..153898e1331f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2668,7 +2668,7 @@ out: nfs4_set_cached_acl(inode, acl); } -static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { struct page *pages[NFS4ACL_MAXPAGES]; struct nfs_getaclargs args = { @@ -2721,6 +2721,19 @@ out_free: return ret; } +static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + ssize_t ret; + do { + ret = __nfs4_get_acl_uncached(inode, buf, buflen); + if (ret >= 0) + break; + ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); + } while (exception.retry); + return ret; +} + static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); @@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) return nfs4_get_acl_uncached(inode, buf, buflen); } -static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFS4ACL_MAXPAGES]; @@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen return ret; } +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(inode), + __nfs4_proc_set_acl(inode, buf, buflen), + &exception); + } while (exception.retry); + return err; +} + static int nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) { -- cgit v1.2.2 From a343bb7750e6a098909c34f5c5dfddbc4fa40053 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:03 -0400 Subject: VFS: Fix access("file", X_OK) in the presence of ACLs Currently, the access() call will return incorrect information on NFS if there exists an ACL that grants execute access to the user on a regular file. The reason the information is incorrect is that the VFS overrides this execute access in open_exec() by checking (inode->i_mode & 0111). This patch propagates the VFS execute bit check back into the generic permission() call. Signed-off-by: Trond Myklebust (cherry picked from 64cbae98848c4c99851cb0a405f0b4982cd76c1e commit) --- fs/namei.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 863166441bf3..432d6bc6fab0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask, int permission(struct inode *inode, int mask, struct nameidata *nd) { + umode_t mode = inode->i_mode; int retval, submask; if (mask & MAY_WRITE) { - umode_t mode = inode->i_mode; /* * Nobody gets write access to a read-only fs. @@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) } + /* + * MAY_EXEC on regular files requires special handling: We override + * filesystem execute permissions if the mode bits aren't set. 
+ */ + if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO)) + return -EACCES; + /* Ordinary permission routines do not understand MAY_APPEND. */ submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) -- cgit v1.2.2 From 9167b0b9a0ab7907191523f5a0528e3b9c288e21 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:03 -0400 Subject: VFS: Remove redundant open-coded mode bit check in prepare_binfmt(). The check in prepare_binfmt() for inode->i_mode & 0111 is redundant, since open_exec() will already have done that. Signed-off-by: Trond Myklebust (cherry picked from 822dec482ced07af32c378cd936d77345786572b commit) --- fs/exec.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 8344ba73a2a6..a6f64a98ac50 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -922,12 +922,6 @@ int prepare_binprm(struct linux_binprm *bprm) int retval; mode = inode->i_mode; - /* - * Check execute perms again - if the caller has CAP_DAC_OVERRIDE, - * generic_permission lets a non-executable through - */ - if (!(mode & 0111)) /* with at least _one_ execute bit set */ - return -EACCES; if (bprm->file->f_op == NULL) return -EACCES; -- cgit v1.2.2 From a969fd5a4e162c4485ae8f3e49d674656a18fa36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Aug 2006 20:06:04 -0400 Subject: VFS: Remove redundant open-coded mode bit checks in open_exec(). The check in open_exec() for inode->i_mode & 0111 has been made redundant by the fix to permission(). Signed-off-by: Trond Myklebust (cherry picked from 1d3741c5d991686699f100b65b9956f7ee7ae0ae commit) --- fs/exec.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index a6f64a98ac50..f7aabfeca033 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -486,8 +486,6 @@ struct file *open_exec(const char *name) if (!(nd.mnt->mnt_flags & MNT_NOEXEC) && S_ISREG(inode->i_mode)) { int err = vfs_permission(&nd, MAY_EXEC); - if (!err && !(inode->i_mode & 0111)) - err = -EACCES; file = ERR_PTR(err); if (!err) { file = nameidata_to_filp(&nd, O_RDONLY); -- cgit v1.2.2 From 81a42d298d8bd1b96be4bd459494f25fdd99b594 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 25 Aug 2006 15:58:57 -0700 Subject: [DISKLABEL] SUN: Fix signed int usage for sector count The current sun disklabel code uses a signed int for the sector count. When partitions larger than 1 TB are used, the cast to a sector_t causes the partition sizes to be invalid: # cat /proc/paritions | grep sdan 66 112 2146435072 sdan 66 115 9223372036853660736 sdan3 66 120 9223372036853660736 sdan8 This patch switches the sector count to an unsigned int to fix this. Signed-off-by: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- fs/partitions/sun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c index abe91ca03edf..0a5927c806ca 100644 --- a/fs/partitions/sun.c +++ b/fs/partitions/sun.c @@ -74,7 +74,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev) spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); for (i = 0; i < 8; i++, p++) { unsigned long st_sector; - int num_sectors; + unsigned int num_sectors; st_sector = be32_to_cpu(p->start_cylinder) * spc; num_sectors = be32_to_cpu(p->num_sectors); -- cgit v1.2.2 From 6946bd636364effce06ea46fe8f8cd6e2edb004e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 27 Aug 2006 01:23:31 -0700 Subject: [PATCH] lockdep: fix blkdev_open() warning On Wed, 2006-08-09 at 07:57 +0200, Rolf Eike Beer wrote: > ============================================= > [ INFO: possible recursive locking detected ] > --------------------------------------------- > parted/7929 is trying to acquire lock: > (&bdev->bd_mutex){--..}, at: [] __blkdev_put+0x1e/0x13c > > but task is already holding lock: > (&bdev->bd_mutex){--..}, at: [] do_open+0x72/0x3a8 > > other info that might help us debug this: > 1 lock held by parted/7929: > #0: (&bdev->bd_mutex){--..}, at: [] do_open+0x72/0x3a8 > stack backtrace: > [] show_trace_log_lvl+0x58/0x15b > [] show_trace+0xd/0x10 > [] dump_stack+0x17/0x1a > [] __lock_acquire+0x753/0x99c > [] lock_acquire+0x4a/0x6a > [] mutex_lock_nested+0xc8/0x20c > [] __blkdev_put+0x1e/0x13c > [] blkdev_put+0xa/0xc > [] do_open+0x336/0x3a8 > [] blkdev_open+0x1f/0x4c > [] __dentry_open+0xc7/0x1aa > [] nameidata_to_filp+0x1c/0x2e > [] do_filp_open+0x2e/0x35 > [] do_sys_open+0x38/0x68 > [] sys_open+0x16/0x18 > [] sysenter_past_esp+0x56/0x8d OK, I'm having a look here; its all new to me so bear with me. blkdev_open() calls do_open(bdev, ...,BD_MUTEX_NORMAL) and takes mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_NORMAL) then something fails, and we're thrown to: out_first: where if (bdev != bdev->bd_contains) blkdev_put(bdev->bd_contains) which is __blkdev_put(bdev->bd_contains, BD_MUTEX_NORMAL) which does mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_NORMAL) <--- lockdep trigger When going to out_first, dbev->bd_contains is either bdev or whole, and since we take the branch it must be whole. 
So it seems to me the following patch would be the right one: [akpm@osdl.org: compile fix] Signed-off-by: Peter Zijlstra Cc: Arjan van de Ven Acked-by: NeilBrown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/block_dev.c | 114 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 58 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 37534573960b..045f98854f14 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); +static int __blkdev_put(struct block_device *bdev, unsigned int subclass) +{ + int ret = 0; + struct inode *bd_inode = bdev->bd_inode; + struct gendisk *disk = bdev->bd_disk; + + mutex_lock_nested(&bdev->bd_mutex, subclass); + lock_kernel(); + if (!--bdev->bd_openers) { + sync_blockdev(bdev); + kill_bdev(bdev); + } + if (bdev->bd_contains == bdev) { + if (disk->fops->release) + ret = disk->fops->release(bd_inode, NULL); + } else { + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + subclass + 1); + bdev->bd_contains->bd_part_count--; + mutex_unlock(&bdev->bd_contains->bd_mutex); + } + if (!bdev->bd_openers) { + struct module *owner = disk->fops->owner; + + put_disk(disk); + module_put(owner); + + if (bdev->bd_contains != bdev) { + kobject_put(&bdev->bd_part->kobj); + bdev->bd_part = NULL; + } + bdev->bd_disk = NULL; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; + if (bdev != bdev->bd_contains) + __blkdev_put(bdev->bd_contains, subclass + 1); + bdev->bd_contains = NULL; + } + unlock_kernel(); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + return ret; +} + +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_NORMAL); +} +EXPORT_SYMBOL(blkdev_put); + +int blkdev_put_partition(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_PARTITION); +} +EXPORT_SYMBOL(blkdev_put_partition); + static int blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); @@ -980,7 +1035,7 @@ out_first: bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - blkdev_put(bdev->bd_contains); + __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE); bdev->bd_contains = NULL; put_disk(disk); module_put(owner); @@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp) return res; } -static int __blkdev_put(struct block_device *bdev, unsigned int subclass) -{ - int ret = 0; - struct inode *bd_inode = bdev->bd_inode; - struct gendisk *disk = bdev->bd_disk; - - mutex_lock_nested(&bdev->bd_mutex, subclass); - lock_kernel(); - if (!--bdev->bd_openers) { - sync_blockdev(bdev); - kill_bdev(bdev); - } - if (bdev->bd_contains == bdev) { - if (disk->fops->release) - ret = disk->fops->release(bd_inode, NULL); - } else { - mutex_lock_nested(&bdev->bd_contains->bd_mutex, - subclass + 1); - bdev->bd_contains->bd_part_count--; - mutex_unlock(&bdev->bd_contains->bd_mutex); - } - if (!bdev->bd_openers) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); - - if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->kobj); - bdev->bd_part = NULL; - } - bdev->bd_disk = NULL; - bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, subclass + 1); - bdev->bd_contains = NULL; - } - unlock_kernel(); - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); - 
return ret; -} - -int blkdev_put(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_NORMAL); -} - -EXPORT_SYMBOL(blkdev_put); - -int blkdev_put_partition(struct block_device *bdev) -{ - return __blkdev_put(bdev, BD_MUTEX_PARTITION); -} - -EXPORT_SYMBOL(blkdev_put_partition); - static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); -- cgit v1.2.2 From f5fb09fa3392ad43fbcfc2f4580752f383ab5996 Mon Sep 17 00:00:00 2001 From: Andries Brouwer Date: Sun, 27 Aug 2006 01:23:42 -0700 Subject: [PATCH] Fix for minix crash Mounting a (corrupt) minix filesystem with zero s_zmap_blocks gives a spectacular crash on my 2.6.17.8 system, no doubt because minix/inode.c does an unconditional minix_set_bit(0,sbi->s_zmap[0]->b_data); [akpm@osdl.org: make labels conistent while we're there] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/inode.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 9ea91c5eeb7b..330ff9fc7cf0 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) /* * Allocate the buffer map to keep the superblock small. */ + if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0) + goto out_illegal_sb; i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh); map = kmalloc(i, GFP_KERNEL); if (!map) @@ -263,7 +265,7 @@ out_no_root: out_no_bitmap: printk("MINIX-fs: bad superblock or unable to read bitmaps\n"); - out_freemap: +out_freemap: for (i = 0; i < sbi->s_imap_blocks; i++) brelse(sbi->s_imap[i]); for (i = 0; i < sbi->s_zmap_blocks; i++) @@ -276,11 +278,16 @@ out_no_map: printk("MINIX-fs: can't allocate map\n"); goto out_release; +out_illegal_sb: + if (!silent) + printk("MINIX-fs: bad superblock\n"); + goto out_release; + out_no_fs: if (!silent) printk("VFS: Can't find a Minix or Minix V2 filesystem " "on device %s\n", s->s_id); - out_release: +out_release: brelse(bh); goto out; @@ -290,7 +297,7 @@ out_bad_hblock: out_bad_sb: printk("MINIX-fs: unable to read superblock\n"); - out: +out: s->s_fs_info = NULL; kfree(sbi); return -EINVAL; -- cgit v1.2.2 From 607eb266aea9dd2abe515985e12c5cd8b32546e8 Mon Sep 17 00:00:00 2001 From: Andries Brouwer Date: Sun, 27 Aug 2006 01:23:43 -0700 Subject: [PATCH] ext2: prevent div-by-zero on corrupted fs Mounting an ext2 filesystem with zero s_inodes_per_group will cause a divide error. 
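For illustration, a minimal standalone sketch of the failure (the structure and function names are invented; only the arithmetic mirrors ext2, where a group number is derived from a 1-based inode number):

struct demo_sb {
	unsigned int s_inodes_per_group;	/* read from the on-disk superblock */
};

static long demo_inode_group(unsigned long ino, const struct demo_sb *sb)
{
	if (sb->s_inodes_per_group == 0)	/* check now done at mount time */
		return -1;
	/*
	 * Without the check above, a corrupt superblock with a zero
	 * s_inodes_per_group turns the first inode lookup into a
	 * divide-by-zero oops.
	 */
	return (ino - 1) / sb->s_inodes_per_group;
}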
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f2702cda9779..681dea8f9532 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_INODE_SIZE(sb) == 0) goto cantfind_ext2; sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb); - if (sbi->s_inodes_per_block == 0) + if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0) goto cantfind_ext2; sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; -- cgit v1.2.2 From 08fb306fe63d98eb86e3b16f4cc21816fa47f18e Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Sun, 27 Aug 2006 01:23:44 -0700 Subject: [PATCH] ext3 filesystem bogus ENOSPC with reservation fix To handle the earlier bogus ENOSPC error caused by filesystem full of block reservation, current code falls back to non block reservation, starts to allocate block(s) from the goal allocation block group as if there is no block reservation. Current code needs to re-load the corresponding block group descriptor for the initial goal block group in this case. The patch fixes this. Signed-off-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/balloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a504a40d6d29..063d994bda0b 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, goal = le32_to_cpu(es->s_first_data_block); group_no = (goal - le32_to_cpu(es->s_first_data_block)) / EXT3_BLOCKS_PER_GROUP(sb); + goal_group = group_no; +retry_alloc: gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); if (!gdp) goto io_error; - goal_group = group_no; -retry: free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); /* * if there is not enough free blocks to make a new resevation @@ -1349,7 +1349,7 @@ retry: if (my_rsv) { my_rsv = NULL; group_no = goal_group; - goto retry; + goto retry_alloc; } /* No space left on the device */ *errp = -ENOSPC; -- cgit v1.2.2 From c37336b078ba9d2ff38c535b194996a7ad6e69f8 Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Sun, 27 Aug 2006 01:23:45 -0700 Subject: [PATCH] ufs: write to hole in big file On UFS, this scenario: open(O_TRUNC) lseek(1024 * 1024 * 80) write("A") lseek(1024 * 2) write("A") may cause access to invalid address. This happened because of "goal" is calculated in wrong way in block allocation path, as I see this problem exists also in 2.4. We use construction like this i_data[lastfrag], i_data array of pointers to direct blocks, indirect and so on, it has ceratain size ~20 elements, and lastfrag may have value for example 40000. 
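As a rough sketch of that out-of-range indexing (array size and field names are approximations, not the real struct ufs_inode_info, and this is not the actual fix):

#include <stdint.h>

#define DEMO_NDADDR 15			/* direct + indirect block pointers */

struct demo_ufs_inode {
	uint32_t i_data[DEMO_NDADDR];
	uint32_t i_lastfrag;
};

static uint32_t demo_alloc_goal(const struct demo_ufs_inode *ui,
				unsigned int frags_per_block)
{
	uint32_t lastblock = ui->i_lastfrag / frags_per_block;

	/*
	 * After open(O_TRUNC), lseek(80MB), write(), i_lastfrag maps to
	 * a logical block number far beyond the array, so deriving the
	 * goal from i_data[lastblock] reads past the end of i_data.
	 * A sane goal can only come from a pointer that exists and is
	 * actually set.
	 */
	if (lastblock >= DEMO_NDADDR || ui->i_data[lastblock] == 0)
		return 0;		/* let the allocator choose */
	return ui->i_data[lastblock] + frags_per_block;
}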
Also this patch fixes related to handling such scenario issues, wrong zeroing metadata, in case of block(not fragment) allocation, and wrong goal calculation, when we allocate block Signed-off-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/inode.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index e7c8615beb65..30c6e8a9446c 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh) static struct buffer_head * ufs_clear_frags(struct inode *inode, sector_t beg, - unsigned int n) + unsigned int n, sector_t want) { - struct buffer_head *res, *bh; + struct buffer_head *res = NULL, *bh; sector_t end = beg + n; - res = sb_getblk(inode->i_sb, beg); - ufs_clear_frag(inode, res); - for (++beg; beg < end; ++beg) { + for (; beg < end; ++beg) { bh = sb_getblk(inode->i_sb, beg); ufs_clear_frag(inode, bh); - brelse(bh); + if (want != beg) + brelse(bh); + else + res = bh; } + BUG_ON(!res); return res; } @@ -265,7 +267,9 @@ repeat: lastfrag = ufsi->i_lastfrag; } - goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb; + tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]); + if (tmp) + goal = tmp + uspi->s_fpb; tmp = ufs_new_fragments (inode, p, fragment - blockoff, goal, required + blockoff, err, locked_page); @@ -277,13 +281,15 @@ repeat: tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff), fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff), err, locked_page); - } + } else /* (lastblock > block) */ { /* * We will allocate new block before last allocated block */ - else /* (lastblock > block) */ { - if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1]))) - goal = tmp + uspi->s_fpb; + if (block) { + tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]); + if (tmp) + goal = tmp + uspi->s_fpb; + } tmp = ufs_new_fragments(inode, p, fragment - blockoff, goal, uspi->s_fpb, err, locked_page); } @@ -296,7 +302,7 @@ repeat: } if (!phys) { - result = ufs_clear_frags(inode, tmp + blockoff, required); + result = ufs_clear_frags(inode, tmp, required, tmp + blockoff); } else { *phys = tmp + blockoff; result = NULL; @@ -383,7 +389,7 @@ repeat: } } - if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb)) + if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]))) goal = tmp + uspi->s_fpb; else goal = bh->b_blocknr + uspi->s_fpb; @@ -397,7 +403,8 @@ repeat: if (!phys) { - result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb); + result = ufs_clear_frags(inode, tmp, uspi->s_fpb, + tmp + blockoff); } else { *phys = tmp + blockoff; *new = 1; -- cgit v1.2.2 From ecdc63948763586e101108dfe1ba316ec069fe39 Mon Sep 17 00:00:00 2001 From: Evgeniy Dushistov Date: Sun, 27 Aug 2006 01:23:46 -0700 Subject: [PATCH] ufs: truncate correction 1) When we allocated last fragment in ufs_truncate, we read page, check if block mapped to address, and if not trying to allocate it. This is wrong behaviour, fragment may be NOT allocated, but mapped, this happened because of "block map" function not checked allocated fragment or not, it just take address of the first fragment in the block, add offset of fragment and return result, this is correct behaviour in almost all situation except call from ufs_truncate. 
2) Almost all implementation of UFS, which I can investigate have such "defect": if you have full disk, and try truncate file, for example 3GB to 2MB, and have hole in this region, truncate return -ENOSPC. I tried evade from this problem, but "block allocation" algorithm is tied to right value of i_lastfrag, and fix of this corner case may slow down of ordinaries scenarios, so this patch makes behavior of "truncate" operations similar to what other UFS implementations do. Signed-off-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/truncate.c | 77 ++++++++++++++++++------------------------------------- 1 file changed, 25 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index c9b55872079b..ea11d04c41a0 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode) int err = 0; struct address_space *mapping = inode->i_mapping; struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; - struct ufs_inode_info *ufsi = UFS_I(inode); unsigned lastfrag, i, end; struct page *lastpage; struct buffer_head *bh; lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift; - if (!lastfrag) { - ufsi->i_lastfrag = 0; + if (!lastfrag) goto out; - } + lastfrag--; lastpage = ufs_get_locked_page(mapping, lastfrag >> @@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode) for (i = 0; i < end; ++i) bh = bh->b_this_page; - if (!buffer_mapped(bh)) { - err = ufs_getfrag_block(inode, lastfrag, bh, 1); - - if (unlikely(err)) - goto out_unlock; - - if (buffer_new(bh)) { - clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); - /* - * we do not zeroize fragment, because of - * if it maped to hole, it already contains zeroes - */ - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - set_page_dirty(lastpage); - } + + err = ufs_getfrag_block(inode, lastfrag, bh, 1); + + if (unlikely(err)) + goto out_unlock; + + if (buffer_new(bh)) { + clear_buffer_new(bh); + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + /* + * we do not zeroize fragment, because of + * if it maped to hole, it already contains zeroes + */ + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + set_page_dirty(lastpage); } + out_unlock: ufs_put_locked_page(lastpage); out: @@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; - if (inode->i_size > old_i_size) { - /* - * if we expand file we should care about - * allocation of block for last byte first of all - */ - err = ufs_alloc_lastblock(inode); + err = ufs_alloc_lastblock(inode); - if (err) { - i_size_write(inode, old_i_size); - goto out; - } - /* - * go away, because of we expand file, and we do not - * need free blocks, and zeroizes page - */ - lock_kernel(); - goto almost_end; + if (err) { + i_size_write(inode, old_i_size); + goto out; } block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); @@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) yield(); } - if (inode->i_size < old_i_size) { - /* - * now we should have enough space - * to allocate block for last byte - */ - err = ufs_alloc_lastblock(inode); - if (err) - /* - * looks like all the same - we have no space, - * but we truncate file already - */ - inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize; - } -almost_end: inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + ufsi->i_lastfrag = 
DIRECT_FRAGMENT; unlock_kernel(); mark_inode_dirty(inode); out: -- cgit v1.2.2 From 45f17e0c2ae05c133a348452690de0e5fa863293 Mon Sep 17 00:00:00 2001 From: Masoud Asgharifard Sharbiani Date: Sun, 27 Aug 2006 01:23:48 -0700 Subject: [PATCH] eventpoll.c compile fix Fix two compile failures in eventpoll.c code which would happen if DEBUG_EPOLL is bigger than zero. Signed-off-by: Masoud Sharbiani Cc: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 19ffb043abbc..3a3567433b92 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) eexit_1: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", - current, ep, epi->file, error)); + current, ep, epi->ffd.file, error)); return error; } @@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct eventpoll *ep = epi->ep; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", - current, epi->file, epi, ep)); + current, epi->ffd.file, epi, ep)); write_lock_irqsave(&ep->lock, flags); -- cgit v1.2.2 From ea817398e68dfa25612229fda7fc74580cf915fb Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sun, 27 Aug 2006 01:23:52 -0700 Subject: [PATCH] Manage jbd allocations from its own slabs JBD currently allocates commit and frozen buffers from slabs. With CONFIG_SLAB_DEBUG, its possible for an allocation to cross the page boundary causing IO problems. https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=200127 So, instead of allocating these from regular slabs - manage allocation from its own slabs and disable slab debug for these slabs. [akpm@osdl.org: cleanups] Signed-off-by: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/commit.c | 6 ++-- fs/jbd/journal.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++---- fs/jbd/transaction.c | 9 ++--- 3 files changed, 94 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 0971814c38b8..42da60784311 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal) struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } @@ -745,14 +745,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. 
*/ if (jh->b_committed_data) { - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 8c9b28dff119..f66724ce443a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static int journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -328,10 +329,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS); + tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - kfree(tmp); + jbd_slab_free(tmp, bh_in->b_size); goto repeat; } @@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal) int journal_load(journal_t *journal) { int err; + journal_superblock_t *sb; err = load_superblock(journal); if (err) return err; + sb = journal->j_superblock; /* If this is a V2 superblock, then we have to check the * features flags on it. */ if (journal->j_format_version >= 2) { - journal_superblock_t *sb = journal->j_superblock; - if ((sb->s_feature_ro_compat & ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || (sb->s_feature_incompat & @@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal) } } + /* + * Create a slab for this blocksize + */ + err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize)); + if (err) + return err; + /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (journal_recover(journal)) @@ -1611,6 +1619,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); } +/* + * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed + * and allocate frozen and commit buffers from these slabs. + * + * Reason for doing this is to avoid, SLAB_DEBUG - since it could + * cause bh to cross page boundary. + */ + +#define JBD_MAX_SLABS 5 +#define JBD_SLAB_INDEX(size) (size >> 11) + +static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static const char *jbd_slab_names[JBD_MAX_SLABS] = { + "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" +}; + +static void journal_destroy_jbd_slabs(void) +{ + int i; + + for (i = 0; i < JBD_MAX_SLABS; i++) { + if (jbd_slab[i]) + kmem_cache_destroy(jbd_slab[i]); + jbd_slab[i] = NULL; + } +} + +static int journal_create_jbd_slab(size_t slab_size) +{ + int i = JBD_SLAB_INDEX(slab_size); + + BUG_ON(i >= JBD_MAX_SLABS); + + /* + * Check if we already have a slab created for this size + */ + if (jbd_slab[i]) + return 0; + + /* + * Create a slab and force alignment to be same as slabsize - + * this will make sure that allocations won't cross the page + * boundary. 
+ */ + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], + slab_size, slab_size, 0, NULL, NULL); + if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +void * jbd_slab_alloc(size_t size, gfp_t flags) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); +} + +void jbd_slab_free(void *ptr, size_t size) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); +} + /* * Journal_head storage management */ @@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - kfree(jh->b_frozen_data); + jbd_slab_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - kfree(jh->b_committed_data); + jbd_slab_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void) journal_destroy_revoke_caches(); journal_destroy_journal_head_cache(); journal_destroy_handle_cache(); + journal_destroy_jbd_slabs(); } static int __init journal_init(void) diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 508b2ea91f43..de2e4cbbf79a 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -666,8 +666,9 @@ repeat: if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); - frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, - GFP_NOFS); + frozen_buffer = + jbd_slab_alloc(jh2bh(jh)->b_size, + GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG "%s: OOM for frozen_buffer\n", @@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) repeat: if (!jh->b_committed_data) { - committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -906,7 +907,7 @@ repeat: out: journal_put_journal_head(jh); if (unlikely(committed_data)) - kfree(committed_data); + jbd_slab_free(committed_data, bh->b_size); return err; } -- cgit v1.2.2 From 4df46240a1312161e3c794f6ace50ef7eb5ff3d7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 27 Aug 2006 01:23:56 -0700 Subject: [PATCH] lockdep: annotate reiserfs reiserfs seems to have another locking level layer for the i_mutex due to the xattrs-are-a-directory thing. 
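For illustration, a minimal sketch of the annotation pattern (not actual reiserfs code): two i_mutex locks of the same lock class are nested by design, a file and its private xattr directory, so the inner acquisition carries a distinct lockdep subclass.

#include <linux/fs.h>
#include <linux/mutex.h>

static void demo_lock_xattr_dir(struct inode *inode, struct inode *xattr_dir)
{
	mutex_lock(&inode->i_mutex);
	/* I_MUTEX_XATTR tells lockdep this nesting is intentional */
	mutex_lock_nested(&xattr_dir->i_mutex, I_MUTEX_XATTR);

	/* ... operate on the xattr directory ... */

	mutex_unlock(&xattr_dir->i_mutex);
	mutex_unlock(&inode->i_mutex);
}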
Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 39fedaa88a0c..d935fb9394e3 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf) int res = -ENOTDIR; if (!file->f_op || !file->f_op->readdir) goto out; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR); // down(&inode->i_zombie); res = -ENOENT; if (!IS_DEADDIR(inode)) { -- cgit v1.2.2 From 513627d7fec6fcb7b3d56ce355cb4d192c76b530 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 27 Aug 2006 01:23:57 -0700 Subject: [PATCH] fix up lockdep trace in fs/exec.c This fixes the locking error noticed by lockdep: ============================================= [ INFO: possible recursive locking detected ] --------------------------------------------- init/1 is trying to acquire lock: (&sighand->siglock){....}, at: [] flush_old_exec+0x3ae/0x859 but task is already holding lock: (&sighand->siglock){....}, at: [] flush_old_exec+0x39e/0x859 other info that might help us debug this: 2 locks held by init/1: #0: (tasklist_lock){..--}, at: [] flush_old_exec+0x38e/0x859 #1: (&sighand->siglock){....}, at: [] flush_old_exec+0x39e/0x859 stack backtrace: [] show_trace_log_lvl+0x54/0xfd [] show_trace+0xd/0x10 [] dump_stack+0x19/0x1b [] __lock_acquire+0x773/0x997 [] lock_acquire+0x4b/0x6c [] _spin_lock+0x19/0x28 [] flush_old_exec+0x3ae/0x859 [] load_elf_binary+0x4aa/0x1628 [] search_binary_handler+0xa7/0x24e [] do_execve+0x15b/0x1f9 [] sys_execve+0x29/0x4d [] syscall_call+0x7/0xb Signed-off-by: Arjan van de Ven Signed-off-by: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index f7aabfeca033..54135df2a966 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -751,7 +751,7 @@ no_thread_group: write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); - spin_lock(&newsighand->siglock); + spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING); rcu_assign_pointer(current->sighand, newsighand); recalc_sigpending(); -- cgit v1.2.2 From f5ef68da5fda5e095b585ea5ecdd42af3c8695f7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 27 Aug 2006 01:23:58 -0700 Subject: [PATCH] /proc/meminfo: don't put spaces in names None of the other /proc/meminfo lines have a space in the identifier. This post-2.6.17 addition has the potential to break existing parsers, so use an underscore instead (like Committed_AS). Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 9f2cfc30f9cf..942156225447 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "Mapped: %8lu kB\n" "Slab: %8lu kB\n" "PageTables: %8lu kB\n" - "NFS Unstable: %8lu kB\n" + "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" "CommitLimit: %8lu kB\n" "Committed_AS: %8lu kB\n" -- cgit v1.2.2 From 7288026b8671061aff7663b1766037b3f2573627 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 30 Aug 2006 13:41:58 +1000 Subject: [XFS] Fix char size overflow in bmap_alloc call for unwritten extent conversion. 
Since bma.conv is a char and XFS_BMAPI_CONVERT is 0x1000, bma.conv was always assigned zero. Spotted by the GNU C compiler (SVN version). SGI-PV: 947312 SGI-Modid: xfs-linux-melb:xfs-kern:26887a Signed-off-by: Adrian Bunk Signed-off-by: Nathan Scott --- fs/xfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3a6137539064..bf46fae303af 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4993,7 +4993,7 @@ xfs_bmapi( bma.firstblock = *firstblock; bma.alen = alen; bma.off = aoff; - bma.conv = (flags & XFS_BMAPI_CONVERT); + bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; bma.minlen = minlen; bma.low = flist->xbf_low; -- cgit v1.2.2
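To see why the assignment was always zero, a small standalone demo (the macro name stands in for XFS_BMAPI_CONVERT; it is not XFS code):

#include <stdio.h>

#define DEMO_BMAPI_CONVERT 0x1000

int main(void)
{
	int flags = DEMO_BMAPI_CONVERT;
	/* narrowing keeps only the low 8 bits of 0x1000, i.e. zero */
	char broken = flags & DEMO_BMAPI_CONVERT;
	/* "!!" collapses the result to 0 or 1 before the narrowing */
	char fixed  = !!(flags & DEMO_BMAPI_CONVERT);

	printf("broken=%d fixed=%d\n", broken, fixed);	/* broken=0 fixed=1 */
	return 0;
}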