From 72f465033702ebfe20db8f50edaad59f0f38b0f5 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 23 Aug 2010 13:35:09 +0200 Subject: bio-integrity.c: remove dependency on __GFP_NOFAIL The kmalloc() in bio_integrity_prep() is failable, so remove __GFP_NOFAIL from its mask. Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 612a5c38d3c1..a8f4cc679983 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -413,7 +413,7 @@ int bio_integrity_prep(struct bio *bio) /* Allocate kernel buffer for protection data */ len = sectors * blk_integrity_tuple_size(bi); - buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp); + buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); return -EIO; -- cgit v1.2.2 From 220eb7fd984bfc7e6b4005fdf32efe9cd8af7cf2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 23 Aug 2010 13:36:20 +0200 Subject: fs/bio-integrity.c: return -ENOMEM on kmalloc failure Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index a8f4cc679983..4d0ff5ee27b8 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -416,7 +416,7 @@ int bio_integrity_prep(struct bio *bio) buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); - return -EIO; + return -ENOMEM; } end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; -- cgit v1.2.2 From b76b4014f9d988d2412b873e4d4c13c7f9afc4e4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 28 Aug 2010 08:52:10 +0200 Subject: writeback: Fix lost wake-up shutting down writeback thread Setting the task state here may cause us to miss the wake up from kthread_stop(), so we need to recheck kthread_should_stop() or risk sleeping forever in the following schedule(). Symptom was an indefinite hang on an NFSv4 mount. (NFSv4 may create multiple mounts in a temporary namespace while traversing the mount path, and since the temporary namespace is immediately destroyed, it may end up destroying a mount very soon after it was created, possibly making this race more likely.) INFO: task mount.nfs4:4314 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. mount.nfs4 D 0000000000000000 2880 4314 4313 0x00000000 ffff88001ed6da28 0000000000000046 ffff88001ed6dfd8 ffff88001ed6dfd8 ffff88001ed6c000 ffff88001ed6c000 ffff88001ed6c000 ffff88001e5003a0 ffff88001ed6dfd8 ffff88001e5003a8 ffff88001ed6c000 ffff88001ed6dfd8 Call Trace: [] schedule_timeout+0x1cd/0x2e0 [] ? mark_held_locks+0x6c/0xa0 [] ? _raw_spin_unlock_irq+0x30/0x60 [] ? trace_hardirqs_on_caller+0x14d/0x190 [] ? sub_preempt_count+0xe/0xd0 [] wait_for_common+0x120/0x190 [] ? default_wake_function+0x0/0x20 [] wait_for_completion+0x1d/0x20 [] kthread_stop+0x4a/0x150 [] ? thaw_process+0x70/0x80 [] bdi_unregister+0x10a/0x1a0 [] nfs_put_super+0x19/0x20 [] generic_shutdown_super+0x54/0xe0 [] kill_anon_super+0x16/0x60 [] nfs4_kill_super+0x39/0x90 [] deactivate_locked_super+0x45/0x60 [] deactivate_super+0x49/0x70 [] mntput_no_expire+0x84/0xe0 [] release_mounts+0x9f/0xc0 [] put_mnt_ns+0x65/0x80 [] nfs_follow_remote_path+0x1e6/0x420 [] nfs4_try_mount+0x6f/0xd0 [] nfs4_get_sb+0xa2/0x360 [] vfs_kern_mount+0x88/0x1f0 [] do_kern_mount+0x52/0x130 [] ? _lock_kernel+0x6a/0x170 [] do_mount+0x26e/0x7f0 [] ? copy_mount_options+0xea/0x190 [] sys_mount+0x98/0xf0 [] system_call_fastpath+0x16/0x1b 1 lock held by mount.nfs4/4314: #0: (&type->s_umount_key#24){+.+...}, at: [] deactivate_super+0x41/0x70 Signed-off-by: J. Bruce Fields Signed-off-by: Jens Axboe Acked-by: Artem Bityutskiy --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7d9d06ba184b..81e086d8aa57 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -808,7 +808,7 @@ int bdi_writeback_thread(void *data) wb->last_active = jiffies; set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&bdi->work_list)) { + if (!list_empty(&bdi->work_list) || kthread_should_stop()) { __set_current_state(TASK_RUNNING); continue; } -- cgit v1.2.2 From 4afc31345e5f543e5d89a47aeadaaad1d91a5bc8 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 29 Aug 2010 01:55:38 +0900 Subject: nilfs2: fix leak of shadow dat inode in error path of load_nilfs If load_nilfs() gets an error while doing recovery, it will fail to free the shadow inode of dat (nilfs->ns_gc_dat). This fixes the leak issue. Signed-off-by: Ryusuke Konishi --- fs/nilfs2/the_nilfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 4317f177ea7c..ba7c10c917fc 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -446,6 +446,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) nilfs_mdt_destroy(nilfs->ns_cpfile); nilfs_mdt_destroy(nilfs->ns_sufile); nilfs_mdt_destroy(nilfs->ns_dat); + nilfs_mdt_destroy(nilfs->ns_gc_dat); failed: nilfs_clear_recovery_info(&ri); -- cgit v1.2.2 From 8f587df479c3cea14ba1a9b9d58f34fd2fd6d58b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Aug 2010 16:27:45 +0000 Subject: 9p: potential ERR_PTR() dereference p9_client_walk() can return error values if we run out of space or there is a problem with the network. Signed-off-by: Dan Carpenter Signed-off-by: Eric Van Hensbergen --- fs/9p/fid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 358563689064..6406f896bf95 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -242,7 +242,8 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) } kfree(wnames); fid_out: - v9fs_fid_add(dentry, fid); + if (!IS_ERR(fid)) + v9fs_fid_add(dentry, fid); err_out: up_read(&v9ses->rename_sem); return fid; -- cgit v1.2.2 From 9bc08a45fb117c696e4940cfa1208cb1cc7a2f25 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 2 Sep 2010 15:14:38 +1000 Subject: xfs: improve buffer cache hash scalability When doing large parallel file creates on a 16p machines, large amounts of time is being spent in _xfs_buf_find(). A system wide profile with perf top shows this: 1134740.00 19.3% _xfs_buf_find 733142.00 12.5% __ticket_spin_lock The problem is that the hash contains 45,000 buffers, and the hash table width is only 256 buffers. That means we've got around 200 buffers per chain, and searching it is quite expensive. The hash table size needs to increase. Secondly, every time we do a lookup, we promote the buffer we find to the head of the hash chain. This is causing cachelines to be dirtied and causes invalidation of cachelines across all CPUs that may have walked the hash chain recently. hence every walk of the hash chain is effectively a cold cache walk. Remove the promotion to avoid this invalidation. The results are: 1045043.00 21.2% __ticket_spin_lock 326184.00 6.6% _xfs_buf_find A 70% drop in the CPU usage when looking up buffers. Unfortunately that does not result in an increase in performance underthis workload as contention on the inode_lock soaks up most of the reduction in CPU usage. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_buf.c | 8 +------- fs/xfs/linux-2.6/xfs_buf.h | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ea79072f5210..d72cf2bb054a 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -440,12 +440,7 @@ _xfs_buf_find( ASSERT(btp == bp->b_target); if (bp->b_file_offset == range_base && bp->b_buffer_length == range_length) { - /* - * If we look at something, bring it to the - * front of the list for next time. - */ atomic_inc(&bp->b_hold); - list_move(&bp->b_hash_list, &hash->bh_list); goto found; } } @@ -1443,8 +1438,7 @@ xfs_alloc_bufhash( { unsigned int i; - btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ - btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; + btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t)); for (i = 0; i < (1 << btp->bt_hashshift); i++) { diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index d072e5ff923b..2a05614f0b92 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -137,7 +137,6 @@ typedef struct xfs_buftarg { size_t bt_smask; /* per device buffer hash table */ - uint bt_hashmask; uint bt_hashshift; xfs_bufhash_t *bt_hash; -- cgit v1.2.2 From 23963e54ce187ca6e907c83176c15508b0f6e60d Mon Sep 17 00:00:00 2001 From: Arkadiusz Mi?kiewicz Date: Thu, 26 Aug 2010 10:19:43 +0000 Subject: xfs: Disallow 32bit project quota id Currently on-disk structure is able to keep only 16bit project quota id, so disallow 32bit ones. This fixes a problem where parts of kernel structures holding project quota id are 32bit while parts (on-disk) are 16bit variables which causes project quota member files to be inaccessible for some operations (like mv/rm). Signed-off-by: Arkadiusz Mi?kiewicz Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 237f5ffb2ee8..4fec427b83ef 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -906,6 +906,13 @@ xfs_ioctl_setattr( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + /* + * Disallow 32bit project ids because on-disk structure + * is 16bit only. + */ + if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) + return XFS_ERROR(EINVAL); + /* * If disk quotas is on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later -- cgit v1.2.2 From 8f34a430ac16d5fbd9d6b383184d35e152f5a963 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 2 Sep 2010 15:23:16 -0400 Subject: nfsd4: mask out non-access bits in nfs4_access_to_omode This fixes an unnecessary BUG(). Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3dfef0623968..cf0d2ffb3c84 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -440,7 +440,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { static int nfs4_access_to_omode(u32 access) { - switch (access) { + switch (access & NFS4_SHARE_ACCESS_BOTH) { case NFS4_SHARE_ACCESS_READ: return O_RDONLY; case NFS4_SHARE_ACCESS_WRITE: -- cgit v1.2.2 From 72656c46f50b8dfe50e15793692982e636e3df20 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 3 Sep 2010 12:19:33 +1000 Subject: xfs: prevent 32bit overflow in space reservation If we attempt to preallocate more than 2^32 blocks of space in a single syscall, the transaction block reservation will overflow leading to a hangs in the superblock block accounting code. This is trivially reproduced with xfs_io. Fix the problem by capping the allocation reservation to the maximum number of blocks a single xfs_bmapi() call can allocate (2^21 blocks). Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_vnodeops.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 66d585c6917c..4c7c7bfb2b2f 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2299,15 +2299,22 @@ xfs_alloc_file_space( e = allocatesize_fsb; } + /* + * The transaction reservation is limited to a 32-bit block + * count, hence we need to limit the number of blocks we are + * trying to reserve to avoid an overflow. We can't allocate + * more than @nimaps extents, and an extent is limited on disk + * to MAXEXTLEN (21 bits), so use that to enforce the limit. + */ + resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); if (unlikely(rt)) { - resrtextents = qblocks = (uint)(e - s); + resrtextents = qblocks = resblks; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; - resblks = qblocks = \ - XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); + resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); quota_flag = XFS_QMOPT_RES_REGBLKS; } -- cgit v1.2.2 From 9af25465081480a75824fd7a16a37a5cfebeede9 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 30 Aug 2010 02:44:03 +0000 Subject: xfs: Make fiemap work with sparse files In xfs_vn_fiemap, we set bvm_count to fi_extent_max + 1 and want to return fi_extent_max extents, but actually it won't work for a sparse file. The reason is that in xfs_getbmap we will calculate holes and set it in 'out', while out is malloced by bmv_count(fi_extent_max+1) which didn't consider holes. So in the worst case, if 'out' vector looks like [hole, extent, hole, extent, hole, ... hole, extent, hole], we will only return half of fi_extent_max extents. This patch add a new parameter BMV_IF_NO_HOLES for bvm_iflags. So with this flags, we don't use our 'out' in xfs_getbmap for a hole. The solution is a bit ugly by just don't increasing index of 'out' vector. I felt that it is not easy to skip it at the very beginning since we have the complicated check and some function like xfs_getbmapx_fix_eof_hole to adjust 'out'. Cc: Dave Chinner Signed-off-by: Tao Ma Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_iops.c | 2 +- fs/xfs/xfs_bmap.c | 14 +++++++++++++- fs/xfs/xfs_fs.h | 4 +++- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 68be25dcd301..b1fc2a6bfe83 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -664,7 +664,7 @@ xfs_vn_fiemap( fieinfo->fi_extents_max + 1; bm.bmv_count = min_t(__s32, bm.bmv_count, (PAGE_SIZE * 16 / sizeof(struct getbmapx))); - bm.bmv_iflags = BMV_IF_PREALLOC; + bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) bm.bmv_iflags |= BMV_IF_ATTRFORK; if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 23f14e595c18..f90dadd5a968 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5533,12 +5533,24 @@ xfs_getbmap( map[i].br_startblock)) goto out_free_map; - nexleft--; bmv->bmv_offset = out[cur_ext].bmv_offset + out[cur_ext].bmv_length; bmv->bmv_length = max_t(__int64_t, 0, bmvend - bmv->bmv_offset); + + /* + * In case we don't want to return the hole, + * don't increase cur_ext so that we can reuse + * it in the next loop. + */ + if ((iflags & BMV_IF_NO_HOLES) && + map[i].br_startblock == HOLESTARTBLOCK) { + memset(&out[cur_ext], 0, sizeof(out[cur_ext])); + continue; + } + + nexleft--; bmv->bmv_entries++; cur_ext++; } diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 7cf7220e7d5f..87c2e9d02288 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -114,8 +114,10 @@ struct getbmapx { #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ +#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ #define BMV_IF_VALID \ - (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) + (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ + BMV_IF_DELALLOC|BMV_IF_NO_HOLES) /* bmv_oflags values - returned for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ -- cgit v1.2.2 From 57f9bdac2510cd7fda58e4a111d250861eb1ebeb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Aug 2010 09:12:29 +0200 Subject: sysfs: checking for NULL instead of ERR_PTR d_path() returns an ERR_PTR and it doesn't return NULL. Signed-off-by: Dan Carpenter Cc: stable Reviewed-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1b27b5688f62..da3fefe91a8f 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) char *p; p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); - if (p) + if (!IS_ERR(p)) memmove(last_sysfs_file, p, strlen(p) + 1); /* need attr_sd for attr and ops, its parent for kobj */ -- cgit v1.2.2 From 595afaf9e6ee1b48e13ec4b8bcc8c7dee888161a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 7 Sep 2010 13:42:41 +0200 Subject: fuse: flush background queue on connection close David Bartly reported that fuse can hang in fuse_get_req_nofail() when the connection to the filesystem server is no longer active. If bg_queue is not empty then flush_bg_queue() called from request_end() can put more requests on to the pending queue. If this happens while ending requests on the processing queue then those background requests will be queued to the pending list and never ended. Another problem is that fuse_dev_release() didn't wake up processes sleeping on blocked_waitq. Solve this by: a) flushing the background queue before calling end_requests() on the pending and processing queues b) setting blocked = 0 and waking up processes waiting on blocked_waitq() Thanks to David for an excellent bug report. Reported-by: David Bartley Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/dev.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 69ad053ffd78..b4fc47ff4bc2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1769,6 +1769,14 @@ __acquires(&fc->lock) } } +static void end_queued_requests(struct fuse_conn *fc) +{ + fc->max_background = UINT_MAX; + flush_bg_queue(fc); + end_requests(fc, &fc->pending); + end_requests(fc, &fc->processing); +} + /* * Abort all requests. * @@ -1795,8 +1803,7 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->connected = 0; fc->blocked = 0; end_io_requests(fc); - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); + end_queued_requests(fc); wake_up_all(&fc->waitq); wake_up_all(&fc->blocked_waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); @@ -1811,8 +1818,9 @@ int fuse_dev_release(struct inode *inode, struct file *file) if (fc) { spin_lock(&fc->lock); fc->connected = 0; - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); + fc->blocked = 0; + end_queued_requests(fc); + wake_up_all(&fc->blocked_waitq); spin_unlock(&fc->lock); fuse_conn_put(fc); } -- cgit v1.2.2 From b9ca67b2ddf021491a3741d9555e8ff59b2175ba Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 7 Sep 2010 13:42:41 +0200 Subject: fuse: fix lock annotations Sparse doesn't understand lock annotations of the form __releases(&foo->lock). Change them to __releases(foo->lock). Same for __acquires(). Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 26 ++++++++++++++------------ fs/fuse/file.c | 8 ++++---- 2 files changed, 18 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index b4fc47ff4bc2..d367af1514ef 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,7 +276,7 @@ static void flush_bg_queue(struct fuse_conn *fc) * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) +__releases(fc->lock) { void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; @@ -306,8 +306,8 @@ __releases(&fc->lock) static void wait_answer_interruptible(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { if (signal_pending(current)) return; @@ -325,8 +325,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { if (!fc->no_interrupt) { /* Any signal may interrupt this */ @@ -905,8 +905,8 @@ static int request_pending(struct fuse_conn *fc) /* Wait until a request is available on the pending list */ static void request_wait(struct fuse_conn *fc) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { DECLARE_WAITQUEUE(wait, current); @@ -934,7 +934,7 @@ __acquires(&fc->lock) */ static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes, struct fuse_req *req) -__releases(&fc->lock) +__releases(fc->lock) { struct fuse_in_header ih; struct fuse_interrupt_in arg; @@ -1720,8 +1720,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) * This function releases and reacquires fc->lock */ static void end_requests(struct fuse_conn *fc, struct list_head *head) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { while (!list_empty(head)) { struct fuse_req *req; @@ -1744,8 +1744,8 @@ __acquires(&fc->lock) * locked). */ static void end_io_requests(struct fuse_conn *fc) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { while (!list_empty(&fc->io)) { struct fuse_req *req = @@ -1770,6 +1770,8 @@ __acquires(&fc->lock) } static void end_queued_requests(struct fuse_conn *fc) +__releases(fc->lock) +__acquires(fc->lock) { fc->max_background = UINT_MAX; flush_bg_queue(fc); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 147c1f71bdb9..c8224587123f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1144,8 +1144,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) /* Called under fc->lock, may release and reacquire it */ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { struct fuse_inode *fi = get_fuse_inode(req->inode); loff_t size = i_size_read(req->inode); @@ -1183,8 +1183,8 @@ __acquires(&fc->lock) * Called with fc->lock */ void fuse_flush_writepages(struct inode *inode) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); -- cgit v1.2.2 From 7a2e8a8faab76386d8eaae9ded739ee5615be174 Mon Sep 17 00:00:00 2001 From: Valerie Aurora Date: Thu, 26 Aug 2010 11:07:22 -0700 Subject: VFS: Sanity check mount flags passed to change_mnt_propagation() Sanity check the flags passed to change_mnt_propagation(). Exactly one flag should be set. Return EINVAL otherwise. Userspace can pass in arbitrary combinations of MS_* flags to mount(). do_change_type() is called if any of MS_SHARED, MS_PRIVATE, MS_SLAVE, or MS_UNBINDABLE is set. do_change_type() clears MS_REC and then calls change_mnt_propagation() with the rest of the user-supplied flags. change_mnt_propagation() clearly assumes only one flag is set but do_change_type() does not check that this is true. For example, mount() with flags MS_SHARED | MS_RDONLY does not actually make the mount shared or read-only but does clear MNT_UNBINDABLE. Signed-off-by: Valerie Aurora Signed-off-by: Linus Torvalds --- fs/namespace.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namespace.c b/fs/namespace.c index de402eb6eafb..a72eaabfe8f2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1483,6 +1483,23 @@ out_unlock: return err; } +/* + * Sanity check the flags to change_mnt_propagation. + */ + +static int flags_to_propagation_type(int flags) +{ + int type = flags & ~MS_REC; + + /* Fail if any non-propagation flags are set */ + if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) + return 0; + /* Only one propagation flag should be set */ + if (!is_power_of_2(type)) + return 0; + return type; +} + /* * recursively change the type of the mountpoint. */ @@ -1490,7 +1507,7 @@ static int do_change_type(struct path *path, int flag) { struct vfsmount *m, *mnt = path->mnt; int recurse = flag & MS_REC; - int type = flag & ~MS_REC; + int type; int err = 0; if (!capable(CAP_SYS_ADMIN)) @@ -1499,6 +1516,10 @@ static int do_change_type(struct path *path, int flag) if (path->dentry != path->mnt->mnt_root) return -EINVAL; + type = flags_to_propagation_type(flag); + if (!type) + return -EINVAL; + down_write(&namespace_sem); if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); -- cgit v1.2.2 From dc696aced9f09f05b1f927b93f5a7918017a3e49 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 12 Aug 2010 16:24:25 -0700 Subject: ocfs2: Fix metaecc error messages Like tools, the checksum validate function now prints the values in hex. Signed-off-by: Sunil Mushran Singed-off-by: Tao Ma --- fs/ocfs2/blockcheck.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index ec6d12339593..c7ee03c22226 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, - "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", + "CRC32 failed: stored: 0x%x, computed 0x%x. Applying ECC.\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ @@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, goto out; } - mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", + mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); rc = -EIO; -- cgit v1.2.2 From f5ce5a08a40f2086435858ddc80cb40394b082eb Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 12 Aug 2010 16:24:26 -0700 Subject: ocfs2: Fix incorrect checksum validation error For local mounts, ocfs2_read_locked_inode() calls ocfs2_read_blocks_sync() to read the inode off the disk. The latter first checks to see if that block is cached in the journal, and, if so, returns that block. That is ok. But ocfs2_read_locked_inode() goes wrong when it tries to validate the checksum of such blocks. Blocks that are cached in the journal may not have had their checksum computed as yet. We should not validate the checksums of such blocks. Fixes ossbz#1282 http://oss.oracle.com/bugzilla/show_bug.cgi?id=1282 Signed-off-by: Sunil Mushran Cc: stable@kernel.org Singed-off-by: Tao Ma --- fs/ocfs2/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0492464916b1..eece3e05d9d0 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, OCFS2_BH_IGNORE_CACHE); } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); - if (!status) + /* + * If buffer is in jbd, then its checksum may not have been + * computed as yet. + */ + if (!status && !buffer_jbd(bh)) status = ocfs2_validate_inode_block(osb->sb, bh); } if (status < 0) { -- cgit v1.2.2 From f63afdb2c32db850fa1bfccf84643a8885cbeb61 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 17 Jul 2010 21:45:49 +0800 Subject: ocfs2: make __ocfs2_page_mkwrite handle file end properly. __ocfs2_page_mkwrite now is broken in handling file end. 1. the last page should be the page contains i_size - 1. 2. the len in the last page is also calculated wrong. So change them accordingly. Acked-by: Mark Fasheh Signed-off-by: Tao Ma --- fs/ocfs2/mmap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af2b8fe1f139..4c18f4ad93b4 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -74,9 +74,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, /* * Another node might have truncated while we were waiting on * cluster locks. + * We don't check size == 0 before the shift. This is borrowed + * from do_generic_file_read. */ - last_index = size >> PAGE_CACHE_SHIFT; - if (page->index > last_index) { + last_index = (size - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!size || page->index > last_index)) { ret = -EINVAL; goto out; } @@ -107,7 +109,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, * because the "write" would invalidate their data. */ if (page->index == last_index) - len = size & ~PAGE_CACHE_MASK; + len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, &fsdata, di_bh, page); -- cgit v1.2.2 From 04eda1a18019bb387dc7e97ee99979dd88dc608a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 5 Aug 2010 20:32:45 +0200 Subject: ocfs2: Flush drive's caches on fdatasync When 'barrier' mount option is specified, we have to issue a cache flush during fdatasync(2). We have to do this even if inode doesn't have I_DIRTY_DATASYNC set because we still have to get written *data* to disk so that they are not lost in case of crash. Acked-by: Tao Ma Signed-off-by: Jan Kara Singed-off-by: Tao Ma --- fs/ocfs2/file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 81296b4e3646..6b2be0f2eacd 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -36,6 +36,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_INODE #include @@ -190,8 +191,16 @@ static int ocfs2_sync_file(struct file *file, int datasync) if (err) goto bail; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { + /* + * We still have to flush drive's caches to get data to the + * platter + */ + if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, + NULL, BLKDEV_IFL_WAIT); goto bail; + } journal = osb->journal->j_journal; err = jbd2_journal_force_commit(journal); -- cgit v1.2.2 From 889f004a8c83d515f275078687f859bc0d5ede9d Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Thu, 2 Sep 2010 13:10:10 +0800 Subject: ocfs2: Use the right group in nfs sync check. We have added discontig block group now, and now an inode can be allocated in an discontig block group. So get it in ocfs2_get_suballoc_slot_bit. The old ocfs2_test_suballoc_bit gets group block no from the allocation inode which is wrong. Fix it by passing the right group. Acked-by: Mark Fasheh Signed-off-by: Tao Ma --- fs/ocfs2/suballoc.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index a8e6a95a353f..8a009ee1f7fd 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -2567,7 +2567,8 @@ out: * suballoc_bit. */ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, - u16 *suballoc_slot, u16 *suballoc_bit) + u16 *suballoc_slot, u64 *group_blkno, + u16 *suballoc_bit) { int status; struct buffer_head *inode_bh = NULL; @@ -2604,6 +2605,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); if (suballoc_bit) *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); + if (group_blkno) + *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); bail: brelse(inode_bh); @@ -2621,7 +2624,8 @@ bail: */ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct inode *suballoc, - struct buffer_head *alloc_bh, u64 blkno, + struct buffer_head *alloc_bh, + u64 group_blkno, u64 blkno, u16 bit, int *res) { struct ocfs2_dinode *alloc_di; @@ -2642,10 +2646,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, goto bail; } - if (alloc_di->i_suballoc_loc) - bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); - else - bg_blkno = ocfs2_which_suballoc_group(blkno, bit); + bg_blkno = group_blkno ? group_blkno : + ocfs2_which_suballoc_group(blkno, bit); status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, &group_bh); if (status < 0) { @@ -2680,6 +2682,7 @@ bail: int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) { int status; + u64 group_blkno = 0; u16 suballoc_bit = 0, suballoc_slot = 0; struct inode *inode_alloc_inode; struct buffer_head *alloc_bh = NULL; @@ -2687,7 +2690,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) mlog_entry("blkno: %llu", (unsigned long long)blkno); status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, - &suballoc_bit); + &group_blkno, &suballoc_bit); if (status < 0) { mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); goto bail; @@ -2715,7 +2718,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) } status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, - blkno, suballoc_bit, res); + group_blkno, blkno, suballoc_bit, res); if (status < 0) mlog(ML_ERROR, "test suballoc bit failed %d\n", status); -- cgit v1.2.2 From b2b6ebf5f740e015b2155343958f067e594323ea Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 26 Aug 2010 13:06:50 -0700 Subject: ocfs2: properly set and use inode group alloc hint We were setting ac->ac_last_group in ocfs2_claim_suballoc_bits from res->sr_bg_blkno. Unfortunately, res->sr_bg_blkno is going to be zero under normal (non-fragmented) circumstances. The discontig block group patches effectively turned off that feature. Fix this by correctly calculating what the next group hint should be. Acked-by: Tao Ma Signed-off-by: Mark Fasheh Tested-by: Goldwyn Rodrigues Signed-off-by: Tao Ma --- fs/ocfs2/suballoc.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8a009ee1f7fd..b93d7e72175a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -62,6 +62,17 @@ struct ocfs2_suballoc_result { unsigned int sr_bits; /* How many bits we claimed */ }; +static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) +{ + if (res->sr_blkno == 0) + return 0; + + if (res->sr_bg_blkno) + return res->sr_bg_blkno; + + return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); +} + static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); @@ -1845,6 +1856,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, int status; u16 victim, i; u16 bits_left = 0; + u64 hint = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; @@ -1872,7 +1884,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, goto bail; } - res->sr_bg_blkno = ac->ac_last_group; + res->sr_bg_blkno = hint; if (res->sr_bg_blkno) { /* Attempt to short-circuit the usual search mechanism * by jumping straight to the most recently used @@ -1896,8 +1908,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); - if (!status) + if (!status) { + hint = ocfs2_group_from_res(res); goto set_hint; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1920,8 +1934,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, ac->ac_chain = i; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); - if (!status) + if (!status) { + hint = ocfs2_group_from_res(res); break; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1936,7 +1952,7 @@ set_hint: if (bits_left < min_bits) ac->ac_last_group = 0; else - ac->ac_last_group = res->sr_bg_blkno; + ac->ac_last_group = hint; } bail: -- cgit v1.2.2 From 9b4c0ff32ccd87ab52d4c5bd0a0536febce11370 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 Aug 2010 14:28:03 +0200 Subject: ocfs2: Fix deadlock when allocating page We cannot call grab_cache_page() when holding filesystem locks or with a transaction started as grab_cache_page() calls page allocation with GFP_KERNEL flag and thus page reclaim can recurse back into the filesystem causing deadlocks or various assertion failures. We have to use find_or_create_page() instead and pass it GFP_NOFS as we do with other allocations. Acked-by: Mark Fasheh Signed-off-by: Jan Kara Signed-off-by: Tao Ma --- fs/ocfs2/alloc.c | 2 +- fs/ocfs2/file.c | 2 +- fs/ocfs2/refcounttree.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 215e12ce1d85..592fae5007d1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, last_page_bytes = PAGE_ALIGN(end); index = start >> PAGE_CACHE_SHIFT; do { - pages[numpages] = grab_cache_page(mapping, index); + pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS); if (!pages[numpages]) { ret = -ENOMEM; mlog_errno(ret); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6b2be0f2eacd..2caa3a7a1a39 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -783,7 +783,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); BUG_ON(abs_from & (inode->i_blkbits - 1)); - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) { ret = -ENOMEM; mlog_errno(ret); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 73a11ccfd4c2..0afeda83120f 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2960,7 +2960,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (map_end & (PAGE_CACHE_SIZE - 1)) to = map_end & (PAGE_CACHE_SIZE - 1); - page = grab_cache_page(mapping, page_index); + page = find_or_create_page(mapping, page_index, GFP_NOFS); /* * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page @@ -3179,7 +3179,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, if (map_end > end) map_end = end; - page = grab_cache_page(context->inode->i_mapping, page_index); + page = find_or_create_page(context->inode->i_mapping, + page_index, GFP_NOFS); BUG_ON(!page); wait_on_page_writeback(page); -- cgit v1.2.2 From 81c8c82b5a39f9127e8b239e9b406a6c3a41b228 Mon Sep 17 00:00:00 2001 From: Tristan Ye Date: Thu, 19 Aug 2010 15:15:00 +0800 Subject: Ocfs2: Fix a regression bug from mainline commit(6b933c8e6f1a2f3118082c455eef25f9b1ac7b45). The patch is to fix the regression bug brought from commit 6b933c8...( 'ocfs2: Avoid direct write if we fall back to buffered I/O'): http://oss.oracle.com/bugzilla/show_bug.cgi?id=1285 The commit 6b933c8e6f1a2f3118082c455eef25f9b1ac7b45 changed __generic_file_aio_write to generic_file_buffered_write, which didn't call filemap_{write,wait}_range to flush the pagecaches when we were falling O_DIRECT writes back to buffered ones. it did hurt the O_DIRECT semantics somehow in extented odirect writes. This patch tries to guarantee O_DIRECT writes of 'fall back to buffered' to be correctly flushed. Signed-off-by: Tristan Ye Signed-off-by: Tao Ma --- fs/ocfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2caa3a7a1a39..9a03c151b5ce 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2338,7 +2338,7 @@ out_dio: BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || - ((file->f_flags & O_DIRECT) && has_refcount)) { + ((file->f_flags & O_DIRECT) && !direct_io)) { ret = filemap_fdatawrite_range(file->f_mapping, pos, pos + count - 1); if (ret < 0) -- cgit v1.2.2 From 021960cab320ae3cc4e9aba9cca42f9f5ce785f3 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 13 Aug 2010 15:15:15 -0700 Subject: ocfs2: split out inode alloc code from ocfs2_mknod_locked Do this by splitting the bulk of the function away from the inode allocation code at the very tom of ocfs2_mknod_locked(). Existing callers don't need to change and won't see any difference. The new function created, __ocfs2_mknod_locked() will be used shortly. Signed-off-by: Mark Fasheh Signed-off-by: Tao Ma --- fs/ocfs2/namei.c | 55 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f171b51a74f7..2aa66b695fab 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -472,32 +472,23 @@ leave: return status; } -static int ocfs2_mknod_locked(struct ocfs2_super *osb, - struct inode *dir, - struct inode *inode, - dev_t dev, - struct buffer_head **new_fe_bh, - struct buffer_head *parent_fe_bh, - handle_t *handle, - struct ocfs2_alloc_context *inode_ac) +static int __ocfs2_mknod_locked(struct inode *dir, + struct inode *inode, + dev_t dev, + struct buffer_head **new_fe_bh, + struct buffer_head *parent_fe_bh, + handle_t *handle, + struct ocfs2_alloc_context *inode_ac, + u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit) { int status = 0; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_dinode *fe = NULL; struct ocfs2_extent_list *fel; - u64 suballoc_loc, fe_blkno = 0; - u16 suballoc_bit; u16 feat; *new_fe_bh = NULL; - status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, - inode_ac, &suballoc_loc, - &suballoc_bit, &fe_blkno); - if (status < 0) { - mlog_errno(status); - goto leave; - } - /* populate as many fields early on as possible - many of * these are used by the support functions here and in * callers. */ @@ -591,6 +582,34 @@ leave: return status; } +static int ocfs2_mknod_locked(struct ocfs2_super *osb, + struct inode *dir, + struct inode *inode, + dev_t dev, + struct buffer_head **new_fe_bh, + struct buffer_head *parent_fe_bh, + handle_t *handle, + struct ocfs2_alloc_context *inode_ac) +{ + int status = 0; + u64 suballoc_loc, fe_blkno = 0; + u16 suballoc_bit; + + *new_fe_bh = NULL; + + status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, + inode_ac, &suballoc_loc, + &suballoc_bit, &fe_blkno); + if (status < 0) { + mlog_errno(status); + return status; + } + + return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, + parent_fe_bh, handle, inode_ac, + fe_blkno, suballoc_loc, suballoc_bit); +} + static int ocfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) -- cgit v1.2.2 From d51349829c378c06ba4aa7d4b16ca23739858608 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 13 Aug 2010 15:15:16 -0700 Subject: ocfs2: use ocfs2_alloc_dinode_update_counts() instead of open coding ocfs2_search_chain() makes the same updates as ocfs2_alloc_dinode_update_counts to the alloc inode. Instead of open coding the bitmap update, use our helper function. Signed-off-by: Mark Fasheh Signed-off-by: Tao Ma --- fs/ocfs2/suballoc.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b93d7e72175a..e7edda8c6a11 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1719,7 +1719,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; - u32 tmp_used; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1807,22 +1806,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, } } - /* Ok, claim our bits now: set the info on dinode, chainlist - * and then the group */ - status = ocfs2_journal_access_di(handle, - INODE_CACHE(alloc_inode), - ac->ac_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { + status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (status) { mlog_errno(status); goto bail; } - tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); - fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used); - le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits); - ocfs2_journal_dirty(handle, ac->ac_bh); - status = ocfs2_block_group_set_bits(handle, alloc_inode, bg, -- cgit v1.2.2 From e49e27674d1dd2717ad90b21ece8f83102153315 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 13 Aug 2010 15:15:17 -0700 Subject: ocfs2: allow return of new inode block location before allocation of the inode This allows code which needs to know the eventual block number of an inode but can't allocate it yet due to transaction or lock ordering. For example, ocfs2_create_inode_in_orphan() currently gives a junk blkno for preparation of the orphan dir because it can't yet know where the actual inode is placed - that code is actually in ocfs2_mknod_locked. This is a problem when the orphan dirs are indexed as the junk inode number will create an index entry which goes unused (and fails the later removal from the orphan dir). Now with these interfaces, ocfs2_create_inode_in_orphan() can run the block group search (and get back the inode block number) *before* any actual allocation occurs. Signed-off-by: Mark Fasheh Signed-off-by: Tao Ma --- fs/ocfs2/suballoc.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ocfs2/suballoc.h | 21 +++++++ 2 files changed, 180 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index e7edda8c6a11..8a286f54dca1 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -57,6 +57,12 @@ struct ocfs2_suballoc_result { u64 sr_bg_blkno; /* The bg we allocated from. Set to 0 when a block group is contiguous. */ + u64 sr_bg_stable_blkno; /* + * Doesn't change, always + * set to target block + * group descriptor + * block. + */ u64 sr_blkno; /* The first allocated block */ unsigned int sr_bit_offset; /* The bit in the bg */ unsigned int sr_bits; /* How many bits we claimed */ @@ -149,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) brelse(ac->ac_bh); ac->ac_bh = NULL; ac->ac_resv = NULL; + if (ac->ac_find_loc_priv) { + kfree(ac->ac_find_loc_priv); + ac->ac_find_loc_priv = NULL; + } } void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) @@ -1689,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, if (!ret) ocfs2_bg_discontig_fix_result(ac, gd, res); + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; + + if (ac->ac_find_loc_only) + goto out_loc_only; + ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, res->sr_bits, le16_to_cpu(gd->bg_chain)); @@ -1702,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, if (ret < 0) mlog_errno(ret); +out_loc_only: *bits_left = le16_to_cpu(gd->bg_free_bits_count); out: @@ -1780,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, if (!status) ocfs2_bg_discontig_fix_result(ac, bg, res); + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; /* * Keep track of previous block descriptor read. When @@ -1806,6 +1831,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, } } + if (ac->ac_find_loc_only) + goto out_loc_only; + status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, res->sr_bits, chain); @@ -1828,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, (unsigned long long)le64_to_cpu(fe->i_blkno)); +out_loc_only: *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: brelse(group_bh); @@ -2023,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir, OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; } +int ocfs2_find_new_inode_loc(struct inode *dir, + struct buffer_head *parent_fe_bh, + struct ocfs2_alloc_context *ac, + u64 *fe_blkno) +{ + int ret; + handle_t *handle = NULL; + struct ocfs2_suballoc_result *res; + + BUG_ON(!ac); + BUG_ON(ac->ac_bits_given != 0); + BUG_ON(ac->ac_bits_wanted != 1); + BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); + + res = kzalloc(sizeof(*res), GFP_NOFS); + if (res == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); + + /* + * The handle started here is for chain relink. Alternatively, + * we could just disable relink for these calls. + */ + handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + handle = NULL; + mlog_errno(ret); + goto out; + } + + /* + * This will instruct ocfs2_claim_suballoc_bits and + * ocfs2_search_one_group to search but save actual allocation + * for later. + */ + ac->ac_find_loc_only = 1; + + ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ac->ac_find_loc_priv = res; + *fe_blkno = res->sr_blkno; + +out: + if (handle) + ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); + + if (ret) + kfree(res); + + return ret; +} + +int ocfs2_claim_new_inode_at_loc(handle_t *handle, + struct inode *dir, + struct ocfs2_alloc_context *ac, + u64 *suballoc_loc, + u16 *suballoc_bit, + u64 di_blkno) +{ + int ret; + u16 chain; + struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; + struct buffer_head *bg_bh = NULL; + struct ocfs2_group_desc *bg; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; + + /* + * Since di_blkno is being passed back in, we check for any + * inconsistencies which may have happened between + * calls. These are code bugs as di_blkno is not expected to + * change once returned from ocfs2_find_new_inode_loc() + */ + BUG_ON(res->sr_blkno != di_blkno); + + ret = ocfs2_read_group_descriptor(ac->ac_inode, di, + res->sr_bg_stable_blkno, &bg_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + bg = (struct ocfs2_group_desc *) bg_bh->b_data; + chain = le16_to_cpu(bg->bg_chain); + + ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, + ac->ac_inode, + bg, + bg_bh, + res->sr_bit_offset, + res->sr_bits); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, + (unsigned long long)di_blkno); + + atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); + + BUG_ON(res->sr_bits != 1); + + *suballoc_loc = res->sr_bg_blkno; + *suballoc_bit = res->sr_bit_offset; + ac->ac_bits_given++; + ocfs2_save_inode_ac_group(dir, ac); + +out: + brelse(bg_bh); + + return ret; +} + int ocfs2_claim_new_inode(handle_t *handle, struct inode *dir, struct buffer_head *parent_fe_bh, diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a017dd3ee7d9..b8afabfeede4 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -56,6 +56,9 @@ struct ocfs2_alloc_context { u64 ac_max_block; /* Highest block number to allocate. 0 is is the same as ~0 - unlimited */ + int ac_find_loc_only; /* hack for reflink operation ordering */ + struct ocfs2_suballoc_result *ac_find_loc_priv; /* */ + struct ocfs2_alloc_reservation *ac_resv; }; @@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, struct ocfs2_alloc_context **meta_ac); int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); + + + +/* + * The following two interfaces are for ocfs2_create_inode_in_orphan(). + */ +int ocfs2_find_new_inode_loc(struct inode *dir, + struct buffer_head *parent_fe_bh, + struct ocfs2_alloc_context *ac, + u64 *fe_blkno); + +int ocfs2_claim_new_inode_at_loc(handle_t *handle, + struct inode *dir, + struct ocfs2_alloc_context *ac, + u64 *suballoc_loc, + u16 *suballoc_bit, + u64 di_blkno); + #endif /* _CHAINALLOC_H_ */ -- cgit v1.2.2 From dd43bcde23c527f64897eef41aa1fed2c9905ea9 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 13 Aug 2010 15:15:18 -0700 Subject: ocfs2: split out ocfs2_prepare_orphan_dir() into locking and prep functions We do this because ocfs2_create_inode_in_orphan() wants to order locking of the orphan dir with respect to locking of the inode allocator *before* making any changes to the directory. Signed-off-by: Mark Fasheh Signed-off-by: Tao Ma --- fs/ocfs2/namei.c | 120 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 88 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2aa66b695fab..54c629855357 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1871,61 +1871,117 @@ bail: return status; } -static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, - struct inode **ret_orphan_dir, - u64 blkno, - char *name, - struct ocfs2_dir_lookup_result *lookup) +static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb, + struct inode **ret_orphan_dir, + struct buffer_head **ret_orphan_dir_bh) { struct inode *orphan_dir_inode; struct buffer_head *orphan_dir_bh = NULL; - int status = 0; - - status = ocfs2_blkno_stringify(blkno, name); - if (status < 0) { - mlog_errno(status); - return status; - } + int ret = 0; orphan_dir_inode = ocfs2_get_system_file_inode(osb, ORPHAN_DIR_SYSTEM_INODE, osb->slot_num); if (!orphan_dir_inode) { - status = -ENOENT; - mlog_errno(status); - return status; + ret = -ENOENT; + mlog_errno(ret); + return ret; } mutex_lock(&orphan_dir_inode->i_mutex); - status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); - if (status < 0) { - mlog_errno(status); - goto leave; + ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); + if (ret < 0) { + mutex_unlock(&orphan_dir_inode->i_mutex); + iput(orphan_dir_inode); + + mlog_errno(ret); + return ret; } - status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, - orphan_dir_bh, name, - OCFS2_ORPHAN_NAMELEN, lookup); - if (status < 0) { - ocfs2_inode_unlock(orphan_dir_inode, 1); + *ret_orphan_dir = orphan_dir_inode; + *ret_orphan_dir_bh = orphan_dir_bh; - mlog_errno(status); - goto leave; + return 0; +} + +static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, + struct buffer_head *orphan_dir_bh, + u64 blkno, + char *name, + struct ocfs2_dir_lookup_result *lookup) +{ + int ret; + struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); + + ret = ocfs2_blkno_stringify(blkno, name); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, + orphan_dir_bh, name, + OCFS2_ORPHAN_NAMELEN, lookup); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + return 0; +} + +/** + * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for + * insertion of an orphan. + * @osb: ocfs2 file system + * @ret_orphan_dir: Orphan dir inode - returned locked! + * @blkno: Actual block number of the inode to be inserted into orphan dir. + * @lookup: dir lookup result, to be passed back into functions like + * ocfs2_orphan_add + * + * Returns zero on success and the ret_orphan_dir, name and lookup + * fields will be populated. + * + * Returns non-zero on failure. + */ +static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, + struct inode **ret_orphan_dir, + u64 blkno, + char *name, + struct ocfs2_dir_lookup_result *lookup) +{ + struct inode *orphan_dir_inode = NULL; + struct buffer_head *orphan_dir_bh = NULL; + int ret = 0; + + ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode, + &orphan_dir_bh); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, + blkno, name, lookup); + if (ret < 0) { + mlog_errno(ret); + goto out; } *ret_orphan_dir = orphan_dir_inode; -leave: - if (status) { +out: + brelse(orphan_dir_bh); + + if (ret) { + ocfs2_inode_unlock(orphan_dir_inode, 1); mutex_unlock(&orphan_dir_inode->i_mutex); iput(orphan_dir_inode); } - brelse(orphan_dir_bh); - - mlog_exit(status); - return status; + mlog_exit(ret); + return ret; } static int ocfs2_orphan_add(struct ocfs2_super *osb, -- cgit v1.2.2 From 97b8f4a9dfd932997677136e11980eb2fafea91d Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Fri, 13 Aug 2010 15:15:19 -0700 Subject: ocfs2: Fix orphan add in ocfs2_create_inode_in_orphan ocfs2_create_inode_in_orphan() is used by reflink to create the newly reflinked inode simultaneously in the orphan dir. This allows us to easily handle partially-reflinked files during recovery cleanup. We have a problem though - the orphan dir stringifies inode # to determine a unique name under which the orphan entry dirent can be created. Since ocfs2_create_inode_in_orphan() needs the space allocated in the orphan dir before it can allocate the inode, we currently call into the orphan code: /* * We give the orphan dir the root blkno to fake an orphan name, * and allocate enough space for our insertion. */ status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, osb->root_blkno, orphan_name, &orphan_insert); Using osb->root_blkno might work fine on unindexed directories, but the orphan dir can have an index. When it has that index, the above code fails to allocate the proper index entry. Later, when we try to remove the file from the orphan dir (using the actual inode #), the reflink operation will fail. To fix this, I created a function ocfs2_alloc_orphaned_file() which uses the newly split out orphan and inode alloc code to figure out what the inode block number will be (once allocated) and then prepare the orphan dir from that data. Signed-off-by: Mark Fasheh Signed-off-by: Tao Ma --- fs/ocfs2/namei.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 107 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 54c629855357..a00dda2e4f16 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2128,6 +2128,99 @@ leave: return status; } +/** + * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly + * allocated file. This is different from the typical 'add to orphan dir' + * operation in that the inode does not yet exist. This is a problem because + * the orphan dir stringifies the inode block number to come up with it's + * dirent. Obviously if the inode does not yet exist we have a chicken and egg + * problem. This function works around it by calling deeper into the orphan + * and suballoc code than other callers. Use this only by necessity. + * @dir: The directory which this inode will ultimately wind up under - not the + * orphan dir! + * @dir_bh: buffer_head the @dir inode block + * @orphan_name: string of length (CFS2_ORPHAN_NAMELEN + 1). Will be filled + * with the string to be used for orphan dirent. Pass back to the orphan dir + * code. + * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan + * dir code. + * @ret_di_blkno: block number where the new inode will be allocated. + * @orphan_insert: Dir insert context to be passed back into orphan dir code. + * @ret_inode_ac: Inode alloc context to be passed back to the allocator. + * + * Returns zero on success and the ret_orphan_dir, name and lookup + * fields will be populated. + * + * Returns non-zero on failure. + */ +static int ocfs2_prep_new_orphaned_file(struct inode *dir, + struct buffer_head *dir_bh, + char *orphan_name, + struct inode **ret_orphan_dir, + u64 *ret_di_blkno, + struct ocfs2_dir_lookup_result *orphan_insert, + struct ocfs2_alloc_context **ret_inode_ac) +{ + int ret; + u64 di_blkno; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); + struct inode *orphan_dir = NULL; + struct buffer_head *orphan_dir_bh = NULL; + struct ocfs2_alloc_context *inode_ac = NULL; + + ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + /* reserve an inode spot */ + ret = ocfs2_reserve_new_inode(osb, &inode_ac); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac, + &di_blkno); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, + di_blkno, orphan_name, orphan_insert); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + +out: + if (ret == 0) { + *ret_orphan_dir = orphan_dir; + *ret_di_blkno = di_blkno; + *ret_inode_ac = inode_ac; + /* + * orphan_name and orphan_insert are already up to + * date via prepare_orphan_dir + */ + } else { + /* Unroll reserve_new_inode* */ + if (inode_ac) + ocfs2_free_alloc_context(inode_ac); + + /* Unroll orphan dir locking */ + mutex_unlock(&orphan_dir->i_mutex); + ocfs2_inode_unlock(orphan_dir, 1); + iput(orphan_dir); + } + + brelse(orphan_dir_bh); + + return 0; +} + int ocfs2_create_inode_in_orphan(struct inode *dir, int mode, struct inode **new_inode) @@ -2143,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, struct buffer_head *new_di_bh = NULL; struct ocfs2_alloc_context *inode_ac = NULL; struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; + u64 uninitialized_var(di_blkno), suballoc_loc; + u16 suballoc_bit; status = ocfs2_inode_lock(dir, &parent_di_bh, 1); if (status < 0) { @@ -2151,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, return status; } - /* - * We give the orphan dir the root blkno to fake an orphan name, - * and allocate enough space for our insertion. - */ - status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, - osb->root_blkno, - orphan_name, &orphan_insert); - if (status < 0) { - mlog_errno(status); - goto leave; - } - - /* reserve an inode spot */ - status = ocfs2_reserve_new_inode(osb, &inode_ac); + status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh, + orphan_name, &orphan_dir, + &di_blkno, &orphan_insert, &inode_ac); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -2191,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, goto leave; did_quota_inode = 1; - inode->i_nlink = 0; - /* do the real work now. */ - status = ocfs2_mknod_locked(osb, dir, inode, - 0, &new_di_bh, parent_di_bh, handle, - inode_ac); + status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac, + &suballoc_loc, + &suballoc_bit, di_blkno); if (status < 0) { mlog_errno(status); goto leave; } - status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); + inode->i_nlink = 0; + /* do the real work now. */ + status = __ocfs2_mknod_locked(dir, inode, + 0, &new_di_bh, parent_di_bh, handle, + inode_ac, di_blkno, suballoc_loc, + suballoc_bit); if (status < 0) { mlog_errno(status); goto leave; -- cgit v1.2.2 From 7100ae97266e387d25d0c8a5d9934931f0b07dbc Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 8 Sep 2010 20:54:49 +0000 Subject: Revert "[CIFS] Eliminate unused variable warning" The change to kernel crypto and fixes to ntlvm2 and ntlmssp series, introduced a regression. Deferring this patch series to 2.6.37 after Shirish fixes it. This reverts commit c89e5198b26a869ce2842bad8519264f3394dee9. Signed-off-by: Steve French Acked-by: Jeff Layton CC: Shirish Pargaonkar --- fs/cifs/sess.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 795095f4eac6..4788e16a02cc 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -620,6 +620,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, struct key *spnego_key = NULL; __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ bool first_time; + char *ntlmsspblob; if (ses == NULL) return -EINVAL; @@ -867,8 +868,6 @@ ssetup_ntlmssp_authenticate: iov[1].iov_base = &pSMB->req.SecurityBlob[0]; } else if (phase == NtLmAuthenticate) { int blob_len; - char *ntlmsspblob; - ntlmsspblob = kmalloc(5 * sizeof(struct _AUTHENTICATE_MESSAGE), GFP_KERNEL); -- cgit v1.2.2 From 56234e2767496c125a858f880f1b3a62e04a3406 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 8 Sep 2010 20:57:05 +0000 Subject: Revert "Eliminate sparse warning - bad constant expression" This reverts commit 2d20ca835867d93ead6ce61780d883a4b128106d. The change to kernel crypto and fixes to ntlvm2 and ntlmssp series, introduced a regression. Deferring this patch series to 2.6.37 after Shirish fixes it. Signed-off-by: Steve French Acked-by: Jeff Layton CC: Shirish Pargaonkar --- fs/cifs/cifsencrypt.c | 193 +++++++++++++++++++------------------------------- fs/cifs/cifsglob.h | 7 -- 2 files changed, 72 insertions(+), 128 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 709f2296bdb4..eef78c24e0cc 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -45,38 +45,39 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, char *signature) { - int rc; + int rc = 0; + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(server->ntlmssp.md5)]; + } sdesc; if (cifs_pdu == NULL || server == NULL || signature == NULL) return -EINVAL; - if (!server->ntlmssp.sdescmd5) { - cERROR(1, - "cifs_calculate_signature: can't generate signature\n"); - return -1; - } + sdesc.shash.tfm = server->ntlmssp.md5; + sdesc.shash.flags = 0x0; - rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); + rc = crypto_shash_init(&sdesc.shash); if (rc) { - cERROR(1, "cifs_calculate_signature: oould not init md5\n"); + cERROR(1, "could not initialize master crypto API hmacmd5\n"); return rc; } if (server->secType == RawNTLMSSP) - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + crypto_shash_update(&sdesc.shash, server->session_key.data.ntlmv2.key, CIFS_NTLMV2_SESSKEY_SIZE); else - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + crypto_shash_update(&sdesc.shash, (char *)&server->session_key.data, server->session_key.len); - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + crypto_shash_update(&sdesc.shash, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); - rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); + rc = crypto_shash_final(&sdesc.shash, signature); - return rc; + return 0; } @@ -114,28 +115,30 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, struct TCP_Server_Info *server, char *signature) { int i; - int rc; + int rc = 0; + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(server->ntlmssp.md5)]; + } sdesc; if (iov == NULL || server == NULL || signature == NULL) return -EINVAL; - if (!server->ntlmssp.sdescmd5) { - cERROR(1, "cifs_calc_signature2: can't generate signature\n"); - return -1; - } + sdesc.shash.tfm = server->ntlmssp.md5; + sdesc.shash.flags = 0x0; - rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); + rc = crypto_shash_init(&sdesc.shash); if (rc) { - cERROR(1, "cifs_calc_signature2: oould not init md5\n"); + cERROR(1, "could not initialize master crypto API hmacmd5\n"); return rc; } if (server->secType == RawNTLMSSP) - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + crypto_shash_update(&sdesc.shash, server->session_key.data.ntlmv2.key, CIFS_NTLMV2_SESSKEY_SIZE); else - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + crypto_shash_update(&sdesc.shash, (char *)&server->session_key.data, server->session_key.len); @@ -143,7 +146,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, if (iov[i].iov_len == 0) continue; if (iov[i].iov_base == NULL) { - cERROR(1, "cifs_calc_signature2: null iovec entry"); + cERROR(1, "null iovec entry"); return -EIO; } /* The first entry includes a length field (which does not get @@ -151,16 +154,16 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, if (i == 0) { if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ break; /* nothing to sign or corrupt header */ - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + crypto_shash_update(&sdesc.shash, iov[i].iov_base + 4, iov[i].iov_len - 4); } else - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, + crypto_shash_update(&sdesc.shash, iov[i].iov_base, iov[i].iov_len); } - rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); + rc = crypto_shash_final(&sdesc.shash, signature); - return rc; + return 0; } int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, @@ -310,48 +313,43 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, wchar_t *user; wchar_t *domain; wchar_t *server; - - if (!ses->server->ntlmssp.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); - return -1; - } + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)]; + } sdesc; /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash); + sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5; + sdesc.shash.flags = 0x0; + crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash, CIFS_NTHASH_SIZE); - rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); + rc = crypto_shash_init(&sdesc.shash); if (rc) { - cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n"); + cERROR(1, "could not initialize master crypto API hmacmd5\n"); return rc; } /* convert ses->userName to unicode and uppercase */ len = strlen(ses->userName); user = kmalloc(2 + (len * 2), GFP_KERNEL); - if (user == NULL) { - cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); - rc = -ENOMEM; + if (user == NULL) goto calc_exit_2; - } len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); UniStrupr(user); - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - (char *)user, 2 * len); + crypto_shash_update(&sdesc.shash, (char *)user, 2 * len); /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { len = strlen(ses->domainName); domain = kmalloc(2 + (len * 2), GFP_KERNEL); - if (domain == NULL) { - cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); - rc = -ENOMEM; + if (domain == NULL) goto calc_exit_1; - } len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, nls_cp); /* the following line was removed since it didn't work well @@ -359,19 +357,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, Maybe converting the domain name earlier makes sense */ /* UniStrupr(domain); */ - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - (char *)domain, 2 * len); + crypto_shash_update(&sdesc.shash, (char *)domain, 2 * len); kfree(domain); } else if (ses->serverName) { len = strlen(ses->serverName); server = kmalloc(2 + (len * 2), GFP_KERNEL); - if (server == NULL) { - cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); - rc = -ENOMEM; + if (server == NULL) goto calc_exit_1; - } len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, nls_cp); /* the following line was removed since it didn't work well @@ -379,20 +373,16 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, Maybe converting the domain name earlier makes sense */ /* UniStrupr(domain); */ - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - (char *)server, 2 * len); + crypto_shash_update(&sdesc.shash, (char *)server, 2 * len); kfree(server); } - - rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, - ses->server->ntlmv2_hash); - calc_exit_1: kfree(user); calc_exit_2: /* BB FIXME what about bytes 24 through 40 of the signing key? compare with the NTLM example */ + rc = crypto_shash_final(&sdesc.shash, ses->server->ntlmv2_hash); return rc; } @@ -452,33 +442,34 @@ CalcNTLMv2_response(const struct TCP_Server_Info *server, char *v2_session_response) { int rc; + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(server->ntlmssp.hmacmd5)]; + } sdesc; - if (!server->ntlmssp.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); - return -1; - } + sdesc.shash.tfm = server->ntlmssp.hmacmd5; + sdesc.shash.flags = 0x0; crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash); + rc = crypto_shash_init(&sdesc.shash); if (rc) { - cERROR(1, "CalcNTLMv2_response: could not init hmacmd5"); + cERROR(1, "could not initialize master crypto API hmacmd5\n"); return rc; } memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE); - crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, + crypto_shash_update(&sdesc.shash, v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE); if (server->tilen) - crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, + crypto_shash_update(&sdesc.shash, server->tiblob, server->tilen); - rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash, - v2_session_response); + rc = crypto_shash_final(&sdesc.shash, v2_session_response); return rc; } @@ -489,6 +480,10 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, { int rc = 0; struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)]; + } sdesc; buf->blob_signature = cpu_to_le32(0x00000101); buf->reserved = 0; @@ -516,24 +511,21 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, return rc; } - if (!ses->server->ntlmssp.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); - return -1; - } - crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); + sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5; + sdesc.shash.flags = 0x0; + + rc = crypto_shash_init(&sdesc.shash); if (rc) { - cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n"); + cERROR(1, "could not initialize master crypto API hmacmd5\n"); return rc; } - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - resp_buf, CIFS_HMAC_MD5_HASH_SIZE); + crypto_shash_update(&sdesc.shash, resp_buf, CIFS_HMAC_MD5_HASH_SIZE); - rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, + rc = crypto_shash_final(&sdesc.shash, ses->server->session_key.data.ntlmv2.key); memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf, @@ -586,65 +578,24 @@ cifs_crypto_shash_release(struct TCP_Server_Info *server) if (server->ntlmssp.hmacmd5) crypto_free_shash(server->ntlmssp.hmacmd5); - - kfree(server->ntlmssp.sdeschmacmd5); - - kfree(server->ntlmssp.sdescmd5); } int cifs_crypto_shash_allocate(struct TCP_Server_Info *server) { - int rc; - unsigned int size; - server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); if (!server->ntlmssp.hmacmd5 || IS_ERR(server->ntlmssp.hmacmd5)) { - cERROR(1, "could not allocate crypto hmacmd5\n"); + cERROR(1, "could not allocate master crypto API hmacmd5\n"); return 1; } server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0); if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) { - cERROR(1, "could not allocate crypto md5\n"); - rc = 1; - goto cifs_crypto_shash_allocate_ret1; - } - - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->ntlmssp.hmacmd5); - server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); - if (!server->ntlmssp.sdeschmacmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n"); - rc = -ENOMEM; - goto cifs_crypto_shash_allocate_ret2; - } - server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5; - server->ntlmssp.sdeschmacmd5->shash.flags = 0x0; - - - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->ntlmssp.md5); - server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL); - if (!server->ntlmssp.sdescmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n"); - rc = -ENOMEM; - goto cifs_crypto_shash_allocate_ret3; + crypto_free_shash(server->ntlmssp.hmacmd5); + cERROR(1, "could not allocate master crypto API md5\n"); + return 1; } - server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5; - server->ntlmssp.sdescmd5->shash.flags = 0x0; return 0; - -cifs_crypto_shash_allocate_ret3: - kfree(server->ntlmssp.sdeschmacmd5); - -cifs_crypto_shash_allocate_ret2: - crypto_free_shash(server->ntlmssp.md5); - -cifs_crypto_shash_allocate_ret1: - crypto_free_shash(server->ntlmssp.hmacmd5); - - return rc; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c9d0cfc086eb..49563e0c1725 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -123,19 +123,12 @@ struct cifs_cred { struct cifs_ace *aces; }; -struct sdesc { - struct shash_desc shash; - char ctx[]; -}; - struct ntlmssp_auth { __u32 client_flags; __u32 server_flags; unsigned char ciphertext[CIFS_CPHTXT_SIZE]; struct crypto_shash *hmacmd5; struct crypto_shash *md5; - struct sdesc *sdeschmacmd5; - struct sdesc *sdescmd5; }; /* -- cgit v1.2.2 From 745e507a9c79c6e1385d3414d5e56f3d4621a375 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 8 Sep 2010 21:09:27 +0000 Subject: Revert "missing changes during ntlmv2/ntlmssp auth and sign" This reverts commit 3ec6bbcdb4e85403f2c5958876ca9492afdf4031. The change to kernel crypto and fixes to ntlvm2 and ntlmssp series, introduced a regression. Deferring this patch series to 2.6.37 after Shirish fixes it. Signed-off-by: Steve French Acked-by: Jeff Layton CC: Shirish Pargaonkar --- fs/cifs/cifsencrypt.c | 2 -- fs/cifs/sess.c | 13 +++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index eef78c24e0cc..051d00011ca3 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -553,8 +553,6 @@ calc_seckey(struct TCP_Server_Info *server) return 1; } - desc.tfm = tfm_arc4; - crypto_blkcipher_setkey(tfm_arc4, server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE); sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE); diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 4788e16a02cc..41fc5328120d 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -408,8 +408,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, /* BB spec says that if AvId field of MsvAvTimestamp is populated then we must set the MIC field of the AUTHENTICATE_MESSAGE */ - ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags); - tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); tilen = cpu_to_le16(pblob->TargetInfoArray.Length); ses->server->tilen = tilen; @@ -442,13 +440,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, /* BB is NTLMV2 session security format easier to use here? */ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NTLM; + NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; if (ses->server->secMode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { - flags |= NTLMSSP_NEGOTIATE_SIGN | - NTLMSSP_NEGOTIATE_KEY_XCH | - NTLMSSP_NEGOTIATE_EXTENDED_SEC; - } + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + flags |= NTLMSSP_NEGOTIATE_SIGN; + if (ses->server->secMode & SECMODE_SIGN_REQUIRED) + flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; sec_blob->NegotiateFlags |= cpu_to_le32(flags); -- cgit v1.2.2 From c8e56f1f4fb9f82f63e4ce6d73a14501d0432c76 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 8 Sep 2010 21:10:58 +0000 Subject: Revert "[CIFS] Fix ntlmv2 auth with ntlmssp" This reverts commit 9fbc590860e75785bdaf8b83e48fabfe4d4f7d58. The change to kernel crypto and fixes to ntlvm2 and ntlmssp series, introduced a regression. Deferring this patch series to 2.6.37 after Shirish fixes it. Signed-off-by: Steve French Acked-by: Jeff Layton CC: Shirish Pargaonkar --- fs/cifs/Kconfig | 2 - fs/cifs/asn1.c | 6 +- fs/cifs/cifsencrypt.c | 416 +++++++++++++++----------------------------------- fs/cifs/cifsglob.h | 18 +-- fs/cifs/cifspdu.h | 7 +- fs/cifs/cifsproto.h | 12 +- fs/cifs/cifssmb.c | 13 +- fs/cifs/connect.c | 13 +- fs/cifs/ntlmssp.h | 13 -- fs/cifs/sess.c | 118 ++++---------- fs/cifs/transport.c | 6 +- 11 files changed, 172 insertions(+), 452 deletions(-) (limited to 'fs') diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 0da1debd499d..917b7d449bb2 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -2,8 +2,6 @@ config CIFS tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS - select CRYPTO_MD5 - select CRYPTO_ARC4 help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 21f0fbd86989..cfd1ce34e0bc 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN)) server->sec_mskerberos = true; - if (compare_oid(oid, oidlen, KRB5U2U_OID, + else if (compare_oid(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN)) server->sec_kerberosu2u = true; - if (compare_oid(oid, oidlen, KRB5_OID, + else if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN)) server->sec_kerberos = true; - if (compare_oid(oid, oidlen, NTLMSSP_OID, + else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) server->sec_ntlmssp = true; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 051d00011ca3..847628dfdc44 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -27,7 +27,6 @@ #include "md5.h" #include "cifs_unicode.h" #include "cifsproto.h" -#include "ntlmssp.h" #include #include @@ -43,44 +42,21 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, - struct TCP_Server_Info *server, char *signature) + const struct mac_key *key, char *signature) { - int rc = 0; - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(server->ntlmssp.md5)]; - } sdesc; + struct MD5Context context; - if (cifs_pdu == NULL || server == NULL || signature == NULL) + if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) return -EINVAL; - sdesc.shash.tfm = server->ntlmssp.md5; - sdesc.shash.flags = 0x0; - - rc = crypto_shash_init(&sdesc.shash); - if (rc) { - cERROR(1, "could not initialize master crypto API hmacmd5\n"); - return rc; - } - - if (server->secType == RawNTLMSSP) - crypto_shash_update(&sdesc.shash, - server->session_key.data.ntlmv2.key, - CIFS_NTLMV2_SESSKEY_SIZE); - else - crypto_shash_update(&sdesc.shash, - (char *)&server->session_key.data, - server->session_key.len); - - crypto_shash_update(&sdesc.shash, - cifs_pdu->Protocol, cifs_pdu->smb_buf_length); - - rc = crypto_shash_final(&sdesc.shash, signature); + cifs_MD5_init(&context); + cifs_MD5_update(&context, (char *)&key->data, key->len); + cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); + cifs_MD5_final(signature, &context); return 0; } - int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -102,7 +78,8 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, server->sequence_number++; spin_unlock(&GlobalMid_Lock); - rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); + rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key, + smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -112,36 +89,16 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, } static int cifs_calc_signature2(const struct kvec *iov, int n_vec, - struct TCP_Server_Info *server, char *signature) + const struct mac_key *key, char *signature) { + struct MD5Context context; int i; - int rc = 0; - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(server->ntlmssp.md5)]; - } sdesc; - if (iov == NULL || server == NULL || signature == NULL) + if ((iov == NULL) || (signature == NULL) || (key == NULL)) return -EINVAL; - sdesc.shash.tfm = server->ntlmssp.md5; - sdesc.shash.flags = 0x0; - - rc = crypto_shash_init(&sdesc.shash); - if (rc) { - cERROR(1, "could not initialize master crypto API hmacmd5\n"); - return rc; - } - - if (server->secType == RawNTLMSSP) - crypto_shash_update(&sdesc.shash, - server->session_key.data.ntlmv2.key, - CIFS_NTLMV2_SESSKEY_SIZE); - else - crypto_shash_update(&sdesc.shash, - (char *)&server->session_key.data, - server->session_key.len); - + cifs_MD5_init(&context); + cifs_MD5_update(&context, (char *)&key->data, key->len); for (i = 0; i < n_vec; i++) { if (iov[i].iov_len == 0) continue; @@ -154,18 +111,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, if (i == 0) { if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ break; /* nothing to sign or corrupt header */ - crypto_shash_update(&sdesc.shash, - iov[i].iov_base + 4, iov[i].iov_len - 4); + cifs_MD5_update(&context, iov[0].iov_base+4, + iov[0].iov_len-4); } else - crypto_shash_update(&sdesc.shash, - iov[i].iov_base, iov[i].iov_len); + cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len); } - rc = crypto_shash_final(&sdesc.shash, signature); + cifs_MD5_final(signature, &context); return 0; } + int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -188,7 +145,8 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, server->sequence_number++; spin_unlock(&GlobalMid_Lock); - rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); + rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key, + smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -198,14 +156,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, } int cifs_verify_signature(struct smb_hdr *cifs_pdu, - struct TCP_Server_Info *server, + const struct mac_key *mac_key, __u32 expected_sequence_number) { - int rc; + unsigned int rc; char server_response_sig[8]; char what_we_think_sig_should_be[20]; - if (cifs_pdu == NULL || server == NULL) + if ((cifs_pdu == NULL) || (mac_key == NULL)) return -EINVAL; if (cifs_pdu->Command == SMB_COM_NEGOTIATE) @@ -234,7 +192,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, cpu_to_le32(expected_sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; - rc = cifs_calculate_signature(cifs_pdu, server, + rc = cifs_calculate_signature(cifs_pdu, mac_key, what_we_think_sig_should_be); if (rc) @@ -251,7 +209,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, } /* We fill in key by putting in 40 byte array which was allocated by caller */ -int cifs_calculate_session_key(struct session_key *key, const char *rn, +int cifs_calculate_mac_key(struct mac_key *key, const char *rn, const char *password) { char temp_key[16]; @@ -265,6 +223,63 @@ int cifs_calculate_session_key(struct session_key *key, const char *rn, return 0; } +int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses, + const struct nls_table *nls_info) +{ + char temp_hash[16]; + struct HMACMD5Context ctx; + char *ucase_buf; + __le16 *unicode_buf; + unsigned int i, user_name_len, dom_name_len; + + if (ses == NULL) + return -EINVAL; + + E_md4hash(ses->password, temp_hash); + + hmac_md5_init_limK_to_64(temp_hash, 16, &ctx); + user_name_len = strlen(ses->userName); + if (user_name_len > MAX_USERNAME_SIZE) + return -EINVAL; + if (ses->domainName == NULL) + return -EINVAL; /* BB should we use CIFS_LINUX_DOM */ + dom_name_len = strlen(ses->domainName); + if (dom_name_len > MAX_USERNAME_SIZE) + return -EINVAL; + + ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL); + if (ucase_buf == NULL) + return -ENOMEM; + unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); + if (unicode_buf == NULL) { + kfree(ucase_buf); + return -ENOMEM; + } + + for (i = 0; i < user_name_len; i++) + ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]]; + ucase_buf[i] = 0; + user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, + MAX_USERNAME_SIZE*2, nls_info); + unicode_buf[user_name_len] = 0; + user_name_len++; + + for (i = 0; i < dom_name_len; i++) + ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]]; + ucase_buf[i] = 0; + dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, + MAX_USERNAME_SIZE*2, nls_info); + + unicode_buf[user_name_len + dom_name_len] = 0; + hmac_md5_update((const unsigned char *) unicode_buf, + (user_name_len+dom_name_len)*2, &ctx); + + hmac_md5_final(ses->server->ntlmv2_hash, &ctx); + kfree(ucase_buf); + kfree(unicode_buf); + return 0; +} + #ifdef CONFIG_CIFS_WEAK_PW_HASH void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, char *lnm_session_key) @@ -309,29 +324,21 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, { int rc = 0; int len; - char nt_hash[CIFS_NTHASH_SIZE]; + char nt_hash[16]; + struct HMACMD5Context *pctxt; wchar_t *user; wchar_t *domain; - wchar_t *server; - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)]; - } sdesc; - /* calculate md4 hash of password */ - E_md4hash(ses->password, nt_hash); + pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); - sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5; - sdesc.shash.flags = 0x0; + if (pctxt == NULL) + return -ENOMEM; - crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash, - CIFS_NTHASH_SIZE); + /* calculate md4 hash of password */ + E_md4hash(ses->password, nt_hash); - rc = crypto_shash_init(&sdesc.shash); - if (rc) { - cERROR(1, "could not initialize master crypto API hmacmd5\n"); - return rc; - } + /* convert Domainname to unicode and uppercase */ + hmac_md5_init_limK_to_64(nt_hash, 16, pctxt); /* convert ses->userName to unicode and uppercase */ len = strlen(ses->userName); @@ -340,8 +347,7 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, goto calc_exit_2; len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); UniStrupr(user); - - crypto_shash_update(&sdesc.shash, (char *)user, 2 * len); + hmac_md5_update((char *)user, 2*len, pctxt); /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { @@ -357,243 +363,65 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, Maybe converting the domain name earlier makes sense */ /* UniStrupr(domain); */ - crypto_shash_update(&sdesc.shash, (char *)domain, 2 * len); + hmac_md5_update((char *)domain, 2*len, pctxt); kfree(domain); - } else if (ses->serverName) { - len = strlen(ses->serverName); - - server = kmalloc(2 + (len * 2), GFP_KERNEL); - if (server == NULL) - goto calc_exit_1; - len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, - nls_cp); - /* the following line was removed since it didn't work well - with lower cased domain name that passed as an option. - Maybe converting the domain name earlier makes sense */ - /* UniStrupr(domain); */ - - crypto_shash_update(&sdesc.shash, (char *)server, 2 * len); - - kfree(server); } calc_exit_1: kfree(user); calc_exit_2: /* BB FIXME what about bytes 24 through 40 of the signing key? compare with the NTLM example */ - rc = crypto_shash_final(&sdesc.shash, ses->server->ntlmv2_hash); - - return rc; -} - -static int -find_domain_name(struct cifsSesInfo *ses) -{ - int rc = 0; - unsigned int attrsize; - unsigned int type; - unsigned char *blobptr; - struct ntlmssp2_name *attrptr; - - if (ses->server->tiblob) { - blobptr = ses->server->tiblob; - attrptr = (struct ntlmssp2_name *) blobptr; - - while ((type = attrptr->type) != 0) { - blobptr += 2; /* advance attr type */ - attrsize = attrptr->length; - blobptr += 2; /* advance attr size */ - if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { - if (!ses->domainName) { - ses->domainName = - kmalloc(attrptr->length + 1, - GFP_KERNEL); - if (!ses->domainName) - return -ENOMEM; - cifs_from_ucs2(ses->domainName, - (__le16 *)blobptr, - attrptr->length, - attrptr->length, - load_nls_default(), false); - } - } - blobptr += attrsize; /* advance attr value */ - attrptr = (struct ntlmssp2_name *) blobptr; - } - } else { - ses->server->tilen = 2 * sizeof(struct ntlmssp2_name); - ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL); - if (!ses->server->tiblob) { - ses->server->tilen = 0; - cERROR(1, "Challenge target info allocation failure"); - return -ENOMEM; - } - memset(ses->server->tiblob, 0x0, ses->server->tilen); - attrptr = (struct ntlmssp2_name *) ses->server->tiblob; - attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); - } + hmac_md5_final(ses->server->ntlmv2_hash, pctxt); + kfree(pctxt); return rc; } -static int -CalcNTLMv2_response(const struct TCP_Server_Info *server, - char *v2_session_response) -{ - int rc; - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(server->ntlmssp.hmacmd5)]; - } sdesc; - - sdesc.shash.tfm = server->ntlmssp.hmacmd5; - sdesc.shash.flags = 0x0; - - crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash, - CIFS_HMAC_MD5_HASH_SIZE); - - rc = crypto_shash_init(&sdesc.shash); - if (rc) { - cERROR(1, "could not initialize master crypto API hmacmd5\n"); - return rc; - } - - memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, - server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE); - crypto_shash_update(&sdesc.shash, - v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, - sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE); - - if (server->tilen) - crypto_shash_update(&sdesc.shash, - server->tiblob, server->tilen); - - rc = crypto_shash_final(&sdesc.shash, v2_session_response); - - return rc; -} - -int -setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, +void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, const struct nls_table *nls_cp) { - int rc = 0; + int rc; struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; - struct { - struct shash_desc shash; - char ctx[crypto_shash_descsize(ses->server->ntlmssp.hmacmd5)]; - } sdesc; + struct HMACMD5Context context; buf->blob_signature = cpu_to_le32(0x00000101); buf->reserved = 0; buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); buf->reserved2 = 0; - - if (!ses->domainName) { - rc = find_domain_name(ses); - if (rc) { - cERROR(1, "could not get domain/server name rc %d", rc); - return rc; - } - } + buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); + buf->names[0].length = 0; + buf->names[1].type = 0; + buf->names[1].length = 0; /* calculate buf->ntlmv2_hash */ rc = calc_ntlmv2_hash(ses, nls_cp); - if (rc) { - cERROR(1, "could not get v2 hash rc %d", rc); - return rc; - } - rc = CalcNTLMv2_response(ses->server, resp_buf); - if (rc) { + if (rc) cERROR(1, "could not get v2 hash rc %d", rc); - return rc; - } - - crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, - ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); - - sdesc.shash.tfm = ses->server->ntlmssp.hmacmd5; - sdesc.shash.flags = 0x0; - - rc = crypto_shash_init(&sdesc.shash); - if (rc) { - cERROR(1, "could not initialize master crypto API hmacmd5\n"); - return rc; - } - - crypto_shash_update(&sdesc.shash, resp_buf, CIFS_HMAC_MD5_HASH_SIZE); + CalcNTLMv2_response(ses, resp_buf); - rc = crypto_shash_final(&sdesc.shash, - ses->server->session_key.data.ntlmv2.key); + /* now calculate the MAC key for NTLMv2 */ + hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); + hmac_md5_update(resp_buf, 16, &context); + hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context); - memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf, - sizeof(struct ntlmv2_resp)); - ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp); - - return rc; + memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf, + sizeof(struct ntlmv2_resp)); + ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp); } -int -calc_seckey(struct TCP_Server_Info *server) -{ - int rc; - unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE]; - struct crypto_blkcipher *tfm_arc4; - struct scatterlist sgin, sgout; - struct blkcipher_desc desc; - - get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE); - - tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", - 0, CRYPTO_ALG_ASYNC); - if (!tfm_arc4 || IS_ERR(tfm_arc4)) { - cERROR(1, "could not allocate " "master crypto API arc4\n"); - return 1; - } - - crypto_blkcipher_setkey(tfm_arc4, - server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE); - sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE); - sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); - rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE); - - if (!rc) - memcpy(server->session_key.data.ntlmv2.key, - sec_key, CIFS_NTLMV2_SESSKEY_SIZE); - - crypto_free_blkcipher(tfm_arc4); - - return 0; -} - -void -cifs_crypto_shash_release(struct TCP_Server_Info *server) -{ - if (server->ntlmssp.md5) - crypto_free_shash(server->ntlmssp.md5); - - if (server->ntlmssp.hmacmd5) - crypto_free_shash(server->ntlmssp.hmacmd5); -} - -int -cifs_crypto_shash_allocate(struct TCP_Server_Info *server) +void CalcNTLMv2_response(const struct cifsSesInfo *ses, + char *v2_session_response) { - server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); - if (!server->ntlmssp.hmacmd5 || - IS_ERR(server->ntlmssp.hmacmd5)) { - cERROR(1, "could not allocate master crypto API hmacmd5\n"); - return 1; - } + struct HMACMD5Context context; + /* rest of v2 struct already generated */ + memcpy(v2_session_response + 8, ses->server->cryptKey, 8); + hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); - server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0); - if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) { - crypto_free_shash(server->ntlmssp.hmacmd5); - cERROR(1, "could not allocate master crypto API md5\n"); - return 1; - } + hmac_md5_update(v2_session_response+8, + sizeof(struct ntlmv2_resp) - 8, &context); - return 0; + hmac_md5_final(v2_session_response, &context); +/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 49563e0c1725..0cdfb8c32ac6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -25,9 +25,6 @@ #include #include "cifs_fs_sb.h" #include "cifsacl.h" -#include -#include - /* * The sizes of various internal tables and strings */ @@ -100,7 +97,7 @@ enum protocolEnum { /* Netbios frames protocol not supported at this time */ }; -struct session_key { +struct mac_key { unsigned int len; union { char ntlm[CIFS_SESS_KEY_SIZE + 16]; @@ -123,14 +120,6 @@ struct cifs_cred { struct cifs_ace *aces; }; -struct ntlmssp_auth { - __u32 client_flags; - __u32 server_flags; - unsigned char ciphertext[CIFS_CPHTXT_SIZE]; - struct crypto_shash *hmacmd5; - struct crypto_shash *md5; -}; - /* ***************************************************************** * Except the CIFS PDUs themselves all the @@ -193,14 +182,11 @@ struct TCP_Server_Info { /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* needed for CIFS PDU signature */ - struct session_key session_key; + struct mac_key mac_signing_key; char ntlmv2_hash[16]; unsigned long lstrp; /* when we got last response from this server */ u16 dialect; /* dialect index that server chose */ /* extended security flavors that server supports */ - unsigned int tilen; /* length of the target info blob */ - unsigned char *tiblob; /* target info blob in challenge response */ - struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ bool sec_kerberosu2u; /* supports U2U Kerberos */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 320e0fd0ba7b..14d036d8db11 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -134,12 +134,6 @@ * Size of the session key (crypto key encrypted with the password */ #define CIFS_SESS_KEY_SIZE (24) -#define CIFS_CLIENT_CHALLENGE_SIZE (8) -#define CIFS_SERVER_CHALLENGE_SIZE (8) -#define CIFS_HMAC_MD5_HASH_SIZE (16) -#define CIFS_CPHTXT_SIZE (16) -#define CIFS_NTLMV2_SESSKEY_SIZE (16) -#define CIFS_NTHASH_SIZE (16) /* * Maximum user name length @@ -669,6 +663,7 @@ struct ntlmv2_resp { __le64 time; __u64 client_chal; /* random */ __u32 reserved2; + struct ntlmssp2_name names[2]; /* array of name entries could follow ending in minimum 4 byte struct */ } __attribute__((packed)); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1378d9133844..1f5450814087 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -361,15 +361,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); extern int cifs_verify_signature(struct smb_hdr *, - struct TCP_Server_Info *server, + const struct mac_key *mac_key, __u32 expected_sequence_number); -extern int cifs_calculate_session_key(struct session_key *key, const char *rn, +extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, const char *pass); -extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, +extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, + const struct nls_table *); +extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); +extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, const struct nls_table *); -extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); -extern void cifs_crypto_shash_release(struct TCP_Server_Info *); -extern int calc_seckey(struct TCP_Server_Info *); #ifdef CONFIG_CIFS_WEAK_PW_HASH extern void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, char *lnm_session_key); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4bda920d1f75..c65c3419dd37 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -604,14 +604,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) else rc = -EINVAL; - if (server->secType == Kerberos) { - if (!server->sec_kerberos && - !server->sec_mskerberos) - rc = -EOPNOTSUPP; - } else if (server->secType == RawNTLMSSP) { - if (!server->sec_ntlmssp) - rc = -EOPNOTSUPP; - } else + if (server->sec_kerberos || server->sec_mskerberos) + server->secType = Kerberos; + else if (server->sec_ntlmssp) + server->secType = RawNTLMSSP; + else rc = -EOPNOTSUPP; } } else diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ec0ea4a43bdb..0ea52e9f9065 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1708,7 +1708,6 @@ cifs_put_smb_ses(struct cifsSesInfo *ses) CIFSSMBLogoff(xid, ses); _FreeXid(xid); } - cifs_crypto_shash_release(server); sesInfoFree(ses); cifs_put_tcp_session(server); } @@ -1788,23 +1787,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) ses->linux_uid = volume_info->linux_uid; ses->overrideSecFlg = volume_info->secFlg; - rc = cifs_crypto_shash_allocate(server); - if (rc) { - cERROR(1, "could not setup hash structures rc %d", rc); - goto get_ses_fail; - } - server->tilen = 0; - server->tiblob = NULL; - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses); if (!rc) rc = cifs_setup_session(xid, ses, volume_info->local_nls); mutex_unlock(&ses->session_mutex); - if (rc) { - cifs_crypto_shash_release(ses->server); + if (rc) goto get_ses_fail; - } /* success, put it on the list */ write_lock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 1db0f0746a5b..49c9a4e75319 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -61,19 +61,6 @@ #define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 #define NTLMSSP_NEGOTIATE_56 0x80000000 -/* Define AV Pair Field IDs */ -#define NTLMSSP_AV_EOL 0 -#define NTLMSSP_AV_NB_COMPUTER_NAME 1 -#define NTLMSSP_AV_NB_DOMAIN_NAME 2 -#define NTLMSSP_AV_DNS_COMPUTER_NAME 3 -#define NTLMSSP_AV_DNS_DOMAIN_NAME 4 -#define NTLMSSP_AV_DNS_TREE_NAME 5 -#define NTLMSSP_AV_FLAGS 6 -#define NTLMSSP_AV_TIMESTAMP 7 -#define NTLMSSP_AV_RESTRICTION 8 -#define NTLMSSP_AV_TARGET_NAME 9 -#define NTLMSSP_AV_CHANNEL_BINDINGS 10 - /* Although typedefs are not commonly used for structure definitions */ /* in the Linux kernel, in this particular case they are useful */ /* to more closely match the standards document for NTLMSSP from */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 41fc5328120d..0a57cb7db5dd 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -383,9 +383,6 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft, static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifsSesInfo *ses) { - unsigned int tioffset; /* challeng message target info area */ - unsigned int tilen; /* challeng message target info area length */ - CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; if (blob_len < sizeof(CHALLENGE_MESSAGE)) { @@ -408,18 +405,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, /* BB spec says that if AvId field of MsvAvTimestamp is populated then we must set the MIC field of the AUTHENTICATE_MESSAGE */ - tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); - tilen = cpu_to_le16(pblob->TargetInfoArray.Length); - ses->server->tilen = tilen; - if (tilen) { - ses->server->tiblob = kmalloc(tilen, GFP_KERNEL); - if (!ses->server->tiblob) { - cERROR(1, "Challenge target info allocation failure"); - return -ENOMEM; - } - memcpy(ses->server->tiblob, bcc_ptr + tioffset, tilen); - } - return 0; } @@ -466,12 +451,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, struct cifsSesInfo *ses, const struct nls_table *nls_cp, bool first) { - int rc; - unsigned int size; AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; __u32 flags; unsigned char *tmp; - struct ntlmv2_resp ntlmv2_response = {}; + char ntlm_session_key[CIFS_SESS_KEY_SIZE]; memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; @@ -494,25 +477,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->LmChallengeResponse.Length = 0; sec_blob->LmChallengeResponse.MaximumLength = 0; - sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); - rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); - if (rc) { - cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc); - goto setup_ntlmv2_ret; - } - size = sizeof(struct ntlmv2_resp); - memcpy(tmp, (char *)&ntlmv2_response, size); - tmp += size; - if (ses->server->tilen > 0) { - memcpy(tmp, ses->server->tiblob, ses->server->tilen); - tmp += ses->server->tilen; - } else - ses->server->tilen = 0; + /* calculate session key, BB what about adding similar ntlmv2 path? */ + SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key); + if (first) + cifs_calculate_mac_key(&ses->server->mac_signing_key, + ntlm_session_key, ses->password); - sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + - ses->server->tilen); + memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE); + sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE); sec_blob->NtChallengeResponse.MaximumLength = - cpu_to_le16(size + ses->server->tilen); + cpu_to_le16(CIFS_SESS_KEY_SIZE); + + tmp += CIFS_SESS_KEY_SIZE; if (ses->domainName == NULL) { sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); @@ -524,6 +501,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ + len += 2; /* trailing null */ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); sec_blob->DomainName.MaximumLength = cpu_to_le16(len); @@ -540,6 +518,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUCS((__le16 *)tmp, ses->userName, MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ + len += 2; /* trailing null */ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); sec_blob->UserName.MaximumLength = cpu_to_le16(len); @@ -551,26 +530,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; - if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && - !calc_seckey(ses->server)) { - memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); - sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.MaximumLength = - cpu_to_le16(CIFS_CPHTXT_SIZE); - tmp += CIFS_CPHTXT_SIZE; - } else { - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); - sec_blob->SessionKey.Length = 0; - sec_blob->SessionKey.MaximumLength = 0; - } - - ses->server->sequence_number = 0; - -setup_ntlmv2_ret: - if (ses->server->tilen > 0) - kfree(ses->server->tiblob); - + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.Length = 0; + sec_blob->SessionKey.MaximumLength = 0; return tmp - pbuffer; } @@ -584,14 +546,15 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB, return; } -static int setup_ntlmssp_auth_req(char *ntlmsspblob, +static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, struct cifsSesInfo *ses, const struct nls_table *nls, bool first_time) { int bloblen; - bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls, + bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls, first_time); + pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen); return bloblen; } @@ -617,7 +580,6 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, struct key *spnego_key = NULL; __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ bool first_time; - char *ntlmsspblob; if (ses == NULL) return -EINVAL; @@ -728,7 +690,7 @@ ssetup_ntlmssp_authenticate: if (first_time) /* should this be moved into common code with similar ntlmv2 path? */ - cifs_calculate_session_key(&ses->server->session_key, + cifs_calculate_mac_key(&ses->server->mac_signing_key, ntlm_session_key, ses->password); /* copy session key */ @@ -767,21 +729,12 @@ ssetup_ntlmssp_authenticate: cpu_to_le16(sizeof(struct ntlmv2_resp)); /* calculate session key */ - rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); - if (rc) { - kfree(v2_sess_key); - goto ssetup_exit; - } + setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); /* FIXME: calculate MAC key */ memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp)); bcc_ptr += sizeof(struct ntlmv2_resp); kfree(v2_sess_key); - if (ses->server->tilen > 0) { - memcpy(bcc_ptr, ses->server->tiblob, - ses->server->tilen); - bcc_ptr += ses->server->tilen; - } if (ses->capabilities & CAP_UNICODE) { if (iov[0].iov_len % 2) { *bcc_ptr = 0; @@ -812,15 +765,15 @@ ssetup_ntlmssp_authenticate: } /* bail out if key is too long */ if (msg->sesskey_len > - sizeof(ses->server->session_key.data.krb5)) { + sizeof(ses->server->mac_signing_key.data.krb5)) { cERROR(1, "Kerberos signing key too long (%u bytes)", msg->sesskey_len); rc = -EOVERFLOW; goto ssetup_exit; } if (first_time) { - ses->server->session_key.len = msg->sesskey_len; - memcpy(ses->server->session_key.data.krb5, + ses->server->mac_signing_key.len = msg->sesskey_len; + memcpy(ses->server->mac_signing_key.data.krb5, msg->data, msg->sesskey_len); } pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; @@ -862,26 +815,12 @@ ssetup_ntlmssp_authenticate: if (phase == NtLmNegotiate) { setup_ntlmssp_neg_req(pSMB, ses); iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); - iov[1].iov_base = &pSMB->req.SecurityBlob[0]; } else if (phase == NtLmAuthenticate) { int blob_len; - ntlmsspblob = kmalloc(5 * - sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - cERROR(1, "Can't allocate NTLMSSP"); - rc = -ENOMEM; - goto ssetup_exit; - } - - blob_len = setup_ntlmssp_auth_req(ntlmsspblob, - ses, - nls_cp, - first_time); + blob_len = setup_ntlmssp_auth_req(pSMB, ses, + nls_cp, + first_time); iov[1].iov_len = blob_len; - iov[1].iov_base = ntlmsspblob; - pSMB->req.SecurityBlobLength = - cpu_to_le16(blob_len); /* Make sure that we tell the server that we are using the uid that it just gave us back on the response (challenge) */ @@ -891,6 +830,7 @@ ssetup_ntlmssp_authenticate: rc = -ENOSYS; goto ssetup_exit; } + iov[1].iov_base = &pSMB->req.SecurityBlob[0]; /* unicode strings must be word aligned */ if ((iov[0].iov_len + iov[1].iov_len) % 2) { *bcc_ptr = 0; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e0588cdf4cc5..82f78c4d6978 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(midQ->resp_buf, - ses->server, + &ses->server->mac_signing_key, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); @@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - ses->server, + &ses->server->mac_signing_key, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); @@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - ses->server, + &ses->server->mac_signing_key, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); -- cgit v1.2.2 From 639e7a913d81f918bfbf506e6ecd54664f787cbd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Sep 2010 11:50:09 -0400 Subject: cifs: eliminate redundant xdev check in cifs_rename The VFS always checks that the source and target of a rename are on the same vfsmount, and hence have the same superblock. So, this check is redundant. Remove it and simplify the error handling. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/inode.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 86a164f08a74..93f77d438d3c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1462,28 +1462,17 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, { char *fromName = NULL; char *toName = NULL; - struct cifs_sb_info *cifs_sb_source; - struct cifs_sb_info *cifs_sb_target; + struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; FILE_UNIX_BASIC_INFO *info_buf_source = NULL; FILE_UNIX_BASIC_INFO *info_buf_target; int xid, rc, tmprc; - cifs_sb_target = CIFS_SB(target_dir->i_sb); - cifs_sb_source = CIFS_SB(source_dir->i_sb); - tcon = cifs_sb_source->tcon; + cifs_sb = CIFS_SB(source_dir->i_sb); + tcon = cifs_sb->tcon; xid = GetXid(); - /* - * BB: this might be allowed if same server, but different share. - * Consider adding support for this - */ - if (tcon != cifs_sb_target->tcon) { - rc = -EXDEV; - goto cifs_rename_exit; - } - /* * we already have the rename sem so we do not need to * grab it again here to protect the path integrity @@ -1519,17 +1508,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, info_buf_target = info_buf_source + 1; tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, info_buf_source, - cifs_sb_source->local_nls, - cifs_sb_source->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc != 0) goto unlink_target; - tmprc = CIFSSMBUnixQPathInfo(xid, tcon, - toName, info_buf_target, - cifs_sb_target->local_nls, - /* remap based on source sb */ - cifs_sb_source->mnt_cifs_flags & + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName, + info_buf_target, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == -- cgit v1.2.2 From 4266d9118f85b050a341992f0cfab40d392ef32c Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 8 Sep 2010 21:17:29 +0000 Subject: [CIFS] ntlmv2/ntlmssp remove-unused-function CalcNTLMv2_partial_mac_key This function is not used, so remove the definition and declaration. Reviewed-by: Jeff Layton Signed-off-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/cifsencrypt.c | 57 --------------------------------------------------- fs/cifs/cifsproto.h | 2 -- 2 files changed, 59 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 847628dfdc44..35042d8f7338 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -223,63 +223,6 @@ int cifs_calculate_mac_key(struct mac_key *key, const char *rn, return 0; } -int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *ses, - const struct nls_table *nls_info) -{ - char temp_hash[16]; - struct HMACMD5Context ctx; - char *ucase_buf; - __le16 *unicode_buf; - unsigned int i, user_name_len, dom_name_len; - - if (ses == NULL) - return -EINVAL; - - E_md4hash(ses->password, temp_hash); - - hmac_md5_init_limK_to_64(temp_hash, 16, &ctx); - user_name_len = strlen(ses->userName); - if (user_name_len > MAX_USERNAME_SIZE) - return -EINVAL; - if (ses->domainName == NULL) - return -EINVAL; /* BB should we use CIFS_LINUX_DOM */ - dom_name_len = strlen(ses->domainName); - if (dom_name_len > MAX_USERNAME_SIZE) - return -EINVAL; - - ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL); - if (ucase_buf == NULL) - return -ENOMEM; - unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); - if (unicode_buf == NULL) { - kfree(ucase_buf); - return -ENOMEM; - } - - for (i = 0; i < user_name_len; i++) - ucase_buf[i] = nls_info->charset2upper[(int)ses->userName[i]]; - ucase_buf[i] = 0; - user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, - MAX_USERNAME_SIZE*2, nls_info); - unicode_buf[user_name_len] = 0; - user_name_len++; - - for (i = 0; i < dom_name_len; i++) - ucase_buf[i] = nls_info->charset2upper[(int)ses->domainName[i]]; - ucase_buf[i] = 0; - dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, - MAX_USERNAME_SIZE*2, nls_info); - - unicode_buf[user_name_len + dom_name_len] = 0; - hmac_md5_update((const unsigned char *) unicode_buf, - (user_name_len+dom_name_len)*2, &ctx); - - hmac_md5_final(ses->server->ntlmv2_hash, &ctx); - kfree(ucase_buf); - kfree(unicode_buf); - return 0; -} - #ifdef CONFIG_CIFS_WEAK_PW_HASH void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, char *lnm_session_key) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1f5450814087..f399b16cb7d5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -365,8 +365,6 @@ extern int cifs_verify_signature(struct smb_hdr *, __u32 expected_sequence_number); extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, const char *pass); -extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *, - const struct nls_table *); extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, const struct nls_table *); -- cgit v1.2.2 From 522bbe65a2415fabce618186fc7777eb4c502989 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Sep 2010 12:00:49 -0400 Subject: cifs: prevent cifsd from exiting prematurely When cifs_demultiplex_thread exits, it does a number of cleanup tasks including freeing the TCP_Server_Info struct. Much of the existing code in cifs assumes that when there is a cisfSesInfo struct, that it holds a reference to a valid TCP_Server_Info struct. We can never allow cifsd to exit when a cifsSesInfo struct is still holding a reference to the server. The server pointers will then point to freed memory. This patch eliminates a couple of questionable conditions where it does this. The idea here is to make an -EINTR return from kernel_recvmsg behave the same way as -ERESTARTSYS or -EAGAIN. If the task was signalled from cifs_put_tcp_session, then tcpStatus will be CifsExiting, and the kernel_recvmsg call will return quickly. There's also another condition where this can occur too -- if the tcpStatus is still in CifsNew, then it will also exit if the server closes the socket prematurely. I think we'll probably also need to fix that situation, but that requires a bit more consideration. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0ea52e9f9065..5f68b968faa7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -400,7 +400,9 @@ incomplete_rcv: cFYI(1, "call to reconnect done"); csocket = server->ssocket; continue; - } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { + } else if (length == -ERESTARTSYS || + length == -EAGAIN || + length == -EINTR) { msleep(1); /* minimum sleep to prevent looping allowing socket to clear and app threads to set tcpStatus CifsNeedReconnect if server hung */ @@ -422,10 +424,6 @@ incomplete_rcv: and so simply return error to mount */ break; } - if (!try_to_freeze() && (length == -EINTR)) { - cFYI(1, "cifsd thread killed"); - break; - } cFYI(1, "Reconnect after unexpected peek error %d", length); cifs_reconnect(server); @@ -522,8 +520,7 @@ incomplete_rcv: total_read += length) { length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, pdu_length - total_read, 0); - if ((server->tcpStatus == CifsExiting) || - (length == -EINTR)) { + if (server->tcpStatus == CifsExiting) { /* then will exit */ reconnect = 2; break; @@ -534,8 +531,9 @@ incomplete_rcv: /* Now we will reread sock */ reconnect = 1; break; - } else if ((length == -ERESTARTSYS) || - (length == -EAGAIN)) { + } else if (length == -ERESTARTSYS || + length == -EAGAIN || + length == -EINTR) { msleep(1); /* minimum sleep to prevent looping, allowing socket to clear and app threads to set tcpStatus -- cgit v1.2.2 From 7332f2a6217ee6925f83ef0e725013067ed316ba Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Sep 2010 12:00:49 -0400 Subject: cifs: eliminate some more premature cifsd exits If the tcpStatus is still CifsNew, the main cifs_demultiplex_loop can break out prematurely in some cases. This is wrong as we will almost always have other structures with pointers to the TCP_Server_Info. If the main loop breaks under any other condition other than tcpStatus == CifsExiting, then it'll face a use-after-free situation. I don't see any reason to treat a CifsNew tcpStatus differently than CifsGood. I believe we'll still want to attempt to reconnect in either case. What should happen in those situations is that the MIDs get marked as MID_RETRY_NEEDED. This will make CIFSSMBNegotiate return -EAGAIN, and then the caller can retry the whole thing on a newly reconnected socket. If that fails again in the same way, the caller of cifs_get_smb_ses should tear down the TCP_Server_Info struct. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 41 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5f68b968faa7..5fde83f0c75e 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -416,14 +416,6 @@ incomplete_rcv: } else continue; } else if (length <= 0) { - if (server->tcpStatus == CifsNew) { - cFYI(1, "tcp session abend after SMBnegprot"); - /* some servers kill the TCP session rather than - returning an SMB negprot error, in which - case reconnecting here is not going to help, - and so simply return error to mount */ - break; - } cFYI(1, "Reconnect after unexpected peek error %d", length); cifs_reconnect(server); @@ -464,27 +456,18 @@ incomplete_rcv: an error on SMB negprot response */ cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", pdu_length); - if (server->tcpStatus == CifsNew) { - /* if nack on negprot (rather than - ret of smb negprot error) reconnecting - not going to help, ret error to mount */ - break; - } else { - /* give server a second to - clean up before reconnect attempt */ - msleep(1000); - /* always try 445 first on reconnect - since we get NACK on some if we ever - connected to port 139 (the NACK is - since we do not begin with RFC1001 - session initialize frame) */ - server->addr.sockAddr.sin_port = - htons(CIFS_PORT); - cifs_reconnect(server); - csocket = server->ssocket; - wake_up(&server->response_q); - continue; - } + /* give server a second to clean up */ + msleep(1000); + /* always try 445 first on reconnect since we get NACK + * on some if we ever connected to port 139 (the NACK + * is since we do not begin with RFC1001 session + * initialize frame) + */ + server->addr.sockAddr.sin_port = htons(CIFS_PORT); + cifs_reconnect(server); + csocket = server->ssocket; + wake_up(&server->response_q); + continue; } else if (temp != (char) 0) { cERROR(1, "Unknown RFC 1002 frame"); cifs_dump_mem(" Received Data: ", (char *)smb_buffer, -- cgit v1.2.2 From 32670396e7fc6e4f37451a69339968985461a374 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Sep 2010 12:00:50 -0400 Subject: cifs: prevent possible memory corruption in cifs_demultiplex_thread cifs_demultiplex_thread sets the addr.sockAddr.sin_port without any regard for the socket family. While it may be that the error in question here never occurs on an IPv6 socket, it's probably best to be safe and set the port properly if it ever does. Break the port setting code out of cifs_fill_sockaddr and into a new function, and call that from cifs_demultiplex_thread. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/cifsproto.h | 3 ++- fs/cifs/connect.c | 3 ++- fs/cifs/netmisc.c | 22 +++++++++++++--------- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f399b16cb7d5..1d60c655e3e0 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); +extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port); extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, - unsigned short int port); + const unsigned short int port); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5fde83f0c75e..67dad54fbfa1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -463,7 +463,8 @@ incomplete_rcv: * is since we do not begin with RFC1001 session * initialize frame) */ - server->addr.sockAddr.sin_port = htons(CIFS_PORT); + cifs_set_port((struct sockaddr *) + &server->addr.sockAddr, CIFS_PORT); cifs_reconnect(server); csocket = server->ssocket; wake_up(&server->response_q); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index f97851119e6c..9aad47a2d62f 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) } int -cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, - const unsigned short int port) +cifs_set_port(struct sockaddr *addr, const unsigned short int port) { - if (!cifs_convert_address(dst, src, len)) - return 0; - - switch (dst->sa_family) { + switch (addr->sa_family) { case AF_INET: - ((struct sockaddr_in *)dst)->sin_port = htons(port); + ((struct sockaddr_in *)addr)->sin_port = htons(port); break; case AF_INET6: - ((struct sockaddr_in6 *)dst)->sin6_port = htons(port); + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); break; default: return 0; } - return 1; } +int +cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, + const unsigned short int port) +{ + if (!cifs_convert_address(dst, src, len)) + return 0; + return cifs_set_port(dst, port); +} + /***************************************************************************** convert a NT status code to a dos class/code *****************************************************************************/ -- cgit v1.2.2 From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 31 Aug 2010 15:52:27 +0200 Subject: mm: Move vma_stack_continue into mm.h So it can be used by all that need to check for that. Signed-off-by: Stefan Bader Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 439fc1f1c1c4..271afc48b9a5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; if (vma->vm_flags & VM_GROWSDOWN) - start += PAGE_SIZE; + if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) + start += PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, -- cgit v1.2.2 From 7a801ac6f5067539ceb5fad0fe90ec49fc156e47 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 9 Sep 2010 16:37:33 -0700 Subject: O_DIRECT: fix the splitting up of contiguous I/O commit c2c6ca4 (direct-io: do not merge logically non-contiguous requests) introduced a bug whereby all O_DIRECT I/Os were submitted a page at a time to the block layer. The problem is that the code expected dio->block_in_file to correspond to the current page in the dio. In fact, it corresponds to the previous page submitted via submit_page_section. This was purely an oversight, as the dio->cur_page_fs_offset field was introduced for just this purpose. This patch simply uses the correct variable when calculating whether there is a mismatch between contiguous logical blocks and contiguous physical blocks (as described in the comments). I also switched the if conditional following this check to an else if, to ensure that we never call dio_bio_submit twice for the same dio (in theory, this should not happen, anyway). I've tested this by running blktrace and verifying that a 64KB I/O was submitted as a single I/O. I also ran the patched kernel through xfstests' aio tests using xfs, ext4 (with 1k and 4k block sizes) and btrfs and verified that there were no regressions as compared to an unpatched kernel. Signed-off-by: Jeff Moyer Acked-by: Josef Bacik Cc: Christoph Hellwig Cc: Chris Mason Cc: [2.6.35.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/direct-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 51f270b479b6..48d74c7391d1 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio) int ret = 0; if (dio->bio) { - loff_t cur_offset = dio->block_in_file << dio->blkbits; + loff_t cur_offset = dio->cur_page_fs_offset; loff_t bio_next_offset = dio->logical_offset_in_bio + dio->bio->bi_size; @@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio) * Submit now if the underlying fs is about to perform a * metadata read */ - if (dio->boundary) + else if (dio->boundary) dio_bio_submit(dio); } -- cgit v1.2.2 From ed430fec756ad65f7cfba24f8ad17c3d5a403290 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 9 Sep 2010 16:37:36 -0700 Subject: proc: export uncached bit properly in /proc/kpageflags Fix the left-over old ifdef for PG_uncached in /proc/kpageflags. Now it's used by x86, too. Signed-off-by: Takashi Iwai Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/page.c b/fs/proc/page.c index 180cf5a0bd67..3b8b45660331 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); #endif -- cgit v1.2.2 From ee3aebdd8f5f8eac41c25c80ceee3d728f920f3b Mon Sep 17 00:00:00 2001 From: Jan Sembera Date: Thu, 9 Sep 2010 16:37:54 -0700 Subject: binfmt_misc: fix binfmt_misc priority Commit 74641f584da ("alpha: binfmt_aout fix") (May 2009) introduced a regression - binfmt_misc is now consulted after binfmt_elf, which will unfortunately break ia32el. ia32 ELF binaries on ia64 used to be matched using binfmt_misc and executed using wrapper. As 32bit binaries are now matched by binfmt_elf before bindmt_misc kicks in, the wrapper is ignored. The fix increases precedence of binfmt_misc to the original state. Signed-off-by: Jan Sembera Cc: Ivan Kokshaysky Cc: Al Viro Cc: Richard Henderson [2.6.everything.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a7528b913936..fd0cc0bf9a40 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); if (!err) { - err = register_binfmt(&misc_format); + err = insert_binfmt(&misc_format); if (err) unregister_filesystem(&bm_fs_type); } -- cgit v1.2.2 From 3ab04d5cf9736b7a4e9dfcf28285d8663b01aa0e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 9 Sep 2010 16:38:12 -0700 Subject: vfs: take O_NONBLOCK out of the O_* uniqueness test O_NONBLOCK on parisc has a dual value: #define O_NONBLOCK 000200004 /* HPUX has separate NDELAY & NONBLOCK */ It is caught by the O_* bits uniqueness check and leads to a parisc compile error. The fix would be to take O_NONBLOCK out. Signed-off-by: Wu Fengguang Signed-off-by: James Bottomley Cc: Jamie Lokier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index 6769fd0f35b8..f8cc34f542c3 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync); static int __init fcntl_init(void) { - /* please add new bits here to ensure allocation uniqueness */ - BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( + /* + * Please add new bits here to ensure allocation uniqueness. + * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY + * is defined as O_NONBLOCK on some platforms and not on others. + */ + BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | - O_TRUNC | O_APPEND | O_NONBLOCK | + O_TRUNC | O_APPEND | /* O_NONBLOCK | */ __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | -- cgit v1.2.2 From eee743fd7eac9f2ea69ad06d093dfb5a12538fe5 Mon Sep 17 00:00:00 2001 From: "Jorge Boncompte [DTI2]" Date: Thu, 9 Sep 2010 16:38:19 -0700 Subject: minix: fix regression in minix_mkdir() Commit 9eed1fb721c ("minix: replace inode uid,gid,mode init with helper") broke directory creation on minix filesystems. Fix it by passing the needed mode flag to inode init helper. Signed-off-by: Jorge Boncompte [DTI2] Cc: Dmitry Monakhov Cc: Al Viro Cc: [2.6.35.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/minix/namei.c b/fs/minix/namei.c index e20ee85955d1..f3f3578393a4 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) inode_inc_link_count(dir); - inode = minix_new_inode(dir, mode, &err); + inode = minix_new_inode(dir, S_IFDIR | mode, &err); if (!inode) goto out_dir; -- cgit v1.2.2 From a122eb2fdfd78b58c6dd992d6f4b1aaef667eef9 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Mon, 6 Sep 2010 18:24:57 -0400 Subject: xfs: prevent reading uninitialized stack memory The XFS_IOC_FSGETXATTR ioctl allows unprivileged users to read 12 bytes of uninitialized stack memory, because the fsxattr struct declared on the stack in xfs_ioc_fsgetxattr() does not alter (or zero) the 12-byte fsx_pad member before copying it back to the user. This patch takes care of it. Signed-off-by: Dan Rosenberg Reviewed-by: Eric Sandeen Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 4fec427b83ef..3b9e626f7cd1 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr( { struct fsxattr fa; + memset(&fa, 0, sizeof(struct fsxattr)); + xfs_ilock(ip, XFS_ILOCK_SHARED); fa.fsx_xflags = xfs_ip2xflags(ip); fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; -- cgit v1.2.2 From 1b528181b2ffa14721fb28ad1bd539fe1732c583 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 7 Sep 2010 19:35:49 -0700 Subject: setup_arg_pages: diagnose excessive argument size The CONFIG_STACK_GROWSDOWN variant of setup_arg_pages() does not check the size of the argument/environment area on the stack. When it is unworkably large, shift_arg_pages() hits its BUG_ON. This is exploitable with a very large RLIMIT_STACK limit, to create a crash pretty easily. Check that the initial stack is not too large to make it possible to map in any executable. We're not checking that the actual executable (or intepreter, for binfmt_elf) will fit. So those mappings might clobber part of the initial stack mapping. But that is just userland lossage that userland made happen, not a kernel problem. Signed-off-by: Roland McGrath Reviewed-by: KOSAKI Motohiro Signed-off-by: Linus Torvalds --- fs/exec.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 2d9455282744..1b63237fc6dc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -594,6 +594,11 @@ int setup_arg_pages(struct linux_binprm *bprm, #else stack_top = arch_align_stack(stack_top); stack_top = PAGE_ALIGN(stack_top); + + if (unlikely(stack_top < mmap_min_addr) || + unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) + return -ENOMEM; + stack_shift = vma->vm_end - stack_top; bprm->p -= stack_shift; -- cgit v1.2.2 From 7993bc1f4663c0db67bb8f0d98e6678145b387cd Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 7 Sep 2010 19:36:28 -0700 Subject: execve: improve interactivity with large arguments This adds a preemption point during the copying of the argument and environment strings for execve, in copy_strings(). There is already a preemption point in the count() loop, so this doesn't add any new points in the abstract sense. When the total argument+environment strings are very large, the time spent copying them can be much more than a normal user time slice. So this change improves the interactivity of the rest of the system when one process is doing an execve with very large arguments. Signed-off-by: Roland McGrath Reviewed-by: KOSAKI Motohiro Signed-off-by: Linus Torvalds --- fs/exec.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 1b63237fc6dc..6f2d777431a8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -419,6 +419,8 @@ static int copy_strings(int argc, const char __user *const __user *argv, while (len > 0) { int offset, bytes_to_copy; + cond_resched(); + offset = pos % PAGE_SIZE; if (offset == 0) offset = PAGE_SIZE; -- cgit v1.2.2 From 9aea5a65aa7a1af9a4236dfaeb0088f1624f9919 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 7 Sep 2010 19:37:06 -0700 Subject: execve: make responsive to SIGKILL with large arguments An execve with a very large total of argument/environment strings can take a really long time in the execve system call. It runs uninterruptibly to count and copy all the strings. This change makes it abort the exec quickly if sent a SIGKILL. Note that this is the conservative change, to interrupt only for SIGKILL, by using fatal_signal_pending(). It would be perfectly correct semantics to let any signal interrupt the string-copying in execve, i.e. use signal_pending() instead of fatal_signal_pending(). We'll save that change for later, since it could have user-visible consequences, such as having a timer set too quickly make it so that an execve can never complete, though it always happened to work before. Signed-off-by: Roland McGrath Reviewed-by: KOSAKI Motohiro Signed-off-by: Linus Torvalds --- fs/exec.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 6f2d777431a8..828dd2461d6b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max) argv++; if (i++ >= max) return -E2BIG; + + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; cond_resched(); } } @@ -419,6 +422,10 @@ static int copy_strings(int argc, const char __user *const __user *argv, while (len > 0) { int offset, bytes_to_copy; + if (fatal_signal_pending(current)) { + ret = -ERESTARTNOHAND; + goto out; + } cond_resched(); offset = pos % PAGE_SIZE; -- cgit v1.2.2 From 51749e47e191db8e588ad5cebea731caf7b705d7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 8 Sep 2010 09:00:22 +0000 Subject: xfs: log IO completion workqueue is a high priority queue The workqueue implementation in 2.6.36-rcX has changed, resulting in the workqueues no longer having dedicated threads for work processing. This has caused severe livelocks under heavy parallel create workloads because the log IO completions have been getting held up behind metadata IO completions. Hence log commits would stall, memory allocation would stall because pages could not be cleaned, and lock contention on the AIL during inode IO completion processing was being seen to slow everything down even further. By making the log Io completion workqueue a high priority workqueue, they are queued ahead of all data/metadata IO completions and processed before the data/metadata completions. Hence the log never gets stalled, and operations needed to clean memory can continue as quickly as possible. This avoids the livelock conditions and allos the system to keep running under heavy load as per normal. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index d72cf2bb054a..286e36e21dae 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1932,7 +1932,8 @@ xfs_buf_init(void) if (!xfs_buf_zone) goto out; - xfslogd_workqueue = create_workqueue("xfslogd"); + xfslogd_workqueue = alloc_workqueue("xfslogd", + WQ_RESCUER | WQ_HIGHPRI, 1); if (!xfslogd_workqueue) goto out_free_buf_zone; -- cgit v1.2.2 From 5e64b0d9e86ffff8b299556341d85319117539e9 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 7 Sep 2010 13:30:05 +0800 Subject: ocfs2/lockdep: Move ip_xattr_sem out of ocfs2_xattr_get_nolock. As the name shows, we shouldn't have any lock in ocfs2_xattr_get_nolock. so lift ip_xattr_sem to the caller. This should be safe for us since the only 2 callers are: 1. ocfs2_xattr_get which will lock the resources. 2. ocfs2_mknod which don't need this locking. And this also resolves the following lockdep warning. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.35+ #5 ------------------------------------------------------- reflink/30027 is trying to acquire lock: (&oi->ip_alloc_sem){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2] but task is already holding lock: (&oi->ip_xattr_sem){++++..}, at: [] ocfs2_reflink_ioctl+0x68b/0x1226 [ocfs2] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&oi->ip_xattr_sem){++++..}: [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] down_read+0x34/0x47 [] ocfs2_xattr_get_nolock+0xa0/0x4e6 [ocfs2] [] ocfs2_get_acl_nolock+0x5c/0x132 [ocfs2] [] ocfs2_init_acl+0x60/0x243 [ocfs2] [] ocfs2_mknod+0xae8/0xfea [ocfs2] [] ocfs2_create+0x9d/0x105 [ocfs2] [] vfs_create+0x9b/0xf4 [] do_last+0x2fd/0x5be [] do_filp_open+0x1fb/0x572 [] do_sys_open+0x5a/0xe7 [] sys_open+0x1b/0x1d [] system_call_fastpath+0x16/0x1b -> #2 (jbd2_handle){+.+...}: [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] start_this_handle+0x4a3/0x4bc [jbd2] [] jbd2__journal_start+0xba/0xee [jbd2] [] jbd2_journal_start+0xe/0x10 [jbd2] [] ocfs2_start_trans+0xb7/0x19b [ocfs2] [] ocfs2_mknod+0x73e/0xfea [ocfs2] [] ocfs2_create+0x9d/0x105 [ocfs2] [] vfs_create+0x9b/0xf4 [] do_last+0x2fd/0x5be [] do_filp_open+0x1fb/0x572 [] do_sys_open+0x5a/0xe7 [] sys_open+0x1b/0x1d [] system_call_fastpath+0x16/0x1b -> #1 (&journal->j_trans_barrier){.+.+..}: [] __lock_acquire+0x79a/0x7f1 [] lock_release_non_nested+0x1e5/0x24b [] lock_release+0x158/0x17a [] __mutex_unlock_slowpath+0xbf/0x11b [] mutex_unlock+0x9/0xb [] ocfs2_free_ac_resource+0x31/0x67 [ocfs2] [] ocfs2_free_alloc_context+0x11/0x1d [ocfs2] [] ocfs2_write_begin_nolock+0x141e/0x159b [ocfs2] [] ocfs2_write_begin+0x11e/0x1e7 [ocfs2] [] generic_file_buffered_write+0x10c/0x210 [] ocfs2_file_aio_write+0x4cc/0x6d3 [ocfs2] [] do_sync_write+0xc2/0x106 [] vfs_write+0xae/0x131 [] sys_write+0x47/0x6f [] system_call_fastpath+0x16/0x1b -> #0 (&oi->ip_alloc_sem){+.+.+.}: [] validate_chain+0x727/0xd68 [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] down_write+0x31/0x52 [] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2] [] ocfs2_ioctl+0x61a/0x656 [ocfs2] [] vfs_ioctl+0x2a/0x9d [] do_vfs_ioctl+0x45d/0x4ae [] sys_ioctl+0x57/0x7a [] system_call_fastpath+0x16/0x1b Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d03469f61801..06fa5e77c40e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode, xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; - down_read(&oi->ip_xattr_sem); ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, buffer_size, &xis); if (ret == -ENODATA && di->i_xattr_loc) ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); - up_read(&oi->ip_xattr_sem); return ret; } @@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode, mlog_errno(ret); return ret; } + down_read(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, name, buffer, buffer_size); + up_read(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock(inode, 0); -- cgit v1.2.2 From 07eaac9438b13ec0b863111698b91ccec8f3b8d4 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 7 Sep 2010 13:30:06 +0800 Subject: ocfs2: Fix lockdep warning in reflink. This patch change mutex_lock to a new subclass and add a new inode lock subclass for the target inode which caused this lockdep warning. ============================================= [ INFO: possible recursive locking detected ] 2.6.35+ #5 --------------------------------------------- reflink/11086 is trying to acquire lock: (Meta){+++++.}, at: [] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] but task is already holding lock: (Meta){+++++.}, at: [] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2] other info that might help us debug this: 6 locks held by reflink/11086: #0: (&sb->s_type->i_mutex_key#15/1){+.+.+.}, at: [] lookup_create+0x26/0x97 #1: (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x4d3/0x1229 [ocfs2] #2: (Meta){+++++.}, at: [] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2] #3: (&oi->ip_xattr_sem){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x68b/0x1229 [ocfs2] #4: (&oi->ip_alloc_sem){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x69a/0x1229 [ocfs2] #5: (&sb->s_type->i_mutex_key#15/2){+.+...}, at: [] ocfs2_reflink_ioctl+0x882/0x1229 [ocfs2] stack backtrace: Pid: 11086, comm: reflink Not tainted 2.6.35+ #5 Call Trace: [] validate_chain+0x56e/0xd68 [] ? mark_held_locks+0x49/0x69 [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] __ocfs2_cluster_lock+0x975/0xa0d [ocfs2] [] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] ? ocfs2_wait_for_recovery+0x15/0x8a [ocfs2] [] ocfs2_inode_lock_full_nested+0x1ac/0xdc5 [ocfs2] [] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] ? trace_hardirqs_on_caller+0x10b/0x12f [] ? debug_mutex_free_waiter+0x4f/0x53 [] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] ? ocfs2_file_lock_res_init+0x66/0x78 [ocfs2] [] ? might_fault+0x40/0x8d [] ocfs2_ioctl+0x61a/0x656 [ocfs2] [] ? mntput_no_expire+0x1d/0xb0 [] ? path_put+0x2c/0x31 [] vfs_ioctl+0x2a/0x9d [] do_vfs_ioctl+0x45d/0x4ae [] ? _raw_spin_unlock+0x26/0x2a [] ? sysret_check+0x27/0x62 [] sys_ioctl+0x57/0x7a [] system_call_fastpath+0x16/0x1b Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.h | 1 + fs/ocfs2/refcounttree.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d1ce48e1b3d6..1d596d8c4a4a 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -84,6 +84,7 @@ enum { OI_LS_PARENT, OI_LS_RENAME1, OI_LS_RENAME2, + OI_LS_REFLINK_TARGET, }; int ocfs2_dlm_init(struct ocfs2_super *osb); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 0afeda83120f..efdd75607406 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4201,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, goto out; } - mutex_lock(&new_inode->i_mutex); - ret = ocfs2_inode_lock(new_inode, &new_bh, 1); + mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, + OI_LS_REFLINK_TARGET); if (ret) { mlog_errno(ret); goto out_unlock; -- cgit v1.2.2 From 0f4da216b8c3c35c90ecd18e1899c6f125957c2b Mon Sep 17 00:00:00 2001 From: Tristan Ye Date: Wed, 8 Sep 2010 17:12:38 +0800 Subject: Ocfs2: Re-access the journal after ocfs2_insert_extent() in dxdir codes. In ocfs2_dx_dir_rebalance(), we need to rejournal_acess the blocks after calling ocfs2_insert_extent() since growing an extent tree may trigger ocfs2_extend_trans(), which makes previous journal_access meaningless. Signed-off-by: Tristan Ye Signed-off-by: Joel Becker --- fs/ocfs2/dir.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f04ebcfffc4a..c49f6de0e7ab 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, goto out_commit; } + cpos = split_hash; + ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, + data_ac, meta_ac, new_dx_leaves, + num_dx_leaves); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + for (i = 0; i < num_dx_leaves; i++) { ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), orig_dx_leaves[i], @@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, mlog_errno(ret); goto out_commit; } - } - cpos = split_hash; - ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, - data_ac, meta_ac, new_dx_leaves, - num_dx_leaves); - if (ret) { - mlog_errno(ret); - goto out_commit; + ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), + new_dx_leaves[i], + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } } ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, -- cgit v1.2.2 From 228ac6357718df2d5c8d70210fa51b2225aab5ee Mon Sep 17 00:00:00 2001 From: Tristan Ye Date: Fri, 10 Sep 2010 10:16:33 +0800 Subject: Ocfs2: Handle empty list in lockres_seq_start() for dlmdebug.c This patch tries to handle the case in which list 'dlm->tracking_list' is empty, to avoid accessing an invalid pointer. It fixes the following oops: http://oss.oracle.com/bugzilla/show_bug.cgi?id=1287 Signed-off-by: Tristan Ye Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmdebug.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5efdd37dfe48..901ca52bf86b 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) spin_lock(&dlm->track_lock); if (oldres) track_list = &oldres->tracking; - else + else { track_list = &dlm->tracking_list; + if (list_empty(track_list)) { + dl = NULL; + spin_unlock(&dlm->track_lock); + goto bail; + } + } list_for_each_entry(res, track_list, tracking) { if (&res->tracking == &dlm->tracking_list) @@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) } else dl = NULL; +bail: /* passed to seq_show */ return dl; } -- cgit v1.2.2 From ca04d9c3ec721e474f00992efc1b1afb625507f5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 26 Aug 2010 16:12:01 -0700 Subject: ceph: fix null pointer deref on anon root dentry release When we release a root dentry, particularly after a splice, the parent (actually our) inode was evaluating to NULL and was getting dereferenced by ceph_snap(). This is reproduced by something as simple as mount -t ceph monhost:/a/b mnt mount -t ceph monhost:/a mnt2 ls mnt2 A splice_dentry() would kill the old 'b' inode's root dentry, and we'd crash while releasing it. Fix by checking for both the ROOT and NULL cases explicitly. We only need to invalidate the parent dir when we have a correct parent to invalidate. Signed-off-by: Sage Weil --- fs/ceph/dir.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6e4f43ff23ec..a1986eb52045 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1021,11 +1021,15 @@ out_touch: static void ceph_dentry_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - struct inode *parent_inode = dentry->d_parent->d_inode; - u64 snapid = ceph_snap(parent_inode); + struct inode *parent_inode = NULL; + u64 snapid = CEPH_NOSNAP; + if (!IS_ROOT(dentry)) { + parent_inode = dentry->d_parent->d_inode; + if (parent_inode) + snapid = ceph_snap(parent_inode); + } dout("dentry_release %p parent %p\n", dentry, parent_inode); - if (parent_inode && snapid != CEPH_SNAPDIR) { struct ceph_inode_info *ci = ceph_inode(parent_inode); -- cgit v1.2.2 From 3d4401d9d0aef5c40706350685ddea3df6708496 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Fri, 3 Sep 2010 12:57:11 -0700 Subject: ceph: fix pagelist kunmap tail A wrong parameter was passed to the kunmap. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/pagelist.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c index b6859f47d364..46a368b6dce5 100644 --- a/fs/ceph/pagelist.c +++ b/fs/ceph/pagelist.c @@ -5,10 +5,18 @@ #include "pagelist.h" +static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) +{ + struct page *page = list_entry(pl->head.prev, struct page, + lru); + kunmap(page); +} + int ceph_pagelist_release(struct ceph_pagelist *pl) { if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); + while (!list_empty(&pl->head)) { struct page *page = list_first_entry(&pl->head, struct page, lru); @@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl) pl->room += PAGE_SIZE; list_add_tail(&page->lru, &pl->head); if (pl->mapped_tail) - kunmap(pl->mapped_tail); + ceph_pagelist_unmap_tail(pl); pl->mapped_tail = kmap(page); return 0; } -- cgit v1.2.2 From 3612abbd5df6baa9ca3e0777c6c8646e202d3f66 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 7 Sep 2010 15:59:27 -0700 Subject: ceph: fix reconnect encoding for old servers Fix the reconnect encoding to encode the cap record when the MDS does not have the FLOCK capability (i.e., pre v0.22). Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f091b1351786..fad95f8f2608 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, num_fcntl_locks, num_flock_locks); unlock_kernel(); + } else { + err = ceph_pagelist_append(pagelist, &rec, reclen); } out_free: -- cgit v1.2.2 From a77d9f7dce7600058d56f0670ed29d77abffcde2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 11 Sep 2010 10:55:25 -0700 Subject: ceph: fix file offset wrapping at 4GB on 32-bit archs Cast the value before shifting so that we don't run out of bits with a 32-bit unsigned long. This fixes wrapping of high file offsets into the low 4GB of a file on disk, and the subsequent data corruption for large files. Signed-off-by: Sage Weil --- fs/ceph/addr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4cfce1ee31fa..50461b8c23a4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -766,7 +766,8 @@ get_more_pages: /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = page->index << PAGE_CACHE_SHIFT; + offset = (unsigned long long)page->index + << PAGE_CACHE_SHIFT; len = wsize; req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, -- cgit v1.2.2 From b1bde04c6d9a120dec602cc8a70b8a7f21600883 Mon Sep 17 00:00:00 2001 From: Fabio Olive Leite Date: Sun, 12 Sep 2010 19:55:25 -0400 Subject: Remove incorrect do_vfs_lock message The do_vfs_lock function on fs/nfs/file.c is only called if NLM is not being used, via the -onolock mount option. Therefore it cannot really be "out of sync with lock manager" when the local locking function called returns an error, as there will be no corresponding call to the NLM. For details, simply check the if/else on do_setlk and do_unlk on fs/nfs/file.c. Signed-Off-By: Fabio Olive Leite Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index eb51bd6201da..05bf3c0dc751 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -723,10 +723,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) default: BUG(); } - if (res < 0) - dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager" - " - error %d!\n", - __func__, res); return res; } -- cgit v1.2.2 From b20d37ca9561711c6a3c4b859c2855f49565e061 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Sep 2010 19:55:26 -0400 Subject: NFS: Fix a typo in nfs_sockaddr_match_ipaddr6 Reported-by: Ben Greear Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4e7df2adb212..e7340729af89 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -275,7 +275,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, sin1->sin6_scope_id != sin2->sin6_scope_id) return 0; - return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr); + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, -- cgit v1.2.2 From fbf3fdd2443965d9ba6fb4b5fecd1f6e0847218f Mon Sep 17 00:00:00 2001 From: Menyhart Zoltan Date: Sun, 12 Sep 2010 19:55:26 -0400 Subject: statfs() gives ESTALE error Hi, An NFS client executes a statfs("file", &buff) call. "file" exists / existed, the client has read / written it, but it has already closed it. user_path(pathname, &path) looks up "file" successfully in the directory-cache and restarts the aging timer of the directory-entry. Even if "file" has already been removed from the server, because the lookupcache=positive option I use, keeps the entries valid for a while. nfs_statfs() returns ESTALE if "file" has already been removed from the server. If the user application repeats the statfs("file", &buff) call, we are stuck: "file" remains young forever in the directory-cache. Signed-off-by: Zoltan Menyhart Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- fs/nfs/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ec3966e4706b..f4cbf0c306c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) goto out_err; error = server->nfs_client->rpc_ops->statfs(server, fh, &res); + if (unlikely(error == -ESTALE)) { + struct dentry *pd_dentry; + pd_dentry = dget_parent(dentry); + if (pd_dentry != NULL) { + nfs_zap_caches(pd_dentry->d_inode); + dput(pd_dentry); + } + } nfs_free_fattr(res.fattr); if (error < 0) goto out_err; -- cgit v1.2.2 From 827e3457022d0bb0b1bb8a0eb88501876fe7dcf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Sep 2010 19:57:50 -0400 Subject: SUNRPC: Fix the NFSv4 and RPCSEC_GSS Kconfig dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NFSv4 client's callback server calls svc_gss_principal(), which is defined in the auth_rpcgss.ko The NFSv4 server has the same dependency, and in addition calls svcauth_gss_flavor(), gss_mech_get_by_pseudoflavor(), gss_pseudoflavor_to_service() and gss_mech_put() from the same module. The module auth_rpcgss itself has no dependencies aside from sunrpc, so we only need to select RPCSEC_GSS. Reported-by: Uwe Kleine-König Signed-off-by: Trond Myklebust --- fs/nfs/Kconfig | 1 + fs/nfsd/Kconfig | 1 + 2 files changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 6c2aad49d731..f7e13db613cb 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -63,6 +63,7 @@ config NFS_V3_ACL config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS + select SUNRPC_GSS help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 95932f523aef..4264377552e2 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -69,6 +69,7 @@ config NFSD_V4 depends on NFSD && PROC_FS && EXPERIMENTAL select NFSD_V3 select FS_POSIX_ACL + select SUNRPC_GSS help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). -- cgit v1.2.2 From 62b2be591a9b12c550308ef7718a31abfc815b50 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Tue, 24 Aug 2010 18:13:59 +0000 Subject: fs/9p, net/9p: memory leak fixes Four memory leak fixes in the 9P code. Signed-off-by: Latchesar Ionkov Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c7c23eab9440..84159cf9c521 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1128,6 +1128,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); generic_fillattr(dentry->d_inode, stat); + p9stat_free(st); kfree(st); return 0; } @@ -1489,6 +1490,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) retval = strnlen(buffer, buflen); done: + p9stat_free(st); kfree(st); return retval; } -- cgit v1.2.2 From 5c25f347a7b00b2ebe0a55c4a3cfe4c3e1e8725e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 24 Aug 2010 10:30:49 +0000 Subject: fs/9p: Fix error handling in v9fs_get_sb This was introduced by 7cadb63d58a932041afa3f957d5cbb6ce69dcee5 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index f9311077de68..1d12ba0ed3db 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, fid = v9fs_session_init(v9ses, dev_name, data); if (IS_ERR(fid)) { retval = PTR_ERR(fid); + /* + * we need to call session_close to tear down some + * of the data structure setup by session_init + */ goto close_session; } @@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, retval = -ENOMEM; goto release_sb; } - sb->s_root = root; if (v9fs_proto_dotl(v9ses)) { @@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); if (IS_ERR(st)) { retval = PTR_ERR(st); - goto clunk_fid; + goto release_sb; } v9fs_stat2inode_dotl(st, root->d_inode); @@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, st = p9_client_stat(fid); if (IS_ERR(st)) { retval = PTR_ERR(st); - goto clunk_fid; + goto release_sb; } root->d_inode->i_ino = v9fs_qid2ino(&st->qid); @@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, v9fs_fid_add(root, fid); -P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); + P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); simple_set_mnt(mnt, sb); return 0; clunk_fid: p9_client_clunk(fid); - close_session: v9fs_session_close(v9ses); kfree(v9ses); return retval; - release_sb: + /* + * we will do the session_close and root dentry release + * in the below call. But we need to clunk fid, because we haven't + * attached the fid to dentry so it won't get clunked + * automatically. + */ + p9_client_clunk(fid); deactivate_locked_super(sb); return retval; } -- cgit v1.2.2 From 62726a7ab3a6a3624256172af055ff0a38c6ffa2 Mon Sep 17 00:00:00 2001 From: jvrao Date: Wed, 25 Aug 2010 16:26:21 +0000 Subject: 9p: Check for NULL fid in v9fs_dir_release() NULL fid should be handled in cases where we endup calling v9fs_dir_release() before even we instantiate the fid in filp. Signed-off-by: Venkateswararao Jujjuri Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 16c8a2a98c1b..899f168fd19c 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -292,9 +292,11 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) fid = filp->private_data; P9_DPRINTK(P9_DEBUG_VFS, - "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); + "v9fs_dir_release: inode: %p filp: %p fid: %d\n", + inode, filp, fid ? fid->fid : -1); filemap_write_and_wait(inode->i_mapping); - p9_client_clunk(fid); + if (fid) + p9_client_clunk(fid); return 0; } -- cgit v1.2.2 From 3c30750ffafbc32af040b09f777b67aa2486b063 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 30 Aug 2010 16:04:35 +0000 Subject: fs/9p: Use the correct dentry operations We should use the cached dentry operation only if caching mode is enabled Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 84159cf9c521..a6990bbf6056 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode, P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + if (v9ses->cache) + dentry->d_op = &v9fs_cached_dentry_operations; + else + dentry->d_op = &v9fs_dentry_operations; d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) -- cgit v1.2.2 From 1d76e3135733a06aa12bb35891c05f306b27b2d6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 30 Aug 2010 17:43:07 +0000 Subject: fs/9p: Don't use dotl version of mknod for dotu inode operations We should not use dotlversion for the dotu inode operations Signed-off-by: Aneesh Kumar K.V Signed-off-by: Eric Van Hensbergen --- fs/9p/vfs_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index a6990bbf6056..9e670d527646 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1947,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, - .mknod = v9fs_vfs_mknod_dotl, + .mknod = v9fs_vfs_mknod, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, -- cgit v1.2.2 From 467c525109d5d542d7d416b0c11bdd54610fe2f4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Sep 2010 11:39:20 -0700 Subject: ceph: fix dn offset during readdir_prepopulate When adding the readdir results to the cache, ceph_set_dentry_offset was clobbered our just-set offset. This can cause the readdir result offsets to get out of sync with the server. Add an argument to the helper so that it does not. This bug was introduced by 1cd3935bedccf592d44343890251452a6dd74fc4. Signed-off-by: Sage Weil --- fs/ceph/inode.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e7cca414da03..62377ec37edf 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) * the caller) if we fail. */ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, - bool *prehash) + bool *prehash, bool set_offset) { struct dentry *realdn; @@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } if ((!prehash || *prehash) && d_unhashed(dn)) d_rehash(dn); - ceph_set_dentry_offset(dn); + if (set_offset) + ceph_set_dentry_offset(dn); out: return dn; } @@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, d_delete(dn); goto done; } - dn = splice_dentry(dn, in, &have_lease); + dn = splice_dentry(dn, in, &have_lease, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, goto done; } dout(" linking snapped dir %p to dn %p\n", in, dn); - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, true); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1237,7 +1238,7 @@ retry_lookup: err = PTR_ERR(in); goto out; } - dn = splice_dentry(dn, in, NULL); + dn = splice_dentry(dn, in, NULL, false); if (IS_ERR(dn)) dn = NULL; } -- cgit v1.2.2 From 8bef9239ee1a42eb37d3f83bacf6a75f019c028d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 14 Sep 2010 15:45:44 -0700 Subject: ceph: correctly set 'follows' in flushsnap messages The 'follows' should match the seq for the snap context for the given snap cap, which is the context under which we have been dirtying and writing data and metadata. The snapshot that _contains_ those updates thus _follows_ that context's seq #. Signed-off-by: Sage Weil --- fs/ceph/snap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4868b9dcac5a..9e836afba341 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -467,7 +467,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) INIT_LIST_HEAD(&capsnap->ci_item); INIT_LIST_HEAD(&capsnap->flushing_item); - capsnap->follows = snapc->seq - 1; + capsnap->follows = snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = dirty; -- cgit v1.2.2 From cfc0bf6640dfd0f43bf8bfec5a475284809baa4d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 14 Sep 2010 15:50:59 -0700 Subject: ceph: stop sending FLUSHSNAPs when we hit a dirty capsnap Stop sending FLUSHSNAP messages when we hit a capsnap that has dirty_pages or is still writing. We'll send the newer capsnaps only after the older ones complete. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a2069b6680ae..9fbe9019155c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1227,7 +1227,7 @@ retry: * pages to be written out. */ if (capsnap->dirty_pages || capsnap->writing) - continue; + break; /* * if cap writeback already occurred, we should have dropped @@ -1276,8 +1276,8 @@ retry: &session->s_cap_snaps_flushing); spin_unlock(&inode->i_lock); - dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", - inode, capsnap, next_follows, capsnap->size); + dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", + inode, capsnap, capsnap->follows, capsnap->flush_tid); send_cap_msg(session, ceph_vino(inode).ino, 0, CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, -- cgit v1.2.2 From 460cf3411b858ad509d5255e0dfaf862a83c0299 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Sep 2010 11:38:24 -0400 Subject: cifs: fix potential double put of TCP session reference cifs_get_smb_ses must be called on a server pointer on which it holds an active reference. It first does a search for an existing SMB session. If it finds one, it'll put the server reference and then try to ensure that the negprot is done, etc. If it encounters an error at that point then it'll return an error. There's a potential problem here though. When cifs_get_smb_ses returns an error, the caller will also put the TCP server reference leading to a double-put. Fix this by having cifs_get_smb_ses only put the server reference if it found an existing session that it could use and isn't returning an error. Cc: stable@kernel.org Reviewed-by: Suresh Jayaraman Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 67dad54fbfa1..88c84a38bccb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1706,9 +1706,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) if (ses) { cFYI(1, "Existing smb sess found (status=%d)", ses->status); - /* existing SMB ses has a server reference already */ - cifs_put_tcp_session(server); - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses); if (rc) { @@ -1731,6 +1728,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) } } mutex_unlock(&ses->session_mutex); + + /* existing SMB ses has a server reference already */ + cifs_put_tcp_session(server); FreeXid(xid); return ses; } -- cgit v1.2.2 From 75e1c70fc31490ef8a373ea2a4bea2524099b478 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 10 Sep 2010 14:16:00 -0700 Subject: aio: check for multiplication overflow in do_io_submit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tavis Ormandy pointed out that do_io_submit does not do proper bounds checking on the passed-in iocb array:        if (unlikely(nr < 0))                return -EINVAL;        if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(iocbpp)))))                return -EFAULT;                      ^^^^^^^^^^^^^^^^^^ The attached patch checks for overflow, and if it is detected, the number of iocbs submitted is scaled down to a number that will fit in the long.  This is an ok thing to do, as sys_io_submit is documented as returning the number of iocbs submitted, so callers should handle a return value of less than the 'nr' argument passed in. Reported-by: Tavis Ormandy Signed-off-by: Jeff Moyer Signed-off-by: Linus Torvalds --- fs/aio.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 3006b5bc33d6..1320b2a05fb2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1659,6 +1659,9 @@ long do_io_submit(aio_context_t ctx_id, long nr, if (unlikely(nr < 0)) return -EINVAL; + if (unlikely(nr > LONG_MAX/sizeof(*iocbpp))) + nr = LONG_MAX/sizeof(*iocbpp); + if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) return -EFAULT; -- cgit v1.2.2 From ae00d4f37f4df56821331deb1028748110dd6dc9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 16 Sep 2010 16:26:51 -0700 Subject: ceph: fix cap_snap and realm split The cap_snap creation/queueing relies on both the current i_head_snapc _and_ the i_snap_realm pointers being correct, so that the new cap_snap can properly reference the old context and the new i_head_snapc can be updated to reference the new snaprealm's context. To fix this, we: - move inodes completely to the new (split) realm so that i_snap_realm is correct, and - generate the new snapc's _before_ queueing the cap_snaps in ceph_update_snap_trace(). Signed-off-by: Sage Weil --- fs/ceph/addr.c | 4 +-- fs/ceph/snap.c | 88 +++++++++++++++++++-------------------------------------- fs/ceph/super.h | 2 ++ 3 files changed, 33 insertions(+), 61 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 50461b8c23a4..efbc604001c8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) if (i_size < page_off + len) len = i_size - page_off; - dout("writepage %p page %p index %lu on %llu~%u\n", - inode, page, page->index, page_off, len); + dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", + inode, page, page->index, page_off, len, snapc); writeback_stat = atomic_long_inc_return(&client->writeback_count); if (writeback_stat > diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 9e836afba341..9e6eef14b7df 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); INIT_LIST_HEAD(&realm->empty_item); + INIT_LIST_HEAD(&realm->dirty_item); INIT_LIST_HEAD(&realm->inodes_with_caps); spin_lock_init(&realm->inodes_with_caps_lock); __insert_snap_realm(&mdsc->snap_realms, realm); @@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm; int invalidate = 0; int err = -ENOMEM; + LIST_HEAD(dirty_realms); dout("update_snap_trace deletion=%d\n", deletion); more: @@ -626,24 +628,6 @@ more: } } - if (le64_to_cpu(ri->seq) > realm->seq) { - dout("update_snap_trace updating %llx %p %lld -> %lld\n", - realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); - /* - * if the realm seq has changed, queue a cap_snap for every - * inode with open caps. we do this _before_ we update - * the realm info so that we prepare for writeback under the - * _previous_ snap context. - * - * ...unless it's a snap deletion! - */ - if (!deletion) - queue_realm_cap_snaps(realm); - } else { - dout("update_snap_trace %llx %p seq %lld unchanged\n", - realm->ino, realm, realm->seq); - } - /* ensure the parent is correct */ err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); if (err < 0) @@ -651,6 +635,8 @@ more: invalidate += err; if (le64_to_cpu(ri->seq) > realm->seq) { + dout("update_snap_trace updating %llx %p %lld -> %lld\n", + realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); realm->created = le64_to_cpu(ri->created); @@ -668,9 +654,17 @@ more: if (err < 0) goto fail; + /* queue realm for cap_snap creation */ + list_add(&realm->dirty_item, &dirty_realms); + invalidate = 1; } else if (!realm->cached_context) { + dout("update_snap_trace %llx %p seq %lld new\n", + realm->ino, realm, realm->seq); invalidate = 1; + } else { + dout("update_snap_trace %llx %p seq %lld unchanged\n", + realm->ino, realm, realm->seq); } dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, @@ -683,6 +677,14 @@ more: if (invalidate) rebuild_snap_realms(realm); + /* + * queue cap snaps _after_ we've built the new snap contexts, + * so that i_head_snapc can be set appropriately. + */ + list_for_each_entry(realm, &dirty_realms, dirty_item) { + queue_realm_cap_snaps(realm); + } + __cleanup_empty_realms(mdsc); return 0; @@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, }; struct inode *inode = ceph_find_inode(sb, vino); struct ceph_inode_info *ci; + struct ceph_snap_realm *oldrealm; if (!inode) continue; @@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, dout(" will move %p to split realm %llx %p\n", inode, realm->ino, realm); /* - * Remove the inode from the realm's inode - * list, but don't add it to the new realm - * yet. We don't want the cap_snap to be - * queued (again) by ceph_update_snap_trace() - * below. Queue it _now_, under the old context. + * Move the inode to the new realm */ spin_lock(&realm->inodes_with_caps_lock); list_del_init(&ci->i_snap_realm_item); + list_add(&ci->i_snap_realm_item, + &realm->inodes_with_caps); + oldrealm = ci->i_snap_realm; + ci->i_snap_realm = realm; spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&inode->i_lock); - ceph_queue_cap_snap(ci); + ceph_get_snap_realm(mdsc, realm); + ceph_put_snap_realm(mdsc, oldrealm); iput(inode); continue; @@ -880,43 +884,9 @@ skip_inode: ceph_update_snap_trace(mdsc, p, e, op == CEPH_SNAP_OP_DESTROY); - if (op == CEPH_SNAP_OP_SPLIT) { - /* - * ok, _now_ add the inodes into the new realm. - */ - for (i = 0; i < num_split_inos; i++) { - struct ceph_vino vino = { - .ino = le64_to_cpu(split_inos[i]), - .snap = CEPH_NOSNAP, - }; - struct inode *inode = ceph_find_inode(sb, vino); - struct ceph_inode_info *ci; - - if (!inode) - continue; - ci = ceph_inode(inode); - spin_lock(&inode->i_lock); - if (list_empty(&ci->i_snap_realm_item)) { - struct ceph_snap_realm *oldrealm = - ci->i_snap_realm; - - dout(" moving %p to split realm %llx %p\n", - inode, realm->ino, realm); - spin_lock(&realm->inodes_with_caps_lock); - list_add(&ci->i_snap_realm_item, - &realm->inodes_with_caps); - ci->i_snap_realm = realm; - spin_unlock(&realm->inodes_with_caps_lock); - ceph_get_snap_realm(mdsc, realm); - ceph_put_snap_realm(mdsc, oldrealm); - } - spin_unlock(&inode->i_lock); - iput(inode); - } - + if (op == CEPH_SNAP_OP_SPLIT) /* we took a reference when we created the realm, above */ ceph_put_snap_realm(mdsc, realm); - } __cleanup_empty_realms(mdsc); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c33897ae5725..c80bfbe27b05 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -690,6 +690,8 @@ struct ceph_snap_realm { struct list_head empty_item; /* if i have ref==0 */ + struct list_head dirty_item; /* if realm needs new context */ + /* the current set of snaps for this realm */ struct ceph_snap_context *cached_context; -- cgit v1.2.2 From 5f4874903df3562b9d5649fc1cf7b8c6bb238e42 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 9 Sep 2010 14:45:00 +0100 Subject: GFS2: gfs2_logd should be using interruptible waits Looks like this crept in, in a recent update. Reported-by: Krzysztof Urbaniak Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index cde1248a6225..ac750bd31a6f 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -932,7 +932,7 @@ int gfs2_logd(void *data) do { prepare_to_wait(&sdp->sd_logd_waitq, &wait, - TASK_UNINTERRUPTIBLE); + TASK_INTERRUPTIBLE); if (!gfs2_ail_flush_reqd(sdp) && !gfs2_jrnl_flush_reqd(sdp) && !kthread_should_stop()) -- cgit v1.2.2 From e835124c2be289515b918f2688ced4249e2de566 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Sep 2010 08:03:08 -0700 Subject: ceph: only send one flushsnap per cap_snap per mds session Sending multiple flushsnap messages is problematic because we ignore the response if the tid doesn't match, and the server may only respond to each one once. It's also a waste. So, skip cap_snaps that are already on the flushing list, unless the caller tells us to resend (because we are reconnecting). Signed-off-by: Sage Weil --- fs/ceph/caps.c | 19 +++++++++++++++---- fs/ceph/snap.c | 2 +- fs/ceph/super.h | 3 ++- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 9fbe9019155c..b01c316a8148 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, * asynchronously back to the MDS once sync writes complete and dirty * data is written out. * + * Unless @again is true, skip cap_snaps that were already sent to + * the MDS (i.e., during this session). + * * Called under i_lock. Takes s_mutex as needed. */ void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession) + struct ceph_mds_session **psession, + int again) __releases(ci->vfs_inode->i_lock) __acquires(ci->vfs_inode->i_lock) { @@ -1240,6 +1244,13 @@ retry: dout("no auth cap (migrating?), doing nothing\n"); goto out; } + + /* only flush each capsnap once */ + if (!again && !list_empty(&capsnap->flushing_item)) { + dout("already flushed %p, skipping\n", capsnap); + continue; + } + mds = ci->i_auth_cap->session->s_mds; mseq = ci->i_auth_cap->mseq; @@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) struct inode *inode = &ci->vfs_inode; spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, NULL); + __ceph_flush_snaps(ci, NULL, 0); spin_unlock(&inode->i_lock); } @@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, /* flush snaps first time around only */ if (!list_empty(&ci->i_cap_snaps)) - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); goto retry_locked; retry: spin_lock(&inode->i_lock); @@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, if (cap && cap->session == session) { dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, cap, capsnap); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 1); } else { pr_err("%p auth cap %p not mds%d ???\n", inode, cap, session->s_mds); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 9e6eef14b7df..190b6c4a6f2b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -717,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) igrab(inode); spin_unlock(&mdsc->snap_flush_lock); spin_lock(&inode->i_lock); - __ceph_flush_snaps(ci, &session); + __ceph_flush_snaps(ci, &session, 0); spin_unlock(&inode->i_lock); iput(inode); spin_lock(&mdsc->snap_flush_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c80bfbe27b05..b87638e84c4b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -828,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, struct ceph_snap_context *snapc); extern void __ceph_flush_snaps(struct ceph_inode_info *ci, - struct ceph_mds_session **psession); + struct ceph_mds_session **psession, + int again); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); -- cgit v1.2.2 From a43fb73101eaf6db0b33d22c152b338ab8b3edbb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Sep 2010 09:54:08 -0700 Subject: ceph: check mapping to determine if FILE_CACHE cap is used See if the i_data mapping has any pages to determine if the FILE_CACHE capability is currently in use, instead of assuming it is any time the rdcache_gen value is set (i.e., issued -> used). This allows the MDS RECALL_STATE process work for inodes that have cached pages. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b01c316a8148..73c153092f72 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) used |= CEPH_CAP_PIN; if (ci->i_rd_ref) used |= CEPH_CAP_FILE_RD; - if (ci->i_rdcache_ref || ci->i_rdcache_gen) + if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) used |= CEPH_CAP_FILE_CACHE; if (ci->i_wr_ref) used |= CEPH_CAP_FILE_WR; -- cgit v1.2.2 From be4f104dfd3b5e3ae262bff607965cfc38027dec Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 17 Sep 2010 12:30:31 -0700 Subject: ceph: select CRYPTO We select CRYPTO_AES, but not CRYPTO. Signed-off-by: Sage Weil --- fs/ceph/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc87b9c1d27e..0fcd2640c23f 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig @@ -3,6 +3,7 @@ config CEPH_FS depends on INET && EXPERIMENTAL select LIBCRC32C select CRYPTO_AES + select CRYPTO help Choose Y or M here to include support for mounting the experimental Ceph distributed file system. Ceph is an extremely -- cgit v1.2.2 From 50aff040363d31f87e94f38f1710973d99489951 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 21 Aug 2010 14:40:20 +0800 Subject: ocfs2/net: fix uninitialized ret in o2net_send_message_vec() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mmotm/fs/ocfs2/cluster/tcp.c: In function ‘o2net_send_message_vec’: mmotm/fs/ocfs2/cluster/tcp.c:980:6: warning: ‘ret’ may be used uninitialized in this function It seems a real bug introduced by commit 9af0b38ff3 (ocfs2/net: Use wait_event() in o2net_send_message_vec()). cc: Sunil Mushran Signed-off-by: Wu Fengguang Signed-off-by: Joel Becker --- fs/ocfs2/cluster/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1361997cf205..cbe2f057cc28 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, size_t caller_veclen, u8 target_node, int *status) { - int ret; + int ret = 0; struct o2net_msg *msg = NULL; size_t veclen, caller_bytes = 0; struct kvec *vec = NULL; -- cgit v1.2.2 From 112d421df2fddc0278584b084f4fcfedd144c5f4 Mon Sep 17 00:00:00 2001 From: Jan Harkes Date: Fri, 17 Sep 2010 23:26:01 -0400 Subject: Coda: mount hangs because of missed REQ_WRITE rename Coda's REQ_* defines were renamed to avoid clashes with the block layer (commit 4aeefdc69f7b: "coda: fixup clash with block layer REQ_* defines"). However one was missed and response messages are no longer matched with requests and waiting threads are no longer woken up. This patch fixes this. Signed-off-by: Jan Harkes [ Also fixed up whitespace while at it -Linus ] Signed-off-by: Linus Torvalds --- fs/coda/psdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index de89645777c7..116af7546cf0 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -184,8 +184,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, } /* adjust outsize. is this useful ?? */ - req->uc_outSize = nbytes; - req->uc_flags |= REQ_WRITE; + req->uc_outSize = nbytes; + req->uc_flags |= CODA_REQ_WRITE; count = nbytes; /* Convert filedescriptor into a file handle */ -- cgit v1.2.2 From 371d217ee1ff8b418b8f73fb2a34990f951ec2d4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Sep 2010 11:49:01 +0200 Subject: char: Mark /dev/zero and /dev/kmem as not capable of writeback These devices don't do any writeback but their device inodes still can get dirty so mark bdi appropriately so that bdi code does the right thing and files inodes to lists of bdi carrying the device inodes. Cc: stable@kernel.org Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/char_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/char_dev.c b/fs/char_dev.c index f80a4f25123c..143d393881cb 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = { #endif /* permit direct mmap, for read, write or exec */ BDI_CAP_MAP_DIRECT | - BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), + BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP | + /* no writeback happens */ + BDI_CAP_NO_ACCT_AND_WRITEBACK), }; static struct kobj_map *cdev_map; -- cgit v1.2.2 From 692ebd17c2905313fff3c504c249c6a0faad16ec Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Sep 2010 11:51:01 +0200 Subject: bdi: Fix warnings in __mark_inode_dirty for /dev/zero and friends Inodes of devices such as /dev/zero can get dirty for example via utime(2) syscall or due to atime update. Backing device of such inodes (zero_bdi, etc.) is however unable to handle dirty inodes and thus __mark_inode_dirty complains. In fact, inode should be rather dirtied against backing device of the filesystem holding it. This is generally a good rule except for filesystems such as 'bdev' or 'mtd_inodefs'. Inodes in these pseudofilesystems are referenced from ordinary filesystem inodes and carry mapping with real data of the device. Thus for these inodes we have to use inode->i_mapping->backing_dev_info as we did so far. We distinguish these filesystems by checking whether sb->s_bdi points to a non-trivial backing device or not. Example: Assume we have an ext3 filesystem on /dev/sda1 mounted on /. There's a device inode A described by a path "/dev/sdb" on this filesystem. This inode will be dirtied against backing device "8:0" after this patch. bdev filesystem contains block device inode B coupled with our inode A. When someone modifies a page of /dev/sdb, it's B that gets dirtied and the dirtying happens against the backing device "8:16". Thus both inodes get filed to a correct bdi list. Cc: stable@kernel.org Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 81e086d8aa57..5581122bd2c0 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,8 +52,6 @@ struct wb_writeback_work { #define CREATE_TRACE_POINTS #include -#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) - /* * We don't actually have pdflush, but this one is exported though /proc... */ @@ -71,6 +69,27 @@ int writeback_in_progress(struct backing_dev_info *bdi) return test_bit(BDI_writeback_running, &bdi->state); } +static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + /* + * For inodes on standard filesystems, we use superblock's bdi. For + * inodes on virtual filesystems, we want to use inode mapping's bdi + * because they can possibly point to something useful (think about + * block_dev filesystem). + */ + if (sb->s_bdi && sb->s_bdi != &noop_backing_dev_info) { + /* Some device inodes could play dirty tricks. Catch them... */ + WARN(bdi != sb->s_bdi && bdi_cap_writeback_dirty(bdi), + "Dirtiable inode bdi %s != sb bdi %s\n", + bdi->name, sb->s_bdi->name); + return sb->s_bdi; + } + return bdi; +} + static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { -- cgit v1.2.2 From 767b68e96993e29e3480d7ecdd9c4b84667c5762 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Wed, 22 Sep 2010 14:32:56 -0400 Subject: Prevent freeing uninitialized pointer in compat_do_readv_writev In 32-bit compatibility mode, the error handling for compat_do_readv_writev() may free an uninitialized pointer, potentially leading to all sorts of ugly memory corruption. This is reliably triggerable by unprivileged users by invoking the readv()/writev() syscalls with an invalid iovec pointer. The below patch fixes this to emulate the non-compat version. Introduced by commit b83733639a49 ("compat: factor out compat_rw_copy_check_uvector from compat_do_readv_writev") Signed-off-by: Dan Rosenberg Cc: stable@kernel.org (2.6.35) Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/compat.c b/fs/compat.c index 718c7062aec1..0644a154672b 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, { compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov; + struct iovec *iov = iovstack; ssize_t ret; io_fn_t fn; iov_fn_t fnv; -- cgit v1.2.2 From c227e69028473c7c7994a9b0a2cc0034f3f7e0fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Sep 2010 13:04:54 -0700 Subject: /proc/vmcore: fix seeking Commit 73296bc611 ("procfs: Use generic_file_llseek in /proc/vmcore") broke seeking on /proc/vmcore. This changes it back to use default_llseek in order to restore the original behaviour. The problem with generic_file_llseek is that it only allows seeks up to inode->i_sb->s_maxbytes, which is zero on procfs and some other virtual file systems. We should merge generic_file_llseek and default_llseek some day and clean this up in a proper way, but for 2.6.35/36, reverting vmcore is the safer solution. Signed-off-by: Arnd Bergmann Cc: Frederic Weisbecker Reported-by: CAI Qian Tested-by: CAI Qian Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 91c817ff02c3..2367fb3f70bc 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, static const struct file_operations proc_vmcore_operations = { .read = read_vmcore, - .llseek = generic_file_llseek, + .llseek = default_llseek, }; static struct vmcore* __init get_new_element(void) -- cgit v1.2.2 From a0c42bac79731276c9b2f28d54f9e658fcf843a2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 22 Sep 2010 13:05:03 -0700 Subject: aio: do not return ERESTARTSYS as a result of AIO OCFS2 can return ERESTARTSYS from its write function when the process is signalled while waiting for a cluster lock (and the filesystem is mounted with intr mount option). Generally, it seems reasonable to allow filesystems to return this error code from its IO functions. As we must not leak ERESTARTSYS (and similar error codes) to userspace as a result of an AIO operation, we have to properly convert it to EINTR inside AIO code (restarting the syscall isn't really an option because other AIO could have been already submitted by the same io_submit syscall). Signed-off-by: Jan Kara Reviewed-by: Jeff Moyer Cc: Christoph Hellwig Cc: Zach Brown Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/aio.c b/fs/aio.c index 1320b2a05fb2..250b0a73c8a8 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) */ ret = retry(iocb); - if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) + if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { + /* + * There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || + ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)) + ret = -EINTR; aio_complete(iocb, ret, 0); + } out: spin_lock_irq(&ctx->ctx_lock); -- cgit v1.2.2 From 1c2499ae87f828eabddf6483b0dfc11da1100c07 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 22 Sep 2010 13:05:06 -0700 Subject: /proc/pid/smaps: fix dirty pages accounting Currently, /proc//smaps has wrong dirty pages accounting. Shared_Dirty and Private_Dirty output only pte dirty pages and ignore PG_dirty page flag. It is difference against documentation, but also inconsistent against Referenced field. (Referenced checks both pte and page flags) This patch fixes it. Test program: large-array.c --------------------------------------------------- #include #include #include #include char array[1*1024*1024*1024L]; int main(void) { memset(array, 1, sizeof(array)); pause(); return 0; } --------------------------------------------------- Test case: 1. run ./large-array 2. cat /proc/`pidof large-array`/smaps 3. swapoff -a 4. cat /proc/`pidof large-array`/smaps again Test result: 00601000-40601000 rw-p 00000000 00:00 0 Size: 1048576 kB Rss: 1048576 kB Pss: 1048576 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 218992 kB <-- showed pages as clean incorrectly Private_Dirty: 829584 kB Referenced: 388364 kB Swap: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB 00601000-40601000 rw-p 00000000 00:00 0 Size: 1048576 kB Rss: 1048576 kB Pss: 1048576 kB Shared_Clean: 0 kB Shared_Dirty: 0 kB Private_Clean: 0 kB Private_Dirty: 1048576 kB <-- fixed Referenced: 388480 kB Swap: 0 kB KernelPageSize: 4 kB MMUPageSize: 4 kB Signed-off-by: KOSAKI Motohiro Acked-by: Hugh Dickins Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 271afc48b9a5..1dbca4e8cc16 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, mss->referenced += PAGE_SIZE; mapcount = page_mapcount(page); if (mapcount >= 2) { - if (pte_dirty(ptent)) + if (pte_dirty(ptent) || PageDirty(page)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; } else { - if (pte_dirty(ptent)) + if (pte_dirty(ptent) || PageDirty(page)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; -- cgit v1.2.2 From 12828061cdacfb1db3eb03fd71952d5ebc555bbb Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 13 Sep 2010 14:00:23 +0800 Subject: ocfs2: update ctime when changing the file's permission by setfacl In commit 30e2bab, ext3 fixed it. So change it accordingly in ocfs2. Steps to reproduce: # touch aaa # stat -c %Z aaa 1283760364 # setfacl -m 'u::x,g::x,o::x' aaa # stat -c %Z aaa 1283760364 Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/acl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a76e0aa5cd3f..391915093fe1 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, } inode->i_mode = new_mode; + inode->i_ctime = CURRENT_TIME; di->i_mode = cpu_to_le16(inode->i_mode); + di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ocfs2_journal_dirty(handle, di_bh); -- cgit v1.2.2 From 47dea423799d98c53793237ab386a94976f305d5 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 13 Sep 2010 15:13:50 +0800 Subject: ocfs2: Use cpu_to_le16 for e_leaf_clusters in ocfs2_bg_discontig_add_extent. e_leaf_clusters is a le16, so use cpu_to_le16 instead of cpu_to_le32. What's more, we change 'clusters' to unsigned int to signify that the size of 'clusters' isn't important here. Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/suballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8a286f54dca1..849c2f0e0a0e 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -357,7 +357,7 @@ out: static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, struct ocfs2_group_desc *bg, struct ocfs2_chain_list *cl, - u64 p_blkno, u32 clusters) + u64 p_blkno, unsigned int clusters) { struct ocfs2_extent_list *el = &bg->bg_list; struct ocfs2_extent_rec *rec; @@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, rec->e_blkno = cpu_to_le64(p_blkno); rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc)); - rec->e_leaf_clusters = cpu_to_le32(clusters); + rec->e_leaf_clusters = cpu_to_le16(clusters); le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); le16_add_cpu(&bg->bg_free_bits_count, clusters * le16_to_cpu(cl->cl_bpc)); -- cgit v1.2.2 From 4a452de4fdfe4dbb27e491904d8bfaf1262bdff4 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 19 Sep 2010 13:42:28 +0800 Subject: ocfs2: Move 'wanted' into parens of ocfs2_resmap_resv_bits. The first time I read the function ocfs2_resmap_resv_bits, I consider about what 'wanted' will be used and consider about the comments. Then I find it is only used if the reservation is empty. ;) So we'd better move it to the parens so that it make the code more readable, what's more, ocfs2_resmap_resv_bits is used so frequently and we should save some cpus. Acked-by: Mark Fasheh Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/reservations.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index d8b6e4259b80..3e78db361bc7 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, struct ocfs2_alloc_reservation *resv, int *cstart, int *clen) { - unsigned int wanted = *clen; - if (resv == NULL || ocfs2_resmap_disabled(resmap)) return -ENOSPC; spin_lock(&resv_lock); - /* - * We don't want to over-allocate for temporary - * windows. Otherwise, we run the risk of fragmenting the - * allocation space. - */ - wanted = ocfs2_resv_window_bits(resmap, resv); - if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) - wanted = *clen; - if (ocfs2_resv_empty(resv)) { - mlog(0, "empty reservation, find new window\n"); + /* + * We don't want to over-allocate for temporary + * windows. Otherwise, we run the risk of fragmenting the + * allocation space. + */ + unsigned int wanted = ocfs2_resv_window_bits(resmap, resv); + if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) + wanted = *clen; + + mlog(0, "empty reservation, find new window\n"); /* * Try to get a window here. If it works, we must fall * through and test the bitmap . This avoids some -- cgit v1.2.2 From 0000b862027d624ac564609b87c1aa4d14dd1e46 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 19 Sep 2010 13:42:29 +0800 Subject: ocfs2: Sync inode flags with ext2. We sync our inode flags with ext2 and define them by hex values. But actually in commit 3669567(4 years ago), all these values are moved to include/linux/fs.h. So we'd better also use them as what ext2 did. So sync our inode flags with ext2 by using FS_*. Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/ocfs2_fs.h | 37 +++++++++++++++++++++++++------------ fs/ocfs2/ocfs2_ioctl.h | 8 ++++---- 2 files changed, 29 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 33f1c9a8258d..fa31d05e41b7 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -235,18 +235,31 @@ #define OCFS2_HAS_REFCOUNT_FL (0x0010) /* Inode attributes, keep in sync with EXT2 */ -#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ -#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ -#define OCFS2_COMPR_FL (0x00000004) /* Compress file */ -#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ -#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ -#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ -#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ -#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ -#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ - -#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ -#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ +#define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */ +#define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */ +#define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */ +#define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ +#define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ +#define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ +#define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ +#define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ +/* Reserved for compression usage... */ +#define OCFS2_DIRTY_FL FS_DIRTY_FL +#define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ +#define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ +#define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ +/* End compression flags --- maybe not all used */ +#define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */ +#define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ +#define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ +#define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ +#define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ +#define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ +#define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + +#define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ +#define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ /* * Extent record flags (e_node.leaf.flags) diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..5d241505690b 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -23,10 +23,10 @@ /* * ioctl commands */ -#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) -#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) -#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) -#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) +#define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS +#define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS +#define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS /* * Space reservation / allocation / free ioctls and argument structure -- cgit v1.2.2 From 5dad6c39d156fbbde0b0ef170d9173feffdeb546 Mon Sep 17 00:00:00 2001 From: Srinivas Eeda Date: Tue, 21 Sep 2010 16:27:26 -0700 Subject: o2dlm: force free mles during dlm exit While umounting, a block mle doesn't get freed if dlm is shutdown after master request is received but before assert master. This results in unclean shutdown of dlm domain. This patch frees all mles that lie around after other nodes were notified about exiting the dlm and marking dlm state as leaving. Only block mles are expected to be around, so we log ERROR for other mles but still free them. Signed-off-by: Srinivas Eeda Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmcommon.h | 1 + fs/ocfs2/dlm/dlmdomain.c | 1 + fs/ocfs2/dlm/dlmmaster.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4b6ae2c13b47..765298908f1d 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node); +void dlm_force_free_mles(struct dlm_ctxt *dlm); int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); int __dlm_lockres_has_locks(struct dlm_lock_resource *res); int __dlm_lockres_unused(struct dlm_lock_resource *res); diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 153abb5abef0..11a5c87fd7f7 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) dlm_mark_domain_leaving(dlm); dlm_leave_domain(dlm); + dlm_force_free_mles(dlm); dlm_complete_dlm_shutdown(dlm); } dlm_put(dlm); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ffb4c68dafa4..f564b0e5f80d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, wake_up(&res->wq); wake_up(&dlm->migration_wq); } + +void dlm_force_free_mles(struct dlm_ctxt *dlm) +{ + int i; + struct hlist_head *bucket; + struct dlm_master_list_entry *mle; + struct hlist_node *tmp, *list; + + /* + * We notified all other nodes that we are exiting the domain and + * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still + * around we force free them and wake any processes that are waiting + * on the mles + */ + spin_lock(&dlm->spinlock); + spin_lock(&dlm->master_lock); + + BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); + BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); + + for (i = 0; i < DLM_HASH_BUCKETS; i++) { + bucket = dlm_master_hash(dlm, i); + hlist_for_each_safe(list, tmp, bucket) { + mle = hlist_entry(list, struct dlm_master_list_entry, + master_hash_node); + if (mle->type != DLM_MLE_BLOCK) { + mlog(ML_ERROR, "bad mle: %p\n", mle); + dlm_print_one_mle(mle); + } + atomic_set(&mle->woken, 1); + wake_up(&mle->wq); + + __dlm_unlink_mle(dlm, mle); + __dlm_mle_detach_hb_events(dlm, mle); + __dlm_put_mle(mle); + } + } + spin_unlock(&dlm->master_lock); + spin_unlock(&dlm->spinlock); +} -- cgit v1.2.2 From e4eab08d6050ad04d960738f589724204fd1064c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 20 Aug 2010 21:14:46 +0100 Subject: ARM: 6342/1: fix ASLR of PIE executables Since commits 990cb8acf2 and cc92c28b2d, it is possible to have full address space layout randomization (ASLR) on ARM. Except that one small change was missing for ASLR of PIE executables. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 535e763ab1a6..6884e198e0c7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -800,7 +800,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * default mmap base, as well as whatever program they * might try to exec. This is because the brk will * follow the loader, and is not movable. */ -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) || defined(CONFIG_ARM) load_bias = 0; #else load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); -- cgit v1.2.2