aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/root.c5
-rw-r--r--fs/btrfs/ioctl.c5
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/namei.c98
-rw-r--r--fs/cachefiles/security.c4
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/auth.c1
-rw-r--r--fs/ceph/auth_none.h2
-rw-r--r--fs/ceph/auth_x.c32
-rw-r--r--fs/ceph/caps.c21
-rw-r--r--fs/ceph/dir.c9
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/inode.c8
-rw-r--r--fs/ceph/mds_client.c34
-rw-r--r--fs/ceph/messenger.c39
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/osd_client.c26
-rw-r--r--fs/ceph/osd_client.h3
-rw-r--r--fs/ceph/osdmap.c29
-rw-r--r--fs/ceph/osdmap.h2
-rw-r--r--fs/ceph/rados.h1
-rw-r--r--fs/ceph/snap.c24
-rw-r--r--fs/ceph/super.c30
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/asn1.c103
-rw-r--r--fs/cifs/cifs_debug.c48
-rw-r--r--fs/cifs/cifs_debug.h42
-rw-r--r--fs/cifs/cifs_dfs_ref.c34
-rw-r--r--fs/cifs/cifs_spnego.c6
-rw-r--r--fs/cifs/cifs_unicode.c5
-rw-r--r--fs/cifs/cifsacl.c76
-rw-r--r--fs/cifs/cifsencrypt.c10
-rw-r--r--fs/cifs/cifsfs.c163
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifsproto.h30
-rw-r--r--fs/cifs/cifssmb.c437
-rw-r--r--fs/cifs/connect.c639
-rw-r--r--fs/cifs/dir.c91
-rw-r--r--fs/cifs/dns_resolve.c16
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/cifs/file.c222
-rw-r--r--fs/cifs/inode.c144
-rw-r--r--fs/cifs/ioctl.c10
-rw-r--r--fs/cifs/link.c10
-rw-r--r--fs/cifs/misc.c81
-rw-r--r--fs/cifs/netmisc.c16
-rw-r--r--fs/cifs/readdir.c85
-rw-r--r--fs/cifs/sess.c81
-rw-r--r--fs/cifs/transport.c92
-rw-r--r--fs/cifs/xattr.c40
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/dir.c4
-rw-r--r--fs/dlm/lock.c5
-rw-r--r--fs/dlm/user.c88
-rw-r--r--fs/eventpoll.c3
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exofs/exofs.h2
-rw-r--r--fs/ext2/balloc.c6
-rw-r--r--fs/ext2/ialloc.c9
-rw-r--r--fs/ext2/inode.c7
-rw-r--r--fs/ext2/super.c99
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext3/balloc.c6
-rw-r--r--fs/ext3/fsync.c20
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/super.c77
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/fcntl.c66
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c17
-rw-r--r--fs/gfs2/dir.c2
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/incore.h11
-rw-r--r--fs/gfs2/inode.c101
-rw-r--r--fs/gfs2/inode.h5
-rw-r--r--fs/gfs2/log.c158
-rw-r--r--fs/gfs2/log.h1
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/main.c2
-rw-r--r--fs/gfs2/meta_io.c5
-rw-r--r--fs/gfs2/ops_fstype.c19
-rw-r--r--fs/gfs2/quota.c114
-rw-r--r--fs/gfs2/rgrp.c68
-rw-r--r--fs/gfs2/super.c11
-rw-r--r--fs/gfs2/sys.c6
-rw-r--r--fs/gfs2/trans.c18
-rw-r--r--fs/inode.c2
-rw-r--r--fs/jbd/commit.c8
-rw-r--r--fs/jbd/journal.c33
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/background.c3
-rw-r--r--fs/jffs2/erase.c12
-rw-r--r--fs/jffs2/fs.c10
-rw-r--r--fs/jffs2/gc.c17
-rw-r--r--fs/jffs2/nodelist.h10
-rw-r--r--fs/jffs2/nodemgmt.c28
-rw-r--r--fs/jffs2/os-linux.h3
-rw-r--r--fs/jffs2/scan.c4
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jffs2/wbuf.c8
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/jfs_dmap.c2
-rw-r--r--fs/jfs/super.c13
-rw-r--r--fs/libfs.c35
-rw-r--r--fs/logfs/dev_bdev.c6
-rw-r--r--fs/logfs/dev_mtd.c26
-rw-r--r--fs/logfs/dir.c2
-rw-r--r--fs/logfs/file.c16
-rw-r--r--fs/logfs/gc.c49
-rw-r--r--fs/logfs/inode.c6
-rw-r--r--fs/logfs/journal.c7
-rw-r--r--fs/logfs/logfs.h15
-rw-r--r--fs/logfs/logfs_abi.h10
-rw-r--r--fs/logfs/readwrite.c19
-rw-r--r--fs/logfs/segment.c7
-rw-r--r--fs/logfs/super.c22
-rw-r--r--fs/namei.c27
-rw-r--r--fs/namespace.c19
-rw-r--r--fs/nfs/client.c57
-rw-r--r--fs/nfs/delegation.c88
-rw-r--r--fs/nfs/dir.c145
-rw-r--r--fs/nfs/file.c15
-rw-r--r--fs/nfs/fscache.c3
-rw-r--r--fs/nfs/getroot.c191
-rw-r--r--fs/nfs/inode.c58
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/iostat.h6
-rw-r--r--fs/nfs/namespace.c20
-rw-r--r--fs/nfs/nfs3acl.c23
-rw-r--r--fs/nfs/nfs3proc.c128
-rw-r--r--fs/nfs/nfs3xdr.c2
-rw-r--r--fs/nfs/nfs4_fs.h8
-rw-r--r--fs/nfs/nfs4namespace.c12
-rw-r--r--fs/nfs/nfs4proc.c177
-rw-r--r--fs/nfs/nfs4state.c36
-rw-r--r--fs/nfs/nfs4xdr.c22
-rw-r--r--fs/nfs/nfsroot.c14
-rw-r--r--fs/nfs/pagelist.c14
-rw-r--r--fs/nfs/proc.c144
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/super.c150
-rw-r--r--fs/nfs/unlink.c4
-rw-r--r--fs/nfs/write.c55
-rw-r--r--fs/nfsd/export.c44
-rw-r--r--fs/nfsd/nfs4callback.c140
-rw-r--r--fs/nfsd/nfs4proc.c50
-rw-r--r--fs/nfsd/nfs4state.c376
-rw-r--r--fs/nfsd/nfs4xdr.c27
-rw-r--r--fs/nfsd/nfsctl.c64
-rw-r--r--fs/nfsd/nfsd.h6
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/state.h47
-rw-r--r--fs/nfsd/vfs.c8
-rw-r--r--fs/nfsd/vfs.h1
-rw-r--r--fs/nfsd/xdr4.h11
-rw-r--r--fs/nilfs2/alloc.c154
-rw-r--r--fs/nilfs2/alloc.h7
-rw-r--r--fs/nilfs2/btree.c91
-rw-r--r--fs/nilfs2/btree.h23
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/recovery.c2
-rw-r--r--fs/nilfs2/segbuf.c70
-rw-r--r--fs/nilfs2/segbuf.h10
-rw-r--r--fs/nilfs2/segment.c157
-rw-r--r--fs/nilfs2/segment.h6
-rw-r--r--fs/nilfs2/super.c219
-rw-r--r--fs/nilfs2/the_nilfs.c10
-rw-r--r--fs/notify/inotify/Kconfig1
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c16
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/alloc.c908
-rw-r--r--fs/ocfs2/alloc.h12
-rw-r--r--fs/ocfs2/aops.c3
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/cluster/tcp.c3
-rw-r--r--fs/ocfs2/dir.c75
-rw-r--r--fs/ocfs2/dlm/dlmast.c13
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c4
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c28
-rw-r--r--fs/ocfs2/dlm/dlmlock.c6
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c30
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c27
-rw-r--r--fs/ocfs2/dlm/dlmthread.c16
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c3
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/dlmglue.c3
-rw-r--r--fs/ocfs2/file.c268
-rw-r--r--fs/ocfs2/inode.c113
-rw-r--r--fs/ocfs2/inode.h4
-rw-r--r--fs/ocfs2/journal.c26
-rw-r--r--fs/ocfs2/journal.h15
-rw-r--r--fs/ocfs2/localalloc.c275
-rw-r--r--fs/ocfs2/localalloc.h3
-rw-r--r--fs/ocfs2/mmap.c48
-rw-r--r--fs/ocfs2/namei.c149
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/ocfs2_fs.h144
-rw-r--r--fs/ocfs2/quota.h12
-rw-r--r--fs/ocfs2/quota_global.c351
-rw-r--r--fs/ocfs2/quota_local.c183
-rw-r--r--fs/ocfs2/refcounttree.c77
-rw-r--r--fs/ocfs2/refcounttree.h4
-rw-r--r--fs/ocfs2/reservations.c847
-rw-r--r--fs/ocfs2/reservations.h159
-rw-r--r--fs/ocfs2/resize.c19
-rw-r--r--fs/ocfs2/suballoc.c688
-rw-r--r--fs/ocfs2/suballoc.h21
-rw-r--r--fs/ocfs2/super.c92
-rw-r--r--fs/ocfs2/super.h7
-rw-r--r--fs/ocfs2/xattr.c103
-rw-r--r--fs/omfs/inode.c1
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/base.c10
-rw-r--r--fs/proc/inode.c4
-rw-r--r--fs/proc/kcore.c1
-rw-r--r--fs/proc/kmsg.c1
-rw-r--r--fs/proc/task_mmu.c19
-rw-r--r--fs/proc/vmcore.c1
-rw-r--r--fs/quota/dquot.c275
-rw-r--r--fs/quota/quota.c63
-rw-r--r--fs/quota/quota_tree.c50
-rw-r--r--fs/quota/quota_tree.h6
-rw-r--r--fs/quota/quota_v1.c4
-rw-r--r--fs/quota/quota_v2.c6
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/reiserfs/inode.c3
-rw-r--r--fs/sysfs/bin.c26
-rw-r--r--fs/sysfs/dir.c114
-rw-r--r--fs/sysfs/file.c17
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/sysfs/inode.c6
-rw-r--r--fs/sysfs/mount.c95
-rw-r--r--fs/sysfs/symlink.c36
-rw-r--r--fs/sysfs/sysfs.h34
-rw-r--r--fs/sysv/dir.c2
-rw-r--r--fs/timerfd.c25
-rw-r--r--fs/ubifs/io.c1
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/file.c45
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/ufs/symlink.c8
-rw-r--r--fs/ufs/truncate.c10
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c231
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c27
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c203
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h150
-rw-r--r--fs/xfs/quota/xfs_dquot.c193
-rw-r--r--fs/xfs/quota/xfs_dquot.h35
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c30
-rw-r--r--fs/xfs/quota/xfs_qm.c609
-rw-r--r--fs/xfs/quota/xfs_qm.h23
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c165
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h102
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c29
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_buf_item.c55
-rw-r--r--fs/xfs/xfs_buf_item.h2
-rw-r--r--fs/xfs/xfs_error.c30
-rw-r--r--fs/xfs/xfs_error.h9
-rw-r--r--fs/xfs/xfs_extfree_item.c18
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_inode_item.c21
-rw-r--r--fs/xfs/xfs_iomap.c123
-rw-r--r--fs/xfs/xfs_iomap.h47
-rw-r--r--fs/xfs/xfs_log.c702
-rw-r--r--fs/xfs/xfs_log.h13
-rw-r--r--fs/xfs/xfs_log_priv.h12
-rw-r--r--fs/xfs/xfs_log_recover.c311
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_quota.h3
-rw-r--r--fs/xfs/xfs_trans.c760
-rw-r--r--fs/xfs/xfs_trans.h14
-rw-r--r--fs/xfs/xfs_trans_buf.c187
295 files changed, 9784 insertions, 7248 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c606d92..e8e5e63ac950 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
177 } 177 }
178 /* Trigger mount for path component or follow link */ 178 /* Trigger mount for path component or follow link */
179 } else if (ino->flags & AUTOFS_INF_PENDING || 179 } else if (ino->flags & AUTOFS_INF_PENDING ||
180 autofs4_need_mount(flags) || 180 autofs4_need_mount(flags)) {
181 current->link_count) {
182 DPRINTK("waiting for mount name=%.*s", 181 DPRINTK("waiting for mount name=%.*s",
183 dentry->d_name.len, dentry->d_name.name); 182 dentry->d_name.len, dentry->d_name.name);
184 183
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
262 spin_unlock(&dcache_lock); 261 spin_unlock(&dcache_lock);
263 spin_unlock(&sbi->fs_lock); 262 spin_unlock(&sbi->fs_lock);
264 263
265 status = try_to_fill_dentry(dentry, 0); 264 status = try_to_fill_dentry(dentry, nd->flags);
266 if (status) 265 if (status)
267 goto out_error; 266 goto out_error;
268 267
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e84ef60ffe35..97a97839a867 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1481 ret = -EBADF; 1481 ret = -EBADF;
1482 goto out_drop_write; 1482 goto out_drop_write;
1483 } 1483 }
1484
1484 src = src_file->f_dentry->d_inode; 1485 src = src_file->f_dentry->d_inode;
1485 1486
1486 ret = -EINVAL; 1487 ret = -EINVAL;
1487 if (src == inode) 1488 if (src == inode)
1488 goto out_fput; 1489 goto out_fput;
1489 1490
1491 /* the src must be open for reading */
1492 if (!(src_file->f_mode & FMODE_READ))
1493 goto out_fput;
1494
1490 ret = -EISDIR; 1495 ret = -EISDIR;
1491 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 1496 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
1492 goto out_fput; 1497 goto out_fput;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f9c624..a8cd821226da 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
34 loff_t i_size; /* object size */ 34 loff_t i_size; /* object size */
35 unsigned long flags; 35 unsigned long flags;
36#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ 36#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
37#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */
37 atomic_t usage; /* object usage count */ 38 atomic_t usage; /* object usage count */
38 uint8_t type; /* object type */ 39 uint8_t type; /* object type */
39 uint8_t new; /* T if object new */ 40 uint8_t new; /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1ee0d..f4a7840bf42c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
93} 93}
94 94
95/* 95/*
96 * mark the owner of a dentry, if there is one, to indicate that that dentry
97 * has been preemptively deleted
98 * - the caller must hold the i_mutex on the dentry's parent as required to
99 * call vfs_unlink(), vfs_rmdir() or vfs_rename()
100 */
101static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
102 struct dentry *dentry)
103{
104 struct cachefiles_object *object;
105 struct rb_node *p;
106
107 _enter(",'%*.*s'",
108 dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
109
110 write_lock(&cache->active_lock);
111
112 p = cache->active_nodes.rb_node;
113 while (p) {
114 object = rb_entry(p, struct cachefiles_object, active_node);
115 if (object->dentry > dentry)
116 p = p->rb_left;
117 else if (object->dentry < dentry)
118 p = p->rb_right;
119 else
120 goto found_dentry;
121 }
122
123 write_unlock(&cache->active_lock);
124 _leave(" [no owner]");
125 return;
126
127 /* found the dentry for */
128found_dentry:
129 kdebug("preemptive burial: OBJ%x [%s] %p",
130 object->fscache.debug_id,
131 fscache_object_states[object->fscache.state],
132 dentry);
133
134 if (object->fscache.state < FSCACHE_OBJECT_DYING) {
135 printk(KERN_ERR "\n");
136 printk(KERN_ERR "CacheFiles: Error:"
137 " Can't preemptively bury live object\n");
138 cachefiles_printk_object(object, NULL);
139 } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
140 printk(KERN_ERR "CacheFiles: Error:"
141 " Object already preemptively buried\n");
142 }
143
144 write_unlock(&cache->active_lock);
145 _leave(" [owner marked]");
146}
147
148/*
96 * record the fact that an object is now active 149 * record the fact that an object is now active
97 */ 150 */
98static int cachefiles_mark_object_active(struct cachefiles_cache *cache, 151static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
219 */ 272 */
220static int cachefiles_bury_object(struct cachefiles_cache *cache, 273static int cachefiles_bury_object(struct cachefiles_cache *cache,
221 struct dentry *dir, 274 struct dentry *dir,
222 struct dentry *rep) 275 struct dentry *rep,
276 bool preemptive)
223{ 277{
224 struct dentry *grave, *trap; 278 struct dentry *grave, *trap;
225 char nbuffer[8 + 8 + 1]; 279 char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
229 dir->d_name.len, dir->d_name.len, dir->d_name.name, 283 dir->d_name.len, dir->d_name.len, dir->d_name.name,
230 rep->d_name.len, rep->d_name.len, rep->d_name.name); 284 rep->d_name.len, rep->d_name.len, rep->d_name.name);
231 285
286 _debug("remove %p from %p", rep, dir);
287
232 /* non-directories can just be unlinked */ 288 /* non-directories can just be unlinked */
233 if (!S_ISDIR(rep->d_inode->i_mode)) { 289 if (!S_ISDIR(rep->d_inode->i_mode)) {
234 _debug("unlink stale object"); 290 _debug("unlink stale object");
235 ret = vfs_unlink(dir->d_inode, rep); 291 ret = vfs_unlink(dir->d_inode, rep);
236 292
293 if (preemptive)
294 cachefiles_mark_object_buried(cache, rep);
295
237 mutex_unlock(&dir->d_inode->i_mutex); 296 mutex_unlock(&dir->d_inode->i_mutex);
238 297
239 if (ret == -EIO) 298 if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
325 if (ret != 0 && ret != -ENOMEM) 384 if (ret != 0 && ret != -ENOMEM)
326 cachefiles_io_error(cache, "Rename failed with error %d", ret); 385 cachefiles_io_error(cache, "Rename failed with error %d", ret);
327 386
387 if (preemptive)
388 cachefiles_mark_object_buried(cache, rep);
389
328 unlock_rename(cache->graveyard, dir); 390 unlock_rename(cache->graveyard, dir);
329 dput(grave); 391 dput(grave);
330 _leave(" = 0"); 392 _leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
340 struct dentry *dir; 402 struct dentry *dir;
341 int ret; 403 int ret;
342 404
343 _enter(",{%p}", object->dentry); 405 _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
344 406
345 ASSERT(object->dentry); 407 ASSERT(object->dentry);
346 ASSERT(object->dentry->d_inode); 408 ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
350 412
351 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); 413 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
352 414
353 /* we need to check that our parent is _still_ our parent - it may have 415 if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
354 * been renamed */ 416 /* object allocation for the same key preemptively deleted this
355 if (dir == object->dentry->d_parent) { 417 * object's file so that it could create its own file */
356 ret = cachefiles_bury_object(cache, dir, object->dentry); 418 _debug("object preemptively buried");
357 } else {
358 /* it got moved, presumably by cachefilesd culling it, so it's
359 * no longer in the key path and we can ignore it */
360 mutex_unlock(&dir->d_inode->i_mutex); 419 mutex_unlock(&dir->d_inode->i_mutex);
361 ret = 0; 420 ret = 0;
421 } else {
422 /* we need to check that our parent is _still_ our parent - it
423 * may have been renamed */
424 if (dir == object->dentry->d_parent) {
425 ret = cachefiles_bury_object(cache, dir,
426 object->dentry, false);
427 } else {
428 /* it got moved, presumably by cachefilesd culling it,
429 * so it's no longer in the key path and we can ignore
430 * it */
431 mutex_unlock(&dir->d_inode->i_mutex);
432 ret = 0;
433 }
362 } 434 }
363 435
364 dput(dir); 436 dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
381 const char *name; 453 const char *name;
382 int ret, nlen; 454 int ret, nlen;
383 455
384 _enter("{%p},,%s,", parent->dentry, key); 456 _enter("OBJ%x{%p},OBJ%x,%s,",
457 parent->fscache.debug_id, parent->dentry,
458 object->fscache.debug_id, key);
385 459
386 cache = container_of(parent->fscache.cache, 460 cache = container_of(parent->fscache.cache,
387 struct cachefiles_cache, cache); 461 struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
509 * mutex) */ 583 * mutex) */
510 object->dentry = NULL; 584 object->dentry = NULL;
511 585
512 ret = cachefiles_bury_object(cache, dir, next); 586 ret = cachefiles_bury_object(cache, dir, next, true);
513 dput(next); 587 dput(next);
514 next = NULL; 588 next = NULL;
515 589
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
828 /* actually remove the victim (drops the dir mutex) */ 902 /* actually remove the victim (drops the dir mutex) */
829 _debug("bury"); 903 _debug("bury");
830 904
831 ret = cachefiles_bury_object(cache, dir, victim); 905 ret = cachefiles_bury_object(cache, dir, victim, false);
832 if (ret < 0) 906 if (ret < 0)
833 goto error; 907 goto error;
834 908
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb2232..039b5011d83b 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
77/* 77/*
78 * check the security details of the on-disk cache 78 * check the security details of the on-disk cache
79 * - must be called with security override in force 79 * - must be called with security override in force
80 * - must return with a security override in force - even in the case of an
81 * error
80 */ 82 */
81int cachefiles_determine_cache_security(struct cachefiles_cache *cache, 83int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
82 struct dentry *root, 84 struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
99 * which create files */ 101 * which create files */
100 ret = set_create_files_as(new, root->d_inode); 102 ret = set_create_files_as(new, root->d_inode);
101 if (ret < 0) { 103 if (ret < 0) {
104 abort_creds(new);
105 cachefiles_begin_secure(cache, _saved_cred);
102 _leave(" = %d [cfa]", ret); 106 _leave(" = %d [cfa]", ret);
103 return ret; 107 return ret;
104 } 108 }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 412593703d1e..a9005d862ed4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req,
504 int i; 504 int i;
505 struct ceph_snap_context *snapc = req->r_snapc; 505 struct ceph_snap_context *snapc = req->r_snapc;
506 struct address_space *mapping = inode->i_mapping; 506 struct address_space *mapping = inode->i_mapping;
507 struct writeback_control *wbc = req->r_wbc;
508 __s32 rc = -EIO; 507 __s32 rc = -EIO;
509 u64 bytes = 0; 508 u64 bytes = 0;
510 struct ceph_client *client = ceph_inode_to_client(inode); 509 struct ceph_client *client = ceph_inode_to_client(inode);
511 long writeback_stat; 510 long writeback_stat;
512 unsigned issued = __ceph_caps_issued(ci, NULL); 511 unsigned issued = ceph_caps_issued(ci);
513 512
514 /* parse reply */ 513 /* parse reply */
515 replyhead = msg->front.iov_base; 514 replyhead = msg->front.iov_base;
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
546 clear_bdi_congested(&client->backing_dev_info, 545 clear_bdi_congested(&client->backing_dev_info,
547 BLK_RW_ASYNC); 546 BLK_RW_ASYNC);
548 547
549 if (i >= wrote) {
550 dout("inode %p skipping page %p\n", inode, page);
551 wbc->pages_skipped++;
552 }
553 ceph_put_snap_context((void *)page->private); 548 ceph_put_snap_context((void *)page->private);
554 page->private = 0; 549 page->private = 0;
555 ClearPagePrivate(page); 550 ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
799 alloc_page_vec(client, req); 794 alloc_page_vec(client, req);
800 req->r_callback = writepages_finish; 795 req->r_callback = writepages_finish;
801 req->r_inode = inode; 796 req->r_inode = inode;
802 req->r_wbc = wbc;
803 } 797 }
804 798
805 /* note position of first page in pvec */ 799 /* note position of first page in pvec */
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b94b866..818afe72e6c7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/slab.h>
6 7
7#include "types.h" 8#include "types.h"
8#include "auth_none.h" 9#include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c05533a31c..8164df1a08be 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
1#ifndef _FS_CEPH_AUTH_NONE_H 1#ifndef _FS_CEPH_AUTH_NONE_H
2#define _FS_CEPH_AUTH_NONE_H 2#define _FS_CEPH_AUTH_NONE_H
3 3
4#include <linux/slab.h>
5
4#include "auth.h" 6#include "auth.h"
5 7
6/* 8/*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4dc8cc..fee5a08da881 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
12#include "auth.h" 12#include "auth.h"
13#include "decode.h" 13#include "decode.h"
14 14
15struct kmem_cache *ceph_x_ticketbuf_cachep;
16
17#define TEMP_TICKET_BUF_LEN 256 15#define TEMP_TICKET_BUF_LEN 256
18 16
19static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); 17static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
131 char *ticket_buf; 129 char *ticket_buf;
132 u8 struct_v; 130 u8 struct_v;
133 131
134 dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC); 132 dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
135 if (!dbuf) 133 if (!dbuf)
136 return -ENOMEM; 134 return -ENOMEM;
137 135
138 ret = -ENOMEM; 136 ret = -ENOMEM;
139 ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, 137 ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
140 GFP_NOFS | GFP_ATOMIC);
141 if (!ticket_buf) 138 if (!ticket_buf)
142 goto out_dbuf; 139 goto out_dbuf;
143 140
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
251 248
252 ret = 0; 249 ret = 0;
253out: 250out:
254 kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf); 251 kfree(ticket_buf);
255out_dbuf: 252out_dbuf:
256 kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf); 253 kfree(dbuf);
257 return ret; 254 return ret;
258 255
259bad: 256bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
605 remove_ticket_handler(ac, th); 602 remove_ticket_handler(ac, th);
606 } 603 }
607 604
608 kmem_cache_destroy(ceph_x_ticketbuf_cachep);
609
610 kfree(ac->private); 605 kfree(ac->private);
611 ac->private = NULL; 606 ac->private = NULL;
612} 607}
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
641 int ret; 636 int ret;
642 637
643 dout("ceph_x_init %p\n", ac); 638 dout("ceph_x_init %p\n", ac);
639 ret = -ENOMEM;
644 xi = kzalloc(sizeof(*xi), GFP_NOFS); 640 xi = kzalloc(sizeof(*xi), GFP_NOFS);
645 if (!xi) 641 if (!xi)
646 return -ENOMEM; 642 goto out;
647 643
648 ret = -ENOMEM;
649 ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
650 TEMP_TICKET_BUF_LEN, 8,
651 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
652 NULL);
653 if (!ceph_x_ticketbuf_cachep)
654 goto done_nomem;
655 ret = -EINVAL; 644 ret = -EINVAL;
656 if (!ac->secret) { 645 if (!ac->secret) {
657 pr_err("no secret set (for auth_x protocol)\n"); 646 pr_err("no secret set (for auth_x protocol)\n");
658 goto done_nomem; 647 goto out_nomem;
659 } 648 }
660 649
661 ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); 650 ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
662 if (ret) 651 if (ret)
663 goto done_nomem; 652 goto out_nomem;
664 653
665 xi->starting = true; 654 xi->starting = true;
666 xi->ticket_handlers = RB_ROOT; 655 xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
670 ac->ops = &ceph_x_ops; 659 ac->ops = &ceph_x_ops;
671 return 0; 660 return 0;
672 661
673done_nomem: 662out_nomem:
674 kfree(xi); 663 kfree(xi);
675 if (ceph_x_ticketbuf_cachep) 664out:
676 kmem_cache_destroy(ceph_x_ticketbuf_cachep);
677 return ret; 665 return ret;
678} 666}
679 667
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa2239fa9a3b..d9400534b279 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
858} 858}
859 859
860/* 860/*
861 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
862 *
861 * caller should hold i_lock. 863 * caller should hold i_lock.
862 * caller will not hold session s_mutex if called from destroy_inode. 864 * caller will not hold session s_mutex if called from destroy_inode.
863 */ 865 */
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
866 struct ceph_mds_session *session = cap->session; 868 struct ceph_mds_session *session = cap->session;
867 struct ceph_inode_info *ci = cap->ci; 869 struct ceph_inode_info *ci = cap->ci;
868 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; 870 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
871 int removed = 0;
869 872
870 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); 873 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
871 874
872 /* remove from inode list */
873 rb_erase(&cap->ci_node, &ci->i_caps);
874 cap->ci = NULL;
875 if (ci->i_auth_cap == cap)
876 ci->i_auth_cap = NULL;
877
878 /* remove from session list */ 875 /* remove from session list */
879 spin_lock(&session->s_cap_lock); 876 spin_lock(&session->s_cap_lock);
880 if (session->s_cap_iterator == cap) { 877 if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
885 list_del_init(&cap->session_caps); 882 list_del_init(&cap->session_caps);
886 session->s_nr_caps--; 883 session->s_nr_caps--;
887 cap->session = NULL; 884 cap->session = NULL;
885 removed = 1;
888 } 886 }
887 /* protect backpointer with s_cap_lock: see iterate_session_caps */
888 cap->ci = NULL;
889 spin_unlock(&session->s_cap_lock); 889 spin_unlock(&session->s_cap_lock);
890 890
891 if (cap->session == NULL) 891 /* remove from inode list */
892 rb_erase(&cap->ci_node, &ci->i_caps);
893 if (ci->i_auth_cap == cap)
894 ci->i_auth_cap = NULL;
895
896 if (removed)
892 ceph_put_cap(cap); 897 ceph_put_cap(cap);
893 898
894 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { 899 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
@@ -1861,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1861 } else { 1866 } else {
1862 pr_err("%p auth cap %p not mds%d ???\n", inode, 1867 pr_err("%p auth cap %p not mds%d ???\n", inode,
1863 cap, session->s_mds); 1868 cap, session->s_mds);
1864 spin_unlock(&inode->i_lock);
1865 } 1869 }
1870 spin_unlock(&inode->i_lock);
1866 } 1871 }
1867} 1872}
1868 1873
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ea8ee2e526aa..650d2db5ed26 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -880,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
880 * do_request, above). If there is no trace, we need 880 * do_request, above). If there is no trace, we need
881 * to do it here. 881 * to do it here.
882 */ 882 */
883
884 /* d_move screws up d_subdirs order */
885 ceph_i_clear(new_dir, CEPH_I_COMPLETE);
886
883 d_move(old_dentry, new_dentry); 887 d_move(old_dentry, new_dentry);
888
889 /* ensure target dentry is invalidated, despite
890 rehashing bug in vfs_rename_dir */
891 new_dentry->d_time = jiffies;
892 ceph_dentry(new_dentry)->lease_shared_gen = 0;
884 } 893 }
885 ceph_mdsc_put_request(req); 894 ceph_mdsc_put_request(req);
886 return err; 895 return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5da2c1..ed6f19721d6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
665 * throw out any page cache pages in this range. this 665 * throw out any page cache pages in this range. this
666 * may block. 666 * may block.
667 */ 667 */
668 truncate_inode_pages_range(inode->i_mapping, pos, pos+len); 668 truncate_inode_pages_range(inode->i_mapping, pos,
669 (pos+len) | (PAGE_CACHE_SIZE-1));
669 } else { 670 } else {
670 pages = alloc_page_vector(num_pages); 671 pages = alloc_page_vector(num_pages);
671 if (IS_ERR(pages)) { 672 if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 26f883c275e8..85b4d2ffdeba 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
733 __ceph_get_fmode(ci, cap_fmode); 733 __ceph_get_fmode(ci, cap_fmode);
734 spin_unlock(&inode->i_lock); 734 spin_unlock(&inode->i_lock);
735 } 735 }
736 } else if (cap_fmode >= 0) {
737 pr_warning("mds issued no caps on %llx.%llx\n",
738 ceph_vinop(inode));
739 __ceph_get_fmode(ci, cap_fmode);
736 } 740 }
737 741
738 /* update delegation info? */ 742 /* update delegation info? */
@@ -997,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
997 dn, dn->d_name.len, dn->d_name.name); 1001 dn, dn->d_name.len, dn->d_name.name);
998 dout("fill_trace doing d_move %p -> %p\n", 1002 dout("fill_trace doing d_move %p -> %p\n",
999 req->r_old_dentry, dn); 1003 req->r_old_dentry, dn);
1004
1005 /* d_move screws up d_subdirs order */
1006 ceph_i_clear(dir, CEPH_I_COMPLETE);
1007
1000 d_move(req->r_old_dentry, dn); 1008 d_move(req->r_old_dentry, dn);
1001 dout(" src %p '%.*s' dst %p '%.*s'\n", 1009 dout(" src %p '%.*s' dst %p '%.*s'\n",
1002 req->r_old_dentry, 1010 req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47be..24561a557e01 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
736} 736}
737 737
738/* 738/*
739 * Helper to safely iterate over all caps associated with a session. 739 * Helper to safely iterate over all caps associated with a session, with
740 * special care taken to handle a racing __ceph_remove_cap().
740 * 741 *
741 * caller must hold session s_mutex 742 * Caller must hold session s_mutex.
742 */ 743 */
743static int iterate_session_caps(struct ceph_mds_session *session, 744static int iterate_session_caps(struct ceph_mds_session *session,
744 int (*cb)(struct inode *, struct ceph_cap *, 745 int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
2136 struct ceph_mds_session *session = NULL; 2137 struct ceph_mds_session *session = NULL;
2137 struct ceph_msg *reply; 2138 struct ceph_msg *reply;
2138 struct rb_node *p; 2139 struct rb_node *p;
2139 int err; 2140 int err = -ENOMEM;
2140 struct ceph_pagelist *pagelist; 2141 struct ceph_pagelist *pagelist;
2141 2142
2142 pr_info("reconnect to recovering mds%d\n", mds); 2143 pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
2185 goto fail; 2186 goto fail;
2186 err = iterate_session_caps(session, encode_caps_cb, pagelist); 2187 err = iterate_session_caps(session, encode_caps_cb, pagelist);
2187 if (err < 0) 2188 if (err < 0)
2188 goto out; 2189 goto fail;
2189 2190
2190 /* 2191 /*
2191 * snaprealms. we provide mds with the ino, seq (version), and 2192 * snaprealms. we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
2213 reply->nr_pages = calc_pages_for(0, pagelist->length); 2214 reply->nr_pages = calc_pages_for(0, pagelist->length);
2214 ceph_con_send(&session->s_con, reply); 2215 ceph_con_send(&session->s_con, reply);
2215 2216
2216 if (session) { 2217 session->s_state = CEPH_MDS_SESSION_OPEN;
2217 session->s_state = CEPH_MDS_SESSION_OPEN; 2218 mutex_unlock(&session->s_mutex);
2218 __wake_requests(mdsc, &session->s_waiting); 2219
2219 } 2220 mutex_lock(&mdsc->mutex);
2221 __wake_requests(mdsc, &session->s_waiting);
2222 mutex_unlock(&mdsc->mutex);
2223
2224 ceph_put_mds_session(session);
2220 2225
2221out:
2222 up_read(&mdsc->snap_rwsem); 2226 up_read(&mdsc->snap_rwsem);
2223 if (session) {
2224 mutex_unlock(&session->s_mutex);
2225 ceph_put_mds_session(session);
2226 }
2227 mutex_lock(&mdsc->mutex); 2227 mutex_lock(&mdsc->mutex);
2228 return; 2228 return;
2229 2229
2230fail: 2230fail:
2231 ceph_msg_put(reply); 2231 ceph_msg_put(reply);
2232 up_read(&mdsc->snap_rwsem);
2233 mutex_unlock(&session->s_mutex);
2234 ceph_put_mds_session(session);
2232fail_nomsg: 2235fail_nomsg:
2233 ceph_pagelist_release(pagelist); 2236 ceph_pagelist_release(pagelist);
2234 kfree(pagelist); 2237 kfree(pagelist);
2235fail_nopagelist: 2238fail_nopagelist:
2236 pr_err("ENOMEM preparing reconnect for mds%d\n", mds); 2239 pr_err("error %d preparing reconnect for mds%d\n", err, mds);
2237 goto out; 2240 mutex_lock(&mdsc->mutex);
2241 return;
2238} 2242}
2239 2243
2240 2244
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cdaaa131add3..cd4fadb6491a 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
492 list_move_tail(&m->list_head, &con->out_sent); 492 list_move_tail(&m->list_head, &con->out_sent);
493 } 493 }
494 494
495 m->hdr.seq = cpu_to_le64(++con->out_seq); 495 /*
496 * only assign outgoing seq # if we haven't sent this message
497 * yet. if it is requeued, resend with it's original seq.
498 */
499 if (m->needs_out_seq) {
500 m->hdr.seq = cpu_to_le64(++con->out_seq);
501 m->needs_out_seq = false;
502 }
496 503
497 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", 504 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
498 m, con->out_seq, le16_to_cpu(m->hdr.type), 505 m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1334,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con)
1334 unsigned front_len, middle_len, data_len, data_off; 1341 unsigned front_len, middle_len, data_len, data_off;
1335 int datacrc = con->msgr->nocrc; 1342 int datacrc = con->msgr->nocrc;
1336 int skip; 1343 int skip;
1344 u64 seq;
1337 1345
1338 dout("read_partial_message con %p msg %p\n", con, m); 1346 dout("read_partial_message con %p msg %p\n", con, m);
1339 1347
@@ -1368,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con)
1368 return -EIO; 1376 return -EIO;
1369 data_off = le16_to_cpu(con->in_hdr.data_off); 1377 data_off = le16_to_cpu(con->in_hdr.data_off);
1370 1378
1379 /* verify seq# */
1380 seq = le64_to_cpu(con->in_hdr.seq);
1381 if ((s64)seq - (s64)con->in_seq < 1) {
1382 pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
1383 ENTITY_NAME(con->peer_name),
1384 pr_addr(&con->peer_addr.in_addr),
1385 seq, con->in_seq + 1);
1386 con->in_base_pos = -front_len - middle_len - data_len -
1387 sizeof(m->footer);
1388 con->in_tag = CEPH_MSGR_TAG_READY;
1389 con->in_seq++;
1390 return 0;
1391 } else if ((s64)seq - (s64)con->in_seq > 1) {
1392 pr_err("read_partial_message bad seq %lld expected %lld\n",
1393 seq, con->in_seq + 1);
1394 con->error_msg = "bad message sequence # for incoming message";
1395 return -EBADMSG;
1396 }
1397
1371 /* allocate message? */ 1398 /* allocate message? */
1372 if (!con->in_msg) { 1399 if (!con->in_msg) {
1373 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 1400 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1379,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con)
1379 con->in_base_pos = -front_len - middle_len - data_len - 1406 con->in_base_pos = -front_len - middle_len - data_len -
1380 sizeof(m->footer); 1407 sizeof(m->footer);
1381 con->in_tag = CEPH_MSGR_TAG_READY; 1408 con->in_tag = CEPH_MSGR_TAG_READY;
1409 con->in_seq++;
1382 return 0; 1410 return 0;
1383 } 1411 }
1384 if (IS_ERR(con->in_msg)) { 1412 if (IS_ERR(con->in_msg)) {
@@ -1965,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
1965 1993
1966 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); 1994 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
1967 1995
1996 msg->needs_out_seq = true;
1997
1968 /* queue */ 1998 /* queue */
1969 mutex_lock(&con->mutex); 1999 mutex_lock(&con->mutex);
1970 BUG_ON(!list_empty(&msg->list_head)); 2000 BUG_ON(!list_empty(&msg->list_head));
@@ -2030,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2030 ceph_msg_put(con->in_msg); 2060 ceph_msg_put(con->in_msg);
2031 con->in_msg = NULL; 2061 con->in_msg = NULL;
2032 con->in_tag = CEPH_MSGR_TAG_READY; 2062 con->in_tag = CEPH_MSGR_TAG_READY;
2063 con->in_seq++;
2033 } else { 2064 } else {
2034 dout("con_revoke_pages %p msg %p pages %p no-op\n", 2065 dout("con_revoke_pages %p msg %p pages %p no-op\n",
2035 con, con->in_msg, msg); 2066 con, con->in_msg, msg);
@@ -2063,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
2063 kref_init(&m->kref); 2094 kref_init(&m->kref);
2064 INIT_LIST_HEAD(&m->list_head); 2095 INIT_LIST_HEAD(&m->list_head);
2065 2096
2097 m->hdr.tid = 0;
2066 m->hdr.type = cpu_to_le16(type); 2098 m->hdr.type = cpu_to_le16(type);
2099 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
2100 m->hdr.version = 0;
2067 m->hdr.front_len = cpu_to_le32(front_len); 2101 m->hdr.front_len = cpu_to_le32(front_len);
2068 m->hdr.middle_len = 0; 2102 m->hdr.middle_len = 0;
2069 m->hdr.data_len = cpu_to_le32(page_len); 2103 m->hdr.data_len = cpu_to_le32(page_len);
2070 m->hdr.data_off = cpu_to_le16(page_off); 2104 m->hdr.data_off = cpu_to_le16(page_off);
2071 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 2105 m->hdr.reserved = 0;
2072 m->footer.front_crc = 0; 2106 m->footer.front_crc = 0;
2073 m->footer.middle_crc = 0; 2107 m->footer.middle_crc = 0;
2074 m->footer.data_crc = 0; 2108 m->footer.data_crc = 0;
2109 m->footer.flags = 0;
2075 m->front_max = front_len; 2110 m->front_max = front_len;
2076 m->front_is_vmalloc = false; 2111 m->front_is_vmalloc = false;
2077 m->more_to_follow = false; 2112 m->more_to_follow = false;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae73cdc..a5caf91cc971 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
86 struct kref kref; 86 struct kref kref;
87 bool front_is_vmalloc; 87 bool front_is_vmalloc;
88 bool more_to_follow; 88 bool more_to_follow;
89 bool needs_out_seq;
89 int front_max; 90 int front_max;
90 91
91 struct ceph_msgpool *pool; 92 struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4dedaace6..3514f71ff85f 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
565{ 565{
566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; 566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
567 struct ceph_pg pgid; 567 struct ceph_pg pgid;
568 int o = -1; 568 int acting[CEPH_PG_MAX_SIZE];
569 int o = -1, num = 0;
569 int err; 570 int err;
570 571
571 dout("map_osds %p tid %lld\n", req, req->r_tid); 572 dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
576 pgid = reqhead->layout.ol_pgid; 577 pgid = reqhead->layout.ol_pgid;
577 req->r_pgid = pgid; 578 req->r_pgid = pgid;
578 579
579 o = ceph_calc_pg_primary(osdc->osdmap, pgid); 580 err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
581 if (err > 0) {
582 o = acting[0];
583 num = err;
584 }
580 585
581 if ((req->r_osd && req->r_osd->o_osd == o && 586 if ((req->r_osd && req->r_osd->o_osd == o &&
582 req->r_sent >= req->r_osd->o_incarnation) || 587 req->r_sent >= req->r_osd->o_incarnation &&
588 req->r_num_pg_osds == num &&
589 memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
583 (req->r_osd == NULL && o == -1)) 590 (req->r_osd == NULL && o == -1))
584 return 0; /* no change */ 591 return 0; /* no change */
585 592
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
587 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, 594 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
588 req->r_osd ? req->r_osd->o_osd : -1); 595 req->r_osd ? req->r_osd->o_osd : -1);
589 596
597 /* record full pg acting set */
598 memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
599 req->r_num_pg_osds = num;
600
590 if (req->r_osd) { 601 if (req->r_osd) {
591 __cancel_request(req); 602 __cancel_request(req);
592 list_del_init(&req->r_osd_item); 603 list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
612 __remove_osd_from_lru(req->r_osd); 623 __remove_osd_from_lru(req->r_osd);
613 list_add(&req->r_osd_item, &req->r_osd->o_requests); 624 list_add(&req->r_osd_item, &req->r_osd->o_requests);
614 } 625 }
615 err = 1; /* osd changed */ 626 err = 1; /* osd or pg changed */
616 627
617out: 628out:
618 return err; 629 return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
779 struct ceph_osd_request *req; 790 struct ceph_osd_request *req;
780 u64 tid; 791 u64 tid;
781 int numops, object_len, flags; 792 int numops, object_len, flags;
793 s32 result;
782 794
783 tid = le64_to_cpu(msg->hdr.tid); 795 tid = le64_to_cpu(msg->hdr.tid);
784 if (msg->front.iov_len < sizeof(*rhead)) 796 if (msg->front.iov_len < sizeof(*rhead))
785 goto bad; 797 goto bad;
786 numops = le32_to_cpu(rhead->num_ops); 798 numops = le32_to_cpu(rhead->num_ops);
787 object_len = le32_to_cpu(rhead->object_len); 799 object_len = le32_to_cpu(rhead->object_len);
800 result = le32_to_cpu(rhead->result);
788 if (msg->front.iov_len != sizeof(*rhead) + object_len + 801 if (msg->front.iov_len != sizeof(*rhead) + object_len +
789 numops * sizeof(struct ceph_osd_op)) 802 numops * sizeof(struct ceph_osd_op))
790 goto bad; 803 goto bad;
791 dout("handle_reply %p tid %llu\n", msg, tid); 804 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
792 805
793 /* lookup */ 806 /* lookup */
794 mutex_lock(&osdc->request_mutex); 807 mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
834 dout("handle_reply tid %llu flags %d\n", tid, flags); 847 dout("handle_reply tid %llu flags %d\n", tid, flags);
835 848
836 /* either this is a read, or we got the safe response */ 849 /* either this is a read, or we got the safe response */
837 if ((flags & CEPH_OSD_FLAG_ONDISK) || 850 if (result < 0 ||
851 (flags & CEPH_OSD_FLAG_ONDISK) ||
838 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 852 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
839 __unregister_request(osdc, req); 853 __unregister_request(osdc, req);
840 854
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b0759911e7c3..ce776989ef6a 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
48 struct list_head r_osd_item; 48 struct list_head r_osd_item;
49 struct ceph_osd *r_osd; 49 struct ceph_osd *r_osd;
50 struct ceph_pg r_pgid; 50 struct ceph_pg r_pgid;
51 int r_pg_osds[CEPH_PG_MAX_SIZE];
52 int r_num_pg_osds;
51 53
52 struct ceph_connection *r_con_filling_msg; 54 struct ceph_connection *r_con_filling_msg;
53 55
@@ -66,7 +68,6 @@ struct ceph_osd_request {
66 struct list_head r_unsafe_item; 68 struct list_head r_unsafe_item;
67 69
68 struct inode *r_inode; /* for use by callbacks */ 70 struct inode *r_inode; /* for use by callbacks */
69 struct writeback_control *r_wbc; /* ditto */
70 71
71 char r_oid[40]; /* object name */ 72 char r_oid[40]; /* object name */
72 int r_oid_len; 73 int r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 2e2c15eed82a..cfdd8f4388b7 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1041} 1041}
1042 1042
1043/* 1043/*
1044 * Return acting set for given pgid.
1045 */
1046int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1047 int *acting)
1048{
1049 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1050 int i, o, num = CEPH_PG_MAX_SIZE;
1051
1052 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1053 if (!osds)
1054 return -1;
1055
1056 /* primary is first up osd */
1057 o = 0;
1058 for (i = 0; i < num; i++)
1059 if (ceph_osd_is_up(osdmap, osds[i]))
1060 acting[o++] = osds[i];
1061 return o;
1062}
1063
1064/*
1044 * Return primary osd for given pgid, or -1 if none. 1065 * Return primary osd for given pgid, or -1 if none.
1045 */ 1066 */
1046int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) 1067int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1047{ 1068{
1048 int rawosds[10], *osds; 1069 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1049 int i, num = ARRAY_SIZE(rawosds); 1070 int i, num = CEPH_PG_MAX_SIZE;
1050 1071
1051 osds = calc_pg_raw(osdmap, pgid, rawosds, &num); 1072 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1052 if (!osds) 1073 if (!osds)
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1054 1075
1055 /* primary is first up osd */ 1076 /* primary is first up osd */
1056 for (i = 0; i < num; i++) 1077 for (i = 0; i < num; i++)
1057 if (ceph_osd_is_up(osdmap, osds[i])) { 1078 if (ceph_osd_is_up(osdmap, osds[i]))
1058 return osds[i]; 1079 return osds[i];
1059 break;
1060 }
1061 return -1; 1080 return -1;
1062} 1081}
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 8bc9f1e4f562..970b547e510d 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
120 const char *oid, 120 const char *oid,
121 struct ceph_file_layout *fl, 121 struct ceph_file_layout *fl,
122 struct ceph_osdmap *osdmap); 122 struct ceph_osdmap *osdmap);
123extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
124 int *acting);
123extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, 125extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
124 struct ceph_pg pgid); 126 struct ceph_pg pgid);
125 127
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a1fc1d017b58..fd56451a871f 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -58,6 +58,7 @@ struct ceph_timespec {
58#define CEPH_PG_LAYOUT_LINEAR 2 58#define CEPH_PG_LAYOUT_LINEAR 2
59#define CEPH_PG_LAYOUT_HYBRID 3 59#define CEPH_PG_LAYOUT_HYBRID 3
60 60
61#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
61 62
62/* 63/*
63 * placement group. 64 * placement group.
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2b881262ef67..d5114db70453 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -869,16 +869,20 @@ skip_inode:
869 continue; 869 continue;
870 ci = ceph_inode(inode); 870 ci = ceph_inode(inode);
871 spin_lock(&inode->i_lock); 871 spin_lock(&inode->i_lock);
872 if (!ci->i_snap_realm) 872 if (list_empty(&ci->i_snap_realm_item)) {
873 goto split_skip_inode; 873 struct ceph_snap_realm *oldrealm =
874 ceph_put_snap_realm(mdsc, ci->i_snap_realm); 874 ci->i_snap_realm;
875 spin_lock(&realm->inodes_with_caps_lock); 875
876 list_add(&ci->i_snap_realm_item, 876 dout(" moving %p to split realm %llx %p\n",
877 &realm->inodes_with_caps); 877 inode, realm->ino, realm);
878 ci->i_snap_realm = realm; 878 spin_lock(&realm->inodes_with_caps_lock);
879 spin_unlock(&realm->inodes_with_caps_lock); 879 list_add(&ci->i_snap_realm_item,
880 ceph_get_snap_realm(mdsc, realm); 880 &realm->inodes_with_caps);
881split_skip_inode: 881 ci->i_snap_realm = realm;
882 spin_unlock(&realm->inodes_with_caps_lock);
883 ceph_get_snap_realm(mdsc, realm);
884 ceph_put_snap_realm(mdsc, oldrealm);
885 }
882 spin_unlock(&inode->i_lock); 886 spin_unlock(&inode->i_lock);
883 iput(inode); 887 iput(inode);
884 } 888 }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02eaa1279..110857ba9269 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
47 */ 47 */
48static void ceph_put_super(struct super_block *s) 48static void ceph_put_super(struct super_block *s)
49{ 49{
50 struct ceph_client *cl = ceph_client(s); 50 struct ceph_client *client = ceph_sb_to_client(s);
51 51
52 dout("put_super\n"); 52 dout("put_super\n");
53 ceph_mdsc_close_sessions(&cl->mdsc); 53 ceph_mdsc_close_sessions(&client->mdsc);
54
55 /*
56 * ensure we release the bdi before put_anon_super releases
57 * the device name.
58 */
59 if (s->s_bdi == &client->backing_dev_info) {
60 bdi_unregister(&client->backing_dev_info);
61 s->s_bdi = NULL;
62 }
63
54 return; 64 return;
55} 65}
56 66
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
636 destroy_workqueue(client->pg_inv_wq); 646 destroy_workqueue(client->pg_inv_wq);
637 destroy_workqueue(client->trunc_wq); 647 destroy_workqueue(client->trunc_wq);
638 648
649 bdi_destroy(&client->backing_dev_info);
650
639 if (client->msgr) 651 if (client->msgr)
640 ceph_messenger_destroy(client->msgr); 652 ceph_messenger_destroy(client->msgr);
641 mempool_destroy(client->wb_pagevec_pool); 653 mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
876{ 888{
877 int err; 889 int err;
878 890
879 sb->s_bdi = &client->backing_dev_info;
880
881 /* set ra_pages based on rsize mount option? */ 891 /* set ra_pages based on rsize mount option? */
882 if (client->mount_args->rsize >= PAGE_CACHE_SIZE) 892 if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
883 client->backing_dev_info.ra_pages = 893 client->backing_dev_info.ra_pages =
884 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) 894 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
885 >> PAGE_SHIFT; 895 >> PAGE_SHIFT;
886 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); 896 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
897 if (!err)
898 sb->s_bdi = &client->backing_dev_info;
887 return err; 899 return err;
888} 900}
889 901
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
957 dout("kill_sb %p\n", s); 969 dout("kill_sb %p\n", s);
958 ceph_mdsc_pre_umount(&client->mdsc); 970 ceph_mdsc_pre_umount(&client->mdsc);
959 kill_anon_super(s); /* will call put_super after sb is r/o */ 971 kill_anon_super(s); /* will call put_super after sb is r/o */
960 if (s->s_bdi == &client->backing_dev_info)
961 bdi_unregister(&client->backing_dev_info);
962 bdi_destroy(&client->backing_dev_info);
963 ceph_destroy_client(client); 972 ceph_destroy_client(client);
964} 973}
965 974
@@ -996,9 +1005,10 @@ static int __init init_ceph(void)
996 if (ret) 1005 if (ret)
997 goto out_icache; 1006 goto out_icache;
998 1007
999 pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n", 1008 pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
1000 CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH, 1009 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
1001 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL); 1010 CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
1011 CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
1002 return 0; 1012 return 0;
1003 1013
1004out_icache: 1014out_icache:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e30dfbb056c3..13513b80d87f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mempool.h> 11#include <linux/mempool.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/slab.h>
13#include <linux/wait.h> 14#include <linux/wait.h>
14#include <linux/writeback.h> 15#include <linux/writeback.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index a20bea598933..cfd1ce34e0bc 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -492,17 +492,13 @@ compare_oid(unsigned long *oid1, unsigned int oid1len,
492 492
493int 493int
494decode_negTokenInit(unsigned char *security_blob, int length, 494decode_negTokenInit(unsigned char *security_blob, int length,
495 enum securityEnum *secType) 495 struct TCP_Server_Info *server)
496{ 496{
497 struct asn1_ctx ctx; 497 struct asn1_ctx ctx;
498 unsigned char *end; 498 unsigned char *end;
499 unsigned char *sequence_end; 499 unsigned char *sequence_end;
500 unsigned long *oid = NULL; 500 unsigned long *oid = NULL;
501 unsigned int cls, con, tag, oidlen, rc; 501 unsigned int cls, con, tag, oidlen, rc;
502 bool use_ntlmssp = false;
503 bool use_kerberos = false;
504 bool use_kerberosu2u = false;
505 bool use_mskerberos = false;
506 502
507 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */ 503 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */
508 504
@@ -510,11 +506,11 @@ decode_negTokenInit(unsigned char *security_blob, int length,
510 506
511 /* GSSAPI header */ 507 /* GSSAPI header */
512 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 508 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
513 cFYI(1, ("Error decoding negTokenInit header")); 509 cFYI(1, "Error decoding negTokenInit header");
514 return 0; 510 return 0;
515 } else if ((cls != ASN1_APL) || (con != ASN1_CON) 511 } else if ((cls != ASN1_APL) || (con != ASN1_CON)
516 || (tag != ASN1_EOC)) { 512 || (tag != ASN1_EOC)) {
517 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag)); 513 cFYI(1, "cls = %d con = %d tag = %d", cls, con, tag);
518 return 0; 514 return 0;
519 } 515 }
520 516
@@ -535,56 +531,52 @@ decode_negTokenInit(unsigned char *security_blob, int length,
535 531
536 /* SPNEGO OID not present or garbled -- bail out */ 532 /* SPNEGO OID not present or garbled -- bail out */
537 if (!rc) { 533 if (!rc) {
538 cFYI(1, ("Error decoding negTokenInit header")); 534 cFYI(1, "Error decoding negTokenInit header");
539 return 0; 535 return 0;
540 } 536 }
541 537
542 /* SPNEGO */ 538 /* SPNEGO */
543 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 539 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
544 cFYI(1, ("Error decoding negTokenInit")); 540 cFYI(1, "Error decoding negTokenInit");
545 return 0; 541 return 0;
546 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 542 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
547 || (tag != ASN1_EOC)) { 543 || (tag != ASN1_EOC)) {
548 cFYI(1, 544 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0",
549 ("cls = %d con = %d tag = %d end = %p (%d) exit 0", 545 cls, con, tag, end, *end);
550 cls, con, tag, end, *end));
551 return 0; 546 return 0;
552 } 547 }
553 548
554 /* negTokenInit */ 549 /* negTokenInit */
555 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 550 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
556 cFYI(1, ("Error decoding negTokenInit")); 551 cFYI(1, "Error decoding negTokenInit");
557 return 0; 552 return 0;
558 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 553 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
559 || (tag != ASN1_SEQ)) { 554 || (tag != ASN1_SEQ)) {
560 cFYI(1, 555 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1",
561 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 556 cls, con, tag, end, *end);
562 cls, con, tag, end, *end));
563 return 0; 557 return 0;
564 } 558 }
565 559
566 /* sequence */ 560 /* sequence */
567 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 561 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
568 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 562 cFYI(1, "Error decoding 2nd part of negTokenInit");
569 return 0; 563 return 0;
570 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 564 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
571 || (tag != ASN1_EOC)) { 565 || (tag != ASN1_EOC)) {
572 cFYI(1, 566 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 0",
573 ("cls = %d con = %d tag = %d end = %p (%d) exit 0", 567 cls, con, tag, end, *end);
574 cls, con, tag, end, *end));
575 return 0; 568 return 0;
576 } 569 }
577 570
578 /* sequence of */ 571 /* sequence of */
579 if (asn1_header_decode 572 if (asn1_header_decode
580 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 573 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
581 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 574 cFYI(1, "Error decoding 2nd part of negTokenInit");
582 return 0; 575 return 0;
583 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 576 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
584 || (tag != ASN1_SEQ)) { 577 || (tag != ASN1_SEQ)) {
585 cFYI(1, 578 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d) exit 1",
586 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 579 cls, con, tag, end, *end);
587 cls, con, tag, end, *end));
588 return 0; 580 return 0;
589 } 581 }
590 582
@@ -592,37 +584,33 @@ decode_negTokenInit(unsigned char *security_blob, int length,
592 while (!asn1_eoc_decode(&ctx, sequence_end)) { 584 while (!asn1_eoc_decode(&ctx, sequence_end)) {
593 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 585 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
594 if (!rc) { 586 if (!rc) {
595 cFYI(1, 587 cFYI(1, "Error decoding negTokenInit hdr exit2");
596 ("Error decoding negTokenInit hdr exit2"));
597 return 0; 588 return 0;
598 } 589 }
599 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { 590 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
600 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { 591 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
601 592
602 cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx " 593 cFYI(1, "OID len = %d oid = 0x%lx 0x%lx "
603 "0x%lx 0x%lx", oidlen, *oid, 594 "0x%lx 0x%lx", oidlen, *oid,
604 *(oid + 1), *(oid + 2), *(oid + 3))); 595 *(oid + 1), *(oid + 2), *(oid + 3));
605 596
606 if (compare_oid(oid, oidlen, MSKRB5_OID, 597 if (compare_oid(oid, oidlen, MSKRB5_OID,
607 MSKRB5_OID_LEN) && 598 MSKRB5_OID_LEN))
608 !use_mskerberos) 599 server->sec_mskerberos = true;
609 use_mskerberos = true;
610 else if (compare_oid(oid, oidlen, KRB5U2U_OID, 600 else if (compare_oid(oid, oidlen, KRB5U2U_OID,
611 KRB5U2U_OID_LEN) && 601 KRB5U2U_OID_LEN))
612 !use_kerberosu2u) 602 server->sec_kerberosu2u = true;
613 use_kerberosu2u = true;
614 else if (compare_oid(oid, oidlen, KRB5_OID, 603 else if (compare_oid(oid, oidlen, KRB5_OID,
615 KRB5_OID_LEN) && 604 KRB5_OID_LEN))
616 !use_kerberos) 605 server->sec_kerberos = true;
617 use_kerberos = true;
618 else if (compare_oid(oid, oidlen, NTLMSSP_OID, 606 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
619 NTLMSSP_OID_LEN)) 607 NTLMSSP_OID_LEN))
620 use_ntlmssp = true; 608 server->sec_ntlmssp = true;
621 609
622 kfree(oid); 610 kfree(oid);
623 } 611 }
624 } else { 612 } else {
625 cFYI(1, ("Should be an oid what is going on?")); 613 cFYI(1, "Should be an oid what is going on?");
626 } 614 }
627 } 615 }
628 616
@@ -632,54 +620,47 @@ decode_negTokenInit(unsigned char *security_blob, int length,
632 no mechListMic (e.g. NTLMSSP instead of KRB5) */ 620 no mechListMic (e.g. NTLMSSP instead of KRB5) */
633 if (ctx.error == ASN1_ERR_DEC_EMPTY) 621 if (ctx.error == ASN1_ERR_DEC_EMPTY)
634 goto decode_negtoken_exit; 622 goto decode_negtoken_exit;
635 cFYI(1, ("Error decoding last part negTokenInit exit3")); 623 cFYI(1, "Error decoding last part negTokenInit exit3");
636 return 0; 624 return 0;
637 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 625 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
638 /* tag = 3 indicating mechListMIC */ 626 /* tag = 3 indicating mechListMIC */
639 cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", 627 cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
640 cls, con, tag, end, *end)); 628 cls, con, tag, end, *end);
641 return 0; 629 return 0;
642 } 630 }
643 631
644 /* sequence */ 632 /* sequence */
645 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 633 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
646 cFYI(1, ("Error decoding last part negTokenInit exit5")); 634 cFYI(1, "Error decoding last part negTokenInit exit5");
647 return 0; 635 return 0;
648 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 636 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
649 || (tag != ASN1_SEQ)) { 637 || (tag != ASN1_SEQ)) {
650 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", 638 cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)",
651 cls, con, tag, end, *end)); 639 cls, con, tag, end, *end);
652 } 640 }
653 641
654 /* sequence of */ 642 /* sequence of */
655 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 643 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
656 cFYI(1, ("Error decoding last part negTokenInit exit 7")); 644 cFYI(1, "Error decoding last part negTokenInit exit 7");
657 return 0; 645 return 0;
658 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 646 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
659 cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", 647 cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
660 cls, con, tag, end, *end)); 648 cls, con, tag, end, *end);
661 return 0; 649 return 0;
662 } 650 }
663 651
664 /* general string */ 652 /* general string */
665 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 653 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
666 cFYI(1, ("Error decoding last part negTokenInit exit9")); 654 cFYI(1, "Error decoding last part negTokenInit exit9");
667 return 0; 655 return 0;
668 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) 656 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
669 || (tag != ASN1_GENSTR)) { 657 || (tag != ASN1_GENSTR)) {
670 cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", 658 cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)",
671 cls, con, tag, end, *end)); 659 cls, con, tag, end, *end);
672 return 0; 660 return 0;
673 } 661 }
674 cFYI(1, ("Need to call asn1_octets_decode() function for %s", 662 cFYI(1, "Need to call asn1_octets_decode() function for %s",
675 ctx.pointer)); /* is this UTF-8 or ASCII? */ 663 ctx.pointer); /* is this UTF-8 or ASCII? */
676decode_negtoken_exit: 664decode_negtoken_exit:
677 if (use_kerberos)
678 *secType = Kerberos;
679 else if (use_mskerberos)
680 *secType = MSKerberos;
681 else if (use_ntlmssp)
682 *secType = RawNTLMSSP;
683
684 return 1; 665 return 1;
685} 666}
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 42cec2a7c0cf..4fce6e61b34e 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -60,10 +60,10 @@ cifs_dump_mem(char *label, void *data, int length)
60#ifdef CONFIG_CIFS_DEBUG2 60#ifdef CONFIG_CIFS_DEBUG2
61void cifs_dump_detail(struct smb_hdr *smb) 61void cifs_dump_detail(struct smb_hdr *smb)
62{ 62{
63 cERROR(1, ("Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError, 64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid)); 65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
66 cERROR(1, ("smb buf %p len %d", smb, smbCalcSize_LE(smb))); 66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb));
67} 67}
68 68
69 69
@@ -75,25 +75,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
75 if (server == NULL) 75 if (server == NULL)
76 return; 76 return;
77 77
78 cERROR(1, ("Dump pending requests:")); 78 cERROR(1, "Dump pending requests:");
79 spin_lock(&GlobalMid_Lock); 79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) { 80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", 82 cERROR(1, "State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
83 mid_entry->midState, 83 mid_entry->midState,
84 (int)mid_entry->command, 84 (int)mid_entry->command,
85 mid_entry->pid, 85 mid_entry->pid,
86 mid_entry->tsk, 86 mid_entry->tsk,
87 mid_entry->mid)); 87 mid_entry->mid);
88#ifdef CONFIG_CIFS_STATS2 88#ifdef CONFIG_CIFS_STATS2
89 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", 89 cERROR(1, "IsLarge: %d buf: %p time rcv: %ld now: %ld",
90 mid_entry->largeBuf, 90 mid_entry->largeBuf,
91 mid_entry->resp_buf, 91 mid_entry->resp_buf,
92 mid_entry->when_received, 92 mid_entry->when_received,
93 jiffies)); 93 jiffies);
94#endif /* STATS2 */ 94#endif /* STATS2 */
95 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, 95 cERROR(1, "IsMult: %d IsEnd: %d", mid_entry->multiRsp,
96 mid_entry->multiEnd)); 96 mid_entry->multiEnd);
97 if (mid_entry->resp_buf) { 97 if (mid_entry->resp_buf) {
98 cifs_dump_detail(mid_entry->resp_buf); 98 cifs_dump_detail(mid_entry->resp_buf);
99 cifs_dump_mem("existing buf: ", 99 cifs_dump_mem("existing buf: ",
@@ -716,7 +716,7 @@ static const struct file_operations cifs_multiuser_mount_proc_fops = {
716 716
717static int cifs_security_flags_proc_show(struct seq_file *m, void *v) 717static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
718{ 718{
719 seq_printf(m, "0x%x\n", extended_security); 719 seq_printf(m, "0x%x\n", global_secflags);
720 return 0; 720 return 0;
721} 721}
722 722
@@ -744,13 +744,13 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
744 /* single char or single char followed by null */ 744 /* single char or single char followed by null */
745 c = flags_string[0]; 745 c = flags_string[0];
746 if (c == '0' || c == 'n' || c == 'N') { 746 if (c == '0' || c == 'n' || c == 'N') {
747 extended_security = CIFSSEC_DEF; /* default */ 747 global_secflags = CIFSSEC_DEF; /* default */
748 return count; 748 return count;
749 } else if (c == '1' || c == 'y' || c == 'Y') { 749 } else if (c == '1' || c == 'y' || c == 'Y') {
750 extended_security = CIFSSEC_MAX; 750 global_secflags = CIFSSEC_MAX;
751 return count; 751 return count;
752 } else if (!isdigit(c)) { 752 } else if (!isdigit(c)) {
753 cERROR(1, ("invalid flag %c", c)); 753 cERROR(1, "invalid flag %c", c);
754 return -EINVAL; 754 return -EINVAL;
755 } 755 }
756 } 756 }
@@ -758,26 +758,26 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
758 758
759 flags = simple_strtoul(flags_string, NULL, 0); 759 flags = simple_strtoul(flags_string, NULL, 0);
760 760
761 cFYI(1, ("sec flags 0x%x", flags)); 761 cFYI(1, "sec flags 0x%x", flags);
762 762
763 if (flags <= 0) { 763 if (flags <= 0) {
764 cERROR(1, ("invalid security flags %s", flags_string)); 764 cERROR(1, "invalid security flags %s", flags_string);
765 return -EINVAL; 765 return -EINVAL;
766 } 766 }
767 767
768 if (flags & ~CIFSSEC_MASK) { 768 if (flags & ~CIFSSEC_MASK) {
769 cERROR(1, ("attempt to set unsupported security flags 0x%x", 769 cERROR(1, "attempt to set unsupported security flags 0x%x",
770 flags & ~CIFSSEC_MASK)); 770 flags & ~CIFSSEC_MASK);
771 return -EINVAL; 771 return -EINVAL;
772 } 772 }
773 /* flags look ok - update the global security flags for cifs module */ 773 /* flags look ok - update the global security flags for cifs module */
774 extended_security = flags; 774 global_secflags = flags;
775 if (extended_security & CIFSSEC_MUST_SIGN) { 775 if (global_secflags & CIFSSEC_MUST_SIGN) {
776 /* requiring signing implies signing is allowed */ 776 /* requiring signing implies signing is allowed */
777 extended_security |= CIFSSEC_MAY_SIGN; 777 global_secflags |= CIFSSEC_MAY_SIGN;
778 cFYI(1, ("packet signing now required")); 778 cFYI(1, "packet signing now required");
779 } else if ((extended_security & CIFSSEC_MAY_SIGN) == 0) { 779 } else if ((global_secflags & CIFSSEC_MAY_SIGN) == 0) {
780 cFYI(1, ("packet signing disabled")); 780 cFYI(1, "packet signing disabled");
781 } 781 }
782 /* BB should we turn on MAY flags for other MUST options? */ 782 /* BB should we turn on MAY flags for other MUST options? */
783 return count; 783 return count;
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 5eb3b83bbfa7..aa316891ac0c 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -43,34 +43,54 @@ void dump_smb(struct smb_hdr *, int);
43 */ 43 */
44#ifdef CIFS_DEBUG 44#ifdef CIFS_DEBUG
45 45
46
47/* information message: e.g., configuration, major event */ 46/* information message: e.g., configuration, major event */
48extern int cifsFYI; 47extern int cifsFYI;
49#define cifsfyi(format,arg...) if (cifsFYI & CIFS_INFO) printk(KERN_DEBUG " " __FILE__ ": " format "\n" "" , ## arg) 48#define cifsfyi(fmt, arg...) \
49do { \
50 if (cifsFYI & CIFS_INFO) \
51 printk(KERN_DEBUG "%s: " fmt "\n", __FILE__, ##arg); \
52} while (0)
50 53
51#define cFYI(button,prspec) if (button) cifsfyi prspec 54#define cFYI(set, fmt, arg...) \
55do { \
56 if (set) \
57 cifsfyi(fmt, ##arg); \
58} while (0)
52 59
53#define cifswarn(format, arg...) printk(KERN_WARNING ": " format "\n" , ## arg) 60#define cifswarn(fmt, arg...) \
61 printk(KERN_WARNING fmt "\n", ##arg)
54 62
55/* debug event message: */ 63/* debug event message: */
56extern int cifsERROR; 64extern int cifsERROR;
57 65
58#define cEVENT(format,arg...) if (cifsERROR) printk(KERN_EVENT __FILE__ ": " format "\n" , ## arg) 66#define cEVENT(fmt, arg...) \
67do { \
68 if (cifsERROR) \
69 printk(KERN_EVENT "%s: " fmt "\n", __FILE__, ##arg); \
70} while (0)
59 71
60/* error event message: e.g., i/o error */ 72/* error event message: e.g., i/o error */
61#define cifserror(format,arg...) if (cifsERROR) printk(KERN_ERR " CIFS VFS: " format "\n" "" , ## arg) 73#define cifserror(fmt, arg...) \
74do { \
75 if (cifsERROR) \
76 printk(KERN_ERR "CIFS VFS: " fmt "\n", ##arg); \
77} while (0)
62 78
63#define cERROR(button, prspec) if (button) cifserror prspec 79#define cERROR(set, fmt, arg...) \
80do { \
81 if (set) \
82 cifserror(fmt, ##arg); \
83} while (0)
64 84
65/* 85/*
66 * debug OFF 86 * debug OFF
67 * --------- 87 * ---------
68 */ 88 */
69#else /* _CIFS_DEBUG */ 89#else /* _CIFS_DEBUG */
70#define cERROR(button, prspec) 90#define cERROR(set, fmt, arg...)
71#define cEVENT(format, arg...) 91#define cEVENT(fmt, arg...)
72#define cFYI(button, prspec) 92#define cFYI(set, fmt, arg...)
73#define cifserror(format, arg...) 93#define cifserror(fmt, arg...)
74#endif /* _CIFS_DEBUG */ 94#endif /* _CIFS_DEBUG */
75 95
76#endif /* _H_CIFS_DEBUG */ 96#endif /* _H_CIFS_DEBUG */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 78e4d2a3a68b..ac19a6f3dae0 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -85,8 +85,8 @@ static char *cifs_get_share_name(const char *node_name)
85 /* find server name end */ 85 /* find server name end */
86 pSep = memchr(UNC+2, '\\', len-2); 86 pSep = memchr(UNC+2, '\\', len-2);
87 if (!pSep) { 87 if (!pSep) {
88 cERROR(1, ("%s: no server name end in node name: %s", 88 cERROR(1, "%s: no server name end in node name: %s",
89 __func__, node_name)); 89 __func__, node_name);
90 kfree(UNC); 90 kfree(UNC);
91 return ERR_PTR(-EINVAL); 91 return ERR_PTR(-EINVAL);
92 } 92 }
@@ -142,8 +142,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
142 142
143 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 143 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
144 if (rc != 0) { 144 if (rc != 0) {
145 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", 145 cERROR(1, "%s: Failed to resolve server part of %s to IP: %d",
146 __func__, *devname, rc)); 146 __func__, *devname, rc);
147 goto compose_mount_options_err; 147 goto compose_mount_options_err;
148 } 148 }
149 /* md_len = strlen(...) + 12 for 'sep+prefixpath=' 149 /* md_len = strlen(...) + 12 for 'sep+prefixpath='
@@ -217,8 +217,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
217 strcat(mountdata, fullpath + ref->path_consumed); 217 strcat(mountdata, fullpath + ref->path_consumed);
218 } 218 }
219 219
220 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/ 220 /*cFYI(1, "%s: parent mountdata: %s", __func__,sb_mountdata);*/
221 /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/ 221 /*cFYI(1, "%s: submount mountdata: %s", __func__, mountdata );*/
222 222
223compose_mount_options_out: 223compose_mount_options_out:
224 kfree(srvIP); 224 kfree(srvIP);
@@ -294,11 +294,11 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
294 294
295static void dump_referral(const struct dfs_info3_param *ref) 295static void dump_referral(const struct dfs_info3_param *ref)
296{ 296{
297 cFYI(1, ("DFS: ref path: %s", ref->path_name)); 297 cFYI(1, "DFS: ref path: %s", ref->path_name);
298 cFYI(1, ("DFS: node path: %s", ref->node_name)); 298 cFYI(1, "DFS: node path: %s", ref->node_name);
299 cFYI(1, ("DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type)); 299 cFYI(1, "DFS: fl: %hd, srv_type: %hd", ref->flags, ref->server_type);
300 cFYI(1, ("DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag, 300 cFYI(1, "DFS: ref_flags: %hd, path_consumed: %hd", ref->ref_flag,
301 ref->path_consumed)); 301 ref->path_consumed);
302} 302}
303 303
304 304
@@ -314,7 +314,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
314 int rc = 0; 314 int rc = 0;
315 struct vfsmount *mnt = ERR_PTR(-ENOENT); 315 struct vfsmount *mnt = ERR_PTR(-ENOENT);
316 316
317 cFYI(1, ("in %s", __func__)); 317 cFYI(1, "in %s", __func__);
318 BUG_ON(IS_ROOT(dentry)); 318 BUG_ON(IS_ROOT(dentry));
319 319
320 xid = GetXid(); 320 xid = GetXid();
@@ -352,15 +352,15 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
352 /* connect to a node */ 352 /* connect to a node */
353 len = strlen(referrals[i].node_name); 353 len = strlen(referrals[i].node_name);
354 if (len < 2) { 354 if (len < 2) {
355 cERROR(1, ("%s: Net Address path too short: %s", 355 cERROR(1, "%s: Net Address path too short: %s",
356 __func__, referrals[i].node_name)); 356 __func__, referrals[i].node_name);
357 rc = -EINVAL; 357 rc = -EINVAL;
358 goto out_err; 358 goto out_err;
359 } 359 }
360 mnt = cifs_dfs_do_refmount(nd->path.mnt, 360 mnt = cifs_dfs_do_refmount(nd->path.mnt,
361 nd->path.dentry, referrals + i); 361 nd->path.dentry, referrals + i);
362 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__, 362 cFYI(1, "%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
363 referrals[i].node_name, mnt)); 363 referrals[i].node_name, mnt);
364 364
365 /* complete mount procedure if we accured submount */ 365 /* complete mount procedure if we accured submount */
366 if (!IS_ERR(mnt)) 366 if (!IS_ERR(mnt))
@@ -378,7 +378,7 @@ out:
378 FreeXid(xid); 378 FreeXid(xid);
379 free_dfs_info_array(referrals, num_referrals); 379 free_dfs_info_array(referrals, num_referrals);
380 kfree(full_path); 380 kfree(full_path);
381 cFYI(1, ("leaving %s" , __func__)); 381 cFYI(1, "leaving %s" , __func__);
382 return ERR_PTR(rc); 382 return ERR_PTR(rc);
383out_err: 383out_err:
384 path_put(&nd->path); 384 path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 310d12f69a92..379bd7d9c05f 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -133,9 +133,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
133 dp = description + strlen(description); 133 dp = description + strlen(description);
134 134
135 /* for now, only sec=krb5 and sec=mskrb5 are valid */ 135 /* for now, only sec=krb5 and sec=mskrb5 are valid */
136 if (server->secType == Kerberos) 136 if (server->sec_kerberos)
137 sprintf(dp, ";sec=krb5"); 137 sprintf(dp, ";sec=krb5");
138 else if (server->secType == MSKerberos) 138 else if (server->sec_mskerberos)
139 sprintf(dp, ";sec=mskrb5"); 139 sprintf(dp, ";sec=mskrb5");
140 else 140 else
141 goto out; 141 goto out;
@@ -149,7 +149,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
149 dp = description + strlen(description); 149 dp = description + strlen(description);
150 sprintf(dp, ";pid=0x%x", current->pid); 150 sprintf(dp, ";pid=0x%x", current->pid);
151 151
152 cFYI(1, ("key description = %s", description)); 152 cFYI(1, "key description = %s", description);
153 spnego_key = request_key(&cifs_spnego_key_type, description, ""); 153 spnego_key = request_key(&cifs_spnego_key_type, description, "");
154 154
155#ifdef CONFIG_CIFS_DEBUG2 155#ifdef CONFIG_CIFS_DEBUG2
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index d07676bd76d2..430f510a1720 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -200,9 +200,8 @@ cifs_strtoUCS(__le16 *to, const char *from, int len,
200 /* works for 2.4.0 kernel or later */ 200 /* works for 2.4.0 kernel or later */
201 charlen = codepage->char2uni(from, len, &wchar_to[i]); 201 charlen = codepage->char2uni(from, len, &wchar_to[i]);
202 if (charlen < 1) { 202 if (charlen < 1) {
203 cERROR(1, 203 cERROR(1, "strtoUCS: char2uni of %d returned %d",
204 ("strtoUCS: char2uni of %d returned %d", 204 (int)*from, charlen);
205 (int)*from, charlen));
206 /* A question mark */ 205 /* A question mark */
207 to[i] = cpu_to_le16(0x003f); 206 to[i] = cpu_to_le16(0x003f);
208 charlen = 1; 207 charlen = 1;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 9b716d044bbd..85d7cf7ff2c8 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -87,11 +87,11 @@ int match_sid(struct cifs_sid *ctsid)
87 continue; /* all sub_auth values do not match */ 87 continue; /* all sub_auth values do not match */
88 } 88 }
89 89
90 cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname)); 90 cFYI(1, "matching sid: %s\n", wksidarr[i].sidname);
91 return 0; /* sids compare/match */ 91 return 0; /* sids compare/match */
92 } 92 }
93 93
94 cFYI(1, ("No matching sid")); 94 cFYI(1, "No matching sid");
95 return -1; 95 return -1;
96} 96}
97 97
@@ -208,14 +208,14 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
208 *pbits_to_set &= ~S_IXUGO; 208 *pbits_to_set &= ~S_IXUGO;
209 return; 209 return;
210 } else if (type != ACCESS_ALLOWED) { 210 } else if (type != ACCESS_ALLOWED) {
211 cERROR(1, ("unknown access control type %d", type)); 211 cERROR(1, "unknown access control type %d", type);
212 return; 212 return;
213 } 213 }
214 /* else ACCESS_ALLOWED type */ 214 /* else ACCESS_ALLOWED type */
215 215
216 if (flags & GENERIC_ALL) { 216 if (flags & GENERIC_ALL) {
217 *pmode |= (S_IRWXUGO & (*pbits_to_set)); 217 *pmode |= (S_IRWXUGO & (*pbits_to_set));
218 cFYI(DBG2, ("all perms")); 218 cFYI(DBG2, "all perms");
219 return; 219 return;
220 } 220 }
221 if ((flags & GENERIC_WRITE) || 221 if ((flags & GENERIC_WRITE) ||
@@ -228,7 +228,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
228 ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) 228 ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
229 *pmode |= (S_IXUGO & (*pbits_to_set)); 229 *pmode |= (S_IXUGO & (*pbits_to_set));
230 230
231 cFYI(DBG2, ("access flags 0x%x mode now 0x%x", flags, *pmode)); 231 cFYI(DBG2, "access flags 0x%x mode now 0x%x", flags, *pmode);
232 return; 232 return;
233} 233}
234 234
@@ -257,7 +257,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
257 if (mode & S_IXUGO) 257 if (mode & S_IXUGO)
258 *pace_flags |= SET_FILE_EXEC_RIGHTS; 258 *pace_flags |= SET_FILE_EXEC_RIGHTS;
259 259
260 cFYI(DBG2, ("mode: 0x%x, access flags now 0x%x", mode, *pace_flags)); 260 cFYI(DBG2, "mode: 0x%x, access flags now 0x%x", mode, *pace_flags);
261 return; 261 return;
262} 262}
263 263
@@ -297,24 +297,24 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl)
297 /* validate that we do not go past end of acl */ 297 /* validate that we do not go past end of acl */
298 298
299 if (le16_to_cpu(pace->size) < 16) { 299 if (le16_to_cpu(pace->size) < 16) {
300 cERROR(1, ("ACE too small, %d", le16_to_cpu(pace->size))); 300 cERROR(1, "ACE too small %d", le16_to_cpu(pace->size));
301 return; 301 return;
302 } 302 }
303 303
304 if (end_of_acl < (char *)pace + le16_to_cpu(pace->size)) { 304 if (end_of_acl < (char *)pace + le16_to_cpu(pace->size)) {
305 cERROR(1, ("ACL too small to parse ACE")); 305 cERROR(1, "ACL too small to parse ACE");
306 return; 306 return;
307 } 307 }
308 308
309 num_subauth = pace->sid.num_subauth; 309 num_subauth = pace->sid.num_subauth;
310 if (num_subauth) { 310 if (num_subauth) {
311 int i; 311 int i;
312 cFYI(1, ("ACE revision %d num_auth %d type %d flags %d size %d", 312 cFYI(1, "ACE revision %d num_auth %d type %d flags %d size %d",
313 pace->sid.revision, pace->sid.num_subauth, pace->type, 313 pace->sid.revision, pace->sid.num_subauth, pace->type,
314 pace->flags, le16_to_cpu(pace->size))); 314 pace->flags, le16_to_cpu(pace->size));
315 for (i = 0; i < num_subauth; ++i) { 315 for (i = 0; i < num_subauth; ++i) {
316 cFYI(1, ("ACE sub_auth[%d]: 0x%x", i, 316 cFYI(1, "ACE sub_auth[%d]: 0x%x", i,
317 le32_to_cpu(pace->sid.sub_auth[i]))); 317 le32_to_cpu(pace->sid.sub_auth[i]));
318 } 318 }
319 319
320 /* BB add length check to make sure that we do not have huge 320 /* BB add length check to make sure that we do not have huge
@@ -347,13 +347,13 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
347 347
348 /* validate that we do not go past end of acl */ 348 /* validate that we do not go past end of acl */
349 if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) { 349 if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
350 cERROR(1, ("ACL too small to parse DACL")); 350 cERROR(1, "ACL too small to parse DACL");
351 return; 351 return;
352 } 352 }
353 353
354 cFYI(DBG2, ("DACL revision %d size %d num aces %d", 354 cFYI(DBG2, "DACL revision %d size %d num aces %d",
355 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size), 355 le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
356 le32_to_cpu(pdacl->num_aces))); 356 le32_to_cpu(pdacl->num_aces));
357 357
358 /* reset rwx permissions for user/group/other. 358 /* reset rwx permissions for user/group/other.
359 Also, if num_aces is 0 i.e. DACL has no ACEs, 359 Also, if num_aces is 0 i.e. DACL has no ACEs,
@@ -437,25 +437,25 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
437 /* validate that we do not go past end of ACL - sid must be at least 8 437 /* validate that we do not go past end of ACL - sid must be at least 8
438 bytes long (assuming no sub-auths - e.g. the null SID */ 438 bytes long (assuming no sub-auths - e.g. the null SID */
439 if (end_of_acl < (char *)psid + 8) { 439 if (end_of_acl < (char *)psid + 8) {
440 cERROR(1, ("ACL too small to parse SID %p", psid)); 440 cERROR(1, "ACL too small to parse SID %p", psid);
441 return -EINVAL; 441 return -EINVAL;
442 } 442 }
443 443
444 if (psid->num_subauth) { 444 if (psid->num_subauth) {
445#ifdef CONFIG_CIFS_DEBUG2 445#ifdef CONFIG_CIFS_DEBUG2
446 int i; 446 int i;
447 cFYI(1, ("SID revision %d num_auth %d", 447 cFYI(1, "SID revision %d num_auth %d",
448 psid->revision, psid->num_subauth)); 448 psid->revision, psid->num_subauth);
449 449
450 for (i = 0; i < psid->num_subauth; i++) { 450 for (i = 0; i < psid->num_subauth; i++) {
451 cFYI(1, ("SID sub_auth[%d]: 0x%x ", i, 451 cFYI(1, "SID sub_auth[%d]: 0x%x ", i,
452 le32_to_cpu(psid->sub_auth[i]))); 452 le32_to_cpu(psid->sub_auth[i]));
453 } 453 }
454 454
455 /* BB add length check to make sure that we do not have huge 455 /* BB add length check to make sure that we do not have huge
456 num auths and therefore go off the end */ 456 num auths and therefore go off the end */
457 cFYI(1, ("RID 0x%x", 457 cFYI(1, "RID 0x%x",
458 le32_to_cpu(psid->sub_auth[psid->num_subauth-1]))); 458 le32_to_cpu(psid->sub_auth[psid->num_subauth-1]));
459#endif 459#endif
460 } 460 }
461 461
@@ -482,11 +482,11 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
482 le32_to_cpu(pntsd->gsidoffset)); 482 le32_to_cpu(pntsd->gsidoffset));
483 dacloffset = le32_to_cpu(pntsd->dacloffset); 483 dacloffset = le32_to_cpu(pntsd->dacloffset);
484 dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset); 484 dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
485 cFYI(DBG2, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x " 485 cFYI(DBG2, "revision %d type 0x%x ooffset 0x%x goffset 0x%x "
486 "sacloffset 0x%x dacloffset 0x%x", 486 "sacloffset 0x%x dacloffset 0x%x",
487 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset), 487 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
488 le32_to_cpu(pntsd->gsidoffset), 488 le32_to_cpu(pntsd->gsidoffset),
489 le32_to_cpu(pntsd->sacloffset), dacloffset)); 489 le32_to_cpu(pntsd->sacloffset), dacloffset);
490/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ 490/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
491 rc = parse_sid(owner_sid_ptr, end_of_acl); 491 rc = parse_sid(owner_sid_ptr, end_of_acl);
492 if (rc) 492 if (rc)
@@ -500,7 +500,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
500 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 500 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
501 group_sid_ptr, fattr); 501 group_sid_ptr, fattr);
502 else 502 else
503 cFYI(1, ("no ACL")); /* BB grant all or default perms? */ 503 cFYI(1, "no ACL"); /* BB grant all or default perms? */
504 504
505/* cifscred->uid = owner_sid_ptr->rid; 505/* cifscred->uid = owner_sid_ptr->rid;
506 cifscred->gid = group_sid_ptr->rid; 506 cifscred->gid = group_sid_ptr->rid;
@@ -563,7 +563,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
563 FreeXid(xid); 563 FreeXid(xid);
564 564
565 565
566 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 566 cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
567 return pntsd; 567 return pntsd;
568} 568}
569 569
@@ -581,12 +581,12 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
581 &fid, &oplock, NULL, cifs_sb->local_nls, 581 &fid, &oplock, NULL, cifs_sb->local_nls,
582 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 582 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
583 if (rc) { 583 if (rc) {
584 cERROR(1, ("Unable to open file to get ACL")); 584 cERROR(1, "Unable to open file to get ACL");
585 goto out; 585 goto out;
586 } 586 }
587 587
588 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); 588 rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen);
589 cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); 589 cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
590 590
591 CIFSSMBClose(xid, cifs_sb->tcon, fid); 591 CIFSSMBClose(xid, cifs_sb->tcon, fid);
592 out: 592 out:
@@ -621,7 +621,7 @@ static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid,
621 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 621 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
622 FreeXid(xid); 622 FreeXid(xid);
623 623
624 cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); 624 cFYI(DBG2, "SetCIFSACL rc = %d", rc);
625 return rc; 625 return rc;
626} 626}
627 627
@@ -638,12 +638,12 @@ static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path,
638 &fid, &oplock, NULL, cifs_sb->local_nls, 638 &fid, &oplock, NULL, cifs_sb->local_nls,
639 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 639 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
640 if (rc) { 640 if (rc) {
641 cERROR(1, ("Unable to open file to set ACL")); 641 cERROR(1, "Unable to open file to set ACL");
642 goto out; 642 goto out;
643 } 643 }
644 644
645 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); 645 rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen);
646 cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); 646 cFYI(DBG2, "SetCIFSACL rc = %d", rc);
647 647
648 CIFSSMBClose(xid, cifs_sb->tcon, fid); 648 CIFSSMBClose(xid, cifs_sb->tcon, fid);
649 out: 649 out:
@@ -659,7 +659,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
659 struct cifsFileInfo *open_file; 659 struct cifsFileInfo *open_file;
660 int rc; 660 int rc;
661 661
662 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); 662 cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode);
663 663
664 open_file = find_readable_file(CIFS_I(inode)); 664 open_file = find_readable_file(CIFS_I(inode));
665 if (!open_file) 665 if (!open_file)
@@ -679,7 +679,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
679 u32 acllen = 0; 679 u32 acllen = 0;
680 int rc = 0; 680 int rc = 0;
681 681
682 cFYI(DBG2, ("converting ACL to mode for %s", path)); 682 cFYI(DBG2, "converting ACL to mode for %s", path);
683 683
684 if (pfid) 684 if (pfid)
685 pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen); 685 pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen);
@@ -690,7 +690,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
690 if (pntsd) 690 if (pntsd)
691 rc = parse_sec_desc(pntsd, acllen, fattr); 691 rc = parse_sec_desc(pntsd, acllen, fattr);
692 if (rc) 692 if (rc)
693 cFYI(1, ("parse sec desc failed rc = %d", rc)); 693 cFYI(1, "parse sec desc failed rc = %d", rc);
694 694
695 kfree(pntsd); 695 kfree(pntsd);
696 return; 696 return;
@@ -704,7 +704,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
704 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ 704 struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
705 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ 705 struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
706 706
707 cFYI(DBG2, ("set ACL from mode for %s", path)); 707 cFYI(DBG2, "set ACL from mode for %s", path);
708 708
709 /* Get the security descriptor */ 709 /* Get the security descriptor */
710 pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); 710 pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
@@ -721,19 +721,19 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
721 DEFSECDESCLEN : secdesclen; 721 DEFSECDESCLEN : secdesclen;
722 pnntsd = kmalloc(secdesclen, GFP_KERNEL); 722 pnntsd = kmalloc(secdesclen, GFP_KERNEL);
723 if (!pnntsd) { 723 if (!pnntsd) {
724 cERROR(1, ("Unable to allocate security descriptor")); 724 cERROR(1, "Unable to allocate security descriptor");
725 kfree(pntsd); 725 kfree(pntsd);
726 return -ENOMEM; 726 return -ENOMEM;
727 } 727 }
728 728
729 rc = build_sec_desc(pntsd, pnntsd, inode, nmode); 729 rc = build_sec_desc(pntsd, pnntsd, inode, nmode);
730 730
731 cFYI(DBG2, ("build_sec_desc rc: %d", rc)); 731 cFYI(DBG2, "build_sec_desc rc: %d", rc);
732 732
733 if (!rc) { 733 if (!rc) {
734 /* Set the security descriptor */ 734 /* Set the security descriptor */
735 rc = set_cifs_acl(pnntsd, secdesclen, inode, path); 735 rc = set_cifs_acl(pnntsd, secdesclen, inode, path);
736 cFYI(DBG2, ("set_cifs_acl rc: %d", rc)); 736 cFYI(DBG2, "set_cifs_acl rc: %d", rc);
737 } 737 }
738 738
739 kfree(pnntsd); 739 kfree(pnntsd);
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index fbe986430d0c..847628dfdc44 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -103,7 +103,7 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
103 if (iov[i].iov_len == 0) 103 if (iov[i].iov_len == 0)
104 continue; 104 continue;
105 if (iov[i].iov_base == NULL) { 105 if (iov[i].iov_base == NULL) {
106 cERROR(1, ("null iovec entry")); 106 cERROR(1, "null iovec entry");
107 return -EIO; 107 return -EIO;
108 } 108 }
109 /* The first entry includes a length field (which does not get 109 /* The first entry includes a length field (which does not get
@@ -181,8 +181,8 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
181 181
182 /* Do not need to verify session setups with signature "BSRSPYL " */ 182 /* Do not need to verify session setups with signature "BSRSPYL " */
183 if (memcmp(cifs_pdu->Signature.SecuritySignature, "BSRSPYL ", 8) == 0) 183 if (memcmp(cifs_pdu->Signature.SecuritySignature, "BSRSPYL ", 8) == 0)
184 cFYI(1, ("dummy signature received for smb command 0x%x", 184 cFYI(1, "dummy signature received for smb command 0x%x",
185 cifs_pdu->Command)); 185 cifs_pdu->Command);
186 186
187 /* save off the origiginal signature so we can modify the smb and check 187 /* save off the origiginal signature so we can modify the smb and check
188 its signature against what the server sent */ 188 its signature against what the server sent */
@@ -291,7 +291,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
291 if (password) 291 if (password)
292 strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE); 292 strncpy(password_with_pad, password, CIFS_ENCPWD_SIZE);
293 293
294 if (!encrypt && extended_security & CIFSSEC_MAY_PLNTXT) { 294 if (!encrypt && global_secflags & CIFSSEC_MAY_PLNTXT) {
295 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); 295 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
296 memcpy(lnm_session_key, password_with_pad, 296 memcpy(lnm_session_key, password_with_pad,
297 CIFS_ENCPWD_SIZE); 297 CIFS_ENCPWD_SIZE);
@@ -398,7 +398,7 @@ void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
398 /* calculate buf->ntlmv2_hash */ 398 /* calculate buf->ntlmv2_hash */
399 rc = calc_ntlmv2_hash(ses, nls_cp); 399 rc = calc_ntlmv2_hash(ses, nls_cp);
400 if (rc) 400 if (rc)
401 cERROR(1, ("could not get v2 hash rc %d", rc)); 401 cERROR(1, "could not get v2 hash rc %d", rc);
402 CalcNTLMv2_response(ses, resp_buf); 402 CalcNTLMv2_response(ses, resp_buf);
403 403
404 /* now calculate the MAC key for NTLMv2 */ 404 /* now calculate the MAC key for NTLMv2 */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ad235d604a0b..78c02eb4cb1f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -49,10 +49,6 @@
49#include "cifs_spnego.h" 49#include "cifs_spnego.h"
50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */ 50#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
51 51
52#ifdef CONFIG_CIFS_QUOTA
53static const struct quotactl_ops cifs_quotactl_ops;
54#endif /* QUOTA */
55
56int cifsFYI = 0; 52int cifsFYI = 0;
57int cifsERROR = 1; 53int cifsERROR = 1;
58int traceSMB = 0; 54int traceSMB = 0;
@@ -61,7 +57,7 @@ unsigned int experimEnabled = 0;
61unsigned int linuxExtEnabled = 1; 57unsigned int linuxExtEnabled = 1;
62unsigned int lookupCacheEnabled = 1; 58unsigned int lookupCacheEnabled = 1;
63unsigned int multiuser_mount = 0; 59unsigned int multiuser_mount = 0;
64unsigned int extended_security = CIFSSEC_DEF; 60unsigned int global_secflags = CIFSSEC_DEF;
65/* unsigned int ntlmv2_support = 0; */ 61/* unsigned int ntlmv2_support = 0; */
66unsigned int sign_CIFS_PDUs = 1; 62unsigned int sign_CIFS_PDUs = 1;
67static const struct super_operations cifs_super_ops; 63static const struct super_operations cifs_super_ops;
@@ -86,8 +82,6 @@ extern mempool_t *cifs_sm_req_poolp;
86extern mempool_t *cifs_req_poolp; 82extern mempool_t *cifs_req_poolp;
87extern mempool_t *cifs_mid_poolp; 83extern mempool_t *cifs_mid_poolp;
88 84
89extern struct kmem_cache *cifs_oplock_cachep;
90
91static int 85static int
92cifs_read_super(struct super_block *sb, void *data, 86cifs_read_super(struct super_block *sb, void *data,
93 const char *devname, int silent) 87 const char *devname, int silent)
@@ -135,8 +129,7 @@ cifs_read_super(struct super_block *sb, void *data,
135 129
136 if (rc) { 130 if (rc) {
137 if (!silent) 131 if (!silent)
138 cERROR(1, 132 cERROR(1, "cifs_mount failed w/return code = %d", rc);
139 ("cifs_mount failed w/return code = %d", rc));
140 goto out_mount_failed; 133 goto out_mount_failed;
141 } 134 }
142 135
@@ -146,9 +139,6 @@ cifs_read_super(struct super_block *sb, void *data,
146/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) 139/* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512)
147 sb->s_blocksize = 140 sb->s_blocksize =
148 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ 141 cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */
149#ifdef CONFIG_CIFS_QUOTA
150 sb->s_qcop = &cifs_quotactl_ops;
151#endif
152 sb->s_blocksize = CIFS_MAX_MSGSIZE; 142 sb->s_blocksize = CIFS_MAX_MSGSIZE;
153 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ 143 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
154 inode = cifs_root_iget(sb, ROOT_I); 144 inode = cifs_root_iget(sb, ROOT_I);
@@ -168,7 +158,7 @@ cifs_read_super(struct super_block *sb, void *data,
168 158
169#ifdef CONFIG_CIFS_EXPERIMENTAL 159#ifdef CONFIG_CIFS_EXPERIMENTAL
170 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 160 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
171 cFYI(1, ("export ops supported")); 161 cFYI(1, "export ops supported");
172 sb->s_export_op = &cifs_export_ops; 162 sb->s_export_op = &cifs_export_ops;
173 } 163 }
174#endif /* EXPERIMENTAL */ 164#endif /* EXPERIMENTAL */
@@ -176,7 +166,7 @@ cifs_read_super(struct super_block *sb, void *data,
176 return 0; 166 return 0;
177 167
178out_no_root: 168out_no_root:
179 cERROR(1, ("cifs_read_super: get root inode failed")); 169 cERROR(1, "cifs_read_super: get root inode failed");
180 if (inode) 170 if (inode)
181 iput(inode); 171 iput(inode);
182 172
@@ -203,10 +193,10 @@ cifs_put_super(struct super_block *sb)
203 int rc = 0; 193 int rc = 0;
204 struct cifs_sb_info *cifs_sb; 194 struct cifs_sb_info *cifs_sb;
205 195
206 cFYI(1, ("In cifs_put_super")); 196 cFYI(1, "In cifs_put_super");
207 cifs_sb = CIFS_SB(sb); 197 cifs_sb = CIFS_SB(sb);
208 if (cifs_sb == NULL) { 198 if (cifs_sb == NULL) {
209 cFYI(1, ("Empty cifs superblock info passed to unmount")); 199 cFYI(1, "Empty cifs superblock info passed to unmount");
210 return; 200 return;
211 } 201 }
212 202
@@ -214,7 +204,7 @@ cifs_put_super(struct super_block *sb)
214 204
215 rc = cifs_umount(sb, cifs_sb); 205 rc = cifs_umount(sb, cifs_sb);
216 if (rc) 206 if (rc)
217 cERROR(1, ("cifs_umount failed with return code %d", rc)); 207 cERROR(1, "cifs_umount failed with return code %d", rc);
218#ifdef CONFIG_CIFS_DFS_UPCALL 208#ifdef CONFIG_CIFS_DFS_UPCALL
219 if (cifs_sb->mountdata) { 209 if (cifs_sb->mountdata) {
220 kfree(cifs_sb->mountdata); 210 kfree(cifs_sb->mountdata);
@@ -300,7 +290,6 @@ static int cifs_permission(struct inode *inode, int mask)
300static struct kmem_cache *cifs_inode_cachep; 290static struct kmem_cache *cifs_inode_cachep;
301static struct kmem_cache *cifs_req_cachep; 291static struct kmem_cache *cifs_req_cachep;
302static struct kmem_cache *cifs_mid_cachep; 292static struct kmem_cache *cifs_mid_cachep;
303struct kmem_cache *cifs_oplock_cachep;
304static struct kmem_cache *cifs_sm_req_cachep; 293static struct kmem_cache *cifs_sm_req_cachep;
305mempool_t *cifs_sm_req_poolp; 294mempool_t *cifs_sm_req_poolp;
306mempool_t *cifs_req_poolp; 295mempool_t *cifs_req_poolp;
@@ -432,106 +421,6 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
432 return 0; 421 return 0;
433} 422}
434 423
435#ifdef CONFIG_CIFS_QUOTA
436int cifs_xquota_set(struct super_block *sb, int quota_type, qid_t qid,
437 struct fs_disk_quota *pdquota)
438{
439 int xid;
440 int rc = 0;
441 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
442 struct cifsTconInfo *pTcon;
443
444 if (cifs_sb)
445 pTcon = cifs_sb->tcon;
446 else
447 return -EIO;
448
449
450 xid = GetXid();
451 if (pTcon) {
452 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
453 } else
454 rc = -EIO;
455
456 FreeXid(xid);
457 return rc;
458}
459
460int cifs_xquota_get(struct super_block *sb, int quota_type, qid_t qid,
461 struct fs_disk_quota *pdquota)
462{
463 int xid;
464 int rc = 0;
465 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
466 struct cifsTconInfo *pTcon;
467
468 if (cifs_sb)
469 pTcon = cifs_sb->tcon;
470 else
471 return -EIO;
472
473 xid = GetXid();
474 if (pTcon) {
475 cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
476 } else
477 rc = -EIO;
478
479 FreeXid(xid);
480 return rc;
481}
482
483int cifs_xstate_set(struct super_block *sb, unsigned int flags, int operation)
484{
485 int xid;
486 int rc = 0;
487 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
488 struct cifsTconInfo *pTcon;
489
490 if (cifs_sb)
491 pTcon = cifs_sb->tcon;
492 else
493 return -EIO;
494
495 xid = GetXid();
496 if (pTcon) {
497 cFYI(1, ("flags: 0x%x operation: 0x%x", flags, operation));
498 } else
499 rc = -EIO;
500
501 FreeXid(xid);
502 return rc;
503}
504
505int cifs_xstate_get(struct super_block *sb, struct fs_quota_stat *qstats)
506{
507 int xid;
508 int rc = 0;
509 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
510 struct cifsTconInfo *pTcon;
511
512 if (cifs_sb)
513 pTcon = cifs_sb->tcon;
514 else
515 return -EIO;
516
517 xid = GetXid();
518 if (pTcon) {
519 cFYI(1, ("pqstats %p", qstats));
520 } else
521 rc = -EIO;
522
523 FreeXid(xid);
524 return rc;
525}
526
527static const struct quotactl_ops cifs_quotactl_ops = {
528 .set_xquota = cifs_xquota_set,
529 .get_xquota = cifs_xquota_get,
530 .set_xstate = cifs_xstate_set,
531 .get_xstate = cifs_xstate_get,
532};
533#endif
534
535static void cifs_umount_begin(struct super_block *sb) 424static void cifs_umount_begin(struct super_block *sb)
536{ 425{
537 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 426 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -558,7 +447,7 @@ static void cifs_umount_begin(struct super_block *sb)
558 /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ 447 /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
559 /* cancel_notify_requests(tcon); */ 448 /* cancel_notify_requests(tcon); */
560 if (tcon->ses && tcon->ses->server) { 449 if (tcon->ses && tcon->ses->server) {
561 cFYI(1, ("wake up tasks now - umount begin not complete")); 450 cFYI(1, "wake up tasks now - umount begin not complete");
562 wake_up_all(&tcon->ses->server->request_q); 451 wake_up_all(&tcon->ses->server->request_q);
563 wake_up_all(&tcon->ses->server->response_q); 452 wake_up_all(&tcon->ses->server->response_q);
564 msleep(1); /* yield */ 453 msleep(1); /* yield */
@@ -609,7 +498,7 @@ cifs_get_sb(struct file_system_type *fs_type,
609 int rc; 498 int rc;
610 struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL); 499 struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL);
611 500
612 cFYI(1, ("Devname: %s flags: %d ", dev_name, flags)); 501 cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
613 502
614 if (IS_ERR(sb)) 503 if (IS_ERR(sb))
615 return PTR_ERR(sb); 504 return PTR_ERR(sb);
@@ -656,7 +545,6 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
656 return generic_file_llseek_unlocked(file, offset, origin); 545 return generic_file_llseek_unlocked(file, offset, origin);
657} 546}
658 547
659#ifdef CONFIG_CIFS_EXPERIMENTAL
660static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) 548static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
661{ 549{
662 /* note that this is called by vfs setlease with the BKL held 550 /* note that this is called by vfs setlease with the BKL held
@@ -685,7 +573,6 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
685 else 573 else
686 return -EAGAIN; 574 return -EAGAIN;
687} 575}
688#endif
689 576
690struct file_system_type cifs_fs_type = { 577struct file_system_type cifs_fs_type = {
691 .owner = THIS_MODULE, 578 .owner = THIS_MODULE,
@@ -762,10 +649,7 @@ const struct file_operations cifs_file_ops = {
762#ifdef CONFIG_CIFS_POSIX 649#ifdef CONFIG_CIFS_POSIX
763 .unlocked_ioctl = cifs_ioctl, 650 .unlocked_ioctl = cifs_ioctl,
764#endif /* CONFIG_CIFS_POSIX */ 651#endif /* CONFIG_CIFS_POSIX */
765
766#ifdef CONFIG_CIFS_EXPERIMENTAL
767 .setlease = cifs_setlease, 652 .setlease = cifs_setlease,
768#endif /* CONFIG_CIFS_EXPERIMENTAL */
769}; 653};
770 654
771const struct file_operations cifs_file_direct_ops = { 655const struct file_operations cifs_file_direct_ops = {
@@ -784,9 +668,7 @@ const struct file_operations cifs_file_direct_ops = {
784 .unlocked_ioctl = cifs_ioctl, 668 .unlocked_ioctl = cifs_ioctl,
785#endif /* CONFIG_CIFS_POSIX */ 669#endif /* CONFIG_CIFS_POSIX */
786 .llseek = cifs_llseek, 670 .llseek = cifs_llseek,
787#ifdef CONFIG_CIFS_EXPERIMENTAL
788 .setlease = cifs_setlease, 671 .setlease = cifs_setlease,
789#endif /* CONFIG_CIFS_EXPERIMENTAL */
790}; 672};
791const struct file_operations cifs_file_nobrl_ops = { 673const struct file_operations cifs_file_nobrl_ops = {
792 .read = do_sync_read, 674 .read = do_sync_read,
@@ -803,10 +685,7 @@ const struct file_operations cifs_file_nobrl_ops = {
803#ifdef CONFIG_CIFS_POSIX 685#ifdef CONFIG_CIFS_POSIX
804 .unlocked_ioctl = cifs_ioctl, 686 .unlocked_ioctl = cifs_ioctl,
805#endif /* CONFIG_CIFS_POSIX */ 687#endif /* CONFIG_CIFS_POSIX */
806
807#ifdef CONFIG_CIFS_EXPERIMENTAL
808 .setlease = cifs_setlease, 688 .setlease = cifs_setlease,
809#endif /* CONFIG_CIFS_EXPERIMENTAL */
810}; 689};
811 690
812const struct file_operations cifs_file_direct_nobrl_ops = { 691const struct file_operations cifs_file_direct_nobrl_ops = {
@@ -824,9 +703,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
824 .unlocked_ioctl = cifs_ioctl, 703 .unlocked_ioctl = cifs_ioctl,
825#endif /* CONFIG_CIFS_POSIX */ 704#endif /* CONFIG_CIFS_POSIX */
826 .llseek = cifs_llseek, 705 .llseek = cifs_llseek,
827#ifdef CONFIG_CIFS_EXPERIMENTAL
828 .setlease = cifs_setlease, 706 .setlease = cifs_setlease,
829#endif /* CONFIG_CIFS_EXPERIMENTAL */
830}; 707};
831 708
832const struct file_operations cifs_dir_ops = { 709const struct file_operations cifs_dir_ops = {
@@ -878,7 +755,7 @@ cifs_init_request_bufs(void)
878 } else { 755 } else {
879 CIFSMaxBufSize &= 0x1FE00; /* Round size to even 512 byte mult*/ 756 CIFSMaxBufSize &= 0x1FE00; /* Round size to even 512 byte mult*/
880 } 757 }
881/* cERROR(1,("CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize)); */ 758/* cERROR(1, "CIFSMaxBufSize %d 0x%x",CIFSMaxBufSize,CIFSMaxBufSize); */
882 cifs_req_cachep = kmem_cache_create("cifs_request", 759 cifs_req_cachep = kmem_cache_create("cifs_request",
883 CIFSMaxBufSize + 760 CIFSMaxBufSize +
884 MAX_CIFS_HDR_SIZE, 0, 761 MAX_CIFS_HDR_SIZE, 0,
@@ -890,7 +767,7 @@ cifs_init_request_bufs(void)
890 cifs_min_rcv = 1; 767 cifs_min_rcv = 1;
891 else if (cifs_min_rcv > 64) { 768 else if (cifs_min_rcv > 64) {
892 cifs_min_rcv = 64; 769 cifs_min_rcv = 64;
893 cERROR(1, ("cifs_min_rcv set to maximum (64)")); 770 cERROR(1, "cifs_min_rcv set to maximum (64)");
894 } 771 }
895 772
896 cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, 773 cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv,
@@ -921,7 +798,7 @@ cifs_init_request_bufs(void)
921 cifs_min_small = 2; 798 cifs_min_small = 2;
922 else if (cifs_min_small > 256) { 799 else if (cifs_min_small > 256) {
923 cifs_min_small = 256; 800 cifs_min_small = 256;
924 cFYI(1, ("cifs_min_small set to maximum (256)")); 801 cFYI(1, "cifs_min_small set to maximum (256)");
925 } 802 }
926 803
927 cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, 804 cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small,
@@ -962,15 +839,6 @@ cifs_init_mids(void)
962 return -ENOMEM; 839 return -ENOMEM;
963 } 840 }
964 841
965 cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs",
966 sizeof(struct oplock_q_entry), 0,
967 SLAB_HWCACHE_ALIGN, NULL);
968 if (cifs_oplock_cachep == NULL) {
969 mempool_destroy(cifs_mid_poolp);
970 kmem_cache_destroy(cifs_mid_cachep);
971 return -ENOMEM;
972 }
973
974 return 0; 842 return 0;
975} 843}
976 844
@@ -979,7 +847,6 @@ cifs_destroy_mids(void)
979{ 847{
980 mempool_destroy(cifs_mid_poolp); 848 mempool_destroy(cifs_mid_poolp);
981 kmem_cache_destroy(cifs_mid_cachep); 849 kmem_cache_destroy(cifs_mid_cachep);
982 kmem_cache_destroy(cifs_oplock_cachep);
983} 850}
984 851
985static int __init 852static int __init
@@ -1019,10 +886,10 @@ init_cifs(void)
1019 886
1020 if (cifs_max_pending < 2) { 887 if (cifs_max_pending < 2) {
1021 cifs_max_pending = 2; 888 cifs_max_pending = 2;
1022 cFYI(1, ("cifs_max_pending set to min of 2")); 889 cFYI(1, "cifs_max_pending set to min of 2");
1023 } else if (cifs_max_pending > 256) { 890 } else if (cifs_max_pending > 256) {
1024 cifs_max_pending = 256; 891 cifs_max_pending = 256;
1025 cFYI(1, ("cifs_max_pending set to max of 256")); 892 cFYI(1, "cifs_max_pending set to max of 256");
1026 } 893 }
1027 894
1028 rc = cifs_init_inodecache(); 895 rc = cifs_init_inodecache();
@@ -1080,7 +947,7 @@ init_cifs(void)
1080static void __exit 947static void __exit
1081exit_cifs(void) 948exit_cifs(void)
1082{ 949{
1083 cFYI(DBG2, ("exit_cifs")); 950 cFYI(DBG2, "exit_cifs");
1084 cifs_proc_clean(); 951 cifs_proc_clean();
1085#ifdef CONFIG_CIFS_DFS_UPCALL 952#ifdef CONFIG_CIFS_DFS_UPCALL
1086 cifs_dfs_release_automount_timer(); 953 cifs_dfs_release_automount_timer();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 7aa57ecdc437..0242ff9cbf41 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -114,5 +114,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
114extern const struct export_operations cifs_export_ops; 114extern const struct export_operations cifs_export_ops;
115#endif /* EXPERIMENTAL */ 115#endif /* EXPERIMENTAL */
116 116
117#define CIFS_VERSION "1.62" 117#define CIFS_VERSION "1.64"
118#endif /* _CIFSFS_H */ 118#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ecf0ffbe2b64..a88479ceaad5 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -87,7 +87,6 @@ enum securityEnum {
87 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ 87 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
88/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ 88/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */
89 Kerberos, /* Kerberos via SPNEGO */ 89 Kerberos, /* Kerberos via SPNEGO */
90 MSKerberos, /* MS Kerberos via SPNEGO */
91}; 90};
92 91
93enum protocolEnum { 92enum protocolEnum {
@@ -185,6 +184,12 @@ struct TCP_Server_Info {
185 struct mac_key mac_signing_key; 184 struct mac_key mac_signing_key;
186 char ntlmv2_hash[16]; 185 char ntlmv2_hash[16];
187 unsigned long lstrp; /* when we got last response from this server */ 186 unsigned long lstrp; /* when we got last response from this server */
187 u16 dialect; /* dialect index that server chose */
188 /* extended security flavors that server supports */
189 bool sec_kerberos; /* supports plain Kerberos */
190 bool sec_mskerberos; /* supports legacy MS Kerberos */
191 bool sec_kerberosu2u; /* supports U2U Kerberos */
192 bool sec_ntlmssp; /* supports NTLMSSP */
188}; 193};
189 194
190/* 195/*
@@ -502,6 +507,7 @@ struct dfs_info3_param {
502#define CIFS_FATTR_DFS_REFERRAL 0x1 507#define CIFS_FATTR_DFS_REFERRAL 0x1
503#define CIFS_FATTR_DELETE_PENDING 0x2 508#define CIFS_FATTR_DELETE_PENDING 0x2
504#define CIFS_FATTR_NEED_REVAL 0x4 509#define CIFS_FATTR_NEED_REVAL 0x4
510#define CIFS_FATTR_INO_COLLISION 0x8
505 511
506struct cifs_fattr { 512struct cifs_fattr {
507 u32 cf_flags; 513 u32 cf_flags;
@@ -717,7 +723,7 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
717GLOBAL_EXTERN unsigned int oplockEnabled; 723GLOBAL_EXTERN unsigned int oplockEnabled;
718GLOBAL_EXTERN unsigned int experimEnabled; 724GLOBAL_EXTERN unsigned int experimEnabled;
719GLOBAL_EXTERN unsigned int lookupCacheEnabled; 725GLOBAL_EXTERN unsigned int lookupCacheEnabled;
720GLOBAL_EXTERN unsigned int extended_security; /* if on, session setup sent 726GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
721 with more secure ntlmssp2 challenge/resp */ 727 with more secure ntlmssp2 challenge/resp */
722GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ 728GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
723GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ 729GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 39e47f46dea5..fb1657e0fdb8 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -39,8 +39,20 @@ extern int smb_send(struct TCP_Server_Info *, struct smb_hdr *,
39 unsigned int /* length */); 39 unsigned int /* length */);
40extern unsigned int _GetXid(void); 40extern unsigned int _GetXid(void);
41extern void _FreeXid(unsigned int); 41extern void _FreeXid(unsigned int);
42#define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current_fsuid())); 42#define GetXid() \
43#define FreeXid(curr_xid) {_FreeXid(curr_xid); cFYI(1,("CIFS VFS: leaving %s (xid = %d) rc = %d",__func__,curr_xid,(int)rc));} 43({ \
44 int __xid = (int)_GetXid(); \
45 cFYI(1, "CIFS VFS: in %s as Xid: %d with uid: %d", \
46 __func__, __xid, current_fsuid()); \
47 __xid; \
48})
49
50#define FreeXid(curr_xid) \
51do { \
52 _FreeXid(curr_xid); \
53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \
54 __func__, curr_xid, (int)rc); \
55} while (0)
44extern char *build_path_from_dentry(struct dentry *); 56extern char *build_path_from_dentry(struct dentry *);
45extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb); 57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
46extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 58extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
@@ -73,7 +85,7 @@ extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *);
73extern unsigned int smbCalcSize(struct smb_hdr *ptr); 85extern unsigned int smbCalcSize(struct smb_hdr *ptr);
74extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 86extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
75extern int decode_negTokenInit(unsigned char *security_blob, int length, 87extern int decode_negTokenInit(unsigned char *security_blob, int length,
76 enum securityEnum *secType); 88 struct TCP_Server_Info *server);
77extern int cifs_convert_address(char *src, void *dst); 89extern int cifs_convert_address(char *src, void *dst);
78extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 90extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
79extern void header_assemble(struct smb_hdr *, char /* command */ , 91extern void header_assemble(struct smb_hdr *, char /* command */ ,
@@ -83,7 +95,6 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct,
83 struct cifsSesInfo *ses, 95 struct cifsSesInfo *ses,
84 void **request_buf); 96 void **request_buf);
85extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, 97extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
86 const int stage,
87 const struct nls_table *nls_cp); 98 const struct nls_table *nls_cp);
88extern __u16 GetNextMid(struct TCP_Server_Info *server); 99extern __u16 GetNextMid(struct TCP_Server_Info *server);
89extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); 100extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
@@ -95,8 +106,11 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
95 __u16 fileHandle, struct file *file, 106 __u16 fileHandle, struct file *file,
96 struct vfsmount *mnt, unsigned int oflags); 107 struct vfsmount *mnt, unsigned int oflags);
97extern int cifs_posix_open(char *full_path, struct inode **pinode, 108extern int cifs_posix_open(char *full_path, struct inode **pinode,
98 struct vfsmount *mnt, int mode, int oflags, 109 struct vfsmount *mnt,
99 __u32 *poplock, __u16 *pnetfid, int xid); 110 struct super_block *sb,
111 int mode, int oflags,
112 __u32 *poplock, __u16 *pnetfid, int xid);
113void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr);
100extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, 114extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
101 FILE_UNIX_BASIC_INFO *info, 115 FILE_UNIX_BASIC_INFO *info,
102 struct cifs_sb_info *cifs_sb); 116 struct cifs_sb_info *cifs_sb);
@@ -125,7 +139,9 @@ extern void cifs_dfs_release_automount_timer(void);
125void cifs_proc_init(void); 139void cifs_proc_init(void);
126void cifs_proc_clean(void); 140void cifs_proc_clean(void);
127 141
128extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, 142extern int cifs_negotiate_protocol(unsigned int xid,
143 struct cifsSesInfo *ses);
144extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
129 struct nls_table *nls_info); 145 struct nls_table *nls_info);
130extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses); 146extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses);
131 147
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5d3f29fef532..c65c3419dd37 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifssmb.c 2 * fs/cifs/cifssmb.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2009 4 * Copyright (C) International Business Machines Corp., 2002,2010
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for constructing the SMB PDUs themselves 7 * Contains the routines for constructing the SMB PDUs themselves
@@ -130,8 +130,8 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
130 if (smb_command != SMB_COM_WRITE_ANDX && 130 if (smb_command != SMB_COM_WRITE_ANDX &&
131 smb_command != SMB_COM_OPEN_ANDX && 131 smb_command != SMB_COM_OPEN_ANDX &&
132 smb_command != SMB_COM_TREE_DISCONNECT) { 132 smb_command != SMB_COM_TREE_DISCONNECT) {
133 cFYI(1, ("can not send cmd %d while umounting", 133 cFYI(1, "can not send cmd %d while umounting",
134 smb_command)); 134 smb_command);
135 return -ENODEV; 135 return -ENODEV;
136 } 136 }
137 } 137 }
@@ -157,7 +157,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
157 * back on-line 157 * back on-line
158 */ 158 */
159 if (!tcon->retry || ses->status == CifsExiting) { 159 if (!tcon->retry || ses->status == CifsExiting) {
160 cFYI(1, ("gave up waiting on reconnect in smb_init")); 160 cFYI(1, "gave up waiting on reconnect in smb_init");
161 return -EHOSTDOWN; 161 return -EHOSTDOWN;
162 } 162 }
163 } 163 }
@@ -172,7 +172,8 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
172 * reconnect the same SMB session 172 * reconnect the same SMB session
173 */ 173 */
174 mutex_lock(&ses->session_mutex); 174 mutex_lock(&ses->session_mutex);
175 if (ses->need_reconnect) 175 rc = cifs_negotiate_protocol(0, ses);
176 if (rc == 0 && ses->need_reconnect)
176 rc = cifs_setup_session(0, ses, nls_codepage); 177 rc = cifs_setup_session(0, ses, nls_codepage);
177 178
178 /* do we need to reconnect tcon? */ 179 /* do we need to reconnect tcon? */
@@ -184,7 +185,7 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
184 mark_open_files_invalid(tcon); 185 mark_open_files_invalid(tcon);
185 rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage); 186 rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
186 mutex_unlock(&ses->session_mutex); 187 mutex_unlock(&ses->session_mutex);
187 cFYI(1, ("reconnect tcon rc = %d", rc)); 188 cFYI(1, "reconnect tcon rc = %d", rc);
188 189
189 if (rc) 190 if (rc)
190 goto out; 191 goto out;
@@ -355,7 +356,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
355 struct TCP_Server_Info *server; 356 struct TCP_Server_Info *server;
356 u16 count; 357 u16 count;
357 unsigned int secFlags; 358 unsigned int secFlags;
358 u16 dialect;
359 359
360 if (ses->server) 360 if (ses->server)
361 server = ses->server; 361 server = ses->server;
@@ -372,9 +372,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
372 if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL))) 372 if (ses->overrideSecFlg & (~(CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL)))
373 secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */ 373 secFlags = ses->overrideSecFlg; /* BB FIXME fix sign flags? */
374 else /* if override flags set only sign/seal OR them with global auth */ 374 else /* if override flags set only sign/seal OR them with global auth */
375 secFlags = extended_security | ses->overrideSecFlg; 375 secFlags = global_secflags | ses->overrideSecFlg;
376 376
377 cFYI(1, ("secFlags 0x%x", secFlags)); 377 cFYI(1, "secFlags 0x%x", secFlags);
378 378
379 pSMB->hdr.Mid = GetNextMid(server); 379 pSMB->hdr.Mid = GetNextMid(server);
380 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); 380 pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
@@ -382,14 +382,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
382 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) 382 if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
383 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 383 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
384 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) { 384 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
385 cFYI(1, ("Kerberos only mechanism, enable extended security")); 385 cFYI(1, "Kerberos only mechanism, enable extended security");
386 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 386 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
387 } 387 }
388#ifdef CONFIG_CIFS_EXPERIMENTAL 388#ifdef CONFIG_CIFS_EXPERIMENTAL
389 else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP) 389 else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
390 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 390 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
391 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) { 391 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
392 cFYI(1, ("NTLMSSP only mechanism, enable extended security")); 392 cFYI(1, "NTLMSSP only mechanism, enable extended security");
393 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 393 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
394 } 394 }
395#endif 395#endif
@@ -408,10 +408,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
408 if (rc != 0) 408 if (rc != 0)
409 goto neg_err_exit; 409 goto neg_err_exit;
410 410
411 dialect = le16_to_cpu(pSMBr->DialectIndex); 411 server->dialect = le16_to_cpu(pSMBr->DialectIndex);
412 cFYI(1, ("Dialect: %d", dialect)); 412 cFYI(1, "Dialect: %d", server->dialect);
413 /* Check wct = 1 error case */ 413 /* Check wct = 1 error case */
414 if ((pSMBr->hdr.WordCount < 13) || (dialect == BAD_PROT)) { 414 if ((pSMBr->hdr.WordCount < 13) || (server->dialect == BAD_PROT)) {
415 /* core returns wct = 1, but we do not ask for core - otherwise 415 /* core returns wct = 1, but we do not ask for core - otherwise
416 small wct just comes when dialect index is -1 indicating we 416 small wct just comes when dialect index is -1 indicating we
417 could not negotiate a common dialect */ 417 could not negotiate a common dialect */
@@ -419,8 +419,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
419 goto neg_err_exit; 419 goto neg_err_exit;
420#ifdef CONFIG_CIFS_WEAK_PW_HASH 420#ifdef CONFIG_CIFS_WEAK_PW_HASH
421 } else if ((pSMBr->hdr.WordCount == 13) 421 } else if ((pSMBr->hdr.WordCount == 13)
422 && ((dialect == LANMAN_PROT) 422 && ((server->dialect == LANMAN_PROT)
423 || (dialect == LANMAN2_PROT))) { 423 || (server->dialect == LANMAN2_PROT))) {
424 __s16 tmp; 424 __s16 tmp;
425 struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr; 425 struct lanman_neg_rsp *rsp = (struct lanman_neg_rsp *)pSMBr;
426 426
@@ -428,8 +428,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
428 (secFlags & CIFSSEC_MAY_PLNTXT)) 428 (secFlags & CIFSSEC_MAY_PLNTXT))
429 server->secType = LANMAN; 429 server->secType = LANMAN;
430 else { 430 else {
431 cERROR(1, ("mount failed weak security disabled" 431 cERROR(1, "mount failed weak security disabled"
432 " in /proc/fs/cifs/SecurityFlags")); 432 " in /proc/fs/cifs/SecurityFlags");
433 rc = -EOPNOTSUPP; 433 rc = -EOPNOTSUPP;
434 goto neg_err_exit; 434 goto neg_err_exit;
435 } 435 }
@@ -462,9 +462,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
462 utc = CURRENT_TIME; 462 utc = CURRENT_TIME;
463 ts = cnvrtDosUnixTm(rsp->SrvTime.Date, 463 ts = cnvrtDosUnixTm(rsp->SrvTime.Date,
464 rsp->SrvTime.Time, 0); 464 rsp->SrvTime.Time, 0);
465 cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d", 465 cFYI(1, "SrvTime %d sec since 1970 (utc: %d) diff: %d",
466 (int)ts.tv_sec, (int)utc.tv_sec, 466 (int)ts.tv_sec, (int)utc.tv_sec,
467 (int)(utc.tv_sec - ts.tv_sec))); 467 (int)(utc.tv_sec - ts.tv_sec));
468 val = (int)(utc.tv_sec - ts.tv_sec); 468 val = (int)(utc.tv_sec - ts.tv_sec);
469 seconds = abs(val); 469 seconds = abs(val);
470 result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ; 470 result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
@@ -478,7 +478,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
478 server->timeAdj = (int)tmp; 478 server->timeAdj = (int)tmp;
479 server->timeAdj *= 60; /* also in seconds */ 479 server->timeAdj *= 60; /* also in seconds */
480 } 480 }
481 cFYI(1, ("server->timeAdj: %d seconds", server->timeAdj)); 481 cFYI(1, "server->timeAdj: %d seconds", server->timeAdj);
482 482
483 483
484 /* BB get server time for time conversions and add 484 /* BB get server time for time conversions and add
@@ -493,14 +493,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
493 goto neg_err_exit; 493 goto neg_err_exit;
494 } 494 }
495 495
496 cFYI(1, ("LANMAN negotiated")); 496 cFYI(1, "LANMAN negotiated");
497 /* we will not end up setting signing flags - as no signing 497 /* we will not end up setting signing flags - as no signing
498 was in LANMAN and server did not return the flags on */ 498 was in LANMAN and server did not return the flags on */
499 goto signing_check; 499 goto signing_check;
500#else /* weak security disabled */ 500#else /* weak security disabled */
501 } else if (pSMBr->hdr.WordCount == 13) { 501 } else if (pSMBr->hdr.WordCount == 13) {
502 cERROR(1, ("mount failed, cifs module not built " 502 cERROR(1, "mount failed, cifs module not built "
503 "with CIFS_WEAK_PW_HASH support")); 503 "with CIFS_WEAK_PW_HASH support");
504 rc = -EOPNOTSUPP; 504 rc = -EOPNOTSUPP;
505#endif /* WEAK_PW_HASH */ 505#endif /* WEAK_PW_HASH */
506 goto neg_err_exit; 506 goto neg_err_exit;
@@ -512,14 +512,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
512 /* else wct == 17 NTLM */ 512 /* else wct == 17 NTLM */
513 server->secMode = pSMBr->SecurityMode; 513 server->secMode = pSMBr->SecurityMode;
514 if ((server->secMode & SECMODE_USER) == 0) 514 if ((server->secMode & SECMODE_USER) == 0)
515 cFYI(1, ("share mode security")); 515 cFYI(1, "share mode security");
516 516
517 if ((server->secMode & SECMODE_PW_ENCRYPT) == 0) 517 if ((server->secMode & SECMODE_PW_ENCRYPT) == 0)
518#ifdef CONFIG_CIFS_WEAK_PW_HASH 518#ifdef CONFIG_CIFS_WEAK_PW_HASH
519 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0) 519 if ((secFlags & CIFSSEC_MAY_PLNTXT) == 0)
520#endif /* CIFS_WEAK_PW_HASH */ 520#endif /* CIFS_WEAK_PW_HASH */
521 cERROR(1, ("Server requests plain text password" 521 cERROR(1, "Server requests plain text password"
522 " but client support disabled")); 522 " but client support disabled");
523 523
524 if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2) 524 if ((secFlags & CIFSSEC_MUST_NTLMV2) == CIFSSEC_MUST_NTLMV2)
525 server->secType = NTLMv2; 525 server->secType = NTLMv2;
@@ -539,7 +539,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
539#endif */ 539#endif */
540 else { 540 else {
541 rc = -EOPNOTSUPP; 541 rc = -EOPNOTSUPP;
542 cERROR(1, ("Invalid security type")); 542 cERROR(1, "Invalid security type");
543 goto neg_err_exit; 543 goto neg_err_exit;
544 } 544 }
545 /* else ... any others ...? */ 545 /* else ... any others ...? */
@@ -551,7 +551,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
551 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize), 551 server->maxBuf = min(le32_to_cpu(pSMBr->MaxBufferSize),
552 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE); 552 (__u32) CIFSMaxBufSize + MAX_CIFS_HDR_SIZE);
553 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize); 553 server->max_rw = le32_to_cpu(pSMBr->MaxRawSize);
554 cFYI(DBG2, ("Max buf = %d", ses->server->maxBuf)); 554 cFYI(DBG2, "Max buf = %d", ses->server->maxBuf);
555 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey); 555 GETU32(ses->server->sessid) = le32_to_cpu(pSMBr->SessionKey);
556 server->capabilities = le32_to_cpu(pSMBr->Capabilities); 556 server->capabilities = le32_to_cpu(pSMBr->Capabilities);
557 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); 557 server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
@@ -582,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
582 if (memcmp(server->server_GUID, 582 if (memcmp(server->server_GUID,
583 pSMBr->u.extended_response. 583 pSMBr->u.extended_response.
584 GUID, 16) != 0) { 584 GUID, 16) != 0) {
585 cFYI(1, ("server UID changed")); 585 cFYI(1, "server UID changed");
586 memcpy(server->server_GUID, 586 memcpy(server->server_GUID,
587 pSMBr->u.extended_response.GUID, 587 pSMBr->u.extended_response.GUID,
588 16); 588 16);
@@ -597,13 +597,19 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
597 server->secType = RawNTLMSSP; 597 server->secType = RawNTLMSSP;
598 } else { 598 } else {
599 rc = decode_negTokenInit(pSMBr->u.extended_response. 599 rc = decode_negTokenInit(pSMBr->u.extended_response.
600 SecurityBlob, 600 SecurityBlob, count - 16,
601 count - 16, 601 server);
602 &server->secType);
603 if (rc == 1) 602 if (rc == 1)
604 rc = 0; 603 rc = 0;
605 else 604 else
606 rc = -EINVAL; 605 rc = -EINVAL;
606
607 if (server->sec_kerberos || server->sec_mskerberos)
608 server->secType = Kerberos;
609 else if (server->sec_ntlmssp)
610 server->secType = RawNTLMSSP;
611 else
612 rc = -EOPNOTSUPP;
607 } 613 }
608 } else 614 } else
609 server->capabilities &= ~CAP_EXTENDED_SECURITY; 615 server->capabilities &= ~CAP_EXTENDED_SECURITY;
@@ -614,22 +620,21 @@ signing_check:
614 if ((secFlags & CIFSSEC_MAY_SIGN) == 0) { 620 if ((secFlags & CIFSSEC_MAY_SIGN) == 0) {
615 /* MUST_SIGN already includes the MAY_SIGN FLAG 621 /* MUST_SIGN already includes the MAY_SIGN FLAG
616 so if this is zero it means that signing is disabled */ 622 so if this is zero it means that signing is disabled */
617 cFYI(1, ("Signing disabled")); 623 cFYI(1, "Signing disabled");
618 if (server->secMode & SECMODE_SIGN_REQUIRED) { 624 if (server->secMode & SECMODE_SIGN_REQUIRED) {
619 cERROR(1, ("Server requires " 625 cERROR(1, "Server requires "
620 "packet signing to be enabled in " 626 "packet signing to be enabled in "
621 "/proc/fs/cifs/SecurityFlags.")); 627 "/proc/fs/cifs/SecurityFlags.");
622 rc = -EOPNOTSUPP; 628 rc = -EOPNOTSUPP;
623 } 629 }
624 server->secMode &= 630 server->secMode &=
625 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED); 631 ~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
626 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) { 632 } else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
627 /* signing required */ 633 /* signing required */
628 cFYI(1, ("Must sign - secFlags 0x%x", secFlags)); 634 cFYI(1, "Must sign - secFlags 0x%x", secFlags);
629 if ((server->secMode & 635 if ((server->secMode &
630 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) { 636 (SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED)) == 0) {
631 cERROR(1, 637 cERROR(1, "signing required but server lacks support");
632 ("signing required but server lacks support"));
633 rc = -EOPNOTSUPP; 638 rc = -EOPNOTSUPP;
634 } else 639 } else
635 server->secMode |= SECMODE_SIGN_REQUIRED; 640 server->secMode |= SECMODE_SIGN_REQUIRED;
@@ -643,7 +648,7 @@ signing_check:
643neg_err_exit: 648neg_err_exit:
644 cifs_buf_release(pSMB); 649 cifs_buf_release(pSMB);
645 650
646 cFYI(1, ("negprot rc %d", rc)); 651 cFYI(1, "negprot rc %d", rc);
647 return rc; 652 return rc;
648} 653}
649 654
@@ -653,7 +658,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
653 struct smb_hdr *smb_buffer; 658 struct smb_hdr *smb_buffer;
654 int rc = 0; 659 int rc = 0;
655 660
656 cFYI(1, ("In tree disconnect")); 661 cFYI(1, "In tree disconnect");
657 662
658 /* BB: do we need to check this? These should never be NULL. */ 663 /* BB: do we need to check this? These should never be NULL. */
659 if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) 664 if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
@@ -675,7 +680,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
675 680
676 rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0); 681 rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0);
677 if (rc) 682 if (rc)
678 cFYI(1, ("Tree disconnect failed %d", rc)); 683 cFYI(1, "Tree disconnect failed %d", rc);
679 684
680 /* No need to return error on this operation if tid invalidated and 685 /* No need to return error on this operation if tid invalidated and
681 closed on server already e.g. due to tcp session crashing */ 686 closed on server already e.g. due to tcp session crashing */
@@ -691,7 +696,7 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
691 LOGOFF_ANDX_REQ *pSMB; 696 LOGOFF_ANDX_REQ *pSMB;
692 int rc = 0; 697 int rc = 0;
693 698
694 cFYI(1, ("In SMBLogoff for session disconnect")); 699 cFYI(1, "In SMBLogoff for session disconnect");
695 700
696 /* 701 /*
697 * BB: do we need to check validity of ses and server? They should 702 * BB: do we need to check validity of ses and server? They should
@@ -744,7 +749,7 @@ CIFSPOSIXDelFile(const int xid, struct cifsTconInfo *tcon, const char *fileName,
744 int bytes_returned = 0; 749 int bytes_returned = 0;
745 __u16 params, param_offset, offset, byte_count; 750 __u16 params, param_offset, offset, byte_count;
746 751
747 cFYI(1, ("In POSIX delete")); 752 cFYI(1, "In POSIX delete");
748PsxDelete: 753PsxDelete:
749 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 754 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
750 (void **) &pSMBr); 755 (void **) &pSMBr);
@@ -796,7 +801,7 @@ PsxDelete:
796 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 801 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
797 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 802 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
798 if (rc) 803 if (rc)
799 cFYI(1, ("Posix delete returned %d", rc)); 804 cFYI(1, "Posix delete returned %d", rc);
800 cifs_buf_release(pSMB); 805 cifs_buf_release(pSMB);
801 806
802 cifs_stats_inc(&tcon->num_deletes); 807 cifs_stats_inc(&tcon->num_deletes);
@@ -843,7 +848,7 @@ DelFileRetry:
843 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 848 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
844 cifs_stats_inc(&tcon->num_deletes); 849 cifs_stats_inc(&tcon->num_deletes);
845 if (rc) 850 if (rc)
846 cFYI(1, ("Error in RMFile = %d", rc)); 851 cFYI(1, "Error in RMFile = %d", rc);
847 852
848 cifs_buf_release(pSMB); 853 cifs_buf_release(pSMB);
849 if (rc == -EAGAIN) 854 if (rc == -EAGAIN)
@@ -862,7 +867,7 @@ CIFSSMBRmDir(const int xid, struct cifsTconInfo *tcon, const char *dirName,
862 int bytes_returned; 867 int bytes_returned;
863 int name_len; 868 int name_len;
864 869
865 cFYI(1, ("In CIFSSMBRmDir")); 870 cFYI(1, "In CIFSSMBRmDir");
866RmDirRetry: 871RmDirRetry:
867 rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB, 872 rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB,
868 (void **) &pSMBr); 873 (void **) &pSMBr);
@@ -887,7 +892,7 @@ RmDirRetry:
887 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 892 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
888 cifs_stats_inc(&tcon->num_rmdirs); 893 cifs_stats_inc(&tcon->num_rmdirs);
889 if (rc) 894 if (rc)
890 cFYI(1, ("Error in RMDir = %d", rc)); 895 cFYI(1, "Error in RMDir = %d", rc);
891 896
892 cifs_buf_release(pSMB); 897 cifs_buf_release(pSMB);
893 if (rc == -EAGAIN) 898 if (rc == -EAGAIN)
@@ -905,7 +910,7 @@ CIFSSMBMkDir(const int xid, struct cifsTconInfo *tcon,
905 int bytes_returned; 910 int bytes_returned;
906 int name_len; 911 int name_len;
907 912
908 cFYI(1, ("In CIFSSMBMkDir")); 913 cFYI(1, "In CIFSSMBMkDir");
909MkDirRetry: 914MkDirRetry:
910 rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB, 915 rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB,
911 (void **) &pSMBr); 916 (void **) &pSMBr);
@@ -930,7 +935,7 @@ MkDirRetry:
930 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 935 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
931 cifs_stats_inc(&tcon->num_mkdirs); 936 cifs_stats_inc(&tcon->num_mkdirs);
932 if (rc) 937 if (rc)
933 cFYI(1, ("Error in Mkdir = %d", rc)); 938 cFYI(1, "Error in Mkdir = %d", rc);
934 939
935 cifs_buf_release(pSMB); 940 cifs_buf_release(pSMB);
936 if (rc == -EAGAIN) 941 if (rc == -EAGAIN)
@@ -953,7 +958,7 @@ CIFSPOSIXCreate(const int xid, struct cifsTconInfo *tcon, __u32 posix_flags,
953 OPEN_PSX_REQ *pdata; 958 OPEN_PSX_REQ *pdata;
954 OPEN_PSX_RSP *psx_rsp; 959 OPEN_PSX_RSP *psx_rsp;
955 960
956 cFYI(1, ("In POSIX Create")); 961 cFYI(1, "In POSIX Create");
957PsxCreat: 962PsxCreat:
958 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 963 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
959 (void **) &pSMBr); 964 (void **) &pSMBr);
@@ -1007,11 +1012,11 @@ PsxCreat:
1007 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1012 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1008 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1013 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1009 if (rc) { 1014 if (rc) {
1010 cFYI(1, ("Posix create returned %d", rc)); 1015 cFYI(1, "Posix create returned %d", rc);
1011 goto psx_create_err; 1016 goto psx_create_err;
1012 } 1017 }
1013 1018
1014 cFYI(1, ("copying inode info")); 1019 cFYI(1, "copying inode info");
1015 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1020 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1016 1021
1017 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) { 1022 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) {
@@ -1033,11 +1038,11 @@ PsxCreat:
1033 /* check to make sure response data is there */ 1038 /* check to make sure response data is there */
1034 if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) { 1039 if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
1035 pRetData->Type = cpu_to_le32(-1); /* unknown */ 1040 pRetData->Type = cpu_to_le32(-1); /* unknown */
1036 cFYI(DBG2, ("unknown type")); 1041 cFYI(DBG2, "unknown type");
1037 } else { 1042 } else {
1038 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1043 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
1039 + sizeof(FILE_UNIX_BASIC_INFO)) { 1044 + sizeof(FILE_UNIX_BASIC_INFO)) {
1040 cERROR(1, ("Open response data too small")); 1045 cERROR(1, "Open response data too small");
1041 pRetData->Type = cpu_to_le32(-1); 1046 pRetData->Type = cpu_to_le32(-1);
1042 goto psx_create_err; 1047 goto psx_create_err;
1043 } 1048 }
@@ -1084,7 +1089,7 @@ static __u16 convert_disposition(int disposition)
1084 ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC; 1089 ofun = SMBOPEN_OCREATE | SMBOPEN_OTRUNC;
1085 break; 1090 break;
1086 default: 1091 default:
1087 cFYI(1, ("unknown disposition %d", disposition)); 1092 cFYI(1, "unknown disposition %d", disposition);
1088 ofun = SMBOPEN_OAPPEND; /* regular open */ 1093 ofun = SMBOPEN_OAPPEND; /* regular open */
1089 } 1094 }
1090 return ofun; 1095 return ofun;
@@ -1175,7 +1180,7 @@ OldOpenRetry:
1175 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); 1180 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP);
1176 cifs_stats_inc(&tcon->num_opens); 1181 cifs_stats_inc(&tcon->num_opens);
1177 if (rc) { 1182 if (rc) {
1178 cFYI(1, ("Error in Open = %d", rc)); 1183 cFYI(1, "Error in Open = %d", rc);
1179 } else { 1184 } else {
1180 /* BB verify if wct == 15 */ 1185 /* BB verify if wct == 15 */
1181 1186
@@ -1288,7 +1293,7 @@ openRetry:
1288 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP); 1293 (struct smb_hdr *)pSMBr, &bytes_returned, CIFS_LONG_OP);
1289 cifs_stats_inc(&tcon->num_opens); 1294 cifs_stats_inc(&tcon->num_opens);
1290 if (rc) { 1295 if (rc) {
1291 cFYI(1, ("Error in Open = %d", rc)); 1296 cFYI(1, "Error in Open = %d", rc);
1292 } else { 1297 } else {
1293 *pOplock = pSMBr->OplockLevel; /* 1 byte no need to le_to_cpu */ 1298 *pOplock = pSMBr->OplockLevel; /* 1 byte no need to le_to_cpu */
1294 *netfid = pSMBr->Fid; /* cifs fid stays in le */ 1299 *netfid = pSMBr->Fid; /* cifs fid stays in le */
@@ -1326,7 +1331,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1326 int resp_buf_type = 0; 1331 int resp_buf_type = 0;
1327 struct kvec iov[1]; 1332 struct kvec iov[1];
1328 1333
1329 cFYI(1, ("Reading %d bytes on fid %d", count, netfid)); 1334 cFYI(1, "Reading %d bytes on fid %d", count, netfid);
1330 if (tcon->ses->capabilities & CAP_LARGE_FILES) 1335 if (tcon->ses->capabilities & CAP_LARGE_FILES)
1331 wct = 12; 1336 wct = 12;
1332 else { 1337 else {
@@ -1371,7 +1376,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1371 cifs_stats_inc(&tcon->num_reads); 1376 cifs_stats_inc(&tcon->num_reads);
1372 pSMBr = (READ_RSP *)iov[0].iov_base; 1377 pSMBr = (READ_RSP *)iov[0].iov_base;
1373 if (rc) { 1378 if (rc) {
1374 cERROR(1, ("Send error in read = %d", rc)); 1379 cERROR(1, "Send error in read = %d", rc);
1375 } else { 1380 } else {
1376 int data_length = le16_to_cpu(pSMBr->DataLengthHigh); 1381 int data_length = le16_to_cpu(pSMBr->DataLengthHigh);
1377 data_length = data_length << 16; 1382 data_length = data_length << 16;
@@ -1381,15 +1386,15 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1381 /*check that DataLength would not go beyond end of SMB */ 1386 /*check that DataLength would not go beyond end of SMB */
1382 if ((data_length > CIFSMaxBufSize) 1387 if ((data_length > CIFSMaxBufSize)
1383 || (data_length > count)) { 1388 || (data_length > count)) {
1384 cFYI(1, ("bad length %d for count %d", 1389 cFYI(1, "bad length %d for count %d",
1385 data_length, count)); 1390 data_length, count);
1386 rc = -EIO; 1391 rc = -EIO;
1387 *nbytes = 0; 1392 *nbytes = 0;
1388 } else { 1393 } else {
1389 pReadData = (char *) (&pSMBr->hdr.Protocol) + 1394 pReadData = (char *) (&pSMBr->hdr.Protocol) +
1390 le16_to_cpu(pSMBr->DataOffset); 1395 le16_to_cpu(pSMBr->DataOffset);
1391/* if (rc = copy_to_user(buf, pReadData, data_length)) { 1396/* if (rc = copy_to_user(buf, pReadData, data_length)) {
1392 cERROR(1,("Faulting on read rc = %d",rc)); 1397 cERROR(1, "Faulting on read rc = %d",rc);
1393 rc = -EFAULT; 1398 rc = -EFAULT;
1394 }*/ /* can not use copy_to_user when using page cache*/ 1399 }*/ /* can not use copy_to_user when using page cache*/
1395 if (*buf) 1400 if (*buf)
@@ -1433,7 +1438,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1433 1438
1434 *nbytes = 0; 1439 *nbytes = 0;
1435 1440
1436 /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ 1441 /* cFYI(1, "write at %lld %d bytes", offset, count);*/
1437 if (tcon->ses == NULL) 1442 if (tcon->ses == NULL)
1438 return -ECONNABORTED; 1443 return -ECONNABORTED;
1439 1444
@@ -1514,7 +1519,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1514 (struct smb_hdr *) pSMBr, &bytes_returned, long_op); 1519 (struct smb_hdr *) pSMBr, &bytes_returned, long_op);
1515 cifs_stats_inc(&tcon->num_writes); 1520 cifs_stats_inc(&tcon->num_writes);
1516 if (rc) { 1521 if (rc) {
1517 cFYI(1, ("Send error in write = %d", rc)); 1522 cFYI(1, "Send error in write = %d", rc);
1518 } else { 1523 } else {
1519 *nbytes = le16_to_cpu(pSMBr->CountHigh); 1524 *nbytes = le16_to_cpu(pSMBr->CountHigh);
1520 *nbytes = (*nbytes) << 16; 1525 *nbytes = (*nbytes) << 16;
@@ -1551,7 +1556,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1551 1556
1552 *nbytes = 0; 1557 *nbytes = 0;
1553 1558
1554 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); 1559 cFYI(1, "write2 at %lld %d bytes", (long long)offset, count);
1555 1560
1556 if (tcon->ses->capabilities & CAP_LARGE_FILES) { 1561 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
1557 wct = 14; 1562 wct = 14;
@@ -1606,7 +1611,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1606 long_op); 1611 long_op);
1607 cifs_stats_inc(&tcon->num_writes); 1612 cifs_stats_inc(&tcon->num_writes);
1608 if (rc) { 1613 if (rc) {
1609 cFYI(1, ("Send error Write2 = %d", rc)); 1614 cFYI(1, "Send error Write2 = %d", rc);
1610 } else if (resp_buf_type == 0) { 1615 } else if (resp_buf_type == 0) {
1611 /* presumably this can not happen, but best to be safe */ 1616 /* presumably this can not happen, but best to be safe */
1612 rc = -EIO; 1617 rc = -EIO;
@@ -1651,7 +1656,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1651 int timeout = 0; 1656 int timeout = 0;
1652 __u16 count; 1657 __u16 count;
1653 1658
1654 cFYI(1, ("CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock)); 1659 cFYI(1, "CIFSSMBLock timeout %d numLock %d", (int)waitFlag, numLock);
1655 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB); 1660 rc = small_smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB);
1656 1661
1657 if (rc) 1662 if (rc)
@@ -1699,7 +1704,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1699 } 1704 }
1700 cifs_stats_inc(&tcon->num_locks); 1705 cifs_stats_inc(&tcon->num_locks);
1701 if (rc) 1706 if (rc)
1702 cFYI(1, ("Send error in Lock = %d", rc)); 1707 cFYI(1, "Send error in Lock = %d", rc);
1703 1708
1704 /* Note: On -EAGAIN error only caller can retry on handle based calls 1709 /* Note: On -EAGAIN error only caller can retry on handle based calls
1705 since file handle passed in no longer valid */ 1710 since file handle passed in no longer valid */
@@ -1722,7 +1727,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1722 __u16 params, param_offset, offset, byte_count, count; 1727 __u16 params, param_offset, offset, byte_count, count;
1723 struct kvec iov[1]; 1728 struct kvec iov[1];
1724 1729
1725 cFYI(1, ("Posix Lock")); 1730 cFYI(1, "Posix Lock");
1726 1731
1727 if (pLockData == NULL) 1732 if (pLockData == NULL)
1728 return -EINVAL; 1733 return -EINVAL;
@@ -1792,7 +1797,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1792 } 1797 }
1793 1798
1794 if (rc) { 1799 if (rc) {
1795 cFYI(1, ("Send error in Posix Lock = %d", rc)); 1800 cFYI(1, "Send error in Posix Lock = %d", rc);
1796 } else if (get_flag) { 1801 } else if (get_flag) {
1797 /* lock structure can be returned on get */ 1802 /* lock structure can be returned on get */
1798 __u16 data_offset; 1803 __u16 data_offset;
@@ -1849,7 +1854,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1849{ 1854{
1850 int rc = 0; 1855 int rc = 0;
1851 CLOSE_REQ *pSMB = NULL; 1856 CLOSE_REQ *pSMB = NULL;
1852 cFYI(1, ("In CIFSSMBClose")); 1857 cFYI(1, "In CIFSSMBClose");
1853 1858
1854/* do not retry on dead session on close */ 1859/* do not retry on dead session on close */
1855 rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB); 1860 rc = small_smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB);
@@ -1866,7 +1871,7 @@ CIFSSMBClose(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1866 if (rc) { 1871 if (rc) {
1867 if (rc != -EINTR) { 1872 if (rc != -EINTR) {
1868 /* EINTR is expected when user ctl-c to kill app */ 1873 /* EINTR is expected when user ctl-c to kill app */
1869 cERROR(1, ("Send error in Close = %d", rc)); 1874 cERROR(1, "Send error in Close = %d", rc);
1870 } 1875 }
1871 } 1876 }
1872 1877
@@ -1882,7 +1887,7 @@ CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1882{ 1887{
1883 int rc = 0; 1888 int rc = 0;
1884 FLUSH_REQ *pSMB = NULL; 1889 FLUSH_REQ *pSMB = NULL;
1885 cFYI(1, ("In CIFSSMBFlush")); 1890 cFYI(1, "In CIFSSMBFlush");
1886 1891
1887 rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB); 1892 rc = small_smb_init(SMB_COM_FLUSH, 1, tcon, (void **) &pSMB);
1888 if (rc) 1893 if (rc)
@@ -1893,7 +1898,7 @@ CIFSSMBFlush(const int xid, struct cifsTconInfo *tcon, int smb_file_id)
1893 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 1898 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
1894 cifs_stats_inc(&tcon->num_flushes); 1899 cifs_stats_inc(&tcon->num_flushes);
1895 if (rc) 1900 if (rc)
1896 cERROR(1, ("Send error in Flush = %d", rc)); 1901 cERROR(1, "Send error in Flush = %d", rc);
1897 1902
1898 return rc; 1903 return rc;
1899} 1904}
@@ -1910,7 +1915,7 @@ CIFSSMBRename(const int xid, struct cifsTconInfo *tcon,
1910 int name_len, name_len2; 1915 int name_len, name_len2;
1911 __u16 count; 1916 __u16 count;
1912 1917
1913 cFYI(1, ("In CIFSSMBRename")); 1918 cFYI(1, "In CIFSSMBRename");
1914renameRetry: 1919renameRetry:
1915 rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB, 1920 rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB,
1916 (void **) &pSMBr); 1921 (void **) &pSMBr);
@@ -1956,7 +1961,7 @@ renameRetry:
1956 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1961 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
1957 cifs_stats_inc(&tcon->num_renames); 1962 cifs_stats_inc(&tcon->num_renames);
1958 if (rc) 1963 if (rc)
1959 cFYI(1, ("Send error in rename = %d", rc)); 1964 cFYI(1, "Send error in rename = %d", rc);
1960 1965
1961 cifs_buf_release(pSMB); 1966 cifs_buf_release(pSMB);
1962 1967
@@ -1980,7 +1985,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
1980 int len_of_str; 1985 int len_of_str;
1981 __u16 params, param_offset, offset, count, byte_count; 1986 __u16 params, param_offset, offset, count, byte_count;
1982 1987
1983 cFYI(1, ("Rename to File by handle")); 1988 cFYI(1, "Rename to File by handle");
1984 rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB, 1989 rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB,
1985 (void **) &pSMBr); 1990 (void **) &pSMBr);
1986 if (rc) 1991 if (rc)
@@ -2035,7 +2040,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2035 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2040 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2036 cifs_stats_inc(&pTcon->num_t2renames); 2041 cifs_stats_inc(&pTcon->num_t2renames);
2037 if (rc) 2042 if (rc)
2038 cFYI(1, ("Send error in Rename (by file handle) = %d", rc)); 2043 cFYI(1, "Send error in Rename (by file handle) = %d", rc);
2039 2044
2040 cifs_buf_release(pSMB); 2045 cifs_buf_release(pSMB);
2041 2046
@@ -2057,7 +2062,7 @@ CIFSSMBCopy(const int xid, struct cifsTconInfo *tcon, const char *fromName,
2057 int name_len, name_len2; 2062 int name_len, name_len2;
2058 __u16 count; 2063 __u16 count;
2059 2064
2060 cFYI(1, ("In CIFSSMBCopy")); 2065 cFYI(1, "In CIFSSMBCopy");
2061copyRetry: 2066copyRetry:
2062 rc = smb_init(SMB_COM_COPY, 1, tcon, (void **) &pSMB, 2067 rc = smb_init(SMB_COM_COPY, 1, tcon, (void **) &pSMB,
2063 (void **) &pSMBr); 2068 (void **) &pSMBr);
@@ -2102,8 +2107,8 @@ copyRetry:
2102 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2107 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2103 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2108 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2104 if (rc) { 2109 if (rc) {
2105 cFYI(1, ("Send error in copy = %d with %d files copied", 2110 cFYI(1, "Send error in copy = %d with %d files copied",
2106 rc, le16_to_cpu(pSMBr->CopyCount))); 2111 rc, le16_to_cpu(pSMBr->CopyCount));
2107 } 2112 }
2108 cifs_buf_release(pSMB); 2113 cifs_buf_release(pSMB);
2109 2114
@@ -2127,7 +2132,7 @@ CIFSUnixCreateSymLink(const int xid, struct cifsTconInfo *tcon,
2127 int bytes_returned = 0; 2132 int bytes_returned = 0;
2128 __u16 params, param_offset, offset, byte_count; 2133 __u16 params, param_offset, offset, byte_count;
2129 2134
2130 cFYI(1, ("In Symlink Unix style")); 2135 cFYI(1, "In Symlink Unix style");
2131createSymLinkRetry: 2136createSymLinkRetry:
2132 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2137 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
2133 (void **) &pSMBr); 2138 (void **) &pSMBr);
@@ -2192,7 +2197,7 @@ createSymLinkRetry:
2192 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2197 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2193 cifs_stats_inc(&tcon->num_symlinks); 2198 cifs_stats_inc(&tcon->num_symlinks);
2194 if (rc) 2199 if (rc)
2195 cFYI(1, ("Send error in SetPathInfo create symlink = %d", rc)); 2200 cFYI(1, "Send error in SetPathInfo create symlink = %d", rc);
2196 2201
2197 cifs_buf_release(pSMB); 2202 cifs_buf_release(pSMB);
2198 2203
@@ -2216,7 +2221,7 @@ CIFSUnixCreateHardLink(const int xid, struct cifsTconInfo *tcon,
2216 int bytes_returned = 0; 2221 int bytes_returned = 0;
2217 __u16 params, param_offset, offset, byte_count; 2222 __u16 params, param_offset, offset, byte_count;
2218 2223
2219 cFYI(1, ("In Create Hard link Unix style")); 2224 cFYI(1, "In Create Hard link Unix style");
2220createHardLinkRetry: 2225createHardLinkRetry:
2221 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2226 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
2222 (void **) &pSMBr); 2227 (void **) &pSMBr);
@@ -2278,7 +2283,7 @@ createHardLinkRetry:
2278 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2283 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2279 cifs_stats_inc(&tcon->num_hardlinks); 2284 cifs_stats_inc(&tcon->num_hardlinks);
2280 if (rc) 2285 if (rc)
2281 cFYI(1, ("Send error in SetPathInfo (hard link) = %d", rc)); 2286 cFYI(1, "Send error in SetPathInfo (hard link) = %d", rc);
2282 2287
2283 cifs_buf_release(pSMB); 2288 cifs_buf_release(pSMB);
2284 if (rc == -EAGAIN) 2289 if (rc == -EAGAIN)
@@ -2299,7 +2304,7 @@ CIFSCreateHardLink(const int xid, struct cifsTconInfo *tcon,
2299 int name_len, name_len2; 2304 int name_len, name_len2;
2300 __u16 count; 2305 __u16 count;
2301 2306
2302 cFYI(1, ("In CIFSCreateHardLink")); 2307 cFYI(1, "In CIFSCreateHardLink");
2303winCreateHardLinkRetry: 2308winCreateHardLinkRetry:
2304 2309
2305 rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB, 2310 rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB,
@@ -2350,7 +2355,7 @@ winCreateHardLinkRetry:
2350 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2355 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2351 cifs_stats_inc(&tcon->num_hardlinks); 2356 cifs_stats_inc(&tcon->num_hardlinks);
2352 if (rc) 2357 if (rc)
2353 cFYI(1, ("Send error in hard link (NT rename) = %d", rc)); 2358 cFYI(1, "Send error in hard link (NT rename) = %d", rc);
2354 2359
2355 cifs_buf_release(pSMB); 2360 cifs_buf_release(pSMB);
2356 if (rc == -EAGAIN) 2361 if (rc == -EAGAIN)
@@ -2373,7 +2378,7 @@ CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon,
2373 __u16 params, byte_count; 2378 __u16 params, byte_count;
2374 char *data_start; 2379 char *data_start;
2375 2380
2376 cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName)); 2381 cFYI(1, "In QPathSymLinkInfo (Unix) for path %s", searchName);
2377 2382
2378querySymLinkRetry: 2383querySymLinkRetry:
2379 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2384 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -2420,7 +2425,7 @@ querySymLinkRetry:
2420 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2425 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2421 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2426 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2422 if (rc) { 2427 if (rc) {
2423 cFYI(1, ("Send error in QuerySymLinkInfo = %d", rc)); 2428 cFYI(1, "Send error in QuerySymLinkInfo = %d", rc);
2424 } else { 2429 } else {
2425 /* decode response */ 2430 /* decode response */
2426 2431
@@ -2521,21 +2526,21 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
2521 2526
2522 /* should we also check that parm and data areas do not overlap? */ 2527 /* should we also check that parm and data areas do not overlap? */
2523 if (*ppparm > end_of_smb) { 2528 if (*ppparm > end_of_smb) {
2524 cFYI(1, ("parms start after end of smb")); 2529 cFYI(1, "parms start after end of smb");
2525 return -EINVAL; 2530 return -EINVAL;
2526 } else if (parm_count + *ppparm > end_of_smb) { 2531 } else if (parm_count + *ppparm > end_of_smb) {
2527 cFYI(1, ("parm end after end of smb")); 2532 cFYI(1, "parm end after end of smb");
2528 return -EINVAL; 2533 return -EINVAL;
2529 } else if (*ppdata > end_of_smb) { 2534 } else if (*ppdata > end_of_smb) {
2530 cFYI(1, ("data starts after end of smb")); 2535 cFYI(1, "data starts after end of smb");
2531 return -EINVAL; 2536 return -EINVAL;
2532 } else if (data_count + *ppdata > end_of_smb) { 2537 } else if (data_count + *ppdata > end_of_smb) {
2533 cFYI(1, ("data %p + count %d (%p) ends after end of smb %p start %p", 2538 cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
2534 *ppdata, data_count, (data_count + *ppdata), 2539 *ppdata, data_count, (data_count + *ppdata),
2535 end_of_smb, pSMBr)); 2540 end_of_smb, pSMBr);
2536 return -EINVAL; 2541 return -EINVAL;
2537 } else if (parm_count + data_count > pSMBr->ByteCount) { 2542 } else if (parm_count + data_count > pSMBr->ByteCount) {
2538 cFYI(1, ("parm count and data count larger than SMB")); 2543 cFYI(1, "parm count and data count larger than SMB");
2539 return -EINVAL; 2544 return -EINVAL;
2540 } 2545 }
2541 *pdatalen = data_count; 2546 *pdatalen = data_count;
@@ -2554,7 +2559,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2554 struct smb_com_transaction_ioctl_req *pSMB; 2559 struct smb_com_transaction_ioctl_req *pSMB;
2555 struct smb_com_transaction_ioctl_rsp *pSMBr; 2560 struct smb_com_transaction_ioctl_rsp *pSMBr;
2556 2561
2557 cFYI(1, ("In Windows reparse style QueryLink for path %s", searchName)); 2562 cFYI(1, "In Windows reparse style QueryLink for path %s", searchName);
2558 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, 2563 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
2559 (void **) &pSMBr); 2564 (void **) &pSMBr);
2560 if (rc) 2565 if (rc)
@@ -2583,7 +2588,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2583 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2588 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2584 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2589 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2585 if (rc) { 2590 if (rc) {
2586 cFYI(1, ("Send error in QueryReparseLinkInfo = %d", rc)); 2591 cFYI(1, "Send error in QueryReparseLinkInfo = %d", rc);
2587 } else { /* decode response */ 2592 } else { /* decode response */
2588 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2593 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2589 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2594 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
@@ -2607,7 +2612,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2607 if ((reparse_buf->LinkNamesBuf + 2612 if ((reparse_buf->LinkNamesBuf +
2608 reparse_buf->TargetNameOffset + 2613 reparse_buf->TargetNameOffset +
2609 reparse_buf->TargetNameLen) > end_of_smb) { 2614 reparse_buf->TargetNameLen) > end_of_smb) {
2610 cFYI(1, ("reparse buf beyond SMB")); 2615 cFYI(1, "reparse buf beyond SMB");
2611 rc = -EIO; 2616 rc = -EIO;
2612 goto qreparse_out; 2617 goto qreparse_out;
2613 } 2618 }
@@ -2628,12 +2633,12 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2628 } 2633 }
2629 } else { 2634 } else {
2630 rc = -EIO; 2635 rc = -EIO;
2631 cFYI(1, ("Invalid return data count on " 2636 cFYI(1, "Invalid return data count on "
2632 "get reparse info ioctl")); 2637 "get reparse info ioctl");
2633 } 2638 }
2634 symlinkinfo[buflen] = 0; /* just in case so the caller 2639 symlinkinfo[buflen] = 0; /* just in case so the caller
2635 does not go off the end of the buffer */ 2640 does not go off the end of the buffer */
2636 cFYI(1, ("readlink result - %s", symlinkinfo)); 2641 cFYI(1, "readlink result - %s", symlinkinfo);
2637 } 2642 }
2638 2643
2639qreparse_out: 2644qreparse_out:
@@ -2656,7 +2661,7 @@ static void cifs_convert_ace(posix_acl_xattr_entry *ace,
2656 ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm); 2661 ace->e_perm = cpu_to_le16(cifs_ace->cifs_e_perm);
2657 ace->e_tag = cpu_to_le16(cifs_ace->cifs_e_tag); 2662 ace->e_tag = cpu_to_le16(cifs_ace->cifs_e_tag);
2658 ace->e_id = cpu_to_le32(le64_to_cpu(cifs_ace->cifs_uid)); 2663 ace->e_id = cpu_to_le32(le64_to_cpu(cifs_ace->cifs_uid));
2659 /* cFYI(1,("perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id)); */ 2664 /* cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id); */
2660 2665
2661 return; 2666 return;
2662} 2667}
@@ -2682,8 +2687,8 @@ static int cifs_copy_posix_acl(char *trgt, char *src, const int buflen,
2682 size += sizeof(struct cifs_posix_ace) * count; 2687 size += sizeof(struct cifs_posix_ace) * count;
2683 /* check if we would go beyond end of SMB */ 2688 /* check if we would go beyond end of SMB */
2684 if (size_of_data_area < size) { 2689 if (size_of_data_area < size) {
2685 cFYI(1, ("bad CIFS POSIX ACL size %d vs. %d", 2690 cFYI(1, "bad CIFS POSIX ACL size %d vs. %d",
2686 size_of_data_area, size)); 2691 size_of_data_area, size);
2687 return -EINVAL; 2692 return -EINVAL;
2688 } 2693 }
2689 } else if (acl_type & ACL_TYPE_DEFAULT) { 2694 } else if (acl_type & ACL_TYPE_DEFAULT) {
@@ -2730,7 +2735,7 @@ static __u16 convert_ace_to_cifs_ace(struct cifs_posix_ace *cifs_ace,
2730 cifs_ace->cifs_uid = cpu_to_le64(-1); 2735 cifs_ace->cifs_uid = cpu_to_le64(-1);
2731 } else 2736 } else
2732 cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id)); 2737 cifs_ace->cifs_uid = cpu_to_le64(le32_to_cpu(local_ace->e_id));
2733 /*cFYI(1,("perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id));*/ 2738 /*cFYI(1, "perm %d tag %d id %d",ace->e_perm,ace->e_tag,ace->e_id);*/
2734 return rc; 2739 return rc;
2735} 2740}
2736 2741
@@ -2748,12 +2753,12 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
2748 return 0; 2753 return 0;
2749 2754
2750 count = posix_acl_xattr_count((size_t)buflen); 2755 count = posix_acl_xattr_count((size_t)buflen);
2751 cFYI(1, ("setting acl with %d entries from buf of length %d and " 2756 cFYI(1, "setting acl with %d entries from buf of length %d and "
2752 "version of %d", 2757 "version of %d",
2753 count, buflen, le32_to_cpu(local_acl->a_version))); 2758 count, buflen, le32_to_cpu(local_acl->a_version));
2754 if (le32_to_cpu(local_acl->a_version) != 2) { 2759 if (le32_to_cpu(local_acl->a_version) != 2) {
2755 cFYI(1, ("unknown POSIX ACL version %d", 2760 cFYI(1, "unknown POSIX ACL version %d",
2756 le32_to_cpu(local_acl->a_version))); 2761 le32_to_cpu(local_acl->a_version));
2757 return 0; 2762 return 0;
2758 } 2763 }
2759 cifs_acl->version = cpu_to_le16(1); 2764 cifs_acl->version = cpu_to_le16(1);
@@ -2762,7 +2767,7 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
2762 else if (acl_type == ACL_TYPE_DEFAULT) 2767 else if (acl_type == ACL_TYPE_DEFAULT)
2763 cifs_acl->default_entry_count = cpu_to_le16(count); 2768 cifs_acl->default_entry_count = cpu_to_le16(count);
2764 else { 2769 else {
2765 cFYI(1, ("unknown ACL type %d", acl_type)); 2770 cFYI(1, "unknown ACL type %d", acl_type);
2766 return 0; 2771 return 0;
2767 } 2772 }
2768 for (i = 0; i < count; i++) { 2773 for (i = 0; i < count; i++) {
@@ -2795,7 +2800,7 @@ CIFSSMBGetPosixACL(const int xid, struct cifsTconInfo *tcon,
2795 int name_len; 2800 int name_len;
2796 __u16 params, byte_count; 2801 __u16 params, byte_count;
2797 2802
2798 cFYI(1, ("In GetPosixACL (Unix) for path %s", searchName)); 2803 cFYI(1, "In GetPosixACL (Unix) for path %s", searchName);
2799 2804
2800queryAclRetry: 2805queryAclRetry:
2801 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2806 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -2847,7 +2852,7 @@ queryAclRetry:
2847 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2852 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2848 cifs_stats_inc(&tcon->num_acl_get); 2853 cifs_stats_inc(&tcon->num_acl_get);
2849 if (rc) { 2854 if (rc) {
2850 cFYI(1, ("Send error in Query POSIX ACL = %d", rc)); 2855 cFYI(1, "Send error in Query POSIX ACL = %d", rc);
2851 } else { 2856 } else {
2852 /* decode response */ 2857 /* decode response */
2853 2858
@@ -2884,7 +2889,7 @@ CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon,
2884 int bytes_returned = 0; 2889 int bytes_returned = 0;
2885 __u16 params, byte_count, data_count, param_offset, offset; 2890 __u16 params, byte_count, data_count, param_offset, offset;
2886 2891
2887 cFYI(1, ("In SetPosixACL (Unix) for path %s", fileName)); 2892 cFYI(1, "In SetPosixACL (Unix) for path %s", fileName);
2888setAclRetry: 2893setAclRetry:
2889 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 2894 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
2890 (void **) &pSMBr); 2895 (void **) &pSMBr);
@@ -2939,7 +2944,7 @@ setAclRetry:
2939 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2944 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2940 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2945 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
2941 if (rc) 2946 if (rc)
2942 cFYI(1, ("Set POSIX ACL returned %d", rc)); 2947 cFYI(1, "Set POSIX ACL returned %d", rc);
2943 2948
2944setACLerrorExit: 2949setACLerrorExit:
2945 cifs_buf_release(pSMB); 2950 cifs_buf_release(pSMB);
@@ -2959,7 +2964,7 @@ CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon,
2959 int bytes_returned; 2964 int bytes_returned;
2960 __u16 params, byte_count; 2965 __u16 params, byte_count;
2961 2966
2962 cFYI(1, ("In GetExtAttr")); 2967 cFYI(1, "In GetExtAttr");
2963 if (tcon == NULL) 2968 if (tcon == NULL)
2964 return -ENODEV; 2969 return -ENODEV;
2965 2970
@@ -2998,7 +3003,7 @@ GetExtAttrRetry:
2998 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3003 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2999 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3004 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3000 if (rc) { 3005 if (rc) {
3001 cFYI(1, ("error %d in GetExtAttr", rc)); 3006 cFYI(1, "error %d in GetExtAttr", rc);
3002 } else { 3007 } else {
3003 /* decode response */ 3008 /* decode response */
3004 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3009 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -3013,7 +3018,7 @@ GetExtAttrRetry:
3013 struct file_chattr_info *pfinfo; 3018 struct file_chattr_info *pfinfo;
3014 /* BB Do we need a cast or hash here ? */ 3019 /* BB Do we need a cast or hash here ? */
3015 if (count != 16) { 3020 if (count != 16) {
3016 cFYI(1, ("Illegal size ret in GetExtAttr")); 3021 cFYI(1, "Illegal size ret in GetExtAttr");
3017 rc = -EIO; 3022 rc = -EIO;
3018 goto GetExtAttrOut; 3023 goto GetExtAttrOut;
3019 } 3024 }
@@ -3043,7 +3048,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3043 QUERY_SEC_DESC_REQ *pSMB; 3048 QUERY_SEC_DESC_REQ *pSMB;
3044 struct kvec iov[1]; 3049 struct kvec iov[1];
3045 3050
3046 cFYI(1, ("GetCifsACL")); 3051 cFYI(1, "GetCifsACL");
3047 3052
3048 *pbuflen = 0; 3053 *pbuflen = 0;
3049 *acl_inf = NULL; 3054 *acl_inf = NULL;
@@ -3068,7 +3073,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3068 CIFS_STD_OP); 3073 CIFS_STD_OP);
3069 cifs_stats_inc(&tcon->num_acl_get); 3074 cifs_stats_inc(&tcon->num_acl_get);
3070 if (rc) { 3075 if (rc) {
3071 cFYI(1, ("Send error in QuerySecDesc = %d", rc)); 3076 cFYI(1, "Send error in QuerySecDesc = %d", rc);
3072 } else { /* decode response */ 3077 } else { /* decode response */
3073 __le32 *parm; 3078 __le32 *parm;
3074 __u32 parm_len; 3079 __u32 parm_len;
@@ -3083,7 +3088,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3083 goto qsec_out; 3088 goto qsec_out;
3084 pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base; 3089 pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
3085 3090
3086 cFYI(1, ("smb %p parm %p data %p", pSMBr, parm, *acl_inf)); 3091 cFYI(1, "smb %p parm %p data %p", pSMBr, parm, *acl_inf);
3087 3092
3088 if (le32_to_cpu(pSMBr->ParameterCount) != 4) { 3093 if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
3089 rc = -EIO; /* bad smb */ 3094 rc = -EIO; /* bad smb */
@@ -3095,8 +3100,8 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3095 3100
3096 acl_len = le32_to_cpu(*parm); 3101 acl_len = le32_to_cpu(*parm);
3097 if (acl_len != *pbuflen) { 3102 if (acl_len != *pbuflen) {
3098 cERROR(1, ("acl length %d does not match %d", 3103 cERROR(1, "acl length %d does not match %d",
3099 acl_len, *pbuflen)); 3104 acl_len, *pbuflen);
3100 if (*pbuflen > acl_len) 3105 if (*pbuflen > acl_len)
3101 *pbuflen = acl_len; 3106 *pbuflen = acl_len;
3102 } 3107 }
@@ -3105,7 +3110,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3105 header followed by the smallest SID */ 3110 header followed by the smallest SID */
3106 if ((*pbuflen < sizeof(struct cifs_ntsd) + 8) || 3111 if ((*pbuflen < sizeof(struct cifs_ntsd) + 8) ||
3107 (*pbuflen >= 64 * 1024)) { 3112 (*pbuflen >= 64 * 1024)) {
3108 cERROR(1, ("bad acl length %d", *pbuflen)); 3113 cERROR(1, "bad acl length %d", *pbuflen);
3109 rc = -EINVAL; 3114 rc = -EINVAL;
3110 *pbuflen = 0; 3115 *pbuflen = 0;
3111 } else { 3116 } else {
@@ -3179,9 +3184,9 @@ setCifsAclRetry:
3179 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3184 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3180 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3185 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3181 3186
3182 cFYI(1, ("SetCIFSACL bytes_returned: %d, rc: %d", bytes_returned, rc)); 3187 cFYI(1, "SetCIFSACL bytes_returned: %d, rc: %d", bytes_returned, rc);
3183 if (rc) 3188 if (rc)
3184 cFYI(1, ("Set CIFS ACL returned %d", rc)); 3189 cFYI(1, "Set CIFS ACL returned %d", rc);
3185 cifs_buf_release(pSMB); 3190 cifs_buf_release(pSMB);
3186 3191
3187 if (rc == -EAGAIN) 3192 if (rc == -EAGAIN)
@@ -3205,7 +3210,7 @@ int SMBQueryInformation(const int xid, struct cifsTconInfo *tcon,
3205 int bytes_returned; 3210 int bytes_returned;
3206 int name_len; 3211 int name_len;
3207 3212
3208 cFYI(1, ("In SMBQPath path %s", searchName)); 3213 cFYI(1, "In SMBQPath path %s", searchName);
3209QInfRetry: 3214QInfRetry:
3210 rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB, 3215 rc = smb_init(SMB_COM_QUERY_INFORMATION, 0, tcon, (void **) &pSMB,
3211 (void **) &pSMBr); 3216 (void **) &pSMBr);
@@ -3231,7 +3236,7 @@ QInfRetry:
3231 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3236 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3232 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3237 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3233 if (rc) { 3238 if (rc) {
3234 cFYI(1, ("Send error in QueryInfo = %d", rc)); 3239 cFYI(1, "Send error in QueryInfo = %d", rc);
3235 } else if (pFinfo) { 3240 } else if (pFinfo) {
3236 struct timespec ts; 3241 struct timespec ts;
3237 __u32 time = le32_to_cpu(pSMBr->last_write_time); 3242 __u32 time = le32_to_cpu(pSMBr->last_write_time);
@@ -3305,7 +3310,7 @@ QFileInfoRetry:
3305 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3310 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3306 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3311 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3307 if (rc) { 3312 if (rc) {
3308 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3313 cFYI(1, "Send error in QPathInfo = %d", rc);
3309 } else { /* decode response */ 3314 } else { /* decode response */
3310 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3315 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3311 3316
@@ -3343,7 +3348,7 @@ CIFSSMBQPathInfo(const int xid, struct cifsTconInfo *tcon,
3343 int name_len; 3348 int name_len;
3344 __u16 params, byte_count; 3349 __u16 params, byte_count;
3345 3350
3346/* cFYI(1, ("In QPathInfo path %s", searchName)); */ 3351/* cFYI(1, "In QPathInfo path %s", searchName); */
3347QPathInfoRetry: 3352QPathInfoRetry:
3348 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3353 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
3349 (void **) &pSMBr); 3354 (void **) &pSMBr);
@@ -3393,7 +3398,7 @@ QPathInfoRetry:
3393 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3398 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3394 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3399 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3395 if (rc) { 3400 if (rc) {
3396 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3401 cFYI(1, "Send error in QPathInfo = %d", rc);
3397 } else { /* decode response */ 3402 } else { /* decode response */
3398 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3403 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3399 3404
@@ -3473,14 +3478,14 @@ UnixQFileInfoRetry:
3473 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3478 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3474 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3479 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3475 if (rc) { 3480 if (rc) {
3476 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3481 cFYI(1, "Send error in QPathInfo = %d", rc);
3477 } else { /* decode response */ 3482 } else { /* decode response */
3478 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3483 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3479 3484
3480 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3485 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
3481 cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n" 3486 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3482 "Unix Extensions can be disabled on mount " 3487 "Unix Extensions can be disabled on mount "
3483 "by specifying the nosfu mount option.")); 3488 "by specifying the nosfu mount option.");
3484 rc = -EIO; /* bad smb */ 3489 rc = -EIO; /* bad smb */
3485 } else { 3490 } else {
3486 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3491 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3512,7 +3517,7 @@ CIFSSMBUnixQPathInfo(const int xid, struct cifsTconInfo *tcon,
3512 int name_len; 3517 int name_len;
3513 __u16 params, byte_count; 3518 __u16 params, byte_count;
3514 3519
3515 cFYI(1, ("In QPathInfo (Unix) the path %s", searchName)); 3520 cFYI(1, "In QPathInfo (Unix) the path %s", searchName);
3516UnixQPathInfoRetry: 3521UnixQPathInfoRetry:
3517 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3522 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
3518 (void **) &pSMBr); 3523 (void **) &pSMBr);
@@ -3559,14 +3564,14 @@ UnixQPathInfoRetry:
3559 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3564 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3560 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3565 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3561 if (rc) { 3566 if (rc) {
3562 cFYI(1, ("Send error in QPathInfo = %d", rc)); 3567 cFYI(1, "Send error in QPathInfo = %d", rc);
3563 } else { /* decode response */ 3568 } else { /* decode response */
3564 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3569 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3565 3570
3566 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3571 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
3567 cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n" 3572 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3568 "Unix Extensions can be disabled on mount " 3573 "Unix Extensions can be disabled on mount "
3569 "by specifying the nosfu mount option.")); 3574 "by specifying the nosfu mount option.");
3570 rc = -EIO; /* bad smb */ 3575 rc = -EIO; /* bad smb */
3571 } else { 3576 } else {
3572 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3577 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3600,7 +3605,7 @@ CIFSFindFirst(const int xid, struct cifsTconInfo *tcon,
3600 int name_len; 3605 int name_len;
3601 __u16 params, byte_count; 3606 __u16 params, byte_count;
3602 3607
3603 cFYI(1, ("In FindFirst for %s", searchName)); 3608 cFYI(1, "In FindFirst for %s", searchName);
3604 3609
3605findFirstRetry: 3610findFirstRetry:
3606 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 3611 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -3677,7 +3682,7 @@ findFirstRetry:
3677 if (rc) {/* BB add logic to retry regular search if Unix search 3682 if (rc) {/* BB add logic to retry regular search if Unix search
3678 rejected unexpectedly by server */ 3683 rejected unexpectedly by server */
3679 /* BB Add code to handle unsupported level rc */ 3684 /* BB Add code to handle unsupported level rc */
3680 cFYI(1, ("Error in FindFirst = %d", rc)); 3685 cFYI(1, "Error in FindFirst = %d", rc);
3681 3686
3682 cifs_buf_release(pSMB); 3687 cifs_buf_release(pSMB);
3683 3688
@@ -3716,7 +3721,7 @@ findFirstRetry:
3716 lnoff = le16_to_cpu(parms->LastNameOffset); 3721 lnoff = le16_to_cpu(parms->LastNameOffset);
3717 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < 3722 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE <
3718 lnoff) { 3723 lnoff) {
3719 cERROR(1, ("ignoring corrupt resume name")); 3724 cERROR(1, "ignoring corrupt resume name");
3720 psrch_inf->last_entry = NULL; 3725 psrch_inf->last_entry = NULL;
3721 return rc; 3726 return rc;
3722 } 3727 }
@@ -3744,7 +3749,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3744 int bytes_returned, name_len; 3749 int bytes_returned, name_len;
3745 __u16 params, byte_count; 3750 __u16 params, byte_count;
3746 3751
3747 cFYI(1, ("In FindNext")); 3752 cFYI(1, "In FindNext");
3748 3753
3749 if (psrch_inf->endOfSearch) 3754 if (psrch_inf->endOfSearch)
3750 return -ENOENT; 3755 return -ENOENT;
@@ -3808,7 +3813,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3808 cifs_buf_release(pSMB); 3813 cifs_buf_release(pSMB);
3809 rc = 0; /* search probably was closed at end of search*/ 3814 rc = 0; /* search probably was closed at end of search*/
3810 } else 3815 } else
3811 cFYI(1, ("FindNext returned = %d", rc)); 3816 cFYI(1, "FindNext returned = %d", rc);
3812 } else { /* decode response */ 3817 } else { /* decode response */
3813 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3818 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3814 3819
@@ -3844,15 +3849,15 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3844 lnoff = le16_to_cpu(parms->LastNameOffset); 3849 lnoff = le16_to_cpu(parms->LastNameOffset);
3845 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < 3850 if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE <
3846 lnoff) { 3851 lnoff) {
3847 cERROR(1, ("ignoring corrupt resume name")); 3852 cERROR(1, "ignoring corrupt resume name");
3848 psrch_inf->last_entry = NULL; 3853 psrch_inf->last_entry = NULL;
3849 return rc; 3854 return rc;
3850 } else 3855 } else
3851 psrch_inf->last_entry = 3856 psrch_inf->last_entry =
3852 psrch_inf->srch_entries_start + lnoff; 3857 psrch_inf->srch_entries_start + lnoff;
3853 3858
3854/* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", 3859/* cFYI(1, "fnxt2 entries in buf %d index_of_last %d",
3855 psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ 3860 psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry); */
3856 3861
3857 /* BB fixme add unlock here */ 3862 /* BB fixme add unlock here */
3858 } 3863 }
@@ -3877,7 +3882,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
3877 int rc = 0; 3882 int rc = 0;
3878 FINDCLOSE_REQ *pSMB = NULL; 3883 FINDCLOSE_REQ *pSMB = NULL;
3879 3884
3880 cFYI(1, ("In CIFSSMBFindClose")); 3885 cFYI(1, "In CIFSSMBFindClose");
3881 rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB); 3886 rc = small_smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **)&pSMB);
3882 3887
3883 /* no sense returning error if session restarted 3888 /* no sense returning error if session restarted
@@ -3891,7 +3896,7 @@ CIFSFindClose(const int xid, struct cifsTconInfo *tcon,
3891 pSMB->ByteCount = 0; 3896 pSMB->ByteCount = 0;
3892 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 3897 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
3893 if (rc) 3898 if (rc)
3894 cERROR(1, ("Send error in FindClose = %d", rc)); 3899 cERROR(1, "Send error in FindClose = %d", rc);
3895 3900
3896 cifs_stats_inc(&tcon->num_fclose); 3901 cifs_stats_inc(&tcon->num_fclose);
3897 3902
@@ -3914,7 +3919,7 @@ CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
3914 int name_len, bytes_returned; 3919 int name_len, bytes_returned;
3915 __u16 params, byte_count; 3920 __u16 params, byte_count;
3916 3921
3917 cFYI(1, ("In GetSrvInodeNum for %s", searchName)); 3922 cFYI(1, "In GetSrvInodeNum for %s", searchName);
3918 if (tcon == NULL) 3923 if (tcon == NULL)
3919 return -ENODEV; 3924 return -ENODEV;
3920 3925
@@ -3964,7 +3969,7 @@ GetInodeNumberRetry:
3964 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3969 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3965 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3970 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
3966 if (rc) { 3971 if (rc) {
3967 cFYI(1, ("error %d in QueryInternalInfo", rc)); 3972 cFYI(1, "error %d in QueryInternalInfo", rc);
3968 } else { 3973 } else {
3969 /* decode response */ 3974 /* decode response */
3970 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3975 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -3979,7 +3984,7 @@ GetInodeNumberRetry:
3979 struct file_internal_info *pfinfo; 3984 struct file_internal_info *pfinfo;
3980 /* BB Do we need a cast or hash here ? */ 3985 /* BB Do we need a cast or hash here ? */
3981 if (count < 8) { 3986 if (count < 8) {
3982 cFYI(1, ("Illegal size ret in QryIntrnlInf")); 3987 cFYI(1, "Illegal size ret in QryIntrnlInf");
3983 rc = -EIO; 3988 rc = -EIO;
3984 goto GetInodeNumOut; 3989 goto GetInodeNumOut;
3985 } 3990 }
@@ -4020,16 +4025,16 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4020 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); 4025 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
4021 4026
4022 if (*num_of_nodes < 1) { 4027 if (*num_of_nodes < 1) {
4023 cERROR(1, ("num_referrals: must be at least > 0," 4028 cERROR(1, "num_referrals: must be at least > 0,"
4024 "but we get num_referrals = %d\n", *num_of_nodes)); 4029 "but we get num_referrals = %d\n", *num_of_nodes);
4025 rc = -EINVAL; 4030 rc = -EINVAL;
4026 goto parse_DFS_referrals_exit; 4031 goto parse_DFS_referrals_exit;
4027 } 4032 }
4028 4033
4029 ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals); 4034 ref = (struct dfs_referral_level_3 *) &(pSMBr->referrals);
4030 if (ref->VersionNumber != cpu_to_le16(3)) { 4035 if (ref->VersionNumber != cpu_to_le16(3)) {
4031 cERROR(1, ("Referrals of V%d version are not supported," 4036 cERROR(1, "Referrals of V%d version are not supported,"
4032 "should be V3", le16_to_cpu(ref->VersionNumber))); 4037 "should be V3", le16_to_cpu(ref->VersionNumber));
4033 rc = -EINVAL; 4038 rc = -EINVAL;
4034 goto parse_DFS_referrals_exit; 4039 goto parse_DFS_referrals_exit;
4035 } 4040 }
@@ -4038,14 +4043,14 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4038 data_end = (char *)(&(pSMBr->PathConsumed)) + 4043 data_end = (char *)(&(pSMBr->PathConsumed)) +
4039 le16_to_cpu(pSMBr->t2.DataCount); 4044 le16_to_cpu(pSMBr->t2.DataCount);
4040 4045
4041 cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n", 4046 cFYI(1, "num_referrals: %d dfs flags: 0x%x ...\n",
4042 *num_of_nodes, 4047 *num_of_nodes,
4043 le32_to_cpu(pSMBr->DFSFlags))); 4048 le32_to_cpu(pSMBr->DFSFlags));
4044 4049
4045 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * 4050 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) *
4046 *num_of_nodes, GFP_KERNEL); 4051 *num_of_nodes, GFP_KERNEL);
4047 if (*target_nodes == NULL) { 4052 if (*target_nodes == NULL) {
4048 cERROR(1, ("Failed to allocate buffer for target_nodes\n")); 4053 cERROR(1, "Failed to allocate buffer for target_nodes\n");
4049 rc = -ENOMEM; 4054 rc = -ENOMEM;
4050 goto parse_DFS_referrals_exit; 4055 goto parse_DFS_referrals_exit;
4051 } 4056 }
@@ -4121,7 +4126,7 @@ CIFSGetDFSRefer(const int xid, struct cifsSesInfo *ses,
4121 *num_of_nodes = 0; 4126 *num_of_nodes = 0;
4122 *target_nodes = NULL; 4127 *target_nodes = NULL;
4123 4128
4124 cFYI(1, ("In GetDFSRefer the path %s", searchName)); 4129 cFYI(1, "In GetDFSRefer the path %s", searchName);
4125 if (ses == NULL) 4130 if (ses == NULL)
4126 return -ENODEV; 4131 return -ENODEV;
4127getDFSRetry: 4132getDFSRetry:
@@ -4188,7 +4193,7 @@ getDFSRetry:
4188 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 4193 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
4189 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4194 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4190 if (rc) { 4195 if (rc) {
4191 cFYI(1, ("Send error in GetDFSRefer = %d", rc)); 4196 cFYI(1, "Send error in GetDFSRefer = %d", rc);
4192 goto GetDFSRefExit; 4197 goto GetDFSRefExit;
4193 } 4198 }
4194 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4199 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4199,9 +4204,9 @@ getDFSRetry:
4199 goto GetDFSRefExit; 4204 goto GetDFSRefExit;
4200 } 4205 }
4201 4206
4202 cFYI(1, ("Decoding GetDFSRefer response BCC: %d Offset %d", 4207 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
4203 pSMBr->ByteCount, 4208 pSMBr->ByteCount,
4204 le16_to_cpu(pSMBr->t2.DataOffset))); 4209 le16_to_cpu(pSMBr->t2.DataOffset));
4205 4210
4206 /* parse returned result into more usable form */ 4211 /* parse returned result into more usable form */
4207 rc = parse_DFS_referrals(pSMBr, num_of_nodes, 4212 rc = parse_DFS_referrals(pSMBr, num_of_nodes,
@@ -4229,7 +4234,7 @@ SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData)
4229 int bytes_returned = 0; 4234 int bytes_returned = 0;
4230 __u16 params, byte_count; 4235 __u16 params, byte_count;
4231 4236
4232 cFYI(1, ("OldQFSInfo")); 4237 cFYI(1, "OldQFSInfo");
4233oldQFSInfoRetry: 4238oldQFSInfoRetry:
4234 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4239 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4235 (void **) &pSMBr); 4240 (void **) &pSMBr);
@@ -4262,7 +4267,7 @@ oldQFSInfoRetry:
4262 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4267 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4263 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4268 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4264 if (rc) { 4269 if (rc) {
4265 cFYI(1, ("Send error in QFSInfo = %d", rc)); 4270 cFYI(1, "Send error in QFSInfo = %d", rc);
4266 } else { /* decode response */ 4271 } else { /* decode response */
4267 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4272 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4268 4273
@@ -4270,8 +4275,8 @@ oldQFSInfoRetry:
4270 rc = -EIO; /* bad smb */ 4275 rc = -EIO; /* bad smb */
4271 else { 4276 else {
4272 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4277 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
4273 cFYI(1, ("qfsinf resp BCC: %d Offset %d", 4278 cFYI(1, "qfsinf resp BCC: %d Offset %d",
4274 pSMBr->ByteCount, data_offset)); 4279 pSMBr->ByteCount, data_offset);
4275 4280
4276 response_data = (FILE_SYSTEM_ALLOC_INFO *) 4281 response_data = (FILE_SYSTEM_ALLOC_INFO *)
4277 (((char *) &pSMBr->hdr.Protocol) + data_offset); 4282 (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4283,11 +4288,10 @@ oldQFSInfoRetry:
4283 le32_to_cpu(response_data->TotalAllocationUnits); 4288 le32_to_cpu(response_data->TotalAllocationUnits);
4284 FSData->f_bfree = FSData->f_bavail = 4289 FSData->f_bfree = FSData->f_bavail =
4285 le32_to_cpu(response_data->FreeAllocationUnits); 4290 le32_to_cpu(response_data->FreeAllocationUnits);
4286 cFYI(1, 4291 cFYI(1, "Blocks: %lld Free: %lld Block size %ld",
4287 ("Blocks: %lld Free: %lld Block size %ld", 4292 (unsigned long long)FSData->f_blocks,
4288 (unsigned long long)FSData->f_blocks, 4293 (unsigned long long)FSData->f_bfree,
4289 (unsigned long long)FSData->f_bfree, 4294 FSData->f_bsize);
4290 FSData->f_bsize));
4291 } 4295 }
4292 } 4296 }
4293 cifs_buf_release(pSMB); 4297 cifs_buf_release(pSMB);
@@ -4309,7 +4313,7 @@ CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData)
4309 int bytes_returned = 0; 4313 int bytes_returned = 0;
4310 __u16 params, byte_count; 4314 __u16 params, byte_count;
4311 4315
4312 cFYI(1, ("In QFSInfo")); 4316 cFYI(1, "In QFSInfo");
4313QFSInfoRetry: 4317QFSInfoRetry:
4314 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4318 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4315 (void **) &pSMBr); 4319 (void **) &pSMBr);
@@ -4342,7 +4346,7 @@ QFSInfoRetry:
4342 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4346 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4343 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4347 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4344 if (rc) { 4348 if (rc) {
4345 cFYI(1, ("Send error in QFSInfo = %d", rc)); 4349 cFYI(1, "Send error in QFSInfo = %d", rc);
4346 } else { /* decode response */ 4350 } else { /* decode response */
4347 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4351 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4348 4352
@@ -4363,11 +4367,10 @@ QFSInfoRetry:
4363 le64_to_cpu(response_data->TotalAllocationUnits); 4367 le64_to_cpu(response_data->TotalAllocationUnits);
4364 FSData->f_bfree = FSData->f_bavail = 4368 FSData->f_bfree = FSData->f_bavail =
4365 le64_to_cpu(response_data->FreeAllocationUnits); 4369 le64_to_cpu(response_data->FreeAllocationUnits);
4366 cFYI(1, 4370 cFYI(1, "Blocks: %lld Free: %lld Block size %ld",
4367 ("Blocks: %lld Free: %lld Block size %ld", 4371 (unsigned long long)FSData->f_blocks,
4368 (unsigned long long)FSData->f_blocks, 4372 (unsigned long long)FSData->f_bfree,
4369 (unsigned long long)FSData->f_bfree, 4373 FSData->f_bsize);
4370 FSData->f_bsize));
4371 } 4374 }
4372 } 4375 }
4373 cifs_buf_release(pSMB); 4376 cifs_buf_release(pSMB);
@@ -4389,7 +4392,7 @@ CIFSSMBQFSAttributeInfo(const int xid, struct cifsTconInfo *tcon)
4389 int bytes_returned = 0; 4392 int bytes_returned = 0;
4390 __u16 params, byte_count; 4393 __u16 params, byte_count;
4391 4394
4392 cFYI(1, ("In QFSAttributeInfo")); 4395 cFYI(1, "In QFSAttributeInfo");
4393QFSAttributeRetry: 4396QFSAttributeRetry:
4394 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4397 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4395 (void **) &pSMBr); 4398 (void **) &pSMBr);
@@ -4423,7 +4426,7 @@ QFSAttributeRetry:
4423 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4426 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4424 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4427 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4425 if (rc) { 4428 if (rc) {
4426 cERROR(1, ("Send error in QFSAttributeInfo = %d", rc)); 4429 cERROR(1, "Send error in QFSAttributeInfo = %d", rc);
4427 } else { /* decode response */ 4430 } else { /* decode response */
4428 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4431 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4429 4432
@@ -4459,7 +4462,7 @@ CIFSSMBQFSDeviceInfo(const int xid, struct cifsTconInfo *tcon)
4459 int bytes_returned = 0; 4462 int bytes_returned = 0;
4460 __u16 params, byte_count; 4463 __u16 params, byte_count;
4461 4464
4462 cFYI(1, ("In QFSDeviceInfo")); 4465 cFYI(1, "In QFSDeviceInfo");
4463QFSDeviceRetry: 4466QFSDeviceRetry:
4464 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4467 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4465 (void **) &pSMBr); 4468 (void **) &pSMBr);
@@ -4494,7 +4497,7 @@ QFSDeviceRetry:
4494 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4497 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4495 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4498 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4496 if (rc) { 4499 if (rc) {
4497 cFYI(1, ("Send error in QFSDeviceInfo = %d", rc)); 4500 cFYI(1, "Send error in QFSDeviceInfo = %d", rc);
4498 } else { /* decode response */ 4501 } else { /* decode response */
4499 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4502 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4500 4503
@@ -4529,7 +4532,7 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon)
4529 int bytes_returned = 0; 4532 int bytes_returned = 0;
4530 __u16 params, byte_count; 4533 __u16 params, byte_count;
4531 4534
4532 cFYI(1, ("In QFSUnixInfo")); 4535 cFYI(1, "In QFSUnixInfo");
4533QFSUnixRetry: 4536QFSUnixRetry:
4534 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4537 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4535 (void **) &pSMBr); 4538 (void **) &pSMBr);
@@ -4563,7 +4566,7 @@ QFSUnixRetry:
4563 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4566 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4564 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4567 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4565 if (rc) { 4568 if (rc) {
4566 cERROR(1, ("Send error in QFSUnixInfo = %d", rc)); 4569 cERROR(1, "Send error in QFSUnixInfo = %d", rc);
4567 } else { /* decode response */ 4570 } else { /* decode response */
4568 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4571 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4569 4572
@@ -4598,7 +4601,7 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap)
4598 int bytes_returned = 0; 4601 int bytes_returned = 0;
4599 __u16 params, param_offset, offset, byte_count; 4602 __u16 params, param_offset, offset, byte_count;
4600 4603
4601 cFYI(1, ("In SETFSUnixInfo")); 4604 cFYI(1, "In SETFSUnixInfo");
4602SETFSUnixRetry: 4605SETFSUnixRetry:
4603 /* BB switch to small buf init to save memory */ 4606 /* BB switch to small buf init to save memory */
4604 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4607 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -4646,7 +4649,7 @@ SETFSUnixRetry:
4646 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4649 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4647 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4650 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4648 if (rc) { 4651 if (rc) {
4649 cERROR(1, ("Send error in SETFSUnixInfo = %d", rc)); 4652 cERROR(1, "Send error in SETFSUnixInfo = %d", rc);
4650 } else { /* decode response */ 4653 } else { /* decode response */
4651 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4654 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4652 if (rc) 4655 if (rc)
@@ -4674,7 +4677,7 @@ CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
4674 int bytes_returned = 0; 4677 int bytes_returned = 0;
4675 __u16 params, byte_count; 4678 __u16 params, byte_count;
4676 4679
4677 cFYI(1, ("In QFSPosixInfo")); 4680 cFYI(1, "In QFSPosixInfo");
4678QFSPosixRetry: 4681QFSPosixRetry:
4679 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4682 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4680 (void **) &pSMBr); 4683 (void **) &pSMBr);
@@ -4708,7 +4711,7 @@ QFSPosixRetry:
4708 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4711 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4709 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4712 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4710 if (rc) { 4713 if (rc) {
4711 cFYI(1, ("Send error in QFSUnixInfo = %d", rc)); 4714 cFYI(1, "Send error in QFSUnixInfo = %d", rc);
4712 } else { /* decode response */ 4715 } else { /* decode response */
4713 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4716 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4714 4717
@@ -4768,7 +4771,7 @@ CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, const char *fileName,
4768 int bytes_returned = 0; 4771 int bytes_returned = 0;
4769 __u16 params, byte_count, data_count, param_offset, offset; 4772 __u16 params, byte_count, data_count, param_offset, offset;
4770 4773
4771 cFYI(1, ("In SetEOF")); 4774 cFYI(1, "In SetEOF");
4772SetEOFRetry: 4775SetEOFRetry:
4773 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 4776 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
4774 (void **) &pSMBr); 4777 (void **) &pSMBr);
@@ -4834,7 +4837,7 @@ SetEOFRetry:
4834 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4837 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
4835 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 4838 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
4836 if (rc) 4839 if (rc)
4837 cFYI(1, ("SetPathInfo (file size) returned %d", rc)); 4840 cFYI(1, "SetPathInfo (file size) returned %d", rc);
4838 4841
4839 cifs_buf_release(pSMB); 4842 cifs_buf_release(pSMB);
4840 4843
@@ -4854,8 +4857,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4854 int rc = 0; 4857 int rc = 0;
4855 __u16 params, param_offset, offset, byte_count, count; 4858 __u16 params, param_offset, offset, byte_count, count;
4856 4859
4857 cFYI(1, ("SetFileSize (via SetFileInfo) %lld", 4860 cFYI(1, "SetFileSize (via SetFileInfo) %lld",
4858 (long long)size)); 4861 (long long)size);
4859 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 4862 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
4860 4863
4861 if (rc) 4864 if (rc)
@@ -4914,9 +4917,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4914 pSMB->ByteCount = cpu_to_le16(byte_count); 4917 pSMB->ByteCount = cpu_to_le16(byte_count);
4915 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4918 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4916 if (rc) { 4919 if (rc) {
4917 cFYI(1, 4920 cFYI(1, "Send error in SetFileInfo (SetFileSize) = %d", rc);
4918 ("Send error in SetFileInfo (SetFileSize) = %d",
4919 rc));
4920 } 4921 }
4921 4922
4922 /* Note: On -EAGAIN error only caller can retry on handle based calls 4923 /* Note: On -EAGAIN error only caller can retry on handle based calls
@@ -4940,7 +4941,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
4940 int rc = 0; 4941 int rc = 0;
4941 __u16 params, param_offset, offset, byte_count, count; 4942 __u16 params, param_offset, offset, byte_count, count;
4942 4943
4943 cFYI(1, ("Set Times (via SetFileInfo)")); 4944 cFYI(1, "Set Times (via SetFileInfo)");
4944 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 4945 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
4945 4946
4946 if (rc) 4947 if (rc)
@@ -4985,7 +4986,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
4985 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 4986 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
4986 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4987 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4987 if (rc) 4988 if (rc)
4988 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); 4989 cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
4989 4990
4990 /* Note: On -EAGAIN error only caller can retry on handle based calls 4991 /* Note: On -EAGAIN error only caller can retry on handle based calls
4991 since file handle passed in no longer valid */ 4992 since file handle passed in no longer valid */
@@ -5002,7 +5003,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5002 int rc = 0; 5003 int rc = 0;
5003 __u16 params, param_offset, offset, byte_count, count; 5004 __u16 params, param_offset, offset, byte_count, count;
5004 5005
5005 cFYI(1, ("Set File Disposition (via SetFileInfo)")); 5006 cFYI(1, "Set File Disposition (via SetFileInfo)");
5006 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 5007 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
5007 5008
5008 if (rc) 5009 if (rc)
@@ -5044,7 +5045,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5044 *data_offset = delete_file ? 1 : 0; 5045 *data_offset = delete_file ? 1 : 0;
5045 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5046 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
5046 if (rc) 5047 if (rc)
5047 cFYI(1, ("Send error in SetFileDisposition = %d", rc)); 5048 cFYI(1, "Send error in SetFileDisposition = %d", rc);
5048 5049
5049 return rc; 5050 return rc;
5050} 5051}
@@ -5062,7 +5063,7 @@ CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
5062 char *data_offset; 5063 char *data_offset;
5063 __u16 params, param_offset, offset, byte_count, count; 5064 __u16 params, param_offset, offset, byte_count, count;
5064 5065
5065 cFYI(1, ("In SetTimes")); 5066 cFYI(1, "In SetTimes");
5066 5067
5067SetTimesRetry: 5068SetTimesRetry:
5068 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5069 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
@@ -5118,7 +5119,7 @@ SetTimesRetry:
5118 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5119 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5119 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5120 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5120 if (rc) 5121 if (rc)
5121 cFYI(1, ("SetPathInfo (times) returned %d", rc)); 5122 cFYI(1, "SetPathInfo (times) returned %d", rc);
5122 5123
5123 cifs_buf_release(pSMB); 5124 cifs_buf_release(pSMB);
5124 5125
@@ -5143,7 +5144,7 @@ CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, char *fileName,
5143 int bytes_returned; 5144 int bytes_returned;
5144 int name_len; 5145 int name_len;
5145 5146
5146 cFYI(1, ("In SetAttrLegacy")); 5147 cFYI(1, "In SetAttrLegacy");
5147 5148
5148SetAttrLgcyRetry: 5149SetAttrLgcyRetry:
5149 rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB, 5150 rc = smb_init(SMB_COM_SETATTR, 8, tcon, (void **) &pSMB,
@@ -5169,7 +5170,7 @@ SetAttrLgcyRetry:
5169 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5170 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5170 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5171 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5171 if (rc) 5172 if (rc)
5172 cFYI(1, ("Error in LegacySetAttr = %d", rc)); 5173 cFYI(1, "Error in LegacySetAttr = %d", rc);
5173 5174
5174 cifs_buf_release(pSMB); 5175 cifs_buf_release(pSMB);
5175 5176
@@ -5231,7 +5232,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5231 int rc = 0; 5232 int rc = 0;
5232 u16 params, param_offset, offset, byte_count, count; 5233 u16 params, param_offset, offset, byte_count, count;
5233 5234
5234 cFYI(1, ("Set Unix Info (via SetFileInfo)")); 5235 cFYI(1, "Set Unix Info (via SetFileInfo)");
5235 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); 5236 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
5236 5237
5237 if (rc) 5238 if (rc)
@@ -5276,7 +5277,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5276 5277
5277 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5278 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
5278 if (rc) 5279 if (rc)
5279 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc)); 5280 cFYI(1, "Send error in Set Time (SetFileInfo) = %d", rc);
5280 5281
5281 /* Note: On -EAGAIN error only caller can retry on handle based calls 5282 /* Note: On -EAGAIN error only caller can retry on handle based calls
5282 since file handle passed in no longer valid */ 5283 since file handle passed in no longer valid */
@@ -5297,7 +5298,7 @@ CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5297 FILE_UNIX_BASIC_INFO *data_offset; 5298 FILE_UNIX_BASIC_INFO *data_offset;
5298 __u16 params, param_offset, offset, count, byte_count; 5299 __u16 params, param_offset, offset, count, byte_count;
5299 5300
5300 cFYI(1, ("In SetUID/GID/Mode")); 5301 cFYI(1, "In SetUID/GID/Mode");
5301setPermsRetry: 5302setPermsRetry:
5302 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5303 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
5303 (void **) &pSMBr); 5304 (void **) &pSMBr);
@@ -5353,7 +5354,7 @@ setPermsRetry:
5353 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5354 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5354 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5355 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5355 if (rc) 5356 if (rc)
5356 cFYI(1, ("SetPathInfo (perms) returned %d", rc)); 5357 cFYI(1, "SetPathInfo (perms) returned %d", rc);
5357 5358
5358 cifs_buf_release(pSMB); 5359 cifs_buf_release(pSMB);
5359 if (rc == -EAGAIN) 5360 if (rc == -EAGAIN)
@@ -5372,7 +5373,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5372 struct dir_notify_req *dnotify_req; 5373 struct dir_notify_req *dnotify_req;
5373 int bytes_returned; 5374 int bytes_returned;
5374 5375
5375 cFYI(1, ("In CIFSSMBNotify for file handle %d", (int)netfid)); 5376 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5376 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB, 5377 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5377 (void **) &pSMBr); 5378 (void **) &pSMBr);
5378 if (rc) 5379 if (rc)
@@ -5406,7 +5407,7 @@ int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5406 (struct smb_hdr *)pSMBr, &bytes_returned, 5407 (struct smb_hdr *)pSMBr, &bytes_returned,
5407 CIFS_ASYNC_OP); 5408 CIFS_ASYNC_OP);
5408 if (rc) { 5409 if (rc) {
5409 cFYI(1, ("Error in Notify = %d", rc)); 5410 cFYI(1, "Error in Notify = %d", rc);
5410 } else { 5411 } else {
5411 /* Add file to outstanding requests */ 5412 /* Add file to outstanding requests */
5412 /* BB change to kmem cache alloc */ 5413 /* BB change to kmem cache alloc */
@@ -5462,7 +5463,7 @@ CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
5462 char *end_of_smb; 5463 char *end_of_smb;
5463 __u16 params, byte_count, data_offset; 5464 __u16 params, byte_count, data_offset;
5464 5465
5465 cFYI(1, ("In Query All EAs path %s", searchName)); 5466 cFYI(1, "In Query All EAs path %s", searchName);
5466QAllEAsRetry: 5467QAllEAsRetry:
5467 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5468 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
5468 (void **) &pSMBr); 5469 (void **) &pSMBr);
@@ -5509,7 +5510,7 @@ QAllEAsRetry:
5509 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5510 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5510 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5511 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5511 if (rc) { 5512 if (rc) {
5512 cFYI(1, ("Send error in QueryAllEAs = %d", rc)); 5513 cFYI(1, "Send error in QueryAllEAs = %d", rc);
5513 goto QAllEAsOut; 5514 goto QAllEAsOut;
5514 } 5515 }
5515 5516
@@ -5537,16 +5538,16 @@ QAllEAsRetry:
5537 (((char *) &pSMBr->hdr.Protocol) + data_offset); 5538 (((char *) &pSMBr->hdr.Protocol) + data_offset);
5538 5539
5539 list_len = le32_to_cpu(ea_response_data->list_len); 5540 list_len = le32_to_cpu(ea_response_data->list_len);
5540 cFYI(1, ("ea length %d", list_len)); 5541 cFYI(1, "ea length %d", list_len);
5541 if (list_len <= 8) { 5542 if (list_len <= 8) {
5542 cFYI(1, ("empty EA list returned from server")); 5543 cFYI(1, "empty EA list returned from server");
5543 goto QAllEAsOut; 5544 goto QAllEAsOut;
5544 } 5545 }
5545 5546
5546 /* make sure list_len doesn't go past end of SMB */ 5547 /* make sure list_len doesn't go past end of SMB */
5547 end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr); 5548 end_of_smb = (char *)pByteArea(&pSMBr->hdr) + BCC(&pSMBr->hdr);
5548 if ((char *)ea_response_data + list_len > end_of_smb) { 5549 if ((char *)ea_response_data + list_len > end_of_smb) {
5549 cFYI(1, ("EA list appears to go beyond SMB")); 5550 cFYI(1, "EA list appears to go beyond SMB");
5550 rc = -EIO; 5551 rc = -EIO;
5551 goto QAllEAsOut; 5552 goto QAllEAsOut;
5552 } 5553 }
@@ -5563,7 +5564,7 @@ QAllEAsRetry:
5563 temp_ptr += 4; 5564 temp_ptr += 4;
5564 /* make sure we can read name_len and value_len */ 5565 /* make sure we can read name_len and value_len */
5565 if (list_len < 0) { 5566 if (list_len < 0) {
5566 cFYI(1, ("EA entry goes beyond length of list")); 5567 cFYI(1, "EA entry goes beyond length of list");
5567 rc = -EIO; 5568 rc = -EIO;
5568 goto QAllEAsOut; 5569 goto QAllEAsOut;
5569 } 5570 }
@@ -5572,7 +5573,7 @@ QAllEAsRetry:
5572 value_len = le16_to_cpu(temp_fea->value_len); 5573 value_len = le16_to_cpu(temp_fea->value_len);
5573 list_len -= name_len + 1 + value_len; 5574 list_len -= name_len + 1 + value_len;
5574 if (list_len < 0) { 5575 if (list_len < 0) {
5575 cFYI(1, ("EA entry goes beyond length of list")); 5576 cFYI(1, "EA entry goes beyond length of list");
5576 rc = -EIO; 5577 rc = -EIO;
5577 goto QAllEAsOut; 5578 goto QAllEAsOut;
5578 } 5579 }
@@ -5639,7 +5640,7 @@ CIFSSMBSetEA(const int xid, struct cifsTconInfo *tcon, const char *fileName,
5639 int bytes_returned = 0; 5640 int bytes_returned = 0;
5640 __u16 params, param_offset, byte_count, offset, count; 5641 __u16 params, param_offset, byte_count, offset, count;
5641 5642
5642 cFYI(1, ("In SetEA")); 5643 cFYI(1, "In SetEA");
5643SetEARetry: 5644SetEARetry:
5644 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, 5645 rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
5645 (void **) &pSMBr); 5646 (void **) &pSMBr);
@@ -5721,7 +5722,7 @@ SetEARetry:
5721 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5722 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5722 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5723 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
5723 if (rc) 5724 if (rc)
5724 cFYI(1, ("SetPathInfo (EA) returned %d", rc)); 5725 cFYI(1, "SetPathInfo (EA) returned %d", rc);
5725 5726
5726 cifs_buf_release(pSMB); 5727 cifs_buf_release(pSMB);
5727 5728
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d9566bf8f917..2208f06e4c45 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
102 bool sockopt_tcp_nodelay:1; 102 bool sockopt_tcp_nodelay:1;
103 unsigned short int port; 103 unsigned short int port;
104 char *prepath; 104 char *prepath;
105 struct nls_table *local_nls;
105}; 106};
106 107
107static int ipv4_connect(struct TCP_Server_Info *server); 108static int ipv4_connect(struct TCP_Server_Info *server);
@@ -135,7 +136,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
135 spin_unlock(&GlobalMid_Lock); 136 spin_unlock(&GlobalMid_Lock);
136 server->maxBuf = 0; 137 server->maxBuf = 0;
137 138
138 cFYI(1, ("Reconnecting tcp session")); 139 cFYI(1, "Reconnecting tcp session");
139 140
140 /* before reconnecting the tcp session, mark the smb session (uid) 141 /* before reconnecting the tcp session, mark the smb session (uid)
141 and the tid bad so they are not used until reconnected */ 142 and the tid bad so they are not used until reconnected */
@@ -153,12 +154,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
153 /* do not want to be sending data on a socket we are freeing */ 154 /* do not want to be sending data on a socket we are freeing */
154 mutex_lock(&server->srv_mutex); 155 mutex_lock(&server->srv_mutex);
155 if (server->ssocket) { 156 if (server->ssocket) {
156 cFYI(1, ("State: 0x%x Flags: 0x%lx", server->ssocket->state, 157 cFYI(1, "State: 0x%x Flags: 0x%lx", server->ssocket->state,
157 server->ssocket->flags)); 158 server->ssocket->flags);
158 kernel_sock_shutdown(server->ssocket, SHUT_WR); 159 kernel_sock_shutdown(server->ssocket, SHUT_WR);
159 cFYI(1, ("Post shutdown state: 0x%x Flags: 0x%lx", 160 cFYI(1, "Post shutdown state: 0x%x Flags: 0x%lx",
160 server->ssocket->state, 161 server->ssocket->state,
161 server->ssocket->flags)); 162 server->ssocket->flags);
162 sock_release(server->ssocket); 163 sock_release(server->ssocket);
163 server->ssocket = NULL; 164 server->ssocket = NULL;
164 } 165 }
@@ -187,7 +188,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
187 else 188 else
188 rc = ipv4_connect(server); 189 rc = ipv4_connect(server);
189 if (rc) { 190 if (rc) {
190 cFYI(1, ("reconnect error %d", rc)); 191 cFYI(1, "reconnect error %d", rc);
191 msleep(3000); 192 msleep(3000);
192 } else { 193 } else {
193 atomic_inc(&tcpSesReconnectCount); 194 atomic_inc(&tcpSesReconnectCount);
@@ -223,7 +224,7 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
223 /* check for plausible wct, bcc and t2 data and parm sizes */ 224 /* check for plausible wct, bcc and t2 data and parm sizes */
224 /* check for parm and data offset going beyond end of smb */ 225 /* check for parm and data offset going beyond end of smb */
225 if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */ 226 if (pSMB->WordCount != 10) { /* coalesce_t2 depends on this */
226 cFYI(1, ("invalid transact2 word count")); 227 cFYI(1, "invalid transact2 word count");
227 return -EINVAL; 228 return -EINVAL;
228 } 229 }
229 230
@@ -237,15 +238,15 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
237 if (remaining == 0) 238 if (remaining == 0)
238 return 0; 239 return 0;
239 else if (remaining < 0) { 240 else if (remaining < 0) {
240 cFYI(1, ("total data %d smaller than data in frame %d", 241 cFYI(1, "total data %d smaller than data in frame %d",
241 total_data_size, data_in_this_rsp)); 242 total_data_size, data_in_this_rsp);
242 return -EINVAL; 243 return -EINVAL;
243 } else { 244 } else {
244 cFYI(1, ("missing %d bytes from transact2, check next response", 245 cFYI(1, "missing %d bytes from transact2, check next response",
245 remaining)); 246 remaining);
246 if (total_data_size > maxBufSize) { 247 if (total_data_size > maxBufSize) {
247 cERROR(1, ("TotalDataSize %d is over maximum buffer %d", 248 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
248 total_data_size, maxBufSize)); 249 total_data_size, maxBufSize);
249 return -EINVAL; 250 return -EINVAL;
250 } 251 }
251 return remaining; 252 return remaining;
@@ -267,7 +268,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
267 total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount); 268 total_data_size = le16_to_cpu(pSMBt->t2_rsp.TotalDataCount);
268 269
269 if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) { 270 if (total_data_size != le16_to_cpu(pSMB2->t2_rsp.TotalDataCount)) {
270 cFYI(1, ("total data size of primary and secondary t2 differ")); 271 cFYI(1, "total data size of primary and secondary t2 differ");
271 } 272 }
272 273
273 total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount); 274 total_in_buf = le16_to_cpu(pSMBt->t2_rsp.DataCount);
@@ -282,7 +283,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
282 283
283 total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount); 284 total_in_buf2 = le16_to_cpu(pSMB2->t2_rsp.DataCount);
284 if (remaining < total_in_buf2) { 285 if (remaining < total_in_buf2) {
285 cFYI(1, ("transact2 2nd response contains too much data")); 286 cFYI(1, "transact2 2nd response contains too much data");
286 } 287 }
287 288
288 /* find end of first SMB data area */ 289 /* find end of first SMB data area */
@@ -311,7 +312,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
311 pTargetSMB->smb_buf_length = byte_count; 312 pTargetSMB->smb_buf_length = byte_count;
312 313
313 if (remaining == total_in_buf2) { 314 if (remaining == total_in_buf2) {
314 cFYI(1, ("found the last secondary response")); 315 cFYI(1, "found the last secondary response");
315 return 0; /* we are done */ 316 return 0; /* we are done */
316 } else /* more responses to go */ 317 } else /* more responses to go */
317 return 1; 318 return 1;
@@ -339,7 +340,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
339 int reconnect; 340 int reconnect;
340 341
341 current->flags |= PF_MEMALLOC; 342 current->flags |= PF_MEMALLOC;
342 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); 343 cFYI(1, "Demultiplex PID: %d", task_pid_nr(current));
343 344
344 length = atomic_inc_return(&tcpSesAllocCount); 345 length = atomic_inc_return(&tcpSesAllocCount);
345 if (length > 1) 346 if (length > 1)
@@ -353,7 +354,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
353 if (bigbuf == NULL) { 354 if (bigbuf == NULL) {
354 bigbuf = cifs_buf_get(); 355 bigbuf = cifs_buf_get();
355 if (!bigbuf) { 356 if (!bigbuf) {
356 cERROR(1, ("No memory for large SMB response")); 357 cERROR(1, "No memory for large SMB response");
357 msleep(3000); 358 msleep(3000);
358 /* retry will check if exiting */ 359 /* retry will check if exiting */
359 continue; 360 continue;
@@ -366,7 +367,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
366 if (smallbuf == NULL) { 367 if (smallbuf == NULL) {
367 smallbuf = cifs_small_buf_get(); 368 smallbuf = cifs_small_buf_get();
368 if (!smallbuf) { 369 if (!smallbuf) {
369 cERROR(1, ("No memory for SMB response")); 370 cERROR(1, "No memory for SMB response");
370 msleep(1000); 371 msleep(1000);
371 /* retry will check if exiting */ 372 /* retry will check if exiting */
372 continue; 373 continue;
@@ -391,9 +392,9 @@ incomplete_rcv:
391 if (server->tcpStatus == CifsExiting) { 392 if (server->tcpStatus == CifsExiting) {
392 break; 393 break;
393 } else if (server->tcpStatus == CifsNeedReconnect) { 394 } else if (server->tcpStatus == CifsNeedReconnect) {
394 cFYI(1, ("Reconnect after server stopped responding")); 395 cFYI(1, "Reconnect after server stopped responding");
395 cifs_reconnect(server); 396 cifs_reconnect(server);
396 cFYI(1, ("call to reconnect done")); 397 cFYI(1, "call to reconnect done");
397 csocket = server->ssocket; 398 csocket = server->ssocket;
398 continue; 399 continue;
399 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { 400 } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) {
@@ -411,7 +412,7 @@ incomplete_rcv:
411 continue; 412 continue;
412 } else if (length <= 0) { 413 } else if (length <= 0) {
413 if (server->tcpStatus == CifsNew) { 414 if (server->tcpStatus == CifsNew) {
414 cFYI(1, ("tcp session abend after SMBnegprot")); 415 cFYI(1, "tcp session abend after SMBnegprot");
415 /* some servers kill the TCP session rather than 416 /* some servers kill the TCP session rather than
416 returning an SMB negprot error, in which 417 returning an SMB negprot error, in which
417 case reconnecting here is not going to help, 418 case reconnecting here is not going to help,
@@ -419,18 +420,18 @@ incomplete_rcv:
419 break; 420 break;
420 } 421 }
421 if (!try_to_freeze() && (length == -EINTR)) { 422 if (!try_to_freeze() && (length == -EINTR)) {
422 cFYI(1, ("cifsd thread killed")); 423 cFYI(1, "cifsd thread killed");
423 break; 424 break;
424 } 425 }
425 cFYI(1, ("Reconnect after unexpected peek error %d", 426 cFYI(1, "Reconnect after unexpected peek error %d",
426 length)); 427 length);
427 cifs_reconnect(server); 428 cifs_reconnect(server);
428 csocket = server->ssocket; 429 csocket = server->ssocket;
429 wake_up(&server->response_q); 430 wake_up(&server->response_q);
430 continue; 431 continue;
431 } else if (length < pdu_length) { 432 } else if (length < pdu_length) {
432 cFYI(1, ("requested %d bytes but only got %d bytes", 433 cFYI(1, "requested %d bytes but only got %d bytes",
433 pdu_length, length)); 434 pdu_length, length);
434 pdu_length -= length; 435 pdu_length -= length;
435 msleep(1); 436 msleep(1);
436 goto incomplete_rcv; 437 goto incomplete_rcv;
@@ -450,18 +451,18 @@ incomplete_rcv:
450 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); 451 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
451 smb_buffer->smb_buf_length = pdu_length; 452 smb_buffer->smb_buf_length = pdu_length;
452 453
453 cFYI(1, ("rfc1002 length 0x%x", pdu_length+4)); 454 cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
454 455
455 if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) { 456 if (temp == (char) RFC1002_SESSION_KEEP_ALIVE) {
456 continue; 457 continue;
457 } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) { 458 } else if (temp == (char)RFC1002_POSITIVE_SESSION_RESPONSE) {
458 cFYI(1, ("Good RFC 1002 session rsp")); 459 cFYI(1, "Good RFC 1002 session rsp");
459 continue; 460 continue;
460 } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) { 461 } else if (temp == (char)RFC1002_NEGATIVE_SESSION_RESPONSE) {
461 /* we get this from Windows 98 instead of 462 /* we get this from Windows 98 instead of
462 an error on SMB negprot response */ 463 an error on SMB negprot response */
463 cFYI(1, ("Negative RFC1002 Session Response Error 0x%x)", 464 cFYI(1, "Negative RFC1002 Session Response Error 0x%x)",
464 pdu_length)); 465 pdu_length);
465 if (server->tcpStatus == CifsNew) { 466 if (server->tcpStatus == CifsNew) {
466 /* if nack on negprot (rather than 467 /* if nack on negprot (rather than
467 ret of smb negprot error) reconnecting 468 ret of smb negprot error) reconnecting
@@ -484,7 +485,7 @@ incomplete_rcv:
484 continue; 485 continue;
485 } 486 }
486 } else if (temp != (char) 0) { 487 } else if (temp != (char) 0) {
487 cERROR(1, ("Unknown RFC 1002 frame")); 488 cERROR(1, "Unknown RFC 1002 frame");
488 cifs_dump_mem(" Received Data: ", (char *)smb_buffer, 489 cifs_dump_mem(" Received Data: ", (char *)smb_buffer,
489 length); 490 length);
490 cifs_reconnect(server); 491 cifs_reconnect(server);
@@ -495,8 +496,8 @@ incomplete_rcv:
495 /* else we have an SMB response */ 496 /* else we have an SMB response */
496 if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) || 497 if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) ||
497 (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) { 498 (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) {
498 cERROR(1, ("Invalid size SMB length %d pdu_length %d", 499 cERROR(1, "Invalid size SMB length %d pdu_length %d",
499 length, pdu_length+4)); 500 length, pdu_length+4);
500 cifs_reconnect(server); 501 cifs_reconnect(server);
501 csocket = server->ssocket; 502 csocket = server->ssocket;
502 wake_up(&server->response_q); 503 wake_up(&server->response_q);
@@ -539,8 +540,8 @@ incomplete_rcv:
539 length = 0; 540 length = 0;
540 continue; 541 continue;
541 } else if (length <= 0) { 542 } else if (length <= 0) {
542 cERROR(1, ("Received no data, expecting %d", 543 cERROR(1, "Received no data, expecting %d",
543 pdu_length - total_read)); 544 pdu_length - total_read);
544 cifs_reconnect(server); 545 cifs_reconnect(server);
545 csocket = server->ssocket; 546 csocket = server->ssocket;
546 reconnect = 1; 547 reconnect = 1;
@@ -588,7 +589,7 @@ incomplete_rcv:
588 } 589 }
589 } else { 590 } else {
590 if (!isLargeBuf) { 591 if (!isLargeBuf) {
591 cERROR(1,("1st trans2 resp needs bigbuf")); 592 cERROR(1, "1st trans2 resp needs bigbuf");
592 /* BB maybe we can fix this up, switch 593 /* BB maybe we can fix this up, switch
593 to already allocated large buffer? */ 594 to already allocated large buffer? */
594 } else { 595 } else {
@@ -630,8 +631,8 @@ multi_t2_fnd:
630 wake_up_process(task_to_wake); 631 wake_up_process(task_to_wake);
631 } else if (!is_valid_oplock_break(smb_buffer, server) && 632 } else if (!is_valid_oplock_break(smb_buffer, server) &&
632 !isMultiRsp) { 633 !isMultiRsp) {
633 cERROR(1, ("No task to wake, unknown frame received! " 634 cERROR(1, "No task to wake, unknown frame received! "
634 "NumMids %d", midCount.counter)); 635 "NumMids %d", midCount.counter);
635 cifs_dump_mem("Received Data is: ", (char *)smb_buffer, 636 cifs_dump_mem("Received Data is: ", (char *)smb_buffer,
636 sizeof(struct smb_hdr)); 637 sizeof(struct smb_hdr));
637#ifdef CONFIG_CIFS_DEBUG2 638#ifdef CONFIG_CIFS_DEBUG2
@@ -708,8 +709,8 @@ multi_t2_fnd:
708 list_for_each(tmp, &server->pending_mid_q) { 709 list_for_each(tmp, &server->pending_mid_q) {
709 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 710 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
710 if (mid_entry->midState == MID_REQUEST_SUBMITTED) { 711 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
711 cFYI(1, ("Clearing Mid 0x%x - waking up ", 712 cFYI(1, "Clearing Mid 0x%x - waking up ",
712 mid_entry->mid)); 713 mid_entry->mid);
713 task_to_wake = mid_entry->tsk; 714 task_to_wake = mid_entry->tsk;
714 if (task_to_wake) 715 if (task_to_wake)
715 wake_up_process(task_to_wake); 716 wake_up_process(task_to_wake);
@@ -728,7 +729,7 @@ multi_t2_fnd:
728 to wait at least 45 seconds before giving up 729 to wait at least 45 seconds before giving up
729 on a request getting a response and going ahead 730 on a request getting a response and going ahead
730 and killing cifsd */ 731 and killing cifsd */
731 cFYI(1, ("Wait for exit from demultiplex thread")); 732 cFYI(1, "Wait for exit from demultiplex thread");
732 msleep(46000); 733 msleep(46000);
733 /* if threads still have not exited they are probably never 734 /* if threads still have not exited they are probably never
734 coming home not much else we can do but free the memory */ 735 coming home not much else we can do but free the memory */
@@ -849,7 +850,7 @@ cifs_parse_mount_options(char *options, const char *devname,
849 separator[0] = options[4]; 850 separator[0] = options[4];
850 options += 5; 851 options += 5;
851 } else { 852 } else {
852 cFYI(1, ("Null separator not allowed")); 853 cFYI(1, "Null separator not allowed");
853 } 854 }
854 } 855 }
855 856
@@ -974,7 +975,7 @@ cifs_parse_mount_options(char *options, const char *devname,
974 } 975 }
975 } else if (strnicmp(data, "sec", 3) == 0) { 976 } else if (strnicmp(data, "sec", 3) == 0) {
976 if (!value || !*value) { 977 if (!value || !*value) {
977 cERROR(1, ("no security value specified")); 978 cERROR(1, "no security value specified");
978 continue; 979 continue;
979 } else if (strnicmp(value, "krb5i", 5) == 0) { 980 } else if (strnicmp(value, "krb5i", 5) == 0) {
980 vol->secFlg |= CIFSSEC_MAY_KRB5 | 981 vol->secFlg |= CIFSSEC_MAY_KRB5 |
@@ -982,7 +983,7 @@ cifs_parse_mount_options(char *options, const char *devname,
982 } else if (strnicmp(value, "krb5p", 5) == 0) { 983 } else if (strnicmp(value, "krb5p", 5) == 0) {
983 /* vol->secFlg |= CIFSSEC_MUST_SEAL | 984 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
984 CIFSSEC_MAY_KRB5; */ 985 CIFSSEC_MAY_KRB5; */
985 cERROR(1, ("Krb5 cifs privacy not supported")); 986 cERROR(1, "Krb5 cifs privacy not supported");
986 return 1; 987 return 1;
987 } else if (strnicmp(value, "krb5", 4) == 0) { 988 } else if (strnicmp(value, "krb5", 4) == 0) {
988 vol->secFlg |= CIFSSEC_MAY_KRB5; 989 vol->secFlg |= CIFSSEC_MAY_KRB5;
@@ -1014,7 +1015,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1014 } else if (strnicmp(value, "none", 4) == 0) { 1015 } else if (strnicmp(value, "none", 4) == 0) {
1015 vol->nullauth = 1; 1016 vol->nullauth = 1;
1016 } else { 1017 } else {
1017 cERROR(1, ("bad security option: %s", value)); 1018 cERROR(1, "bad security option: %s", value);
1018 return 1; 1019 return 1;
1019 } 1020 }
1020 } else if ((strnicmp(data, "unc", 3) == 0) 1021 } else if ((strnicmp(data, "unc", 3) == 0)
@@ -1053,7 +1054,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1053 a domain name and need special handling? */ 1054 a domain name and need special handling? */
1054 if (strnlen(value, 256) < 256) { 1055 if (strnlen(value, 256) < 256) {
1055 vol->domainname = value; 1056 vol->domainname = value;
1056 cFYI(1, ("Domain name set")); 1057 cFYI(1, "Domain name set");
1057 } else { 1058 } else {
1058 printk(KERN_WARNING "CIFS: domain name too " 1059 printk(KERN_WARNING "CIFS: domain name too "
1059 "long\n"); 1060 "long\n");
@@ -1076,7 +1077,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1076 strcpy(vol->prepath+1, value); 1077 strcpy(vol->prepath+1, value);
1077 } else 1078 } else
1078 strcpy(vol->prepath, value); 1079 strcpy(vol->prepath, value);
1079 cFYI(1, ("prefix path %s", vol->prepath)); 1080 cFYI(1, "prefix path %s", vol->prepath);
1080 } else { 1081 } else {
1081 printk(KERN_WARNING "CIFS: prefix too long\n"); 1082 printk(KERN_WARNING "CIFS: prefix too long\n");
1082 return 1; 1083 return 1;
@@ -1092,7 +1093,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1092 vol->iocharset = value; 1093 vol->iocharset = value;
1093 /* if iocharset not set then load_nls_default 1094 /* if iocharset not set then load_nls_default
1094 is used by caller */ 1095 is used by caller */
1095 cFYI(1, ("iocharset set to %s", value)); 1096 cFYI(1, "iocharset set to %s", value);
1096 } else { 1097 } else {
1097 printk(KERN_WARNING "CIFS: iocharset name " 1098 printk(KERN_WARNING "CIFS: iocharset name "
1098 "too long.\n"); 1099 "too long.\n");
@@ -1144,14 +1145,14 @@ cifs_parse_mount_options(char *options, const char *devname,
1144 } 1145 }
1145 } else if (strnicmp(data, "sockopt", 5) == 0) { 1146 } else if (strnicmp(data, "sockopt", 5) == 0) {
1146 if (!value || !*value) { 1147 if (!value || !*value) {
1147 cERROR(1, ("no socket option specified")); 1148 cERROR(1, "no socket option specified");
1148 continue; 1149 continue;
1149 } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) { 1150 } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
1150 vol->sockopt_tcp_nodelay = 1; 1151 vol->sockopt_tcp_nodelay = 1;
1151 } 1152 }
1152 } else if (strnicmp(data, "netbiosname", 4) == 0) { 1153 } else if (strnicmp(data, "netbiosname", 4) == 0) {
1153 if (!value || !*value || (*value == ' ')) { 1154 if (!value || !*value || (*value == ' ')) {
1154 cFYI(1, ("invalid (empty) netbiosname")); 1155 cFYI(1, "invalid (empty) netbiosname");
1155 } else { 1156 } else {
1156 memset(vol->source_rfc1001_name, 0x20, 15); 1157 memset(vol->source_rfc1001_name, 0x20, 15);
1157 for (i = 0; i < 15; i++) { 1158 for (i = 0; i < 15; i++) {
@@ -1175,7 +1176,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1175 } else if (strnicmp(data, "servern", 7) == 0) { 1176 } else if (strnicmp(data, "servern", 7) == 0) {
1176 /* servernetbiosname specified override *SMBSERVER */ 1177 /* servernetbiosname specified override *SMBSERVER */
1177 if (!value || !*value || (*value == ' ')) { 1178 if (!value || !*value || (*value == ' ')) {
1178 cFYI(1, ("empty server netbiosname specified")); 1179 cFYI(1, "empty server netbiosname specified");
1179 } else { 1180 } else {
1180 /* last byte, type, is 0x20 for servr type */ 1181 /* last byte, type, is 0x20 for servr type */
1181 memset(vol->target_rfc1001_name, 0x20, 16); 1182 memset(vol->target_rfc1001_name, 0x20, 16);
@@ -1434,7 +1435,7 @@ cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
1434 1435
1435 ++server->srv_count; 1436 ++server->srv_count;
1436 write_unlock(&cifs_tcp_ses_lock); 1437 write_unlock(&cifs_tcp_ses_lock);
1437 cFYI(1, ("Existing tcp session with server found")); 1438 cFYI(1, "Existing tcp session with server found");
1438 return server; 1439 return server;
1439 } 1440 }
1440 write_unlock(&cifs_tcp_ses_lock); 1441 write_unlock(&cifs_tcp_ses_lock);
@@ -1475,7 +1476,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1475 1476
1476 memset(&addr, 0, sizeof(struct sockaddr_storage)); 1477 memset(&addr, 0, sizeof(struct sockaddr_storage));
1477 1478
1478 cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip)); 1479 cFYI(1, "UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip);
1479 1480
1480 if (volume_info->UNCip && volume_info->UNC) { 1481 if (volume_info->UNCip && volume_info->UNC) {
1481 rc = cifs_convert_address(volume_info->UNCip, &addr); 1482 rc = cifs_convert_address(volume_info->UNCip, &addr);
@@ -1487,13 +1488,12 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1487 } else if (volume_info->UNCip) { 1488 } else if (volume_info->UNCip) {
1488 /* BB using ip addr as tcp_ses name to connect to the 1489 /* BB using ip addr as tcp_ses name to connect to the
1489 DFS root below */ 1490 DFS root below */
1490 cERROR(1, ("Connecting to DFS root not implemented yet")); 1491 cERROR(1, "Connecting to DFS root not implemented yet");
1491 rc = -EINVAL; 1492 rc = -EINVAL;
1492 goto out_err; 1493 goto out_err;
1493 } else /* which tcp_sess DFS root would we conect to */ { 1494 } else /* which tcp_sess DFS root would we conect to */ {
1494 cERROR(1, 1495 cERROR(1, "CIFS mount error: No UNC path (e.g. -o "
1495 ("CIFS mount error: No UNC path (e.g. -o " 1496 "unc=//192.168.1.100/public) specified");
1496 "unc=//192.168.1.100/public) specified"));
1497 rc = -EINVAL; 1497 rc = -EINVAL;
1498 goto out_err; 1498 goto out_err;
1499 } 1499 }
@@ -1540,7 +1540,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1540 ++tcp_ses->srv_count; 1540 ++tcp_ses->srv_count;
1541 1541
1542 if (addr.ss_family == AF_INET6) { 1542 if (addr.ss_family == AF_INET6) {
1543 cFYI(1, ("attempting ipv6 connect")); 1543 cFYI(1, "attempting ipv6 connect");
1544 /* BB should we allow ipv6 on port 139? */ 1544 /* BB should we allow ipv6 on port 139? */
1545 /* other OS never observed in Wild doing 139 with v6 */ 1545 /* other OS never observed in Wild doing 139 with v6 */
1546 sin_server6->sin6_port = htons(volume_info->port); 1546 sin_server6->sin6_port = htons(volume_info->port);
@@ -1554,7 +1554,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1554 rc = ipv4_connect(tcp_ses); 1554 rc = ipv4_connect(tcp_ses);
1555 } 1555 }
1556 if (rc < 0) { 1556 if (rc < 0) {
1557 cERROR(1, ("Error connecting to socket. Aborting operation")); 1557 cERROR(1, "Error connecting to socket. Aborting operation");
1558 goto out_err; 1558 goto out_err;
1559 } 1559 }
1560 1560
@@ -1567,7 +1567,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1567 tcp_ses, "cifsd"); 1567 tcp_ses, "cifsd");
1568 if (IS_ERR(tcp_ses->tsk)) { 1568 if (IS_ERR(tcp_ses->tsk)) {
1569 rc = PTR_ERR(tcp_ses->tsk); 1569 rc = PTR_ERR(tcp_ses->tsk);
1570 cERROR(1, ("error %d create cifsd thread", rc)); 1570 cERROR(1, "error %d create cifsd thread", rc);
1571 module_put(THIS_MODULE); 1571 module_put(THIS_MODULE);
1572 goto out_err; 1572 goto out_err;
1573 } 1573 }
@@ -1616,6 +1616,7 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1616 int xid; 1616 int xid;
1617 struct TCP_Server_Info *server = ses->server; 1617 struct TCP_Server_Info *server = ses->server;
1618 1618
1619 cFYI(1, "%s: ses_count=%d\n", __func__, ses->ses_count);
1619 write_lock(&cifs_tcp_ses_lock); 1620 write_lock(&cifs_tcp_ses_lock);
1620 if (--ses->ses_count > 0) { 1621 if (--ses->ses_count > 0) {
1621 write_unlock(&cifs_tcp_ses_lock); 1622 write_unlock(&cifs_tcp_ses_lock);
@@ -1634,6 +1635,102 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1634 cifs_put_tcp_session(server); 1635 cifs_put_tcp_session(server);
1635} 1636}
1636 1637
1638static struct cifsSesInfo *
1639cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1640{
1641 int rc = -ENOMEM, xid;
1642 struct cifsSesInfo *ses;
1643
1644 xid = GetXid();
1645
1646 ses = cifs_find_smb_ses(server, volume_info->username);
1647 if (ses) {
1648 cFYI(1, "Existing smb sess found (status=%d)", ses->status);
1649
1650 /* existing SMB ses has a server reference already */
1651 cifs_put_tcp_session(server);
1652
1653 mutex_lock(&ses->session_mutex);
1654 rc = cifs_negotiate_protocol(xid, ses);
1655 if (rc) {
1656 mutex_unlock(&ses->session_mutex);
1657 /* problem -- put our ses reference */
1658 cifs_put_smb_ses(ses);
1659 FreeXid(xid);
1660 return ERR_PTR(rc);
1661 }
1662 if (ses->need_reconnect) {
1663 cFYI(1, "Session needs reconnect");
1664 rc = cifs_setup_session(xid, ses,
1665 volume_info->local_nls);
1666 if (rc) {
1667 mutex_unlock(&ses->session_mutex);
1668 /* problem -- put our reference */
1669 cifs_put_smb_ses(ses);
1670 FreeXid(xid);
1671 return ERR_PTR(rc);
1672 }
1673 }
1674 mutex_unlock(&ses->session_mutex);
1675 FreeXid(xid);
1676 return ses;
1677 }
1678
1679 cFYI(1, "Existing smb sess not found");
1680 ses = sesInfoAlloc();
1681 if (ses == NULL)
1682 goto get_ses_fail;
1683
1684 /* new SMB session uses our server ref */
1685 ses->server = server;
1686 if (server->addr.sockAddr6.sin6_family == AF_INET6)
1687 sprintf(ses->serverName, "%pI6",
1688 &server->addr.sockAddr6.sin6_addr);
1689 else
1690 sprintf(ses->serverName, "%pI4",
1691 &server->addr.sockAddr.sin_addr.s_addr);
1692
1693 if (volume_info->username)
1694 strncpy(ses->userName, volume_info->username,
1695 MAX_USERNAME_SIZE);
1696
1697 /* volume_info->password freed at unmount */
1698 if (volume_info->password) {
1699 ses->password = kstrdup(volume_info->password, GFP_KERNEL);
1700 if (!ses->password)
1701 goto get_ses_fail;
1702 }
1703 if (volume_info->domainname) {
1704 int len = strlen(volume_info->domainname);
1705 ses->domainName = kmalloc(len + 1, GFP_KERNEL);
1706 if (ses->domainName)
1707 strcpy(ses->domainName, volume_info->domainname);
1708 }
1709 ses->linux_uid = volume_info->linux_uid;
1710 ses->overrideSecFlg = volume_info->secFlg;
1711
1712 mutex_lock(&ses->session_mutex);
1713 rc = cifs_negotiate_protocol(xid, ses);
1714 if (!rc)
1715 rc = cifs_setup_session(xid, ses, volume_info->local_nls);
1716 mutex_unlock(&ses->session_mutex);
1717 if (rc)
1718 goto get_ses_fail;
1719
1720 /* success, put it on the list */
1721 write_lock(&cifs_tcp_ses_lock);
1722 list_add(&ses->smb_ses_list, &server->smb_ses_list);
1723 write_unlock(&cifs_tcp_ses_lock);
1724
1725 FreeXid(xid);
1726 return ses;
1727
1728get_ses_fail:
1729 sesInfoFree(ses);
1730 FreeXid(xid);
1731 return ERR_PTR(rc);
1732}
1733
1637static struct cifsTconInfo * 1734static struct cifsTconInfo *
1638cifs_find_tcon(struct cifsSesInfo *ses, const char *unc) 1735cifs_find_tcon(struct cifsSesInfo *ses, const char *unc)
1639{ 1736{
@@ -1662,6 +1759,7 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
1662 int xid; 1759 int xid;
1663 struct cifsSesInfo *ses = tcon->ses; 1760 struct cifsSesInfo *ses = tcon->ses;
1664 1761
1762 cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count);
1665 write_lock(&cifs_tcp_ses_lock); 1763 write_lock(&cifs_tcp_ses_lock);
1666 if (--tcon->tc_count > 0) { 1764 if (--tcon->tc_count > 0) {
1667 write_unlock(&cifs_tcp_ses_lock); 1765 write_unlock(&cifs_tcp_ses_lock);
@@ -1679,6 +1777,80 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
1679 cifs_put_smb_ses(ses); 1777 cifs_put_smb_ses(ses);
1680} 1778}
1681 1779
1780static struct cifsTconInfo *
1781cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info)
1782{
1783 int rc, xid;
1784 struct cifsTconInfo *tcon;
1785
1786 tcon = cifs_find_tcon(ses, volume_info->UNC);
1787 if (tcon) {
1788 cFYI(1, "Found match on UNC path");
1789 /* existing tcon already has a reference */
1790 cifs_put_smb_ses(ses);
1791 if (tcon->seal != volume_info->seal)
1792 cERROR(1, "transport encryption setting "
1793 "conflicts with existing tid");
1794 return tcon;
1795 }
1796
1797 tcon = tconInfoAlloc();
1798 if (tcon == NULL) {
1799 rc = -ENOMEM;
1800 goto out_fail;
1801 }
1802
1803 tcon->ses = ses;
1804 if (volume_info->password) {
1805 tcon->password = kstrdup(volume_info->password, GFP_KERNEL);
1806 if (!tcon->password) {
1807 rc = -ENOMEM;
1808 goto out_fail;
1809 }
1810 }
1811
1812 if (strchr(volume_info->UNC + 3, '\\') == NULL
1813 && strchr(volume_info->UNC + 3, '/') == NULL) {
1814 cERROR(1, "Missing share name");
1815 rc = -ENODEV;
1816 goto out_fail;
1817 }
1818
1819 /* BB Do we need to wrap session_mutex around
1820 * this TCon call and Unix SetFS as
1821 * we do on SessSetup and reconnect? */
1822 xid = GetXid();
1823 rc = CIFSTCon(xid, ses, volume_info->UNC, tcon, volume_info->local_nls);
1824 FreeXid(xid);
1825 cFYI(1, "CIFS Tcon rc = %d", rc);
1826 if (rc)
1827 goto out_fail;
1828
1829 if (volume_info->nodfs) {
1830 tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
1831 cFYI(1, "DFS disabled (%d)", tcon->Flags);
1832 }
1833 tcon->seal = volume_info->seal;
1834 /* we can have only one retry value for a connection
1835 to a share so for resources mounted more than once
1836 to the same server share the last value passed in
1837 for the retry flag is used */
1838 tcon->retry = volume_info->retry;
1839 tcon->nocase = volume_info->nocase;
1840 tcon->local_lease = volume_info->local_lease;
1841
1842 write_lock(&cifs_tcp_ses_lock);
1843 list_add(&tcon->tcon_list, &ses->tcon_list);
1844 write_unlock(&cifs_tcp_ses_lock);
1845
1846 return tcon;
1847
1848out_fail:
1849 tconInfoFree(tcon);
1850 return ERR_PTR(rc);
1851}
1852
1853
1682int 1854int
1683get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, 1855get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
1684 const struct nls_table *nls_codepage, unsigned int *pnum_referrals, 1856 const struct nls_table *nls_codepage, unsigned int *pnum_referrals,
@@ -1703,8 +1875,7 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
1703 strcpy(temp_unc + 2, pSesInfo->serverName); 1875 strcpy(temp_unc + 2, pSesInfo->serverName);
1704 strcpy(temp_unc + 2 + strlen(pSesInfo->serverName), "\\IPC$"); 1876 strcpy(temp_unc + 2 + strlen(pSesInfo->serverName), "\\IPC$");
1705 rc = CIFSTCon(xid, pSesInfo, temp_unc, NULL, nls_codepage); 1877 rc = CIFSTCon(xid, pSesInfo, temp_unc, NULL, nls_codepage);
1706 cFYI(1, 1878 cFYI(1, "CIFS Tcon rc = %d ipc_tid = %d", rc, pSesInfo->ipc_tid);
1707 ("CIFS Tcon rc = %d ipc_tid = %d", rc, pSesInfo->ipc_tid));
1708 kfree(temp_unc); 1879 kfree(temp_unc);
1709 } 1880 }
1710 if (rc == 0) 1881 if (rc == 0)
@@ -1777,12 +1948,12 @@ ipv4_connect(struct TCP_Server_Info *server)
1777 rc = sock_create_kern(PF_INET, SOCK_STREAM, 1948 rc = sock_create_kern(PF_INET, SOCK_STREAM,
1778 IPPROTO_TCP, &socket); 1949 IPPROTO_TCP, &socket);
1779 if (rc < 0) { 1950 if (rc < 0) {
1780 cERROR(1, ("Error %d creating socket", rc)); 1951 cERROR(1, "Error %d creating socket", rc);
1781 return rc; 1952 return rc;
1782 } 1953 }
1783 1954
1784 /* BB other socket options to set KEEPALIVE, NODELAY? */ 1955 /* BB other socket options to set KEEPALIVE, NODELAY? */
1785 cFYI(1, ("Socket created")); 1956 cFYI(1, "Socket created");
1786 server->ssocket = socket; 1957 server->ssocket = socket;
1787 socket->sk->sk_allocation = GFP_NOFS; 1958 socket->sk->sk_allocation = GFP_NOFS;
1788 cifs_reclassify_socket4(socket); 1959 cifs_reclassify_socket4(socket);
@@ -1827,7 +1998,7 @@ ipv4_connect(struct TCP_Server_Info *server)
1827 if (!connected) { 1998 if (!connected) {
1828 if (orig_port) 1999 if (orig_port)
1829 server->addr.sockAddr.sin_port = orig_port; 2000 server->addr.sockAddr.sin_port = orig_port;
1830 cFYI(1, ("Error %d connecting to server via ipv4", rc)); 2001 cFYI(1, "Error %d connecting to server via ipv4", rc);
1831 sock_release(socket); 2002 sock_release(socket);
1832 server->ssocket = NULL; 2003 server->ssocket = NULL;
1833 return rc; 2004 return rc;
@@ -1855,12 +2026,12 @@ ipv4_connect(struct TCP_Server_Info *server)
1855 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, 2026 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
1856 (char *)&val, sizeof(val)); 2027 (char *)&val, sizeof(val));
1857 if (rc) 2028 if (rc)
1858 cFYI(1, ("set TCP_NODELAY socket option error %d", rc)); 2029 cFYI(1, "set TCP_NODELAY socket option error %d", rc);
1859 } 2030 }
1860 2031
1861 cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx", 2032 cFYI(1, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
1862 socket->sk->sk_sndbuf, 2033 socket->sk->sk_sndbuf,
1863 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo)); 2034 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
1864 2035
1865 /* send RFC1001 sessinit */ 2036 /* send RFC1001 sessinit */
1866 if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) { 2037 if (server->addr.sockAddr.sin_port == htons(RFC1001_PORT)) {
@@ -1938,13 +2109,13 @@ ipv6_connect(struct TCP_Server_Info *server)
1938 rc = sock_create_kern(PF_INET6, SOCK_STREAM, 2109 rc = sock_create_kern(PF_INET6, SOCK_STREAM,
1939 IPPROTO_TCP, &socket); 2110 IPPROTO_TCP, &socket);
1940 if (rc < 0) { 2111 if (rc < 0) {
1941 cERROR(1, ("Error %d creating ipv6 socket", rc)); 2112 cERROR(1, "Error %d creating ipv6 socket", rc);
1942 socket = NULL; 2113 socket = NULL;
1943 return rc; 2114 return rc;
1944 } 2115 }
1945 2116
1946 /* BB other socket options to set KEEPALIVE, NODELAY? */ 2117 /* BB other socket options to set KEEPALIVE, NODELAY? */
1947 cFYI(1, ("ipv6 Socket created")); 2118 cFYI(1, "ipv6 Socket created");
1948 server->ssocket = socket; 2119 server->ssocket = socket;
1949 socket->sk->sk_allocation = GFP_NOFS; 2120 socket->sk->sk_allocation = GFP_NOFS;
1950 cifs_reclassify_socket6(socket); 2121 cifs_reclassify_socket6(socket);
@@ -1988,7 +2159,7 @@ ipv6_connect(struct TCP_Server_Info *server)
1988 if (!connected) { 2159 if (!connected) {
1989 if (orig_port) 2160 if (orig_port)
1990 server->addr.sockAddr6.sin6_port = orig_port; 2161 server->addr.sockAddr6.sin6_port = orig_port;
1991 cFYI(1, ("Error %d connecting to server via ipv6", rc)); 2162 cFYI(1, "Error %d connecting to server via ipv6", rc);
1992 sock_release(socket); 2163 sock_release(socket);
1993 server->ssocket = NULL; 2164 server->ssocket = NULL;
1994 return rc; 2165 return rc;
@@ -2007,7 +2178,7 @@ ipv6_connect(struct TCP_Server_Info *server)
2007 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY, 2178 rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
2008 (char *)&val, sizeof(val)); 2179 (char *)&val, sizeof(val));
2009 if (rc) 2180 if (rc)
2010 cFYI(1, ("set TCP_NODELAY socket option error %d", rc)); 2181 cFYI(1, "set TCP_NODELAY socket option error %d", rc);
2011 } 2182 }
2012 2183
2013 server->ssocket = socket; 2184 server->ssocket = socket;
@@ -2032,13 +2203,13 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2032 if (vol_info && vol_info->no_linux_ext) { 2203 if (vol_info && vol_info->no_linux_ext) {
2033 tcon->fsUnixInfo.Capability = 0; 2204 tcon->fsUnixInfo.Capability = 0;
2034 tcon->unix_ext = 0; /* Unix Extensions disabled */ 2205 tcon->unix_ext = 0; /* Unix Extensions disabled */
2035 cFYI(1, ("Linux protocol extensions disabled")); 2206 cFYI(1, "Linux protocol extensions disabled");
2036 return; 2207 return;
2037 } else if (vol_info) 2208 } else if (vol_info)
2038 tcon->unix_ext = 1; /* Unix Extensions supported */ 2209 tcon->unix_ext = 1; /* Unix Extensions supported */
2039 2210
2040 if (tcon->unix_ext == 0) { 2211 if (tcon->unix_ext == 0) {
2041 cFYI(1, ("Unix extensions disabled so not set on reconnect")); 2212 cFYI(1, "Unix extensions disabled so not set on reconnect");
2042 return; 2213 return;
2043 } 2214 }
2044 2215
@@ -2054,12 +2225,11 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2054 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 2225 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
2055 if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) { 2226 if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
2056 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) 2227 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
2057 cERROR(1, ("POSIXPATH support change")); 2228 cERROR(1, "POSIXPATH support change");
2058 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; 2229 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
2059 } else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) { 2230 } else if ((cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
2060 cERROR(1, ("possible reconnect error")); 2231 cERROR(1, "possible reconnect error");
2061 cERROR(1, 2232 cERROR(1, "server disabled POSIX path support");
2062 ("server disabled POSIX path support"));
2063 } 2233 }
2064 } 2234 }
2065 2235
@@ -2067,7 +2237,7 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2067 if (vol_info && vol_info->no_psx_acl) 2237 if (vol_info && vol_info->no_psx_acl)
2068 cap &= ~CIFS_UNIX_POSIX_ACL_CAP; 2238 cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
2069 else if (CIFS_UNIX_POSIX_ACL_CAP & cap) { 2239 else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
2070 cFYI(1, ("negotiated posix acl support")); 2240 cFYI(1, "negotiated posix acl support");
2071 if (sb) 2241 if (sb)
2072 sb->s_flags |= MS_POSIXACL; 2242 sb->s_flags |= MS_POSIXACL;
2073 } 2243 }
@@ -2075,7 +2245,7 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2075 if (vol_info && vol_info->posix_paths == 0) 2245 if (vol_info && vol_info->posix_paths == 0)
2076 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP; 2246 cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
2077 else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) { 2247 else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
2078 cFYI(1, ("negotiate posix pathnames")); 2248 cFYI(1, "negotiate posix pathnames");
2079 if (sb) 2249 if (sb)
2080 CIFS_SB(sb)->mnt_cifs_flags |= 2250 CIFS_SB(sb)->mnt_cifs_flags |=
2081 CIFS_MOUNT_POSIX_PATHS; 2251 CIFS_MOUNT_POSIX_PATHS;
@@ -2090,39 +2260,38 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
2090 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) { 2260 if (sb && (CIFS_SB(sb)->rsize > 127 * 1024)) {
2091 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) { 2261 if ((cap & CIFS_UNIX_LARGE_READ_CAP) == 0) {
2092 CIFS_SB(sb)->rsize = 127 * 1024; 2262 CIFS_SB(sb)->rsize = 127 * 1024;
2093 cFYI(DBG2, 2263 cFYI(DBG2, "larger reads not supported by srv");
2094 ("larger reads not supported by srv"));
2095 } 2264 }
2096 } 2265 }
2097 2266
2098 2267
2099 cFYI(1, ("Negotiate caps 0x%x", (int)cap)); 2268 cFYI(1, "Negotiate caps 0x%x", (int)cap);
2100#ifdef CONFIG_CIFS_DEBUG2 2269#ifdef CONFIG_CIFS_DEBUG2
2101 if (cap & CIFS_UNIX_FCNTL_CAP) 2270 if (cap & CIFS_UNIX_FCNTL_CAP)
2102 cFYI(1, ("FCNTL cap")); 2271 cFYI(1, "FCNTL cap");
2103 if (cap & CIFS_UNIX_EXTATTR_CAP) 2272 if (cap & CIFS_UNIX_EXTATTR_CAP)
2104 cFYI(1, ("EXTATTR cap")); 2273 cFYI(1, "EXTATTR cap");
2105 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) 2274 if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
2106 cFYI(1, ("POSIX path cap")); 2275 cFYI(1, "POSIX path cap");
2107 if (cap & CIFS_UNIX_XATTR_CAP) 2276 if (cap & CIFS_UNIX_XATTR_CAP)
2108 cFYI(1, ("XATTR cap")); 2277 cFYI(1, "XATTR cap");
2109 if (cap & CIFS_UNIX_POSIX_ACL_CAP) 2278 if (cap & CIFS_UNIX_POSIX_ACL_CAP)
2110 cFYI(1, ("POSIX ACL cap")); 2279 cFYI(1, "POSIX ACL cap");
2111 if (cap & CIFS_UNIX_LARGE_READ_CAP) 2280 if (cap & CIFS_UNIX_LARGE_READ_CAP)
2112 cFYI(1, ("very large read cap")); 2281 cFYI(1, "very large read cap");
2113 if (cap & CIFS_UNIX_LARGE_WRITE_CAP) 2282 if (cap & CIFS_UNIX_LARGE_WRITE_CAP)
2114 cFYI(1, ("very large write cap")); 2283 cFYI(1, "very large write cap");
2115#endif /* CIFS_DEBUG2 */ 2284#endif /* CIFS_DEBUG2 */
2116 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) { 2285 if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
2117 if (vol_info == NULL) { 2286 if (vol_info == NULL) {
2118 cFYI(1, ("resetting capabilities failed")); 2287 cFYI(1, "resetting capabilities failed");
2119 } else 2288 } else
2120 cERROR(1, ("Negotiating Unix capabilities " 2289 cERROR(1, "Negotiating Unix capabilities "
2121 "with the server failed. Consider " 2290 "with the server failed. Consider "
2122 "mounting with the Unix Extensions\n" 2291 "mounting with the Unix Extensions\n"
2123 "disabled, if problems are found, " 2292 "disabled, if problems are found, "
2124 "by specifying the nounix mount " 2293 "by specifying the nounix mount "
2125 "option.")); 2294 "option.");
2126 2295
2127 } 2296 }
2128 } 2297 }
@@ -2152,8 +2321,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2152 struct cifs_sb_info *cifs_sb) 2321 struct cifs_sb_info *cifs_sb)
2153{ 2322{
2154 if (pvolume_info->rsize > CIFSMaxBufSize) { 2323 if (pvolume_info->rsize > CIFSMaxBufSize) {
2155 cERROR(1, ("rsize %d too large, using MaxBufSize", 2324 cERROR(1, "rsize %d too large, using MaxBufSize",
2156 pvolume_info->rsize)); 2325 pvolume_info->rsize);
2157 cifs_sb->rsize = CIFSMaxBufSize; 2326 cifs_sb->rsize = CIFSMaxBufSize;
2158 } else if ((pvolume_info->rsize) && 2327 } else if ((pvolume_info->rsize) &&
2159 (pvolume_info->rsize <= CIFSMaxBufSize)) 2328 (pvolume_info->rsize <= CIFSMaxBufSize))
@@ -2162,8 +2331,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2162 cifs_sb->rsize = CIFSMaxBufSize; 2331 cifs_sb->rsize = CIFSMaxBufSize;
2163 2332
2164 if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) { 2333 if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
2165 cERROR(1, ("wsize %d too large, using 4096 instead", 2334 cERROR(1, "wsize %d too large, using 4096 instead",
2166 pvolume_info->wsize)); 2335 pvolume_info->wsize);
2167 cifs_sb->wsize = 4096; 2336 cifs_sb->wsize = 4096;
2168 } else if (pvolume_info->wsize) 2337 } else if (pvolume_info->wsize)
2169 cifs_sb->wsize = pvolume_info->wsize; 2338 cifs_sb->wsize = pvolume_info->wsize;
@@ -2181,7 +2350,7 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2181 if (cifs_sb->rsize < 2048) { 2350 if (cifs_sb->rsize < 2048) {
2182 cifs_sb->rsize = 2048; 2351 cifs_sb->rsize = 2048;
2183 /* Windows ME may prefer this */ 2352 /* Windows ME may prefer this */
2184 cFYI(1, ("readsize set to minimum: 2048")); 2353 cFYI(1, "readsize set to minimum: 2048");
2185 } 2354 }
2186 /* calculate prepath */ 2355 /* calculate prepath */
2187 cifs_sb->prepath = pvolume_info->prepath; 2356 cifs_sb->prepath = pvolume_info->prepath;
@@ -2199,8 +2368,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2199 cifs_sb->mnt_gid = pvolume_info->linux_gid; 2368 cifs_sb->mnt_gid = pvolume_info->linux_gid;
2200 cifs_sb->mnt_file_mode = pvolume_info->file_mode; 2369 cifs_sb->mnt_file_mode = pvolume_info->file_mode;
2201 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; 2370 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
2202 cFYI(1, ("file mode: 0x%x dir mode: 0x%x", 2371 cFYI(1, "file mode: 0x%x dir mode: 0x%x",
2203 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode)); 2372 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2204 2373
2205 if (pvolume_info->noperm) 2374 if (pvolume_info->noperm)
2206 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 2375 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
@@ -2229,13 +2398,13 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2229 if (pvolume_info->dynperm) 2398 if (pvolume_info->dynperm)
2230 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; 2399 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
2231 if (pvolume_info->direct_io) { 2400 if (pvolume_info->direct_io) {
2232 cFYI(1, ("mounting share using direct i/o")); 2401 cFYI(1, "mounting share using direct i/o");
2233 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; 2402 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
2234 } 2403 }
2235 2404
2236 if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) 2405 if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
2237 cERROR(1, ("mount option dynperm ignored if cifsacl " 2406 cERROR(1, "mount option dynperm ignored if cifsacl "
2238 "mount option supported")); 2407 "mount option supported");
2239} 2408}
2240 2409
2241static int 2410static int
@@ -2262,7 +2431,7 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
2262{ 2431{
2263 struct smb_vol *volume_info; 2432 struct smb_vol *volume_info;
2264 2433
2265 if (!pvolume_info && !*pvolume_info) 2434 if (!pvolume_info || !*pvolume_info)
2266 return; 2435 return;
2267 2436
2268 volume_info = *pvolume_info; 2437 volume_info = *pvolume_info;
@@ -2344,11 +2513,11 @@ try_mount_again:
2344 } 2513 }
2345 2514
2346 if (volume_info->nullauth) { 2515 if (volume_info->nullauth) {
2347 cFYI(1, ("null user")); 2516 cFYI(1, "null user");
2348 volume_info->username = ""; 2517 volume_info->username = "";
2349 } else if (volume_info->username) { 2518 } else if (volume_info->username) {
2350 /* BB fixme parse for domain name here */ 2519 /* BB fixme parse for domain name here */
2351 cFYI(1, ("Username: %s", volume_info->username)); 2520 cFYI(1, "Username: %s", volume_info->username);
2352 } else { 2521 } else {
2353 cifserror("No username specified"); 2522 cifserror("No username specified");
2354 /* In userspace mount helper we can get user name from alternate 2523 /* In userspace mount helper we can get user name from alternate
@@ -2357,20 +2526,20 @@ try_mount_again:
2357 goto out; 2526 goto out;
2358 } 2527 }
2359 2528
2360
2361 /* this is needed for ASCII cp to Unicode converts */ 2529 /* this is needed for ASCII cp to Unicode converts */
2362 if (volume_info->iocharset == NULL) { 2530 if (volume_info->iocharset == NULL) {
2363 cifs_sb->local_nls = load_nls_default(); 2531 /* load_nls_default cannot return null */
2364 /* load_nls_default can not return null */ 2532 volume_info->local_nls = load_nls_default();
2365 } else { 2533 } else {
2366 cifs_sb->local_nls = load_nls(volume_info->iocharset); 2534 volume_info->local_nls = load_nls(volume_info->iocharset);
2367 if (cifs_sb->local_nls == NULL) { 2535 if (volume_info->local_nls == NULL) {
2368 cERROR(1, ("CIFS mount error: iocharset %s not found", 2536 cERROR(1, "CIFS mount error: iocharset %s not found",
2369 volume_info->iocharset)); 2537 volume_info->iocharset);
2370 rc = -ELIBACC; 2538 rc = -ELIBACC;
2371 goto out; 2539 goto out;
2372 } 2540 }
2373 } 2541 }
2542 cifs_sb->local_nls = volume_info->local_nls;
2374 2543
2375 /* get a reference to a tcp session */ 2544 /* get a reference to a tcp session */
2376 srvTcp = cifs_get_tcp_session(volume_info); 2545 srvTcp = cifs_get_tcp_session(volume_info);
@@ -2379,148 +2548,30 @@ try_mount_again:
2379 goto out; 2548 goto out;
2380 } 2549 }
2381 2550
2382 pSesInfo = cifs_find_smb_ses(srvTcp, volume_info->username); 2551 /* get a reference to a SMB session */
2383 if (pSesInfo) { 2552 pSesInfo = cifs_get_smb_ses(srvTcp, volume_info);
2384 cFYI(1, ("Existing smb sess found (status=%d)", 2553 if (IS_ERR(pSesInfo)) {
2385 pSesInfo->status)); 2554 rc = PTR_ERR(pSesInfo);
2386 /* 2555 pSesInfo = NULL;
2387 * The existing SMB session already has a reference to srvTcp, 2556 goto mount_fail_check;
2388 * so we can put back the extra one we got before
2389 */
2390 cifs_put_tcp_session(srvTcp);
2391
2392 mutex_lock(&pSesInfo->session_mutex);
2393 if (pSesInfo->need_reconnect) {
2394 cFYI(1, ("Session needs reconnect"));
2395 rc = cifs_setup_session(xid, pSesInfo,
2396 cifs_sb->local_nls);
2397 }
2398 mutex_unlock(&pSesInfo->session_mutex);
2399 } else if (!rc) {
2400 cFYI(1, ("Existing smb sess not found"));
2401 pSesInfo = sesInfoAlloc();
2402 if (pSesInfo == NULL) {
2403 rc = -ENOMEM;
2404 goto mount_fail_check;
2405 }
2406
2407 /* new SMB session uses our srvTcp ref */
2408 pSesInfo->server = srvTcp;
2409 if (srvTcp->addr.sockAddr6.sin6_family == AF_INET6)
2410 sprintf(pSesInfo->serverName, "%pI6",
2411 &srvTcp->addr.sockAddr6.sin6_addr);
2412 else
2413 sprintf(pSesInfo->serverName, "%pI4",
2414 &srvTcp->addr.sockAddr.sin_addr.s_addr);
2415
2416 write_lock(&cifs_tcp_ses_lock);
2417 list_add(&pSesInfo->smb_ses_list, &srvTcp->smb_ses_list);
2418 write_unlock(&cifs_tcp_ses_lock);
2419
2420 /* volume_info->password freed at unmount */
2421 if (volume_info->password) {
2422 pSesInfo->password = kstrdup(volume_info->password,
2423 GFP_KERNEL);
2424 if (!pSesInfo->password) {
2425 rc = -ENOMEM;
2426 goto mount_fail_check;
2427 }
2428 }
2429 if (volume_info->username)
2430 strncpy(pSesInfo->userName, volume_info->username,
2431 MAX_USERNAME_SIZE);
2432 if (volume_info->domainname) {
2433 int len = strlen(volume_info->domainname);
2434 pSesInfo->domainName = kmalloc(len + 1, GFP_KERNEL);
2435 if (pSesInfo->domainName)
2436 strcpy(pSesInfo->domainName,
2437 volume_info->domainname);
2438 }
2439 pSesInfo->linux_uid = volume_info->linux_uid;
2440 pSesInfo->overrideSecFlg = volume_info->secFlg;
2441 mutex_lock(&pSesInfo->session_mutex);
2442
2443 /* BB FIXME need to pass vol->secFlgs BB */
2444 rc = cifs_setup_session(xid, pSesInfo,
2445 cifs_sb->local_nls);
2446 mutex_unlock(&pSesInfo->session_mutex);
2447 } 2557 }
2448 2558
2449 /* search for existing tcon to this server share */ 2559 setup_cifs_sb(volume_info, cifs_sb);
2450 if (!rc) { 2560 if (pSesInfo->capabilities & CAP_LARGE_FILES)
2451 setup_cifs_sb(volume_info, cifs_sb); 2561 sb->s_maxbytes = MAX_LFS_FILESIZE;
2452 2562 else
2453 tcon = cifs_find_tcon(pSesInfo, volume_info->UNC); 2563 sb->s_maxbytes = MAX_NON_LFS;
2454 if (tcon) {
2455 cFYI(1, ("Found match on UNC path"));
2456 /* existing tcon already has a reference */
2457 cifs_put_smb_ses(pSesInfo);
2458 if (tcon->seal != volume_info->seal)
2459 cERROR(1, ("transport encryption setting "
2460 "conflicts with existing tid"));
2461 } else {
2462 tcon = tconInfoAlloc();
2463 if (tcon == NULL) {
2464 rc = -ENOMEM;
2465 goto mount_fail_check;
2466 }
2467
2468 tcon->ses = pSesInfo;
2469 if (volume_info->password) {
2470 tcon->password = kstrdup(volume_info->password,
2471 GFP_KERNEL);
2472 if (!tcon->password) {
2473 rc = -ENOMEM;
2474 goto mount_fail_check;
2475 }
2476 }
2477
2478 if ((strchr(volume_info->UNC + 3, '\\') == NULL)
2479 && (strchr(volume_info->UNC + 3, '/') == NULL)) {
2480 cERROR(1, ("Missing share name"));
2481 rc = -ENODEV;
2482 goto mount_fail_check;
2483 } else {
2484 /* BB Do we need to wrap sesSem around
2485 * this TCon call and Unix SetFS as
2486 * we do on SessSetup and reconnect? */
2487 rc = CIFSTCon(xid, pSesInfo, volume_info->UNC,
2488 tcon, cifs_sb->local_nls);
2489 cFYI(1, ("CIFS Tcon rc = %d", rc));
2490 if (volume_info->nodfs) {
2491 tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
2492 cFYI(1, ("DFS disabled (%d)",
2493 tcon->Flags));
2494 }
2495 }
2496 if (rc)
2497 goto remote_path_check;
2498 tcon->seal = volume_info->seal;
2499 write_lock(&cifs_tcp_ses_lock);
2500 list_add(&tcon->tcon_list, &pSesInfo->tcon_list);
2501 write_unlock(&cifs_tcp_ses_lock);
2502 }
2503
2504 /* we can have only one retry value for a connection
2505 to a share so for resources mounted more than once
2506 to the same server share the last value passed in
2507 for the retry flag is used */
2508 tcon->retry = volume_info->retry;
2509 tcon->nocase = volume_info->nocase;
2510 tcon->local_lease = volume_info->local_lease;
2511 }
2512 if (pSesInfo) {
2513 if (pSesInfo->capabilities & CAP_LARGE_FILES)
2514 sb->s_maxbytes = MAX_LFS_FILESIZE;
2515 else
2516 sb->s_maxbytes = MAX_NON_LFS;
2517 }
2518 2564
2519 /* BB FIXME fix time_gran to be larger for LANMAN sessions */ 2565 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
2520 sb->s_time_gran = 100; 2566 sb->s_time_gran = 100;
2521 2567
2522 if (rc) 2568 /* search for existing tcon to this server share */
2569 tcon = cifs_get_tcon(pSesInfo, volume_info);
2570 if (IS_ERR(tcon)) {
2571 rc = PTR_ERR(tcon);
2572 tcon = NULL;
2523 goto remote_path_check; 2573 goto remote_path_check;
2574 }
2524 2575
2525 cifs_sb->tcon = tcon; 2576 cifs_sb->tcon = tcon;
2526 2577
@@ -2544,7 +2595,7 @@ try_mount_again:
2544 2595
2545 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { 2596 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) {
2546 cifs_sb->rsize = 1024 * 127; 2597 cifs_sb->rsize = 1024 * 127;
2547 cFYI(DBG2, ("no very large read support, rsize now 127K")); 2598 cFYI(DBG2, "no very large read support, rsize now 127K");
2548 } 2599 }
2549 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) 2600 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
2550 cifs_sb->wsize = min(cifs_sb->wsize, 2601 cifs_sb->wsize = min(cifs_sb->wsize,
@@ -2593,7 +2644,7 @@ remote_path_check:
2593 goto mount_fail_check; 2644 goto mount_fail_check;
2594 } 2645 }
2595 2646
2596 cFYI(1, ("Getting referral for: %s", full_path)); 2647 cFYI(1, "Getting referral for: %s", full_path);
2597 rc = get_dfs_path(xid, pSesInfo , full_path + 1, 2648 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2598 cifs_sb->local_nls, &num_referrals, &referrals, 2649 cifs_sb->local_nls, &num_referrals, &referrals,
2599 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 2650 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -2707,7 +2758,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2707 by Samba (not sure whether other servers allow 2758 by Samba (not sure whether other servers allow
2708 NTLMv2 password here) */ 2759 NTLMv2 password here) */
2709#ifdef CONFIG_CIFS_WEAK_PW_HASH 2760#ifdef CONFIG_CIFS_WEAK_PW_HASH
2710 if ((extended_security & CIFSSEC_MAY_LANMAN) && 2761 if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
2711 (ses->server->secType == LANMAN)) 2762 (ses->server->secType == LANMAN))
2712 calc_lanman_hash(tcon->password, ses->server->cryptKey, 2763 calc_lanman_hash(tcon->password, ses->server->cryptKey,
2713 ses->server->secMode & 2764 ses->server->secMode &
@@ -2778,13 +2829,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2778 if (length == 3) { 2829 if (length == 3) {
2779 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') && 2830 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
2780 (bcc_ptr[2] == 'C')) { 2831 (bcc_ptr[2] == 'C')) {
2781 cFYI(1, ("IPC connection")); 2832 cFYI(1, "IPC connection");
2782 tcon->ipc = 1; 2833 tcon->ipc = 1;
2783 } 2834 }
2784 } else if (length == 2) { 2835 } else if (length == 2) {
2785 if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) { 2836 if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
2786 /* the most common case */ 2837 /* the most common case */
2787 cFYI(1, ("disk share connection")); 2838 cFYI(1, "disk share connection");
2788 } 2839 }
2789 } 2840 }
2790 bcc_ptr += length + 1; 2841 bcc_ptr += length + 1;
@@ -2797,7 +2848,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2797 bytes_left, is_unicode, 2848 bytes_left, is_unicode,
2798 nls_codepage); 2849 nls_codepage);
2799 2850
2800 cFYI(1, ("nativeFileSystem=%s", tcon->nativeFileSystem)); 2851 cFYI(1, "nativeFileSystem=%s", tcon->nativeFileSystem);
2801 2852
2802 if ((smb_buffer_response->WordCount == 3) || 2853 if ((smb_buffer_response->WordCount == 3) ||
2803 (smb_buffer_response->WordCount == 7)) 2854 (smb_buffer_response->WordCount == 7))
@@ -2805,7 +2856,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2805 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport); 2856 tcon->Flags = le16_to_cpu(pSMBr->OptionalSupport);
2806 else 2857 else
2807 tcon->Flags = 0; 2858 tcon->Flags = 0;
2808 cFYI(1, ("Tcon flags: 0x%x ", tcon->Flags)); 2859 cFYI(1, "Tcon flags: 0x%x ", tcon->Flags);
2809 } else if ((rc == 0) && tcon == NULL) { 2860 } else if ((rc == 0) && tcon == NULL) {
2810 /* all we need to save for IPC$ connection */ 2861 /* all we need to save for IPC$ connection */
2811 ses->ipc_tid = smb_buffer_response->Tid; 2862 ses->ipc_tid = smb_buffer_response->Tid;
@@ -2833,57 +2884,61 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
2833 return rc; 2884 return rc;
2834} 2885}
2835 2886
2836int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, 2887int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses)
2837 struct nls_table *nls_info)
2838{ 2888{
2839 int rc = 0; 2889 int rc = 0;
2840 int first_time = 0; 2890 struct TCP_Server_Info *server = ses->server;
2841 struct TCP_Server_Info *server = pSesInfo->server; 2891
2842 2892 /* only send once per connect */
2843 /* what if server changes its buffer size after dropping the session? */ 2893 if (server->maxBuf != 0)
2844 if (server->maxBuf == 0) /* no need to send on reconnect */ { 2894 return 0;
2845 rc = CIFSSMBNegotiate(xid, pSesInfo); 2895
2846 if (rc == -EAGAIN) { 2896 rc = CIFSSMBNegotiate(xid, ses);
2847 /* retry only once on 1st time connection */ 2897 if (rc == -EAGAIN) {
2848 rc = CIFSSMBNegotiate(xid, pSesInfo); 2898 /* retry only once on 1st time connection */
2849 if (rc == -EAGAIN) 2899 rc = CIFSSMBNegotiate(xid, ses);
2850 rc = -EHOSTDOWN; 2900 if (rc == -EAGAIN)
2851 } 2901 rc = -EHOSTDOWN;
2852 if (rc == 0) { 2902 }
2853 spin_lock(&GlobalMid_Lock); 2903 if (rc == 0) {
2854 if (server->tcpStatus != CifsExiting) 2904 spin_lock(&GlobalMid_Lock);
2855 server->tcpStatus = CifsGood; 2905 if (server->tcpStatus != CifsExiting)
2856 else 2906 server->tcpStatus = CifsGood;
2857 rc = -EHOSTDOWN; 2907 else
2858 spin_unlock(&GlobalMid_Lock); 2908 rc = -EHOSTDOWN;
2909 spin_unlock(&GlobalMid_Lock);
2859 2910
2860 }
2861 first_time = 1;
2862 } 2911 }
2863 2912
2864 if (rc) 2913 return rc;
2865 goto ss_err_exit; 2914}
2915
2916
2917int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
2918 struct nls_table *nls_info)
2919{
2920 int rc = 0;
2921 struct TCP_Server_Info *server = ses->server;
2866 2922
2867 pSesInfo->flags = 0; 2923 ses->flags = 0;
2868 pSesInfo->capabilities = server->capabilities; 2924 ses->capabilities = server->capabilities;
2869 if (linuxExtEnabled == 0) 2925 if (linuxExtEnabled == 0)
2870 pSesInfo->capabilities &= (~CAP_UNIX); 2926 ses->capabilities &= (~CAP_UNIX);
2871 2927
2872 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 2928 cFYI(1, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
2873 server->secMode, server->capabilities, server->timeAdj)); 2929 server->secMode, server->capabilities, server->timeAdj);
2874 2930
2875 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); 2931 rc = CIFS_SessSetup(xid, ses, nls_info);
2876 if (rc) { 2932 if (rc) {
2877 cERROR(1, ("Send error in SessSetup = %d", rc)); 2933 cERROR(1, "Send error in SessSetup = %d", rc);
2878 } else { 2934 } else {
2879 cFYI(1, ("CIFS Session Established successfully")); 2935 cFYI(1, "CIFS Session Established successfully");
2880 spin_lock(&GlobalMid_Lock); 2936 spin_lock(&GlobalMid_Lock);
2881 pSesInfo->status = CifsGood; 2937 ses->status = CifsGood;
2882 pSesInfo->need_reconnect = false; 2938 ses->need_reconnect = false;
2883 spin_unlock(&GlobalMid_Lock); 2939 spin_unlock(&GlobalMid_Lock);
2884 } 2940 }
2885 2941
2886ss_err_exit:
2887 return rc; 2942 return rc;
2888} 2943}
2889 2944
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index e9f7ecc2714b..391816b461ca 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -73,7 +73,7 @@ cifs_bp_rename_retry:
73 namelen += (1 + temp->d_name.len); 73 namelen += (1 + temp->d_name.len);
74 temp = temp->d_parent; 74 temp = temp->d_parent;
75 if (temp == NULL) { 75 if (temp == NULL) {
76 cERROR(1, ("corrupt dentry")); 76 cERROR(1, "corrupt dentry");
77 return NULL; 77 return NULL;
78 } 78 }
79 } 79 }
@@ -90,19 +90,18 @@ cifs_bp_rename_retry:
90 full_path[namelen] = dirsep; 90 full_path[namelen] = dirsep;
91 strncpy(full_path + namelen + 1, temp->d_name.name, 91 strncpy(full_path + namelen + 1, temp->d_name.name,
92 temp->d_name.len); 92 temp->d_name.len);
93 cFYI(0, ("name: %s", full_path + namelen)); 93 cFYI(0, "name: %s", full_path + namelen);
94 } 94 }
95 temp = temp->d_parent; 95 temp = temp->d_parent;
96 if (temp == NULL) { 96 if (temp == NULL) {
97 cERROR(1, ("corrupt dentry")); 97 cERROR(1, "corrupt dentry");
98 kfree(full_path); 98 kfree(full_path);
99 return NULL; 99 return NULL;
100 } 100 }
101 } 101 }
102 if (namelen != pplen + dfsplen) { 102 if (namelen != pplen + dfsplen) {
103 cERROR(1, 103 cERROR(1, "did not end path lookup where expected namelen is %d",
104 ("did not end path lookup where expected namelen is %d", 104 namelen);
105 namelen));
106 /* presumably this is only possible if racing with a rename 105 /* presumably this is only possible if racing with a rename
107 of one of the parent directories (we can not lock the dentries 106 of one of the parent directories (we can not lock the dentries
108 above us to prevent this, but retrying should be harmless) */ 107 above us to prevent this, but retrying should be harmless) */
@@ -130,6 +129,12 @@ cifs_bp_rename_retry:
130 return full_path; 129 return full_path;
131} 130}
132 131
132/*
133 * When called with struct file pointer set to NULL, there is no way we could
134 * update file->private_data, but getting it stuck on openFileList provides a
135 * way to access it from cifs_fill_filedata and thereby set file->private_data
136 * from cifs_open.
137 */
133struct cifsFileInfo * 138struct cifsFileInfo *
134cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, 139cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
135 struct file *file, struct vfsmount *mnt, unsigned int oflags) 140 struct file *file, struct vfsmount *mnt, unsigned int oflags)
@@ -173,7 +178,7 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
173 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 178 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
174 pCifsInode->clientCanCacheAll = true; 179 pCifsInode->clientCanCacheAll = true;
175 pCifsInode->clientCanCacheRead = true; 180 pCifsInode->clientCanCacheRead = true;
176 cFYI(1, ("Exclusive Oplock inode %p", newinode)); 181 cFYI(1, "Exclusive Oplock inode %p", newinode);
177 } else if ((oplock & 0xF) == OPLOCK_READ) 182 } else if ((oplock & 0xF) == OPLOCK_READ)
178 pCifsInode->clientCanCacheRead = true; 183 pCifsInode->clientCanCacheRead = true;
179 } 184 }
@@ -183,16 +188,17 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
183} 188}
184 189
185int cifs_posix_open(char *full_path, struct inode **pinode, 190int cifs_posix_open(char *full_path, struct inode **pinode,
186 struct vfsmount *mnt, int mode, int oflags, 191 struct vfsmount *mnt, struct super_block *sb,
187 __u32 *poplock, __u16 *pnetfid, int xid) 192 int mode, int oflags,
193 __u32 *poplock, __u16 *pnetfid, int xid)
188{ 194{
189 int rc; 195 int rc;
190 FILE_UNIX_BASIC_INFO *presp_data; 196 FILE_UNIX_BASIC_INFO *presp_data;
191 __u32 posix_flags = 0; 197 __u32 posix_flags = 0;
192 struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); 198 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
193 struct cifs_fattr fattr; 199 struct cifs_fattr fattr;
194 200
195 cFYI(1, ("posix open %s", full_path)); 201 cFYI(1, "posix open %s", full_path);
196 202
197 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); 203 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
198 if (presp_data == NULL) 204 if (presp_data == NULL)
@@ -242,7 +248,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
242 248
243 /* get new inode and set it up */ 249 /* get new inode and set it up */
244 if (*pinode == NULL) { 250 if (*pinode == NULL) {
245 *pinode = cifs_iget(mnt->mnt_sb, &fattr); 251 cifs_fill_uniqueid(sb, &fattr);
252 *pinode = cifs_iget(sb, &fattr);
246 if (!*pinode) { 253 if (!*pinode) {
247 rc = -ENOMEM; 254 rc = -ENOMEM;
248 goto posix_open_ret; 255 goto posix_open_ret;
@@ -251,7 +258,18 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
251 cifs_fattr_to_inode(*pinode, &fattr); 258 cifs_fattr_to_inode(*pinode, &fattr);
252 } 259 }
253 260
254 cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt, oflags); 261 /*
262 * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
263 * file->private_data.
264 */
265 if (mnt) {
266 struct cifsFileInfo *pfile_info;
267
268 pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
269 oflags);
270 if (pfile_info == NULL)
271 rc = -ENOMEM;
272 }
255 273
256posix_open_ret: 274posix_open_ret:
257 kfree(presp_data); 275 kfree(presp_data);
@@ -315,13 +333,14 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
315 if (nd && (nd->flags & LOOKUP_OPEN)) 333 if (nd && (nd->flags & LOOKUP_OPEN))
316 oflags = nd->intent.open.flags; 334 oflags = nd->intent.open.flags;
317 else 335 else
318 oflags = FMODE_READ; 336 oflags = FMODE_READ | SMB_O_CREAT;
319 337
320 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && 338 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
321 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 339 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
322 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 340 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
323 rc = cifs_posix_open(full_path, &newinode, nd->path.mnt, 341 rc = cifs_posix_open(full_path, &newinode,
324 mode, oflags, &oplock, &fileHandle, xid); 342 nd ? nd->path.mnt : NULL,
343 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
325 /* EIO could indicate that (posix open) operation is not 344 /* EIO could indicate that (posix open) operation is not
326 supported, despite what server claimed in capability 345 supported, despite what server claimed in capability
327 negotation. EREMOTE indicates DFS junction, which is not 346 negotation. EREMOTE indicates DFS junction, which is not
@@ -358,7 +377,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
358 else if ((oflags & O_CREAT) == O_CREAT) 377 else if ((oflags & O_CREAT) == O_CREAT)
359 disposition = FILE_OPEN_IF; 378 disposition = FILE_OPEN_IF;
360 else 379 else
361 cFYI(1, ("Create flag not set in create function")); 380 cFYI(1, "Create flag not set in create function");
362 } 381 }
363 382
364 /* BB add processing to set equivalent of mode - e.g. via CreateX with 383 /* BB add processing to set equivalent of mode - e.g. via CreateX with
@@ -394,7 +413,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
394 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 413 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
395 } 414 }
396 if (rc) { 415 if (rc) {
397 cFYI(1, ("cifs_create returned 0x%x", rc)); 416 cFYI(1, "cifs_create returned 0x%x", rc);
398 goto cifs_create_out; 417 goto cifs_create_out;
399 } 418 }
400 419
@@ -457,15 +476,22 @@ cifs_create_set_dentry:
457 if (rc == 0) 476 if (rc == 0)
458 setup_cifs_dentry(tcon, direntry, newinode); 477 setup_cifs_dentry(tcon, direntry, newinode);
459 else 478 else
460 cFYI(1, ("Create worked, get_inode_info failed rc = %d", rc)); 479 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
461 480
462 /* nfsd case - nfs srv does not set nd */ 481 /* nfsd case - nfs srv does not set nd */
463 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { 482 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
464 /* mknod case - do not leave file open */ 483 /* mknod case - do not leave file open */
465 CIFSSMBClose(xid, tcon, fileHandle); 484 CIFSSMBClose(xid, tcon, fileHandle);
466 } else if (!(posix_create) && (newinode)) { 485 } else if (!(posix_create) && (newinode)) {
467 cifs_new_fileinfo(newinode, fileHandle, NULL, 486 struct cifsFileInfo *pfile_info;
468 nd->path.mnt, oflags); 487 /*
488 * cifs_fill_filedata() takes care of setting cifsFileInfo
489 * pointer to file->private_data.
490 */
491 pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
492 nd->path.mnt, oflags);
493 if (pfile_info == NULL)
494 rc = -ENOMEM;
469 } 495 }
470cifs_create_out: 496cifs_create_out:
471 kfree(buf); 497 kfree(buf);
@@ -531,7 +557,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
531 u16 fileHandle; 557 u16 fileHandle;
532 FILE_ALL_INFO *buf; 558 FILE_ALL_INFO *buf;
533 559
534 cFYI(1, ("sfu compat create special file")); 560 cFYI(1, "sfu compat create special file");
535 561
536 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 562 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
537 if (buf == NULL) { 563 if (buf == NULL) {
@@ -616,8 +642,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
616 642
617 xid = GetXid(); 643 xid = GetXid();
618 644
619 cFYI(1, ("parent inode = 0x%p name is: %s and dentry = 0x%p", 645 cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
620 parent_dir_inode, direntry->d_name.name, direntry)); 646 parent_dir_inode, direntry->d_name.name, direntry);
621 647
622 /* check whether path exists */ 648 /* check whether path exists */
623 649
@@ -632,7 +658,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
632 int i; 658 int i;
633 for (i = 0; i < direntry->d_name.len; i++) 659 for (i = 0; i < direntry->d_name.len; i++)
634 if (direntry->d_name.name[i] == '\\') { 660 if (direntry->d_name.name[i] == '\\') {
635 cFYI(1, ("Invalid file name")); 661 cFYI(1, "Invalid file name");
636 FreeXid(xid); 662 FreeXid(xid);
637 return ERR_PTR(-EINVAL); 663 return ERR_PTR(-EINVAL);
638 } 664 }
@@ -657,11 +683,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
657 } 683 }
658 684
659 if (direntry->d_inode != NULL) { 685 if (direntry->d_inode != NULL) {
660 cFYI(1, ("non-NULL inode in lookup")); 686 cFYI(1, "non-NULL inode in lookup");
661 } else { 687 } else {
662 cFYI(1, ("NULL inode in lookup")); 688 cFYI(1, "NULL inode in lookup");
663 } 689 }
664 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); 690 cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode);
665 691
666 /* Posix open is only called (at lookup time) for file create now. 692 /* Posix open is only called (at lookup time) for file create now.
667 * For opens (rather than creates), because we do not know if it 693 * For opens (rather than creates), because we do not know if it
@@ -678,6 +704,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
678 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && 704 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
679 (nd->intent.open.flags & O_CREAT)) { 705 (nd->intent.open.flags & O_CREAT)) {
680 rc = cifs_posix_open(full_path, &newInode, nd->path.mnt, 706 rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
707 parent_dir_inode->i_sb,
681 nd->intent.open.create_mode, 708 nd->intent.open.create_mode,
682 nd->intent.open.flags, &oplock, 709 nd->intent.open.flags, &oplock,
683 &fileHandle, xid); 710 &fileHandle, xid);
@@ -723,7 +750,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
723 /* if it was once a directory (but how can we tell?) we could do 750 /* if it was once a directory (but how can we tell?) we could do
724 shrink_dcache_parent(direntry); */ 751 shrink_dcache_parent(direntry); */
725 } else if (rc != -EACCES) { 752 } else if (rc != -EACCES) {
726 cERROR(1, ("Unexpected lookup error %d", rc)); 753 cERROR(1, "Unexpected lookup error %d", rc);
727 /* We special case check for Access Denied - since that 754 /* We special case check for Access Denied - since that
728 is a common return code */ 755 is a common return code */
729 } 756 }
@@ -742,8 +769,8 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
742 if (cifs_revalidate_dentry(direntry)) 769 if (cifs_revalidate_dentry(direntry))
743 return 0; 770 return 0;
744 } else { 771 } else {
745 cFYI(1, ("neg dentry 0x%p name = %s", 772 cFYI(1, "neg dentry 0x%p name = %s",
746 direntry, direntry->d_name.name)); 773 direntry, direntry->d_name.name);
747 if (time_after(jiffies, direntry->d_time + HZ) || 774 if (time_after(jiffies, direntry->d_time + HZ) ||
748 !lookupCacheEnabled) { 775 !lookupCacheEnabled) {
749 d_drop(direntry); 776 d_drop(direntry);
@@ -758,7 +785,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
758{ 785{
759 int rc = 0; 786 int rc = 0;
760 787
761 cFYI(1, ("In cifs d_delete, name = %s", direntry->d_name.name)); 788 cFYI(1, "In cifs d_delete, name = %s", direntry->d_name.name);
762 789
763 return rc; 790 return rc;
764} */ 791} */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 6f8a0e3fb25b..4db2c5e7283f 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -106,14 +106,14 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
106 /* search for server name delimiter */ 106 /* search for server name delimiter */
107 len = strlen(unc); 107 len = strlen(unc);
108 if (len < 3) { 108 if (len < 3) {
109 cFYI(1, ("%s: unc is too short: %s", __func__, unc)); 109 cFYI(1, "%s: unc is too short: %s", __func__, unc);
110 return -EINVAL; 110 return -EINVAL;
111 } 111 }
112 len -= 2; 112 len -= 2;
113 name = memchr(unc+2, '\\', len); 113 name = memchr(unc+2, '\\', len);
114 if (!name) { 114 if (!name) {
115 cFYI(1, ("%s: probably server name is whole unc: %s", 115 cFYI(1, "%s: probably server name is whole unc: %s",
116 __func__, unc)); 116 __func__, unc);
117 } else { 117 } else {
118 len = (name - unc) - 2/* leading // */; 118 len = (name - unc) - 2/* leading // */;
119 } 119 }
@@ -127,8 +127,8 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
127 name[len] = 0; 127 name[len] = 0;
128 128
129 if (is_ip(name)) { 129 if (is_ip(name)) {
130 cFYI(1, ("%s: it is IP, skipping dns upcall: %s", 130 cFYI(1, "%s: it is IP, skipping dns upcall: %s",
131 __func__, name)); 131 __func__, name);
132 data = name; 132 data = name;
133 goto skip_upcall; 133 goto skip_upcall;
134 } 134 }
@@ -138,7 +138,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
138 len = rkey->type_data.x[0]; 138 len = rkey->type_data.x[0];
139 data = rkey->payload.data; 139 data = rkey->payload.data;
140 } else { 140 } else {
141 cERROR(1, ("%s: unable to resolve: %s", __func__, name)); 141 cERROR(1, "%s: unable to resolve: %s", __func__, name);
142 goto out; 142 goto out;
143 } 143 }
144 144
@@ -148,10 +148,10 @@ skip_upcall:
148 if (*ip_addr) { 148 if (*ip_addr) {
149 memcpy(*ip_addr, data, len + 1); 149 memcpy(*ip_addr, data, len + 1);
150 if (!IS_ERR(rkey)) 150 if (!IS_ERR(rkey))
151 cFYI(1, ("%s: resolved: %s to %s", __func__, 151 cFYI(1, "%s: resolved: %s to %s", __func__,
152 name, 152 name,
153 *ip_addr 153 *ip_addr
154 )); 154 );
155 rc = 0; 155 rc = 0;
156 } else { 156 } else {
157 rc = -ENOMEM; 157 rc = -ENOMEM;
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 6177f7cca16a..993f82045bf6 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -49,7 +49,7 @@
49static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
50{ 50{
51 /* BB need to add code here eventually to enable export via NFSD */ 51 /* BB need to add code here eventually to enable export via NFSD */
52 cFYI(1, ("get parent for %p", dentry)); 52 cFYI(1, "get parent for %p", dentry);
53 return ERR_PTR(-EACCES); 53 return ERR_PTR(-EACCES);
54} 54}
55 55
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9b11a8f56f3a..a83541ec9713 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * vfs operations that deal with files 4 * vfs operations that deal with files
5 * 5 *
6 * Copyright (C) International Business Machines Corp., 2002,2007 6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org) 8 * Jeremy Allison (jra@samba.org)
9 * 9 *
@@ -108,8 +108,7 @@ static inline int cifs_get_disposition(unsigned int flags)
108/* all arguments to this function must be checked for validity in caller */ 108/* all arguments to this function must be checked for validity in caller */
109static inline int 109static inline int
110cifs_posix_open_inode_helper(struct inode *inode, struct file *file, 110cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
111 struct cifsInodeInfo *pCifsInode, 111 struct cifsInodeInfo *pCifsInode, __u32 oplock,
112 struct cifsFileInfo *pCifsFile, __u32 oplock,
113 u16 netfid) 112 u16 netfid)
114{ 113{
115 114
@@ -136,15 +135,15 @@ cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
136 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && 135 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
137 (file->f_path.dentry->d_inode->i_size == 136 (file->f_path.dentry->d_inode->i_size ==
138 (loff_t)le64_to_cpu(buf->EndOfFile))) { 137 (loff_t)le64_to_cpu(buf->EndOfFile))) {
139 cFYI(1, ("inode unchanged on server")); 138 cFYI(1, "inode unchanged on server");
140 } else { 139 } else {
141 if (file->f_path.dentry->d_inode->i_mapping) { 140 if (file->f_path.dentry->d_inode->i_mapping) {
142 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping); 141 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
143 if (rc != 0) 142 if (rc != 0)
144 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; 143 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
145 } 144 }
146 cFYI(1, ("invalidating remote inode since open detected it " 145 cFYI(1, "invalidating remote inode since open detected it "
147 "changed")); 146 "changed");
148 invalidate_remote_inode(file->f_path.dentry->d_inode); 147 invalidate_remote_inode(file->f_path.dentry->d_inode);
149 } */ 148 } */
150 149
@@ -152,8 +151,8 @@ psx_client_can_cache:
152 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 151 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
153 pCifsInode->clientCanCacheAll = true; 152 pCifsInode->clientCanCacheAll = true;
154 pCifsInode->clientCanCacheRead = true; 153 pCifsInode->clientCanCacheRead = true;
155 cFYI(1, ("Exclusive Oplock granted on inode %p", 154 cFYI(1, "Exclusive Oplock granted on inode %p",
156 file->f_path.dentry->d_inode)); 155 file->f_path.dentry->d_inode);
157 } else if ((oplock & 0xF) == OPLOCK_READ) 156 } else if ((oplock & 0xF) == OPLOCK_READ)
158 pCifsInode->clientCanCacheRead = true; 157 pCifsInode->clientCanCacheRead = true;
159 158
@@ -190,8 +189,8 @@ cifs_fill_filedata(struct file *file)
190 if (file->private_data != NULL) { 189 if (file->private_data != NULL) {
191 return pCifsFile; 190 return pCifsFile;
192 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) 191 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
193 cERROR(1, ("could not find file instance for " 192 cERROR(1, "could not find file instance for "
194 "new file %p", file)); 193 "new file %p", file);
195 return NULL; 194 return NULL;
196} 195}
197 196
@@ -217,7 +216,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
217 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && 216 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
218 (file->f_path.dentry->d_inode->i_size == 217 (file->f_path.dentry->d_inode->i_size ==
219 (loff_t)le64_to_cpu(buf->EndOfFile))) { 218 (loff_t)le64_to_cpu(buf->EndOfFile))) {
220 cFYI(1, ("inode unchanged on server")); 219 cFYI(1, "inode unchanged on server");
221 } else { 220 } else {
222 if (file->f_path.dentry->d_inode->i_mapping) { 221 if (file->f_path.dentry->d_inode->i_mapping) {
223 /* BB no need to lock inode until after invalidate 222 /* BB no need to lock inode until after invalidate
@@ -226,8 +225,8 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
226 if (rc != 0) 225 if (rc != 0)
227 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; 226 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
228 } 227 }
229 cFYI(1, ("invalidating remote inode since open detected it " 228 cFYI(1, "invalidating remote inode since open detected it "
230 "changed")); 229 "changed");
231 invalidate_remote_inode(file->f_path.dentry->d_inode); 230 invalidate_remote_inode(file->f_path.dentry->d_inode);
232 } 231 }
233 232
@@ -242,8 +241,8 @@ client_can_cache:
242 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { 241 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
243 pCifsInode->clientCanCacheAll = true; 242 pCifsInode->clientCanCacheAll = true;
244 pCifsInode->clientCanCacheRead = true; 243 pCifsInode->clientCanCacheRead = true;
245 cFYI(1, ("Exclusive Oplock granted on inode %p", 244 cFYI(1, "Exclusive Oplock granted on inode %p",
246 file->f_path.dentry->d_inode)); 245 file->f_path.dentry->d_inode);
247 } else if ((*oplock & 0xF) == OPLOCK_READ) 246 } else if ((*oplock & 0xF) == OPLOCK_READ)
248 pCifsInode->clientCanCacheRead = true; 247 pCifsInode->clientCanCacheRead = true;
249 248
@@ -285,8 +284,8 @@ int cifs_open(struct inode *inode, struct file *file)
285 return rc; 284 return rc;
286 } 285 }
287 286
288 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", 287 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
289 inode, file->f_flags, full_path)); 288 inode, file->f_flags, full_path);
290 289
291 if (oplockEnabled) 290 if (oplockEnabled)
292 oplock = REQ_OPLOCK; 291 oplock = REQ_OPLOCK;
@@ -298,27 +297,29 @@ int cifs_open(struct inode *inode, struct file *file)
298 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 297 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
299 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 298 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
300 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 299 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
300 oflags |= SMB_O_CREAT;
301 /* can not refresh inode info since size could be stale */ 301 /* can not refresh inode info since size could be stale */
302 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt, 302 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
303 cifs_sb->mnt_file_mode /* ignored */, 303 inode->i_sb,
304 oflags, &oplock, &netfid, xid); 304 cifs_sb->mnt_file_mode /* ignored */,
305 oflags, &oplock, &netfid, xid);
305 if (rc == 0) { 306 if (rc == 0) {
306 cFYI(1, ("posix open succeeded")); 307 cFYI(1, "posix open succeeded");
307 /* no need for special case handling of setting mode 308 /* no need for special case handling of setting mode
308 on read only files needed here */ 309 on read only files needed here */
309 310
310 pCifsFile = cifs_fill_filedata(file); 311 pCifsFile = cifs_fill_filedata(file);
311 cifs_posix_open_inode_helper(inode, file, pCifsInode, 312 cifs_posix_open_inode_helper(inode, file, pCifsInode,
312 pCifsFile, oplock, netfid); 313 oplock, netfid);
313 goto out; 314 goto out;
314 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 315 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
315 if (tcon->ses->serverNOS) 316 if (tcon->ses->serverNOS)
316 cERROR(1, ("server %s of type %s returned" 317 cERROR(1, "server %s of type %s returned"
317 " unexpected error on SMB posix open" 318 " unexpected error on SMB posix open"
318 ", disabling posix open support." 319 ", disabling posix open support."
319 " Check if server update available.", 320 " Check if server update available.",
320 tcon->ses->serverName, 321 tcon->ses->serverName,
321 tcon->ses->serverNOS)); 322 tcon->ses->serverNOS);
322 tcon->broken_posix_open = true; 323 tcon->broken_posix_open = true;
323 } else if ((rc != -EIO) && (rc != -EREMOTE) && 324 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
324 (rc != -EOPNOTSUPP)) /* path not found or net err */ 325 (rc != -EOPNOTSUPP)) /* path not found or net err */
@@ -386,7 +387,7 @@ int cifs_open(struct inode *inode, struct file *file)
386 & CIFS_MOUNT_MAP_SPECIAL_CHR); 387 & CIFS_MOUNT_MAP_SPECIAL_CHR);
387 } 388 }
388 if (rc) { 389 if (rc) {
389 cFYI(1, ("cifs_open returned 0x%x", rc)); 390 cFYI(1, "cifs_open returned 0x%x", rc);
390 goto out; 391 goto out;
391 } 392 }
392 393
@@ -469,7 +470,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
469 } 470 }
470 471
471 if (file->f_path.dentry == NULL) { 472 if (file->f_path.dentry == NULL) {
472 cERROR(1, ("no valid name if dentry freed")); 473 cERROR(1, "no valid name if dentry freed");
473 dump_stack(); 474 dump_stack();
474 rc = -EBADF; 475 rc = -EBADF;
475 goto reopen_error_exit; 476 goto reopen_error_exit;
@@ -477,7 +478,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
477 478
478 inode = file->f_path.dentry->d_inode; 479 inode = file->f_path.dentry->d_inode;
479 if (inode == NULL) { 480 if (inode == NULL) {
480 cERROR(1, ("inode not valid")); 481 cERROR(1, "inode not valid");
481 dump_stack(); 482 dump_stack();
482 rc = -EBADF; 483 rc = -EBADF;
483 goto reopen_error_exit; 484 goto reopen_error_exit;
@@ -499,8 +500,8 @@ reopen_error_exit:
499 return rc; 500 return rc;
500 } 501 }
501 502
502 cFYI(1, ("inode = 0x%p file flags 0x%x for %s", 503 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
503 inode, file->f_flags, full_path)); 504 inode, file->f_flags, full_path);
504 505
505 if (oplockEnabled) 506 if (oplockEnabled)
506 oplock = REQ_OPLOCK; 507 oplock = REQ_OPLOCK;
@@ -513,10 +514,11 @@ reopen_error_exit:
513 int oflags = (int) cifs_posix_convert_flags(file->f_flags); 514 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
514 /* can not refresh inode info since size could be stale */ 515 /* can not refresh inode info since size could be stale */
515 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt, 516 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
516 cifs_sb->mnt_file_mode /* ignored */, 517 inode->i_sb,
517 oflags, &oplock, &netfid, xid); 518 cifs_sb->mnt_file_mode /* ignored */,
519 oflags, &oplock, &netfid, xid);
518 if (rc == 0) { 520 if (rc == 0) {
519 cFYI(1, ("posix reopen succeeded")); 521 cFYI(1, "posix reopen succeeded");
520 goto reopen_success; 522 goto reopen_success;
521 } 523 }
522 /* fallthrough to retry open the old way on errors, especially 524 /* fallthrough to retry open the old way on errors, especially
@@ -537,8 +539,8 @@ reopen_error_exit:
537 CIFS_MOUNT_MAP_SPECIAL_CHR); 539 CIFS_MOUNT_MAP_SPECIAL_CHR);
538 if (rc) { 540 if (rc) {
539 mutex_unlock(&pCifsFile->fh_mutex); 541 mutex_unlock(&pCifsFile->fh_mutex);
540 cFYI(1, ("cifs_open returned 0x%x", rc)); 542 cFYI(1, "cifs_open returned 0x%x", rc);
541 cFYI(1, ("oplock: %d", oplock)); 543 cFYI(1, "oplock: %d", oplock);
542 } else { 544 } else {
543reopen_success: 545reopen_success:
544 pCifsFile->netfid = netfid; 546 pCifsFile->netfid = netfid;
@@ -570,8 +572,8 @@ reopen_success:
570 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 572 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
571 pCifsInode->clientCanCacheAll = true; 573 pCifsInode->clientCanCacheAll = true;
572 pCifsInode->clientCanCacheRead = true; 574 pCifsInode->clientCanCacheRead = true;
573 cFYI(1, ("Exclusive Oplock granted on inode %p", 575 cFYI(1, "Exclusive Oplock granted on inode %p",
574 file->f_path.dentry->d_inode)); 576 file->f_path.dentry->d_inode);
575 } else if ((oplock & 0xF) == OPLOCK_READ) { 577 } else if ((oplock & 0xF) == OPLOCK_READ) {
576 pCifsInode->clientCanCacheRead = true; 578 pCifsInode->clientCanCacheRead = true;
577 pCifsInode->clientCanCacheAll = false; 579 pCifsInode->clientCanCacheAll = false;
@@ -619,8 +621,7 @@ int cifs_close(struct inode *inode, struct file *file)
619 the struct would be in each open file, 621 the struct would be in each open file,
620 but this should give enough time to 622 but this should give enough time to
621 clear the socket */ 623 clear the socket */
622 cFYI(DBG2, 624 cFYI(DBG2, "close delay, write pending");
623 ("close delay, write pending"));
624 msleep(timeout); 625 msleep(timeout);
625 timeout *= 4; 626 timeout *= 4;
626 } 627 }
@@ -653,7 +654,7 @@ int cifs_close(struct inode *inode, struct file *file)
653 654
654 read_lock(&GlobalSMBSeslock); 655 read_lock(&GlobalSMBSeslock);
655 if (list_empty(&(CIFS_I(inode)->openFileList))) { 656 if (list_empty(&(CIFS_I(inode)->openFileList))) {
656 cFYI(1, ("closing last open instance for inode %p", inode)); 657 cFYI(1, "closing last open instance for inode %p", inode);
657 /* if the file is not open we do not know if we can cache info 658 /* if the file is not open we do not know if we can cache info
658 on this inode, much less write behind and read ahead */ 659 on this inode, much less write behind and read ahead */
659 CIFS_I(inode)->clientCanCacheRead = false; 660 CIFS_I(inode)->clientCanCacheRead = false;
@@ -674,7 +675,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
674 (struct cifsFileInfo *)file->private_data; 675 (struct cifsFileInfo *)file->private_data;
675 char *ptmp; 676 char *ptmp;
676 677
677 cFYI(1, ("Closedir inode = 0x%p", inode)); 678 cFYI(1, "Closedir inode = 0x%p", inode);
678 679
679 xid = GetXid(); 680 xid = GetXid();
680 681
@@ -685,22 +686,22 @@ int cifs_closedir(struct inode *inode, struct file *file)
685 686
686 pTcon = cifs_sb->tcon; 687 pTcon = cifs_sb->tcon;
687 688
688 cFYI(1, ("Freeing private data in close dir")); 689 cFYI(1, "Freeing private data in close dir");
689 write_lock(&GlobalSMBSeslock); 690 write_lock(&GlobalSMBSeslock);
690 if (!pCFileStruct->srch_inf.endOfSearch && 691 if (!pCFileStruct->srch_inf.endOfSearch &&
691 !pCFileStruct->invalidHandle) { 692 !pCFileStruct->invalidHandle) {
692 pCFileStruct->invalidHandle = true; 693 pCFileStruct->invalidHandle = true;
693 write_unlock(&GlobalSMBSeslock); 694 write_unlock(&GlobalSMBSeslock);
694 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); 695 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
695 cFYI(1, ("Closing uncompleted readdir with rc %d", 696 cFYI(1, "Closing uncompleted readdir with rc %d",
696 rc)); 697 rc);
697 /* not much we can do if it fails anyway, ignore rc */ 698 /* not much we can do if it fails anyway, ignore rc */
698 rc = 0; 699 rc = 0;
699 } else 700 } else
700 write_unlock(&GlobalSMBSeslock); 701 write_unlock(&GlobalSMBSeslock);
701 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; 702 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
702 if (ptmp) { 703 if (ptmp) {
703 cFYI(1, ("closedir free smb buf in srch struct")); 704 cFYI(1, "closedir free smb buf in srch struct");
704 pCFileStruct->srch_inf.ntwrk_buf_start = NULL; 705 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
705 if (pCFileStruct->srch_inf.smallBuf) 706 if (pCFileStruct->srch_inf.smallBuf)
706 cifs_small_buf_release(ptmp); 707 cifs_small_buf_release(ptmp);
@@ -748,49 +749,49 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
748 rc = -EACCES; 749 rc = -EACCES;
749 xid = GetXid(); 750 xid = GetXid();
750 751
751 cFYI(1, ("Lock parm: 0x%x flockflags: " 752 cFYI(1, "Lock parm: 0x%x flockflags: "
752 "0x%x flocktype: 0x%x start: %lld end: %lld", 753 "0x%x flocktype: 0x%x start: %lld end: %lld",
753 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start, 754 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
754 pfLock->fl_end)); 755 pfLock->fl_end);
755 756
756 if (pfLock->fl_flags & FL_POSIX) 757 if (pfLock->fl_flags & FL_POSIX)
757 cFYI(1, ("Posix")); 758 cFYI(1, "Posix");
758 if (pfLock->fl_flags & FL_FLOCK) 759 if (pfLock->fl_flags & FL_FLOCK)
759 cFYI(1, ("Flock")); 760 cFYI(1, "Flock");
760 if (pfLock->fl_flags & FL_SLEEP) { 761 if (pfLock->fl_flags & FL_SLEEP) {
761 cFYI(1, ("Blocking lock")); 762 cFYI(1, "Blocking lock");
762 wait_flag = true; 763 wait_flag = true;
763 } 764 }
764 if (pfLock->fl_flags & FL_ACCESS) 765 if (pfLock->fl_flags & FL_ACCESS)
765 cFYI(1, ("Process suspended by mandatory locking - " 766 cFYI(1, "Process suspended by mandatory locking - "
766 "not implemented yet")); 767 "not implemented yet");
767 if (pfLock->fl_flags & FL_LEASE) 768 if (pfLock->fl_flags & FL_LEASE)
768 cFYI(1, ("Lease on file - not implemented yet")); 769 cFYI(1, "Lease on file - not implemented yet");
769 if (pfLock->fl_flags & 770 if (pfLock->fl_flags &
770 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) 771 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
771 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags)); 772 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
772 773
773 if (pfLock->fl_type == F_WRLCK) { 774 if (pfLock->fl_type == F_WRLCK) {
774 cFYI(1, ("F_WRLCK ")); 775 cFYI(1, "F_WRLCK ");
775 numLock = 1; 776 numLock = 1;
776 } else if (pfLock->fl_type == F_UNLCK) { 777 } else if (pfLock->fl_type == F_UNLCK) {
777 cFYI(1, ("F_UNLCK")); 778 cFYI(1, "F_UNLCK");
778 numUnlock = 1; 779 numUnlock = 1;
779 /* Check if unlock includes more than 780 /* Check if unlock includes more than
780 one lock range */ 781 one lock range */
781 } else if (pfLock->fl_type == F_RDLCK) { 782 } else if (pfLock->fl_type == F_RDLCK) {
782 cFYI(1, ("F_RDLCK")); 783 cFYI(1, "F_RDLCK");
783 lockType |= LOCKING_ANDX_SHARED_LOCK; 784 lockType |= LOCKING_ANDX_SHARED_LOCK;
784 numLock = 1; 785 numLock = 1;
785 } else if (pfLock->fl_type == F_EXLCK) { 786 } else if (pfLock->fl_type == F_EXLCK) {
786 cFYI(1, ("F_EXLCK")); 787 cFYI(1, "F_EXLCK");
787 numLock = 1; 788 numLock = 1;
788 } else if (pfLock->fl_type == F_SHLCK) { 789 } else if (pfLock->fl_type == F_SHLCK) {
789 cFYI(1, ("F_SHLCK")); 790 cFYI(1, "F_SHLCK");
790 lockType |= LOCKING_ANDX_SHARED_LOCK; 791 lockType |= LOCKING_ANDX_SHARED_LOCK;
791 numLock = 1; 792 numLock = 1;
792 } else 793 } else
793 cFYI(1, ("Unknown type of lock")); 794 cFYI(1, "Unknown type of lock");
794 795
795 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 796 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
796 tcon = cifs_sb->tcon; 797 tcon = cifs_sb->tcon;
@@ -833,8 +834,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
833 0 /* wait flag */ ); 834 0 /* wait flag */ );
834 pfLock->fl_type = F_UNLCK; 835 pfLock->fl_type = F_UNLCK;
835 if (rc != 0) 836 if (rc != 0)
836 cERROR(1, ("Error unlocking previously locked " 837 cERROR(1, "Error unlocking previously locked "
837 "range %d during test of lock", rc)); 838 "range %d during test of lock", rc);
838 rc = 0; 839 rc = 0;
839 840
840 } else { 841 } else {
@@ -856,9 +857,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
856 0 /* wait flag */); 857 0 /* wait flag */);
857 pfLock->fl_type = F_RDLCK; 858 pfLock->fl_type = F_RDLCK;
858 if (rc != 0) 859 if (rc != 0)
859 cERROR(1, ("Error unlocking " 860 cERROR(1, "Error unlocking "
860 "previously locked range %d " 861 "previously locked range %d "
861 "during test of lock", rc)); 862 "during test of lock", rc);
862 rc = 0; 863 rc = 0;
863 } else { 864 } else {
864 pfLock->fl_type = F_WRLCK; 865 pfLock->fl_type = F_WRLCK;
@@ -923,9 +924,10 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
923 1, 0, li->type, false); 924 1, 0, li->type, false);
924 if (stored_rc) 925 if (stored_rc)
925 rc = stored_rc; 926 rc = stored_rc;
926 927 else {
927 list_del(&li->llist); 928 list_del(&li->llist);
928 kfree(li); 929 kfree(li);
930 }
929 } 931 }
930 } 932 }
931 mutex_unlock(&fid->lock_mutex); 933 mutex_unlock(&fid->lock_mutex);
@@ -988,9 +990,8 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
988 990
989 pTcon = cifs_sb->tcon; 991 pTcon = cifs_sb->tcon;
990 992
991 /* cFYI(1, 993 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
992 (" write %d bytes to offset %lld of %s", write_size, 994 *poffset, file->f_path.dentry->d_name.name); */
993 *poffset, file->f_path.dentry->d_name.name)); */
994 995
995 if (file->private_data == NULL) 996 if (file->private_data == NULL)
996 return -EBADF; 997 return -EBADF;
@@ -1091,8 +1092,8 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1091 1092
1092 pTcon = cifs_sb->tcon; 1093 pTcon = cifs_sb->tcon;
1093 1094
1094 cFYI(1, ("write %zd bytes to offset %lld of %s", write_size, 1095 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1095 *poffset, file->f_path.dentry->d_name.name)); 1096 *poffset, file->f_path.dentry->d_name.name);
1096 1097
1097 if (file->private_data == NULL) 1098 if (file->private_data == NULL)
1098 return -EBADF; 1099 return -EBADF;
@@ -1233,7 +1234,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1233 it being zero) during stress testcases so we need to check for it */ 1234 it being zero) during stress testcases so we need to check for it */
1234 1235
1235 if (cifs_inode == NULL) { 1236 if (cifs_inode == NULL) {
1236 cERROR(1, ("Null inode passed to cifs_writeable_file")); 1237 cERROR(1, "Null inode passed to cifs_writeable_file");
1237 dump_stack(); 1238 dump_stack();
1238 return NULL; 1239 return NULL;
1239 } 1240 }
@@ -1277,7 +1278,7 @@ refind_writable:
1277 again. Note that it would be bad 1278 again. Note that it would be bad
1278 to hold up writepages here (rather than 1279 to hold up writepages here (rather than
1279 in caller) with continuous retries */ 1280 in caller) with continuous retries */
1280 cFYI(1, ("wp failed on reopen file")); 1281 cFYI(1, "wp failed on reopen file");
1281 read_lock(&GlobalSMBSeslock); 1282 read_lock(&GlobalSMBSeslock);
1282 /* can not use this handle, no write 1283 /* can not use this handle, no write
1283 pending on this one after all */ 1284 pending on this one after all */
@@ -1353,7 +1354,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1353 else if (bytes_written < 0) 1354 else if (bytes_written < 0)
1354 rc = bytes_written; 1355 rc = bytes_written;
1355 } else { 1356 } else {
1356 cFYI(1, ("No writeable filehandles for inode")); 1357 cFYI(1, "No writeable filehandles for inode");
1357 rc = -EIO; 1358 rc = -EIO;
1358 } 1359 }
1359 1360
@@ -1525,7 +1526,7 @@ retry:
1525 */ 1526 */
1526 open_file = find_writable_file(CIFS_I(mapping->host)); 1527 open_file = find_writable_file(CIFS_I(mapping->host));
1527 if (!open_file) { 1528 if (!open_file) {
1528 cERROR(1, ("No writable handles for inode")); 1529 cERROR(1, "No writable handles for inode");
1529 rc = -EBADF; 1530 rc = -EBADF;
1530 } else { 1531 } else {
1531 long_op = cifs_write_timeout(cifsi, offset); 1532 long_op = cifs_write_timeout(cifsi, offset);
@@ -1538,8 +1539,8 @@ retry:
1538 cifs_update_eof(cifsi, offset, bytes_written); 1539 cifs_update_eof(cifsi, offset, bytes_written);
1539 1540
1540 if (rc || bytes_written < bytes_to_write) { 1541 if (rc || bytes_written < bytes_to_write) {
1541 cERROR(1, ("Write2 ret %d, wrote %d", 1542 cERROR(1, "Write2 ret %d, wrote %d",
1542 rc, bytes_written)); 1543 rc, bytes_written);
1543 /* BB what if continued retry is 1544 /* BB what if continued retry is
1544 requested via mount flags? */ 1545 requested via mount flags? */
1545 if (rc == -ENOSPC) 1546 if (rc == -ENOSPC)
@@ -1600,7 +1601,7 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1600/* BB add check for wbc flags */ 1601/* BB add check for wbc flags */
1601 page_cache_get(page); 1602 page_cache_get(page);
1602 if (!PageUptodate(page)) 1603 if (!PageUptodate(page))
1603 cFYI(1, ("ppw - page not up to date")); 1604 cFYI(1, "ppw - page not up to date");
1604 1605
1605 /* 1606 /*
1606 * Set the "writeback" flag, and clear "dirty" in the radix tree. 1607 * Set the "writeback" flag, and clear "dirty" in the radix tree.
@@ -1629,8 +1630,8 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
1629 int rc; 1630 int rc;
1630 struct inode *inode = mapping->host; 1631 struct inode *inode = mapping->host;
1631 1632
1632 cFYI(1, ("write_end for page %p from pos %lld with %d bytes", 1633 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1633 page, pos, copied)); 1634 page, pos, copied);
1634 1635
1635 if (PageChecked(page)) { 1636 if (PageChecked(page)) {
1636 if (copied == len) 1637 if (copied == len)
@@ -1686,8 +1687,8 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1686 1687
1687 xid = GetXid(); 1688 xid = GetXid();
1688 1689
1689 cFYI(1, ("Sync file - name: %s datasync: 0x%x", 1690 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1690 dentry->d_name.name, datasync)); 1691 dentry->d_name.name, datasync);
1691 1692
1692 rc = filemap_write_and_wait(inode->i_mapping); 1693 rc = filemap_write_and_wait(inode->i_mapping);
1693 if (rc == 0) { 1694 if (rc == 0) {
@@ -1711,7 +1712,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1711 unsigned int rpages = 0; 1712 unsigned int rpages = 0;
1712 int rc = 0; 1713 int rc = 0;
1713 1714
1714 cFYI(1, ("sync page %p",page)); 1715 cFYI(1, "sync page %p", page);
1715 mapping = page->mapping; 1716 mapping = page->mapping;
1716 if (!mapping) 1717 if (!mapping)
1717 return 0; 1718 return 0;
@@ -1722,7 +1723,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1722/* fill in rpages then 1723/* fill in rpages then
1723 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */ 1724 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1724 1725
1725/* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index)); 1726/* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1726 1727
1727#if 0 1728#if 0
1728 if (rc < 0) 1729 if (rc < 0)
@@ -1756,7 +1757,7 @@ int cifs_flush(struct file *file, fl_owner_t id)
1756 CIFS_I(inode)->write_behind_rc = 0; 1757 CIFS_I(inode)->write_behind_rc = 0;
1757 } 1758 }
1758 1759
1759 cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc)); 1760 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1760 1761
1761 return rc; 1762 return rc;
1762} 1763}
@@ -1788,7 +1789,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1788 open_file = (struct cifsFileInfo *)file->private_data; 1789 open_file = (struct cifsFileInfo *)file->private_data;
1789 1790
1790 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1791 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1791 cFYI(1, ("attempting read on write only file instance")); 1792 cFYI(1, "attempting read on write only file instance");
1792 1793
1793 for (total_read = 0, current_offset = read_data; 1794 for (total_read = 0, current_offset = read_data;
1794 read_size > total_read; 1795 read_size > total_read;
@@ -1869,7 +1870,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1869 open_file = (struct cifsFileInfo *)file->private_data; 1870 open_file = (struct cifsFileInfo *)file->private_data;
1870 1871
1871 if ((file->f_flags & O_ACCMODE) == O_WRONLY) 1872 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1872 cFYI(1, ("attempting read on write only file instance")); 1873 cFYI(1, "attempting read on write only file instance");
1873 1874
1874 for (total_read = 0, current_offset = read_data; 1875 for (total_read = 0, current_offset = read_data;
1875 read_size > total_read; 1876 read_size > total_read;
@@ -1920,7 +1921,7 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1920 xid = GetXid(); 1921 xid = GetXid();
1921 rc = cifs_revalidate_file(file); 1922 rc = cifs_revalidate_file(file);
1922 if (rc) { 1923 if (rc) {
1923 cFYI(1, ("Validation prior to mmap failed, error=%d", rc)); 1924 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1924 FreeXid(xid); 1925 FreeXid(xid);
1925 return rc; 1926 return rc;
1926 } 1927 }
@@ -1931,8 +1932,7 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1931 1932
1932 1933
1933static void cifs_copy_cache_pages(struct address_space *mapping, 1934static void cifs_copy_cache_pages(struct address_space *mapping,
1934 struct list_head *pages, int bytes_read, char *data, 1935 struct list_head *pages, int bytes_read, char *data)
1935 struct pagevec *plru_pvec)
1936{ 1936{
1937 struct page *page; 1937 struct page *page;
1938 char *target; 1938 char *target;
@@ -1944,10 +1944,10 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1944 page = list_entry(pages->prev, struct page, lru); 1944 page = list_entry(pages->prev, struct page, lru);
1945 list_del(&page->lru); 1945 list_del(&page->lru);
1946 1946
1947 if (add_to_page_cache(page, mapping, page->index, 1947 if (add_to_page_cache_lru(page, mapping, page->index,
1948 GFP_KERNEL)) { 1948 GFP_KERNEL)) {
1949 page_cache_release(page); 1949 page_cache_release(page);
1950 cFYI(1, ("Add page cache failed")); 1950 cFYI(1, "Add page cache failed");
1951 data += PAGE_CACHE_SIZE; 1951 data += PAGE_CACHE_SIZE;
1952 bytes_read -= PAGE_CACHE_SIZE; 1952 bytes_read -= PAGE_CACHE_SIZE;
1953 continue; 1953 continue;
@@ -1970,8 +1970,6 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
1970 flush_dcache_page(page); 1970 flush_dcache_page(page);
1971 SetPageUptodate(page); 1971 SetPageUptodate(page);
1972 unlock_page(page); 1972 unlock_page(page);
1973 if (!pagevec_add(plru_pvec, page))
1974 __pagevec_lru_add_file(plru_pvec);
1975 data += PAGE_CACHE_SIZE; 1973 data += PAGE_CACHE_SIZE;
1976 } 1974 }
1977 return; 1975 return;
@@ -1990,7 +1988,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1990 unsigned int read_size, i; 1988 unsigned int read_size, i;
1991 char *smb_read_data = NULL; 1989 char *smb_read_data = NULL;
1992 struct smb_com_read_rsp *pSMBr; 1990 struct smb_com_read_rsp *pSMBr;
1993 struct pagevec lru_pvec;
1994 struct cifsFileInfo *open_file; 1991 struct cifsFileInfo *open_file;
1995 int buf_type = CIFS_NO_BUFFER; 1992 int buf_type = CIFS_NO_BUFFER;
1996 1993
@@ -2004,8 +2001,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2004 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2001 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2005 pTcon = cifs_sb->tcon; 2002 pTcon = cifs_sb->tcon;
2006 2003
2007 pagevec_init(&lru_pvec, 0); 2004 cFYI(DBG2, "rpages: num pages %d", num_pages);
2008 cFYI(DBG2, ("rpages: num pages %d", num_pages));
2009 for (i = 0; i < num_pages; ) { 2005 for (i = 0; i < num_pages; ) {
2010 unsigned contig_pages; 2006 unsigned contig_pages;
2011 struct page *tmp_page; 2007 struct page *tmp_page;
@@ -2038,8 +2034,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2038 /* Read size needs to be in multiples of one page */ 2034 /* Read size needs to be in multiples of one page */
2039 read_size = min_t(const unsigned int, read_size, 2035 read_size = min_t(const unsigned int, read_size,
2040 cifs_sb->rsize & PAGE_CACHE_MASK); 2036 cifs_sb->rsize & PAGE_CACHE_MASK);
2041 cFYI(DBG2, ("rpages: read size 0x%x contiguous pages %d", 2037 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
2042 read_size, contig_pages)); 2038 read_size, contig_pages);
2043 rc = -EAGAIN; 2039 rc = -EAGAIN;
2044 while (rc == -EAGAIN) { 2040 while (rc == -EAGAIN) {
2045 if ((open_file->invalidHandle) && 2041 if ((open_file->invalidHandle) &&
@@ -2066,14 +2062,14 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2066 } 2062 }
2067 } 2063 }
2068 if ((rc < 0) || (smb_read_data == NULL)) { 2064 if ((rc < 0) || (smb_read_data == NULL)) {
2069 cFYI(1, ("Read error in readpages: %d", rc)); 2065 cFYI(1, "Read error in readpages: %d", rc);
2070 break; 2066 break;
2071 } else if (bytes_read > 0) { 2067 } else if (bytes_read > 0) {
2072 task_io_account_read(bytes_read); 2068 task_io_account_read(bytes_read);
2073 pSMBr = (struct smb_com_read_rsp *)smb_read_data; 2069 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2074 cifs_copy_cache_pages(mapping, page_list, bytes_read, 2070 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2075 smb_read_data + 4 /* RFC1001 hdr */ + 2071 smb_read_data + 4 /* RFC1001 hdr */ +
2076 le16_to_cpu(pSMBr->DataOffset), &lru_pvec); 2072 le16_to_cpu(pSMBr->DataOffset));
2077 2073
2078 i += bytes_read >> PAGE_CACHE_SHIFT; 2074 i += bytes_read >> PAGE_CACHE_SHIFT;
2079 cifs_stats_bytes_read(pTcon, bytes_read); 2075 cifs_stats_bytes_read(pTcon, bytes_read);
@@ -2089,9 +2085,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2089 /* break; */ 2085 /* break; */
2090 } 2086 }
2091 } else { 2087 } else {
2092 cFYI(1, ("No bytes read (%d) at offset %lld . " 2088 cFYI(1, "No bytes read (%d) at offset %lld . "
2093 "Cleaning remaining pages from readahead list", 2089 "Cleaning remaining pages from readahead list",
2094 bytes_read, offset)); 2090 bytes_read, offset);
2095 /* BB turn off caching and do new lookup on 2091 /* BB turn off caching and do new lookup on
2096 file size at server? */ 2092 file size at server? */
2097 break; 2093 break;
@@ -2106,8 +2102,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2106 bytes_read = 0; 2102 bytes_read = 0;
2107 } 2103 }
2108 2104
2109 pagevec_lru_add_file(&lru_pvec);
2110
2111/* need to free smb_read_data buf before exit */ 2105/* need to free smb_read_data buf before exit */
2112 if (smb_read_data) { 2106 if (smb_read_data) {
2113 if (buf_type == CIFS_SMALL_BUFFER) 2107 if (buf_type == CIFS_SMALL_BUFFER)
@@ -2136,7 +2130,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
2136 if (rc < 0) 2130 if (rc < 0)
2137 goto io_error; 2131 goto io_error;
2138 else 2132 else
2139 cFYI(1, ("Bytes read %d", rc)); 2133 cFYI(1, "Bytes read %d", rc);
2140 2134
2141 file->f_path.dentry->d_inode->i_atime = 2135 file->f_path.dentry->d_inode->i_atime =
2142 current_fs_time(file->f_path.dentry->d_inode->i_sb); 2136 current_fs_time(file->f_path.dentry->d_inode->i_sb);
@@ -2168,8 +2162,8 @@ static int cifs_readpage(struct file *file, struct page *page)
2168 return rc; 2162 return rc;
2169 } 2163 }
2170 2164
2171 cFYI(1, ("readpage %p at offset %d 0x%x\n", 2165 cFYI(1, "readpage %p at offset %d 0x%x\n",
2172 page, (int)offset, (int)offset)); 2166 page, (int)offset, (int)offset);
2173 2167
2174 rc = cifs_readpage_worker(file, page, &offset); 2168 rc = cifs_readpage_worker(file, page, &offset);
2175 2169
@@ -2239,7 +2233,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
2239 struct page *page; 2233 struct page *page;
2240 int rc = 0; 2234 int rc = 0;
2241 2235
2242 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); 2236 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2243 2237
2244 page = grab_cache_page_write_begin(mapping, index, flags); 2238 page = grab_cache_page_write_begin(mapping, index, flags);
2245 if (!page) { 2239 if (!page) {
@@ -2311,12 +2305,10 @@ cifs_oplock_break(struct slow_work *work)
2311 int rc, waitrc = 0; 2305 int rc, waitrc = 0;
2312 2306
2313 if (inode && S_ISREG(inode->i_mode)) { 2307 if (inode && S_ISREG(inode->i_mode)) {
2314#ifdef CONFIG_CIFS_EXPERIMENTAL 2308 if (cinode->clientCanCacheRead)
2315 if (cinode->clientCanCacheAll == 0)
2316 break_lease(inode, O_RDONLY); 2309 break_lease(inode, O_RDONLY);
2317 else if (cinode->clientCanCacheRead == 0) 2310 else
2318 break_lease(inode, O_WRONLY); 2311 break_lease(inode, O_WRONLY);
2319#endif
2320 rc = filemap_fdatawrite(inode->i_mapping); 2312 rc = filemap_fdatawrite(inode->i_mapping);
2321 if (cinode->clientCanCacheRead == 0) { 2313 if (cinode->clientCanCacheRead == 0) {
2322 waitrc = filemap_fdatawait(inode->i_mapping); 2314 waitrc = filemap_fdatawait(inode->i_mapping);
@@ -2326,7 +2318,7 @@ cifs_oplock_break(struct slow_work *work)
2326 rc = waitrc; 2318 rc = waitrc;
2327 if (rc) 2319 if (rc)
2328 cinode->write_behind_rc = rc; 2320 cinode->write_behind_rc = rc;
2329 cFYI(1, ("Oplock flush inode %p rc %d", inode, rc)); 2321 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2330 } 2322 }
2331 2323
2332 /* 2324 /*
@@ -2338,7 +2330,7 @@ cifs_oplock_break(struct slow_work *work)
2338 if (!cfile->closePend && !cfile->oplock_break_cancelled) { 2330 if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2339 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0, 2331 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
2340 LOCKING_ANDX_OPLOCK_RELEASE, false); 2332 LOCKING_ANDX_OPLOCK_RELEASE, false);
2341 cFYI(1, ("Oplock release rc = %d", rc)); 2333 cFYI(1, "Oplock release rc = %d", rc);
2342 } 2334 }
2343} 2335}
2344 2336
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35ec11716213..62b324f26a56 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/inode.c 2 * fs/cifs/inode.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2008 4 * Copyright (C) International Business Machines Corp., 2002,2010
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -86,30 +86,30 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
86{ 86{
87 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 87 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
88 88
89 cFYI(1, ("%s: revalidating inode %llu", __func__, cifs_i->uniqueid)); 89 cFYI(1, "%s: revalidating inode %llu", __func__, cifs_i->uniqueid);
90 90
91 if (inode->i_state & I_NEW) { 91 if (inode->i_state & I_NEW) {
92 cFYI(1, ("%s: inode %llu is new", __func__, cifs_i->uniqueid)); 92 cFYI(1, "%s: inode %llu is new", __func__, cifs_i->uniqueid);
93 return; 93 return;
94 } 94 }
95 95
96 /* don't bother with revalidation if we have an oplock */ 96 /* don't bother with revalidation if we have an oplock */
97 if (cifs_i->clientCanCacheRead) { 97 if (cifs_i->clientCanCacheRead) {
98 cFYI(1, ("%s: inode %llu is oplocked", __func__, 98 cFYI(1, "%s: inode %llu is oplocked", __func__,
99 cifs_i->uniqueid)); 99 cifs_i->uniqueid);
100 return; 100 return;
101 } 101 }
102 102
103 /* revalidate if mtime or size have changed */ 103 /* revalidate if mtime or size have changed */
104 if (timespec_equal(&inode->i_mtime, &fattr->cf_mtime) && 104 if (timespec_equal(&inode->i_mtime, &fattr->cf_mtime) &&
105 cifs_i->server_eof == fattr->cf_eof) { 105 cifs_i->server_eof == fattr->cf_eof) {
106 cFYI(1, ("%s: inode %llu is unchanged", __func__, 106 cFYI(1, "%s: inode %llu is unchanged", __func__,
107 cifs_i->uniqueid)); 107 cifs_i->uniqueid);
108 return; 108 return;
109 } 109 }
110 110
111 cFYI(1, ("%s: invalidating inode %llu mapping", __func__, 111 cFYI(1, "%s: invalidating inode %llu mapping", __func__,
112 cifs_i->uniqueid)); 112 cifs_i->uniqueid);
113 cifs_i->invalid_mapping = true; 113 cifs_i->invalid_mapping = true;
114} 114}
115 115
@@ -137,15 +137,14 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
137 inode->i_mode = fattr->cf_mode; 137 inode->i_mode = fattr->cf_mode;
138 138
139 cifs_i->cifsAttrs = fattr->cf_cifsattrs; 139 cifs_i->cifsAttrs = fattr->cf_cifsattrs;
140 cifs_i->uniqueid = fattr->cf_uniqueid;
141 140
142 if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) 141 if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
143 cifs_i->time = 0; 142 cifs_i->time = 0;
144 else 143 else
145 cifs_i->time = jiffies; 144 cifs_i->time = jiffies;
146 145
147 cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode, 146 cFYI(1, "inode 0x%p old_time=%ld new_time=%ld", inode,
148 oldtime, cifs_i->time)); 147 oldtime, cifs_i->time);
149 148
150 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING; 149 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
151 150
@@ -170,6 +169,17 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
170 cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL); 169 cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
171} 170}
172 171
172void
173cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr)
174{
175 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
176
177 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
178 return;
179
180 fattr->cf_uniqueid = iunique(sb, ROOT_I);
181}
182
173/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */ 183/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */
174void 184void
175cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, 185cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
@@ -227,7 +237,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
227 /* safest to call it a file if we do not know */ 237 /* safest to call it a file if we do not know */
228 fattr->cf_mode |= S_IFREG; 238 fattr->cf_mode |= S_IFREG;
229 fattr->cf_dtype = DT_REG; 239 fattr->cf_dtype = DT_REG;
230 cFYI(1, ("unknown type %d", le32_to_cpu(info->Type))); 240 cFYI(1, "unknown type %d", le32_to_cpu(info->Type));
231 break; 241 break;
232 } 242 }
233 243
@@ -256,7 +266,7 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
256{ 266{
257 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 267 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
258 268
259 cFYI(1, ("creating fake fattr for DFS referral")); 269 cFYI(1, "creating fake fattr for DFS referral");
260 270
261 memset(fattr, 0, sizeof(*fattr)); 271 memset(fattr, 0, sizeof(*fattr));
262 fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU; 272 fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
@@ -305,7 +315,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
305 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 315 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
306 316
307 tcon = cifs_sb->tcon; 317 tcon = cifs_sb->tcon;
308 cFYI(1, ("Getting info on %s", full_path)); 318 cFYI(1, "Getting info on %s", full_path);
309 319
310 /* could have done a find first instead but this returns more info */ 320 /* could have done a find first instead but this returns more info */
311 rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, 321 rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
@@ -323,6 +333,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
323 333
324 if (*pinode == NULL) { 334 if (*pinode == NULL) {
325 /* get new inode */ 335 /* get new inode */
336 cifs_fill_uniqueid(sb, &fattr);
326 *pinode = cifs_iget(sb, &fattr); 337 *pinode = cifs_iget(sb, &fattr);
327 if (!*pinode) 338 if (!*pinode)
328 rc = -ENOMEM; 339 rc = -ENOMEM;
@@ -373,7 +384,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
373 &bytes_read, &pbuf, &buf_type); 384 &bytes_read, &pbuf, &buf_type);
374 if ((rc == 0) && (bytes_read >= 8)) { 385 if ((rc == 0) && (bytes_read >= 8)) {
375 if (memcmp("IntxBLK", pbuf, 8) == 0) { 386 if (memcmp("IntxBLK", pbuf, 8) == 0) {
376 cFYI(1, ("Block device")); 387 cFYI(1, "Block device");
377 fattr->cf_mode |= S_IFBLK; 388 fattr->cf_mode |= S_IFBLK;
378 fattr->cf_dtype = DT_BLK; 389 fattr->cf_dtype = DT_BLK;
379 if (bytes_read == 24) { 390 if (bytes_read == 24) {
@@ -385,7 +396,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
385 fattr->cf_rdev = MKDEV(mjr, mnr); 396 fattr->cf_rdev = MKDEV(mjr, mnr);
386 } 397 }
387 } else if (memcmp("IntxCHR", pbuf, 8) == 0) { 398 } else if (memcmp("IntxCHR", pbuf, 8) == 0) {
388 cFYI(1, ("Char device")); 399 cFYI(1, "Char device");
389 fattr->cf_mode |= S_IFCHR; 400 fattr->cf_mode |= S_IFCHR;
390 fattr->cf_dtype = DT_CHR; 401 fattr->cf_dtype = DT_CHR;
391 if (bytes_read == 24) { 402 if (bytes_read == 24) {
@@ -397,7 +408,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
397 fattr->cf_rdev = MKDEV(mjr, mnr); 408 fattr->cf_rdev = MKDEV(mjr, mnr);
398 } 409 }
399 } else if (memcmp("IntxLNK", pbuf, 7) == 0) { 410 } else if (memcmp("IntxLNK", pbuf, 7) == 0) {
400 cFYI(1, ("Symlink")); 411 cFYI(1, "Symlink");
401 fattr->cf_mode |= S_IFLNK; 412 fattr->cf_mode |= S_IFLNK;
402 fattr->cf_dtype = DT_LNK; 413 fattr->cf_dtype = DT_LNK;
403 } else { 414 } else {
@@ -439,10 +450,10 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
439 else if (rc > 3) { 450 else if (rc > 3) {
440 mode = le32_to_cpu(*((__le32 *)ea_value)); 451 mode = le32_to_cpu(*((__le32 *)ea_value));
441 fattr->cf_mode &= ~SFBITS_MASK; 452 fattr->cf_mode &= ~SFBITS_MASK;
442 cFYI(1, ("special bits 0%o org mode 0%o", mode, 453 cFYI(1, "special bits 0%o org mode 0%o", mode,
443 fattr->cf_mode)); 454 fattr->cf_mode);
444 fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode; 455 fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode;
445 cFYI(1, ("special mode bits 0%o", mode)); 456 cFYI(1, "special mode bits 0%o", mode);
446 } 457 }
447 458
448 return 0; 459 return 0;
@@ -548,11 +559,11 @@ int cifs_get_inode_info(struct inode **pinode,
548 struct cifs_fattr fattr; 559 struct cifs_fattr fattr;
549 560
550 pTcon = cifs_sb->tcon; 561 pTcon = cifs_sb->tcon;
551 cFYI(1, ("Getting info on %s", full_path)); 562 cFYI(1, "Getting info on %s", full_path);
552 563
553 if ((pfindData == NULL) && (*pinode != NULL)) { 564 if ((pfindData == NULL) && (*pinode != NULL)) {
554 if (CIFS_I(*pinode)->clientCanCacheRead) { 565 if (CIFS_I(*pinode)->clientCanCacheRead) {
555 cFYI(1, ("No need to revalidate cached inode sizes")); 566 cFYI(1, "No need to revalidate cached inode sizes");
556 return rc; 567 return rc;
557 } 568 }
558 } 569 }
@@ -618,7 +629,7 @@ int cifs_get_inode_info(struct inode **pinode,
618 cifs_sb->mnt_cifs_flags & 629 cifs_sb->mnt_cifs_flags &
619 CIFS_MOUNT_MAP_SPECIAL_CHR); 630 CIFS_MOUNT_MAP_SPECIAL_CHR);
620 if (rc1 || !fattr.cf_uniqueid) { 631 if (rc1 || !fattr.cf_uniqueid) {
621 cFYI(1, ("GetSrvInodeNum rc %d", rc1)); 632 cFYI(1, "GetSrvInodeNum rc %d", rc1);
622 fattr.cf_uniqueid = iunique(sb, ROOT_I); 633 fattr.cf_uniqueid = iunique(sb, ROOT_I);
623 cifs_autodisable_serverino(cifs_sb); 634 cifs_autodisable_serverino(cifs_sb);
624 } 635 }
@@ -634,13 +645,13 @@ int cifs_get_inode_info(struct inode **pinode,
634 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { 645 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
635 tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid); 646 tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid);
636 if (tmprc) 647 if (tmprc)
637 cFYI(1, ("cifs_sfu_type failed: %d", tmprc)); 648 cFYI(1, "cifs_sfu_type failed: %d", tmprc);
638 } 649 }
639 650
640#ifdef CONFIG_CIFS_EXPERIMENTAL 651#ifdef CONFIG_CIFS_EXPERIMENTAL
641 /* fill in 0777 bits from ACL */ 652 /* fill in 0777 bits from ACL */
642 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 653 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
643 cFYI(1, ("Getting mode bits from ACL")); 654 cFYI(1, "Getting mode bits from ACL");
644 cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid); 655 cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
645 } 656 }
646#endif 657#endif
@@ -715,6 +726,16 @@ cifs_find_inode(struct inode *inode, void *opaque)
715 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) 726 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
716 return 0; 727 return 0;
717 728
729 /*
730 * uh oh -- it's a directory. We can't use it since hardlinked dirs are
731 * verboten. Disable serverino and return it as if it were found, the
732 * caller can discard it, generate a uniqueid and retry the find
733 */
734 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
735 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
736 cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
737 }
738
718 return 1; 739 return 1;
719} 740}
720 741
@@ -734,15 +755,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
734 unsigned long hash; 755 unsigned long hash;
735 struct inode *inode; 756 struct inode *inode;
736 757
737 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); 758retry_iget5_locked:
759 cFYI(1, "looking for uniqueid=%llu", fattr->cf_uniqueid);
738 760
739 /* hash down to 32-bits on 32-bit arch */ 761 /* hash down to 32-bits on 32-bit arch */
740 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); 762 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
741 763
742 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); 764 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
743
744 /* we have fattrs in hand, update the inode */
745 if (inode) { 765 if (inode) {
766 /* was there a problematic inode number collision? */
767 if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
768 iput(inode);
769 fattr->cf_uniqueid = iunique(sb, ROOT_I);
770 fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
771 goto retry_iget5_locked;
772 }
773
746 cifs_fattr_to_inode(inode, fattr); 774 cifs_fattr_to_inode(inode, fattr);
747 if (sb->s_flags & MS_NOATIME) 775 if (sb->s_flags & MS_NOATIME)
748 inode->i_flags |= S_NOATIME | S_NOCMTIME; 776 inode->i_flags |= S_NOATIME | S_NOCMTIME;
@@ -780,7 +808,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
780 return ERR_PTR(-ENOMEM); 808 return ERR_PTR(-ENOMEM);
781 809
782 if (rc && cifs_sb->tcon->ipc) { 810 if (rc && cifs_sb->tcon->ipc) {
783 cFYI(1, ("ipc connection - fake read inode")); 811 cFYI(1, "ipc connection - fake read inode");
784 inode->i_mode |= S_IFDIR; 812 inode->i_mode |= S_IFDIR;
785 inode->i_nlink = 2; 813 inode->i_nlink = 2;
786 inode->i_op = &cifs_ipc_inode_ops; 814 inode->i_op = &cifs_ipc_inode_ops;
@@ -842,7 +870,7 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
842 * server times. 870 * server times.
843 */ 871 */
844 if (set_time && (attrs->ia_valid & ATTR_CTIME)) { 872 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
845 cFYI(1, ("CIFS - CTIME changed")); 873 cFYI(1, "CIFS - CTIME changed");
846 info_buf.ChangeTime = 874 info_buf.ChangeTime =
847 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); 875 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
848 } else 876 } else
@@ -877,8 +905,8 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
877 goto out; 905 goto out;
878 } 906 }
879 907
880 cFYI(1, ("calling SetFileInfo since SetPathInfo for " 908 cFYI(1, "calling SetFileInfo since SetPathInfo for "
881 "times not supported by this server")); 909 "times not supported by this server");
882 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, 910 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
883 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, 911 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
884 CREATE_NOT_DIR, &netfid, &oplock, 912 CREATE_NOT_DIR, &netfid, &oplock,
@@ -1036,7 +1064,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1036 struct iattr *attrs = NULL; 1064 struct iattr *attrs = NULL;
1037 __u32 dosattr = 0, origattr = 0; 1065 __u32 dosattr = 0, origattr = 0;
1038 1066
1039 cFYI(1, ("cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry)); 1067 cFYI(1, "cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry);
1040 1068
1041 xid = GetXid(); 1069 xid = GetXid();
1042 1070
@@ -1055,7 +1083,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
1055 rc = CIFSPOSIXDelFile(xid, tcon, full_path, 1083 rc = CIFSPOSIXDelFile(xid, tcon, full_path,
1056 SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, 1084 SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls,
1057 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 1085 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
1058 cFYI(1, ("posix del rc %d", rc)); 1086 cFYI(1, "posix del rc %d", rc);
1059 if ((rc == 0) || (rc == -ENOENT)) 1087 if ((rc == 0) || (rc == -ENOENT))
1060 goto psx_del_no_retry; 1088 goto psx_del_no_retry;
1061 } 1089 }
@@ -1129,7 +1157,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1129 struct inode *newinode = NULL; 1157 struct inode *newinode = NULL;
1130 struct cifs_fattr fattr; 1158 struct cifs_fattr fattr;
1131 1159
1132 cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode)); 1160 cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode);
1133 1161
1134 xid = GetXid(); 1162 xid = GetXid();
1135 1163
@@ -1164,7 +1192,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1164 kfree(pInfo); 1192 kfree(pInfo);
1165 goto mkdir_retry_old; 1193 goto mkdir_retry_old;
1166 } else if (rc) { 1194 } else if (rc) {
1167 cFYI(1, ("posix mkdir returned 0x%x", rc)); 1195 cFYI(1, "posix mkdir returned 0x%x", rc);
1168 d_drop(direntry); 1196 d_drop(direntry);
1169 } else { 1197 } else {
1170 if (pInfo->Type == cpu_to_le32(-1)) { 1198 if (pInfo->Type == cpu_to_le32(-1)) {
@@ -1181,6 +1209,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1181 direntry->d_op = &cifs_dentry_ops; 1209 direntry->d_op = &cifs_dentry_ops;
1182 1210
1183 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1211 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1212 cifs_fill_uniqueid(inode->i_sb, &fattr);
1184 newinode = cifs_iget(inode->i_sb, &fattr); 1213 newinode = cifs_iget(inode->i_sb, &fattr);
1185 if (!newinode) { 1214 if (!newinode) {
1186 kfree(pInfo); 1215 kfree(pInfo);
@@ -1190,12 +1219,12 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1190 d_instantiate(direntry, newinode); 1219 d_instantiate(direntry, newinode);
1191 1220
1192#ifdef CONFIG_CIFS_DEBUG2 1221#ifdef CONFIG_CIFS_DEBUG2
1193 cFYI(1, ("instantiated dentry %p %s to inode %p", 1222 cFYI(1, "instantiated dentry %p %s to inode %p",
1194 direntry, direntry->d_name.name, newinode)); 1223 direntry, direntry->d_name.name, newinode);
1195 1224
1196 if (newinode->i_nlink != 2) 1225 if (newinode->i_nlink != 2)
1197 cFYI(1, ("unexpected number of links %d", 1226 cFYI(1, "unexpected number of links %d",
1198 newinode->i_nlink)); 1227 newinode->i_nlink);
1199#endif 1228#endif
1200 } 1229 }
1201 kfree(pInfo); 1230 kfree(pInfo);
@@ -1206,7 +1235,7 @@ mkdir_retry_old:
1206 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls, 1235 rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
1207 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 1236 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
1208 if (rc) { 1237 if (rc) {
1209 cFYI(1, ("cifs_mkdir returned 0x%x", rc)); 1238 cFYI(1, "cifs_mkdir returned 0x%x", rc);
1210 d_drop(direntry); 1239 d_drop(direntry);
1211 } else { 1240 } else {
1212mkdir_get_info: 1241mkdir_get_info:
@@ -1309,7 +1338,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1309 char *full_path = NULL; 1338 char *full_path = NULL;
1310 struct cifsInodeInfo *cifsInode; 1339 struct cifsInodeInfo *cifsInode;
1311 1340
1312 cFYI(1, ("cifs_rmdir, inode = 0x%p", inode)); 1341 cFYI(1, "cifs_rmdir, inode = 0x%p", inode);
1313 1342
1314 xid = GetXid(); 1343 xid = GetXid();
1315 1344
@@ -1511,6 +1540,11 @@ cifs_inode_needs_reval(struct inode *inode)
1511 if (time_after_eq(jiffies, cifs_i->time + HZ)) 1540 if (time_after_eq(jiffies, cifs_i->time + HZ))
1512 return true; 1541 return true;
1513 1542
1543 /* hardlinked files w/ noserverino get "special" treatment */
1544 if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
1545 S_ISREG(inode->i_mode) && inode->i_nlink != 1)
1546 return true;
1547
1514 return false; 1548 return false;
1515} 1549}
1516 1550
@@ -1577,9 +1611,9 @@ int cifs_revalidate_dentry(struct dentry *dentry)
1577 goto check_inval; 1611 goto check_inval;
1578 } 1612 }
1579 1613
1580 cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1614 cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
1581 "jiffies %ld", full_path, inode, inode->i_count.counter, 1615 "jiffies %ld", full_path, inode, inode->i_count.counter,
1582 dentry, dentry->d_time, jiffies)); 1616 dentry, dentry->d_time, jiffies);
1583 1617
1584 if (CIFS_SB(sb)->tcon->unix_ext) 1618 if (CIFS_SB(sb)->tcon->unix_ext)
1585 rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); 1619 rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
@@ -1673,12 +1707,12 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1673 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, 1707 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
1674 npid, false); 1708 npid, false);
1675 cifsFileInfo_put(open_file); 1709 cifsFileInfo_put(open_file);
1676 cFYI(1, ("SetFSize for attrs rc = %d", rc)); 1710 cFYI(1, "SetFSize for attrs rc = %d", rc);
1677 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1711 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1678 unsigned int bytes_written; 1712 unsigned int bytes_written;
1679 rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size, 1713 rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size,
1680 &bytes_written, NULL, NULL, 1); 1714 &bytes_written, NULL, NULL, 1);
1681 cFYI(1, ("Wrt seteof rc %d", rc)); 1715 cFYI(1, "Wrt seteof rc %d", rc);
1682 } 1716 }
1683 } else 1717 } else
1684 rc = -EINVAL; 1718 rc = -EINVAL;
@@ -1692,7 +1726,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1692 false, cifs_sb->local_nls, 1726 false, cifs_sb->local_nls,
1693 cifs_sb->mnt_cifs_flags & 1727 cifs_sb->mnt_cifs_flags &
1694 CIFS_MOUNT_MAP_SPECIAL_CHR); 1728 CIFS_MOUNT_MAP_SPECIAL_CHR);
1695 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); 1729 cFYI(1, "SetEOF by path (setattrs) rc = %d", rc);
1696 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1730 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1697 __u16 netfid; 1731 __u16 netfid;
1698 int oplock = 0; 1732 int oplock = 0;
@@ -1709,7 +1743,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1709 attrs->ia_size, 1743 attrs->ia_size,
1710 &bytes_written, NULL, 1744 &bytes_written, NULL,
1711 NULL, 1); 1745 NULL, 1);
1712 cFYI(1, ("wrt seteof rc %d", rc)); 1746 cFYI(1, "wrt seteof rc %d", rc);
1713 CIFSSMBClose(xid, pTcon, netfid); 1747 CIFSSMBClose(xid, pTcon, netfid);
1714 } 1748 }
1715 } 1749 }
@@ -1737,8 +1771,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1737 struct cifs_unix_set_info_args *args = NULL; 1771 struct cifs_unix_set_info_args *args = NULL;
1738 struct cifsFileInfo *open_file; 1772 struct cifsFileInfo *open_file;
1739 1773
1740 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", 1774 cFYI(1, "setattr_unix on file %s attrs->ia_valid=0x%x",
1741 direntry->d_name.name, attrs->ia_valid)); 1775 direntry->d_name.name, attrs->ia_valid);
1742 1776
1743 xid = GetXid(); 1777 xid = GetXid();
1744 1778
@@ -1868,8 +1902,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1868 1902
1869 xid = GetXid(); 1903 xid = GetXid();
1870 1904
1871 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", 1905 cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
1872 direntry->d_name.name, attrs->ia_valid)); 1906 direntry->d_name.name, attrs->ia_valid);
1873 1907
1874 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1908 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1875 /* check if we have permission to change attrs */ 1909 /* check if we have permission to change attrs */
@@ -1926,7 +1960,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1926 attrs->ia_valid &= ~ATTR_MODE; 1960 attrs->ia_valid &= ~ATTR_MODE;
1927 1961
1928 if (attrs->ia_valid & ATTR_MODE) { 1962 if (attrs->ia_valid & ATTR_MODE) {
1929 cFYI(1, ("Mode changed to 0%o", attrs->ia_mode)); 1963 cFYI(1, "Mode changed to 0%o", attrs->ia_mode);
1930 mode = attrs->ia_mode; 1964 mode = attrs->ia_mode;
1931 } 1965 }
1932 1966
@@ -2012,7 +2046,7 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs)
2012#if 0 2046#if 0
2013void cifs_delete_inode(struct inode *inode) 2047void cifs_delete_inode(struct inode *inode)
2014{ 2048{
2015 cFYI(1, ("In cifs_delete_inode, inode = 0x%p", inode)); 2049 cFYI(1, "In cifs_delete_inode, inode = 0x%p", inode);
2016 /* may have to add back in if and when safe distributed caching of 2050 /* may have to add back in if and when safe distributed caching of
2017 directories added e.g. via FindNotify */ 2051 directories added e.g. via FindNotify */
2018} 2052}
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index f94650683a00..505926f1ee6b 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -47,7 +47,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
47 47
48 xid = GetXid(); 48 xid = GetXid();
49 49
50 cFYI(1, ("ioctl file %p cmd %u arg %lu", filep, command, arg)); 50 cFYI(1, "ioctl file %p cmd %u arg %lu", filep, command, arg);
51 51
52 cifs_sb = CIFS_SB(inode->i_sb); 52 cifs_sb = CIFS_SB(inode->i_sb);
53 53
@@ -64,12 +64,12 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
64 64
65 switch (command) { 65 switch (command) {
66 case CIFS_IOC_CHECKUMOUNT: 66 case CIFS_IOC_CHECKUMOUNT:
67 cFYI(1, ("User unmount attempted")); 67 cFYI(1, "User unmount attempted");
68 if (cifs_sb->mnt_uid == current_uid()) 68 if (cifs_sb->mnt_uid == current_uid())
69 rc = 0; 69 rc = 0;
70 else { 70 else {
71 rc = -EACCES; 71 rc = -EACCES;
72 cFYI(1, ("uids do not match")); 72 cFYI(1, "uids do not match");
73 } 73 }
74 break; 74 break;
75#ifdef CONFIG_CIFS_POSIX 75#ifdef CONFIG_CIFS_POSIX
@@ -97,11 +97,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
97 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, 97 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
98 extAttrBits, &ExtAttrMask);*/ 98 extAttrBits, &ExtAttrMask);*/
99 } 99 }
100 cFYI(1, ("set flags not implemented yet")); 100 cFYI(1, "set flags not implemented yet");
101 break; 101 break;
102#endif /* CONFIG_CIFS_POSIX */ 102#endif /* CONFIG_CIFS_POSIX */
103 default: 103 default:
104 cFYI(1, ("unsupported ioctl")); 104 cFYI(1, "unsupported ioctl");
105 break; 105 break;
106 } 106 }
107 107
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index c1a9d4236a8c..473ca8033656 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -139,7 +139,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
139 if (!full_path) 139 if (!full_path)
140 goto out; 140 goto out;
141 141
142 cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode)); 142 cFYI(1, "Full path: %s inode = 0x%p", full_path, inode);
143 143
144 rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, 144 rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path,
145 cifs_sb->local_nls); 145 cifs_sb->local_nls);
@@ -178,8 +178,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
178 return rc; 178 return rc;
179 } 179 }
180 180
181 cFYI(1, ("Full path: %s", full_path)); 181 cFYI(1, "Full path: %s", full_path);
182 cFYI(1, ("symname is %s", symname)); 182 cFYI(1, "symname is %s", symname);
183 183
184 /* BB what if DFS and this volume is on different share? BB */ 184 /* BB what if DFS and this volume is on different share? BB */
185 if (pTcon->unix_ext) 185 if (pTcon->unix_ext)
@@ -198,8 +198,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
198 inode->i_sb, xid, NULL); 198 inode->i_sb, xid, NULL);
199 199
200 if (rc != 0) { 200 if (rc != 0) {
201 cFYI(1, ("Create symlink ok, getinodeinfo fail rc = %d", 201 cFYI(1, "Create symlink ok, getinodeinfo fail rc = %d",
202 rc)); 202 rc);
203 } else { 203 } else {
204 if (pTcon->nocase) 204 if (pTcon->nocase)
205 direntry->d_op = &cifs_ci_dentry_ops; 205 direntry->d_op = &cifs_ci_dentry_ops;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index d1474996a812..1394aa37f26c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -51,7 +51,7 @@ _GetXid(void)
51 if (GlobalTotalActiveXid > GlobalMaxActiveXid) 51 if (GlobalTotalActiveXid > GlobalMaxActiveXid)
52 GlobalMaxActiveXid = GlobalTotalActiveXid; 52 GlobalMaxActiveXid = GlobalTotalActiveXid;
53 if (GlobalTotalActiveXid > 65000) 53 if (GlobalTotalActiveXid > 65000)
54 cFYI(1, ("warning: more than 65000 requests active")); 54 cFYI(1, "warning: more than 65000 requests active");
55 xid = GlobalCurrentXid++; 55 xid = GlobalCurrentXid++;
56 spin_unlock(&GlobalMid_Lock); 56 spin_unlock(&GlobalMid_Lock);
57 return xid; 57 return xid;
@@ -88,7 +88,7 @@ void
88sesInfoFree(struct cifsSesInfo *buf_to_free) 88sesInfoFree(struct cifsSesInfo *buf_to_free)
89{ 89{
90 if (buf_to_free == NULL) { 90 if (buf_to_free == NULL) {
91 cFYI(1, ("Null buffer passed to sesInfoFree")); 91 cFYI(1, "Null buffer passed to sesInfoFree");
92 return; 92 return;
93 } 93 }
94 94
@@ -126,7 +126,7 @@ void
126tconInfoFree(struct cifsTconInfo *buf_to_free) 126tconInfoFree(struct cifsTconInfo *buf_to_free)
127{ 127{
128 if (buf_to_free == NULL) { 128 if (buf_to_free == NULL) {
129 cFYI(1, ("Null buffer passed to tconInfoFree")); 129 cFYI(1, "Null buffer passed to tconInfoFree");
130 return; 130 return;
131 } 131 }
132 atomic_dec(&tconInfoAllocCount); 132 atomic_dec(&tconInfoAllocCount);
@@ -166,7 +166,7 @@ void
166cifs_buf_release(void *buf_to_free) 166cifs_buf_release(void *buf_to_free)
167{ 167{
168 if (buf_to_free == NULL) { 168 if (buf_to_free == NULL) {
169 /* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/ 169 /* cFYI(1, "Null buffer passed to cifs_buf_release");*/
170 return; 170 return;
171 } 171 }
172 mempool_free(buf_to_free, cifs_req_poolp); 172 mempool_free(buf_to_free, cifs_req_poolp);
@@ -202,7 +202,7 @@ cifs_small_buf_release(void *buf_to_free)
202{ 202{
203 203
204 if (buf_to_free == NULL) { 204 if (buf_to_free == NULL) {
205 cFYI(1, ("Null buffer passed to cifs_small_buf_release")); 205 cFYI(1, "Null buffer passed to cifs_small_buf_release");
206 return; 206 return;
207 } 207 }
208 mempool_free(buf_to_free, cifs_sm_req_poolp); 208 mempool_free(buf_to_free, cifs_sm_req_poolp);
@@ -345,19 +345,19 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
345 /* with userid/password pairs found on the smb session */ 345 /* with userid/password pairs found on the smb session */
346 /* for other target tcp/ip addresses BB */ 346 /* for other target tcp/ip addresses BB */
347 if (current_fsuid() != treeCon->ses->linux_uid) { 347 if (current_fsuid() != treeCon->ses->linux_uid) {
348 cFYI(1, ("Multiuser mode and UID " 348 cFYI(1, "Multiuser mode and UID "
349 "did not match tcon uid")); 349 "did not match tcon uid");
350 read_lock(&cifs_tcp_ses_lock); 350 read_lock(&cifs_tcp_ses_lock);
351 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { 351 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
352 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); 352 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list);
353 if (ses->linux_uid == current_fsuid()) { 353 if (ses->linux_uid == current_fsuid()) {
354 if (ses->server == treeCon->ses->server) { 354 if (ses->server == treeCon->ses->server) {
355 cFYI(1, ("found matching uid substitute right smb_uid")); 355 cFYI(1, "found matching uid substitute right smb_uid");
356 buffer->Uid = ses->Suid; 356 buffer->Uid = ses->Suid;
357 break; 357 break;
358 } else { 358 } else {
359 /* BB eventually call cifs_setup_session here */ 359 /* BB eventually call cifs_setup_session here */
360 cFYI(1, ("local UID found but no smb sess with this server exists")); 360 cFYI(1, "local UID found but no smb sess with this server exists");
361 } 361 }
362 } 362 }
363 } 363 }
@@ -394,17 +394,16 @@ checkSMBhdr(struct smb_hdr *smb, __u16 mid)
394 if (smb->Command == SMB_COM_LOCKING_ANDX) 394 if (smb->Command == SMB_COM_LOCKING_ANDX)
395 return 0; 395 return 0;
396 else 396 else
397 cERROR(1, ("Received Request not response")); 397 cERROR(1, "Received Request not response");
398 } 398 }
399 } else { /* bad signature or mid */ 399 } else { /* bad signature or mid */
400 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff)) 400 if (*(__le32 *) smb->Protocol != cpu_to_le32(0x424d53ff))
401 cERROR(1, 401 cERROR(1, "Bad protocol string signature header %x",
402 ("Bad protocol string signature header %x", 402 *(unsigned int *) smb->Protocol);
403 *(unsigned int *) smb->Protocol));
404 if (mid != smb->Mid) 403 if (mid != smb->Mid)
405 cERROR(1, ("Mids do not match")); 404 cERROR(1, "Mids do not match");
406 } 405 }
407 cERROR(1, ("bad smb detected. The Mid=%d", smb->Mid)); 406 cERROR(1, "bad smb detected. The Mid=%d", smb->Mid);
408 return 1; 407 return 1;
409} 408}
410 409
@@ -413,7 +412,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
413{ 412{
414 __u32 len = smb->smb_buf_length; 413 __u32 len = smb->smb_buf_length;
415 __u32 clc_len; /* calculated length */ 414 __u32 clc_len; /* calculated length */
416 cFYI(0, ("checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len)); 415 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
417 416
418 if (length < 2 + sizeof(struct smb_hdr)) { 417 if (length < 2 + sizeof(struct smb_hdr)) {
419 if ((length >= sizeof(struct smb_hdr) - 1) 418 if ((length >= sizeof(struct smb_hdr) - 1)
@@ -437,15 +436,15 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
437 tmp[sizeof(struct smb_hdr)+1] = 0; 436 tmp[sizeof(struct smb_hdr)+1] = 0;
438 return 0; 437 return 0;
439 } 438 }
440 cERROR(1, ("rcvd invalid byte count (bcc)")); 439 cERROR(1, "rcvd invalid byte count (bcc)");
441 } else { 440 } else {
442 cERROR(1, ("Length less than smb header size")); 441 cERROR(1, "Length less than smb header size");
443 } 442 }
444 return 1; 443 return 1;
445 } 444 }
446 if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 445 if (len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
447 cERROR(1, ("smb length greater than MaxBufSize, mid=%d", 446 cERROR(1, "smb length greater than MaxBufSize, mid=%d",
448 smb->Mid)); 447 smb->Mid);
449 return 1; 448 return 1;
450 } 449 }
451 450
@@ -454,8 +453,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
454 clc_len = smbCalcSize_LE(smb); 453 clc_len = smbCalcSize_LE(smb);
455 454
456 if (4 + len != length) { 455 if (4 + len != length) {
457 cERROR(1, ("Length read does not match RFC1001 length %d", 456 cERROR(1, "Length read does not match RFC1001 length %d",
458 len)); 457 len);
459 return 1; 458 return 1;
460 } 459 }
461 460
@@ -466,8 +465,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
466 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF)) 465 if (((4 + len) & 0xFFFF) == (clc_len & 0xFFFF))
467 return 0; /* bcc wrapped */ 466 return 0; /* bcc wrapped */
468 } 467 }
469 cFYI(1, ("Calculated size %d vs length %d mismatch for mid %d", 468 cFYI(1, "Calculated size %d vs length %d mismatch for mid %d",
470 clc_len, 4 + len, smb->Mid)); 469 clc_len, 4 + len, smb->Mid);
471 /* Windows XP can return a few bytes too much, presumably 470 /* Windows XP can return a few bytes too much, presumably
472 an illegal pad, at the end of byte range lock responses 471 an illegal pad, at the end of byte range lock responses
473 so we allow for that three byte pad, as long as actual 472 so we allow for that three byte pad, as long as actual
@@ -482,8 +481,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
482 if ((4+len > clc_len) && (len <= clc_len + 512)) 481 if ((4+len > clc_len) && (len <= clc_len + 512))
483 return 0; 482 return 0;
484 else { 483 else {
485 cERROR(1, ("RFC1001 size %d bigger than SMB for Mid=%d", 484 cERROR(1, "RFC1001 size %d bigger than SMB for Mid=%d",
486 len, smb->Mid)); 485 len, smb->Mid);
487 return 1; 486 return 1;
488 } 487 }
489 } 488 }
@@ -501,7 +500,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
501 struct cifsFileInfo *netfile; 500 struct cifsFileInfo *netfile;
502 int rc; 501 int rc;
503 502
504 cFYI(1, ("Checking for oplock break or dnotify response")); 503 cFYI(1, "Checking for oplock break or dnotify response");
505 if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) && 504 if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
506 (pSMB->hdr.Flags & SMBFLG_RESPONSE)) { 505 (pSMB->hdr.Flags & SMBFLG_RESPONSE)) {
507 struct smb_com_transaction_change_notify_rsp *pSMBr = 506 struct smb_com_transaction_change_notify_rsp *pSMBr =
@@ -513,15 +512,15 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
513 512
514 pnotify = (struct file_notify_information *) 513 pnotify = (struct file_notify_information *)
515 ((char *)&pSMBr->hdr.Protocol + data_offset); 514 ((char *)&pSMBr->hdr.Protocol + data_offset);
516 cFYI(1, ("dnotify on %s Action: 0x%x", 515 cFYI(1, "dnotify on %s Action: 0x%x",
517 pnotify->FileName, pnotify->Action)); 516 pnotify->FileName, pnotify->Action);
518 /* cifs_dump_mem("Rcvd notify Data: ",buf, 517 /* cifs_dump_mem("Rcvd notify Data: ",buf,
519 sizeof(struct smb_hdr)+60); */ 518 sizeof(struct smb_hdr)+60); */
520 return true; 519 return true;
521 } 520 }
522 if (pSMBr->hdr.Status.CifsError) { 521 if (pSMBr->hdr.Status.CifsError) {
523 cFYI(1, ("notify err 0x%d", 522 cFYI(1, "notify err 0x%d",
524 pSMBr->hdr.Status.CifsError)); 523 pSMBr->hdr.Status.CifsError);
525 return true; 524 return true;
526 } 525 }
527 return false; 526 return false;
@@ -535,7 +534,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
535 large dirty files cached on the client */ 534 large dirty files cached on the client */
536 if ((NT_STATUS_INVALID_HANDLE) == 535 if ((NT_STATUS_INVALID_HANDLE) ==
537 le32_to_cpu(pSMB->hdr.Status.CifsError)) { 536 le32_to_cpu(pSMB->hdr.Status.CifsError)) {
538 cFYI(1, ("invalid handle on oplock break")); 537 cFYI(1, "invalid handle on oplock break");
539 return true; 538 return true;
540 } else if (ERRbadfid == 539 } else if (ERRbadfid ==
541 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { 540 le16_to_cpu(pSMB->hdr.Status.DosError.Error)) {
@@ -547,8 +546,8 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
547 if (pSMB->hdr.WordCount != 8) 546 if (pSMB->hdr.WordCount != 8)
548 return false; 547 return false;
549 548
550 cFYI(1, ("oplock type 0x%d level 0x%d", 549 cFYI(1, "oplock type 0x%d level 0x%d",
551 pSMB->LockType, pSMB->OplockLevel)); 550 pSMB->LockType, pSMB->OplockLevel);
552 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE)) 551 if (!(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE))
553 return false; 552 return false;
554 553
@@ -579,15 +578,15 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
579 return true; 578 return true;
580 } 579 }
581 580
582 cFYI(1, ("file id match, oplock break")); 581 cFYI(1, "file id match, oplock break");
583 pCifsInode = CIFS_I(netfile->pInode); 582 pCifsInode = CIFS_I(netfile->pInode);
584 pCifsInode->clientCanCacheAll = false; 583 pCifsInode->clientCanCacheAll = false;
585 if (pSMB->OplockLevel == 0) 584 if (pSMB->OplockLevel == 0)
586 pCifsInode->clientCanCacheRead = false; 585 pCifsInode->clientCanCacheRead = false;
587 rc = slow_work_enqueue(&netfile->oplock_break); 586 rc = slow_work_enqueue(&netfile->oplock_break);
588 if (rc) { 587 if (rc) {
589 cERROR(1, ("failed to enqueue oplock " 588 cERROR(1, "failed to enqueue oplock "
590 "break: %d\n", rc)); 589 "break: %d\n", rc);
591 } else { 590 } else {
592 netfile->oplock_break_cancelled = false; 591 netfile->oplock_break_cancelled = false;
593 } 592 }
@@ -597,12 +596,12 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
597 } 596 }
598 read_unlock(&GlobalSMBSeslock); 597 read_unlock(&GlobalSMBSeslock);
599 read_unlock(&cifs_tcp_ses_lock); 598 read_unlock(&cifs_tcp_ses_lock);
600 cFYI(1, ("No matching file for oplock break")); 599 cFYI(1, "No matching file for oplock break");
601 return true; 600 return true;
602 } 601 }
603 } 602 }
604 read_unlock(&cifs_tcp_ses_lock); 603 read_unlock(&cifs_tcp_ses_lock);
605 cFYI(1, ("Can not process oplock break for non-existent connection")); 604 cFYI(1, "Can not process oplock break for non-existent connection");
606 return true; 605 return true;
607} 606}
608 607
@@ -721,11 +720,11 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
721{ 720{
722 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 721 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
723 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; 722 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
724 cERROR(1, ("Autodisabling the use of server inode numbers on " 723 cERROR(1, "Autodisabling the use of server inode numbers on "
725 "%s. This server doesn't seem to support them " 724 "%s. This server doesn't seem to support them "
726 "properly. Hardlinks will not be recognized on this " 725 "properly. Hardlinks will not be recognized on this "
727 "mount. Consider mounting with the \"noserverino\" " 726 "mount. Consider mounting with the \"noserverino\" "
728 "option to silence this message.", 727 "option to silence this message.",
729 cifs_sb->tcon->treeName)); 728 cifs_sb->tcon->treeName);
730 } 729 }
731} 730}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index bd6d6895730d..d35d52889cb5 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -149,7 +149,7 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
149 else if (address_family == AF_INET6) 149 else if (address_family == AF_INET6)
150 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL); 150 ret = in6_pton(cp, -1 /* len */, dst , '\\', NULL);
151 151
152 cFYI(DBG2, ("address conversion returned %d for %s", ret, cp)); 152 cFYI(DBG2, "address conversion returned %d for %s", ret, cp);
153 if (ret > 0) 153 if (ret > 0)
154 ret = 1; 154 ret = 1;
155 return ret; 155 return ret;
@@ -870,8 +870,8 @@ map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
870 } 870 }
871 /* else ERRHRD class errors or junk - return EIO */ 871 /* else ERRHRD class errors or junk - return EIO */
872 872
873 cFYI(1, ("Mapping smb error code %d to POSIX err %d", 873 cFYI(1, "Mapping smb error code %d to POSIX err %d",
874 smberrcode, rc)); 874 smberrcode, rc);
875 875
876 /* generic corrective action e.g. reconnect SMB session on 876 /* generic corrective action e.g. reconnect SMB session on
877 * ERRbaduid could be added */ 877 * ERRbaduid could be added */
@@ -940,20 +940,20 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
940 SMB_TIME *st = (SMB_TIME *)&time; 940 SMB_TIME *st = (SMB_TIME *)&time;
941 SMB_DATE *sd = (SMB_DATE *)&date; 941 SMB_DATE *sd = (SMB_DATE *)&date;
942 942
943 cFYI(1, ("date %d time %d", date, time)); 943 cFYI(1, "date %d time %d", date, time);
944 944
945 sec = 2 * st->TwoSeconds; 945 sec = 2 * st->TwoSeconds;
946 min = st->Minutes; 946 min = st->Minutes;
947 if ((sec > 59) || (min > 59)) 947 if ((sec > 59) || (min > 59))
948 cERROR(1, ("illegal time min %d sec %d", min, sec)); 948 cERROR(1, "illegal time min %d sec %d", min, sec);
949 sec += (min * 60); 949 sec += (min * 60);
950 sec += 60 * 60 * st->Hours; 950 sec += 60 * 60 * st->Hours;
951 if (st->Hours > 24) 951 if (st->Hours > 24)
952 cERROR(1, ("illegal hours %d", st->Hours)); 952 cERROR(1, "illegal hours %d", st->Hours);
953 days = sd->Day; 953 days = sd->Day;
954 month = sd->Month; 954 month = sd->Month;
955 if ((days > 31) || (month > 12)) { 955 if ((days > 31) || (month > 12)) {
956 cERROR(1, ("illegal date, month %d day: %d", month, days)); 956 cERROR(1, "illegal date, month %d day: %d", month, days);
957 if (month > 12) 957 if (month > 12)
958 month = 12; 958 month = 12;
959 } 959 }
@@ -979,7 +979,7 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset)
979 979
980 ts.tv_sec = sec + offset; 980 ts.tv_sec = sec + offset;
981 981
982 /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */ 982 /* cFYI(1, "sec after cnvrt dos to unix time %d",sec); */
983 983
984 ts.tv_nsec = 0; 984 ts.tv_nsec = 0;
985 return ts; 985 return ts;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 18e0bc1fb593..daf1753af674 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -47,15 +47,15 @@ static void dump_cifs_file_struct(struct file *file, char *label)
47 if (file) { 47 if (file) {
48 cf = file->private_data; 48 cf = file->private_data;
49 if (cf == NULL) { 49 if (cf == NULL) {
50 cFYI(1, ("empty cifs private file data")); 50 cFYI(1, "empty cifs private file data");
51 return; 51 return;
52 } 52 }
53 if (cf->invalidHandle) 53 if (cf->invalidHandle)
54 cFYI(1, ("invalid handle")); 54 cFYI(1, "invalid handle");
55 if (cf->srch_inf.endOfSearch) 55 if (cf->srch_inf.endOfSearch)
56 cFYI(1, ("end of search")); 56 cFYI(1, "end of search");
57 if (cf->srch_inf.emptyDir) 57 if (cf->srch_inf.emptyDir)
58 cFYI(1, ("empty dir")); 58 cFYI(1, "empty dir");
59 } 59 }
60} 60}
61#else 61#else
@@ -76,7 +76,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
76 struct inode *inode; 76 struct inode *inode;
77 struct super_block *sb = parent->d_inode->i_sb; 77 struct super_block *sb = parent->d_inode->i_sb;
78 78
79 cFYI(1, ("For %s", name->name)); 79 cFYI(1, "For %s", name->name);
80 80
81 if (parent->d_op && parent->d_op->d_hash) 81 if (parent->d_op && parent->d_op->d_hash)
82 parent->d_op->d_hash(parent, name); 82 parent->d_op->d_hash(parent, name);
@@ -214,7 +214,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
214 fid, 214 fid,
215 cifs_sb->local_nls); 215 cifs_sb->local_nls);
216 if (CIFSSMBClose(xid, ptcon, fid)) { 216 if (CIFSSMBClose(xid, ptcon, fid)) {
217 cFYI(1, ("Error closing temporary reparsepoint open)")); 217 cFYI(1, "Error closing temporary reparsepoint open");
218 } 218 }
219 } 219 }
220} 220}
@@ -252,7 +252,7 @@ static int initiate_cifs_search(const int xid, struct file *file)
252 if (full_path == NULL) 252 if (full_path == NULL)
253 return -ENOMEM; 253 return -ENOMEM;
254 254
255 cFYI(1, ("Full path: %s start at: %lld", full_path, file->f_pos)); 255 cFYI(1, "Full path: %s start at: %lld", full_path, file->f_pos);
256 256
257ffirst_retry: 257ffirst_retry:
258 /* test for Unix extensions */ 258 /* test for Unix extensions */
@@ -297,7 +297,7 @@ static int cifs_unicode_bytelen(char *str)
297 if (ustr[len] == 0) 297 if (ustr[len] == 0)
298 return len << 1; 298 return len << 1;
299 } 299 }
300 cFYI(1, ("Unicode string longer than PATH_MAX found")); 300 cFYI(1, "Unicode string longer than PATH_MAX found");
301 return len << 1; 301 return len << 1;
302} 302}
303 303
@@ -314,19 +314,18 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level)
314 pfData->FileNameLength; 314 pfData->FileNameLength;
315 } else 315 } else
316 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset); 316 new_entry = old_entry + le32_to_cpu(pDirInfo->NextEntryOffset);
317 cFYI(1, ("new entry %p old entry %p", new_entry, old_entry)); 317 cFYI(1, "new entry %p old entry %p", new_entry, old_entry);
318 /* validate that new_entry is not past end of SMB */ 318 /* validate that new_entry is not past end of SMB */
319 if (new_entry >= end_of_smb) { 319 if (new_entry >= end_of_smb) {
320 cERROR(1, 320 cERROR(1, "search entry %p began after end of SMB %p old entry %p",
321 ("search entry %p began after end of SMB %p old entry %p", 321 new_entry, end_of_smb, old_entry);
322 new_entry, end_of_smb, old_entry));
323 return NULL; 322 return NULL;
324 } else if (((level == SMB_FIND_FILE_INFO_STANDARD) && 323 } else if (((level == SMB_FIND_FILE_INFO_STANDARD) &&
325 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb)) 324 (new_entry + sizeof(FIND_FILE_STANDARD_INFO) > end_of_smb))
326 || ((level != SMB_FIND_FILE_INFO_STANDARD) && 325 || ((level != SMB_FIND_FILE_INFO_STANDARD) &&
327 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) { 326 (new_entry + sizeof(FILE_DIRECTORY_INFO) > end_of_smb))) {
328 cERROR(1, ("search entry %p extends after end of SMB %p", 327 cERROR(1, "search entry %p extends after end of SMB %p",
329 new_entry, end_of_smb)); 328 new_entry, end_of_smb);
330 return NULL; 329 return NULL;
331 } else 330 } else
332 return new_entry; 331 return new_entry;
@@ -380,8 +379,8 @@ static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile)
380 filename = &pFindData->FileName[0]; 379 filename = &pFindData->FileName[0];
381 len = pFindData->FileNameLength; 380 len = pFindData->FileNameLength;
382 } else { 381 } else {
383 cFYI(1, ("Unknown findfirst level %d", 382 cFYI(1, "Unknown findfirst level %d",
384 cfile->srch_inf.info_level)); 383 cfile->srch_inf.info_level);
385 } 384 }
386 385
387 if (filename) { 386 if (filename) {
@@ -481,7 +480,7 @@ static int cifs_save_resume_key(const char *current_entry,
481 len = (unsigned int)pFindData->FileNameLength; 480 len = (unsigned int)pFindData->FileNameLength;
482 cifsFile->srch_inf.resume_key = pFindData->ResumeKey; 481 cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
483 } else { 482 } else {
484 cFYI(1, ("Unknown findfirst level %d", level)); 483 cFYI(1, "Unknown findfirst level %d", level);
485 return -EINVAL; 484 return -EINVAL;
486 } 485 }
487 cifsFile->srch_inf.resume_name_len = len; 486 cifsFile->srch_inf.resume_name_len = len;
@@ -525,7 +524,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
525 is_dir_changed(file)) || 524 is_dir_changed(file)) ||
526 (index_to_find < first_entry_in_buffer)) { 525 (index_to_find < first_entry_in_buffer)) {
527 /* close and restart search */ 526 /* close and restart search */
528 cFYI(1, ("search backing up - close and restart search")); 527 cFYI(1, "search backing up - close and restart search");
529 write_lock(&GlobalSMBSeslock); 528 write_lock(&GlobalSMBSeslock);
530 if (!cifsFile->srch_inf.endOfSearch && 529 if (!cifsFile->srch_inf.endOfSearch &&
531 !cifsFile->invalidHandle) { 530 !cifsFile->invalidHandle) {
@@ -535,7 +534,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
535 } else 534 } else
536 write_unlock(&GlobalSMBSeslock); 535 write_unlock(&GlobalSMBSeslock);
537 if (cifsFile->srch_inf.ntwrk_buf_start) { 536 if (cifsFile->srch_inf.ntwrk_buf_start) {
538 cFYI(1, ("freeing SMB ff cache buf on search rewind")); 537 cFYI(1, "freeing SMB ff cache buf on search rewind");
539 if (cifsFile->srch_inf.smallBuf) 538 if (cifsFile->srch_inf.smallBuf)
540 cifs_small_buf_release(cifsFile->srch_inf. 539 cifs_small_buf_release(cifsFile->srch_inf.
541 ntwrk_buf_start); 540 ntwrk_buf_start);
@@ -546,8 +545,8 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
546 } 545 }
547 rc = initiate_cifs_search(xid, file); 546 rc = initiate_cifs_search(xid, file);
548 if (rc) { 547 if (rc) {
549 cFYI(1, ("error %d reinitiating a search on rewind", 548 cFYI(1, "error %d reinitiating a search on rewind",
550 rc)); 549 rc);
551 return rc; 550 return rc;
552 } 551 }
553 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); 552 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
@@ -555,7 +554,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
555 554
556 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 555 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
557 (rc == 0) && !cifsFile->srch_inf.endOfSearch) { 556 (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
558 cFYI(1, ("calling findnext2")); 557 cFYI(1, "calling findnext2");
559 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 558 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
560 &cifsFile->srch_inf); 559 &cifsFile->srch_inf);
561 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); 560 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
@@ -575,7 +574,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
575 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry 574 first_entry_in_buffer = cifsFile->srch_inf.index_of_last_entry
576 - cifsFile->srch_inf.entries_in_buffer; 575 - cifsFile->srch_inf.entries_in_buffer;
577 pos_in_buf = index_to_find - first_entry_in_buffer; 576 pos_in_buf = index_to_find - first_entry_in_buffer;
578 cFYI(1, ("found entry - pos_in_buf %d", pos_in_buf)); 577 cFYI(1, "found entry - pos_in_buf %d", pos_in_buf);
579 578
580 for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) { 579 for (i = 0; (i < (pos_in_buf)) && (current_entry != NULL); i++) {
581 /* go entry by entry figuring out which is first */ 580 /* go entry by entry figuring out which is first */
@@ -584,19 +583,19 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
584 } 583 }
585 if ((current_entry == NULL) && (i < pos_in_buf)) { 584 if ((current_entry == NULL) && (i < pos_in_buf)) {
586 /* BB fixme - check if we should flag this error */ 585 /* BB fixme - check if we should flag this error */
587 cERROR(1, ("reached end of buf searching for pos in buf" 586 cERROR(1, "reached end of buf searching for pos in buf"
588 " %d index to find %lld rc %d", 587 " %d index to find %lld rc %d",
589 pos_in_buf, index_to_find, rc)); 588 pos_in_buf, index_to_find, rc);
590 } 589 }
591 rc = 0; 590 rc = 0;
592 *ppCurrentEntry = current_entry; 591 *ppCurrentEntry = current_entry;
593 } else { 592 } else {
594 cFYI(1, ("index not in buffer - could not findnext into it")); 593 cFYI(1, "index not in buffer - could not findnext into it");
595 return 0; 594 return 0;
596 } 595 }
597 596
598 if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) { 597 if (pos_in_buf >= cifsFile->srch_inf.entries_in_buffer) {
599 cFYI(1, ("can not return entries pos_in_buf beyond last")); 598 cFYI(1, "can not return entries pos_in_buf beyond last");
600 *num_to_ret = 0; 599 *num_to_ret = 0;
601 } else 600 } else
602 *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf; 601 *num_to_ret = cifsFile->srch_inf.entries_in_buffer - pos_in_buf;
@@ -656,12 +655,12 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
656 /* one byte length, no name conversion */ 655 /* one byte length, no name conversion */
657 len = (unsigned int)pFindData->FileNameLength; 656 len = (unsigned int)pFindData->FileNameLength;
658 } else { 657 } else {
659 cFYI(1, ("Unknown findfirst level %d", level)); 658 cFYI(1, "Unknown findfirst level %d", level);
660 return -EINVAL; 659 return -EINVAL;
661 } 660 }
662 661
663 if (len > max_len) { 662 if (len > max_len) {
664 cERROR(1, ("bad search response length %d past smb end", len)); 663 cERROR(1, "bad search response length %d past smb end", len);
665 return -EINVAL; 664 return -EINVAL;
666 } 665 }
667 666
@@ -754,7 +753,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
754 * case already. Why should we be clobbering other errors from it? 753 * case already. Why should we be clobbering other errors from it?
755 */ 754 */
756 if (rc) { 755 if (rc) {
757 cFYI(1, ("filldir rc = %d", rc)); 756 cFYI(1, "filldir rc = %d", rc);
758 rc = -EOVERFLOW; 757 rc = -EOVERFLOW;
759 } 758 }
760 dput(tmp_dentry); 759 dput(tmp_dentry);
@@ -786,7 +785,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
786 case 0: 785 case 0:
787 if (filldir(direntry, ".", 1, file->f_pos, 786 if (filldir(direntry, ".", 1, file->f_pos,
788 file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) { 787 file->f_path.dentry->d_inode->i_ino, DT_DIR) < 0) {
789 cERROR(1, ("Filldir for current dir failed")); 788 cERROR(1, "Filldir for current dir failed");
790 rc = -ENOMEM; 789 rc = -ENOMEM;
791 break; 790 break;
792 } 791 }
@@ -794,7 +793,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
794 case 1: 793 case 1:
795 if (filldir(direntry, "..", 2, file->f_pos, 794 if (filldir(direntry, "..", 2, file->f_pos,
796 file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 795 file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) {
797 cERROR(1, ("Filldir for parent dir failed")); 796 cERROR(1, "Filldir for parent dir failed");
798 rc = -ENOMEM; 797 rc = -ENOMEM;
799 break; 798 break;
800 } 799 }
@@ -807,7 +806,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
807 806
808 if (file->private_data == NULL) { 807 if (file->private_data == NULL) {
809 rc = initiate_cifs_search(xid, file); 808 rc = initiate_cifs_search(xid, file);
810 cFYI(1, ("initiate cifs search rc %d", rc)); 809 cFYI(1, "initiate cifs search rc %d", rc);
811 if (rc) { 810 if (rc) {
812 FreeXid(xid); 811 FreeXid(xid);
813 return rc; 812 return rc;
@@ -821,7 +820,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
821 cifsFile = file->private_data; 820 cifsFile = file->private_data;
822 if (cifsFile->srch_inf.endOfSearch) { 821 if (cifsFile->srch_inf.endOfSearch) {
823 if (cifsFile->srch_inf.emptyDir) { 822 if (cifsFile->srch_inf.emptyDir) {
824 cFYI(1, ("End of search, empty dir")); 823 cFYI(1, "End of search, empty dir");
825 rc = 0; 824 rc = 0;
826 break; 825 break;
827 } 826 }
@@ -833,16 +832,16 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
833 rc = find_cifs_entry(xid, pTcon, file, 832 rc = find_cifs_entry(xid, pTcon, file,
834 &current_entry, &num_to_fill); 833 &current_entry, &num_to_fill);
835 if (rc) { 834 if (rc) {
836 cFYI(1, ("fce error %d", rc)); 835 cFYI(1, "fce error %d", rc);
837 goto rddir2_exit; 836 goto rddir2_exit;
838 } else if (current_entry != NULL) { 837 } else if (current_entry != NULL) {
839 cFYI(1, ("entry %lld found", file->f_pos)); 838 cFYI(1, "entry %lld found", file->f_pos);
840 } else { 839 } else {
841 cFYI(1, ("could not find entry")); 840 cFYI(1, "could not find entry");
842 goto rddir2_exit; 841 goto rddir2_exit;
843 } 842 }
844 cFYI(1, ("loop through %d times filling dir for net buf %p", 843 cFYI(1, "loop through %d times filling dir for net buf %p",
845 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start)); 844 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
846 max_len = smbCalcSize((struct smb_hdr *) 845 max_len = smbCalcSize((struct smb_hdr *)
847 cifsFile->srch_inf.ntwrk_buf_start); 846 cifsFile->srch_inf.ntwrk_buf_start);
848 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; 847 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
@@ -851,8 +850,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
851 for (i = 0; (i < num_to_fill) && (rc == 0); i++) { 850 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
852 if (current_entry == NULL) { 851 if (current_entry == NULL) {
853 /* evaluate whether this case is an error */ 852 /* evaluate whether this case is an error */
854 cERROR(1, ("past SMB end, num to fill %d i %d", 853 cERROR(1, "past SMB end, num to fill %d i %d",
855 num_to_fill, i)); 854 num_to_fill, i);
856 break; 855 break;
857 } 856 }
858 /* if buggy server returns . and .. late do 857 /* if buggy server returns . and .. late do
@@ -867,8 +866,8 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
867 file->f_pos++; 866 file->f_pos++;
868 if (file->f_pos == 867 if (file->f_pos ==
869 cifsFile->srch_inf.index_of_last_entry) { 868 cifsFile->srch_inf.index_of_last_entry) {
870 cFYI(1, ("last entry in buf at pos %lld %s", 869 cFYI(1, "last entry in buf at pos %lld %s",
871 file->f_pos, tmp_buf)); 870 file->f_pos, tmp_buf);
872 cifs_save_resume_key(current_entry, cifsFile); 871 cifs_save_resume_key(current_entry, cifsFile);
873 break; 872 break;
874 } else 873 } else
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7c3fd7463f44..7707389bdf2c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -35,9 +35,11 @@
35extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, 35extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
36 unsigned char *p24); 36 unsigned char *p24);
37 37
38/* Checks if this is the first smb session to be reconnected after 38/*
39 the socket has been reestablished (so we know whether to use vc 0). 39 * Checks if this is the first smb session to be reconnected after
40 Called while holding the cifs_tcp_ses_lock, so do not block */ 40 * the socket has been reestablished (so we know whether to use vc 0).
41 * Called while holding the cifs_tcp_ses_lock, so do not block
42 */
41static bool is_first_ses_reconnect(struct cifsSesInfo *ses) 43static bool is_first_ses_reconnect(struct cifsSesInfo *ses)
42{ 44{
43 struct list_head *tmp; 45 struct list_head *tmp;
@@ -284,7 +286,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
284 int len; 286 int len;
285 char *data = *pbcc_area; 287 char *data = *pbcc_area;
286 288
287 cFYI(1, ("bleft %d", bleft)); 289 cFYI(1, "bleft %d", bleft);
288 290
289 /* 291 /*
290 * Windows servers do not always double null terminate their final 292 * Windows servers do not always double null terminate their final
@@ -301,7 +303,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
301 303
302 kfree(ses->serverOS); 304 kfree(ses->serverOS);
303 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 305 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
304 cFYI(1, ("serverOS=%s", ses->serverOS)); 306 cFYI(1, "serverOS=%s", ses->serverOS);
305 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; 307 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
306 data += len; 308 data += len;
307 bleft -= len; 309 bleft -= len;
@@ -310,7 +312,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
310 312
311 kfree(ses->serverNOS); 313 kfree(ses->serverNOS);
312 ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 314 ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
313 cFYI(1, ("serverNOS=%s", ses->serverNOS)); 315 cFYI(1, "serverNOS=%s", ses->serverNOS);
314 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; 316 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
315 data += len; 317 data += len;
316 bleft -= len; 318 bleft -= len;
@@ -319,7 +321,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
319 321
320 kfree(ses->serverDomain); 322 kfree(ses->serverDomain);
321 ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 323 ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
322 cFYI(1, ("serverDomain=%s", ses->serverDomain)); 324 cFYI(1, "serverDomain=%s", ses->serverDomain);
323 325
324 return; 326 return;
325} 327}
@@ -332,7 +334,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
332 int len; 334 int len;
333 char *bcc_ptr = *pbcc_area; 335 char *bcc_ptr = *pbcc_area;
334 336
335 cFYI(1, ("decode sessetup ascii. bleft %d", bleft)); 337 cFYI(1, "decode sessetup ascii. bleft %d", bleft);
336 338
337 len = strnlen(bcc_ptr, bleft); 339 len = strnlen(bcc_ptr, bleft);
338 if (len >= bleft) 340 if (len >= bleft)
@@ -344,7 +346,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
344 if (ses->serverOS) 346 if (ses->serverOS)
345 strncpy(ses->serverOS, bcc_ptr, len); 347 strncpy(ses->serverOS, bcc_ptr, len);
346 if (strncmp(ses->serverOS, "OS/2", 4) == 0) { 348 if (strncmp(ses->serverOS, "OS/2", 4) == 0) {
347 cFYI(1, ("OS/2 server")); 349 cFYI(1, "OS/2 server");
348 ses->flags |= CIFS_SES_OS2; 350 ses->flags |= CIFS_SES_OS2;
349 } 351 }
350 352
@@ -373,7 +375,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
373 /* BB For newer servers which do not support Unicode, 375 /* BB For newer servers which do not support Unicode,
374 but thus do return domain here we could add parsing 376 but thus do return domain here we could add parsing
375 for it later, but it is not very important */ 377 for it later, but it is not very important */
376 cFYI(1, ("ascii: bytes left %d", bleft)); 378 cFYI(1, "ascii: bytes left %d", bleft);
377 379
378 return rc; 380 return rc;
379} 381}
@@ -384,16 +386,16 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
384 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; 386 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
385 387
386 if (blob_len < sizeof(CHALLENGE_MESSAGE)) { 388 if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
387 cERROR(1, ("challenge blob len %d too small", blob_len)); 389 cERROR(1, "challenge blob len %d too small", blob_len);
388 return -EINVAL; 390 return -EINVAL;
389 } 391 }
390 392
391 if (memcmp(pblob->Signature, "NTLMSSP", 8)) { 393 if (memcmp(pblob->Signature, "NTLMSSP", 8)) {
392 cERROR(1, ("blob signature incorrect %s", pblob->Signature)); 394 cERROR(1, "blob signature incorrect %s", pblob->Signature);
393 return -EINVAL; 395 return -EINVAL;
394 } 396 }
395 if (pblob->MessageType != NtLmChallenge) { 397 if (pblob->MessageType != NtLmChallenge) {
396 cERROR(1, ("Incorrect message type %d", pblob->MessageType)); 398 cERROR(1, "Incorrect message type %d", pblob->MessageType);
397 return -EINVAL; 399 return -EINVAL;
398 } 400 }
399 401
@@ -447,7 +449,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
447 This function returns the length of the data in the blob */ 449 This function returns the length of the data in the blob */
448static int build_ntlmssp_auth_blob(unsigned char *pbuffer, 450static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
449 struct cifsSesInfo *ses, 451 struct cifsSesInfo *ses,
450 const struct nls_table *nls_cp, int first) 452 const struct nls_table *nls_cp, bool first)
451{ 453{
452 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; 454 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
453 __u32 flags; 455 __u32 flags;
@@ -546,7 +548,7 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
546 548
547static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, 549static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
548 struct cifsSesInfo *ses, 550 struct cifsSesInfo *ses,
549 const struct nls_table *nls, int first_time) 551 const struct nls_table *nls, bool first_time)
550{ 552{
551 int bloblen; 553 int bloblen;
552 554
@@ -559,8 +561,8 @@ static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
559#endif 561#endif
560 562
561int 563int
562CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, 564CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
563 const struct nls_table *nls_cp) 565 const struct nls_table *nls_cp)
564{ 566{
565 int rc = 0; 567 int rc = 0;
566 int wct; 568 int wct;
@@ -577,13 +579,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
577 int bytes_remaining; 579 int bytes_remaining;
578 struct key *spnego_key = NULL; 580 struct key *spnego_key = NULL;
579 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ 581 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
582 bool first_time;
580 583
581 if (ses == NULL) 584 if (ses == NULL)
582 return -EINVAL; 585 return -EINVAL;
583 586
587 read_lock(&cifs_tcp_ses_lock);
588 first_time = is_first_ses_reconnect(ses);
589 read_unlock(&cifs_tcp_ses_lock);
590
584 type = ses->server->secType; 591 type = ses->server->secType;
585 592
586 cFYI(1, ("sess setup type %d", type)); 593 cFYI(1, "sess setup type %d", type);
587ssetup_ntlmssp_authenticate: 594ssetup_ntlmssp_authenticate:
588 if (phase == NtLmChallenge) 595 if (phase == NtLmChallenge)
589 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */ 596 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -664,7 +671,7 @@ ssetup_ntlmssp_authenticate:
664 changed to do higher than lanman dialect and 671 changed to do higher than lanman dialect and
665 we reconnected would we ever calc signing_key? */ 672 we reconnected would we ever calc signing_key? */
666 673
667 cFYI(1, ("Negotiating LANMAN setting up strings")); 674 cFYI(1, "Negotiating LANMAN setting up strings");
668 /* Unicode not allowed for LANMAN dialects */ 675 /* Unicode not allowed for LANMAN dialects */
669 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 676 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
670#endif 677#endif
@@ -744,7 +751,7 @@ ssetup_ntlmssp_authenticate:
744 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); 751 unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
745 } else 752 } else
746 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 753 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
747 } else if (type == Kerberos || type == MSKerberos) { 754 } else if (type == Kerberos) {
748#ifdef CONFIG_CIFS_UPCALL 755#ifdef CONFIG_CIFS_UPCALL
749 struct cifs_spnego_msg *msg; 756 struct cifs_spnego_msg *msg;
750 spnego_key = cifs_get_spnego_key(ses); 757 spnego_key = cifs_get_spnego_key(ses);
@@ -758,17 +765,17 @@ ssetup_ntlmssp_authenticate:
758 /* check version field to make sure that cifs.upcall is 765 /* check version field to make sure that cifs.upcall is
759 sending us a response in an expected form */ 766 sending us a response in an expected form */
760 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { 767 if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
761 cERROR(1, ("incorrect version of cifs.upcall (expected" 768 cERROR(1, "incorrect version of cifs.upcall (expected"
762 " %d but got %d)", 769 " %d but got %d)",
763 CIFS_SPNEGO_UPCALL_VERSION, msg->version)); 770 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
764 rc = -EKEYREJECTED; 771 rc = -EKEYREJECTED;
765 goto ssetup_exit; 772 goto ssetup_exit;
766 } 773 }
767 /* bail out if key is too long */ 774 /* bail out if key is too long */
768 if (msg->sesskey_len > 775 if (msg->sesskey_len >
769 sizeof(ses->server->mac_signing_key.data.krb5)) { 776 sizeof(ses->server->mac_signing_key.data.krb5)) {
770 cERROR(1, ("Kerberos signing key too long (%u bytes)", 777 cERROR(1, "Kerberos signing key too long (%u bytes)",
771 msg->sesskey_len)); 778 msg->sesskey_len);
772 rc = -EOVERFLOW; 779 rc = -EOVERFLOW;
773 goto ssetup_exit; 780 goto ssetup_exit;
774 } 781 }
@@ -796,7 +803,7 @@ ssetup_ntlmssp_authenticate:
796 /* BB: is this right? */ 803 /* BB: is this right? */
797 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); 804 ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
798#else /* ! CONFIG_CIFS_UPCALL */ 805#else /* ! CONFIG_CIFS_UPCALL */
799 cERROR(1, ("Kerberos negotiated but upcall support disabled!")); 806 cERROR(1, "Kerberos negotiated but upcall support disabled!");
800 rc = -ENOSYS; 807 rc = -ENOSYS;
801 goto ssetup_exit; 808 goto ssetup_exit;
802#endif /* CONFIG_CIFS_UPCALL */ 809#endif /* CONFIG_CIFS_UPCALL */
@@ -804,12 +811,12 @@ ssetup_ntlmssp_authenticate:
804#ifdef CONFIG_CIFS_EXPERIMENTAL 811#ifdef CONFIG_CIFS_EXPERIMENTAL
805 if (type == RawNTLMSSP) { 812 if (type == RawNTLMSSP) {
806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { 813 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
807 cERROR(1, ("NTLMSSP requires Unicode support")); 814 cERROR(1, "NTLMSSP requires Unicode support");
808 rc = -ENOSYS; 815 rc = -ENOSYS;
809 goto ssetup_exit; 816 goto ssetup_exit;
810 } 817 }
811 818
812 cFYI(1, ("ntlmssp session setup phase %d", phase)); 819 cFYI(1, "ntlmssp session setup phase %d", phase);
813 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; 820 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
814 capabilities |= CAP_EXTENDED_SECURITY; 821 capabilities |= CAP_EXTENDED_SECURITY;
815 pSMB->req.Capabilities |= cpu_to_le32(capabilities); 822 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
@@ -827,7 +834,7 @@ ssetup_ntlmssp_authenticate:
827 on the response (challenge) */ 834 on the response (challenge) */
828 smb_buf->Uid = ses->Suid; 835 smb_buf->Uid = ses->Suid;
829 } else { 836 } else {
830 cERROR(1, ("invalid phase %d", phase)); 837 cERROR(1, "invalid phase %d", phase);
831 rc = -ENOSYS; 838 rc = -ENOSYS;
832 goto ssetup_exit; 839 goto ssetup_exit;
833 } 840 }
@@ -839,12 +846,12 @@ ssetup_ntlmssp_authenticate:
839 } 846 }
840 unicode_oslm_strings(&bcc_ptr, nls_cp); 847 unicode_oslm_strings(&bcc_ptr, nls_cp);
841 } else { 848 } else {
842 cERROR(1, ("secType %d not supported!", type)); 849 cERROR(1, "secType %d not supported!", type);
843 rc = -ENOSYS; 850 rc = -ENOSYS;
844 goto ssetup_exit; 851 goto ssetup_exit;
845 } 852 }
846#else 853#else
847 cERROR(1, ("secType %d not supported!", type)); 854 cERROR(1, "secType %d not supported!", type);
848 rc = -ENOSYS; 855 rc = -ENOSYS;
849 goto ssetup_exit; 856 goto ssetup_exit;
850#endif 857#endif
@@ -862,7 +869,7 @@ ssetup_ntlmssp_authenticate:
862 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR); 869 CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
863 /* SMB request buf freed in SendReceive2 */ 870 /* SMB request buf freed in SendReceive2 */
864 871
865 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc)); 872 cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
866 873
867 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 874 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
868 smb_buf = (struct smb_hdr *)iov[0].iov_base; 875 smb_buf = (struct smb_hdr *)iov[0].iov_base;
@@ -870,7 +877,7 @@ ssetup_ntlmssp_authenticate:
870 if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError == 877 if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError ==
871 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) { 878 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
872 if (phase != NtLmNegotiate) { 879 if (phase != NtLmNegotiate) {
873 cERROR(1, ("Unexpected more processing error")); 880 cERROR(1, "Unexpected more processing error");
874 goto ssetup_exit; 881 goto ssetup_exit;
875 } 882 }
876 /* NTLMSSP Negotiate sent now processing challenge (response) */ 883 /* NTLMSSP Negotiate sent now processing challenge (response) */
@@ -882,14 +889,14 @@ ssetup_ntlmssp_authenticate:
882 889
883 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { 890 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
884 rc = -EIO; 891 rc = -EIO;
885 cERROR(1, ("bad word count %d", smb_buf->WordCount)); 892 cERROR(1, "bad word count %d", smb_buf->WordCount);
886 goto ssetup_exit; 893 goto ssetup_exit;
887 } 894 }
888 action = le16_to_cpu(pSMB->resp.Action); 895 action = le16_to_cpu(pSMB->resp.Action);
889 if (action & GUEST_LOGIN) 896 if (action & GUEST_LOGIN)
890 cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */ 897 cFYI(1, "Guest login"); /* BB mark SesInfo struct? */
891 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ 898 ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */
892 cFYI(1, ("UID = %d ", ses->Suid)); 899 cFYI(1, "UID = %d ", ses->Suid);
893 /* response can have either 3 or 4 word count - Samba sends 3 */ 900 /* response can have either 3 or 4 word count - Samba sends 3 */
894 /* and lanman response is 3 */ 901 /* and lanman response is 3 */
895 bytes_remaining = BCC(smb_buf); 902 bytes_remaining = BCC(smb_buf);
@@ -899,7 +906,7 @@ ssetup_ntlmssp_authenticate:
899 __u16 blob_len; 906 __u16 blob_len;
900 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); 907 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
901 if (blob_len > bytes_remaining) { 908 if (blob_len > bytes_remaining) {
902 cERROR(1, ("bad security blob length %d", blob_len)); 909 cERROR(1, "bad security blob length %d", blob_len);
903 rc = -EINVAL; 910 rc = -EINVAL;
904 goto ssetup_exit; 911 goto ssetup_exit;
905 } 912 }
@@ -933,7 +940,7 @@ ssetup_exit:
933 } 940 }
934 kfree(str_area); 941 kfree(str_area);
935 if (resp_buf_type == CIFS_SMALL_BUFFER) { 942 if (resp_buf_type == CIFS_SMALL_BUFFER) {
936 cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); 943 cFYI(1, "ssetup freeing small buf %p", iov[0].iov_base);
937 cifs_small_buf_release(iov[0].iov_base); 944 cifs_small_buf_release(iov[0].iov_base);
938 } else if (resp_buf_type == CIFS_LARGE_BUFFER) 945 } else if (resp_buf_type == CIFS_LARGE_BUFFER)
939 cifs_buf_release(iov[0].iov_base); 946 cifs_buf_release(iov[0].iov_base);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index ad081fe7eb18..82f78c4d6978 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -35,7 +35,6 @@
35#include "cifs_debug.h" 35#include "cifs_debug.h"
36 36
37extern mempool_t *cifs_mid_poolp; 37extern mempool_t *cifs_mid_poolp;
38extern struct kmem_cache *cifs_oplock_cachep;
39 38
40static struct mid_q_entry * 39static struct mid_q_entry *
41AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) 40AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
@@ -43,7 +42,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
43 struct mid_q_entry *temp; 42 struct mid_q_entry *temp;
44 43
45 if (server == NULL) { 44 if (server == NULL) {
46 cERROR(1, ("Null TCP session in AllocMidQEntry")); 45 cERROR(1, "Null TCP session in AllocMidQEntry");
47 return NULL; 46 return NULL;
48 } 47 }
49 48
@@ -55,7 +54,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
55 temp->mid = smb_buffer->Mid; /* always LE */ 54 temp->mid = smb_buffer->Mid; /* always LE */
56 temp->pid = current->pid; 55 temp->pid = current->pid;
57 temp->command = smb_buffer->Command; 56 temp->command = smb_buffer->Command;
58 cFYI(1, ("For smb_command %d", temp->command)); 57 cFYI(1, "For smb_command %d", temp->command);
59 /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */ 58 /* do_gettimeofday(&temp->when_sent);*/ /* easier to use jiffies */
60 /* when mid allocated can be before when sent */ 59 /* when mid allocated can be before when sent */
61 temp->when_alloc = jiffies; 60 temp->when_alloc = jiffies;
@@ -140,7 +139,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
140 total_len += iov[i].iov_len; 139 total_len += iov[i].iov_len;
141 140
142 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length); 141 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
143 cFYI(1, ("Sending smb: total_len %d", total_len)); 142 cFYI(1, "Sending smb: total_len %d", total_len);
144 dump_smb(smb_buffer, len); 143 dump_smb(smb_buffer, len);
145 144
146 i = 0; 145 i = 0;
@@ -168,9 +167,8 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
168 reconnect which may clear the network problem. 167 reconnect which may clear the network problem.
169 */ 168 */
170 if ((i >= 14) || (!server->noblocksnd && (i > 2))) { 169 if ((i >= 14) || (!server->noblocksnd && (i > 2))) {
171 cERROR(1, 170 cERROR(1, "sends on sock %p stuck for 15 seconds",
172 ("sends on sock %p stuck for 15 seconds", 171 ssocket);
173 ssocket));
174 rc = -EAGAIN; 172 rc = -EAGAIN;
175 break; 173 break;
176 } 174 }
@@ -184,13 +182,13 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
184 total_len = 0; 182 total_len = 0;
185 break; 183 break;
186 } else if (rc > total_len) { 184 } else if (rc > total_len) {
187 cERROR(1, ("sent %d requested %d", rc, total_len)); 185 cERROR(1, "sent %d requested %d", rc, total_len);
188 break; 186 break;
189 } 187 }
190 if (rc == 0) { 188 if (rc == 0) {
191 /* should never happen, letting socket clear before 189 /* should never happen, letting socket clear before
192 retrying is our only obvious option here */ 190 retrying is our only obvious option here */
193 cERROR(1, ("tcp sent no data")); 191 cERROR(1, "tcp sent no data");
194 msleep(500); 192 msleep(500);
195 continue; 193 continue;
196 } 194 }
@@ -213,8 +211,8 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
213 } 211 }
214 212
215 if ((total_len > 0) && (total_len != smb_buf_length + 4)) { 213 if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
216 cFYI(1, ("partial send (%d remaining), terminating session", 214 cFYI(1, "partial send (%d remaining), terminating session",
217 total_len)); 215 total_len);
218 /* If we have only sent part of an SMB then the next SMB 216 /* If we have only sent part of an SMB then the next SMB
219 could be taken as the remainder of this one. We need 217 could be taken as the remainder of this one. We need
220 to kill the socket so the server throws away the partial 218 to kill the socket so the server throws away the partial
@@ -223,7 +221,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
223 } 221 }
224 222
225 if (rc < 0) { 223 if (rc < 0) {
226 cERROR(1, ("Error %d sending data on socket to server", rc)); 224 cERROR(1, "Error %d sending data on socket to server", rc);
227 } else 225 } else
228 rc = 0; 226 rc = 0;
229 227
@@ -296,7 +294,7 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
296 } 294 }
297 295
298 if (ses->server->tcpStatus == CifsNeedReconnect) { 296 if (ses->server->tcpStatus == CifsNeedReconnect) {
299 cFYI(1, ("tcp session dead - return to caller to retry")); 297 cFYI(1, "tcp session dead - return to caller to retry");
300 return -EAGAIN; 298 return -EAGAIN;
301 } 299 }
302 300
@@ -348,7 +346,7 @@ static int wait_for_response(struct cifsSesInfo *ses,
348 lrt += time_to_wait; 346 lrt += time_to_wait;
349 if (time_after(jiffies, lrt)) { 347 if (time_after(jiffies, lrt)) {
350 /* No replies for time_to_wait. */ 348 /* No replies for time_to_wait. */
351 cERROR(1, ("server not responding")); 349 cERROR(1, "server not responding");
352 return -1; 350 return -1;
353 } 351 }
354 } else { 352 } else {
@@ -379,7 +377,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
379 iov[0].iov_len = in_buf->smb_buf_length + 4; 377 iov[0].iov_len = in_buf->smb_buf_length + 4;
380 flags |= CIFS_NO_RESP; 378 flags |= CIFS_NO_RESP;
381 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 379 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
382 cFYI(DBG2, ("SendRcvNoRsp flags %d rc %d", flags, rc)); 380 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
383 381
384 return rc; 382 return rc;
385} 383}
@@ -402,7 +400,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
402 400
403 if ((ses == NULL) || (ses->server == NULL)) { 401 if ((ses == NULL) || (ses->server == NULL)) {
404 cifs_small_buf_release(in_buf); 402 cifs_small_buf_release(in_buf);
405 cERROR(1, ("Null session")); 403 cERROR(1, "Null session");
406 return -EIO; 404 return -EIO;
407 } 405 }
408 406
@@ -471,7 +469,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
471 else if (long_op == CIFS_BLOCKING_OP) 469 else if (long_op == CIFS_BLOCKING_OP)
472 timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */ 470 timeout = 0x7FFFFFFF; /* large, but not so large as to wrap */
473 else { 471 else {
474 cERROR(1, ("unknown timeout flag %d", long_op)); 472 cERROR(1, "unknown timeout flag %d", long_op);
475 rc = -EIO; 473 rc = -EIO;
476 goto out; 474 goto out;
477 } 475 }
@@ -490,8 +488,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
490 spin_lock(&GlobalMid_Lock); 488 spin_lock(&GlobalMid_Lock);
491 489
492 if (midQ->resp_buf == NULL) { 490 if (midQ->resp_buf == NULL) {
493 cERROR(1, ("No response to cmd %d mid %d", 491 cERROR(1, "No response to cmd %d mid %d",
494 midQ->command, midQ->mid)); 492 midQ->command, midQ->mid);
495 if (midQ->midState == MID_REQUEST_SUBMITTED) { 493 if (midQ->midState == MID_REQUEST_SUBMITTED) {
496 if (ses->server->tcpStatus == CifsExiting) 494 if (ses->server->tcpStatus == CifsExiting)
497 rc = -EHOSTDOWN; 495 rc = -EHOSTDOWN;
@@ -504,7 +502,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
504 if (rc != -EHOSTDOWN) { 502 if (rc != -EHOSTDOWN) {
505 if (midQ->midState == MID_RETRY_NEEDED) { 503 if (midQ->midState == MID_RETRY_NEEDED) {
506 rc = -EAGAIN; 504 rc = -EAGAIN;
507 cFYI(1, ("marking request for retry")); 505 cFYI(1, "marking request for retry");
508 } else { 506 } else {
509 rc = -EIO; 507 rc = -EIO;
510 } 508 }
@@ -521,8 +519,8 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
521 receive_len = midQ->resp_buf->smb_buf_length; 519 receive_len = midQ->resp_buf->smb_buf_length;
522 520
523 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 521 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
524 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 522 cERROR(1, "Frame too large received. Length: %d Xid: %d",
525 receive_len, xid)); 523 receive_len, xid);
526 rc = -EIO; 524 rc = -EIO;
527 goto out; 525 goto out;
528 } 526 }
@@ -548,7 +546,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
548 &ses->server->mac_signing_key, 546 &ses->server->mac_signing_key,
549 midQ->sequence_number+1); 547 midQ->sequence_number+1);
550 if (rc) { 548 if (rc) {
551 cERROR(1, ("Unexpected SMB signature")); 549 cERROR(1, "Unexpected SMB signature");
552 /* BB FIXME add code to kill session */ 550 /* BB FIXME add code to kill session */
553 } 551 }
554 } 552 }
@@ -569,7 +567,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
569 DeleteMidQEntry */ 567 DeleteMidQEntry */
570 } else { 568 } else {
571 rc = -EIO; 569 rc = -EIO;
572 cFYI(1, ("Bad MID state?")); 570 cFYI(1, "Bad MID state?");
573 } 571 }
574 572
575out: 573out:
@@ -591,11 +589,11 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
591 struct mid_q_entry *midQ; 589 struct mid_q_entry *midQ;
592 590
593 if (ses == NULL) { 591 if (ses == NULL) {
594 cERROR(1, ("Null smb session")); 592 cERROR(1, "Null smb session");
595 return -EIO; 593 return -EIO;
596 } 594 }
597 if (ses->server == NULL) { 595 if (ses->server == NULL) {
598 cERROR(1, ("Null tcp session")); 596 cERROR(1, "Null tcp session");
599 return -EIO; 597 return -EIO;
600 } 598 }
601 599
@@ -607,8 +605,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
607 use ses->maxReq */ 605 use ses->maxReq */
608 606
609 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 607 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
610 cERROR(1, ("Illegal length, greater than maximum frame, %d", 608 cERROR(1, "Illegal length, greater than maximum frame, %d",
611 in_buf->smb_buf_length)); 609 in_buf->smb_buf_length);
612 return -EIO; 610 return -EIO;
613 } 611 }
614 612
@@ -665,7 +663,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
665 else if (long_op == CIFS_BLOCKING_OP) 663 else if (long_op == CIFS_BLOCKING_OP)
666 timeout = 0x7FFFFFFF; /* large but no so large as to wrap */ 664 timeout = 0x7FFFFFFF; /* large but no so large as to wrap */
667 else { 665 else {
668 cERROR(1, ("unknown timeout flag %d", long_op)); 666 cERROR(1, "unknown timeout flag %d", long_op);
669 rc = -EIO; 667 rc = -EIO;
670 goto out; 668 goto out;
671 } 669 }
@@ -681,8 +679,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
681 679
682 spin_lock(&GlobalMid_Lock); 680 spin_lock(&GlobalMid_Lock);
683 if (midQ->resp_buf == NULL) { 681 if (midQ->resp_buf == NULL) {
684 cERROR(1, ("No response for cmd %d mid %d", 682 cERROR(1, "No response for cmd %d mid %d",
685 midQ->command, midQ->mid)); 683 midQ->command, midQ->mid);
686 if (midQ->midState == MID_REQUEST_SUBMITTED) { 684 if (midQ->midState == MID_REQUEST_SUBMITTED) {
687 if (ses->server->tcpStatus == CifsExiting) 685 if (ses->server->tcpStatus == CifsExiting)
688 rc = -EHOSTDOWN; 686 rc = -EHOSTDOWN;
@@ -695,7 +693,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
695 if (rc != -EHOSTDOWN) { 693 if (rc != -EHOSTDOWN) {
696 if (midQ->midState == MID_RETRY_NEEDED) { 694 if (midQ->midState == MID_RETRY_NEEDED) {
697 rc = -EAGAIN; 695 rc = -EAGAIN;
698 cFYI(1, ("marking request for retry")); 696 cFYI(1, "marking request for retry");
699 } else { 697 } else {
700 rc = -EIO; 698 rc = -EIO;
701 } 699 }
@@ -712,8 +710,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
712 receive_len = midQ->resp_buf->smb_buf_length; 710 receive_len = midQ->resp_buf->smb_buf_length;
713 711
714 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 712 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
715 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 713 cERROR(1, "Frame too large received. Length: %d Xid: %d",
716 receive_len, xid)); 714 receive_len, xid);
717 rc = -EIO; 715 rc = -EIO;
718 goto out; 716 goto out;
719 } 717 }
@@ -736,7 +734,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
736 &ses->server->mac_signing_key, 734 &ses->server->mac_signing_key,
737 midQ->sequence_number+1); 735 midQ->sequence_number+1);
738 if (rc) { 736 if (rc) {
739 cERROR(1, ("Unexpected SMB signature")); 737 cERROR(1, "Unexpected SMB signature");
740 /* BB FIXME add code to kill session */ 738 /* BB FIXME add code to kill session */
741 } 739 }
742 } 740 }
@@ -753,7 +751,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
753 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf)); 751 BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
754 } else { 752 } else {
755 rc = -EIO; 753 rc = -EIO;
756 cERROR(1, ("Bad MID state?")); 754 cERROR(1, "Bad MID state?");
757 } 755 }
758 756
759out: 757out:
@@ -824,13 +822,13 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
824 struct cifsSesInfo *ses; 822 struct cifsSesInfo *ses;
825 823
826 if (tcon == NULL || tcon->ses == NULL) { 824 if (tcon == NULL || tcon->ses == NULL) {
827 cERROR(1, ("Null smb session")); 825 cERROR(1, "Null smb session");
828 return -EIO; 826 return -EIO;
829 } 827 }
830 ses = tcon->ses; 828 ses = tcon->ses;
831 829
832 if (ses->server == NULL) { 830 if (ses->server == NULL) {
833 cERROR(1, ("Null tcp session")); 831 cERROR(1, "Null tcp session");
834 return -EIO; 832 return -EIO;
835 } 833 }
836 834
@@ -842,8 +840,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
842 use ses->maxReq */ 840 use ses->maxReq */
843 841
844 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 842 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
845 cERROR(1, ("Illegal length, greater than maximum frame, %d", 843 cERROR(1, "Illegal length, greater than maximum frame, %d",
846 in_buf->smb_buf_length)); 844 in_buf->smb_buf_length);
847 return -EIO; 845 return -EIO;
848 } 846 }
849 847
@@ -933,8 +931,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
933 spin_unlock(&GlobalMid_Lock); 931 spin_unlock(&GlobalMid_Lock);
934 receive_len = midQ->resp_buf->smb_buf_length; 932 receive_len = midQ->resp_buf->smb_buf_length;
935 } else { 933 } else {
936 cERROR(1, ("No response for cmd %d mid %d", 934 cERROR(1, "No response for cmd %d mid %d",
937 midQ->command, midQ->mid)); 935 midQ->command, midQ->mid);
938 if (midQ->midState == MID_REQUEST_SUBMITTED) { 936 if (midQ->midState == MID_REQUEST_SUBMITTED) {
939 if (ses->server->tcpStatus == CifsExiting) 937 if (ses->server->tcpStatus == CifsExiting)
940 rc = -EHOSTDOWN; 938 rc = -EHOSTDOWN;
@@ -947,7 +945,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
947 if (rc != -EHOSTDOWN) { 945 if (rc != -EHOSTDOWN) {
948 if (midQ->midState == MID_RETRY_NEEDED) { 946 if (midQ->midState == MID_RETRY_NEEDED) {
949 rc = -EAGAIN; 947 rc = -EAGAIN;
950 cFYI(1, ("marking request for retry")); 948 cFYI(1, "marking request for retry");
951 } else { 949 } else {
952 rc = -EIO; 950 rc = -EIO;
953 } 951 }
@@ -958,8 +956,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
958 } 956 }
959 957
960 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 958 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
961 cERROR(1, ("Frame too large received. Length: %d Xid: %d", 959 cERROR(1, "Frame too large received. Length: %d Xid: %d",
962 receive_len, xid)); 960 receive_len, xid);
963 rc = -EIO; 961 rc = -EIO;
964 goto out; 962 goto out;
965 } 963 }
@@ -968,7 +966,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
968 966
969 if ((out_buf == NULL) || (midQ->midState != MID_RESPONSE_RECEIVED)) { 967 if ((out_buf == NULL) || (midQ->midState != MID_RESPONSE_RECEIVED)) {
970 rc = -EIO; 968 rc = -EIO;
971 cERROR(1, ("Bad MID state?")); 969 cERROR(1, "Bad MID state?");
972 goto out; 970 goto out;
973 } 971 }
974 972
@@ -986,7 +984,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
986 &ses->server->mac_signing_key, 984 &ses->server->mac_signing_key,
987 midQ->sequence_number+1); 985 midQ->sequence_number+1);
988 if (rc) { 986 if (rc) {
989 cERROR(1, ("Unexpected SMB signature")); 987 cERROR(1, "Unexpected SMB signature");
990 /* BB FIXME add code to kill session */ 988 /* BB FIXME add code to kill session */
991 } 989 }
992 } 990 }
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index f555ce077d4f..a1509207bfa6 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -70,12 +70,12 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
70 return rc; 70 return rc;
71 } 71 }
72 if (ea_name == NULL) { 72 if (ea_name == NULL) {
73 cFYI(1, ("Null xattr names not supported")); 73 cFYI(1, "Null xattr names not supported");
74 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) 74 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5)
75 && (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4))) { 75 && (strncmp(ea_name, CIFS_XATTR_OS2_PREFIX, 4))) {
76 cFYI(1, 76 cFYI(1,
77 ("illegal xattr request %s (only user namespace supported)", 77 "illegal xattr request %s (only user namespace supported)",
78 ea_name)); 78 ea_name);
79 /* BB what if no namespace prefix? */ 79 /* BB what if no namespace prefix? */
80 /* Should we just pass them to server, except for 80 /* Should we just pass them to server, except for
81 system and perhaps security prefixes? */ 81 system and perhaps security prefixes? */
@@ -131,19 +131,19 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
131 search server for EAs or streams to 131 search server for EAs or streams to
132 returns as xattrs */ 132 returns as xattrs */
133 if (value_size > MAX_EA_VALUE_SIZE) { 133 if (value_size > MAX_EA_VALUE_SIZE) {
134 cFYI(1, ("size of EA value too large")); 134 cFYI(1, "size of EA value too large");
135 kfree(full_path); 135 kfree(full_path);
136 FreeXid(xid); 136 FreeXid(xid);
137 return -EOPNOTSUPP; 137 return -EOPNOTSUPP;
138 } 138 }
139 139
140 if (ea_name == NULL) { 140 if (ea_name == NULL) {
141 cFYI(1, ("Null xattr names not supported")); 141 cFYI(1, "Null xattr names not supported");
142 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { 142 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) {
143 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 143 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
144 goto set_ea_exit; 144 goto set_ea_exit;
145 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) 145 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0)
146 cFYI(1, ("attempt to set cifs inode metadata")); 146 cFYI(1, "attempt to set cifs inode metadata");
147 147
148 ea_name += 5; /* skip past user. prefix */ 148 ea_name += 5; /* skip past user. prefix */
149 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 149 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
@@ -169,9 +169,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
169 ACL_TYPE_ACCESS, cifs_sb->local_nls, 169 ACL_TYPE_ACCESS, cifs_sb->local_nls,
170 cifs_sb->mnt_cifs_flags & 170 cifs_sb->mnt_cifs_flags &
171 CIFS_MOUNT_MAP_SPECIAL_CHR); 171 CIFS_MOUNT_MAP_SPECIAL_CHR);
172 cFYI(1, ("set POSIX ACL rc %d", rc)); 172 cFYI(1, "set POSIX ACL rc %d", rc);
173#else 173#else
174 cFYI(1, ("set POSIX ACL not supported")); 174 cFYI(1, "set POSIX ACL not supported");
175#endif 175#endif
176 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, 176 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
177 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { 177 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -182,13 +182,13 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
182 ACL_TYPE_DEFAULT, cifs_sb->local_nls, 182 ACL_TYPE_DEFAULT, cifs_sb->local_nls,
183 cifs_sb->mnt_cifs_flags & 183 cifs_sb->mnt_cifs_flags &
184 CIFS_MOUNT_MAP_SPECIAL_CHR); 184 CIFS_MOUNT_MAP_SPECIAL_CHR);
185 cFYI(1, ("set POSIX default ACL rc %d", rc)); 185 cFYI(1, "set POSIX default ACL rc %d", rc);
186#else 186#else
187 cFYI(1, ("set default POSIX ACL not supported")); 187 cFYI(1, "set default POSIX ACL not supported");
188#endif 188#endif
189 } else { 189 } else {
190 cFYI(1, ("illegal xattr request %s (only user namespace" 190 cFYI(1, "illegal xattr request %s (only user namespace"
191 " supported)", ea_name)); 191 " supported)", ea_name);
192 /* BB what if no namespace prefix? */ 192 /* BB what if no namespace prefix? */
193 /* Should we just pass them to server, except for 193 /* Should we just pass them to server, except for
194 system and perhaps security prefixes? */ 194 system and perhaps security prefixes? */
@@ -235,13 +235,13 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
235 /* return dos attributes as pseudo xattr */ 235 /* return dos attributes as pseudo xattr */
236 /* return alt name if available as pseudo attr */ 236 /* return alt name if available as pseudo attr */
237 if (ea_name == NULL) { 237 if (ea_name == NULL) {
238 cFYI(1, ("Null xattr names not supported")); 238 cFYI(1, "Null xattr names not supported");
239 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) { 239 } else if (strncmp(ea_name, CIFS_XATTR_USER_PREFIX, 5) == 0) {
240 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 240 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
241 goto get_ea_exit; 241 goto get_ea_exit;
242 242
243 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) { 243 if (strncmp(ea_name, CIFS_XATTR_DOS_ATTRIB, 14) == 0) {
244 cFYI(1, ("attempt to query cifs inode metadata")); 244 cFYI(1, "attempt to query cifs inode metadata");
245 /* revalidate/getattr then populate from inode */ 245 /* revalidate/getattr then populate from inode */
246 } /* BB add else when above is implemented */ 246 } /* BB add else when above is implemented */
247 ea_name += 5; /* skip past user. prefix */ 247 ea_name += 5; /* skip past user. prefix */
@@ -287,7 +287,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
287 } 287 }
288#endif /* EXPERIMENTAL */ 288#endif /* EXPERIMENTAL */
289#else 289#else
290 cFYI(1, ("query POSIX ACL not supported yet")); 290 cFYI(1, "query POSIX ACL not supported yet");
291#endif /* CONFIG_CIFS_POSIX */ 291#endif /* CONFIG_CIFS_POSIX */
292 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, 292 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
293 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { 293 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -299,18 +299,18 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
299 cifs_sb->mnt_cifs_flags & 299 cifs_sb->mnt_cifs_flags &
300 CIFS_MOUNT_MAP_SPECIAL_CHR); 300 CIFS_MOUNT_MAP_SPECIAL_CHR);
301#else 301#else
302 cFYI(1, ("query POSIX default ACL not supported yet")); 302 cFYI(1, "query POSIX default ACL not supported yet");
303#endif 303#endif
304 } else if (strncmp(ea_name, 304 } else if (strncmp(ea_name,
305 CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { 305 CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
306 cFYI(1, ("Trusted xattr namespace not supported yet")); 306 cFYI(1, "Trusted xattr namespace not supported yet");
307 } else if (strncmp(ea_name, 307 } else if (strncmp(ea_name,
308 CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) { 308 CIFS_XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) {
309 cFYI(1, ("Security xattr namespace not supported yet")); 309 cFYI(1, "Security xattr namespace not supported yet");
310 } else 310 } else
311 cFYI(1, 311 cFYI(1,
312 ("illegal xattr request %s (only user namespace supported)", 312 "illegal xattr request %s (only user namespace supported)",
313 ea_name)); 313 ea_name);
314 314
315 /* We could add an additional check for streams ie 315 /* We could add an additional check for streams ie
316 if proc/fs/cifs/streamstoxattr is set then 316 if proc/fs/cifs/streamstoxattr is set then
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc478..05448730f840 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
1531 if (retval < 0) 1531 if (retval < 0)
1532 goto out; 1532 goto out;
1533 1533
1534 current->stack_start = current->mm->start_stack;
1535
1536 /* execve succeeded */ 1534 /* execve succeeded */
1537 current->fs->in_exec = 0; 1535 current->fs->in_exec = 0;
1538 current->in_execve = 0; 1536 current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c32a1b6a856b..641640dc7ae5 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -102,7 +102,6 @@
102#include <linux/nbd.h> 102#include <linux/nbd.h>
103#include <linux/random.h> 103#include <linux/random.h>
104#include <linux/filter.h> 104#include <linux/filter.h>
105#include <linux/pktcdvd.h>
106 105
107#include <linux/hiddev.h> 106#include <linux/hiddev.h>
108 107
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
1126COMPATIBLE_IOCTL(PPGETPHASE) 1125COMPATIBLE_IOCTL(PPGETPHASE)
1127COMPATIBLE_IOCTL(PPGETFLAGS) 1126COMPATIBLE_IOCTL(PPGETFLAGS)
1128COMPATIBLE_IOCTL(PPSETFLAGS) 1127COMPATIBLE_IOCTL(PPSETFLAGS)
1129/* pktcdvd */
1130COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
1131/* Big A */ 1128/* Big A */
1132/* sparc only */ 1129/* sparc only */
1133/* Big Q for sound/OSS */ 1130/* Big Q for sound/OSS */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e48b52205aa..0b502f80c691 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group)
645 645
646 configfs_detach_group(sd->s_element); 646 configfs_detach_group(sd->s_element);
647 child->d_inode->i_flags |= S_DEAD; 647 child->d_inode->i_flags |= S_DEAD;
648 dont_mount(child);
648 649
649 mutex_unlock(&child->d_inode->i_mutex); 650 mutex_unlock(&child->d_inode->i_mutex);
650 651
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item,
840 mutex_lock(&dentry->d_inode->i_mutex); 841 mutex_lock(&dentry->d_inode->i_mutex);
841 configfs_remove_dir(item); 842 configfs_remove_dir(item);
842 dentry->d_inode->i_flags |= S_DEAD; 843 dentry->d_inode->i_flags |= S_DEAD;
844 dont_mount(dentry);
843 mutex_unlock(&dentry->d_inode->i_mutex); 845 mutex_unlock(&dentry->d_inode->i_mutex);
844 d_delete(dentry); 846 d_delete(dentry);
845 } 847 }
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item,
882 if (ret) { 884 if (ret) {
883 configfs_detach_item(item); 885 configfs_detach_item(item);
884 dentry->d_inode->i_flags |= S_DEAD; 886 dentry->d_inode->i_flags |= S_DEAD;
887 dont_mount(dentry);
885 } 888 }
886 configfs_adjust_dir_dirent_depth_after_populate(sd); 889 configfs_adjust_dir_dirent_depth_after_populate(sd);
887 mutex_unlock(&dentry->d_inode->i_mutex); 890 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1725 mutex_unlock(&configfs_symlink_mutex); 1728 mutex_unlock(&configfs_symlink_mutex);
1726 configfs_detach_group(&group->cg_item); 1729 configfs_detach_group(&group->cg_item);
1727 dentry->d_inode->i_flags |= S_DEAD; 1730 dentry->d_inode->i_flags |= S_DEAD;
1731 dont_mount(dentry);
1728 mutex_unlock(&dentry->d_inode->i_mutex); 1732 mutex_unlock(&dentry->d_inode->i_mutex);
1729 1733
1730 d_delete(dentry); 1734 d_delete(dentry);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 17903b491298..031dbe3a15ca 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -733,10 +733,7 @@ static void lkb_add_ordered(struct list_head *new, struct list_head *head,
733 if (lkb->lkb_rqmode < mode) 733 if (lkb->lkb_rqmode < mode)
734 break; 734 break;
735 735
736 if (!lkb) 736 __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
737 list_add_tail(new, head);
738 else
739 __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
740} 737}
741 738
742/* add/remove lkb to rsb's grant/convert/wait queue */ 739/* add/remove lkb to rsb's grant/convert/wait queue */
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 8b6e73c47435..b6272853130c 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -215,6 +215,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode)
215 if (!ast_type) { 215 if (!ast_type) {
216 kref_get(&lkb->lkb_ref); 216 kref_get(&lkb->lkb_ref);
217 list_add_tail(&lkb->lkb_astqueue, &proc->asts); 217 list_add_tail(&lkb->lkb_astqueue, &proc->asts);
218 lkb->lkb_ast_first = type;
218 wake_up_interruptible(&proc->wait); 219 wake_up_interruptible(&proc->wait);
219 } 220 }
220 if (type == AST_COMP && (ast_type & AST_COMP)) 221 if (type == AST_COMP && (ast_type & AST_COMP))
@@ -223,7 +224,6 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode)
223 224
224 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type); 225 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
225 if (eol) { 226 if (eol) {
226 lkb->lkb_ast_type &= ~AST_BAST;
227 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE; 227 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
228 } 228 }
229 229
@@ -706,7 +706,7 @@ static int device_close(struct inode *inode, struct file *file)
706} 706}
707 707
708static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type, 708static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
709 int bmode, char __user *buf, size_t count) 709 int mode, char __user *buf, size_t count)
710{ 710{
711#ifdef CONFIG_COMPAT 711#ifdef CONFIG_COMPAT
712 struct dlm_lock_result32 result32; 712 struct dlm_lock_result32 result32;
@@ -733,7 +733,7 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
733 if (type == AST_BAST) { 733 if (type == AST_BAST) {
734 result.user_astaddr = ua->bastaddr; 734 result.user_astaddr = ua->bastaddr;
735 result.user_astparam = ua->bastparam; 735 result.user_astparam = ua->bastparam;
736 result.bast_mode = bmode; 736 result.bast_mode = mode;
737 } else { 737 } else {
738 result.user_astaddr = ua->castaddr; 738 result.user_astaddr = ua->castaddr;
739 result.user_astparam = ua->castparam; 739 result.user_astparam = ua->castparam;
@@ -801,7 +801,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
801 struct dlm_user_proc *proc = file->private_data; 801 struct dlm_user_proc *proc = file->private_data;
802 struct dlm_lkb *lkb; 802 struct dlm_lkb *lkb;
803 DECLARE_WAITQUEUE(wait, current); 803 DECLARE_WAITQUEUE(wait, current);
804 int error, type=0, bmode=0, removed = 0; 804 int error = 0, removed;
805 int ret_type, ret_mode;
806 int bastmode, castmode, do_bast, do_cast;
805 807
806 if (count == sizeof(struct dlm_device_version)) { 808 if (count == sizeof(struct dlm_device_version)) {
807 error = copy_version_to_user(buf, count); 809 error = copy_version_to_user(buf, count);
@@ -820,6 +822,8 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
820#endif 822#endif
821 return -EINVAL; 823 return -EINVAL;
822 824
825 try_another:
826
823 /* do we really need this? can a read happen after a close? */ 827 /* do we really need this? can a read happen after a close? */
824 if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) 828 if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
825 return -EINVAL; 829 return -EINVAL;
@@ -855,13 +859,55 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
855 859
856 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); 860 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
857 861
858 if (lkb->lkb_ast_type & AST_COMP) { 862 removed = 0;
859 lkb->lkb_ast_type &= ~AST_COMP; 863 ret_type = 0;
860 type = AST_COMP; 864 ret_mode = 0;
861 } else if (lkb->lkb_ast_type & AST_BAST) { 865 do_bast = lkb->lkb_ast_type & AST_BAST;
862 lkb->lkb_ast_type &= ~AST_BAST; 866 do_cast = lkb->lkb_ast_type & AST_COMP;
863 type = AST_BAST; 867 bastmode = lkb->lkb_bastmode;
864 bmode = lkb->lkb_bastmode; 868 castmode = lkb->lkb_castmode;
869
870 /* when both are queued figure out which to do first and
871 switch first so the other goes in the next read */
872
873 if (do_cast && do_bast) {
874 if (lkb->lkb_ast_first == AST_COMP) {
875 ret_type = AST_COMP;
876 ret_mode = castmode;
877 lkb->lkb_ast_type &= ~AST_COMP;
878 lkb->lkb_ast_first = AST_BAST;
879 } else {
880 ret_type = AST_BAST;
881 ret_mode = bastmode;
882 lkb->lkb_ast_type &= ~AST_BAST;
883 lkb->lkb_ast_first = AST_COMP;
884 }
885 } else {
886 ret_type = lkb->lkb_ast_first;
887 ret_mode = (ret_type == AST_COMP) ? castmode : bastmode;
888 lkb->lkb_ast_type &= ~ret_type;
889 lkb->lkb_ast_first = 0;
890 }
891
892 /* if we're doing a bast but the bast is unnecessary, then
893 switch to do nothing or do a cast if that was needed next */
894
895 if ((ret_type == AST_BAST) &&
896 dlm_modes_compat(bastmode, lkb->lkb_castmode_done)) {
897 ret_type = 0;
898 ret_mode = 0;
899
900 if (do_cast) {
901 ret_type = AST_COMP;
902 ret_mode = castmode;
903 lkb->lkb_ast_type &= ~AST_COMP;
904 lkb->lkb_ast_first = 0;
905 }
906 }
907
908 if (lkb->lkb_ast_first != lkb->lkb_ast_type) {
909 log_print("device_read %x ast_first %x ast_type %x",
910 lkb->lkb_id, lkb->lkb_ast_first, lkb->lkb_ast_type);
865 } 911 }
866 912
867 if (!lkb->lkb_ast_type) { 913 if (!lkb->lkb_ast_type) {
@@ -870,15 +916,29 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
870 } 916 }
871 spin_unlock(&proc->asts_spin); 917 spin_unlock(&proc->asts_spin);
872 918
873 error = copy_result_to_user(lkb->lkb_ua, 919 if (ret_type) {
874 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags), 920 error = copy_result_to_user(lkb->lkb_ua,
875 type, bmode, buf, count); 921 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
922 ret_type, ret_mode, buf, count);
923
924 if (ret_type == AST_COMP)
925 lkb->lkb_castmode_done = castmode;
926 if (ret_type == AST_BAST)
927 lkb->lkb_bastmode_done = bastmode;
928 }
876 929
877 /* removes reference for the proc->asts lists added by 930 /* removes reference for the proc->asts lists added by
878 dlm_user_add_ast() and may result in the lkb being freed */ 931 dlm_user_add_ast() and may result in the lkb being freed */
932
879 if (removed) 933 if (removed)
880 dlm_put_lkb(lkb); 934 dlm_put_lkb(lkb);
881 935
936 /* the bast that was queued was eliminated (see unnecessary above),
937 leaving nothing to return */
938
939 if (!ret_type)
940 goto try_another;
941
882 return error; 942 return error;
883} 943}
884 944
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bd056a5b4efc..3817149919cb 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1140,8 +1140,7 @@ retry:
1140 * ep_poll_callback() when events will become available. 1140 * ep_poll_callback() when events will become available.
1141 */ 1141 */
1142 init_waitqueue_entry(&wait, current); 1142 init_waitqueue_entry(&wait, current);
1143 wait.flags |= WQ_FLAG_EXCLUSIVE; 1143 __add_wait_queue_exclusive(&ep->wq, &wait);
1144 __add_wait_queue(&ep->wq, &wait);
1145 1144
1146 for (;;) { 1145 for (;;) {
1147 /* 1146 /*
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b9..e6e94c626c2c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
1387 if (retval < 0) 1387 if (retval < 0)
1388 goto out; 1388 goto out;
1389 1389
1390 current->stack_start = current->mm->start_stack;
1391
1392 /* execve succeeded */ 1390 /* execve succeeded */
1393 current->fs->in_exec = 0; 1391 current->fs->in_exec = 0;
1394 current->in_execve = 0; 1392 current->in_execve = 0;
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 54373278a353..22721b2fd890 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -85,6 +85,7 @@ struct exofs_sb_info {
85 u32 s_next_generation; /* next gen # to use */ 85 u32 s_next_generation; /* next gen # to use */
86 atomic_t s_curr_pending; /* number of pending commands */ 86 atomic_t s_curr_pending; /* number of pending commands */
87 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ 87 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
88 struct backing_dev_info bdi; /* register our bdi with VFS */
88 89
89 struct pnfs_osd_data_map data_map; /* Default raid to use 90 struct pnfs_osd_data_map data_map; /* Default raid to use
90 * FIXME: Needed ? 91 * FIXME: Needed ?
@@ -93,7 +94,6 @@ struct exofs_sb_info {
93 struct exofs_layout layout; /* Default files layout, 94 struct exofs_layout layout; /* Default files layout,
94 * contains the variable osd_dev 95 * contains the variable osd_dev
95 * array. Keep last */ 96 * array. Keep last */
96 struct backing_dev_info bdi;
97 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ 97 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
98}; 98};
99 99
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 3cf038c055d7..e8766a396776 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1332,6 +1332,12 @@ retry_alloc:
1332 1332
1333 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1333 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1334 /* 1334 /*
1335 * skip this group (and avoid loading bitmap) if there
1336 * are no free blocks
1337 */
1338 if (!free_blocks)
1339 continue;
1340 /*
1335 * skip this group if the number of 1341 * skip this group if the number of
1336 * free blocks is less than half of the reservation 1342 * free blocks is less than half of the reservation
1337 * window size. 1343 * window size.
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad7d572ee8dc..f0c5286f9342 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -106,7 +106,7 @@ void ext2_free_inode (struct inode * inode)
106 struct super_block * sb = inode->i_sb; 106 struct super_block * sb = inode->i_sb;
107 int is_directory; 107 int is_directory;
108 unsigned long ino; 108 unsigned long ino;
109 struct buffer_head *bitmap_bh = NULL; 109 struct buffer_head *bitmap_bh;
110 unsigned long block_group; 110 unsigned long block_group;
111 unsigned long bit; 111 unsigned long bit;
112 struct ext2_super_block * es; 112 struct ext2_super_block * es;
@@ -135,14 +135,13 @@ void ext2_free_inode (struct inode * inode)
135 ino > le32_to_cpu(es->s_inodes_count)) { 135 ino > le32_to_cpu(es->s_inodes_count)) {
136 ext2_error (sb, "ext2_free_inode", 136 ext2_error (sb, "ext2_free_inode",
137 "reserved or nonexistent inode %lu", ino); 137 "reserved or nonexistent inode %lu", ino);
138 goto error_return; 138 return;
139 } 139 }
140 block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); 140 block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
141 bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); 141 bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
142 brelse(bitmap_bh);
143 bitmap_bh = read_inode_bitmap(sb, block_group); 142 bitmap_bh = read_inode_bitmap(sb, block_group);
144 if (!bitmap_bh) 143 if (!bitmap_bh)
145 goto error_return; 144 return;
146 145
147 /* Ok, now we can actually update the inode bitmaps.. */ 146 /* Ok, now we can actually update the inode bitmaps.. */
148 if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), 147 if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
@@ -154,7 +153,7 @@ void ext2_free_inode (struct inode * inode)
154 mark_buffer_dirty(bitmap_bh); 153 mark_buffer_dirty(bitmap_bh);
155 if (sb->s_flags & MS_SYNCHRONOUS) 154 if (sb->s_flags & MS_SYNCHRONOUS)
156 sync_dirty_buffer(bitmap_bh); 155 sync_dirty_buffer(bitmap_bh);
157error_return: 156
158 brelse(bitmap_bh); 157 brelse(bitmap_bh);
159} 158}
160 159
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fc13cc119aad..527c46d9bc1f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -22,7 +22,6 @@
22 * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 22 * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
23 */ 23 */
24 24
25#include <linux/smp_lock.h>
26#include <linux/time.h> 25#include <linux/time.h>
27#include <linux/highuid.h> 26#include <linux/highuid.h>
28#include <linux/pagemap.h> 27#include <linux/pagemap.h>
@@ -1406,11 +1405,11 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
1406 /* If this is the first large file 1405 /* If this is the first large file
1407 * created, add a flag to the superblock. 1406 * created, add a flag to the superblock.
1408 */ 1407 */
1409 lock_kernel(); 1408 spin_lock(&EXT2_SB(sb)->s_lock);
1410 ext2_update_dynamic_rev(sb); 1409 ext2_update_dynamic_rev(sb);
1411 EXT2_SET_RO_COMPAT_FEATURE(sb, 1410 EXT2_SET_RO_COMPAT_FEATURE(sb,
1412 EXT2_FEATURE_RO_COMPAT_LARGE_FILE); 1411 EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
1413 unlock_kernel(); 1412 spin_unlock(&EXT2_SB(sb)->s_lock);
1414 ext2_write_super(sb); 1413 ext2_write_super(sb);
1415 } 1414 }
1416 } 1415 }
@@ -1467,7 +1466,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1467 if (error) 1466 if (error)
1468 return error; 1467 return error;
1469 1468
1470 if (iattr->ia_valid & ATTR_SIZE) 1469 if (is_quota_modification(inode, iattr))
1471 dquot_initialize(inode); 1470 dquot_initialize(inode);
1472 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 1471 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
1473 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { 1472 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 42e4a303b675..71e9eb1fa696 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -26,7 +26,6 @@
26#include <linux/random.h> 26#include <linux/random.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/exportfs.h> 28#include <linux/exportfs.h>
29#include <linux/smp_lock.h>
30#include <linux/vfs.h> 29#include <linux/vfs.h>
31#include <linux/seq_file.h> 30#include <linux/seq_file.h>
32#include <linux/mount.h> 31#include <linux/mount.h>
@@ -39,7 +38,7 @@
39#include "xip.h" 38#include "xip.h"
40 39
41static void ext2_sync_super(struct super_block *sb, 40static void ext2_sync_super(struct super_block *sb,
42 struct ext2_super_block *es); 41 struct ext2_super_block *es, int wait);
43static int ext2_remount (struct super_block * sb, int * flags, char * data); 42static int ext2_remount (struct super_block * sb, int * flags, char * data);
44static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); 43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
45static int ext2_sync_fs(struct super_block *sb, int wait); 44static int ext2_sync_fs(struct super_block *sb, int wait);
@@ -52,9 +51,11 @@ void ext2_error (struct super_block * sb, const char * function,
52 struct ext2_super_block *es = sbi->s_es; 51 struct ext2_super_block *es = sbi->s_es;
53 52
54 if (!(sb->s_flags & MS_RDONLY)) { 53 if (!(sb->s_flags & MS_RDONLY)) {
54 spin_lock(&sbi->s_lock);
55 sbi->s_mount_state |= EXT2_ERROR_FS; 55 sbi->s_mount_state |= EXT2_ERROR_FS;
56 es->s_state |= cpu_to_le16(EXT2_ERROR_FS); 56 es->s_state |= cpu_to_le16(EXT2_ERROR_FS);
57 ext2_sync_super(sb, es); 57 spin_unlock(&sbi->s_lock);
58 ext2_sync_super(sb, es, 1);
58 } 59 }
59 60
60 va_start(args, fmt); 61 va_start(args, fmt);
@@ -84,6 +85,9 @@ void ext2_msg(struct super_block *sb, const char *prefix,
84 va_end(args); 85 va_end(args);
85} 86}
86 87
88/*
89 * This must be called with sbi->s_lock held.
90 */
87void ext2_update_dynamic_rev(struct super_block *sb) 91void ext2_update_dynamic_rev(struct super_block *sb)
88{ 92{
89 struct ext2_super_block *es = EXT2_SB(sb)->s_es; 93 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
@@ -115,8 +119,6 @@ static void ext2_put_super (struct super_block * sb)
115 int i; 119 int i;
116 struct ext2_sb_info *sbi = EXT2_SB(sb); 120 struct ext2_sb_info *sbi = EXT2_SB(sb);
117 121
118 lock_kernel();
119
120 if (sb->s_dirt) 122 if (sb->s_dirt)
121 ext2_write_super(sb); 123 ext2_write_super(sb);
122 124
@@ -124,8 +126,10 @@ static void ext2_put_super (struct super_block * sb)
124 if (!(sb->s_flags & MS_RDONLY)) { 126 if (!(sb->s_flags & MS_RDONLY)) {
125 struct ext2_super_block *es = sbi->s_es; 127 struct ext2_super_block *es = sbi->s_es;
126 128
129 spin_lock(&sbi->s_lock);
127 es->s_state = cpu_to_le16(sbi->s_mount_state); 130 es->s_state = cpu_to_le16(sbi->s_mount_state);
128 ext2_sync_super(sb, es); 131 spin_unlock(&sbi->s_lock);
132 ext2_sync_super(sb, es, 1);
129 } 133 }
130 db_count = sbi->s_gdb_count; 134 db_count = sbi->s_gdb_count;
131 for (i = 0; i < db_count; i++) 135 for (i = 0; i < db_count; i++)
@@ -140,8 +144,6 @@ static void ext2_put_super (struct super_block * sb)
140 sb->s_fs_info = NULL; 144 sb->s_fs_info = NULL;
141 kfree(sbi->s_blockgroup_lock); 145 kfree(sbi->s_blockgroup_lock);
142 kfree(sbi); 146 kfree(sbi);
143
144 unlock_kernel();
145} 147}
146 148
147static struct kmem_cache * ext2_inode_cachep; 149static struct kmem_cache * ext2_inode_cachep;
@@ -209,6 +211,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
209 struct ext2_super_block *es = sbi->s_es; 211 struct ext2_super_block *es = sbi->s_es;
210 unsigned long def_mount_opts; 212 unsigned long def_mount_opts;
211 213
214 spin_lock(&sbi->s_lock);
212 def_mount_opts = le32_to_cpu(es->s_default_mount_opts); 215 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
213 216
214 if (sbi->s_sb_block != 1) 217 if (sbi->s_sb_block != 1)
@@ -281,6 +284,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
281 if (!test_opt(sb, RESERVATION)) 284 if (!test_opt(sb, RESERVATION))
282 seq_puts(seq, ",noreservation"); 285 seq_puts(seq, ",noreservation");
283 286
287 spin_unlock(&sbi->s_lock);
284 return 0; 288 return 0;
285} 289}
286 290
@@ -606,7 +610,6 @@ static int ext2_setup_super (struct super_block * sb,
606 if (!le16_to_cpu(es->s_max_mnt_count)) 610 if (!le16_to_cpu(es->s_max_mnt_count))
607 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); 611 es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
608 le16_add_cpu(&es->s_mnt_count, 1); 612 le16_add_cpu(&es->s_mnt_count, 1);
609 ext2_write_super(sb);
610 if (test_opt (sb, DEBUG)) 613 if (test_opt (sb, DEBUG))
611 ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " 614 ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
612 "bpg=%lu, ipg=%lu, mo=%04lx]", 615 "bpg=%lu, ipg=%lu, mo=%04lx]",
@@ -767,6 +770,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
767 sb->s_fs_info = sbi; 770 sb->s_fs_info = sbi;
768 sbi->s_sb_block = sb_block; 771 sbi->s_sb_block = sb_block;
769 772
773 spin_lock_init(&sbi->s_lock);
774
770 /* 775 /*
771 * See what the current blocksize for the device is, and 776 * See what the current blocksize for the device is, and
772 * use that as the blocksize. Otherwise (or if the blocksize 777 * use that as the blocksize. Otherwise (or if the blocksize
@@ -1079,7 +1084,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
1079 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) 1084 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL))
1080 ext2_msg(sb, KERN_WARNING, 1085 ext2_msg(sb, KERN_WARNING,
1081 "warning: mounting ext3 filesystem as ext2"); 1086 "warning: mounting ext3 filesystem as ext2");
1082 ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); 1087 if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY))
1088 sb->s_flags |= MS_RDONLY;
1089 ext2_write_super(sb);
1083 return 0; 1090 return 0;
1084 1091
1085cantfind_ext2: 1092cantfind_ext2:
@@ -1120,30 +1127,26 @@ static void ext2_clear_super_error(struct super_block *sb)
1120 * be remapped. Nothing we can do but to retry the 1127 * be remapped. Nothing we can do but to retry the
1121 * write and hope for the best. 1128 * write and hope for the best.
1122 */ 1129 */
1123 printk(KERN_ERR "EXT2-fs: %s previous I/O error to " 1130 ext2_msg(sb, KERN_ERR,
1124 "superblock detected", sb->s_id); 1131 "previous I/O error to superblock detected\n");
1125 clear_buffer_write_io_error(sbh); 1132 clear_buffer_write_io_error(sbh);
1126 set_buffer_uptodate(sbh); 1133 set_buffer_uptodate(sbh);
1127 } 1134 }
1128} 1135}
1129 1136
1130static void ext2_commit_super (struct super_block * sb, 1137static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
1131 struct ext2_super_block * es) 1138 int wait)
1132{
1133 ext2_clear_super_error(sb);
1134 es->s_wtime = cpu_to_le32(get_seconds());
1135 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1136 sb->s_dirt = 0;
1137}
1138
1139static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1140{ 1139{
1141 ext2_clear_super_error(sb); 1140 ext2_clear_super_error(sb);
1141 spin_lock(&EXT2_SB(sb)->s_lock);
1142 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); 1142 es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
1143 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); 1143 es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
1144 es->s_wtime = cpu_to_le32(get_seconds()); 1144 es->s_wtime = cpu_to_le32(get_seconds());
1145 /* unlock before we do IO */
1146 spin_unlock(&EXT2_SB(sb)->s_lock);
1145 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 1147 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
1146 sync_dirty_buffer(EXT2_SB(sb)->s_sbh); 1148 if (wait)
1149 sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
1147 sb->s_dirt = 0; 1150 sb->s_dirt = 0;
1148} 1151}
1149 1152
@@ -1157,43 +1160,18 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
1157 * may have been checked while mounted and e2fsck may have 1160 * may have been checked while mounted and e2fsck may have
1158 * set s_state to EXT2_VALID_FS after some corrections. 1161 * set s_state to EXT2_VALID_FS after some corrections.
1159 */ 1162 */
1160
1161static int ext2_sync_fs(struct super_block *sb, int wait) 1163static int ext2_sync_fs(struct super_block *sb, int wait)
1162{ 1164{
1165 struct ext2_sb_info *sbi = EXT2_SB(sb);
1163 struct ext2_super_block *es = EXT2_SB(sb)->s_es; 1166 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
1164 struct buffer_head *sbh = EXT2_SB(sb)->s_sbh;
1165
1166 lock_kernel();
1167 if (buffer_write_io_error(sbh)) {
1168 /*
1169 * Oh, dear. A previous attempt to write the
1170 * superblock failed. This could happen because the
1171 * USB device was yanked out. Or it could happen to
1172 * be a transient write error and maybe the block will
1173 * be remapped. Nothing we can do but to retry the
1174 * write and hope for the best.
1175 */
1176 ext2_msg(sb, KERN_ERR,
1177 "previous I/O error to superblock detected\n");
1178 clear_buffer_write_io_error(sbh);
1179 set_buffer_uptodate(sbh);
1180 }
1181 1167
1168 spin_lock(&sbi->s_lock);
1182 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { 1169 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1183 ext2_debug("setting valid to 0\n"); 1170 ext2_debug("setting valid to 0\n");
1184 es->s_state &= cpu_to_le16(~EXT2_VALID_FS); 1171 es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
1185 es->s_free_blocks_count =
1186 cpu_to_le32(ext2_count_free_blocks(sb));
1187 es->s_free_inodes_count =
1188 cpu_to_le32(ext2_count_free_inodes(sb));
1189 es->s_mtime = cpu_to_le32(get_seconds());
1190 ext2_sync_super(sb, es);
1191 } else {
1192 ext2_commit_super(sb, es);
1193 } 1172 }
1194 sb->s_dirt = 0; 1173 spin_unlock(&sbi->s_lock);
1195 unlock_kernel(); 1174 ext2_sync_super(sb, es, wait);
1196
1197 return 0; 1175 return 0;
1198} 1176}
1199 1177
@@ -1215,7 +1193,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1215 unsigned long old_sb_flags; 1193 unsigned long old_sb_flags;
1216 int err; 1194 int err;
1217 1195
1218 lock_kernel(); 1196 spin_lock(&sbi->s_lock);
1219 1197
1220 /* Store the old options */ 1198 /* Store the old options */
1221 old_sb_flags = sb->s_flags; 1199 old_sb_flags = sb->s_flags;
@@ -1254,13 +1232,13 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1254 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; 1232 sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
1255 } 1233 }
1256 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { 1234 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
1257 unlock_kernel(); 1235 spin_unlock(&sbi->s_lock);
1258 return 0; 1236 return 0;
1259 } 1237 }
1260 if (*flags & MS_RDONLY) { 1238 if (*flags & MS_RDONLY) {
1261 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || 1239 if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
1262 !(sbi->s_mount_state & EXT2_VALID_FS)) { 1240 !(sbi->s_mount_state & EXT2_VALID_FS)) {
1263 unlock_kernel(); 1241 spin_unlock(&sbi->s_lock);
1264 return 0; 1242 return 0;
1265 } 1243 }
1266 /* 1244 /*
@@ -1269,6 +1247,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1269 */ 1247 */
1270 es->s_state = cpu_to_le16(sbi->s_mount_state); 1248 es->s_state = cpu_to_le16(sbi->s_mount_state);
1271 es->s_mtime = cpu_to_le32(get_seconds()); 1249 es->s_mtime = cpu_to_le32(get_seconds());
1250 spin_unlock(&sbi->s_lock);
1251 ext2_sync_super(sb, es, 1);
1272 } else { 1252 } else {
1273 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, 1253 __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
1274 ~EXT2_FEATURE_RO_COMPAT_SUPP); 1254 ~EXT2_FEATURE_RO_COMPAT_SUPP);
@@ -1288,16 +1268,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1288 sbi->s_mount_state = le16_to_cpu(es->s_state); 1268 sbi->s_mount_state = le16_to_cpu(es->s_state);
1289 if (!ext2_setup_super (sb, es, 0)) 1269 if (!ext2_setup_super (sb, es, 0))
1290 sb->s_flags &= ~MS_RDONLY; 1270 sb->s_flags &= ~MS_RDONLY;
1271 spin_unlock(&sbi->s_lock);
1272 ext2_write_super(sb);
1291 } 1273 }
1292 ext2_sync_super(sb, es);
1293 unlock_kernel();
1294 return 0; 1274 return 0;
1295restore_opts: 1275restore_opts:
1296 sbi->s_mount_opt = old_opts.s_mount_opt; 1276 sbi->s_mount_opt = old_opts.s_mount_opt;
1297 sbi->s_resuid = old_opts.s_resuid; 1277 sbi->s_resuid = old_opts.s_resuid;
1298 sbi->s_resgid = old_opts.s_resgid; 1278 sbi->s_resgid = old_opts.s_resgid;
1299 sb->s_flags = old_sb_flags; 1279 sb->s_flags = old_sb_flags;
1300 unlock_kernel(); 1280 spin_unlock(&sbi->s_lock);
1301 return err; 1281 return err;
1302} 1282}
1303 1283
@@ -1308,6 +1288,8 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1308 struct ext2_super_block *es = sbi->s_es; 1288 struct ext2_super_block *es = sbi->s_es;
1309 u64 fsid; 1289 u64 fsid;
1310 1290
1291 spin_lock(&sbi->s_lock);
1292
1311 if (test_opt (sb, MINIX_DF)) 1293 if (test_opt (sb, MINIX_DF))
1312 sbi->s_overhead_last = 0; 1294 sbi->s_overhead_last = 0;
1313 else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { 1295 else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
@@ -1362,6 +1344,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
1362 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 1344 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
1363 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; 1345 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
1364 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; 1346 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1347 spin_unlock(&sbi->s_lock);
1365 return 0; 1348 return 0;
1366} 1349}
1367 1350
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index e44dc92609be..3b96045a00ce 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -345,7 +345,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
345 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) 345 if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
346 return; 346 return;
347 347
348 spin_lock(&EXT2_SB(sb)->s_lock);
348 EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); 349 EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
350 spin_unlock(&EXT2_SB(sb)->s_lock);
349 sb->s_dirt = 1; 351 sb->s_dirt = 1;
350 mark_buffer_dirty(EXT2_SB(sb)->s_sbh); 352 mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
351} 353}
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a177122a1b25..4a32511f4ded 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1584,6 +1584,12 @@ retry_alloc:
1584 goto io_error; 1584 goto io_error;
1585 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); 1585 free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
1586 /* 1586 /*
1587 * skip this group (and avoid loading bitmap) if there
1588 * are no free blocks
1589 */
1590 if (!free_blocks)
1591 continue;
1592 /*
1587 * skip this group if the number of 1593 * skip this group if the number of
1588 * free blocks is less than half of the reservation 1594 * free blocks is less than half of the reservation
1589 * window size. 1595 * window size.
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 9492f6003ef9..fcf7487734b6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -48,7 +48,7 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
48 struct inode *inode = dentry->d_inode; 48 struct inode *inode = dentry->d_inode;
49 struct ext3_inode_info *ei = EXT3_I(inode); 49 struct ext3_inode_info *ei = EXT3_I(inode);
50 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; 50 journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
51 int ret = 0; 51 int ret, needs_barrier = 0;
52 tid_t commit_tid; 52 tid_t commit_tid;
53 53
54 if (inode->i_sb->s_flags & MS_RDONLY) 54 if (inode->i_sb->s_flags & MS_RDONLY)
@@ -70,29 +70,27 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
70 * (they were dirtied by commit). But that's OK - the blocks are 70 * (they were dirtied by commit). But that's OK - the blocks are
71 * safe in-journal, which is all fsync() needs to ensure. 71 * safe in-journal, which is all fsync() needs to ensure.
72 */ 72 */
73 if (ext3_should_journal_data(inode)) { 73 if (ext3_should_journal_data(inode))
74 ret = ext3_force_commit(inode->i_sb); 74 return ext3_force_commit(inode->i_sb);
75 goto out;
76 }
77 75
78 if (datasync) 76 if (datasync)
79 commit_tid = atomic_read(&ei->i_datasync_tid); 77 commit_tid = atomic_read(&ei->i_datasync_tid);
80 else 78 else
81 commit_tid = atomic_read(&ei->i_sync_tid); 79 commit_tid = atomic_read(&ei->i_sync_tid);
82 80
83 if (log_start_commit(journal, commit_tid)) { 81 if (test_opt(inode->i_sb, BARRIER) &&
84 log_wait_commit(journal, commit_tid); 82 !journal_trans_will_send_data_barrier(journal, commit_tid))
85 goto out; 83 needs_barrier = 1;
86 } 84 log_start_commit(journal, commit_tid);
85 ret = log_wait_commit(journal, commit_tid);
87 86
88 /* 87 /*
89 * In case we didn't commit a transaction, we have to flush 88 * In case we didn't commit a transaction, we have to flush
90 * disk caches manually so that data really is on persistent 89 * disk caches manually so that data really is on persistent
91 * storage 90 * storage
92 */ 91 */
93 if (test_opt(inode->i_sb, BARRIER)) 92 if (needs_barrier)
94 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, 93 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
95 BLKDEV_IFL_WAIT); 94 BLKDEV_IFL_WAIT);
96out:
97 return ret; 95 return ret;
98} 96}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ea33bdf0a300..735f0190ec2a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3151,7 +3151,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3151 if (error) 3151 if (error)
3152 return error; 3152 return error;
3153 3153
3154 if (ia_valid & ATTR_SIZE) 3154 if (is_quota_modification(inode, attr))
3155 dquot_initialize(inode); 3155 dquot_initialize(inode);
3156 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 3156 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3157 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 3157 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 1bee604cc6cd..0fc1293d0e96 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -653,8 +653,12 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
653 seq_printf(seq, ",commit=%u", 653 seq_printf(seq, ",commit=%u",
654 (unsigned) (sbi->s_commit_interval / HZ)); 654 (unsigned) (sbi->s_commit_interval / HZ));
655 } 655 }
656 if (test_opt(sb, BARRIER)) 656
657 seq_puts(seq, ",barrier=1"); 657 /*
658 * Always display barrier state so it's clear what the status is.
659 */
660 seq_puts(seq, ",barrier=");
661 seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
658 if (test_opt(sb, NOBH)) 662 if (test_opt(sb, NOBH))
659 seq_puts(seq, ",nobh"); 663 seq_puts(seq, ",nobh");
660 664
@@ -810,8 +814,8 @@ enum {
810 Opt_data_err_abort, Opt_data_err_ignore, 814 Opt_data_err_abort, Opt_data_err_ignore,
811 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 815 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
812 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 816 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
813 Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, 817 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
814 Opt_usrquota, Opt_grpquota 818 Opt_resize, Opt_usrquota, Opt_grpquota
815}; 819};
816 820
817static const match_table_t tokens = { 821static const match_table_t tokens = {
@@ -865,6 +869,8 @@ static const match_table_t tokens = {
865 {Opt_quota, "quota"}, 869 {Opt_quota, "quota"},
866 {Opt_usrquota, "usrquota"}, 870 {Opt_usrquota, "usrquota"},
867 {Opt_barrier, "barrier=%u"}, 871 {Opt_barrier, "barrier=%u"},
872 {Opt_barrier, "barrier"},
873 {Opt_nobarrier, "nobarrier"},
868 {Opt_resize, "resize"}, 874 {Opt_resize, "resize"},
869 {Opt_err, NULL}, 875 {Opt_err, NULL},
870}; 876};
@@ -967,7 +973,11 @@ static int parse_options (char *options, struct super_block *sb,
967 int token; 973 int token;
968 if (!*p) 974 if (!*p)
969 continue; 975 continue;
970 976 /*
977 * Initialize args struct so we know whether arg was
978 * found; some options take optional arguments.
979 */
980 args[0].to = args[0].from = 0;
971 token = match_token(p, tokens, args); 981 token = match_token(p, tokens, args);
972 switch (token) { 982 switch (token) {
973 case Opt_bsd_df: 983 case Opt_bsd_df:
@@ -1215,9 +1225,15 @@ set_qf_format:
1215 case Opt_abort: 1225 case Opt_abort:
1216 set_opt(sbi->s_mount_opt, ABORT); 1226 set_opt(sbi->s_mount_opt, ABORT);
1217 break; 1227 break;
1228 case Opt_nobarrier:
1229 clear_opt(sbi->s_mount_opt, BARRIER);
1230 break;
1218 case Opt_barrier: 1231 case Opt_barrier:
1219 if (match_int(&args[0], &option)) 1232 if (args[0].from) {
1220 return 0; 1233 if (match_int(&args[0], &option))
1234 return 0;
1235 } else
1236 option = 1; /* No argument, default to 1 */
1221 if (option) 1237 if (option)
1222 set_opt(sbi->s_mount_opt, BARRIER); 1238 set_opt(sbi->s_mount_opt, BARRIER);
1223 else 1239 else
@@ -1890,21 +1906,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1890 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 1906 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1891 spin_lock_init(&sbi->s_next_gen_lock); 1907 spin_lock_init(&sbi->s_next_gen_lock);
1892 1908
1893 err = percpu_counter_init(&sbi->s_freeblocks_counter,
1894 ext3_count_free_blocks(sb));
1895 if (!err) {
1896 err = percpu_counter_init(&sbi->s_freeinodes_counter,
1897 ext3_count_free_inodes(sb));
1898 }
1899 if (!err) {
1900 err = percpu_counter_init(&sbi->s_dirs_counter,
1901 ext3_count_dirs(sb));
1902 }
1903 if (err) {
1904 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1905 goto failed_mount3;
1906 }
1907
1908 /* per fileystem reservation list head & lock */ 1909 /* per fileystem reservation list head & lock */
1909 spin_lock_init(&sbi->s_rsv_window_lock); 1910 spin_lock_init(&sbi->s_rsv_window_lock);
1910 sbi->s_rsv_window_root = RB_ROOT; 1911 sbi->s_rsv_window_root = RB_ROOT;
@@ -1945,15 +1946,29 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1945 if (!test_opt(sb, NOLOAD) && 1946 if (!test_opt(sb, NOLOAD) &&
1946 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { 1947 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
1947 if (ext3_load_journal(sb, es, journal_devnum)) 1948 if (ext3_load_journal(sb, es, journal_devnum))
1948 goto failed_mount3; 1949 goto failed_mount2;
1949 } else if (journal_inum) { 1950 } else if (journal_inum) {
1950 if (ext3_create_journal(sb, es, journal_inum)) 1951 if (ext3_create_journal(sb, es, journal_inum))
1951 goto failed_mount3; 1952 goto failed_mount2;
1952 } else { 1953 } else {
1953 if (!silent) 1954 if (!silent)
1954 ext3_msg(sb, KERN_ERR, 1955 ext3_msg(sb, KERN_ERR,
1955 "error: no journal found. " 1956 "error: no journal found. "
1956 "mounting ext3 over ext2?"); 1957 "mounting ext3 over ext2?");
1958 goto failed_mount2;
1959 }
1960 err = percpu_counter_init(&sbi->s_freeblocks_counter,
1961 ext3_count_free_blocks(sb));
1962 if (!err) {
1963 err = percpu_counter_init(&sbi->s_freeinodes_counter,
1964 ext3_count_free_inodes(sb));
1965 }
1966 if (!err) {
1967 err = percpu_counter_init(&sbi->s_dirs_counter,
1968 ext3_count_dirs(sb));
1969 }
1970 if (err) {
1971 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
1957 goto failed_mount3; 1972 goto failed_mount3;
1958 } 1973 }
1959 1974
@@ -1978,7 +1993,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1978 ext3_msg(sb, KERN_ERR, 1993 ext3_msg(sb, KERN_ERR,
1979 "error: journal does not support " 1994 "error: journal does not support "
1980 "requested data journaling mode"); 1995 "requested data journaling mode");
1981 goto failed_mount4; 1996 goto failed_mount3;
1982 } 1997 }
1983 default: 1998 default:
1984 break; 1999 break;
@@ -2001,19 +2016,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2001 if (IS_ERR(root)) { 2016 if (IS_ERR(root)) {
2002 ext3_msg(sb, KERN_ERR, "error: get root inode failed"); 2017 ext3_msg(sb, KERN_ERR, "error: get root inode failed");
2003 ret = PTR_ERR(root); 2018 ret = PTR_ERR(root);
2004 goto failed_mount4; 2019 goto failed_mount3;
2005 } 2020 }
2006 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { 2021 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2007 iput(root); 2022 iput(root);
2008 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); 2023 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
2009 goto failed_mount4; 2024 goto failed_mount3;
2010 } 2025 }
2011 sb->s_root = d_alloc_root(root); 2026 sb->s_root = d_alloc_root(root);
2012 if (!sb->s_root) { 2027 if (!sb->s_root) {
2013 ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); 2028 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
2014 iput(root); 2029 iput(root);
2015 ret = -ENOMEM; 2030 ret = -ENOMEM;
2016 goto failed_mount4; 2031 goto failed_mount3;
2017 } 2032 }
2018 2033
2019 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2034 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
@@ -2039,12 +2054,11 @@ cantfind_ext3:
2039 sb->s_id); 2054 sb->s_id);
2040 goto failed_mount; 2055 goto failed_mount;
2041 2056
2042failed_mount4:
2043 journal_destroy(sbi->s_journal);
2044failed_mount3: 2057failed_mount3:
2045 percpu_counter_destroy(&sbi->s_freeblocks_counter); 2058 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2046 percpu_counter_destroy(&sbi->s_freeinodes_counter); 2059 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2047 percpu_counter_destroy(&sbi->s_dirs_counter); 2060 percpu_counter_destroy(&sbi->s_dirs_counter);
2061 journal_destroy(sbi->s_journal);
2048failed_mount2: 2062failed_mount2:
2049 for (i = 0; i < db_count; i++) 2063 for (i = 0; i < db_count; i++)
2050 brelse(sbi->s_group_desc[i]); 2064 brelse(sbi->s_group_desc[i]);
@@ -2278,6 +2292,9 @@ static int ext3_load_journal(struct super_block *sb,
2278 return -EINVAL; 2292 return -EINVAL;
2279 } 2293 }
2280 2294
2295 if (!(journal->j_flags & JFS_BARRIER))
2296 printk(KERN_INFO "EXT3-fs: barriers not enabled\n");
2297
2281 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { 2298 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2282 err = journal_update_format(journal); 2299 err = journal_update_format(journal);
2283 if (err) { 2300 if (err) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 81d605412844..3e0f6af9d08d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5425,7 +5425,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5425 if (error) 5425 if (error)
5426 return error; 5426 return error;
5427 5427
5428 if (ia_valid & ATTR_SIZE) 5428 if (is_quota_modification(inode, attr))
5429 dquot_initialize(inode); 5429 dquot_initialize(inode);
5430 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 5430 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
5431 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 5431 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bcba960328fa..f74d270ba155 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -619,9 +619,15 @@ int send_sigurg(struct fown_struct *fown)
619 return ret; 619 return ret;
620} 620}
621 621
622static DEFINE_RWLOCK(fasync_lock); 622static DEFINE_SPINLOCK(fasync_lock);
623static struct kmem_cache *fasync_cache __read_mostly; 623static struct kmem_cache *fasync_cache __read_mostly;
624 624
625static void fasync_free_rcu(struct rcu_head *head)
626{
627 kmem_cache_free(fasync_cache,
628 container_of(head, struct fasync_struct, fa_rcu));
629}
630
625/* 631/*
626 * Remove a fasync entry. If successfully removed, return 632 * Remove a fasync entry. If successfully removed, return
627 * positive and clear the FASYNC flag. If no entry exists, 633 * positive and clear the FASYNC flag. If no entry exists,
@@ -630,8 +636,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
630 * NOTE! It is very important that the FASYNC flag always 636 * NOTE! It is very important that the FASYNC flag always
631 * match the state "is the filp on a fasync list". 637 * match the state "is the filp on a fasync list".
632 * 638 *
633 * We always take the 'filp->f_lock', in since fasync_lock
634 * needs to be irq-safe.
635 */ 639 */
636static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) 640static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
637{ 641{
@@ -639,17 +643,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
639 int result = 0; 643 int result = 0;
640 644
641 spin_lock(&filp->f_lock); 645 spin_lock(&filp->f_lock);
642 write_lock_irq(&fasync_lock); 646 spin_lock(&fasync_lock);
643 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 647 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
644 if (fa->fa_file != filp) 648 if (fa->fa_file != filp)
645 continue; 649 continue;
650
651 spin_lock_irq(&fa->fa_lock);
652 fa->fa_file = NULL;
653 spin_unlock_irq(&fa->fa_lock);
654
646 *fp = fa->fa_next; 655 *fp = fa->fa_next;
647 kmem_cache_free(fasync_cache, fa); 656 call_rcu(&fa->fa_rcu, fasync_free_rcu);
648 filp->f_flags &= ~FASYNC; 657 filp->f_flags &= ~FASYNC;
649 result = 1; 658 result = 1;
650 break; 659 break;
651 } 660 }
652 write_unlock_irq(&fasync_lock); 661 spin_unlock(&fasync_lock);
653 spin_unlock(&filp->f_lock); 662 spin_unlock(&filp->f_lock);
654 return result; 663 return result;
655} 664}
@@ -671,25 +680,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
671 return -ENOMEM; 680 return -ENOMEM;
672 681
673 spin_lock(&filp->f_lock); 682 spin_lock(&filp->f_lock);
674 write_lock_irq(&fasync_lock); 683 spin_lock(&fasync_lock);
675 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 684 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
676 if (fa->fa_file != filp) 685 if (fa->fa_file != filp)
677 continue; 686 continue;
687
688 spin_lock_irq(&fa->fa_lock);
678 fa->fa_fd = fd; 689 fa->fa_fd = fd;
690 spin_unlock_irq(&fa->fa_lock);
691
679 kmem_cache_free(fasync_cache, new); 692 kmem_cache_free(fasync_cache, new);
680 goto out; 693 goto out;
681 } 694 }
682 695
696 spin_lock_init(&new->fa_lock);
683 new->magic = FASYNC_MAGIC; 697 new->magic = FASYNC_MAGIC;
684 new->fa_file = filp; 698 new->fa_file = filp;
685 new->fa_fd = fd; 699 new->fa_fd = fd;
686 new->fa_next = *fapp; 700 new->fa_next = *fapp;
687 *fapp = new; 701 rcu_assign_pointer(*fapp, new);
688 result = 1; 702 result = 1;
689 filp->f_flags |= FASYNC; 703 filp->f_flags |= FASYNC;
690 704
691out: 705out:
692 write_unlock_irq(&fasync_lock); 706 spin_unlock(&fasync_lock);
693 spin_unlock(&filp->f_lock); 707 spin_unlock(&filp->f_lock);
694 return result; 708 return result;
695} 709}
@@ -709,37 +723,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
709 723
710EXPORT_SYMBOL(fasync_helper); 724EXPORT_SYMBOL(fasync_helper);
711 725
712void __kill_fasync(struct fasync_struct *fa, int sig, int band) 726/*
727 * rcu_read_lock() is held
728 */
729static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
713{ 730{
714 while (fa) { 731 while (fa) {
715 struct fown_struct * fown; 732 struct fown_struct *fown;
716 if (fa->magic != FASYNC_MAGIC) { 733 if (fa->magic != FASYNC_MAGIC) {
717 printk(KERN_ERR "kill_fasync: bad magic number in " 734 printk(KERN_ERR "kill_fasync: bad magic number in "
718 "fasync_struct!\n"); 735 "fasync_struct!\n");
719 return; 736 return;
720 } 737 }
721 fown = &fa->fa_file->f_owner; 738 spin_lock(&fa->fa_lock);
722 /* Don't send SIGURG to processes which have not set a 739 if (fa->fa_file) {
723 queued signum: SIGURG has its own default signalling 740 fown = &fa->fa_file->f_owner;
724 mechanism. */ 741 /* Don't send SIGURG to processes which have not set a
725 if (!(sig == SIGURG && fown->signum == 0)) 742 queued signum: SIGURG has its own default signalling
726 send_sigio(fown, fa->fa_fd, band); 743 mechanism. */
727 fa = fa->fa_next; 744 if (!(sig == SIGURG && fown->signum == 0))
745 send_sigio(fown, fa->fa_fd, band);
746 }
747 spin_unlock(&fa->fa_lock);
748 fa = rcu_dereference(fa->fa_next);
728 } 749 }
729} 750}
730 751
731EXPORT_SYMBOL(__kill_fasync);
732
733void kill_fasync(struct fasync_struct **fp, int sig, int band) 752void kill_fasync(struct fasync_struct **fp, int sig, int band)
734{ 753{
735 /* First a quick test without locking: usually 754 /* First a quick test without locking: usually
736 * the list is empty. 755 * the list is empty.
737 */ 756 */
738 if (*fp) { 757 if (*fp) {
739 read_lock(&fasync_lock); 758 rcu_read_lock();
740 /* reread *fp after obtaining the lock */ 759 kill_fasync_rcu(rcu_dereference(*fp), sig, band);
741 __kill_fasync(*fp, sig, band); 760 rcu_read_unlock();
742 read_unlock(&fasync_lock);
743 } 761 }
744} 762}
745EXPORT_SYMBOL(kill_fasync); 763EXPORT_SYMBOL(kill_fasync);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0c1d0b82dcf1..a739a0a48067 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -418,6 +418,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
418static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) 418static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
419{ 419{
420 struct buffer_head *dibh; 420 struct buffer_head *dibh;
421 u64 dsize = i_size_read(&ip->i_inode);
421 void *kaddr; 422 void *kaddr;
422 int error; 423 int error;
423 424
@@ -437,9 +438,10 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
437 return error; 438 return error;
438 439
439 kaddr = kmap_atomic(page, KM_USER0); 440 kaddr = kmap_atomic(page, KM_USER0);
440 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), 441 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
441 ip->i_disksize); 442 dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
442 memset(kaddr + ip->i_disksize, 0, PAGE_CACHE_SIZE - ip->i_disksize); 443 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
444 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
443 kunmap_atomic(kaddr, KM_USER0); 445 kunmap_atomic(kaddr, KM_USER0);
444 flush_dcache_page(page); 446 flush_dcache_page(page);
445 brelse(dibh); 447 brelse(dibh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5e411d5f4697..4a48c0f4b402 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -71,11 +71,13 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
71 71
72 if (!PageUptodate(page)) { 72 if (!PageUptodate(page)) {
73 void *kaddr = kmap(page); 73 void *kaddr = kmap(page);
74 u64 dsize = i_size_read(inode);
75
76 if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
77 dsize = dibh->b_size - sizeof(struct gfs2_dinode);
74 78
75 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), 79 memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
76 ip->i_disksize); 80 memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
77 memset(kaddr + ip->i_disksize, 0,
78 PAGE_CACHE_SIZE - ip->i_disksize);
79 kunmap(page); 81 kunmap(page);
80 82
81 SetPageUptodate(page); 83 SetPageUptodate(page);
@@ -1038,13 +1040,14 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
1038 goto out; 1040 goto out;
1039 1041
1040 if (gfs2_is_stuffed(ip)) { 1042 if (gfs2_is_stuffed(ip)) {
1041 ip->i_disksize = size; 1043 u64 dsize = size + sizeof(struct gfs2_inode);
1042 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1044 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1043 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1045 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1044 gfs2_dinode_out(ip, dibh->b_data); 1046 gfs2_dinode_out(ip, dibh->b_data);
1045 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); 1047 if (dsize > dibh->b_size)
1048 dsize = dibh->b_size;
1049 gfs2_buffer_clear_tail(dibh, dsize);
1046 error = 1; 1050 error = 1;
1047
1048 } else { 1051 } else {
1049 if (size & (u64)(sdp->sd_sb.sb_bsize - 1)) 1052 if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
1050 error = gfs2_block_truncate_page(ip->i_inode.i_mapping); 1053 error = gfs2_block_truncate_page(ip->i_inode.i_mapping);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 25fddc100f18..8295c5b5d4a9 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1475,7 +1475,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
1475 inode = gfs2_inode_lookup(dir->i_sb, 1475 inode = gfs2_inode_lookup(dir->i_sb,
1476 be16_to_cpu(dent->de_type), 1476 be16_to_cpu(dent->de_type),
1477 be64_to_cpu(dent->de_inum.no_addr), 1477 be64_to_cpu(dent->de_inum.no_addr),
1478 be64_to_cpu(dent->de_inum.no_formal_ino), 0); 1478 be64_to_cpu(dent->de_inum.no_formal_ino));
1479 brelse(bh); 1479 brelse(bh);
1480 return inode; 1480 return inode;
1481 } 1481 }
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index c22c21174833..dfe237a3f8ad 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -168,7 +168,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
168 if (error) 168 if (error)
169 goto fail; 169 goto fail;
170 170
171 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0, 0); 171 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0);
172 if (IS_ERR(inode)) { 172 if (IS_ERR(inode)) {
173 error = PTR_ERR(inode); 173 error = PTR_ERR(inode);
174 goto fail; 174 goto fail;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 454d4b4eb36b..ddcdbf493536 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -855,6 +855,9 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
855 gh->gh_flags = flags; 855 gh->gh_flags = flags;
856 gh->gh_iflags = 0; 856 gh->gh_iflags = 0;
857 gh->gh_ip = (unsigned long)__builtin_return_address(0); 857 gh->gh_ip = (unsigned long)__builtin_return_address(0);
858 if (gh->gh_owner_pid)
859 put_pid(gh->gh_owner_pid);
860 gh->gh_owner_pid = get_pid(task_pid(current));
858} 861}
859 862
860/** 863/**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 3aac46f6853e..b5d7363b22da 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -439,9 +439,6 @@ struct gfs2_args {
439struct gfs2_tune { 439struct gfs2_tune {
440 spinlock_t gt_spin; 440 spinlock_t gt_spin;
441 441
442 unsigned int gt_incore_log_blocks;
443 unsigned int gt_log_flush_secs;
444
445 unsigned int gt_logd_secs; 442 unsigned int gt_logd_secs;
446 443
447 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ 444 unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
@@ -462,6 +459,7 @@ enum {
462 SDF_SHUTDOWN = 2, 459 SDF_SHUTDOWN = 2,
463 SDF_NOBARRIERS = 3, 460 SDF_NOBARRIERS = 3,
464 SDF_NORECOVERY = 4, 461 SDF_NORECOVERY = 4,
462 SDF_DEMOTE = 5,
465}; 463};
466 464
467#define GFS2_FSNAME_LEN 256 465#define GFS2_FSNAME_LEN 256
@@ -618,6 +616,7 @@ struct gfs2_sbd {
618 unsigned int sd_log_commited_databuf; 616 unsigned int sd_log_commited_databuf;
619 int sd_log_commited_revoke; 617 int sd_log_commited_revoke;
620 618
619 atomic_t sd_log_pinned;
621 unsigned int sd_log_num_buf; 620 unsigned int sd_log_num_buf;
622 unsigned int sd_log_num_revoke; 621 unsigned int sd_log_num_revoke;
623 unsigned int sd_log_num_rg; 622 unsigned int sd_log_num_rg;
@@ -629,15 +628,17 @@ struct gfs2_sbd {
629 struct list_head sd_log_le_databuf; 628 struct list_head sd_log_le_databuf;
630 struct list_head sd_log_le_ordered; 629 struct list_head sd_log_le_ordered;
631 630
631 atomic_t sd_log_thresh1;
632 atomic_t sd_log_thresh2;
632 atomic_t sd_log_blks_free; 633 atomic_t sd_log_blks_free;
633 struct mutex sd_log_reserve_mutex; 634 wait_queue_head_t sd_log_waitq;
635 wait_queue_head_t sd_logd_waitq;
634 636
635 u64 sd_log_sequence; 637 u64 sd_log_sequence;
636 unsigned int sd_log_head; 638 unsigned int sd_log_head;
637 unsigned int sd_log_tail; 639 unsigned int sd_log_tail;
638 int sd_log_idle; 640 int sd_log_idle;
639 641
640 unsigned long sd_log_flush_time;
641 struct rw_semaphore sd_log_flush_lock; 642 struct rw_semaphore sd_log_flush_lock;
642 atomic_t sd_log_in_flight; 643 atomic_t sd_log_in_flight;
643 wait_queue_head_t sd_log_flush_wait; 644 wait_queue_head_t sd_log_flush_wait;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b1bf2694fb2b..51d8061fa07a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -158,7 +158,6 @@ void gfs2_set_iop(struct inode *inode)
158 * @sb: The super block 158 * @sb: The super block
159 * @no_addr: The inode number 159 * @no_addr: The inode number
160 * @type: The type of the inode 160 * @type: The type of the inode
161 * @skip_freeing: set this not return an inode if it is currently being freed.
162 * 161 *
163 * Returns: A VFS inode, or an error 162 * Returns: A VFS inode, or an error
164 */ 163 */
@@ -166,17 +165,14 @@ void gfs2_set_iop(struct inode *inode)
166struct inode *gfs2_inode_lookup(struct super_block *sb, 165struct inode *gfs2_inode_lookup(struct super_block *sb,
167 unsigned int type, 166 unsigned int type,
168 u64 no_addr, 167 u64 no_addr,
169 u64 no_formal_ino, int skip_freeing) 168 u64 no_formal_ino)
170{ 169{
171 struct inode *inode; 170 struct inode *inode;
172 struct gfs2_inode *ip; 171 struct gfs2_inode *ip;
173 struct gfs2_glock *io_gl; 172 struct gfs2_glock *io_gl;
174 int error; 173 int error;
175 174
176 if (skip_freeing) 175 inode = gfs2_iget(sb, no_addr);
177 inode = gfs2_iget_skip(sb, no_addr);
178 else
179 inode = gfs2_iget(sb, no_addr);
180 ip = GFS2_I(inode); 176 ip = GFS2_I(inode);
181 177
182 if (!inode) 178 if (!inode)
@@ -234,13 +230,100 @@ fail_glock:
234fail_iopen: 230fail_iopen:
235 gfs2_glock_put(io_gl); 231 gfs2_glock_put(io_gl);
236fail_put: 232fail_put:
237 ip->i_gl->gl_object = NULL; 233 if (inode->i_state & I_NEW)
234 ip->i_gl->gl_object = NULL;
238 gfs2_glock_put(ip->i_gl); 235 gfs2_glock_put(ip->i_gl);
239fail: 236fail:
240 iget_failed(inode); 237 if (inode->i_state & I_NEW)
238 iget_failed(inode);
239 else
240 iput(inode);
241 return ERR_PTR(error); 241 return ERR_PTR(error);
242} 242}
243 243
244/**
245 * gfs2_unlinked_inode_lookup - Lookup an unlinked inode for reclamation
246 * @sb: The super block
247 * no_addr: The inode number
248 * @@inode: A pointer to the inode found, if any
249 *
250 * Returns: 0 and *inode if no errors occurred. If an error occurs,
251 * the resulting *inode may or may not be NULL.
252 */
253
254int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr,
255 struct inode **inode)
256{
257 struct gfs2_sbd *sdp;
258 struct gfs2_inode *ip;
259 struct gfs2_glock *io_gl;
260 int error;
261 struct gfs2_holder gh;
262
263 *inode = gfs2_iget_skip(sb, no_addr);
264
265 if (!(*inode))
266 return -ENOBUFS;
267
268 if (!((*inode)->i_state & I_NEW))
269 return -ENOBUFS;
270
271 ip = GFS2_I(*inode);
272 sdp = GFS2_SB(*inode);
273 ip->i_no_formal_ino = -1;
274
275 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
276 if (unlikely(error))
277 goto fail;
278 ip->i_gl->gl_object = ip;
279
280 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
281 if (unlikely(error))
282 goto fail_put;
283
284 set_bit(GIF_INVALID, &ip->i_flags);
285 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT,
286 &ip->i_iopen_gh);
287 if (unlikely(error)) {
288 if (error == GLR_TRYFAILED)
289 error = 0;
290 goto fail_iopen;
291 }
292 ip->i_iopen_gh.gh_gl->gl_object = ip;
293 gfs2_glock_put(io_gl);
294
295 (*inode)->i_mode = DT2IF(DT_UNKNOWN);
296
297 /*
298 * We must read the inode in order to work out its type in
299 * this case. Note that this doesn't happen often as we normally
300 * know the type beforehand. This code path only occurs during
301 * unlinked inode recovery (where it is safe to do this glock,
302 * which is not true in the general case).
303 */
304 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
305 &gh);
306 if (unlikely(error)) {
307 if (error == GLR_TRYFAILED)
308 error = 0;
309 goto fail_glock;
310 }
311 /* Inode is now uptodate */
312 gfs2_glock_dq_uninit(&gh);
313 gfs2_set_iop(*inode);
314
315 return 0;
316fail_glock:
317 gfs2_glock_dq(&ip->i_iopen_gh);
318fail_iopen:
319 gfs2_glock_put(io_gl);
320fail_put:
321 ip->i_gl->gl_object = NULL;
322 gfs2_glock_put(ip->i_gl);
323fail:
324 return error;
325}
326
244static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 327static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
245{ 328{
246 const struct gfs2_dinode *str = buf; 329 const struct gfs2_dinode *str = buf;
@@ -862,7 +945,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
862 goto fail_gunlock2; 945 goto fail_gunlock2;
863 946
864 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, 947 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
865 inum.no_formal_ino, 0); 948 inum.no_formal_ino);
866 if (IS_ERR(inode)) 949 if (IS_ERR(inode))
867 goto fail_gunlock2; 950 goto fail_gunlock2;
868 951
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index c341aaf67adb..e161461d4c57 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -83,8 +83,9 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip,
83 83
84extern void gfs2_set_iop(struct inode *inode); 84extern void gfs2_set_iop(struct inode *inode);
85extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 85extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
86 u64 no_addr, u64 no_formal_ino, 86 u64 no_addr, u64 no_formal_ino);
87 int skip_freeing); 87extern int gfs2_unlinked_inode_lookup(struct super_block *sb, u64 no_addr,
88 struct inode **inode);
88extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 89extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
89 90
90extern int gfs2_inode_refresh(struct gfs2_inode *ip); 91extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index e5bf4b59d46e..b593f0e28f25 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -168,12 +168,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
168 return list_empty(&ai->ai_ail1_list); 168 return list_empty(&ai->ai_ail1_list);
169} 169}
170 170
171static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) 171static void gfs2_ail1_start(struct gfs2_sbd *sdp)
172{ 172{
173 struct list_head *head; 173 struct list_head *head;
174 u64 sync_gen; 174 u64 sync_gen;
175 struct list_head *first; 175 struct gfs2_ail *ai;
176 struct gfs2_ail *first_ai, *ai, *tmp;
177 int done = 0; 176 int done = 0;
178 177
179 gfs2_log_lock(sdp); 178 gfs2_log_lock(sdp);
@@ -184,21 +183,9 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
184 } 183 }
185 sync_gen = sdp->sd_ail_sync_gen++; 184 sync_gen = sdp->sd_ail_sync_gen++;
186 185
187 first = head->prev;
188 first_ai = list_entry(first, struct gfs2_ail, ai_list);
189 first_ai->ai_sync_gen = sync_gen;
190 gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */
191
192 if (flags & DIO_ALL)
193 first = NULL;
194
195 while(!done) { 186 while(!done) {
196 if (first && (head->prev != first ||
197 gfs2_ail1_empty_one(sdp, first_ai, 0)))
198 break;
199
200 done = 1; 187 done = 1;
201 list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) { 188 list_for_each_entry_reverse(ai, head, ai_list) {
202 if (ai->ai_sync_gen >= sync_gen) 189 if (ai->ai_sync_gen >= sync_gen)
203 continue; 190 continue;
204 ai->ai_sync_gen = sync_gen; 191 ai->ai_sync_gen = sync_gen;
@@ -290,58 +277,57 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
290 * flush time, so we ensure that we have just enough free blocks at all 277 * flush time, so we ensure that we have just enough free blocks at all
291 * times to avoid running out during a log flush. 278 * times to avoid running out during a log flush.
292 * 279 *
280 * We no longer flush the log here, instead we wake up logd to do that
281 * for us. To avoid the thundering herd and to ensure that we deal fairly
282 * with queued waiters, we use an exclusive wait. This means that when we
283 * get woken with enough journal space to get our reservation, we need to
284 * wake the next waiter on the list.
285 *
293 * Returns: errno 286 * Returns: errno
294 */ 287 */
295 288
296int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) 289int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
297{ 290{
298 unsigned int try = 0;
299 unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); 291 unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
292 unsigned wanted = blks + reserved_blks;
293 DEFINE_WAIT(wait);
294 int did_wait = 0;
295 unsigned int free_blocks;
300 296
301 if (gfs2_assert_warn(sdp, blks) || 297 if (gfs2_assert_warn(sdp, blks) ||
302 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) 298 gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
303 return -EINVAL; 299 return -EINVAL;
304 300retry:
305 mutex_lock(&sdp->sd_log_reserve_mutex); 301 free_blocks = atomic_read(&sdp->sd_log_blks_free);
306 gfs2_log_lock(sdp); 302 if (unlikely(free_blocks <= wanted)) {
307 while(atomic_read(&sdp->sd_log_blks_free) <= (blks + reserved_blks)) { 303 do {
308 gfs2_log_unlock(sdp); 304 prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
309 gfs2_ail1_empty(sdp, 0); 305 TASK_UNINTERRUPTIBLE);
310 gfs2_log_flush(sdp, NULL); 306 wake_up(&sdp->sd_logd_waitq);
311 307 did_wait = 1;
312 if (try++) 308 if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
313 gfs2_ail1_start(sdp, 0); 309 io_schedule();
314 gfs2_log_lock(sdp); 310 free_blocks = atomic_read(&sdp->sd_log_blks_free);
311 } while(free_blocks <= wanted);
312 finish_wait(&sdp->sd_log_waitq, &wait);
315 } 313 }
316 atomic_sub(blks, &sdp->sd_log_blks_free); 314 if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
315 free_blocks - blks) != free_blocks)
316 goto retry;
317 trace_gfs2_log_blocks(sdp, -blks); 317 trace_gfs2_log_blocks(sdp, -blks);
318 gfs2_log_unlock(sdp); 318
319 mutex_unlock(&sdp->sd_log_reserve_mutex); 319 /*
320 * If we waited, then so might others, wake them up _after_ we get
321 * our share of the log.
322 */
323 if (unlikely(did_wait))
324 wake_up(&sdp->sd_log_waitq);
320 325
321 down_read(&sdp->sd_log_flush_lock); 326 down_read(&sdp->sd_log_flush_lock);
322 327
323 return 0; 328 return 0;
324} 329}
325 330
326/**
327 * gfs2_log_release - Release a given number of log blocks
328 * @sdp: The GFS2 superblock
329 * @blks: The number of blocks
330 *
331 */
332
333void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
334{
335
336 gfs2_log_lock(sdp);
337 atomic_add(blks, &sdp->sd_log_blks_free);
338 trace_gfs2_log_blocks(sdp, blks);
339 gfs2_assert_withdraw(sdp,
340 atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
341 gfs2_log_unlock(sdp);
342 up_read(&sdp->sd_log_flush_lock);
343}
344
345static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) 331static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
346{ 332{
347 struct gfs2_journal_extent *je; 333 struct gfs2_journal_extent *je;
@@ -559,11 +545,10 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
559 545
560 ail2_empty(sdp, new_tail); 546 ail2_empty(sdp, new_tail);
561 547
562 gfs2_log_lock(sdp);
563 atomic_add(dist, &sdp->sd_log_blks_free); 548 atomic_add(dist, &sdp->sd_log_blks_free);
564 trace_gfs2_log_blocks(sdp, dist); 549 trace_gfs2_log_blocks(sdp, dist);
565 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); 550 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
566 gfs2_log_unlock(sdp); 551 sdp->sd_jdesc->jd_blocks);
567 552
568 sdp->sd_log_tail = new_tail; 553 sdp->sd_log_tail = new_tail;
569} 554}
@@ -615,6 +600,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
615 if (buffer_eopnotsupp(bh)) { 600 if (buffer_eopnotsupp(bh)) {
616 clear_buffer_eopnotsupp(bh); 601 clear_buffer_eopnotsupp(bh);
617 set_buffer_uptodate(bh); 602 set_buffer_uptodate(bh);
603 fs_info(sdp, "barrier sync failed - disabling barriers\n");
618 set_bit(SDF_NOBARRIERS, &sdp->sd_flags); 604 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
619 lock_buffer(bh); 605 lock_buffer(bh);
620skip_barrier: 606skip_barrier:
@@ -822,6 +808,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
822 * @sdp: the filesystem 808 * @sdp: the filesystem
823 * @tr: the transaction 809 * @tr: the transaction
824 * 810 *
811 * We wake up gfs2_logd if the number of pinned blocks exceed thresh1
812 * or the total number of used blocks (pinned blocks plus AIL blocks)
813 * is greater than thresh2.
814 *
815 * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
816 * journal size.
817 *
825 * Returns: errno 818 * Returns: errno
826 */ 819 */
827 820
@@ -832,10 +825,10 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
832 825
833 up_read(&sdp->sd_log_flush_lock); 826 up_read(&sdp->sd_log_flush_lock);
834 827
835 gfs2_log_lock(sdp); 828 if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
836 if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) 829 ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
837 wake_up_process(sdp->sd_logd_process); 830 atomic_read(&sdp->sd_log_thresh2)))
838 gfs2_log_unlock(sdp); 831 wake_up(&sdp->sd_logd_waitq);
839} 832}
840 833
841/** 834/**
@@ -882,13 +875,23 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
882{ 875{
883 gfs2_log_flush(sdp, NULL); 876 gfs2_log_flush(sdp, NULL);
884 for (;;) { 877 for (;;) {
885 gfs2_ail1_start(sdp, DIO_ALL); 878 gfs2_ail1_start(sdp);
886 if (gfs2_ail1_empty(sdp, DIO_ALL)) 879 if (gfs2_ail1_empty(sdp, DIO_ALL))
887 break; 880 break;
888 msleep(10); 881 msleep(10);
889 } 882 }
890} 883}
891 884
885static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
886{
887 return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
888}
889
890static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
891{
892 unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
893 return used_blocks >= atomic_read(&sdp->sd_log_thresh2);
894}
892 895
893/** 896/**
894 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks 897 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
@@ -901,28 +904,43 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
901int gfs2_logd(void *data) 904int gfs2_logd(void *data)
902{ 905{
903 struct gfs2_sbd *sdp = data; 906 struct gfs2_sbd *sdp = data;
904 unsigned long t; 907 unsigned long t = 1;
905 int need_flush; 908 DEFINE_WAIT(wait);
909 unsigned preflush;
906 910
907 while (!kthread_should_stop()) { 911 while (!kthread_should_stop()) {
908 /* Advance the log tail */
909 912
910 t = sdp->sd_log_flush_time + 913 preflush = atomic_read(&sdp->sd_log_pinned);
911 gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; 914 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
915 gfs2_ail1_empty(sdp, DIO_ALL);
916 gfs2_log_flush(sdp, NULL);
917 gfs2_ail1_empty(sdp, DIO_ALL);
918 }
912 919
913 gfs2_ail1_empty(sdp, DIO_ALL); 920 if (gfs2_ail_flush_reqd(sdp)) {
914 gfs2_log_lock(sdp); 921 gfs2_ail1_start(sdp);
915 need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); 922 io_schedule();
916 gfs2_log_unlock(sdp); 923 gfs2_ail1_empty(sdp, 0);
917 if (need_flush || time_after_eq(jiffies, t)) {
918 gfs2_log_flush(sdp, NULL); 924 gfs2_log_flush(sdp, NULL);
919 sdp->sd_log_flush_time = jiffies; 925 gfs2_ail1_empty(sdp, DIO_ALL);
920 } 926 }
921 927
928 wake_up(&sdp->sd_log_waitq);
922 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 929 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
923 if (freezing(current)) 930 if (freezing(current))
924 refrigerator(); 931 refrigerator();
925 schedule_timeout_interruptible(t); 932
933 do {
934 prepare_to_wait(&sdp->sd_logd_waitq, &wait,
935 TASK_UNINTERRUPTIBLE);
936 if (!gfs2_ail_flush_reqd(sdp) &&
937 !gfs2_jrnl_flush_reqd(sdp) &&
938 !kthread_should_stop())
939 t = schedule_timeout(t);
940 } while(t && !gfs2_ail_flush_reqd(sdp) &&
941 !gfs2_jrnl_flush_reqd(sdp) &&
942 !kthread_should_stop());
943 finish_wait(&sdp->sd_logd_waitq, &wait);
926 } 944 }
927 945
928 return 0; 946 return 0;
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 7c64510ccfd2..eb570b4ad443 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -51,7 +51,6 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
51 unsigned int ssize); 51 unsigned int ssize);
52 52
53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); 53int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
54void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
55void gfs2_log_incr_head(struct gfs2_sbd *sdp); 54void gfs2_log_incr_head(struct gfs2_sbd *sdp);
56 55
57struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); 56struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index adc260fbea90..bf33f822058d 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -54,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
54 if (bd->bd_ail) 54 if (bd->bd_ail)
55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
56 get_bh(bh); 56 get_bh(bh);
57 atomic_inc(&sdp->sd_log_pinned);
57 trace_gfs2_pin(bd, 1); 58 trace_gfs2_pin(bd, 1);
58} 59}
59 60
@@ -94,6 +95,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
94 trace_gfs2_pin(bd, 0); 95 trace_gfs2_pin(bd, 0);
95 gfs2_log_unlock(sdp); 96 gfs2_log_unlock(sdp);
96 unlock_buffer(bh); 97 unlock_buffer(bh);
98 atomic_dec(&sdp->sd_log_pinned);
97} 99}
98 100
99 101
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index a88fadc704bb..fb2a5f93b7c3 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -94,7 +94,7 @@ static int __init init_gfs2_fs(void)
94 if (!gfs2_glock_cachep) 94 if (!gfs2_glock_cachep)
95 goto fail; 95 goto fail;
96 96
97 gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock (aspace)", 97 gfs2_glock_aspace_cachep = kmem_cache_create("gfs2_glock(aspace)",
98 sizeof(struct gfs2_glock) + 98 sizeof(struct gfs2_glock) +
99 sizeof(struct address_space), 99 sizeof(struct address_space),
100 0, 0, gfs2_init_gl_aspace_once); 100 0, 0, gfs2_init_gl_aspace_once);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0bb12c80937a..18176d0b75d7 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -34,7 +34,6 @@
34 34
35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
36{ 36{
37 int err;
38 struct buffer_head *bh, *head; 37 struct buffer_head *bh, *head;
39 int nr_underway = 0; 38 int nr_underway = 0;
40 int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? 39 int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
@@ -86,11 +85,10 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
86 } while (bh != head); 85 } while (bh != head);
87 unlock_page(page); 86 unlock_page(page);
88 87
89 err = 0;
90 if (nr_underway == 0) 88 if (nr_underway == 0)
91 end_page_writeback(page); 89 end_page_writeback(page);
92 90
93 return err; 91 return 0;
94} 92}
95 93
96const struct address_space_operations gfs2_meta_aops = { 94const struct address_space_operations gfs2_meta_aops = {
@@ -313,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
313 struct gfs2_bufdata *bd = bh->b_private; 311 struct gfs2_bufdata *bd = bh->b_private;
314 312
315 if (test_clear_buffer_pinned(bh)) { 313 if (test_clear_buffer_pinned(bh)) {
314 atomic_dec(&sdp->sd_log_pinned);
316 list_del_init(&bd->bd_le.le_list); 315 list_del_init(&bd->bd_le.le_list);
317 if (meta) { 316 if (meta) {
318 gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 317 gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index c1309ed1c496..3593b3a7290e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -57,8 +57,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
57{ 57{
58 spin_lock_init(&gt->gt_spin); 58 spin_lock_init(&gt->gt_spin);
59 59
60 gt->gt_incore_log_blocks = 1024;
61 gt->gt_logd_secs = 1;
62 gt->gt_quota_simul_sync = 64; 60 gt->gt_quota_simul_sync = 64;
63 gt->gt_quota_warn_period = 10; 61 gt->gt_quota_warn_period = 10;
64 gt->gt_quota_scale_num = 1; 62 gt->gt_quota_scale_num = 1;
@@ -101,14 +99,15 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
101 spin_lock_init(&sdp->sd_trunc_lock); 99 spin_lock_init(&sdp->sd_trunc_lock);
102 100
103 spin_lock_init(&sdp->sd_log_lock); 101 spin_lock_init(&sdp->sd_log_lock);
104 102 atomic_set(&sdp->sd_log_pinned, 0);
105 INIT_LIST_HEAD(&sdp->sd_log_le_buf); 103 INIT_LIST_HEAD(&sdp->sd_log_le_buf);
106 INIT_LIST_HEAD(&sdp->sd_log_le_revoke); 104 INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
107 INIT_LIST_HEAD(&sdp->sd_log_le_rg); 105 INIT_LIST_HEAD(&sdp->sd_log_le_rg);
108 INIT_LIST_HEAD(&sdp->sd_log_le_databuf); 106 INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
109 INIT_LIST_HEAD(&sdp->sd_log_le_ordered); 107 INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
110 108
111 mutex_init(&sdp->sd_log_reserve_mutex); 109 init_waitqueue_head(&sdp->sd_log_waitq);
110 init_waitqueue_head(&sdp->sd_logd_waitq);
112 INIT_LIST_HEAD(&sdp->sd_ail1_list); 111 INIT_LIST_HEAD(&sdp->sd_ail1_list);
113 INIT_LIST_HEAD(&sdp->sd_ail2_list); 112 INIT_LIST_HEAD(&sdp->sd_ail2_list);
114 113
@@ -487,7 +486,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
487 struct dentry *dentry; 486 struct dentry *dentry;
488 struct inode *inode; 487 struct inode *inode;
489 488
490 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); 489 inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
491 if (IS_ERR(inode)) { 490 if (IS_ERR(inode)) {
492 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); 491 fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
493 return PTR_ERR(inode); 492 return PTR_ERR(inode);
@@ -733,6 +732,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
733 if (sdp->sd_args.ar_spectator) { 732 if (sdp->sd_args.ar_spectator) {
734 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); 733 sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
735 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); 734 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
735 atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
736 atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
736 } else { 737 } else {
737 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { 738 if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
738 fs_err(sdp, "can't mount journal #%u\n", 739 fs_err(sdp, "can't mount journal #%u\n",
@@ -770,6 +771,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
770 goto fail_jinode_gh; 771 goto fail_jinode_gh;
771 } 772 }
772 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); 773 atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks);
774 atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5);
775 atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5);
773 776
774 /* Map the extents for this journal's blocks */ 777 /* Map the extents for this journal's blocks */
775 map_journal_extents(sdp); 778 map_journal_extents(sdp);
@@ -951,8 +954,6 @@ static int init_threads(struct gfs2_sbd *sdp, int undo)
951 if (undo) 954 if (undo)
952 goto fail_quotad; 955 goto fail_quotad;
953 956
954 sdp->sd_log_flush_time = jiffies;
955
956 p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); 957 p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
957 error = IS_ERR(p); 958 error = IS_ERR(p);
958 if (error) { 959 if (error) {
@@ -1160,7 +1161,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1160 GFS2_BASIC_BLOCK_SHIFT; 1161 GFS2_BASIC_BLOCK_SHIFT;
1161 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 1162 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
1162 1163
1163 sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; 1164 sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
1164 sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum; 1165 sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
1165 if (sdp->sd_args.ar_statfs_quantum) { 1166 if (sdp->sd_args.ar_statfs_quantum) {
1166 sdp->sd_tune.gt_statfs_slow = 0; 1167 sdp->sd_tune.gt_statfs_slow = 0;
@@ -1323,7 +1324,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
1323 memset(&args, 0, sizeof(args)); 1324 memset(&args, 0, sizeof(args));
1324 args.ar_quota = GFS2_QUOTA_DEFAULT; 1325 args.ar_quota = GFS2_QUOTA_DEFAULT;
1325 args.ar_data = GFS2_DATA_DEFAULT; 1326 args.ar_data = GFS2_DATA_DEFAULT;
1326 args.ar_commit = 60; 1327 args.ar_commit = 30;
1327 args.ar_statfs_quantum = 30; 1328 args.ar_statfs_quantum = 30;
1328 args.ar_quota_quantum = 60; 1329 args.ar_quota_quantum = 60;
1329 args.ar_errors = GFS2_ERRORS_DEFAULT; 1330 args.ar_errors = GFS2_ERRORS_DEFAULT;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6dbcbad6ab17..49667d68769e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -637,15 +637,40 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
637 unsigned blocksize, iblock, pos; 637 unsigned blocksize, iblock, pos;
638 struct buffer_head *bh, *dibh; 638 struct buffer_head *bh, *dibh;
639 struct page *page; 639 struct page *page;
640 void *kaddr; 640 void *kaddr, *ptr;
641 struct gfs2_quota *qp; 641 struct gfs2_quota q, *qp;
642 s64 value; 642 int err, nbytes;
643 int err = -EIO;
644 u64 size; 643 u64 size;
645 644
646 if (gfs2_is_stuffed(ip)) 645 if (gfs2_is_stuffed(ip))
647 gfs2_unstuff_dinode(ip, NULL); 646 gfs2_unstuff_dinode(ip, NULL);
648 647
648 memset(&q, 0, sizeof(struct gfs2_quota));
649 err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q));
650 if (err < 0)
651 return err;
652
653 err = -EIO;
654 qp = &q;
655 qp->qu_value = be64_to_cpu(qp->qu_value);
656 qp->qu_value += change;
657 qp->qu_value = cpu_to_be64(qp->qu_value);
658 qd->qd_qb.qb_value = qp->qu_value;
659 if (fdq) {
660 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
661 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
662 qd->qd_qb.qb_warn = qp->qu_warn;
663 }
664 if (fdq->d_fieldmask & FS_DQ_BHARD) {
665 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
666 qd->qd_qb.qb_limit = qp->qu_limit;
667 }
668 }
669
670 /* Write the quota into the quota file on disk */
671 ptr = qp;
672 nbytes = sizeof(struct gfs2_quota);
673get_a_page:
649 page = grab_cache_page(mapping, index); 674 page = grab_cache_page(mapping, index);
650 if (!page) 675 if (!page)
651 return -ENOMEM; 676 return -ENOMEM;
@@ -667,7 +692,12 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
667 if (!buffer_mapped(bh)) { 692 if (!buffer_mapped(bh)) {
668 gfs2_block_map(inode, iblock, bh, 1); 693 gfs2_block_map(inode, iblock, bh, 1);
669 if (!buffer_mapped(bh)) 694 if (!buffer_mapped(bh))
670 goto unlock; 695 goto unlock_out;
696 /* If it's a newly allocated disk block for quota, zero it */
697 if (buffer_new(bh)) {
698 memset(bh->b_data, 0, bh->b_size);
699 set_buffer_uptodate(bh);
700 }
671 } 701 }
672 702
673 if (PageUptodate(page)) 703 if (PageUptodate(page))
@@ -677,32 +707,34 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
677 ll_rw_block(READ_META, 1, &bh); 707 ll_rw_block(READ_META, 1, &bh);
678 wait_on_buffer(bh); 708 wait_on_buffer(bh);
679 if (!buffer_uptodate(bh)) 709 if (!buffer_uptodate(bh))
680 goto unlock; 710 goto unlock_out;
681 } 711 }
682 712
683 gfs2_trans_add_bh(ip->i_gl, bh, 0); 713 gfs2_trans_add_bh(ip->i_gl, bh, 0);
684 714
685 kaddr = kmap_atomic(page, KM_USER0); 715 kaddr = kmap_atomic(page, KM_USER0);
686 qp = kaddr + offset; 716 if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE)
687 value = (s64)be64_to_cpu(qp->qu_value) + change; 717 nbytes = PAGE_CACHE_SIZE - offset;
688 qp->qu_value = cpu_to_be64(value); 718 memcpy(kaddr + offset, ptr, nbytes);
689 qd->qd_qb.qb_value = qp->qu_value;
690 if (fdq) {
691 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
692 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
693 qd->qd_qb.qb_warn = qp->qu_warn;
694 }
695 if (fdq->d_fieldmask & FS_DQ_BHARD) {
696 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
697 qd->qd_qb.qb_limit = qp->qu_limit;
698 }
699 }
700 flush_dcache_page(page); 719 flush_dcache_page(page);
701 kunmap_atomic(kaddr, KM_USER0); 720 kunmap_atomic(kaddr, KM_USER0);
721 unlock_page(page);
722 page_cache_release(page);
702 723
724 /* If quota straddles page boundary, we need to update the rest of the
725 * quota at the beginning of the next page */
726 if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */
727 ptr = ptr + nbytes;
728 nbytes = sizeof(struct gfs2_quota) - nbytes;
729 offset = 0;
730 index++;
731 goto get_a_page;
732 }
733
734 /* Update the disk inode timestamp and size (if extended) */
703 err = gfs2_meta_inode_buffer(ip, &dibh); 735 err = gfs2_meta_inode_buffer(ip, &dibh);
704 if (err) 736 if (err)
705 goto unlock; 737 goto out;
706 738
707 size = loc + sizeof(struct gfs2_quota); 739 size = loc + sizeof(struct gfs2_quota);
708 if (size > inode->i_size) { 740 if (size > inode->i_size) {
@@ -715,7 +747,9 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
715 brelse(dibh); 747 brelse(dibh);
716 mark_inode_dirty(inode); 748 mark_inode_dirty(inode);
717 749
718unlock: 750out:
751 return err;
752unlock_out:
719 unlock_page(page); 753 unlock_page(page);
720 page_cache_release(page); 754 page_cache_release(page);
721 return err; 755 return err;
@@ -779,8 +813,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
779 * rgrp since it won't be allocated during the transaction 813 * rgrp since it won't be allocated during the transaction
780 */ 814 */
781 al->al_requested = 1; 815 al->al_requested = 1;
782 /* +1 in the end for block requested above for unstuffing */ 816 /* +3 in the end for unstuffing block, inode size update block
783 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 1; 817 * and another block in case quota straddles page boundary and
818 * two blocks need to be updated instead of 1 */
819 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
784 820
785 if (nalloc) 821 if (nalloc)
786 al->al_requested += nalloc * (data_blocks + ind_blocks); 822 al->al_requested += nalloc * (data_blocks + ind_blocks);
@@ -1418,10 +1454,18 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
1418 1454
1419 memset(fqs, 0, sizeof(struct fs_quota_stat)); 1455 memset(fqs, 0, sizeof(struct fs_quota_stat));
1420 fqs->qs_version = FS_QSTAT_VERSION; 1456 fqs->qs_version = FS_QSTAT_VERSION;
1421 if (sdp->sd_args.ar_quota == GFS2_QUOTA_ON) 1457
1422 fqs->qs_flags = (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); 1458 switch (sdp->sd_args.ar_quota) {
1423 else if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) 1459 case GFS2_QUOTA_ON:
1424 fqs->qs_flags = (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); 1460 fqs->qs_flags |= (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD);
1461 /*FALLTHRU*/
1462 case GFS2_QUOTA_ACCOUNT:
1463 fqs->qs_flags |= (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT);
1464 break;
1465 case GFS2_QUOTA_OFF:
1466 break;
1467 }
1468
1425 if (sdp->sd_quota_inode) { 1469 if (sdp->sd_quota_inode) {
1426 fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr; 1470 fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr;
1427 fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks; 1471 fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks;
@@ -1432,8 +1476,8 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
1432 return 0; 1476 return 0;
1433} 1477}
1434 1478
1435static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, 1479static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
1436 struct fs_disk_quota *fdq) 1480 struct fs_disk_quota *fdq)
1437{ 1481{
1438 struct gfs2_sbd *sdp = sb->s_fs_info; 1482 struct gfs2_sbd *sdp = sb->s_fs_info;
1439 struct gfs2_quota_lvb *qlvb; 1483 struct gfs2_quota_lvb *qlvb;
@@ -1477,8 +1521,8 @@ out:
1477/* GFS2 only supports a subset of the XFS fields */ 1521/* GFS2 only supports a subset of the XFS fields */
1478#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) 1522#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
1479 1523
1480static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, 1524static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1481 struct fs_disk_quota *fdq) 1525 struct fs_disk_quota *fdq)
1482{ 1526{
1483 struct gfs2_sbd *sdp = sb->s_fs_info; 1527 struct gfs2_sbd *sdp = sb->s_fs_info;
1484 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); 1528 struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
@@ -1585,7 +1629,7 @@ out_put:
1585const struct quotactl_ops gfs2_quotactl_ops = { 1629const struct quotactl_ops gfs2_quotactl_ops = {
1586 .quota_sync = gfs2_quota_sync, 1630 .quota_sync = gfs2_quota_sync,
1587 .get_xstate = gfs2_quota_get_xstate, 1631 .get_xstate = gfs2_quota_get_xstate,
1588 .get_xquota = gfs2_xquota_get, 1632 .get_dqblk = gfs2_get_dqblk,
1589 .set_xquota = gfs2_xquota_set, 1633 .set_dqblk = gfs2_set_dqblk,
1590}; 1634};
1591 1635
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bf011dc63471..117fa4171f62 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -949,13 +949,13 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
949 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 949 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
950 * @rgd: The rgrp 950 * @rgd: The rgrp
951 * 951 *
952 * Returns: The inode, if one has been found 952 * Returns: 0 if no error
953 * The inode, if one has been found, in inode.
953 */ 954 */
954 955
955static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, 956static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
956 u64 skip) 957 u64 skip)
957{ 958{
958 struct inode *inode;
959 u32 goal = 0, block; 959 u32 goal = 0, block;
960 u64 no_addr; 960 u64 no_addr;
961 struct gfs2_sbd *sdp = rgd->rd_sbd; 961 struct gfs2_sbd *sdp = rgd->rd_sbd;
@@ -980,14 +980,11 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
980 if (no_addr == skip) 980 if (no_addr == skip)
981 continue; 981 continue;
982 *last_unlinked = no_addr; 982 *last_unlinked = no_addr;
983 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, 983 return no_addr;
984 no_addr, -1, 1);
985 if (!IS_ERR(inode))
986 return inode;
987 } 984 }
988 985
989 rgd->rd_flags &= ~GFS2_RDF_CHECK; 986 rgd->rd_flags &= ~GFS2_RDF_CHECK;
990 return NULL; 987 return 0;
991} 988}
992 989
993/** 990/**
@@ -1068,11 +1065,12 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
1068 * Try to acquire rgrp in way which avoids contending with others. 1065 * Try to acquire rgrp in way which avoids contending with others.
1069 * 1066 *
1070 * Returns: errno 1067 * Returns: errno
1068 * unlinked: the block address of an unlinked block to be reclaimed
1071 */ 1069 */
1072 1070
1073static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) 1071static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1072 u64 *last_unlinked)
1074{ 1073{
1075 struct inode *inode = NULL;
1076 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1074 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1077 struct gfs2_rgrpd *rgd, *begin = NULL; 1075 struct gfs2_rgrpd *rgd, *begin = NULL;
1078 struct gfs2_alloc *al = ip->i_alloc; 1076 struct gfs2_alloc *al = ip->i_alloc;
@@ -1081,6 +1079,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1081 int loops = 0; 1079 int loops = 0;
1082 int error, rg_locked; 1080 int error, rg_locked;
1083 1081
1082 *unlinked = 0;
1084 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); 1083 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
1085 1084
1086 while (rgd) { 1085 while (rgd) {
@@ -1097,19 +1096,24 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1097 case 0: 1096 case 0:
1098 if (try_rgrp_fit(rgd, al)) 1097 if (try_rgrp_fit(rgd, al))
1099 goto out; 1098 goto out;
1100 if (rgd->rd_flags & GFS2_RDF_CHECK) 1099 /* If the rg came in already locked, there's no
1101 inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1100 way we can recover from a failed try_rgrp_unlink
1101 because that would require an iput which can only
1102 happen after the rgrp is unlocked. */
1103 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
1104 *unlinked = try_rgrp_unlink(rgd, last_unlinked,
1105 ip->i_no_addr);
1102 if (!rg_locked) 1106 if (!rg_locked)
1103 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1107 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1104 if (inode) 1108 if (*unlinked)
1105 return inode; 1109 return -EAGAIN;
1106 /* fall through */ 1110 /* fall through */
1107 case GLR_TRYFAILED: 1111 case GLR_TRYFAILED:
1108 rgd = recent_rgrp_next(rgd); 1112 rgd = recent_rgrp_next(rgd);
1109 break; 1113 break;
1110 1114
1111 default: 1115 default:
1112 return ERR_PTR(error); 1116 return error;
1113 } 1117 }
1114 } 1118 }
1115 1119
@@ -1131,12 +1135,13 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1131 case 0: 1135 case 0:
1132 if (try_rgrp_fit(rgd, al)) 1136 if (try_rgrp_fit(rgd, al))
1133 goto out; 1137 goto out;
1134 if (rgd->rd_flags & GFS2_RDF_CHECK) 1138 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
1135 inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1139 *unlinked = try_rgrp_unlink(rgd, last_unlinked,
1140 ip->i_no_addr);
1136 if (!rg_locked) 1141 if (!rg_locked)
1137 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1142 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1138 if (inode) 1143 if (*unlinked)
1139 return inode; 1144 return -EAGAIN;
1140 break; 1145 break;
1141 1146
1142 case GLR_TRYFAILED: 1147 case GLR_TRYFAILED:
@@ -1144,7 +1149,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1144 break; 1149 break;
1145 1150
1146 default: 1151 default:
1147 return ERR_PTR(error); 1152 return error;
1148 } 1153 }
1149 1154
1150 rgd = gfs2_rgrpd_get_next(rgd); 1155 rgd = gfs2_rgrpd_get_next(rgd);
@@ -1153,7 +1158,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1153 1158
1154 if (rgd == begin) { 1159 if (rgd == begin) {
1155 if (++loops >= 3) 1160 if (++loops >= 3)
1156 return ERR_PTR(-ENOSPC); 1161 return -ENOSPC;
1157 if (!skipped) 1162 if (!skipped)
1158 loops++; 1163 loops++;
1159 flags = 0; 1164 flags = 0;
@@ -1173,7 +1178,7 @@ out:
1173 forward_rgrp_set(sdp, rgd); 1178 forward_rgrp_set(sdp, rgd);
1174 } 1179 }
1175 1180
1176 return NULL; 1181 return 0;
1177} 1182}
1178 1183
1179/** 1184/**
@@ -1189,7 +1194,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
1189 struct gfs2_alloc *al = ip->i_alloc; 1194 struct gfs2_alloc *al = ip->i_alloc;
1190 struct inode *inode; 1195 struct inode *inode;
1191 int error = 0; 1196 int error = 0;
1192 u64 last_unlinked = NO_BLOCK; 1197 u64 last_unlinked = NO_BLOCK, unlinked;
1193 1198
1194 if (gfs2_assert_warn(sdp, al->al_requested)) 1199 if (gfs2_assert_warn(sdp, al->al_requested))
1195 return -EINVAL; 1200 return -EINVAL;
@@ -1205,14 +1210,19 @@ try_again:
1205 if (error) 1210 if (error)
1206 return error; 1211 return error;
1207 1212
1208 inode = get_local_rgrp(ip, &last_unlinked); 1213 error = get_local_rgrp(ip, &unlinked, &last_unlinked);
1209 if (inode) { 1214 if (error) {
1210 if (ip != GFS2_I(sdp->sd_rindex)) 1215 if (ip != GFS2_I(sdp->sd_rindex))
1211 gfs2_glock_dq_uninit(&al->al_ri_gh); 1216 gfs2_glock_dq_uninit(&al->al_ri_gh);
1212 if (IS_ERR(inode)) 1217 if (error != -EAGAIN)
1213 return PTR_ERR(inode); 1218 return error;
1214 iput(inode); 1219 error = gfs2_unlinked_inode_lookup(ip->i_inode.i_sb,
1220 unlinked, &inode);
1221 if (inode)
1222 iput(inode);
1215 gfs2_log_flush(sdp, NULL); 1223 gfs2_log_flush(sdp, NULL);
1224 if (error == GLR_TRYFAILED)
1225 error = 0;
1216 goto try_again; 1226 goto try_again;
1217 } 1227 }
1218 1228
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 50aac606b990..4d1aad38f1b1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1113,7 +1113,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1113 int error; 1113 int error;
1114 1114
1115 spin_lock(&gt->gt_spin); 1115 spin_lock(&gt->gt_spin);
1116 args.ar_commit = gt->gt_log_flush_secs; 1116 args.ar_commit = gt->gt_logd_secs;
1117 args.ar_quota_quantum = gt->gt_quota_quantum; 1117 args.ar_quota_quantum = gt->gt_quota_quantum;
1118 if (gt->gt_statfs_slow) 1118 if (gt->gt_statfs_slow)
1119 args.ar_statfs_quantum = 0; 1119 args.ar_statfs_quantum = 0;
@@ -1160,7 +1160,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1160 else 1160 else
1161 clear_bit(SDF_NOBARRIERS, &sdp->sd_flags); 1161 clear_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1162 spin_lock(&gt->gt_spin); 1162 spin_lock(&gt->gt_spin);
1163 gt->gt_log_flush_secs = args.ar_commit; 1163 gt->gt_logd_secs = args.ar_commit;
1164 gt->gt_quota_quantum = args.ar_quota_quantum; 1164 gt->gt_quota_quantum = args.ar_quota_quantum;
1165 if (args.ar_statfs_quantum) { 1165 if (args.ar_statfs_quantum) {
1166 gt->gt_statfs_slow = 0; 1166 gt->gt_statfs_slow = 0;
@@ -1305,8 +1305,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1305 } 1305 }
1306 if (args->ar_discard) 1306 if (args->ar_discard)
1307 seq_printf(s, ",discard"); 1307 seq_printf(s, ",discard");
1308 val = sdp->sd_tune.gt_log_flush_secs; 1308 val = sdp->sd_tune.gt_logd_secs;
1309 if (val != 60) 1309 if (val != 30)
1310 seq_printf(s, ",commit=%d", val); 1310 seq_printf(s, ",commit=%d", val);
1311 val = sdp->sd_tune.gt_statfs_quantum; 1311 val = sdp->sd_tune.gt_statfs_quantum;
1312 if (val != 30) 1312 if (val != 30)
@@ -1334,7 +1334,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1334 } 1334 }
1335 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) 1335 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
1336 seq_printf(s, ",nobarrier"); 1336 seq_printf(s, ",nobarrier");
1337 1337 if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
1338 seq_printf(s, ",demote_interface_used");
1338 return 0; 1339 return 0;
1339} 1340}
1340 1341
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 54fd98425991..37f5393e68e6 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -232,6 +232,8 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len
232 glops = gfs2_glops_list[gltype]; 232 glops = gfs2_glops_list[gltype];
233 if (glops == NULL) 233 if (glops == NULL)
234 return -EINVAL; 234 return -EINVAL;
235 if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags))
236 fs_info(sdp, "demote interface used\n");
235 rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl); 237 rv = gfs2_glock_get(sdp, glnum, glops, 0, &gl);
236 if (rv) 238 if (rv)
237 return rv; 239 return rv;
@@ -468,8 +470,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
468} \ 470} \
469TUNE_ATTR_2(name, name##_store) 471TUNE_ATTR_2(name, name##_store)
470 472
471TUNE_ATTR(incore_log_blocks, 0);
472TUNE_ATTR(log_flush_secs, 0);
473TUNE_ATTR(quota_warn_period, 0); 473TUNE_ATTR(quota_warn_period, 0);
474TUNE_ATTR(quota_quantum, 0); 474TUNE_ATTR(quota_quantum, 0);
475TUNE_ATTR(max_readahead, 0); 475TUNE_ATTR(max_readahead, 0);
@@ -481,8 +481,6 @@ TUNE_ATTR(statfs_quantum, 1);
481TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); 481TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
482 482
483static struct attribute *tune_attrs[] = { 483static struct attribute *tune_attrs[] = {
484 &tune_attr_incore_log_blocks.attr,
485 &tune_attr_log_flush_secs.attr,
486 &tune_attr_quota_warn_period.attr, 484 &tune_attr_quota_warn_period.attr,
487 &tune_attr_quota_quantum.attr, 485 &tune_attr_quota_quantum.attr,
488 &tune_attr_max_readahead.attr, 486 &tune_attr_max_readahead.attr,
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 4ef0e9fa3549..9ec73a854111 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -23,6 +23,7 @@
23#include "meta_io.h" 23#include "meta_io.h"
24#include "trans.h" 24#include "trans.h"
25#include "util.h" 25#include "util.h"
26#include "trace_gfs2.h"
26 27
27int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, 28int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
28 unsigned int revokes) 29 unsigned int revokes)
@@ -75,6 +76,23 @@ fail_holder_uninit:
75 return error; 76 return error;
76} 77}
77 78
79/**
80 * gfs2_log_release - Release a given number of log blocks
81 * @sdp: The GFS2 superblock
82 * @blks: The number of blocks
83 *
84 */
85
86static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
87{
88
89 atomic_add(blks, &sdp->sd_log_blks_free);
90 trace_gfs2_log_blocks(sdp, blks);
91 gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
92 sdp->sd_jdesc->jd_blocks);
93 up_read(&sdp->sd_log_flush_lock);
94}
95
78void gfs2_trans_end(struct gfs2_sbd *sdp) 96void gfs2_trans_end(struct gfs2_sbd *sdp)
79{ 97{
80 struct gfs2_trans *tr = current->journal_info; 98 struct gfs2_trans *tr = current->journal_info;
diff --git a/fs/inode.c b/fs/inode.c
index 407bf392e20a..258ec22bb298 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1205,8 +1205,6 @@ void generic_delete_inode(struct inode *inode)
1205 inodes_stat.nr_inodes--; 1205 inodes_stat.nr_inodes--;
1206 spin_unlock(&inode_lock); 1206 spin_unlock(&inode_lock);
1207 1207
1208 security_inode_delete(inode);
1209
1210 if (op->delete_inode) { 1208 if (op->delete_inode) {
1211 void (*delete)(struct inode *) = op->delete_inode; 1209 void (*delete)(struct inode *) = op->delete_inode;
1212 /* Filesystems implementing their own 1210 /* Filesystems implementing their own
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ecb44c94ba8d..28a9ddaa0c49 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -786,6 +786,12 @@ wait_for_iobuf:
786 786
787 jbd_debug(3, "JBD: commit phase 6\n"); 787 jbd_debug(3, "JBD: commit phase 6\n");
788 788
789 /* All metadata is written, now write commit record and do cleanup */
790 spin_lock(&journal->j_state_lock);
791 J_ASSERT(commit_transaction->t_state == T_COMMIT);
792 commit_transaction->t_state = T_COMMIT_RECORD;
793 spin_unlock(&journal->j_state_lock);
794
789 if (journal_write_commit_record(journal, commit_transaction)) 795 if (journal_write_commit_record(journal, commit_transaction))
790 err = -EIO; 796 err = -EIO;
791 797
@@ -923,7 +929,7 @@ restart_loop:
923 929
924 jbd_debug(3, "JBD: commit phase 8\n"); 930 jbd_debug(3, "JBD: commit phase 8\n");
925 931
926 J_ASSERT(commit_transaction->t_state == T_COMMIT); 932 J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);
927 933
928 commit_transaction->t_state = T_FINISHED; 934 commit_transaction->t_state = T_FINISHED;
929 J_ASSERT(commit_transaction == journal->j_committing_transaction); 935 J_ASSERT(commit_transaction == journal->j_committing_transaction);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index bd224eec9b07..93d1e47647bd 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -565,6 +565,38 @@ int log_wait_commit(journal_t *journal, tid_t tid)
565} 565}
566 566
567/* 567/*
568 * Return 1 if a given transaction has not yet sent barrier request
569 * connected with a transaction commit. If 0 is returned, transaction
570 * may or may not have sent the barrier. Used to avoid sending barrier
571 * twice in common cases.
572 */
573int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
574{
575 int ret = 0;
576 transaction_t *commit_trans;
577
578 if (!(journal->j_flags & JFS_BARRIER))
579 return 0;
580 spin_lock(&journal->j_state_lock);
581 /* Transaction already committed? */
582 if (tid_geq(journal->j_commit_sequence, tid))
583 goto out;
584 /*
585 * Transaction is being committed and we already proceeded to
586 * writing commit record?
587 */
588 commit_trans = journal->j_committing_transaction;
589 if (commit_trans && commit_trans->t_tid == tid &&
590 commit_trans->t_state >= T_COMMIT_RECORD)
591 goto out;
592 ret = 1;
593out:
594 spin_unlock(&journal->j_state_lock);
595 return ret;
596}
597EXPORT_SYMBOL(journal_trans_will_send_data_barrier);
598
599/*
568 * Log buffer allocation routines: 600 * Log buffer allocation routines:
569 */ 601 */
570 602
@@ -1157,6 +1189,7 @@ int journal_destroy(journal_t *journal)
1157{ 1189{
1158 int err = 0; 1190 int err = 0;
1159 1191
1192
1160 /* Wait for the commit thread to wake up and die. */ 1193 /* Wait for the commit thread to wake up and die. */
1161 journal_kill_thread(journal); 1194 journal_kill_thread(journal);
1162 1195
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c03d4dce4d76..bc2ff5932769 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1889,7 +1889,7 @@ static struct kmem_cache *get_slab(size_t size)
1889 BUG_ON(i >= JBD2_MAX_SLABS); 1889 BUG_ON(i >= JBD2_MAX_SLABS);
1890 if (unlikely(i < 0)) 1890 if (unlikely(i < 0))
1891 i = 0; 1891 i = 0;
1892 BUG_ON(jbd2_slab[i] == 0); 1892 BUG_ON(jbd2_slab[i] == NULL);
1893 return jbd2_slab[i]; 1893 return jbd2_slab[i];
1894} 1894}
1895 1895
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 3ff50da94789..55f1dde2fa8b 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -23,10 +23,9 @@ static int jffs2_garbage_collect_thread(void *);
23 23
24void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c) 24void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c)
25{ 25{
26 spin_lock(&c->erase_completion_lock); 26 assert_spin_locked(&c->erase_completion_lock);
27 if (c->gc_task && jffs2_thread_should_wake(c)) 27 if (c->gc_task && jffs2_thread_should_wake(c))
28 send_sig(SIGHUP, c->gc_task, 1); 28 send_sig(SIGHUP, c->gc_task, 1);
29 spin_unlock(&c->erase_completion_lock);
30} 29}
31 30
32/* This must only ever be called when no GC thread is currently running */ 31/* This must only ever be called when no GC thread is currently running */
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index b47679be118a..6286ad9b00f7 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -103,9 +103,10 @@ static void jffs2_erase_block(struct jffs2_sb_info *c,
103 jffs2_erase_failed(c, jeb, bad_offset); 103 jffs2_erase_failed(c, jeb, bad_offset);
104} 104}
105 105
106void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count) 106int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
107{ 107{
108 struct jffs2_eraseblock *jeb; 108 struct jffs2_eraseblock *jeb;
109 int work_done = 0;
109 110
110 mutex_lock(&c->erase_free_sem); 111 mutex_lock(&c->erase_free_sem);
111 112
@@ -121,6 +122,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
121 mutex_unlock(&c->erase_free_sem); 122 mutex_unlock(&c->erase_free_sem);
122 jffs2_mark_erased_block(c, jeb); 123 jffs2_mark_erased_block(c, jeb);
123 124
125 work_done++;
124 if (!--count) { 126 if (!--count) {
125 D1(printk(KERN_DEBUG "Count reached. jffs2_erase_pending_blocks leaving\n")); 127 D1(printk(KERN_DEBUG "Count reached. jffs2_erase_pending_blocks leaving\n"));
126 goto done; 128 goto done;
@@ -157,6 +159,7 @@ void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
157 mutex_unlock(&c->erase_free_sem); 159 mutex_unlock(&c->erase_free_sem);
158 done: 160 done:
159 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n")); 161 D1(printk(KERN_DEBUG "jffs2_erase_pending_blocks completed\n"));
162 return work_done;
160} 163}
161 164
162static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb) 165static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
@@ -165,10 +168,11 @@ static void jffs2_erase_succeeded(struct jffs2_sb_info *c, struct jffs2_eraseblo
165 mutex_lock(&c->erase_free_sem); 168 mutex_lock(&c->erase_free_sem);
166 spin_lock(&c->erase_completion_lock); 169 spin_lock(&c->erase_completion_lock);
167 list_move_tail(&jeb->list, &c->erase_complete_list); 170 list_move_tail(&jeb->list, &c->erase_complete_list);
171 /* Wake the GC thread to mark them clean */
172 jffs2_garbage_collect_trigger(c);
168 spin_unlock(&c->erase_completion_lock); 173 spin_unlock(&c->erase_completion_lock);
169 mutex_unlock(&c->erase_free_sem); 174 mutex_unlock(&c->erase_free_sem);
170 /* Ensure that kupdated calls us again to mark them clean */ 175 wake_up(&c->erase_wait);
171 jffs2_erase_pending_trigger(c);
172} 176}
173 177
174static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset) 178static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset)
@@ -487,9 +491,9 @@ filebad:
487 491
488refile: 492refile:
489 /* Stick it back on the list from whence it came and come back later */ 493 /* Stick it back on the list from whence it came and come back later */
490 jffs2_erase_pending_trigger(c);
491 mutex_lock(&c->erase_free_sem); 494 mutex_lock(&c->erase_free_sem);
492 spin_lock(&c->erase_completion_lock); 495 spin_lock(&c->erase_completion_lock);
496 jffs2_garbage_collect_trigger(c);
493 list_move(&jeb->list, &c->erase_complete_list); 497 list_move(&jeb->list, &c->erase_complete_list);
494 spin_unlock(&c->erase_completion_lock); 498 spin_unlock(&c->erase_completion_lock);
495 mutex_unlock(&c->erase_free_sem); 499 mutex_unlock(&c->erase_free_sem);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3451a81b2142..86e0821fc989 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -313,8 +313,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
313 case S_IFBLK: 313 case S_IFBLK:
314 case S_IFCHR: 314 case S_IFCHR:
315 /* Read the device numbers from the media */ 315 /* Read the device numbers from the media */
316 if (f->metadata->size != sizeof(jdev.old) && 316 if (f->metadata->size != sizeof(jdev.old_id) &&
317 f->metadata->size != sizeof(jdev.new)) { 317 f->metadata->size != sizeof(jdev.new_id)) {
318 printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size); 318 printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size);
319 goto error_io; 319 goto error_io;
320 } 320 }
@@ -325,10 +325,10 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
325 printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino); 325 printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino);
326 goto error; 326 goto error;
327 } 327 }
328 if (f->metadata->size == sizeof(jdev.old)) 328 if (f->metadata->size == sizeof(jdev.old_id))
329 rdev = old_decode_dev(je16_to_cpu(jdev.old)); 329 rdev = old_decode_dev(je16_to_cpu(jdev.old_id));
330 else 330 else
331 rdev = new_decode_dev(je32_to_cpu(jdev.new)); 331 rdev = new_decode_dev(je32_to_cpu(jdev.new_id));
332 332
333 case S_IFSOCK: 333 case S_IFSOCK:
334 case S_IFIFO: 334 case S_IFIFO:
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 3b6f2fa12cff..f5e96bd656e8 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -214,6 +214,19 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
214 return ret; 214 return ret;
215 } 215 }
216 216
217 /* If there are any blocks which need erasing, erase them now */
218 if (!list_empty(&c->erase_complete_list) ||
219 !list_empty(&c->erase_pending_list)) {
220 spin_unlock(&c->erase_completion_lock);
221 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
222 if (jffs2_erase_pending_blocks(c, 1)) {
223 mutex_unlock(&c->alloc_sem);
224 return 0;
225 }
226 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
227 spin_lock(&c->erase_completion_lock);
228 }
229
217 /* First, work out which block we're garbage-collecting */ 230 /* First, work out which block we're garbage-collecting */
218 jeb = c->gcblock; 231 jeb = c->gcblock;
219 232
@@ -222,7 +235,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
222 235
223 if (!jeb) { 236 if (!jeb) {
224 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */ 237 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
225 if (!list_empty(&c->erase_pending_list)) { 238 if (c->nr_erasing_blocks) {
226 spin_unlock(&c->erase_completion_lock); 239 spin_unlock(&c->erase_completion_lock);
227 mutex_unlock(&c->alloc_sem); 240 mutex_unlock(&c->alloc_sem);
228 return -EAGAIN; 241 return -EAGAIN;
@@ -435,7 +448,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
435 list_add_tail(&c->gcblock->list, &c->erase_pending_list); 448 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
436 c->gcblock = NULL; 449 c->gcblock = NULL;
437 c->nr_erasing_blocks++; 450 c->nr_erasing_blocks++;
438 jffs2_erase_pending_trigger(c); 451 jffs2_garbage_collect_trigger(c);
439 } 452 }
440 spin_unlock(&c->erase_completion_lock); 453 spin_unlock(&c->erase_completion_lock);
441 454
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 507ed6ec1847..a881a42f19e3 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -312,11 +312,11 @@ static inline int jffs2_blocks_use_vmalloc(struct jffs2_sb_info *c)
312static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev) 312static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev)
313{ 313{
314 if (old_valid_dev(rdev)) { 314 if (old_valid_dev(rdev)) {
315 jdev->old = cpu_to_je16(old_encode_dev(rdev)); 315 jdev->old_id = cpu_to_je16(old_encode_dev(rdev));
316 return sizeof(jdev->old); 316 return sizeof(jdev->old_id);
317 } else { 317 } else {
318 jdev->new = cpu_to_je32(new_encode_dev(rdev)); 318 jdev->new_id = cpu_to_je32(new_encode_dev(rdev));
319 return sizeof(jdev->new); 319 return sizeof(jdev->new_id);
320 } 320 }
321} 321}
322 322
@@ -464,7 +464,7 @@ int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb
464int jffs2_do_mount_fs(struct jffs2_sb_info *c); 464int jffs2_do_mount_fs(struct jffs2_sb_info *c);
465 465
466/* erase.c */ 466/* erase.c */
467void jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count); 467int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count);
468void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); 468void jffs2_free_jeb_node_refs(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
469 469
470#ifdef CONFIG_JFFS2_FS_WRITEBUFFER 470#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 191359dde4e1..694aa5b03505 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -116,9 +116,21 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
116 116
117 ret = jffs2_garbage_collect_pass(c); 117 ret = jffs2_garbage_collect_pass(c);
118 118
119 if (ret == -EAGAIN) 119 if (ret == -EAGAIN) {
120 jffs2_erase_pending_blocks(c, 1); 120 spin_lock(&c->erase_completion_lock);
121 else if (ret) 121 if (c->nr_erasing_blocks &&
122 list_empty(&c->erase_pending_list) &&
123 list_empty(&c->erase_complete_list)) {
124 DECLARE_WAITQUEUE(wait, current);
125 set_current_state(TASK_UNINTERRUPTIBLE);
126 add_wait_queue(&c->erase_wait, &wait);
127 D1(printk(KERN_DEBUG "%s waiting for erase to complete\n", __func__));
128 spin_unlock(&c->erase_completion_lock);
129
130 schedule();
131 } else
132 spin_unlock(&c->erase_completion_lock);
133 } else if (ret)
122 return ret; 134 return ret;
123 135
124 cond_resched(); 136 cond_resched();
@@ -217,7 +229,7 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
217 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list); 229 ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list);
218 list_move_tail(&ejeb->list, &c->erase_pending_list); 230 list_move_tail(&ejeb->list, &c->erase_pending_list);
219 c->nr_erasing_blocks++; 231 c->nr_erasing_blocks++;
220 jffs2_erase_pending_trigger(c); 232 jffs2_garbage_collect_trigger(c);
221 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n", 233 D1(printk(KERN_DEBUG "jffs2_find_nextblock: Triggering erase of erasable block at 0x%08x\n",
222 ejeb->offset)); 234 ejeb->offset));
223 } 235 }
@@ -469,7 +481,9 @@ struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
469void jffs2_complete_reservation(struct jffs2_sb_info *c) 481void jffs2_complete_reservation(struct jffs2_sb_info *c)
470{ 482{
471 D1(printk(KERN_DEBUG "jffs2_complete_reservation()\n")); 483 D1(printk(KERN_DEBUG "jffs2_complete_reservation()\n"));
484 spin_lock(&c->erase_completion_lock);
472 jffs2_garbage_collect_trigger(c); 485 jffs2_garbage_collect_trigger(c);
486 spin_unlock(&c->erase_completion_lock);
473 mutex_unlock(&c->alloc_sem); 487 mutex_unlock(&c->alloc_sem);
474} 488}
475 489
@@ -611,7 +625,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
611 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n")); 625 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n"));
612 list_add_tail(&jeb->list, &c->erase_pending_list); 626 list_add_tail(&jeb->list, &c->erase_pending_list);
613 c->nr_erasing_blocks++; 627 c->nr_erasing_blocks++;
614 jffs2_erase_pending_trigger(c); 628 jffs2_garbage_collect_trigger(c);
615 } else { 629 } else {
616 /* Sometimes, however, we leave it elsewhere so it doesn't get 630 /* Sometimes, however, we leave it elsewhere so it doesn't get
617 immediately reused, and we spread the load a bit. */ 631 immediately reused, and we spread the load a bit. */
@@ -732,6 +746,10 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
732 int nr_very_dirty = 0; 746 int nr_very_dirty = 0;
733 struct jffs2_eraseblock *jeb; 747 struct jffs2_eraseblock *jeb;
734 748
749 if (!list_empty(&c->erase_complete_list) ||
750 !list_empty(&c->erase_pending_list))
751 return 1;
752
735 if (c->unchecked_size) { 753 if (c->unchecked_size) {
736 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n", 754 D1(printk(KERN_DEBUG "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
737 c->unchecked_size, c->checked_ino)); 755 c->unchecked_size, c->checked_ino));
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index a7f03b7ebcb3..035a767f958b 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -140,8 +140,7 @@ void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c);
140 140
141#endif /* WRITEBUFFER */ 141#endif /* WRITEBUFFER */
142 142
143/* erase.c */ 143static inline void jffs2_dirty_trigger(struct jffs2_sb_info *c)
144static inline void jffs2_erase_pending_trigger(struct jffs2_sb_info *c)
145{ 144{
146 OFNI_BS_2SFFJ(c)->s_dirt = 1; 145 OFNI_BS_2SFFJ(c)->s_dirt = 1;
147} 146}
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 696686cc206e..46f870d1cc36 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -260,7 +260,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
260 ret = -EIO; 260 ret = -EIO;
261 goto out; 261 goto out;
262 } 262 }
263 jffs2_erase_pending_trigger(c); 263 spin_lock(&c->erase_completion_lock);
264 jffs2_garbage_collect_trigger(c);
265 spin_unlock(&c->erase_completion_lock);
264 } 266 }
265 ret = 0; 267 ret = 0;
266 out: 268 out:
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 9a80e8e595d0..511e2d609d12 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -63,8 +63,6 @@ static void jffs2_write_super(struct super_block *sb)
63 63
64 if (!(sb->s_flags & MS_RDONLY)) { 64 if (!(sb->s_flags & MS_RDONLY)) {
65 D1(printk(KERN_DEBUG "jffs2_write_super()\n")); 65 D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
66 jffs2_garbage_collect_trigger(c);
67 jffs2_erase_pending_blocks(c, 0);
68 jffs2_flush_wbuf_gc(c, 0); 66 jffs2_flush_wbuf_gc(c, 0);
69 } 67 }
70 68
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 5ef7bac265e5..07ee1546b2fa 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -84,7 +84,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino)
84 struct jffs2_inodirty *new; 84 struct jffs2_inodirty *new;
85 85
86 /* Mark the superblock dirty so that kupdated will flush... */ 86 /* Mark the superblock dirty so that kupdated will flush... */
87 jffs2_erase_pending_trigger(c); 87 jffs2_dirty_trigger(c);
88 88
89 if (jffs2_wbuf_pending_for_ino(c, ino)) 89 if (jffs2_wbuf_pending_for_ino(c, ino))
90 return; 90 return;
@@ -121,7 +121,7 @@ static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
121 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n")); 121 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n"));
122 list_add_tail(&jeb->list, &c->erase_pending_list); 122 list_add_tail(&jeb->list, &c->erase_pending_list);
123 c->nr_erasing_blocks++; 123 c->nr_erasing_blocks++;
124 jffs2_erase_pending_trigger(c); 124 jffs2_garbage_collect_trigger(c);
125 } else { 125 } else {
126 /* Sometimes, however, we leave it elsewhere so it doesn't get 126 /* Sometimes, however, we leave it elsewhere so it doesn't get
127 immediately reused, and we spread the load a bit. */ 127 immediately reused, and we spread the load a bit. */
@@ -152,7 +152,7 @@ static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock
152 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset)); 152 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset));
153 list_add(&jeb->list, &c->erase_pending_list); 153 list_add(&jeb->list, &c->erase_pending_list);
154 c->nr_erasing_blocks++; 154 c->nr_erasing_blocks++;
155 jffs2_erase_pending_trigger(c); 155 jffs2_garbage_collect_trigger(c);
156 } 156 }
157 157
158 if (!jffs2_prealloc_raw_node_refs(c, jeb, 1)) { 158 if (!jffs2_prealloc_raw_node_refs(c, jeb, 1)) {
@@ -543,7 +543,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
543 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset)); 543 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
544 list_move(&jeb->list, &c->erase_pending_list); 544 list_move(&jeb->list, &c->erase_pending_list);
545 c->nr_erasing_blocks++; 545 c->nr_erasing_blocks++;
546 jffs2_erase_pending_trigger(c); 546 jffs2_garbage_collect_trigger(c);
547 } 547 }
548 548
549 jffs2_dbg_acct_sanity_check_nolock(c, jeb); 549 jffs2_dbg_acct_sanity_check_nolock(c, jeb);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 14ba982b3f24..85d9ec659225 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -98,7 +98,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
98 if (rc) 98 if (rc)
99 return rc; 99 return rc;
100 100
101 if (iattr->ia_valid & ATTR_SIZE) 101 if (is_quota_modification(inode, iattr))
102 dquot_initialize(inode); 102 dquot_initialize(inode);
103 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 103 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
104 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { 104 (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 9e2f6a721668..c92ea3b3ea5e 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -2438,7 +2438,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2438 2438
2439 /* check if this is a control page update for an allocation. 2439 /* check if this is a control page update for an allocation.
2440 * if so, update the leaf to reflect the new leaf value using 2440 * if so, update the leaf to reflect the new leaf value using
2441 * dbSplit(); otherwise (deallocation), use dbJoin() to udpate 2441 * dbSplit(); otherwise (deallocation), use dbJoin() to update
2442 * the leaf with the new value. in addition to updating the 2442 * the leaf with the new value. in addition to updating the
2443 * leaf, dbSplit() will also split the binary buddy system of 2443 * leaf, dbSplit() will also split the binary buddy system of
2444 * the leaves, if required, and bubble new values within the 2444 * the leaves, if required, and bubble new values within the
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 157382fa6256..b66832ac33ac 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -446,10 +446,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
446 /* initialize the mount flag and determine the default error handler */ 446 /* initialize the mount flag and determine the default error handler */
447 flag = JFS_ERR_REMOUNT_RO; 447 flag = JFS_ERR_REMOUNT_RO;
448 448
449 if (!parse_options((char *) data, sb, &newLVSize, &flag)) { 449 if (!parse_options((char *) data, sb, &newLVSize, &flag))
450 kfree(sbi); 450 goto out_kfree;
451 return -EINVAL;
452 }
453 sbi->flag = flag; 451 sbi->flag = flag;
454 452
455#ifdef CONFIG_JFS_POSIX_ACL 453#ifdef CONFIG_JFS_POSIX_ACL
@@ -458,7 +456,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
458 456
459 if (newLVSize) { 457 if (newLVSize) {
460 printk(KERN_ERR "resize option for remount only\n"); 458 printk(KERN_ERR "resize option for remount only\n");
461 return -EINVAL; 459 goto out_kfree;
462 } 460 }
463 461
464 /* 462 /*
@@ -478,7 +476,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
478 inode = new_inode(sb); 476 inode = new_inode(sb);
479 if (inode == NULL) { 477 if (inode == NULL) {
480 ret = -ENOMEM; 478 ret = -ENOMEM;
481 goto out_kfree; 479 goto out_unload;
482 } 480 }
483 inode->i_ino = 0; 481 inode->i_ino = 0;
484 inode->i_nlink = 1; 482 inode->i_nlink = 1;
@@ -550,9 +548,10 @@ out_mount_failed:
550 make_bad_inode(sbi->direct_inode); 548 make_bad_inode(sbi->direct_inode);
551 iput(sbi->direct_inode); 549 iput(sbi->direct_inode);
552 sbi->direct_inode = NULL; 550 sbi->direct_inode = NULL;
553out_kfree: 551out_unload:
554 if (sbi->nls_tab) 552 if (sbi->nls_tab)
555 unload_nls(sbi->nls_tab); 553 unload_nls(sbi->nls_tab);
554out_kfree:
556 kfree(sbi); 555 kfree(sbi);
557 return ret; 556 return ret;
558} 557}
diff --git a/fs/libfs.c b/fs/libfs.c
index ea9a6cc9b35c..232bea425b09 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -547,6 +547,40 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
547} 547}
548 548
549/** 549/**
550 * simple_write_to_buffer - copy data from user space to the buffer
551 * @to: the buffer to write to
552 * @available: the size of the buffer
553 * @ppos: the current position in the buffer
554 * @from: the user space buffer to read from
555 * @count: the maximum number of bytes to read
556 *
557 * The simple_write_to_buffer() function reads up to @count bytes from the user
558 * space address starting at @from into the buffer @to at offset @ppos.
559 *
560 * On success, the number of bytes written is returned and the offset @ppos is
561 * advanced by this number, or negative value is returned on error.
562 **/
563ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
564 const void __user *from, size_t count)
565{
566 loff_t pos = *ppos;
567 size_t res;
568
569 if (pos < 0)
570 return -EINVAL;
571 if (pos >= available || !count)
572 return 0;
573 if (count > available - pos)
574 count = available - pos;
575 res = copy_from_user(to + pos, from, count);
576 if (res == count)
577 return -EFAULT;
578 count -= res;
579 *ppos = pos + count;
580 return count;
581}
582
583/**
550 * memory_read_from_buffer - copy data from the buffer 584 * memory_read_from_buffer - copy data from the buffer
551 * @to: the kernel space buffer to read to 585 * @to: the kernel space buffer to read to
552 * @count: the maximum number of bytes to read 586 * @count: the maximum number of bytes to read
@@ -864,6 +898,7 @@ EXPORT_SYMBOL(simple_statfs);
864EXPORT_SYMBOL(simple_sync_file); 898EXPORT_SYMBOL(simple_sync_file);
865EXPORT_SYMBOL(simple_unlink); 899EXPORT_SYMBOL(simple_unlink);
866EXPORT_SYMBOL(simple_read_from_buffer); 900EXPORT_SYMBOL(simple_read_from_buffer);
901EXPORT_SYMBOL(simple_write_to_buffer);
867EXPORT_SYMBOL(memory_read_from_buffer); 902EXPORT_SYMBOL(memory_read_from_buffer);
868EXPORT_SYMBOL(simple_transaction_set); 903EXPORT_SYMBOL(simple_transaction_set);
869EXPORT_SYMBOL(simple_transaction_get); 904EXPORT_SYMBOL(simple_transaction_get);
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 243c00071f76..9bd2ce2a3040 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -303,6 +303,11 @@ static void bdev_put_device(struct super_block *sb)
303 close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE); 303 close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
304} 304}
305 305
306static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
307{
308 return 0;
309}
310
306static const struct logfs_device_ops bd_devops = { 311static const struct logfs_device_ops bd_devops = {
307 .find_first_sb = bdev_find_first_sb, 312 .find_first_sb = bdev_find_first_sb,
308 .find_last_sb = bdev_find_last_sb, 313 .find_last_sb = bdev_find_last_sb,
@@ -310,6 +315,7 @@ static const struct logfs_device_ops bd_devops = {
310 .readpage = bdev_readpage, 315 .readpage = bdev_readpage,
311 .writeseg = bdev_writeseg, 316 .writeseg = bdev_writeseg,
312 .erase = bdev_erase, 317 .erase = bdev_erase,
318 .can_write_buf = bdev_can_write_buf,
313 .sync = bdev_sync, 319 .sync = bdev_sync,
314 .put_device = bdev_put_device, 320 .put_device = bdev_put_device,
315}; 321};
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index cafb6ef2e05b..a85d47d13e4b 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -9,6 +9,7 @@
9#include <linux/completion.h> 9#include <linux/completion.h>
10#include <linux/mount.h> 10#include <linux/mount.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/slab.h>
12 13
13#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
14 15
@@ -126,7 +127,8 @@ static int mtd_readpage(void *_sb, struct page *page)
126 127
127 err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE, 128 err = mtd_read(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
128 page_address(page)); 129 page_address(page));
129 if (err == -EUCLEAN) { 130 if (err == -EUCLEAN || err == -EBADMSG) {
131 /* -EBADMSG happens regularly on power failures */
130 err = 0; 132 err = 0;
131 /* FIXME: force GC this segment */ 133 /* FIXME: force GC this segment */
132 } 134 }
@@ -233,12 +235,32 @@ static void mtd_put_device(struct super_block *sb)
233 put_mtd_device(logfs_super(sb)->s_mtd); 235 put_mtd_device(logfs_super(sb)->s_mtd);
234} 236}
235 237
238static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
239{
240 struct logfs_super *super = logfs_super(sb);
241 void *buf;
242 int err;
243
244 buf = kmalloc(super->s_writesize, GFP_KERNEL);
245 if (!buf)
246 return -ENOMEM;
247 err = mtd_read(sb, ofs, super->s_writesize, buf);
248 if (err)
249 goto out;
250 if (memchr_inv(buf, 0xff, super->s_writesize))
251 err = -EIO;
252 kfree(buf);
253out:
254 return err;
255}
256
236static const struct logfs_device_ops mtd_devops = { 257static const struct logfs_device_ops mtd_devops = {
237 .find_first_sb = mtd_find_first_sb, 258 .find_first_sb = mtd_find_first_sb,
238 .find_last_sb = mtd_find_last_sb, 259 .find_last_sb = mtd_find_last_sb,
239 .readpage = mtd_readpage, 260 .readpage = mtd_readpage,
240 .writeseg = mtd_writeseg, 261 .writeseg = mtd_writeseg,
241 .erase = mtd_erase, 262 .erase = mtd_erase,
263 .can_write_buf = mtd_can_write_buf,
242 .sync = mtd_sync, 264 .sync = mtd_sync,
243 .put_device = mtd_put_device, 265 .put_device = mtd_put_device,
244}; 266};
@@ -250,5 +272,7 @@ int logfs_get_sb_mtd(struct file_system_type *type, int flags,
250 const struct logfs_device_ops *devops = &mtd_devops; 272 const struct logfs_device_ops *devops = &mtd_devops;
251 273
252 mtd = get_mtd_device(NULL, mtdnr); 274 mtd = get_mtd_device(NULL, mtdnr);
275 if (IS_ERR(mtd))
276 return PTR_ERR(mtd);
253 return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt); 277 return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
254} 278}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 2396a85c0f55..72d1893ddd36 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -12,7 +12,7 @@
12 * Atomic dir operations 12 * Atomic dir operations
13 * 13 *
14 * Directory operations are by default not atomic. Dentries and Inodes are 14 * Directory operations are by default not atomic. Dentries and Inodes are
15 * created/removed/altered in seperate operations. Therefore we need to do 15 * created/removed/altered in separate operations. Therefore we need to do
16 * a small amount of journaling. 16 * a small amount of journaling.
17 * 17 *
18 * Create, link, mkdir, mknod and symlink all share the same function to do 18 * Create, link, mkdir, mknod and symlink all share the same function to do
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 370f367a933e..0de524071870 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -161,7 +161,17 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc)
161 161
162static void logfs_invalidatepage(struct page *page, unsigned long offset) 162static void logfs_invalidatepage(struct page *page, unsigned long offset)
163{ 163{
164 move_page_to_btree(page); 164 struct logfs_block *block = logfs_block(page);
165
166 if (block->reserved_bytes) {
167 struct super_block *sb = page->mapping->host->i_sb;
168 struct logfs_super *super = logfs_super(sb);
169
170 super->s_dirty_pages -= block->reserved_bytes;
171 block->ops->free_block(sb, block);
172 BUG_ON(bitmap_weight(block->alias_map, LOGFS_BLOCK_FACTOR));
173 } else
174 move_page_to_btree(page);
165 BUG_ON(PagePrivate(page) || page->private); 175 BUG_ON(PagePrivate(page) || page->private);
166} 176}
167 177
@@ -212,10 +222,8 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
212int logfs_fsync(struct file *file, struct dentry *dentry, int datasync) 222int logfs_fsync(struct file *file, struct dentry *dentry, int datasync)
213{ 223{
214 struct super_block *sb = dentry->d_inode->i_sb; 224 struct super_block *sb = dentry->d_inode->i_sb;
215 struct logfs_super *super = logfs_super(sb);
216 225
217 /* FIXME: write anchor */ 226 logfs_write_anchor(sb);
218 super->s_devops->sync(sb);
219 return 0; 227 return 0;
220} 228}
221 229
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index 76c242fbe1b0..caa4419285dc 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -122,7 +122,7 @@ static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
122 logfs_safe_iput(inode, cookie); 122 logfs_safe_iput(inode, cookie);
123} 123}
124 124
125static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist) 125static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
126{ 126{
127 struct logfs_super *super = logfs_super(sb); 127 struct logfs_super *super = logfs_super(sb);
128 struct logfs_segment_header sh; 128 struct logfs_segment_header sh;
@@ -401,7 +401,7 @@ static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
401 segno, (u64)segno << super->s_segshift, 401 segno, (u64)segno << super->s_segshift,
402 dist, no_free_segments(sb), valid, 402 dist, no_free_segments(sb), valid,
403 super->s_free_bytes); 403 super->s_free_bytes);
404 cleaned = logfs_gc_segment(sb, segno, dist); 404 cleaned = logfs_gc_segment(sb, segno);
405 log_gc("GC segment #%02x complete - now %x valid\n", segno, 405 log_gc("GC segment #%02x complete - now %x valid\n", segno,
406 valid - cleaned); 406 valid - cleaned);
407 BUG_ON(cleaned != valid); 407 BUG_ON(cleaned != valid);
@@ -632,38 +632,31 @@ static int check_area(struct super_block *sb, int i)
632{ 632{
633 struct logfs_super *super = logfs_super(sb); 633 struct logfs_super *super = logfs_super(sb);
634 struct logfs_area *area = super->s_area[i]; 634 struct logfs_area *area = super->s_area[i];
635 struct logfs_object_header oh; 635 gc_level_t gc_level;
636 u32 cleaned, valid, ec;
636 u32 segno = area->a_segno; 637 u32 segno = area->a_segno;
637 u32 ofs = area->a_used_bytes; 638 u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
638 __be32 crc;
639 int err;
640 639
641 if (!area->a_is_open) 640 if (!area->a_is_open)
642 return 0; 641 return 0;
643 642
644 for (ofs = area->a_used_bytes; 643 if (super->s_devops->can_write_buf(sb, ofs) == 0)
645 ofs <= super->s_segsize - sizeof(oh); 644 return 0;
646 ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
647 err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
648 if (err)
649 return err;
650
651 if (!memchr_inv(&oh, 0xff, sizeof(oh)))
652 break;
653 645
654 crc = logfs_crc32(&oh, sizeof(oh) - 4, 4); 646 printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
655 if (crc != oh.crc) { 647 /*
656 printk(KERN_INFO "interrupted header at %llx\n", 648 * The device cannot write back the write buffer. Most likely the
657 dev_ofs(sb, segno, ofs)); 649 * wbuf was already written out and the system crashed at some point
658 return 0; 650 * before the journal commit happened. In that case we wouldn't have
659 } 651 * to do anything. But if the crash happened before the wbuf was
660 } 652 * written out correctly, we must GC this segment. So assume the
661 if (ofs != area->a_used_bytes) { 653 * worst and always do the GC run.
662 printk(KERN_INFO "%x bytes unaccounted data found at %llx\n", 654 */
663 ofs - area->a_used_bytes, 655 area->a_is_open = 0;
664 dev_ofs(sb, segno, area->a_used_bytes)); 656 valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
665 area->a_used_bytes = ofs; 657 cleaned = logfs_gc_segment(sb, segno);
666 } 658 if (cleaned != valid)
659 return -EIO;
667 return 0; 660 return 0;
668} 661}
669 662
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 14ed27274da2..755a92e8daa7 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -193,6 +193,7 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
193 inode->i_ctime = CURRENT_TIME; 193 inode->i_ctime = CURRENT_TIME;
194 inode->i_mtime = CURRENT_TIME; 194 inode->i_mtime = CURRENT_TIME;
195 inode->i_nlink = 1; 195 inode->i_nlink = 1;
196 li->li_refcount = 1;
196 INIT_LIST_HEAD(&li->li_freeing_list); 197 INIT_LIST_HEAD(&li->li_freeing_list);
197 198
198 for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++) 199 for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
@@ -326,7 +327,7 @@ static void logfs_set_ino_generation(struct super_block *sb,
326 u64 ino; 327 u64 ino;
327 328
328 mutex_lock(&super->s_journal_mutex); 329 mutex_lock(&super->s_journal_mutex);
329 ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino); 330 ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino + 1);
330 super->s_last_ino = ino; 331 super->s_last_ino = ino;
331 super->s_inos_till_wrap--; 332 super->s_inos_till_wrap--;
332 if (super->s_inos_till_wrap < 0) { 333 if (super->s_inos_till_wrap < 0) {
@@ -386,8 +387,7 @@ static void logfs_init_once(void *_li)
386 387
387static int logfs_sync_fs(struct super_block *sb, int wait) 388static int logfs_sync_fs(struct super_block *sb, int wait)
388{ 389{
389 /* FIXME: write anchor */ 390 logfs_write_anchor(sb);
390 logfs_super(sb)->s_devops->sync(sb);
391 return 0; 391 return 0;
392} 392}
393 393
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index fb0a613f885b..4b0e0616b357 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -132,10 +132,9 @@ static int read_area(struct super_block *sb, struct logfs_je_area *a)
132 132
133 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes); 133 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
134 if (super->s_writesize > 1) 134 if (super->s_writesize > 1)
135 logfs_buf_recover(area, ofs, a + 1, super->s_writesize); 135 return logfs_buf_recover(area, ofs, a + 1, super->s_writesize);
136 else 136 else
137 logfs_buf_recover(area, ofs, NULL, 0); 137 return logfs_buf_recover(area, ofs, NULL, 0);
138 return 0;
139} 138}
140 139
141static void *unpack(void *from, void *to) 140static void *unpack(void *from, void *to)
@@ -245,7 +244,7 @@ static int read_je(struct super_block *sb, u64 ofs)
245 read_erasecount(sb, unpack(jh, scratch)); 244 read_erasecount(sb, unpack(jh, scratch));
246 break; 245 break;
247 case JE_AREA: 246 case JE_AREA:
248 read_area(sb, unpack(jh, scratch)); 247 err = read_area(sb, unpack(jh, scratch));
249 break; 248 break;
250 case JE_OBJ_ALIAS: 249 case JE_OBJ_ALIAS:
251 err = logfs_load_object_aliases(sb, unpack(jh, scratch), 250 err = logfs_load_object_aliases(sb, unpack(jh, scratch),
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 0a3df1a0c936..1a9db84f8d8f 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -144,6 +144,7 @@ struct logfs_area_ops {
144 * @erase: erase one segment 144 * @erase: erase one segment
145 * @read: read from the device 145 * @read: read from the device
146 * @erase: erase part of the device 146 * @erase: erase part of the device
147 * @can_write_buf: decide whether wbuf can be written to ofs
147 */ 148 */
148struct logfs_device_ops { 149struct logfs_device_ops {
149 struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs); 150 struct page *(*find_first_sb)(struct super_block *sb, u64 *ofs);
@@ -153,6 +154,7 @@ struct logfs_device_ops {
153 void (*writeseg)(struct super_block *sb, u64 ofs, size_t len); 154 void (*writeseg)(struct super_block *sb, u64 ofs, size_t len);
154 int (*erase)(struct super_block *sb, loff_t ofs, size_t len, 155 int (*erase)(struct super_block *sb, loff_t ofs, size_t len,
155 int ensure_write); 156 int ensure_write);
157 int (*can_write_buf)(struct super_block *sb, u64 ofs);
156 void (*sync)(struct super_block *sb); 158 void (*sync)(struct super_block *sb);
157 void (*put_device)(struct super_block *sb); 159 void (*put_device)(struct super_block *sb);
158}; 160};
@@ -394,6 +396,7 @@ struct logfs_super {
394 int s_lock_count; 396 int s_lock_count;
395 mempool_t *s_block_pool; /* struct logfs_block pool */ 397 mempool_t *s_block_pool; /* struct logfs_block pool */
396 mempool_t *s_shadow_pool; /* struct logfs_shadow pool */ 398 mempool_t *s_shadow_pool; /* struct logfs_shadow pool */
399 struct list_head s_writeback_list; /* writeback pages */
397 /* 400 /*
398 * Space accounting: 401 * Space accounting:
399 * - s_used_bytes specifies space used to store valid data objects. 402 * - s_used_bytes specifies space used to store valid data objects.
@@ -598,19 +601,19 @@ void freeseg(struct super_block *sb, u32 segno);
598int logfs_init_areas(struct super_block *sb); 601int logfs_init_areas(struct super_block *sb);
599void logfs_cleanup_areas(struct super_block *sb); 602void logfs_cleanup_areas(struct super_block *sb);
600int logfs_open_area(struct logfs_area *area, size_t bytes); 603int logfs_open_area(struct logfs_area *area, size_t bytes);
601void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, 604int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
602 int use_filler); 605 int use_filler);
603 606
604static inline void logfs_buf_write(struct logfs_area *area, u64 ofs, 607static inline int logfs_buf_write(struct logfs_area *area, u64 ofs,
605 void *buf, size_t len) 608 void *buf, size_t len)
606{ 609{
607 __logfs_buf_write(area, ofs, buf, len, 0); 610 return __logfs_buf_write(area, ofs, buf, len, 0);
608} 611}
609 612
610static inline void logfs_buf_recover(struct logfs_area *area, u64 ofs, 613static inline int logfs_buf_recover(struct logfs_area *area, u64 ofs,
611 void *buf, size_t len) 614 void *buf, size_t len)
612{ 615{
613 __logfs_buf_write(area, ofs, buf, len, 1); 616 return __logfs_buf_write(area, ofs, buf, len, 1);
614} 617}
615 618
616/* super.c */ 619/* super.c */
@@ -704,7 +707,7 @@ static inline gc_level_t expand_level(u64 ino, level_t __level)
704 u8 level = (__force u8)__level; 707 u8 level = (__force u8)__level;
705 708
706 if (ino == LOGFS_INO_MASTER) { 709 if (ino == LOGFS_INO_MASTER) {
707 /* ifile has seperate areas */ 710 /* ifile has separate areas */
708 level += LOGFS_MAX_LEVELS; 711 level += LOGFS_MAX_LEVELS;
709 } 712 }
710 return (__force gc_level_t)level; 713 return (__force gc_level_t)level;
diff --git a/fs/logfs/logfs_abi.h b/fs/logfs/logfs_abi.h
index f674725663fe..ae960519c54a 100644
--- a/fs/logfs/logfs_abi.h
+++ b/fs/logfs/logfs_abi.h
@@ -50,9 +50,9 @@ static inline void check_##type(void) \
50 * 12 - gc recycled blocks, long-lived data 50 * 12 - gc recycled blocks, long-lived data
51 * 13 - replacement blocks, short-lived data 51 * 13 - replacement blocks, short-lived data
52 * 52 *
53 * Levels 1-11 are necessary for robust gc operations and help seperate 53 * Levels 1-11 are necessary for robust gc operations and help separate
54 * short-lived metadata from longer-lived file data. In the future, 54 * short-lived metadata from longer-lived file data. In the future,
55 * file data should get seperated into several segments based on simple 55 * file data should get separated into several segments based on simple
56 * heuristics. Old data recycled during gc operation is expected to be 56 * heuristics. Old data recycled during gc operation is expected to be
57 * long-lived. New data is of uncertain life expectancy. New data 57 * long-lived. New data is of uncertain life expectancy. New data
58 * used to replace older blocks in existing files is expected to be 58 * used to replace older blocks in existing files is expected to be
@@ -117,7 +117,7 @@ static inline void check_##type(void) \
117#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED) 117#define pure_ofs(ofs) (ofs & ~LOGFS_FULLY_POPULATED)
118 118
119/* 119/*
120 * LogFS needs to seperate data into levels. Each level is defined as the 120 * LogFS needs to separate data into levels. Each level is defined as the
121 * maximal possible distance from the master inode (inode of the inode file). 121 * maximal possible distance from the master inode (inode of the inode file).
122 * Data blocks reside on level 0, 1x indirect block on level 1, etc. 122 * Data blocks reside on level 0, 1x indirect block on level 1, etc.
123 * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11. 123 * Inodes reside on level 6, indirect blocks for the inode file on levels 7-11.
@@ -204,7 +204,7 @@ SIZE_CHECK(logfs_segment_header, LOGFS_SEGMENT_HEADERSIZE);
204 * @ds_crc: crc32 of structure starting with the next field 204 * @ds_crc: crc32 of structure starting with the next field
205 * @ds_ifile_levels: maximum number of levels for ifile 205 * @ds_ifile_levels: maximum number of levels for ifile
206 * @ds_iblock_levels: maximum number of levels for regular files 206 * @ds_iblock_levels: maximum number of levels for regular files
207 * @ds_data_levels: number of seperate levels for data 207 * @ds_data_levels: number of separate levels for data
208 * @pad0: reserved, must be 0 208 * @pad0: reserved, must be 0
209 * @ds_feature_incompat: incompatible filesystem features 209 * @ds_feature_incompat: incompatible filesystem features
210 * @ds_feature_ro_compat: read-only compatible filesystem features 210 * @ds_feature_ro_compat: read-only compatible filesystem features
@@ -456,7 +456,7 @@ enum logfs_vim {
456 * @vim: life expectancy of data 456 * @vim: life expectancy of data
457 * 457 *
458 * "Areas" are segments currently being used for writing. There is at least 458 * "Areas" are segments currently being used for writing. There is at least
459 * one area per GC level. Several may be used to seperate long-living from 459 * one area per GC level. Several may be used to separate long-living from
460 * short-living data. If an area with unknown vim is encountered, it can 460 * short-living data. If an area with unknown vim is encountered, it can
461 * simply be closed. 461 * simply be closed.
462 * The write buffer immediately follow this header. 462 * The write buffer immediately follow this header.
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 3159db6958e5..0718d112a1a5 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -892,6 +892,8 @@ u64 logfs_seek_hole(struct inode *inode, u64 bix)
892 return bix; 892 return bix;
893 else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED) 893 else if (li->li_data[INDIRECT_INDEX] & LOGFS_FULLY_POPULATED)
894 bix = maxbix(li->li_height); 894 bix = maxbix(li->li_height);
895 else if (bix >= maxbix(li->li_height))
896 return bix;
895 else { 897 else {
896 bix = seek_holedata_loop(inode, bix, 0); 898 bix = seek_holedata_loop(inode, bix, 0);
897 if (bix < maxbix(li->li_height)) 899 if (bix < maxbix(li->li_height))
@@ -1093,17 +1095,25 @@ static int logfs_reserve_bytes(struct inode *inode, int bytes)
1093int get_page_reserve(struct inode *inode, struct page *page) 1095int get_page_reserve(struct inode *inode, struct page *page)
1094{ 1096{
1095 struct logfs_super *super = logfs_super(inode->i_sb); 1097 struct logfs_super *super = logfs_super(inode->i_sb);
1098 struct logfs_block *block = logfs_block(page);
1096 int ret; 1099 int ret;
1097 1100
1098 if (logfs_block(page) && logfs_block(page)->reserved_bytes) 1101 if (block && block->reserved_bytes)
1099 return 0; 1102 return 0;
1100 1103
1101 logfs_get_wblocks(inode->i_sb, page, WF_LOCK); 1104 logfs_get_wblocks(inode->i_sb, page, WF_LOCK);
1102 ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE); 1105 while ((ret = logfs_reserve_bytes(inode, 6 * LOGFS_MAX_OBJECTSIZE)) &&
1106 !list_empty(&super->s_writeback_list)) {
1107 block = list_entry(super->s_writeback_list.next,
1108 struct logfs_block, alias_list);
1109 block->ops->write_block(block);
1110 }
1103 if (!ret) { 1111 if (!ret) {
1104 alloc_data_block(inode, page); 1112 alloc_data_block(inode, page);
1105 logfs_block(page)->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE; 1113 block = logfs_block(page);
1114 block->reserved_bytes += 6 * LOGFS_MAX_OBJECTSIZE;
1106 super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE; 1115 super->s_dirty_pages += 6 * LOGFS_MAX_OBJECTSIZE;
1116 list_move_tail(&block->alias_list, &super->s_writeback_list);
1107 } 1117 }
1108 logfs_put_wblocks(inode->i_sb, page, WF_LOCK); 1118 logfs_put_wblocks(inode->i_sb, page, WF_LOCK);
1109 return ret; 1119 return ret;
@@ -1861,7 +1871,7 @@ int logfs_truncate(struct inode *inode, u64 target)
1861 size = target; 1871 size = target;
1862 1872
1863 logfs_get_wblocks(sb, NULL, 1); 1873 logfs_get_wblocks(sb, NULL, 1);
1864 err = __logfs_truncate(inode, target); 1874 err = __logfs_truncate(inode, size);
1865 if (!err) 1875 if (!err)
1866 err = __logfs_write_inode(inode, 0); 1876 err = __logfs_write_inode(inode, 0);
1867 logfs_put_wblocks(sb, NULL, 1); 1877 logfs_put_wblocks(sb, NULL, 1);
@@ -2249,6 +2259,7 @@ int logfs_init_rw(struct super_block *sb)
2249 int min_fill = 3 * super->s_no_blocks; 2259 int min_fill = 3 * super->s_no_blocks;
2250 2260
2251 INIT_LIST_HEAD(&super->s_object_alias); 2261 INIT_LIST_HEAD(&super->s_object_alias);
2262 INIT_LIST_HEAD(&super->s_writeback_list);
2252 mutex_init(&super->s_write_mutex); 2263 mutex_init(&super->s_write_mutex);
2253 super->s_block_pool = mempool_create_kmalloc_pool(min_fill, 2264 super->s_block_pool = mempool_create_kmalloc_pool(min_fill,
2254 sizeof(struct logfs_block)); 2265 sizeof(struct logfs_block));
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index f77ce2b470ba..a9657afb70ad 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -67,7 +67,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
67 return page; 67 return page;
68} 68}
69 69
70void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, 70int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
71 int use_filler) 71 int use_filler)
72{ 72{
73 pgoff_t index = ofs >> PAGE_SHIFT; 73 pgoff_t index = ofs >> PAGE_SHIFT;
@@ -81,8 +81,10 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
81 copylen = min((ulong)len, PAGE_SIZE - offset); 81 copylen = min((ulong)len, PAGE_SIZE - offset);
82 82
83 page = get_mapping_page(area->a_sb, index, use_filler); 83 page = get_mapping_page(area->a_sb, index, use_filler);
84 SetPageUptodate(page); 84 if (IS_ERR(page))
85 return PTR_ERR(page);
85 BUG_ON(!page); /* FIXME: reserve a pool */ 86 BUG_ON(!page); /* FIXME: reserve a pool */
87 SetPageUptodate(page);
86 memcpy(page_address(page) + offset, buf, copylen); 88 memcpy(page_address(page) + offset, buf, copylen);
87 SetPagePrivate(page); 89 SetPagePrivate(page);
88 page_cache_release(page); 90 page_cache_release(page);
@@ -92,6 +94,7 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
92 offset = 0; 94 offset = 0;
93 index++; 95 index++;
94 } while (len); 96 } while (len);
97 return 0;
95} 98}
96 99
97static void pad_partial_page(struct logfs_area *area) 100static void pad_partial_page(struct logfs_area *area)
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5866ee6e1327..d651e10a1e9c 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -138,10 +138,14 @@ static int logfs_sb_set(struct super_block *sb, void *_super)
138 sb->s_fs_info = super; 138 sb->s_fs_info = super;
139 sb->s_mtd = super->s_mtd; 139 sb->s_mtd = super->s_mtd;
140 sb->s_bdev = super->s_bdev; 140 sb->s_bdev = super->s_bdev;
141#ifdef CONFIG_BLOCK
141 if (sb->s_bdev) 142 if (sb->s_bdev)
142 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; 143 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
144#endif
145#ifdef CONFIG_MTD
143 if (sb->s_mtd) 146 if (sb->s_mtd)
144 sb->s_bdi = sb->s_mtd->backing_dev_info; 147 sb->s_bdi = sb->s_mtd->backing_dev_info;
148#endif
145 return 0; 149 return 0;
146} 150}
147 151
@@ -333,27 +337,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
333 goto fail; 337 goto fail;
334 338
335 sb->s_root = d_alloc_root(rootdir); 339 sb->s_root = d_alloc_root(rootdir);
336 if (!sb->s_root) 340 if (!sb->s_root) {
337 goto fail2; 341 iput(rootdir);
342 goto fail;
343 }
338 344
339 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 345 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
340 if (!super->s_erase_page) 346 if (!super->s_erase_page)
341 goto fail2; 347 goto fail;
342 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); 348 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
343 349
344 /* FIXME: check for read-only mounts */ 350 /* FIXME: check for read-only mounts */
345 err = logfs_make_writeable(sb); 351 err = logfs_make_writeable(sb);
346 if (err) 352 if (err)
347 goto fail3; 353 goto fail1;
348 354
349 log_super("LogFS: Finished mounting\n"); 355 log_super("LogFS: Finished mounting\n");
350 simple_set_mnt(mnt, sb); 356 simple_set_mnt(mnt, sb);
351 return 0; 357 return 0;
352 358
353fail3: 359fail1:
354 __free_page(super->s_erase_page); 360 __free_page(super->s_erase_page);
355fail2:
356 iput(rootdir);
357fail: 361fail:
358 iput(logfs_super(sb)->s_master_inode); 362 iput(logfs_super(sb)->s_master_inode);
359 return -EIO; 363 return -EIO;
@@ -382,7 +386,7 @@ static struct page *find_super_block(struct super_block *sb)
382 if (!first || IS_ERR(first)) 386 if (!first || IS_ERR(first))
383 return NULL; 387 return NULL;
384 last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]); 388 last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
385 if (!last || IS_ERR(first)) { 389 if (!last || IS_ERR(last)) {
386 page_cache_release(first); 390 page_cache_release(first);
387 return NULL; 391 return NULL;
388 } 392 }
@@ -413,7 +417,7 @@ static int __logfs_read_sb(struct super_block *sb)
413 417
414 page = find_super_block(sb); 418 page = find_super_block(sb);
415 if (!page) 419 if (!page)
416 return -EIO; 420 return -EINVAL;
417 421
418 ds = page_address(page); 422 ds = page_address(page);
419 super->s_size = be64_to_cpu(ds->ds_filesystem_size); 423 super->s_size = be64_to_cpu(ds->ds_filesystem_size);
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91a7e42..b86b96fe1dc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1641 if (nd->last.name[nd->last.len]) { 1641 if (nd->last.name[nd->last.len]) {
1642 if (open_flag & O_CREAT) 1642 if (open_flag & O_CREAT)
1643 goto exit; 1643 goto exit;
1644 nd->flags |= LOOKUP_DIRECTORY; 1644 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1645 } 1645 }
1646 1646
1647 /* just plain open? */ 1647 /* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
1830 } 1830 }
1831 if (open_flag & O_DIRECTORY) 1831 if (open_flag & O_DIRECTORY)
1832 nd.flags |= LOOKUP_DIRECTORY; 1832 nd.flags |= LOOKUP_DIRECTORY;
1833 if (!(open_flag & O_NOFOLLOW))
1834 nd.flags |= LOOKUP_FOLLOW;
1833 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 1835 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1834 while (unlikely(!filp)) { /* trailing symlink */ 1836 while (unlikely(!filp)) { /* trailing symlink */
1835 struct path holder; 1837 struct path holder;
@@ -1837,7 +1839,7 @@ reval:
1837 void *cookie; 1839 void *cookie;
1838 error = -ELOOP; 1840 error = -ELOOP;
1839 /* S_ISDIR part is a temporary automount kludge */ 1841 /* S_ISDIR part is a temporary automount kludge */
1840 if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) 1842 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
1841 goto exit_dput; 1843 goto exit_dput;
1842 if (count++ == 32) 1844 if (count++ == 32)
1843 goto exit_dput; 1845 goto exit_dput;
@@ -2174,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2174 error = security_inode_rmdir(dir, dentry); 2176 error = security_inode_rmdir(dir, dentry);
2175 if (!error) { 2177 if (!error) {
2176 error = dir->i_op->rmdir(dir, dentry); 2178 error = dir->i_op->rmdir(dir, dentry);
2177 if (!error) 2179 if (!error) {
2178 dentry->d_inode->i_flags |= S_DEAD; 2180 dentry->d_inode->i_flags |= S_DEAD;
2181 dont_mount(dentry);
2182 }
2179 } 2183 }
2180 } 2184 }
2181 mutex_unlock(&dentry->d_inode->i_mutex); 2185 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2259,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2259 if (!error) { 2263 if (!error) {
2260 error = dir->i_op->unlink(dir, dentry); 2264 error = dir->i_op->unlink(dir, dentry);
2261 if (!error) 2265 if (!error)
2262 dentry->d_inode->i_flags |= S_DEAD; 2266 dont_mount(dentry);
2263 } 2267 }
2264 } 2268 }
2265 mutex_unlock(&dentry->d_inode->i_mutex); 2269 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2570,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2570 return error; 2574 return error;
2571 2575
2572 target = new_dentry->d_inode; 2576 target = new_dentry->d_inode;
2573 if (target) { 2577 if (target)
2574 mutex_lock(&target->i_mutex); 2578 mutex_lock(&target->i_mutex);
2575 dentry_unhash(new_dentry);
2576 }
2577 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2579 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2578 error = -EBUSY; 2580 error = -EBUSY;
2579 else 2581 else {
2582 if (target)
2583 dentry_unhash(new_dentry);
2580 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2584 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2585 }
2581 if (target) { 2586 if (target) {
2582 if (!error) 2587 if (!error) {
2583 target->i_flags |= S_DEAD; 2588 target->i_flags |= S_DEAD;
2589 dont_mount(new_dentry);
2590 }
2584 mutex_unlock(&target->i_mutex); 2591 mutex_unlock(&target->i_mutex);
2585 if (d_unhashed(new_dentry)) 2592 if (d_unhashed(new_dentry))
2586 d_rehash(new_dentry); 2593 d_rehash(new_dentry);
@@ -2612,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2612 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2619 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2613 if (!error) { 2620 if (!error) {
2614 if (target) 2621 if (target)
2615 target->i_flags |= S_DEAD; 2622 dont_mount(new_dentry);
2616 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2623 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2617 d_move(old_dentry, new_dentry); 2624 d_move(old_dentry, new_dentry);
2618 } 2625 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c70..88058de59c7c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -628,7 +628,6 @@ repeat:
628 mnt->mnt_pinned = 0; 628 mnt->mnt_pinned = 0;
629 spin_unlock(&vfsmount_lock); 629 spin_unlock(&vfsmount_lock);
630 acct_auto_close_mnt(mnt); 630 acct_auto_close_mnt(mnt);
631 security_sb_umount_close(mnt);
632 goto repeat; 631 goto repeat;
633 } 632 }
634} 633}
@@ -1117,8 +1116,6 @@ static int do_umount(struct vfsmount *mnt, int flags)
1117 retval = 0; 1116 retval = 0;
1118 } 1117 }
1119 spin_unlock(&vfsmount_lock); 1118 spin_unlock(&vfsmount_lock);
1120 if (retval)
1121 security_sb_umount_busy(mnt);
1122 up_write(&namespace_sem); 1119 up_write(&namespace_sem);
1123 release_mounts(&umount_list); 1120 release_mounts(&umount_list);
1124 return retval; 1121 return retval;
@@ -1432,20 +1429,13 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
1432 1429
1433 err = -ENOENT; 1430 err = -ENOENT;
1434 mutex_lock(&path->dentry->d_inode->i_mutex); 1431 mutex_lock(&path->dentry->d_inode->i_mutex);
1435 if (IS_DEADDIR(path->dentry->d_inode)) 1432 if (cant_mount(path->dentry))
1436 goto out_unlock;
1437
1438 err = security_sb_check_sb(mnt, path);
1439 if (err)
1440 goto out_unlock; 1433 goto out_unlock;
1441 1434
1442 err = -ENOENT;
1443 if (!d_unlinked(path->dentry)) 1435 if (!d_unlinked(path->dentry))
1444 err = attach_recursive_mnt(mnt, path, NULL); 1436 err = attach_recursive_mnt(mnt, path, NULL);
1445out_unlock: 1437out_unlock:
1446 mutex_unlock(&path->dentry->d_inode->i_mutex); 1438 mutex_unlock(&path->dentry->d_inode->i_mutex);
1447 if (!err)
1448 security_sb_post_addmount(mnt, path);
1449 return err; 1439 return err;
1450} 1440}
1451 1441
@@ -1581,8 +1571,6 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1581 } 1571 }
1582 up_write(&sb->s_umount); 1572 up_write(&sb->s_umount);
1583 if (!err) { 1573 if (!err) {
1584 security_sb_post_remount(path->mnt, flags, data);
1585
1586 spin_lock(&vfsmount_lock); 1574 spin_lock(&vfsmount_lock);
1587 touch_mnt_namespace(path->mnt->mnt_ns); 1575 touch_mnt_namespace(path->mnt->mnt_ns);
1588 spin_unlock(&vfsmount_lock); 1576 spin_unlock(&vfsmount_lock);
@@ -1623,7 +1611,7 @@ static int do_move_mount(struct path *path, char *old_name)
1623 1611
1624 err = -ENOENT; 1612 err = -ENOENT;
1625 mutex_lock(&path->dentry->d_inode->i_mutex); 1613 mutex_lock(&path->dentry->d_inode->i_mutex);
1626 if (IS_DEADDIR(path->dentry->d_inode)) 1614 if (cant_mount(path->dentry))
1627 goto out1; 1615 goto out1;
1628 1616
1629 if (d_unlinked(path->dentry)) 1617 if (d_unlinked(path->dentry))
@@ -2234,7 +2222,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2234 if (!check_mnt(root.mnt)) 2222 if (!check_mnt(root.mnt))
2235 goto out2; 2223 goto out2;
2236 error = -ENOENT; 2224 error = -ENOENT;
2237 if (IS_DEADDIR(new.dentry->d_inode)) 2225 if (cant_mount(old.dentry))
2238 goto out2; 2226 goto out2;
2239 if (d_unlinked(new.dentry)) 2227 if (d_unlinked(new.dentry))
2240 goto out2; 2228 goto out2;
@@ -2277,7 +2265,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2277 touch_mnt_namespace(current->nsproxy->mnt_ns); 2265 touch_mnt_namespace(current->nsproxy->mnt_ns);
2278 spin_unlock(&vfsmount_lock); 2266 spin_unlock(&vfsmount_lock);
2279 chroot_fs_refs(&root, &new); 2267 chroot_fs_refs(&root, &new);
2280 security_sb_post_pivotroot(&root, &new);
2281 error = 0; 2268 error = 0;
2282 path_put(&root_parent); 2269 path_put(&root_parent);
2283 path_put(&parent_path); 2270 path_put(&parent_path);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a8766c4ef2e0..7ec9b34a59f8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -934,7 +934,6 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
934 } 934 }
935 935
936 fsinfo.fattr = fattr; 936 fsinfo.fattr = fattr;
937 nfs_fattr_init(fattr);
938 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); 937 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
939 if (error < 0) 938 if (error < 0)
940 goto out_error; 939 goto out_error;
@@ -966,6 +965,8 @@ out_error:
966static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) 965static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
967{ 966{
968 target->flags = source->flags; 967 target->flags = source->flags;
968 target->rsize = source->rsize;
969 target->wsize = source->wsize;
969 target->acregmin = source->acregmin; 970 target->acregmin = source->acregmin;
970 target->acregmax = source->acregmax; 971 target->acregmax = source->acregmax;
971 target->acdirmin = source->acdirmin; 972 target->acdirmin = source->acdirmin;
@@ -1045,13 +1046,18 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1045 struct nfs_fh *mntfh) 1046 struct nfs_fh *mntfh)
1046{ 1047{
1047 struct nfs_server *server; 1048 struct nfs_server *server;
1048 struct nfs_fattr fattr; 1049 struct nfs_fattr *fattr;
1049 int error; 1050 int error;
1050 1051
1051 server = nfs_alloc_server(); 1052 server = nfs_alloc_server();
1052 if (!server) 1053 if (!server)
1053 return ERR_PTR(-ENOMEM); 1054 return ERR_PTR(-ENOMEM);
1054 1055
1056 error = -ENOMEM;
1057 fattr = nfs_alloc_fattr();
1058 if (fattr == NULL)
1059 goto error;
1060
1055 /* Get a client representation */ 1061 /* Get a client representation */
1056 error = nfs_init_server(server, data); 1062 error = nfs_init_server(server, data);
1057 if (error < 0) 1063 if (error < 0)
@@ -1062,7 +1068,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1062 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1068 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1063 1069
1064 /* Probe the root fh to retrieve its FSID */ 1070 /* Probe the root fh to retrieve its FSID */
1065 error = nfs_probe_fsinfo(server, mntfh, &fattr); 1071 error = nfs_probe_fsinfo(server, mntfh, fattr);
1066 if (error < 0) 1072 if (error < 0)
1067 goto error; 1073 goto error;
1068 if (server->nfs_client->rpc_ops->version == 3) { 1074 if (server->nfs_client->rpc_ops->version == 3) {
@@ -1075,14 +1081,14 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1075 server->namelen = NFS2_MAXNAMLEN; 1081 server->namelen = NFS2_MAXNAMLEN;
1076 } 1082 }
1077 1083
1078 if (!(fattr.valid & NFS_ATTR_FATTR)) { 1084 if (!(fattr->valid & NFS_ATTR_FATTR)) {
1079 error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); 1085 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
1080 if (error < 0) { 1086 if (error < 0) {
1081 dprintk("nfs_create_server: getattr error = %d\n", -error); 1087 dprintk("nfs_create_server: getattr error = %d\n", -error);
1082 goto error; 1088 goto error;
1083 } 1089 }
1084 } 1090 }
1085 memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid)); 1091 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
1086 1092
1087 dprintk("Server FSID: %llx:%llx\n", 1093 dprintk("Server FSID: %llx:%llx\n",
1088 (unsigned long long) server->fsid.major, 1094 (unsigned long long) server->fsid.major,
@@ -1094,9 +1100,11 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1094 spin_unlock(&nfs_client_lock); 1100 spin_unlock(&nfs_client_lock);
1095 1101
1096 server->mount_time = jiffies; 1102 server->mount_time = jiffies;
1103 nfs_free_fattr(fattr);
1097 return server; 1104 return server;
1098 1105
1099error: 1106error:
1107 nfs_free_fattr(fattr);
1100 nfs_free_server(server); 1108 nfs_free_server(server);
1101 return ERR_PTR(error); 1109 return ERR_PTR(error);
1102} 1110}
@@ -1338,7 +1346,7 @@ error:
1338struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, 1346struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1339 struct nfs_fh *mntfh) 1347 struct nfs_fh *mntfh)
1340{ 1348{
1341 struct nfs_fattr fattr; 1349 struct nfs_fattr *fattr;
1342 struct nfs_server *server; 1350 struct nfs_server *server;
1343 int error; 1351 int error;
1344 1352
@@ -1348,6 +1356,11 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1348 if (!server) 1356 if (!server)
1349 return ERR_PTR(-ENOMEM); 1357 return ERR_PTR(-ENOMEM);
1350 1358
1359 error = -ENOMEM;
1360 fattr = nfs_alloc_fattr();
1361 if (fattr == NULL)
1362 goto error;
1363
1351 /* set up the general RPC client */ 1364 /* set up the general RPC client */
1352 error = nfs4_init_server(server, data); 1365 error = nfs4_init_server(server, data);
1353 if (error < 0) 1366 if (error < 0)
@@ -1362,7 +1375,7 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1362 goto error; 1375 goto error;
1363 1376
1364 /* Probe the root fh to retrieve its FSID */ 1377 /* Probe the root fh to retrieve its FSID */
1365 error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); 1378 error = nfs4_get_rootfh(server, mntfh);
1366 if (error < 0) 1379 if (error < 0)
1367 goto error; 1380 goto error;
1368 1381
@@ -1373,7 +1386,7 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1373 1386
1374 nfs4_session_set_rwsize(server); 1387 nfs4_session_set_rwsize(server);
1375 1388
1376 error = nfs_probe_fsinfo(server, mntfh, &fattr); 1389 error = nfs_probe_fsinfo(server, mntfh, fattr);
1377 if (error < 0) 1390 if (error < 0)
1378 goto error; 1391 goto error;
1379 1392
@@ -1387,9 +1400,11 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1387 1400
1388 server->mount_time = jiffies; 1401 server->mount_time = jiffies;
1389 dprintk("<-- nfs4_create_server() = %p\n", server); 1402 dprintk("<-- nfs4_create_server() = %p\n", server);
1403 nfs_free_fattr(fattr);
1390 return server; 1404 return server;
1391 1405
1392error: 1406error:
1407 nfs_free_fattr(fattr);
1393 nfs_free_server(server); 1408 nfs_free_server(server);
1394 dprintk("<-- nfs4_create_server() = error %d\n", error); 1409 dprintk("<-- nfs4_create_server() = error %d\n", error);
1395 return ERR_PTR(error); 1410 return ERR_PTR(error);
@@ -1403,7 +1418,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1403{ 1418{
1404 struct nfs_client *parent_client; 1419 struct nfs_client *parent_client;
1405 struct nfs_server *server, *parent_server; 1420 struct nfs_server *server, *parent_server;
1406 struct nfs_fattr fattr; 1421 struct nfs_fattr *fattr;
1407 int error; 1422 int error;
1408 1423
1409 dprintk("--> nfs4_create_referral_server()\n"); 1424 dprintk("--> nfs4_create_referral_server()\n");
@@ -1412,6 +1427,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1412 if (!server) 1427 if (!server)
1413 return ERR_PTR(-ENOMEM); 1428 return ERR_PTR(-ENOMEM);
1414 1429
1430 error = -ENOMEM;
1431 fattr = nfs_alloc_fattr();
1432 if (fattr == NULL)
1433 goto error;
1434
1415 parent_server = NFS_SB(data->sb); 1435 parent_server = NFS_SB(data->sb);
1416 parent_client = parent_server->nfs_client; 1436 parent_client = parent_server->nfs_client;
1417 1437
@@ -1441,12 +1461,12 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1441 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1461 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1442 1462
1443 /* Probe the root fh to retrieve its FSID and filehandle */ 1463 /* Probe the root fh to retrieve its FSID and filehandle */
1444 error = nfs4_path_walk(server, mntfh, data->mnt_path); 1464 error = nfs4_get_rootfh(server, mntfh);
1445 if (error < 0) 1465 if (error < 0)
1446 goto error; 1466 goto error;
1447 1467
1448 /* probe the filesystem info for this server filesystem */ 1468 /* probe the filesystem info for this server filesystem */
1449 error = nfs_probe_fsinfo(server, mntfh, &fattr); 1469 error = nfs_probe_fsinfo(server, mntfh, fattr);
1450 if (error < 0) 1470 if (error < 0)
1451 goto error; 1471 goto error;
1452 1472
@@ -1464,10 +1484,12 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1464 1484
1465 server->mount_time = jiffies; 1485 server->mount_time = jiffies;
1466 1486
1487 nfs_free_fattr(fattr);
1467 dprintk("<-- nfs_create_referral_server() = %p\n", server); 1488 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1468 return server; 1489 return server;
1469 1490
1470error: 1491error:
1492 nfs_free_fattr(fattr);
1471 nfs_free_server(server); 1493 nfs_free_server(server);
1472 dprintk("<-- nfs4_create_referral_server() = error %d\n", error); 1494 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1473 return ERR_PTR(error); 1495 return ERR_PTR(error);
@@ -1483,7 +1505,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1483 struct nfs_fattr *fattr) 1505 struct nfs_fattr *fattr)
1484{ 1506{
1485 struct nfs_server *server; 1507 struct nfs_server *server;
1486 struct nfs_fattr fattr_fsinfo; 1508 struct nfs_fattr *fattr_fsinfo;
1487 int error; 1509 int error;
1488 1510
1489 dprintk("--> nfs_clone_server(,%llx:%llx,)\n", 1511 dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
@@ -1494,6 +1516,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1494 if (!server) 1516 if (!server)
1495 return ERR_PTR(-ENOMEM); 1517 return ERR_PTR(-ENOMEM);
1496 1518
1519 error = -ENOMEM;
1520 fattr_fsinfo = nfs_alloc_fattr();
1521 if (fattr_fsinfo == NULL)
1522 goto out_free_server;
1523
1497 /* Copy data from the source */ 1524 /* Copy data from the source */
1498 server->nfs_client = source->nfs_client; 1525 server->nfs_client = source->nfs_client;
1499 atomic_inc(&server->nfs_client->cl_count); 1526 atomic_inc(&server->nfs_client->cl_count);
@@ -1510,7 +1537,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1510 nfs_init_server_aclclient(server); 1537 nfs_init_server_aclclient(server);
1511 1538
1512 /* probe the filesystem info for this server filesystem */ 1539 /* probe the filesystem info for this server filesystem */
1513 error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); 1540 error = nfs_probe_fsinfo(server, fh, fattr_fsinfo);
1514 if (error < 0) 1541 if (error < 0)
1515 goto out_free_server; 1542 goto out_free_server;
1516 1543
@@ -1532,10 +1559,12 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1532 1559
1533 server->mount_time = jiffies; 1560 server->mount_time = jiffies;
1534 1561
1562 nfs_free_fattr(fattr_fsinfo);
1535 dprintk("<-- nfs_clone_server() = %p\n", server); 1563 dprintk("<-- nfs_clone_server() = %p\n", server);
1536 return server; 1564 return server;
1537 1565
1538out_free_server: 1566out_free_server:
1567 nfs_free_fattr(fattr_fsinfo);
1539 nfs_free_server(server); 1568 nfs_free_server(server);
1540 dprintk("<-- nfs_clone_server() = error %d\n", error); 1569 dprintk("<-- nfs_clone_server() = error %d\n", error);
1541 return ERR_PTR(error); 1570 return ERR_PTR(error);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 15671245c6ee..301634543974 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
24 24
25static void nfs_do_free_delegation(struct nfs_delegation *delegation) 25static void nfs_do_free_delegation(struct nfs_delegation *delegation)
26{ 26{
27 if (delegation->cred)
28 put_rpccred(delegation->cred);
27 kfree(delegation); 29 kfree(delegation);
28} 30}
29 31
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
36 38
37static void nfs_free_delegation(struct nfs_delegation *delegation) 39static void nfs_free_delegation(struct nfs_delegation *delegation)
38{ 40{
39 struct rpc_cred *cred;
40
41 cred = rcu_dereference(delegation->cred);
42 rcu_assign_pointer(delegation->cred, NULL);
43 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 41 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
44 if (cred)
45 put_rpccred(cred);
46} 42}
47 43
48void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) 44void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -129,21 +125,35 @@ again:
129 */ 125 */
130void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 126void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
131{ 127{
132 struct nfs_delegation *delegation = NFS_I(inode)->delegation; 128 struct nfs_delegation *delegation;
133 struct rpc_cred *oldcred; 129 struct rpc_cred *oldcred = NULL;
134 130
135 if (delegation == NULL) 131 rcu_read_lock();
136 return; 132 delegation = rcu_dereference(NFS_I(inode)->delegation);
137 memcpy(delegation->stateid.data, res->delegation.data, 133 if (delegation != NULL) {
138 sizeof(delegation->stateid.data)); 134 spin_lock(&delegation->lock);
139 delegation->type = res->delegation_type; 135 if (delegation->inode != NULL) {
140 delegation->maxsize = res->maxsize; 136 memcpy(delegation->stateid.data, res->delegation.data,
141 oldcred = delegation->cred; 137 sizeof(delegation->stateid.data));
142 delegation->cred = get_rpccred(cred); 138 delegation->type = res->delegation_type;
143 clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); 139 delegation->maxsize = res->maxsize;
144 NFS_I(inode)->delegation_state = delegation->type; 140 oldcred = delegation->cred;
145 smp_wmb(); 141 delegation->cred = get_rpccred(cred);
146 put_rpccred(oldcred); 142 clear_bit(NFS_DELEGATION_NEED_RECLAIM,
143 &delegation->flags);
144 NFS_I(inode)->delegation_state = delegation->type;
145 spin_unlock(&delegation->lock);
146 put_rpccred(oldcred);
147 rcu_read_unlock();
148 } else {
149 /* We appear to have raced with a delegation return. */
150 spin_unlock(&delegation->lock);
151 rcu_read_unlock();
152 nfs_inode_set_delegation(inode, cred, res);
153 }
154 } else {
155 rcu_read_unlock();
156 }
147} 157}
148 158
149static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 159static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
166 return inode; 176 return inode;
167} 177}
168 178
169static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) 179static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
180 const nfs4_stateid *stateid,
181 struct nfs_client *clp)
170{ 182{
171 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); 183 struct nfs_delegation *delegation =
184 rcu_dereference_protected(nfsi->delegation,
185 lockdep_is_held(&clp->cl_lock));
172 186
173 if (delegation == NULL) 187 if (delegation == NULL)
174 goto nomatch; 188 goto nomatch;
@@ -195,11 +209,11 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
195{ 209{
196 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 210 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
197 struct nfs_inode *nfsi = NFS_I(inode); 211 struct nfs_inode *nfsi = NFS_I(inode);
198 struct nfs_delegation *delegation; 212 struct nfs_delegation *delegation, *old_delegation;
199 struct nfs_delegation *freeme = NULL; 213 struct nfs_delegation *freeme = NULL;
200 int status = 0; 214 int status = 0;
201 215
202 delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); 216 delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
203 if (delegation == NULL) 217 if (delegation == NULL)
204 return -ENOMEM; 218 return -ENOMEM;
205 memcpy(delegation->stateid.data, res->delegation.data, 219 memcpy(delegation->stateid.data, res->delegation.data,
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
213 spin_lock_init(&delegation->lock); 227 spin_lock_init(&delegation->lock);
214 228
215 spin_lock(&clp->cl_lock); 229 spin_lock(&clp->cl_lock);
216 if (rcu_dereference(nfsi->delegation) != NULL) { 230 old_delegation = rcu_dereference_protected(nfsi->delegation,
217 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, 231 lockdep_is_held(&clp->cl_lock));
218 sizeof(delegation->stateid)) == 0 && 232 if (old_delegation != NULL) {
219 delegation->type == nfsi->delegation->type) { 233 if (memcmp(&delegation->stateid, &old_delegation->stateid,
234 sizeof(old_delegation->stateid)) == 0 &&
235 delegation->type == old_delegation->type) {
220 goto out; 236 goto out;
221 } 237 }
222 /* 238 /*
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
226 dfprintk(FILE, "%s: server %s handed out " 242 dfprintk(FILE, "%s: server %s handed out "
227 "a duplicate delegation!\n", 243 "a duplicate delegation!\n",
228 __func__, clp->cl_hostname); 244 __func__, clp->cl_hostname);
229 if (delegation->type <= nfsi->delegation->type) { 245 if (delegation->type <= old_delegation->type) {
230 freeme = delegation; 246 freeme = delegation;
231 delegation = NULL; 247 delegation = NULL;
232 goto out; 248 goto out;
233 } 249 }
234 freeme = nfs_detach_delegation_locked(nfsi, NULL); 250 freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
235 } 251 }
236 list_add_rcu(&delegation->super_list, &clp->cl_delegations); 252 list_add_rcu(&delegation->super_list, &clp->cl_delegations);
237 nfsi->delegation_state = delegation->type; 253 nfsi->delegation_state = delegation->type;
@@ -301,7 +317,7 @@ restart:
301 if (inode == NULL) 317 if (inode == NULL)
302 continue; 318 continue;
303 spin_lock(&clp->cl_lock); 319 spin_lock(&clp->cl_lock);
304 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 320 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
305 spin_unlock(&clp->cl_lock); 321 spin_unlock(&clp->cl_lock);
306 rcu_read_unlock(); 322 rcu_read_unlock();
307 if (delegation != NULL) { 323 if (delegation != NULL) {
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
330 struct nfs_inode *nfsi = NFS_I(inode); 346 struct nfs_inode *nfsi = NFS_I(inode);
331 struct nfs_delegation *delegation; 347 struct nfs_delegation *delegation;
332 348
333 if (rcu_dereference(nfsi->delegation) != NULL) { 349 if (rcu_access_pointer(nfsi->delegation) != NULL) {
334 spin_lock(&clp->cl_lock); 350 spin_lock(&clp->cl_lock);
335 delegation = nfs_detach_delegation_locked(nfsi, NULL); 351 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
336 spin_unlock(&clp->cl_lock); 352 spin_unlock(&clp->cl_lock);
337 if (delegation != NULL) 353 if (delegation != NULL)
338 nfs_do_return_delegation(inode, delegation, 0); 354 nfs_do_return_delegation(inode, delegation, 0);
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
346 struct nfs_delegation *delegation; 362 struct nfs_delegation *delegation;
347 int err = 0; 363 int err = 0;
348 364
349 if (rcu_dereference(nfsi->delegation) != NULL) { 365 if (rcu_access_pointer(nfsi->delegation) != NULL) {
350 spin_lock(&clp->cl_lock); 366 spin_lock(&clp->cl_lock);
351 delegation = nfs_detach_delegation_locked(nfsi, NULL); 367 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
352 spin_unlock(&clp->cl_lock); 368 spin_unlock(&clp->cl_lock);
353 if (delegation != NULL) { 369 if (delegation != NULL) {
354 nfs_msync_inode(inode); 370 nfs_msync_inode(inode);
@@ -526,7 +542,7 @@ restart:
526 if (inode == NULL) 542 if (inode == NULL)
527 continue; 543 continue;
528 spin_lock(&clp->cl_lock); 544 spin_lock(&clp->cl_lock);
529 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 545 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
530 spin_unlock(&clp->cl_lock); 546 spin_unlock(&clp->cl_lock);
531 rcu_read_unlock(); 547 rcu_read_unlock();
532 if (delegation != NULL) 548 if (delegation != NULL)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index db3ad849a289..ee9a179ebdf3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -530,9 +530,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
530 nfs_readdir_descriptor_t my_desc, 530 nfs_readdir_descriptor_t my_desc,
531 *desc = &my_desc; 531 *desc = &my_desc;
532 struct nfs_entry my_entry; 532 struct nfs_entry my_entry;
533 struct nfs_fh fh; 533 int res = -ENOMEM;
534 struct nfs_fattr fattr;
535 long res;
536 534
537 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 535 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
538 dentry->d_parent->d_name.name, dentry->d_name.name, 536 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -554,9 +552,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
554 552
555 my_entry.cookie = my_entry.prev_cookie = 0; 553 my_entry.cookie = my_entry.prev_cookie = 0;
556 my_entry.eof = 0; 554 my_entry.eof = 0;
557 my_entry.fh = &fh; 555 my_entry.fh = nfs_alloc_fhandle();
558 my_entry.fattr = &fattr; 556 my_entry.fattr = nfs_alloc_fattr();
559 nfs_fattr_init(&fattr); 557 if (my_entry.fh == NULL || my_entry.fattr == NULL)
558 goto out_alloc_failed;
559
560 desc->entry = &my_entry; 560 desc->entry = &my_entry;
561 561
562 nfs_block_sillyrename(dentry); 562 nfs_block_sillyrename(dentry);
@@ -598,7 +598,10 @@ out:
598 nfs_unblock_sillyrename(dentry); 598 nfs_unblock_sillyrename(dentry);
599 if (res > 0) 599 if (res > 0)
600 res = 0; 600 res = 0;
601 dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n", 601out_alloc_failed:
602 nfs_free_fattr(my_entry.fattr);
603 nfs_free_fhandle(my_entry.fh);
604 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
602 dentry->d_parent->d_name.name, dentry->d_name.name, 605 dentry->d_parent->d_name.name, dentry->d_name.name,
603 res); 606 res);
604 return res; 607 return res;
@@ -776,9 +779,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
776 struct inode *dir; 779 struct inode *dir;
777 struct inode *inode; 780 struct inode *inode;
778 struct dentry *parent; 781 struct dentry *parent;
782 struct nfs_fh *fhandle = NULL;
783 struct nfs_fattr *fattr = NULL;
779 int error; 784 int error;
780 struct nfs_fh fhandle;
781 struct nfs_fattr fattr;
782 785
783 parent = dget_parent(dentry); 786 parent = dget_parent(dentry);
784 dir = parent->d_inode; 787 dir = parent->d_inode;
@@ -811,14 +814,22 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
811 if (NFS_STALE(inode)) 814 if (NFS_STALE(inode))
812 goto out_bad; 815 goto out_bad;
813 816
814 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); 817 error = -ENOMEM;
818 fhandle = nfs_alloc_fhandle();
819 fattr = nfs_alloc_fattr();
820 if (fhandle == NULL || fattr == NULL)
821 goto out_error;
822
823 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
815 if (error) 824 if (error)
816 goto out_bad; 825 goto out_bad;
817 if (nfs_compare_fh(NFS_FH(inode), &fhandle)) 826 if (nfs_compare_fh(NFS_FH(inode), fhandle))
818 goto out_bad; 827 goto out_bad;
819 if ((error = nfs_refresh_inode(inode, &fattr)) != 0) 828 if ((error = nfs_refresh_inode(inode, fattr)) != 0)
820 goto out_bad; 829 goto out_bad;
821 830
831 nfs_free_fattr(fattr);
832 nfs_free_fhandle(fhandle);
822out_set_verifier: 833out_set_verifier:
823 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 834 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
824 out_valid: 835 out_valid:
@@ -842,11 +853,21 @@ out_zap_parent:
842 shrink_dcache_parent(dentry); 853 shrink_dcache_parent(dentry);
843 } 854 }
844 d_drop(dentry); 855 d_drop(dentry);
856 nfs_free_fattr(fattr);
857 nfs_free_fhandle(fhandle);
845 dput(parent); 858 dput(parent);
846 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n", 859 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
847 __func__, dentry->d_parent->d_name.name, 860 __func__, dentry->d_parent->d_name.name,
848 dentry->d_name.name); 861 dentry->d_name.name);
849 return 0; 862 return 0;
863out_error:
864 nfs_free_fattr(fattr);
865 nfs_free_fhandle(fhandle);
866 dput(parent);
867 dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) lookup returned error %d\n",
868 __func__, dentry->d_parent->d_name.name,
869 dentry->d_name.name, error);
870 return error;
850} 871}
851 872
852/* 873/*
@@ -911,9 +932,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
911 struct dentry *res; 932 struct dentry *res;
912 struct dentry *parent; 933 struct dentry *parent;
913 struct inode *inode = NULL; 934 struct inode *inode = NULL;
935 struct nfs_fh *fhandle = NULL;
936 struct nfs_fattr *fattr = NULL;
914 int error; 937 int error;
915 struct nfs_fh fhandle;
916 struct nfs_fattr fattr;
917 938
918 dfprintk(VFS, "NFS: lookup(%s/%s)\n", 939 dfprintk(VFS, "NFS: lookup(%s/%s)\n",
919 dentry->d_parent->d_name.name, dentry->d_name.name); 940 dentry->d_parent->d_name.name, dentry->d_name.name);
@@ -923,7 +944,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
923 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 944 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
924 goto out; 945 goto out;
925 946
926 res = ERR_PTR(-ENOMEM);
927 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 947 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
928 948
929 /* 949 /*
@@ -936,17 +956,23 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
936 goto out; 956 goto out;
937 } 957 }
938 958
959 res = ERR_PTR(-ENOMEM);
960 fhandle = nfs_alloc_fhandle();
961 fattr = nfs_alloc_fattr();
962 if (fhandle == NULL || fattr == NULL)
963 goto out;
964
939 parent = dentry->d_parent; 965 parent = dentry->d_parent;
940 /* Protect against concurrent sillydeletes */ 966 /* Protect against concurrent sillydeletes */
941 nfs_block_sillyrename(parent); 967 nfs_block_sillyrename(parent);
942 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); 968 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
943 if (error == -ENOENT) 969 if (error == -ENOENT)
944 goto no_entry; 970 goto no_entry;
945 if (error < 0) { 971 if (error < 0) {
946 res = ERR_PTR(error); 972 res = ERR_PTR(error);
947 goto out_unblock_sillyrename; 973 goto out_unblock_sillyrename;
948 } 974 }
949 inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); 975 inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
950 res = (struct dentry *)inode; 976 res = (struct dentry *)inode;
951 if (IS_ERR(res)) 977 if (IS_ERR(res))
952 goto out_unblock_sillyrename; 978 goto out_unblock_sillyrename;
@@ -962,6 +988,8 @@ no_entry:
962out_unblock_sillyrename: 988out_unblock_sillyrename:
963 nfs_unblock_sillyrename(parent); 989 nfs_unblock_sillyrename(parent);
964out: 990out:
991 nfs_free_fattr(fattr);
992 nfs_free_fhandle(fhandle);
965 return res; 993 return res;
966} 994}
967 995
@@ -1052,7 +1080,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1052 struct inode *dir; 1080 struct inode *dir;
1053 int openflags, ret = 0; 1081 int openflags, ret = 0;
1054 1082
1055 if (!is_atomic_open(nd)) 1083 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1056 goto no_open; 1084 goto no_open;
1057 parent = dget_parent(dentry); 1085 parent = dget_parent(dentry);
1058 dir = parent->d_inode; 1086 dir = parent->d_inode;
@@ -1669,28 +1697,33 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
1669 smp_mb__after_atomic_dec(); 1697 smp_mb__after_atomic_dec();
1670} 1698}
1671 1699
1700static void nfs_access_free_list(struct list_head *head)
1701{
1702 struct nfs_access_entry *cache;
1703
1704 while (!list_empty(head)) {
1705 cache = list_entry(head->next, struct nfs_access_entry, lru);
1706 list_del(&cache->lru);
1707 nfs_access_free_entry(cache);
1708 }
1709}
1710
1672int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) 1711int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
1673{ 1712{
1674 LIST_HEAD(head); 1713 LIST_HEAD(head);
1675 struct nfs_inode *nfsi; 1714 struct nfs_inode *nfsi;
1676 struct nfs_access_entry *cache; 1715 struct nfs_access_entry *cache;
1677 1716
1678restart: 1717 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
1718 return (nr_to_scan == 0) ? 0 : -1;
1719
1679 spin_lock(&nfs_access_lru_lock); 1720 spin_lock(&nfs_access_lru_lock);
1680 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1721 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
1681 struct rw_semaphore *s_umount;
1682 struct inode *inode; 1722 struct inode *inode;
1683 1723
1684 if (nr_to_scan-- == 0) 1724 if (nr_to_scan-- == 0)
1685 break; 1725 break;
1686 s_umount = &nfsi->vfs_inode.i_sb->s_umount; 1726 inode = &nfsi->vfs_inode;
1687 if (!down_read_trylock(s_umount))
1688 continue;
1689 inode = igrab(&nfsi->vfs_inode);
1690 if (inode == NULL) {
1691 up_read(s_umount);
1692 continue;
1693 }
1694 spin_lock(&inode->i_lock); 1727 spin_lock(&inode->i_lock);
1695 if (list_empty(&nfsi->access_cache_entry_lru)) 1728 if (list_empty(&nfsi->access_cache_entry_lru))
1696 goto remove_lru_entry; 1729 goto remove_lru_entry;
@@ -1704,61 +1737,47 @@ restart:
1704 else { 1737 else {
1705remove_lru_entry: 1738remove_lru_entry:
1706 list_del_init(&nfsi->access_cache_inode_lru); 1739 list_del_init(&nfsi->access_cache_inode_lru);
1740 smp_mb__before_clear_bit();
1707 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); 1741 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
1742 smp_mb__after_clear_bit();
1708 } 1743 }
1709 spin_unlock(&inode->i_lock);
1710 spin_unlock(&nfs_access_lru_lock);
1711 iput(inode);
1712 up_read(s_umount);
1713 goto restart;
1714 } 1744 }
1715 spin_unlock(&nfs_access_lru_lock); 1745 spin_unlock(&nfs_access_lru_lock);
1716 while (!list_empty(&head)) { 1746 nfs_access_free_list(&head);
1717 cache = list_entry(head.next, struct nfs_access_entry, lru);
1718 list_del(&cache->lru);
1719 nfs_access_free_entry(cache);
1720 }
1721 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; 1747 return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
1722} 1748}
1723 1749
1724static void __nfs_access_zap_cache(struct inode *inode) 1750static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
1725{ 1751{
1726 struct nfs_inode *nfsi = NFS_I(inode);
1727 struct rb_root *root_node = &nfsi->access_cache; 1752 struct rb_root *root_node = &nfsi->access_cache;
1728 struct rb_node *n, *dispose = NULL; 1753 struct rb_node *n;
1729 struct nfs_access_entry *entry; 1754 struct nfs_access_entry *entry;
1730 1755
1731 /* Unhook entries from the cache */ 1756 /* Unhook entries from the cache */
1732 while ((n = rb_first(root_node)) != NULL) { 1757 while ((n = rb_first(root_node)) != NULL) {
1733 entry = rb_entry(n, struct nfs_access_entry, rb_node); 1758 entry = rb_entry(n, struct nfs_access_entry, rb_node);
1734 rb_erase(n, root_node); 1759 rb_erase(n, root_node);
1735 list_del(&entry->lru); 1760 list_move(&entry->lru, head);
1736 n->rb_left = dispose;
1737 dispose = n;
1738 } 1761 }
1739 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; 1762 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
1740 spin_unlock(&inode->i_lock);
1741
1742 /* Now kill them all! */
1743 while (dispose != NULL) {
1744 n = dispose;
1745 dispose = n->rb_left;
1746 nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
1747 }
1748} 1763}
1749 1764
1750void nfs_access_zap_cache(struct inode *inode) 1765void nfs_access_zap_cache(struct inode *inode)
1751{ 1766{
1767 LIST_HEAD(head);
1768
1769 if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
1770 return;
1752 /* Remove from global LRU init */ 1771 /* Remove from global LRU init */
1753 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { 1772 spin_lock(&nfs_access_lru_lock);
1754 spin_lock(&nfs_access_lru_lock); 1773 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
1755 list_del_init(&NFS_I(inode)->access_cache_inode_lru); 1774 list_del_init(&NFS_I(inode)->access_cache_inode_lru);
1756 spin_unlock(&nfs_access_lru_lock);
1757 }
1758 1775
1759 spin_lock(&inode->i_lock); 1776 spin_lock(&inode->i_lock);
1760 /* This will release the spinlock */ 1777 __nfs_access_zap_cache(NFS_I(inode), &head);
1761 __nfs_access_zap_cache(inode); 1778 spin_unlock(&inode->i_lock);
1779 spin_unlock(&nfs_access_lru_lock);
1780 nfs_access_free_list(&head);
1762} 1781}
1763 1782
1764static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) 1783static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
@@ -1809,8 +1828,8 @@ out_stale:
1809 nfs_access_free_entry(cache); 1828 nfs_access_free_entry(cache);
1810 return -ENOENT; 1829 return -ENOENT;
1811out_zap: 1830out_zap:
1812 /* This will release the spinlock */ 1831 spin_unlock(&inode->i_lock);
1813 __nfs_access_zap_cache(inode); 1832 nfs_access_zap_cache(inode);
1814 return -ENOENT; 1833 return -ENOENT;
1815} 1834}
1816 1835
@@ -1865,9 +1884,11 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s
1865 smp_mb__after_atomic_inc(); 1884 smp_mb__after_atomic_inc();
1866 1885
1867 /* Add inode to global LRU list */ 1886 /* Add inode to global LRU list */
1868 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { 1887 if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
1869 spin_lock(&nfs_access_lru_lock); 1888 spin_lock(&nfs_access_lru_lock);
1870 list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); 1889 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
1890 list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
1891 &nfs_access_lru_list);
1871 spin_unlock(&nfs_access_lru_lock); 1892 spin_unlock(&nfs_access_lru_lock);
1872 } 1893 }
1873} 1894}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8d965bddb87e..cac96bcc91e4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -161,14 +161,17 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
161 struct nfs_server *server = NFS_SERVER(inode); 161 struct nfs_server *server = NFS_SERVER(inode);
162 struct nfs_inode *nfsi = NFS_I(inode); 162 struct nfs_inode *nfsi = NFS_I(inode);
163 163
164 if (server->flags & NFS_MOUNT_NOAC) 164 if (nfs_have_delegated_attributes(inode))
165 goto force_reval; 165 goto out_noreval;
166
166 if (filp->f_flags & O_DIRECT) 167 if (filp->f_flags & O_DIRECT)
167 goto force_reval; 168 goto force_reval;
168 if (nfsi->npages != 0) 169 if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
169 return 0; 170 goto force_reval;
170 if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) 171 if (nfs_attribute_timeout(inode))
171 return 0; 172 goto force_reval;
173out_noreval:
174 return 0;
172force_reval: 175force_reval:
173 return __nfs_revalidate_inode(server, inode); 176 return __nfs_revalidate_inode(server, inode);
174} 177}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index a6b16ed93229..ce153a6b3aec 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -467,7 +467,8 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
467 struct list_head *pages, 467 struct list_head *pages,
468 unsigned *nr_pages) 468 unsigned *nr_pages)
469{ 469{
470 int ret, npages = *nr_pages; 470 unsigned npages = *nr_pages;
471 int ret;
471 472
472 dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", 473 dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
473 NFS_I(inode)->fscache, npages, inode); 474 NFS_I(inode)->fscache, npages, inode);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b35d2a616066..7428f7d6273b 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -78,159 +78,94 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
78{ 78{
79 struct nfs_server *server = NFS_SB(sb); 79 struct nfs_server *server = NFS_SB(sb);
80 struct nfs_fsinfo fsinfo; 80 struct nfs_fsinfo fsinfo;
81 struct nfs_fattr fattr; 81 struct dentry *ret;
82 struct dentry *mntroot;
83 struct inode *inode; 82 struct inode *inode;
84 int error; 83 int error;
85 84
86 /* get the actual root for this mount */ 85 /* get the actual root for this mount */
87 fsinfo.fattr = &fattr; 86 fsinfo.fattr = nfs_alloc_fattr();
87 if (fsinfo.fattr == NULL)
88 return ERR_PTR(-ENOMEM);
88 89
89 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 90 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
90 if (error < 0) { 91 if (error < 0) {
91 dprintk("nfs_get_root: getattr error = %d\n", -error); 92 dprintk("nfs_get_root: getattr error = %d\n", -error);
92 return ERR_PTR(error); 93 ret = ERR_PTR(error);
94 goto out;
93 } 95 }
94 96
95 inode = nfs_fhget(sb, mntfh, fsinfo.fattr); 97 inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
96 if (IS_ERR(inode)) { 98 if (IS_ERR(inode)) {
97 dprintk("nfs_get_root: get root inode failed\n"); 99 dprintk("nfs_get_root: get root inode failed\n");
98 return ERR_CAST(inode); 100 ret = ERR_CAST(inode);
101 goto out;
99 } 102 }
100 103
101 error = nfs_superblock_set_dummy_root(sb, inode); 104 error = nfs_superblock_set_dummy_root(sb, inode);
102 if (error != 0) 105 if (error != 0) {
103 return ERR_PTR(error); 106 ret = ERR_PTR(error);
107 goto out;
108 }
104 109
105 /* root dentries normally start off anonymous and get spliced in later 110 /* root dentries normally start off anonymous and get spliced in later
106 * if the dentry tree reaches them; however if the dentry already 111 * if the dentry tree reaches them; however if the dentry already
107 * exists, we'll pick it up at this point and use it as the root 112 * exists, we'll pick it up at this point and use it as the root
108 */ 113 */
109 mntroot = d_obtain_alias(inode); 114 ret = d_obtain_alias(inode);
110 if (IS_ERR(mntroot)) { 115 if (IS_ERR(ret)) {
111 dprintk("nfs_get_root: get root dentry failed\n"); 116 dprintk("nfs_get_root: get root dentry failed\n");
112 return mntroot; 117 goto out;
113 } 118 }
114 119
115 security_d_instantiate(mntroot, inode); 120 security_d_instantiate(ret, inode);
116
117 if (!mntroot->d_op)
118 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
119 121
120 return mntroot; 122 if (ret->d_op == NULL)
123 ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
124out:
125 nfs_free_fattr(fsinfo.fattr);
126 return ret;
121} 127}
122 128
123#ifdef CONFIG_NFS_V4 129#ifdef CONFIG_NFS_V4
124 130
125/* 131int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
126 * Do a simple pathwalk from the root FH of the server to the nominated target
127 * of the mountpoint
128 * - give error on symlinks
129 * - give error on ".." occurring in the path
130 * - follow traversals
131 */
132int nfs4_path_walk(struct nfs_server *server,
133 struct nfs_fh *mntfh,
134 const char *path)
135{ 132{
136 struct nfs_fsinfo fsinfo; 133 struct nfs_fsinfo fsinfo;
137 struct nfs_fattr fattr; 134 int ret = -ENOMEM;
138 struct nfs_fh lastfh;
139 struct qstr name;
140 int ret;
141 135
142 dprintk("--> nfs4_path_walk(,,%s)\n", path); 136 dprintk("--> nfs4_get_rootfh()\n");
143 137
144 fsinfo.fattr = &fattr; 138 fsinfo.fattr = nfs_alloc_fattr();
145 nfs_fattr_init(&fattr); 139 if (fsinfo.fattr == NULL)
146 140 goto out;
147 /* Eat leading slashes */
148 while (*path == '/')
149 path++;
150 141
151 /* Start by getting the root filehandle from the server */ 142 /* Start by getting the root filehandle from the server */
152 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 143 ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
153 if (ret < 0) { 144 if (ret < 0) {
154 dprintk("nfs4_get_root: getroot error = %d\n", -ret); 145 dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
155 return ret; 146 goto out;
156 } 147 }
157 148
158 if (!S_ISDIR(fattr.mode)) { 149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
159 printk(KERN_ERR "nfs4_get_root:" 150 || !S_ISDIR(fsinfo.fattr->mode)) {
151 printk(KERN_ERR "nfs4_get_rootfh:"
160 " getroot encountered non-directory\n"); 152 " getroot encountered non-directory\n");
161 return -ENOTDIR; 153 ret = -ENOTDIR;
154 goto out;
162 } 155 }
163 156
164 /* FIXME: It is quite valid for the server to return a referral here */ 157 if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
165 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) { 158 printk(KERN_ERR "nfs4_get_rootfh:"
166 printk(KERN_ERR "nfs4_get_root:"
167 " getroot obtained referral\n"); 159 " getroot obtained referral\n");
168 return -EREMOTE; 160 ret = -EREMOTE;
169 } 161 goto out;
170
171next_component:
172 dprintk("Next: %s\n", path);
173
174 /* extract the next bit of the path */
175 if (!*path)
176 goto path_walk_complete;
177
178 name.name = path;
179 while (*path && *path != '/')
180 path++;
181 name.len = path - (const char *) name.name;
182
183 if (name.len > NFS4_MAXNAMLEN)
184 return -ENAMETOOLONG;
185
186eat_dot_dir:
187 while (*path == '/')
188 path++;
189
190 if (path[0] == '.' && (path[1] == '/' || !path[1])) {
191 path += 2;
192 goto eat_dot_dir;
193 }
194
195 /* FIXME: Why shouldn't the user be able to use ".." in the path? */
196 if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
197 ) {
198 printk(KERN_ERR "nfs4_get_root:"
199 " Mount path contains reference to \"..\"\n");
200 return -EINVAL;
201 } 162 }
202 163
203 /* lookup the next FH in the sequence */ 164 memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
204 memcpy(&lastfh, mntfh, sizeof(lastfh)); 165out:
205 166 nfs_free_fattr(fsinfo.fattr);
206 dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path); 167 dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
207 168 return ret;
208 ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
209 mntfh, &fattr);
210 if (ret < 0) {
211 dprintk("nfs4_get_root: getroot error = %d\n", -ret);
212 return ret;
213 }
214
215 if (!S_ISDIR(fattr.mode)) {
216 printk(KERN_ERR "nfs4_get_root:"
217 " lookupfh encountered non-directory\n");
218 return -ENOTDIR;
219 }
220
221 /* FIXME: Referrals are quite valid here too */
222 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
223 printk(KERN_ERR "nfs4_get_root:"
224 " lookupfh obtained referral\n");
225 return -EREMOTE;
226 }
227
228 goto next_component;
229
230path_walk_complete:
231 memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
232 dprintk("<-- nfs4_path_walk() = 0\n");
233 return 0;
234} 169}
235 170
236/* 171/*
@@ -239,8 +174,8 @@ path_walk_complete:
239struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) 174struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
240{ 175{
241 struct nfs_server *server = NFS_SB(sb); 176 struct nfs_server *server = NFS_SB(sb);
242 struct nfs_fattr fattr; 177 struct nfs_fattr *fattr = NULL;
243 struct dentry *mntroot; 178 struct dentry *ret;
244 struct inode *inode; 179 struct inode *inode;
245 int error; 180 int error;
246 181
@@ -254,40 +189,50 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
254 return ERR_PTR(error); 189 return ERR_PTR(error);
255 } 190 }
256 191
192 fattr = nfs_alloc_fattr();
193 if (fattr == NULL)
194 return ERR_PTR(-ENOMEM);;
195
257 /* get the actual root for this mount */ 196 /* get the actual root for this mount */
258 error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); 197 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
259 if (error < 0) { 198 if (error < 0) {
260 dprintk("nfs_get_root: getattr error = %d\n", -error); 199 dprintk("nfs_get_root: getattr error = %d\n", -error);
261 return ERR_PTR(error); 200 ret = ERR_PTR(error);
201 goto out;
262 } 202 }
263 203
264 inode = nfs_fhget(sb, mntfh, &fattr); 204 inode = nfs_fhget(sb, mntfh, fattr);
265 if (IS_ERR(inode)) { 205 if (IS_ERR(inode)) {
266 dprintk("nfs_get_root: get root inode failed\n"); 206 dprintk("nfs_get_root: get root inode failed\n");
267 return ERR_CAST(inode); 207 ret = ERR_CAST(inode);
208 goto out;
268 } 209 }
269 210
270 error = nfs_superblock_set_dummy_root(sb, inode); 211 error = nfs_superblock_set_dummy_root(sb, inode);
271 if (error != 0) 212 if (error != 0) {
272 return ERR_PTR(error); 213 ret = ERR_PTR(error);
214 goto out;
215 }
273 216
274 /* root dentries normally start off anonymous and get spliced in later 217 /* root dentries normally start off anonymous and get spliced in later
275 * if the dentry tree reaches them; however if the dentry already 218 * if the dentry tree reaches them; however if the dentry already
276 * exists, we'll pick it up at this point and use it as the root 219 * exists, we'll pick it up at this point and use it as the root
277 */ 220 */
278 mntroot = d_obtain_alias(inode); 221 ret = d_obtain_alias(inode);
279 if (IS_ERR(mntroot)) { 222 if (IS_ERR(ret)) {
280 dprintk("nfs_get_root: get root dentry failed\n"); 223 dprintk("nfs_get_root: get root dentry failed\n");
281 return mntroot; 224 goto out;
282 } 225 }
283 226
284 security_d_instantiate(mntroot, inode); 227 security_d_instantiate(ret, inode);
285 228
286 if (!mntroot->d_op) 229 if (ret->d_op == NULL)
287 mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops; 230 ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
288 231
232out:
233 nfs_free_fattr(fattr);
289 dprintk("<-- nfs4_get_root()\n"); 234 dprintk("<-- nfs4_get_root()\n");
290 return mntroot; 235 return ret;
291} 236}
292 237
293#endif /* CONFIG_NFS_V4 */ 238#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a56edca0b5..099b3518feea 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -393,8 +393,8 @@ int
393nfs_setattr(struct dentry *dentry, struct iattr *attr) 393nfs_setattr(struct dentry *dentry, struct iattr *attr)
394{ 394{
395 struct inode *inode = dentry->d_inode; 395 struct inode *inode = dentry->d_inode;
396 struct nfs_fattr fattr; 396 struct nfs_fattr *fattr;
397 int error; 397 int error = -ENOMEM;
398 398
399 nfs_inc_stats(inode, NFSIOS_VFSSETATTR); 399 nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
400 400
@@ -417,14 +417,20 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
417 filemap_write_and_wait(inode->i_mapping); 417 filemap_write_and_wait(inode->i_mapping);
418 nfs_wb_all(inode); 418 nfs_wb_all(inode);
419 } 419 }
420
421 fattr = nfs_alloc_fattr();
422 if (fattr == NULL)
423 goto out;
420 /* 424 /*
421 * Return any delegations if we're going to change ACLs 425 * Return any delegations if we're going to change ACLs
422 */ 426 */
423 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) 427 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
424 nfs_inode_return_delegation(inode); 428 nfs_inode_return_delegation(inode);
425 error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); 429 error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
426 if (error == 0) 430 if (error == 0)
427 nfs_refresh_inode(inode, &fattr); 431 nfs_refresh_inode(inode, fattr);
432 nfs_free_fattr(fattr);
433out:
428 return error; 434 return error;
429} 435}
430 436
@@ -682,7 +688,7 @@ int
682__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 688__nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
683{ 689{
684 int status = -ESTALE; 690 int status = -ESTALE;
685 struct nfs_fattr fattr; 691 struct nfs_fattr *fattr = NULL;
686 struct nfs_inode *nfsi = NFS_I(inode); 692 struct nfs_inode *nfsi = NFS_I(inode);
687 693
688 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", 694 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
@@ -693,8 +699,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
693 if (NFS_STALE(inode)) 699 if (NFS_STALE(inode))
694 goto out; 700 goto out;
695 701
702 status = -ENOMEM;
703 fattr = nfs_alloc_fattr();
704 if (fattr == NULL)
705 goto out;
706
696 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); 707 nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
697 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); 708 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr);
698 if (status != 0) { 709 if (status != 0) {
699 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", 710 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
700 inode->i_sb->s_id, 711 inode->i_sb->s_id,
@@ -707,7 +718,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
707 goto out; 718 goto out;
708 } 719 }
709 720
710 status = nfs_refresh_inode(inode, &fattr); 721 status = nfs_refresh_inode(inode, fattr);
711 if (status) { 722 if (status) {
712 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", 723 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
713 inode->i_sb->s_id, 724 inode->i_sb->s_id,
@@ -723,6 +734,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
723 (long long)NFS_FILEID(inode)); 734 (long long)NFS_FILEID(inode));
724 735
725 out: 736 out:
737 nfs_free_fattr(fattr);
726 return status; 738 return status;
727} 739}
728 740
@@ -730,9 +742,14 @@ int nfs_attribute_timeout(struct inode *inode)
730{ 742{
731 struct nfs_inode *nfsi = NFS_I(inode); 743 struct nfs_inode *nfsi = NFS_I(inode);
732 744
745 return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
746}
747
748static int nfs_attribute_cache_expired(struct inode *inode)
749{
733 if (nfs_have_delegated_attributes(inode)) 750 if (nfs_have_delegated_attributes(inode))
734 return 0; 751 return 0;
735 return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); 752 return nfs_attribute_timeout(inode);
736} 753}
737 754
738/** 755/**
@@ -745,7 +762,7 @@ int nfs_attribute_timeout(struct inode *inode)
745int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) 762int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
746{ 763{
747 if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR) 764 if (!(NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATTR)
748 && !nfs_attribute_timeout(inode)) 765 && !nfs_attribute_cache_expired(inode))
749 return NFS_STALE(inode) ? -ESTALE : 0; 766 return NFS_STALE(inode) ? -ESTALE : 0;
750 return __nfs_revalidate_inode(server, inode); 767 return __nfs_revalidate_inode(server, inode);
751} 768}
@@ -782,7 +799,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
782 int ret = 0; 799 int ret = 0;
783 800
784 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) 801 if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
785 || nfs_attribute_timeout(inode) || NFS_STALE(inode)) { 802 || nfs_attribute_cache_expired(inode)
803 || NFS_STALE(inode)) {
786 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); 804 ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
787 if (ret < 0) 805 if (ret < 0)
788 goto out; 806 goto out;
@@ -916,6 +934,26 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
916 fattr->gencount = nfs_inc_attr_generation_counter(); 934 fattr->gencount = nfs_inc_attr_generation_counter();
917} 935}
918 936
937struct nfs_fattr *nfs_alloc_fattr(void)
938{
939 struct nfs_fattr *fattr;
940
941 fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
942 if (fattr != NULL)
943 nfs_fattr_init(fattr);
944 return fattr;
945}
946
947struct nfs_fh *nfs_alloc_fhandle(void)
948{
949 struct nfs_fh *fh;
950
951 fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
952 if (fh != NULL)
953 fh->size = 0;
954 return fh;
955}
956
919/** 957/**
920 * nfs_inode_attrs_need_update - check if the inode attributes need updating 958 * nfs_inode_attrs_need_update - check if the inode attributes need updating
921 * @inode - pointer to inode 959 * @inode - pointer to inode
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 11f82f03c5de..d8bd619e386c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -244,9 +244,7 @@ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
244#ifdef CONFIG_NFS_V4 244#ifdef CONFIG_NFS_V4
245extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); 245extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
246 246
247extern int nfs4_path_walk(struct nfs_server *server, 247extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
248 struct nfs_fh *mntfh,
249 const char *path);
250#endif 248#endif
251 249
252/* read.c */ 250/* read.c */
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index 1d8d5c813b01..c5832487c456 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -36,14 +36,14 @@ static inline void nfs_inc_stats(const struct inode *inode,
36 36
37static inline void nfs_add_server_stats(const struct nfs_server *server, 37static inline void nfs_add_server_stats(const struct nfs_server *server,
38 enum nfs_stat_bytecounters stat, 38 enum nfs_stat_bytecounters stat,
39 unsigned long addend) 39 long addend)
40{ 40{
41 this_cpu_add(server->io_stats->bytes[stat], addend); 41 this_cpu_add(server->io_stats->bytes[stat], addend);
42} 42}
43 43
44static inline void nfs_add_stats(const struct inode *inode, 44static inline void nfs_add_stats(const struct inode *inode,
45 enum nfs_stat_bytecounters stat, 45 enum nfs_stat_bytecounters stat,
46 unsigned long addend) 46 long addend)
47{ 47{
48 nfs_add_server_stats(NFS_SERVER(inode), stat, addend); 48 nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
49} 49}
@@ -51,7 +51,7 @@ static inline void nfs_add_stats(const struct inode *inode,
51#ifdef CONFIG_NFS_FSCACHE 51#ifdef CONFIG_NFS_FSCACHE
52static inline void nfs_add_fscache_stats(struct inode *inode, 52static inline void nfs_add_fscache_stats(struct inode *inode,
53 enum nfs_stat_fscachecounters stat, 53 enum nfs_stat_fscachecounters stat,
54 unsigned long addend) 54 long addend)
55{ 55{
56 this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); 56 this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
57} 57}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 7888cf36022d..db6aa3673cf3 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -105,8 +105,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
105 struct vfsmount *mnt; 105 struct vfsmount *mnt;
106 struct nfs_server *server = NFS_SERVER(dentry->d_inode); 106 struct nfs_server *server = NFS_SERVER(dentry->d_inode);
107 struct dentry *parent; 107 struct dentry *parent;
108 struct nfs_fh fh; 108 struct nfs_fh *fh = NULL;
109 struct nfs_fattr fattr; 109 struct nfs_fattr *fattr = NULL;
110 int err; 110 int err;
111 111
112 dprintk("--> nfs_follow_mountpoint()\n"); 112 dprintk("--> nfs_follow_mountpoint()\n");
@@ -115,6 +115,12 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
115 if (IS_ROOT(dentry)) 115 if (IS_ROOT(dentry))
116 goto out_err; 116 goto out_err;
117 117
118 err = -ENOMEM;
119 fh = nfs_alloc_fhandle();
120 fattr = nfs_alloc_fattr();
121 if (fh == NULL || fattr == NULL)
122 goto out_err;
123
118 dprintk("%s: enter\n", __func__); 124 dprintk("%s: enter\n", __func__);
119 dput(nd->path.dentry); 125 dput(nd->path.dentry);
120 nd->path.dentry = dget(dentry); 126 nd->path.dentry = dget(dentry);
@@ -123,16 +129,16 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
123 parent = dget_parent(nd->path.dentry); 129 parent = dget_parent(nd->path.dentry);
124 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, 130 err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
125 &nd->path.dentry->d_name, 131 &nd->path.dentry->d_name,
126 &fh, &fattr); 132 fh, fattr);
127 dput(parent); 133 dput(parent);
128 if (err != 0) 134 if (err != 0)
129 goto out_err; 135 goto out_err;
130 136
131 if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) 137 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
132 mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry); 138 mnt = nfs_do_refmount(nd->path.mnt, nd->path.dentry);
133 else 139 else
134 mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, &fh, 140 mnt = nfs_do_submount(nd->path.mnt, nd->path.dentry, fh,
135 &fattr); 141 fattr);
136 err = PTR_ERR(mnt); 142 err = PTR_ERR(mnt);
137 if (IS_ERR(mnt)) 143 if (IS_ERR(mnt))
138 goto out_err; 144 goto out_err;
@@ -151,6 +157,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
151 nd->path.dentry = dget(mnt->mnt_root); 157 nd->path.dentry = dget(mnt->mnt_root);
152 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); 158 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
153out: 159out:
160 nfs_free_fattr(fattr);
161 nfs_free_fhandle(fh);
154 dprintk("%s: done, returned %d\n", __func__, err); 162 dprintk("%s: done, returned %d\n", __func__, err);
155 163
156 dprintk("<-- nfs_follow_mountpoint() = %d\n", err); 164 dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index d150ae0c5ecd..9f88c5f4c7e2 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -185,7 +185,6 @@ static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl,
185struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) 185struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
186{ 186{
187 struct nfs_server *server = NFS_SERVER(inode); 187 struct nfs_server *server = NFS_SERVER(inode);
188 struct nfs_fattr fattr;
189 struct page *pages[NFSACL_MAXPAGES] = { }; 188 struct page *pages[NFSACL_MAXPAGES] = { };
190 struct nfs3_getaclargs args = { 189 struct nfs3_getaclargs args = {
191 .fh = NFS_FH(inode), 190 .fh = NFS_FH(inode),
@@ -193,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
193 .pages = pages, 192 .pages = pages,
194 }; 193 };
195 struct nfs3_getaclres res = { 194 struct nfs3_getaclres res = {
196 .fattr = &fattr, 195 0
197 }; 196 };
198 struct rpc_message msg = { 197 struct rpc_message msg = {
199 .rpc_argp = &args, 198 .rpc_argp = &args,
@@ -228,7 +227,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
228 227
229 dprintk("NFS call getacl\n"); 228 dprintk("NFS call getacl\n");
230 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; 229 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
231 nfs_fattr_init(&fattr); 230 res.fattr = nfs_alloc_fattr();
231 if (res.fattr == NULL)
232 return ERR_PTR(-ENOMEM);
233
232 status = rpc_call_sync(server->client_acl, &msg, 0); 234 status = rpc_call_sync(server->client_acl, &msg, 0);
233 dprintk("NFS reply getacl: %d\n", status); 235 dprintk("NFS reply getacl: %d\n", status);
234 236
@@ -238,7 +240,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
238 240
239 switch (status) { 241 switch (status) {
240 case 0: 242 case 0:
241 status = nfs_refresh_inode(inode, &fattr); 243 status = nfs_refresh_inode(inode, res.fattr);
242 break; 244 break;
243 case -EPFNOSUPPORT: 245 case -EPFNOSUPPORT:
244 case -EPROTONOSUPPORT: 246 case -EPROTONOSUPPORT:
@@ -278,6 +280,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
278getout: 280getout:
279 posix_acl_release(res.acl_access); 281 posix_acl_release(res.acl_access);
280 posix_acl_release(res.acl_default); 282 posix_acl_release(res.acl_default);
283 nfs_free_fattr(res.fattr);
281 284
282 if (status != 0) { 285 if (status != 0) {
283 posix_acl_release(acl); 286 posix_acl_release(acl);
@@ -290,7 +293,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
290 struct posix_acl *dfacl) 293 struct posix_acl *dfacl)
291{ 294{
292 struct nfs_server *server = NFS_SERVER(inode); 295 struct nfs_server *server = NFS_SERVER(inode);
293 struct nfs_fattr fattr; 296 struct nfs_fattr *fattr;
294 struct page *pages[NFSACL_MAXPAGES]; 297 struct page *pages[NFSACL_MAXPAGES];
295 struct nfs3_setaclargs args = { 298 struct nfs3_setaclargs args = {
296 .inode = inode, 299 .inode = inode,
@@ -335,8 +338,13 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
335 } 338 }
336 339
337 dprintk("NFS call setacl\n"); 340 dprintk("NFS call setacl\n");
341 status = -ENOMEM;
342 fattr = nfs_alloc_fattr();
343 if (fattr == NULL)
344 goto out_freepages;
345
338 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; 346 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
339 nfs_fattr_init(&fattr); 347 msg.rpc_resp = fattr;
340 status = rpc_call_sync(server->client_acl, &msg, 0); 348 status = rpc_call_sync(server->client_acl, &msg, 0);
341 nfs_access_zap_cache(inode); 349 nfs_access_zap_cache(inode);
342 nfs_zap_acl_cache(inode); 350 nfs_zap_acl_cache(inode);
@@ -344,7 +352,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
344 352
345 switch (status) { 353 switch (status) {
346 case 0: 354 case 0:
347 status = nfs_refresh_inode(inode, &fattr); 355 status = nfs_refresh_inode(inode, fattr);
348 nfs3_cache_acls(inode, acl, dfacl); 356 nfs3_cache_acls(inode, acl, dfacl);
349 break; 357 break;
350 case -EPFNOSUPPORT: 358 case -EPFNOSUPPORT:
@@ -355,6 +363,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
355 case -ENOTSUPP: 363 case -ENOTSUPP:
356 status = -EOPNOTSUPP; 364 status = -EOPNOTSUPP;
357 } 365 }
366 nfs_free_fattr(fattr);
358out_freepages: 367out_freepages:
359 while (args.npages != 0) { 368 while (args.npages != 0) {
360 args.npages--; 369 args.npages--;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e701002694e5..fabb4f2849a1 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -144,14 +144,12 @@ static int
144nfs3_proc_lookup(struct inode *dir, struct qstr *name, 144nfs3_proc_lookup(struct inode *dir, struct qstr *name,
145 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 145 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
146{ 146{
147 struct nfs_fattr dir_attr;
148 struct nfs3_diropargs arg = { 147 struct nfs3_diropargs arg = {
149 .fh = NFS_FH(dir), 148 .fh = NFS_FH(dir),
150 .name = name->name, 149 .name = name->name,
151 .len = name->len 150 .len = name->len
152 }; 151 };
153 struct nfs3_diropres res = { 152 struct nfs3_diropres res = {
154 .dir_attr = &dir_attr,
155 .fh = fhandle, 153 .fh = fhandle,
156 .fattr = fattr 154 .fattr = fattr
157 }; 155 };
@@ -163,29 +161,30 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
163 int status; 161 int status;
164 162
165 dprintk("NFS call lookup %s\n", name->name); 163 dprintk("NFS call lookup %s\n", name->name);
166 nfs_fattr_init(&dir_attr); 164 res.dir_attr = nfs_alloc_fattr();
165 if (res.dir_attr == NULL)
166 return -ENOMEM;
167
167 nfs_fattr_init(fattr); 168 nfs_fattr_init(fattr);
168 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 169 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
169 nfs_refresh_inode(dir, &dir_attr); 170 nfs_refresh_inode(dir, res.dir_attr);
170 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { 171 if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
171 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; 172 msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
172 msg.rpc_argp = fhandle; 173 msg.rpc_argp = fhandle;
173 msg.rpc_resp = fattr; 174 msg.rpc_resp = fattr;
174 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 175 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
175 } 176 }
177 nfs_free_fattr(res.dir_attr);
176 dprintk("NFS reply lookup: %d\n", status); 178 dprintk("NFS reply lookup: %d\n", status);
177 return status; 179 return status;
178} 180}
179 181
180static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) 182static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
181{ 183{
182 struct nfs_fattr fattr;
183 struct nfs3_accessargs arg = { 184 struct nfs3_accessargs arg = {
184 .fh = NFS_FH(inode), 185 .fh = NFS_FH(inode),
185 }; 186 };
186 struct nfs3_accessres res = { 187 struct nfs3_accessres res;
187 .fattr = &fattr,
188 };
189 struct rpc_message msg = { 188 struct rpc_message msg = {
190 .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], 189 .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
191 .rpc_argp = &arg, 190 .rpc_argp = &arg,
@@ -193,7 +192,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
193 .rpc_cred = entry->cred, 192 .rpc_cred = entry->cred,
194 }; 193 };
195 int mode = entry->mask; 194 int mode = entry->mask;
196 int status; 195 int status = -ENOMEM;
197 196
198 dprintk("NFS call access\n"); 197 dprintk("NFS call access\n");
199 198
@@ -210,9 +209,13 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
210 if (mode & MAY_EXEC) 209 if (mode & MAY_EXEC)
211 arg.access |= NFS3_ACCESS_EXECUTE; 210 arg.access |= NFS3_ACCESS_EXECUTE;
212 } 211 }
213 nfs_fattr_init(&fattr); 212
213 res.fattr = nfs_alloc_fattr();
214 if (res.fattr == NULL)
215 goto out;
216
214 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 217 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
215 nfs_refresh_inode(inode, &fattr); 218 nfs_refresh_inode(inode, res.fattr);
216 if (status == 0) { 219 if (status == 0) {
217 entry->mask = 0; 220 entry->mask = 0;
218 if (res.access & NFS3_ACCESS_READ) 221 if (res.access & NFS3_ACCESS_READ)
@@ -222,6 +225,8 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
222 if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) 225 if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
223 entry->mask |= MAY_EXEC; 226 entry->mask |= MAY_EXEC;
224 } 227 }
228 nfs_free_fattr(res.fattr);
229out:
225 dprintk("NFS reply access: %d\n", status); 230 dprintk("NFS reply access: %d\n", status);
226 return status; 231 return status;
227} 232}
@@ -229,7 +234,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
229static int nfs3_proc_readlink(struct inode *inode, struct page *page, 234static int nfs3_proc_readlink(struct inode *inode, struct page *page,
230 unsigned int pgbase, unsigned int pglen) 235 unsigned int pgbase, unsigned int pglen)
231{ 236{
232 struct nfs_fattr fattr; 237 struct nfs_fattr *fattr;
233 struct nfs3_readlinkargs args = { 238 struct nfs3_readlinkargs args = {
234 .fh = NFS_FH(inode), 239 .fh = NFS_FH(inode),
235 .pgbase = pgbase, 240 .pgbase = pgbase,
@@ -239,14 +244,19 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
239 struct rpc_message msg = { 244 struct rpc_message msg = {
240 .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK], 245 .rpc_proc = &nfs3_procedures[NFS3PROC_READLINK],
241 .rpc_argp = &args, 246 .rpc_argp = &args,
242 .rpc_resp = &fattr,
243 }; 247 };
244 int status; 248 int status = -ENOMEM;
245 249
246 dprintk("NFS call readlink\n"); 250 dprintk("NFS call readlink\n");
247 nfs_fattr_init(&fattr); 251 fattr = nfs_alloc_fattr();
252 if (fattr == NULL)
253 goto out;
254 msg.rpc_resp = fattr;
255
248 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 256 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
249 nfs_refresh_inode(inode, &fattr); 257 nfs_refresh_inode(inode, fattr);
258 nfs_free_fattr(fattr);
259out:
250 dprintk("NFS reply readlink: %d\n", status); 260 dprintk("NFS reply readlink: %d\n", status);
251 return status; 261 return status;
252} 262}
@@ -396,12 +406,17 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
396 .rpc_argp = &arg, 406 .rpc_argp = &arg,
397 .rpc_resp = &res, 407 .rpc_resp = &res,
398 }; 408 };
399 int status; 409 int status = -ENOMEM;
400 410
401 dprintk("NFS call remove %s\n", name->name); 411 dprintk("NFS call remove %s\n", name->name);
402 nfs_fattr_init(&res.dir_attr); 412 res.dir_attr = nfs_alloc_fattr();
413 if (res.dir_attr == NULL)
414 goto out;
415
403 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 416 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
404 nfs_post_op_update_inode(dir, &res.dir_attr); 417 nfs_post_op_update_inode(dir, res.dir_attr);
418 nfs_free_fattr(res.dir_attr);
419out:
405 dprintk("NFS reply remove: %d\n", status); 420 dprintk("NFS reply remove: %d\n", status);
406 return status; 421 return status;
407} 422}
@@ -419,7 +434,7 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
419 if (nfs3_async_handle_jukebox(task, dir)) 434 if (nfs3_async_handle_jukebox(task, dir))
420 return 0; 435 return 0;
421 res = task->tk_msg.rpc_resp; 436 res = task->tk_msg.rpc_resp;
422 nfs_post_op_update_inode(dir, &res->dir_attr); 437 nfs_post_op_update_inode(dir, res->dir_attr);
423 return 1; 438 return 1;
424} 439}
425 440
@@ -427,7 +442,6 @@ static int
427nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, 442nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
428 struct inode *new_dir, struct qstr *new_name) 443 struct inode *new_dir, struct qstr *new_name)
429{ 444{
430 struct nfs_fattr old_dir_attr, new_dir_attr;
431 struct nfs3_renameargs arg = { 445 struct nfs3_renameargs arg = {
432 .fromfh = NFS_FH(old_dir), 446 .fromfh = NFS_FH(old_dir),
433 .fromname = old_name->name, 447 .fromname = old_name->name,
@@ -436,23 +450,27 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
436 .toname = new_name->name, 450 .toname = new_name->name,
437 .tolen = new_name->len 451 .tolen = new_name->len
438 }; 452 };
439 struct nfs3_renameres res = { 453 struct nfs3_renameres res;
440 .fromattr = &old_dir_attr,
441 .toattr = &new_dir_attr
442 };
443 struct rpc_message msg = { 454 struct rpc_message msg = {
444 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], 455 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
445 .rpc_argp = &arg, 456 .rpc_argp = &arg,
446 .rpc_resp = &res, 457 .rpc_resp = &res,
447 }; 458 };
448 int status; 459 int status = -ENOMEM;
449 460
450 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 461 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
451 nfs_fattr_init(&old_dir_attr); 462
452 nfs_fattr_init(&new_dir_attr); 463 res.fromattr = nfs_alloc_fattr();
464 res.toattr = nfs_alloc_fattr();
465 if (res.fromattr == NULL || res.toattr == NULL)
466 goto out;
467
453 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); 468 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
454 nfs_post_op_update_inode(old_dir, &old_dir_attr); 469 nfs_post_op_update_inode(old_dir, res.fromattr);
455 nfs_post_op_update_inode(new_dir, &new_dir_attr); 470 nfs_post_op_update_inode(new_dir, res.toattr);
471out:
472 nfs_free_fattr(res.toattr);
473 nfs_free_fattr(res.fromattr);
456 dprintk("NFS reply rename: %d\n", status); 474 dprintk("NFS reply rename: %d\n", status);
457 return status; 475 return status;
458} 476}
@@ -460,30 +478,32 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
460static int 478static int
461nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) 479nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
462{ 480{
463 struct nfs_fattr dir_attr, fattr;
464 struct nfs3_linkargs arg = { 481 struct nfs3_linkargs arg = {
465 .fromfh = NFS_FH(inode), 482 .fromfh = NFS_FH(inode),
466 .tofh = NFS_FH(dir), 483 .tofh = NFS_FH(dir),
467 .toname = name->name, 484 .toname = name->name,
468 .tolen = name->len 485 .tolen = name->len
469 }; 486 };
470 struct nfs3_linkres res = { 487 struct nfs3_linkres res;
471 .dir_attr = &dir_attr,
472 .fattr = &fattr
473 };
474 struct rpc_message msg = { 488 struct rpc_message msg = {
475 .rpc_proc = &nfs3_procedures[NFS3PROC_LINK], 489 .rpc_proc = &nfs3_procedures[NFS3PROC_LINK],
476 .rpc_argp = &arg, 490 .rpc_argp = &arg,
477 .rpc_resp = &res, 491 .rpc_resp = &res,
478 }; 492 };
479 int status; 493 int status = -ENOMEM;
480 494
481 dprintk("NFS call link %s\n", name->name); 495 dprintk("NFS call link %s\n", name->name);
482 nfs_fattr_init(&dir_attr); 496 res.fattr = nfs_alloc_fattr();
483 nfs_fattr_init(&fattr); 497 res.dir_attr = nfs_alloc_fattr();
498 if (res.fattr == NULL || res.dir_attr == NULL)
499 goto out;
500
484 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); 501 status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
485 nfs_post_op_update_inode(dir, &dir_attr); 502 nfs_post_op_update_inode(dir, res.dir_attr);
486 nfs_post_op_update_inode(inode, &fattr); 503 nfs_post_op_update_inode(inode, res.fattr);
504out:
505 nfs_free_fattr(res.dir_attr);
506 nfs_free_fattr(res.fattr);
487 dprintk("NFS reply link: %d\n", status); 507 dprintk("NFS reply link: %d\n", status);
488 return status; 508 return status;
489} 509}
@@ -554,7 +574,7 @@ out:
554static int 574static int
555nfs3_proc_rmdir(struct inode *dir, struct qstr *name) 575nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
556{ 576{
557 struct nfs_fattr dir_attr; 577 struct nfs_fattr *dir_attr;
558 struct nfs3_diropargs arg = { 578 struct nfs3_diropargs arg = {
559 .fh = NFS_FH(dir), 579 .fh = NFS_FH(dir),
560 .name = name->name, 580 .name = name->name,
@@ -563,14 +583,19 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
563 struct rpc_message msg = { 583 struct rpc_message msg = {
564 .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR], 584 .rpc_proc = &nfs3_procedures[NFS3PROC_RMDIR],
565 .rpc_argp = &arg, 585 .rpc_argp = &arg,
566 .rpc_resp = &dir_attr,
567 }; 586 };
568 int status; 587 int status = -ENOMEM;
569 588
570 dprintk("NFS call rmdir %s\n", name->name); 589 dprintk("NFS call rmdir %s\n", name->name);
571 nfs_fattr_init(&dir_attr); 590 dir_attr = nfs_alloc_fattr();
591 if (dir_attr == NULL)
592 goto out;
593
594 msg.rpc_resp = dir_attr;
572 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 595 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
573 nfs_post_op_update_inode(dir, &dir_attr); 596 nfs_post_op_update_inode(dir, dir_attr);
597 nfs_free_fattr(dir_attr);
598out:
574 dprintk("NFS reply rmdir: %d\n", status); 599 dprintk("NFS reply rmdir: %d\n", status);
575 return status; 600 return status;
576} 601}
@@ -589,7 +614,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
589 u64 cookie, struct page *page, unsigned int count, int plus) 614 u64 cookie, struct page *page, unsigned int count, int plus)
590{ 615{
591 struct inode *dir = dentry->d_inode; 616 struct inode *dir = dentry->d_inode;
592 struct nfs_fattr dir_attr;
593 __be32 *verf = NFS_COOKIEVERF(dir); 617 __be32 *verf = NFS_COOKIEVERF(dir);
594 struct nfs3_readdirargs arg = { 618 struct nfs3_readdirargs arg = {
595 .fh = NFS_FH(dir), 619 .fh = NFS_FH(dir),
@@ -600,7 +624,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
600 .pages = &page 624 .pages = &page
601 }; 625 };
602 struct nfs3_readdirres res = { 626 struct nfs3_readdirres res = {
603 .dir_attr = &dir_attr,
604 .verf = verf, 627 .verf = verf,
605 .plus = plus 628 .plus = plus
606 }; 629 };
@@ -610,7 +633,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
610 .rpc_resp = &res, 633 .rpc_resp = &res,
611 .rpc_cred = cred 634 .rpc_cred = cred
612 }; 635 };
613 int status; 636 int status = -ENOMEM;
614 637
615 if (plus) 638 if (plus)
616 msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS]; 639 msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
@@ -618,12 +641,17 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
618 dprintk("NFS call readdir%s %d\n", 641 dprintk("NFS call readdir%s %d\n",
619 plus? "plus" : "", (unsigned int) cookie); 642 plus? "plus" : "", (unsigned int) cookie);
620 643
621 nfs_fattr_init(&dir_attr); 644 res.dir_attr = nfs_alloc_fattr();
645 if (res.dir_attr == NULL)
646 goto out;
647
622 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 648 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
623 649
624 nfs_invalidate_atime(dir); 650 nfs_invalidate_atime(dir);
651 nfs_refresh_inode(dir, res.dir_attr);
625 652
626 nfs_refresh_inode(dir, &dir_attr); 653 nfs_free_fattr(res.dir_attr);
654out:
627 dprintk("NFS reply readdir: %d\n", status); 655 dprintk("NFS reply readdir: %d\n", status);
628 return status; 656 return status;
629} 657}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 56a86f6ac8b5..75dcfc7da365 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -762,7 +762,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
762static int 762static int
763nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res) 763nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
764{ 764{
765 return nfs3_xdr_wccstat(req, p, &res->dir_attr); 765 return nfs3_xdr_wccstat(req, p, res->dir_attr);
766} 766}
767 767
768/* 768/*
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a187200a7aac..c538c6106e16 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -206,14 +206,14 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
206 206
207 207
208/* nfs4proc.c */ 208/* nfs4proc.c */
209extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); 209extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
210extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); 210extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
211extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); 211extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
212extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); 212extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
213extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 213extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
214extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 214extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
215extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 215extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
216extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); 216extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
217extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); 217extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
218extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); 218extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
219extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 219extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -286,7 +286,7 @@ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
286extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 286extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
287extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); 287extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
288 288
289extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); 289extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
290extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 290extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
291extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); 291extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
292extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); 292extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index f071d12c613b..3c2a1724fbd2 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -115,6 +115,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
115 char *page, char *page2, 115 char *page, char *page2,
116 const struct nfs4_fs_location *location) 116 const struct nfs4_fs_location *location)
117{ 117{
118 const size_t addr_bufsize = sizeof(struct sockaddr_storage);
118 struct vfsmount *mnt = ERR_PTR(-ENOENT); 119 struct vfsmount *mnt = ERR_PTR(-ENOENT);
119 char *mnt_path; 120 char *mnt_path;
120 unsigned int maxbuflen; 121 unsigned int maxbuflen;
@@ -126,9 +127,12 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
126 mountdata->mnt_path = mnt_path; 127 mountdata->mnt_path = mnt_path;
127 maxbuflen = mnt_path - 1 - page2; 128 maxbuflen = mnt_path - 1 - page2;
128 129
130 mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
131 if (mountdata->addr == NULL)
132 return ERR_PTR(-ENOMEM);
133
129 for (s = 0; s < location->nservers; s++) { 134 for (s = 0; s < location->nservers; s++) {
130 const struct nfs4_string *buf = &location->servers[s]; 135 const struct nfs4_string *buf = &location->servers[s];
131 struct sockaddr_storage addr;
132 136
133 if (buf->len <= 0 || buf->len >= maxbuflen) 137 if (buf->len <= 0 || buf->len >= maxbuflen)
134 continue; 138 continue;
@@ -137,11 +141,10 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
137 continue; 141 continue;
138 142
139 mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, 143 mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
140 (struct sockaddr *)&addr, sizeof(addr)); 144 mountdata->addr, addr_bufsize);
141 if (mountdata->addrlen == 0) 145 if (mountdata->addrlen == 0)
142 continue; 146 continue;
143 147
144 mountdata->addr = (struct sockaddr *)&addr;
145 rpc_set_port(mountdata->addr, NFS_PORT); 148 rpc_set_port(mountdata->addr, NFS_PORT);
146 149
147 memcpy(page2, buf->data, buf->len); 150 memcpy(page2, buf->data, buf->len);
@@ -156,6 +159,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
156 if (!IS_ERR(mnt)) 159 if (!IS_ERR(mnt))
157 break; 160 break;
158 } 161 }
162 kfree(mountdata->addr);
159 return mnt; 163 return mnt;
160} 164}
161 165
@@ -221,8 +225,8 @@ out:
221 225
222/* 226/*
223 * nfs_do_refmount - handle crossing a referral on server 227 * nfs_do_refmount - handle crossing a referral on server
228 * @mnt_parent - mountpoint of referral
224 * @dentry - dentry of referral 229 * @dentry - dentry of referral
225 * @nd - nameidata info
226 * 230 *
227 */ 231 */
228struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 232struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 638067007c65..70015dd60a98 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -70,6 +70,9 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
70static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 70static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
71static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 71static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
72static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 72static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
73static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
74 struct nfs_fattr *fattr, struct iattr *sattr,
75 struct nfs4_state *state);
73 76
74/* Prevent leaks of NFSv4 errors into userland */ 77/* Prevent leaks of NFSv4 errors into userland */
75static int nfs4_map_errors(int err) 78static int nfs4_map_errors(int err)
@@ -714,17 +717,18 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
714 717
715static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 718static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
716 struct nfs4_state_owner *sp, fmode_t fmode, int flags, 719 struct nfs4_state_owner *sp, fmode_t fmode, int flags,
717 const struct iattr *attrs) 720 const struct iattr *attrs,
721 gfp_t gfp_mask)
718{ 722{
719 struct dentry *parent = dget_parent(path->dentry); 723 struct dentry *parent = dget_parent(path->dentry);
720 struct inode *dir = parent->d_inode; 724 struct inode *dir = parent->d_inode;
721 struct nfs_server *server = NFS_SERVER(dir); 725 struct nfs_server *server = NFS_SERVER(dir);
722 struct nfs4_opendata *p; 726 struct nfs4_opendata *p;
723 727
724 p = kzalloc(sizeof(*p), GFP_KERNEL); 728 p = kzalloc(sizeof(*p), gfp_mask);
725 if (p == NULL) 729 if (p == NULL)
726 goto err; 730 goto err;
727 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); 731 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
728 if (p->o_arg.seqid == NULL) 732 if (p->o_arg.seqid == NULL)
729 goto err_free; 733 goto err_free;
730 path_get(path); 734 path_get(path);
@@ -1060,7 +1064,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
1060{ 1064{
1061 struct nfs4_opendata *opendata; 1065 struct nfs4_opendata *opendata;
1062 1066
1063 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL); 1067 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS);
1064 if (opendata == NULL) 1068 if (opendata == NULL)
1065 return ERR_PTR(-ENOMEM); 1069 return ERR_PTR(-ENOMEM);
1066 opendata->state = state; 1070 opendata->state = state;
@@ -1648,7 +1652,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1648 if (path->dentry->d_inode != NULL) 1652 if (path->dentry->d_inode != NULL)
1649 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); 1653 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
1650 status = -ENOMEM; 1654 status = -ENOMEM;
1651 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr); 1655 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL);
1652 if (opendata == NULL) 1656 if (opendata == NULL)
1653 goto err_put_state_owner; 1657 goto err_put_state_owner;
1654 1658
@@ -1659,15 +1663,24 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1659 if (status != 0) 1663 if (status != 0)
1660 goto err_opendata_put; 1664 goto err_opendata_put;
1661 1665
1662 if (opendata->o_arg.open_flags & O_EXCL)
1663 nfs4_exclusive_attrset(opendata, sattr);
1664
1665 state = nfs4_opendata_to_nfs4_state(opendata); 1666 state = nfs4_opendata_to_nfs4_state(opendata);
1666 status = PTR_ERR(state); 1667 status = PTR_ERR(state);
1667 if (IS_ERR(state)) 1668 if (IS_ERR(state))
1668 goto err_opendata_put; 1669 goto err_opendata_put;
1669 if (server->caps & NFS_CAP_POSIX_LOCK) 1670 if (server->caps & NFS_CAP_POSIX_LOCK)
1670 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); 1671 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1672
1673 if (opendata->o_arg.open_flags & O_EXCL) {
1674 nfs4_exclusive_attrset(opendata, sattr);
1675
1676 nfs_fattr_init(opendata->o_res.f_attr);
1677 status = nfs4_do_setattr(state->inode, cred,
1678 opendata->o_res.f_attr, sattr,
1679 state);
1680 if (status == 0)
1681 nfs_setattr_update_inode(state->inode, sattr);
1682 nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
1683 }
1671 nfs4_opendata_put(opendata); 1684 nfs4_opendata_put(opendata);
1672 nfs4_put_state_owner(sp); 1685 nfs4_put_state_owner(sp);
1673 *res = state; 1686 *res = state;
@@ -1914,7 +1927,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
1914 * 1927 *
1915 * NOTE: Caller must be holding the sp->so_owner semaphore! 1928 * NOTE: Caller must be holding the sp->so_owner semaphore!
1916 */ 1929 */
1917int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) 1930int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
1918{ 1931{
1919 struct nfs_server *server = NFS_SERVER(state->inode); 1932 struct nfs_server *server = NFS_SERVER(state->inode);
1920 struct nfs4_closedata *calldata; 1933 struct nfs4_closedata *calldata;
@@ -1933,7 +1946,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1933 }; 1946 };
1934 int status = -ENOMEM; 1947 int status = -ENOMEM;
1935 1948
1936 calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); 1949 calldata = kzalloc(sizeof(*calldata), gfp_mask);
1937 if (calldata == NULL) 1950 if (calldata == NULL)
1938 goto out; 1951 goto out;
1939 calldata->inode = state->inode; 1952 calldata->inode = state->inode;
@@ -1941,7 +1954,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
1941 calldata->arg.fh = NFS_FH(state->inode); 1954 calldata->arg.fh = NFS_FH(state->inode);
1942 calldata->arg.stateid = &state->open_stateid; 1955 calldata->arg.stateid = &state->open_stateid;
1943 /* Serialization for the sequence id */ 1956 /* Serialization for the sequence id */
1944 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); 1957 calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
1945 if (calldata->arg.seqid == NULL) 1958 if (calldata->arg.seqid == NULL)
1946 goto out_free_calldata; 1959 goto out_free_calldata;
1947 calldata->arg.fmode = 0; 1960 calldata->arg.fmode = 0;
@@ -2404,14 +2417,12 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh
2404static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) 2417static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
2405{ 2418{
2406 struct nfs_server *server = NFS_SERVER(inode); 2419 struct nfs_server *server = NFS_SERVER(inode);
2407 struct nfs_fattr fattr;
2408 struct nfs4_accessargs args = { 2420 struct nfs4_accessargs args = {
2409 .fh = NFS_FH(inode), 2421 .fh = NFS_FH(inode),
2410 .bitmask = server->attr_bitmask, 2422 .bitmask = server->attr_bitmask,
2411 }; 2423 };
2412 struct nfs4_accessres res = { 2424 struct nfs4_accessres res = {
2413 .server = server, 2425 .server = server,
2414 .fattr = &fattr,
2415 }; 2426 };
2416 struct rpc_message msg = { 2427 struct rpc_message msg = {
2417 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], 2428 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
@@ -2438,7 +2449,11 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2438 if (mode & MAY_EXEC) 2449 if (mode & MAY_EXEC)
2439 args.access |= NFS4_ACCESS_EXECUTE; 2450 args.access |= NFS4_ACCESS_EXECUTE;
2440 } 2451 }
2441 nfs_fattr_init(&fattr); 2452
2453 res.fattr = nfs_alloc_fattr();
2454 if (res.fattr == NULL)
2455 return -ENOMEM;
2456
2442 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2457 status = nfs4_call_sync(server, &msg, &args, &res, 0);
2443 if (!status) { 2458 if (!status) {
2444 entry->mask = 0; 2459 entry->mask = 0;
@@ -2448,8 +2463,9 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2448 entry->mask |= MAY_WRITE; 2463 entry->mask |= MAY_WRITE;
2449 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) 2464 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
2450 entry->mask |= MAY_EXEC; 2465 entry->mask |= MAY_EXEC;
2451 nfs_refresh_inode(inode, &fattr); 2466 nfs_refresh_inode(inode, res.fattr);
2452 } 2467 }
2468 nfs_free_fattr(res.fattr);
2453 return status; 2469 return status;
2454} 2470}
2455 2471
@@ -2562,13 +2578,6 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2562 } 2578 }
2563 d_add(dentry, igrab(state->inode)); 2579 d_add(dentry, igrab(state->inode));
2564 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 2580 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2565 if (flags & O_EXCL) {
2566 struct nfs_fattr fattr;
2567 status = nfs4_do_setattr(state->inode, cred, &fattr, sattr, state);
2568 if (status == 0)
2569 nfs_setattr_update_inode(state->inode, sattr);
2570 nfs_post_op_update_inode(state->inode, &fattr);
2571 }
2572 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2581 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
2573 status = nfs4_intent_set_file(nd, &path, state, fmode); 2582 status = nfs4_intent_set_file(nd, &path, state, fmode);
2574 else 2583 else
@@ -2596,14 +2605,19 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2596 .rpc_argp = &args, 2605 .rpc_argp = &args,
2597 .rpc_resp = &res, 2606 .rpc_resp = &res,
2598 }; 2607 };
2599 int status; 2608 int status = -ENOMEM;
2609
2610 res.dir_attr = nfs_alloc_fattr();
2611 if (res.dir_attr == NULL)
2612 goto out;
2600 2613
2601 nfs_fattr_init(&res.dir_attr);
2602 status = nfs4_call_sync(server, &msg, &args, &res, 1); 2614 status = nfs4_call_sync(server, &msg, &args, &res, 1);
2603 if (status == 0) { 2615 if (status == 0) {
2604 update_changeattr(dir, &res.cinfo); 2616 update_changeattr(dir, &res.cinfo);
2605 nfs_post_op_update_inode(dir, &res.dir_attr); 2617 nfs_post_op_update_inode(dir, res.dir_attr);
2606 } 2618 }
2619 nfs_free_fattr(res.dir_attr);
2620out:
2607 return status; 2621 return status;
2608} 2622}
2609 2623
@@ -2638,7 +2652,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2638 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) 2652 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2639 return 0; 2653 return 0;
2640 update_changeattr(dir, &res->cinfo); 2654 update_changeattr(dir, &res->cinfo);
2641 nfs_post_op_update_inode(dir, &res->dir_attr); 2655 nfs_post_op_update_inode(dir, res->dir_attr);
2642 return 1; 2656 return 1;
2643} 2657}
2644 2658
@@ -2653,29 +2667,31 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2653 .new_name = new_name, 2667 .new_name = new_name,
2654 .bitmask = server->attr_bitmask, 2668 .bitmask = server->attr_bitmask,
2655 }; 2669 };
2656 struct nfs_fattr old_fattr, new_fattr;
2657 struct nfs4_rename_res res = { 2670 struct nfs4_rename_res res = {
2658 .server = server, 2671 .server = server,
2659 .old_fattr = &old_fattr,
2660 .new_fattr = &new_fattr,
2661 }; 2672 };
2662 struct rpc_message msg = { 2673 struct rpc_message msg = {
2663 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], 2674 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
2664 .rpc_argp = &arg, 2675 .rpc_argp = &arg,
2665 .rpc_resp = &res, 2676 .rpc_resp = &res,
2666 }; 2677 };
2667 int status; 2678 int status = -ENOMEM;
2668 2679
2669 nfs_fattr_init(res.old_fattr); 2680 res.old_fattr = nfs_alloc_fattr();
2670 nfs_fattr_init(res.new_fattr); 2681 res.new_fattr = nfs_alloc_fattr();
2671 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2682 if (res.old_fattr == NULL || res.new_fattr == NULL)
2683 goto out;
2672 2684
2685 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
2673 if (!status) { 2686 if (!status) {
2674 update_changeattr(old_dir, &res.old_cinfo); 2687 update_changeattr(old_dir, &res.old_cinfo);
2675 nfs_post_op_update_inode(old_dir, res.old_fattr); 2688 nfs_post_op_update_inode(old_dir, res.old_fattr);
2676 update_changeattr(new_dir, &res.new_cinfo); 2689 update_changeattr(new_dir, &res.new_cinfo);
2677 nfs_post_op_update_inode(new_dir, res.new_fattr); 2690 nfs_post_op_update_inode(new_dir, res.new_fattr);
2678 } 2691 }
2692out:
2693 nfs_free_fattr(res.new_fattr);
2694 nfs_free_fattr(res.old_fattr);
2679 return status; 2695 return status;
2680} 2696}
2681 2697
@@ -2702,28 +2718,30 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2702 .name = name, 2718 .name = name,
2703 .bitmask = server->attr_bitmask, 2719 .bitmask = server->attr_bitmask,
2704 }; 2720 };
2705 struct nfs_fattr fattr, dir_attr;
2706 struct nfs4_link_res res = { 2721 struct nfs4_link_res res = {
2707 .server = server, 2722 .server = server,
2708 .fattr = &fattr,
2709 .dir_attr = &dir_attr,
2710 }; 2723 };
2711 struct rpc_message msg = { 2724 struct rpc_message msg = {
2712 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], 2725 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
2713 .rpc_argp = &arg, 2726 .rpc_argp = &arg,
2714 .rpc_resp = &res, 2727 .rpc_resp = &res,
2715 }; 2728 };
2716 int status; 2729 int status = -ENOMEM;
2730
2731 res.fattr = nfs_alloc_fattr();
2732 res.dir_attr = nfs_alloc_fattr();
2733 if (res.fattr == NULL || res.dir_attr == NULL)
2734 goto out;
2717 2735
2718 nfs_fattr_init(res.fattr);
2719 nfs_fattr_init(res.dir_attr);
2720 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2736 status = nfs4_call_sync(server, &msg, &arg, &res, 1);
2721 if (!status) { 2737 if (!status) {
2722 update_changeattr(dir, &res.cinfo); 2738 update_changeattr(dir, &res.cinfo);
2723 nfs_post_op_update_inode(dir, res.dir_attr); 2739 nfs_post_op_update_inode(dir, res.dir_attr);
2724 nfs_post_op_update_inode(inode, res.fattr); 2740 nfs_post_op_update_inode(inode, res.fattr);
2725 } 2741 }
2726 2742out:
2743 nfs_free_fattr(res.dir_attr);
2744 nfs_free_fattr(res.fattr);
2727 return status; 2745 return status;
2728} 2746}
2729 2747
@@ -3146,23 +3164,31 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
3146 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3164 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3147} 3165}
3148 3166
3167struct nfs4_renewdata {
3168 struct nfs_client *client;
3169 unsigned long timestamp;
3170};
3171
3149/* 3172/*
3150 * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special 3173 * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
3151 * standalone procedure for queueing an asynchronous RENEW. 3174 * standalone procedure for queueing an asynchronous RENEW.
3152 */ 3175 */
3153static void nfs4_renew_release(void *data) 3176static void nfs4_renew_release(void *calldata)
3154{ 3177{
3155 struct nfs_client *clp = data; 3178 struct nfs4_renewdata *data = calldata;
3179 struct nfs_client *clp = data->client;
3156 3180
3157 if (atomic_read(&clp->cl_count) > 1) 3181 if (atomic_read(&clp->cl_count) > 1)
3158 nfs4_schedule_state_renewal(clp); 3182 nfs4_schedule_state_renewal(clp);
3159 nfs_put_client(clp); 3183 nfs_put_client(clp);
3184 kfree(data);
3160} 3185}
3161 3186
3162static void nfs4_renew_done(struct rpc_task *task, void *data) 3187static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3163{ 3188{
3164 struct nfs_client *clp = data; 3189 struct nfs4_renewdata *data = calldata;
3165 unsigned long timestamp = task->tk_start; 3190 struct nfs_client *clp = data->client;
3191 unsigned long timestamp = data->timestamp;
3166 3192
3167 if (task->tk_status < 0) { 3193 if (task->tk_status < 0) {
3168 /* Unless we're shutting down, schedule state recovery! */ 3194 /* Unless we're shutting down, schedule state recovery! */
@@ -3188,11 +3214,17 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
3188 .rpc_argp = clp, 3214 .rpc_argp = clp,
3189 .rpc_cred = cred, 3215 .rpc_cred = cred,
3190 }; 3216 };
3217 struct nfs4_renewdata *data;
3191 3218
3192 if (!atomic_inc_not_zero(&clp->cl_count)) 3219 if (!atomic_inc_not_zero(&clp->cl_count))
3193 return -EIO; 3220 return -EIO;
3221 data = kmalloc(sizeof(*data), GFP_KERNEL);
3222 if (data == NULL)
3223 return -ENOMEM;
3224 data->client = clp;
3225 data->timestamp = jiffies;
3194 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, 3226 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
3195 &nfs4_renew_ops, clp); 3227 &nfs4_renew_ops, data);
3196} 3228}
3197 3229
3198int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) 3230int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
@@ -3494,7 +3526,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3494 return _nfs4_async_handle_error(task, server, server->nfs_client, state); 3526 return _nfs4_async_handle_error(task, server, server->nfs_client, state);
3495} 3527}
3496 3528
3497int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) 3529int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3530 unsigned short port, struct rpc_cred *cred,
3531 struct nfs4_setclientid_res *res)
3498{ 3532{
3499 nfs4_verifier sc_verifier; 3533 nfs4_verifier sc_verifier;
3500 struct nfs4_setclientid setclientid = { 3534 struct nfs4_setclientid setclientid = {
@@ -3504,7 +3538,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
3504 struct rpc_message msg = { 3538 struct rpc_message msg = {
3505 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], 3539 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
3506 .rpc_argp = &setclientid, 3540 .rpc_argp = &setclientid,
3507 .rpc_resp = clp, 3541 .rpc_resp = res,
3508 .rpc_cred = cred, 3542 .rpc_cred = cred,
3509 }; 3543 };
3510 __be32 *p; 3544 __be32 *p;
@@ -3547,12 +3581,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
3547 return status; 3581 return status;
3548} 3582}
3549 3583
3550static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) 3584static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3585 struct nfs4_setclientid_res *arg,
3586 struct rpc_cred *cred)
3551{ 3587{
3552 struct nfs_fsinfo fsinfo; 3588 struct nfs_fsinfo fsinfo;
3553 struct rpc_message msg = { 3589 struct rpc_message msg = {
3554 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], 3590 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
3555 .rpc_argp = clp, 3591 .rpc_argp = arg,
3556 .rpc_resp = &fsinfo, 3592 .rpc_resp = &fsinfo,
3557 .rpc_cred = cred, 3593 .rpc_cred = cred,
3558 }; 3594 };
@@ -3570,12 +3606,14 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre
3570 return status; 3606 return status;
3571} 3607}
3572 3608
3573int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred) 3609int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3610 struct nfs4_setclientid_res *arg,
3611 struct rpc_cred *cred)
3574{ 3612{
3575 long timeout = 0; 3613 long timeout = 0;
3576 int err; 3614 int err;
3577 do { 3615 do {
3578 err = _nfs4_proc_setclientid_confirm(clp, cred); 3616 err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
3579 switch (err) { 3617 switch (err) {
3580 case 0: 3618 case 0:
3581 return err; 3619 return err;
@@ -3667,7 +3705,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3667 }; 3705 };
3668 int status = 0; 3706 int status = 0;
3669 3707
3670 data = kzalloc(sizeof(*data), GFP_KERNEL); 3708 data = kzalloc(sizeof(*data), GFP_NOFS);
3671 if (data == NULL) 3709 if (data == NULL)
3672 return -ENOMEM; 3710 return -ENOMEM;
3673 data->args.fhandle = &data->fh; 3711 data->args.fhandle = &data->fh;
@@ -3823,7 +3861,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3823 struct nfs4_unlockdata *p; 3861 struct nfs4_unlockdata *p;
3824 struct inode *inode = lsp->ls_state->inode; 3862 struct inode *inode = lsp->ls_state->inode;
3825 3863
3826 p = kzalloc(sizeof(*p), GFP_KERNEL); 3864 p = kzalloc(sizeof(*p), GFP_NOFS);
3827 if (p == NULL) 3865 if (p == NULL)
3828 return NULL; 3866 return NULL;
3829 p->arg.fh = NFS_FH(inode); 3867 p->arg.fh = NFS_FH(inode);
@@ -3961,7 +3999,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
3961 if (test_bit(NFS_DELEGATED_STATE, &state->flags)) 3999 if (test_bit(NFS_DELEGATED_STATE, &state->flags))
3962 goto out; 4000 goto out;
3963 lsp = request->fl_u.nfs4_fl.owner; 4001 lsp = request->fl_u.nfs4_fl.owner;
3964 seqid = nfs_alloc_seqid(&lsp->ls_seqid); 4002 seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
3965 status = -ENOMEM; 4003 status = -ENOMEM;
3966 if (seqid == NULL) 4004 if (seqid == NULL)
3967 goto out; 4005 goto out;
@@ -3989,22 +4027,23 @@ struct nfs4_lockdata {
3989}; 4027};
3990 4028
3991static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, 4029static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
3992 struct nfs_open_context *ctx, struct nfs4_lock_state *lsp) 4030 struct nfs_open_context *ctx, struct nfs4_lock_state *lsp,
4031 gfp_t gfp_mask)
3993{ 4032{
3994 struct nfs4_lockdata *p; 4033 struct nfs4_lockdata *p;
3995 struct inode *inode = lsp->ls_state->inode; 4034 struct inode *inode = lsp->ls_state->inode;
3996 struct nfs_server *server = NFS_SERVER(inode); 4035 struct nfs_server *server = NFS_SERVER(inode);
3997 4036
3998 p = kzalloc(sizeof(*p), GFP_KERNEL); 4037 p = kzalloc(sizeof(*p), gfp_mask);
3999 if (p == NULL) 4038 if (p == NULL)
4000 return NULL; 4039 return NULL;
4001 4040
4002 p->arg.fh = NFS_FH(inode); 4041 p->arg.fh = NFS_FH(inode);
4003 p->arg.fl = &p->fl; 4042 p->arg.fl = &p->fl;
4004 p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid); 4043 p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
4005 if (p->arg.open_seqid == NULL) 4044 if (p->arg.open_seqid == NULL)
4006 goto out_free; 4045 goto out_free;
4007 p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); 4046 p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
4008 if (p->arg.lock_seqid == NULL) 4047 if (p->arg.lock_seqid == NULL)
4009 goto out_free_seqid; 4048 goto out_free_seqid;
4010 p->arg.lock_stateid = &lsp->ls_stateid; 4049 p->arg.lock_stateid = &lsp->ls_stateid;
@@ -4158,7 +4197,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
4158 4197
4159 dprintk("%s: begin!\n", __func__); 4198 dprintk("%s: begin!\n", __func__);
4160 data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), 4199 data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
4161 fl->fl_u.nfs4_fl.owner); 4200 fl->fl_u.nfs4_fl.owner,
4201 recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
4162 if (data == NULL) 4202 if (data == NULL)
4163 return -ENOMEM; 4203 return -ENOMEM;
4164 if (IS_SETLKW(cmd)) 4204 if (IS_SETLKW(cmd))
@@ -4647,7 +4687,7 @@ static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
4647 if (max_reqs != tbl->max_slots) { 4687 if (max_reqs != tbl->max_slots) {
4648 ret = -ENOMEM; 4688 ret = -ENOMEM;
4649 new = kmalloc(max_reqs * sizeof(struct nfs4_slot), 4689 new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
4650 GFP_KERNEL); 4690 GFP_NOFS);
4651 if (!new) 4691 if (!new)
4652 goto out; 4692 goto out;
4653 ret = 0; 4693 ret = 0;
@@ -4712,7 +4752,7 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
4712 4752
4713 dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); 4753 dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
4714 4754
4715 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); 4755 slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
4716 if (!slot) 4756 if (!slot)
4717 goto out; 4757 goto out;
4718 ret = 0; 4758 ret = 0;
@@ -4761,7 +4801,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
4761 struct nfs4_session *session; 4801 struct nfs4_session *session;
4762 struct nfs4_slot_table *tbl; 4802 struct nfs4_slot_table *tbl;
4763 4803
4764 session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL); 4804 session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
4765 if (!session) 4805 if (!session)
4766 return NULL; 4806 return NULL;
4767 4807
@@ -5105,8 +5145,8 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp,
5105 5145
5106 if (!atomic_inc_not_zero(&clp->cl_count)) 5146 if (!atomic_inc_not_zero(&clp->cl_count))
5107 return -EIO; 5147 return -EIO;
5108 args = kzalloc(sizeof(*args), GFP_KERNEL); 5148 args = kzalloc(sizeof(*args), GFP_NOFS);
5109 res = kzalloc(sizeof(*res), GFP_KERNEL); 5149 res = kzalloc(sizeof(*res), GFP_NOFS);
5110 if (!args || !res) { 5150 if (!args || !res) {
5111 kfree(args); 5151 kfree(args);
5112 kfree(res); 5152 kfree(res);
@@ -5207,7 +5247,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5207 int status = -ENOMEM; 5247 int status = -ENOMEM;
5208 5248
5209 dprintk("--> %s\n", __func__); 5249 dprintk("--> %s\n", __func__);
5210 calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); 5250 calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
5211 if (calldata == NULL) 5251 if (calldata == NULL)
5212 goto out; 5252 goto out;
5213 calldata->clp = clp; 5253 calldata->clp = clp;
@@ -5218,9 +5258,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5218 msg.rpc_resp = &calldata->res; 5258 msg.rpc_resp = &calldata->res;
5219 task_setup_data.callback_data = calldata; 5259 task_setup_data.callback_data = calldata;
5220 task = rpc_run_task(&task_setup_data); 5260 task = rpc_run_task(&task_setup_data);
5221 if (IS_ERR(task)) 5261 if (IS_ERR(task)) {
5222 status = PTR_ERR(task); 5262 status = PTR_ERR(task);
5263 goto out;
5264 }
5223 rpc_put_task(task); 5265 rpc_put_task(task);
5266 return 0;
5224out: 5267out:
5225 dprintk("<-- %s status=%d\n", __func__, status); 5268 dprintk("<-- %s status=%d\n", __func__, status);
5226 return status; 5269 return status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6c5ed51f105e..34acf5926fdc 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -62,6 +62,7 @@ static LIST_HEAD(nfs4_clientid_list);
62 62
63int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 63int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
64{ 64{
65 struct nfs4_setclientid_res clid;
65 unsigned short port; 66 unsigned short port;
66 int status; 67 int status;
67 68
@@ -69,11 +70,15 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
69 if (clp->cl_addr.ss_family == AF_INET6) 70 if (clp->cl_addr.ss_family == AF_INET6)
70 port = nfs_callback_tcpport6; 71 port = nfs_callback_tcpport6;
71 72
72 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred); 73 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
73 if (status == 0) 74 if (status != 0)
74 status = nfs4_proc_setclientid_confirm(clp, cred); 75 goto out;
75 if (status == 0) 76 status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
76 nfs4_schedule_state_renewal(clp); 77 if (status != 0)
78 goto out;
79 clp->cl_clientid = clid.clientid;
80 nfs4_schedule_state_renewal(clp);
81out:
77 return status; 82 return status;
78} 83}
79 84
@@ -361,7 +366,7 @@ nfs4_alloc_state_owner(void)
361{ 366{
362 struct nfs4_state_owner *sp; 367 struct nfs4_state_owner *sp;
363 368
364 sp = kzalloc(sizeof(*sp),GFP_KERNEL); 369 sp = kzalloc(sizeof(*sp),GFP_NOFS);
365 if (!sp) 370 if (!sp)
366 return NULL; 371 return NULL;
367 spin_lock_init(&sp->so_lock); 372 spin_lock_init(&sp->so_lock);
@@ -435,7 +440,7 @@ nfs4_alloc_open_state(void)
435{ 440{
436 struct nfs4_state *state; 441 struct nfs4_state *state;
437 442
438 state = kzalloc(sizeof(*state), GFP_KERNEL); 443 state = kzalloc(sizeof(*state), GFP_NOFS);
439 if (!state) 444 if (!state)
440 return NULL; 445 return NULL;
441 atomic_set(&state->count, 1); 446 atomic_set(&state->count, 1);
@@ -537,7 +542,8 @@ void nfs4_put_open_state(struct nfs4_state *state)
537/* 542/*
538 * Close the current file. 543 * Close the current file.
539 */ 544 */
540static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait) 545static void __nfs4_close(struct path *path, struct nfs4_state *state,
546 fmode_t fmode, gfp_t gfp_mask, int wait)
541{ 547{
542 struct nfs4_state_owner *owner = state->owner; 548 struct nfs4_state_owner *owner = state->owner;
543 int call_close = 0; 549 int call_close = 0;
@@ -578,17 +584,17 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fm
578 nfs4_put_open_state(state); 584 nfs4_put_open_state(state);
579 nfs4_put_state_owner(owner); 585 nfs4_put_state_owner(owner);
580 } else 586 } else
581 nfs4_do_close(path, state, wait); 587 nfs4_do_close(path, state, gfp_mask, wait);
582} 588}
583 589
584void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) 590void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
585{ 591{
586 __nfs4_close(path, state, fmode, 0); 592 __nfs4_close(path, state, fmode, GFP_NOFS, 0);
587} 593}
588 594
589void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) 595void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
590{ 596{
591 __nfs4_close(path, state, fmode, 1); 597 __nfs4_close(path, state, fmode, GFP_KERNEL, 1);
592} 598}
593 599
594/* 600/*
@@ -618,7 +624,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
618 struct nfs4_lock_state *lsp; 624 struct nfs4_lock_state *lsp;
619 struct nfs_client *clp = state->owner->so_client; 625 struct nfs_client *clp = state->owner->so_client;
620 626
621 lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); 627 lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
622 if (lsp == NULL) 628 if (lsp == NULL)
623 return NULL; 629 return NULL;
624 rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); 630 rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
@@ -754,11 +760,11 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
754 nfs4_put_lock_state(lsp); 760 nfs4_put_lock_state(lsp);
755} 761}
756 762
757struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) 763struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
758{ 764{
759 struct nfs_seqid *new; 765 struct nfs_seqid *new;
760 766
761 new = kmalloc(sizeof(*new), GFP_KERNEL); 767 new = kmalloc(sizeof(*new), gfp_mask);
762 if (new != NULL) { 768 if (new != NULL) {
763 new->sequence = counter; 769 new->sequence = counter;
764 INIT_LIST_HEAD(&new->list); 770 INIT_LIST_HEAD(&new->list);
@@ -1347,7 +1353,7 @@ static int nfs4_recall_slot(struct nfs_client *clp)
1347 1353
1348 nfs4_begin_drain_session(clp); 1354 nfs4_begin_drain_session(clp);
1349 new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), 1355 new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
1350 GFP_KERNEL); 1356 GFP_NOFS);
1351 if (!new) 1357 if (!new)
1352 return -ENOMEM; 1358 return -ENOMEM;
1353 1359
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 38f3b582e7c2..6bdef28efa33 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1504,14 +1504,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1504 hdr->replen += decode_setclientid_maxsz; 1504 hdr->replen += decode_setclientid_maxsz;
1505} 1505}
1506 1506
1507static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) 1507static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
1508{ 1508{
1509 __be32 *p; 1509 __be32 *p;
1510 1510
1511 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); 1511 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
1512 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); 1512 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
1513 p = xdr_encode_hyper(p, client_state->cl_clientid); 1513 p = xdr_encode_hyper(p, arg->clientid);
1514 xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); 1514 xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
1515 hdr->nops++; 1515 hdr->nops++;
1516 hdr->replen += decode_setclientid_confirm_maxsz; 1516 hdr->replen += decode_setclientid_confirm_maxsz;
1517} 1517}
@@ -2324,7 +2324,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4
2324/* 2324/*
2325 * a SETCLIENTID_CONFIRM request 2325 * a SETCLIENTID_CONFIRM request
2326 */ 2326 */
2327static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp) 2327static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
2328{ 2328{
2329 struct xdr_stream xdr; 2329 struct xdr_stream xdr;
2330 struct compound_hdr hdr = { 2330 struct compound_hdr hdr = {
@@ -2334,7 +2334,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
2334 2334
2335 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 2335 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2336 encode_compound_hdr(&xdr, req, &hdr); 2336 encode_compound_hdr(&xdr, req, &hdr);
2337 encode_setclientid_confirm(&xdr, clp, &hdr); 2337 encode_setclientid_confirm(&xdr, arg, &hdr);
2338 encode_putrootfh(&xdr, &hdr); 2338 encode_putrootfh(&xdr, &hdr);
2339 encode_fsinfo(&xdr, lease_bitmap, &hdr); 2339 encode_fsinfo(&xdr, lease_bitmap, &hdr);
2340 encode_nops(&hdr); 2340 encode_nops(&hdr);
@@ -4397,7 +4397,7 @@ out_overflow:
4397 return -EIO; 4397 return -EIO;
4398} 4398}
4399 4399
4400static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) 4400static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
4401{ 4401{
4402 __be32 *p; 4402 __be32 *p;
4403 uint32_t opnum; 4403 uint32_t opnum;
@@ -4417,8 +4417,8 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
4417 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE); 4417 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
4418 if (unlikely(!p)) 4418 if (unlikely(!p))
4419 goto out_overflow; 4419 goto out_overflow;
4420 p = xdr_decode_hyper(p, &clp->cl_clientid); 4420 p = xdr_decode_hyper(p, &res->clientid);
4421 memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE); 4421 memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
4422 } else if (nfserr == NFSERR_CLID_INUSE) { 4422 } else if (nfserr == NFSERR_CLID_INUSE) {
4423 uint32_t len; 4423 uint32_t len;
4424 4424
@@ -4815,7 +4815,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
4815 goto out; 4815 goto out;
4816 if ((status = decode_remove(&xdr, &res->cinfo)) != 0) 4816 if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
4817 goto out; 4817 goto out;
4818 decode_getfattr(&xdr, &res->dir_attr, res->server, 4818 decode_getfattr(&xdr, res->dir_attr, res->server,
4819 !RPC_IS_ASYNC(rqstp->rq_task)); 4819 !RPC_IS_ASYNC(rqstp->rq_task));
4820out: 4820out:
4821 return status; 4821 return status;
@@ -5498,7 +5498,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
5498 * Decode SETCLIENTID response 5498 * Decode SETCLIENTID response
5499 */ 5499 */
5500static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, 5500static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
5501 struct nfs_client *clp) 5501 struct nfs4_setclientid_res *res)
5502{ 5502{
5503 struct xdr_stream xdr; 5503 struct xdr_stream xdr;
5504 struct compound_hdr hdr; 5504 struct compound_hdr hdr;
@@ -5507,7 +5507,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
5507 xdr_init_decode(&xdr, &req->rq_rcv_buf, p); 5507 xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
5508 status = decode_compound_hdr(&xdr, &hdr); 5508 status = decode_compound_hdr(&xdr, &hdr);
5509 if (!status) 5509 if (!status)
5510 status = decode_setclientid(&xdr, clp); 5510 status = decode_setclientid(&xdr, res);
5511 return status; 5511 return status;
5512} 5512}
5513 5513
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 8c55b27c0de4..6bd19d843af7 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -488,7 +488,6 @@ static int __init root_nfs_ports(void)
488 */ 488 */
489static int __init root_nfs_get_handle(void) 489static int __init root_nfs_get_handle(void)
490{ 490{
491 struct nfs_fh fh;
492 struct sockaddr_in sin; 491 struct sockaddr_in sin;
493 unsigned int auth_flav_len = 0; 492 unsigned int auth_flav_len = 0;
494 struct nfs_mount_request request = { 493 struct nfs_mount_request request = {
@@ -499,21 +498,24 @@ static int __init root_nfs_get_handle(void)
499 NFS_MNT3_VERSION : NFS_MNT_VERSION, 498 NFS_MNT3_VERSION : NFS_MNT_VERSION,
500 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? 499 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
501 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, 500 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
502 .fh = &fh,
503 .auth_flav_len = &auth_flav_len, 501 .auth_flav_len = &auth_flav_len,
504 }; 502 };
505 int status; 503 int status = -ENOMEM;
506 504
505 request.fh = nfs_alloc_fhandle();
506 if (!request.fh)
507 goto out;
507 set_sockaddr(&sin, servaddr, htons(mount_port)); 508 set_sockaddr(&sin, servaddr, htons(mount_port));
508 status = nfs_mount(&request); 509 status = nfs_mount(&request);
509 if (status < 0) 510 if (status < 0)
510 printk(KERN_ERR "Root-NFS: Server returned error %d " 511 printk(KERN_ERR "Root-NFS: Server returned error %d "
511 "while mounting %s\n", status, nfs_export_path); 512 "while mounting %s\n", status, nfs_export_path);
512 else { 513 else {
513 nfs_data.root.size = fh.size; 514 nfs_data.root.size = request.fh->size;
514 memcpy(nfs_data.root.data, fh.data, fh.size); 515 memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
515 } 516 }
516 517 nfs_free_fhandle(request.fh);
518out:
517 return status; 519 return status;
518} 520}
519 521
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 29d9d36cd5f4..a3654e57b589 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -60,16 +60,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
60{ 60{
61 struct nfs_page *req; 61 struct nfs_page *req;
62 62
63 for (;;) { 63 /* try to allocate the request struct */
64 /* try to allocate the request struct */ 64 req = nfs_page_alloc();
65 req = nfs_page_alloc(); 65 if (req == NULL)
66 if (req != NULL) 66 return ERR_PTR(-ENOMEM);
67 break;
68
69 if (fatal_signal_pending(current))
70 return ERR_PTR(-ERESTARTSYS);
71 yield();
72 }
73 67
74 /* Initialize the request struct. Initially, we assume a 68 /* Initialize the request struct. Initially, we assume a
75 * long write-back delay. This will be adjusted in 69 * long write-back delay. This will be adjusted in
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0288be80444f..611bec22f552 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -224,35 +224,60 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
224 return status; 224 return status;
225} 225}
226 226
227struct nfs_createdata {
228 struct nfs_createargs arg;
229 struct nfs_diropok res;
230 struct nfs_fh fhandle;
231 struct nfs_fattr fattr;
232};
233
234static struct nfs_createdata *nfs_alloc_createdata(struct inode *dir,
235 struct dentry *dentry, struct iattr *sattr)
236{
237 struct nfs_createdata *data;
238
239 data = kmalloc(sizeof(*data), GFP_KERNEL);
240
241 if (data != NULL) {
242 data->arg.fh = NFS_FH(dir);
243 data->arg.name = dentry->d_name.name;
244 data->arg.len = dentry->d_name.len;
245 data->arg.sattr = sattr;
246 nfs_fattr_init(&data->fattr);
247 data->fhandle.size = 0;
248 data->res.fh = &data->fhandle;
249 data->res.fattr = &data->fattr;
250 }
251 return data;
252};
253
254static void nfs_free_createdata(const struct nfs_createdata *data)
255{
256 kfree(data);
257}
258
227static int 259static int
228nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
229 int flags, struct nameidata *nd) 261 int flags, struct nameidata *nd)
230{ 262{
231 struct nfs_fh fhandle; 263 struct nfs_createdata *data;
232 struct nfs_fattr fattr;
233 struct nfs_createargs arg = {
234 .fh = NFS_FH(dir),
235 .name = dentry->d_name.name,
236 .len = dentry->d_name.len,
237 .sattr = sattr
238 };
239 struct nfs_diropok res = {
240 .fh = &fhandle,
241 .fattr = &fattr
242 };
243 struct rpc_message msg = { 264 struct rpc_message msg = {
244 .rpc_proc = &nfs_procedures[NFSPROC_CREATE], 265 .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
245 .rpc_argp = &arg,
246 .rpc_resp = &res,
247 }; 266 };
248 int status; 267 int status = -ENOMEM;
249 268
250 nfs_fattr_init(&fattr);
251 dprintk("NFS call create %s\n", dentry->d_name.name); 269 dprintk("NFS call create %s\n", dentry->d_name.name);
270 data = nfs_alloc_createdata(dir, dentry, sattr);
271 if (data == NULL)
272 goto out;
273 msg.rpc_argp = &data->arg;
274 msg.rpc_resp = &data->res;
252 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 275 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
253 nfs_mark_for_revalidate(dir); 276 nfs_mark_for_revalidate(dir);
254 if (status == 0) 277 if (status == 0)
255 status = nfs_instantiate(dentry, &fhandle, &fattr); 278 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
279 nfs_free_createdata(data);
280out:
256 dprintk("NFS reply create: %d\n", status); 281 dprintk("NFS reply create: %d\n", status);
257 return status; 282 return status;
258} 283}
@@ -264,24 +289,12 @@ static int
264nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 289nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
265 dev_t rdev) 290 dev_t rdev)
266{ 291{
267 struct nfs_fh fhandle; 292 struct nfs_createdata *data;
268 struct nfs_fattr fattr;
269 struct nfs_createargs arg = {
270 .fh = NFS_FH(dir),
271 .name = dentry->d_name.name,
272 .len = dentry->d_name.len,
273 .sattr = sattr
274 };
275 struct nfs_diropok res = {
276 .fh = &fhandle,
277 .fattr = &fattr
278 };
279 struct rpc_message msg = { 293 struct rpc_message msg = {
280 .rpc_proc = &nfs_procedures[NFSPROC_CREATE], 294 .rpc_proc = &nfs_procedures[NFSPROC_CREATE],
281 .rpc_argp = &arg,
282 .rpc_resp = &res,
283 }; 295 };
284 int status, mode; 296 umode_t mode;
297 int status = -ENOMEM;
285 298
286 dprintk("NFS call mknod %s\n", dentry->d_name.name); 299 dprintk("NFS call mknod %s\n", dentry->d_name.name);
287 300
@@ -294,17 +307,24 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
294 sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */ 307 sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
295 } 308 }
296 309
297 nfs_fattr_init(&fattr); 310 data = nfs_alloc_createdata(dir, dentry, sattr);
311 if (data == NULL)
312 goto out;
313 msg.rpc_argp = &data->arg;
314 msg.rpc_resp = &data->res;
315
298 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 316 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
299 nfs_mark_for_revalidate(dir); 317 nfs_mark_for_revalidate(dir);
300 318
301 if (status == -EINVAL && S_ISFIFO(mode)) { 319 if (status == -EINVAL && S_ISFIFO(mode)) {
302 sattr->ia_mode = mode; 320 sattr->ia_mode = mode;
303 nfs_fattr_init(&fattr); 321 nfs_fattr_init(data->res.fattr);
304 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 322 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
305 } 323 }
306 if (status == 0) 324 if (status == 0)
307 status = nfs_instantiate(dentry, &fhandle, &fattr); 325 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
326 nfs_free_createdata(data);
327out:
308 dprintk("NFS reply mknod: %d\n", status); 328 dprintk("NFS reply mknod: %d\n", status);
309 return status; 329 return status;
310} 330}
@@ -398,8 +418,8 @@ static int
398nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page, 418nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
399 unsigned int len, struct iattr *sattr) 419 unsigned int len, struct iattr *sattr)
400{ 420{
401 struct nfs_fh fhandle; 421 struct nfs_fh *fh;
402 struct nfs_fattr fattr; 422 struct nfs_fattr *fattr;
403 struct nfs_symlinkargs arg = { 423 struct nfs_symlinkargs arg = {
404 .fromfh = NFS_FH(dir), 424 .fromfh = NFS_FH(dir),
405 .fromname = dentry->d_name.name, 425 .fromname = dentry->d_name.name,
@@ -412,12 +432,18 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
412 .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK], 432 .rpc_proc = &nfs_procedures[NFSPROC_SYMLINK],
413 .rpc_argp = &arg, 433 .rpc_argp = &arg,
414 }; 434 };
415 int status; 435 int status = -ENAMETOOLONG;
436
437 dprintk("NFS call symlink %s\n", dentry->d_name.name);
416 438
417 if (len > NFS2_MAXPATHLEN) 439 if (len > NFS2_MAXPATHLEN)
418 return -ENAMETOOLONG; 440 goto out;
419 441
420 dprintk("NFS call symlink %s\n", dentry->d_name.name); 442 fh = nfs_alloc_fhandle();
443 fattr = nfs_alloc_fattr();
444 status = -ENOMEM;
445 if (fh == NULL || fattr == NULL)
446 goto out;
421 447
422 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 448 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
423 nfs_mark_for_revalidate(dir); 449 nfs_mark_for_revalidate(dir);
@@ -427,12 +453,12 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
427 * filehandle size to zero indicates to nfs_instantiate that it 453 * filehandle size to zero indicates to nfs_instantiate that it
428 * should fill in the data with a LOOKUP call on the wire. 454 * should fill in the data with a LOOKUP call on the wire.
429 */ 455 */
430 if (status == 0) { 456 if (status == 0)
431 nfs_fattr_init(&fattr); 457 status = nfs_instantiate(dentry, fh, fattr);
432 fhandle.size = 0;
433 status = nfs_instantiate(dentry, &fhandle, &fattr);
434 }
435 458
459 nfs_free_fattr(fattr);
460 nfs_free_fhandle(fh);
461out:
436 dprintk("NFS reply symlink: %d\n", status); 462 dprintk("NFS reply symlink: %d\n", status);
437 return status; 463 return status;
438} 464}
@@ -440,31 +466,25 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
440static int 466static int
441nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) 467nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
442{ 468{
443 struct nfs_fh fhandle; 469 struct nfs_createdata *data;
444 struct nfs_fattr fattr;
445 struct nfs_createargs arg = {
446 .fh = NFS_FH(dir),
447 .name = dentry->d_name.name,
448 .len = dentry->d_name.len,
449 .sattr = sattr
450 };
451 struct nfs_diropok res = {
452 .fh = &fhandle,
453 .fattr = &fattr
454 };
455 struct rpc_message msg = { 470 struct rpc_message msg = {
456 .rpc_proc = &nfs_procedures[NFSPROC_MKDIR], 471 .rpc_proc = &nfs_procedures[NFSPROC_MKDIR],
457 .rpc_argp = &arg,
458 .rpc_resp = &res,
459 }; 472 };
460 int status; 473 int status = -ENOMEM;
461 474
462 dprintk("NFS call mkdir %s\n", dentry->d_name.name); 475 dprintk("NFS call mkdir %s\n", dentry->d_name.name);
463 nfs_fattr_init(&fattr); 476 data = nfs_alloc_createdata(dir, dentry, sattr);
477 if (data == NULL)
478 goto out;
479 msg.rpc_argp = &data->arg;
480 msg.rpc_resp = &data->res;
481
464 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); 482 status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
465 nfs_mark_for_revalidate(dir); 483 nfs_mark_for_revalidate(dir);
466 if (status == 0) 484 if (status == 0)
467 status = nfs_instantiate(dentry, &fhandle, &fattr); 485 status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
486 nfs_free_createdata(data);
487out:
468 dprintk("NFS reply mkdir: %d\n", status); 488 dprintk("NFS reply mkdir: %d\n", status);
469 return status; 489 return status;
470} 490}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index db9b360ae19d..6e2b06e6ca79 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -40,7 +40,7 @@ static mempool_t *nfs_rdata_mempool;
40 40
41struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 41struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
42{ 42{
43 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); 43 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
44 44
45 if (p) { 45 if (p) {
46 memset(p, 0, sizeof(*p)); 46 memset(p, 0, sizeof(*p));
@@ -50,7 +50,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
50 if (pagecount <= ARRAY_SIZE(p->page_array)) 50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array; 51 p->pagevec = p->page_array;
52 else { 52 else {
53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); 53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
54 if (!p->pagevec) { 54 if (!p->pagevec) {
55 mempool_free(p, nfs_rdata_mempool); 55 mempool_free(p, nfs_rdata_mempool);
56 p = NULL; 56 p = NULL;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e01637240eeb..2f8b1157daa2 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -141,7 +141,6 @@ static const match_table_t nfs_mount_option_tokens = {
141 { Opt_resvport, "resvport" }, 141 { Opt_resvport, "resvport" },
142 { Opt_noresvport, "noresvport" }, 142 { Opt_noresvport, "noresvport" },
143 { Opt_fscache, "fsc" }, 143 { Opt_fscache, "fsc" },
144 { Opt_fscache_uniq, "fsc=%s" },
145 { Opt_nofscache, "nofsc" }, 144 { Opt_nofscache, "nofsc" },
146 145
147 { Opt_port, "port=%s" }, 146 { Opt_port, "port=%s" },
@@ -171,6 +170,7 @@ static const match_table_t nfs_mount_option_tokens = {
171 { Opt_mountaddr, "mountaddr=%s" }, 170 { Opt_mountaddr, "mountaddr=%s" },
172 171
173 { Opt_lookupcache, "lookupcache=%s" }, 172 { Opt_lookupcache, "lookupcache=%s" },
173 { Opt_fscache_uniq, "fsc=%s" },
174 174
175 { Opt_err, NULL } 175 { Opt_err, NULL }
176}; 176};
@@ -423,15 +423,19 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
423 unsigned char blockbits; 423 unsigned char blockbits;
424 unsigned long blockres; 424 unsigned long blockres;
425 struct nfs_fh *fh = NFS_FH(dentry->d_inode); 425 struct nfs_fh *fh = NFS_FH(dentry->d_inode);
426 struct nfs_fattr fattr; 426 struct nfs_fsstat res;
427 struct nfs_fsstat res = { 427 int error = -ENOMEM;
428 .fattr = &fattr, 428
429 }; 429 res.fattr = nfs_alloc_fattr();
430 int error; 430 if (res.fattr == NULL)
431 goto out_err;
431 432
432 error = server->nfs_client->rpc_ops->statfs(server, fh, &res); 433 error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
434
435 nfs_free_fattr(res.fattr);
433 if (error < 0) 436 if (error < 0)
434 goto out_err; 437 goto out_err;
438
435 buf->f_type = NFS_SUPER_MAGIC; 439 buf->f_type = NFS_SUPER_MAGIC;
436 440
437 /* 441 /*
@@ -1046,14 +1050,6 @@ static int nfs_parse_mount_options(char *raw,
1046 kfree(mnt->fscache_uniq); 1050 kfree(mnt->fscache_uniq);
1047 mnt->fscache_uniq = NULL; 1051 mnt->fscache_uniq = NULL;
1048 break; 1052 break;
1049 case Opt_fscache_uniq:
1050 string = match_strdup(args);
1051 if (!string)
1052 goto out_nomem;
1053 kfree(mnt->fscache_uniq);
1054 mnt->fscache_uniq = string;
1055 mnt->options |= NFS_OPTION_FSCACHE;
1056 break;
1057 1053
1058 /* 1054 /*
1059 * options that take numeric values 1055 * options that take numeric values
@@ -1384,6 +1380,14 @@ static int nfs_parse_mount_options(char *raw,
1384 return 0; 1380 return 0;
1385 }; 1381 };
1386 break; 1382 break;
1383 case Opt_fscache_uniq:
1384 string = match_strdup(args);
1385 if (string == NULL)
1386 goto out_nomem;
1387 kfree(mnt->fscache_uniq);
1388 mnt->fscache_uniq = string;
1389 mnt->options |= NFS_OPTION_FSCACHE;
1390 break;
1387 1391
1388 /* 1392 /*
1389 * Special options 1393 * Special options
@@ -2172,7 +2176,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2172 int error = -ENOMEM; 2176 int error = -ENOMEM;
2173 2177
2174 data = nfs_alloc_parsed_mount_data(3); 2178 data = nfs_alloc_parsed_mount_data(3);
2175 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); 2179 mntfh = nfs_alloc_fhandle();
2176 if (data == NULL || mntfh == NULL) 2180 if (data == NULL || mntfh == NULL)
2177 goto out_free_fh; 2181 goto out_free_fh;
2178 2182
@@ -2187,6 +2191,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2187 if (data->version == 4) { 2191 if (data->version == 4) {
2188 error = nfs4_try_mount(flags, dev_name, data, mnt); 2192 error = nfs4_try_mount(flags, dev_name, data, mnt);
2189 kfree(data->client_address); 2193 kfree(data->client_address);
2194 kfree(data->nfs_server.export_path);
2190 goto out; 2195 goto out;
2191 } 2196 }
2192#endif /* CONFIG_NFS_V4 */ 2197#endif /* CONFIG_NFS_V4 */
@@ -2246,7 +2251,7 @@ out:
2246 kfree(data->fscache_uniq); 2251 kfree(data->fscache_uniq);
2247 security_free_mnt_opts(&data->lsm_opts); 2252 security_free_mnt_opts(&data->lsm_opts);
2248out_free_fh: 2253out_free_fh:
2249 kfree(mntfh); 2254 nfs_free_fhandle(mntfh);
2250 kfree(data); 2255 kfree(data);
2251 return error; 2256 return error;
2252 2257
@@ -2555,7 +2560,7 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2555 }; 2560 };
2556 int error = -ENOMEM; 2561 int error = -ENOMEM;
2557 2562
2558 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); 2563 mntfh = nfs_alloc_fhandle();
2559 if (data == NULL || mntfh == NULL) 2564 if (data == NULL || mntfh == NULL)
2560 goto out_free_fh; 2565 goto out_free_fh;
2561 2566
@@ -2613,7 +2618,7 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
2613out: 2618out:
2614 security_free_mnt_opts(&data->lsm_opts); 2619 security_free_mnt_opts(&data->lsm_opts);
2615out_free_fh: 2620out_free_fh:
2616 kfree(mntfh); 2621 nfs_free_fhandle(mntfh);
2617 return error; 2622 return error;
2618 2623
2619out_free: 2624out_free:
@@ -2657,7 +2662,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2657 devname = nfs_path(path->mnt->mnt_devname, 2662 devname = nfs_path(path->mnt->mnt_devname,
2658 path->mnt->mnt_root, path->dentry, 2663 path->mnt->mnt_root, path->dentry,
2659 page, PAGE_SIZE); 2664 page, PAGE_SIZE);
2660 if (devname == NULL) 2665 if (IS_ERR(devname))
2661 goto out_freepage; 2666 goto out_freepage;
2662 tmp = kstrdup(devname, GFP_KERNEL); 2667 tmp = kstrdup(devname, GFP_KERNEL);
2663 if (tmp == NULL) 2668 if (tmp == NULL)
@@ -2668,41 +2673,120 @@ out_freepage:
2668 free_page((unsigned long)page); 2673 free_page((unsigned long)page);
2669} 2674}
2670 2675
2676struct nfs_referral_count {
2677 struct list_head list;
2678 const struct task_struct *task;
2679 unsigned int referral_count;
2680};
2681
2682static LIST_HEAD(nfs_referral_count_list);
2683static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
2684
2685static struct nfs_referral_count *nfs_find_referral_count(void)
2686{
2687 struct nfs_referral_count *p;
2688
2689 list_for_each_entry(p, &nfs_referral_count_list, list) {
2690 if (p->task == current)
2691 return p;
2692 }
2693 return NULL;
2694}
2695
2696#define NFS_MAX_NESTED_REFERRALS 2
2697
2698static int nfs_referral_loop_protect(void)
2699{
2700 struct nfs_referral_count *p, *new;
2701 int ret = -ENOMEM;
2702
2703 new = kmalloc(sizeof(*new), GFP_KERNEL);
2704 if (!new)
2705 goto out;
2706 new->task = current;
2707 new->referral_count = 1;
2708
2709 ret = 0;
2710 spin_lock(&nfs_referral_count_list_lock);
2711 p = nfs_find_referral_count();
2712 if (p != NULL) {
2713 if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
2714 ret = -ELOOP;
2715 else
2716 p->referral_count++;
2717 } else {
2718 list_add(&new->list, &nfs_referral_count_list);
2719 new = NULL;
2720 }
2721 spin_unlock(&nfs_referral_count_list_lock);
2722 kfree(new);
2723out:
2724 return ret;
2725}
2726
2727static void nfs_referral_loop_unprotect(void)
2728{
2729 struct nfs_referral_count *p;
2730
2731 spin_lock(&nfs_referral_count_list_lock);
2732 p = nfs_find_referral_count();
2733 p->referral_count--;
2734 if (p->referral_count == 0)
2735 list_del(&p->list);
2736 else
2737 p = NULL;
2738 spin_unlock(&nfs_referral_count_list_lock);
2739 kfree(p);
2740}
2741
2671static int nfs_follow_remote_path(struct vfsmount *root_mnt, 2742static int nfs_follow_remote_path(struct vfsmount *root_mnt,
2672 const char *export_path, struct vfsmount *mnt_target) 2743 const char *export_path, struct vfsmount *mnt_target)
2673{ 2744{
2745 struct nameidata *nd = NULL;
2674 struct mnt_namespace *ns_private; 2746 struct mnt_namespace *ns_private;
2675 struct nameidata nd;
2676 struct super_block *s; 2747 struct super_block *s;
2677 int ret; 2748 int ret;
2678 2749
2750 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2751 if (nd == NULL)
2752 return -ENOMEM;
2753
2679 ns_private = create_mnt_ns(root_mnt); 2754 ns_private = create_mnt_ns(root_mnt);
2680 ret = PTR_ERR(ns_private); 2755 ret = PTR_ERR(ns_private);
2681 if (IS_ERR(ns_private)) 2756 if (IS_ERR(ns_private))
2682 goto out_mntput; 2757 goto out_mntput;
2683 2758
2759 ret = nfs_referral_loop_protect();
2760 if (ret != 0)
2761 goto out_put_mnt_ns;
2762
2684 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, 2763 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
2685 export_path, LOOKUP_FOLLOW, &nd); 2764 export_path, LOOKUP_FOLLOW, nd);
2686 2765
2766 nfs_referral_loop_unprotect();
2687 put_mnt_ns(ns_private); 2767 put_mnt_ns(ns_private);
2688 2768
2689 if (ret != 0) 2769 if (ret != 0)
2690 goto out_err; 2770 goto out_err;
2691 2771
2692 s = nd.path.mnt->mnt_sb; 2772 s = nd->path.mnt->mnt_sb;
2693 atomic_inc(&s->s_active); 2773 atomic_inc(&s->s_active);
2694 mnt_target->mnt_sb = s; 2774 mnt_target->mnt_sb = s;
2695 mnt_target->mnt_root = dget(nd.path.dentry); 2775 mnt_target->mnt_root = dget(nd->path.dentry);
2696 2776
2697 /* Correct the device pathname */ 2777 /* Correct the device pathname */
2698 nfs_fix_devname(&nd.path, mnt_target); 2778 nfs_fix_devname(&nd->path, mnt_target);
2699 2779
2700 path_put(&nd.path); 2780 path_put(&nd->path);
2781 kfree(nd);
2701 down_write(&s->s_umount); 2782 down_write(&s->s_umount);
2702 return 0; 2783 return 0;
2784out_put_mnt_ns:
2785 put_mnt_ns(ns_private);
2703out_mntput: 2786out_mntput:
2704 mntput(root_mnt); 2787 mntput(root_mnt);
2705out_err: 2788out_err:
2789 kfree(nd);
2706 return ret; 2790 return ret;
2707} 2791}
2708 2792
@@ -2873,17 +2957,21 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
2873 struct super_block *s; 2957 struct super_block *s;
2874 struct nfs_server *server; 2958 struct nfs_server *server;
2875 struct dentry *mntroot; 2959 struct dentry *mntroot;
2876 struct nfs_fh mntfh; 2960 struct nfs_fh *mntfh;
2877 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2961 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2878 struct nfs_sb_mountdata sb_mntdata = { 2962 struct nfs_sb_mountdata sb_mntdata = {
2879 .mntflags = flags, 2963 .mntflags = flags,
2880 }; 2964 };
2881 int error; 2965 int error = -ENOMEM;
2882 2966
2883 dprintk("--> nfs4_referral_get_sb()\n"); 2967 dprintk("--> nfs4_referral_get_sb()\n");
2884 2968
2969 mntfh = nfs_alloc_fhandle();
2970 if (mntfh == NULL)
2971 goto out_err_nofh;
2972
2885 /* create a new volume representation */ 2973 /* create a new volume representation */
2886 server = nfs4_create_referral_server(data, &mntfh); 2974 server = nfs4_create_referral_server(data, mntfh);
2887 if (IS_ERR(server)) { 2975 if (IS_ERR(server)) {
2888 error = PTR_ERR(server); 2976 error = PTR_ERR(server);
2889 goto out_err_noserver; 2977 goto out_err_noserver;
@@ -2915,7 +3003,7 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
2915 nfs_fscache_get_super_cookie(s, NULL, data); 3003 nfs_fscache_get_super_cookie(s, NULL, data);
2916 } 3004 }
2917 3005
2918 mntroot = nfs4_get_root(s, &mntfh); 3006 mntroot = nfs4_get_root(s, mntfh);
2919 if (IS_ERR(mntroot)) { 3007 if (IS_ERR(mntroot)) {
2920 error = PTR_ERR(mntroot); 3008 error = PTR_ERR(mntroot);
2921 goto error_splat_super; 3009 goto error_splat_super;
@@ -2932,12 +3020,15 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
2932 3020
2933 security_sb_clone_mnt_opts(data->sb, s); 3021 security_sb_clone_mnt_opts(data->sb, s);
2934 3022
3023 nfs_free_fhandle(mntfh);
2935 dprintk("<-- nfs4_referral_get_sb() = 0\n"); 3024 dprintk("<-- nfs4_referral_get_sb() = 0\n");
2936 return 0; 3025 return 0;
2937 3026
2938out_err_nosb: 3027out_err_nosb:
2939 nfs_free_server(server); 3028 nfs_free_server(server);
2940out_err_noserver: 3029out_err_noserver:
3030 nfs_free_fhandle(mntfh);
3031out_err_nofh:
2941 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error); 3032 dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
2942 return error; 3033 return error;
2943 3034
@@ -2946,6 +3037,7 @@ error_splat_super:
2946 bdi_unregister(&server->backing_dev_info); 3037 bdi_unregister(&server->backing_dev_info);
2947error_splat_bdi: 3038error_splat_bdi:
2948 deactivate_locked_super(s); 3039 deactivate_locked_super(s);
3040 nfs_free_fhandle(mntfh);
2949 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error); 3041 dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
2950 return error; 3042 return error;
2951} 3043}
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 6da3d3ff6edd..a2242af6a17d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -23,6 +23,7 @@ struct nfs_unlinkdata {
23 struct nfs_removeres res; 23 struct nfs_removeres res;
24 struct inode *dir; 24 struct inode *dir;
25 struct rpc_cred *cred; 25 struct rpc_cred *cred;
26 struct nfs_fattr dir_attr;
26}; 27};
27 28
28/** 29/**
@@ -169,7 +170,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
169 } 170 }
170 nfs_sb_active(dir->i_sb); 171 nfs_sb_active(dir->i_sb);
171 data->args.fh = NFS_FH(dir); 172 data->args.fh = NFS_FH(dir);
172 nfs_fattr_init(&data->res.dir_attr); 173 nfs_fattr_init(data->res.dir_attr);
173 174
174 NFS_PROTO(dir)->unlink_setup(&msg, dir); 175 NFS_PROTO(dir)->unlink_setup(&msg, dir);
175 176
@@ -259,6 +260,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
259 goto out_free; 260 goto out_free;
260 } 261 }
261 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; 262 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
263 data->res.dir_attr = &data->dir_attr;
262 264
263 status = -EBUSY; 265 status = -EBUSY;
264 spin_lock(&dentry->d_lock); 266 spin_lock(&dentry->d_lock);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de38d63aa920..3aea3ca98ab7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1201 1201
1202 1202
1203#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1203#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1204static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1205{
1206 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1207 return 1;
1208 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
1209 NFS_INO_COMMIT, nfs_wait_bit_killable,
1210 TASK_KILLABLE))
1211 return 1;
1212 return 0;
1213}
1214
1215static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1216{
1217 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1218 smp_mb__after_clear_bit();
1219 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1220}
1221
1222
1204static void nfs_commitdata_release(void *data) 1223static void nfs_commitdata_release(void *data)
1205{ 1224{
1206 struct nfs_write_data *wdata = data; 1225 struct nfs_write_data *wdata = data;
@@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1262 task = rpc_run_task(&task_setup_data); 1281 task = rpc_run_task(&task_setup_data);
1263 if (IS_ERR(task)) 1282 if (IS_ERR(task))
1264 return PTR_ERR(task); 1283 return PTR_ERR(task);
1265 if (how & FLUSH_SYNC)
1266 rpc_wait_for_completion_task(task);
1267 rpc_put_task(task); 1284 rpc_put_task(task);
1268 return 0; 1285 return 0;
1269} 1286}
@@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1294 BDI_RECLAIMABLE); 1311 BDI_RECLAIMABLE);
1295 nfs_clear_page_tag_locked(req); 1312 nfs_clear_page_tag_locked(req);
1296 } 1313 }
1314 nfs_commit_clear_lock(NFS_I(inode));
1297 return -ENOMEM; 1315 return -ENOMEM;
1298} 1316}
1299 1317
@@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
1349 next: 1367 next:
1350 nfs_clear_page_tag_locked(req); 1368 nfs_clear_page_tag_locked(req);
1351 } 1369 }
1370 nfs_commit_clear_lock(NFS_I(data->inode));
1352 nfs_commitdata_release(calldata); 1371 nfs_commitdata_release(calldata);
1353} 1372}
1354 1373
@@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
1363static int nfs_commit_inode(struct inode *inode, int how) 1382static int nfs_commit_inode(struct inode *inode, int how)
1364{ 1383{
1365 LIST_HEAD(head); 1384 LIST_HEAD(head);
1366 int res; 1385 int may_wait = how & FLUSH_SYNC;
1386 int res = 0;
1367 1387
1388 if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
1389 goto out;
1368 spin_lock(&inode->i_lock); 1390 spin_lock(&inode->i_lock);
1369 res = nfs_scan_commit(inode, &head, 0, 0); 1391 res = nfs_scan_commit(inode, &head, 0, 0);
1370 spin_unlock(&inode->i_lock); 1392 spin_unlock(&inode->i_lock);
@@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
1372 int error = nfs_commit_list(inode, &head, how); 1394 int error = nfs_commit_list(inode, &head, how);
1373 if (error < 0) 1395 if (error < 0)
1374 return error; 1396 return error;
1375 } 1397 if (may_wait)
1398 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1399 nfs_wait_bit_killable,
1400 TASK_KILLABLE);
1401 } else
1402 nfs_commit_clear_lock(NFS_I(inode));
1403out:
1376 return res; 1404 return res;
1377} 1405}
1378 1406
@@ -1444,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1444 1472
1445 BUG_ON(!PageLocked(page)); 1473 BUG_ON(!PageLocked(page));
1446 for (;;) { 1474 for (;;) {
1475 wait_on_page_writeback(page);
1447 req = nfs_page_find_request(page); 1476 req = nfs_page_find_request(page);
1448 if (req == NULL) 1477 if (req == NULL)
1449 break; 1478 break;
@@ -1478,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page)
1478 .range_start = range_start, 1507 .range_start = range_start,
1479 .range_end = range_end, 1508 .range_end = range_end,
1480 }; 1509 };
1481 struct nfs_page *req;
1482 int need_commit;
1483 int ret; 1510 int ret;
1484 1511
1485 while(PagePrivate(page)) { 1512 while(PagePrivate(page)) {
1513 wait_on_page_writeback(page);
1486 if (clear_page_dirty_for_io(page)) { 1514 if (clear_page_dirty_for_io(page)) {
1487 ret = nfs_writepage_locked(page, &wbc); 1515 ret = nfs_writepage_locked(page, &wbc);
1488 if (ret < 0) 1516 if (ret < 0)
1489 goto out_error; 1517 goto out_error;
1490 } 1518 }
1491 req = nfs_find_and_lock_request(page); 1519 ret = sync_inode(inode, &wbc);
1492 if (!req) 1520 if (ret < 0)
1493 break;
1494 if (IS_ERR(req)) {
1495 ret = PTR_ERR(req);
1496 goto out_error; 1521 goto out_error;
1497 }
1498 need_commit = test_bit(PG_CLEAN, &req->wb_flags);
1499 nfs_clear_page_tag_locked(req);
1500 if (need_commit) {
1501 ret = nfs_commit_inode(inode, FLUSH_SYNC);
1502 if (ret < 0)
1503 goto out_error;
1504 }
1505 } 1522 }
1506 return 0; 1523 return 0;
1507out_error: 1524out_error:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 872a5ef550c7..c2a4f71d87dd 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -259,10 +259,9 @@ static struct cache_detail svc_expkey_cache = {
259 .alloc = expkey_alloc, 259 .alloc = expkey_alloc,
260}; 260};
261 261
262static struct svc_expkey * 262static int
263svc_expkey_lookup(struct svc_expkey *item) 263svc_expkey_hash(struct svc_expkey *item)
264{ 264{
265 struct cache_head *ch;
266 int hash = item->ek_fsidtype; 265 int hash = item->ek_fsidtype;
267 char * cp = (char*)item->ek_fsid; 266 char * cp = (char*)item->ek_fsid;
268 int len = key_len(item->ek_fsidtype); 267 int len = key_len(item->ek_fsidtype);
@@ -270,6 +269,14 @@ svc_expkey_lookup(struct svc_expkey *item)
270 hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); 269 hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
271 hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); 270 hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
272 hash &= EXPKEY_HASHMASK; 271 hash &= EXPKEY_HASHMASK;
272 return hash;
273}
274
275static struct svc_expkey *
276svc_expkey_lookup(struct svc_expkey *item)
277{
278 struct cache_head *ch;
279 int hash = svc_expkey_hash(item);
273 280
274 ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h, 281 ch = sunrpc_cache_lookup(&svc_expkey_cache, &item->h,
275 hash); 282 hash);
@@ -283,13 +290,7 @@ static struct svc_expkey *
283svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old) 290svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old)
284{ 291{
285 struct cache_head *ch; 292 struct cache_head *ch;
286 int hash = new->ek_fsidtype; 293 int hash = svc_expkey_hash(new);
287 char * cp = (char*)new->ek_fsid;
288 int len = key_len(new->ek_fsidtype);
289
290 hash ^= hash_mem(cp, len, EXPKEY_HASHBITS);
291 hash ^= hash_ptr(new->ek_client, EXPKEY_HASHBITS);
292 hash &= EXPKEY_HASHMASK;
293 294
294 ch = sunrpc_cache_update(&svc_expkey_cache, &new->h, 295 ch = sunrpc_cache_update(&svc_expkey_cache, &new->h,
295 &old->h, hash); 296 &old->h, hash);
@@ -738,14 +739,22 @@ struct cache_detail svc_export_cache = {
738 .alloc = svc_export_alloc, 739 .alloc = svc_export_alloc,
739}; 740};
740 741
741static struct svc_export * 742static int
742svc_export_lookup(struct svc_export *exp) 743svc_export_hash(struct svc_export *exp)
743{ 744{
744 struct cache_head *ch;
745 int hash; 745 int hash;
746
746 hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS); 747 hash = hash_ptr(exp->ex_client, EXPORT_HASHBITS);
747 hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS); 748 hash ^= hash_ptr(exp->ex_path.dentry, EXPORT_HASHBITS);
748 hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS); 749 hash ^= hash_ptr(exp->ex_path.mnt, EXPORT_HASHBITS);
750 return hash;
751}
752
753static struct svc_export *
754svc_export_lookup(struct svc_export *exp)
755{
756 struct cache_head *ch;
757 int hash = svc_export_hash(exp);
749 758
750 ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h, 759 ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
751 hash); 760 hash);
@@ -759,10 +768,7 @@ static struct svc_export *
759svc_export_update(struct svc_export *new, struct svc_export *old) 768svc_export_update(struct svc_export *new, struct svc_export *old)
760{ 769{
761 struct cache_head *ch; 770 struct cache_head *ch;
762 int hash; 771 int hash = svc_export_hash(old);
763 hash = hash_ptr(old->ex_client, EXPORT_HASHBITS);
764 hash ^= hash_ptr(old->ex_path.dentry, EXPORT_HASHBITS);
765 hash ^= hash_ptr(old->ex_path.mnt, EXPORT_HASHBITS);
766 772
767 ch = sunrpc_cache_update(&svc_export_cache, &new->h, 773 ch = sunrpc_cache_update(&svc_export_cache, &new->h,
768 &old->h, 774 &old->h,
@@ -1071,9 +1077,9 @@ exp_export(struct nfsctl_export *nxp)
1071 err = 0; 1077 err = 0;
1072finish: 1078finish:
1073 kfree(new.ex_pathname); 1079 kfree(new.ex_pathname);
1074 if (exp) 1080 if (!IS_ERR_OR_NULL(exp))
1075 exp_put(exp); 1081 exp_put(exp);
1076 if (fsid_key && !IS_ERR(fsid_key)) 1082 if (!IS_ERR_OR_NULL(fsid_key))
1077 cache_put(&fsid_key->h, &svc_expkey_cache); 1083 cache_put(&fsid_key->h, &svc_expkey_cache);
1078 path_put(&path); 1084 path_put(&path);
1079out_put_clp: 1085out_put_clp:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7e32bd394e86..eb78e7e22077 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -32,6 +32,7 @@
32 */ 32 */
33 33
34#include <linux/sunrpc/clnt.h> 34#include <linux/sunrpc/clnt.h>
35#include <linux/sunrpc/svc_xprt.h>
35#include <linux/slab.h> 36#include <linux/slab.h>
36#include "nfsd.h" 37#include "nfsd.h"
37#include "state.h" 38#include "state.h"
@@ -79,11 +80,6 @@ enum nfs_cb_opnum4 {
79 cb_sequence_dec_sz + \ 80 cb_sequence_dec_sz + \
80 op_dec_sz) 81 op_dec_sz)
81 82
82struct nfs4_rpc_args {
83 void *args_op;
84 struct nfsd4_cb_sequence args_seq;
85};
86
87/* 83/*
88* Generic encode routines from fs/nfs/nfs4xdr.c 84* Generic encode routines from fs/nfs/nfs4xdr.c
89*/ 85*/
@@ -428,13 +424,19 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
428}; 424};
429 425
430static struct rpc_version nfs_cb_version4 = { 426static struct rpc_version nfs_cb_version4 = {
427/*
428 * Note on the callback rpc program version number: despite language in rfc
429 * 5661 section 18.36.3 requiring servers to use 4 in this field, the
430 * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
431 * in practice that appears to be what implementations use. The section
432 * 18.36.3 language is expected to be fixed in an erratum.
433 */
431 .number = 1, 434 .number = 1,
432 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures), 435 .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
433 .procs = nfs4_cb_procedures 436 .procs = nfs4_cb_procedures
434}; 437};
435 438
436static struct rpc_version * nfs_cb_version[] = { 439static struct rpc_version * nfs_cb_version[] = {
437 NULL,
438 &nfs_cb_version4, 440 &nfs_cb_version4,
439}; 441};
440 442
@@ -456,15 +458,14 @@ static struct rpc_program cb_program = {
456 458
457static int max_cb_time(void) 459static int max_cb_time(void)
458{ 460{
459 return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ; 461 return max(nfsd4_lease/10, (time_t)1) * HZ;
460} 462}
461 463
462/* Reference counting, callback cleanup, etc., all look racy as heck. 464/* Reference counting, callback cleanup, etc., all look racy as heck.
463 * And why is cb_set an atomic? */ 465 * And why is cl_cb_set an atomic? */
464 466
465int setup_callback_client(struct nfs4_client *clp) 467int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
466{ 468{
467 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
468 struct rpc_timeout timeparms = { 469 struct rpc_timeout timeparms = {
469 .to_initval = max_cb_time(), 470 .to_initval = max_cb_time(),
470 .to_retries = 0, 471 .to_retries = 0,
@@ -476,7 +477,7 @@ int setup_callback_client(struct nfs4_client *clp)
476 .timeout = &timeparms, 477 .timeout = &timeparms,
477 .program = &cb_program, 478 .program = &cb_program,
478 .prognumber = cb->cb_prog, 479 .prognumber = cb->cb_prog,
479 .version = nfs_cb_version[1]->number, 480 .version = 0,
480 .authflavor = clp->cl_flavor, 481 .authflavor = clp->cl_flavor,
481 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 482 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
482 .client_name = clp->cl_principal, 483 .client_name = clp->cl_principal,
@@ -486,7 +487,7 @@ int setup_callback_client(struct nfs4_client *clp)
486 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 487 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
487 return -EINVAL; 488 return -EINVAL;
488 if (cb->cb_minorversion) { 489 if (cb->cb_minorversion) {
489 args.bc_xprt = clp->cl_cb_xprt; 490 args.bc_xprt = cb->cb_xprt;
490 args.protocol = XPRT_TRANSPORT_BC_TCP; 491 args.protocol = XPRT_TRANSPORT_BC_TCP;
491 } 492 }
492 /* Create RPC client */ 493 /* Create RPC client */
@@ -496,7 +497,7 @@ int setup_callback_client(struct nfs4_client *clp)
496 PTR_ERR(client)); 497 PTR_ERR(client));
497 return PTR_ERR(client); 498 return PTR_ERR(client);
498 } 499 }
499 cb->cb_client = client; 500 nfsd4_set_callback_client(clp, client);
500 return 0; 501 return 0;
501 502
502} 503}
@@ -514,8 +515,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
514 if (task->tk_status) 515 if (task->tk_status)
515 warn_no_callback_path(clp, task->tk_status); 516 warn_no_callback_path(clp, task->tk_status);
516 else 517 else
517 atomic_set(&clp->cl_cb_conn.cb_set, 1); 518 atomic_set(&clp->cl_cb_set, 1);
518 put_nfs4_client(clp);
519} 519}
520 520
521static const struct rpc_call_ops nfsd4_cb_probe_ops = { 521static const struct rpc_call_ops nfsd4_cb_probe_ops = {
@@ -537,7 +537,6 @@ int set_callback_cred(void)
537 537
538void do_probe_callback(struct nfs4_client *clp) 538void do_probe_callback(struct nfs4_client *clp)
539{ 539{
540 struct nfs4_cb_conn *cb = &clp->cl_cb_conn;
541 struct rpc_message msg = { 540 struct rpc_message msg = {
542 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], 541 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
543 .rpc_argp = clp, 542 .rpc_argp = clp,
@@ -545,34 +544,27 @@ void do_probe_callback(struct nfs4_client *clp)
545 }; 544 };
546 int status; 545 int status;
547 546
548 status = rpc_call_async(cb->cb_client, &msg, 547 status = rpc_call_async(clp->cl_cb_client, &msg,
549 RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 548 RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
550 &nfsd4_cb_probe_ops, (void *)clp); 549 &nfsd4_cb_probe_ops, (void *)clp);
551 if (status) { 550 if (status)
552 warn_no_callback_path(clp, status); 551 warn_no_callback_path(clp, status);
553 put_nfs4_client(clp);
554 }
555} 552}
556 553
557/* 554/*
558 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 555 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
559 */ 556 */
560void 557void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
561nfsd4_probe_callback(struct nfs4_client *clp)
562{ 558{
563 int status; 559 int status;
564 560
565 BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set)); 561 BUG_ON(atomic_read(&clp->cl_cb_set));
566 562
567 status = setup_callback_client(clp); 563 status = setup_callback_client(clp, cb);
568 if (status) { 564 if (status) {
569 warn_no_callback_path(clp, status); 565 warn_no_callback_path(clp, status);
570 return; 566 return;
571 } 567 }
572
573 /* the task holds a reference to the nfs4_client struct */
574 atomic_inc(&clp->cl_count);
575
576 do_probe_callback(clp); 568 do_probe_callback(clp);
577} 569}
578 570
@@ -658,18 +650,32 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
658 } 650 }
659} 651}
660 652
653
661static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 654static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
662{ 655{
663 struct nfs4_delegation *dp = calldata; 656 struct nfs4_delegation *dp = calldata;
664 struct nfs4_client *clp = dp->dl_client; 657 struct nfs4_client *clp = dp->dl_client;
658 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
665 659
666 nfsd4_cb_done(task, calldata); 660 nfsd4_cb_done(task, calldata);
667 661
662 if (current_rpc_client == NULL) {
663 /* We're shutting down; give up. */
664 /* XXX: err, or is it ok just to fall through
665 * and rpc_restart_call? */
666 return;
667 }
668
668 switch (task->tk_status) { 669 switch (task->tk_status) {
669 case -EIO: 670 case -EIO:
670 /* Network partition? */ 671 /* Network partition? */
671 atomic_set(&clp->cl_cb_conn.cb_set, 0); 672 atomic_set(&clp->cl_cb_set, 0);
672 warn_no_callback_path(clp, task->tk_status); 673 warn_no_callback_path(clp, task->tk_status);
674 if (current_rpc_client != task->tk_client) {
675 /* queue a callback on the new connection: */
676 nfsd4_cb_recall(dp);
677 return;
678 }
673 case -EBADHANDLE: 679 case -EBADHANDLE:
674 case -NFS4ERR_BAD_STATEID: 680 case -NFS4ERR_BAD_STATEID:
675 /* Race: client probably got cb_recall 681 /* Race: client probably got cb_recall
@@ -677,7 +683,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
677 break; 683 break;
678 default: 684 default:
679 /* success, or error we can't handle */ 685 /* success, or error we can't handle */
680 goto done; 686 return;
681 } 687 }
682 if (dp->dl_retries--) { 688 if (dp->dl_retries--) {
683 rpc_delay(task, 2*HZ); 689 rpc_delay(task, 2*HZ);
@@ -685,20 +691,16 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
685 rpc_restart_call(task); 691 rpc_restart_call(task);
686 return; 692 return;
687 } else { 693 } else {
688 atomic_set(&clp->cl_cb_conn.cb_set, 0); 694 atomic_set(&clp->cl_cb_set, 0);
689 warn_no_callback_path(clp, task->tk_status); 695 warn_no_callback_path(clp, task->tk_status);
690 } 696 }
691done:
692 kfree(task->tk_msg.rpc_argp);
693} 697}
694 698
695static void nfsd4_cb_recall_release(void *calldata) 699static void nfsd4_cb_recall_release(void *calldata)
696{ 700{
697 struct nfs4_delegation *dp = calldata; 701 struct nfs4_delegation *dp = calldata;
698 struct nfs4_client *clp = dp->dl_client;
699 702
700 nfs4_put_delegation(dp); 703 nfs4_put_delegation(dp);
701 put_nfs4_client(clp);
702} 704}
703 705
704static const struct rpc_call_ops nfsd4_cb_recall_ops = { 706static const struct rpc_call_ops nfsd4_cb_recall_ops = {
@@ -707,33 +709,75 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
707 .rpc_release = nfsd4_cb_recall_release, 709 .rpc_release = nfsd4_cb_recall_release,
708}; 710};
709 711
712static struct workqueue_struct *callback_wq;
713
714int nfsd4_create_callback_queue(void)
715{
716 callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
717 if (!callback_wq)
718 return -ENOMEM;
719 return 0;
720}
721
722void nfsd4_destroy_callback_queue(void)
723{
724 destroy_workqueue(callback_wq);
725}
726
727/* must be called under the state lock */
728void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new)
729{
730 struct rpc_clnt *old = clp->cl_cb_client;
731
732 clp->cl_cb_client = new;
733 /*
734 * After this, any work that saw the old value of cl_cb_client will
735 * be gone:
736 */
737 flush_workqueue(callback_wq);
738 /* So we can safely shut it down: */
739 if (old)
740 rpc_shutdown_client(old);
741}
742
710/* 743/*
711 * called with dp->dl_count inc'ed. 744 * called with dp->dl_count inc'ed.
712 */ 745 */
713void 746static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
714nfsd4_cb_recall(struct nfs4_delegation *dp)
715{ 747{
716 struct nfs4_client *clp = dp->dl_client; 748 struct nfs4_client *clp = dp->dl_client;
717 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; 749 struct rpc_clnt *clnt = clp->cl_cb_client;
718 struct nfs4_rpc_args *args; 750 struct nfs4_rpc_args *args = &dp->dl_recall.cb_args;
719 struct rpc_message msg = { 751 struct rpc_message msg = {
720 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], 752 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
721 .rpc_cred = callback_cred 753 .rpc_cred = callback_cred
722 }; 754 };
723 int status = -ENOMEM; 755 int status;
756
757 if (clnt == NULL)
758 return; /* Client is shutting down; give up. */
724 759
725 args = kzalloc(sizeof(*args), GFP_KERNEL);
726 if (!args)
727 goto out;
728 args->args_op = dp; 760 args->args_op = dp;
729 msg.rpc_argp = args; 761 msg.rpc_argp = args;
730 dp->dl_retries = 1; 762 dp->dl_retries = 1;
731 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, 763 status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT,
732 &nfsd4_cb_recall_ops, dp); 764 &nfsd4_cb_recall_ops, dp);
733out: 765 if (status)
734 if (status) {
735 kfree(args);
736 put_nfs4_client(clp);
737 nfs4_put_delegation(dp); 766 nfs4_put_delegation(dp);
738 } 767}
768
769void nfsd4_do_callback_rpc(struct work_struct *w)
770{
771 /* XXX: for now, just send off delegation recall. */
772 /* In future, generalize to handle any sort of callback. */
773 struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work);
774 struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
775
776 _nfsd4_cb_recall(dp);
777}
778
779
780void nfsd4_cb_recall(struct nfs4_delegation *dp)
781{
782 queue_work(callback_wq, &dp->dl_recall.cb_work);
739} 783}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2ab9e8501bfe..59ec449b0c7f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -969,20 +969,36 @@ static struct nfsd4_operation nfsd4_ops[];
969static const char *nfsd4_op_name(unsigned opnum); 969static const char *nfsd4_op_name(unsigned opnum);
970 970
971/* 971/*
972 * Enforce NFSv4.1 COMPOUND ordering rules. 972 * Enforce NFSv4.1 COMPOUND ordering rules:
973 * 973 *
974 * TODO: 974 * Also note, enforced elsewhere:
975 * - enforce NFS4ERR_NOT_ONLY_OP, 975 * - SEQUENCE other than as first op results in
976 * - DESTROY_SESSION MUST be the final operation in the COMPOUND request. 976 * NFS4ERR_SEQUENCE_POS. (Enforced in nfsd4_sequence().)
977 * - BIND_CONN_TO_SESSION must be the only op in its compound
978 * (Will be enforced in nfsd4_bind_conn_to_session().)
979 * - DESTROY_SESSION must be the final operation in a compound, if
980 * sessionid's in SEQUENCE and DESTROY_SESSION are the same.
981 * (Enforced in nfsd4_destroy_session().)
977 */ 982 */
978static bool nfs41_op_ordering_ok(struct nfsd4_compoundargs *args) 983static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
979{ 984{
980 if (args->minorversion && args->opcnt > 0) { 985 struct nfsd4_op *op = &args->ops[0];
981 struct nfsd4_op *op = &args->ops[0]; 986
982 return (op->status == nfserr_op_illegal) || 987 /* These ordering requirements don't apply to NFSv4.0: */
983 (nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP); 988 if (args->minorversion == 0)
984 } 989 return nfs_ok;
985 return true; 990 /* This is weird, but OK, not our problem: */
991 if (args->opcnt == 0)
992 return nfs_ok;
993 if (op->status == nfserr_op_illegal)
994 return nfs_ok;
995 if (!(nfsd4_ops[op->opnum].op_flags & ALLOWED_AS_FIRST_OP))
996 return nfserr_op_not_in_session;
997 if (op->opnum == OP_SEQUENCE)
998 return nfs_ok;
999 if (args->opcnt != 1)
1000 return nfserr_not_only_op;
1001 return nfs_ok;
986} 1002}
987 1003
988/* 1004/*
@@ -1012,6 +1028,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1012 resp->rqstp = rqstp; 1028 resp->rqstp = rqstp;
1013 resp->cstate.minorversion = args->minorversion; 1029 resp->cstate.minorversion = args->minorversion;
1014 resp->cstate.replay_owner = NULL; 1030 resp->cstate.replay_owner = NULL;
1031 resp->cstate.session = NULL;
1015 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
1016 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
1017 /* Use the deferral mechanism only for NFSv4.0 compounds */ 1034 /* Use the deferral mechanism only for NFSv4.0 compounds */
@@ -1024,13 +1041,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1024 if (args->minorversion > nfsd_supported_minorversion) 1041 if (args->minorversion > nfsd_supported_minorversion)
1025 goto out; 1042 goto out;
1026 1043
1027 if (!nfs41_op_ordering_ok(args)) { 1044 status = nfs41_check_op_ordering(args);
1045 if (status) {
1028 op = &args->ops[0]; 1046 op = &args->ops[0];
1029 op->status = nfserr_sequence_pos; 1047 op->status = status;
1030 goto encode_op; 1048 goto encode_op;
1031 } 1049 }
1032 1050
1033 status = nfs_ok;
1034 while (!status && resp->opcnt < args->opcnt) { 1051 while (!status && resp->opcnt < args->opcnt) {
1035 op = &args->ops[resp->opcnt++]; 1052 op = &args->ops[resp->opcnt++];
1036 1053
@@ -1295,6 +1312,11 @@ static struct nfsd4_operation nfsd4_ops[] = {
1295 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, 1312 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
1296 .op_name = "OP_SEQUENCE", 1313 .op_name = "OP_SEQUENCE",
1297 }, 1314 },
1315 [OP_RECLAIM_COMPLETE] = {
1316 .op_func = (nfsd4op_func)nfsd4_reclaim_complete,
1317 .op_flags = ALLOWED_WITHOUT_FH,
1318 .op_name = "OP_RECLAIM_COMPLETE",
1319 },
1298}; 1320};
1299 1321
1300static const char *nfsd4_op_name(unsigned opnum) 1322static const char *nfsd4_op_name(unsigned opnum)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6a8fedaa4f55..12f7109720c2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -45,8 +45,8 @@
45#define NFSDDBG_FACILITY NFSDDBG_PROC 45#define NFSDDBG_FACILITY NFSDDBG_PROC
46 46
47/* Globals */ 47/* Globals */
48static time_t lease_time = 90; /* default lease time */ 48time_t nfsd4_lease = 90; /* default lease time */
49static time_t user_lease_time = 90; 49time_t nfsd4_grace = 90;
50static time_t boot_time; 50static time_t boot_time;
51static u32 current_ownerid = 1; 51static u32 current_ownerid = 1;
52static u32 current_fileid = 1; 52static u32 current_fileid = 1;
@@ -190,7 +190,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
190 dp->dl_vfs_file = stp->st_vfs_file; 190 dp->dl_vfs_file = stp->st_vfs_file;
191 dp->dl_type = type; 191 dp->dl_type = type;
192 dp->dl_ident = cb->cb_ident; 192 dp->dl_ident = cb->cb_ident;
193 dp->dl_stateid.si_boot = get_seconds(); 193 dp->dl_stateid.si_boot = boot_time;
194 dp->dl_stateid.si_stateownerid = current_delegid++; 194 dp->dl_stateid.si_stateownerid = current_delegid++;
195 dp->dl_stateid.si_fileid = 0; 195 dp->dl_stateid.si_fileid = 0;
196 dp->dl_stateid.si_generation = 0; 196 dp->dl_stateid.si_generation = 0;
@@ -199,6 +199,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
199 atomic_set(&dp->dl_count, 1); 199 atomic_set(&dp->dl_count, 1);
200 list_add(&dp->dl_perfile, &fp->fi_delegations); 200 list_add(&dp->dl_perfile, &fp->fi_delegations);
201 list_add(&dp->dl_perclnt, &clp->cl_delegations); 201 list_add(&dp->dl_perclnt, &clp->cl_delegations);
202 INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
202 return dp; 203 return dp;
203} 204}
204 205
@@ -249,6 +250,9 @@ unhash_delegation(struct nfs4_delegation *dp)
249 * SETCLIENTID state 250 * SETCLIENTID state
250 */ 251 */
251 252
253/* client_lock protects the client lru list and session hash table */
254static DEFINE_SPINLOCK(client_lock);
255
252/* Hash tables for nfs4_clientid state */ 256/* Hash tables for nfs4_clientid state */
253#define CLIENT_HASH_BITS 4 257#define CLIENT_HASH_BITS 4
254#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) 258#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
@@ -367,7 +371,6 @@ static void release_openowner(struct nfs4_stateowner *sop)
367 nfs4_put_stateowner(sop); 371 nfs4_put_stateowner(sop);
368} 372}
369 373
370static DEFINE_SPINLOCK(sessionid_lock);
371#define SESSION_HASH_SIZE 512 374#define SESSION_HASH_SIZE 512
372static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE]; 375static struct list_head sessionid_hashtbl[SESSION_HASH_SIZE];
373 376
@@ -565,10 +568,10 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
565 568
566 new->se_flags = cses->flags; 569 new->se_flags = cses->flags;
567 kref_init(&new->se_ref); 570 kref_init(&new->se_ref);
568 spin_lock(&sessionid_lock); 571 spin_lock(&client_lock);
569 list_add(&new->se_hash, &sessionid_hashtbl[idx]); 572 list_add(&new->se_hash, &sessionid_hashtbl[idx]);
570 list_add(&new->se_perclnt, &clp->cl_sessions); 573 list_add(&new->se_perclnt, &clp->cl_sessions);
571 spin_unlock(&sessionid_lock); 574 spin_unlock(&client_lock);
572 575
573 status = nfs_ok; 576 status = nfs_ok;
574out: 577out:
@@ -579,7 +582,7 @@ out_free:
579 goto out; 582 goto out;
580} 583}
581 584
582/* caller must hold sessionid_lock */ 585/* caller must hold client_lock */
583static struct nfsd4_session * 586static struct nfsd4_session *
584find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid) 587find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
585{ 588{
@@ -602,7 +605,7 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid)
602 return NULL; 605 return NULL;
603} 606}
604 607
605/* caller must hold sessionid_lock */ 608/* caller must hold client_lock */
606static void 609static void
607unhash_session(struct nfsd4_session *ses) 610unhash_session(struct nfsd4_session *ses)
608{ 611{
@@ -610,15 +613,6 @@ unhash_session(struct nfsd4_session *ses)
610 list_del(&ses->se_perclnt); 613 list_del(&ses->se_perclnt);
611} 614}
612 615
613static void
614release_session(struct nfsd4_session *ses)
615{
616 spin_lock(&sessionid_lock);
617 unhash_session(ses);
618 spin_unlock(&sessionid_lock);
619 nfsd4_put_session(ses);
620}
621
622void 616void
623free_session(struct kref *kref) 617free_session(struct kref *kref)
624{ 618{
@@ -634,9 +628,18 @@ free_session(struct kref *kref)
634 kfree(ses); 628 kfree(ses);
635} 629}
636 630
631/* must be called under the client_lock */
637static inline void 632static inline void
638renew_client(struct nfs4_client *clp) 633renew_client_locked(struct nfs4_client *clp)
639{ 634{
635 if (is_client_expired(clp)) {
636 dprintk("%s: client (clientid %08x/%08x) already expired\n",
637 __func__,
638 clp->cl_clientid.cl_boot,
639 clp->cl_clientid.cl_id);
640 return;
641 }
642
640 /* 643 /*
641 * Move client to the end to the LRU list. 644 * Move client to the end to the LRU list.
642 */ 645 */
@@ -647,6 +650,14 @@ renew_client(struct nfs4_client *clp)
647 clp->cl_time = get_seconds(); 650 clp->cl_time = get_seconds();
648} 651}
649 652
653static inline void
654renew_client(struct nfs4_client *clp)
655{
656 spin_lock(&client_lock);
657 renew_client_locked(clp);
658 spin_unlock(&client_lock);
659}
660
650/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ 661/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
651static int 662static int
652STALE_CLIENTID(clientid_t *clid) 663STALE_CLIENTID(clientid_t *clid)
@@ -680,27 +691,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
680 return clp; 691 return clp;
681} 692}
682 693
683static void
684shutdown_callback_client(struct nfs4_client *clp)
685{
686 struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client;
687
688 if (clnt) {
689 /*
690 * Callback threads take a reference on the client, so there
691 * should be no outstanding callbacks at this point.
692 */
693 clp->cl_cb_conn.cb_client = NULL;
694 rpc_shutdown_client(clnt);
695 }
696}
697
698static inline void 694static inline void
699free_client(struct nfs4_client *clp) 695free_client(struct nfs4_client *clp)
700{ 696{
701 shutdown_callback_client(clp);
702 if (clp->cl_cb_xprt)
703 svc_xprt_put(clp->cl_cb_xprt);
704 if (clp->cl_cred.cr_group_info) 697 if (clp->cl_cred.cr_group_info)
705 put_group_info(clp->cl_cred.cr_group_info); 698 put_group_info(clp->cl_cred.cr_group_info);
706 kfree(clp->cl_principal); 699 kfree(clp->cl_principal);
@@ -709,10 +702,34 @@ free_client(struct nfs4_client *clp)
709} 702}
710 703
711void 704void
712put_nfs4_client(struct nfs4_client *clp) 705release_session_client(struct nfsd4_session *session)
713{ 706{
714 if (atomic_dec_and_test(&clp->cl_count)) 707 struct nfs4_client *clp = session->se_client;
708
709 if (!atomic_dec_and_lock(&clp->cl_refcount, &client_lock))
710 return;
711 if (is_client_expired(clp)) {
715 free_client(clp); 712 free_client(clp);
713 session->se_client = NULL;
714 } else
715 renew_client_locked(clp);
716 spin_unlock(&client_lock);
717 nfsd4_put_session(session);
718}
719
720/* must be called under the client_lock */
721static inline void
722unhash_client_locked(struct nfs4_client *clp)
723{
724 mark_client_expired(clp);
725 list_del(&clp->cl_lru);
726 while (!list_empty(&clp->cl_sessions)) {
727 struct nfsd4_session *ses;
728 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
729 se_perclnt);
730 unhash_session(ses);
731 nfsd4_put_session(ses);
732 }
716} 733}
717 734
718static void 735static void
@@ -722,9 +739,6 @@ expire_client(struct nfs4_client *clp)
722 struct nfs4_delegation *dp; 739 struct nfs4_delegation *dp;
723 struct list_head reaplist; 740 struct list_head reaplist;
724 741
725 dprintk("NFSD: expire_client cl_count %d\n",
726 atomic_read(&clp->cl_count));
727
728 INIT_LIST_HEAD(&reaplist); 742 INIT_LIST_HEAD(&reaplist);
729 spin_lock(&recall_lock); 743 spin_lock(&recall_lock);
730 while (!list_empty(&clp->cl_delegations)) { 744 while (!list_empty(&clp->cl_delegations)) {
@@ -740,20 +754,20 @@ expire_client(struct nfs4_client *clp)
740 list_del_init(&dp->dl_recall_lru); 754 list_del_init(&dp->dl_recall_lru);
741 unhash_delegation(dp); 755 unhash_delegation(dp);
742 } 756 }
743 list_del(&clp->cl_idhash);
744 list_del(&clp->cl_strhash);
745 list_del(&clp->cl_lru);
746 while (!list_empty(&clp->cl_openowners)) { 757 while (!list_empty(&clp->cl_openowners)) {
747 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 758 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
748 release_openowner(sop); 759 release_openowner(sop);
749 } 760 }
750 while (!list_empty(&clp->cl_sessions)) { 761 nfsd4_set_callback_client(clp, NULL);
751 struct nfsd4_session *ses; 762 if (clp->cl_cb_conn.cb_xprt)
752 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, 763 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
753 se_perclnt); 764 list_del(&clp->cl_idhash);
754 release_session(ses); 765 list_del(&clp->cl_strhash);
755 } 766 spin_lock(&client_lock);
756 put_nfs4_client(clp); 767 unhash_client_locked(clp);
768 if (atomic_read(&clp->cl_refcount) == 0)
769 free_client(clp);
770 spin_unlock(&client_lock);
757} 771}
758 772
759static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) 773static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -839,14 +853,15 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
839 } 853 }
840 854
841 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); 855 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
842 atomic_set(&clp->cl_count, 1); 856 atomic_set(&clp->cl_refcount, 0);
843 atomic_set(&clp->cl_cb_conn.cb_set, 0); 857 atomic_set(&clp->cl_cb_set, 0);
844 INIT_LIST_HEAD(&clp->cl_idhash); 858 INIT_LIST_HEAD(&clp->cl_idhash);
845 INIT_LIST_HEAD(&clp->cl_strhash); 859 INIT_LIST_HEAD(&clp->cl_strhash);
846 INIT_LIST_HEAD(&clp->cl_openowners); 860 INIT_LIST_HEAD(&clp->cl_openowners);
847 INIT_LIST_HEAD(&clp->cl_delegations); 861 INIT_LIST_HEAD(&clp->cl_delegations);
848 INIT_LIST_HEAD(&clp->cl_sessions); 862 INIT_LIST_HEAD(&clp->cl_sessions);
849 INIT_LIST_HEAD(&clp->cl_lru); 863 INIT_LIST_HEAD(&clp->cl_lru);
864 clp->cl_time = get_seconds();
850 clear_bit(0, &clp->cl_cb_slot_busy); 865 clear_bit(0, &clp->cl_cb_slot_busy);
851 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 866 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
852 copy_verf(clp, verf); 867 copy_verf(clp, verf);
@@ -877,8 +892,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
877 list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); 892 list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
878 idhashval = clientid_hashval(clp->cl_clientid.cl_id); 893 idhashval = clientid_hashval(clp->cl_clientid.cl_id);
879 list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); 894 list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
880 list_add_tail(&clp->cl_lru, &client_lru); 895 renew_client(clp);
881 clp->cl_time = get_seconds();
882} 896}
883 897
884static void 898static void
@@ -888,10 +902,9 @@ move_to_confirmed(struct nfs4_client *clp)
888 unsigned int strhashval; 902 unsigned int strhashval;
889 903
890 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); 904 dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
891 list_del_init(&clp->cl_strhash);
892 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); 905 list_move(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
893 strhashval = clientstr_hashval(clp->cl_recdir); 906 strhashval = clientstr_hashval(clp->cl_recdir);
894 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 907 list_move(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
895 renew_client(clp); 908 renew_client(clp);
896} 909}
897 910
@@ -1327,15 +1340,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1327 cs_slot->sl_seqid++; /* from 0 to 1 */ 1340 cs_slot->sl_seqid++; /* from 0 to 1 */
1328 move_to_confirmed(unconf); 1341 move_to_confirmed(unconf);
1329 1342
1330 /*
1331 * We do not support RDMA or persistent sessions
1332 */
1333 cr_ses->flags &= ~SESSION4_PERSIST;
1334 cr_ses->flags &= ~SESSION4_RDMA;
1335
1336 if (cr_ses->flags & SESSION4_BACK_CHAN) { 1343 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1337 unconf->cl_cb_xprt = rqstp->rq_xprt; 1344 unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
1338 svc_xprt_get(unconf->cl_cb_xprt); 1345 svc_xprt_get(rqstp->rq_xprt);
1339 rpc_copy_addr( 1346 rpc_copy_addr(
1340 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, 1347 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1341 sa); 1348 sa);
@@ -1344,7 +1351,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1344 cstate->minorversion; 1351 cstate->minorversion;
1345 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; 1352 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1346 unconf->cl_cb_seq_nr = 1; 1353 unconf->cl_cb_seq_nr = 1;
1347 nfsd4_probe_callback(unconf); 1354 nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
1348 } 1355 }
1349 conf = unconf; 1356 conf = unconf;
1350 } else { 1357 } else {
@@ -1352,6 +1359,12 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1352 goto out; 1359 goto out;
1353 } 1360 }
1354 1361
1362 /*
1363 * We do not support RDMA or persistent sessions
1364 */
1365 cr_ses->flags &= ~SESSION4_PERSIST;
1366 cr_ses->flags &= ~SESSION4_RDMA;
1367
1355 status = alloc_init_session(rqstp, conf, cr_ses); 1368 status = alloc_init_session(rqstp, conf, cr_ses);
1356 if (status) 1369 if (status)
1357 goto out; 1370 goto out;
@@ -1369,6 +1382,21 @@ out:
1369 return status; 1382 return status;
1370} 1383}
1371 1384
1385static bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
1386{
1387 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1388 struct nfsd4_compoundargs *argp = rqstp->rq_argp;
1389
1390 return argp->opcnt == resp->opcnt;
1391}
1392
1393static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
1394{
1395 if (!session)
1396 return 0;
1397 return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
1398}
1399
1372__be32 1400__be32
1373nfsd4_destroy_session(struct svc_rqst *r, 1401nfsd4_destroy_session(struct svc_rqst *r,
1374 struct nfsd4_compound_state *cstate, 1402 struct nfsd4_compound_state *cstate,
@@ -1384,19 +1412,25 @@ nfsd4_destroy_session(struct svc_rqst *r,
1384 * - Do we need to clear any callback info from previous session? 1412 * - Do we need to clear any callback info from previous session?
1385 */ 1413 */
1386 1414
1415 if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
1416 if (!nfsd4_last_compound_op(r))
1417 return nfserr_not_only_op;
1418 }
1387 dump_sessionid(__func__, &sessionid->sessionid); 1419 dump_sessionid(__func__, &sessionid->sessionid);
1388 spin_lock(&sessionid_lock); 1420 spin_lock(&client_lock);
1389 ses = find_in_sessionid_hashtbl(&sessionid->sessionid); 1421 ses = find_in_sessionid_hashtbl(&sessionid->sessionid);
1390 if (!ses) { 1422 if (!ses) {
1391 spin_unlock(&sessionid_lock); 1423 spin_unlock(&client_lock);
1392 goto out; 1424 goto out;
1393 } 1425 }
1394 1426
1395 unhash_session(ses); 1427 unhash_session(ses);
1396 spin_unlock(&sessionid_lock); 1428 spin_unlock(&client_lock);
1397 1429
1430 nfs4_lock_state();
1398 /* wait for callbacks */ 1431 /* wait for callbacks */
1399 shutdown_callback_client(ses->se_client); 1432 nfsd4_set_callback_client(ses->se_client, NULL);
1433 nfs4_unlock_state();
1400 nfsd4_put_session(ses); 1434 nfsd4_put_session(ses);
1401 status = nfs_ok; 1435 status = nfs_ok;
1402out: 1436out:
@@ -1417,7 +1451,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1417 if (resp->opcnt != 1) 1451 if (resp->opcnt != 1)
1418 return nfserr_sequence_pos; 1452 return nfserr_sequence_pos;
1419 1453
1420 spin_lock(&sessionid_lock); 1454 spin_lock(&client_lock);
1421 status = nfserr_badsession; 1455 status = nfserr_badsession;
1422 session = find_in_sessionid_hashtbl(&seq->sessionid); 1456 session = find_in_sessionid_hashtbl(&seq->sessionid);
1423 if (!session) 1457 if (!session)
@@ -1456,23 +1490,47 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1456 cstate->slot = slot; 1490 cstate->slot = slot;
1457 cstate->session = session; 1491 cstate->session = session;
1458 1492
1459 /* Hold a session reference until done processing the compound:
1460 * nfsd4_put_session called only if the cstate slot is set.
1461 */
1462 nfsd4_get_session(session);
1463out: 1493out:
1464 spin_unlock(&sessionid_lock); 1494 /* Hold a session reference until done processing the compound. */
1465 /* Renew the clientid on success and on replay */
1466 if (cstate->session) { 1495 if (cstate->session) {
1467 nfs4_lock_state(); 1496 nfsd4_get_session(cstate->session);
1468 renew_client(session->se_client); 1497 atomic_inc(&session->se_client->cl_refcount);
1469 nfs4_unlock_state();
1470 } 1498 }
1499 spin_unlock(&client_lock);
1471 dprintk("%s: return %d\n", __func__, ntohl(status)); 1500 dprintk("%s: return %d\n", __func__, ntohl(status));
1472 return status; 1501 return status;
1473} 1502}
1474 1503
1475__be32 1504__be32
1505nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
1506{
1507 if (rc->rca_one_fs) {
1508 if (!cstate->current_fh.fh_dentry)
1509 return nfserr_nofilehandle;
1510 /*
1511 * We don't take advantage of the rca_one_fs case.
1512 * That's OK, it's optional, we can safely ignore it.
1513 */
1514 return nfs_ok;
1515 }
1516 nfs4_lock_state();
1517 if (is_client_expired(cstate->session->se_client)) {
1518 nfs4_unlock_state();
1519 /*
1520 * The following error isn't really legal.
1521 * But we only get here if the client just explicitly
1522 * destroyed the client. Surely it no longer cares what
1523 * error it gets back on an operation for the dead
1524 * client.
1525 */
1526 return nfserr_stale_clientid;
1527 }
1528 nfsd4_create_clid_dir(cstate->session->se_client);
1529 nfs4_unlock_state();
1530 return nfs_ok;
1531}
1532
1533__be32
1476nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 1534nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1477 struct nfsd4_setclientid *setclid) 1535 struct nfsd4_setclientid *setclid)
1478{ 1536{
@@ -1631,9 +1689,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1631 if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) 1689 if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
1632 status = nfserr_clid_inuse; 1690 status = nfserr_clid_inuse;
1633 else { 1691 else {
1634 /* XXX: We just turn off callbacks until we can handle 1692 atomic_set(&conf->cl_cb_set, 0);
1635 * change request correctly. */ 1693 nfsd4_probe_callback(conf, &unconf->cl_cb_conn);
1636 atomic_set(&conf->cl_cb_conn.cb_set, 0);
1637 expire_client(unconf); 1694 expire_client(unconf);
1638 status = nfs_ok; 1695 status = nfs_ok;
1639 1696
@@ -1667,7 +1724,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1667 } 1724 }
1668 move_to_confirmed(unconf); 1725 move_to_confirmed(unconf);
1669 conf = unconf; 1726 conf = unconf;
1670 nfsd4_probe_callback(conf); 1727 nfsd4_probe_callback(conf, &conf->cl_cb_conn);
1671 status = nfs_ok; 1728 status = nfs_ok;
1672 } 1729 }
1673 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) 1730 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -1700,12 +1757,12 @@ alloc_init_file(struct inode *ino)
1700 INIT_LIST_HEAD(&fp->fi_hash); 1757 INIT_LIST_HEAD(&fp->fi_hash);
1701 INIT_LIST_HEAD(&fp->fi_stateids); 1758 INIT_LIST_HEAD(&fp->fi_stateids);
1702 INIT_LIST_HEAD(&fp->fi_delegations); 1759 INIT_LIST_HEAD(&fp->fi_delegations);
1703 spin_lock(&recall_lock);
1704 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1705 spin_unlock(&recall_lock);
1706 fp->fi_inode = igrab(ino); 1760 fp->fi_inode = igrab(ino);
1707 fp->fi_id = current_fileid++; 1761 fp->fi_id = current_fileid++;
1708 fp->fi_had_conflict = false; 1762 fp->fi_had_conflict = false;
1763 spin_lock(&recall_lock);
1764 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
1765 spin_unlock(&recall_lock);
1709 return fp; 1766 return fp;
1710 } 1767 }
1711 return NULL; 1768 return NULL;
@@ -1827,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
1827 stp->st_stateowner = sop; 1884 stp->st_stateowner = sop;
1828 get_nfs4_file(fp); 1885 get_nfs4_file(fp);
1829 stp->st_file = fp; 1886 stp->st_file = fp;
1830 stp->st_stateid.si_boot = get_seconds(); 1887 stp->st_stateid.si_boot = boot_time;
1831 stp->st_stateid.si_stateownerid = sop->so_id; 1888 stp->st_stateid.si_stateownerid = sop->so_id;
1832 stp->st_stateid.si_fileid = fp->fi_id; 1889 stp->st_stateid.si_fileid = fp->fi_id;
1833 stp->st_stateid.si_generation = 0; 1890 stp->st_stateid.si_generation = 0;
@@ -2028,7 +2085,6 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2028 * lock) we know the server hasn't removed the lease yet, we know 2085 * lock) we know the server hasn't removed the lease yet, we know
2029 * it's safe to take a reference: */ 2086 * it's safe to take a reference: */
2030 atomic_inc(&dp->dl_count); 2087 atomic_inc(&dp->dl_count);
2031 atomic_inc(&dp->dl_client->cl_count);
2032 2088
2033 spin_lock(&recall_lock); 2089 spin_lock(&recall_lock);
2034 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 2090 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
@@ -2347,7 +2403,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2347{ 2403{
2348 struct nfs4_delegation *dp; 2404 struct nfs4_delegation *dp;
2349 struct nfs4_stateowner *sop = stp->st_stateowner; 2405 struct nfs4_stateowner *sop = stp->st_stateowner;
2350 struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn; 2406 int cb_up = atomic_read(&sop->so_client->cl_cb_set);
2351 struct file_lock fl, *flp = &fl; 2407 struct file_lock fl, *flp = &fl;
2352 int status, flag = 0; 2408 int status, flag = 0;
2353 2409
@@ -2355,7 +2411,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2355 open->op_recall = 0; 2411 open->op_recall = 0;
2356 switch (open->op_claim_type) { 2412 switch (open->op_claim_type) {
2357 case NFS4_OPEN_CLAIM_PREVIOUS: 2413 case NFS4_OPEN_CLAIM_PREVIOUS:
2358 if (!atomic_read(&cb->cb_set)) 2414 if (!cb_up)
2359 open->op_recall = 1; 2415 open->op_recall = 1;
2360 flag = open->op_delegate_type; 2416 flag = open->op_delegate_type;
2361 if (flag == NFS4_OPEN_DELEGATE_NONE) 2417 if (flag == NFS4_OPEN_DELEGATE_NONE)
@@ -2366,7 +2422,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
2366 * had the chance to reclaim theirs.... */ 2422 * had the chance to reclaim theirs.... */
2367 if (locks_in_grace()) 2423 if (locks_in_grace())
2368 goto out; 2424 goto out;
2369 if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) 2425 if (!cb_up || !sop->so_confirmed)
2370 goto out; 2426 goto out;
2371 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 2427 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
2372 flag = NFS4_OPEN_DELEGATE_WRITE; 2428 flag = NFS4_OPEN_DELEGATE_WRITE;
@@ -2483,10 +2539,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
2483 } 2539 }
2484 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); 2540 memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
2485 2541
2486 if (nfsd4_has_session(&resp->cstate)) { 2542 if (nfsd4_has_session(&resp->cstate))
2487 open->op_stateowner->so_confirmed = 1; 2543 open->op_stateowner->so_confirmed = 1;
2488 nfsd4_create_clid_dir(open->op_stateowner->so_client);
2489 }
2490 2544
2491 /* 2545 /*
2492 * Attempt to hand out a delegation. No error return, because the 2546 * Attempt to hand out a delegation. No error return, because the
@@ -2537,7 +2591,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2537 renew_client(clp); 2591 renew_client(clp);
2538 status = nfserr_cb_path_down; 2592 status = nfserr_cb_path_down;
2539 if (!list_empty(&clp->cl_delegations) 2593 if (!list_empty(&clp->cl_delegations)
2540 && !atomic_read(&clp->cl_cb_conn.cb_set)) 2594 && !atomic_read(&clp->cl_cb_set))
2541 goto out; 2595 goto out;
2542 status = nfs_ok; 2596 status = nfs_ok;
2543out: 2597out:
@@ -2554,6 +2608,12 @@ nfsd4_end_grace(void)
2554 dprintk("NFSD: end of grace period\n"); 2608 dprintk("NFSD: end of grace period\n");
2555 nfsd4_recdir_purge_old(); 2609 nfsd4_recdir_purge_old();
2556 locks_end_grace(&nfsd4_manager); 2610 locks_end_grace(&nfsd4_manager);
2611 /*
2612 * Now that every NFSv4 client has had the chance to recover and
2613 * to see the (possibly new, possibly shorter) lease time, we
2614 * can safely set the next grace time to the current lease time:
2615 */
2616 nfsd4_grace = nfsd4_lease;
2557} 2617}
2558 2618
2559static time_t 2619static time_t
@@ -2563,15 +2623,17 @@ nfs4_laundromat(void)
2563 struct nfs4_stateowner *sop; 2623 struct nfs4_stateowner *sop;
2564 struct nfs4_delegation *dp; 2624 struct nfs4_delegation *dp;
2565 struct list_head *pos, *next, reaplist; 2625 struct list_head *pos, *next, reaplist;
2566 time_t cutoff = get_seconds() - NFSD_LEASE_TIME; 2626 time_t cutoff = get_seconds() - nfsd4_lease;
2567 time_t t, clientid_val = NFSD_LEASE_TIME; 2627 time_t t, clientid_val = nfsd4_lease;
2568 time_t u, test_val = NFSD_LEASE_TIME; 2628 time_t u, test_val = nfsd4_lease;
2569 2629
2570 nfs4_lock_state(); 2630 nfs4_lock_state();
2571 2631
2572 dprintk("NFSD: laundromat service - starting\n"); 2632 dprintk("NFSD: laundromat service - starting\n");
2573 if (locks_in_grace()) 2633 if (locks_in_grace())
2574 nfsd4_end_grace(); 2634 nfsd4_end_grace();
2635 INIT_LIST_HEAD(&reaplist);
2636 spin_lock(&client_lock);
2575 list_for_each_safe(pos, next, &client_lru) { 2637 list_for_each_safe(pos, next, &client_lru) {
2576 clp = list_entry(pos, struct nfs4_client, cl_lru); 2638 clp = list_entry(pos, struct nfs4_client, cl_lru);
2577 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { 2639 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -2580,12 +2642,22 @@ nfs4_laundromat(void)
2580 clientid_val = t; 2642 clientid_val = t;
2581 break; 2643 break;
2582 } 2644 }
2645 if (atomic_read(&clp->cl_refcount)) {
2646 dprintk("NFSD: client in use (clientid %08x)\n",
2647 clp->cl_clientid.cl_id);
2648 continue;
2649 }
2650 unhash_client_locked(clp);
2651 list_add(&clp->cl_lru, &reaplist);
2652 }
2653 spin_unlock(&client_lock);
2654 list_for_each_safe(pos, next, &reaplist) {
2655 clp = list_entry(pos, struct nfs4_client, cl_lru);
2583 dprintk("NFSD: purging unused client (clientid %08x)\n", 2656 dprintk("NFSD: purging unused client (clientid %08x)\n",
2584 clp->cl_clientid.cl_id); 2657 clp->cl_clientid.cl_id);
2585 nfsd4_remove_clid_dir(clp); 2658 nfsd4_remove_clid_dir(clp);
2586 expire_client(clp); 2659 expire_client(clp);
2587 } 2660 }
2588 INIT_LIST_HEAD(&reaplist);
2589 spin_lock(&recall_lock); 2661 spin_lock(&recall_lock);
2590 list_for_each_safe(pos, next, &del_recall_lru) { 2662 list_for_each_safe(pos, next, &del_recall_lru) {
2591 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); 2663 dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
@@ -2605,7 +2677,7 @@ nfs4_laundromat(void)
2605 list_del_init(&dp->dl_recall_lru); 2677 list_del_init(&dp->dl_recall_lru);
2606 unhash_delegation(dp); 2678 unhash_delegation(dp);
2607 } 2679 }
2608 test_val = NFSD_LEASE_TIME; 2680 test_val = nfsd4_lease;
2609 list_for_each_safe(pos, next, &close_lru) { 2681 list_for_each_safe(pos, next, &close_lru) {
2610 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); 2682 sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
2611 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { 2683 if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
@@ -2661,39 +2733,11 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
2661static int 2733static int
2662STALE_STATEID(stateid_t *stateid) 2734STALE_STATEID(stateid_t *stateid)
2663{ 2735{
2664 if (time_after((unsigned long)boot_time, 2736 if (stateid->si_boot == boot_time)
2665 (unsigned long)stateid->si_boot)) { 2737 return 0;
2666 dprintk("NFSD: stale stateid " STATEID_FMT "!\n", 2738 dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
2667 STATEID_VAL(stateid));
2668 return 1;
2669 }
2670 return 0;
2671}
2672
2673static int
2674EXPIRED_STATEID(stateid_t *stateid)
2675{
2676 if (time_before((unsigned long)boot_time,
2677 ((unsigned long)stateid->si_boot)) &&
2678 time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
2679 dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
2680 STATEID_VAL(stateid));
2681 return 1;
2682 }
2683 return 0;
2684}
2685
2686static __be32
2687stateid_error_map(stateid_t *stateid)
2688{
2689 if (STALE_STATEID(stateid))
2690 return nfserr_stale_stateid;
2691 if (EXPIRED_STATEID(stateid))
2692 return nfserr_expired;
2693
2694 dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
2695 STATEID_VAL(stateid)); 2739 STATEID_VAL(stateid));
2696 return nfserr_bad_stateid; 2740 return 1;
2697} 2741}
2698 2742
2699static inline int 2743static inline int
@@ -2817,10 +2861,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2817 status = nfserr_bad_stateid; 2861 status = nfserr_bad_stateid;
2818 if (is_delegation_stateid(stateid)) { 2862 if (is_delegation_stateid(stateid)) {
2819 dp = find_delegation_stateid(ino, stateid); 2863 dp = find_delegation_stateid(ino, stateid);
2820 if (!dp) { 2864 if (!dp)
2821 status = stateid_error_map(stateid);
2822 goto out; 2865 goto out;
2823 }
2824 status = check_stateid_generation(stateid, &dp->dl_stateid, 2866 status = check_stateid_generation(stateid, &dp->dl_stateid,
2825 flags); 2867 flags);
2826 if (status) 2868 if (status)
@@ -2833,10 +2875,8 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2833 *filpp = dp->dl_vfs_file; 2875 *filpp = dp->dl_vfs_file;
2834 } else { /* open or lock stateid */ 2876 } else { /* open or lock stateid */
2835 stp = find_stateid(stateid, flags); 2877 stp = find_stateid(stateid, flags);
2836 if (!stp) { 2878 if (!stp)
2837 status = stateid_error_map(stateid);
2838 goto out; 2879 goto out;
2839 }
2840 if (nfs4_check_fh(current_fh, stp)) 2880 if (nfs4_check_fh(current_fh, stp))
2841 goto out; 2881 goto out;
2842 if (!stp->st_stateowner->so_confirmed) 2882 if (!stp->st_stateowner->so_confirmed)
@@ -2908,7 +2948,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
2908 */ 2948 */
2909 sop = search_close_lru(stateid->si_stateownerid, flags); 2949 sop = search_close_lru(stateid->si_stateownerid, flags);
2910 if (sop == NULL) 2950 if (sop == NULL)
2911 return stateid_error_map(stateid); 2951 return nfserr_bad_stateid;
2912 *sopp = sop; 2952 *sopp = sop;
2913 goto check_replay; 2953 goto check_replay;
2914 } 2954 }
@@ -3175,10 +3215,8 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3175 if (!is_delegation_stateid(stateid)) 3215 if (!is_delegation_stateid(stateid))
3176 goto out; 3216 goto out;
3177 dp = find_delegation_stateid(inode, stateid); 3217 dp = find_delegation_stateid(inode, stateid);
3178 if (!dp) { 3218 if (!dp)
3179 status = stateid_error_map(stateid);
3180 goto out; 3219 goto out;
3181 }
3182 status = check_stateid_generation(stateid, &dp->dl_stateid, flags); 3220 status = check_stateid_generation(stateid, &dp->dl_stateid, flags);
3183 if (status) 3221 if (status)
3184 goto out; 3222 goto out;
@@ -3404,7 +3442,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc
3404 stp->st_stateowner = sop; 3442 stp->st_stateowner = sop;
3405 get_nfs4_file(fp); 3443 get_nfs4_file(fp);
3406 stp->st_file = fp; 3444 stp->st_file = fp;
3407 stp->st_stateid.si_boot = get_seconds(); 3445 stp->st_stateid.si_boot = boot_time;
3408 stp->st_stateid.si_stateownerid = sop->so_id; 3446 stp->st_stateid.si_stateownerid = sop->so_id;
3409 stp->st_stateid.si_fileid = fp->fi_id; 3447 stp->st_stateid.si_fileid = fp->fi_id;
3410 stp->st_stateid.si_generation = 0; 3448 stp->st_stateid.si_generation = 0;
@@ -3976,12 +4014,6 @@ nfsd4_load_reboot_recovery_data(void)
3976 printk("NFSD: Failure reading reboot recovery data\n"); 4014 printk("NFSD: Failure reading reboot recovery data\n");
3977} 4015}
3978 4016
3979unsigned long
3980get_nfs4_grace_period(void)
3981{
3982 return max(user_lease_time, lease_time) * HZ;
3983}
3984
3985/* 4017/*
3986 * Since the lifetime of a delegation isn't limited to that of an open, a 4018 * Since the lifetime of a delegation isn't limited to that of an open, a
3987 * client may quite reasonably hang on to a delegation as long as it has 4019 * client may quite reasonably hang on to a delegation as long as it has
@@ -4008,20 +4040,27 @@ set_max_delegations(void)
4008static int 4040static int
4009__nfs4_state_start(void) 4041__nfs4_state_start(void)
4010{ 4042{
4011 unsigned long grace_time; 4043 int ret;
4012 4044
4013 boot_time = get_seconds(); 4045 boot_time = get_seconds();
4014 grace_time = get_nfs4_grace_period();
4015 lease_time = user_lease_time;
4016 locks_start_grace(&nfsd4_manager); 4046 locks_start_grace(&nfsd4_manager);
4017 printk(KERN_INFO "NFSD: starting %ld-second grace period\n", 4047 printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
4018 grace_time/HZ); 4048 nfsd4_grace);
4049 ret = set_callback_cred();
4050 if (ret)
4051 return -ENOMEM;
4019 laundry_wq = create_singlethread_workqueue("nfsd4"); 4052 laundry_wq = create_singlethread_workqueue("nfsd4");
4020 if (laundry_wq == NULL) 4053 if (laundry_wq == NULL)
4021 return -ENOMEM; 4054 return -ENOMEM;
4022 queue_delayed_work(laundry_wq, &laundromat_work, grace_time); 4055 ret = nfsd4_create_callback_queue();
4056 if (ret)
4057 goto out_free_laundry;
4058 queue_delayed_work(laundry_wq, &laundromat_work, nfsd4_grace * HZ);
4023 set_max_delegations(); 4059 set_max_delegations();
4024 return set_callback_cred(); 4060 return 0;
4061out_free_laundry:
4062 destroy_workqueue(laundry_wq);
4063 return ret;
4025} 4064}
4026 4065
4027int 4066int
@@ -4039,12 +4078,6 @@ nfs4_state_start(void)
4039 return 0; 4078 return 0;
4040} 4079}
4041 4080
4042time_t
4043nfs4_lease_time(void)
4044{
4045 return lease_time;
4046}
4047
4048static void 4081static void
4049__nfs4_state_shutdown(void) 4082__nfs4_state_shutdown(void)
4050{ 4083{
@@ -4089,6 +4122,7 @@ nfs4_state_shutdown(void)
4089 nfs4_lock_state(); 4122 nfs4_lock_state();
4090 nfs4_release_reclaim(); 4123 nfs4_release_reclaim();
4091 __nfs4_state_shutdown(); 4124 __nfs4_state_shutdown();
4125 nfsd4_destroy_callback_queue();
4092 nfs4_unlock_state(); 4126 nfs4_unlock_state();
4093} 4127}
4094 4128
@@ -4128,21 +4162,3 @@ nfs4_recoverydir(void)
4128{ 4162{
4129 return user_recovery_dirname; 4163 return user_recovery_dirname;
4130} 4164}
4131
4132/*
4133 * Called when leasetime is changed.
4134 *
4135 * The only way the protocol gives us to handle on-the-fly lease changes is to
4136 * simulate a reboot. Instead of doing that, we just wait till the next time
4137 * we start to register any changes in lease time. If the administrator
4138 * really wants to change the lease time *now*, they can go ahead and bring
4139 * nfsd down and then back up again after changing the lease time.
4140 *
4141 * user_lease_time is protected by nfsd_mutex since it's only really accessed
4142 * when nfsd is starting
4143 */
4144void
4145nfs4_reset_lease(time_t leasetime)
4146{
4147 user_lease_time = leasetime;
4148}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 34ccf815ea8a..ac17a7080239 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1234,6 +1234,16 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
1234 DECODE_TAIL; 1234 DECODE_TAIL;
1235} 1235}
1236 1236
1237static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
1238{
1239 DECODE_HEAD;
1240
1241 READ_BUF(4);
1242 READ32(rc->rca_one_fs);
1243
1244 DECODE_TAIL;
1245}
1246
1237static __be32 1247static __be32
1238nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) 1248nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
1239{ 1249{
@@ -1346,7 +1356,7 @@ static nfsd4_dec nfsd41_dec_ops[] = {
1346 [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, 1356 [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp,
1347 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1357 [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1348 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, 1358 [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp,
1349 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_notsupp, 1359 [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
1350}; 1360};
1351 1361
1352struct nfsd4_minorversion_ops { 1362struct nfsd4_minorversion_ops {
@@ -1900,7 +1910,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1900 if (bmval0 & FATTR4_WORD0_LEASE_TIME) { 1910 if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
1901 if ((buflen -= 4) < 0) 1911 if ((buflen -= 4) < 0)
1902 goto out_resource; 1912 goto out_resource;
1903 WRITE32(NFSD_LEASE_TIME); 1913 WRITE32(nfsd4_lease);
1904 } 1914 }
1905 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { 1915 if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
1906 if ((buflen -= 4) < 0) 1916 if ((buflen -= 4) < 0)
@@ -3307,11 +3317,14 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
3307 iov = &rqstp->rq_res.head[0]; 3317 iov = &rqstp->rq_res.head[0];
3308 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; 3318 iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
3309 BUG_ON(iov->iov_len > PAGE_SIZE); 3319 BUG_ON(iov->iov_len > PAGE_SIZE);
3310 if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) { 3320 if (nfsd4_has_session(cs)) {
3311 nfsd4_store_cache_entry(resp); 3321 if (cs->status != nfserr_replay_cache) {
3312 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); 3322 nfsd4_store_cache_entry(resp);
3313 resp->cstate.slot->sl_inuse = false; 3323 dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
3314 nfsd4_put_session(resp->cstate.session); 3324 cs->slot->sl_inuse = false;
3325 }
3326 /* Renew the clientid on success and on replay */
3327 release_session_client(cs->session);
3315 } 3328 }
3316 return 1; 3329 return 1;
3317} 3330}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e3591073098f..bc3194ea01f5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -46,6 +46,7 @@ enum {
46 */ 46 */
47#ifdef CONFIG_NFSD_V4 47#ifdef CONFIG_NFSD_V4
48 NFSD_Leasetime, 48 NFSD_Leasetime,
49 NFSD_Gracetime,
49 NFSD_RecoveryDir, 50 NFSD_RecoveryDir,
50#endif 51#endif
51}; 52};
@@ -70,6 +71,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size);
70static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); 71static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
71#ifdef CONFIG_NFSD_V4 72#ifdef CONFIG_NFSD_V4
72static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 73static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
74static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
73static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); 75static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
74#endif 76#endif
75 77
@@ -91,6 +93,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
91 [NFSD_MaxBlkSize] = write_maxblksize, 93 [NFSD_MaxBlkSize] = write_maxblksize,
92#ifdef CONFIG_NFSD_V4 94#ifdef CONFIG_NFSD_V4
93 [NFSD_Leasetime] = write_leasetime, 95 [NFSD_Leasetime] = write_leasetime,
96 [NFSD_Gracetime] = write_gracetime,
94 [NFSD_RecoveryDir] = write_recoverydir, 97 [NFSD_RecoveryDir] = write_recoverydir,
95#endif 98#endif
96}; 99};
@@ -1204,29 +1207,45 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
1204} 1207}
1205 1208
1206#ifdef CONFIG_NFSD_V4 1209#ifdef CONFIG_NFSD_V4
1207extern time_t nfs4_leasetime(void); 1210static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
1208
1209static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
1210{ 1211{
1211 /* if size > 10 seconds, call
1212 * nfs4_reset_lease() then write out the new lease (seconds) as reply
1213 */
1214 char *mesg = buf; 1212 char *mesg = buf;
1215 int rv, lease; 1213 int rv, i;
1216 1214
1217 if (size > 0) { 1215 if (size > 0) {
1218 if (nfsd_serv) 1216 if (nfsd_serv)
1219 return -EBUSY; 1217 return -EBUSY;
1220 rv = get_int(&mesg, &lease); 1218 rv = get_int(&mesg, &i);
1221 if (rv) 1219 if (rv)
1222 return rv; 1220 return rv;
1223 if (lease < 10 || lease > 3600) 1221 /*
1222 * Some sanity checking. We don't have a reason for
1223 * these particular numbers, but problems with the
1224 * extremes are:
1225 * - Too short: the briefest network outage may
1226 * cause clients to lose all their locks. Also,
1227 * the frequent polling may be wasteful.
1228 * - Too long: do you really want reboot recovery
1229 * to take more than an hour? Or to make other
1230 * clients wait an hour before being able to
1231 * revoke a dead client's locks?
1232 */
1233 if (i < 10 || i > 3600)
1224 return -EINVAL; 1234 return -EINVAL;
1225 nfs4_reset_lease(lease); 1235 *time = i;
1226 } 1236 }
1227 1237
1228 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", 1238 return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
1229 nfs4_lease_time()); 1239}
1240
1241static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time)
1242{
1243 ssize_t rv;
1244
1245 mutex_lock(&nfsd_mutex);
1246 rv = __nfsd4_write_time(file, buf, size, time);
1247 mutex_unlock(&nfsd_mutex);
1248 return rv;
1230} 1249}
1231 1250
1232/** 1251/**
@@ -1252,12 +1271,22 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
1252 */ 1271 */
1253static ssize_t write_leasetime(struct file *file, char *buf, size_t size) 1272static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
1254{ 1273{
1255 ssize_t rv; 1274 return nfsd4_write_time(file, buf, size, &nfsd4_lease);
1275}
1256 1276
1257 mutex_lock(&nfsd_mutex); 1277/**
1258 rv = __write_leasetime(file, buf, size); 1278 * write_gracetime - Set or report current NFSv4 grace period time
1259 mutex_unlock(&nfsd_mutex); 1279 *
1260 return rv; 1280 * As above, but sets the time of the NFSv4 grace period.
1281 *
1282 * Note this should never be set to less than the *previous*
1283 * lease-period time, but we don't try to enforce this. (In the common
1284 * case (a new boot), we don't know what the previous lease time was
1285 * anyway.)
1286 */
1287static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
1288{
1289 return nfsd4_write_time(file, buf, size, &nfsd4_grace);
1261} 1290}
1262 1291
1263extern char *nfs4_recoverydir(void); 1292extern char *nfs4_recoverydir(void);
@@ -1351,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1351 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, 1380 [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
1352#ifdef CONFIG_NFSD_V4 1381#ifdef CONFIG_NFSD_V4
1353 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 1382 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
1383 [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
1354 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, 1384 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
1355#endif 1385#endif
1356 /* last one */ {""} 1386 /* last one */ {""}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index e942a1aaac92..72377761270e 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -82,7 +82,6 @@ int nfs4_state_init(void);
82void nfsd4_free_slabs(void); 82void nfsd4_free_slabs(void);
83int nfs4_state_start(void); 83int nfs4_state_start(void);
84void nfs4_state_shutdown(void); 84void nfs4_state_shutdown(void);
85time_t nfs4_lease_time(void);
86void nfs4_reset_lease(time_t leasetime); 85void nfs4_reset_lease(time_t leasetime);
87int nfs4_reset_recoverydir(char *recdir); 86int nfs4_reset_recoverydir(char *recdir);
88#else 87#else
@@ -90,7 +89,6 @@ static inline int nfs4_state_init(void) { return 0; }
90static inline void nfsd4_free_slabs(void) { } 89static inline void nfsd4_free_slabs(void) { }
91static inline int nfs4_state_start(void) { return 0; } 90static inline int nfs4_state_start(void) { return 0; }
92static inline void nfs4_state_shutdown(void) { } 91static inline void nfs4_state_shutdown(void) { }
93static inline time_t nfs4_lease_time(void) { return 0; }
94static inline void nfs4_reset_lease(time_t leasetime) { } 92static inline void nfs4_reset_lease(time_t leasetime) { }
95static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } 93static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
96#endif 94#endif
@@ -229,6 +227,9 @@ extern struct timeval nfssvc_boot;
229 227
230#ifdef CONFIG_NFSD_V4 228#ifdef CONFIG_NFSD_V4
231 229
230extern time_t nfsd4_lease;
231extern time_t nfsd4_grace;
232
232/* before processing a COMPOUND operation, we have to check that there 233/* before processing a COMPOUND operation, we have to check that there
233 * is enough space in the buffer for XDR encode to succeed. otherwise, 234 * is enough space in the buffer for XDR encode to succeed. otherwise,
234 * we might process an operation with side effects, and be unable to 235 * we might process an operation with side effects, and be unable to
@@ -247,7 +248,6 @@ extern struct timeval nfssvc_boot;
247#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 248#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
248#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 249#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
249 250
250#define NFSD_LEASE_TIME (nfs4_lease_time())
251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ 251#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
252 252
253/* 253/*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 171699eb07c8..06b2a26edfe0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -120,7 +120,7 @@ u32 nfsd_supported_minorversion;
120int nfsd_vers(int vers, enum vers_op change) 120int nfsd_vers(int vers, enum vers_op change)
121{ 121{
122 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) 122 if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
123 return -1; 123 return 0;
124 switch(change) { 124 switch(change) {
125 case NFSD_SET: 125 case NFSD_SET:
126 nfsd_versions[vers] = nfsd_version[vers]; 126 nfsd_versions[vers] = nfsd_version[vers];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index fefeae27f25e..006c84230c7c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,16 @@ struct nfsd4_cb_sequence {
70 struct nfs4_client *cbs_clp; 70 struct nfs4_client *cbs_clp;
71}; 71};
72 72
73struct nfs4_rpc_args {
74 void *args_op;
75 struct nfsd4_cb_sequence args_seq;
76};
77
78struct nfsd4_callback {
79 struct nfs4_rpc_args cb_args;
80 struct work_struct cb_work;
81};
82
73struct nfs4_delegation { 83struct nfs4_delegation {
74 struct list_head dl_perfile; 84 struct list_head dl_perfile;
75 struct list_head dl_perclnt; 85 struct list_head dl_perclnt;
@@ -86,6 +96,7 @@ struct nfs4_delegation {
86 stateid_t dl_stateid; 96 stateid_t dl_stateid;
87 struct knfsd_fh dl_fh; 97 struct knfsd_fh dl_fh;
88 int dl_retries; 98 int dl_retries;
99 struct nfsd4_callback dl_recall;
89}; 100};
90 101
91/* client delegation callback info */ 102/* client delegation callback info */
@@ -96,9 +107,7 @@ struct nfs4_cb_conn {
96 u32 cb_prog; 107 u32 cb_prog;
97 u32 cb_minorversion; 108 u32 cb_minorversion;
98 u32 cb_ident; /* minorversion 0 only */ 109 u32 cb_ident; /* minorversion 0 only */
99 /* RPC client info */ 110 struct svc_xprt *cb_xprt; /* minorversion 1 only */
100 atomic_t cb_set; /* successful CB_NULL call */
101 struct rpc_clnt * cb_client;
102}; 111};
103 112
104/* Maximum number of slots per session. 160 is useful for long haul TCP */ 113/* Maximum number of slots per session. 160 is useful for long haul TCP */
@@ -157,7 +166,7 @@ struct nfsd4_session {
157 struct list_head se_hash; /* hash by sessionid */ 166 struct list_head se_hash; /* hash by sessionid */
158 struct list_head se_perclnt; 167 struct list_head se_perclnt;
159 u32 se_flags; 168 u32 se_flags;
160 struct nfs4_client *se_client; /* for expire_client */ 169 struct nfs4_client *se_client;
161 struct nfs4_sessionid se_sessionid; 170 struct nfs4_sessionid se_sessionid;
162 struct nfsd4_channel_attrs se_fchannel; 171 struct nfsd4_channel_attrs se_fchannel;
163 struct nfsd4_channel_attrs se_bchannel; 172 struct nfsd4_channel_attrs se_bchannel;
@@ -212,25 +221,41 @@ struct nfs4_client {
212 struct svc_cred cl_cred; /* setclientid principal */ 221 struct svc_cred cl_cred; /* setclientid principal */
213 clientid_t cl_clientid; /* generated by server */ 222 clientid_t cl_clientid; /* generated by server */
214 nfs4_verifier cl_confirm; /* generated by server */ 223 nfs4_verifier cl_confirm; /* generated by server */
215 struct nfs4_cb_conn cl_cb_conn; /* callback info */
216 atomic_t cl_count; /* ref count */
217 u32 cl_firststate; /* recovery dir creation */ 224 u32 cl_firststate; /* recovery dir creation */
218 225
226 /* for v4.0 and v4.1 callbacks: */
227 struct nfs4_cb_conn cl_cb_conn;
228 struct rpc_clnt *cl_cb_client;
229 atomic_t cl_cb_set;
230
219 /* for nfs41 */ 231 /* for nfs41 */
220 struct list_head cl_sessions; 232 struct list_head cl_sessions;
221 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ 233 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
222 u32 cl_exchange_flags; 234 u32 cl_exchange_flags;
223 struct nfs4_sessionid cl_sessionid; 235 struct nfs4_sessionid cl_sessionid;
236 /* number of rpc's in progress over an associated session: */
237 atomic_t cl_refcount;
224 238
225 /* for nfs41 callbacks */ 239 /* for nfs41 callbacks */
226 /* We currently support a single back channel with a single slot */ 240 /* We currently support a single back channel with a single slot */
227 unsigned long cl_cb_slot_busy; 241 unsigned long cl_cb_slot_busy;
228 u32 cl_cb_seq_nr; 242 u32 cl_cb_seq_nr;
229 struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
230 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 243 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
231 /* wait here for slots */ 244 /* wait here for slots */
232}; 245};
233 246
247static inline void
248mark_client_expired(struct nfs4_client *clp)
249{
250 clp->cl_time = 0;
251}
252
253static inline bool
254is_client_expired(struct nfs4_client *clp)
255{
256 return clp->cl_time == 0;
257}
258
234/* struct nfs4_client_reset 259/* struct nfs4_client_reset
235 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl 260 * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
236 * upon lease reset, or from upcall to state_daemon (to read in state 261 * upon lease reset, or from upcall to state_daemon (to read in state
@@ -377,11 +402,14 @@ extern void nfs4_lock_state(void);
377extern void nfs4_unlock_state(void); 402extern void nfs4_unlock_state(void);
378extern int nfs4_in_grace(void); 403extern int nfs4_in_grace(void);
379extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 404extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
380extern void put_nfs4_client(struct nfs4_client *clp);
381extern void nfs4_free_stateowner(struct kref *kref); 405extern void nfs4_free_stateowner(struct kref *kref);
382extern int set_callback_cred(void); 406extern int set_callback_cred(void);
383extern void nfsd4_probe_callback(struct nfs4_client *clp); 407extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
408extern void nfsd4_do_callback_rpc(struct work_struct *);
384extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 409extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
410extern int nfsd4_create_callback_queue(void);
411extern void nfsd4_destroy_callback_queue(void);
412extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *);
385extern void nfs4_put_delegation(struct nfs4_delegation *dp); 413extern void nfs4_put_delegation(struct nfs4_delegation *dp);
386extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 414extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
387extern void nfsd4_init_recdir(char *recdir_name); 415extern void nfsd4_init_recdir(char *recdir_name);
@@ -392,6 +420,7 @@ extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
392extern void nfsd4_recdir_purge_old(void); 420extern void nfsd4_recdir_purge_old(void);
393extern int nfsd4_create_clid_dir(struct nfs4_client *clp); 421extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
394extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); 422extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
423extern void release_session_client(struct nfsd4_session *);
395 424
396static inline void 425static inline void
397nfs4_put_stateowner(struct nfs4_stateowner *so) 426nfs4_put_stateowner(struct nfs4_stateowner *so)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6dd5f1970e01..23c06f77f4ca 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -724,7 +724,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
724 struct inode *inode; 724 struct inode *inode;
725 int flags = O_RDONLY|O_LARGEFILE; 725 int flags = O_RDONLY|O_LARGEFILE;
726 __be32 err; 726 __be32 err;
727 int host_err; 727 int host_err = 0;
728 728
729 validate_process_creds(); 729 validate_process_creds();
730 730
@@ -761,7 +761,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
761 * Check to see if there are any leases on this file. 761 * Check to see if there are any leases on this file.
762 * This may block while leases are broken. 762 * This may block while leases are broken.
763 */ 763 */
764 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0)); 764 if (!(access & NFSD_MAY_NOT_BREAK_LEASE))
765 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? O_WRONLY : 0));
765 if (host_err == -EWOULDBLOCK) 766 if (host_err == -EWOULDBLOCK)
766 host_err = -ETIMEDOUT; 767 host_err = -ETIMEDOUT;
767 if (host_err) /* NOMEM or WOULDBLOCK */ 768 if (host_err) /* NOMEM or WOULDBLOCK */
@@ -1169,7 +1170,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1169 goto out; 1170 goto out;
1170 } 1171 }
1171 1172
1172 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file); 1173 err = nfsd_open(rqstp, fhp, S_IFREG,
1174 NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
1173 if (err) 1175 if (err)
1174 goto out; 1176 goto out;
1175 if (EX_ISSYNC(fhp->fh_export)) { 1177 if (EX_ISSYNC(fhp->fh_export)) {
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 4b1de0a9ea75..217a62c2a357 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -20,6 +20,7 @@
20#define NFSD_MAY_OWNER_OVERRIDE 64 20#define NFSD_MAY_OWNER_OVERRIDE 64
21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ 21#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 22#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
23#define NFSD_MAY_NOT_BREAK_LEASE 512
23 24
24#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) 25#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
25#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) 26#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index efa337739534..4d476ff08ae6 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -381,6 +381,10 @@ struct nfsd4_destroy_session {
381 struct nfs4_sessionid sessionid; 381 struct nfs4_sessionid sessionid;
382}; 382};
383 383
384struct nfsd4_reclaim_complete {
385 u32 rca_one_fs;
386};
387
384struct nfsd4_op { 388struct nfsd4_op {
385 int opnum; 389 int opnum;
386 __be32 status; 390 __be32 status;
@@ -421,6 +425,7 @@ struct nfsd4_op {
421 struct nfsd4_create_session create_session; 425 struct nfsd4_create_session create_session;
422 struct nfsd4_destroy_session destroy_session; 426 struct nfsd4_destroy_session destroy_session;
423 struct nfsd4_sequence sequence; 427 struct nfsd4_sequence sequence;
428 struct nfsd4_reclaim_complete reclaim_complete;
424 } u; 429 } u;
425 struct nfs4_replay * replay; 430 struct nfs4_replay * replay;
426}; 431};
@@ -513,9 +518,8 @@ extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
513extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, 518extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
514 struct nfsd4_sequence *seq); 519 struct nfsd4_sequence *seq);
515extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, 520extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
516 struct nfsd4_compound_state *, 521 struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
517struct nfsd4_exchange_id *); 522extern __be32 nfsd4_create_session(struct svc_rqst *,
518 extern __be32 nfsd4_create_session(struct svc_rqst *,
519 struct nfsd4_compound_state *, 523 struct nfsd4_compound_state *,
520 struct nfsd4_create_session *); 524 struct nfsd4_create_session *);
521extern __be32 nfsd4_sequence(struct svc_rqst *, 525extern __be32 nfsd4_sequence(struct svc_rqst *,
@@ -524,6 +528,7 @@ extern __be32 nfsd4_sequence(struct svc_rqst *,
524extern __be32 nfsd4_destroy_session(struct svc_rqst *, 528extern __be32 nfsd4_destroy_session(struct svc_rqst *,
525 struct nfsd4_compound_state *, 529 struct nfsd4_compound_state *,
526 struct nfsd4_destroy_session *); 530 struct nfsd4_destroy_session *);
531__be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_reclaim_complete *);
527extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, 532extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
528 struct nfsd4_open *open); 533 struct nfsd4_open *open);
529extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, 534extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 7cfb87e692da..d7fd696e595c 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -31,6 +31,11 @@
31#include "alloc.h" 31#include "alloc.h"
32 32
33 33
34/**
35 * nilfs_palloc_groups_per_desc_block - get the number of groups that a group
36 * descriptor block can maintain
37 * @inode: inode of metadata file using this allocator
38 */
34static inline unsigned long 39static inline unsigned long
35nilfs_palloc_groups_per_desc_block(const struct inode *inode) 40nilfs_palloc_groups_per_desc_block(const struct inode *inode)
36{ 41{
@@ -38,12 +43,21 @@ nilfs_palloc_groups_per_desc_block(const struct inode *inode)
38 sizeof(struct nilfs_palloc_group_desc); 43 sizeof(struct nilfs_palloc_group_desc);
39} 44}
40 45
46/**
47 * nilfs_palloc_groups_count - get maximum number of groups
48 * @inode: inode of metadata file using this allocator
49 */
41static inline unsigned long 50static inline unsigned long
42nilfs_palloc_groups_count(const struct inode *inode) 51nilfs_palloc_groups_count(const struct inode *inode)
43{ 52{
44 return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */)); 53 return 1UL << (BITS_PER_LONG - (inode->i_blkbits + 3 /* log2(8) */));
45} 54}
46 55
56/**
57 * nilfs_palloc_init_blockgroup - initialize private variables for allocator
58 * @inode: inode of metadata file using this allocator
59 * @entry_size: size of the persistent object
60 */
47int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size) 61int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
48{ 62{
49 struct nilfs_mdt_info *mi = NILFS_MDT(inode); 63 struct nilfs_mdt_info *mi = NILFS_MDT(inode);
@@ -69,6 +83,12 @@ int nilfs_palloc_init_blockgroup(struct inode *inode, unsigned entry_size)
69 return 0; 83 return 0;
70} 84}
71 85
86/**
87 * nilfs_palloc_group - get group number and offset from an entry number
88 * @inode: inode of metadata file using this allocator
89 * @nr: serial number of the entry (e.g. inode number)
90 * @offset: pointer to store offset number in the group
91 */
72static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr, 92static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
73 unsigned long *offset) 93 unsigned long *offset)
74{ 94{
@@ -78,6 +98,14 @@ static unsigned long nilfs_palloc_group(const struct inode *inode, __u64 nr,
78 return group; 98 return group;
79} 99}
80 100
101/**
102 * nilfs_palloc_desc_blkoff - get block offset of a group descriptor block
103 * @inode: inode of metadata file using this allocator
104 * @group: group number
105 *
106 * nilfs_palloc_desc_blkoff() returns block offset of the descriptor
107 * block which contains a descriptor of the specified group.
108 */
81static unsigned long 109static unsigned long
82nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group) 110nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
83{ 111{
@@ -86,6 +114,14 @@ nilfs_palloc_desc_blkoff(const struct inode *inode, unsigned long group)
86 return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block; 114 return desc_block * NILFS_MDT(inode)->mi_blocks_per_desc_block;
87} 115}
88 116
117/**
118 * nilfs_palloc_bitmap_blkoff - get block offset of a bitmap block
119 * @inode: inode of metadata file using this allocator
120 * @group: group number
121 *
122 * nilfs_palloc_bitmap_blkoff() returns block offset of the bitmap
123 * block used to allocate/deallocate entries in the specified group.
124 */
89static unsigned long 125static unsigned long
90nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) 126nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
91{ 127{
@@ -95,6 +131,12 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group)
95 desc_offset * NILFS_MDT(inode)->mi_blocks_per_group; 131 desc_offset * NILFS_MDT(inode)->mi_blocks_per_group;
96} 132}
97 133
134/**
135 * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group
136 * @inode: inode of metadata file using this allocator
137 * @group: group number
138 * @desc: pointer to descriptor structure for the group
139 */
98static unsigned long 140static unsigned long
99nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, 141nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
100 const struct nilfs_palloc_group_desc *desc) 142 const struct nilfs_palloc_group_desc *desc)
@@ -107,6 +149,13 @@ nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group,
107 return nfree; 149 return nfree;
108} 150}
109 151
152/**
153 * nilfs_palloc_group_desc_add_entries - adjust count of free entries
154 * @inode: inode of metadata file using this allocator
155 * @group: group number
156 * @desc: pointer to descriptor structure for the group
157 * @n: delta to be added
158 */
110static void 159static void
111nilfs_palloc_group_desc_add_entries(struct inode *inode, 160nilfs_palloc_group_desc_add_entries(struct inode *inode,
112 unsigned long group, 161 unsigned long group,
@@ -118,6 +167,11 @@ nilfs_palloc_group_desc_add_entries(struct inode *inode,
118 spin_unlock(nilfs_mdt_bgl_lock(inode, group)); 167 spin_unlock(nilfs_mdt_bgl_lock(inode, group));
119} 168}
120 169
170/**
171 * nilfs_palloc_entry_blkoff - get block offset of an entry block
172 * @inode: inode of metadata file using this allocator
173 * @nr: serial number of the entry (e.g. inode number)
174 */
121static unsigned long 175static unsigned long
122nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr) 176nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
123{ 177{
@@ -129,6 +183,12 @@ nilfs_palloc_entry_blkoff(const struct inode *inode, __u64 nr)
129 group_offset / NILFS_MDT(inode)->mi_entries_per_block; 183 group_offset / NILFS_MDT(inode)->mi_entries_per_block;
130} 184}
131 185
186/**
187 * nilfs_palloc_desc_block_init - initialize buffer of a group descriptor block
188 * @inode: inode of metadata file
189 * @bh: buffer head of the buffer to be initialized
190 * @kaddr: kernel address mapped for the page including the buffer
191 */
132static void nilfs_palloc_desc_block_init(struct inode *inode, 192static void nilfs_palloc_desc_block_init(struct inode *inode,
133 struct buffer_head *bh, void *kaddr) 193 struct buffer_head *bh, void *kaddr)
134{ 194{
@@ -179,6 +239,13 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
179 return ret; 239 return ret;
180} 240}
181 241
242/**
243 * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block
244 * @inode: inode of metadata file using this allocator
245 * @group: group number
246 * @create: create flag
247 * @bhp: pointer to store the resultant buffer head
248 */
182static int nilfs_palloc_get_desc_block(struct inode *inode, 249static int nilfs_palloc_get_desc_block(struct inode *inode,
183 unsigned long group, 250 unsigned long group,
184 int create, struct buffer_head **bhp) 251 int create, struct buffer_head **bhp)
@@ -191,6 +258,13 @@ static int nilfs_palloc_get_desc_block(struct inode *inode,
191 bhp, &cache->prev_desc, &cache->lock); 258 bhp, &cache->prev_desc, &cache->lock);
192} 259}
193 260
261/**
262 * nilfs_palloc_get_bitmap_block - get buffer head of a bitmap block
263 * @inode: inode of metadata file using this allocator
264 * @group: group number
265 * @create: create flag
266 * @bhp: pointer to store the resultant buffer head
267 */
194static int nilfs_palloc_get_bitmap_block(struct inode *inode, 268static int nilfs_palloc_get_bitmap_block(struct inode *inode,
195 unsigned long group, 269 unsigned long group,
196 int create, struct buffer_head **bhp) 270 int create, struct buffer_head **bhp)
@@ -203,6 +277,13 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode,
203 &cache->prev_bitmap, &cache->lock); 277 &cache->prev_bitmap, &cache->lock);
204} 278}
205 279
280/**
281 * nilfs_palloc_get_entry_block - get buffer head of an entry block
282 * @inode: inode of metadata file using this allocator
283 * @nr: serial number of the entry (e.g. inode number)
284 * @create: create flag
285 * @bhp: pointer to store the resultant buffer head
286 */
206int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, 287int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
207 int create, struct buffer_head **bhp) 288 int create, struct buffer_head **bhp)
208{ 289{
@@ -214,6 +295,13 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr,
214 &cache->prev_entry, &cache->lock); 295 &cache->prev_entry, &cache->lock);
215} 296}
216 297
298/**
299 * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor
300 * @inode: inode of metadata file using this allocator
301 * @group: group number
302 * @bh: buffer head of the buffer storing the group descriptor block
303 * @kaddr: kernel address mapped for the page including the buffer
304 */
217static struct nilfs_palloc_group_desc * 305static struct nilfs_palloc_group_desc *
218nilfs_palloc_block_get_group_desc(const struct inode *inode, 306nilfs_palloc_block_get_group_desc(const struct inode *inode,
219 unsigned long group, 307 unsigned long group,
@@ -223,6 +311,13 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode,
223 group % nilfs_palloc_groups_per_desc_block(inode); 311 group % nilfs_palloc_groups_per_desc_block(inode);
224} 312}
225 313
314/**
315 * nilfs_palloc_block_get_entry - get kernel address of an entry
316 * @inode: inode of metadata file using this allocator
317 * @nr: serial number of the entry (e.g. inode number)
318 * @bh: buffer head of the buffer storing the entry block
319 * @kaddr: kernel address mapped for the page including the buffer
320 */
226void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, 321void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
227 const struct buffer_head *bh, void *kaddr) 322 const struct buffer_head *bh, void *kaddr)
228{ 323{
@@ -235,11 +330,19 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr,
235 entry_offset * NILFS_MDT(inode)->mi_entry_size; 330 entry_offset * NILFS_MDT(inode)->mi_entry_size;
236} 331}
237 332
333/**
334 * nilfs_palloc_find_available_slot - find available slot in a group
335 * @inode: inode of metadata file using this allocator
336 * @group: group number
337 * @target: offset number of an entry in the group (start point)
338 * @bitmap: bitmap of the group
339 * @bsize: size in bits
340 */
238static int nilfs_palloc_find_available_slot(struct inode *inode, 341static int nilfs_palloc_find_available_slot(struct inode *inode,
239 unsigned long group, 342 unsigned long group,
240 unsigned long target, 343 unsigned long target,
241 unsigned char *bitmap, 344 unsigned char *bitmap,
242 int bsize) /* size in bits */ 345 int bsize)
243{ 346{
244 int curr, pos, end, i; 347 int curr, pos, end, i;
245 348
@@ -277,6 +380,13 @@ static int nilfs_palloc_find_available_slot(struct inode *inode,
277 return -ENOSPC; 380 return -ENOSPC;
278} 381}
279 382
383/**
384 * nilfs_palloc_rest_groups_in_desc_block - get the remaining number of groups
385 * in a group descriptor block
386 * @inode: inode of metadata file using this allocator
387 * @curr: current group number
388 * @max: maximum number of groups
389 */
280static unsigned long 390static unsigned long
281nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode, 391nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
282 unsigned long curr, unsigned long max) 392 unsigned long curr, unsigned long max)
@@ -287,6 +397,11 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
287 max - curr + 1); 397 max - curr + 1);
288} 398}
289 399
400/**
401 * nilfs_palloc_prepare_alloc_entry - prepare to allocate a persistent object
402 * @inode: inode of metadata file using this allocator
403 * @req: nilfs_palloc_req structure exchanged for the allocation
404 */
290int nilfs_palloc_prepare_alloc_entry(struct inode *inode, 405int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
291 struct nilfs_palloc_req *req) 406 struct nilfs_palloc_req *req)
292{ 407{
@@ -366,6 +481,11 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
366 return ret; 481 return ret;
367} 482}
368 483
484/**
485 * nilfs_palloc_commit_alloc_entry - finish allocation of a persistent object
486 * @inode: inode of metadata file using this allocator
487 * @req: nilfs_palloc_req structure exchanged for the allocation
488 */
369void nilfs_palloc_commit_alloc_entry(struct inode *inode, 489void nilfs_palloc_commit_alloc_entry(struct inode *inode,
370 struct nilfs_palloc_req *req) 490 struct nilfs_palloc_req *req)
371{ 491{
@@ -377,6 +497,11 @@ void nilfs_palloc_commit_alloc_entry(struct inode *inode,
377 brelse(req->pr_desc_bh); 497 brelse(req->pr_desc_bh);
378} 498}
379 499
500/**
501 * nilfs_palloc_commit_free_entry - finish deallocating a persistent object
502 * @inode: inode of metadata file using this allocator
503 * @req: nilfs_palloc_req structure exchanged for the removal
504 */
380void nilfs_palloc_commit_free_entry(struct inode *inode, 505void nilfs_palloc_commit_free_entry(struct inode *inode,
381 struct nilfs_palloc_req *req) 506 struct nilfs_palloc_req *req)
382{ 507{
@@ -410,6 +535,11 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
410 brelse(req->pr_desc_bh); 535 brelse(req->pr_desc_bh);
411} 536}
412 537
538/**
539 * nilfs_palloc_abort_alloc_entry - cancel allocation of a persistent object
540 * @inode: inode of metadata file using this allocator
541 * @req: nilfs_palloc_req structure exchanged for the allocation
542 */
413void nilfs_palloc_abort_alloc_entry(struct inode *inode, 543void nilfs_palloc_abort_alloc_entry(struct inode *inode,
414 struct nilfs_palloc_req *req) 544 struct nilfs_palloc_req *req)
415{ 545{
@@ -442,6 +572,11 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
442 req->pr_desc_bh = NULL; 572 req->pr_desc_bh = NULL;
443} 573}
444 574
575/**
576 * nilfs_palloc_prepare_free_entry - prepare to deallocate a persistent object
577 * @inode: inode of metadata file using this allocator
578 * @req: nilfs_palloc_req structure exchanged for the removal
579 */
445int nilfs_palloc_prepare_free_entry(struct inode *inode, 580int nilfs_palloc_prepare_free_entry(struct inode *inode,
446 struct nilfs_palloc_req *req) 581 struct nilfs_palloc_req *req)
447{ 582{
@@ -464,6 +599,11 @@ int nilfs_palloc_prepare_free_entry(struct inode *inode,
464 return 0; 599 return 0;
465} 600}
466 601
602/**
603 * nilfs_palloc_abort_free_entry - cancel deallocating a persistent object
604 * @inode: inode of metadata file using this allocator
605 * @req: nilfs_palloc_req structure exchanged for the removal
606 */
467void nilfs_palloc_abort_free_entry(struct inode *inode, 607void nilfs_palloc_abort_free_entry(struct inode *inode,
468 struct nilfs_palloc_req *req) 608 struct nilfs_palloc_req *req)
469{ 609{
@@ -475,6 +615,12 @@ void nilfs_palloc_abort_free_entry(struct inode *inode,
475 req->pr_desc_bh = NULL; 615 req->pr_desc_bh = NULL;
476} 616}
477 617
618/**
619 * nilfs_palloc_group_is_in - judge if an entry is in a group
620 * @inode: inode of metadata file using this allocator
621 * @group: group number
622 * @nr: serial number of the entry (e.g. inode number)
623 */
478static int 624static int
479nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) 625nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
480{ 626{
@@ -485,6 +631,12 @@ nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr)
485 return (nr >= first) && (nr <= last); 631 return (nr >= first) && (nr <= last);
486} 632}
487 633
634/**
635 * nilfs_palloc_freev - deallocate a set of persistent objects
636 * @inode: inode of metadata file using this allocator
637 * @entry_nrs: array of entry numbers to be deallocated
638 * @nitems: number of entries stored in @entry_nrs
639 */
488int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) 640int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
489{ 641{
490 struct buffer_head *desc_bh, *bitmap_bh; 642 struct buffer_head *desc_bh, *bitmap_bh;
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 5cccf874d692..9af34a7e6e13 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -29,6 +29,13 @@
29#include <linux/buffer_head.h> 29#include <linux/buffer_head.h>
30#include <linux/fs.h> 30#include <linux/fs.h>
31 31
32/**
33 * nilfs_palloc_entries_per_group - get the number of entries per group
34 * @inode: inode of metadata file using this allocator
35 *
36 * The number of entries per group is defined by the number of bits
37 * that a bitmap block can maintain.
38 */
32static inline unsigned long 39static inline unsigned long
33nilfs_palloc_entries_per_group(const struct inode *inode) 40nilfs_palloc_entries_per_group(const struct inode *inode)
34{ 41{
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 76c38e3e19d2..b27a342c5af6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -31,63 +31,16 @@
31#include "alloc.h" 31#include "alloc.h"
32#include "dat.h" 32#include "dat.h"
33 33
34/** 34static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 * struct nilfs_btree_path - A path on which B-tree operations are executed
36 * @bp_bh: buffer head of node block
37 * @bp_sib_bh: buffer head of sibling node block
38 * @bp_index: index of child node
39 * @bp_oldreq: ptr end request for old ptr
40 * @bp_newreq: ptr alloc request for new ptr
41 * @bp_op: rebalance operation
42 */
43struct nilfs_btree_path {
44 struct buffer_head *bp_bh;
45 struct buffer_head *bp_sib_bh;
46 int bp_index;
47 union nilfs_bmap_ptr_req bp_oldreq;
48 union nilfs_bmap_ptr_req bp_newreq;
49 struct nilfs_btnode_chkey_ctxt bp_ctxt;
50 void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51 int, __u64 *, __u64 *);
52};
53
54/*
55 * B-tree path operations
56 */
57
58static struct kmem_cache *nilfs_btree_path_cache;
59
60int __init nilfs_btree_path_cache_init(void)
61{
62 nilfs_btree_path_cache =
63 kmem_cache_create("nilfs2_btree_path_cache",
64 sizeof(struct nilfs_btree_path) *
65 NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66 return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67}
68
69void nilfs_btree_path_cache_destroy(void)
70{
71 kmem_cache_destroy(nilfs_btree_path_cache);
72}
73
74static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75{
76 return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77}
78
79static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80{ 35{
81 kmem_cache_free(nilfs_btree_path_cache, path); 36 struct nilfs_btree_path *path;
82} 37 int level = NILFS_BTREE_LEVEL_DATA;
83 38
84static void nilfs_btree_init_path(struct nilfs_btree_path *path) 39 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
85{ 40 if (path == NULL)
86 int level; 41 goto out;
87 42
88 for (level = NILFS_BTREE_LEVEL_DATA; 43 for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
89 level < NILFS_BTREE_LEVEL_MAX;
90 level++) {
91 path[level].bp_bh = NULL; 44 path[level].bp_bh = NULL;
92 path[level].bp_sib_bh = NULL; 45 path[level].bp_sib_bh = NULL;
93 path[level].bp_index = 0; 46 path[level].bp_index = 0;
@@ -95,15 +48,19 @@ static void nilfs_btree_init_path(struct nilfs_btree_path *path)
95 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 48 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96 path[level].bp_op = NULL; 49 path[level].bp_op = NULL;
97 } 50 }
51
52out:
53 return path;
98} 54}
99 55
100static void nilfs_btree_release_path(struct nilfs_btree_path *path) 56static void nilfs_btree_free_path(struct nilfs_btree_path *path)
101{ 57{
102 int level; 58 int level = NILFS_BTREE_LEVEL_DATA;
103 59
104 for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX; 60 for (; level < NILFS_BTREE_LEVEL_MAX; level++)
105 level++)
106 brelse(path[level].bp_bh); 61 brelse(path[level].bp_bh);
62
63 kmem_cache_free(nilfs_btree_path_cache, path);
107} 64}
108 65
109/* 66/*
@@ -566,14 +523,12 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
566 path = nilfs_btree_alloc_path(); 523 path = nilfs_btree_alloc_path();
567 if (path == NULL) 524 if (path == NULL)
568 return -ENOMEM; 525 return -ENOMEM;
569 nilfs_btree_init_path(path);
570 526
571 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 527 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572 528
573 if (ptrp != NULL) 529 if (ptrp != NULL)
574 *ptrp = ptr; 530 *ptrp = ptr;
575 531
576 nilfs_btree_release_path(path);
577 nilfs_btree_free_path(path); 532 nilfs_btree_free_path(path);
578 533
579 return ret; 534 return ret;
@@ -594,7 +549,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
594 path = nilfs_btree_alloc_path(); 549 path = nilfs_btree_alloc_path();
595 if (path == NULL) 550 if (path == NULL)
596 return -ENOMEM; 551 return -ENOMEM;
597 nilfs_btree_init_path(path); 552
598 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 553 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
599 if (ret < 0) 554 if (ret < 0)
600 goto out; 555 goto out;
@@ -655,7 +610,6 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
655 *ptrp = ptr; 610 *ptrp = ptr;
656 ret = cnt; 611 ret = cnt;
657 out: 612 out:
658 nilfs_btree_release_path(path);
659 nilfs_btree_free_path(path); 613 nilfs_btree_free_path(path);
660 return ret; 614 return ret;
661} 615}
@@ -1123,7 +1077,6 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1123 path = nilfs_btree_alloc_path(); 1077 path = nilfs_btree_alloc_path();
1124 if (path == NULL) 1078 if (path == NULL)
1125 return -ENOMEM; 1079 return -ENOMEM;
1126 nilfs_btree_init_path(path);
1127 1080
1128 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1081 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1129 NILFS_BTREE_LEVEL_NODE_MIN); 1082 NILFS_BTREE_LEVEL_NODE_MIN);
@@ -1140,7 +1093,6 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1140 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 1093 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1141 1094
1142 out: 1095 out:
1143 nilfs_btree_release_path(path);
1144 nilfs_btree_free_path(path); 1096 nilfs_btree_free_path(path);
1145 return ret; 1097 return ret;
1146} 1098}
@@ -1456,7 +1408,7 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1456 path = nilfs_btree_alloc_path(); 1408 path = nilfs_btree_alloc_path();
1457 if (path == NULL) 1409 if (path == NULL)
1458 return -ENOMEM; 1410 return -ENOMEM;
1459 nilfs_btree_init_path(path); 1411
1460 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1412 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1461 NILFS_BTREE_LEVEL_NODE_MIN); 1413 NILFS_BTREE_LEVEL_NODE_MIN);
1462 if (ret < 0) 1414 if (ret < 0)
@@ -1473,7 +1425,6 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1473 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); 1425 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1474 1426
1475out: 1427out:
1476 nilfs_btree_release_path(path);
1477 nilfs_btree_free_path(path); 1428 nilfs_btree_free_path(path);
1478 return ret; 1429 return ret;
1479} 1430}
@@ -1488,11 +1439,9 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1488 path = nilfs_btree_alloc_path(); 1439 path = nilfs_btree_alloc_path();
1489 if (path == NULL) 1440 if (path == NULL)
1490 return -ENOMEM; 1441 return -ENOMEM;
1491 nilfs_btree_init_path(path);
1492 1442
1493 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1443 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1494 1444
1495 nilfs_btree_release_path(path);
1496 nilfs_btree_free_path(path); 1445 nilfs_btree_free_path(path);
1497 1446
1498 return ret; 1447 return ret;
@@ -1923,7 +1872,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1923 path = nilfs_btree_alloc_path(); 1872 path = nilfs_btree_alloc_path();
1924 if (path == NULL) 1873 if (path == NULL)
1925 return -ENOMEM; 1874 return -ENOMEM;
1926 nilfs_btree_init_path(path);
1927 1875
1928 if (buffer_nilfs_node(bh)) { 1876 if (buffer_nilfs_node(bh)) {
1929 node = (struct nilfs_btree_node *)bh->b_data; 1877 node = (struct nilfs_btree_node *)bh->b_data;
@@ -1947,7 +1895,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1947 nilfs_btree_propagate_p(btree, path, level, bh); 1895 nilfs_btree_propagate_p(btree, path, level, bh);
1948 1896
1949 out: 1897 out:
1950 nilfs_btree_release_path(path);
1951 nilfs_btree_free_path(path); 1898 nilfs_btree_free_path(path);
1952 1899
1953 return ret; 1900 return ret;
@@ -2108,7 +2055,6 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2108 path = nilfs_btree_alloc_path(); 2055 path = nilfs_btree_alloc_path();
2109 if (path == NULL) 2056 if (path == NULL)
2110 return -ENOMEM; 2057 return -ENOMEM;
2111 nilfs_btree_init_path(path);
2112 2058
2113 if (buffer_nilfs_node(*bh)) { 2059 if (buffer_nilfs_node(*bh)) {
2114 node = (struct nilfs_btree_node *)(*bh)->b_data; 2060 node = (struct nilfs_btree_node *)(*bh)->b_data;
@@ -2130,7 +2076,6 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2130 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2076 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2131 2077
2132 out: 2078 out:
2133 nilfs_btree_release_path(path);
2134 nilfs_btree_free_path(path); 2079 nilfs_btree_free_path(path);
2135 2080
2136 return ret; 2081 return ret;
@@ -2175,7 +2120,6 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2175 path = nilfs_btree_alloc_path(); 2120 path = nilfs_btree_alloc_path();
2176 if (path == NULL) 2121 if (path == NULL)
2177 return -ENOMEM; 2122 return -ENOMEM;
2178 nilfs_btree_init_path(path);
2179 2123
2180 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); 2124 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2181 if (ret < 0) { 2125 if (ret < 0) {
@@ -2195,7 +2139,6 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2195 nilfs_bmap_set_dirty(&btree->bt_bmap); 2139 nilfs_bmap_set_dirty(&btree->bt_bmap);
2196 2140
2197 out: 2141 out:
2198 nilfs_btree_release_path(path);
2199 nilfs_btree_free_path(path); 2142 nilfs_btree_free_path(path);
2200 return ret; 2143 return ret;
2201} 2144}
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 4b82d84ade75..af638d59e3bf 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -30,9 +30,6 @@
30#include "btnode.h" 30#include "btnode.h"
31#include "bmap.h" 31#include "bmap.h"
32 32
33struct nilfs_btree;
34struct nilfs_btree_path;
35
36/** 33/**
37 * struct nilfs_btree - B-tree structure 34 * struct nilfs_btree - B-tree structure
38 * @bt_bmap: bmap base structure 35 * @bt_bmap: bmap base structure
@@ -41,6 +38,25 @@ struct nilfs_btree {
41 struct nilfs_bmap bt_bmap; 38 struct nilfs_bmap bt_bmap;
42}; 39};
43 40
41/**
42 * struct nilfs_btree_path - A path on which B-tree operations are executed
43 * @bp_bh: buffer head of node block
44 * @bp_sib_bh: buffer head of sibling node block
45 * @bp_index: index of child node
46 * @bp_oldreq: ptr end request for old ptr
47 * @bp_newreq: ptr alloc request for new ptr
48 * @bp_op: rebalance operation
49 */
50struct nilfs_btree_path {
51 struct buffer_head *bp_bh;
52 struct buffer_head *bp_sib_bh;
53 int bp_index;
54 union nilfs_bmap_ptr_req bp_oldreq;
55 union nilfs_bmap_ptr_req bp_newreq;
56 struct nilfs_btnode_chkey_ctxt bp_ctxt;
57 void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
58 int, __u64 *, __u64 *);
59};
44 60
45#define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE 61#define NILFS_BTREE_ROOT_SIZE NILFS_BMAP_SIZE
46#define NILFS_BTREE_ROOT_NCHILDREN_MAX \ 62#define NILFS_BTREE_ROOT_NCHILDREN_MAX \
@@ -57,6 +73,7 @@ struct nilfs_btree {
57#define NILFS_BTREE_KEY_MIN ((__u64)0) 73#define NILFS_BTREE_KEY_MIN ((__u64)0)
58#define NILFS_BTREE_KEY_MAX (~(__u64)0) 74#define NILFS_BTREE_KEY_MAX (~(__u64)0)
59 75
76extern struct kmem_cache *nilfs_btree_path_cache;
60 77
61int nilfs_btree_path_cache_init(void); 78int nilfs_btree_path_cache_init(void);
62void nilfs_btree_path_cache_destroy(void); 79void nilfs_btree_path_cache_destroy(void);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 0957b58f909d..5e226d4b41d3 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -451,7 +451,7 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
451 inode->i_op = &nilfs_special_inode_operations; 451 inode->i_op = &nilfs_special_inode_operations;
452 init_special_inode( 452 init_special_inode(
453 inode, inode->i_mode, 453 inode, inode->i_mode,
454 new_decode_dev(le64_to_cpu(raw_inode->i_device_code))); 454 huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
455 } 455 }
456 nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); 456 nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh);
457 brelse(bh); 457 brelse(bh);
@@ -511,7 +511,7 @@ void nilfs_write_inode_common(struct inode *inode,
511 nilfs_bmap_write(ii->i_bmap, raw_inode); 511 nilfs_bmap_write(ii->i_bmap, raw_inode);
512 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 512 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
513 raw_inode->i_device_code = 513 raw_inode->i_device_code =
514 cpu_to_le64(new_encode_dev(inode->i_rdev)); 514 cpu_to_le64(huge_encode_dev(inode->i_rdev));
515 /* When extending inode, nilfs->ns_inode_size should be checked 515 /* When extending inode, nilfs->ns_inode_size should be checked
516 for substitutions of appended fields */ 516 for substitutions of appended fields */
517} 517}
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba43146f3c30..bae2a516b4ee 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -105,6 +105,8 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi,
105 105
106 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); 106 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
107 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); 107 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
108
109 /* need to verify ->ss_bytes field if read ->ss_cno */
108} 110}
109 111
110/** 112/**
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 17851f77f739..2e6a2723b8fa 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -40,35 +40,10 @@ struct nilfs_write_info {
40 sector_t blocknr; 40 sector_t blocknr;
41}; 41};
42 42
43
44static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, 43static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
45 struct the_nilfs *nilfs); 44 struct the_nilfs *nilfs);
46static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); 45static int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf);
47 46
48
49static struct kmem_cache *nilfs_segbuf_cachep;
50
51static void nilfs_segbuf_init_once(void *obj)
52{
53 memset(obj, 0, sizeof(struct nilfs_segment_buffer));
54}
55
56int __init nilfs_init_segbuf_cache(void)
57{
58 nilfs_segbuf_cachep =
59 kmem_cache_create("nilfs2_segbuf_cache",
60 sizeof(struct nilfs_segment_buffer),
61 0, SLAB_RECLAIM_ACCOUNT,
62 nilfs_segbuf_init_once);
63
64 return (nilfs_segbuf_cachep == NULL) ? -ENOMEM : 0;
65}
66
67void nilfs_destroy_segbuf_cache(void)
68{
69 kmem_cache_destroy(nilfs_segbuf_cachep);
70}
71
72struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) 47struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
73{ 48{
74 struct nilfs_segment_buffer *segbuf; 49 struct nilfs_segment_buffer *segbuf;
@@ -81,6 +56,7 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb)
81 INIT_LIST_HEAD(&segbuf->sb_list); 56 INIT_LIST_HEAD(&segbuf->sb_list);
82 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); 57 INIT_LIST_HEAD(&segbuf->sb_segsum_buffers);
83 INIT_LIST_HEAD(&segbuf->sb_payload_buffers); 58 INIT_LIST_HEAD(&segbuf->sb_payload_buffers);
59 segbuf->sb_super_root = NULL;
84 60
85 init_completion(&segbuf->sb_bio_event); 61 init_completion(&segbuf->sb_bio_event);
86 atomic_set(&segbuf->sb_err, 0); 62 atomic_set(&segbuf->sb_err, 0);
@@ -158,7 +134,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
158} 134}
159 135
160int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, 136int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
161 time_t ctime) 137 time_t ctime, __u64 cno)
162{ 138{
163 int err; 139 int err;
164 140
@@ -171,6 +147,7 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
171 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); 147 segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
172 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; 148 segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
173 segbuf->sb_sum.ctime = ctime; 149 segbuf->sb_sum.ctime = ctime;
150 segbuf->sb_sum.cno = cno;
174 return 0; 151 return 0;
175} 152}
176 153
@@ -196,13 +173,14 @@ void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf)
196 raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo); 173 raw_sum->ss_nfinfo = cpu_to_le32(segbuf->sb_sum.nfinfo);
197 raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes); 174 raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes);
198 raw_sum->ss_pad = 0; 175 raw_sum->ss_pad = 0;
176 raw_sum->ss_cno = cpu_to_le64(segbuf->sb_sum.cno);
199} 177}
200 178
201/* 179/*
202 * CRC calculation routines 180 * CRC calculation routines
203 */ 181 */
204void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, 182static void
205 u32 seed) 183nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf, u32 seed)
206{ 184{
207 struct buffer_head *bh; 185 struct buffer_head *bh;
208 struct nilfs_segment_summary *raw_sum; 186 struct nilfs_segment_summary *raw_sum;
@@ -229,8 +207,8 @@ void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *segbuf,
229 raw_sum->ss_sumsum = cpu_to_le32(crc); 207 raw_sum->ss_sumsum = cpu_to_le32(crc);
230} 208}
231 209
232void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, 210static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
233 u32 seed) 211 u32 seed)
234{ 212{
235 struct buffer_head *bh; 213 struct buffer_head *bh;
236 struct nilfs_segment_summary *raw_sum; 214 struct nilfs_segment_summary *raw_sum;
@@ -256,6 +234,20 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
256 raw_sum->ss_datasum = cpu_to_le32(crc); 234 raw_sum->ss_datasum = cpu_to_le32(crc);
257} 235}
258 236
237static void
238nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
239 u32 seed)
240{
241 struct nilfs_super_root *raw_sr;
242 u32 crc;
243
244 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
245 crc = crc32_le(seed,
246 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
247 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
248 raw_sr->sr_sum = cpu_to_le32(crc);
249}
250
259static void nilfs_release_buffers(struct list_head *list) 251static void nilfs_release_buffers(struct list_head *list)
260{ 252{
261 struct buffer_head *bh, *n; 253 struct buffer_head *bh, *n;
@@ -282,6 +274,7 @@ static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf)
282{ 274{
283 nilfs_release_buffers(&segbuf->sb_segsum_buffers); 275 nilfs_release_buffers(&segbuf->sb_segsum_buffers);
284 nilfs_release_buffers(&segbuf->sb_payload_buffers); 276 nilfs_release_buffers(&segbuf->sb_payload_buffers);
277 segbuf->sb_super_root = NULL;
285} 278}
286 279
287/* 280/*
@@ -334,6 +327,23 @@ int nilfs_wait_on_logs(struct list_head *logs)
334 return ret; 327 return ret;
335} 328}
336 329
330/**
331 * nilfs_add_checksums_on_logs - add checksums on the logs
332 * @logs: list of segment buffers storing target logs
333 * @seed: checksum seed value
334 */
335void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed)
336{
337 struct nilfs_segment_buffer *segbuf;
338
339 list_for_each_entry(segbuf, logs, sb_list) {
340 if (segbuf->sb_super_root)
341 nilfs_segbuf_fill_in_super_root_crc(segbuf, seed);
342 nilfs_segbuf_fill_in_segsum_crc(segbuf, seed);
343 nilfs_segbuf_fill_in_data_crc(segbuf, seed);
344 }
345}
346
337/* 347/*
338 * BIO operations 348 * BIO operations
339 */ 349 */
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 94dfd3517bc0..fdf1c3b6d673 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -37,6 +37,7 @@
37 * @sumbytes: Byte count of segment summary 37 * @sumbytes: Byte count of segment summary
38 * @nfileblk: Total number of file blocks 38 * @nfileblk: Total number of file blocks
39 * @seg_seq: Segment sequence number 39 * @seg_seq: Segment sequence number
40 * @cno: Checkpoint number
40 * @ctime: Creation time 41 * @ctime: Creation time
41 * @next: Block number of the next full segment 42 * @next: Block number of the next full segment
42 */ 43 */
@@ -48,6 +49,7 @@ struct nilfs_segsum_info {
48 unsigned long sumbytes; 49 unsigned long sumbytes;
49 unsigned long nfileblk; 50 unsigned long nfileblk;
50 u64 seg_seq; 51 u64 seg_seq;
52 __u64 cno;
51 time_t ctime; 53 time_t ctime;
52 sector_t next; 54 sector_t next;
53}; 55};
@@ -76,6 +78,7 @@ struct nilfs_segsum_info {
76 * @sb_rest_blocks: Number of residual blocks in the current segment 78 * @sb_rest_blocks: Number of residual blocks in the current segment
77 * @sb_segsum_buffers: List of buffers for segment summaries 79 * @sb_segsum_buffers: List of buffers for segment summaries
78 * @sb_payload_buffers: List of buffers for segment payload 80 * @sb_payload_buffers: List of buffers for segment payload
81 * @sb_super_root: Pointer to buffer storing a super root block (if exists)
79 * @sb_nbio: Number of flying bio requests 82 * @sb_nbio: Number of flying bio requests
80 * @sb_err: I/O error status 83 * @sb_err: I/O error status
81 * @sb_bio_event: Completion event of log writing 84 * @sb_bio_event: Completion event of log writing
@@ -95,6 +98,7 @@ struct nilfs_segment_buffer {
95 /* Buffers */ 98 /* Buffers */
96 struct list_head sb_segsum_buffers; 99 struct list_head sb_segsum_buffers;
97 struct list_head sb_payload_buffers; /* including super root */ 100 struct list_head sb_payload_buffers; /* including super root */
101 struct buffer_head *sb_super_root;
98 102
99 /* io status */ 103 /* io status */
100 int sb_nbio; 104 int sb_nbio;
@@ -121,6 +125,7 @@ struct nilfs_segment_buffer {
121 b_assoc_buffers)) 125 b_assoc_buffers))
122#define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head) 126#define NILFS_SEGBUF_BH_IS_LAST(bh, head) ((bh)->b_assoc_buffers.next == head)
123 127
128extern struct kmem_cache *nilfs_segbuf_cachep;
124 129
125int __init nilfs_init_segbuf_cache(void); 130int __init nilfs_init_segbuf_cache(void);
126void nilfs_destroy_segbuf_cache(void); 131void nilfs_destroy_segbuf_cache(void);
@@ -132,13 +137,11 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
132 struct nilfs_segment_buffer *prev); 137 struct nilfs_segment_buffer *prev);
133void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, 138void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
134 struct the_nilfs *); 139 struct the_nilfs *);
135int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); 140int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t, __u64);
136int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *); 141int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
137int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, 142int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
138 struct buffer_head **); 143 struct buffer_head **);
139void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); 144void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *);
140void nilfs_segbuf_fill_in_segsum_crc(struct nilfs_segment_buffer *, u32);
141void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *, u32);
142 145
143static inline void 146static inline void
144nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, 147nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf,
@@ -171,6 +174,7 @@ void nilfs_truncate_logs(struct list_head *logs,
171 struct nilfs_segment_buffer *last); 174 struct nilfs_segment_buffer *last);
172int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs); 175int nilfs_write_logs(struct list_head *logs, struct the_nilfs *nilfs);
173int nilfs_wait_on_logs(struct list_head *logs); 176int nilfs_wait_on_logs(struct list_head *logs);
177void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed);
174 178
175static inline void nilfs_destroy_logs(struct list_head *logs) 179static inline void nilfs_destroy_logs(struct list_head *logs)
176{ 180{
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6a7dbd8451db..c9201649cc49 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -116,42 +116,6 @@ static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *,
116#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a) 116#define nilfs_cnt32_lt(a, b) nilfs_cnt32_gt(b, a)
117#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a) 117#define nilfs_cnt32_le(a, b) nilfs_cnt32_ge(b, a)
118 118
119/*
120 * Transaction
121 */
122static struct kmem_cache *nilfs_transaction_cachep;
123
124/**
125 * nilfs_init_transaction_cache - create a cache for nilfs_transaction_info
126 *
127 * nilfs_init_transaction_cache() creates a slab cache for the struct
128 * nilfs_transaction_info.
129 *
130 * Return Value: On success, it returns 0. On error, one of the following
131 * negative error code is returned.
132 *
133 * %-ENOMEM - Insufficient memory available.
134 */
135int nilfs_init_transaction_cache(void)
136{
137 nilfs_transaction_cachep =
138 kmem_cache_create("nilfs2_transaction_cache",
139 sizeof(struct nilfs_transaction_info),
140 0, SLAB_RECLAIM_ACCOUNT, NULL);
141 return (nilfs_transaction_cachep == NULL) ? -ENOMEM : 0;
142}
143
144/**
145 * nilfs_destroy_transaction_cache - destroy the cache for transaction info
146 *
147 * nilfs_destroy_transaction_cache() frees the slab cache for the struct
148 * nilfs_transaction_info.
149 */
150void nilfs_destroy_transaction_cache(void)
151{
152 kmem_cache_destroy(nilfs_transaction_cachep);
153}
154
155static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti) 119static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
156{ 120{
157 struct nilfs_transaction_info *cur_ti = current->journal_info; 121 struct nilfs_transaction_info *cur_ti = current->journal_info;
@@ -402,7 +366,8 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
402 366
403 if (nilfs_doing_gc()) 367 if (nilfs_doing_gc())
404 flags = NILFS_SS_GC; 368 flags = NILFS_SS_GC;
405 err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime); 369 err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime,
370 sci->sc_sbi->s_nilfs->ns_cno);
406 if (unlikely(err)) 371 if (unlikely(err))
407 return err; 372 return err;
408 373
@@ -435,7 +400,7 @@ static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
435 return err; 400 return err;
436 segbuf = sci->sc_curseg; 401 segbuf = sci->sc_curseg;
437 } 402 }
438 err = nilfs_segbuf_extend_payload(segbuf, &sci->sc_super_root); 403 err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
439 if (likely(!err)) 404 if (likely(!err))
440 segbuf->sb_sum.flags |= NILFS_SS_SR; 405 segbuf->sb_sum.flags |= NILFS_SS_SR;
441 return err; 406 return err;
@@ -599,7 +564,7 @@ static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
599 *vblocknr = binfo->bi_v.bi_vblocknr; 564 *vblocknr = binfo->bi_v.bi_vblocknr;
600} 565}
601 566
602struct nilfs_sc_operations nilfs_sc_file_ops = { 567static struct nilfs_sc_operations nilfs_sc_file_ops = {
603 .collect_data = nilfs_collect_file_data, 568 .collect_data = nilfs_collect_file_data,
604 .collect_node = nilfs_collect_file_node, 569 .collect_node = nilfs_collect_file_node,
605 .collect_bmap = nilfs_collect_file_bmap, 570 .collect_bmap = nilfs_collect_file_bmap,
@@ -649,7 +614,7 @@ static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
649 *binfo_dat = binfo->bi_dat; 614 *binfo_dat = binfo->bi_dat;
650} 615}
651 616
652struct nilfs_sc_operations nilfs_sc_dat_ops = { 617static struct nilfs_sc_operations nilfs_sc_dat_ops = {
653 .collect_data = nilfs_collect_dat_data, 618 .collect_data = nilfs_collect_dat_data,
654 .collect_node = nilfs_collect_file_node, 619 .collect_node = nilfs_collect_file_node,
655 .collect_bmap = nilfs_collect_dat_bmap, 620 .collect_bmap = nilfs_collect_dat_bmap,
@@ -657,7 +622,7 @@ struct nilfs_sc_operations nilfs_sc_dat_ops = {
657 .write_node_binfo = nilfs_write_dat_node_binfo, 622 .write_node_binfo = nilfs_write_dat_node_binfo,
658}; 623};
659 624
660struct nilfs_sc_operations nilfs_sc_dsync_ops = { 625static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
661 .collect_data = nilfs_collect_file_data, 626 .collect_data = nilfs_collect_file_data,
662 .collect_node = NULL, 627 .collect_node = NULL,
663 .collect_bmap = NULL, 628 .collect_bmap = NULL,
@@ -932,43 +897,16 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci,
932 } 897 }
933} 898}
934 899
935/*
936 * CRC calculation routines
937 */
938static void nilfs_fill_in_super_root_crc(struct buffer_head *bh_sr, u32 seed)
939{
940 struct nilfs_super_root *raw_sr =
941 (struct nilfs_super_root *)bh_sr->b_data;
942 u32 crc;
943
944 crc = crc32_le(seed,
945 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
946 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum));
947 raw_sr->sr_sum = cpu_to_le32(crc);
948}
949
950static void nilfs_segctor_fill_in_checksums(struct nilfs_sc_info *sci,
951 u32 seed)
952{
953 struct nilfs_segment_buffer *segbuf;
954
955 if (sci->sc_super_root)
956 nilfs_fill_in_super_root_crc(sci->sc_super_root, seed);
957
958 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
959 nilfs_segbuf_fill_in_segsum_crc(segbuf, seed);
960 nilfs_segbuf_fill_in_data_crc(segbuf, seed);
961 }
962}
963
964static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, 900static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
965 struct the_nilfs *nilfs) 901 struct the_nilfs *nilfs)
966{ 902{
967 struct buffer_head *bh_sr = sci->sc_super_root; 903 struct buffer_head *bh_sr;
968 struct nilfs_super_root *raw_sr = 904 struct nilfs_super_root *raw_sr;
969 (struct nilfs_super_root *)bh_sr->b_data;
970 unsigned isz = nilfs->ns_inode_size; 905 unsigned isz = nilfs->ns_inode_size;
971 906
907 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
908 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
909
972 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 910 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES);
973 raw_sr->sr_nongc_ctime 911 raw_sr->sr_nongc_ctime
974 = cpu_to_le64(nilfs_doing_gc() ? 912 = cpu_to_le64(nilfs_doing_gc() ?
@@ -1491,7 +1429,6 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
1491 1429
1492 /* Collection retry loop */ 1430 /* Collection retry loop */
1493 for (;;) { 1431 for (;;) {
1494 sci->sc_super_root = NULL;
1495 sci->sc_nblk_this_inc = 0; 1432 sci->sc_nblk_this_inc = 0;
1496 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); 1433 sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
1497 1434
@@ -1568,7 +1505,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1568 ssp.offset = sizeof(struct nilfs_segment_summary); 1505 ssp.offset = sizeof(struct nilfs_segment_summary);
1569 1506
1570 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { 1507 list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
1571 if (bh == sci->sc_super_root) 1508 if (bh == segbuf->sb_super_root)
1572 break; 1509 break;
1573 if (!finfo) { 1510 if (!finfo) {
1574 finfo = nilfs_segctor_map_segsum_entry( 1511 finfo = nilfs_segctor_map_segsum_entry(
@@ -1729,7 +1666,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1729 1666
1730 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1667 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1731 b_assoc_buffers) { 1668 b_assoc_buffers) {
1732 if (bh == sci->sc_super_root) { 1669 if (bh == segbuf->sb_super_root) {
1733 if (bh->b_page != bd_page) { 1670 if (bh->b_page != bd_page) {
1734 lock_page(bd_page); 1671 lock_page(bd_page);
1735 clear_page_dirty_for_io(bd_page); 1672 clear_page_dirty_for_io(bd_page);
@@ -1848,7 +1785,7 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err)
1848} 1785}
1849 1786
1850static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1787static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1851 struct buffer_head *bh_sr, int err) 1788 int err)
1852{ 1789{
1853 struct nilfs_segment_buffer *segbuf; 1790 struct nilfs_segment_buffer *segbuf;
1854 struct page *bd_page = NULL, *fs_page = NULL; 1791 struct page *bd_page = NULL, *fs_page = NULL;
@@ -1869,7 +1806,7 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1869 1806
1870 list_for_each_entry(bh, &segbuf->sb_payload_buffers, 1807 list_for_each_entry(bh, &segbuf->sb_payload_buffers,
1871 b_assoc_buffers) { 1808 b_assoc_buffers) {
1872 if (bh == bh_sr) { 1809 if (bh == segbuf->sb_super_root) {
1873 if (bh->b_page != bd_page) { 1810 if (bh->b_page != bd_page) {
1874 end_page_writeback(bd_page); 1811 end_page_writeback(bd_page);
1875 bd_page = bh->b_page; 1812 bd_page = bh->b_page;
@@ -1898,7 +1835,7 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1898 1835
1899 list_splice_tail_init(&sci->sc_write_logs, &logs); 1836 list_splice_tail_init(&sci->sc_write_logs, &logs);
1900 ret = nilfs_wait_on_logs(&logs); 1837 ret = nilfs_wait_on_logs(&logs);
1901 nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret ? : err); 1838 nilfs_abort_logs(&logs, NULL, ret ? : err);
1902 1839
1903 list_splice_tail_init(&sci->sc_segbufs, &logs); 1840 list_splice_tail_init(&sci->sc_segbufs, &logs);
1904 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1841 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
@@ -1914,7 +1851,6 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1914 } 1851 }
1915 1852
1916 nilfs_destroy_logs(&logs); 1853 nilfs_destroy_logs(&logs);
1917 sci->sc_super_root = NULL;
1918} 1854}
1919 1855
1920static void nilfs_set_next_segment(struct the_nilfs *nilfs, 1856static void nilfs_set_next_segment(struct the_nilfs *nilfs,
@@ -1933,7 +1869,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1933 struct nilfs_segment_buffer *segbuf; 1869 struct nilfs_segment_buffer *segbuf;
1934 struct page *bd_page = NULL, *fs_page = NULL; 1870 struct page *bd_page = NULL, *fs_page = NULL;
1935 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 1871 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
1936 int update_sr = (sci->sc_super_root != NULL); 1872 int update_sr = false;
1937 1873
1938 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { 1874 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
1939 struct buffer_head *bh; 1875 struct buffer_head *bh;
@@ -1964,11 +1900,12 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1964 set_buffer_uptodate(bh); 1900 set_buffer_uptodate(bh);
1965 clear_buffer_dirty(bh); 1901 clear_buffer_dirty(bh);
1966 clear_buffer_nilfs_volatile(bh); 1902 clear_buffer_nilfs_volatile(bh);
1967 if (bh == sci->sc_super_root) { 1903 if (bh == segbuf->sb_super_root) {
1968 if (bh->b_page != bd_page) { 1904 if (bh->b_page != bd_page) {
1969 end_page_writeback(bd_page); 1905 end_page_writeback(bd_page);
1970 bd_page = bh->b_page; 1906 bd_page = bh->b_page;
1971 } 1907 }
1908 update_sr = true;
1972 break; 1909 break;
1973 } 1910 }
1974 if (bh->b_page != fs_page) { 1911 if (bh->b_page != fs_page) {
@@ -2115,7 +2052,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2115 struct nilfs_sb_info *sbi = sci->sc_sbi; 2052 struct nilfs_sb_info *sbi = sci->sc_sbi;
2116 struct the_nilfs *nilfs = sbi->s_nilfs; 2053 struct the_nilfs *nilfs = sbi->s_nilfs;
2117 struct page *failed_page; 2054 struct page *failed_page;
2118 int err, has_sr = 0; 2055 int err;
2119 2056
2120 sci->sc_stage.scnt = NILFS_ST_INIT; 2057 sci->sc_stage.scnt = NILFS_ST_INIT;
2121 2058
@@ -2143,8 +2080,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2143 if (unlikely(err)) 2080 if (unlikely(err))
2144 goto failed; 2081 goto failed;
2145 2082
2146 has_sr = (sci->sc_super_root != NULL);
2147
2148 /* Avoid empty segment */ 2083 /* Avoid empty segment */
2149 if (sci->sc_stage.scnt == NILFS_ST_DONE && 2084 if (sci->sc_stage.scnt == NILFS_ST_DONE &&
2150 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { 2085 NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) {
@@ -2159,7 +2094,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2159 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) 2094 if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
2160 nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); 2095 nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile);
2161 2096
2162 if (has_sr) { 2097 if (mode == SC_LSEG_SR &&
2098 sci->sc_stage.scnt >= NILFS_ST_CPFILE) {
2163 err = nilfs_segctor_fill_in_checkpoint(sci); 2099 err = nilfs_segctor_fill_in_checkpoint(sci);
2164 if (unlikely(err)) 2100 if (unlikely(err))
2165 goto failed_to_write; 2101 goto failed_to_write;
@@ -2171,11 +2107,12 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2171 /* Write partial segments */ 2107 /* Write partial segments */
2172 err = nilfs_segctor_prepare_write(sci, &failed_page); 2108 err = nilfs_segctor_prepare_write(sci, &failed_page);
2173 if (err) { 2109 if (err) {
2174 nilfs_abort_logs(&sci->sc_segbufs, failed_page, 2110 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
2175 sci->sc_super_root, err);
2176 goto failed_to_write; 2111 goto failed_to_write;
2177 } 2112 }
2178 nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); 2113
2114 nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2115 nilfs->ns_crc_seed);
2179 2116
2180 err = nilfs_segctor_write(sci, nilfs); 2117 err = nilfs_segctor_write(sci, nilfs);
2181 if (unlikely(err)) 2118 if (unlikely(err))
@@ -2196,8 +2133,6 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2196 } 2133 }
2197 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2134 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2198 2135
2199 sci->sc_super_root = NULL;
2200
2201 out: 2136 out:
2202 nilfs_segctor_check_out_files(sci, sbi); 2137 nilfs_segctor_check_out_files(sci, sbi);
2203 return err; 2138 return err;
@@ -2224,9 +2159,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2224static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) 2159static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
2225{ 2160{
2226 spin_lock(&sci->sc_state_lock); 2161 spin_lock(&sci->sc_state_lock);
2227 if (sci->sc_timer && !(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { 2162 if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
2228 sci->sc_timer->expires = jiffies + sci->sc_interval; 2163 sci->sc_timer.expires = jiffies + sci->sc_interval;
2229 add_timer(sci->sc_timer); 2164 add_timer(&sci->sc_timer);
2230 sci->sc_state |= NILFS_SEGCTOR_COMMIT; 2165 sci->sc_state |= NILFS_SEGCTOR_COMMIT;
2231 } 2166 }
2232 spin_unlock(&sci->sc_state_lock); 2167 spin_unlock(&sci->sc_state_lock);
@@ -2431,9 +2366,7 @@ static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
2431 spin_lock(&sci->sc_state_lock); 2366 spin_lock(&sci->sc_state_lock);
2432 sci->sc_seq_accepted = sci->sc_seq_request; 2367 sci->sc_seq_accepted = sci->sc_seq_request;
2433 spin_unlock(&sci->sc_state_lock); 2368 spin_unlock(&sci->sc_state_lock);
2434 2369 del_timer_sync(&sci->sc_timer);
2435 if (sci->sc_timer)
2436 del_timer_sync(sci->sc_timer);
2437} 2370}
2438 2371
2439/** 2372/**
@@ -2459,9 +2392,9 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2459 sci->sc_flush_request &= ~FLUSH_DAT_BIT; 2392 sci->sc_flush_request &= ~FLUSH_DAT_BIT;
2460 2393
2461 /* re-enable timer if checkpoint creation was not done */ 2394 /* re-enable timer if checkpoint creation was not done */
2462 if (sci->sc_timer && (sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2395 if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2463 time_before(jiffies, sci->sc_timer->expires)) 2396 time_before(jiffies, sci->sc_timer.expires))
2464 add_timer(sci->sc_timer); 2397 add_timer(&sci->sc_timer);
2465 } 2398 }
2466 spin_unlock(&sci->sc_state_lock); 2399 spin_unlock(&sci->sc_state_lock);
2467} 2400}
@@ -2640,13 +2573,10 @@ static int nilfs_segctor_thread(void *arg)
2640{ 2573{
2641 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2574 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2642 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 2575 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
2643 struct timer_list timer;
2644 int timeout = 0; 2576 int timeout = 0;
2645 2577
2646 init_timer(&timer); 2578 sci->sc_timer.data = (unsigned long)current;
2647 timer.data = (unsigned long)current; 2579 sci->sc_timer.function = nilfs_construction_timeout;
2648 timer.function = nilfs_construction_timeout;
2649 sci->sc_timer = &timer;
2650 2580
2651 /* start sync. */ 2581 /* start sync. */
2652 sci->sc_task = current; 2582 sci->sc_task = current;
@@ -2695,7 +2625,7 @@ static int nilfs_segctor_thread(void *arg)
2695 should_sleep = 0; 2625 should_sleep = 0;
2696 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) 2626 else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
2697 should_sleep = time_before(jiffies, 2627 should_sleep = time_before(jiffies,
2698 sci->sc_timer->expires); 2628 sci->sc_timer.expires);
2699 2629
2700 if (should_sleep) { 2630 if (should_sleep) {
2701 spin_unlock(&sci->sc_state_lock); 2631 spin_unlock(&sci->sc_state_lock);
@@ -2704,7 +2634,7 @@ static int nilfs_segctor_thread(void *arg)
2704 } 2634 }
2705 finish_wait(&sci->sc_wait_daemon, &wait); 2635 finish_wait(&sci->sc_wait_daemon, &wait);
2706 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2636 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2707 time_after_eq(jiffies, sci->sc_timer->expires)); 2637 time_after_eq(jiffies, sci->sc_timer.expires));
2708 2638
2709 if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) 2639 if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
2710 set_nilfs_discontinued(nilfs); 2640 set_nilfs_discontinued(nilfs);
@@ -2713,8 +2643,6 @@ static int nilfs_segctor_thread(void *arg)
2713 2643
2714 end_thread: 2644 end_thread:
2715 spin_unlock(&sci->sc_state_lock); 2645 spin_unlock(&sci->sc_state_lock);
2716 del_timer_sync(sci->sc_timer);
2717 sci->sc_timer = NULL;
2718 2646
2719 /* end sync. */ 2647 /* end sync. */
2720 sci->sc_task = NULL; 2648 sci->sc_task = NULL;
@@ -2750,13 +2678,6 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2750 } 2678 }
2751} 2679}
2752 2680
2753static int nilfs_segctor_init(struct nilfs_sc_info *sci)
2754{
2755 sci->sc_seq_done = sci->sc_seq_request;
2756
2757 return nilfs_segctor_start_thread(sci);
2758}
2759
2760/* 2681/*
2761 * Setup & clean-up functions 2682 * Setup & clean-up functions
2762 */ 2683 */
@@ -2780,6 +2701,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
2780 INIT_LIST_HEAD(&sci->sc_write_logs); 2701 INIT_LIST_HEAD(&sci->sc_write_logs);
2781 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2702 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2782 INIT_LIST_HEAD(&sci->sc_copied_buffers); 2703 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2704 init_timer(&sci->sc_timer);
2783 2705
2784 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2706 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
2785 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; 2707 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2846,6 +2768,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2846 2768
2847 down_write(&sbi->s_nilfs->ns_segctor_sem); 2769 down_write(&sbi->s_nilfs->ns_segctor_sem);
2848 2770
2771 del_timer_sync(&sci->sc_timer);
2849 kfree(sci); 2772 kfree(sci);
2850} 2773}
2851 2774
@@ -2880,7 +2803,7 @@ int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi)
2880 return -ENOMEM; 2803 return -ENOMEM;
2881 2804
2882 nilfs_attach_writer(nilfs, sbi); 2805 nilfs_attach_writer(nilfs, sbi);
2883 err = nilfs_segctor_init(NILFS_SC(sbi)); 2806 err = nilfs_segctor_start_thread(NILFS_SC(sbi));
2884 if (err) { 2807 if (err) {
2885 nilfs_detach_writer(nilfs, sbi); 2808 nilfs_detach_writer(nilfs, sbi);
2886 kfree(sbi->s_sc_info); 2809 kfree(sbi->s_sc_info);
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 82dfd6a686b9..dca142361ccf 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -100,7 +100,6 @@ struct nilfs_segsum_pointer {
100 * @sc_write_logs: List of segment buffers to hold logs under writing 100 * @sc_write_logs: List of segment buffers to hold logs under writing
101 * @sc_segbuf_nblocks: Number of available blocks in segment buffers. 101 * @sc_segbuf_nblocks: Number of available blocks in segment buffers.
102 * @sc_curseg: Current segment buffer 102 * @sc_curseg: Current segment buffer
103 * @sc_super_root: Pointer to the super root buffer
104 * @sc_stage: Collection stage 103 * @sc_stage: Collection stage
105 * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary 104 * @sc_finfo_ptr: pointer to the current finfo struct in the segment summary
106 * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary 105 * @sc_binfo_ptr: pointer to the current binfo struct in the segment summary
@@ -148,7 +147,6 @@ struct nilfs_sc_info {
148 struct list_head sc_write_logs; 147 struct list_head sc_write_logs;
149 unsigned long sc_segbuf_nblocks; 148 unsigned long sc_segbuf_nblocks;
150 struct nilfs_segment_buffer *sc_curseg; 149 struct nilfs_segment_buffer *sc_curseg;
151 struct buffer_head *sc_super_root;
152 150
153 struct nilfs_cstage sc_stage; 151 struct nilfs_cstage sc_stage;
154 152
@@ -179,7 +177,7 @@ struct nilfs_sc_info {
179 unsigned long sc_lseg_stime; /* in 1/HZ seconds */ 177 unsigned long sc_lseg_stime; /* in 1/HZ seconds */
180 unsigned long sc_watermark; 178 unsigned long sc_watermark;
181 179
182 struct timer_list *sc_timer; 180 struct timer_list sc_timer;
183 struct task_struct *sc_task; 181 struct task_struct *sc_task;
184}; 182};
185 183
@@ -219,6 +217,8 @@ enum {
219 */ 217 */
220#define NILFS_SC_DEFAULT_WATERMARK 3600 218#define NILFS_SC_DEFAULT_WATERMARK 3600
221 219
220/* super.c */
221extern struct kmem_cache *nilfs_transaction_cachep;
222 222
223/* segment.c */ 223/* segment.c */
224extern int nilfs_init_transaction_cache(void); 224extern int nilfs_init_transaction_cache(void);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e7655a..03b34b738993 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -67,6 +67,11 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
67 "(NILFS)"); 67 "(NILFS)");
68MODULE_LICENSE("GPL"); 68MODULE_LICENSE("GPL");
69 69
70struct kmem_cache *nilfs_inode_cachep;
71struct kmem_cache *nilfs_transaction_cachep;
72struct kmem_cache *nilfs_segbuf_cachep;
73struct kmem_cache *nilfs_btree_path_cache;
74
70static int nilfs_remount(struct super_block *sb, int *flags, char *data); 75static int nilfs_remount(struct super_block *sb, int *flags, char *data);
71 76
72/** 77/**
@@ -129,7 +134,6 @@ void nilfs_warning(struct super_block *sb, const char *function,
129 va_end(args); 134 va_end(args);
130} 135}
131 136
132static struct kmem_cache *nilfs_inode_cachep;
133 137
134struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) 138struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs)
135{ 139{
@@ -155,34 +159,6 @@ void nilfs_destroy_inode(struct inode *inode)
155 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 159 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
156} 160}
157 161
158static void init_once(void *obj)
159{
160 struct nilfs_inode_info *ii = obj;
161
162 INIT_LIST_HEAD(&ii->i_dirty);
163#ifdef CONFIG_NILFS_XATTR
164 init_rwsem(&ii->xattr_sem);
165#endif
166 nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
167 ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
168 inode_init_once(&ii->vfs_inode);
169}
170
171static int nilfs_init_inode_cache(void)
172{
173 nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
174 sizeof(struct nilfs_inode_info),
175 0, SLAB_RECLAIM_ACCOUNT,
176 init_once);
177
178 return (nilfs_inode_cachep == NULL) ? -ENOMEM : 0;
179}
180
181static inline void nilfs_destroy_inode_cache(void)
182{
183 kmem_cache_destroy(nilfs_inode_cachep);
184}
185
186static void nilfs_clear_inode(struct inode *inode) 162static void nilfs_clear_inode(struct inode *inode)
187{ 163{
188 struct nilfs_inode_info *ii = NILFS_I(inode); 164 struct nilfs_inode_info *ii = NILFS_I(inode);
@@ -266,8 +242,8 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb)
266 int err; 242 int err;
267 243
268 /* nilfs->sem must be locked by the caller. */ 244 /* nilfs->sem must be locked by the caller. */
269 if (sbp[0]->s_magic != NILFS_SUPER_MAGIC) { 245 if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) {
270 if (sbp[1] && sbp[1]->s_magic == NILFS_SUPER_MAGIC) 246 if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC))
271 nilfs_swap_super_block(nilfs); 247 nilfs_swap_super_block(nilfs);
272 else { 248 else {
273 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", 249 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
@@ -470,10 +446,10 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
470 if (nilfs_test_opt(sbi, SNAPSHOT)) 446 if (nilfs_test_opt(sbi, SNAPSHOT))
471 seq_printf(seq, ",cp=%llu", 447 seq_printf(seq, ",cp=%llu",
472 (unsigned long long int)sbi->s_snapshot_cno); 448 (unsigned long long int)sbi->s_snapshot_cno);
473 if (nilfs_test_opt(sbi, ERRORS_RO))
474 seq_printf(seq, ",errors=remount-ro");
475 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 449 if (nilfs_test_opt(sbi, ERRORS_PANIC))
476 seq_printf(seq, ",errors=panic"); 450 seq_printf(seq, ",errors=panic");
451 if (nilfs_test_opt(sbi, ERRORS_CONT))
452 seq_printf(seq, ",errors=continue");
477 if (nilfs_test_opt(sbi, STRICT_ORDER)) 453 if (nilfs_test_opt(sbi, STRICT_ORDER))
478 seq_printf(seq, ",order=strict"); 454 seq_printf(seq, ",order=strict");
479 if (nilfs_test_opt(sbi, NORECOVERY)) 455 if (nilfs_test_opt(sbi, NORECOVERY))
@@ -631,7 +607,7 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi,
631 struct nilfs_super_block *sbp) 607 struct nilfs_super_block *sbp)
632{ 608{
633 sbi->s_mount_opt = 609 sbi->s_mount_opt =
634 NILFS_MOUNT_ERRORS_CONT | NILFS_MOUNT_BARRIER; 610 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
635} 611}
636 612
637static int nilfs_setup_super(struct nilfs_sb_info *sbi) 613static int nilfs_setup_super(struct nilfs_sb_info *sbi)
@@ -749,6 +725,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
749 sb->s_export_op = &nilfs_export_ops; 725 sb->s_export_op = &nilfs_export_ops;
750 sb->s_root = NULL; 726 sb->s_root = NULL;
751 sb->s_time_gran = 1; 727 sb->s_time_gran = 1;
728 sb->s_bdi = nilfs->ns_bdi;
752 729
753 err = load_nilfs(nilfs, sbi); 730 err = load_nilfs(nilfs, sbi);
754 if (err) 731 if (err)
@@ -777,9 +754,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
777 goto failed_sbi; 754 goto failed_sbi;
778 } 755 }
779 cno = sbi->s_snapshot_cno; 756 cno = sbi->s_snapshot_cno;
780 } else 757 }
781 /* Read-only mount */
782 sbi->s_snapshot_cno = cno;
783 } 758 }
784 759
785 err = nilfs_attach_checkpoint(sbi, cno); 760 err = nilfs_attach_checkpoint(sbi, cno);
@@ -848,7 +823,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
848 struct the_nilfs *nilfs = sbi->s_nilfs; 823 struct the_nilfs *nilfs = sbi->s_nilfs;
849 unsigned long old_sb_flags; 824 unsigned long old_sb_flags;
850 struct nilfs_mount_options old_opts; 825 struct nilfs_mount_options old_opts;
851 int err; 826 int was_snapshot, err;
852 827
853 lock_kernel(); 828 lock_kernel();
854 829
@@ -856,6 +831,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
856 old_sb_flags = sb->s_flags; 831 old_sb_flags = sb->s_flags;
857 old_opts.mount_opt = sbi->s_mount_opt; 832 old_opts.mount_opt = sbi->s_mount_opt;
858 old_opts.snapshot_cno = sbi->s_snapshot_cno; 833 old_opts.snapshot_cno = sbi->s_snapshot_cno;
834 was_snapshot = nilfs_test_opt(sbi, SNAPSHOT);
859 835
860 if (!parse_options(data, sb)) { 836 if (!parse_options(data, sb)) {
861 err = -EINVAL; 837 err = -EINVAL;
@@ -863,20 +839,32 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
863 } 839 }
864 sb->s_flags = (sb->s_flags & ~MS_POSIXACL); 840 sb->s_flags = (sb->s_flags & ~MS_POSIXACL);
865 841
866 if ((*flags & MS_RDONLY) && 842 err = -EINVAL;
867 sbi->s_snapshot_cno != old_opts.snapshot_cno) { 843 if (was_snapshot) {
868 printk(KERN_WARNING "NILFS (device %s): couldn't " 844 if (!(*flags & MS_RDONLY)) {
869 "remount to a different snapshot.\n", 845 printk(KERN_ERR "NILFS (device %s): cannot remount "
870 sb->s_id); 846 "snapshot read/write.\n",
871 err = -EINVAL; 847 sb->s_id);
872 goto restore_opts; 848 goto restore_opts;
849 } else if (sbi->s_snapshot_cno != old_opts.snapshot_cno) {
850 printk(KERN_ERR "NILFS (device %s): cannot "
851 "remount to a different snapshot.\n",
852 sb->s_id);
853 goto restore_opts;
854 }
855 } else {
856 if (nilfs_test_opt(sbi, SNAPSHOT)) {
857 printk(KERN_ERR "NILFS (device %s): cannot change "
858 "a regular mount to a snapshot.\n",
859 sb->s_id);
860 goto restore_opts;
861 }
873 } 862 }
874 863
875 if (!nilfs_valid_fs(nilfs)) { 864 if (!nilfs_valid_fs(nilfs)) {
876 printk(KERN_WARNING "NILFS (device %s): couldn't " 865 printk(KERN_WARNING "NILFS (device %s): couldn't "
877 "remount because the filesystem is in an " 866 "remount because the filesystem is in an "
878 "incomplete recovery state.\n", sb->s_id); 867 "incomplete recovery state.\n", sb->s_id);
879 err = -EINVAL;
880 goto restore_opts; 868 goto restore_opts;
881 } 869 }
882 870
@@ -887,9 +875,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
887 nilfs_detach_segment_constructor(sbi); 875 nilfs_detach_segment_constructor(sbi);
888 sb->s_flags |= MS_RDONLY; 876 sb->s_flags |= MS_RDONLY;
889 877
890 sbi->s_snapshot_cno = nilfs_last_cno(nilfs);
891 /* nilfs_set_opt(sbi, SNAPSHOT); */
892
893 /* 878 /*
894 * Remounting a valid RW partition RDONLY, so set 879 * Remounting a valid RW partition RDONLY, so set
895 * the RDONLY flag and then mark the partition as valid again. 880 * the RDONLY flag and then mark the partition as valid again.
@@ -908,24 +893,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
908 * store the current valid flag. (It may have been changed 893 * store the current valid flag. (It may have been changed
909 * by fsck since we originally mounted the partition.) 894 * by fsck since we originally mounted the partition.)
910 */ 895 */
911 if (nilfs->ns_current && nilfs->ns_current != sbi) {
912 printk(KERN_WARNING "NILFS (device %s): couldn't "
913 "remount because an RW-mount exists.\n",
914 sb->s_id);
915 err = -EBUSY;
916 goto restore_opts;
917 }
918 if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
919 printk(KERN_WARNING "NILFS (device %s): couldn't "
920 "remount because the current RO-mount is not "
921 "the latest one.\n",
922 sb->s_id);
923 err = -EINVAL;
924 goto restore_opts;
925 }
926 sb->s_flags &= ~MS_RDONLY; 896 sb->s_flags &= ~MS_RDONLY;
927 nilfs_clear_opt(sbi, SNAPSHOT);
928 sbi->s_snapshot_cno = 0;
929 897
930 err = nilfs_attach_segment_constructor(sbi); 898 err = nilfs_attach_segment_constructor(sbi);
931 if (err) 899 if (err)
@@ -934,8 +902,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
934 down_write(&nilfs->ns_sem); 902 down_write(&nilfs->ns_sem);
935 nilfs_setup_super(sbi); 903 nilfs_setup_super(sbi);
936 up_write(&nilfs->ns_sem); 904 up_write(&nilfs->ns_sem);
937
938 nilfs->ns_current = sbi;
939 } 905 }
940 out: 906 out:
941 up_write(&nilfs->ns_super_sem); 907 up_write(&nilfs->ns_super_sem);
@@ -1021,10 +987,14 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1021{ 987{
1022 struct nilfs_super_data sd; 988 struct nilfs_super_data sd;
1023 struct super_block *s; 989 struct super_block *s;
990 fmode_t mode = FMODE_READ;
1024 struct the_nilfs *nilfs; 991 struct the_nilfs *nilfs;
1025 int err, need_to_close = 1; 992 int err, need_to_close = 1;
1026 993
1027 sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type); 994 if (!(flags & MS_RDONLY))
995 mode |= FMODE_WRITE;
996
997 sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
1028 if (IS_ERR(sd.bdev)) 998 if (IS_ERR(sd.bdev))
1029 return PTR_ERR(sd.bdev); 999 return PTR_ERR(sd.bdev);
1030 1000
@@ -1091,10 +1061,12 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1091 1061
1092 /* New superblock instance created */ 1062 /* New superblock instance created */
1093 s->s_flags = flags; 1063 s->s_flags = flags;
1064 s->s_mode = mode;
1094 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); 1065 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
1095 sb_set_blocksize(s, block_size(sd.bdev)); 1066 sb_set_blocksize(s, block_size(sd.bdev));
1096 1067
1097 err = nilfs_fill_super(s, data, flags & MS_VERBOSE, nilfs); 1068 err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0,
1069 nilfs);
1098 if (err) 1070 if (err)
1099 goto cancel_new; 1071 goto cancel_new;
1100 1072
@@ -1105,7 +1077,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1105 mutex_unlock(&nilfs->ns_mount_mutex); 1077 mutex_unlock(&nilfs->ns_mount_mutex);
1106 put_nilfs(nilfs); 1078 put_nilfs(nilfs);
1107 if (need_to_close) 1079 if (need_to_close)
1108 close_bdev_exclusive(sd.bdev, flags); 1080 close_bdev_exclusive(sd.bdev, mode);
1109 simple_set_mnt(mnt, s); 1081 simple_set_mnt(mnt, s);
1110 return 0; 1082 return 0;
1111 1083
@@ -1113,7 +1085,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1113 mutex_unlock(&nilfs->ns_mount_mutex); 1085 mutex_unlock(&nilfs->ns_mount_mutex);
1114 put_nilfs(nilfs); 1086 put_nilfs(nilfs);
1115 failed: 1087 failed:
1116 close_bdev_exclusive(sd.bdev, flags); 1088 close_bdev_exclusive(sd.bdev, mode);
1117 1089
1118 return err; 1090 return err;
1119 1091
@@ -1123,7 +1095,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1123 put_nilfs(nilfs); 1095 put_nilfs(nilfs);
1124 deactivate_locked_super(s); 1096 deactivate_locked_super(s);
1125 /* 1097 /*
1126 * deactivate_super() invokes close_bdev_exclusive(). 1098 * deactivate_locked_super() invokes close_bdev_exclusive().
1127 * We must finish all post-cleaning before this call; 1099 * We must finish all post-cleaning before this call;
1128 * put_nilfs() needs the block device. 1100 * put_nilfs() needs the block device.
1129 */ 1101 */
@@ -1138,54 +1110,93 @@ struct file_system_type nilfs_fs_type = {
1138 .fs_flags = FS_REQUIRES_DEV, 1110 .fs_flags = FS_REQUIRES_DEV,
1139}; 1111};
1140 1112
1141static int __init init_nilfs_fs(void) 1113static void nilfs_inode_init_once(void *obj)
1142{ 1114{
1143 int err; 1115 struct nilfs_inode_info *ii = obj;
1144
1145 err = nilfs_init_inode_cache();
1146 if (err)
1147 goto failed;
1148 1116
1149 err = nilfs_init_transaction_cache(); 1117 INIT_LIST_HEAD(&ii->i_dirty);
1150 if (err) 1118#ifdef CONFIG_NILFS_XATTR
1151 goto failed_inode_cache; 1119 init_rwsem(&ii->xattr_sem);
1120#endif
1121 nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
1122 ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union;
1123 inode_init_once(&ii->vfs_inode);
1124}
1152 1125
1153 err = nilfs_init_segbuf_cache(); 1126static void nilfs_segbuf_init_once(void *obj)
1154 if (err) 1127{
1155 goto failed_transaction_cache; 1128 memset(obj, 0, sizeof(struct nilfs_segment_buffer));
1129}
1156 1130
1157 err = nilfs_btree_path_cache_init(); 1131static void nilfs_destroy_cachep(void)
1158 if (err) 1132{
1159 goto failed_segbuf_cache; 1133 if (nilfs_inode_cachep)
1134 kmem_cache_destroy(nilfs_inode_cachep);
1135 if (nilfs_transaction_cachep)
1136 kmem_cache_destroy(nilfs_transaction_cachep);
1137 if (nilfs_segbuf_cachep)
1138 kmem_cache_destroy(nilfs_segbuf_cachep);
1139 if (nilfs_btree_path_cache)
1140 kmem_cache_destroy(nilfs_btree_path_cache);
1141}
1160 1142
1161 err = register_filesystem(&nilfs_fs_type); 1143static int __init nilfs_init_cachep(void)
1162 if (err) 1144{
1163 goto failed_btree_path_cache; 1145 nilfs_inode_cachep = kmem_cache_create("nilfs2_inode_cache",
1146 sizeof(struct nilfs_inode_info), 0,
1147 SLAB_RECLAIM_ACCOUNT, nilfs_inode_init_once);
1148 if (!nilfs_inode_cachep)
1149 goto fail;
1150
1151 nilfs_transaction_cachep = kmem_cache_create("nilfs2_transaction_cache",
1152 sizeof(struct nilfs_transaction_info), 0,
1153 SLAB_RECLAIM_ACCOUNT, NULL);
1154 if (!nilfs_transaction_cachep)
1155 goto fail;
1156
1157 nilfs_segbuf_cachep = kmem_cache_create("nilfs2_segbuf_cache",
1158 sizeof(struct nilfs_segment_buffer), 0,
1159 SLAB_RECLAIM_ACCOUNT, nilfs_segbuf_init_once);
1160 if (!nilfs_segbuf_cachep)
1161 goto fail;
1162
1163 nilfs_btree_path_cache = kmem_cache_create("nilfs2_btree_path_cache",
1164 sizeof(struct nilfs_btree_path) * NILFS_BTREE_LEVEL_MAX,
1165 0, 0, NULL);
1166 if (!nilfs_btree_path_cache)
1167 goto fail;
1164 1168
1165 return 0; 1169 return 0;
1166 1170
1167 failed_btree_path_cache: 1171fail:
1168 nilfs_btree_path_cache_destroy(); 1172 nilfs_destroy_cachep();
1173 return -ENOMEM;
1174}
1175
1176static int __init init_nilfs_fs(void)
1177{
1178 int err;
1169 1179
1170 failed_segbuf_cache: 1180 err = nilfs_init_cachep();
1171 nilfs_destroy_segbuf_cache(); 1181 if (err)
1182 goto fail;
1172 1183
1173 failed_transaction_cache: 1184 err = register_filesystem(&nilfs_fs_type);
1174 nilfs_destroy_transaction_cache(); 1185 if (err)
1186 goto free_cachep;
1175 1187
1176 failed_inode_cache: 1188 printk(KERN_INFO "NILFS version 2 loaded\n");
1177 nilfs_destroy_inode_cache(); 1189 return 0;
1178 1190
1179 failed: 1191free_cachep:
1192 nilfs_destroy_cachep();
1193fail:
1180 return err; 1194 return err;
1181} 1195}
1182 1196
1183static void __exit exit_nilfs_fs(void) 1197static void __exit exit_nilfs_fs(void)
1184{ 1198{
1185 nilfs_destroy_segbuf_cache(); 1199 nilfs_destroy_cachep();
1186 nilfs_destroy_transaction_cache();
1187 nilfs_destroy_inode_cache();
1188 nilfs_btree_path_cache_destroy();
1189 unregister_filesystem(&nilfs_fs_type); 1200 unregister_filesystem(&nilfs_fs_type);
1190} 1201}
1191 1202
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 7ffcf2b8b1f4..8c1097327abc 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -486,11 +486,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
486 printk(KERN_WARNING 486 printk(KERN_WARNING
487 "NILFS warning: unable to read secondary superblock\n"); 487 "NILFS warning: unable to read secondary superblock\n");
488 488
489 /*
490 * Compare two super blocks and set 1 in swp if the secondary
491 * super block is valid and newer. Otherwise, set 0 in swp.
492 */
489 valid[0] = nilfs_valid_sb(sbp[0]); 493 valid[0] = nilfs_valid_sb(sbp[0]);
490 valid[1] = nilfs_valid_sb(sbp[1]); 494 valid[1] = nilfs_valid_sb(sbp[1]);
491 swp = valid[1] && 495 swp = valid[1] && (!valid[0] ||
492 (!valid[0] || 496 le64_to_cpu(sbp[1]->s_last_cno) >
493 le64_to_cpu(sbp[1]->s_wtime) > le64_to_cpu(sbp[0]->s_wtime)); 497 le64_to_cpu(sbp[0]->s_last_cno));
494 498
495 if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) { 499 if (valid[swp] && nilfs_sb2_bad_offset(sbp[swp], sb2off)) {
496 brelse(sbh[1]); 500 brelse(sbh[1]);
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbffe729..b3a159b21cfd 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
15 15
16config INOTIFY_USER 16config INOTIFY_USER
17 bool "Inotify support for userspace" 17 bool "Inotify support for userspace"
18 select ANON_INODES
18 select FSNOTIFY 19 select FSNOTIFY
19 default y 20 default y
20 ---help--- 21 ---help---
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1afb0a10229f..e27960cd76ab 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -28,6 +28,7 @@
28#include <linux/path.h> /* struct path */ 28#include <linux/path.h> /* struct path */
29#include <linux/slab.h> /* kmem_* */ 29#include <linux/slab.h> /* kmem_* */
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/sched.h>
31 32
32#include "inotify.h" 33#include "inotify.h"
33 34
@@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
146 idr_for_each(&group->inotify_data.idr, idr_callback, group); 147 idr_for_each(&group->inotify_data.idr, idr_callback, group);
147 idr_remove_all(&group->inotify_data.idr); 148 idr_remove_all(&group->inotify_data.idr);
148 idr_destroy(&group->inotify_data.idr); 149 idr_destroy(&group->inotify_data.idr);
150 free_uid(group->inotify_data.user);
149} 151}
150 152
151void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) 153void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 472cdf29ef82..e46ca685b9be 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -546,21 +546,24 @@ retry:
546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) 546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
547 goto out_err; 547 goto out_err;
548 548
549 /* we are putting the mark on the idr, take a reference */
550 fsnotify_get_mark(&tmp_ientry->fsn_entry);
551
549 spin_lock(&group->inotify_data.idr_lock); 552 spin_lock(&group->inotify_data.idr_lock);
550 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 553 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
551 group->inotify_data.last_wd+1, 554 group->inotify_data.last_wd+1,
552 &tmp_ientry->wd); 555 &tmp_ientry->wd);
553 spin_unlock(&group->inotify_data.idr_lock); 556 spin_unlock(&group->inotify_data.idr_lock);
554 if (ret) { 557 if (ret) {
558 /* we didn't get on the idr, drop the idr reference */
559 fsnotify_put_mark(&tmp_ientry->fsn_entry);
560
555 /* idr was out of memory allocate and try again */ 561 /* idr was out of memory allocate and try again */
556 if (ret == -EAGAIN) 562 if (ret == -EAGAIN)
557 goto retry; 563 goto retry;
558 goto out_err; 564 goto out_err;
559 } 565 }
560 566
561 /* we put the mark on the idr, take a reference */
562 fsnotify_get_mark(&tmp_ientry->fsn_entry);
563
564 /* we are on the idr, now get on the inode */ 567 /* we are on the idr, now get on the inode */
565 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); 568 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
566 if (ret) { 569 if (ret) {
@@ -578,16 +581,13 @@ retry:
578 /* return the watch descriptor for this new entry */ 581 /* return the watch descriptor for this new entry */
579 ret = tmp_ientry->wd; 582 ret = tmp_ientry->wd;
580 583
581 /* match the ref from fsnotify_init_markentry() */
582 fsnotify_put_mark(&tmp_ientry->fsn_entry);
583
584 /* if this mark added a new event update the group mask */ 584 /* if this mark added a new event update the group mask */
585 if (mask & ~group->mask) 585 if (mask & ~group->mask)
586 fsnotify_recalc_group_mask(group); 586 fsnotify_recalc_group_mask(group);
587 587
588out_err: 588out_err:
589 if (ret < 0) 589 /* match the ref from fsnotify_init_markentry() */
590 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); 590 fsnotify_put_mark(&tmp_ientry->fsn_entry);
591 591
592 return ret; 592 return ret;
593} 593}
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 791c0886c060..07d9fd854350 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -29,6 +29,7 @@ ocfs2-objs := \
29 mmap.o \ 29 mmap.o \
30 namei.o \ 30 namei.o \
31 refcounttree.o \ 31 refcounttree.o \
32 reservations.o \
32 resize.o \ 33 resize.o \
33 slot_map.o \ 34 slot_map.o \
34 suballoc.o \ 35 suballoc.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9f8bd913c51e..215e12ce1d85 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1006,7 +1006,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1006 int count, status, i; 1006 int count, status, i;
1007 u16 suballoc_bit_start; 1007 u16 suballoc_bit_start;
1008 u32 num_got; 1008 u32 num_got;
1009 u64 first_blkno; 1009 u64 suballoc_loc, first_blkno;
1010 struct ocfs2_super *osb = 1010 struct ocfs2_super *osb =
1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); 1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
1012 struct ocfs2_extent_block *eb; 1012 struct ocfs2_extent_block *eb;
@@ -1015,10 +1015,10 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1015 1015
1016 count = 0; 1016 count = 0;
1017 while (count < wanted) { 1017 while (count < wanted) {
1018 status = ocfs2_claim_metadata(osb, 1018 status = ocfs2_claim_metadata(handle,
1019 handle,
1020 meta_ac, 1019 meta_ac,
1021 wanted - count, 1020 wanted - count,
1021 &suballoc_loc,
1022 &suballoc_bit_start, 1022 &suballoc_bit_start,
1023 &num_got, 1023 &num_got,
1024 &first_blkno); 1024 &first_blkno);
@@ -1052,6 +1052,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1052 eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 1052 eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
1053 eb->h_suballoc_slot = 1053 eb->h_suballoc_slot =
1054 cpu_to_le16(meta_ac->ac_alloc_slot); 1054 cpu_to_le16(meta_ac->ac_alloc_slot);
1055 eb->h_suballoc_loc = cpu_to_le64(suballoc_loc);
1055 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1056 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1056 eb->h_list.l_count = 1057 eb->h_list.l_count =
1057 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 1058 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
@@ -1061,11 +1062,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1061 1062
1062 /* We'll also be dirtied by the caller, so 1063 /* We'll also be dirtied by the caller, so
1063 * this isn't absolutely necessary. */ 1064 * this isn't absolutely necessary. */
1064 status = ocfs2_journal_dirty(handle, bhs[i]); 1065 ocfs2_journal_dirty(handle, bhs[i]);
1065 if (status < 0) {
1066 mlog_errno(status);
1067 goto bail;
1068 }
1069 } 1066 }
1070 1067
1071 count += num_got; 1068 count += num_got;
@@ -1129,8 +1126,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1129 goto out; 1126 goto out;
1130 } 1127 }
1131 1128
1132 status = ocfs2_extend_trans(handle, path_num_items(path) + 1129 status = ocfs2_extend_trans(handle, path_num_items(path));
1133 handle->h_buffer_credits);
1134 if (status < 0) { 1130 if (status < 0) {
1135 mlog_errno(status); 1131 mlog_errno(status);
1136 goto out; 1132 goto out;
@@ -1270,12 +1266,7 @@ static int ocfs2_add_branch(handle_t *handle,
1270 if (!eb_el->l_tree_depth) 1266 if (!eb_el->l_tree_depth)
1271 new_last_eb_blk = le64_to_cpu(eb->h_blkno); 1267 new_last_eb_blk = le64_to_cpu(eb->h_blkno);
1272 1268
1273 status = ocfs2_journal_dirty(handle, bh); 1269 ocfs2_journal_dirty(handle, bh);
1274 if (status < 0) {
1275 mlog_errno(status);
1276 goto bail;
1277 }
1278
1279 next_blkno = le64_to_cpu(eb->h_blkno); 1270 next_blkno = le64_to_cpu(eb->h_blkno);
1280 } 1271 }
1281 1272
@@ -1321,17 +1312,10 @@ static int ocfs2_add_branch(handle_t *handle,
1321 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; 1312 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
1322 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 1313 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
1323 1314
1324 status = ocfs2_journal_dirty(handle, *last_eb_bh); 1315 ocfs2_journal_dirty(handle, *last_eb_bh);
1325 if (status < 0) 1316 ocfs2_journal_dirty(handle, et->et_root_bh);
1326 mlog_errno(status); 1317 if (eb_bh)
1327 status = ocfs2_journal_dirty(handle, et->et_root_bh); 1318 ocfs2_journal_dirty(handle, eb_bh);
1328 if (status < 0)
1329 mlog_errno(status);
1330 if (eb_bh) {
1331 status = ocfs2_journal_dirty(handle, eb_bh);
1332 if (status < 0)
1333 mlog_errno(status);
1334 }
1335 1319
1336 /* 1320 /*
1337 * Some callers want to track the rightmost leaf so pass it 1321 * Some callers want to track the rightmost leaf so pass it
@@ -1399,11 +1383,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1399 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) 1383 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
1400 eb_el->l_recs[i] = root_el->l_recs[i]; 1384 eb_el->l_recs[i] = root_el->l_recs[i];
1401 1385
1402 status = ocfs2_journal_dirty(handle, new_eb_bh); 1386 ocfs2_journal_dirty(handle, new_eb_bh);
1403 if (status < 0) {
1404 mlog_errno(status);
1405 goto bail;
1406 }
1407 1387
1408 status = ocfs2_et_root_journal_access(handle, et, 1388 status = ocfs2_et_root_journal_access(handle, et,
1409 OCFS2_JOURNAL_ACCESS_WRITE); 1389 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1428,11 +1408,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1428 if (root_el->l_tree_depth == cpu_to_le16(1)) 1408 if (root_el->l_tree_depth == cpu_to_le16(1))
1429 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 1409 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
1430 1410
1431 status = ocfs2_journal_dirty(handle, et->et_root_bh); 1411 ocfs2_journal_dirty(handle, et->et_root_bh);
1432 if (status < 0) {
1433 mlog_errno(status);
1434 goto bail;
1435 }
1436 1412
1437 *ret_new_eb_bh = new_eb_bh; 1413 *ret_new_eb_bh = new_eb_bh;
1438 new_eb_bh = NULL; 1414 new_eb_bh = NULL;
@@ -2064,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2064 struct ocfs2_path *right_path, 2040 struct ocfs2_path *right_path,
2065 int subtree_index) 2041 int subtree_index)
2066{ 2042{
2067 int ret, i, idx; 2043 int i, idx;
2068 struct ocfs2_extent_list *el, *left_el, *right_el; 2044 struct ocfs2_extent_list *el, *left_el, *right_el;
2069 struct ocfs2_extent_rec *left_rec, *right_rec; 2045 struct ocfs2_extent_rec *left_rec, *right_rec;
2070 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 2046 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
@@ -2102,13 +2078,8 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2102 ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec, 2078 ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
2103 right_el); 2079 right_el);
2104 2080
2105 ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh); 2081 ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
2106 if (ret) 2082 ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
2107 mlog_errno(ret);
2108
2109 ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
2110 if (ret)
2111 mlog_errno(ret);
2112 2083
2113 /* 2084 /*
2114 * Setup our list pointers now so that the current 2085 * Setup our list pointers now so that the current
@@ -2132,9 +2103,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2132 2103
2133 root_bh = left_path->p_node[subtree_index].bh; 2104 root_bh = left_path->p_node[subtree_index].bh;
2134 2105
2135 ret = ocfs2_journal_dirty(handle, root_bh); 2106 ocfs2_journal_dirty(handle, root_bh);
2136 if (ret)
2137 mlog_errno(ret);
2138} 2107}
2139 2108
2140static int ocfs2_rotate_subtree_right(handle_t *handle, 2109static int ocfs2_rotate_subtree_right(handle_t *handle,
@@ -2207,11 +2176,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle,
2207 2176
2208 ocfs2_create_empty_extent(right_el); 2177 ocfs2_create_empty_extent(right_el);
2209 2178
2210 ret = ocfs2_journal_dirty(handle, right_leaf_bh); 2179 ocfs2_journal_dirty(handle, right_leaf_bh);
2211 if (ret) {
2212 mlog_errno(ret);
2213 goto out;
2214 }
2215 2180
2216 /* Do the copy now. */ 2181 /* Do the copy now. */
2217 i = le16_to_cpu(left_el->l_next_free_rec) - 1; 2182 i = le16_to_cpu(left_el->l_next_free_rec) - 1;
@@ -2230,11 +2195,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle,
2230 memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2195 memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2231 le16_add_cpu(&left_el->l_next_free_rec, 1); 2196 le16_add_cpu(&left_el->l_next_free_rec, 1);
2232 2197
2233 ret = ocfs2_journal_dirty(handle, left_leaf_bh); 2198 ocfs2_journal_dirty(handle, left_leaf_bh);
2234 if (ret) {
2235 mlog_errno(ret);
2236 goto out;
2237 }
2238 2199
2239 ocfs2_complete_edge_insert(handle, left_path, right_path, 2200 ocfs2_complete_edge_insert(handle, left_path, right_path,
2240 subtree_index); 2201 subtree_index);
@@ -2249,8 +2210,8 @@ out:
2249 * 2210 *
2250 * Will return zero if the path passed in is already the leftmost path. 2211 * Will return zero if the path passed in is already the leftmost path.
2251 */ 2212 */
2252static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, 2213int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
2253 struct ocfs2_path *path, u32 *cpos) 2214 struct ocfs2_path *path, u32 *cpos)
2254{ 2215{
2255 int i, j, ret = 0; 2216 int i, j, ret = 0;
2256 u64 blkno; 2217 u64 blkno;
@@ -2327,20 +2288,14 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
2327 int op_credits, 2288 int op_credits,
2328 struct ocfs2_path *path) 2289 struct ocfs2_path *path)
2329{ 2290{
2330 int ret; 2291 int ret = 0;
2331 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; 2292 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
2332 2293
2333 if (handle->h_buffer_credits < credits) { 2294 if (handle->h_buffer_credits < credits)
2334 ret = ocfs2_extend_trans(handle, 2295 ret = ocfs2_extend_trans(handle,
2335 credits - handle->h_buffer_credits); 2296 credits - handle->h_buffer_credits);
2336 if (ret)
2337 return ret;
2338 2297
2339 if (unlikely(handle->h_buffer_credits < credits)) 2298 return ret;
2340 return ocfs2_extend_trans(handle, credits);
2341 }
2342
2343 return 0;
2344} 2299}
2345 2300
2346/* 2301/*
@@ -2584,8 +2539,7 @@ static int ocfs2_update_edge_lengths(handle_t *handle,
2584 * records for all the bh in the path. 2539 * records for all the bh in the path.
2585 * So we have to allocate extra credits and access them. 2540 * So we have to allocate extra credits and access them.
2586 */ 2541 */
2587 ret = ocfs2_extend_trans(handle, 2542 ret = ocfs2_extend_trans(handle, subtree_index);
2588 handle->h_buffer_credits + subtree_index);
2589 if (ret) { 2543 if (ret) {
2590 mlog_errno(ret); 2544 mlog_errno(ret);
2591 goto out; 2545 goto out;
@@ -2823,12 +2777,8 @@ static int ocfs2_rotate_subtree_left(handle_t *handle,
2823 ocfs2_remove_empty_extent(right_leaf_el); 2777 ocfs2_remove_empty_extent(right_leaf_el);
2824 } 2778 }
2825 2779
2826 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 2780 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
2827 if (ret) 2781 ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2828 mlog_errno(ret);
2829 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2830 if (ret)
2831 mlog_errno(ret);
2832 2782
2833 if (del_right_subtree) { 2783 if (del_right_subtree) {
2834 ocfs2_unlink_subtree(handle, et, left_path, right_path, 2784 ocfs2_unlink_subtree(handle, et, left_path, right_path,
@@ -2851,9 +2801,7 @@ static int ocfs2_rotate_subtree_left(handle_t *handle,
2851 if (right_has_empty) 2801 if (right_has_empty)
2852 ocfs2_remove_empty_extent(left_leaf_el); 2802 ocfs2_remove_empty_extent(left_leaf_el);
2853 2803
2854 ret = ocfs2_journal_dirty(handle, et_root_bh); 2804 ocfs2_journal_dirty(handle, et_root_bh);
2855 if (ret)
2856 mlog_errno(ret);
2857 2805
2858 *deleted = 1; 2806 *deleted = 1;
2859 } else 2807 } else
@@ -2962,10 +2910,7 @@ static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
2962 } 2910 }
2963 2911
2964 ocfs2_remove_empty_extent(el); 2912 ocfs2_remove_empty_extent(el);
2965 2913 ocfs2_journal_dirty(handle, bh);
2966 ret = ocfs2_journal_dirty(handle, bh);
2967 if (ret)
2968 mlog_errno(ret);
2969 2914
2970out: 2915out:
2971 return ret; 2916 return ret;
@@ -3506,15 +3451,9 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3506 3451
3507 ocfs2_cleanup_merge(el, index); 3452 ocfs2_cleanup_merge(el, index);
3508 3453
3509 ret = ocfs2_journal_dirty(handle, bh); 3454 ocfs2_journal_dirty(handle, bh);
3510 if (ret)
3511 mlog_errno(ret);
3512
3513 if (right_path) { 3455 if (right_path) {
3514 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); 3456 ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
3515 if (ret)
3516 mlog_errno(ret);
3517
3518 ocfs2_complete_edge_insert(handle, left_path, right_path, 3457 ocfs2_complete_edge_insert(handle, left_path, right_path,
3519 subtree_index); 3458 subtree_index);
3520 } 3459 }
@@ -3683,14 +3622,9 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3683 3622
3684 ocfs2_cleanup_merge(el, index); 3623 ocfs2_cleanup_merge(el, index);
3685 3624
3686 ret = ocfs2_journal_dirty(handle, bh); 3625 ocfs2_journal_dirty(handle, bh);
3687 if (ret)
3688 mlog_errno(ret);
3689
3690 if (left_path) { 3626 if (left_path) {
3691 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 3627 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
3692 if (ret)
3693 mlog_errno(ret);
3694 3628
3695 /* 3629 /*
3696 * In the situation that the right_rec is empty and the extent 3630 * In the situation that the right_rec is empty and the extent
@@ -4016,10 +3950,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
4016 le32_add_cpu(&rec->e_int_clusters, 3950 le32_add_cpu(&rec->e_int_clusters,
4017 -le32_to_cpu(rec->e_cpos)); 3951 -le32_to_cpu(rec->e_cpos));
4018 3952
4019 ret = ocfs2_journal_dirty(handle, bh); 3953 ocfs2_journal_dirty(handle, bh);
4020 if (ret)
4021 mlog_errno(ret);
4022
4023 } 3954 }
4024} 3955}
4025 3956
@@ -4203,17 +4134,13 @@ static int ocfs2_insert_path(handle_t *handle,
4203 struct buffer_head *leaf_bh = path_leaf_bh(right_path); 4134 struct buffer_head *leaf_bh = path_leaf_bh(right_path);
4204 4135
4205 if (left_path) { 4136 if (left_path) {
4206 int credits = handle->h_buffer_credits;
4207
4208 /* 4137 /*
4209 * There's a chance that left_path got passed back to 4138 * There's a chance that left_path got passed back to
4210 * us without being accounted for in the 4139 * us without being accounted for in the
4211 * journal. Extend our transaction here to be sure we 4140 * journal. Extend our transaction here to be sure we
4212 * can change those blocks. 4141 * can change those blocks.
4213 */ 4142 */
4214 credits += left_path->p_tree_depth; 4143 ret = ocfs2_extend_trans(handle, left_path->p_tree_depth);
4215
4216 ret = ocfs2_extend_trans(handle, credits);
4217 if (ret < 0) { 4144 if (ret < 0) {
4218 mlog_errno(ret); 4145 mlog_errno(ret);
4219 goto out; 4146 goto out;
@@ -4251,17 +4178,13 @@ static int ocfs2_insert_path(handle_t *handle,
4251 * dirty this for us. 4178 * dirty this for us.
4252 */ 4179 */
4253 if (left_path) 4180 if (left_path)
4254 ret = ocfs2_journal_dirty(handle, 4181 ocfs2_journal_dirty(handle,
4255 path_leaf_bh(left_path)); 4182 path_leaf_bh(left_path));
4256 if (ret)
4257 mlog_errno(ret);
4258 } else 4183 } else
4259 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path), 4184 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
4260 insert); 4185 insert);
4261 4186
4262 ret = ocfs2_journal_dirty(handle, leaf_bh); 4187 ocfs2_journal_dirty(handle, leaf_bh);
4263 if (ret)
4264 mlog_errno(ret);
4265 4188
4266 if (left_path) { 4189 if (left_path) {
4267 /* 4190 /*
@@ -4384,9 +4307,7 @@ out_update_clusters:
4384 ocfs2_et_update_clusters(et, 4307 ocfs2_et_update_clusters(et,
4385 le16_to_cpu(insert_rec->e_leaf_clusters)); 4308 le16_to_cpu(insert_rec->e_leaf_clusters));
4386 4309
4387 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 4310 ocfs2_journal_dirty(handle, et->et_root_bh);
4388 if (ret)
4389 mlog_errno(ret);
4390 4311
4391out: 4312out:
4392 ocfs2_free_path(left_path); 4313 ocfs2_free_path(left_path);
@@ -4866,7 +4787,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4866 goto leave; 4787 goto leave;
4867 } 4788 }
4868 4789
4869 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 4790 status = __ocfs2_claim_clusters(handle, data_ac, 1,
4870 clusters_to_add, &bit_off, &num_bits); 4791 clusters_to_add, &bit_off, &num_bits);
4871 if (status < 0) { 4792 if (status < 0) {
4872 if (status != -ENOSPC) 4793 if (status != -ENOSPC)
@@ -4895,11 +4816,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4895 goto leave; 4816 goto leave;
4896 } 4817 }
4897 4818
4898 status = ocfs2_journal_dirty(handle, et->et_root_bh); 4819 ocfs2_journal_dirty(handle, et->et_root_bh);
4899 if (status < 0) {
4900 mlog_errno(status);
4901 goto leave;
4902 }
4903 4820
4904 clusters_to_add -= num_bits; 4821 clusters_to_add -= num_bits;
4905 *logical_offset += num_bits; 4822 *logical_offset += num_bits;
@@ -5309,7 +5226,7 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5309 int index, u32 new_range, 5226 int index, u32 new_range,
5310 struct ocfs2_alloc_context *meta_ac) 5227 struct ocfs2_alloc_context *meta_ac)
5311{ 5228{
5312 int ret, depth, credits = handle->h_buffer_credits; 5229 int ret, depth, credits;
5313 struct buffer_head *last_eb_bh = NULL; 5230 struct buffer_head *last_eb_bh = NULL;
5314 struct ocfs2_extent_block *eb; 5231 struct ocfs2_extent_block *eb;
5315 struct ocfs2_extent_list *rightmost_el, *el; 5232 struct ocfs2_extent_list *rightmost_el, *el;
@@ -5340,8 +5257,8 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5340 } else 5257 } else
5341 rightmost_el = path_leaf_el(path); 5258 rightmost_el = path_leaf_el(path);
5342 5259
5343 credits += path->p_tree_depth + 5260 credits = path->p_tree_depth +
5344 ocfs2_extend_meta_needed(et->et_root_el); 5261 ocfs2_extend_meta_needed(et->et_root_el);
5345 ret = ocfs2_extend_trans(handle, credits); 5262 ret = ocfs2_extend_trans(handle, credits);
5346 if (ret) { 5263 if (ret) {
5347 mlog_errno(ret); 5264 mlog_errno(ret);
@@ -5671,19 +5588,97 @@ out:
5671 return ret; 5588 return ret;
5672} 5589}
5673 5590
5591/*
5592 * ocfs2_reserve_blocks_for_rec_trunc() would look basically the
5593 * same as ocfs2_lock_alloctors(), except for it accepts a blocks
5594 * number to reserve some extra blocks, and it only handles meta
5595 * data allocations.
5596 *
5597 * Currently, only ocfs2_remove_btree_range() uses it for truncating
5598 * and punching holes.
5599 */
5600static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
5601 struct ocfs2_extent_tree *et,
5602 u32 extents_to_split,
5603 struct ocfs2_alloc_context **ac,
5604 int extra_blocks)
5605{
5606 int ret = 0, num_free_extents;
5607 unsigned int max_recs_needed = 2 * extents_to_split;
5608 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5609
5610 *ac = NULL;
5611
5612 num_free_extents = ocfs2_num_free_extents(osb, et);
5613 if (num_free_extents < 0) {
5614 ret = num_free_extents;
5615 mlog_errno(ret);
5616 goto out;
5617 }
5618
5619 if (!num_free_extents ||
5620 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
5621 extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
5622
5623 if (extra_blocks) {
5624 ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
5625 if (ret < 0) {
5626 if (ret != -ENOSPC)
5627 mlog_errno(ret);
5628 goto out;
5629 }
5630 }
5631
5632out:
5633 if (ret) {
5634 if (*ac) {
5635 ocfs2_free_alloc_context(*ac);
5636 *ac = NULL;
5637 }
5638 }
5639
5640 return ret;
5641}
5642
5674int ocfs2_remove_btree_range(struct inode *inode, 5643int ocfs2_remove_btree_range(struct inode *inode,
5675 struct ocfs2_extent_tree *et, 5644 struct ocfs2_extent_tree *et,
5676 u32 cpos, u32 phys_cpos, u32 len, 5645 u32 cpos, u32 phys_cpos, u32 len, int flags,
5677 struct ocfs2_cached_dealloc_ctxt *dealloc) 5646 struct ocfs2_cached_dealloc_ctxt *dealloc,
5647 u64 refcount_loc)
5678{ 5648{
5679 int ret; 5649 int ret, credits = 0, extra_blocks = 0;
5680 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 5650 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5681 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5651 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5682 struct inode *tl_inode = osb->osb_tl_inode; 5652 struct inode *tl_inode = osb->osb_tl_inode;
5683 handle_t *handle; 5653 handle_t *handle;
5684 struct ocfs2_alloc_context *meta_ac = NULL; 5654 struct ocfs2_alloc_context *meta_ac = NULL;
5655 struct ocfs2_refcount_tree *ref_tree = NULL;
5656
5657 if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
5658 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
5659 OCFS2_HAS_REFCOUNT_FL));
5660
5661 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
5662 &ref_tree, NULL);
5663 if (ret) {
5664 mlog_errno(ret);
5665 goto out;
5666 }
5685 5667
5686 ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); 5668 ret = ocfs2_prepare_refcount_change_for_del(inode,
5669 refcount_loc,
5670 phys_blkno,
5671 len,
5672 &credits,
5673 &extra_blocks);
5674 if (ret < 0) {
5675 mlog_errno(ret);
5676 goto out;
5677 }
5678 }
5679
5680 ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
5681 extra_blocks);
5687 if (ret) { 5682 if (ret) {
5688 mlog_errno(ret); 5683 mlog_errno(ret);
5689 return ret; 5684 return ret;
@@ -5699,7 +5694,8 @@ int ocfs2_remove_btree_range(struct inode *inode,
5699 } 5694 }
5700 } 5695 }
5701 5696
5702 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5697 handle = ocfs2_start_trans(osb,
5698 ocfs2_remove_extent_credits(osb->sb) + credits);
5703 if (IS_ERR(handle)) { 5699 if (IS_ERR(handle)) {
5704 ret = PTR_ERR(handle); 5700 ret = PTR_ERR(handle);
5705 mlog_errno(ret); 5701 mlog_errno(ret);
@@ -5724,15 +5720,22 @@ int ocfs2_remove_btree_range(struct inode *inode,
5724 5720
5725 ocfs2_et_update_clusters(et, -len); 5721 ocfs2_et_update_clusters(et, -len);
5726 5722
5727 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 5723 ocfs2_journal_dirty(handle, et->et_root_bh);
5728 if (ret) {
5729 mlog_errno(ret);
5730 goto out_commit;
5731 }
5732 5724
5733 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); 5725 if (phys_blkno) {
5734 if (ret) 5726 if (flags & OCFS2_EXT_REFCOUNTED)
5735 mlog_errno(ret); 5727 ret = ocfs2_decrease_refcount(inode, handle,
5728 ocfs2_blocks_to_clusters(osb->sb,
5729 phys_blkno),
5730 len, meta_ac,
5731 dealloc, 1);
5732 else
5733 ret = ocfs2_truncate_log_append(osb, handle,
5734 phys_blkno, len);
5735 if (ret)
5736 mlog_errno(ret);
5737
5738 }
5736 5739
5737out_commit: 5740out_commit:
5738 ocfs2_commit_trans(osb, handle); 5741 ocfs2_commit_trans(osb, handle);
@@ -5742,6 +5745,9 @@ out:
5742 if (meta_ac) 5745 if (meta_ac)
5743 ocfs2_free_alloc_context(meta_ac); 5746 ocfs2_free_alloc_context(meta_ac);
5744 5747
5748 if (ref_tree)
5749 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
5750
5745 return ret; 5751 return ret;
5746} 5752}
5747 5753
@@ -5850,11 +5856,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5850 } 5856 }
5851 tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 5857 tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
5852 5858
5853 status = ocfs2_journal_dirty(handle, tl_bh); 5859 ocfs2_journal_dirty(handle, tl_bh);
5854 if (status < 0) {
5855 mlog_errno(status);
5856 goto bail;
5857 }
5858 5860
5859bail: 5861bail:
5860 mlog_exit(status); 5862 mlog_exit(status);
@@ -5893,11 +5895,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5893 5895
5894 tl->tl_used = cpu_to_le16(i); 5896 tl->tl_used = cpu_to_le16(i);
5895 5897
5896 status = ocfs2_journal_dirty(handle, tl_bh); 5898 ocfs2_journal_dirty(handle, tl_bh);
5897 if (status < 0) {
5898 mlog_errno(status);
5899 goto bail;
5900 }
5901 5899
5902 /* TODO: Perhaps we can calculate the bulk of the 5900 /* TODO: Perhaps we can calculate the bulk of the
5903 * credits up front rather than extending like 5901 * credits up front rather than extending like
@@ -6298,6 +6296,7 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
6298 */ 6296 */
6299struct ocfs2_cached_block_free { 6297struct ocfs2_cached_block_free {
6300 struct ocfs2_cached_block_free *free_next; 6298 struct ocfs2_cached_block_free *free_next;
6299 u64 free_bg;
6301 u64 free_blk; 6300 u64 free_blk;
6302 unsigned int free_bit; 6301 unsigned int free_bit;
6303}; 6302};
@@ -6344,8 +6343,11 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
6344 } 6343 }
6345 6344
6346 while (head) { 6345 while (head) {
6347 bg_blkno = ocfs2_which_suballoc_group(head->free_blk, 6346 if (head->free_bg)
6348 head->free_bit); 6347 bg_blkno = head->free_bg;
6348 else
6349 bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
6350 head->free_bit);
6349 mlog(0, "Free bit: (bit %u, blkno %llu)\n", 6351 mlog(0, "Free bit: (bit %u, blkno %llu)\n",
6350 head->free_bit, (unsigned long long)head->free_blk); 6352 head->free_bit, (unsigned long long)head->free_blk);
6351 6353
@@ -6393,7 +6395,7 @@ int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6393 int ret = 0; 6395 int ret = 0;
6394 struct ocfs2_cached_block_free *item; 6396 struct ocfs2_cached_block_free *item;
6395 6397
6396 item = kmalloc(sizeof(*item), GFP_NOFS); 6398 item = kzalloc(sizeof(*item), GFP_NOFS);
6397 if (item == NULL) { 6399 if (item == NULL) {
6398 ret = -ENOMEM; 6400 ret = -ENOMEM;
6399 mlog_errno(ret); 6401 mlog_errno(ret);
@@ -6533,8 +6535,8 @@ ocfs2_find_per_slot_free_list(int type,
6533} 6535}
6534 6536
6535int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 6537int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6536 int type, int slot, u64 blkno, 6538 int type, int slot, u64 suballoc,
6537 unsigned int bit) 6539 u64 blkno, unsigned int bit)
6538{ 6540{
6539 int ret; 6541 int ret;
6540 struct ocfs2_per_slot_free_list *fl; 6542 struct ocfs2_per_slot_free_list *fl;
@@ -6547,7 +6549,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6547 goto out; 6549 goto out;
6548 } 6550 }
6549 6551
6550 item = kmalloc(sizeof(*item), GFP_NOFS); 6552 item = kzalloc(sizeof(*item), GFP_NOFS);
6551 if (item == NULL) { 6553 if (item == NULL) {
6552 ret = -ENOMEM; 6554 ret = -ENOMEM;
6553 mlog_errno(ret); 6555 mlog_errno(ret);
@@ -6557,6 +6559,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6557 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", 6559 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
6558 type, slot, bit, (unsigned long long)blkno); 6560 type, slot, bit, (unsigned long long)blkno);
6559 6561
6562 item->free_bg = suballoc;
6560 item->free_blk = blkno; 6563 item->free_blk = blkno;
6561 item->free_bit = bit; 6564 item->free_bit = bit;
6562 item->free_next = fl->f_first; 6565 item->free_next = fl->f_first;
@@ -6573,433 +6576,11 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
6573{ 6576{
6574 return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, 6577 return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
6575 le16_to_cpu(eb->h_suballoc_slot), 6578 le16_to_cpu(eb->h_suballoc_slot),
6579 le64_to_cpu(eb->h_suballoc_loc),
6576 le64_to_cpu(eb->h_blkno), 6580 le64_to_cpu(eb->h_blkno),
6577 le16_to_cpu(eb->h_suballoc_bit)); 6581 le16_to_cpu(eb->h_suballoc_bit));
6578} 6582}
6579 6583
6580/* This function will figure out whether the currently last extent
6581 * block will be deleted, and if it will, what the new last extent
6582 * block will be so we can update his h_next_leaf_blk field, as well
6583 * as the dinodes i_last_eb_blk */
6584static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6585 unsigned int clusters_to_del,
6586 struct ocfs2_path *path,
6587 struct buffer_head **new_last_eb)
6588{
6589 int next_free, ret = 0;
6590 u32 cpos;
6591 struct ocfs2_extent_rec *rec;
6592 struct ocfs2_extent_block *eb;
6593 struct ocfs2_extent_list *el;
6594 struct buffer_head *bh = NULL;
6595
6596 *new_last_eb = NULL;
6597
6598 /* we have no tree, so of course, no last_eb. */
6599 if (!path->p_tree_depth)
6600 goto out;
6601
6602 /* trunc to zero special case - this makes tree_depth = 0
6603 * regardless of what it is. */
6604 if (OCFS2_I(inode)->ip_clusters == clusters_to_del)
6605 goto out;
6606
6607 el = path_leaf_el(path);
6608 BUG_ON(!el->l_next_free_rec);
6609
6610 /*
6611 * Make sure that this extent list will actually be empty
6612 * after we clear away the data. We can shortcut out if
6613 * there's more than one non-empty extent in the
6614 * list. Otherwise, a check of the remaining extent is
6615 * necessary.
6616 */
6617 next_free = le16_to_cpu(el->l_next_free_rec);
6618 rec = NULL;
6619 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6620 if (next_free > 2)
6621 goto out;
6622
6623 /* We may have a valid extent in index 1, check it. */
6624 if (next_free == 2)
6625 rec = &el->l_recs[1];
6626
6627 /*
6628 * Fall through - no more nonempty extents, so we want
6629 * to delete this leaf.
6630 */
6631 } else {
6632 if (next_free > 1)
6633 goto out;
6634
6635 rec = &el->l_recs[0];
6636 }
6637
6638 if (rec) {
6639 /*
6640 * Check it we'll only be trimming off the end of this
6641 * cluster.
6642 */
6643 if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del)
6644 goto out;
6645 }
6646
6647 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
6648 if (ret) {
6649 mlog_errno(ret);
6650 goto out;
6651 }
6652
6653 ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
6654 if (ret) {
6655 mlog_errno(ret);
6656 goto out;
6657 }
6658
6659 eb = (struct ocfs2_extent_block *) bh->b_data;
6660 el = &eb->h_list;
6661
6662 /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
6663 * Any corruption is a code bug. */
6664 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
6665
6666 *new_last_eb = bh;
6667 get_bh(*new_last_eb);
6668 mlog(0, "returning block %llu, (cpos: %u)\n",
6669 (unsigned long long)le64_to_cpu(eb->h_blkno), cpos);
6670out:
6671 brelse(bh);
6672
6673 return ret;
6674}
6675
6676/*
6677 * Trim some clusters off the rightmost edge of a tree. Only called
6678 * during truncate.
6679 *
6680 * The caller needs to:
6681 * - start journaling of each path component.
6682 * - compute and fully set up any new last ext block
6683 */
6684static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6685 handle_t *handle, struct ocfs2_truncate_context *tc,
6686 u32 clusters_to_del, u64 *delete_start, u8 *flags)
6687{
6688 int ret, i, index = path->p_tree_depth;
6689 u32 new_edge = 0;
6690 u64 deleted_eb = 0;
6691 struct buffer_head *bh;
6692 struct ocfs2_extent_list *el;
6693 struct ocfs2_extent_rec *rec;
6694
6695 *delete_start = 0;
6696 *flags = 0;
6697
6698 while (index >= 0) {
6699 bh = path->p_node[index].bh;
6700 el = path->p_node[index].el;
6701
6702 mlog(0, "traveling tree (index = %d, block = %llu)\n",
6703 index, (unsigned long long)bh->b_blocknr);
6704
6705 BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
6706
6707 if (index !=
6708 (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) {
6709 ocfs2_error(inode->i_sb,
6710 "Inode %lu has invalid ext. block %llu",
6711 inode->i_ino,
6712 (unsigned long long)bh->b_blocknr);
6713 ret = -EROFS;
6714 goto out;
6715 }
6716
6717find_tail_record:
6718 i = le16_to_cpu(el->l_next_free_rec) - 1;
6719 rec = &el->l_recs[i];
6720
6721 mlog(0, "Extent list before: record %d: (%u, %u, %llu), "
6722 "next = %u\n", i, le32_to_cpu(rec->e_cpos),
6723 ocfs2_rec_clusters(el, rec),
6724 (unsigned long long)le64_to_cpu(rec->e_blkno),
6725 le16_to_cpu(el->l_next_free_rec));
6726
6727 BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del);
6728
6729 if (le16_to_cpu(el->l_tree_depth) == 0) {
6730 /*
6731 * If the leaf block contains a single empty
6732 * extent and no records, we can just remove
6733 * the block.
6734 */
6735 if (i == 0 && ocfs2_is_empty_extent(rec)) {
6736 memset(rec, 0,
6737 sizeof(struct ocfs2_extent_rec));
6738 el->l_next_free_rec = cpu_to_le16(0);
6739
6740 goto delete;
6741 }
6742
6743 /*
6744 * Remove any empty extents by shifting things
6745 * left. That should make life much easier on
6746 * the code below. This condition is rare
6747 * enough that we shouldn't see a performance
6748 * hit.
6749 */
6750 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6751 le16_add_cpu(&el->l_next_free_rec, -1);
6752
6753 for(i = 0;
6754 i < le16_to_cpu(el->l_next_free_rec); i++)
6755 el->l_recs[i] = el->l_recs[i + 1];
6756
6757 memset(&el->l_recs[i], 0,
6758 sizeof(struct ocfs2_extent_rec));
6759
6760 /*
6761 * We've modified our extent list. The
6762 * simplest way to handle this change
6763 * is to being the search from the
6764 * start again.
6765 */
6766 goto find_tail_record;
6767 }
6768
6769 le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del);
6770
6771 /*
6772 * We'll use "new_edge" on our way back up the
6773 * tree to know what our rightmost cpos is.
6774 */
6775 new_edge = le16_to_cpu(rec->e_leaf_clusters);
6776 new_edge += le32_to_cpu(rec->e_cpos);
6777
6778 /*
6779 * The caller will use this to delete data blocks.
6780 */
6781 *delete_start = le64_to_cpu(rec->e_blkno)
6782 + ocfs2_clusters_to_blocks(inode->i_sb,
6783 le16_to_cpu(rec->e_leaf_clusters));
6784 *flags = rec->e_flags;
6785
6786 /*
6787 * If it's now empty, remove this record.
6788 */
6789 if (le16_to_cpu(rec->e_leaf_clusters) == 0) {
6790 memset(rec, 0,
6791 sizeof(struct ocfs2_extent_rec));
6792 le16_add_cpu(&el->l_next_free_rec, -1);
6793 }
6794 } else {
6795 if (le64_to_cpu(rec->e_blkno) == deleted_eb) {
6796 memset(rec, 0,
6797 sizeof(struct ocfs2_extent_rec));
6798 le16_add_cpu(&el->l_next_free_rec, -1);
6799
6800 goto delete;
6801 }
6802
6803 /* Can this actually happen? */
6804 if (le16_to_cpu(el->l_next_free_rec) == 0)
6805 goto delete;
6806
6807 /*
6808 * We never actually deleted any clusters
6809 * because our leaf was empty. There's no
6810 * reason to adjust the rightmost edge then.
6811 */
6812 if (new_edge == 0)
6813 goto delete;
6814
6815 rec->e_int_clusters = cpu_to_le32(new_edge);
6816 le32_add_cpu(&rec->e_int_clusters,
6817 -le32_to_cpu(rec->e_cpos));
6818
6819 /*
6820 * A deleted child record should have been
6821 * caught above.
6822 */
6823 BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0);
6824 }
6825
6826delete:
6827 ret = ocfs2_journal_dirty(handle, bh);
6828 if (ret) {
6829 mlog_errno(ret);
6830 goto out;
6831 }
6832
6833 mlog(0, "extent list container %llu, after: record %d: "
6834 "(%u, %u, %llu), next = %u.\n",
6835 (unsigned long long)bh->b_blocknr, i,
6836 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec),
6837 (unsigned long long)le64_to_cpu(rec->e_blkno),
6838 le16_to_cpu(el->l_next_free_rec));
6839
6840 /*
6841 * We must be careful to only attempt delete of an
6842 * extent block (and not the root inode block).
6843 */
6844 if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) {
6845 struct ocfs2_extent_block *eb =
6846 (struct ocfs2_extent_block *)bh->b_data;
6847
6848 /*
6849 * Save this for use when processing the
6850 * parent block.
6851 */
6852 deleted_eb = le64_to_cpu(eb->h_blkno);
6853
6854 mlog(0, "deleting this extent block.\n");
6855
6856 ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
6857
6858 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
6859 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
6860 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
6861
6862 ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
6863 /* An error here is not fatal. */
6864 if (ret < 0)
6865 mlog_errno(ret);
6866 } else {
6867 deleted_eb = 0;
6868 }
6869
6870 index--;
6871 }
6872
6873 ret = 0;
6874out:
6875 return ret;
6876}
6877
6878static int ocfs2_do_truncate(struct ocfs2_super *osb,
6879 unsigned int clusters_to_del,
6880 struct inode *inode,
6881 struct buffer_head *fe_bh,
6882 handle_t *handle,
6883 struct ocfs2_truncate_context *tc,
6884 struct ocfs2_path *path,
6885 struct ocfs2_alloc_context *meta_ac)
6886{
6887 int status;
6888 struct ocfs2_dinode *fe;
6889 struct ocfs2_extent_block *last_eb = NULL;
6890 struct ocfs2_extent_list *el;
6891 struct buffer_head *last_eb_bh = NULL;
6892 u64 delete_blk = 0;
6893 u8 rec_flags;
6894
6895 fe = (struct ocfs2_dinode *) fe_bh->b_data;
6896
6897 status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del,
6898 path, &last_eb_bh);
6899 if (status < 0) {
6900 mlog_errno(status);
6901 goto bail;
6902 }
6903
6904 /*
6905 * Each component will be touched, so we might as well journal
6906 * here to avoid having to handle errors later.
6907 */
6908 status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
6909 if (status < 0) {
6910 mlog_errno(status);
6911 goto bail;
6912 }
6913
6914 if (last_eb_bh) {
6915 status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
6916 OCFS2_JOURNAL_ACCESS_WRITE);
6917 if (status < 0) {
6918 mlog_errno(status);
6919 goto bail;
6920 }
6921
6922 last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
6923 }
6924
6925 el = &(fe->id2.i_list);
6926
6927 /*
6928 * Lower levels depend on this never happening, but it's best
6929 * to check it up here before changing the tree.
6930 */
6931 if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) {
6932 ocfs2_error(inode->i_sb,
6933 "Inode %lu has an empty extent record, depth %u\n",
6934 inode->i_ino, le16_to_cpu(el->l_tree_depth));
6935 status = -EROFS;
6936 goto bail;
6937 }
6938
6939 dquot_free_space_nodirty(inode,
6940 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6941 spin_lock(&OCFS2_I(inode)->ip_lock);
6942 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
6943 clusters_to_del;
6944 spin_unlock(&OCFS2_I(inode)->ip_lock);
6945 le32_add_cpu(&fe->i_clusters, -clusters_to_del);
6946 inode->i_blocks = ocfs2_inode_sector_count(inode);
6947
6948 status = ocfs2_trim_tree(inode, path, handle, tc,
6949 clusters_to_del, &delete_blk, &rec_flags);
6950 if (status) {
6951 mlog_errno(status);
6952 goto bail;
6953 }
6954
6955 if (le32_to_cpu(fe->i_clusters) == 0) {
6956 /* trunc to zero is a special case. */
6957 el->l_tree_depth = 0;
6958 fe->i_last_eb_blk = 0;
6959 } else if (last_eb)
6960 fe->i_last_eb_blk = last_eb->h_blkno;
6961
6962 status = ocfs2_journal_dirty(handle, fe_bh);
6963 if (status < 0) {
6964 mlog_errno(status);
6965 goto bail;
6966 }
6967
6968 if (last_eb) {
6969 /* If there will be a new last extent block, then by
6970 * definition, there cannot be any leaves to the right of
6971 * him. */
6972 last_eb->h_next_leaf_blk = 0;
6973 status = ocfs2_journal_dirty(handle, last_eb_bh);
6974 if (status < 0) {
6975 mlog_errno(status);
6976 goto bail;
6977 }
6978 }
6979
6980 if (delete_blk) {
6981 if (rec_flags & OCFS2_EXT_REFCOUNTED)
6982 status = ocfs2_decrease_refcount(inode, handle,
6983 ocfs2_blocks_to_clusters(osb->sb,
6984 delete_blk),
6985 clusters_to_del, meta_ac,
6986 &tc->tc_dealloc, 1);
6987 else
6988 status = ocfs2_truncate_log_append(osb, handle,
6989 delete_blk,
6990 clusters_to_del);
6991 if (status < 0) {
6992 mlog_errno(status);
6993 goto bail;
6994 }
6995 }
6996 status = 0;
6997bail:
6998 brelse(last_eb_bh);
6999 mlog_exit(status);
7000 return status;
7001}
7002
7003static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) 6584static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
7004{ 6585{
7005 set_buffer_uptodate(bh); 6586 set_buffer_uptodate(bh);
@@ -7307,7 +6888,9 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7307 goto out_commit; 6888 goto out_commit;
7308 did_quota = 1; 6889 did_quota = 1;
7309 6890
7310 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 6891 data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
6892
6893 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
7311 &num); 6894 &num);
7312 if (ret) { 6895 if (ret) {
7313 mlog_errno(ret); 6896 mlog_errno(ret);
@@ -7406,26 +6989,29 @@ out:
7406 */ 6989 */
7407int ocfs2_commit_truncate(struct ocfs2_super *osb, 6990int ocfs2_commit_truncate(struct ocfs2_super *osb,
7408 struct inode *inode, 6991 struct inode *inode,
7409 struct buffer_head *fe_bh, 6992 struct buffer_head *di_bh)
7410 struct ocfs2_truncate_context *tc)
7411{ 6993{
7412 int status, i, credits, tl_sem = 0; 6994 int status = 0, i, flags = 0;
7413 u32 clusters_to_del, new_highest_cpos, range; 6995 u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
7414 u64 blkno = 0; 6996 u64 blkno = 0;
7415 struct ocfs2_extent_list *el; 6997 struct ocfs2_extent_list *el;
7416 handle_t *handle = NULL; 6998 struct ocfs2_extent_rec *rec;
7417 struct inode *tl_inode = osb->osb_tl_inode;
7418 struct ocfs2_path *path = NULL; 6999 struct ocfs2_path *path = NULL;
7419 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 7000 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
7420 struct ocfs2_alloc_context *meta_ac = NULL; 7001 struct ocfs2_extent_list *root_el = &(di->id2.i_list);
7421 struct ocfs2_refcount_tree *ref_tree = NULL; 7002 u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
7003 struct ocfs2_extent_tree et;
7004 struct ocfs2_cached_dealloc_ctxt dealloc;
7422 7005
7423 mlog_entry_void(); 7006 mlog_entry_void();
7424 7007
7008 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7009 ocfs2_init_dealloc_ctxt(&dealloc);
7010
7425 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 7011 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
7426 i_size_read(inode)); 7012 i_size_read(inode));
7427 7013
7428 path = ocfs2_new_path(fe_bh, &di->id2.i_list, 7014 path = ocfs2_new_path(di_bh, &di->id2.i_list,
7429 ocfs2_journal_access_di); 7015 ocfs2_journal_access_di);
7430 if (!path) { 7016 if (!path) {
7431 status = -ENOMEM; 7017 status = -ENOMEM;
@@ -7444,8 +7030,6 @@ start:
7444 goto bail; 7030 goto bail;
7445 } 7031 }
7446 7032
7447 credits = 0;
7448
7449 /* 7033 /*
7450 * Truncate always works against the rightmost tree branch. 7034 * Truncate always works against the rightmost tree branch.
7451 */ 7035 */
@@ -7480,101 +7064,62 @@ start:
7480 } 7064 }
7481 7065
7482 i = le16_to_cpu(el->l_next_free_rec) - 1; 7066 i = le16_to_cpu(el->l_next_free_rec) - 1;
7483 range = le32_to_cpu(el->l_recs[i].e_cpos) + 7067 rec = &el->l_recs[i];
7484 ocfs2_rec_clusters(el, &el->l_recs[i]); 7068 flags = rec->e_flags;
7485 if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 7069 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
7486 clusters_to_del = 0; 7070
7487 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 7071 if (i == 0 && ocfs2_is_empty_extent(rec)) {
7488 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 7072 /*
7489 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 7073 * Lower levels depend on this never happening, but it's best
7074 * to check it up here before changing the tree.
7075 */
7076 if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
7077 ocfs2_error(inode->i_sb, "Inode %lu has an empty "
7078 "extent record, depth %u\n", inode->i_ino,
7079 le16_to_cpu(root_el->l_tree_depth));
7080 status = -EROFS;
7081 goto bail;
7082 }
7083 trunc_cpos = le32_to_cpu(rec->e_cpos);
7084 trunc_len = 0;
7085 blkno = 0;
7086 } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
7087 /*
7088 * Truncate entire record.
7089 */
7090 trunc_cpos = le32_to_cpu(rec->e_cpos);
7091 trunc_len = ocfs2_rec_clusters(el, rec);
7092 blkno = le64_to_cpu(rec->e_blkno);
7490 } else if (range > new_highest_cpos) { 7093 } else if (range > new_highest_cpos) {
7491 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 7094 /*
7492 le32_to_cpu(el->l_recs[i].e_cpos)) - 7095 * Partial truncate. it also should be
7493 new_highest_cpos; 7096 * the last truncate we're doing.
7494 blkno = le64_to_cpu(el->l_recs[i].e_blkno) + 7097 */
7495 ocfs2_clusters_to_blocks(inode->i_sb, 7098 trunc_cpos = new_highest_cpos;
7496 ocfs2_rec_clusters(el, &el->l_recs[i]) - 7099 trunc_len = range - new_highest_cpos;
7497 clusters_to_del); 7100 coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
7101 blkno = le64_to_cpu(rec->e_blkno) +
7102 ocfs2_clusters_to_blocks(inode->i_sb, coff);
7498 } else { 7103 } else {
7104 /*
7105 * Truncate completed, leave happily.
7106 */
7499 status = 0; 7107 status = 0;
7500 goto bail; 7108 goto bail;
7501 } 7109 }
7502 7110
7503 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 7111 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
7504 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
7505
7506 if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7507 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7508 OCFS2_HAS_REFCOUNT_FL));
7509
7510 status = ocfs2_lock_refcount_tree(osb,
7511 le64_to_cpu(di->i_refcount_loc),
7512 1, &ref_tree, NULL);
7513 if (status) {
7514 mlog_errno(status);
7515 goto bail;
7516 }
7517
7518 status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7519 blkno,
7520 clusters_to_del,
7521 &credits,
7522 &meta_ac);
7523 if (status < 0) {
7524 mlog_errno(status);
7525 goto bail;
7526 }
7527 }
7528
7529 mutex_lock(&tl_inode->i_mutex);
7530 tl_sem = 1;
7531 /* ocfs2_truncate_log_needs_flush guarantees us at least one
7532 * record is free for use. If there isn't any, we flush to get
7533 * an empty truncate log. */
7534 if (ocfs2_truncate_log_needs_flush(osb)) {
7535 status = __ocfs2_flush_truncate_log(osb);
7536 if (status < 0) {
7537 mlog_errno(status);
7538 goto bail;
7539 }
7540 }
7541 7112
7542 credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 7113 status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
7543 (struct ocfs2_dinode *)fe_bh->b_data, 7114 phys_cpos, trunc_len, flags, &dealloc,
7544 el); 7115 refcount_loc);
7545 handle = ocfs2_start_trans(osb, credits);
7546 if (IS_ERR(handle)) {
7547 status = PTR_ERR(handle);
7548 handle = NULL;
7549 mlog_errno(status);
7550 goto bail;
7551 }
7552
7553 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
7554 tc, path, meta_ac);
7555 if (status < 0) { 7116 if (status < 0) {
7556 mlog_errno(status); 7117 mlog_errno(status);
7557 goto bail; 7118 goto bail;
7558 } 7119 }
7559 7120
7560 mutex_unlock(&tl_inode->i_mutex);
7561 tl_sem = 0;
7562
7563 ocfs2_commit_trans(osb, handle);
7564 handle = NULL;
7565
7566 ocfs2_reinit_path(path, 1); 7121 ocfs2_reinit_path(path, 1);
7567 7122
7568 if (meta_ac) {
7569 ocfs2_free_alloc_context(meta_ac);
7570 meta_ac = NULL;
7571 }
7572
7573 if (ref_tree) {
7574 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7575 ref_tree = NULL;
7576 }
7577
7578 /* 7123 /*
7579 * The check above will catch the case where we've truncated 7124 * The check above will catch the case where we've truncated
7580 * away all allocation. 7125 * away all allocation.
@@ -7585,25 +7130,10 @@ bail:
7585 7130
7586 ocfs2_schedule_truncate_log_flush(osb, 1); 7131 ocfs2_schedule_truncate_log_flush(osb, 1);
7587 7132
7588 if (tl_sem) 7133 ocfs2_run_deallocs(osb, &dealloc);
7589 mutex_unlock(&tl_inode->i_mutex);
7590
7591 if (handle)
7592 ocfs2_commit_trans(osb, handle);
7593
7594 if (meta_ac)
7595 ocfs2_free_alloc_context(meta_ac);
7596
7597 if (ref_tree)
7598 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7599
7600 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7601 7134
7602 ocfs2_free_path(path); 7135 ocfs2_free_path(path);
7603 7136
7604 /* This will drop the ext_alloc cluster lock for us */
7605 ocfs2_free_truncate_context(tc);
7606
7607 mlog_exit(status); 7137 mlog_exit(status);
7608 return status; 7138 return status;
7609} 7139}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 1db4359ccb90..55762b554b99 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -140,8 +140,9 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
140 struct ocfs2_cached_dealloc_ctxt *dealloc); 140 struct ocfs2_cached_dealloc_ctxt *dealloc);
141int ocfs2_remove_btree_range(struct inode *inode, 141int ocfs2_remove_btree_range(struct inode *inode,
142 struct ocfs2_extent_tree *et, 142 struct ocfs2_extent_tree *et,
143 u32 cpos, u32 phys_cpos, u32 len, 143 u32 cpos, u32 phys_cpos, u32 len, int flags,
144 struct ocfs2_cached_dealloc_ctxt *dealloc); 144 struct ocfs2_cached_dealloc_ctxt *dealloc,
145 u64 refcount_loc);
145 146
146int ocfs2_num_free_extents(struct ocfs2_super *osb, 147int ocfs2_num_free_extents(struct ocfs2_super *osb,
147 struct ocfs2_extent_tree *et); 148 struct ocfs2_extent_tree *et);
@@ -209,7 +210,7 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
209int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 210int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
210 u64 blkno, unsigned int bit); 211 u64 blkno, unsigned int bit);
211int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 212int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
212 int type, int slot, u64 blkno, 213 int type, int slot, u64 suballoc, u64 blkno,
213 unsigned int bit); 214 unsigned int bit);
214static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) 215static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
215{ 216{
@@ -233,8 +234,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
233 struct ocfs2_truncate_context **tc); 234 struct ocfs2_truncate_context **tc);
234int ocfs2_commit_truncate(struct ocfs2_super *osb, 235int ocfs2_commit_truncate(struct ocfs2_super *osb,
235 struct inode *inode, 236 struct inode *inode,
236 struct buffer_head *fe_bh, 237 struct buffer_head *di_bh);
237 struct ocfs2_truncate_context *tc);
238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
239 unsigned int start, unsigned int end, int trunc); 239 unsigned int start, unsigned int end, int trunc);
240 240
@@ -319,6 +319,8 @@ int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
319 struct ocfs2_path *path); 319 struct ocfs2_path *path);
320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
321 struct ocfs2_path *path, u32 *cpos); 321 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
323 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, 324int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
323 struct ocfs2_path *left, 325 struct ocfs2_path *left,
324 struct ocfs2_path *right); 326 struct ocfs2_path *right);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 21441ddb5506..3623ca20cc18 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1735,6 +1735,9 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1735 goto out; 1735 goto out;
1736 } 1736 }
1737 1737
1738 if (data_ac)
1739 data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
1740
1738 credits = ocfs2_calc_extend_credits(inode->i_sb, 1741 credits = ocfs2_calc_extend_credits(inode->i_sb,
1739 &di->id2.i_list, 1742 &di->id2.i_list,
1740 clusters_to_alloc); 1743 clusters_to_alloc);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index ecebb2276790..f9d5d3ffc75a 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
406 struct buffer_head *bh) 406 struct buffer_head *bh)
407{ 407{
408 int ret = 0; 408 int ret = 0;
409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
409 410
410 mlog_entry_void(); 411 mlog_entry_void();
411 412
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
425 426
426 get_bh(bh); /* for end_buffer_write_sync() */ 427 get_bh(bh); /* for end_buffer_write_sync() */
427 bh->b_end_io = end_buffer_write_sync; 428 bh->b_end_io = end_buffer_write_sync;
429 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
428 submit_bh(WRITE, bh); 430 submit_bh(WRITE, bh);
429 431
430 wait_on_buffer(bh); 432 wait_on_buffer(bh);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 3bb928a2bf7d..c7fba396392d 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
116 define_mask(ERROR), 116 define_mask(ERROR),
117 define_mask(NOTICE), 117 define_mask(NOTICE),
118 define_mask(KTHREAD), 118 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
119}; 120};
120 121
121static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 3dfddbec32f2..fd96e2a2fa56 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -119,6 +119,7 @@
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
122 123
123#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 124#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
124#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 125#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 73e743eea2c8..aa75ca3f78da 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -583,6 +583,9 @@ static void o2net_state_change(struct sock *sk)
583 o2net_sc_queue_work(sc, &sc->sc_connect_work); 583 o2net_sc_queue_work(sc, &sc->sc_connect_work);
584 break; 584 break;
585 default: 585 default:
586 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT
587 " shutdown, state %d\n",
588 SC_NODEF_ARGS(sc), sk->sk_state);
586 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 589 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
587 break; 590 break;
588 } 591 }
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index efd77d071c80..f04ebcfffc4a 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1194,7 +1194,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1194 else 1194 else
1195 de->inode = 0; 1195 de->inode = 0;
1196 dir->i_version++; 1196 dir->i_version++;
1197 status = ocfs2_journal_dirty(handle, bh); 1197 ocfs2_journal_dirty(handle, bh);
1198 goto bail; 1198 goto bail;
1199 } 1199 }
1200 i += le16_to_cpu(de->rec_len); 1200 i += le16_to_cpu(de->rec_len);
@@ -1752,7 +1752,7 @@ int __ocfs2_add_entry(handle_t *handle,
1752 ocfs2_recalc_free_list(dir, handle, lookup); 1752 ocfs2_recalc_free_list(dir, handle, lookup);
1753 1753
1754 dir->i_version++; 1754 dir->i_version++;
1755 status = ocfs2_journal_dirty(handle, insert_bh); 1755 ocfs2_journal_dirty(handle, insert_bh);
1756 retval = 0; 1756 retval = 0;
1757 goto bail; 1757 goto bail;
1758 } 1758 }
@@ -2297,12 +2297,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2297 } 2297 }
2298 2298
2299 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size); 2299 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
2300
2301 ocfs2_journal_dirty(handle, di_bh); 2300 ocfs2_journal_dirty(handle, di_bh);
2302 if (ret) {
2303 mlog_errno(ret);
2304 goto out;
2305 }
2306 2301
2307 i_size_write(inode, size); 2302 i_size_write(inode, size);
2308 inode->i_nlink = 2; 2303 inode->i_nlink = 2;
@@ -2366,11 +2361,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2366 ocfs2_init_dir_trailer(inode, new_bh, size); 2361 ocfs2_init_dir_trailer(inode, new_bh, size);
2367 } 2362 }
2368 2363
2369 status = ocfs2_journal_dirty(handle, new_bh); 2364 ocfs2_journal_dirty(handle, new_bh);
2370 if (status < 0) {
2371 mlog_errno(status);
2372 goto bail;
2373 }
2374 2365
2375 i_size_write(inode, inode->i_sb->s_blocksize); 2366 i_size_write(inode, inode->i_sb->s_blocksize);
2376 inode->i_nlink = 2; 2367 inode->i_nlink = 2;
@@ -2404,15 +2395,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2404 int ret; 2395 int ret;
2405 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 2396 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
2406 u16 dr_suballoc_bit; 2397 u16 dr_suballoc_bit;
2407 u64 dr_blkno; 2398 u64 suballoc_loc, dr_blkno;
2408 unsigned int num_bits; 2399 unsigned int num_bits;
2409 struct buffer_head *dx_root_bh = NULL; 2400 struct buffer_head *dx_root_bh = NULL;
2410 struct ocfs2_dx_root_block *dx_root; 2401 struct ocfs2_dx_root_block *dx_root;
2411 struct ocfs2_dir_block_trailer *trailer = 2402 struct ocfs2_dir_block_trailer *trailer =
2412 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); 2403 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
2413 2404
2414 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, 2405 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
2415 &num_bits, &dr_blkno); 2406 &dr_suballoc_bit, &num_bits, &dr_blkno);
2416 if (ret) { 2407 if (ret) {
2417 mlog_errno(ret); 2408 mlog_errno(ret);
2418 goto out; 2409 goto out;
@@ -2440,6 +2431,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2440 memset(dx_root, 0, osb->sb->s_blocksize); 2431 memset(dx_root, 0, osb->sb->s_blocksize);
2441 strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); 2432 strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
2442 dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 2433 dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2434 dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
2443 dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); 2435 dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
2444 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); 2436 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
2445 dx_root->dr_blkno = cpu_to_le64(dr_blkno); 2437 dx_root->dr_blkno = cpu_to_le64(dr_blkno);
@@ -2458,10 +2450,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2458 dx_root->dr_list.l_count = 2450 dx_root->dr_list.l_count =
2459 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); 2451 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
2460 } 2452 }
2461 2453 ocfs2_journal_dirty(handle, dx_root_bh);
2462 ret = ocfs2_journal_dirty(handle, dx_root_bh);
2463 if (ret)
2464 mlog_errno(ret);
2465 2454
2466 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh, 2455 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
2467 OCFS2_JOURNAL_ACCESS_CREATE); 2456 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2475,9 +2464,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2475 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2476 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2477 2466
2478 ret = ocfs2_journal_dirty(handle, di_bh); 2467 ocfs2_journal_dirty(handle, di_bh);
2479 if (ret)
2480 mlog_errno(ret);
2481 2468
2482 *ret_dx_root_bh = dx_root_bh; 2469 *ret_dx_root_bh = dx_root_bh;
2483 dx_root_bh = NULL; 2470 dx_root_bh = NULL;
@@ -2558,7 +2545,7 @@ static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
2558 * chance of contiguousness as the directory grows in number 2545 * chance of contiguousness as the directory grows in number
2559 * of entries. 2546 * of entries.
2560 */ 2547 */
2561 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num); 2548 ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
2562 if (ret) { 2549 if (ret) {
2563 mlog_errno(ret); 2550 mlog_errno(ret);
2564 goto out; 2551 goto out;
@@ -2991,7 +2978,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2991 * if we only get one now, that's enough to continue. The rest 2978 * if we only get one now, that's enough to continue. The rest
2992 * will be claimed after the conversion to extents. 2979 * will be claimed after the conversion to extents.
2993 */ 2980 */
2994 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); 2981 if (ocfs2_dir_resv_allowed(osb))
2982 data_ac->ac_resv = &oi->ip_la_data_resv;
2983 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
2995 if (ret) { 2984 if (ret) {
2996 mlog_errno(ret); 2985 mlog_errno(ret);
2997 goto out_commit; 2986 goto out_commit;
@@ -3034,11 +3023,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3034 ocfs2_init_dir_trailer(dir, dirdata_bh, i); 3023 ocfs2_init_dir_trailer(dir, dirdata_bh, i);
3035 } 3024 }
3036 3025
3037 ret = ocfs2_journal_dirty(handle, dirdata_bh); 3026 ocfs2_journal_dirty(handle, dirdata_bh);
3038 if (ret) {
3039 mlog_errno(ret);
3040 goto out_commit;
3041 }
3042 3027
3043 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { 3028 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
3044 /* 3029 /*
@@ -3104,11 +3089,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3104 */ 3089 */
3105 dir->i_blocks = ocfs2_inode_sector_count(dir); 3090 dir->i_blocks = ocfs2_inode_sector_count(dir);
3106 3091
3107 ret = ocfs2_journal_dirty(handle, di_bh); 3092 ocfs2_journal_dirty(handle, di_bh);
3108 if (ret) {
3109 mlog_errno(ret);
3110 goto out_commit;
3111 }
3112 3093
3113 if (ocfs2_supports_indexed_dirs(osb)) { 3094 if (ocfs2_supports_indexed_dirs(osb)) {
3114 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, 3095 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
@@ -3138,7 +3119,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3138 * pass. Claim the 2nd cluster as a separate extent. 3119 * pass. Claim the 2nd cluster as a separate extent.
3139 */ 3120 */
3140 if (alloc > len) { 3121 if (alloc > len) {
3141 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 3122 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
3142 &len); 3123 &len);
3143 if (ret) { 3124 if (ret) {
3144 mlog_errno(ret); 3125 mlog_errno(ret);
@@ -3369,6 +3350,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3369 goto bail; 3350 goto bail;
3370 } 3351 }
3371 3352
3353 if (ocfs2_dir_resv_allowed(osb))
3354 data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
3355
3372 credits = ocfs2_calc_extend_credits(sb, el, 1); 3356 credits = ocfs2_calc_extend_credits(sb, el, 1);
3373 } else { 3357 } else {
3374 spin_unlock(&OCFS2_I(dir)->ip_lock); 3358 spin_unlock(&OCFS2_I(dir)->ip_lock);
@@ -3423,11 +3407,7 @@ do_extend:
3423 } else { 3407 } else {
3424 de->rec_len = cpu_to_le16(sb->s_blocksize); 3408 de->rec_len = cpu_to_le16(sb->s_blocksize);
3425 } 3409 }
3426 status = ocfs2_journal_dirty(handle, new_bh); 3410 ocfs2_journal_dirty(handle, new_bh);
3427 if (status < 0) {
3428 mlog_errno(status);
3429 goto bail;
3430 }
3431 3411
3432 dir_i_size += dir->i_sb->s_blocksize; 3412 dir_i_size += dir->i_sb->s_blocksize;
3433 i_size_write(dir, dir_i_size); 3413 i_size_write(dir, dir_i_size);
@@ -3906,11 +3886,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3906 sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, 3886 sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
3907 dx_leaf_sort_swap); 3887 dx_leaf_sort_swap);
3908 3888
3909 ret = ocfs2_journal_dirty(handle, dx_leaf_bh); 3889 ocfs2_journal_dirty(handle, dx_leaf_bh);
3910 if (ret) {
3911 mlog_errno(ret);
3912 goto out_commit;
3913 }
3914 3890
3915 ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash, 3891 ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
3916 &split_hash); 3892 &split_hash);
@@ -4490,7 +4466,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4490 4466
4491 blk = le64_to_cpu(dx_root->dr_blkno); 4467 blk = le64_to_cpu(dx_root->dr_blkno);
4492 bit = le16_to_cpu(dx_root->dr_suballoc_bit); 4468 bit = le16_to_cpu(dx_root->dr_suballoc_bit);
4493 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 4469 if (dx_root->dr_suballoc_loc)
4470 bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
4471 else
4472 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
4494 ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh, 4473 ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
4495 bit, bg_blkno, 1); 4474 bit, bg_blkno, 1);
4496 if (ret) 4475 if (ret)
@@ -4551,8 +4530,8 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4551 4530
4552 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); 4531 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
4553 4532
4554 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 4533 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4555 &dealloc); 4534 &dealloc, 0);
4556 if (ret) { 4535 if (ret) {
4557 mlog_errno(ret); 4536 mlog_errno(ret);
4558 goto out; 4537 goto out;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index a795eb91f4ea..f44999156839 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -88,7 +88,7 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
88 return 0; 88 return 0;
89} 89}
90 90
91static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
92{ 92{
93 mlog_entry_void(); 93 mlog_entry_void();
94 94
@@ -145,7 +145,7 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
145} 145}
146 146
147 147
148static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 148void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
149{ 149{
150 mlog_entry_void(); 150 mlog_entry_void();
151 151
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
184 BUG_ON(!lksb); 184 BUG_ON(!lksb);
185 185
186 /* only updates if this node masters the lockres */ 186 /* only updates if this node masters the lockres */
187 spin_lock(&res->spinlock);
187 if (res->owner == dlm->node_num) { 188 if (res->owner == dlm->node_num) {
188
189 spin_lock(&res->spinlock);
190 /* check the lksb flags for the direction */ 189 /* check the lksb flags for the direction */
191 if (lksb->flags & DLM_LKSB_GET_LVB) { 190 if (lksb->flags & DLM_LKSB_GET_LVB) {
192 mlog(0, "getting lvb from lockres for %s node\n", 191 mlog(0, "getting lvb from lockres for %s node\n",
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
201 * here. In the future we might want to clear it at the time 200 * here. In the future we might want to clear it at the time
202 * the put is actually done. 201 * the put is actually done.
203 */ 202 */
204 spin_unlock(&res->spinlock);
205 } 203 }
204 spin_unlock(&res->spinlock);
206 205
207 /* reset any lvb flags on the lksb */ 206 /* reset any lvb flags on the lksb */
208 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); 207 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
@@ -452,7 +451,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
452 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen, 451 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
453 lock->ml.node, &status); 452 lock->ml.node, &status);
454 if (ret < 0) 453 if (ret < 0)
455 mlog_errno(ret); 454 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
455 "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
456 lock->ml.node);
456 else { 457 else {
457 if (status == DLM_RECOVERING) { 458 if (status == DLM_RECOVERING) {
458 mlog(ML_ERROR, "sent AST to node %u, it thinks this " 459 mlog(ML_ERROR, "sent AST to node %u, it thinks this "
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 0102be35980c..4b6ae2c13b47 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,7 @@
37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes 37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
38#define DLM_THREAD_MS 200 // flush at least every 200 ms 38#define DLM_THREAD_MS 200 // flush at least every 200 ms
39 39
40#define DLM_HASH_SIZE_DEFAULT (1 << 14) 40#define DLM_HASH_SIZE_DEFAULT (1 << 17)
41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE 41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
42# define DLM_HASH_PAGES 1 42# define DLM_HASH_PAGES 1
43#else 43#else
@@ -904,6 +904,8 @@ void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
904 904
905void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 905void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
906void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 906void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
907void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
908void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
907void dlm_do_local_ast(struct dlm_ctxt *dlm, 909void dlm_do_local_ast(struct dlm_ctxt *dlm,
908 struct dlm_lock_resource *res, 910 struct dlm_lock_resource *res,
909 struct dlm_lock *lock); 911 struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 90803b47cd8c..9f30491e5e88 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -390,7 +390,9 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
390 } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED) 390 } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
391 dlm_error(ret); 391 dlm_error(ret);
392 } else { 392 } else {
393 mlog_errno(tmpret); 393 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
394 "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key,
395 res->owner);
394 if (dlm_is_host_down(tmpret)) { 396 if (dlm_is_host_down(tmpret)) {
395 /* instead of logging the same network error over 397 /* instead of logging the same network error over
396 * and over, sleep here and wait for the heartbeat 398 * and over, sleep here and wait for the heartbeat
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 988c9055fd4e..6b5a492e1749 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -511,7 +511,7 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
511 511
512 assert_spin_locked(&dlm->spinlock); 512 assert_spin_locked(&dlm->spinlock);
513 513
514 printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name); 514 printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name);
515 515
516 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 516 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
517 node + 1)) < O2NM_MAX_NODES) { 517 node + 1)) < O2NM_MAX_NODES) {
@@ -534,7 +534,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
534 534
535 node = exit_msg->node_idx; 535 node = exit_msg->node_idx;
536 536
537 printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name); 537 printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name);
538 538
539 spin_lock(&dlm->spinlock); 539 spin_lock(&dlm->spinlock);
540 clear_bit(node, dlm->domain_map); 540 clear_bit(node, dlm->domain_map);
@@ -565,7 +565,9 @@ static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
565 status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, 565 status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
566 &leave_msg, sizeof(leave_msg), node, 566 &leave_msg, sizeof(leave_msg), node,
567 NULL); 567 NULL);
568 568 if (status < 0)
569 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
570 "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
569 mlog(0, "status return %d from o2net_send_message\n", status); 571 mlog(0, "status return %d from o2net_send_message\n", status);
570 572
571 return status; 573 return status;
@@ -904,7 +906,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
904 set_bit(assert->node_idx, dlm->domain_map); 906 set_bit(assert->node_idx, dlm->domain_map);
905 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 907 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
906 908
907 printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n", 909 printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
908 assert->node_idx, dlm->name); 910 assert->node_idx, dlm->name);
909 __dlm_print_nodes(dlm); 911 __dlm_print_nodes(dlm);
910 912
@@ -962,7 +964,9 @@ static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm,
962 &cancel_msg, sizeof(cancel_msg), node, 964 &cancel_msg, sizeof(cancel_msg), node,
963 NULL); 965 NULL);
964 if (status < 0) { 966 if (status < 0) {
965 mlog_errno(status); 967 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
968 "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
969 node);
966 goto bail; 970 goto bail;
967 } 971 }
968 972
@@ -1029,10 +1033,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
1029 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); 1033 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
1030 1034
1031 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 1035 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
1032 sizeof(join_msg), node, 1036 sizeof(join_msg), node, &join_resp);
1033 &join_resp);
1034 if (status < 0 && status != -ENOPROTOOPT) { 1037 if (status < 0 && status != -ENOPROTOOPT) {
1035 mlog_errno(status); 1038 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
1039 "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
1040 node);
1036 goto bail; 1041 goto bail;
1037 } 1042 }
1038 dlm_query_join_wire_to_packet(join_resp, &packet); 1043 dlm_query_join_wire_to_packet(join_resp, &packet);
@@ -1103,7 +1108,9 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
1103 &assert_msg, sizeof(assert_msg), node, 1108 &assert_msg, sizeof(assert_msg), node,
1104 NULL); 1109 NULL);
1105 if (status < 0) 1110 if (status < 0)
1106 mlog_errno(status); 1111 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
1112 "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
1113 node);
1107 1114
1108 return status; 1115 return status;
1109} 1116}
@@ -1516,7 +1523,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1516 goto leave; 1523 goto leave;
1517 } 1524 }
1518 1525
1519 dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL); 1526 dlm->name = kstrdup(domain, GFP_KERNEL);
1520 if (dlm->name == NULL) { 1527 if (dlm->name == NULL) {
1521 mlog_errno(-ENOMEM); 1528 mlog_errno(-ENOMEM);
1522 kfree(dlm); 1529 kfree(dlm);
@@ -1550,7 +1557,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1550 for (i = 0; i < DLM_HASH_BUCKETS; i++) 1557 for (i = 0; i < DLM_HASH_BUCKETS; i++)
1551 INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); 1558 INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
1552 1559
1553 strcpy(dlm->name, domain);
1554 dlm->key = key; 1560 dlm->key = key;
1555 dlm->node_num = o2nm_this_node(); 1561 dlm->node_num = o2nm_this_node();
1556 1562
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 733337772671..69cf369961c4 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -329,7 +329,9 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
329 BUG(); 329 BUG();
330 } 330 }
331 } else { 331 } else {
332 mlog_errno(tmpret); 332 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
333 "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
334 res->owner);
333 if (dlm_is_host_down(tmpret)) { 335 if (dlm_is_host_down(tmpret)) {
334 ret = DLM_RECOVERING; 336 ret = DLM_RECOVERING;
335 mlog(0, "node %u died so returning DLM_RECOVERING " 337 mlog(0, "node %u died so returning DLM_RECOVERING "
@@ -429,7 +431,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
429 struct dlm_lock *lock; 431 struct dlm_lock *lock;
430 int kernel_allocated = 0; 432 int kernel_allocated = 0;
431 433
432 lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); 434 lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
433 if (!lock) 435 if (!lock)
434 return NULL; 436 return NULL;
435 437
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9289b4357d27..4a7506a4e314 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -617,13 +617,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
617{ 617{
618 struct dlm_lock_resource *res = NULL; 618 struct dlm_lock_resource *res = NULL;
619 619
620 res = (struct dlm_lock_resource *) 620 res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
621 kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
622 if (!res) 621 if (!res)
623 goto error; 622 goto error;
624 623
625 res->lockname.name = (char *) 624 res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
626 kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
627 if (!res->lockname.name) 625 if (!res->lockname.name)
628 goto error; 626 goto error;
629 627
@@ -757,8 +755,7 @@ lookup:
757 spin_unlock(&dlm->spinlock); 755 spin_unlock(&dlm->spinlock);
758 mlog(0, "allocating a new resource\n"); 756 mlog(0, "allocating a new resource\n");
759 /* nothing found and we need to allocate one. */ 757 /* nothing found and we need to allocate one. */
760 alloc_mle = (struct dlm_master_list_entry *) 758 alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
761 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
762 if (!alloc_mle) 759 if (!alloc_mle)
763 goto leave; 760 goto leave;
764 res = dlm_new_lockres(dlm, lockid, namelen); 761 res = dlm_new_lockres(dlm, lockid, namelen);
@@ -1542,8 +1539,7 @@ way_up_top:
1542 spin_unlock(&dlm->master_lock); 1539 spin_unlock(&dlm->master_lock);
1543 spin_unlock(&dlm->spinlock); 1540 spin_unlock(&dlm->spinlock);
1544 1541
1545 mle = (struct dlm_master_list_entry *) 1542 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1546 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1547 if (!mle) { 1543 if (!mle) {
1548 response = DLM_MASTER_RESP_ERROR; 1544 response = DLM_MASTER_RESP_ERROR;
1549 mlog_errno(-ENOMEM); 1545 mlog_errno(-ENOMEM);
@@ -1666,7 +1662,9 @@ again:
1666 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, 1662 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
1667 &assert, sizeof(assert), to, &r); 1663 &assert, sizeof(assert), to, &r);
1668 if (tmpret < 0) { 1664 if (tmpret < 0) {
1669 mlog(0, "assert_master returned %d!\n", tmpret); 1665 mlog(ML_ERROR, "Error %d when sending message %u (key "
1666 "0x%x) to node %u\n", tmpret,
1667 DLM_ASSERT_MASTER_MSG, dlm->key, to);
1670 if (!dlm_is_host_down(tmpret)) { 1668 if (!dlm_is_host_down(tmpret)) {
1671 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret); 1669 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
1672 BUG(); 1670 BUG();
@@ -2205,7 +2203,9 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2205 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, 2203 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
2206 &deref, sizeof(deref), res->owner, &r); 2204 &deref, sizeof(deref), res->owner, &r);
2207 if (ret < 0) 2205 if (ret < 0)
2208 mlog_errno(ret); 2206 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
2207 "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key,
2208 res->owner);
2209 else if (r < 0) { 2209 else if (r < 0) {
2210 /* BAD. other node says I did not have a ref. */ 2210 /* BAD. other node says I did not have a ref. */
2211 mlog(ML_ERROR,"while dropping ref on %s:%.*s " 2211 mlog(ML_ERROR,"while dropping ref on %s:%.*s "
@@ -2452,8 +2452,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2452 goto leave; 2452 goto leave;
2453 } 2453 }
2454 2454
2455 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 2455 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
2456 GFP_NOFS);
2457 if (!mle) { 2456 if (!mle) {
2458 mlog_errno(ret); 2457 mlog_errno(ret);
2459 goto leave; 2458 goto leave;
@@ -2975,7 +2974,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2975 &migrate, sizeof(migrate), nodenum, 2974 &migrate, sizeof(migrate), nodenum,
2976 &status); 2975 &status);
2977 if (ret < 0) { 2976 if (ret < 0) {
2978 mlog(0, "migrate_request returned %d!\n", ret); 2977 mlog(ML_ERROR, "Error %d when sending message %u (key "
2978 "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG,
2979 dlm->key, nodenum);
2979 if (!dlm_is_host_down(ret)) { 2980 if (!dlm_is_host_down(ret)) {
2980 mlog(ML_ERROR, "unhandled error=%d!\n", ret); 2981 mlog(ML_ERROR, "unhandled error=%d!\n", ret);
2981 BUG(); 2982 BUG();
@@ -3033,8 +3034,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
3033 hash = dlm_lockid_hash(name, namelen); 3034 hash = dlm_lockid_hash(name, namelen);
3034 3035
3035 /* preallocate.. if this fails, abort */ 3036 /* preallocate.. if this fails, abort */
3036 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 3037 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
3037 GFP_NOFS);
3038 3038
3039 if (!mle) { 3039 if (!mle) {
3040 ret = -ENOMEM; 3040 ret = -ENOMEM;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index b4f99de2caf3..f8b75ce4be70 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -803,7 +803,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
803 803
804 /* negative status is handled by caller */ 804 /* negative status is handled by caller */
805 if (ret < 0) 805 if (ret < 0)
806 mlog_errno(ret); 806 mlog(ML_ERROR, "Error %d when sending message %u (key "
807 "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG,
808 dlm->key, request_from);
807 809
808 // return from here, then 810 // return from here, then
809 // sleep until all received or error 811 // sleep until all received or error
@@ -955,10 +957,10 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
955 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, 957 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
956 sizeof(done_msg), send_to, &tmpret); 958 sizeof(done_msg), send_to, &tmpret);
957 if (ret < 0) { 959 if (ret < 0) {
960 mlog(ML_ERROR, "Error %d when sending message %u (key "
961 "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG,
962 dlm->key, send_to);
958 if (!dlm_is_host_down(ret)) { 963 if (!dlm_is_host_down(ret)) {
959 mlog_errno(ret);
960 mlog(ML_ERROR, "%s: unknown error sending data-done "
961 "to %u\n", dlm->name, send_to);
962 BUG(); 964 BUG();
963 } 965 }
964 } else 966 } else
@@ -1126,7 +1128,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1126 if (ret < 0) { 1128 if (ret < 0) {
1127 /* XXX: negative status is not handled. 1129 /* XXX: negative status is not handled.
1128 * this will end up killing this node. */ 1130 * this will end up killing this node. */
1129 mlog_errno(ret); 1131 mlog(ML_ERROR, "Error %d when sending message %u (key "
1132 "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG,
1133 dlm->key, send_to);
1130 } else { 1134 } else {
1131 /* might get an -ENOMEM back here */ 1135 /* might get an -ENOMEM back here */
1132 ret = status; 1136 ret = status;
@@ -1642,7 +1646,9 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1642 &req, sizeof(req), nodenum, &status); 1646 &req, sizeof(req), nodenum, &status);
1643 /* XXX: negative status not handled properly here. */ 1647 /* XXX: negative status not handled properly here. */
1644 if (ret < 0) 1648 if (ret < 0)
1645 mlog_errno(ret); 1649 mlog(ML_ERROR, "Error %d when sending message %u (key "
1650 "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
1651 dlm->key, nodenum);
1646 else { 1652 else {
1647 BUG_ON(status < 0); 1653 BUG_ON(status < 0);
1648 BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN); 1654 BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
@@ -2640,7 +2646,7 @@ retry:
2640 if (dlm_is_host_down(ret)) { 2646 if (dlm_is_host_down(ret)) {
2641 /* node is down. not involved in recovery 2647 /* node is down. not involved in recovery
2642 * so just keep going */ 2648 * so just keep going */
2643 mlog(0, "%s: node %u was down when sending " 2649 mlog(ML_NOTICE, "%s: node %u was down when sending "
2644 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2650 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2645 ret = 0; 2651 ret = 0;
2646 } 2652 }
@@ -2660,11 +2666,12 @@ retry:
2660 } 2666 }
2661 if (ret < 0) { 2667 if (ret < 0) {
2662 struct dlm_lock_resource *res; 2668 struct dlm_lock_resource *res;
2669
2663 /* this is now a serious problem, possibly ENOMEM 2670 /* this is now a serious problem, possibly ENOMEM
2664 * in the network stack. must retry */ 2671 * in the network stack. must retry */
2665 mlog_errno(ret); 2672 mlog_errno(ret);
2666 mlog(ML_ERROR, "begin reco of dlm %s to node %u " 2673 mlog(ML_ERROR, "begin reco of dlm %s to node %u "
2667 " returned %d\n", dlm->name, nodenum, ret); 2674 "returned %d\n", dlm->name, nodenum, ret);
2668 res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME, 2675 res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
2669 DLM_RECOVERY_LOCK_NAME_LEN); 2676 DLM_RECOVERY_LOCK_NAME_LEN);
2670 if (res) { 2677 if (res) {
@@ -2789,7 +2796,9 @@ stage2:
2789 if (ret >= 0) 2796 if (ret >= 0)
2790 ret = status; 2797 ret = status;
2791 if (ret < 0) { 2798 if (ret < 0) {
2792 mlog_errno(ret); 2799 mlog(ML_ERROR, "Error %d when sending message %u (key "
2800 "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
2801 dlm->key, nodenum);
2793 if (dlm_is_host_down(ret)) { 2802 if (dlm_is_host_down(ret)) {
2794 /* this has no effect on this recovery 2803 /* this has no effect on this recovery
2795 * session, so set the status to zero to 2804 * session, so set the status to zero to
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1fd1d35..d4f73ca68fe5 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -309,6 +309,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
309 * spinlock, and because we know that it is not migrating/ 309 * spinlock, and because we know that it is not migrating/
310 * recovering/in-progress, it is fine to reserve asts and 310 * recovering/in-progress, it is fine to reserve asts and
311 * basts right before queueing them all throughout */ 311 * basts right before queueing them all throughout */
312 assert_spin_locked(&dlm->ast_lock);
312 assert_spin_locked(&res->spinlock); 313 assert_spin_locked(&res->spinlock);
313 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| 314 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
314 DLM_LOCK_RES_RECOVERING| 315 DLM_LOCK_RES_RECOVERING|
@@ -337,7 +338,7 @@ converting:
337 /* queue the BAST if not already */ 338 /* queue the BAST if not already */
338 if (lock->ml.highest_blocked == LKM_IVMODE) { 339 if (lock->ml.highest_blocked == LKM_IVMODE) {
339 __dlm_lockres_reserve_ast(res); 340 __dlm_lockres_reserve_ast(res);
340 dlm_queue_bast(dlm, lock); 341 __dlm_queue_bast(dlm, lock);
341 } 342 }
342 /* update the highest_blocked if needed */ 343 /* update the highest_blocked if needed */
343 if (lock->ml.highest_blocked < target->ml.convert_type) 344 if (lock->ml.highest_blocked < target->ml.convert_type)
@@ -355,7 +356,7 @@ converting:
355 can_grant = 0; 356 can_grant = 0;
356 if (lock->ml.highest_blocked == LKM_IVMODE) { 357 if (lock->ml.highest_blocked == LKM_IVMODE) {
357 __dlm_lockres_reserve_ast(res); 358 __dlm_lockres_reserve_ast(res);
358 dlm_queue_bast(dlm, lock); 359 __dlm_queue_bast(dlm, lock);
359 } 360 }
360 if (lock->ml.highest_blocked < target->ml.convert_type) 361 if (lock->ml.highest_blocked < target->ml.convert_type)
361 lock->ml.highest_blocked = 362 lock->ml.highest_blocked =
@@ -383,7 +384,7 @@ converting:
383 spin_unlock(&target->spinlock); 384 spin_unlock(&target->spinlock);
384 385
385 __dlm_lockres_reserve_ast(res); 386 __dlm_lockres_reserve_ast(res);
386 dlm_queue_ast(dlm, target); 387 __dlm_queue_ast(dlm, target);
387 /* go back and check for more */ 388 /* go back and check for more */
388 goto converting; 389 goto converting;
389 } 390 }
@@ -402,7 +403,7 @@ blocked:
402 can_grant = 0; 403 can_grant = 0;
403 if (lock->ml.highest_blocked == LKM_IVMODE) { 404 if (lock->ml.highest_blocked == LKM_IVMODE) {
404 __dlm_lockres_reserve_ast(res); 405 __dlm_lockres_reserve_ast(res);
405 dlm_queue_bast(dlm, lock); 406 __dlm_queue_bast(dlm, lock);
406 } 407 }
407 if (lock->ml.highest_blocked < target->ml.type) 408 if (lock->ml.highest_blocked < target->ml.type)
408 lock->ml.highest_blocked = target->ml.type; 409 lock->ml.highest_blocked = target->ml.type;
@@ -418,7 +419,7 @@ blocked:
418 can_grant = 0; 419 can_grant = 0;
419 if (lock->ml.highest_blocked == LKM_IVMODE) { 420 if (lock->ml.highest_blocked == LKM_IVMODE) {
420 __dlm_lockres_reserve_ast(res); 421 __dlm_lockres_reserve_ast(res);
421 dlm_queue_bast(dlm, lock); 422 __dlm_queue_bast(dlm, lock);
422 } 423 }
423 if (lock->ml.highest_blocked < target->ml.type) 424 if (lock->ml.highest_blocked < target->ml.type)
424 lock->ml.highest_blocked = target->ml.type; 425 lock->ml.highest_blocked = target->ml.type;
@@ -444,7 +445,7 @@ blocked:
444 spin_unlock(&target->spinlock); 445 spin_unlock(&target->spinlock);
445 446
446 __dlm_lockres_reserve_ast(res); 447 __dlm_lockres_reserve_ast(res);
447 dlm_queue_ast(dlm, target); 448 __dlm_queue_ast(dlm, target);
448 /* go back and check for more */ 449 /* go back and check for more */
449 goto converting; 450 goto converting;
450 } 451 }
@@ -674,6 +675,7 @@ static int dlm_thread(void *data)
674 /* lockres can be re-dirtied/re-added to the 675 /* lockres can be re-dirtied/re-added to the
675 * dirty_list in this gap, but that is ok */ 676 * dirty_list in this gap, but that is ok */
676 677
678 spin_lock(&dlm->ast_lock);
677 spin_lock(&res->spinlock); 679 spin_lock(&res->spinlock);
678 if (res->owner != dlm->node_num) { 680 if (res->owner != dlm->node_num) {
679 __dlm_print_one_lock_resource(res); 681 __dlm_print_one_lock_resource(res);
@@ -694,6 +696,7 @@ static int dlm_thread(void *data)
694 /* move it to the tail and keep going */ 696 /* move it to the tail and keep going */
695 res->state &= ~DLM_LOCK_RES_DIRTY; 697 res->state &= ~DLM_LOCK_RES_DIRTY;
696 spin_unlock(&res->spinlock); 698 spin_unlock(&res->spinlock);
699 spin_unlock(&dlm->ast_lock);
697 mlog(0, "delaying list shuffling for in-" 700 mlog(0, "delaying list shuffling for in-"
698 "progress lockres %.*s, state=%d\n", 701 "progress lockres %.*s, state=%d\n",
699 res->lockname.len, res->lockname.name, 702 res->lockname.len, res->lockname.name,
@@ -715,6 +718,7 @@ static int dlm_thread(void *data)
715 dlm_shuffle_lists(dlm, res); 718 dlm_shuffle_lists(dlm, res);
716 res->state &= ~DLM_LOCK_RES_DIRTY; 719 res->state &= ~DLM_LOCK_RES_DIRTY;
717 spin_unlock(&res->spinlock); 720 spin_unlock(&res->spinlock);
721 spin_unlock(&dlm->ast_lock);
718 722
719 dlm_lockres_calc_usage(dlm, res); 723 dlm_lockres_calc_usage(dlm, res);
720 724
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b47c1b92b82b..817287c6a6db 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -354,7 +354,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
354 mlog(0, "master was in-progress. retry\n"); 354 mlog(0, "master was in-progress. retry\n");
355 ret = status; 355 ret = status;
356 } else { 356 } else {
357 mlog_errno(tmpret); 357 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
358 "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
358 if (dlm_is_host_down(tmpret)) { 359 if (dlm_is_host_down(tmpret)) {
359 /* NOTE: this seems strange, but it is what we want. 360 /* NOTE: this seems strange, but it is what we want.
360 * when the master goes down during a cancel or 361 * when the master goes down during a cancel or
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de157a08c..b83d6107a1f5 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
112 * O_RDONLY -> PRMODE level 112 * O_RDONLY -> PRMODE level
113 * O_WRONLY -> EXMODE level 113 * O_WRONLY -> EXMODE level
114 * 114 *
115 * O_NONBLOCK -> LKM_NOQUEUE 115 * O_NONBLOCK -> NOQUEUE
116 */ 116 */
117static int dlmfs_decode_open_flags(int open_flags, 117static int dlmfs_decode_open_flags(int open_flags,
118 int *level, 118 int *level,
119 int *flags) 119 int *flags)
120{ 120{
121 if (open_flags & (O_WRONLY|O_RDWR)) 121 if (open_flags & (O_WRONLY|O_RDWR))
122 *level = LKM_EXMODE; 122 *level = DLM_LOCK_EX;
123 else 123 else
124 *level = LKM_PRMODE; 124 *level = DLM_LOCK_PR;
125 125
126 *flags = 0; 126 *flags = 0;
127 if (open_flags & O_NONBLOCK) 127 if (open_flags & O_NONBLOCK)
128 *flags |= LKM_NOQUEUE; 128 *flags |= DLM_LKF_NOQUEUE;
129 129
130 return 0; 130 return 0;
131} 131}
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
166 * to be able userspace to be able to distinguish a 166 * to be able userspace to be able to distinguish a
167 * valid lock request from one that simply couldn't be 167 * valid lock request from one that simply couldn't be
168 * granted. */ 168 * granted. */
169 if (flags & LKM_NOQUEUE && status == -EAGAIN) 169 if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
170 status = -ETXTBSY; 170 status = -ETXTBSY;
171 kfree(fp); 171 kfree(fp);
172 goto bail; 172 goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
193 status = 0; 193 status = 0;
194 if (fp) { 194 if (fp) {
195 level = fp->fp_lock_level; 195 level = fp->fp_lock_level;
196 if (level != LKM_IVMODE) 196 if (level != DLM_LOCK_IV)
197 user_dlm_cluster_unlock(&ip->ip_lockres, level); 197 user_dlm_cluster_unlock(&ip->ip_lockres, level);
198 198
199 kfree(fp); 199 kfree(fp);
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
262 if ((count + *ppos) > i_size_read(inode)) 262 if ((count + *ppos) > i_size_read(inode))
263 readlen = i_size_read(inode) - *ppos; 263 readlen = i_size_read(inode) - *ppos;
264 else 264 else
265 readlen = count - *ppos; 265 readlen = count;
266 266
267 lvb_buf = kmalloc(readlen, GFP_NOFS); 267 lvb_buf = kmalloc(readlen, GFP_NOFS);
268 if (!lvb_buf) 268 if (!lvb_buf)
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 50c4ee805da4..39eb16ac5f98 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3897,7 +3897,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
3897 oinfo->dqi_gi.dqi_free_entry = 3897 oinfo->dqi_gi.dqi_free_entry =
3898 be32_to_cpu(lvb->lvb_free_entry); 3898 be32_to_cpu(lvb->lvb_free_entry);
3899 } else { 3899 } else {
3900 status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); 3900 status = ocfs2_read_quota_phys_block(oinfo->dqi_gqinode,
3901 oinfo->dqi_giblk, &bh);
3901 if (status) { 3902 if (status) {
3902 mlog_errno(status); 3903 mlog_errno(status);
3903 goto bail; 3904 goto bail;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc8341e..97e54b9e654b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -278,10 +278,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
278 inode->i_atime = CURRENT_TIME; 278 inode->i_atime = CURRENT_TIME;
279 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 279 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
280 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 280 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
281 281 ocfs2_journal_dirty(handle, bh);
282 ret = ocfs2_journal_dirty(handle, bh);
283 if (ret < 0)
284 mlog_errno(ret);
285 282
286out_commit: 283out_commit:
287 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 284 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -430,9 +427,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
430 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); 427 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
431 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 428 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
432 429
433 status = ocfs2_journal_dirty(handle, fe_bh); 430 ocfs2_journal_dirty(handle, fe_bh);
434 if (status < 0)
435 mlog_errno(status);
436 431
437out_commit: 432out_commit:
438 ocfs2_commit_trans(osb, handle); 433 ocfs2_commit_trans(osb, handle);
@@ -449,7 +444,6 @@ static int ocfs2_truncate_file(struct inode *inode,
449 int status = 0; 444 int status = 0;
450 struct ocfs2_dinode *fe = NULL; 445 struct ocfs2_dinode *fe = NULL;
451 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
452 struct ocfs2_truncate_context *tc = NULL;
453 447
454 mlog_entry("(inode = %llu, new_i_size = %llu\n", 448 mlog_entry("(inode = %llu, new_i_size = %llu\n",
455 (unsigned long long)OCFS2_I(inode)->ip_blkno, 449 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -488,6 +482,9 @@ static int ocfs2_truncate_file(struct inode *inode,
488 482
489 down_write(&OCFS2_I(inode)->ip_alloc_sem); 483 down_write(&OCFS2_I(inode)->ip_alloc_sem);
490 484
485 ocfs2_resv_discard(&osb->osb_la_resmap,
486 &OCFS2_I(inode)->ip_la_data_resv);
487
491 /* 488 /*
492 * The inode lock forced other nodes to sync and drop their 489 * The inode lock forced other nodes to sync and drop their
493 * pages, which (correctly) happens even if we have a truncate 490 * pages, which (correctly) happens even if we have a truncate
@@ -517,13 +514,7 @@ static int ocfs2_truncate_file(struct inode *inode,
517 goto bail_unlock_sem; 514 goto bail_unlock_sem;
518 } 515 }
519 516
520 status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); 517 status = ocfs2_commit_truncate(osb, inode, di_bh);
521 if (status < 0) {
522 mlog_errno(status);
523 goto bail_unlock_sem;
524 }
525
526 status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
527 if (status < 0) { 518 if (status < 0) {
528 mlog_errno(status); 519 mlog_errno(status);
529 goto bail_unlock_sem; 520 goto bail_unlock_sem;
@@ -666,11 +657,7 @@ restarted_transaction:
666 goto leave; 657 goto leave;
667 } 658 }
668 659
669 status = ocfs2_journal_dirty(handle, bh); 660 ocfs2_journal_dirty(handle, bh);
670 if (status < 0) {
671 mlog_errno(status);
672 goto leave;
673 }
674 661
675 spin_lock(&OCFS2_I(inode)->ip_lock); 662 spin_lock(&OCFS2_I(inode)->ip_lock);
676 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); 663 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
@@ -684,6 +671,7 @@ restarted_transaction:
684 if (why == RESTART_META) { 671 if (why == RESTART_META) {
685 mlog(0, "restarting function.\n"); 672 mlog(0, "restarting function.\n");
686 restart_func = 1; 673 restart_func = 1;
674 status = 0;
687 } else { 675 } else {
688 BUG_ON(why != RESTART_TRANS); 676 BUG_ON(why != RESTART_TRANS);
689 677
@@ -945,9 +933,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
945 struct ocfs2_super *osb = OCFS2_SB(sb); 933 struct ocfs2_super *osb = OCFS2_SB(sb);
946 struct buffer_head *bh = NULL; 934 struct buffer_head *bh = NULL;
947 handle_t *handle = NULL; 935 handle_t *handle = NULL;
948 int qtype;
949 struct dquot *transfer_from[MAXQUOTAS] = { };
950 struct dquot *transfer_to[MAXQUOTAS] = { }; 936 struct dquot *transfer_to[MAXQUOTAS] = { };
937 int qtype;
951 938
952 mlog_entry("(0x%p, '%.*s')\n", dentry, 939 mlog_entry("(0x%p, '%.*s')\n", dentry,
953 dentry->d_name.len, dentry->d_name.name); 940 dentry->d_name.len, dentry->d_name.name);
@@ -978,10 +965,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
978 if (status) 965 if (status)
979 return status; 966 return status;
980 967
968 if (is_quota_modification(inode, attr))
969 dquot_initialize(inode);
981 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; 970 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
982 if (size_change) { 971 if (size_change) {
983 dquot_initialize(inode);
984
985 status = ocfs2_rw_lock(inode, 1); 972 status = ocfs2_rw_lock(inode, 1);
986 if (status < 0) { 973 if (status < 0) {
987 mlog_errno(status); 974 mlog_errno(status);
@@ -1031,9 +1018,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1031 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1018 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1032 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, 1019 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
1033 USRQUOTA); 1020 USRQUOTA);
1034 transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, 1021 if (!transfer_to[USRQUOTA]) {
1035 USRQUOTA);
1036 if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) {
1037 status = -ESRCH; 1022 status = -ESRCH;
1038 goto bail_unlock; 1023 goto bail_unlock;
1039 } 1024 }
@@ -1043,9 +1028,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1043 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1028 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1044 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, 1029 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
1045 GRPQUOTA); 1030 GRPQUOTA);
1046 transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, 1031 if (!transfer_to[GRPQUOTA]) {
1047 GRPQUOTA);
1048 if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) {
1049 status = -ESRCH; 1032 status = -ESRCH;
1050 goto bail_unlock; 1033 goto bail_unlock;
1051 } 1034 }
@@ -1057,7 +1040,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1057 mlog_errno(status); 1040 mlog_errno(status);
1058 goto bail_unlock; 1041 goto bail_unlock;
1059 } 1042 }
1060 status = dquot_transfer(inode, attr); 1043 status = __dquot_transfer(inode, transfer_to);
1061 if (status < 0) 1044 if (status < 0)
1062 goto bail_commit; 1045 goto bail_commit;
1063 } else { 1046 } else {
@@ -1097,10 +1080,8 @@ bail:
1097 brelse(bh); 1080 brelse(bh);
1098 1081
1099 /* Release quota pointers in case we acquired them */ 1082 /* Release quota pointers in case we acquired them */
1100 for (qtype = 0; qtype < MAXQUOTAS; qtype++) { 1083 for (qtype = 0; qtype < MAXQUOTAS; qtype++)
1101 dqput(transfer_to[qtype]); 1084 dqput(transfer_to[qtype]);
1102 dqput(transfer_from[qtype]);
1103 }
1104 1085
1105 if (!status && attr->ia_valid & ATTR_MODE) { 1086 if (!status && attr->ia_valid & ATTR_MODE) {
1106 status = ocfs2_acl_chmod(inode); 1087 status = ocfs2_acl_chmod(inode);
@@ -1194,9 +1175,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1194 di = (struct ocfs2_dinode *) bh->b_data; 1175 di = (struct ocfs2_dinode *) bh->b_data;
1195 di->i_mode = cpu_to_le16(inode->i_mode); 1176 di->i_mode = cpu_to_le16(inode->i_mode);
1196 1177
1197 ret = ocfs2_journal_dirty(handle, bh); 1178 ocfs2_journal_dirty(handle, bh);
1198 if (ret < 0)
1199 mlog_errno(ret);
1200 1179
1201out_trans: 1180out_trans:
1202 ocfs2_commit_trans(osb, handle); 1181 ocfs2_commit_trans(osb, handle);
@@ -1433,16 +1412,90 @@ out:
1433 return ret; 1412 return ret;
1434} 1413}
1435 1414
1415static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
1416{
1417 int i;
1418 struct ocfs2_extent_rec *rec = NULL;
1419
1420 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1421
1422 rec = &el->l_recs[i];
1423
1424 if (le32_to_cpu(rec->e_cpos) < pos)
1425 break;
1426 }
1427
1428 return i;
1429}
1430
1431/*
1432 * Helper to calculate the punching pos and length in one run, we handle the
1433 * following three cases in order:
1434 *
1435 * - remove the entire record
1436 * - remove a partial record
1437 * - no record needs to be removed (hole-punching completed)
1438*/
1439static void ocfs2_calc_trunc_pos(struct inode *inode,
1440 struct ocfs2_extent_list *el,
1441 struct ocfs2_extent_rec *rec,
1442 u32 trunc_start, u32 *trunc_cpos,
1443 u32 *trunc_len, u32 *trunc_end,
1444 u64 *blkno, int *done)
1445{
1446 int ret = 0;
1447 u32 coff, range;
1448
1449 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1450
1451 if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
1452 *trunc_cpos = le32_to_cpu(rec->e_cpos);
1453 /*
1454 * Skip holes if any.
1455 */
1456 if (range < *trunc_end)
1457 *trunc_end = range;
1458 *trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
1459 *blkno = le64_to_cpu(rec->e_blkno);
1460 *trunc_end = le32_to_cpu(rec->e_cpos);
1461 } else if (range > trunc_start) {
1462 *trunc_cpos = trunc_start;
1463 *trunc_len = *trunc_end - trunc_start;
1464 coff = trunc_start - le32_to_cpu(rec->e_cpos);
1465 *blkno = le64_to_cpu(rec->e_blkno) +
1466 ocfs2_clusters_to_blocks(inode->i_sb, coff);
1467 *trunc_end = trunc_start;
1468 } else {
1469 /*
1470 * It may have two following possibilities:
1471 *
1472 * - last record has been removed
1473 * - trunc_start was within a hole
1474 *
1475 * both two cases mean the completion of hole punching.
1476 */
1477 ret = 1;
1478 }
1479
1480 *done = ret;
1481}
1482
1436static int ocfs2_remove_inode_range(struct inode *inode, 1483static int ocfs2_remove_inode_range(struct inode *inode,
1437 struct buffer_head *di_bh, u64 byte_start, 1484 struct buffer_head *di_bh, u64 byte_start,
1438 u64 byte_len) 1485 u64 byte_len)
1439{ 1486{
1440 int ret = 0; 1487 int ret = 0, flags = 0, done = 0, i;
1441 u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; 1488 u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
1489 u32 cluster_in_el;
1442 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1490 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1443 struct ocfs2_cached_dealloc_ctxt dealloc; 1491 struct ocfs2_cached_dealloc_ctxt dealloc;
1444 struct address_space *mapping = inode->i_mapping; 1492 struct address_space *mapping = inode->i_mapping;
1445 struct ocfs2_extent_tree et; 1493 struct ocfs2_extent_tree et;
1494 struct ocfs2_path *path = NULL;
1495 struct ocfs2_extent_list *el = NULL;
1496 struct ocfs2_extent_rec *rec = NULL;
1497 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1498 u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
1446 1499
1447 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 1500 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1448 ocfs2_init_dealloc_ctxt(&dealloc); 1501 ocfs2_init_dealloc_ctxt(&dealloc);
@@ -1468,17 +1521,35 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1468 goto out; 1521 goto out;
1469 } 1522 }
1470 1523
1524 /*
1525 * For reflinks, we may need to CoW 2 clusters which might be
1526 * partially zero'd later, if hole's start and end offset were
1527 * within one cluster(means is not exactly aligned to clustersize).
1528 */
1529
1530 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
1531
1532 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
1533 if (ret) {
1534 mlog_errno(ret);
1535 goto out;
1536 }
1537
1538 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
1539 if (ret) {
1540 mlog_errno(ret);
1541 goto out;
1542 }
1543 }
1544
1471 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1545 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1472 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1546 trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
1473 if (trunc_len >= trunc_start) 1547 cluster_in_el = trunc_end;
1474 trunc_len -= trunc_start;
1475 else
1476 trunc_len = 0;
1477 1548
1478 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", 1549 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
1479 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1550 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1480 (unsigned long long)byte_start, 1551 (unsigned long long)byte_start,
1481 (unsigned long long)byte_len, trunc_start, trunc_len); 1552 (unsigned long long)byte_len, trunc_start, trunc_end);
1482 1553
1483 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); 1554 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
1484 if (ret) { 1555 if (ret) {
@@ -1486,31 +1557,79 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1486 goto out; 1557 goto out;
1487 } 1558 }
1488 1559
1489 cpos = trunc_start; 1560 path = ocfs2_new_path_from_et(&et);
1490 while (trunc_len) { 1561 if (!path) {
1491 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, 1562 ret = -ENOMEM;
1492 &alloc_size, NULL); 1563 mlog_errno(ret);
1564 goto out;
1565 }
1566
1567 while (trunc_end > trunc_start) {
1568
1569 ret = ocfs2_find_path(INODE_CACHE(inode), path,
1570 cluster_in_el);
1493 if (ret) { 1571 if (ret) {
1494 mlog_errno(ret); 1572 mlog_errno(ret);
1495 goto out; 1573 goto out;
1496 } 1574 }
1497 1575
1498 if (alloc_size > trunc_len) 1576 el = path_leaf_el(path);
1499 alloc_size = trunc_len; 1577
1578 i = ocfs2_find_rec(el, trunc_end);
1579 /*
1580 * Need to go to previous extent block.
1581 */
1582 if (i < 0) {
1583 if (path->p_tree_depth == 0)
1584 break;
1500 1585
1501 /* Only do work for non-holes */ 1586 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
1502 if (phys_cpos != 0) { 1587 path,
1503 ret = ocfs2_remove_btree_range(inode, &et, cpos, 1588 &cluster_in_el);
1504 phys_cpos, alloc_size,
1505 &dealloc);
1506 if (ret) { 1589 if (ret) {
1507 mlog_errno(ret); 1590 mlog_errno(ret);
1508 goto out; 1591 goto out;
1509 } 1592 }
1593
1594 /*
1595 * We've reached the leftmost extent block,
1596 * it's safe to leave.
1597 */
1598 if (cluster_in_el == 0)
1599 break;
1600
1601 /*
1602 * The 'pos' searched for previous extent block is
1603 * always one cluster less than actual trunc_end.
1604 */
1605 trunc_end = cluster_in_el + 1;
1606
1607 ocfs2_reinit_path(path, 1);
1608
1609 continue;
1610
1611 } else
1612 rec = &el->l_recs[i];
1613
1614 ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
1615 &trunc_len, &trunc_end, &blkno, &done);
1616 if (done)
1617 break;
1618
1619 flags = rec->e_flags;
1620 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
1621
1622 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
1623 phys_cpos, trunc_len, flags,
1624 &dealloc, refcount_loc);
1625 if (ret < 0) {
1626 mlog_errno(ret);
1627 goto out;
1510 } 1628 }
1511 1629
1512 cpos += alloc_size; 1630 cluster_in_el = trunc_end;
1513 trunc_len -= alloc_size; 1631
1632 ocfs2_reinit_path(path, 1);
1514 } 1633 }
1515 1634
1516 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); 1635 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
@@ -1981,18 +2100,18 @@ relock:
1981 /* communicate with ocfs2_dio_end_io */ 2100 /* communicate with ocfs2_dio_end_io */
1982 ocfs2_iocb_set_rw_locked(iocb, rw_level); 2101 ocfs2_iocb_set_rw_locked(iocb, rw_level);
1983 2102
1984 if (direct_io) { 2103 ret = generic_segment_checks(iov, &nr_segs, &ocount,
1985 ret = generic_segment_checks(iov, &nr_segs, &ocount, 2104 VERIFY_READ);
1986 VERIFY_READ); 2105 if (ret)
1987 if (ret) 2106 goto out_dio;
1988 goto out_dio;
1989 2107
1990 count = ocount; 2108 count = ocount;
1991 ret = generic_write_checks(file, ppos, &count, 2109 ret = generic_write_checks(file, ppos, &count,
1992 S_ISBLK(inode->i_mode)); 2110 S_ISBLK(inode->i_mode));
1993 if (ret) 2111 if (ret)
1994 goto out_dio; 2112 goto out_dio;
1995 2113
2114 if (direct_io) {
1996 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 2115 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
1997 ppos, count, ocount); 2116 ppos, count, ocount);
1998 if (written < 0) { 2117 if (written < 0) {
@@ -2007,7 +2126,10 @@ relock:
2007 goto out_dio; 2126 goto out_dio;
2008 } 2127 }
2009 } else { 2128 } else {
2010 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); 2129 current->backing_dev_info = file->f_mapping->backing_dev_info;
2130 written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
2131 ppos, count, 0);
2132 current->backing_dev_info = NULL;
2011 } 2133 }
2012 2134
2013out_dio: 2135out_dio:
@@ -2021,9 +2143,9 @@ out_dio:
2021 if (ret < 0) 2143 if (ret < 0)
2022 written = ret; 2144 written = ret;
2023 2145
2024 if (!ret && (old_size != i_size_read(inode) || 2146 if (!ret && ((old_size != i_size_read(inode)) ||
2025 old_clusters != OCFS2_I(inode)->ip_clusters || 2147 (old_clusters != OCFS2_I(inode)->ip_clusters) ||
2026 has_refcount)) { 2148 has_refcount)) {
2027 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2149 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2028 if (ret < 0) 2150 if (ret < 0)
2029 written = ret; 2151 written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb68b6d..abb0a95cc717 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -376,6 +376,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
376 376
377 OCFS2_I(inode)->ip_last_used_slot = 0; 377 OCFS2_I(inode)->ip_last_used_slot = 0;
378 OCFS2_I(inode)->ip_last_used_group = 0; 378 OCFS2_I(inode)->ip_last_used_group = 0;
379
380 if (S_ISDIR(inode->i_mode))
381 ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
382 OCFS2_RESV_FLAG_DIR);
379 mlog_exit_void(); 383 mlog_exit_void();
380} 384}
381 385
@@ -539,7 +543,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
539 struct buffer_head *fe_bh) 543 struct buffer_head *fe_bh)
540{ 544{
541 int status = 0; 545 int status = 0;
542 struct ocfs2_truncate_context *tc = NULL;
543 struct ocfs2_dinode *fe; 546 struct ocfs2_dinode *fe;
544 handle_t *handle = NULL; 547 handle_t *handle = NULL;
545 548
@@ -558,6 +561,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
558 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 561 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
559 if (IS_ERR(handle)) { 562 if (IS_ERR(handle)) {
560 status = PTR_ERR(handle); 563 status = PTR_ERR(handle);
564 handle = NULL;
561 mlog_errno(status); 565 mlog_errno(status);
562 goto out; 566 goto out;
563 } 567 }
@@ -581,13 +585,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
581 ocfs2_commit_trans(osb, handle); 585 ocfs2_commit_trans(osb, handle);
582 handle = NULL; 586 handle = NULL;
583 587
584 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); 588 status = ocfs2_commit_truncate(osb, inode, fe_bh);
585 if (status < 0) {
586 mlog_errno(status);
587 goto out;
588 }
589
590 status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
591 if (status < 0) { 589 if (status < 0) {
592 mlog_errno(status); 590 mlog_errno(status);
593 goto out; 591 goto out;
@@ -639,11 +637,13 @@ static int ocfs2_remove_inode(struct inode *inode,
639 goto bail_unlock; 637 goto bail_unlock;
640 } 638 }
641 639
642 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, 640 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
643 orphan_dir_bh); 641 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
644 if (status < 0) { 642 orphan_dir_bh);
645 mlog_errno(status); 643 if (status < 0) {
646 goto bail_commit; 644 mlog_errno(status);
645 goto bail_commit;
646 }
647 } 647 }
648 648
649 /* set the inodes dtime */ 649 /* set the inodes dtime */
@@ -656,12 +656,7 @@ static int ocfs2_remove_inode(struct inode *inode,
656 656
657 di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); 657 di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
658 di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); 658 di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
659 659 ocfs2_journal_dirty(handle, di_bh);
660 status = ocfs2_journal_dirty(handle, di_bh);
661 if (status < 0) {
662 mlog_errno(status);
663 goto bail_commit;
664 }
665 660
666 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); 661 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
667 dquot_free_inode(inode); 662 dquot_free_inode(inode);
@@ -722,38 +717,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
722static int ocfs2_wipe_inode(struct inode *inode, 717static int ocfs2_wipe_inode(struct inode *inode,
723 struct buffer_head *di_bh) 718 struct buffer_head *di_bh)
724{ 719{
725 int status, orphaned_slot; 720 int status, orphaned_slot = -1;
726 struct inode *orphan_dir_inode = NULL; 721 struct inode *orphan_dir_inode = NULL;
727 struct buffer_head *orphan_dir_bh = NULL; 722 struct buffer_head *orphan_dir_bh = NULL;
728 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 723 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
729 struct ocfs2_dinode *di; 724 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
730 725
731 di = (struct ocfs2_dinode *) di_bh->b_data; 726 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
732 orphaned_slot = le16_to_cpu(di->i_orphaned_slot); 727 orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
733 728
734 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); 729 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
735 if (status) 730 if (status)
736 return status; 731 return status;
737 732
738 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 733 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
739 ORPHAN_DIR_SYSTEM_INODE, 734 ORPHAN_DIR_SYSTEM_INODE,
740 orphaned_slot); 735 orphaned_slot);
741 if (!orphan_dir_inode) { 736 if (!orphan_dir_inode) {
742 status = -EEXIST; 737 status = -EEXIST;
743 mlog_errno(status); 738 mlog_errno(status);
744 goto bail; 739 goto bail;
745 } 740 }
746 741
747 /* Lock the orphan dir. The lock will be held for the entire 742 /* Lock the orphan dir. The lock will be held for the entire
748 * delete_inode operation. We do this now to avoid races with 743 * delete_inode operation. We do this now to avoid races with
749 * recovery completion on other nodes. */ 744 * recovery completion on other nodes. */
750 mutex_lock(&orphan_dir_inode->i_mutex); 745 mutex_lock(&orphan_dir_inode->i_mutex);
751 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); 746 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
752 if (status < 0) { 747 if (status < 0) {
753 mutex_unlock(&orphan_dir_inode->i_mutex); 748 mutex_unlock(&orphan_dir_inode->i_mutex);
754 749
755 mlog_errno(status); 750 mlog_errno(status);
756 goto bail; 751 goto bail;
752 }
757 } 753 }
758 754
759 /* we do this while holding the orphan dir lock because we 755 /* we do this while holding the orphan dir lock because we
@@ -794,6 +790,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
794 mlog_errno(status); 790 mlog_errno(status);
795 791
796bail_unlock_dir: 792bail_unlock_dir:
793 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
794 return status;
795
797 ocfs2_inode_unlock(orphan_dir_inode, 1); 796 ocfs2_inode_unlock(orphan_dir_inode, 1);
798 mutex_unlock(&orphan_dir_inode->i_mutex); 797 mutex_unlock(&orphan_dir_inode->i_mutex);
799 brelse(orphan_dir_bh); 798 brelse(orphan_dir_bh);
@@ -889,7 +888,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
889 888
890 /* Do some basic inode verification... */ 889 /* Do some basic inode verification... */
891 di = (struct ocfs2_dinode *) di_bh->b_data; 890 di = (struct ocfs2_dinode *) di_bh->b_data;
892 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { 891 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
892 !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
893 /* 893 /*
894 * Inodes in the orphan dir must have ORPHANED_FL. The only 894 * Inodes in the orphan dir must have ORPHANED_FL. The only
895 * inodes that come back out of the orphan dir are reflink 895 * inodes that come back out of the orphan dir are reflink
@@ -972,7 +972,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
972void ocfs2_delete_inode(struct inode *inode) 972void ocfs2_delete_inode(struct inode *inode)
973{ 973{
974 int wipe, status; 974 int wipe, status;
975 sigset_t blocked, oldset; 975 sigset_t oldset;
976 struct buffer_head *di_bh = NULL; 976 struct buffer_head *di_bh = NULL;
977 977
978 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 978 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
@@ -999,13 +999,7 @@ void ocfs2_delete_inode(struct inode *inode)
999 * messaging paths may return us -ERESTARTSYS. Which would 999 * messaging paths may return us -ERESTARTSYS. Which would
1000 * cause us to exit early, resulting in inodes being orphaned 1000 * cause us to exit early, resulting in inodes being orphaned
1001 * forever. */ 1001 * forever. */
1002 sigfillset(&blocked); 1002 ocfs2_block_signals(&oldset);
1003 status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
1004 if (status < 0) {
1005 mlog_errno(status);
1006 ocfs2_cleanup_delete_inode(inode, 1);
1007 goto bail;
1008 }
1009 1003
1010 /* 1004 /*
1011 * Synchronize us against ocfs2_get_dentry. We take this in 1005 * Synchronize us against ocfs2_get_dentry. We take this in
@@ -1079,9 +1073,7 @@ bail_unlock_nfs_sync:
1079 ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); 1073 ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
1080 1074
1081bail_unblock: 1075bail_unblock:
1082 status = sigprocmask(SIG_SETMASK, &oldset, NULL); 1076 ocfs2_unblock_signals(&oldset);
1083 if (status < 0)
1084 mlog_errno(status);
1085bail: 1077bail:
1086 clear_inode(inode); 1078 clear_inode(inode);
1087 mlog_exit_void(); 1079 mlog_exit_void();
@@ -1115,6 +1107,10 @@ void ocfs2_clear_inode(struct inode *inode)
1115 ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); 1107 ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
1116 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); 1108 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
1117 1109
1110 ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
1111 &oi->ip_la_data_resv);
1112 ocfs2_resv_init_once(&oi->ip_la_data_resv);
1113
1118 /* We very well may get a clear_inode before all an inodes 1114 /* We very well may get a clear_inode before all an inodes
1119 * metadata has hit disk. Of course, we can't drop any cluster 1115 * metadata has hit disk. Of course, we can't drop any cluster
1120 * locks until the journal has finished with it. The only 1116 * locks until the journal has finished with it. The only
@@ -1290,13 +1286,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1290 fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); 1286 fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
1291 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1287 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1292 1288
1293 status = ocfs2_journal_dirty(handle, bh); 1289 ocfs2_journal_dirty(handle, bh);
1294 if (status < 0)
1295 mlog_errno(status);
1296
1297 status = 0;
1298leave: 1290leave:
1299
1300 mlog_exit(status); 1291 mlog_exit(status);
1301 return status; 1292 return status;
1302} 1293}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07b293c..9f5f5fcadc45 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -70,6 +70,8 @@ struct ocfs2_inode_info
70 /* Only valid if the inode is the dir. */ 70 /* Only valid if the inode is the dir. */
71 u32 ip_last_used_slot; 71 u32 ip_last_used_slot;
72 u64 ip_last_used_group; 72 u64 ip_last_used_group;
73
74 struct ocfs2_alloc_reservation ip_la_data_resv;
73}; 75};
74 76
75/* 77/*
@@ -100,6 +102,8 @@ struct ocfs2_inode_info
100#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 102#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
101/* Does someone have the file open O_DIRECT */ 103/* Does someone have the file open O_DIRECT */
102#define OCFS2_INODE_OPEN_DIRECT 0x00000040 104#define OCFS2_INODE_OPEN_DIRECT 0x00000040
105/* Tell the inode wipe code it's not in orphan dir */
106#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080
103 107
104static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) 108static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
105{ 109{
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9336c60e3a36..47878cf16418 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -402,9 +402,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
402} 402}
403 403
404/* 404/*
405 * 'nblocks' is what you want to add to the current 405 * 'nblocks' is what you want to add to the current transaction.
406 * transaction. extend_trans will either extend the current handle by
407 * nblocks, or commit it and start a new one with nblocks credits.
408 * 406 *
409 * This might call jbd2_journal_restart() which will commit dirty buffers 407 * This might call jbd2_journal_restart() which will commit dirty buffers
410 * and then restart the transaction. Before calling 408 * and then restart the transaction. Before calling
@@ -422,11 +420,15 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
422 */ 420 */
423int ocfs2_extend_trans(handle_t *handle, int nblocks) 421int ocfs2_extend_trans(handle_t *handle, int nblocks)
424{ 422{
425 int status; 423 int status, old_nblocks;
426 424
427 BUG_ON(!handle); 425 BUG_ON(!handle);
428 BUG_ON(!nblocks); 426 BUG_ON(nblocks < 0);
427
428 if (!nblocks)
429 return 0;
429 430
431 old_nblocks = handle->h_buffer_credits;
430 mlog_entry_void(); 432 mlog_entry_void();
431 433
432 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); 434 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
@@ -445,7 +447,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
445 mlog(0, 447 mlog(0,
446 "jbd2_journal_extend failed, trying " 448 "jbd2_journal_extend failed, trying "
447 "jbd2_journal_restart\n"); 449 "jbd2_journal_restart\n");
448 status = jbd2_journal_restart(handle, nblocks); 450 status = jbd2_journal_restart(handle,
451 old_nblocks + nblocks);
449 if (status < 0) { 452 if (status < 0) {
450 mlog_errno(status); 453 mlog_errno(status);
451 goto bail; 454 goto bail;
@@ -734,8 +737,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
734 return __ocfs2_journal_access(handle, ci, bh, NULL, type); 737 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
735} 738}
736 739
737int ocfs2_journal_dirty(handle_t *handle, 740void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
738 struct buffer_head *bh)
739{ 741{
740 int status; 742 int status;
741 743
@@ -743,13 +745,9 @@ int ocfs2_journal_dirty(handle_t *handle,
743 (unsigned long long)bh->b_blocknr); 745 (unsigned long long)bh->b_blocknr);
744 746
745 status = jbd2_journal_dirty_metadata(handle, bh); 747 status = jbd2_journal_dirty_metadata(handle, bh);
746 if (status < 0) 748 BUG_ON(status);
747 mlog(ML_ERROR, "Could not dirty metadata buffer. "
748 "(bh->b_blocknr=%llu)\n",
749 (unsigned long long)bh->b_blocknr);
750 749
751 mlog_exit(status); 750 mlog_exit_void();
752 return status;
753} 751}
754 752
755#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 753#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3f74e09b0d80..b5baaa8e710f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -325,8 +325,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
325 * <modify the bh> 325 * <modify the bh>
326 * ocfs2_journal_dirty(handle, bh); 326 * ocfs2_journal_dirty(handle, bh);
327 */ 327 */
328int ocfs2_journal_dirty(handle_t *handle, 328void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh);
329 struct buffer_head *bh);
330 329
331/* 330/*
332 * Credit Macros: 331 * Credit Macros:
@@ -562,6 +561,18 @@ static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
562 return blocks; 561 return blocks;
563} 562}
564 563
564/*
565 * Allocating a discontiguous block group requires the credits from
566 * ocfs2_calc_group_alloc_credits() as well as enough credits to fill
567 * the group descriptor's extent list. The caller already has started
568 * the transaction with ocfs2_calc_group_alloc_credits(). They extend
569 * it with these credits.
570 */
571static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
572{
573 return ocfs2_extent_recs_per_gd(sb);
574}
575
565static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, 576static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
566 unsigned int clusters_to_del, 577 unsigned int clusters_to_del,
567 struct ocfs2_dinode *fe, 578 struct ocfs2_dinode *fe,
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c983715d8d8c..3d7419682dc0 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52 52
53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54 struct ocfs2_dinode *alloc, 54 struct ocfs2_dinode *alloc,
55 u32 numbits); 55 u32 *numbits,
56 struct ocfs2_alloc_reservation *resv);
56 57
57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58 59
@@ -74,6 +75,144 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
75 struct inode *local_alloc_inode); 76 struct inode *local_alloc_inode);
76 77
78/*
79 * ocfs2_la_default_mb() - determine a default size, in megabytes of
80 * the local alloc.
81 *
82 * Generally, we'd like to pick as large a local alloc as
83 * possible. Performance on large workloads tends to scale
84 * proportionally to la size. In addition to that, the reservations
85 * code functions more efficiently as it can reserve more windows for
86 * write.
87 *
88 * Some things work against us when trying to choose a large local alloc:
89 *
90 * - We need to ensure our sizing is picked to leave enough space in
91 * group descriptors for other allocations (such as block groups,
92 * etc). Picking default sizes which are a multiple of 4 could help
93 * - block groups are allocated in 2mb and 4mb chunks.
94 *
95 * - Likewise, we don't want to starve other nodes of bits on small
96 * file systems. This can easily be taken care of by limiting our
97 * default to a reasonable size (256M) on larger cluster sizes.
98 *
99 * - Some file systems can't support very large sizes - 4k and 8k in
100 * particular are limited to less than 128 and 256 megabytes respectively.
101 *
102 * The following reference table shows group descriptor and local
103 * alloc maximums at various cluster sizes (4k blocksize)
104 *
105 * csize: 4K group: 126M la: 121M
106 * csize: 8K group: 252M la: 243M
107 * csize: 16K group: 504M la: 486M
108 * csize: 32K group: 1008M la: 972M
109 * csize: 64K group: 2016M la: 1944M
110 * csize: 128K group: 4032M la: 3888M
111 * csize: 256K group: 8064M la: 7776M
112 * csize: 512K group: 16128M la: 15552M
113 * csize: 1024K group: 32256M la: 31104M
114 */
115#define OCFS2_LA_MAX_DEFAULT_MB 256
116#define OCFS2_LA_OLD_DEFAULT 8
117unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{
119 unsigned int la_mb;
120 unsigned int gd_mb;
121 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb;
123
124 gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
125 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
126
127 /*
128 * This takes care of files systems with very small group
129 * descriptors - 512 byte blocksize at cluster sizes lower
130 * than 16K and also 1k blocksize with 4k cluster size.
131 */
132 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
133 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
134 return OCFS2_LA_OLD_DEFAULT;
135
136 /*
137 * Leave enough room for some block groups and make the final
138 * value we work from a multiple of 4.
139 */
140 gd_mb -= 16;
141 gd_mb &= 0xFFFFFFFB;
142
143 la_mb = gd_mb;
144
145 /*
146 * Keep window sizes down to a reasonable default
147 */
148 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
149 /*
150 * Some clustersize / blocksize combinations will have
151 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
152 * default size, but get poor distribution when
153 * limited to exactly 256 megabytes.
154 *
155 * As an example, 16K clustersize at 4K blocksize
156 * gives us a cluster group size of 504M. Paring the
157 * local alloc size down to 256 however, would give us
158 * only one window and around 200MB left in the
159 * cluster group. Instead, find the first size below
160 * 256 which would give us an even distribution.
161 *
162 * Larger cluster group sizes actually work out pretty
163 * well when pared to 256, so we don't have to do this
164 * for any group that fits more than two
165 * OCFS2_LA_MAX_DEFAULT_MB windows.
166 */
167 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
168 la_mb = 256;
169 else {
170 unsigned int gd_mult = gd_mb;
171
172 while (gd_mult > 256)
173 gd_mult = gd_mult >> 1;
174
175 la_mb = gd_mult;
176 }
177 }
178
179 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
180 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
181 /* Too many nodes, too few disk clusters. */
182 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot;
184
185 return la_mb;
186}
187
188void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
189{
190 struct super_block *sb = osb->sb;
191 unsigned int la_default_mb = ocfs2_la_default_mb(osb);
192 unsigned int la_max_mb;
193
194 la_max_mb = ocfs2_clusters_to_megabytes(sb,
195 ocfs2_local_alloc_size(sb) * 8);
196
197 mlog(0, "requested: %dM, max: %uM, default: %uM\n",
198 requested_mb, la_max_mb, la_default_mb);
199
200 if (requested_mb == -1) {
201 /* No user request - use defaults */
202 osb->local_alloc_default_bits =
203 ocfs2_megabytes_to_clusters(sb, la_default_mb);
204 } else if (requested_mb > la_max_mb) {
205 /* Request is too big, we give the maximum available */
206 osb->local_alloc_default_bits =
207 ocfs2_megabytes_to_clusters(sb, la_max_mb);
208 } else {
209 osb->local_alloc_default_bits =
210 ocfs2_megabytes_to_clusters(sb, requested_mb);
211 }
212
213 osb->local_alloc_bits = osb->local_alloc_default_bits;
214}
215
77static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 216static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
78{ 217{
79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 218 return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
@@ -156,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 295 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
157 osb->local_alloc_bits = 296 osb->local_alloc_bits =
158 ocfs2_megabytes_to_clusters(osb->sb, 297 ocfs2_megabytes_to_clusters(osb->sb,
159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 298 ocfs2_la_default_mb(osb));
160 } 299 }
161 300
162 /* read the alloc off disk */ 301 /* read the alloc off disk */
@@ -262,6 +401,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
262 401
263 osb->local_alloc_state = OCFS2_LA_DISABLED; 402 osb->local_alloc_state = OCFS2_LA_DISABLED;
264 403
404 ocfs2_resmap_uninit(&osb->osb_la_resmap);
405
265 main_bm_inode = ocfs2_get_system_file_inode(osb, 406 main_bm_inode = ocfs2_get_system_file_inode(osb,
266 GLOBAL_BITMAP_SYSTEM_INODE, 407 GLOBAL_BITMAP_SYSTEM_INODE,
267 OCFS2_INVALID_SLOT); 408 OCFS2_INVALID_SLOT);
@@ -305,12 +446,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
305 } 446 }
306 447
307 ocfs2_clear_local_alloc(alloc); 448 ocfs2_clear_local_alloc(alloc);
308 449 ocfs2_journal_dirty(handle, bh);
309 status = ocfs2_journal_dirty(handle, bh);
310 if (status < 0) {
311 mlog_errno(status);
312 goto out_commit;
313 }
314 450
315 brelse(bh); 451 brelse(bh);
316 osb->local_alloc_bh = NULL; 452 osb->local_alloc_bh = NULL;
@@ -481,46 +617,6 @@ out:
481 return status; 617 return status;
482} 618}
483 619
484/* Check to see if the local alloc window is within ac->ac_max_block */
485static int ocfs2_local_alloc_in_range(struct inode *inode,
486 struct ocfs2_alloc_context *ac,
487 u32 bits_wanted)
488{
489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
490 struct ocfs2_dinode *alloc;
491 struct ocfs2_local_alloc *la;
492 int start;
493 u64 block_off;
494
495 if (!ac->ac_max_block)
496 return 1;
497
498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
499 la = OCFS2_LOCAL_ALLOC(alloc);
500
501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
502 if (start == -1) {
503 mlog_errno(-ENOSPC);
504 return 0;
505 }
506
507 /*
508 * Converting (bm_off + start + bits_wanted) to blocks gives us
509 * the blkno just past our actual allocation. This is perfect
510 * to compare with ac_max_block.
511 */
512 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
513 le32_to_cpu(la->la_bm_off) +
514 start + bits_wanted);
515 mlog(0, "Checking %llu against %llu\n",
516 (unsigned long long)block_off,
517 (unsigned long long)ac->ac_max_block);
518 if (block_off > ac->ac_max_block)
519 return 0;
520
521 return 1;
522}
523
524/* 620/*
525 * make sure we've got at least bits_wanted contiguous bits in the 621 * make sure we've got at least bits_wanted contiguous bits in the
526 * local alloc. You lose them when you drop i_mutex. 622 * local alloc. You lose them when you drop i_mutex.
@@ -613,17 +709,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
613 mlog(0, "Calling in_range for max block %llu\n", 709 mlog(0, "Calling in_range for max block %llu\n",
614 (unsigned long long)ac->ac_max_block); 710 (unsigned long long)ac->ac_max_block);
615 711
616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
617 bits_wanted)) {
618 /*
619 * The window is outside ac->ac_max_block.
620 * This errno tells the caller to keep localalloc enabled
621 * but to get the allocation from the main bitmap.
622 */
623 status = -EFBIG;
624 goto bail;
625 }
626
627 ac->ac_inode = local_alloc_inode; 712 ac->ac_inode = local_alloc_inode;
628 /* We should never use localalloc from another slot */ 713 /* We should never use localalloc from another slot */
629 ac->ac_alloc_slot = osb->slot_num; 714 ac->ac_alloc_slot = osb->slot_num;
@@ -664,7 +749,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 749 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
665 la = OCFS2_LOCAL_ALLOC(alloc); 750 la = OCFS2_LOCAL_ALLOC(alloc);
666 751
667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 752 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
753 ac->ac_resv);
668 if (start == -1) { 754 if (start == -1) {
669 /* TODO: Shouldn't we just BUG here? */ 755 /* TODO: Shouldn't we just BUG here? */
670 status = -ENOSPC; 756 status = -ENOSPC;
@@ -674,8 +760,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
674 760
675 bitmap = la->la_bitmap; 761 bitmap = la->la_bitmap;
676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 762 *bit_off = le32_to_cpu(la->la_bm_off) + start;
677 /* local alloc is always contiguous by nature -- we never
678 * delete bits from it! */
679 *num_bits = bits_wanted; 763 *num_bits = bits_wanted;
680 764
681 status = ocfs2_journal_access_di(handle, 765 status = ocfs2_journal_access_di(handle,
@@ -687,18 +771,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
687 goto bail; 771 goto bail;
688 } 772 }
689 773
774 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
775 bits_wanted);
776
690 while(bits_wanted--) 777 while(bits_wanted--)
691 ocfs2_set_bit(start++, bitmap); 778 ocfs2_set_bit(start++, bitmap);
692 779
693 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 780 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
781 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
694 782
695 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
696 if (status < 0) {
697 mlog_errno(status);
698 goto bail;
699 }
700
701 status = 0;
702bail: 783bail:
703 mlog_exit(status); 784 mlog_exit(status);
704 return status; 785 return status;
@@ -722,13 +803,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
722} 803}
723 804
724static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 805static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
725 struct ocfs2_dinode *alloc, 806 struct ocfs2_dinode *alloc,
726 u32 numbits) 807 u32 *numbits,
808 struct ocfs2_alloc_reservation *resv)
727{ 809{
728 int numfound, bitoff, left, startoff, lastzero; 810 int numfound, bitoff, left, startoff, lastzero;
811 int local_resv = 0;
812 struct ocfs2_alloc_reservation r;
729 void *bitmap = NULL; 813 void *bitmap = NULL;
814 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
730 815
731 mlog_entry("(numbits wanted = %u)\n", numbits); 816 mlog_entry("(numbits wanted = %u)\n", *numbits);
732 817
733 if (!alloc->id1.bitmap1.i_total) { 818 if (!alloc->id1.bitmap1.i_total) {
734 mlog(0, "No bits in my window!\n"); 819 mlog(0, "No bits in my window!\n");
@@ -736,6 +821,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
736 goto bail; 821 goto bail;
737 } 822 }
738 823
824 if (!resv) {
825 local_resv = 1;
826 ocfs2_resv_init_once(&r);
827 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
828 resv = &r;
829 }
830
831 numfound = *numbits;
832 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
833 if (numfound < *numbits)
834 *numbits = numfound;
835 goto bail;
836 }
837
838 /*
839 * Code error. While reservations are enabled, local
840 * allocation should _always_ go through them.
841 */
842 BUG_ON(osb->osb_resv_level != 0);
843
844 /*
845 * Reservations are disabled. Handle this the old way.
846 */
847
739 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 848 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
740 849
741 numfound = bitoff = startoff = 0; 850 numfound = bitoff = startoff = 0;
@@ -761,7 +870,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
761 startoff = bitoff+1; 870 startoff = bitoff+1;
762 } 871 }
763 /* we got everything we needed */ 872 /* we got everything we needed */
764 if (numfound == numbits) { 873 if (numfound == *numbits) {
765 /* mlog(0, "Found it all!\n"); */ 874 /* mlog(0, "Found it all!\n"); */
766 break; 875 break;
767 } 876 }
@@ -770,12 +879,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
770 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 879 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
771 numfound); 880 numfound);
772 881
773 if (numfound == numbits) 882 if (numfound == *numbits)
774 bitoff = startoff - numfound; 883 bitoff = startoff - numfound;
775 else 884 else
776 bitoff = -1; 885 bitoff = -1;
777 886
778bail: 887bail:
888 if (local_resv)
889 ocfs2_resv_discard(resmap, resv);
890
779 mlog_exit(bitoff); 891 mlog_exit(bitoff);
780 return bitoff; 892 return bitoff;
781} 893}
@@ -1049,7 +1161,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1049 /* we used the generic suballoc reserve function, but we set 1161 /* we used the generic suballoc reserve function, but we set
1050 * everything up nicely, so there's no reason why we can't use 1162 * everything up nicely, so there's no reason why we can't use
1051 * the more specific cluster api to claim bits. */ 1163 * the more specific cluster api to claim bits. */
1052 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1164 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
1053 &cluster_off, &cluster_count); 1165 &cluster_off, &cluster_count);
1054 if (status == -ENOSPC) { 1166 if (status == -ENOSPC) {
1055retry_enospc: 1167retry_enospc:
@@ -1063,7 +1175,7 @@ retry_enospc:
1063 goto bail; 1175 goto bail;
1064 1176
1065 ac->ac_bits_wanted = osb->local_alloc_default_bits; 1177 ac->ac_bits_wanted = osb->local_alloc_default_bits;
1066 status = ocfs2_claim_clusters(osb, handle, ac, 1178 status = ocfs2_claim_clusters(handle, ac,
1067 osb->local_alloc_bits, 1179 osb->local_alloc_bits,
1068 &cluster_off, 1180 &cluster_off,
1069 &cluster_count); 1181 &cluster_count);
@@ -1098,6 +1210,9 @@ retry_enospc:
1098 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1210 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1099 le16_to_cpu(la->la_size)); 1211 le16_to_cpu(la->la_size));
1100 1212
1213 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1214 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1215
1101 mlog(0, "New window allocated:\n"); 1216 mlog(0, "New window allocated:\n");
1102 mlog(0, "window la_bm_off = %u\n", 1217 mlog(0, "window la_bm_off = %u\n",
1103 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1218 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
@@ -1169,12 +1284,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1169 } 1284 }
1170 1285
1171 ocfs2_clear_local_alloc(alloc); 1286 ocfs2_clear_local_alloc(alloc);
1172 1287 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1173 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1174 if (status < 0) {
1175 mlog_errno(status);
1176 goto bail;
1177 }
1178 1288
1179 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1289 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1180 main_bm_inode, main_bm_bh); 1290 main_bm_inode, main_bm_bh);
@@ -1192,7 +1302,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1192 1302
1193 atomic_inc(&osb->alloc_stats.moves); 1303 atomic_inc(&osb->alloc_stats.moves);
1194 1304
1195 status = 0;
1196bail: 1305bail:
1197 if (handle) 1306 if (handle)
1198 ocfs2_commit_trans(osb, handle); 1307 ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index ac5ea9f86653..1be9b5864460 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -30,6 +30,9 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb);
30 30
31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb); 31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb);
32 32
33void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb);
34unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb);
35
33int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 36int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
34 int node_num, 37 int node_num,
35 struct ocfs2_dinode **alloc_copy); 38 struct ocfs2_dinode **alloc_copy);
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 7898bd3a99f5..af2b8fe1f139 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -41,44 +41,20 @@
41#include "file.h" 41#include "file.h"
42#include "inode.h" 42#include "inode.h"
43#include "mmap.h" 43#include "mmap.h"
44#include "super.h"
44 45
45static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
46{
47 /* The best way to deal with signals in the vm path is
48 * to block them upfront, rather than allowing the
49 * locking paths to return -ERESTARTSYS. */
50 sigfillset(blocked);
51
52 /* We should technically never get a bad return value
53 * from sigprocmask */
54 return sigprocmask(SIG_BLOCK, blocked, oldset);
55}
56
57static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
58{
59 return sigprocmask(SIG_SETMASK, oldset, NULL);
60}
61 46
62static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) 47static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
63{ 48{
64 sigset_t blocked, oldset; 49 sigset_t oldset;
65 int error, ret; 50 int ret;
66 51
67 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff); 52 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
68 53
69 error = ocfs2_vm_op_block_sigs(&blocked, &oldset); 54 ocfs2_block_signals(&oldset);
70 if (error < 0) {
71 mlog_errno(error);
72 ret = VM_FAULT_SIGBUS;
73 goto out;
74 }
75
76 ret = filemap_fault(area, vmf); 55 ret = filemap_fault(area, vmf);
56 ocfs2_unblock_signals(&oldset);
77 57
78 error = ocfs2_vm_op_unblock_sigs(&oldset);
79 if (error < 0)
80 mlog_errno(error);
81out:
82 mlog_exit_ptr(vmf->page); 58 mlog_exit_ptr(vmf->page);
83 return ret; 59 return ret;
84} 60}
@@ -158,14 +134,10 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
158 struct page *page = vmf->page; 134 struct page *page = vmf->page;
159 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 135 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
160 struct buffer_head *di_bh = NULL; 136 struct buffer_head *di_bh = NULL;
161 sigset_t blocked, oldset; 137 sigset_t oldset;
162 int ret, ret2; 138 int ret;
163 139
164 ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); 140 ocfs2_block_signals(&oldset);
165 if (ret < 0) {
166 mlog_errno(ret);
167 return ret;
168 }
169 141
170 /* 142 /*
171 * The cluster locks taken will block a truncate from another 143 * The cluster locks taken will block a truncate from another
@@ -193,9 +165,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
193 ocfs2_inode_unlock(inode, 1); 165 ocfs2_inode_unlock(inode, 1);
194 166
195out: 167out:
196 ret2 = ocfs2_vm_op_unblock_sigs(&oldset); 168 ocfs2_unblock_signals(&oldset);
197 if (ret2 < 0)
198 mlog_errno(ret2);
199 if (ret) 169 if (ret)
200 ret = VM_FAULT_SIGBUS; 170 ret = VM_FAULT_SIGBUS;
201 return ret; 171 return ret;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50ae4097..db5dd3ed4df4 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -239,6 +239,8 @@ static int ocfs2_mknod(struct inode *dir,
239 }; 239 };
240 int did_quota_inode = 0; 240 int did_quota_inode = 0;
241 struct ocfs2_dir_lookup_result lookup = { NULL, }; 241 struct ocfs2_dir_lookup_result lookup = { NULL, };
242 sigset_t oldset;
243 int did_block_signals = 0;
242 244
243 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, 245 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
244 (unsigned long)dev, dentry->d_name.len, 246 (unsigned long)dev, dentry->d_name.len,
@@ -350,6 +352,10 @@ static int ocfs2_mknod(struct inode *dir,
350 goto leave; 352 goto leave;
351 } 353 }
352 354
355 /* Starting to change things, restart is no longer possible. */
356 ocfs2_block_signals(&oldset);
357 did_block_signals = 1;
358
353 status = dquot_alloc_inode(inode); 359 status = dquot_alloc_inode(inode);
354 if (status) 360 if (status)
355 goto leave; 361 goto leave;
@@ -384,11 +390,7 @@ static int ocfs2_mknod(struct inode *dir,
384 goto leave; 390 goto leave;
385 } 391 }
386 ocfs2_add_links_count(dirfe, 1); 392 ocfs2_add_links_count(dirfe, 1);
387 status = ocfs2_journal_dirty(handle, parent_fe_bh); 393 ocfs2_journal_dirty(handle, parent_fe_bh);
388 if (status < 0) {
389 mlog_errno(status);
390 goto leave;
391 }
392 inc_nlink(dir); 394 inc_nlink(dir);
393 } 395 }
394 396
@@ -408,23 +410,28 @@ static int ocfs2_mknod(struct inode *dir,
408 } 410 }
409 } 411 }
410 412
411 status = ocfs2_add_entry(handle, dentry, inode, 413 /*
412 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 414 * Do this before adding the entry to the directory. We add
413 &lookup); 415 * also set d_op after success so that ->d_iput() will cleanup
414 if (status < 0) { 416 * the dentry lock even if ocfs2_add_entry() fails below.
417 */
418 status = ocfs2_dentry_attach_lock(dentry, inode,
419 OCFS2_I(dir)->ip_blkno);
420 if (status) {
415 mlog_errno(status); 421 mlog_errno(status);
416 goto leave; 422 goto leave;
417 } 423 }
424 dentry->d_op = &ocfs2_dentry_ops;
418 425
419 status = ocfs2_dentry_attach_lock(dentry, inode, 426 status = ocfs2_add_entry(handle, dentry, inode,
420 OCFS2_I(dir)->ip_blkno); 427 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
421 if (status) { 428 &lookup);
429 if (status < 0) {
422 mlog_errno(status); 430 mlog_errno(status);
423 goto leave; 431 goto leave;
424 } 432 }
425 433
426 insert_inode_hash(inode); 434 insert_inode_hash(inode);
427 dentry->d_op = &ocfs2_dentry_ops;
428 d_instantiate(dentry, inode); 435 d_instantiate(dentry, inode);
429 status = 0; 436 status = 0;
430leave: 437leave:
@@ -434,6 +441,8 @@ leave:
434 ocfs2_commit_trans(osb, handle); 441 ocfs2_commit_trans(osb, handle);
435 442
436 ocfs2_inode_unlock(dir, 1); 443 ocfs2_inode_unlock(dir, 1);
444 if (did_block_signals)
445 ocfs2_unblock_signals(&oldset);
437 446
438 if (status == -ENOSPC) 447 if (status == -ENOSPC)
439 mlog(0, "Disk is full\n"); 448 mlog(0, "Disk is full\n");
@@ -445,11 +454,6 @@ leave:
445 454
446 ocfs2_free_dir_lookup_result(&lookup); 455 ocfs2_free_dir_lookup_result(&lookup);
447 456
448 if ((status < 0) && inode) {
449 clear_nlink(inode);
450 iput(inode);
451 }
452
453 if (inode_ac) 457 if (inode_ac)
454 ocfs2_free_alloc_context(inode_ac); 458 ocfs2_free_alloc_context(inode_ac);
455 459
@@ -459,6 +463,17 @@ leave:
459 if (meta_ac) 463 if (meta_ac)
460 ocfs2_free_alloc_context(meta_ac); 464 ocfs2_free_alloc_context(meta_ac);
461 465
466 /*
467 * We should call iput after the i_mutex of the bitmap been
468 * unlocked in ocfs2_free_alloc_context, or the
469 * ocfs2_delete_inode will mutex_lock again.
470 */
471 if ((status < 0) && inode) {
472 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
473 clear_nlink(inode);
474 iput(inode);
475 }
476
462 mlog_exit(status); 477 mlog_exit(status);
463 478
464 return status; 479 return status;
@@ -476,14 +491,15 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
476 int status = 0; 491 int status = 0;
477 struct ocfs2_dinode *fe = NULL; 492 struct ocfs2_dinode *fe = NULL;
478 struct ocfs2_extent_list *fel; 493 struct ocfs2_extent_list *fel;
479 u64 fe_blkno = 0; 494 u64 suballoc_loc, fe_blkno = 0;
480 u16 suballoc_bit; 495 u16 suballoc_bit;
481 u16 feat; 496 u16 feat;
482 497
483 *new_fe_bh = NULL; 498 *new_fe_bh = NULL;
484 499
485 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, 500 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
486 inode_ac, &suballoc_bit, &fe_blkno); 501 inode_ac, &suballoc_loc,
502 &suballoc_bit, &fe_blkno);
487 if (status < 0) { 503 if (status < 0) {
488 mlog_errno(status); 504 mlog_errno(status);
489 goto leave; 505 goto leave;
@@ -520,6 +536,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
520 fe->i_generation = cpu_to_le32(inode->i_generation); 536 fe->i_generation = cpu_to_le32(inode->i_generation);
521 fe->i_fs_generation = cpu_to_le32(osb->fs_generation); 537 fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
522 fe->i_blkno = cpu_to_le64(fe_blkno); 538 fe->i_blkno = cpu_to_le64(fe_blkno);
539 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
523 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 540 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
524 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 541 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
525 fe->i_uid = cpu_to_le32(inode->i_uid); 542 fe->i_uid = cpu_to_le32(inode->i_uid);
@@ -556,11 +573,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
556 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); 573 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
557 } 574 }
558 575
559 status = ocfs2_journal_dirty(handle, *new_fe_bh); 576 ocfs2_journal_dirty(handle, *new_fe_bh);
560 if (status < 0) {
561 mlog_errno(status);
562 goto leave;
563 }
564 577
565 ocfs2_populate_inode(inode, fe, 1); 578 ocfs2_populate_inode(inode, fe, 1);
566 ocfs2_ci_set_new(osb, INODE_CACHE(inode)); 579 ocfs2_ci_set_new(osb, INODE_CACHE(inode));
@@ -626,6 +639,7 @@ static int ocfs2_link(struct dentry *old_dentry,
626 struct ocfs2_dinode *fe = NULL; 639 struct ocfs2_dinode *fe = NULL;
627 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 640 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
628 struct ocfs2_dir_lookup_result lookup = { NULL, }; 641 struct ocfs2_dir_lookup_result lookup = { NULL, };
642 sigset_t oldset;
629 643
630 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, 644 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
631 old_dentry->d_name.len, old_dentry->d_name.name, 645 old_dentry->d_name.len, old_dentry->d_name.name,
@@ -682,6 +696,9 @@ static int ocfs2_link(struct dentry *old_dentry,
682 goto out_unlock_inode; 696 goto out_unlock_inode;
683 } 697 }
684 698
699 /* Starting to change things, restart is no longer possible. */
700 ocfs2_block_signals(&oldset);
701
685 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh, 702 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
686 OCFS2_JOURNAL_ACCESS_WRITE); 703 OCFS2_JOURNAL_ACCESS_WRITE);
687 if (err < 0) { 704 if (err < 0) {
@@ -694,14 +711,7 @@ static int ocfs2_link(struct dentry *old_dentry,
694 ocfs2_set_links_count(fe, inode->i_nlink); 711 ocfs2_set_links_count(fe, inode->i_nlink);
695 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 712 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
696 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 713 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
697 714 ocfs2_journal_dirty(handle, fe_bh);
698 err = ocfs2_journal_dirty(handle, fe_bh);
699 if (err < 0) {
700 ocfs2_add_links_count(fe, -1);
701 drop_nlink(inode);
702 mlog_errno(err);
703 goto out_commit;
704 }
705 715
706 err = ocfs2_add_entry(handle, dentry, inode, 716 err = ocfs2_add_entry(handle, dentry, inode,
707 OCFS2_I(inode)->ip_blkno, 717 OCFS2_I(inode)->ip_blkno,
@@ -725,6 +735,7 @@ static int ocfs2_link(struct dentry *old_dentry,
725 735
726out_commit: 736out_commit:
727 ocfs2_commit_trans(osb, handle); 737 ocfs2_commit_trans(osb, handle);
738 ocfs2_unblock_signals(&oldset);
728out_unlock_inode: 739out_unlock_inode:
729 ocfs2_inode_unlock(inode, 1); 740 ocfs2_inode_unlock(inode, 1);
730 741
@@ -898,12 +909,7 @@ static int ocfs2_unlink(struct inode *dir,
898 drop_nlink(inode); 909 drop_nlink(inode);
899 drop_nlink(inode); 910 drop_nlink(inode);
900 ocfs2_set_links_count(fe, inode->i_nlink); 911 ocfs2_set_links_count(fe, inode->i_nlink);
901 912 ocfs2_journal_dirty(handle, fe_bh);
902 status = ocfs2_journal_dirty(handle, fe_bh);
903 if (status < 0) {
904 mlog_errno(status);
905 goto leave;
906 }
907 913
908 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 914 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
909 if (S_ISDIR(inode->i_mode)) 915 if (S_ISDIR(inode->i_mode))
@@ -1321,12 +1327,7 @@ static int ocfs2_rename(struct inode *old_dir,
1321 ocfs2_set_links_count(newfe, 0); 1327 ocfs2_set_links_count(newfe, 0);
1322 else 1328 else
1323 ocfs2_add_links_count(newfe, -1); 1329 ocfs2_add_links_count(newfe, -1);
1324 1330 ocfs2_journal_dirty(handle, newfe_bh);
1325 status = ocfs2_journal_dirty(handle, newfe_bh);
1326 if (status < 0) {
1327 mlog_errno(status);
1328 goto bail;
1329 }
1330 } else { 1331 } else {
1331 /* if the name was not found in new_dir, add it now */ 1332 /* if the name was not found in new_dir, add it now */
1332 status = ocfs2_add_entry(handle, new_dentry, old_inode, 1333 status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1345,10 +1346,7 @@ static int ocfs2_rename(struct inode *old_dir,
1345 1346
1346 old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec); 1347 old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
1347 old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec); 1348 old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
1348 1349 ocfs2_journal_dirty(handle, old_inode_bh);
1349 status = ocfs2_journal_dirty(handle, old_inode_bh);
1350 if (status < 0)
1351 mlog_errno(status);
1352 } else 1350 } else
1353 mlog_errno(status); 1351 mlog_errno(status);
1354 1352
@@ -1420,7 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
1420 OCFS2_JOURNAL_ACCESS_WRITE); 1418 OCFS2_JOURNAL_ACCESS_WRITE);
1421 fe = (struct ocfs2_dinode *) old_dir_bh->b_data; 1419 fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1422 ocfs2_set_links_count(fe, old_dir->i_nlink); 1420 ocfs2_set_links_count(fe, old_dir->i_nlink);
1423 status = ocfs2_journal_dirty(handle, old_dir_bh); 1421 ocfs2_journal_dirty(handle, old_dir_bh);
1424 } 1422 }
1425 } 1423 }
1426 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); 1424 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
@@ -1552,11 +1550,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1552 (bytes_left > sb->s_blocksize) ? sb->s_blocksize : 1550 (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1553 bytes_left); 1551 bytes_left);
1554 1552
1555 status = ocfs2_journal_dirty(handle, bhs[virtual]); 1553 ocfs2_journal_dirty(handle, bhs[virtual]);
1556 if (status < 0) {
1557 mlog_errno(status);
1558 goto bail;
1559 }
1560 1554
1561 virtual++; 1555 virtual++;
1562 p_blkno++; 1556 p_blkno++;
@@ -1600,6 +1594,8 @@ static int ocfs2_symlink(struct inode *dir,
1600 }; 1594 };
1601 int did_quota = 0, did_quota_inode = 0; 1595 int did_quota = 0, did_quota_inode = 0;
1602 struct ocfs2_dir_lookup_result lookup = { NULL, }; 1596 struct ocfs2_dir_lookup_result lookup = { NULL, };
1597 sigset_t oldset;
1598 int did_block_signals = 0;
1603 1599
1604 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, 1600 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1605 dentry, symname, dentry->d_name.len, dentry->d_name.name); 1601 dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1695,6 +1691,10 @@ static int ocfs2_symlink(struct inode *dir,
1695 goto bail; 1691 goto bail;
1696 } 1692 }
1697 1693
1694 /* Starting to change things, restart is no longer possible. */
1695 ocfs2_block_signals(&oldset);
1696 did_block_signals = 1;
1697
1698 status = dquot_alloc_inode(inode); 1698 status = dquot_alloc_inode(inode);
1699 if (status) 1699 if (status)
1700 goto bail; 1700 goto bail;
@@ -1771,22 +1771,27 @@ static int ocfs2_symlink(struct inode *dir,
1771 } 1771 }
1772 } 1772 }
1773 1773
1774 status = ocfs2_add_entry(handle, dentry, inode, 1774 /*
1775 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1775 * Do this before adding the entry to the directory. We add
1776 &lookup); 1776 * also set d_op after success so that ->d_iput() will cleanup
1777 if (status < 0) { 1777 * the dentry lock even if ocfs2_add_entry() fails below.
1778 */
1779 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1780 if (status) {
1778 mlog_errno(status); 1781 mlog_errno(status);
1779 goto bail; 1782 goto bail;
1780 } 1783 }
1784 dentry->d_op = &ocfs2_dentry_ops;
1781 1785
1782 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); 1786 status = ocfs2_add_entry(handle, dentry, inode,
1783 if (status) { 1787 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1788 &lookup);
1789 if (status < 0) {
1784 mlog_errno(status); 1790 mlog_errno(status);
1785 goto bail; 1791 goto bail;
1786 } 1792 }
1787 1793
1788 insert_inode_hash(inode); 1794 insert_inode_hash(inode);
1789 dentry->d_op = &ocfs2_dentry_ops;
1790 d_instantiate(dentry, inode); 1795 d_instantiate(dentry, inode);
1791bail: 1796bail:
1792 if (status < 0 && did_quota) 1797 if (status < 0 && did_quota)
@@ -1798,6 +1803,8 @@ bail:
1798 ocfs2_commit_trans(osb, handle); 1803 ocfs2_commit_trans(osb, handle);
1799 1804
1800 ocfs2_inode_unlock(dir, 1); 1805 ocfs2_inode_unlock(dir, 1);
1806 if (did_block_signals)
1807 ocfs2_unblock_signals(&oldset);
1801 1808
1802 brelse(new_fe_bh); 1809 brelse(new_fe_bh);
1803 brelse(parent_fe_bh); 1810 brelse(parent_fe_bh);
@@ -1811,6 +1818,7 @@ bail:
1811 if (xattr_ac) 1818 if (xattr_ac)
1812 ocfs2_free_alloc_context(xattr_ac); 1819 ocfs2_free_alloc_context(xattr_ac);
1813 if ((status < 0) && inode) { 1820 if ((status < 0) && inode) {
1821 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
1814 clear_nlink(inode); 1822 clear_nlink(inode);
1815 iput(inode); 1823 iput(inode);
1816 } 1824 }
@@ -1944,12 +1952,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1944 if (S_ISDIR(inode->i_mode)) 1952 if (S_ISDIR(inode->i_mode))
1945 ocfs2_add_links_count(orphan_fe, 1); 1953 ocfs2_add_links_count(orphan_fe, 1);
1946 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 1954 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
1947 1955 ocfs2_journal_dirty(handle, orphan_dir_bh);
1948 status = ocfs2_journal_dirty(handle, orphan_dir_bh);
1949 if (status < 0) {
1950 mlog_errno(status);
1951 goto leave;
1952 }
1953 1956
1954 status = __ocfs2_add_entry(handle, orphan_dir_inode, name, 1957 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
1955 OCFS2_ORPHAN_NAMELEN, inode, 1958 OCFS2_ORPHAN_NAMELEN, inode,
@@ -1976,6 +1979,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1976 } 1979 }
1977 1980
1978 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); 1981 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
1982 OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
1979 1983
1980 /* Record which orphan dir our inode now resides 1984 /* Record which orphan dir our inode now resides
1981 * in. delete_inode will use this to determine which orphan 1985 * in. delete_inode will use this to determine which orphan
@@ -2047,12 +2051,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2047 if (S_ISDIR(inode->i_mode)) 2051 if (S_ISDIR(inode->i_mode))
2048 ocfs2_add_links_count(orphan_fe, -1); 2052 ocfs2_add_links_count(orphan_fe, -1);
2049 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2053 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
2050 2054 ocfs2_journal_dirty(handle, orphan_dir_bh);
2051 status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2052 if (status < 0) {
2053 mlog_errno(status);
2054 goto leave;
2055 }
2056 2055
2057leave: 2056leave:
2058 ocfs2_free_dir_lookup_result(&lookup); 2057 ocfs2_free_dir_lookup_result(&lookup);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index adf5e2ebc2c4..c67003b6b5a2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -47,6 +47,7 @@
47/* For struct ocfs2_blockcheck_stats */ 47/* For struct ocfs2_blockcheck_stats */
48#include "blockcheck.h" 48#include "blockcheck.h"
49 49
50#include "reservations.h"
50 51
51/* Caching of metadata buffers */ 52/* Caching of metadata buffers */
52 53
@@ -341,6 +342,9 @@ struct ocfs2_super
341 */ 342 */
342 unsigned int local_alloc_bits; 343 unsigned int local_alloc_bits;
343 unsigned int local_alloc_default_bits; 344 unsigned int local_alloc_default_bits;
345 /* osb_clusters_at_boot can become stale! Do not trust it to
346 * be up to date. */
347 unsigned int osb_clusters_at_boot;
344 348
345 enum ocfs2_local_alloc_state local_alloc_state; /* protected 349 enum ocfs2_local_alloc_state local_alloc_state; /* protected
346 * by osb_lock */ 350 * by osb_lock */
@@ -349,6 +353,11 @@ struct ocfs2_super
349 353
350 u64 la_last_gd; 354 u64 la_last_gd;
351 355
356 struct ocfs2_reservation_map osb_la_resmap;
357
358 unsigned int osb_resv_level;
359 unsigned int osb_dir_resv_level;
360
352 /* Next three fields are for local node slot recovery during 361 /* Next three fields are for local node slot recovery during
353 * mount. */ 362 * mount. */
354 int dirty; 363 int dirty;
@@ -482,6 +491,13 @@ static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
482 return 0; 491 return 0;
483} 492}
484 493
494static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb)
495{
496 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
497 return 1;
498 return 0;
499}
500
485static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) 501static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
486{ 502{
487 if (ocfs2_supports_indexed_dirs(osb)) 503 if (ocfs2_supports_indexed_dirs(osb))
@@ -763,6 +779,12 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
763 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); 779 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
764} 780}
765 781
782static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
783 unsigned int clusters)
784{
785 return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits);
786}
787
766static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) 788static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
767{ 789{
768 ext2_set_bit(bit, bitmap); 790 ext2_set_bit(bit, bitmap);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index bb37218a7978..33f1c9a8258d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -100,7 +100,8 @@
100 | OCFS2_FEATURE_INCOMPAT_XATTR \ 100 | OCFS2_FEATURE_INCOMPAT_XATTR \
101 | OCFS2_FEATURE_INCOMPAT_META_ECC \ 101 | OCFS2_FEATURE_INCOMPAT_META_ECC \
102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) 103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
104#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 105#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
105 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 106 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
106 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 107 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -165,6 +166,9 @@
165/* Refcount tree support */ 166/* Refcount tree support */
166#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000 167#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000
167 168
169/* Discontigous block groups */
170#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
171
168/* 172/*
169 * backup superblock flag is used to indicate that this volume 173 * backup superblock flag is used to indicate that this volume
170 * has backup superblocks. 174 * has backup superblocks.
@@ -283,14 +287,6 @@
283#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) 287#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
284 288
285/* 289/*
286 * Default local alloc size (in megabytes)
287 *
288 * The value chosen should be such that most allocations, including new
289 * block groups, use local alloc.
290 */
291#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
292
293/*
294 * Inline extended attribute size (in bytes) 290 * Inline extended attribute size (in bytes)
295 * The value chosen should be aligned to 16 byte boundaries. 291 * The value chosen should be aligned to 16 byte boundaries.
296 */ 292 */
@@ -512,7 +508,10 @@ struct ocfs2_extent_block
512 block group */ 508 block group */
513 __le32 h_fs_generation; /* Must match super block */ 509 __le32 h_fs_generation; /* Must match super block */
514 __le64 h_blkno; /* Offset on disk, in blocks */ 510 __le64 h_blkno; /* Offset on disk, in blocks */
515/*20*/ __le64 h_reserved3; 511/*20*/ __le64 h_suballoc_loc; /* Suballocator block group this
512 eb belongs to. Only valid
513 if allocated from a
514 discontiguous block group */
516 __le64 h_next_leaf_blk; /* Offset on disk, in blocks, 515 __le64 h_next_leaf_blk; /* Offset on disk, in blocks,
517 of next leaf header pointing 516 of next leaf header pointing
518 to data */ 517 to data */
@@ -679,7 +678,11 @@ struct ocfs2_dinode {
679/*80*/ struct ocfs2_block_check i_check; /* Error checking */ 678/*80*/ struct ocfs2_block_check i_check; /* Error checking */
680/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ 679/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
681/*90*/ __le64 i_refcount_loc; 680/*90*/ __le64 i_refcount_loc;
682 __le64 i_reserved2[4]; 681 __le64 i_suballoc_loc; /* Suballocator block group this
682 inode belongs to. Only valid
683 if allocated from a
684 discontiguous block group */
685/*A0*/ __le64 i_reserved2[3];
683/*B8*/ union { 686/*B8*/ union {
684 __le64 i_pad1; /* Generic way to refer to this 687 __le64 i_pad1; /* Generic way to refer to this
685 64bit union */ 688 64bit union */
@@ -814,7 +817,12 @@ struct ocfs2_dx_root_block {
814 __le32 dr_reserved2; 817 __le32 dr_reserved2;
815 __le64 dr_free_blk; /* Pointer to head of free 818 __le64 dr_free_blk; /* Pointer to head of free
816 * unindexed block list. */ 819 * unindexed block list. */
817 __le64 dr_reserved3[15]; 820 __le64 dr_suballoc_loc; /* Suballocator block group
821 this root belongs to.
822 Only valid if allocated
823 from a discontiguous
824 block group */
825 __le64 dr_reserved3[14];
818 union { 826 union {
819 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 827 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
820 * bits for maximum space 828 * bits for maximum space
@@ -840,6 +848,13 @@ struct ocfs2_dx_leaf {
840}; 848};
841 849
842/* 850/*
851 * Largest bitmap for a block (suballocator) group in bytes. This limit
852 * does not affect cluster groups (global allocator). Cluster group
853 * bitmaps run to the end of the block.
854 */
855#define OCFS2_MAX_BG_BITMAP_SIZE 256
856
857/*
843 * On disk allocator group structure for OCFS2 858 * On disk allocator group structure for OCFS2
844 */ 859 */
845struct ocfs2_group_desc 860struct ocfs2_group_desc
@@ -860,7 +875,29 @@ struct ocfs2_group_desc
860 __le64 bg_blkno; /* Offset on disk, in blocks */ 875 __le64 bg_blkno; /* Offset on disk, in blocks */
861/*30*/ struct ocfs2_block_check bg_check; /* Error checking */ 876/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
862 __le64 bg_reserved2; 877 __le64 bg_reserved2;
863/*40*/ __u8 bg_bitmap[0]; 878/*40*/ union {
879 __u8 bg_bitmap[0];
880 struct {
881 /*
882 * Block groups may be discontiguous when
883 * OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG is set.
884 * The extents of a discontigous block group are
885 * stored in bg_list. It is a flat list.
886 * l_tree_depth must always be zero. A
887 * discontiguous group is signified by a non-zero
888 * bg_list->l_next_free_rec. Only block groups
889 * can be discontiguous; Cluster groups cannot.
890 * We've never made a block group with more than
891 * 2048 blocks (256 bytes of bg_bitmap). This
892 * codifies that limit so that we can fit bg_list.
893 * bg_size of a discontiguous block group will
894 * be 256 to match bg_bitmap_filler.
895 */
896 __u8 bg_bitmap_filler[OCFS2_MAX_BG_BITMAP_SIZE];
897/*140*/ struct ocfs2_extent_list bg_list;
898 };
899 };
900/* Actual on-disk size is one block */
864}; 901};
865 902
866struct ocfs2_refcount_rec { 903struct ocfs2_refcount_rec {
@@ -905,7 +942,11 @@ struct ocfs2_refcount_block {
905/*40*/ __le32 rf_generation; /* generation number. all be the same 942/*40*/ __le32 rf_generation; /* generation number. all be the same
906 * for the same refcount tree. */ 943 * for the same refcount tree. */
907 __le32 rf_reserved0; 944 __le32 rf_reserved0;
908 __le64 rf_reserved1[7]; 945 __le64 rf_suballoc_loc; /* Suballocator block group this
946 refcount block belongs to. Only
947 valid if allocated from a
948 discontiguous block group */
949/*50*/ __le64 rf_reserved1[6];
909/*80*/ union { 950/*80*/ union {
910 struct ocfs2_refcount_list rf_records; /* List of refcount 951 struct ocfs2_refcount_list rf_records; /* List of refcount
911 records */ 952 records */
@@ -1017,7 +1058,10 @@ struct ocfs2_xattr_block {
1017 real xattr or a xattr tree. */ 1058 real xattr or a xattr tree. */
1018 __le16 xb_reserved0; 1059 __le16 xb_reserved0;
1019 __le32 xb_reserved1; 1060 __le32 xb_reserved1;
1020 __le64 xb_reserved2; 1061 __le64 xb_suballoc_loc; /* Suballocator block group this
1062 xattr block belongs to. Only
1063 valid if allocated from a
1064 discontiguous block group */
1021/*30*/ union { 1065/*30*/ union {
1022 struct ocfs2_xattr_header xb_header; /* xattr header if this 1066 struct ocfs2_xattr_header xb_header; /* xattr header if this
1023 block contains xattr */ 1067 block contains xattr */
@@ -1254,6 +1298,16 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
1254 return size / sizeof(struct ocfs2_extent_rec); 1298 return size / sizeof(struct ocfs2_extent_rec);
1255} 1299}
1256 1300
1301static inline u16 ocfs2_extent_recs_per_gd(struct super_block *sb)
1302{
1303 int size;
1304
1305 size = sb->s_blocksize -
1306 offsetof(struct ocfs2_group_desc, bg_list.l_recs);
1307
1308 return size / sizeof(struct ocfs2_extent_rec);
1309}
1310
1257static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb) 1311static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
1258{ 1312{
1259 int size; 1313 int size;
@@ -1284,13 +1338,23 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
1284 return size; 1338 return size;
1285} 1339}
1286 1340
1287static inline int ocfs2_group_bitmap_size(struct super_block *sb) 1341static inline int ocfs2_group_bitmap_size(struct super_block *sb,
1342 int suballocator,
1343 u32 feature_incompat)
1288{ 1344{
1289 int size; 1345 int size = sb->s_blocksize -
1290
1291 size = sb->s_blocksize -
1292 offsetof(struct ocfs2_group_desc, bg_bitmap); 1346 offsetof(struct ocfs2_group_desc, bg_bitmap);
1293 1347
1348 /*
1349 * The cluster allocator uses the entire block. Suballocators have
1350 * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
1351 * code expects bg_size set to the maximum. Thus we must keep
1352 * bg_size as-is unless discontig_bg is enabled.
1353 */
1354 if (suballocator &&
1355 (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
1356 size = OCFS2_MAX_BG_BITMAP_SIZE;
1357
1294 return size; 1358 return size;
1295} 1359}
1296 1360
@@ -1402,23 +1466,43 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize)
1402 return size / sizeof(struct ocfs2_extent_rec); 1466 return size / sizeof(struct ocfs2_extent_rec);
1403} 1467}
1404 1468
1405static inline int ocfs2_local_alloc_size(int blocksize) 1469static inline int ocfs2_extent_recs_per_gd(int blocksize)
1406{ 1470{
1407 int size; 1471 int size;
1408 1472
1409 size = blocksize - 1473 size = blocksize -
1410 offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap); 1474 offsetof(struct ocfs2_group_desc, bg_list.l_recs);
1411 1475
1412 return size; 1476 return size / sizeof(struct ocfs2_extent_rec);
1413} 1477}
1414 1478
1415static inline int ocfs2_group_bitmap_size(int blocksize) 1479static inline int ocfs2_local_alloc_size(int blocksize)
1416{ 1480{
1417 int size; 1481 int size;
1418 1482
1419 size = blocksize - 1483 size = blocksize -
1484 offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
1485
1486 return size;
1487}
1488
1489static inline int ocfs2_group_bitmap_size(int blocksize,
1490 int suballocator,
1491 uint32_t feature_incompat)
1492{
1493 int size = sb->s_blocksize -
1420 offsetof(struct ocfs2_group_desc, bg_bitmap); 1494 offsetof(struct ocfs2_group_desc, bg_bitmap);
1421 1495
1496 /*
1497 * The cluster allocator uses the entire block. Suballocators have
1498 * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
1499 * code expects bg_size set to the maximum. Thus we must keep
1500 * bg_size as-is unless discontig_bg is enabled.
1501 */
1502 if (suballocator &&
1503 (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
1504 size = OCFS2_MAX_BG_BITMAP_SIZE;
1505
1422 return size; 1506 return size;
1423} 1507}
1424 1508
@@ -1491,5 +1575,19 @@ static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
1491 de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 1575 de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
1492} 1576}
1493 1577
1578static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd)
1579{
1580 if ((offsetof(struct ocfs2_group_desc, bg_bitmap) +
1581 le16_to_cpu(gd->bg_size)) !=
1582 offsetof(struct ocfs2_group_desc, bg_list))
1583 return 0;
1584 /*
1585 * Only valid to check l_next_free_rec if
1586 * bg_bitmap + bg_size == bg_list.
1587 */
1588 if (!gd->bg_list.l_next_free_rec)
1589 return 0;
1590 return 1;
1591}
1494#endif /* _OCFS2_FS_H */ 1592#endif /* _OCFS2_FS_H */
1495 1593
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 123bc520a2c0..196fcb52d95d 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -23,6 +23,7 @@
23struct ocfs2_dquot { 23struct ocfs2_dquot {
24 struct dquot dq_dquot; /* Generic VFS dquot */ 24 struct dquot dq_dquot; /* Generic VFS dquot */
25 loff_t dq_local_off; /* Offset in the local quota file */ 25 loff_t dq_local_off; /* Offset in the local quota file */
26 u64 dq_local_phys_blk; /* Physical block carrying quota structure */
26 struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */ 27 struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */
27 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ 28 unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
28 s64 dq_origspace; /* Last globally synced space usage */ 29 s64 dq_origspace; /* Last globally synced space usage */
@@ -51,8 +52,9 @@ struct ocfs2_mem_dqinfo {
51 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 52 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
52 struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */ 53 struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */
53 int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */ 54 int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */
55 u64 dqi_giblk; /* Number of block with global information header */
54 struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */ 56 struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */
55 struct buffer_head *dqi_ibh; /* Buffer with information header */ 57 struct buffer_head *dqi_libh; /* Buffer with local information header */
56 struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ 58 struct qtree_mem_dqinfo dqi_gi; /* Info about global file */
57 struct delayed_work dqi_sync_work; /* Work for syncing dquots */ 59 struct delayed_work dqi_sync_work; /* Work for syncing dquots */
58 struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery 60 struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery
@@ -102,8 +104,12 @@ static inline int ocfs2_global_release_dquot(struct dquot *dquot)
102 104
103int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); 105int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
104void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); 106void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
105int ocfs2_read_quota_block(struct inode *inode, u64 v_block, 107int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh);
106 struct buffer_head **bh); 108int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block,
109 struct buffer_head **bh);
110int ocfs2_create_local_dquot(struct dquot *dquot);
111int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot);
112int ocfs2_local_write_dquot(struct dquot *dquot);
107 113
108extern const struct dquot_operations ocfs2_quota_operations; 114extern const struct dquot_operations ocfs2_quota_operations;
109extern struct quota_format_type ocfs2_quota_format; 115extern struct quota_format_type ocfs2_quota_format;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index ab42a74c7539..2bb35fe00511 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -25,8 +25,44 @@
25#include "dlmglue.h" 25#include "dlmglue.h"
26#include "uptodate.h" 26#include "uptodate.h"
27#include "super.h" 27#include "super.h"
28#include "buffer_head_io.h"
28#include "quota.h" 29#include "quota.h"
29 30
31/*
32 * Locking of quotas with OCFS2 is rather complex. Here are rules that
33 * should be obeyed by all the functions:
34 * - any write of quota structure (either to local or global file) is protected
35 * by dqio_mutex or dquot->dq_lock.
36 * - any modification of global quota file holds inode cluster lock, i_mutex,
37 * and ip_alloc_sem of the global quota file (achieved by
38 * ocfs2_lock_global_qf). It also has to hold qinfo_lock.
39 * - an allocation of new blocks for local quota file is protected by
40 * its ip_alloc_sem
41 *
42 * A rough sketch of locking dependencies (lf = local file, gf = global file):
43 * Normal filesystem operation:
44 * start_trans -> dqio_mutex -> write to lf
45 * Syncing of local and global file:
46 * ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock ->
47 * write to gf
48 * -> write to lf
49 * Acquire dquot for the first time:
50 * dq_lock -> ocfs2_lock_global_qf -> qinfo_lock -> read from gf
51 * -> alloc space for gf
52 * -> start_trans -> qinfo_lock -> write to gf
53 * -> ip_alloc_sem of lf -> alloc space for lf
54 * -> write to lf
55 * Release last reference to dquot:
56 * dq_lock -> ocfs2_lock_global_qf -> start_trans -> qinfo_lock -> write to gf
57 * -> write to lf
58 * Note that all the above operations also hold the inode cluster lock of lf.
59 * Recovery:
60 * inode cluster lock of recovered lf
61 * -> read bitmaps -> ip_alloc_sem of lf
62 * -> ocfs2_lock_global_qf -> start_trans -> dqio_mutex -> qinfo_lock ->
63 * write to gf
64 */
65
30static struct workqueue_struct *ocfs2_quota_wq = NULL; 66static struct workqueue_struct *ocfs2_quota_wq = NULL;
31 67
32static void qsync_work_fn(struct work_struct *work); 68static void qsync_work_fn(struct work_struct *work);
@@ -91,8 +127,7 @@ struct qtree_fmt_operations ocfs2_global_ops = {
91 .is_id = ocfs2_global_is_id, 127 .is_id = ocfs2_global_is_id,
92}; 128};
93 129
94static int ocfs2_validate_quota_block(struct super_block *sb, 130int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh)
95 struct buffer_head *bh)
96{ 131{
97 struct ocfs2_disk_dqtrailer *dqt = 132 struct ocfs2_disk_dqtrailer *dqt =
98 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); 133 ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data);
@@ -110,54 +145,19 @@ static int ocfs2_validate_quota_block(struct super_block *sb,
110 return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); 145 return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check);
111} 146}
112 147
113int ocfs2_read_quota_block(struct inode *inode, u64 v_block, 148int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block,
114 struct buffer_head **bh) 149 struct buffer_head **bhp)
115{ 150{
116 int rc = 0; 151 int rc;
117 struct buffer_head *tmp = *bh; 152
118 153 *bhp = NULL;
119 if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { 154 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, 1, bhp, 0,
120 ocfs2_error(inode->i_sb, 155 ocfs2_validate_quota_block);
121 "Quota file %llu is probably corrupted! Requested "
122 "to read block %Lu but file has size only %Lu\n",
123 (unsigned long long)OCFS2_I(inode)->ip_blkno,
124 (unsigned long long)v_block,
125 (unsigned long long)i_size_read(inode));
126 return -EIO;
127 }
128 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
129 ocfs2_validate_quota_block);
130 if (rc) 156 if (rc)
131 mlog_errno(rc); 157 mlog_errno(rc);
132
133 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
134 if (!rc && !*bh)
135 *bh = tmp;
136
137 return rc; 158 return rc;
138} 159}
139 160
140static int ocfs2_get_quota_block(struct inode *inode, int block,
141 struct buffer_head **bh)
142{
143 u64 pblock, pcount;
144 int err;
145
146 down_read(&OCFS2_I(inode)->ip_alloc_sem);
147 err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL);
148 up_read(&OCFS2_I(inode)->ip_alloc_sem);
149 if (err) {
150 mlog_errno(err);
151 return err;
152 }
153 *bh = sb_getblk(inode->i_sb, pblock);
154 if (!*bh) {
155 err = -EIO;
156 mlog_errno(err);
157 }
158 return err;
159}
160
161/* Read data from global quotafile - avoid pagecache and such because we cannot 161/* Read data from global quotafile - avoid pagecache and such because we cannot
162 * afford acquiring the locks... We use quota cluster lock to serialize 162 * afford acquiring the locks... We use quota cluster lock to serialize
163 * operations. Caller is responsible for acquiring it. */ 163 * operations. Caller is responsible for acquiring it. */
@@ -172,6 +172,7 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
172 int err = 0; 172 int err = 0;
173 struct buffer_head *bh; 173 struct buffer_head *bh;
174 size_t toread, tocopy; 174 size_t toread, tocopy;
175 u64 pblock = 0, pcount = 0;
175 176
176 if (off > i_size) 177 if (off > i_size)
177 return 0; 178 return 0;
@@ -180,8 +181,19 @@ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
180 toread = len; 181 toread = len;
181 while (toread > 0) { 182 while (toread > 0) {
182 tocopy = min_t(size_t, (sb->s_blocksize - offset), toread); 183 tocopy = min_t(size_t, (sb->s_blocksize - offset), toread);
184 if (!pcount) {
185 err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock,
186 &pcount, NULL);
187 if (err) {
188 mlog_errno(err);
189 return err;
190 }
191 } else {
192 pcount--;
193 pblock++;
194 }
183 bh = NULL; 195 bh = NULL;
184 err = ocfs2_read_quota_block(gqinode, blk, &bh); 196 err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh);
185 if (err) { 197 if (err) {
186 mlog_errno(err); 198 mlog_errno(err);
187 return err; 199 return err;
@@ -209,6 +221,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
209 int err = 0, new = 0, ja_type; 221 int err = 0, new = 0, ja_type;
210 struct buffer_head *bh = NULL; 222 struct buffer_head *bh = NULL;
211 handle_t *handle = journal_current_handle(); 223 handle_t *handle = journal_current_handle();
224 u64 pblock, pcount;
212 225
213 if (!handle) { 226 if (!handle) {
214 mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " 227 mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled "
@@ -221,12 +234,11 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
221 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; 234 len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
222 } 235 }
223 236
224 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
225 if (gqinode->i_size < off + len) { 237 if (gqinode->i_size < off + len) {
226 loff_t rounded_end = 238 loff_t rounded_end =
227 ocfs2_align_bytes_to_blocks(sb, off + len); 239 ocfs2_align_bytes_to_blocks(sb, off + len);
228 240
229 /* Space is already allocated in ocfs2_global_read_dquot() */ 241 /* Space is already allocated in ocfs2_acquire_dquot() */
230 err = ocfs2_simple_size_update(gqinode, 242 err = ocfs2_simple_size_update(gqinode,
231 oinfo->dqi_gqi_bh, 243 oinfo->dqi_gqi_bh,
232 rounded_end); 244 rounded_end);
@@ -234,13 +246,20 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 goto out; 246 goto out;
235 new = 1; 247 new = 1;
236 } 248 }
249 err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, &pcount, NULL);
250 if (err) {
251 mlog_errno(err);
252 goto out;
253 }
237 /* Not rewriting whole block? */ 254 /* Not rewriting whole block? */
238 if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && 255 if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
239 !new) { 256 !new) {
240 err = ocfs2_read_quota_block(gqinode, blk, &bh); 257 err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh);
241 ja_type = OCFS2_JOURNAL_ACCESS_WRITE; 258 ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
242 } else { 259 } else {
243 err = ocfs2_get_quota_block(gqinode, blk, &bh); 260 bh = sb_getblk(sb, pblock);
261 if (!bh)
262 err = -ENOMEM;
244 ja_type = OCFS2_JOURNAL_ACCESS_CREATE; 263 ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
245 } 264 }
246 if (err) { 265 if (err) {
@@ -261,19 +280,15 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
261 brelse(bh); 280 brelse(bh);
262 goto out; 281 goto out;
263 } 282 }
264 err = ocfs2_journal_dirty(handle, bh); 283 ocfs2_journal_dirty(handle, bh);
265 brelse(bh); 284 brelse(bh);
266 if (err < 0)
267 goto out;
268out: 285out:
269 if (err) { 286 if (err) {
270 mutex_unlock(&gqinode->i_mutex);
271 mlog_errno(err); 287 mlog_errno(err);
272 return err; 288 return err;
273 } 289 }
274 gqinode->i_version++; 290 gqinode->i_version++;
275 ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); 291 ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh);
276 mutex_unlock(&gqinode->i_mutex);
277 return len; 292 return len;
278} 293}
279 294
@@ -291,11 +306,23 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
291 else 306 else
292 WARN_ON(bh != oinfo->dqi_gqi_bh); 307 WARN_ON(bh != oinfo->dqi_gqi_bh);
293 spin_unlock(&dq_data_lock); 308 spin_unlock(&dq_data_lock);
309 if (ex) {
310 mutex_lock(&oinfo->dqi_gqinode->i_mutex);
311 down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
312 } else {
313 down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
314 }
294 return 0; 315 return 0;
295} 316}
296 317
297void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) 318void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
298{ 319{
320 if (ex) {
321 up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
322 mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
323 } else {
324 up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
325 }
299 ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); 326 ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
300 brelse(oinfo->dqi_gqi_bh); 327 brelse(oinfo->dqi_gqi_bh);
301 spin_lock(&dq_data_lock); 328 spin_lock(&dq_data_lock);
@@ -313,6 +340,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
313 struct ocfs2_global_disk_dqinfo dinfo; 340 struct ocfs2_global_disk_dqinfo dinfo;
314 struct mem_dqinfo *info = sb_dqinfo(sb, type); 341 struct mem_dqinfo *info = sb_dqinfo(sb, type);
315 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; 342 struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
343 u64 pcount;
316 int status; 344 int status;
317 345
318 mlog_entry_void(); 346 mlog_entry_void();
@@ -339,9 +367,19 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
339 mlog_errno(status); 367 mlog_errno(status);
340 goto out_err; 368 goto out_err;
341 } 369 }
370
371 status = ocfs2_extent_map_get_blocks(gqinode, 0, &oinfo->dqi_giblk,
372 &pcount, NULL);
373 if (status < 0)
374 goto out_unlock;
375
376 status = ocfs2_qinfo_lock(oinfo, 0);
377 if (status < 0)
378 goto out_unlock;
342 status = sb->s_op->quota_read(sb, type, (char *)&dinfo, 379 status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
343 sizeof(struct ocfs2_global_disk_dqinfo), 380 sizeof(struct ocfs2_global_disk_dqinfo),
344 OCFS2_GLOBAL_INFO_OFF); 381 OCFS2_GLOBAL_INFO_OFF);
382 ocfs2_qinfo_unlock(oinfo, 0);
345 ocfs2_unlock_global_qf(oinfo, 0); 383 ocfs2_unlock_global_qf(oinfo, 0);
346 if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { 384 if (status != sizeof(struct ocfs2_global_disk_dqinfo)) {
347 mlog(ML_ERROR, "Cannot read global quota info (%d).\n", 385 mlog(ML_ERROR, "Cannot read global quota info (%d).\n",
@@ -368,6 +406,10 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
368out_err: 406out_err:
369 mlog_exit(status); 407 mlog_exit(status);
370 return status; 408 return status;
409out_unlock:
410 ocfs2_unlock_global_qf(oinfo, 0);
411 mlog_errno(status);
412 goto out_err;
371} 413}
372 414
373/* Write information to global quota file. Expects exlusive lock on quota 415/* Write information to global quota file. Expects exlusive lock on quota
@@ -426,78 +468,10 @@ static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
426 468
427static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) 469static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
428{ 470{
429 /* We modify all the allocated blocks, tree root, and info block */ 471 /* We modify all the allocated blocks, tree root, info block and
472 * the inode */
430 return (ocfs2_global_qinit_alloc(sb, type) + 2) * 473 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
431 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS; 474 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + 1;
432}
433
434/* Read in information from global quota file and acquire a reference to it.
435 * dquot_acquire() has already started the transaction and locked quota file */
436int ocfs2_global_read_dquot(struct dquot *dquot)
437{
438 int err, err2, ex = 0;
439 struct super_block *sb = dquot->dq_sb;
440 int type = dquot->dq_type;
441 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
442 struct ocfs2_super *osb = OCFS2_SB(sb);
443 struct inode *gqinode = info->dqi_gqinode;
444 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
445 handle_t *handle = NULL;
446
447 err = ocfs2_qinfo_lock(info, 0);
448 if (err < 0)
449 goto out;
450 err = qtree_read_dquot(&info->dqi_gi, dquot);
451 if (err < 0)
452 goto out_qlock;
453 OCFS2_DQUOT(dquot)->dq_use_count++;
454 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
455 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
456 ocfs2_qinfo_unlock(info, 0);
457
458 if (!dquot->dq_off) { /* No real quota entry? */
459 ex = 1;
460 /*
461 * Add blocks to quota file before we start a transaction since
462 * locking allocators ranks above a transaction start
463 */
464 WARN_ON(journal_current_handle());
465 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
466 err = ocfs2_extend_no_holes(gqinode,
467 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
468 gqinode->i_size);
469 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
470 if (err < 0)
471 goto out;
472 }
473
474 handle = ocfs2_start_trans(osb,
475 ocfs2_calc_global_qinit_credits(sb, type));
476 if (IS_ERR(handle)) {
477 err = PTR_ERR(handle);
478 goto out;
479 }
480 err = ocfs2_qinfo_lock(info, ex);
481 if (err < 0)
482 goto out_trans;
483 err = qtree_write_dquot(&info->dqi_gi, dquot);
484 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
485 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
486 if (!err)
487 err = err2;
488 }
489out_qlock:
490 if (ex)
491 ocfs2_qinfo_unlock(info, 1);
492 else
493 ocfs2_qinfo_unlock(info, 0);
494out_trans:
495 if (handle)
496 ocfs2_commit_trans(osb, handle);
497out:
498 if (err < 0)
499 mlog_errno(err);
500 return err;
501} 475}
502 476
503/* Sync local information about quota modifications with global quota file. 477/* Sync local information about quota modifications with global quota file.
@@ -638,14 +612,13 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
638 } 612 }
639 mutex_lock(&sb_dqopt(sb)->dqio_mutex); 613 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
640 status = ocfs2_sync_dquot(dquot); 614 status = ocfs2_sync_dquot(dquot);
641 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
642 if (status < 0) 615 if (status < 0)
643 mlog_errno(status); 616 mlog_errno(status);
644 /* We have to write local structure as well... */ 617 /* We have to write local structure as well... */
645 dquot_mark_dquot_dirty(dquot); 618 status = ocfs2_local_write_dquot(dquot);
646 status = dquot_commit(dquot);
647 if (status < 0) 619 if (status < 0)
648 mlog_errno(status); 620 mlog_errno(status);
621 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
649 ocfs2_commit_trans(osb, handle); 622 ocfs2_commit_trans(osb, handle);
650out_ilock: 623out_ilock:
651 ocfs2_unlock_global_qf(oinfo, 1); 624 ocfs2_unlock_global_qf(oinfo, 1);
@@ -684,7 +657,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
684 mlog_errno(status); 657 mlog_errno(status);
685 goto out; 658 goto out;
686 } 659 }
687 status = dquot_commit(dquot); 660 mutex_lock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
661 status = ocfs2_local_write_dquot(dquot);
662 mutex_unlock(&sb_dqopt(dquot->dq_sb)->dqio_mutex);
688 ocfs2_commit_trans(osb, handle); 663 ocfs2_commit_trans(osb, handle);
689out: 664out:
690 mlog_exit(status); 665 mlog_exit(status);
@@ -715,6 +690,10 @@ static int ocfs2_release_dquot(struct dquot *dquot)
715 690
716 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 691 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
717 692
693 mutex_lock(&dquot->dq_lock);
694 /* Check whether we are not racing with some other dqget() */
695 if (atomic_read(&dquot->dq_count) > 1)
696 goto out;
718 status = ocfs2_lock_global_qf(oinfo, 1); 697 status = ocfs2_lock_global_qf(oinfo, 1);
719 if (status < 0) 698 if (status < 0)
720 goto out; 699 goto out;
@@ -725,30 +704,113 @@ static int ocfs2_release_dquot(struct dquot *dquot)
725 mlog_errno(status); 704 mlog_errno(status);
726 goto out_ilock; 705 goto out_ilock;
727 } 706 }
728 status = dquot_release(dquot); 707
708 status = ocfs2_global_release_dquot(dquot);
709 if (status < 0) {
710 mlog_errno(status);
711 goto out_trans;
712 }
713 status = ocfs2_local_release_dquot(handle, dquot);
714 /*
715 * If we fail here, we cannot do much as global structure is
716 * already released. So just complain...
717 */
718 if (status < 0)
719 mlog_errno(status);
720 clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
721out_trans:
729 ocfs2_commit_trans(osb, handle); 722 ocfs2_commit_trans(osb, handle);
730out_ilock: 723out_ilock:
731 ocfs2_unlock_global_qf(oinfo, 1); 724 ocfs2_unlock_global_qf(oinfo, 1);
732out: 725out:
726 mutex_unlock(&dquot->dq_lock);
733 mlog_exit(status); 727 mlog_exit(status);
734 return status; 728 return status;
735} 729}
736 730
731/*
732 * Read global dquot structure from disk or create it if it does
733 * not exist. Also update use count of the global structure and
734 * create structure in node-local quota file.
735 */
737static int ocfs2_acquire_dquot(struct dquot *dquot) 736static int ocfs2_acquire_dquot(struct dquot *dquot)
738{ 737{
739 struct ocfs2_mem_dqinfo *oinfo = 738 int status = 0, err;
740 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 739 int ex = 0;
741 int status = 0; 740 struct super_block *sb = dquot->dq_sb;
741 struct ocfs2_super *osb = OCFS2_SB(sb);
742 int type = dquot->dq_type;
743 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
744 struct inode *gqinode = info->dqi_gqinode;
745 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
746 handle_t *handle;
742 747
743 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 748 mlog_entry("id=%u, type=%d", dquot->dq_id, type);
744 /* We need an exclusive lock, because we're going to update use count 749 mutex_lock(&dquot->dq_lock);
745 * and instantiate possibly new dquot structure */ 750 /*
746 status = ocfs2_lock_global_qf(oinfo, 1); 751 * We need an exclusive lock, because we're going to update use count
752 * and instantiate possibly new dquot structure
753 */
754 status = ocfs2_lock_global_qf(info, 1);
747 if (status < 0) 755 if (status < 0)
748 goto out; 756 goto out;
749 status = dquot_acquire(dquot); 757 if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
750 ocfs2_unlock_global_qf(oinfo, 1); 758 status = ocfs2_qinfo_lock(info, 0);
759 if (status < 0)
760 goto out_dq;
761 status = qtree_read_dquot(&info->dqi_gi, dquot);
762 ocfs2_qinfo_unlock(info, 0);
763 if (status < 0)
764 goto out_dq;
765 }
766 set_bit(DQ_READ_B, &dquot->dq_flags);
767
768 OCFS2_DQUOT(dquot)->dq_use_count++;
769 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
770 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
771 if (!dquot->dq_off) { /* No real quota entry? */
772 ex = 1;
773 /*
774 * Add blocks to quota file before we start a transaction since
775 * locking allocators ranks above a transaction start
776 */
777 WARN_ON(journal_current_handle());
778 status = ocfs2_extend_no_holes(gqinode,
779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
780 gqinode->i_size);
781 if (status < 0)
782 goto out_dq;
783 }
784
785 handle = ocfs2_start_trans(osb,
786 ocfs2_calc_global_qinit_credits(sb, type));
787 if (IS_ERR(handle)) {
788 status = PTR_ERR(handle);
789 goto out_dq;
790 }
791 status = ocfs2_qinfo_lock(info, ex);
792 if (status < 0)
793 goto out_trans;
794 status = qtree_write_dquot(&info->dqi_gi, dquot);
795 if (ex && info_dirty(sb_dqinfo(sb, type))) {
796 err = __ocfs2_global_write_info(sb, type);
797 if (!status)
798 status = err;
799 }
800 ocfs2_qinfo_unlock(info, ex);
801out_trans:
802 ocfs2_commit_trans(osb, handle);
803out_dq:
804 ocfs2_unlock_global_qf(info, 1);
805 if (status < 0)
806 goto out;
807
808 status = ocfs2_create_local_dquot(dquot);
809 if (status < 0)
810 goto out;
811 set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
751out: 812out:
813 mutex_unlock(&dquot->dq_lock);
752 mlog_exit(status); 814 mlog_exit(status);
753 return status; 815 return status;
754} 816}
@@ -770,7 +832,6 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
770 struct ocfs2_super *osb = OCFS2_SB(sb); 832 struct ocfs2_super *osb = OCFS2_SB(sb);
771 833
772 mlog_entry("id=%u, type=%d", dquot->dq_id, type); 834 mlog_entry("id=%u, type=%d", dquot->dq_id, type);
773 dquot_mark_dquot_dirty(dquot);
774 835
775 /* In case user set some limits, sync dquot immediately to global 836 /* In case user set some limits, sync dquot immediately to global
776 * quota file so that information propagates quicker */ 837 * quota file so that information propagates quicker */
@@ -793,14 +854,16 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
793 mlog_errno(status); 854 mlog_errno(status);
794 goto out_ilock; 855 goto out_ilock;
795 } 856 }
857 mutex_lock(&sb_dqopt(sb)->dqio_mutex);
796 status = ocfs2_sync_dquot(dquot); 858 status = ocfs2_sync_dquot(dquot);
797 if (status < 0) { 859 if (status < 0) {
798 mlog_errno(status); 860 mlog_errno(status);
799 goto out_trans; 861 goto out_dlock;
800 } 862 }
801 /* Now write updated local dquot structure */ 863 /* Now write updated local dquot structure */
802 status = dquot_commit(dquot); 864 status = ocfs2_local_write_dquot(dquot);
803out_trans: 865out_dlock:
866 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
804 ocfs2_commit_trans(osb, handle); 867 ocfs2_commit_trans(osb, handle);
805out_ilock: 868out_ilock:
806 ocfs2_unlock_global_qf(oinfo, 1); 869 ocfs2_unlock_global_qf(oinfo, 1);
@@ -852,7 +915,7 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
852} 915}
853 916
854const struct dquot_operations ocfs2_quota_operations = { 917const struct dquot_operations ocfs2_quota_operations = {
855 .write_dquot = ocfs2_write_dquot, 918 /* We never make dquot dirty so .write_dquot is never called */
856 .acquire_dquot = ocfs2_acquire_dquot, 919 .acquire_dquot = ocfs2_acquire_dquot,
857 .release_dquot = ocfs2_release_dquot, 920 .release_dquot = ocfs2_release_dquot,
858 .mark_dirty = ocfs2_mark_dquot_dirty, 921 .mark_dirty = ocfs2_mark_dquot_dirty,
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 9ad49305f450..8bd70d4d184d 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -22,6 +22,7 @@
22#include "dlmglue.h" 22#include "dlmglue.h"
23#include "quota.h" 23#include "quota.h"
24#include "uptodate.h" 24#include "uptodate.h"
25#include "super.h"
25 26
26/* Number of local quota structures per block */ 27/* Number of local quota structures per block */
27static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 28static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -119,12 +120,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
119 lock_buffer(bh); 120 lock_buffer(bh);
120 modify(bh, private); 121 modify(bh, private);
121 unlock_buffer(bh); 122 unlock_buffer(bh);
122 status = ocfs2_journal_dirty(handle, bh); 123 ocfs2_journal_dirty(handle, bh);
123 if (status < 0) { 124
124 mlog_errno(status);
125 ocfs2_commit_trans(OCFS2_SB(sb), handle);
126 return status;
127 }
128 status = ocfs2_commit_trans(OCFS2_SB(sb), handle); 125 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
129 if (status < 0) { 126 if (status < 0) {
130 mlog_errno(status); 127 mlog_errno(status);
@@ -133,6 +130,39 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
133 return 0; 130 return 0;
134} 131}
135 132
133/*
134 * Read quota block from a given logical offset.
135 *
136 * This function acquires ip_alloc_sem and thus it must not be called with a
137 * transaction started.
138 */
139static int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
140 struct buffer_head **bh)
141{
142 int rc = 0;
143 struct buffer_head *tmp = *bh;
144
145 if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) {
146 ocfs2_error(inode->i_sb,
147 "Quota file %llu is probably corrupted! Requested "
148 "to read block %Lu but file has size only %Lu\n",
149 (unsigned long long)OCFS2_I(inode)->ip_blkno,
150 (unsigned long long)v_block,
151 (unsigned long long)i_size_read(inode));
152 return -EIO;
153 }
154 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0,
155 ocfs2_validate_quota_block);
156 if (rc)
157 mlog_errno(rc);
158
159 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
160 if (!rc && !*bh)
161 *bh = tmp;
162
163 return rc;
164}
165
136/* Check whether we understand format of quota files */ 166/* Check whether we understand format of quota files */
137static int ocfs2_local_check_quota_file(struct super_block *sb, int type) 167static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
138{ 168{
@@ -523,9 +553,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
523 ocfs2_clear_bit(bit, dchunk->dqc_bitmap); 553 ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
524 le32_add_cpu(&dchunk->dqc_free, 1); 554 le32_add_cpu(&dchunk->dqc_free, 1);
525 unlock_buffer(qbh); 555 unlock_buffer(qbh);
526 status = ocfs2_journal_dirty(handle, qbh); 556 ocfs2_journal_dirty(handle, qbh);
527 if (status < 0)
528 mlog_errno(status);
529out_commit: 557out_commit:
530 mutex_unlock(&sb_dqopt(sb)->dqio_mutex); 558 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
531 ocfs2_commit_trans(OCFS2_SB(sb), handle); 559 ocfs2_commit_trans(OCFS2_SB(sb), handle);
@@ -631,9 +659,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
631 lock_buffer(bh); 659 lock_buffer(bh);
632 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN); 660 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
633 unlock_buffer(bh); 661 unlock_buffer(bh);
634 status = ocfs2_journal_dirty(handle, bh); 662 ocfs2_journal_dirty(handle, bh);
635 if (status < 0)
636 mlog_errno(status);
637out_trans: 663out_trans:
638 ocfs2_commit_trans(osb, handle); 664 ocfs2_commit_trans(osb, handle);
639out_bh: 665out_bh:
@@ -679,7 +705,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
679 INIT_LIST_HEAD(&oinfo->dqi_chunk); 705 INIT_LIST_HEAD(&oinfo->dqi_chunk);
680 oinfo->dqi_rec = NULL; 706 oinfo->dqi_rec = NULL;
681 oinfo->dqi_lqi_bh = NULL; 707 oinfo->dqi_lqi_bh = NULL;
682 oinfo->dqi_ibh = NULL; 708 oinfo->dqi_libh = NULL;
683 709
684 status = ocfs2_global_read_info(sb, type); 710 status = ocfs2_global_read_info(sb, type);
685 if (status < 0) 711 if (status < 0)
@@ -705,7 +731,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
705 info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); 731 info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
706 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); 732 oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
707 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); 733 oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
708 oinfo->dqi_ibh = bh; 734 oinfo->dqi_libh = bh;
709 735
710 /* We crashed when using local quota file? */ 736 /* We crashed when using local quota file? */
711 if (!(info->dqi_flags & OLQF_CLEAN)) { 737 if (!(info->dqi_flags & OLQF_CLEAN)) {
@@ -767,7 +793,7 @@ static int ocfs2_local_write_info(struct super_block *sb, int type)
767{ 793{
768 struct mem_dqinfo *info = sb_dqinfo(sb, type); 794 struct mem_dqinfo *info = sb_dqinfo(sb, type);
769 struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv) 795 struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
770 ->dqi_ibh; 796 ->dqi_libh;
771 int status; 797 int status;
772 798
773 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info, 799 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
@@ -790,10 +816,6 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
790 int mark_clean = 1, len; 816 int mark_clean = 1, len;
791 int status; 817 int status;
792 818
793 /* At this point we know there are no more dquots and thus
794 * even if there's some sync in the pdflush queue, it won't
795 * find any dquots and return without doing anything */
796 cancel_delayed_work_sync(&oinfo->dqi_sync_work);
797 iput(oinfo->dqi_gqinode); 819 iput(oinfo->dqi_gqinode);
798 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); 820 ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
799 ocfs2_lock_res_free(&oinfo->dqi_gqlock); 821 ocfs2_lock_res_free(&oinfo->dqi_gqlock);
@@ -828,7 +850,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
828 /* Mark local file as clean */ 850 /* Mark local file as clean */
829 info->dqi_flags |= OLQF_CLEAN; 851 info->dqi_flags |= OLQF_CLEAN;
830 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], 852 status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
831 oinfo->dqi_ibh, 853 oinfo->dqi_libh,
832 olq_update_info, 854 olq_update_info,
833 info); 855 info);
834 if (status < 0) { 856 if (status < 0) {
@@ -838,7 +860,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
838 860
839out: 861out:
840 ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1); 862 ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
841 brelse(oinfo->dqi_ibh); 863 brelse(oinfo->dqi_libh);
842 brelse(oinfo->dqi_lqi_bh); 864 brelse(oinfo->dqi_lqi_bh);
843 kfree(oinfo); 865 kfree(oinfo);
844 return 0; 866 return 0;
@@ -866,22 +888,21 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
866} 888}
867 889
868/* Write dquot to local quota file */ 890/* Write dquot to local quota file */
869static int ocfs2_local_write_dquot(struct dquot *dquot) 891int ocfs2_local_write_dquot(struct dquot *dquot)
870{ 892{
871 struct super_block *sb = dquot->dq_sb; 893 struct super_block *sb = dquot->dq_sb;
872 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); 894 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
873 struct buffer_head *bh = NULL; 895 struct buffer_head *bh;
896 struct inode *lqinode = sb_dqopt(sb)->files[dquot->dq_type];
874 int status; 897 int status;
875 898
876 status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type], 899 status = ocfs2_read_quota_phys_block(lqinode, od->dq_local_phys_blk,
877 ol_dqblk_file_block(sb, od->dq_local_off), 900 &bh);
878 &bh);
879 if (status) { 901 if (status) {
880 mlog_errno(status); 902 mlog_errno(status);
881 goto out; 903 goto out;
882 } 904 }
883 status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh, 905 status = ocfs2_modify_bh(lqinode, bh, olq_set_dquot, od);
884 olq_set_dquot, od);
885 if (status < 0) { 906 if (status < 0) {
886 mlog_errno(status); 907 mlog_errno(status);
887 goto out; 908 goto out;
@@ -981,10 +1002,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
981 } 1002 }
982 1003
983 /* Initialize chunk header */ 1004 /* Initialize chunk header */
984 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
985 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 1005 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
986 &p_blkno, NULL, NULL); 1006 &p_blkno, NULL, NULL);
987 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
988 if (status < 0) { 1007 if (status < 0) {
989 mlog_errno(status); 1008 mlog_errno(status);
990 goto out_trans; 1009 goto out_trans;
@@ -1009,17 +1028,11 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1009 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1028 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1010 OCFS2_QBLK_RESERVED_SPACE); 1029 OCFS2_QBLK_RESERVED_SPACE);
1011 unlock_buffer(bh); 1030 unlock_buffer(bh);
1012 status = ocfs2_journal_dirty(handle, bh); 1031 ocfs2_journal_dirty(handle, bh);
1013 if (status < 0) {
1014 mlog_errno(status);
1015 goto out_trans;
1016 }
1017 1032
1018 /* Initialize new block with structures */ 1033 /* Initialize new block with structures */
1019 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1, 1034 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1021 &p_blkno, NULL, NULL); 1035 &p_blkno, NULL, NULL);
1022 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1023 if (status < 0) { 1036 if (status < 0) {
1024 mlog_errno(status); 1037 mlog_errno(status);
1025 goto out_trans; 1038 goto out_trans;
@@ -1040,11 +1053,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1040 lock_buffer(dbh); 1053 lock_buffer(dbh);
1041 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE); 1054 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1042 unlock_buffer(dbh); 1055 unlock_buffer(dbh);
1043 status = ocfs2_journal_dirty(handle, dbh); 1056 ocfs2_journal_dirty(handle, dbh);
1044 if (status < 0) {
1045 mlog_errno(status);
1046 goto out_trans;
1047 }
1048 1057
1049 /* Update local quotafile info */ 1058 /* Update local quotafile info */
1050 oinfo->dqi_blocks += 2; 1059 oinfo->dqi_blocks += 2;
@@ -1120,10 +1129,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1120 } 1129 }
1121 1130
1122 /* Get buffer from the just added block */ 1131 /* Get buffer from the just added block */
1123 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 1132 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1125 &p_blkno, NULL, NULL); 1133 &p_blkno, NULL, NULL);
1126 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1127 if (status < 0) { 1134 if (status < 0) {
1128 mlog_errno(status); 1135 mlog_errno(status);
1129 goto out; 1136 goto out;
@@ -1155,11 +1162,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1155 lock_buffer(bh); 1162 lock_buffer(bh);
1156 memset(bh->b_data, 0, sb->s_blocksize); 1163 memset(bh->b_data, 0, sb->s_blocksize);
1157 unlock_buffer(bh); 1164 unlock_buffer(bh);
1158 status = ocfs2_journal_dirty(handle, bh); 1165 ocfs2_journal_dirty(handle, bh);
1159 if (status < 0) { 1166
1160 mlog_errno(status);
1161 goto out_trans;
1162 }
1163 /* Update chunk header */ 1167 /* Update chunk header */
1164 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), 1168 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
1165 chunk->qc_headerbh, 1169 chunk->qc_headerbh,
@@ -1173,11 +1177,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1173 lock_buffer(chunk->qc_headerbh); 1177 lock_buffer(chunk->qc_headerbh);
1174 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb)); 1178 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
1175 unlock_buffer(chunk->qc_headerbh); 1179 unlock_buffer(chunk->qc_headerbh);
1176 status = ocfs2_journal_dirty(handle, chunk->qc_headerbh); 1180 ocfs2_journal_dirty(handle, chunk->qc_headerbh);
1177 if (status < 0) { 1181
1178 mlog_errno(status);
1179 goto out_trans;
1180 }
1181 /* Update file header */ 1182 /* Update file header */
1182 oinfo->dqi_blocks++; 1183 oinfo->dqi_blocks++;
1183 status = ocfs2_local_write_info(sb, type); 1184 status = ocfs2_local_write_info(sb, type);
@@ -1210,7 +1211,7 @@ static void olq_alloc_dquot(struct buffer_head *bh, void *private)
1210} 1211}
1211 1212
1212/* Create dquot in the local file for given id */ 1213/* Create dquot in the local file for given id */
1213static int ocfs2_create_local_dquot(struct dquot *dquot) 1214int ocfs2_create_local_dquot(struct dquot *dquot)
1214{ 1215{
1215 struct super_block *sb = dquot->dq_sb; 1216 struct super_block *sb = dquot->dq_sb;
1216 int type = dquot->dq_type; 1217 int type = dquot->dq_type;
@@ -1219,17 +1220,27 @@ static int ocfs2_create_local_dquot(struct dquot *dquot)
1219 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); 1220 struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
1220 int offset; 1221 int offset;
1221 int status; 1222 int status;
1223 u64 pcount;
1222 1224
1225 down_write(&OCFS2_I(lqinode)->ip_alloc_sem);
1223 chunk = ocfs2_find_free_entry(sb, type, &offset); 1226 chunk = ocfs2_find_free_entry(sb, type, &offset);
1224 if (!chunk) { 1227 if (!chunk) {
1225 chunk = ocfs2_extend_local_quota_file(sb, type, &offset); 1228 chunk = ocfs2_extend_local_quota_file(sb, type, &offset);
1226 if (IS_ERR(chunk)) 1229 if (IS_ERR(chunk)) {
1227 return PTR_ERR(chunk); 1230 status = PTR_ERR(chunk);
1231 goto out;
1232 }
1228 } else if (IS_ERR(chunk)) { 1233 } else if (IS_ERR(chunk)) {
1229 return PTR_ERR(chunk); 1234 status = PTR_ERR(chunk);
1235 goto out;
1230 } 1236 }
1231 od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset); 1237 od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset);
1232 od->dq_chunk = chunk; 1238 od->dq_chunk = chunk;
1239 status = ocfs2_extent_map_get_blocks(lqinode,
1240 ol_dqblk_block(sb, chunk->qc_num, offset),
1241 &od->dq_local_phys_blk,
1242 &pcount,
1243 NULL);
1233 1244
1234 /* Initialize dquot structure on disk */ 1245 /* Initialize dquot structure on disk */
1235 status = ocfs2_local_write_dquot(dquot); 1246 status = ocfs2_local_write_dquot(dquot);
@@ -1246,39 +1257,15 @@ static int ocfs2_create_local_dquot(struct dquot *dquot)
1246 goto out; 1257 goto out;
1247 } 1258 }
1248out: 1259out:
1260 up_write(&OCFS2_I(lqinode)->ip_alloc_sem);
1249 return status; 1261 return status;
1250} 1262}
1251 1263
1252/* Create entry in local file for dquot, load data from the global file */ 1264/*
1253static int ocfs2_local_read_dquot(struct dquot *dquot) 1265 * Release dquot structure from local quota file. ocfs2_release_dquot() has
1254{ 1266 * already started a transaction and written all changes to global quota file
1255 int status; 1267 */
1256 1268int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
1257 mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type);
1258
1259 status = ocfs2_global_read_dquot(dquot);
1260 if (status < 0) {
1261 mlog_errno(status);
1262 goto out_err;
1263 }
1264
1265 /* Now create entry in the local quota file */
1266 status = ocfs2_create_local_dquot(dquot);
1267 if (status < 0) {
1268 mlog_errno(status);
1269 goto out_err;
1270 }
1271 mlog_exit(0);
1272 return 0;
1273out_err:
1274 mlog_exit(status);
1275 return status;
1276}
1277
1278/* Release dquot structure from local quota file. ocfs2_release_dquot() has
1279 * already started a transaction and obtained exclusive lock for global
1280 * quota file. */
1281static int ocfs2_local_release_dquot(struct dquot *dquot)
1282{ 1269{
1283 int status; 1270 int status;
1284 int type = dquot->dq_type; 1271 int type = dquot->dq_type;
@@ -1286,15 +1273,6 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
1286 struct super_block *sb = dquot->dq_sb; 1273 struct super_block *sb = dquot->dq_sb;
1287 struct ocfs2_local_disk_chunk *dchunk; 1274 struct ocfs2_local_disk_chunk *dchunk;
1288 int offset; 1275 int offset;
1289 handle_t *handle = journal_current_handle();
1290
1291 BUG_ON(!handle);
1292 /* First write all local changes to global file */
1293 status = ocfs2_global_release_dquot(dquot);
1294 if (status < 0) {
1295 mlog_errno(status);
1296 goto out;
1297 }
1298 1276
1299 status = ocfs2_journal_access_dq(handle, 1277 status = ocfs2_journal_access_dq(handle,
1300 INODE_CACHE(sb_dqopt(sb)->files[type]), 1278 INODE_CACHE(sb_dqopt(sb)->files[type]),
@@ -1312,12 +1290,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
1312 ocfs2_clear_bit(offset, dchunk->dqc_bitmap); 1290 ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
1313 le32_add_cpu(&dchunk->dqc_free, 1); 1291 le32_add_cpu(&dchunk->dqc_free, 1);
1314 unlock_buffer(od->dq_chunk->qc_headerbh); 1292 unlock_buffer(od->dq_chunk->qc_headerbh);
1315 status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); 1293 ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
1316 if (status < 0) { 1294
1317 mlog_errno(status);
1318 goto out;
1319 }
1320 status = 0;
1321out: 1295out:
1322 /* Clear the read bit so that next time someone uses this 1296 /* Clear the read bit so that next time someone uses this
1323 * dquot he reads fresh info from disk and allocates local 1297 * dquot he reads fresh info from disk and allocates local
@@ -1331,9 +1305,6 @@ static const struct quota_format_ops ocfs2_format_ops = {
1331 .read_file_info = ocfs2_local_read_info, 1305 .read_file_info = ocfs2_local_read_info,
1332 .write_file_info = ocfs2_global_write_info, 1306 .write_file_info = ocfs2_global_write_info,
1333 .free_file_info = ocfs2_local_free_info, 1307 .free_file_info = ocfs2_local_free_info,
1334 .read_dqblk = ocfs2_local_read_dquot,
1335 .commit_dqblk = ocfs2_local_write_dquot,
1336 .release_dqblk = ocfs2_local_release_dquot,
1337}; 1308};
1338 1309
1339struct quota_format_type ocfs2_quota_format = { 1310struct quota_format_type ocfs2_quota_format = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bd96f6c7877e..4793f36f6518 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -570,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
570 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; 570 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
571 u16 suballoc_bit_start; 571 u16 suballoc_bit_start;
572 u32 num_got; 572 u32 num_got;
573 u64 first_blkno; 573 u64 suballoc_loc, first_blkno;
574 574
575 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 575 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
576 576
@@ -596,7 +596,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
596 goto out_commit; 596 goto out_commit;
597 } 597 }
598 598
599 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 599 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
600 &suballoc_bit_start, &num_got, 600 &suballoc_bit_start, &num_got,
601 &first_blkno); 601 &first_blkno);
602 if (ret) { 602 if (ret) {
@@ -626,6 +626,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
626 memset(rb, 0, inode->i_sb->s_blocksize); 626 memset(rb, 0, inode->i_sb->s_blocksize);
627 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 627 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
628 rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 628 rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
629 rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
629 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 630 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
630 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); 631 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
631 rb->rf_blkno = cpu_to_le64(first_blkno); 632 rb->rf_blkno = cpu_to_le64(first_blkno);
@@ -790,7 +791,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
790 if (le32_to_cpu(rb->rf_count) == 1) { 791 if (le32_to_cpu(rb->rf_count) == 1) {
791 blk = le64_to_cpu(rb->rf_blkno); 792 blk = le64_to_cpu(rb->rf_blkno);
792 bit = le16_to_cpu(rb->rf_suballoc_bit); 793 bit = le16_to_cpu(rb->rf_suballoc_bit);
793 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 794 if (rb->rf_suballoc_loc)
795 bg_blkno = le64_to_cpu(rb->rf_suballoc_loc);
796 else
797 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
794 798
795 alloc_inode = ocfs2_get_system_file_inode(osb, 799 alloc_inode = ocfs2_get_system_file_inode(osb,
796 EXTENT_ALLOC_SYSTEM_INODE, 800 EXTENT_ALLOC_SYSTEM_INODE,
@@ -1268,9 +1272,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
1268 } else if (merge) 1272 } else if (merge)
1269 ocfs2_refcount_rec_merge(rb, index); 1273 ocfs2_refcount_rec_merge(rb, index);
1270 1274
1271 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1275 ocfs2_journal_dirty(handle, ref_leaf_bh);
1272 if (ret)
1273 mlog_errno(ret);
1274out: 1276out:
1275 return ret; 1277 return ret;
1276} 1278}
@@ -1284,7 +1286,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1284 int ret; 1286 int ret;
1285 u16 suballoc_bit_start; 1287 u16 suballoc_bit_start;
1286 u32 num_got; 1288 u32 num_got;
1287 u64 blkno; 1289 u64 suballoc_loc, blkno;
1288 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1290 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1289 struct buffer_head *new_bh = NULL; 1291 struct buffer_head *new_bh = NULL;
1290 struct ocfs2_refcount_block *new_rb; 1292 struct ocfs2_refcount_block *new_rb;
@@ -1298,7 +1300,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1298 goto out; 1300 goto out;
1299 } 1301 }
1300 1302
1301 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1303 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
1302 &suballoc_bit_start, &num_got, 1304 &suballoc_bit_start, &num_got,
1303 &blkno); 1305 &blkno);
1304 if (ret) { 1306 if (ret) {
@@ -1330,6 +1332,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1330 1332
1331 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1333 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
1332 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1334 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
1335 new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
1333 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1336 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1334 new_rb->rf_blkno = cpu_to_le64(blkno); 1337 new_rb->rf_blkno = cpu_to_le64(blkno);
1335 new_rb->rf_cpos = cpu_to_le32(0); 1338 new_rb->rf_cpos = cpu_to_le32(0);
@@ -1524,7 +1527,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1524 int ret; 1527 int ret;
1525 u16 suballoc_bit_start; 1528 u16 suballoc_bit_start;
1526 u32 num_got, new_cpos; 1529 u32 num_got, new_cpos;
1527 u64 blkno; 1530 u64 suballoc_loc, blkno;
1528 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1531 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1529 struct ocfs2_refcount_block *root_rb = 1532 struct ocfs2_refcount_block *root_rb =
1530 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1533 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
@@ -1548,7 +1551,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1548 goto out; 1551 goto out;
1549 } 1552 }
1550 1553
1551 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1554 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
1552 &suballoc_bit_start, &num_got, 1555 &suballoc_bit_start, &num_got,
1553 &blkno); 1556 &blkno);
1554 if (ret) { 1557 if (ret) {
@@ -1576,6 +1579,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1576 memset(new_rb, 0, sb->s_blocksize); 1579 memset(new_rb, 0, sb->s_blocksize);
1577 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 1580 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
1578 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1581 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
1582 new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
1579 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1583 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1580 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1584 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
1581 new_rb->rf_blkno = cpu_to_le64(blkno); 1585 new_rb->rf_blkno = cpu_to_le64(blkno);
@@ -1694,7 +1698,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle,
1694 * 2 more credits, one for the leaf refcount block, one for 1698 * 2 more credits, one for the leaf refcount block, one for
1695 * the extent block contains the extent rec. 1699 * the extent block contains the extent rec.
1696 */ 1700 */
1697 ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); 1701 ret = ocfs2_extend_trans(handle, 2);
1698 if (ret < 0) { 1702 if (ret < 0) {
1699 mlog_errno(ret); 1703 mlog_errno(ret);
1700 goto out; 1704 goto out;
@@ -1802,11 +1806,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
1802 if (merge) 1806 if (merge)
1803 ocfs2_refcount_rec_merge(rb, index); 1807 ocfs2_refcount_rec_merge(rb, index);
1804 1808
1805 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1809 ocfs2_journal_dirty(handle, ref_leaf_bh);
1806 if (ret) {
1807 mlog_errno(ret);
1808 goto out;
1809 }
1810 1810
1811 if (index == 0) { 1811 if (index == 0) {
1812 ret = ocfs2_adjust_refcount_rec(handle, ci, 1812 ret = ocfs2_adjust_refcount_rec(handle, ci,
@@ -1977,9 +1977,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1977 ocfs2_refcount_rec_merge(rb, index); 1977 ocfs2_refcount_rec_merge(rb, index);
1978 } 1978 }
1979 1979
1980 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1980 ocfs2_journal_dirty(handle, ref_leaf_bh);
1981 if (ret)
1982 mlog_errno(ret);
1983 1981
1984out: 1982out:
1985 brelse(new_bh); 1983 brelse(new_bh);
@@ -2112,6 +2110,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
2112 */ 2110 */
2113 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, 2111 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
2114 le16_to_cpu(rb->rf_suballoc_slot), 2112 le16_to_cpu(rb->rf_suballoc_slot),
2113 le64_to_cpu(rb->rf_suballoc_loc),
2115 le64_to_cpu(rb->rf_blkno), 2114 le64_to_cpu(rb->rf_blkno),
2116 le16_to_cpu(rb->rf_suballoc_bit)); 2115 le16_to_cpu(rb->rf_suballoc_bit));
2117 if (ret) { 2116 if (ret) {
@@ -2516,20 +2515,19 @@ out:
2516 * 2515 *
2517 * Normally the refcount blocks store these refcount should be 2516 * Normally the refcount blocks store these refcount should be
2518 * contiguous also, so that we can get the number easily. 2517 * contiguous also, so that we can get the number easily.
2519 * As for meta_ac, we will at most add split 2 refcount record and 2518 * We will at most add split 2 refcount records and 2 more
2520 * 2 more refcount block, so just check it in a rough way. 2519 * refcount blocks, so just check it in a rough way.
2521 * 2520 *
2522 * Caller must hold refcount tree lock. 2521 * Caller must hold refcount tree lock.
2523 */ 2522 */
2524int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 2523int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2525 struct buffer_head *di_bh, 2524 u64 refcount_loc,
2526 u64 phys_blkno, 2525 u64 phys_blkno,
2527 u32 clusters, 2526 u32 clusters,
2528 int *credits, 2527 int *credits,
2529 struct ocfs2_alloc_context **meta_ac) 2528 int *ref_blocks)
2530{ 2529{
2531 int ret, ref_blocks = 0; 2530 int ret;
2532 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2533 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2531 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2534 struct buffer_head *ref_root_bh = NULL; 2532 struct buffer_head *ref_root_bh = NULL;
2535 struct ocfs2_refcount_tree *tree; 2533 struct ocfs2_refcount_tree *tree;
@@ -2546,14 +2544,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2546 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2544 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2547 2545
2548 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), 2546 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2549 le64_to_cpu(di->i_refcount_loc), &tree); 2547 refcount_loc, &tree);
2550 if (ret) { 2548 if (ret) {
2551 mlog_errno(ret); 2549 mlog_errno(ret);
2552 goto out; 2550 goto out;
2553 } 2551 }
2554 2552
2555 ret = ocfs2_read_refcount_block(&tree->rf_ci, 2553 ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
2556 le64_to_cpu(di->i_refcount_loc),
2557 &ref_root_bh); 2554 &ref_root_bh);
2558 if (ret) { 2555 if (ret) {
2559 mlog_errno(ret); 2556 mlog_errno(ret);
@@ -2564,21 +2561,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2564 &tree->rf_ci, 2561 &tree->rf_ci,
2565 ref_root_bh, 2562 ref_root_bh,
2566 start_cpos, clusters, 2563 start_cpos, clusters,
2567 &ref_blocks, credits); 2564 ref_blocks, credits);
2568 if (ret) { 2565 if (ret) {
2569 mlog_errno(ret); 2566 mlog_errno(ret);
2570 goto out; 2567 goto out;
2571 } 2568 }
2572 2569
2573 mlog(0, "reserve new metadata %d, credits = %d\n", 2570 mlog(0, "reserve new metadata %d blocks, credits = %d\n",
2574 ref_blocks, *credits); 2571 *ref_blocks, *credits);
2575
2576 if (ref_blocks) {
2577 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2578 ref_blocks, meta_ac);
2579 if (ret)
2580 mlog_errno(ret);
2581 }
2582 2572
2583out: 2573out:
2584 brelse(ref_root_bh); 2574 brelse(ref_root_bh);
@@ -3040,11 +3030,7 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3040 } 3030 }
3041 3031
3042 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); 3032 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
3043 ret = ocfs2_journal_dirty(handle, new_bh); 3033 ocfs2_journal_dirty(handle, new_bh);
3044 if (ret) {
3045 mlog_errno(ret);
3046 break;
3047 }
3048 3034
3049 brelse(new_bh); 3035 brelse(new_bh);
3050 brelse(old_bh); 3036 brelse(old_bh);
@@ -3282,7 +3268,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3282 } else { 3268 } else {
3283 delete = 1; 3269 delete = 1;
3284 3270
3285 ret = __ocfs2_claim_clusters(osb, handle, 3271 ret = __ocfs2_claim_clusters(handle,
3286 context->data_ac, 3272 context->data_ac,
3287 1, set_len, 3273 1, set_len,
3288 &new_bit, &new_len); 3274 &new_bit, &new_len);
@@ -4083,6 +4069,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
4083 di->i_attr = s_di->i_attr; 4069 di->i_attr = s_di->i_attr;
4084 4070
4085 if (preserve) { 4071 if (preserve) {
4072 t_inode->i_uid = s_inode->i_uid;
4073 t_inode->i_gid = s_inode->i_gid;
4074 t_inode->i_mode = s_inode->i_mode;
4086 di->i_uid = s_di->i_uid; 4075 di->i_uid = s_di->i_uid;
4087 di->i_gid = s_di->i_gid; 4076 di->i_gid = s_di->i_gid;
4088 di->i_mode = s_di->i_mode; 4077 di->i_mode = s_di->i_mode;
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index c1d19b1d3ecc..9983ba1570e2 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode,
47 struct ocfs2_cached_dealloc_ctxt *dealloc, 47 struct ocfs2_cached_dealloc_ctxt *dealloc,
48 int delete); 48 int delete);
49int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 49int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
50 struct buffer_head *di_bh, 50 u64 refcount_loc,
51 u64 phys_blkno, 51 u64 phys_blkno,
52 u32 clusters, 52 u32 clusters,
53 int *credits, 53 int *credits,
54 struct ocfs2_alloc_context **meta_ac); 54 int *ref_blocks);
55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, 55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
56 u32 cpos, u32 write_len, u32 max_cpos); 56 u32 cpos, u32 write_len, u32 max_cpos);
57 57
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
new file mode 100644
index 000000000000..40650021fc24
--- /dev/null
+++ b/fs/ocfs2/reservations.c
@@ -0,0 +1,847 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.c
5 *
6 * Allocation reservations implementation
7 *
8 * Some code borrowed from fs/ext3/balloc.c and is:
9 *
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 *
15 * The rest is copyright (C) 2010 Novell. All rights reserved.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public
19 * License version 2 as published by the Free Software Foundation.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * General Public License for more details.
25 */
26
27#include <linux/fs.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h>
31#include <linux/bitops.h>
32#include <linux/list.h>
33
34#define MLOG_MASK_PREFIX ML_RESERVATIONS
35#include <cluster/masklog.h>
36
37#include "ocfs2.h"
38
39#ifdef CONFIG_OCFS2_DEBUG_FS
40#define OCFS2_CHECK_RESERVATIONS
41#endif
42
43DEFINE_SPINLOCK(resv_lock);
44
45#define OCFS2_MIN_RESV_WINDOW_BITS 8
46#define OCFS2_MAX_RESV_WINDOW_BITS 1024
47
48int ocfs2_dir_resv_allowed(struct ocfs2_super *osb)
49{
50 return (osb->osb_resv_level && osb->osb_dir_resv_level);
51}
52
53static unsigned int ocfs2_resv_window_bits(struct ocfs2_reservation_map *resmap,
54 struct ocfs2_alloc_reservation *resv)
55{
56 struct ocfs2_super *osb = resmap->m_osb;
57 unsigned int bits;
58
59 if (!(resv->r_flags & OCFS2_RESV_FLAG_DIR)) {
60 /* 8, 16, 32, 64, 128, 256, 512, 1024 */
61 bits = 4 << osb->osb_resv_level;
62 } else {
63 bits = 4 << osb->osb_dir_resv_level;
64 }
65 return bits;
66}
67
68static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv)
69{
70 if (resv->r_len)
71 return resv->r_start + resv->r_len - 1;
72 return resv->r_start;
73}
74
75static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv)
76{
77 return !!(resv->r_len == 0);
78}
79
80static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap)
81{
82 if (resmap->m_osb->osb_resv_level == 0)
83 return 1;
84 return 0;
85}
86
87static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap)
88{
89 struct ocfs2_super *osb = resmap->m_osb;
90 struct rb_node *node;
91 struct ocfs2_alloc_reservation *resv;
92 int i = 0;
93
94 mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n",
95 osb->dev_str, resmap->m_bitmap_len);
96
97 node = rb_first(&resmap->m_reservations);
98 while (node) {
99 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
100
101 mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u"
102 "\tlast_len: %u\n", resv->r_start,
103 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
104 resv->r_last_len);
105
106 node = rb_next(node);
107 i++;
108 }
109
110 mlog(ML_NOTICE, "%d reservations found. LRU follows\n", i);
111
112 i = 0;
113 list_for_each_entry(resv, &resmap->m_lru, r_lru) {
114 mlog(ML_NOTICE, "LRU(%d) start: %u\tend: %u\tlen: %u\t"
115 "last_start: %u\tlast_len: %u\n", i, resv->r_start,
116 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
117 resv->r_last_len);
118
119 i++;
120 }
121}
122
123#ifdef OCFS2_CHECK_RESERVATIONS
124static int ocfs2_validate_resmap_bits(struct ocfs2_reservation_map *resmap,
125 int i,
126 struct ocfs2_alloc_reservation *resv)
127{
128 char *disk_bitmap = resmap->m_disk_bitmap;
129 unsigned int start = resv->r_start;
130 unsigned int end = ocfs2_resv_end(resv);
131
132 while (start <= end) {
133 if (ocfs2_test_bit(start, disk_bitmap)) {
134 mlog(ML_ERROR,
135 "reservation %d covers an allocated area "
136 "starting at bit %u!\n", i, start);
137 return 1;
138 }
139
140 start++;
141 }
142 return 0;
143}
144
145static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
146{
147 unsigned int off = 0;
148 int i = 0;
149 struct rb_node *node;
150 struct ocfs2_alloc_reservation *resv;
151
152 node = rb_first(&resmap->m_reservations);
153 while (node) {
154 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
155
156 if (i > 0 && resv->r_start <= off) {
157 mlog(ML_ERROR, "reservation %d has bad start off!\n",
158 i);
159 goto bad;
160 }
161
162 if (resv->r_len == 0) {
163 mlog(ML_ERROR, "reservation %d has no length!\n",
164 i);
165 goto bad;
166 }
167
168 if (resv->r_start > ocfs2_resv_end(resv)) {
169 mlog(ML_ERROR, "reservation %d has invalid range!\n",
170 i);
171 goto bad;
172 }
173
174 if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len) {
175 mlog(ML_ERROR, "reservation %d extends past bitmap!\n",
176 i);
177 goto bad;
178 }
179
180 if (ocfs2_validate_resmap_bits(resmap, i, resv))
181 goto bad;
182
183 off = ocfs2_resv_end(resv);
184 node = rb_next(node);
185
186 i++;
187 }
188 return;
189
190bad:
191 ocfs2_dump_resv(resmap);
192 BUG();
193}
194#else
195static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
196{
197
198}
199#endif
200
201void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv)
202{
203 memset(resv, 0, sizeof(*resv));
204 INIT_LIST_HEAD(&resv->r_lru);
205}
206
207void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
208 unsigned int flags)
209{
210 BUG_ON(flags & ~OCFS2_RESV_TYPES);
211
212 resv->r_flags |= flags;
213}
214
215int ocfs2_resmap_init(struct ocfs2_super *osb,
216 struct ocfs2_reservation_map *resmap)
217{
218 memset(resmap, 0, sizeof(*resmap));
219
220 resmap->m_osb = osb;
221 resmap->m_reservations = RB_ROOT;
222 /* m_bitmap_len is initialized to zero by the above memset. */
223 INIT_LIST_HEAD(&resmap->m_lru);
224
225 return 0;
226}
227
228static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap,
229 struct ocfs2_alloc_reservation *resv)
230{
231 assert_spin_locked(&resv_lock);
232
233 if (!list_empty(&resv->r_lru))
234 list_del_init(&resv->r_lru);
235
236 list_add_tail(&resv->r_lru, &resmap->m_lru);
237}
238
239static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv)
240{
241 resv->r_len = 0;
242 resv->r_start = 0;
243}
244
245static void ocfs2_resv_remove(struct ocfs2_reservation_map *resmap,
246 struct ocfs2_alloc_reservation *resv)
247{
248 if (resv->r_flags & OCFS2_RESV_FLAG_INUSE) {
249 list_del_init(&resv->r_lru);
250 rb_erase(&resv->r_node, &resmap->m_reservations);
251 resv->r_flags &= ~OCFS2_RESV_FLAG_INUSE;
252 }
253}
254
255static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
256 struct ocfs2_alloc_reservation *resv)
257{
258 assert_spin_locked(&resv_lock);
259
260 __ocfs2_resv_trunc(resv);
261 /*
262 * last_len and last_start no longer make sense if
263 * we're changing the range of our allocations.
264 */
265 resv->r_last_len = resv->r_last_start = 0;
266
267 ocfs2_resv_remove(resmap, resv);
268}
269
270/* does nothing if 'resv' is null */
271void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
272 struct ocfs2_alloc_reservation *resv)
273{
274 if (resv) {
275 spin_lock(&resv_lock);
276 __ocfs2_resv_discard(resmap, resv);
277 spin_unlock(&resv_lock);
278 }
279}
280
281static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap)
282{
283 struct rb_node *node;
284 struct ocfs2_alloc_reservation *resv;
285
286 assert_spin_locked(&resv_lock);
287
288 while ((node = rb_last(&resmap->m_reservations)) != NULL) {
289 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
290
291 __ocfs2_resv_discard(resmap, resv);
292 }
293}
294
295void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
296 unsigned int clen, char *disk_bitmap)
297{
298 if (ocfs2_resmap_disabled(resmap))
299 return;
300
301 spin_lock(&resv_lock);
302
303 ocfs2_resmap_clear_all_resv(resmap);
304 resmap->m_bitmap_len = clen;
305 resmap->m_disk_bitmap = disk_bitmap;
306
307 spin_unlock(&resv_lock);
308}
309
310void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap)
311{
312 /* Does nothing for now. Keep this around for API symmetry */
313}
314
315static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
316 struct ocfs2_alloc_reservation *new)
317{
318 struct rb_root *root = &resmap->m_reservations;
319 struct rb_node *parent = NULL;
320 struct rb_node **p = &root->rb_node;
321 struct ocfs2_alloc_reservation *tmp;
322
323 assert_spin_locked(&resv_lock);
324
325 mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
326 new->r_len);
327
328 while (*p) {
329 parent = *p;
330
331 tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node);
332
333 if (new->r_start < tmp->r_start) {
334 p = &(*p)->rb_left;
335
336 /*
337 * This is a good place to check for
338 * overlapping reservations.
339 */
340 BUG_ON(ocfs2_resv_end(new) >= tmp->r_start);
341 } else if (new->r_start > ocfs2_resv_end(tmp)) {
342 p = &(*p)->rb_right;
343 } else {
344 /* This should never happen! */
345 mlog(ML_ERROR, "Duplicate reservation window!\n");
346 BUG();
347 }
348 }
349
350 rb_link_node(&new->r_node, parent, p);
351 rb_insert_color(&new->r_node, root);
352 new->r_flags |= OCFS2_RESV_FLAG_INUSE;
353
354 ocfs2_resv_mark_lru(resmap, new);
355
356 ocfs2_check_resmap(resmap);
357}
358
359/**
360 * ocfs2_find_resv_lhs() - find the window which contains goal
361 * @resmap: reservation map to search
362 * @goal: which bit to search for
363 *
364 * If a window containing that goal is not found, we return the window
365 * which comes before goal. Returns NULL on empty rbtree or no window
366 * before goal.
367 */
368static struct ocfs2_alloc_reservation *
369ocfs2_find_resv_lhs(struct ocfs2_reservation_map *resmap, unsigned int goal)
370{
371 struct ocfs2_alloc_reservation *resv = NULL;
372 struct ocfs2_alloc_reservation *prev_resv = NULL;
373 struct rb_node *node = resmap->m_reservations.rb_node;
374
375 assert_spin_locked(&resv_lock);
376
377 if (!node)
378 return NULL;
379
380 node = rb_first(&resmap->m_reservations);
381 while (node) {
382 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
383
384 if (resv->r_start <= goal && ocfs2_resv_end(resv) >= goal)
385 break;
386
387 /* Check if we overshot the reservation just before goal? */
388 if (resv->r_start > goal) {
389 resv = prev_resv;
390 break;
391 }
392
393 prev_resv = resv;
394 node = rb_next(node);
395 }
396
397 return resv;
398}
399
400/*
401 * We are given a range within the bitmap, which corresponds to a gap
402 * inside the reservations tree (search_start, search_len). The range
403 * can be anything from the whole bitmap, to a gap between
404 * reservations.
405 *
406 * The start value of *rstart is insignificant.
407 *
408 * This function searches the bitmap range starting at search_start
409 * with length search_len for a set of contiguous free bits. We try
410 * to find up to 'wanted' bits, but can sometimes return less.
411 *
412 * Returns the length of allocation, 0 if no free bits are found.
413 *
414 * *cstart and *clen will also be populated with the result.
415 */
416static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
417 unsigned int wanted,
418 unsigned int search_start,
419 unsigned int search_len,
420 unsigned int *rstart,
421 unsigned int *rlen)
422{
423 void *bitmap = resmap->m_disk_bitmap;
424 unsigned int best_start, best_len = 0;
425 int offset, start, found;
426
427 mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n",
428 wanted, search_start, search_len, resmap->m_bitmap_len);
429
430 found = best_start = best_len = 0;
431
432 start = search_start;
433 while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
434 start)) != -1) {
435 /* Search reached end of the region */
436 if (offset >= (search_start + search_len))
437 break;
438
439 if (offset == start) {
440 /* we found a zero */
441 found++;
442 /* move start to the next bit to test */
443 start++;
444 } else {
445 /* got a zero after some ones */
446 found = 1;
447 start = offset + 1;
448 }
449 if (found > best_len) {
450 best_len = found;
451 best_start = start - found;
452 }
453
454 if (found >= wanted)
455 break;
456 }
457
458 if (best_len == 0)
459 return 0;
460
461 if (best_len >= wanted)
462 best_len = wanted;
463
464 *rlen = best_len;
465 *rstart = best_start;
466
467 mlog(0, "Found start: %u len: %u\n", best_start, best_len);
468
469 return *rlen;
470}
471
472static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
473 struct ocfs2_alloc_reservation *resv,
474 unsigned int goal, unsigned int wanted)
475{
476 struct rb_root *root = &resmap->m_reservations;
477 unsigned int gap_start, gap_end, gap_len;
478 struct ocfs2_alloc_reservation *prev_resv, *next_resv;
479 struct rb_node *prev, *next;
480 unsigned int cstart, clen;
481 unsigned int best_start = 0, best_len = 0;
482
483 /*
484 * Nasty cases to consider:
485 *
486 * - rbtree is empty
487 * - our window should be first in all reservations
488 * - our window should be last in all reservations
489 * - need to make sure we don't go past end of bitmap
490 */
491
492 mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n",
493 resv->r_start, ocfs2_resv_end(resv), goal, wanted);
494
495 assert_spin_locked(&resv_lock);
496
497 if (RB_EMPTY_ROOT(root)) {
498 /*
499 * Easiest case - empty tree. We can just take
500 * whatever window of free bits we want.
501 */
502
503 mlog(0, "Empty root\n");
504
505 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
506 resmap->m_bitmap_len - goal,
507 &cstart, &clen);
508
509 /*
510 * This should never happen - the local alloc window
511 * will always have free bits when we're called.
512 */
513 BUG_ON(goal == 0 && clen == 0);
514
515 if (clen == 0)
516 return;
517
518 resv->r_start = cstart;
519 resv->r_len = clen;
520
521 ocfs2_resv_insert(resmap, resv);
522 return;
523 }
524
525 prev_resv = ocfs2_find_resv_lhs(resmap, goal);
526
527 if (prev_resv == NULL) {
528 mlog(0, "Goal on LHS of leftmost window\n");
529
530 /*
531 * A NULL here means that the search code couldn't
532 * find a window that starts before goal.
533 *
534 * However, we can take the first window after goal,
535 * which is also by definition, the leftmost window in
536 * the entire tree. If we can find free bits in the
537 * gap between goal and the LHS window, then the
538 * reservation can safely be placed there.
539 *
540 * Otherwise we fall back to a linear search, checking
541 * the gaps in between windows for a place to
542 * allocate.
543 */
544
545 next = rb_first(root);
546 next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
547 r_node);
548
549 /*
550 * The search should never return such a window. (see
551 * comment above
552 */
553 if (next_resv->r_start <= goal) {
554 mlog(ML_ERROR, "goal: %u next_resv: start %u len %u\n",
555 goal, next_resv->r_start, next_resv->r_len);
556 ocfs2_dump_resv(resmap);
557 BUG();
558 }
559
560 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
561 next_resv->r_start - goal,
562 &cstart, &clen);
563 if (clen) {
564 best_len = clen;
565 best_start = cstart;
566 if (best_len == wanted)
567 goto out_insert;
568 }
569
570 prev_resv = next_resv;
571 next_resv = NULL;
572 }
573
574 prev = &prev_resv->r_node;
575
576 /* Now we do a linear search for a window, starting at 'prev_rsv' */
577 while (1) {
578 next = rb_next(prev);
579 if (next) {
580 mlog(0, "One more resv found in linear search\n");
581 next_resv = rb_entry(next,
582 struct ocfs2_alloc_reservation,
583 r_node);
584
585 gap_start = ocfs2_resv_end(prev_resv) + 1;
586 gap_end = next_resv->r_start - 1;
587 gap_len = gap_end - gap_start + 1;
588 } else {
589 mlog(0, "No next node\n");
590 /*
591 * We're at the rightmost edge of the
592 * tree. See if a reservation between this
593 * window and the end of the bitmap will work.
594 */
595 gap_start = ocfs2_resv_end(prev_resv) + 1;
596 gap_len = resmap->m_bitmap_len - gap_start;
597 gap_end = resmap->m_bitmap_len - 1;
598 }
599
600 /*
601 * No need to check this gap if we have already found
602 * a larger region of free bits.
603 */
604 if (gap_len <= best_len)
605 goto next_resv;
606
607 clen = ocfs2_resmap_find_free_bits(resmap, wanted, gap_start,
608 gap_len, &cstart, &clen);
609 if (clen == wanted) {
610 best_len = clen;
611 best_start = cstart;
612 goto out_insert;
613 } else if (clen > best_len) {
614 best_len = clen;
615 best_start = cstart;
616 }
617
618next_resv:
619 if (!next)
620 break;
621
622 prev = next;
623 prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation,
624 r_node);
625 }
626
627out_insert:
628 if (best_len) {
629 resv->r_start = best_start;
630 resv->r_len = best_len;
631 ocfs2_resv_insert(resmap, resv);
632 }
633}
634
635static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
636 struct ocfs2_alloc_reservation *resv,
637 unsigned int wanted)
638{
639 struct ocfs2_alloc_reservation *lru_resv;
640 int tmpwindow = !!(resv->r_flags & OCFS2_RESV_FLAG_TMP);
641 unsigned int min_bits;
642
643 if (!tmpwindow)
644 min_bits = ocfs2_resv_window_bits(resmap, resv) >> 1;
645 else
646 min_bits = wanted; /* We at know the temp window will use all
647 * of these bits */
648
649 /*
650 * Take the first reservation off the LRU as our 'target'. We
651 * don't try to be smart about it. There might be a case for
652 * searching based on size but I don't have enough data to be
653 * sure. --Mark (3/16/2010)
654 */
655 lru_resv = list_first_entry(&resmap->m_lru,
656 struct ocfs2_alloc_reservation, r_lru);
657
658 mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start,
659 lru_resv->r_len, ocfs2_resv_end(lru_resv));
660
661 /*
662 * Cannibalize (some or all) of the target reservation and
663 * feed it to the current window.
664 */
665 if (lru_resv->r_len <= min_bits) {
666 /*
667 * Discard completely if size is less than or equal to a
668 * reasonable threshold - 50% of window bits for non temporary
669 * windows.
670 */
671 resv->r_start = lru_resv->r_start;
672 resv->r_len = lru_resv->r_len;
673
674 __ocfs2_resv_discard(resmap, lru_resv);
675 } else {
676 unsigned int shrink;
677 if (tmpwindow)
678 shrink = min_bits;
679 else
680 shrink = lru_resv->r_len / 2;
681
682 lru_resv->r_len -= shrink;
683
684 resv->r_start = ocfs2_resv_end(lru_resv) + 1;
685 resv->r_len = shrink;
686 }
687
688 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
689 "r_len: %u r_last_start: %u r_last_len: %u\n",
690 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
691 resv->r_last_start, resv->r_last_len);
692
693 ocfs2_resv_insert(resmap, resv);
694}
695
696static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
697 struct ocfs2_alloc_reservation *resv,
698 unsigned int wanted)
699{
700 unsigned int goal = 0;
701
702 BUG_ON(!ocfs2_resv_empty(resv));
703
704 /*
705 * Begin by trying to get a window as close to the previous
706 * one as possible. Using the most recent allocation as a
707 * start goal makes sense.
708 */
709 if (resv->r_last_len) {
710 goal = resv->r_last_start + resv->r_last_len;
711 if (goal >= resmap->m_bitmap_len)
712 goal = 0;
713 }
714
715 __ocfs2_resv_find_window(resmap, resv, goal, wanted);
716
717 /* Search from last alloc didn't work, try once more from beginning. */
718 if (ocfs2_resv_empty(resv) && goal != 0)
719 __ocfs2_resv_find_window(resmap, resv, 0, wanted);
720
721 if (ocfs2_resv_empty(resv)) {
722 /*
723 * Still empty? Pull oldest one off the LRU, remove it from
724 * tree, put this one in it's place.
725 */
726 ocfs2_cannibalize_resv(resmap, resv, wanted);
727 }
728
729 BUG_ON(ocfs2_resv_empty(resv));
730}
731
732int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
733 struct ocfs2_alloc_reservation *resv,
734 int *cstart, int *clen)
735{
736 unsigned int wanted = *clen;
737
738 if (resv == NULL || ocfs2_resmap_disabled(resmap))
739 return -ENOSPC;
740
741 spin_lock(&resv_lock);
742
743 /*
744 * We don't want to over-allocate for temporary
745 * windows. Otherwise, we run the risk of fragmenting the
746 * allocation space.
747 */
748 wanted = ocfs2_resv_window_bits(resmap, resv);
749 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
750 wanted = *clen;
751
752 if (ocfs2_resv_empty(resv)) {
753 mlog(0, "empty reservation, find new window\n");
754
755 /*
756 * Try to get a window here. If it works, we must fall
757 * through and test the bitmap . This avoids some
758 * ping-ponging of windows due to non-reserved space
759 * being allocation before we initialize a window for
760 * that inode.
761 */
762 ocfs2_resv_find_window(resmap, resv, wanted);
763 }
764
765 BUG_ON(ocfs2_resv_empty(resv));
766
767 *cstart = resv->r_start;
768 *clen = resv->r_len;
769
770 spin_unlock(&resv_lock);
771 return 0;
772}
773
774static void
775 ocfs2_adjust_resv_from_alloc(struct ocfs2_reservation_map *resmap,
776 struct ocfs2_alloc_reservation *resv,
777 unsigned int start, unsigned int end)
778{
779 unsigned int rhs = 0;
780 unsigned int old_end = ocfs2_resv_end(resv);
781
782 BUG_ON(start != resv->r_start || old_end < end);
783
784 /*
785 * Completely used? We can remove it then.
786 */
787 if (old_end == end) {
788 __ocfs2_resv_discard(resmap, resv);
789 return;
790 }
791
792 rhs = old_end - end;
793
794 /*
795 * This should have been trapped above.
796 */
797 BUG_ON(rhs == 0);
798
799 resv->r_start = end + 1;
800 resv->r_len = old_end - resv->r_start + 1;
801}
802
803void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
804 struct ocfs2_alloc_reservation *resv,
805 u32 cstart, u32 clen)
806{
807 unsigned int cend = cstart + clen - 1;
808
809 if (resmap == NULL || ocfs2_resmap_disabled(resmap))
810 return;
811
812 if (resv == NULL)
813 return;
814
815 BUG_ON(cstart != resv->r_start);
816
817 spin_lock(&resv_lock);
818
819 mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
820 "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
821 cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
822 resv->r_len, resv->r_last_start, resv->r_last_len);
823
824 BUG_ON(cstart < resv->r_start);
825 BUG_ON(cstart > ocfs2_resv_end(resv));
826 BUG_ON(cend > ocfs2_resv_end(resv));
827
828 ocfs2_adjust_resv_from_alloc(resmap, resv, cstart, cend);
829 resv->r_last_start = cstart;
830 resv->r_last_len = clen;
831
832 /*
833 * May have been discarded above from
834 * ocfs2_adjust_resv_from_alloc().
835 */
836 if (!ocfs2_resv_empty(resv))
837 ocfs2_resv_mark_lru(resmap, resv);
838
839 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
840 "r_len: %u r_last_start: %u r_last_len: %u\n",
841 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
842 resv->r_last_start, resv->r_last_len);
843
844 ocfs2_check_resmap(resmap);
845
846 spin_unlock(&resv_lock);
847}
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
new file mode 100644
index 000000000000..1e49cc29d06c
--- /dev/null
+++ b/fs/ocfs2/reservations.h
@@ -0,0 +1,159 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.h
5 *
6 * Allocation reservations function prototypes and structures.
7 *
8 * Copyright (C) 2010 Novell. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_RESERVATIONS_H
21#define OCFS2_RESERVATIONS_H
22
23#include <linux/rbtree.h>
24
25#define OCFS2_DEFAULT_RESV_LEVEL 2
26#define OCFS2_MAX_RESV_LEVEL 9
27#define OCFS2_MIN_RESV_LEVEL 0
28
29struct ocfs2_alloc_reservation {
30 struct rb_node r_node;
31
32 unsigned int r_start; /* Begining of current window */
33 unsigned int r_len; /* Length of the window */
34
35 unsigned int r_last_len; /* Length of most recent alloc */
36 unsigned int r_last_start; /* Start of most recent alloc */
37 struct list_head r_lru; /* LRU list head */
38
39 unsigned int r_flags;
40};
41
42#define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of a btree */
43#define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be
44 * destroyed immedately after use */
45#define OCFS2_RESV_FLAG_DIR 0x04 /* Reservation is for an unindexed
46 * directory btree */
47
48struct ocfs2_reservation_map {
49 struct rb_root m_reservations;
50 char *m_disk_bitmap;
51
52 struct ocfs2_super *m_osb;
53
54 /* The following are not initialized to meaningful values until a disk
55 * bitmap is provided. */
56 u32 m_bitmap_len; /* Number of valid
57 * bits available */
58
59 struct list_head m_lru; /* LRU of reservations
60 * structures. */
61
62};
63
64void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
65
66#define OCFS2_RESV_TYPES (OCFS2_RESV_FLAG_TMP|OCFS2_RESV_FLAG_DIR)
67void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
68 unsigned int flags);
69
70int ocfs2_dir_resv_allowed(struct ocfs2_super *osb);
71
72/**
73 * ocfs2_resv_discard() - truncate a reservation
74 * @resmap:
75 * @resv: the reservation to truncate.
76 *
77 * After this function is called, the reservation will be empty, and
78 * unlinked from the rbtree.
79 */
80void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
81 struct ocfs2_alloc_reservation *resv);
82
83
84/**
85 * ocfs2_resmap_init() - Initialize fields of a reservations bitmap
86 * @resmap: struct ocfs2_reservation_map to initialize
87 * @obj: unused for now
88 * @ops: unused for now
89 * @max_bitmap_bytes: Maximum size of the bitmap (typically blocksize)
90 *
91 * Only possible return value other than '0' is -ENOMEM for failure to
92 * allocation mirror bitmap.
93 */
94int ocfs2_resmap_init(struct ocfs2_super *osb,
95 struct ocfs2_reservation_map *resmap);
96
97/**
98 * ocfs2_resmap_restart() - "restart" a reservation bitmap
99 * @resmap: reservations bitmap
100 * @clen: Number of valid bits in the bitmap
101 * @disk_bitmap: the disk bitmap this resmap should refer to.
102 *
103 * Re-initialize the parameters of a reservation bitmap. This is
104 * useful for local alloc window slides.
105 *
106 * This function will call ocfs2_trunc_resv against all existing
107 * reservations. A future version will recalculate existing
108 * reservations based on the new bitmap.
109 */
110void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
111 unsigned int clen, char *disk_bitmap);
112
113/**
114 * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
115 * @resmap: the struct ocfs2_reservation_map to uninitialize
116 */
117void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
118
119/**
120 * ocfs2_resmap_resv_bits() - Return still-valid reservation bits
121 * @resmap: reservations bitmap
122 * @resv: reservation to base search from
123 * @cstart: start of proposed allocation
124 * @clen: length (in clusters) of proposed allocation
125 *
126 * Using the reservation data from resv, this function will compare
127 * resmap and resmap->m_disk_bitmap to determine what part (if any) of
128 * the reservation window is still clear to use. If resv is empty,
129 * this function will try to allocate a window for it.
130 *
131 * On success, zero is returned and the valid allocation area is set in cstart
132 * and clen.
133 *
134 * Returns -ENOSPC if reservations are disabled.
135 */
136int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
137 struct ocfs2_alloc_reservation *resv,
138 int *cstart, int *clen);
139
140/**
141 * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
142 * @resmap: reservations bitmap
143 * @resv: optional reservation to recalulate based on new bitmap
144 * @cstart: start of allocation in clusters
145 * @clen: end of allocation in clusters.
146 *
147 * Tell the reservation code that bits were used to fulfill allocation in
148 * resmap. The bits don't have to have been part of any existing
149 * reservation. But we must always call this function when bits are claimed.
150 * Internally, the reservations code will use this information to mark the
151 * reservations bitmap. If resv is passed, it's next allocation window will be
152 * calculated. It also expects that 'cstart' is the same as we passed back
153 * from ocfs2_resmap_resv_bits().
154 */
155void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
156 struct ocfs2_alloc_reservation *resv,
157 u32 cstart, u32 clen);
158
159#endif /* OCFS2_RESERVATIONS_H */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 3c3d673a4d20..dacd553d8617 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -134,11 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
134 le16_add_cpu(&group->bg_free_bits_count, -1 * backups); 134 le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
135 } 135 }
136 136
137 ret = ocfs2_journal_dirty(handle, group_bh); 137 ocfs2_journal_dirty(handle, group_bh);
138 if (ret < 0) {
139 mlog_errno(ret);
140 goto out_rollback;
141 }
142 138
143 /* update the inode accordingly. */ 139 /* update the inode accordingly. */
144 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh, 140 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
@@ -319,7 +315,8 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
319 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 315 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
320 316
321 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != 317 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
322 ocfs2_group_bitmap_size(osb->sb) * 8) { 318 ocfs2_group_bitmap_size(osb->sb, 0,
319 osb->s_feature_incompat) * 8) {
323 mlog(ML_ERROR, "The disk is too old and small. " 320 mlog(ML_ERROR, "The disk is too old and small. "
324 "Force to do offline resize."); 321 "Force to do offline resize.");
325 ret = -EINVAL; 322 ret = -EINVAL;
@@ -500,7 +497,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
500 fe = (struct ocfs2_dinode *)main_bm_bh->b_data; 497 fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
501 498
502 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != 499 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
503 ocfs2_group_bitmap_size(osb->sb) * 8) { 500 ocfs2_group_bitmap_size(osb->sb, 0,
501 osb->s_feature_incompat) * 8) {
504 mlog(ML_ERROR, "The disk is too old and small." 502 mlog(ML_ERROR, "The disk is too old and small."
505 " Force to do offline resize."); 503 " Force to do offline resize.");
506 ret = -EINVAL; 504 ret = -EINVAL;
@@ -545,12 +543,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
545 543
546 group = (struct ocfs2_group_desc *)group_bh->b_data; 544 group = (struct ocfs2_group_desc *)group_bh->b_data;
547 group->bg_next_group = cr->c_blkno; 545 group->bg_next_group = cr->c_blkno;
548 546 ocfs2_journal_dirty(handle, group_bh);
549 ret = ocfs2_journal_dirty(handle, group_bh);
550 if (ret < 0) {
551 mlog_errno(ret);
552 goto out_commit;
553 }
554 547
555 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), 548 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
556 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); 549 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 19ba00f28547..f4c2a9eb8c4d 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -53,6 +53,15 @@
53 53
54#define OCFS2_MAX_TO_STEAL 1024 54#define OCFS2_MAX_TO_STEAL 1024
55 55
56struct ocfs2_suballoc_result {
57 u64 sr_bg_blkno; /* The bg we allocated from. Set
58 to 0 when a block group is
59 contiguous. */
60 u64 sr_blkno; /* The first allocated block */
61 unsigned int sr_bit_offset; /* The bit in the bg */
62 unsigned int sr_bits; /* How many bits we claimed */
63};
64
56static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 65static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
57static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 66static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
58static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 67static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -60,6 +69,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
60 struct inode *alloc_inode, 69 struct inode *alloc_inode,
61 struct buffer_head *bg_bh, 70 struct buffer_head *bg_bh,
62 u64 group_blkno, 71 u64 group_blkno,
72 unsigned int group_clusters,
63 u16 my_chain, 73 u16 my_chain,
64 struct ocfs2_chain_list *cl); 74 struct ocfs2_chain_list *cl);
65static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 75static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
@@ -73,20 +83,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
73 struct buffer_head *group_bh, 83 struct buffer_head *group_bh,
74 u32 bits_wanted, u32 min_bits, 84 u32 bits_wanted, u32 min_bits,
75 u64 max_block, 85 u64 max_block,
76 u16 *bit_off, u16 *bits_found); 86 struct ocfs2_suballoc_result *res);
77static int ocfs2_block_group_search(struct inode *inode, 87static int ocfs2_block_group_search(struct inode *inode,
78 struct buffer_head *group_bh, 88 struct buffer_head *group_bh,
79 u32 bits_wanted, u32 min_bits, 89 u32 bits_wanted, u32 min_bits,
80 u64 max_block, 90 u64 max_block,
81 u16 *bit_off, u16 *bits_found); 91 struct ocfs2_suballoc_result *res);
82static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 92static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
83 struct ocfs2_alloc_context *ac,
84 handle_t *handle, 93 handle_t *handle,
85 u32 bits_wanted, 94 u32 bits_wanted,
86 u32 min_bits, 95 u32 min_bits,
87 u16 *bit_off, 96 struct ocfs2_suballoc_result *res);
88 unsigned int *num_bits,
89 u64 *bg_blkno);
90static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 97static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
91 int nr); 98 int nr);
92static inline int ocfs2_block_group_set_bits(handle_t *handle, 99static inline int ocfs2_block_group_set_bits(handle_t *handle,
@@ -130,6 +137,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
130 } 137 }
131 brelse(ac->ac_bh); 138 brelse(ac->ac_bh);
132 ac->ac_bh = NULL; 139 ac->ac_bh = NULL;
140 ac->ac_resv = NULL;
133} 141}
134 142
135void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 143void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -325,14 +333,38 @@ out:
325 return rc; 333 return rc;
326} 334}
327 335
336static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
337 struct ocfs2_group_desc *bg,
338 struct ocfs2_chain_list *cl,
339 u64 p_blkno, u32 clusters)
340{
341 struct ocfs2_extent_list *el = &bg->bg_list;
342 struct ocfs2_extent_rec *rec;
343
344 BUG_ON(!ocfs2_supports_discontig_bg(osb));
345 if (!el->l_next_free_rec)
346 el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb));
347 rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)];
348 rec->e_blkno = cpu_to_le64(p_blkno);
349 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
350 le16_to_cpu(cl->cl_bpc));
351 rec->e_leaf_clusters = cpu_to_le32(clusters);
352 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
353 le16_add_cpu(&bg->bg_free_bits_count,
354 clusters * le16_to_cpu(cl->cl_bpc));
355 le16_add_cpu(&el->l_next_free_rec, 1);
356}
357
328static int ocfs2_block_group_fill(handle_t *handle, 358static int ocfs2_block_group_fill(handle_t *handle,
329 struct inode *alloc_inode, 359 struct inode *alloc_inode,
330 struct buffer_head *bg_bh, 360 struct buffer_head *bg_bh,
331 u64 group_blkno, 361 u64 group_blkno,
362 unsigned int group_clusters,
332 u16 my_chain, 363 u16 my_chain,
333 struct ocfs2_chain_list *cl) 364 struct ocfs2_chain_list *cl)
334{ 365{
335 int status = 0; 366 int status = 0;
367 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
336 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 368 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
337 struct super_block * sb = alloc_inode->i_sb; 369 struct super_block * sb = alloc_inode->i_sb;
338 370
@@ -359,19 +391,23 @@ static int ocfs2_block_group_fill(handle_t *handle,
359 memset(bg, 0, sb->s_blocksize); 391 memset(bg, 0, sb->s_blocksize);
360 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 392 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
361 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 393 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
362 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); 394 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1,
363 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); 395 osb->s_feature_incompat));
364 bg->bg_chain = cpu_to_le16(my_chain); 396 bg->bg_chain = cpu_to_le16(my_chain);
365 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; 397 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
366 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); 398 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
367 bg->bg_blkno = cpu_to_le64(group_blkno); 399 bg->bg_blkno = cpu_to_le64(group_blkno);
400 if (group_clusters == le16_to_cpu(cl->cl_cpg))
401 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
402 else
403 ocfs2_bg_discontig_add_extent(osb, bg, cl, group_blkno,
404 group_clusters);
405
368 /* set the 1st bit in the bitmap to account for the descriptor block */ 406 /* set the 1st bit in the bitmap to account for the descriptor block */
369 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); 407 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
370 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); 408 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
371 409
372 status = ocfs2_journal_dirty(handle, bg_bh); 410 ocfs2_journal_dirty(handle, bg_bh);
373 if (status < 0)
374 mlog_errno(status);
375 411
376 /* There is no need to zero out or otherwise initialize the 412 /* There is no need to zero out or otherwise initialize the
377 * other blocks in a group - All valid FS metadata in a block 413 * other blocks in a group - All valid FS metadata in a block
@@ -397,6 +433,238 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
397 return best; 433 return best;
398} 434}
399 435
436static struct buffer_head *
437ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
438 struct inode *alloc_inode,
439 struct ocfs2_alloc_context *ac,
440 struct ocfs2_chain_list *cl)
441{
442 int status;
443 u32 bit_off, num_bits;
444 u64 bg_blkno;
445 struct buffer_head *bg_bh;
446 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
447
448 status = ocfs2_claim_clusters(handle, ac,
449 le16_to_cpu(cl->cl_cpg), &bit_off,
450 &num_bits);
451 if (status < 0) {
452 if (status != -ENOSPC)
453 mlog_errno(status);
454 goto bail;
455 }
456
457 /* setup the group */
458 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
459 mlog(0, "new descriptor, record %u, at block %llu\n",
460 alloc_rec, (unsigned long long)bg_blkno);
461
462 bg_bh = sb_getblk(osb->sb, bg_blkno);
463 if (!bg_bh) {
464 status = -EIO;
465 mlog_errno(status);
466 goto bail;
467 }
468 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
469
470 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
471 bg_blkno, num_bits, alloc_rec, cl);
472 if (status < 0) {
473 brelse(bg_bh);
474 mlog_errno(status);
475 }
476
477bail:
478 return status ? ERR_PTR(status) : bg_bh;
479}
480
481static int ocfs2_block_group_claim_bits(struct ocfs2_super *osb,
482 handle_t *handle,
483 struct ocfs2_alloc_context *ac,
484 unsigned int min_bits,
485 u32 *bit_off, u32 *num_bits)
486{
487 int status = 0;
488
489 while (min_bits) {
490 status = ocfs2_claim_clusters(handle, ac, min_bits,
491 bit_off, num_bits);
492 if (status != -ENOSPC)
493 break;
494
495 min_bits >>= 1;
496 }
497
498 return status;
499}
500
501static int ocfs2_block_group_grow_discontig(handle_t *handle,
502 struct inode *alloc_inode,
503 struct buffer_head *bg_bh,
504 struct ocfs2_alloc_context *ac,
505 struct ocfs2_chain_list *cl,
506 unsigned int min_bits)
507{
508 int status;
509 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
510 struct ocfs2_group_desc *bg =
511 (struct ocfs2_group_desc *)bg_bh->b_data;
512 unsigned int needed = le16_to_cpu(cl->cl_cpg) -
513 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
514 u32 p_cpos, clusters;
515 u64 p_blkno;
516 struct ocfs2_extent_list *el = &bg->bg_list;
517
518 status = ocfs2_journal_access_gd(handle,
519 INODE_CACHE(alloc_inode),
520 bg_bh,
521 OCFS2_JOURNAL_ACCESS_CREATE);
522 if (status < 0) {
523 mlog_errno(status);
524 goto bail;
525 }
526
527 while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) <
528 le16_to_cpu(el->l_count))) {
529 if (min_bits > needed)
530 min_bits = needed;
531 status = ocfs2_block_group_claim_bits(osb, handle, ac,
532 min_bits, &p_cpos,
533 &clusters);
534 if (status < 0) {
535 if (status != -ENOSPC)
536 mlog_errno(status);
537 goto bail;
538 }
539 p_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cpos);
540 ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno,
541 clusters);
542
543 min_bits = clusters;
544 needed = le16_to_cpu(cl->cl_cpg) -
545 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
546 }
547
548 if (needed > 0) {
549 /*
550 * We have used up all the extent rec but can't fill up
551 * the cpg. So bail out.
552 */
553 status = -ENOSPC;
554 goto bail;
555 }
556
557 ocfs2_journal_dirty(handle, bg_bh);
558
559bail:
560 return status;
561}
562
563static void ocfs2_bg_alloc_cleanup(handle_t *handle,
564 struct ocfs2_alloc_context *cluster_ac,
565 struct inode *alloc_inode,
566 struct buffer_head *bg_bh)
567{
568 int i, ret;
569 struct ocfs2_group_desc *bg;
570 struct ocfs2_extent_list *el;
571 struct ocfs2_extent_rec *rec;
572
573 if (!bg_bh)
574 return;
575
576 bg = (struct ocfs2_group_desc *)bg_bh->b_data;
577 el = &bg->bg_list;
578 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
579 rec = &el->l_recs[i];
580 ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
581 cluster_ac->ac_bh,
582 le64_to_cpu(rec->e_blkno),
583 le32_to_cpu(rec->e_leaf_clusters));
584 if (ret)
585 mlog_errno(ret);
586 /* Try all the clusters to free */
587 }
588
589 ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), bg_bh);
590 brelse(bg_bh);
591}
592
593static struct buffer_head *
594ocfs2_block_group_alloc_discontig(handle_t *handle,
595 struct inode *alloc_inode,
596 struct ocfs2_alloc_context *ac,
597 struct ocfs2_chain_list *cl)
598{
599 int status;
600 u32 bit_off, num_bits;
601 u64 bg_blkno;
602 unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1;
603 struct buffer_head *bg_bh = NULL;
604 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
605 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
606
607 if (!ocfs2_supports_discontig_bg(osb)) {
608 status = -ENOSPC;
609 goto bail;
610 }
611
612 status = ocfs2_extend_trans(handle,
613 ocfs2_calc_bg_discontig_credits(osb->sb));
614 if (status) {
615 mlog_errno(status);
616 goto bail;
617 }
618
619 /*
620 * We're going to be grabbing from multiple cluster groups.
621 * We don't have enough credits to relink them all, and the
622 * cluster groups will be staying in cache for the duration of
623 * this operation.
624 */
625 ac->ac_allow_chain_relink = 0;
626
627 /* Claim the first region */
628 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
629 &bit_off, &num_bits);
630 if (status < 0) {
631 if (status != -ENOSPC)
632 mlog_errno(status);
633 goto bail;
634 }
635 min_bits = num_bits;
636
637 /* setup the group */
638 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
639 mlog(0, "new descriptor, record %u, at block %llu\n",
640 alloc_rec, (unsigned long long)bg_blkno);
641
642 bg_bh = sb_getblk(osb->sb, bg_blkno);
643 if (!bg_bh) {
644 status = -EIO;
645 mlog_errno(status);
646 goto bail;
647 }
648 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
649
650 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
651 bg_blkno, num_bits, alloc_rec, cl);
652 if (status < 0) {
653 mlog_errno(status);
654 goto bail;
655 }
656
657 status = ocfs2_block_group_grow_discontig(handle, alloc_inode,
658 bg_bh, ac, cl, min_bits);
659 if (status)
660 mlog_errno(status);
661
662bail:
663 if (status)
664 ocfs2_bg_alloc_cleanup(handle, ac, alloc_inode, bg_bh);
665 return status ? ERR_PTR(status) : bg_bh;
666}
667
400/* 668/*
401 * We expect the block group allocator to already be locked. 669 * We expect the block group allocator to already be locked.
402 */ 670 */
@@ -412,9 +680,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
412 struct ocfs2_chain_list *cl; 680 struct ocfs2_chain_list *cl;
413 struct ocfs2_alloc_context *ac = NULL; 681 struct ocfs2_alloc_context *ac = NULL;
414 handle_t *handle = NULL; 682 handle_t *handle = NULL;
415 u32 bit_off, num_bits;
416 u16 alloc_rec; 683 u16 alloc_rec;
417 u64 bg_blkno;
418 struct buffer_head *bg_bh = NULL; 684 struct buffer_head *bg_bh = NULL;
419 struct ocfs2_group_desc *bg; 685 struct ocfs2_group_desc *bg;
420 686
@@ -447,44 +713,20 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
447 (unsigned long long)*last_alloc_group); 713 (unsigned long long)*last_alloc_group);
448 ac->ac_last_group = *last_alloc_group; 714 ac->ac_last_group = *last_alloc_group;
449 } 715 }
450 status = ocfs2_claim_clusters(osb, 716
451 handle, 717 bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
452 ac, 718 ac, cl);
453 le16_to_cpu(cl->cl_cpg), 719 if (IS_ERR(bg_bh) && (PTR_ERR(bg_bh) == -ENOSPC))
454 &bit_off, 720 bg_bh = ocfs2_block_group_alloc_discontig(handle,
455 &num_bits); 721 alloc_inode,
456 if (status < 0) { 722 ac, cl);
723 if (IS_ERR(bg_bh)) {
724 status = PTR_ERR(bg_bh);
725 bg_bh = NULL;
457 if (status != -ENOSPC) 726 if (status != -ENOSPC)
458 mlog_errno(status); 727 mlog_errno(status);
459 goto bail; 728 goto bail;
460 } 729 }
461
462 alloc_rec = ocfs2_find_smallest_chain(cl);
463
464 /* setup the group */
465 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
466 mlog(0, "new descriptor, record %u, at block %llu\n",
467 alloc_rec, (unsigned long long)bg_blkno);
468
469 bg_bh = sb_getblk(osb->sb, bg_blkno);
470 if (!bg_bh) {
471 status = -EIO;
472 mlog_errno(status);
473 goto bail;
474 }
475 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
476
477 status = ocfs2_block_group_fill(handle,
478 alloc_inode,
479 bg_bh,
480 bg_blkno,
481 alloc_rec,
482 cl);
483 if (status < 0) {
484 mlog_errno(status);
485 goto bail;
486 }
487
488 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 730 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
489 731
490 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), 732 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
@@ -494,10 +736,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
494 goto bail; 736 goto bail;
495 } 737 }
496 738
739 alloc_rec = le16_to_cpu(bg->bg_chain);
497 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, 740 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
498 le16_to_cpu(bg->bg_free_bits_count)); 741 le16_to_cpu(bg->bg_free_bits_count));
499 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); 742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
500 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); 743 le16_to_cpu(bg->bg_bits));
744 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno);
501 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
502 le16_add_cpu(&cl->cl_next_free_rec, 1); 746 le16_add_cpu(&cl->cl_next_free_rec, 1);
503 747
@@ -506,11 +750,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
506 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); 750 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
507 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); 751 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
508 752
509 status = ocfs2_journal_dirty(handle, bh); 753 ocfs2_journal_dirty(handle, bh);
510 if (status < 0) {
511 mlog_errno(status);
512 goto bail;
513 }
514 754
515 spin_lock(&OCFS2_I(alloc_inode)->ip_lock); 755 spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
516 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 756 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
@@ -760,7 +1000,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
760 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 1000 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
761 EXTENT_ALLOC_SYSTEM_INODE, 1001 EXTENT_ALLOC_SYSTEM_INODE,
762 (u32)osb->slot_num, NULL, 1002 (u32)osb->slot_num, NULL,
763 ALLOC_NEW_GROUP); 1003 ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP);
764 1004
765 1005
766 if (status >= 0) { 1006 if (status >= 0) {
@@ -946,11 +1186,7 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
946 status = ocfs2_reserve_local_alloc_bits(osb, 1186 status = ocfs2_reserve_local_alloc_bits(osb,
947 bits_wanted, 1187 bits_wanted,
948 *ac); 1188 *ac);
949 if (status == -EFBIG) { 1189 if ((status < 0) && (status != -ENOSPC)) {
950 /* The local alloc window is outside ac_max_block.
951 * use the main bitmap. */
952 status = -ENOSPC;
953 } else if ((status < 0) && (status != -ENOSPC)) {
954 mlog_errno(status); 1190 mlog_errno(status);
955 goto bail; 1191 goto bail;
956 } 1192 }
@@ -1033,8 +1269,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1033 struct buffer_head *bg_bh, 1269 struct buffer_head *bg_bh,
1034 unsigned int bits_wanted, 1270 unsigned int bits_wanted,
1035 unsigned int total_bits, 1271 unsigned int total_bits,
1036 u16 *bit_off, 1272 struct ocfs2_suballoc_result *res)
1037 u16 *bits_found)
1038{ 1273{
1039 void *bitmap; 1274 void *bitmap;
1040 u16 best_offset, best_size; 1275 u16 best_offset, best_size;
@@ -1078,14 +1313,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1078 } 1313 }
1079 } 1314 }
1080 1315
1081 /* XXX: I think the first clause is equivalent to the second 1316 if (best_size) {
1082 * - jlbec */ 1317 res->sr_bit_offset = best_offset;
1083 if (found == bits_wanted) { 1318 res->sr_bits = best_size;
1084 *bit_off = start - found;
1085 *bits_found = found;
1086 } else if (best_size) {
1087 *bit_off = best_offset;
1088 *bits_found = best_size;
1089 } else { 1319 } else {
1090 status = -ENOSPC; 1320 status = -ENOSPC;
1091 /* No error log here -- see the comment above 1321 /* No error log here -- see the comment above
@@ -1129,16 +1359,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1129 } 1359 }
1130 1360
1131 le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 1361 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
1132
1133 while(num_bits--) 1362 while(num_bits--)
1134 ocfs2_set_bit(bit_off++, bitmap); 1363 ocfs2_set_bit(bit_off++, bitmap);
1135 1364
1136 status = ocfs2_journal_dirty(handle, 1365 ocfs2_journal_dirty(handle, group_bh);
1137 group_bh);
1138 if (status < 0) {
1139 mlog_errno(status);
1140 goto bail;
1141 }
1142 1366
1143bail: 1367bail:
1144 mlog_exit(status); 1368 mlog_exit(status);
@@ -1202,12 +1426,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
1202 } 1426 }
1203 1427
1204 prev_bg->bg_next_group = bg->bg_next_group; 1428 prev_bg->bg_next_group = bg->bg_next_group;
1205 1429 ocfs2_journal_dirty(handle, prev_bg_bh);
1206 status = ocfs2_journal_dirty(handle, prev_bg_bh);
1207 if (status < 0) {
1208 mlog_errno(status);
1209 goto out_rollback;
1210 }
1211 1430
1212 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), 1431 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1213 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1432 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1217,12 +1436,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
1217 } 1436 }
1218 1437
1219 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; 1438 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
1220 1439 ocfs2_journal_dirty(handle, bg_bh);
1221 status = ocfs2_journal_dirty(handle, bg_bh);
1222 if (status < 0) {
1223 mlog_errno(status);
1224 goto out_rollback;
1225 }
1226 1440
1227 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), 1441 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1228 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1442 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1232,14 +1446,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1232 } 1446 }
1233 1447
1234 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; 1448 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
1449 ocfs2_journal_dirty(handle, fe_bh);
1235 1450
1236 status = ocfs2_journal_dirty(handle, fe_bh);
1237 if (status < 0) {
1238 mlog_errno(status);
1239 goto out_rollback;
1240 }
1241
1242 status = 0;
1243out_rollback: 1451out_rollback:
1244 if (status < 0) { 1452 if (status < 0) {
1245 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); 1453 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
@@ -1263,14 +1471,13 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1263 struct buffer_head *group_bh, 1471 struct buffer_head *group_bh,
1264 u32 bits_wanted, u32 min_bits, 1472 u32 bits_wanted, u32 min_bits,
1265 u64 max_block, 1473 u64 max_block,
1266 u16 *bit_off, u16 *bits_found) 1474 struct ocfs2_suballoc_result *res)
1267{ 1475{
1268 int search = -ENOSPC; 1476 int search = -ENOSPC;
1269 int ret; 1477 int ret;
1270 u64 blkoff; 1478 u64 blkoff;
1271 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; 1479 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1272 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1480 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1273 u16 tmp_off, tmp_found;
1274 unsigned int max_bits, gd_cluster_off; 1481 unsigned int max_bits, gd_cluster_off;
1275 1482
1276 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1483 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
@@ -1297,15 +1504,15 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1297 1504
1298 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1505 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1299 group_bh, bits_wanted, 1506 group_bh, bits_wanted,
1300 max_bits, 1507 max_bits, res);
1301 &tmp_off, &tmp_found);
1302 if (ret) 1508 if (ret)
1303 return ret; 1509 return ret;
1304 1510
1305 if (max_block) { 1511 if (max_block) {
1306 blkoff = ocfs2_clusters_to_blocks(inode->i_sb, 1512 blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1307 gd_cluster_off + 1513 gd_cluster_off +
1308 tmp_off + tmp_found); 1514 res->sr_bit_offset +
1515 res->sr_bits);
1309 mlog(0, "Checking %llu against %llu\n", 1516 mlog(0, "Checking %llu against %llu\n",
1310 (unsigned long long)blkoff, 1517 (unsigned long long)blkoff,
1311 (unsigned long long)max_block); 1518 (unsigned long long)max_block);
@@ -1317,16 +1524,14 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1317 * return success, but we still want to return 1524 * return success, but we still want to return
1318 * -ENOSPC unless it found the minimum number 1525 * -ENOSPC unless it found the minimum number
1319 * of bits. */ 1526 * of bits. */
1320 if (min_bits <= tmp_found) { 1527 if (min_bits <= res->sr_bits)
1321 *bit_off = tmp_off;
1322 *bits_found = tmp_found;
1323 search = 0; /* success */ 1528 search = 0; /* success */
1324 } else if (tmp_found) { 1529 else if (res->sr_bits) {
1325 /* 1530 /*
1326 * Don't show bits which we'll be returning 1531 * Don't show bits which we'll be returning
1327 * for allocation to the local alloc bitmap. 1532 * for allocation to the local alloc bitmap.
1328 */ 1533 */
1329 ocfs2_local_alloc_seen_free_bits(osb, tmp_found); 1534 ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
1330 } 1535 }
1331 } 1536 }
1332 1537
@@ -1337,7 +1542,7 @@ static int ocfs2_block_group_search(struct inode *inode,
1337 struct buffer_head *group_bh, 1542 struct buffer_head *group_bh,
1338 u32 bits_wanted, u32 min_bits, 1543 u32 bits_wanted, u32 min_bits,
1339 u64 max_block, 1544 u64 max_block,
1340 u16 *bit_off, u16 *bits_found) 1545 struct ocfs2_suballoc_result *res)
1341{ 1546{
1342 int ret = -ENOSPC; 1547 int ret = -ENOSPC;
1343 u64 blkoff; 1548 u64 blkoff;
@@ -1350,10 +1555,10 @@ static int ocfs2_block_group_search(struct inode *inode,
1350 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1555 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1351 group_bh, bits_wanted, 1556 group_bh, bits_wanted,
1352 le16_to_cpu(bg->bg_bits), 1557 le16_to_cpu(bg->bg_bits),
1353 bit_off, bits_found); 1558 res);
1354 if (!ret && max_block) { 1559 if (!ret && max_block) {
1355 blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + 1560 blkoff = le64_to_cpu(bg->bg_blkno) +
1356 *bits_found; 1561 res->sr_bit_offset + res->sr_bits;
1357 mlog(0, "Checking %llu against %llu\n", 1562 mlog(0, "Checking %llu against %llu\n",
1358 (unsigned long long)blkoff, 1563 (unsigned long long)blkoff,
1359 (unsigned long long)max_block); 1564 (unsigned long long)max_block);
@@ -1386,33 +1591,76 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1386 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); 1591 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1387 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); 1592 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1388 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); 1593 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1389 1594 ocfs2_journal_dirty(handle, di_bh);
1390 ret = ocfs2_journal_dirty(handle, di_bh);
1391 if (ret < 0)
1392 mlog_errno(ret);
1393 1595
1394out: 1596out:
1395 return ret; 1597 return ret;
1396} 1598}
1397 1599
1600static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
1601 struct ocfs2_extent_rec *rec,
1602 struct ocfs2_chain_list *cl)
1603{
1604 unsigned int bpc = le16_to_cpu(cl->cl_bpc);
1605 unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
1606 unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
1607
1608 if (res->sr_bit_offset < bitoff)
1609 return 0;
1610 if (res->sr_bit_offset >= (bitoff + bitcount))
1611 return 0;
1612 res->sr_blkno = le64_to_cpu(rec->e_blkno) +
1613 (res->sr_bit_offset - bitoff);
1614 if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount))
1615 res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset;
1616 return 1;
1617}
1618
1619static void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac,
1620 struct ocfs2_group_desc *bg,
1621 struct ocfs2_suballoc_result *res)
1622{
1623 int i;
1624 u64 bg_blkno = res->sr_bg_blkno; /* Save off */
1625 struct ocfs2_extent_rec *rec;
1626 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1627 struct ocfs2_chain_list *cl = &di->id2.i_chain;
1628
1629 if (ocfs2_is_cluster_bitmap(ac->ac_inode)) {
1630 res->sr_blkno = 0;
1631 return;
1632 }
1633
1634 res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset;
1635 res->sr_bg_blkno = 0; /* Clear it for contig block groups */
1636 if (!ocfs2_supports_discontig_bg(OCFS2_SB(ac->ac_inode->i_sb)) ||
1637 !bg->bg_list.l_next_free_rec)
1638 return;
1639
1640 for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) {
1641 rec = &bg->bg_list.l_recs[i];
1642 if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) {
1643 res->sr_bg_blkno = bg_blkno; /* Restore */
1644 break;
1645 }
1646 }
1647}
1648
1398static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, 1649static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1399 handle_t *handle, 1650 handle_t *handle,
1400 u32 bits_wanted, 1651 u32 bits_wanted,
1401 u32 min_bits, 1652 u32 min_bits,
1402 u16 *bit_off, 1653 struct ocfs2_suballoc_result *res,
1403 unsigned int *num_bits,
1404 u64 gd_blkno,
1405 u16 *bits_left) 1654 u16 *bits_left)
1406{ 1655{
1407 int ret; 1656 int ret;
1408 u16 found;
1409 struct buffer_head *group_bh = NULL; 1657 struct buffer_head *group_bh = NULL;
1410 struct ocfs2_group_desc *gd; 1658 struct ocfs2_group_desc *gd;
1411 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; 1659 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1412 struct inode *alloc_inode = ac->ac_inode; 1660 struct inode *alloc_inode = ac->ac_inode;
1413 1661
1414 ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, 1662 ret = ocfs2_read_group_descriptor(alloc_inode, di,
1415 &group_bh); 1663 res->sr_bg_blkno, &group_bh);
1416 if (ret < 0) { 1664 if (ret < 0) {
1417 mlog_errno(ret); 1665 mlog_errno(ret);
1418 return ret; 1666 return ret;
@@ -1420,17 +1668,18 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1420 1668
1421 gd = (struct ocfs2_group_desc *) group_bh->b_data; 1669 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1422 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1670 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1423 ac->ac_max_block, bit_off, &found); 1671 ac->ac_max_block, res);
1424 if (ret < 0) { 1672 if (ret < 0) {
1425 if (ret != -ENOSPC) 1673 if (ret != -ENOSPC)
1426 mlog_errno(ret); 1674 mlog_errno(ret);
1427 goto out; 1675 goto out;
1428 } 1676 }
1429 1677
1430 *num_bits = found; 1678 if (!ret)
1679 ocfs2_bg_discontig_fix_result(ac, gd, res);
1431 1680
1432 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, 1681 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1433 *num_bits, 1682 res->sr_bits,
1434 le16_to_cpu(gd->bg_chain)); 1683 le16_to_cpu(gd->bg_chain));
1435 if (ret < 0) { 1684 if (ret < 0) {
1436 mlog_errno(ret); 1685 mlog_errno(ret);
@@ -1438,7 +1687,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1438 } 1687 }
1439 1688
1440 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, 1689 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1441 *bit_off, *num_bits); 1690 res->sr_bit_offset, res->sr_bits);
1442 if (ret < 0) 1691 if (ret < 0)
1443 mlog_errno(ret); 1692 mlog_errno(ret);
1444 1693
@@ -1454,13 +1703,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1454 handle_t *handle, 1703 handle_t *handle,
1455 u32 bits_wanted, 1704 u32 bits_wanted,
1456 u32 min_bits, 1705 u32 min_bits,
1457 u16 *bit_off, 1706 struct ocfs2_suballoc_result *res,
1458 unsigned int *num_bits,
1459 u64 *bg_blkno,
1460 u16 *bits_left) 1707 u16 *bits_left)
1461{ 1708{
1462 int status; 1709 int status;
1463 u16 chain, tmp_bits; 1710 u16 chain;
1464 u32 tmp_used; 1711 u32 tmp_used;
1465 u64 next_group; 1712 u64 next_group;
1466 struct inode *alloc_inode = ac->ac_inode; 1713 struct inode *alloc_inode = ac->ac_inode;
@@ -1489,8 +1736,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1489 * the 1st group with any empty bits. */ 1736 * the 1st group with any empty bits. */
1490 while ((status = ac->ac_group_search(alloc_inode, group_bh, 1737 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1491 bits_wanted, min_bits, 1738 bits_wanted, min_bits,
1492 ac->ac_max_block, bit_off, 1739 ac->ac_max_block,
1493 &tmp_bits)) == -ENOSPC) { 1740 res)) == -ENOSPC) {
1494 if (!bg->bg_next_group) 1741 if (!bg->bg_next_group)
1495 break; 1742 break;
1496 1743
@@ -1515,11 +1762,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1515 } 1762 }
1516 1763
1517 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1764 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1518 tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); 1765 res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
1519 1766
1520 *num_bits = tmp_bits; 1767 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
1768
1769 BUG_ON(res->sr_bits == 0);
1770 if (!status)
1771 ocfs2_bg_discontig_fix_result(ac, bg, res);
1521 1772
1522 BUG_ON(*num_bits == 0);
1523 1773
1524 /* 1774 /*
1525 * Keep track of previous block descriptor read. When 1775 * Keep track of previous block descriptor read. When
@@ -1536,7 +1786,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1536 */ 1786 */
1537 if (ac->ac_allow_chain_relink && 1787 if (ac->ac_allow_chain_relink &&
1538 (prev_group_bh) && 1788 (prev_group_bh) &&
1539 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { 1789 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
1540 status = ocfs2_relink_block_group(handle, alloc_inode, 1790 status = ocfs2_relink_block_group(handle, alloc_inode,
1541 ac->ac_bh, group_bh, 1791 ac->ac_bh, group_bh,
1542 prev_group_bh, chain); 1792 prev_group_bh, chain);
@@ -1558,31 +1808,24 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1558 } 1808 }
1559 1809
1560 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1810 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1561 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); 1811 fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
1562 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); 1812 le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
1563 1813 ocfs2_journal_dirty(handle, ac->ac_bh);
1564 status = ocfs2_journal_dirty(handle,
1565 ac->ac_bh);
1566 if (status < 0) {
1567 mlog_errno(status);
1568 goto bail;
1569 }
1570 1814
1571 status = ocfs2_block_group_set_bits(handle, 1815 status = ocfs2_block_group_set_bits(handle,
1572 alloc_inode, 1816 alloc_inode,
1573 bg, 1817 bg,
1574 group_bh, 1818 group_bh,
1575 *bit_off, 1819 res->sr_bit_offset,
1576 *num_bits); 1820 res->sr_bits);
1577 if (status < 0) { 1821 if (status < 0) {
1578 mlog_errno(status); 1822 mlog_errno(status);
1579 goto bail; 1823 goto bail;
1580 } 1824 }
1581 1825
1582 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, 1826 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
1583 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1827 (unsigned long long)le64_to_cpu(fe->i_blkno));
1584 1828
1585 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1586 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1829 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1587bail: 1830bail:
1588 brelse(group_bh); 1831 brelse(group_bh);
@@ -1593,19 +1836,15 @@ bail:
1593} 1836}
1594 1837
1595/* will give out up to bits_wanted contiguous bits. */ 1838/* will give out up to bits_wanted contiguous bits. */
1596static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 1839static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1597 struct ocfs2_alloc_context *ac,
1598 handle_t *handle, 1840 handle_t *handle,
1599 u32 bits_wanted, 1841 u32 bits_wanted,
1600 u32 min_bits, 1842 u32 min_bits,
1601 u16 *bit_off, 1843 struct ocfs2_suballoc_result *res)
1602 unsigned int *num_bits,
1603 u64 *bg_blkno)
1604{ 1844{
1605 int status; 1845 int status;
1606 u16 victim, i; 1846 u16 victim, i;
1607 u16 bits_left = 0; 1847 u16 bits_left = 0;
1608 u64 hint_blkno = ac->ac_last_group;
1609 struct ocfs2_chain_list *cl; 1848 struct ocfs2_chain_list *cl;
1610 struct ocfs2_dinode *fe; 1849 struct ocfs2_dinode *fe;
1611 1850
@@ -1623,7 +1862,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1623 1862
1624 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1863 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1625 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1864 le32_to_cpu(fe->id1.bitmap1.i_total)) {
1626 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " 1865 ocfs2_error(ac->ac_inode->i_sb,
1866 "Chain allocator dinode %llu has %u used "
1627 "bits but only %u total.", 1867 "bits but only %u total.",
1628 (unsigned long long)le64_to_cpu(fe->i_blkno), 1868 (unsigned long long)le64_to_cpu(fe->i_blkno),
1629 le32_to_cpu(fe->id1.bitmap1.i_used), 1869 le32_to_cpu(fe->id1.bitmap1.i_used),
@@ -1632,22 +1872,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1632 goto bail; 1872 goto bail;
1633 } 1873 }
1634 1874
1635 if (hint_blkno) { 1875 res->sr_bg_blkno = ac->ac_last_group;
1876 if (res->sr_bg_blkno) {
1636 /* Attempt to short-circuit the usual search mechanism 1877 /* Attempt to short-circuit the usual search mechanism
1637 * by jumping straight to the most recently used 1878 * by jumping straight to the most recently used
1638 * allocation group. This helps us mantain some 1879 * allocation group. This helps us mantain some
1639 * contiguousness across allocations. */ 1880 * contiguousness across allocations. */
1640 status = ocfs2_search_one_group(ac, handle, bits_wanted, 1881 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1641 min_bits, bit_off, num_bits, 1882 min_bits, res, &bits_left);
1642 hint_blkno, &bits_left); 1883 if (!status)
1643 if (!status) {
1644 /* Be careful to update *bg_blkno here as the
1645 * caller is expecting it to be filled in, and
1646 * ocfs2_search_one_group() won't do that for
1647 * us. */
1648 *bg_blkno = hint_blkno;
1649 goto set_hint; 1884 goto set_hint;
1650 }
1651 if (status < 0 && status != -ENOSPC) { 1885 if (status < 0 && status != -ENOSPC) {
1652 mlog_errno(status); 1886 mlog_errno(status);
1653 goto bail; 1887 goto bail;
@@ -1660,8 +1894,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1660 ac->ac_chain = victim; 1894 ac->ac_chain = victim;
1661 ac->ac_allow_chain_relink = 1; 1895 ac->ac_allow_chain_relink = 1;
1662 1896
1663 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, 1897 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1664 num_bits, bg_blkno, &bits_left); 1898 res, &bits_left);
1665 if (!status) 1899 if (!status)
1666 goto set_hint; 1900 goto set_hint;
1667 if (status < 0 && status != -ENOSPC) { 1901 if (status < 0 && status != -ENOSPC) {
@@ -1685,8 +1919,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1685 1919
1686 ac->ac_chain = i; 1920 ac->ac_chain = i;
1687 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1921 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1688 bit_off, num_bits, bg_blkno, 1922 res, &bits_left);
1689 &bits_left);
1690 if (!status) 1923 if (!status)
1691 break; 1924 break;
1692 if (status < 0 && status != -ENOSPC) { 1925 if (status < 0 && status != -ENOSPC) {
@@ -1703,7 +1936,7 @@ set_hint:
1703 if (bits_left < min_bits) 1936 if (bits_left < min_bits)
1704 ac->ac_last_group = 0; 1937 ac->ac_last_group = 0;
1705 else 1938 else
1706 ac->ac_last_group = *bg_blkno; 1939 ac->ac_last_group = res->sr_bg_blkno;
1707 } 1940 }
1708 1941
1709bail: 1942bail:
@@ -1711,37 +1944,37 @@ bail:
1711 return status; 1944 return status;
1712} 1945}
1713 1946
1714int ocfs2_claim_metadata(struct ocfs2_super *osb, 1947int ocfs2_claim_metadata(handle_t *handle,
1715 handle_t *handle,
1716 struct ocfs2_alloc_context *ac, 1948 struct ocfs2_alloc_context *ac,
1717 u32 bits_wanted, 1949 u32 bits_wanted,
1950 u64 *suballoc_loc,
1718 u16 *suballoc_bit_start, 1951 u16 *suballoc_bit_start,
1719 unsigned int *num_bits, 1952 unsigned int *num_bits,
1720 u64 *blkno_start) 1953 u64 *blkno_start)
1721{ 1954{
1722 int status; 1955 int status;
1723 u64 bg_blkno; 1956 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
1724 1957
1725 BUG_ON(!ac); 1958 BUG_ON(!ac);
1726 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); 1959 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1727 BUG_ON(ac->ac_which != OCFS2_AC_USE_META); 1960 BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
1728 1961
1729 status = ocfs2_claim_suballoc_bits(osb, 1962 status = ocfs2_claim_suballoc_bits(ac,
1730 ac,
1731 handle, 1963 handle,
1732 bits_wanted, 1964 bits_wanted,
1733 1, 1965 1,
1734 suballoc_bit_start, 1966 &res);
1735 num_bits,
1736 &bg_blkno);
1737 if (status < 0) { 1967 if (status < 0) {
1738 mlog_errno(status); 1968 mlog_errno(status);
1739 goto bail; 1969 goto bail;
1740 } 1970 }
1741 atomic_inc(&osb->alloc_stats.bg_allocs); 1971 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
1742 1972
1743 *blkno_start = bg_blkno + (u64) *suballoc_bit_start; 1973 *suballoc_loc = res.sr_bg_blkno;
1744 ac->ac_bits_given += (*num_bits); 1974 *suballoc_bit_start = res.sr_bit_offset;
1975 *blkno_start = res.sr_blkno;
1976 ac->ac_bits_given += res.sr_bits;
1977 *num_bits = res.sr_bits;
1745 status = 0; 1978 status = 0;
1746bail: 1979bail:
1747 mlog_exit(status); 1980 mlog_exit(status);
@@ -1749,10 +1982,10 @@ bail:
1749} 1982}
1750 1983
1751static void ocfs2_init_inode_ac_group(struct inode *dir, 1984static void ocfs2_init_inode_ac_group(struct inode *dir,
1752 struct buffer_head *parent_fe_bh, 1985 struct buffer_head *parent_di_bh,
1753 struct ocfs2_alloc_context *ac) 1986 struct ocfs2_alloc_context *ac)
1754{ 1987{
1755 struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data; 1988 struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_di_bh->b_data;
1756 /* 1989 /*
1757 * Try to allocate inodes from some specific group. 1990 * Try to allocate inodes from some specific group.
1758 * 1991 *
@@ -1766,10 +1999,14 @@ static void ocfs2_init_inode_ac_group(struct inode *dir,
1766 if (OCFS2_I(dir)->ip_last_used_group && 1999 if (OCFS2_I(dir)->ip_last_used_group &&
1767 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot) 2000 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
1768 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group; 2001 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
1769 else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot) 2002 else if (le16_to_cpu(di->i_suballoc_slot) == ac->ac_alloc_slot) {
1770 ac->ac_last_group = ocfs2_which_suballoc_group( 2003 if (di->i_suballoc_loc)
1771 le64_to_cpu(fe->i_blkno), 2004 ac->ac_last_group = le64_to_cpu(di->i_suballoc_loc);
1772 le16_to_cpu(fe->i_suballoc_bit)); 2005 else
2006 ac->ac_last_group = ocfs2_which_suballoc_group(
2007 le64_to_cpu(di->i_blkno),
2008 le16_to_cpu(di->i_suballoc_bit));
2009 }
1773} 2010}
1774 2011
1775static inline void ocfs2_save_inode_ac_group(struct inode *dir, 2012static inline void ocfs2_save_inode_ac_group(struct inode *dir,
@@ -1779,17 +2016,16 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
1779 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; 2016 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
1780} 2017}
1781 2018
1782int ocfs2_claim_new_inode(struct ocfs2_super *osb, 2019int ocfs2_claim_new_inode(handle_t *handle,
1783 handle_t *handle,
1784 struct inode *dir, 2020 struct inode *dir,
1785 struct buffer_head *parent_fe_bh, 2021 struct buffer_head *parent_fe_bh,
1786 struct ocfs2_alloc_context *ac, 2022 struct ocfs2_alloc_context *ac,
2023 u64 *suballoc_loc,
1787 u16 *suballoc_bit, 2024 u16 *suballoc_bit,
1788 u64 *fe_blkno) 2025 u64 *fe_blkno)
1789{ 2026{
1790 int status; 2027 int status;
1791 unsigned int num_bits; 2028 struct ocfs2_suballoc_result res;
1792 u64 bg_blkno;
1793 2029
1794 mlog_entry_void(); 2030 mlog_entry_void();
1795 2031
@@ -1800,23 +2036,22 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1800 2036
1801 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); 2037 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
1802 2038
1803 status = ocfs2_claim_suballoc_bits(osb, 2039 status = ocfs2_claim_suballoc_bits(ac,
1804 ac,
1805 handle, 2040 handle,
1806 1, 2041 1,
1807 1, 2042 1,
1808 suballoc_bit, 2043 &res);
1809 &num_bits,
1810 &bg_blkno);
1811 if (status < 0) { 2044 if (status < 0) {
1812 mlog_errno(status); 2045 mlog_errno(status);
1813 goto bail; 2046 goto bail;
1814 } 2047 }
1815 atomic_inc(&osb->alloc_stats.bg_allocs); 2048 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
1816 2049
1817 BUG_ON(num_bits != 1); 2050 BUG_ON(res.sr_bits != 1);
1818 2051
1819 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 2052 *suballoc_loc = res.sr_bg_blkno;
2053 *suballoc_bit = res.sr_bit_offset;
2054 *fe_blkno = res.sr_blkno;
1820 ac->ac_bits_given++; 2055 ac->ac_bits_given++;
1821 ocfs2_save_inode_ac_group(dir, ac); 2056 ocfs2_save_inode_ac_group(dir, ac);
1822 status = 0; 2057 status = 0;
@@ -1886,8 +2121,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1886 * contig. allocation, set to '1' to indicate we can deal with extents 2121 * contig. allocation, set to '1' to indicate we can deal with extents
1887 * of any size. 2122 * of any size.
1888 */ 2123 */
1889int __ocfs2_claim_clusters(struct ocfs2_super *osb, 2124int __ocfs2_claim_clusters(handle_t *handle,
1890 handle_t *handle,
1891 struct ocfs2_alloc_context *ac, 2125 struct ocfs2_alloc_context *ac,
1892 u32 min_clusters, 2126 u32 min_clusters,
1893 u32 max_clusters, 2127 u32 max_clusters,
@@ -1896,8 +2130,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1896{ 2130{
1897 int status; 2131 int status;
1898 unsigned int bits_wanted = max_clusters; 2132 unsigned int bits_wanted = max_clusters;
1899 u64 bg_blkno = 0; 2133 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
1900 u16 bg_bit_off; 2134 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
1901 2135
1902 mlog_entry_void(); 2136 mlog_entry_void();
1903 2137
@@ -1907,6 +2141,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1907 && ac->ac_which != OCFS2_AC_USE_MAIN); 2141 && ac->ac_which != OCFS2_AC_USE_MAIN);
1908 2142
1909 if (ac->ac_which == OCFS2_AC_USE_LOCAL) { 2143 if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
2144 WARN_ON(min_clusters > 1);
2145
1910 status = ocfs2_claim_local_alloc_bits(osb, 2146 status = ocfs2_claim_local_alloc_bits(osb,
1911 handle, 2147 handle,
1912 ac, 2148 ac,
@@ -1929,20 +2165,19 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1929 if (bits_wanted > (osb->bitmap_cpg - 1)) 2165 if (bits_wanted > (osb->bitmap_cpg - 1))
1930 bits_wanted = osb->bitmap_cpg - 1; 2166 bits_wanted = osb->bitmap_cpg - 1;
1931 2167
1932 status = ocfs2_claim_suballoc_bits(osb, 2168 status = ocfs2_claim_suballoc_bits(ac,
1933 ac,
1934 handle, 2169 handle,
1935 bits_wanted, 2170 bits_wanted,
1936 min_clusters, 2171 min_clusters,
1937 &bg_bit_off, 2172 &res);
1938 num_clusters,
1939 &bg_blkno);
1940 if (!status) { 2173 if (!status) {
2174 BUG_ON(res.sr_blkno); /* cluster alloc can't set */
1941 *cluster_start = 2175 *cluster_start =
1942 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, 2176 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1943 bg_blkno, 2177 res.sr_bg_blkno,
1944 bg_bit_off); 2178 res.sr_bit_offset);
1945 atomic_inc(&osb->alloc_stats.bitmap_data); 2179 atomic_inc(&osb->alloc_stats.bitmap_data);
2180 *num_clusters = res.sr_bits;
1946 } 2181 }
1947 } 2182 }
1948 if (status < 0) { 2183 if (status < 0) {
@@ -1958,8 +2193,7 @@ bail:
1958 return status; 2193 return status;
1959} 2194}
1960 2195
1961int ocfs2_claim_clusters(struct ocfs2_super *osb, 2196int ocfs2_claim_clusters(handle_t *handle,
1962 handle_t *handle,
1963 struct ocfs2_alloc_context *ac, 2197 struct ocfs2_alloc_context *ac,
1964 u32 min_clusters, 2198 u32 min_clusters,
1965 u32 *cluster_start, 2199 u32 *cluster_start,
@@ -1967,7 +2201,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
1967{ 2201{
1968 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 2202 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1969 2203
1970 return __ocfs2_claim_clusters(osb, handle, ac, min_clusters, 2204 return __ocfs2_claim_clusters(handle, ac, min_clusters,
1971 bits_wanted, cluster_start, num_clusters); 2205 bits_wanted, cluster_start, num_clusters);
1972} 2206}
1973 2207
@@ -2023,9 +2257,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
2023 if (undo_fn) 2257 if (undo_fn)
2024 jbd_unlock_bh_state(group_bh); 2258 jbd_unlock_bh_state(group_bh);
2025 2259
2026 status = ocfs2_journal_dirty(handle, group_bh); 2260 ocfs2_journal_dirty(handle, group_bh);
2027 if (status < 0)
2028 mlog_errno(status);
2029bail: 2261bail:
2030 return status; 2262 return status;
2031} 2263}
@@ -2092,12 +2324,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2092 count); 2324 count);
2093 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 2325 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
2094 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); 2326 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
2095 2327 ocfs2_journal_dirty(handle, alloc_bh);
2096 status = ocfs2_journal_dirty(handle, alloc_bh);
2097 if (status < 0) {
2098 mlog_errno(status);
2099 goto bail;
2100 }
2101 2328
2102bail: 2329bail:
2103 brelse(group_bh); 2330 brelse(group_bh);
@@ -2126,6 +2353,8 @@ int ocfs2_free_dinode(handle_t *handle,
2126 u16 bit = le16_to_cpu(di->i_suballoc_bit); 2353 u16 bit = le16_to_cpu(di->i_suballoc_bit);
2127 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2354 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2128 2355
2356 if (di->i_suballoc_loc)
2357 bg_blkno = le64_to_cpu(di->i_suballoc_loc);
2129 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, 2358 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
2130 inode_alloc_bh, bit, bg_blkno, 1); 2359 inode_alloc_bh, bit, bg_blkno, 1);
2131} 2360}
@@ -2395,7 +2624,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2395 struct buffer_head *alloc_bh, u64 blkno, 2624 struct buffer_head *alloc_bh, u64 blkno,
2396 u16 bit, int *res) 2625 u16 bit, int *res)
2397{ 2626{
2398 struct ocfs2_dinode *alloc_fe; 2627 struct ocfs2_dinode *alloc_di;
2399 struct ocfs2_group_desc *group; 2628 struct ocfs2_group_desc *group;
2400 struct buffer_head *group_bh = NULL; 2629 struct buffer_head *group_bh = NULL;
2401 u64 bg_blkno; 2630 u64 bg_blkno;
@@ -2404,17 +2633,20 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2404 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, 2633 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
2405 (unsigned int)bit); 2634 (unsigned int)bit);
2406 2635
2407 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; 2636 alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
2408 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { 2637 if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
2409 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n", 2638 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
2410 (unsigned int)bit, 2639 (unsigned int)bit,
2411 ocfs2_bits_per_group(&alloc_fe->id2.i_chain)); 2640 ocfs2_bits_per_group(&alloc_di->id2.i_chain));
2412 status = -EINVAL; 2641 status = -EINVAL;
2413 goto bail; 2642 goto bail;
2414 } 2643 }
2415 2644
2416 bg_blkno = ocfs2_which_suballoc_group(blkno, bit); 2645 if (alloc_di->i_suballoc_loc)
2417 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, 2646 bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc);
2647 else
2648 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2649 status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
2418 &group_bh); 2650 &group_bh);
2419 if (status < 0) { 2651 if (status < 0) {
2420 mlog(ML_ERROR, "read group %llu failed %d\n", 2652 mlog(ML_ERROR, "read group %llu failed %d\n",
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index e0f46df357e6..a017dd3ee7d9 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -26,13 +26,14 @@
26#ifndef _CHAINALLOC_H_ 26#ifndef _CHAINALLOC_H_
27#define _CHAINALLOC_H_ 27#define _CHAINALLOC_H_
28 28
29struct ocfs2_suballoc_result;
29typedef int (group_search_t)(struct inode *, 30typedef int (group_search_t)(struct inode *,
30 struct buffer_head *, 31 struct buffer_head *,
31 u32, /* bits_wanted */ 32 u32, /* bits_wanted */
32 u32, /* min_bits */ 33 u32, /* min_bits */
33 u64, /* max_block */ 34 u64, /* max_block */
34 u16 *, /* *bit_off */ 35 struct ocfs2_suballoc_result *);
35 u16 *); /* *bits_found */ 36 /* found bits */
36 37
37struct ocfs2_alloc_context { 38struct ocfs2_alloc_context {
38 struct inode *ac_inode; /* which bitmap are we allocating from? */ 39 struct inode *ac_inode; /* which bitmap are we allocating from? */
@@ -54,6 +55,8 @@ struct ocfs2_alloc_context {
54 u64 ac_last_group; 55 u64 ac_last_group;
55 u64 ac_max_block; /* Highest block number to allocate. 0 is 56 u64 ac_max_block; /* Highest block number to allocate. 0 is
56 is the same as ~0 - unlimited */ 57 is the same as ~0 - unlimited */
58
59 struct ocfs2_alloc_reservation *ac_resv;
57}; 60};
58 61
59void ocfs2_init_steal_slots(struct ocfs2_super *osb); 62void ocfs2_init_steal_slots(struct ocfs2_super *osb);
@@ -80,22 +83,21 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
80 u32 bits_wanted, 83 u32 bits_wanted,
81 struct ocfs2_alloc_context **ac); 84 struct ocfs2_alloc_context **ac);
82 85
83int ocfs2_claim_metadata(struct ocfs2_super *osb, 86int ocfs2_claim_metadata(handle_t *handle,
84 handle_t *handle,
85 struct ocfs2_alloc_context *ac, 87 struct ocfs2_alloc_context *ac,
86 u32 bits_wanted, 88 u32 bits_wanted,
89 u64 *suballoc_loc,
87 u16 *suballoc_bit_start, 90 u16 *suballoc_bit_start,
88 u32 *num_bits, 91 u32 *num_bits,
89 u64 *blkno_start); 92 u64 *blkno_start);
90int ocfs2_claim_new_inode(struct ocfs2_super *osb, 93int ocfs2_claim_new_inode(handle_t *handle,
91 handle_t *handle,
92 struct inode *dir, 94 struct inode *dir,
93 struct buffer_head *parent_fe_bh, 95 struct buffer_head *parent_fe_bh,
94 struct ocfs2_alloc_context *ac, 96 struct ocfs2_alloc_context *ac,
97 u64 *suballoc_loc,
95 u16 *suballoc_bit, 98 u16 *suballoc_bit,
96 u64 *fe_blkno); 99 u64 *fe_blkno);
97int ocfs2_claim_clusters(struct ocfs2_super *osb, 100int ocfs2_claim_clusters(handle_t *handle,
98 handle_t *handle,
99 struct ocfs2_alloc_context *ac, 101 struct ocfs2_alloc_context *ac,
100 u32 min_clusters, 102 u32 min_clusters,
101 u32 *cluster_start, 103 u32 *cluster_start,
@@ -104,8 +106,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
104 * Use this variant of ocfs2_claim_clusters to specify a maxiumum 106 * Use this variant of ocfs2_claim_clusters to specify a maxiumum
105 * number of clusters smaller than the allocation reserved. 107 * number of clusters smaller than the allocation reserved.
106 */ 108 */
107int __ocfs2_claim_clusters(struct ocfs2_super *osb, 109int __ocfs2_claim_clusters(handle_t *handle,
108 handle_t *handle,
109 struct ocfs2_alloc_context *ac, 110 struct ocfs2_alloc_context *ac,
110 u32 min_clusters, 111 u32 min_clusters,
111 u32 max_clusters, 112 u32 max_clusters,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index dee03197a494..2c26ce251cb3 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -94,7 +94,9 @@ struct mount_options
94 unsigned long mount_opt; 94 unsigned long mount_opt;
95 unsigned int atime_quantum; 95 unsigned int atime_quantum;
96 signed short slot; 96 signed short slot;
97 unsigned int localalloc_opt; 97 int localalloc_opt;
98 unsigned int resv_level;
99 int dir_resv_level;
98 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; 100 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
99}; 101};
100 102
@@ -176,6 +178,8 @@ enum {
176 Opt_noacl, 178 Opt_noacl,
177 Opt_usrquota, 179 Opt_usrquota,
178 Opt_grpquota, 180 Opt_grpquota,
181 Opt_resv_level,
182 Opt_dir_resv_level,
179 Opt_err, 183 Opt_err,
180}; 184};
181 185
@@ -202,6 +206,8 @@ static const match_table_t tokens = {
202 {Opt_noacl, "noacl"}, 206 {Opt_noacl, "noacl"},
203 {Opt_usrquota, "usrquota"}, 207 {Opt_usrquota, "usrquota"},
204 {Opt_grpquota, "grpquota"}, 208 {Opt_grpquota, "grpquota"},
209 {Opt_resv_level, "resv_level=%u"},
210 {Opt_dir_resv_level, "dir_resv_level=%u"},
205 {Opt_err, NULL} 211 {Opt_err, NULL}
206}; 212};
207 213
@@ -932,12 +938,16 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
932 int type; 938 int type;
933 struct inode *inode; 939 struct inode *inode;
934 struct super_block *sb = osb->sb; 940 struct super_block *sb = osb->sb;
941 struct ocfs2_mem_dqinfo *oinfo;
935 942
936 /* We mostly ignore errors in this function because there's not much 943 /* We mostly ignore errors in this function because there's not much
937 * we can do when we see them */ 944 * we can do when we see them */
938 for (type = 0; type < MAXQUOTAS; type++) { 945 for (type = 0; type < MAXQUOTAS; type++) {
939 if (!sb_has_quota_loaded(sb, type)) 946 if (!sb_has_quota_loaded(sb, type))
940 continue; 947 continue;
948 /* Cancel periodic syncing before we grab dqonoff_mutex */
949 oinfo = sb_dqinfo(sb, type)->dqi_priv;
950 cancel_delayed_work_sync(&oinfo->dqi_sync_work);
941 inode = igrab(sb->s_dquot.files[type]); 951 inode = igrab(sb->s_dquot.files[type]);
942 /* Turn off quotas. This will remove all dquot structures from 952 /* Turn off quotas. This will remove all dquot structures from
943 * memory and so they will be automatically synced to global 953 * memory and so they will be automatically synced to global
@@ -1028,8 +1038,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1028 osb->s_atime_quantum = parsed_options.atime_quantum; 1038 osb->s_atime_quantum = parsed_options.atime_quantum;
1029 osb->preferred_slot = parsed_options.slot; 1039 osb->preferred_slot = parsed_options.slot;
1030 osb->osb_commit_interval = parsed_options.commit_interval; 1040 osb->osb_commit_interval = parsed_options.commit_interval;
1031 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1041
1032 osb->local_alloc_bits = osb->local_alloc_default_bits; 1042 ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt);
1043 osb->osb_resv_level = parsed_options.resv_level;
1044 osb->osb_dir_resv_level = parsed_options.resv_level;
1045 if (parsed_options.dir_resv_level == -1)
1046 osb->osb_dir_resv_level = parsed_options.resv_level;
1047 else
1048 osb->osb_dir_resv_level = parsed_options.dir_resv_level;
1033 1049
1034 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1050 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1035 if (status) 1051 if (status)
@@ -1285,11 +1301,13 @@ static int ocfs2_parse_options(struct super_block *sb,
1285 options ? options : "(none)"); 1301 options ? options : "(none)");
1286 1302
1287 mopt->commit_interval = 0; 1303 mopt->commit_interval = 0;
1288 mopt->mount_opt = 0; 1304 mopt->mount_opt = OCFS2_MOUNT_NOINTR;
1289 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 1305 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
1290 mopt->slot = OCFS2_INVALID_SLOT; 1306 mopt->slot = OCFS2_INVALID_SLOT;
1291 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 1307 mopt->localalloc_opt = -1;
1292 mopt->cluster_stack[0] = '\0'; 1308 mopt->cluster_stack[0] = '\0';
1309 mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
1310 mopt->dir_resv_level = -1;
1293 1311
1294 if (!options) { 1312 if (!options) {
1295 status = 1; 1313 status = 1;
@@ -1380,7 +1398,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1380 status = 0; 1398 status = 0;
1381 goto bail; 1399 goto bail;
1382 } 1400 }
1383 if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) 1401 if (option >= 0)
1384 mopt->localalloc_opt = option; 1402 mopt->localalloc_opt = option;
1385 break; 1403 break;
1386 case Opt_localflocks: 1404 case Opt_localflocks:
@@ -1433,6 +1451,28 @@ static int ocfs2_parse_options(struct super_block *sb,
1433 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; 1451 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1434 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1452 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1435 break; 1453 break;
1454 case Opt_resv_level:
1455 if (is_remount)
1456 break;
1457 if (match_int(&args[0], &option)) {
1458 status = 0;
1459 goto bail;
1460 }
1461 if (option >= OCFS2_MIN_RESV_LEVEL &&
1462 option < OCFS2_MAX_RESV_LEVEL)
1463 mopt->resv_level = option;
1464 break;
1465 case Opt_dir_resv_level:
1466 if (is_remount)
1467 break;
1468 if (match_int(&args[0], &option)) {
1469 status = 0;
1470 goto bail;
1471 }
1472 if (option >= OCFS2_MIN_RESV_LEVEL &&
1473 option < OCFS2_MAX_RESV_LEVEL)
1474 mopt->dir_resv_level = option;
1475 break;
1436 default: 1476 default:
1437 mlog(ML_ERROR, 1477 mlog(ML_ERROR,
1438 "Unrecognized mount option \"%s\" " 1478 "Unrecognized mount option \"%s\" "
@@ -1487,7 +1527,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1487 (unsigned) (osb->osb_commit_interval / HZ)); 1527 (unsigned) (osb->osb_commit_interval / HZ));
1488 1528
1489 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); 1529 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
1490 if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 1530 if (local_alloc_megs != ocfs2_la_default_mb(osb))
1491 seq_printf(s, ",localalloc=%d", local_alloc_megs); 1531 seq_printf(s, ",localalloc=%d", local_alloc_megs);
1492 1532
1493 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 1533 if (opts & OCFS2_MOUNT_LOCALFLOCKS)
@@ -1514,6 +1554,12 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1514 else 1554 else
1515 seq_printf(s, ",noacl"); 1555 seq_printf(s, ",noacl");
1516 1556
1557 if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
1558 seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
1559
1560 if (osb->osb_dir_resv_level != osb->osb_resv_level)
1561 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
1562
1517 return 0; 1563 return 0;
1518} 1564}
1519 1565
@@ -1688,6 +1734,8 @@ static void ocfs2_inode_init_once(void *data)
1688 oi->ip_blkno = 0ULL; 1734 oi->ip_blkno = 0ULL;
1689 oi->ip_clusters = 0; 1735 oi->ip_clusters = 0;
1690 1736
1737 ocfs2_resv_init_once(&oi->ip_la_data_resv);
1738
1691 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 1739 ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
1692 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1740 ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
1693 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1741 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
@@ -2042,6 +2090,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
2042 2090
2043 init_waitqueue_head(&osb->osb_mount_event); 2091 init_waitqueue_head(&osb->osb_mount_event);
2044 2092
2093 status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
2094 if (status) {
2095 mlog_errno(status);
2096 goto bail;
2097 }
2098
2045 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 2099 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
2046 if (!osb->vol_label) { 2100 if (!osb->vol_label) {
2047 mlog(ML_ERROR, "unable to alloc vol label\n"); 2101 mlog(ML_ERROR, "unable to alloc vol label\n");
@@ -2224,9 +2278,11 @@ static int ocfs2_initialize_super(struct super_block *sb,
2224 } 2278 }
2225 2279
2226 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 2280 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
2281 osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters;
2227 iput(inode); 2282 iput(inode);
2228 2283
2229 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; 2284 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0,
2285 osb->s_feature_incompat) * 8;
2230 2286
2231 status = ocfs2_init_slot_info(osb); 2287 status = ocfs2_init_slot_info(osb);
2232 if (status < 0) { 2288 if (status < 0) {
@@ -2509,5 +2565,25 @@ void __ocfs2_abort(struct super_block* sb,
2509 ocfs2_handle_error(sb); 2565 ocfs2_handle_error(sb);
2510} 2566}
2511 2567
2568/*
2569 * Void signal blockers, because in-kernel sigprocmask() only fails
2570 * when SIG_* is wrong.
2571 */
2572void ocfs2_block_signals(sigset_t *oldset)
2573{
2574 int rc;
2575 sigset_t blocked;
2576
2577 sigfillset(&blocked);
2578 rc = sigprocmask(SIG_BLOCK, &blocked, oldset);
2579 BUG_ON(rc);
2580}
2581
2582void ocfs2_unblock_signals(sigset_t *oldset)
2583{
2584 int rc = sigprocmask(SIG_SETMASK, oldset, NULL);
2585 BUG_ON(rc);
2586}
2587
2512module_init(ocfs2_init); 2588module_init(ocfs2_init);
2513module_exit(ocfs2_exit); 2589module_exit(ocfs2_exit);
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 783f5270f2a1..40c7de084c10 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,4 +45,11 @@ void __ocfs2_abort(struct super_block *sb,
45 45
46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) 46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
47 47
48/*
49 * Void signal blockers, because in-kernel sigprocmask() only fails
50 * when SIG_* is wrong.
51 */
52void ocfs2_block_signals(sigset_t *oldset);
53void ocfs2_unblock_signals(sigset_t *oldset);
54
48#endif /* OCFS2_SUPER_H */ 55#endif /* OCFS2_SUPER_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3e7773089b96..98ee6c44102d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -79,6 +79,7 @@ struct ocfs2_xattr_set_ctxt {
79 struct ocfs2_alloc_context *meta_ac; 79 struct ocfs2_alloc_context *meta_ac;
80 struct ocfs2_alloc_context *data_ac; 80 struct ocfs2_alloc_context *data_ac;
81 struct ocfs2_cached_dealloc_ctxt dealloc; 81 struct ocfs2_cached_dealloc_ctxt dealloc;
82 int set_abort;
82}; 83};
83 84
84#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 85#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
@@ -739,11 +740,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
739 goto leave; 740 goto leave;
740 } 741 }
741 742
742 status = ocfs2_journal_dirty(handle, vb->vb_bh); 743 ocfs2_journal_dirty(handle, vb->vb_bh);
743 if (status < 0) {
744 mlog_errno(status);
745 goto leave;
746 }
747 744
748 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; 745 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
749 746
@@ -786,12 +783,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
786 } 783 }
787 784
788 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 785 le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
789 786 ocfs2_journal_dirty(handle, vb->vb_bh);
790 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
791 if (ret) {
792 mlog_errno(ret);
793 goto out;
794 }
795 787
796 if (ext_flags & OCFS2_EXT_REFCOUNTED) 788 if (ext_flags & OCFS2_EXT_REFCOUNTED)
797 ret = ocfs2_decrease_refcount(inode, handle, 789 ret = ocfs2_decrease_refcount(inode, handle,
@@ -1374,11 +1366,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1374 memset(bh->b_data + cp_len, 0, 1366 memset(bh->b_data + cp_len, 0,
1375 blocksize - cp_len); 1367 blocksize - cp_len);
1376 1368
1377 ret = ocfs2_journal_dirty(handle, bh); 1369 ocfs2_journal_dirty(handle, bh);
1378 if (ret < 0) {
1379 mlog_errno(ret);
1380 goto out;
1381 }
1382 brelse(bh); 1370 brelse(bh);
1383 bh = NULL; 1371 bh = NULL;
1384 1372
@@ -2148,15 +2136,19 @@ alloc_value:
2148 orig_clusters = ocfs2_xa_value_clusters(loc); 2136 orig_clusters = ocfs2_xa_value_clusters(loc);
2149 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2137 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2150 if (rc < 0) { 2138 if (rc < 0) {
2151 /* 2139 ctxt->set_abort = 1;
2152 * If we tried to grow an existing external value,
2153 * ocfs2_xa_cleanuP-value_truncate() is going to
2154 * let it stand. We have to restore its original
2155 * value size.
2156 */
2157 loc->xl_entry->xe_value_size = orig_value_size;
2158 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2140 ocfs2_xa_cleanup_value_truncate(loc, "growing",
2159 orig_clusters); 2141 orig_clusters);
2142 /*
2143 * If we were growing an existing value,
2144 * ocfs2_xa_cleanup_value_truncate() won't remove
2145 * the entry. We need to restore the original value
2146 * size.
2147 */
2148 if (loc->xl_entry) {
2149 BUG_ON(!orig_value_size);
2150 loc->xl_entry->xe_value_size = orig_value_size;
2151 }
2160 mlog_errno(rc); 2152 mlog_errno(rc);
2161 } 2153 }
2162 } 2154 }
@@ -2479,7 +2471,10 @@ static int ocfs2_xattr_free_block(struct inode *inode,
2479 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2471 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2480 blk = le64_to_cpu(xb->xb_blkno); 2472 blk = le64_to_cpu(xb->xb_blkno);
2481 bit = le16_to_cpu(xb->xb_suballoc_bit); 2473 bit = le16_to_cpu(xb->xb_suballoc_bit);
2482 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2474 if (xb->xb_suballoc_loc)
2475 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2476 else
2477 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2483 2478
2484 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2479 xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2485 EXTENT_ALLOC_SYSTEM_INODE, 2480 EXTENT_ALLOC_SYSTEM_INODE,
@@ -2594,9 +2589,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2594 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2589 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2595 spin_unlock(&oi->ip_lock); 2590 spin_unlock(&oi->ip_lock);
2596 2591
2597 ret = ocfs2_journal_dirty(handle, di_bh); 2592 ocfs2_journal_dirty(handle, di_bh);
2598 if (ret < 0)
2599 mlog_errno(ret);
2600out_commit: 2593out_commit:
2601 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2594 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2602out: 2595out:
@@ -2724,9 +2717,7 @@ static int ocfs2_xattr_ibody_init(struct inode *inode,
2724 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2717 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2725 spin_unlock(&oi->ip_lock); 2718 spin_unlock(&oi->ip_lock);
2726 2719
2727 ret = ocfs2_journal_dirty(ctxt->handle, di_bh); 2720 ocfs2_journal_dirty(ctxt->handle, di_bh);
2728 if (ret < 0)
2729 mlog_errno(ret);
2730 2721
2731out: 2722out:
2732 return ret; 2723 return ret;
@@ -2846,9 +2837,8 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2846 int ret; 2837 int ret;
2847 u16 suballoc_bit_start; 2838 u16 suballoc_bit_start;
2848 u32 num_got; 2839 u32 num_got;
2849 u64 first_blkno; 2840 u64 suballoc_loc, first_blkno;
2850 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2841 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
2851 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2852 struct buffer_head *new_bh = NULL; 2842 struct buffer_head *new_bh = NULL;
2853 struct ocfs2_xattr_block *xblk; 2843 struct ocfs2_xattr_block *xblk;
2854 2844
@@ -2859,9 +2849,9 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2859 goto end; 2849 goto end;
2860 } 2850 }
2861 2851
2862 ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1, 2852 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2863 &suballoc_bit_start, &num_got, 2853 &suballoc_loc, &suballoc_bit_start,
2864 &first_blkno); 2854 &num_got, &first_blkno);
2865 if (ret < 0) { 2855 if (ret < 0) {
2866 mlog_errno(ret); 2856 mlog_errno(ret);
2867 goto end; 2857 goto end;
@@ -2883,8 +2873,10 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2883 memset(xblk, 0, inode->i_sb->s_blocksize); 2873 memset(xblk, 0, inode->i_sb->s_blocksize);
2884 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2874 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2885 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2875 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2876 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2886 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2877 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2887 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); 2878 xblk->xb_fs_generation =
2879 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2888 xblk->xb_blkno = cpu_to_le64(first_blkno); 2880 xblk->xb_blkno = cpu_to_le64(first_blkno);
2889 if (indexed) { 2881 if (indexed) {
2890 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2882 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
@@ -2956,7 +2948,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
2956 ret = ocfs2_xa_set(&loc, xi, ctxt); 2948 ret = ocfs2_xa_set(&loc, xi, ctxt);
2957 if (!ret) 2949 if (!ret)
2958 xs->here = loc.xl_entry; 2950 xs->here = loc.xl_entry;
2959 else if (ret != -ENOSPC) 2951 else if ((ret != -ENOSPC) || ctxt->set_abort)
2960 goto end; 2952 goto end;
2961 else { 2953 else {
2962 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2954 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
@@ -3312,14 +3304,13 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3312 goto out; 3304 goto out;
3313 } 3305 }
3314 3306
3315 ret = ocfs2_extend_trans(ctxt->handle, credits + 3307 ret = ocfs2_extend_trans(ctxt->handle, credits);
3316 ctxt->handle->h_buffer_credits);
3317 if (ret) { 3308 if (ret) {
3318 mlog_errno(ret); 3309 mlog_errno(ret);
3319 goto out; 3310 goto out;
3320 } 3311 }
3321 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3312 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3322 } else if (ret == -ENOSPC) { 3313 } else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3323 if (di->i_xattr_loc && !xbs->xattr_bh) { 3314 if (di->i_xattr_loc && !xbs->xattr_bh) {
3324 ret = ocfs2_xattr_block_find(inode, 3315 ret = ocfs2_xattr_block_find(inode,
3325 xi->xi_name_index, 3316 xi->xi_name_index,
@@ -3343,8 +3334,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3343 goto out; 3334 goto out;
3344 } 3335 }
3345 3336
3346 ret = ocfs2_extend_trans(ctxt->handle, credits + 3337 ret = ocfs2_extend_trans(ctxt->handle, credits);
3347 ctxt->handle->h_buffer_credits);
3348 if (ret) { 3338 if (ret) {
3349 mlog_errno(ret); 3339 mlog_errno(ret);
3350 goto out; 3340 goto out;
@@ -3378,8 +3368,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3378 goto out; 3368 goto out;
3379 } 3369 }
3380 3370
3381 ret = ocfs2_extend_trans(ctxt->handle, credits + 3371 ret = ocfs2_extend_trans(ctxt->handle, credits);
3382 ctxt->handle->h_buffer_credits);
3383 if (ret) { 3372 if (ret) {
3384 mlog_errno(ret); 3373 mlog_errno(ret);
3385 goto out; 3374 goto out;
@@ -4249,7 +4238,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4249 u32 bit_off, len; 4238 u32 bit_off, len;
4250 u64 blkno; 4239 u64 blkno;
4251 handle_t *handle = ctxt->handle; 4240 handle_t *handle = ctxt->handle;
4252 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4253 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4241 struct ocfs2_inode_info *oi = OCFS2_I(inode);
4254 struct buffer_head *xb_bh = xs->xattr_bh; 4242 struct buffer_head *xb_bh = xs->xattr_bh;
4255 struct ocfs2_xattr_block *xb = 4243 struct ocfs2_xattr_block *xb =
@@ -4277,7 +4265,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4277 goto out; 4265 goto out;
4278 } 4266 }
4279 4267
4280 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 4268 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4281 1, 1, &bit_off, &len); 4269 1, 1, &bit_off, &len);
4282 if (ret) { 4270 if (ret) {
4283 mlog_errno(ret); 4271 mlog_errno(ret);
@@ -4887,8 +4875,7 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4887 * We need to update the first bucket of the old extent and all 4875 * We need to update the first bucket of the old extent and all
4888 * the buckets going to the new extent. 4876 * the buckets going to the new extent.
4889 */ 4877 */
4890 credits = ((num_buckets + 1) * blks_per_bucket) + 4878 credits = ((num_buckets + 1) * blks_per_bucket);
4891 handle->h_buffer_credits;
4892 ret = ocfs2_extend_trans(handle, credits); 4879 ret = ocfs2_extend_trans(handle, credits);
4893 if (ret) { 4880 if (ret) {
4894 mlog_errno(ret); 4881 mlog_errno(ret);
@@ -4958,7 +4945,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
4958 u32 *first_hash) 4945 u32 *first_hash)
4959{ 4946{
4960 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4947 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4961 int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits; 4948 int ret, credits = 2 * blk_per_bucket;
4962 4949
4963 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4950 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4964 4951
@@ -5099,7 +5086,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5099 goto leave; 5086 goto leave;
5100 } 5087 }
5101 5088
5102 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1, 5089 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5103 clusters_to_add, &bit_off, &num_bits); 5090 clusters_to_add, &bit_off, &num_bits);
5104 if (ret < 0) { 5091 if (ret < 0) {
5105 if (ret != -ENOSPC) 5092 if (ret != -ENOSPC)
@@ -5153,9 +5140,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5153 goto leave; 5140 goto leave;
5154 } 5141 }
5155 5142
5156 ret = ocfs2_journal_dirty(handle, root_bh); 5143 ocfs2_journal_dirty(handle, root_bh);
5157 if (ret < 0)
5158 mlog_errno(ret);
5159 5144
5160leave: 5145leave:
5161 return ret; 5146 return ret;
@@ -5200,8 +5185,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
5200 * existing bucket. Then we add the last existing bucket, the 5185 * existing bucket. Then we add the last existing bucket, the
5201 * new bucket, and the first bucket (3 * blk_per_bucket). 5186 * new bucket, and the first bucket (3 * blk_per_bucket).
5202 */ 5187 */
5203 credits = (end_blk - target_blk) + (3 * blk_per_bucket) + 5188 credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5204 handle->h_buffer_credits;
5205 ret = ocfs2_extend_trans(handle, credits); 5189 ret = ocfs2_extend_trans(handle, credits);
5206 if (ret) { 5190 if (ret) {
5207 mlog_errno(ret); 5191 mlog_errno(ret);
@@ -5477,12 +5461,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
5477 } 5461 }
5478 5462
5479 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5463 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5480 5464 ocfs2_journal_dirty(handle, root_bh);
5481 ret = ocfs2_journal_dirty(handle, root_bh);
5482 if (ret) {
5483 mlog_errno(ret);
5484 goto out_commit;
5485 }
5486 5465
5487 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5466 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5488 if (ret) 5467 if (ret)
@@ -6935,7 +6914,7 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6935 goto out; 6914 goto out;
6936 } 6915 }
6937 6916
6938 ret = ocfs2_claim_clusters(osb, handle, data_ac, 6917 ret = ocfs2_claim_clusters(handle, data_ac,
6939 len, &p_cluster, &num_clusters); 6918 len, &p_cluster, &num_clusters);
6940 if (ret) { 6919 if (ret) {
6941 mlog_errno(ret); 6920 mlog_errno(ret);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index c82af6acc2e7..b44bb835e8ea 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -3,7 +3,6 @@
3 * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com> 3 * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2. 4 * Released under GPL v2.
5 */ 5 */
6#include <linux/version.h>
7#include <linux/module.h> 6#include <linux/module.h>
8#include <linux/sched.h> 7#include <linux/sched.h>
9#include <linux/slab.h> 8#include <linux/slab.h>
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e5..885ab5513ac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
81#include <linux/pid_namespace.h> 81#include <linux/pid_namespace.h>
82#include <linux/ptrace.h> 82#include <linux/ptrace.h>
83#include <linux/tracehook.h> 83#include <linux/tracehook.h>
84#include <linux/swapops.h>
85 84
86#include <asm/pgtable.h> 85#include <asm/pgtable.h>
87#include <asm/processor.h> 86#include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
495 rsslim, 494 rsslim,
496 mm ? mm->start_code : 0, 495 mm ? mm->start_code : 0,
497 mm ? mm->end_code : 0, 496 mm ? mm->end_code : 0,
498 (permitted && mm) ? task->stack_start : 0, 497 (permitted && mm) ? mm->start_stack : 0,
499 esp, 498 esp,
500 eip, 499 eip,
501 /* The signal information here is obsolete. 500 /* The signal information here is obsolete.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8418fcc0a6ab..c7f9f23449dc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -730,6 +730,7 @@ out_no_task:
730 730
731static const struct file_operations proc_info_file_operations = { 731static const struct file_operations proc_info_file_operations = {
732 .read = proc_info_read, 732 .read = proc_info_read,
733 .llseek = generic_file_llseek,
733}; 734};
734 735
735static int proc_single_show(struct seq_file *m, void *v) 736static int proc_single_show(struct seq_file *m, void *v)
@@ -987,6 +988,7 @@ out_no_task:
987 988
988static const struct file_operations proc_environ_operations = { 989static const struct file_operations proc_environ_operations = {
989 .read = environ_read, 990 .read = environ_read,
991 .llseek = generic_file_llseek,
990}; 992};
991 993
992static ssize_t oom_adjust_read(struct file *file, char __user *buf, 994static ssize_t oom_adjust_read(struct file *file, char __user *buf,
@@ -1060,6 +1062,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1060static const struct file_operations proc_oom_adjust_operations = { 1062static const struct file_operations proc_oom_adjust_operations = {
1061 .read = oom_adjust_read, 1063 .read = oom_adjust_read,
1062 .write = oom_adjust_write, 1064 .write = oom_adjust_write,
1065 .llseek = generic_file_llseek,
1063}; 1066};
1064 1067
1065#ifdef CONFIG_AUDITSYSCALL 1068#ifdef CONFIG_AUDITSYSCALL
@@ -1131,6 +1134,7 @@ out_free_page:
1131static const struct file_operations proc_loginuid_operations = { 1134static const struct file_operations proc_loginuid_operations = {
1132 .read = proc_loginuid_read, 1135 .read = proc_loginuid_read,
1133 .write = proc_loginuid_write, 1136 .write = proc_loginuid_write,
1137 .llseek = generic_file_llseek,
1134}; 1138};
1135 1139
1136static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1140static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
@@ -1151,6 +1155,7 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
1151 1155
1152static const struct file_operations proc_sessionid_operations = { 1156static const struct file_operations proc_sessionid_operations = {
1153 .read = proc_sessionid_read, 1157 .read = proc_sessionid_read,
1158 .llseek = generic_file_llseek,
1154}; 1159};
1155#endif 1160#endif
1156 1161
@@ -1202,6 +1207,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
1202static const struct file_operations proc_fault_inject_operations = { 1207static const struct file_operations proc_fault_inject_operations = {
1203 .read = proc_fault_inject_read, 1208 .read = proc_fault_inject_read,
1204 .write = proc_fault_inject_write, 1209 .write = proc_fault_inject_write,
1210 .llseek = generic_file_llseek,
1205}; 1211};
1206#endif 1212#endif
1207 1213
@@ -1943,7 +1949,7 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
1943} 1949}
1944 1950
1945static const struct file_operations proc_fdinfo_file_operations = { 1951static const struct file_operations proc_fdinfo_file_operations = {
1946 .open = nonseekable_open, 1952 .open = nonseekable_open,
1947 .read = proc_fdinfo_read, 1953 .read = proc_fdinfo_read,
1948}; 1954};
1949 1955
@@ -2227,6 +2233,7 @@ out_no_task:
2227static const struct file_operations proc_pid_attr_operations = { 2233static const struct file_operations proc_pid_attr_operations = {
2228 .read = proc_pid_attr_read, 2234 .read = proc_pid_attr_read,
2229 .write = proc_pid_attr_write, 2235 .write = proc_pid_attr_write,
2236 .llseek = generic_file_llseek,
2230}; 2237};
2231 2238
2232static const struct pid_entry attr_dir_stuff[] = { 2239static const struct pid_entry attr_dir_stuff[] = {
@@ -2347,6 +2354,7 @@ static ssize_t proc_coredump_filter_write(struct file *file,
2347static const struct file_operations proc_coredump_filter_operations = { 2354static const struct file_operations proc_coredump_filter_operations = {
2348 .read = proc_coredump_filter_read, 2355 .read = proc_coredump_filter_read,
2349 .write = proc_coredump_filter_write, 2356 .write = proc_coredump_filter_write,
2357 .llseek = generic_file_llseek,
2350}; 2358};
2351#endif 2359#endif
2352 2360
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d35b23238fb1..aea8502e58a3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -232,9 +232,9 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
232 if (rv == -ENOIOCTLCMD) 232 if (rv == -ENOIOCTLCMD)
233 rv = -EINVAL; 233 rv = -EINVAL;
234 } else if (ioctl) { 234 } else if (ioctl) {
235 lock_kernel(); 235 WARN_ONCE(1, "Procfs ioctl handlers must use unlocked_ioctl, "
236 "%pf will be called without the Bkl held\n", ioctl);
236 rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg); 237 rv = ioctl(file->f_path.dentry->d_inode, file, cmd, arg);
237 unlock_kernel();
238 } 238 }
239 239
240 pde_users_dec(pde); 240 pde_users_dec(pde);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 19979a2ce272..c837a77351be 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -558,6 +558,7 @@ static int open_kcore(struct inode *inode, struct file *filp)
558static const struct file_operations proc_kcore_operations = { 558static const struct file_operations proc_kcore_operations = {
559 .read = read_kcore, 559 .read = read_kcore,
560 .open = open_kcore, 560 .open = open_kcore,
561 .llseek = generic_file_llseek,
561}; 562};
562 563
563#ifdef CONFIG_MEMORY_HOTPLUG 564#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index cfe90a48a6e8..bd4b5a740ff1 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -53,6 +53,7 @@ static const struct file_operations proc_kmsg_operations = {
53 .poll = kmsg_poll, 53 .poll = kmsg_poll,
54 .open = kmsg_open, 54 .open = kmsg_open,
55 .release = kmsg_release, 55 .release = kmsg_release,
56 .llseek = generic_file_llseek,
56}; 57};
57 58
58static int __init proc_kmsg_init(void) 59static int __init proc_kmsg_init(void)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd5..47f5b145f56e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
247 } else if (vma->vm_start <= mm->start_stack && 247 } else if (vma->vm_start <= mm->start_stack &&
248 vma->vm_end >= mm->start_stack) { 248 vma->vm_end >= mm->start_stack) {
249 name = "[stack]"; 249 name = "[stack]";
250 } else {
251 unsigned long stack_start;
252 struct proc_maps_private *pmp;
253
254 pmp = m->private;
255 stack_start = pmp->task->stack_start;
256
257 if (vma->vm_start <= stack_start &&
258 vma->vm_end >= stack_start) {
259 pad_len_spaces(m, len);
260 seq_printf(m,
261 "[threadstack:%08lx]",
262#ifdef CONFIG_STACK_GROWSUP
263 vma->vm_end - stack_start
264#else
265 stack_start - vma->vm_start
266#endif
267 );
268 }
269 } 250 }
270 } else { 251 } else {
271 name = "[vdso]"; 252 name = "[vdso]";
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 9fbc99ec799a..91c817ff02c3 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -163,6 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
163 163
164static const struct file_operations proc_vmcore_operations = { 164static const struct file_operations proc_vmcore_operations = {
165 .read = read_vmcore, 165 .read = read_vmcore,
166 .llseek = generic_file_llseek,
166}; 167};
167 168
168static struct vmcore* __init get_new_element(void) 169static struct vmcore* __init get_new_element(void)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 788b5802a7ce..655a4c52b8c3 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,7 @@
82 82
83/* 83/*
84 * There are three quota SMP locks. dq_list_lock protects all lists with quotas 84 * There are three quota SMP locks. dq_list_lock protects all lists with quotas
85 * and quota formats, dqstats structure containing statistics about the lists 85 * and quota formats.
86 * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and 86 * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
87 * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. 87 * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
88 * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly 88 * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -132,7 +132,9 @@ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_state_lock);
132__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); 132__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
133EXPORT_SYMBOL(dq_data_lock); 133EXPORT_SYMBOL(dq_data_lock);
134 134
135#if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING)
135static char *quotatypes[] = INITQFNAMES; 136static char *quotatypes[] = INITQFNAMES;
137#endif
136static struct quota_format_type *quota_formats; /* List of registered formats */ 138static struct quota_format_type *quota_formats; /* List of registered formats */
137static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; 139static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES;
138 140
@@ -226,6 +228,10 @@ static struct hlist_head *dquot_hash;
226 228
227struct dqstats dqstats; 229struct dqstats dqstats;
228EXPORT_SYMBOL(dqstats); 230EXPORT_SYMBOL(dqstats);
231#ifdef CONFIG_SMP
232struct dqstats *dqstats_pcpu;
233EXPORT_SYMBOL(dqstats_pcpu);
234#endif
229 235
230static qsize_t inode_get_rsv_space(struct inode *inode); 236static qsize_t inode_get_rsv_space(struct inode *inode);
231static void __dquot_initialize(struct inode *inode, int type); 237static void __dquot_initialize(struct inode *inode, int type);
@@ -273,7 +279,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
273static inline void put_dquot_last(struct dquot *dquot) 279static inline void put_dquot_last(struct dquot *dquot)
274{ 280{
275 list_add_tail(&dquot->dq_free, &free_dquots); 281 list_add_tail(&dquot->dq_free, &free_dquots);
276 dqstats.free_dquots++; 282 dqstats_inc(DQST_FREE_DQUOTS);
277} 283}
278 284
279static inline void remove_free_dquot(struct dquot *dquot) 285static inline void remove_free_dquot(struct dquot *dquot)
@@ -281,7 +287,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
281 if (list_empty(&dquot->dq_free)) 287 if (list_empty(&dquot->dq_free))
282 return; 288 return;
283 list_del_init(&dquot->dq_free); 289 list_del_init(&dquot->dq_free);
284 dqstats.free_dquots--; 290 dqstats_dec(DQST_FREE_DQUOTS);
285} 291}
286 292
287static inline void put_inuse(struct dquot *dquot) 293static inline void put_inuse(struct dquot *dquot)
@@ -289,12 +295,12 @@ static inline void put_inuse(struct dquot *dquot)
289 /* We add to the back of inuse list so we don't have to restart 295 /* We add to the back of inuse list so we don't have to restart
290 * when traversing this list and we block */ 296 * when traversing this list and we block */
291 list_add_tail(&dquot->dq_inuse, &inuse_list); 297 list_add_tail(&dquot->dq_inuse, &inuse_list);
292 dqstats.allocated_dquots++; 298 dqstats_inc(DQST_ALLOC_DQUOTS);
293} 299}
294 300
295static inline void remove_inuse(struct dquot *dquot) 301static inline void remove_inuse(struct dquot *dquot)
296{ 302{
297 dqstats.allocated_dquots--; 303 dqstats_dec(DQST_ALLOC_DQUOTS);
298 list_del(&dquot->dq_inuse); 304 list_del(&dquot->dq_inuse);
299} 305}
300/* 306/*
@@ -317,14 +323,23 @@ static inline int mark_dquot_dirty(struct dquot *dquot)
317 return dquot->dq_sb->dq_op->mark_dirty(dquot); 323 return dquot->dq_sb->dq_op->mark_dirty(dquot);
318} 324}
319 325
326/* Mark dquot dirty in atomic manner, and return it's old dirty flag state */
320int dquot_mark_dquot_dirty(struct dquot *dquot) 327int dquot_mark_dquot_dirty(struct dquot *dquot)
321{ 328{
329 int ret = 1;
330
331 /* If quota is dirty already, we don't have to acquire dq_list_lock */
332 if (test_bit(DQ_MOD_B, &dquot->dq_flags))
333 return 1;
334
322 spin_lock(&dq_list_lock); 335 spin_lock(&dq_list_lock);
323 if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) 336 if (!test_and_set_bit(DQ_MOD_B, &dquot->dq_flags)) {
324 list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)-> 337 list_add(&dquot->dq_dirty, &sb_dqopt(dquot->dq_sb)->
325 info[dquot->dq_type].dqi_dirty_list); 338 info[dquot->dq_type].dqi_dirty_list);
339 ret = 0;
340 }
326 spin_unlock(&dq_list_lock); 341 spin_unlock(&dq_list_lock);
327 return 0; 342 return ret;
328} 343}
329EXPORT_SYMBOL(dquot_mark_dquot_dirty); 344EXPORT_SYMBOL(dquot_mark_dquot_dirty);
330 345
@@ -550,8 +565,8 @@ int dquot_scan_active(struct super_block *sb,
550 continue; 565 continue;
551 /* Now we have active dquot so we can just increase use count */ 566 /* Now we have active dquot so we can just increase use count */
552 atomic_inc(&dquot->dq_count); 567 atomic_inc(&dquot->dq_count);
553 dqstats.lookups++;
554 spin_unlock(&dq_list_lock); 568 spin_unlock(&dq_list_lock);
569 dqstats_inc(DQST_LOOKUPS);
555 dqput(old_dquot); 570 dqput(old_dquot);
556 old_dquot = dquot; 571 old_dquot = dquot;
557 ret = fn(dquot, priv); 572 ret = fn(dquot, priv);
@@ -596,8 +611,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
596 * holding reference so we can safely just increase 611 * holding reference so we can safely just increase
597 * use count */ 612 * use count */
598 atomic_inc(&dquot->dq_count); 613 atomic_inc(&dquot->dq_count);
599 dqstats.lookups++;
600 spin_unlock(&dq_list_lock); 614 spin_unlock(&dq_list_lock);
615 dqstats_inc(DQST_LOOKUPS);
601 sb->dq_op->write_dquot(dquot); 616 sb->dq_op->write_dquot(dquot);
602 dqput(dquot); 617 dqput(dquot);
603 spin_lock(&dq_list_lock); 618 spin_lock(&dq_list_lock);
@@ -609,9 +624,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
609 if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) 624 if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
610 && info_dirty(&dqopt->info[cnt])) 625 && info_dirty(&dqopt->info[cnt]))
611 sb->dq_op->write_info(sb, cnt); 626 sb->dq_op->write_info(sb, cnt);
612 spin_lock(&dq_list_lock); 627 dqstats_inc(DQST_SYNCS);
613 dqstats.syncs++;
614 spin_unlock(&dq_list_lock);
615 mutex_unlock(&dqopt->dqonoff_mutex); 628 mutex_unlock(&dqopt->dqonoff_mutex);
616 629
617 if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)) 630 if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@ -663,6 +676,22 @@ static void prune_dqcache(int count)
663 } 676 }
664} 677}
665 678
679static int dqstats_read(unsigned int type)
680{
681 int count = 0;
682#ifdef CONFIG_SMP
683 int cpu;
684 for_each_possible_cpu(cpu)
685 count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
686 /* Statistics reading is racy, but absolute accuracy isn't required */
687 if (count < 0)
688 count = 0;
689#else
690 count = dqstats.stat[type];
691#endif
692 return count;
693}
694
666/* 695/*
667 * This is called from kswapd when we think we need some 696 * This is called from kswapd when we think we need some
668 * more memory 697 * more memory
@@ -675,7 +704,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
675 prune_dqcache(nr); 704 prune_dqcache(nr);
676 spin_unlock(&dq_list_lock); 705 spin_unlock(&dq_list_lock);
677 } 706 }
678 return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure; 707 return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
679} 708}
680 709
681static struct shrinker dqcache_shrinker = { 710static struct shrinker dqcache_shrinker = {
@@ -703,10 +732,7 @@ void dqput(struct dquot *dquot)
703 BUG(); 732 BUG();
704 } 733 }
705#endif 734#endif
706 735 dqstats_inc(DQST_DROPS);
707 spin_lock(&dq_list_lock);
708 dqstats.drops++;
709 spin_unlock(&dq_list_lock);
710we_slept: 736we_slept:
711 spin_lock(&dq_list_lock); 737 spin_lock(&dq_list_lock);
712 if (atomic_read(&dquot->dq_count) > 1) { 738 if (atomic_read(&dquot->dq_count) > 1) {
@@ -823,15 +849,15 @@ we_slept:
823 put_inuse(dquot); 849 put_inuse(dquot);
824 /* hash it first so it can be found */ 850 /* hash it first so it can be found */
825 insert_dquot_hash(dquot); 851 insert_dquot_hash(dquot);
826 dqstats.lookups++;
827 spin_unlock(&dq_list_lock); 852 spin_unlock(&dq_list_lock);
853 dqstats_inc(DQST_LOOKUPS);
828 } else { 854 } else {
829 if (!atomic_read(&dquot->dq_count)) 855 if (!atomic_read(&dquot->dq_count))
830 remove_free_dquot(dquot); 856 remove_free_dquot(dquot);
831 atomic_inc(&dquot->dq_count); 857 atomic_inc(&dquot->dq_count);
832 dqstats.cache_hits++;
833 dqstats.lookups++;
834 spin_unlock(&dq_list_lock); 858 spin_unlock(&dq_list_lock);
859 dqstats_inc(DQST_CACHE_HITS);
860 dqstats_inc(DQST_LOOKUPS);
835 } 861 }
836 /* Wait for dq_lock - after this we know that either dquot_release() is 862 /* Wait for dq_lock - after this we know that either dquot_release() is
837 * already finished or it will be canceled due to dq_count > 1 test */ 863 * already finished or it will be canceled due to dq_count > 1 test */
@@ -1677,16 +1703,19 @@ EXPORT_SYMBOL(dquot_free_inode);
1677 1703
1678/* 1704/*
1679 * Transfer the number of inode and blocks from one diskquota to an other. 1705 * Transfer the number of inode and blocks from one diskquota to an other.
1706 * On success, dquot references in transfer_to are consumed and references
1707 * to original dquots that need to be released are placed there. On failure,
1708 * references are kept untouched.
1680 * 1709 *
1681 * This operation can block, but only after everything is updated 1710 * This operation can block, but only after everything is updated
1682 * A transaction must be started when entering this function. 1711 * A transaction must be started when entering this function.
1712 *
1683 */ 1713 */
1684static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask) 1714int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
1685{ 1715{
1686 qsize_t space, cur_space; 1716 qsize_t space, cur_space;
1687 qsize_t rsv_space = 0; 1717 qsize_t rsv_space = 0;
1688 struct dquot *transfer_from[MAXQUOTAS]; 1718 struct dquot *transfer_from[MAXQUOTAS] = {};
1689 struct dquot *transfer_to[MAXQUOTAS];
1690 int cnt, ret = 0; 1719 int cnt, ret = 0;
1691 char warntype_to[MAXQUOTAS]; 1720 char warntype_to[MAXQUOTAS];
1692 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS]; 1721 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
@@ -1696,19 +1725,12 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
1696 if (IS_NOQUOTA(inode)) 1725 if (IS_NOQUOTA(inode))
1697 return 0; 1726 return 0;
1698 /* Initialize the arrays */ 1727 /* Initialize the arrays */
1699 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1728 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1700 transfer_from[cnt] = NULL;
1701 transfer_to[cnt] = NULL;
1702 warntype_to[cnt] = QUOTA_NL_NOWARN; 1729 warntype_to[cnt] = QUOTA_NL_NOWARN;
1703 }
1704 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1705 if (mask & (1 << cnt))
1706 transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
1707 }
1708 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1730 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1709 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ 1731 if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
1710 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1732 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1711 goto put_all; 1733 return 0;
1712 } 1734 }
1713 spin_lock(&dq_data_lock); 1735 spin_lock(&dq_data_lock);
1714 cur_space = inode_get_bytes(inode); 1736 cur_space = inode_get_bytes(inode);
@@ -1760,47 +1782,41 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
1760 1782
1761 mark_all_dquot_dirty(transfer_from); 1783 mark_all_dquot_dirty(transfer_from);
1762 mark_all_dquot_dirty(transfer_to); 1784 mark_all_dquot_dirty(transfer_to);
1763 /* The reference we got is transferred to the inode */ 1785 /* Pass back references to put */
1764 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1786 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1765 transfer_to[cnt] = NULL; 1787 transfer_to[cnt] = transfer_from[cnt];
1766warn_put_all: 1788warn:
1767 flush_warnings(transfer_to, warntype_to); 1789 flush_warnings(transfer_to, warntype_to);
1768 flush_warnings(transfer_from, warntype_from_inodes); 1790 flush_warnings(transfer_from, warntype_from_inodes);
1769 flush_warnings(transfer_from, warntype_from_space); 1791 flush_warnings(transfer_from, warntype_from_space);
1770put_all:
1771 dqput_all(transfer_from);
1772 dqput_all(transfer_to);
1773 return ret; 1792 return ret;
1774over_quota: 1793over_quota:
1775 spin_unlock(&dq_data_lock); 1794 spin_unlock(&dq_data_lock);
1776 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1795 up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1777 /* Clear dquot pointers we don't want to dqput() */ 1796 goto warn;
1778 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1779 transfer_from[cnt] = NULL;
1780 goto warn_put_all;
1781} 1797}
1798EXPORT_SYMBOL(__dquot_transfer);
1782 1799
1783/* Wrapper for transferring ownership of an inode for uid/gid only 1800/* Wrapper for transferring ownership of an inode for uid/gid only
1784 * Called from FSXXX_setattr() 1801 * Called from FSXXX_setattr()
1785 */ 1802 */
1786int dquot_transfer(struct inode *inode, struct iattr *iattr) 1803int dquot_transfer(struct inode *inode, struct iattr *iattr)
1787{ 1804{
1788 qid_t chid[MAXQUOTAS]; 1805 struct dquot *transfer_to[MAXQUOTAS] = {};
1789 unsigned long mask = 0; 1806 struct super_block *sb = inode->i_sb;
1807 int ret;
1790 1808
1791 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) { 1809 if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode))
1792 mask |= 1 << USRQUOTA; 1810 return 0;
1793 chid[USRQUOTA] = iattr->ia_uid; 1811
1794 } 1812 if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
1795 if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) { 1813 transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
1796 mask |= 1 << GRPQUOTA; 1814 if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
1797 chid[GRPQUOTA] = iattr->ia_gid; 1815 transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_uid, GRPQUOTA);
1798 } 1816
1799 if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { 1817 ret = __dquot_transfer(inode, transfer_to);
1800 dquot_initialize(inode); 1818 dqput_all(transfer_to);
1801 return __dquot_transfer(inode, chid, mask); 1819 return ret;
1802 }
1803 return 0;
1804} 1820}
1805EXPORT_SYMBOL(dquot_transfer); 1821EXPORT_SYMBOL(dquot_transfer);
1806 1822
@@ -2275,25 +2291,30 @@ static inline qsize_t stoqb(qsize_t space)
2275} 2291}
2276 2292
2277/* Generic routine for getting common part of quota structure */ 2293/* Generic routine for getting common part of quota structure */
2278static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) 2294static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2279{ 2295{
2280 struct mem_dqblk *dm = &dquot->dq_dqb; 2296 struct mem_dqblk *dm = &dquot->dq_dqb;
2281 2297
2298 memset(di, 0, sizeof(*di));
2299 di->d_version = FS_DQUOT_VERSION;
2300 di->d_flags = dquot->dq_type == USRQUOTA ?
2301 XFS_USER_QUOTA : XFS_GROUP_QUOTA;
2302 di->d_id = dquot->dq_id;
2303
2282 spin_lock(&dq_data_lock); 2304 spin_lock(&dq_data_lock);
2283 di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit); 2305 di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
2284 di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit); 2306 di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
2285 di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace; 2307 di->d_ino_hardlimit = dm->dqb_ihardlimit;
2286 di->dqb_ihardlimit = dm->dqb_ihardlimit; 2308 di->d_ino_softlimit = dm->dqb_isoftlimit;
2287 di->dqb_isoftlimit = dm->dqb_isoftlimit; 2309 di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace;
2288 di->dqb_curinodes = dm->dqb_curinodes; 2310 di->d_icount = dm->dqb_curinodes;
2289 di->dqb_btime = dm->dqb_btime; 2311 di->d_btimer = dm->dqb_btime;
2290 di->dqb_itime = dm->dqb_itime; 2312 di->d_itimer = dm->dqb_itime;
2291 di->dqb_valid = QIF_ALL;
2292 spin_unlock(&dq_data_lock); 2313 spin_unlock(&dq_data_lock);
2293} 2314}
2294 2315
2295int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, 2316int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
2296 struct if_dqblk *di) 2317 struct fs_disk_quota *di)
2297{ 2318{
2298 struct dquot *dquot; 2319 struct dquot *dquot;
2299 2320
@@ -2307,51 +2328,70 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
2307} 2328}
2308EXPORT_SYMBOL(vfs_get_dqblk); 2329EXPORT_SYMBOL(vfs_get_dqblk);
2309 2330
2331#define VFS_FS_DQ_MASK \
2332 (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
2333 FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \
2334 FS_DQ_BTIMER | FS_DQ_ITIMER)
2335
2310/* Generic routine for setting common part of quota structure */ 2336/* Generic routine for setting common part of quota structure */
2311static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) 2337static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
2312{ 2338{
2313 struct mem_dqblk *dm = &dquot->dq_dqb; 2339 struct mem_dqblk *dm = &dquot->dq_dqb;
2314 int check_blim = 0, check_ilim = 0; 2340 int check_blim = 0, check_ilim = 0;
2315 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type]; 2341 struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
2316 2342
2317 if ((di->dqb_valid & QIF_BLIMITS && 2343 if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
2318 (di->dqb_bhardlimit > dqi->dqi_maxblimit || 2344 return -EINVAL;
2319 di->dqb_bsoftlimit > dqi->dqi_maxblimit)) || 2345
2320 (di->dqb_valid & QIF_ILIMITS && 2346 if (((di->d_fieldmask & FS_DQ_BSOFT) &&
2321 (di->dqb_ihardlimit > dqi->dqi_maxilimit || 2347 (di->d_blk_softlimit > dqi->dqi_maxblimit)) ||
2322 di->dqb_isoftlimit > dqi->dqi_maxilimit))) 2348 ((di->d_fieldmask & FS_DQ_BHARD) &&
2349 (di->d_blk_hardlimit > dqi->dqi_maxblimit)) ||
2350 ((di->d_fieldmask & FS_DQ_ISOFT) &&
2351 (di->d_ino_softlimit > dqi->dqi_maxilimit)) ||
2352 ((di->d_fieldmask & FS_DQ_IHARD) &&
2353 (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
2323 return -ERANGE; 2354 return -ERANGE;
2324 2355
2325 spin_lock(&dq_data_lock); 2356 spin_lock(&dq_data_lock);
2326 if (di->dqb_valid & QIF_SPACE) { 2357 if (di->d_fieldmask & FS_DQ_BCOUNT) {
2327 dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace; 2358 dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
2328 check_blim = 1; 2359 check_blim = 1;
2329 set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); 2360 set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
2330 } 2361 }
2331 if (di->dqb_valid & QIF_BLIMITS) { 2362
2332 dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit); 2363 if (di->d_fieldmask & FS_DQ_BSOFT)
2333 dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit); 2364 dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit);
2365 if (di->d_fieldmask & FS_DQ_BHARD)
2366 dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit);
2367 if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) {
2334 check_blim = 1; 2368 check_blim = 1;
2335 set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); 2369 set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
2336 } 2370 }
2337 if (di->dqb_valid & QIF_INODES) { 2371
2338 dm->dqb_curinodes = di->dqb_curinodes; 2372 if (di->d_fieldmask & FS_DQ_ICOUNT) {
2373 dm->dqb_curinodes = di->d_icount;
2339 check_ilim = 1; 2374 check_ilim = 1;
2340 set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); 2375 set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
2341 } 2376 }
2342 if (di->dqb_valid & QIF_ILIMITS) { 2377
2343 dm->dqb_isoftlimit = di->dqb_isoftlimit; 2378 if (di->d_fieldmask & FS_DQ_ISOFT)
2344 dm->dqb_ihardlimit = di->dqb_ihardlimit; 2379 dm->dqb_isoftlimit = di->d_ino_softlimit;
2380 if (di->d_fieldmask & FS_DQ_IHARD)
2381 dm->dqb_ihardlimit = di->d_ino_hardlimit;
2382 if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) {
2345 check_ilim = 1; 2383 check_ilim = 1;
2346 set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); 2384 set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
2347 } 2385 }
2348 if (di->dqb_valid & QIF_BTIME) { 2386
2349 dm->dqb_btime = di->dqb_btime; 2387 if (di->d_fieldmask & FS_DQ_BTIMER) {
2388 dm->dqb_btime = di->d_btimer;
2350 check_blim = 1; 2389 check_blim = 1;
2351 set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); 2390 set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
2352 } 2391 }
2353 if (di->dqb_valid & QIF_ITIME) { 2392
2354 dm->dqb_itime = di->dqb_itime; 2393 if (di->d_fieldmask & FS_DQ_ITIMER) {
2394 dm->dqb_itime = di->d_itimer;
2355 check_ilim = 1; 2395 check_ilim = 1;
2356 set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); 2396 set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
2357 } 2397 }
@@ -2361,7 +2401,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
2361 dm->dqb_curspace < dm->dqb_bsoftlimit) { 2401 dm->dqb_curspace < dm->dqb_bsoftlimit) {
2362 dm->dqb_btime = 0; 2402 dm->dqb_btime = 0;
2363 clear_bit(DQ_BLKS_B, &dquot->dq_flags); 2403 clear_bit(DQ_BLKS_B, &dquot->dq_flags);
2364 } else if (!(di->dqb_valid & QIF_BTIME)) 2404 } else if (!(di->d_fieldmask & FS_DQ_BTIMER))
2365 /* Set grace only if user hasn't provided his own... */ 2405 /* Set grace only if user hasn't provided his own... */
2366 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; 2406 dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
2367 } 2407 }
@@ -2370,7 +2410,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
2370 dm->dqb_curinodes < dm->dqb_isoftlimit) { 2410 dm->dqb_curinodes < dm->dqb_isoftlimit) {
2371 dm->dqb_itime = 0; 2411 dm->dqb_itime = 0;
2372 clear_bit(DQ_INODES_B, &dquot->dq_flags); 2412 clear_bit(DQ_INODES_B, &dquot->dq_flags);
2373 } else if (!(di->dqb_valid & QIF_ITIME)) 2413 } else if (!(di->d_fieldmask & FS_DQ_ITIMER))
2374 /* Set grace only if user hasn't provided his own... */ 2414 /* Set grace only if user hasn't provided his own... */
2375 dm->dqb_itime = get_seconds() + dqi->dqi_igrace; 2415 dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
2376 } 2416 }
@@ -2386,7 +2426,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
2386} 2426}
2387 2427
2388int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, 2428int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
2389 struct if_dqblk *di) 2429 struct fs_disk_quota *di)
2390{ 2430{
2391 struct dquot *dquot; 2431 struct dquot *dquot;
2392 int rc; 2432 int rc;
@@ -2465,62 +2505,74 @@ const struct quotactl_ops vfs_quotactl_ops = {
2465 .set_dqblk = vfs_set_dqblk 2505 .set_dqblk = vfs_set_dqblk
2466}; 2506};
2467 2507
2508
2509static int do_proc_dqstats(struct ctl_table *table, int write,
2510 void __user *buffer, size_t *lenp, loff_t *ppos)
2511{
2512#ifdef CONFIG_SMP
2513 /* Update global table */
2514 unsigned int type = (int *)table->data - dqstats.stat;
2515 dqstats.stat[type] = dqstats_read(type);
2516#endif
2517 return proc_dointvec(table, write, buffer, lenp, ppos);
2518}
2519
2468static ctl_table fs_dqstats_table[] = { 2520static ctl_table fs_dqstats_table[] = {
2469 { 2521 {
2470 .procname = "lookups", 2522 .procname = "lookups",
2471 .data = &dqstats.lookups, 2523 .data = &dqstats.stat[DQST_LOOKUPS],
2472 .maxlen = sizeof(int), 2524 .maxlen = sizeof(int),
2473 .mode = 0444, 2525 .mode = 0444,
2474 .proc_handler = proc_dointvec, 2526 .proc_handler = do_proc_dqstats,
2475 }, 2527 },
2476 { 2528 {
2477 .procname = "drops", 2529 .procname = "drops",
2478 .data = &dqstats.drops, 2530 .data = &dqstats.stat[DQST_DROPS],
2479 .maxlen = sizeof(int), 2531 .maxlen = sizeof(int),
2480 .mode = 0444, 2532 .mode = 0444,
2481 .proc_handler = proc_dointvec, 2533 .proc_handler = do_proc_dqstats,
2482 }, 2534 },
2483 { 2535 {
2484 .procname = "reads", 2536 .procname = "reads",
2485 .data = &dqstats.reads, 2537 .data = &dqstats.stat[DQST_READS],
2486 .maxlen = sizeof(int), 2538 .maxlen = sizeof(int),
2487 .mode = 0444, 2539 .mode = 0444,
2488 .proc_handler = proc_dointvec, 2540 .proc_handler = do_proc_dqstats,
2489 }, 2541 },
2490 { 2542 {
2491 .procname = "writes", 2543 .procname = "writes",
2492 .data = &dqstats.writes, 2544 .data = &dqstats.stat[DQST_WRITES],
2493 .maxlen = sizeof(int), 2545 .maxlen = sizeof(int),
2494 .mode = 0444, 2546 .mode = 0444,
2495 .proc_handler = proc_dointvec, 2547 .proc_handler = do_proc_dqstats,
2496 }, 2548 },
2497 { 2549 {
2498 .procname = "cache_hits", 2550 .procname = "cache_hits",
2499 .data = &dqstats.cache_hits, 2551 .data = &dqstats.stat[DQST_CACHE_HITS],
2500 .maxlen = sizeof(int), 2552 .maxlen = sizeof(int),
2501 .mode = 0444, 2553 .mode = 0444,
2502 .proc_handler = proc_dointvec, 2554 .proc_handler = do_proc_dqstats,
2503 }, 2555 },
2504 { 2556 {
2505 .procname = "allocated_dquots", 2557 .procname = "allocated_dquots",
2506 .data = &dqstats.allocated_dquots, 2558 .data = &dqstats.stat[DQST_ALLOC_DQUOTS],
2507 .maxlen = sizeof(int), 2559 .maxlen = sizeof(int),
2508 .mode = 0444, 2560 .mode = 0444,
2509 .proc_handler = proc_dointvec, 2561 .proc_handler = do_proc_dqstats,
2510 }, 2562 },
2511 { 2563 {
2512 .procname = "free_dquots", 2564 .procname = "free_dquots",
2513 .data = &dqstats.free_dquots, 2565 .data = &dqstats.stat[DQST_FREE_DQUOTS],
2514 .maxlen = sizeof(int), 2566 .maxlen = sizeof(int),
2515 .mode = 0444, 2567 .mode = 0444,
2516 .proc_handler = proc_dointvec, 2568 .proc_handler = do_proc_dqstats,
2517 }, 2569 },
2518 { 2570 {
2519 .procname = "syncs", 2571 .procname = "syncs",
2520 .data = &dqstats.syncs, 2572 .data = &dqstats.stat[DQST_SYNCS],
2521 .maxlen = sizeof(int), 2573 .maxlen = sizeof(int),
2522 .mode = 0444, 2574 .mode = 0444,
2523 .proc_handler = proc_dointvec, 2575 .proc_handler = do_proc_dqstats,
2524 }, 2576 },
2525#ifdef CONFIG_PRINT_QUOTA_WARNING 2577#ifdef CONFIG_PRINT_QUOTA_WARNING
2526 { 2578 {
@@ -2572,6 +2624,13 @@ static int __init dquot_init(void)
2572 if (!dquot_hash) 2624 if (!dquot_hash)
2573 panic("Cannot create dquot hash table"); 2625 panic("Cannot create dquot hash table");
2574 2626
2627#ifdef CONFIG_SMP
2628 dqstats_pcpu = alloc_percpu(struct dqstats);
2629 if (!dqstats_pcpu)
2630 panic("Cannot create dquot stats table");
2631#endif
2632 memset(&dqstats, 0, sizeof(struct dqstats));
2633
2575 /* Find power-of-two hlist_heads which can fit into allocation */ 2634 /* Find power-of-two hlist_heads which can fit into allocation */
2576 nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head); 2635 nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
2577 dq_hash_bits = 0; 2636 dq_hash_bits = 0;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95388f9b7356..cfc78826da90 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -113,8 +113,6 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
113 struct if_dqinfo info; 113 struct if_dqinfo info;
114 int ret; 114 int ret;
115 115
116 if (!sb_has_quota_active(sb, type))
117 return -ESRCH;
118 if (!sb->s_qcop->get_info) 116 if (!sb->s_qcop->get_info)
119 return -ENOSYS; 117 return -ENOSYS;
120 ret = sb->s_qcop->get_info(sb, type, &info); 118 ret = sb->s_qcop->get_info(sb, type, &info);
@@ -129,43 +127,80 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
129 127
130 if (copy_from_user(&info, addr, sizeof(info))) 128 if (copy_from_user(&info, addr, sizeof(info)))
131 return -EFAULT; 129 return -EFAULT;
132 if (!sb_has_quota_active(sb, type))
133 return -ESRCH;
134 if (!sb->s_qcop->set_info) 130 if (!sb->s_qcop->set_info)
135 return -ENOSYS; 131 return -ENOSYS;
136 return sb->s_qcop->set_info(sb, type, &info); 132 return sb->s_qcop->set_info(sb, type, &info);
137} 133}
138 134
135static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
136{
137 dst->dqb_bhardlimit = src->d_blk_hardlimit;
138 dst->dqb_bsoftlimit = src->d_blk_softlimit;
139 dst->dqb_curspace = src->d_bcount;
140 dst->dqb_ihardlimit = src->d_ino_hardlimit;
141 dst->dqb_isoftlimit = src->d_ino_softlimit;
142 dst->dqb_curinodes = src->d_icount;
143 dst->dqb_btime = src->d_btimer;
144 dst->dqb_itime = src->d_itimer;
145 dst->dqb_valid = QIF_ALL;
146}
147
139static int quota_getquota(struct super_block *sb, int type, qid_t id, 148static int quota_getquota(struct super_block *sb, int type, qid_t id,
140 void __user *addr) 149 void __user *addr)
141{ 150{
151 struct fs_disk_quota fdq;
142 struct if_dqblk idq; 152 struct if_dqblk idq;
143 int ret; 153 int ret;
144 154
145 if (!sb_has_quota_active(sb, type))
146 return -ESRCH;
147 if (!sb->s_qcop->get_dqblk) 155 if (!sb->s_qcop->get_dqblk)
148 return -ENOSYS; 156 return -ENOSYS;
149 ret = sb->s_qcop->get_dqblk(sb, type, id, &idq); 157 ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
150 if (ret) 158 if (ret)
151 return ret; 159 return ret;
160 copy_to_if_dqblk(&idq, &fdq);
152 if (copy_to_user(addr, &idq, sizeof(idq))) 161 if (copy_to_user(addr, &idq, sizeof(idq)))
153 return -EFAULT; 162 return -EFAULT;
154 return 0; 163 return 0;
155} 164}
156 165
166static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src)
167{
168 dst->d_blk_hardlimit = src->dqb_bhardlimit;
169 dst->d_blk_softlimit = src->dqb_bsoftlimit;
170 dst->d_bcount = src->dqb_curspace;
171 dst->d_ino_hardlimit = src->dqb_ihardlimit;
172 dst->d_ino_softlimit = src->dqb_isoftlimit;
173 dst->d_icount = src->dqb_curinodes;
174 dst->d_btimer = src->dqb_btime;
175 dst->d_itimer = src->dqb_itime;
176
177 dst->d_fieldmask = 0;
178 if (src->dqb_valid & QIF_BLIMITS)
179 dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD;
180 if (src->dqb_valid & QIF_SPACE)
181 dst->d_fieldmask |= FS_DQ_BCOUNT;
182 if (src->dqb_valid & QIF_ILIMITS)
183 dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD;
184 if (src->dqb_valid & QIF_INODES)
185 dst->d_fieldmask |= FS_DQ_ICOUNT;
186 if (src->dqb_valid & QIF_BTIME)
187 dst->d_fieldmask |= FS_DQ_BTIMER;
188 if (src->dqb_valid & QIF_ITIME)
189 dst->d_fieldmask |= FS_DQ_ITIMER;
190}
191
157static int quota_setquota(struct super_block *sb, int type, qid_t id, 192static int quota_setquota(struct super_block *sb, int type, qid_t id,
158 void __user *addr) 193 void __user *addr)
159{ 194{
195 struct fs_disk_quota fdq;
160 struct if_dqblk idq; 196 struct if_dqblk idq;
161 197
162 if (copy_from_user(&idq, addr, sizeof(idq))) 198 if (copy_from_user(&idq, addr, sizeof(idq)))
163 return -EFAULT; 199 return -EFAULT;
164 if (!sb_has_quota_active(sb, type))
165 return -ESRCH;
166 if (!sb->s_qcop->set_dqblk) 200 if (!sb->s_qcop->set_dqblk)
167 return -ENOSYS; 201 return -ENOSYS;
168 return sb->s_qcop->set_dqblk(sb, type, id, &idq); 202 copy_from_if_dqblk(&fdq, &idq);
203 return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
169} 204}
170 205
171static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr) 206static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
@@ -199,9 +234,9 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
199 234
200 if (copy_from_user(&fdq, addr, sizeof(fdq))) 235 if (copy_from_user(&fdq, addr, sizeof(fdq)))
201 return -EFAULT; 236 return -EFAULT;
202 if (!sb->s_qcop->set_xquota) 237 if (!sb->s_qcop->set_dqblk)
203 return -ENOSYS; 238 return -ENOSYS;
204 return sb->s_qcop->set_xquota(sb, type, id, &fdq); 239 return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
205} 240}
206 241
207static int quota_getxquota(struct super_block *sb, int type, qid_t id, 242static int quota_getxquota(struct super_block *sb, int type, qid_t id,
@@ -210,9 +245,9 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
210 struct fs_disk_quota fdq; 245 struct fs_disk_quota fdq;
211 int ret; 246 int ret;
212 247
213 if (!sb->s_qcop->get_xquota) 248 if (!sb->s_qcop->get_dqblk)
214 return -ENOSYS; 249 return -ENOSYS;
215 ret = sb->s_qcop->get_xquota(sb, type, id, &fdq); 250 ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
216 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) 251 if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
217 return -EFAULT; 252 return -EFAULT;
218 return ret; 253 return ret;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index f81f4bcfb178..24f03407eeb5 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -60,9 +60,17 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
60static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf) 60static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
61{ 61{
62 struct super_block *sb = info->dqi_sb; 62 struct super_block *sb = info->dqi_sb;
63 ssize_t ret;
63 64
64 return sb->s_op->quota_write(sb, info->dqi_type, buf, 65 ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
65 info->dqi_usable_bs, blk << info->dqi_blocksize_bits); 66 info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
67 if (ret != info->dqi_usable_bs) {
68 q_warn(KERN_WARNING "VFS: dquota write failed on "
69 "dev %s\n", sb->s_id);
70 if (ret >= 0)
71 ret = -EIO;
72 }
73 return ret;
66} 74}
67 75
68/* Remove empty block from list and return it */ 76/* Remove empty block from list and return it */
@@ -152,7 +160,7 @@ static int remove_free_dqentry(struct qtree_mem_dqinfo *info, char *buf,
152 dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); 160 dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
153 /* No matter whether write succeeds block is out of list */ 161 /* No matter whether write succeeds block is out of list */
154 if (write_blk(info, blk, buf) < 0) 162 if (write_blk(info, blk, buf) < 0)
155 printk(KERN_ERR 163 q_warn(KERN_ERR
156 "VFS: Can't write block (%u) with free entries.\n", 164 "VFS: Can't write block (%u) with free entries.\n",
157 blk); 165 blk);
158 return 0; 166 return 0;
@@ -244,7 +252,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
244 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { 252 if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
245 *err = remove_free_dqentry(info, buf, blk); 253 *err = remove_free_dqentry(info, buf, blk);
246 if (*err < 0) { 254 if (*err < 0) {
247 printk(KERN_ERR "VFS: find_free_dqentry(): Can't " 255 q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't "
248 "remove block (%u) from entry free list.\n", 256 "remove block (%u) from entry free list.\n",
249 blk); 257 blk);
250 goto out_buf; 258 goto out_buf;
@@ -268,7 +276,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
268#endif 276#endif
269 *err = write_blk(info, blk, buf); 277 *err = write_blk(info, blk, buf);
270 if (*err < 0) { 278 if (*err < 0) {
271 printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " 279 q_warn(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
272 "data block %u.\n", blk); 280 "data block %u.\n", blk);
273 goto out_buf; 281 goto out_buf;
274 } 282 }
@@ -303,7 +311,7 @@ static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
303 } else { 311 } else {
304 ret = read_blk(info, *treeblk, buf); 312 ret = read_blk(info, *treeblk, buf);
305 if (ret < 0) { 313 if (ret < 0) {
306 printk(KERN_ERR "VFS: Can't read tree quota block " 314 q_warn(KERN_ERR "VFS: Can't read tree quota block "
307 "%u.\n", *treeblk); 315 "%u.\n", *treeblk);
308 goto out_buf; 316 goto out_buf;
309 } 317 }
@@ -365,7 +373,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
365 if (!dquot->dq_off) { 373 if (!dquot->dq_off) {
366 ret = dq_insert_tree(info, dquot); 374 ret = dq_insert_tree(info, dquot);
367 if (ret < 0) { 375 if (ret < 0) {
368 printk(KERN_ERR "VFS: Error %zd occurred while " 376 q_warn(KERN_ERR "VFS: Error %zd occurred while "
369 "creating quota.\n", ret); 377 "creating quota.\n", ret);
370 kfree(ddquot); 378 kfree(ddquot);
371 return ret; 379 return ret;
@@ -377,14 +385,14 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
377 ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, 385 ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
378 dquot->dq_off); 386 dquot->dq_off);
379 if (ret != info->dqi_entry_size) { 387 if (ret != info->dqi_entry_size) {
380 printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", 388 q_warn(KERN_WARNING "VFS: dquota write failed on dev %s\n",
381 sb->s_id); 389 sb->s_id);
382 if (ret >= 0) 390 if (ret >= 0)
383 ret = -ENOSPC; 391 ret = -ENOSPC;
384 } else { 392 } else {
385 ret = 0; 393 ret = 0;
386 } 394 }
387 dqstats.writes++; 395 dqstats_inc(DQST_WRITES);
388 kfree(ddquot); 396 kfree(ddquot);
389 397
390 return ret; 398 return ret;
@@ -402,14 +410,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
402 if (!buf) 410 if (!buf)
403 return -ENOMEM; 411 return -ENOMEM;
404 if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { 412 if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
405 printk(KERN_ERR "VFS: Quota structure has offset to other " 413 q_warn(KERN_ERR "VFS: Quota structure has offset to other "
406 "block (%u) than it should (%u).\n", blk, 414 "block (%u) than it should (%u).\n", blk,
407 (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); 415 (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
408 goto out_buf; 416 goto out_buf;
409 } 417 }
410 ret = read_blk(info, blk, buf); 418 ret = read_blk(info, blk, buf);
411 if (ret < 0) { 419 if (ret < 0) {
412 printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); 420 q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
413 goto out_buf; 421 goto out_buf;
414 } 422 }
415 dh = (struct qt_disk_dqdbheader *)buf; 423 dh = (struct qt_disk_dqdbheader *)buf;
@@ -419,7 +427,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
419 if (ret >= 0) 427 if (ret >= 0)
420 ret = put_free_dqblk(info, buf, blk); 428 ret = put_free_dqblk(info, buf, blk);
421 if (ret < 0) { 429 if (ret < 0) {
422 printk(KERN_ERR "VFS: Can't move quota data block (%u) " 430 q_warn(KERN_ERR "VFS: Can't move quota data block (%u) "
423 "to free list.\n", blk); 431 "to free list.\n", blk);
424 goto out_buf; 432 goto out_buf;
425 } 433 }
@@ -432,14 +440,14 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
432 /* Insert will write block itself */ 440 /* Insert will write block itself */
433 ret = insert_free_dqentry(info, buf, blk); 441 ret = insert_free_dqentry(info, buf, blk);
434 if (ret < 0) { 442 if (ret < 0) {
435 printk(KERN_ERR "VFS: Can't insert quota data " 443 q_warn(KERN_ERR "VFS: Can't insert quota data "
436 "block (%u) to free entry list.\n", blk); 444 "block (%u) to free entry list.\n", blk);
437 goto out_buf; 445 goto out_buf;
438 } 446 }
439 } else { 447 } else {
440 ret = write_blk(info, blk, buf); 448 ret = write_blk(info, blk, buf);
441 if (ret < 0) { 449 if (ret < 0) {
442 printk(KERN_ERR "VFS: Can't write quota data " 450 q_warn(KERN_ERR "VFS: Can't write quota data "
443 "block %u\n", blk); 451 "block %u\n", blk);
444 goto out_buf; 452 goto out_buf;
445 } 453 }
@@ -464,7 +472,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
464 return -ENOMEM; 472 return -ENOMEM;
465 ret = read_blk(info, *blk, buf); 473 ret = read_blk(info, *blk, buf);
466 if (ret < 0) { 474 if (ret < 0) {
467 printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); 475 q_warn(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
468 goto out_buf; 476 goto out_buf;
469 } 477 }
470 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); 478 newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
@@ -488,7 +496,7 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
488 } else { 496 } else {
489 ret = write_blk(info, *blk, buf); 497 ret = write_blk(info, *blk, buf);
490 if (ret < 0) 498 if (ret < 0)
491 printk(KERN_ERR "VFS: Can't write quota tree " 499 q_warn(KERN_ERR "VFS: Can't write quota tree "
492 "block %u.\n", *blk); 500 "block %u.\n", *blk);
493 } 501 }
494 } 502 }
@@ -521,7 +529,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
521 return -ENOMEM; 529 return -ENOMEM;
522 ret = read_blk(info, blk, buf); 530 ret = read_blk(info, blk, buf);
523 if (ret < 0) { 531 if (ret < 0) {
524 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); 532 q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
525 goto out_buf; 533 goto out_buf;
526 } 534 }
527 ddquot = buf + sizeof(struct qt_disk_dqdbheader); 535 ddquot = buf + sizeof(struct qt_disk_dqdbheader);
@@ -531,7 +539,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
531 ddquot += info->dqi_entry_size; 539 ddquot += info->dqi_entry_size;
532 } 540 }
533 if (i == qtree_dqstr_in_blk(info)) { 541 if (i == qtree_dqstr_in_blk(info)) {
534 printk(KERN_ERR "VFS: Quota for id %u referenced " 542 q_warn(KERN_ERR "VFS: Quota for id %u referenced "
535 "but not present.\n", dquot->dq_id); 543 "but not present.\n", dquot->dq_id);
536 ret = -EIO; 544 ret = -EIO;
537 goto out_buf; 545 goto out_buf;
@@ -556,7 +564,7 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
556 return -ENOMEM; 564 return -ENOMEM;
557 ret = read_blk(info, blk, buf); 565 ret = read_blk(info, blk, buf);
558 if (ret < 0) { 566 if (ret < 0) {
559 printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); 567 q_warn(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
560 goto out_buf; 568 goto out_buf;
561 } 569 }
562 ret = 0; 570 ret = 0;
@@ -599,7 +607,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
599 offset = find_dqentry(info, dquot); 607 offset = find_dqentry(info, dquot);
600 if (offset <= 0) { /* Entry not present? */ 608 if (offset <= 0) { /* Entry not present? */
601 if (offset < 0) 609 if (offset < 0)
602 printk(KERN_ERR "VFS: Can't read quota " 610 q_warn(KERN_ERR "VFS: Can't read quota "
603 "structure for id %u.\n", dquot->dq_id); 611 "structure for id %u.\n", dquot->dq_id);
604 dquot->dq_off = 0; 612 dquot->dq_off = 0;
605 set_bit(DQ_FAKE_B, &dquot->dq_flags); 613 set_bit(DQ_FAKE_B, &dquot->dq_flags);
@@ -617,7 +625,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
617 if (ret != info->dqi_entry_size) { 625 if (ret != info->dqi_entry_size) {
618 if (ret >= 0) 626 if (ret >= 0)
619 ret = -EIO; 627 ret = -EIO;
620 printk(KERN_ERR "VFS: Error while reading quota " 628 q_warn(KERN_ERR "VFS: Error while reading quota "
621 "structure for id %u.\n", dquot->dq_id); 629 "structure for id %u.\n", dquot->dq_id);
622 set_bit(DQ_FAKE_B, &dquot->dq_flags); 630 set_bit(DQ_FAKE_B, &dquot->dq_flags);
623 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); 631 memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
@@ -634,7 +642,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
634 spin_unlock(&dq_data_lock); 642 spin_unlock(&dq_data_lock);
635 kfree(ddquot); 643 kfree(ddquot);
636out: 644out:
637 dqstats.reads++; 645 dqstats_inc(DQST_READS);
638 return ret; 646 return ret;
639} 647}
640EXPORT_SYMBOL(qtree_read_dquot); 648EXPORT_SYMBOL(qtree_read_dquot);
diff --git a/fs/quota/quota_tree.h b/fs/quota/quota_tree.h
index a1ab8db81a51..ccc3e71fb1d8 100644
--- a/fs/quota/quota_tree.h
+++ b/fs/quota/quota_tree.h
@@ -22,4 +22,10 @@ struct qt_disk_dqdbheader {
22 22
23#define QT_TREEOFF 1 /* Offset of tree in file in blocks */ 23#define QT_TREEOFF 1 /* Offset of tree in file in blocks */
24 24
25#define q_warn(fmt, args...) \
26do { \
27 if (printk_ratelimit()) \
28 printk(fmt, ## args); \
29} while(0)
30
25#endif /* _LINUX_QUOTAIO_TREE_H */ 31#endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 2ae757e9c008..4af344c5852a 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
71 dquot->dq_dqb.dqb_ihardlimit == 0 && 71 dquot->dq_dqb.dqb_ihardlimit == 0 &&
72 dquot->dq_dqb.dqb_isoftlimit == 0) 72 dquot->dq_dqb.dqb_isoftlimit == 0)
73 set_bit(DQ_FAKE_B, &dquot->dq_flags); 73 set_bit(DQ_FAKE_B, &dquot->dq_flags);
74 dqstats.reads++; 74 dqstats_inc(DQST_READS);
75 75
76 return 0; 76 return 0;
77} 77}
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
104 ret = 0; 104 ret = 0;
105 105
106out: 106out:
107 dqstats.writes++; 107 dqstats_inc(DQST_WRITES);
108 108
109 return ret; 109 return ret;
110} 110}
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index e3da02f4986f..135206af1458 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -63,7 +63,7 @@ static int v2_read_header(struct super_block *sb, int type,
63 size = sb->s_op->quota_read(sb, type, (char *)dqhead, 63 size = sb->s_op->quota_read(sb, type, (char *)dqhead,
64 sizeof(struct v2_disk_dqheader), 0); 64 sizeof(struct v2_disk_dqheader), 0);
65 if (size != sizeof(struct v2_disk_dqheader)) { 65 if (size != sizeof(struct v2_disk_dqheader)) {
66 printk(KERN_WARNING "quota_v2: Failed header read:" 66 q_warn(KERN_WARNING "quota_v2: Failed header read:"
67 " expected=%zd got=%zd\n", 67 " expected=%zd got=%zd\n",
68 sizeof(struct v2_disk_dqheader), size); 68 sizeof(struct v2_disk_dqheader), size);
69 return 0; 69 return 0;
@@ -106,7 +106,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo, 106 size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 107 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
108 if (size != sizeof(struct v2_disk_dqinfo)) { 108 if (size != sizeof(struct v2_disk_dqinfo)) {
109 printk(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n", 109 q_warn(KERN_WARNING "quota_v2: Can't read info structure on device %s.\n",
110 sb->s_id); 110 sb->s_id);
111 return -1; 111 return -1;
112 } 112 }
@@ -167,7 +167,7 @@ static int v2_write_file_info(struct super_block *sb, int type)
167 size = sb->s_op->quota_write(sb, type, (char *)&dinfo, 167 size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
168 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); 168 sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
169 if (size != sizeof(struct v2_disk_dqinfo)) { 169 if (size != sizeof(struct v2_disk_dqinfo)) {
170 printk(KERN_WARNING "Can't write info structure on device %s.\n", 170 q_warn(KERN_WARNING "Can't write info structure on device %s.\n",
171 sb->s_id); 171 sb->s_id);
172 return -1; 172 return -1;
173 } 173 }
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index c94853473ca9..f47cd212dee1 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -214,7 +214,7 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
214 return 0; 214 return 0;
215} 215}
216 216
217static int ramfs_fill_super(struct super_block * sb, void * data, int silent) 217int ramfs_fill_super(struct super_block *sb, void *data, int silent)
218{ 218{
219 struct ramfs_fs_info *fsi; 219 struct ramfs_fs_info *fsi;
220 struct inode *inode = NULL; 220 struct inode *inode = NULL;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index dc2c65e04853..0f22fdaf54ac 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3076,9 +3076,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3076 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3076 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3077 3077
3078 depth = reiserfs_write_lock_once(inode->i_sb); 3078 depth = reiserfs_write_lock_once(inode->i_sb);
3079 if (attr->ia_valid & ATTR_SIZE) { 3079 if (is_quota_modification(inode, attr))
3080 dquot_initialize(inode); 3080 dquot_initialize(inode);
3081 3081
3082 if (attr->ia_valid & ATTR_SIZE) {
3082 /* version 2 items will be caught by the s_maxbytes check 3083 /* version 2 items will be caught by the s_maxbytes check
3083 ** done for us in vmtruncate 3084 ** done for us in vmtruncate
3084 */ 3085 */
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index e9d293593e52..4e321f7353fa 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -46,9 +46,9 @@ struct bin_buffer {
46}; 46};
47 47
48static int 48static int
49fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) 49fill_read(struct file *file, char *buffer, loff_t off, size_t count)
50{ 50{
51 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 51 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
52 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; 52 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
53 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 53 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
54 int rc; 54 int rc;
@@ -59,7 +59,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
59 59
60 rc = -EIO; 60 rc = -EIO;
61 if (attr->read) 61 if (attr->read)
62 rc = attr->read(kobj, attr, buffer, off, count); 62 rc = attr->read(file, kobj, attr, buffer, off, count);
63 63
64 sysfs_put_active(attr_sd); 64 sysfs_put_active(attr_sd);
65 65
@@ -70,8 +70,7 @@ static ssize_t
70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) 70read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
71{ 71{
72 struct bin_buffer *bb = file->private_data; 72 struct bin_buffer *bb = file->private_data;
73 struct dentry *dentry = file->f_path.dentry; 73 int size = file->f_path.dentry->d_inode->i_size;
74 int size = dentry->d_inode->i_size;
75 loff_t offs = *off; 74 loff_t offs = *off;
76 int count = min_t(size_t, bytes, PAGE_SIZE); 75 int count = min_t(size_t, bytes, PAGE_SIZE);
77 char *temp; 76 char *temp;
@@ -92,7 +91,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
92 91
93 mutex_lock(&bb->mutex); 92 mutex_lock(&bb->mutex);
94 93
95 count = fill_read(dentry, bb->buffer, offs, count); 94 count = fill_read(file, bb->buffer, offs, count);
96 if (count < 0) { 95 if (count < 0) {
97 mutex_unlock(&bb->mutex); 96 mutex_unlock(&bb->mutex);
98 goto out_free; 97 goto out_free;
@@ -117,9 +116,9 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
117} 116}
118 117
119static int 118static int
120flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) 119flush_write(struct file *file, char *buffer, loff_t offset, size_t count)
121{ 120{
122 struct sysfs_dirent *attr_sd = dentry->d_fsdata; 121 struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
123 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; 122 struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
124 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; 123 struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
125 int rc; 124 int rc;
@@ -130,7 +129,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
130 129
131 rc = -EIO; 130 rc = -EIO;
132 if (attr->write) 131 if (attr->write)
133 rc = attr->write(kobj, attr, buffer, offset, count); 132 rc = attr->write(file, kobj, attr, buffer, offset, count);
134 133
135 sysfs_put_active(attr_sd); 134 sysfs_put_active(attr_sd);
136 135
@@ -141,8 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
141 size_t bytes, loff_t *off) 140 size_t bytes, loff_t *off)
142{ 141{
143 struct bin_buffer *bb = file->private_data; 142 struct bin_buffer *bb = file->private_data;
144 struct dentry *dentry = file->f_path.dentry; 143 int size = file->f_path.dentry->d_inode->i_size;
145 int size = dentry->d_inode->i_size;
146 loff_t offs = *off; 144 loff_t offs = *off;
147 int count = min_t(size_t, bytes, PAGE_SIZE); 145 int count = min_t(size_t, bytes, PAGE_SIZE);
148 char *temp; 146 char *temp;
@@ -165,7 +163,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
165 163
166 memcpy(bb->buffer, temp, count); 164 memcpy(bb->buffer, temp, count);
167 165
168 count = flush_write(dentry, bb->buffer, offs, count); 166 count = flush_write(file, bb->buffer, offs, count);
169 mutex_unlock(&bb->mutex); 167 mutex_unlock(&bb->mutex);
170 168
171 if (count > 0) 169 if (count > 0)
@@ -363,7 +361,7 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
363 if (!attr->mmap) 361 if (!attr->mmap)
364 goto out_put; 362 goto out_put;
365 363
366 rc = attr->mmap(kobj, attr, vma); 364 rc = attr->mmap(file, kobj, attr, vma);
367 if (rc) 365 if (rc)
368 goto out_put; 366 goto out_put;
369 367
@@ -501,7 +499,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
501void sysfs_remove_bin_file(struct kobject *kobj, 499void sysfs_remove_bin_file(struct kobject *kobj,
502 const struct bin_attribute *attr) 500 const struct bin_attribute *attr)
503{ 501{
504 sysfs_hash_and_remove(kobj->sd, attr->attr.name); 502 sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name);
505} 503}
506 504
507EXPORT_SYMBOL_GPL(sysfs_create_bin_file); 505EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 590717861c7a..7e54bac8c4b0 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -380,7 +380,7 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
380{ 380{
381 struct sysfs_inode_attrs *ps_iattr; 381 struct sysfs_inode_attrs *ps_iattr;
382 382
383 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) 383 if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name))
384 return -EEXIST; 384 return -EEXIST;
385 385
386 sd->s_parent = sysfs_get(acxt->parent_sd); 386 sd->s_parent = sysfs_get(acxt->parent_sd);
@@ -533,13 +533,17 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
533 * Pointer to sysfs_dirent if found, NULL if not. 533 * Pointer to sysfs_dirent if found, NULL if not.
534 */ 534 */
535struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, 535struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
536 const void *ns,
536 const unsigned char *name) 537 const unsigned char *name)
537{ 538{
538 struct sysfs_dirent *sd; 539 struct sysfs_dirent *sd;
539 540
540 for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) 541 for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
542 if (ns && sd->s_ns && (sd->s_ns != ns))
543 continue;
541 if (!strcmp(sd->s_name, name)) 544 if (!strcmp(sd->s_name, name))
542 return sd; 545 return sd;
546 }
543 return NULL; 547 return NULL;
544} 548}
545 549
@@ -558,12 +562,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
558 * Pointer to sysfs_dirent if found, NULL if not. 562 * Pointer to sysfs_dirent if found, NULL if not.
559 */ 563 */
560struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, 564struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
565 const void *ns,
561 const unsigned char *name) 566 const unsigned char *name)
562{ 567{
563 struct sysfs_dirent *sd; 568 struct sysfs_dirent *sd;
564 569
565 mutex_lock(&sysfs_mutex); 570 mutex_lock(&sysfs_mutex);
566 sd = sysfs_find_dirent(parent_sd, name); 571 sd = sysfs_find_dirent(parent_sd, ns, name);
567 sysfs_get(sd); 572 sysfs_get(sd);
568 mutex_unlock(&sysfs_mutex); 573 mutex_unlock(&sysfs_mutex);
569 574
@@ -572,7 +577,8 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
572EXPORT_SYMBOL_GPL(sysfs_get_dirent); 577EXPORT_SYMBOL_GPL(sysfs_get_dirent);
573 578
574static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, 579static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
575 const char *name, struct sysfs_dirent **p_sd) 580 enum kobj_ns_type type, const void *ns, const char *name,
581 struct sysfs_dirent **p_sd)
576{ 582{
577 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 583 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
578 struct sysfs_addrm_cxt acxt; 584 struct sysfs_addrm_cxt acxt;
@@ -583,6 +589,9 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
583 sd = sysfs_new_dirent(name, mode, SYSFS_DIR); 589 sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
584 if (!sd) 590 if (!sd)
585 return -ENOMEM; 591 return -ENOMEM;
592
593 sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
594 sd->s_ns = ns;
586 sd->s_dir.kobj = kobj; 595 sd->s_dir.kobj = kobj;
587 596
588 /* link in */ 597 /* link in */
@@ -601,7 +610,33 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
601int sysfs_create_subdir(struct kobject *kobj, const char *name, 610int sysfs_create_subdir(struct kobject *kobj, const char *name,
602 struct sysfs_dirent **p_sd) 611 struct sysfs_dirent **p_sd)
603{ 612{
604 return create_dir(kobj, kobj->sd, name, p_sd); 613 return create_dir(kobj, kobj->sd,
614 KOBJ_NS_TYPE_NONE, NULL, name, p_sd);
615}
616
617/**
618 * sysfs_read_ns_type: return associated ns_type
619 * @kobj: the kobject being queried
620 *
621 * Each kobject can be tagged with exactly one namespace type
622 * (i.e. network or user). Return the ns_type associated with
623 * this object if any
624 */
625static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
626{
627 const struct kobj_ns_type_operations *ops;
628 enum kobj_ns_type type;
629
630 ops = kobj_child_ns_ops(kobj);
631 if (!ops)
632 return KOBJ_NS_TYPE_NONE;
633
634 type = ops->type;
635 BUG_ON(type <= KOBJ_NS_TYPE_NONE);
636 BUG_ON(type >= KOBJ_NS_TYPES);
637 BUG_ON(!kobj_ns_type_registered(type));
638
639 return type;
605} 640}
606 641
607/** 642/**
@@ -610,7 +645,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
610 */ 645 */
611int sysfs_create_dir(struct kobject * kobj) 646int sysfs_create_dir(struct kobject * kobj)
612{ 647{
648 enum kobj_ns_type type;
613 struct sysfs_dirent *parent_sd, *sd; 649 struct sysfs_dirent *parent_sd, *sd;
650 const void *ns = NULL;
614 int error = 0; 651 int error = 0;
615 652
616 BUG_ON(!kobj); 653 BUG_ON(!kobj);
@@ -620,7 +657,11 @@ int sysfs_create_dir(struct kobject * kobj)
620 else 657 else
621 parent_sd = &sysfs_root; 658 parent_sd = &sysfs_root;
622 659
623 error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); 660 if (sysfs_ns_type(parent_sd))
661 ns = kobj->ktype->namespace(kobj);
662 type = sysfs_read_ns_type(kobj);
663
664 error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd);
624 if (!error) 665 if (!error)
625 kobj->sd = sd; 666 kobj->sd = sd;
626 return error; 667 return error;
@@ -630,13 +671,19 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
630 struct nameidata *nd) 671 struct nameidata *nd)
631{ 672{
632 struct dentry *ret = NULL; 673 struct dentry *ret = NULL;
633 struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata; 674 struct dentry *parent = dentry->d_parent;
675 struct sysfs_dirent *parent_sd = parent->d_fsdata;
634 struct sysfs_dirent *sd; 676 struct sysfs_dirent *sd;
635 struct inode *inode; 677 struct inode *inode;
678 enum kobj_ns_type type;
679 const void *ns;
636 680
637 mutex_lock(&sysfs_mutex); 681 mutex_lock(&sysfs_mutex);
638 682
639 sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); 683 type = sysfs_ns_type(parent_sd);
684 ns = sysfs_info(dir->i_sb)->ns[type];
685
686 sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name);
640 687
641 /* no such entry */ 688 /* no such entry */
642 if (!sd) { 689 if (!sd) {
@@ -735,7 +782,8 @@ void sysfs_remove_dir(struct kobject * kobj)
735} 782}
736 783
737int sysfs_rename(struct sysfs_dirent *sd, 784int sysfs_rename(struct sysfs_dirent *sd,
738 struct sysfs_dirent *new_parent_sd, const char *new_name) 785 struct sysfs_dirent *new_parent_sd, const void *new_ns,
786 const char *new_name)
739{ 787{
740 const char *dup_name = NULL; 788 const char *dup_name = NULL;
741 int error; 789 int error;
@@ -743,12 +791,12 @@ int sysfs_rename(struct sysfs_dirent *sd,
743 mutex_lock(&sysfs_mutex); 791 mutex_lock(&sysfs_mutex);
744 792
745 error = 0; 793 error = 0;
746 if ((sd->s_parent == new_parent_sd) && 794 if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
747 (strcmp(sd->s_name, new_name) == 0)) 795 (strcmp(sd->s_name, new_name) == 0))
748 goto out; /* nothing to rename */ 796 goto out; /* nothing to rename */
749 797
750 error = -EEXIST; 798 error = -EEXIST;
751 if (sysfs_find_dirent(new_parent_sd, new_name)) 799 if (sysfs_find_dirent(new_parent_sd, new_ns, new_name))
752 goto out; 800 goto out;
753 801
754 /* rename sysfs_dirent */ 802 /* rename sysfs_dirent */
@@ -770,6 +818,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
770 sd->s_parent = new_parent_sd; 818 sd->s_parent = new_parent_sd;
771 sysfs_link_sibling(sd); 819 sysfs_link_sibling(sd);
772 } 820 }
821 sd->s_ns = new_ns;
773 822
774 error = 0; 823 error = 0;
775 out: 824 out:
@@ -780,19 +829,28 @@ int sysfs_rename(struct sysfs_dirent *sd,
780 829
781int sysfs_rename_dir(struct kobject *kobj, const char *new_name) 830int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
782{ 831{
783 return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name); 832 struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
833 const void *new_ns = NULL;
834
835 if (sysfs_ns_type(parent_sd))
836 new_ns = kobj->ktype->namespace(kobj);
837
838 return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name);
784} 839}
785 840
786int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) 841int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
787{ 842{
788 struct sysfs_dirent *sd = kobj->sd; 843 struct sysfs_dirent *sd = kobj->sd;
789 struct sysfs_dirent *new_parent_sd; 844 struct sysfs_dirent *new_parent_sd;
845 const void *new_ns = NULL;
790 846
791 BUG_ON(!sd->s_parent); 847 BUG_ON(!sd->s_parent);
848 if (sysfs_ns_type(sd->s_parent))
849 new_ns = kobj->ktype->namespace(kobj);
792 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? 850 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
793 new_parent_kobj->sd : &sysfs_root; 851 new_parent_kobj->sd : &sysfs_root;
794 852
795 return sysfs_rename(sd, new_parent_sd, sd->s_name); 853 return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name);
796} 854}
797 855
798/* Relationship between s_mode and the DT_xxx types */ 856/* Relationship between s_mode and the DT_xxx types */
@@ -807,32 +865,35 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
807 return 0; 865 return 0;
808} 866}
809 867
810static struct sysfs_dirent *sysfs_dir_pos(struct sysfs_dirent *parent_sd, 868static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
811 ino_t ino, struct sysfs_dirent *pos) 869 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
812{ 870{
813 if (pos) { 871 if (pos) {
814 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && 872 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
815 pos->s_parent == parent_sd && 873 pos->s_parent == parent_sd &&
816 ino == pos->s_ino; 874 ino == pos->s_ino;
817 sysfs_put(pos); 875 sysfs_put(pos);
818 if (valid) 876 if (!valid)
819 return pos; 877 pos = NULL;
820 } 878 }
821 pos = NULL; 879 if (!pos && (ino > 1) && (ino < INT_MAX)) {
822 if ((ino > 1) && (ino < INT_MAX)) {
823 pos = parent_sd->s_dir.children; 880 pos = parent_sd->s_dir.children;
824 while (pos && (ino > pos->s_ino)) 881 while (pos && (ino > pos->s_ino))
825 pos = pos->s_sibling; 882 pos = pos->s_sibling;
826 } 883 }
884 while (pos && pos->s_ns && pos->s_ns != ns)
885 pos = pos->s_sibling;
827 return pos; 886 return pos;
828} 887}
829 888
830static struct sysfs_dirent *sysfs_dir_next_pos(struct sysfs_dirent *parent_sd, 889static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
831 ino_t ino, struct sysfs_dirent *pos) 890 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
832{ 891{
833 pos = sysfs_dir_pos(parent_sd, ino, pos); 892 pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
834 if (pos) 893 if (pos)
835 pos = pos->s_sibling; 894 pos = pos->s_sibling;
895 while (pos && pos->s_ns && pos->s_ns != ns)
896 pos = pos->s_sibling;
836 return pos; 897 return pos;
837} 898}
838 899
@@ -841,8 +902,13 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
841 struct dentry *dentry = filp->f_path.dentry; 902 struct dentry *dentry = filp->f_path.dentry;
842 struct sysfs_dirent * parent_sd = dentry->d_fsdata; 903 struct sysfs_dirent * parent_sd = dentry->d_fsdata;
843 struct sysfs_dirent *pos = filp->private_data; 904 struct sysfs_dirent *pos = filp->private_data;
905 enum kobj_ns_type type;
906 const void *ns;
844 ino_t ino; 907 ino_t ino;
845 908
909 type = sysfs_ns_type(parent_sd);
910 ns = sysfs_info(dentry->d_sb)->ns[type];
911
846 if (filp->f_pos == 0) { 912 if (filp->f_pos == 0) {
847 ino = parent_sd->s_ino; 913 ino = parent_sd->s_ino;
848 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) 914 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
@@ -857,9 +923,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
857 filp->f_pos++; 923 filp->f_pos++;
858 } 924 }
859 mutex_lock(&sysfs_mutex); 925 mutex_lock(&sysfs_mutex);
860 for (pos = sysfs_dir_pos(parent_sd, filp->f_pos, pos); 926 for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
861 pos; 927 pos;
862 pos = sysfs_dir_next_pos(parent_sd, filp->f_pos, pos)) { 928 pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) {
863 const char * name; 929 const char * name;
864 unsigned int type; 930 unsigned int type;
865 int len, ret; 931 int len, ret;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e222b2582746..1beaa739d0a6 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -478,9 +478,12 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
478 mutex_lock(&sysfs_mutex); 478 mutex_lock(&sysfs_mutex);
479 479
480 if (sd && dir) 480 if (sd && dir)
481 sd = sysfs_find_dirent(sd, dir); 481 /* Only directories are tagged, so no need to pass
482 * a tag explicitly.
483 */
484 sd = sysfs_find_dirent(sd, NULL, dir);
482 if (sd && attr) 485 if (sd && attr)
483 sd = sysfs_find_dirent(sd, attr); 486 sd = sysfs_find_dirent(sd, NULL, attr);
484 if (sd) 487 if (sd)
485 sysfs_notify_dirent(sd); 488 sysfs_notify_dirent(sd);
486 489
@@ -569,7 +572,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
569 int error; 572 int error;
570 573
571 if (group) 574 if (group)
572 dir_sd = sysfs_get_dirent(kobj->sd, group); 575 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
573 else 576 else
574 dir_sd = sysfs_get(kobj->sd); 577 dir_sd = sysfs_get(kobj->sd);
575 578
@@ -599,7 +602,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
599 mutex_lock(&sysfs_mutex); 602 mutex_lock(&sysfs_mutex);
600 603
601 rc = -ENOENT; 604 rc = -ENOENT;
602 sd = sysfs_find_dirent(kobj->sd, attr->name); 605 sd = sysfs_find_dirent(kobj->sd, NULL, attr->name);
603 if (!sd) 606 if (!sd)
604 goto out; 607 goto out;
605 608
@@ -624,7 +627,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
624 627
625void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) 628void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
626{ 629{
627 sysfs_hash_and_remove(kobj->sd, attr->name); 630 sysfs_hash_and_remove(kobj->sd, NULL, attr->name);
628} 631}
629 632
630void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr) 633void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr)
@@ -646,11 +649,11 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
646 struct sysfs_dirent *dir_sd; 649 struct sysfs_dirent *dir_sd;
647 650
648 if (group) 651 if (group)
649 dir_sd = sysfs_get_dirent(kobj->sd, group); 652 dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
650 else 653 else
651 dir_sd = sysfs_get(kobj->sd); 654 dir_sd = sysfs_get(kobj->sd);
652 if (dir_sd) { 655 if (dir_sd) {
653 sysfs_hash_and_remove(dir_sd, attr->name); 656 sysfs_hash_and_remove(dir_sd, NULL, attr->name);
654 sysfs_put(dir_sd); 657 sysfs_put(dir_sd);
655 } 658 }
656} 659}
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index fe611949a7f7..23c1e598792a 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -23,7 +23,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
23 int i; 23 int i;
24 24
25 for (i = 0, attr = grp->attrs; *attr; i++, attr++) 25 for (i = 0, attr = grp->attrs; *attr; i++, attr++)
26 sysfs_hash_and_remove(dir_sd, (*attr)->name); 26 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
27} 27}
28 28
29static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, 29static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
@@ -39,7 +39,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
39 * visibility. Do this by first removing then 39 * visibility. Do this by first removing then
40 * re-adding (if required) the file */ 40 * re-adding (if required) the file */
41 if (update) 41 if (update)
42 sysfs_hash_and_remove(dir_sd, (*attr)->name); 42 sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
43 if (grp->is_visible) { 43 if (grp->is_visible) {
44 mode = grp->is_visible(kobj, *attr, i); 44 mode = grp->is_visible(kobj, *attr, i);
45 if (!mode) 45 if (!mode)
@@ -132,7 +132,7 @@ void sysfs_remove_group(struct kobject * kobj,
132 struct sysfs_dirent *sd; 132 struct sysfs_dirent *sd;
133 133
134 if (grp->name) { 134 if (grp->name) {
135 sd = sysfs_get_dirent(dir_sd, grp->name); 135 sd = sysfs_get_dirent(dir_sd, NULL, grp->name);
136 if (!sd) { 136 if (!sd) {
137 WARN(!sd, KERN_WARNING "sysfs group %p not found for " 137 WARN(!sd, KERN_WARNING "sysfs group %p not found for "
138 "kobject '%s'\n", grp, kobject_name(kobj)); 138 "kobject '%s'\n", grp, kobject_name(kobj));
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index a4a0a9419711..bbd77e95cf7f 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -324,7 +324,7 @@ void sysfs_delete_inode(struct inode *inode)
324 sysfs_put(sd); 324 sysfs_put(sd);
325} 325}
326 326
327int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name) 327int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name)
328{ 328{
329 struct sysfs_addrm_cxt acxt; 329 struct sysfs_addrm_cxt acxt;
330 struct sysfs_dirent *sd; 330 struct sysfs_dirent *sd;
@@ -334,7 +334,9 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
334 334
335 sysfs_addrm_start(&acxt, dir_sd); 335 sysfs_addrm_start(&acxt, dir_sd);
336 336
337 sd = sysfs_find_dirent(dir_sd, name); 337 sd = sysfs_find_dirent(dir_sd, ns, name);
338 if (sd && (sd->s_ns != ns))
339 sd = NULL;
338 if (sd) 340 if (sd)
339 sysfs_remove_one(&acxt, sd); 341 sysfs_remove_one(&acxt, sd);
340 342
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 776137828dca..281c0c9bc39f 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -35,7 +35,7 @@ static const struct super_operations sysfs_ops = {
35struct sysfs_dirent sysfs_root = { 35struct sysfs_dirent sysfs_root = {
36 .s_name = "", 36 .s_name = "",
37 .s_count = ATOMIC_INIT(1), 37 .s_count = ATOMIC_INIT(1),
38 .s_flags = SYSFS_DIR, 38 .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
39 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, 39 .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
40 .s_ino = 1, 40 .s_ino = 1,
41}; 41};
@@ -72,18 +72,107 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
72 return 0; 72 return 0;
73} 73}
74 74
75static int sysfs_test_super(struct super_block *sb, void *data)
76{
77 struct sysfs_super_info *sb_info = sysfs_info(sb);
78 struct sysfs_super_info *info = data;
79 enum kobj_ns_type type;
80 int found = 1;
81
82 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
83 if (sb_info->ns[type] != info->ns[type])
84 found = 0;
85 }
86 return found;
87}
88
89static int sysfs_set_super(struct super_block *sb, void *data)
90{
91 int error;
92 error = set_anon_super(sb, data);
93 if (!error)
94 sb->s_fs_info = data;
95 return error;
96}
97
75static int sysfs_get_sb(struct file_system_type *fs_type, 98static int sysfs_get_sb(struct file_system_type *fs_type,
76 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 99 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
77{ 100{
78 return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt); 101 struct sysfs_super_info *info;
102 enum kobj_ns_type type;
103 struct super_block *sb;
104 int error;
105
106 error = -ENOMEM;
107 info = kzalloc(sizeof(*info), GFP_KERNEL);
108 if (!info)
109 goto out;
110
111 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
112 info->ns[type] = kobj_ns_current(type);
113
114 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
115 if (IS_ERR(sb) || sb->s_fs_info != info)
116 kfree(info);
117 if (IS_ERR(sb)) {
118 error = PTR_ERR(sb);
119 goto out;
120 }
121 if (!sb->s_root) {
122 sb->s_flags = flags;
123 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
124 if (error) {
125 deactivate_locked_super(sb);
126 goto out;
127 }
128 sb->s_flags |= MS_ACTIVE;
129 }
130
131 simple_set_mnt(mnt, sb);
132 error = 0;
133out:
134 return error;
135}
136
137static void sysfs_kill_sb(struct super_block *sb)
138{
139 struct sysfs_super_info *info = sysfs_info(sb);
140
141 /* Remove the superblock from fs_supers/s_instances
142 * so we can't find it, before freeing sysfs_super_info.
143 */
144 kill_anon_super(sb);
145 kfree(info);
79} 146}
80 147
81static struct file_system_type sysfs_fs_type = { 148static struct file_system_type sysfs_fs_type = {
82 .name = "sysfs", 149 .name = "sysfs",
83 .get_sb = sysfs_get_sb, 150 .get_sb = sysfs_get_sb,
84 .kill_sb = kill_anon_super, 151 .kill_sb = sysfs_kill_sb,
85}; 152};
86 153
154void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
155{
156 struct super_block *sb;
157
158 mutex_lock(&sysfs_mutex);
159 spin_lock(&sb_lock);
160 list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
161 struct sysfs_super_info *info = sysfs_info(sb);
162 /*
163 * If we see a superblock on the fs_supers/s_instances
164 * list the unmount has not completed and sb->s_fs_info
165 * points to a valid struct sysfs_super_info.
166 */
167 /* Ignore superblocks with the wrong ns */
168 if (info->ns[type] != ns)
169 continue;
170 info->ns[type] = NULL;
171 }
172 spin_unlock(&sb_lock);
173 mutex_unlock(&sysfs_mutex);
174}
175
87int __init sysfs_init(void) 176int __init sysfs_init(void)
88{ 177{
89 int err = -ENOMEM; 178 int err = -ENOMEM;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index b93ec51fa7ac..f71246bebfe4 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -58,6 +58,8 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
58 if (!sd) 58 if (!sd)
59 goto out_put; 59 goto out_put;
60 60
61 if (sysfs_ns_type(parent_sd))
62 sd->s_ns = target->ktype->namespace(target);
61 sd->s_symlink.target_sd = target_sd; 63 sd->s_symlink.target_sd = target_sd;
62 target_sd = NULL; /* reference is now owned by the symlink */ 64 target_sd = NULL; /* reference is now owned by the symlink */
63 65
@@ -107,6 +109,26 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
107} 109}
108 110
109/** 111/**
112 * sysfs_delete_link - remove symlink in object's directory.
113 * @kobj: object we're acting for.
114 * @targ: object we're pointing to.
115 * @name: name of the symlink to remove.
116 *
117 * Unlike sysfs_remove_link sysfs_delete_link has enough information
118 * to successfully delete symlinks in tagged directories.
119 */
120void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
121 const char *name)
122{
123 const void *ns = NULL;
124 spin_lock(&sysfs_assoc_lock);
125 if (targ->sd)
126 ns = targ->sd->s_ns;
127 spin_unlock(&sysfs_assoc_lock);
128 sysfs_hash_and_remove(kobj->sd, ns, name);
129}
130
131/**
110 * sysfs_remove_link - remove symlink in object's directory. 132 * sysfs_remove_link - remove symlink in object's directory.
111 * @kobj: object we're acting for. 133 * @kobj: object we're acting for.
112 * @name: name of the symlink to remove. 134 * @name: name of the symlink to remove.
@@ -121,7 +143,7 @@ void sysfs_remove_link(struct kobject * kobj, const char * name)
121 else 143 else
122 parent_sd = kobj->sd; 144 parent_sd = kobj->sd;
123 145
124 sysfs_hash_and_remove(parent_sd, name); 146 sysfs_hash_and_remove(parent_sd, NULL, name);
125} 147}
126 148
127/** 149/**
@@ -137,6 +159,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
137 const char *old, const char *new) 159 const char *old, const char *new)
138{ 160{
139 struct sysfs_dirent *parent_sd, *sd = NULL; 161 struct sysfs_dirent *parent_sd, *sd = NULL;
162 const void *old_ns = NULL, *new_ns = NULL;
140 int result; 163 int result;
141 164
142 if (!kobj) 165 if (!kobj)
@@ -144,8 +167,11 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
144 else 167 else
145 parent_sd = kobj->sd; 168 parent_sd = kobj->sd;
146 169
170 if (targ->sd)
171 old_ns = targ->sd->s_ns;
172
147 result = -ENOENT; 173 result = -ENOENT;
148 sd = sysfs_get_dirent(parent_sd, old); 174 sd = sysfs_get_dirent(parent_sd, old_ns, old);
149 if (!sd) 175 if (!sd)
150 goto out; 176 goto out;
151 177
@@ -155,7 +181,10 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
155 if (sd->s_symlink.target_sd->s_dir.kobj != targ) 181 if (sd->s_symlink.target_sd->s_dir.kobj != targ)
156 goto out; 182 goto out;
157 183
158 result = sysfs_rename(sd, parent_sd, new); 184 if (sysfs_ns_type(parent_sd))
185 new_ns = targ->ktype->namespace(targ);
186
187 result = sysfs_rename(sd, parent_sd, new_ns, new);
159 188
160out: 189out:
161 sysfs_put(sd); 190 sysfs_put(sd);
@@ -261,3 +290,4 @@ const struct inode_operations sysfs_symlink_inode_operations = {
261 290
262EXPORT_SYMBOL_GPL(sysfs_create_link); 291EXPORT_SYMBOL_GPL(sysfs_create_link);
263EXPORT_SYMBOL_GPL(sysfs_remove_link); 292EXPORT_SYMBOL_GPL(sysfs_remove_link);
293EXPORT_SYMBOL_GPL(sysfs_rename_link);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 30f5a44fb5d3..6a13105b5594 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -58,6 +58,7 @@ struct sysfs_dirent {
58 struct sysfs_dirent *s_sibling; 58 struct sysfs_dirent *s_sibling;
59 const char *s_name; 59 const char *s_name;
60 60
61 const void *s_ns; /* namespace tag */
61 union { 62 union {
62 struct sysfs_elem_dir s_dir; 63 struct sysfs_elem_dir s_dir;
63 struct sysfs_elem_symlink s_symlink; 64 struct sysfs_elem_symlink s_symlink;
@@ -81,14 +82,27 @@ struct sysfs_dirent {
81#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) 82#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
82#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR) 83#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
83 84
84#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK 85/* identify any namespace tag on sysfs_dirents */
85#define SYSFS_FLAG_REMOVED 0x0200 86#define SYSFS_NS_TYPE_MASK 0xff00
87#define SYSFS_NS_TYPE_SHIFT 8
88
89#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
90#define SYSFS_FLAG_REMOVED 0x020000
86 91
87static inline unsigned int sysfs_type(struct sysfs_dirent *sd) 92static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
88{ 93{
89 return sd->s_flags & SYSFS_TYPE_MASK; 94 return sd->s_flags & SYSFS_TYPE_MASK;
90} 95}
91 96
97/*
98 * Return any namespace tags on this dirent.
99 * enum kobj_ns_type is defined in linux/kobject.h
100 */
101static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
102{
103 return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
104}
105
92#ifdef CONFIG_DEBUG_LOCK_ALLOC 106#ifdef CONFIG_DEBUG_LOCK_ALLOC
93#define sysfs_dirent_init_lockdep(sd) \ 107#define sysfs_dirent_init_lockdep(sd) \
94do { \ 108do { \
@@ -114,6 +128,16 @@ struct sysfs_addrm_cxt {
114/* 128/*
115 * mount.c 129 * mount.c
116 */ 130 */
131
132/*
133 * Each sb is associated with a set of namespace tags (i.e.
134 * the network namespace of the task which mounted this sysfs
135 * instance).
136 */
137struct sysfs_super_info {
138 const void *ns[KOBJ_NS_TYPES];
139};
140#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
117extern struct sysfs_dirent sysfs_root; 141extern struct sysfs_dirent sysfs_root;
118extern struct kmem_cache *sysfs_dir_cachep; 142extern struct kmem_cache *sysfs_dir_cachep;
119 143
@@ -137,8 +161,10 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
137void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); 161void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
138 162
139struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, 163struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
164 const void *ns,
140 const unsigned char *name); 165 const unsigned char *name);
141struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, 166struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
167 const void *ns,
142 const unsigned char *name); 168 const unsigned char *name);
143struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); 169struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
144 170
@@ -149,7 +175,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
149void sysfs_remove_subdir(struct sysfs_dirent *sd); 175void sysfs_remove_subdir(struct sysfs_dirent *sd);
150 176
151int sysfs_rename(struct sysfs_dirent *sd, 177int sysfs_rename(struct sysfs_dirent *sd,
152 struct sysfs_dirent *new_parent_sd, const char *new_name); 178 struct sysfs_dirent *new_parent_sd, const void *ns, const char *new_name);
153 179
154static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) 180static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
155{ 181{
@@ -179,7 +205,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
179int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 205int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
180int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 206int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
181 size_t size, int flags); 207 size_t size, int flags);
182int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 208int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name);
183int sysfs_inode_init(void); 209int sysfs_inode_init(void);
184 210
185/* 211/*
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 4e50286a4cc3..1dabed286b4c 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -164,8 +164,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
164 name, de->name)) 164 name, de->name))
165 goto found; 165 goto found;
166 } 166 }
167 dir_put_page(page);
167 } 168 }
168 dir_put_page(page);
169 169
170 if (++n >= npages) 170 if (++n >= npages)
171 n = 0; 171 n = 0;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 98158de91d24..b86ab8eff79a 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -110,31 +110,14 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
110 struct timerfd_ctx *ctx = file->private_data; 110 struct timerfd_ctx *ctx = file->private_data;
111 ssize_t res; 111 ssize_t res;
112 u64 ticks = 0; 112 u64 ticks = 0;
113 DECLARE_WAITQUEUE(wait, current);
114 113
115 if (count < sizeof(ticks)) 114 if (count < sizeof(ticks))
116 return -EINVAL; 115 return -EINVAL;
117 spin_lock_irq(&ctx->wqh.lock); 116 spin_lock_irq(&ctx->wqh.lock);
118 res = -EAGAIN; 117 if (file->f_flags & O_NONBLOCK)
119 if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) { 118 res = -EAGAIN;
120 __add_wait_queue(&ctx->wqh, &wait); 119 else
121 for (res = 0;;) { 120 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
122 set_current_state(TASK_INTERRUPTIBLE);
123 if (ctx->ticks) {
124 res = 0;
125 break;
126 }
127 if (signal_pending(current)) {
128 res = -ERESTARTSYS;
129 break;
130 }
131 spin_unlock_irq(&ctx->wqh.lock);
132 schedule();
133 spin_lock_irq(&ctx->wqh.lock);
134 }
135 __remove_wait_queue(&ctx->wqh, &wait);
136 __set_current_state(TASK_RUNNING);
137 }
138 if (ctx->ticks) { 121 if (ctx->ticks) {
139 ticks = ctx->ticks; 122 ticks = ctx->ticks;
140 if (ctx->expired && ctx->tintv.tv64) { 123 if (ctx->expired && ctx->tintv.tv64) {
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 77d5cf4a7547..bcf5a16f30bb 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -64,6 +64,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
64 if (!c->ro_media) { 64 if (!c->ro_media) {
65 c->ro_media = 1; 65 c->ro_media = 1;
66 c->no_chk_data_crc = 0; 66 c->no_chk_data_crc = 0;
67 c->vfs_sb->s_flags |= MS_RDONLY;
67 ubifs_warn("switched to read-only mode, error %d", err); 68 ubifs_warn("switched to read-only mode, error %d", err);
68 dbg_dump_stack(); 69 dbg_dump_stack();
69 } 70 }
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index f0f2a436251e..3a84455c2a77 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -209,6 +209,6 @@ static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir)
209const struct file_operations udf_dir_operations = { 209const struct file_operations udf_dir_operations = {
210 .read = generic_read_dir, 210 .read = generic_read_dir,
211 .readdir = udf_readdir, 211 .readdir = udf_readdir,
212 .ioctl = udf_ioctl, 212 .unlocked_ioctl = udf_ioctl,
213 .fsync = simple_fsync, 213 .fsync = simple_fsync,
214}; 214};
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 4b6a46ccbf46..baae3a723946 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -37,6 +37,7 @@
37#include <linux/quotaops.h> 37#include <linux/quotaops.h>
38#include <linux/buffer_head.h> 38#include <linux/buffer_head.h>
39#include <linux/aio.h> 39#include <linux/aio.h>
40#include <linux/smp_lock.h>
40 41
41#include "udf_i.h" 42#include "udf_i.h"
42#include "udf_sb.h" 43#include "udf_sb.h"
@@ -144,50 +145,60 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
144 return retval; 145 return retval;
145} 146}
146 147
147int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 148long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
148 unsigned long arg)
149{ 149{
150 struct inode *inode = filp->f_dentry->d_inode;
150 long old_block, new_block; 151 long old_block, new_block;
151 int result = -EINVAL; 152 int result = -EINVAL;
152 153
154 lock_kernel();
155
153 if (file_permission(filp, MAY_READ) != 0) { 156 if (file_permission(filp, MAY_READ) != 0) {
154 udf_debug("no permission to access inode %lu\n", 157 udf_debug("no permission to access inode %lu\n", inode->i_ino);
155 inode->i_ino); 158 result = -EPERM;
156 return -EPERM; 159 goto out;
157 } 160 }
158 161
159 if (!arg) { 162 if (!arg) {
160 udf_debug("invalid argument to udf_ioctl\n"); 163 udf_debug("invalid argument to udf_ioctl\n");
161 return -EINVAL; 164 result = -EINVAL;
165 goto out;
162 } 166 }
163 167
164 switch (cmd) { 168 switch (cmd) {
165 case UDF_GETVOLIDENT: 169 case UDF_GETVOLIDENT:
166 if (copy_to_user((char __user *)arg, 170 if (copy_to_user((char __user *)arg,
167 UDF_SB(inode->i_sb)->s_volume_ident, 32)) 171 UDF_SB(inode->i_sb)->s_volume_ident, 32))
168 return -EFAULT; 172 result = -EFAULT;
169 else 173 else
170 return 0; 174 result = 0;
175 goto out;
171 case UDF_RELOCATE_BLOCKS: 176 case UDF_RELOCATE_BLOCKS:
172 if (!capable(CAP_SYS_ADMIN)) 177 if (!capable(CAP_SYS_ADMIN)) {
173 return -EACCES; 178 result = -EACCES;
174 if (get_user(old_block, (long __user *)arg)) 179 goto out;
175 return -EFAULT; 180 }
181 if (get_user(old_block, (long __user *)arg)) {
182 result = -EFAULT;
183 goto out;
184 }
176 result = udf_relocate_blocks(inode->i_sb, 185 result = udf_relocate_blocks(inode->i_sb,
177 old_block, &new_block); 186 old_block, &new_block);
178 if (result == 0) 187 if (result == 0)
179 result = put_user(new_block, (long __user *)arg); 188 result = put_user(new_block, (long __user *)arg);
180 return result; 189 goto out;
181 case UDF_GETEASIZE: 190 case UDF_GETEASIZE:
182 result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg); 191 result = put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg);
183 break; 192 goto out;
184 case UDF_GETEABLOCK: 193 case UDF_GETEABLOCK:
185 result = copy_to_user((char __user *)arg, 194 result = copy_to_user((char __user *)arg,
186 UDF_I(inode)->i_ext.i_data, 195 UDF_I(inode)->i_ext.i_data,
187 UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0; 196 UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0;
188 break; 197 goto out;
189 } 198 }
190 199
200out:
201 unlock_kernel();
191 return result; 202 return result;
192} 203}
193 204
@@ -207,7 +218,7 @@ static int udf_release_file(struct inode *inode, struct file *filp)
207const struct file_operations udf_file_operations = { 218const struct file_operations udf_file_operations = {
208 .read = do_sync_read, 219 .read = do_sync_read,
209 .aio_read = generic_file_aio_read, 220 .aio_read = generic_file_aio_read,
210 .ioctl = udf_ioctl, 221 .unlocked_ioctl = udf_ioctl,
211 .open = dquot_file_open, 222 .open = dquot_file_open,
212 .mmap = generic_file_mmap, 223 .mmap = generic_file_mmap,
213 .write = do_sync_write, 224 .write = do_sync_write,
@@ -227,7 +238,7 @@ int udf_setattr(struct dentry *dentry, struct iattr *iattr)
227 if (error) 238 if (error)
228 return error; 239 return error;
229 240
230 if (iattr->ia_valid & ATTR_SIZE) 241 if (is_quota_modification(inode, iattr))
231 dquot_initialize(inode); 242 dquot_initialize(inode);
232 243
233 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || 244 if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 702a1148e702..9079ff7d6255 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -130,8 +130,7 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
130 uint8_t *, uint8_t *); 130 uint8_t *, uint8_t *);
131 131
132/* file.c */ 132/* file.c */
133extern int udf_ioctl(struct inode *, struct file *, unsigned int, 133extern long udf_ioctl(struct file *, unsigned int, unsigned long);
134 unsigned long);
135extern int udf_setattr(struct dentry *dentry, struct iattr *iattr); 134extern int udf_setattr(struct dentry *dentry, struct iattr *iattr);
136/* inode.c */ 135/* inode.c */
137extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); 136extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 80b68c3702d1..cffa756f1047 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -603,7 +603,7 @@ static void ufs_set_inode_ops(struct inode *inode)
603 if (!inode->i_blocks) 603 if (!inode->i_blocks)
604 inode->i_op = &ufs_fast_symlink_inode_operations; 604 inode->i_op = &ufs_fast_symlink_inode_operations;
605 else { 605 else {
606 inode->i_op = &page_symlink_inode_operations; 606 inode->i_op = &ufs_symlink_inode_operations;
607 inode->i_mapping->a_ops = &ufs_aops; 607 inode->i_mapping->a_ops = &ufs_aops;
608 } 608 }
609 } else 609 } else
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 118556243e7a..eabc02eb1294 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -148,7 +148,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
148 148
149 if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { 149 if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) {
150 /* slow symlink */ 150 /* slow symlink */
151 inode->i_op = &page_symlink_inode_operations; 151 inode->i_op = &ufs_symlink_inode_operations;
152 inode->i_mapping->a_ops = &ufs_aops; 152 inode->i_mapping->a_ops = &ufs_aops;
153 err = page_symlink(inode, symname, l); 153 err = page_symlink(inode, symname, l);
154 if (err) 154 if (err)
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index c0156eda44bc..d283628b4778 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -42,4 +42,12 @@ static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
42const struct inode_operations ufs_fast_symlink_inode_operations = { 42const struct inode_operations ufs_fast_symlink_inode_operations = {
43 .readlink = generic_readlink, 43 .readlink = generic_readlink,
44 .follow_link = ufs_follow_link, 44 .follow_link = ufs_follow_link,
45 .setattr = ufs_setattr,
46};
47
48const struct inode_operations ufs_symlink_inode_operations = {
49 .readlink = generic_readlink,
50 .follow_link = page_follow_link_light,
51 .put_link = page_put_link,
52 .setattr = ufs_setattr,
45}; 53};
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index d3b6270cb377..f294c44577dc 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -508,7 +508,7 @@ out:
508 * - there is no way to know old size 508 * - there is no way to know old size
509 * - there is no way inform user about error, if it happens in `truncate' 509 * - there is no way inform user about error, if it happens in `truncate'
510 */ 510 */
511static int ufs_setattr(struct dentry *dentry, struct iattr *attr) 511int ufs_setattr(struct dentry *dentry, struct iattr *attr)
512{ 512{
513 struct inode *inode = dentry->d_inode; 513 struct inode *inode = dentry->d_inode;
514 unsigned int ia_valid = attr->ia_valid; 514 unsigned int ia_valid = attr->ia_valid;
@@ -518,18 +518,18 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
518 if (error) 518 if (error)
519 return error; 519 return error;
520 520
521 if (is_quota_modification(inode, attr))
522 dquot_initialize(inode);
523
521 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 524 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
522 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 525 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
523 error = dquot_transfer(inode, attr); 526 error = dquot_transfer(inode, attr);
524 if (error) 527 if (error)
525 return error; 528 return error;
526 } 529 }
527 if (ia_valid & ATTR_SIZE && 530 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
528 attr->ia_size != i_size_read(inode)) {
529 loff_t old_i_size = inode->i_size; 531 loff_t old_i_size = inode->i_size;
530 532
531 dquot_initialize(inode);
532
533 error = vmtruncate(inode, attr->ia_size); 533 error = vmtruncate(inode, attr->ia_size);
534 if (error) 534 if (error)
535 return error; 535 return error;
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 43f9f5d5670e..179ae6b3180a 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -122,9 +122,11 @@ extern void ufs_panic (struct super_block *, const char *, const char *, ...) __
122 122
123/* symlink.c */ 123/* symlink.c */
124extern const struct inode_operations ufs_fast_symlink_inode_operations; 124extern const struct inode_operations ufs_fast_symlink_inode_operations;
125extern const struct inode_operations ufs_symlink_inode_operations;
125 126
126/* truncate.c */ 127/* truncate.c */
127extern int ufs_truncate (struct inode *, loff_t); 128extern int ufs_truncate (struct inode *, loff_t);
129extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
128 130
129static inline struct ufs_sb_info *UFS_SB(struct super_block *sb) 131static inline struct ufs_sb_info *UFS_SB(struct super_block *sb)
130{ 132{
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 0f8b9968a803..089eaca860b4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -45,6 +45,15 @@
45#include <linux/pagevec.h> 45#include <linux/pagevec.h>
46#include <linux/writeback.h> 46#include <linux/writeback.h>
47 47
48/*
49 * Types of I/O for bmap clustering and I/O completion tracking.
50 */
51enum {
52 IO_READ, /* mapping for a read */
53 IO_DELAY, /* mapping covers delalloc region */
54 IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
55 IO_NEW /* just allocated */
56};
48 57
49/* 58/*
50 * Prime number of hash buckets since address is used as the key. 59 * Prime number of hash buckets since address is used as the key.
@@ -103,8 +112,9 @@ xfs_count_page_state(
103 112
104STATIC struct block_device * 113STATIC struct block_device *
105xfs_find_bdev_for_inode( 114xfs_find_bdev_for_inode(
106 struct xfs_inode *ip) 115 struct inode *inode)
107{ 116{
117 struct xfs_inode *ip = XFS_I(inode);
108 struct xfs_mount *mp = ip->i_mount; 118 struct xfs_mount *mp = ip->i_mount;
109 119
110 if (XFS_IS_REALTIME_INODE(ip)) 120 if (XFS_IS_REALTIME_INODE(ip))
@@ -183,7 +193,7 @@ xfs_setfilesize(
183 xfs_fsize_t isize; 193 xfs_fsize_t isize;
184 194
185 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); 195 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
186 ASSERT(ioend->io_type != IOMAP_READ); 196 ASSERT(ioend->io_type != IO_READ);
187 197
188 if (unlikely(ioend->io_error)) 198 if (unlikely(ioend->io_error))
189 return 0; 199 return 0;
@@ -214,7 +224,7 @@ xfs_finish_ioend(
214 if (atomic_dec_and_test(&ioend->io_remaining)) { 224 if (atomic_dec_and_test(&ioend->io_remaining)) {
215 struct workqueue_struct *wq; 225 struct workqueue_struct *wq;
216 226
217 wq = (ioend->io_type == IOMAP_UNWRITTEN) ? 227 wq = (ioend->io_type == IO_UNWRITTEN) ?
218 xfsconvertd_workqueue : xfsdatad_workqueue; 228 xfsconvertd_workqueue : xfsdatad_workqueue;
219 queue_work(wq, &ioend->io_work); 229 queue_work(wq, &ioend->io_work);
220 if (wait) 230 if (wait)
@@ -237,7 +247,7 @@ xfs_end_io(
237 * For unwritten extents we need to issue transactions to convert a 247 * For unwritten extents we need to issue transactions to convert a
238 * range to normal written extens after the data I/O has finished. 248 * range to normal written extens after the data I/O has finished.
239 */ 249 */
240 if (ioend->io_type == IOMAP_UNWRITTEN && 250 if (ioend->io_type == IO_UNWRITTEN &&
241 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { 251 likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
242 252
243 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 253 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
@@ -250,7 +260,7 @@ xfs_end_io(
250 * We might have to update the on-disk file size after extending 260 * We might have to update the on-disk file size after extending
251 * writes. 261 * writes.
252 */ 262 */
253 if (ioend->io_type != IOMAP_READ) { 263 if (ioend->io_type != IO_READ) {
254 error = xfs_setfilesize(ioend); 264 error = xfs_setfilesize(ioend);
255 ASSERT(!error || error == EAGAIN); 265 ASSERT(!error || error == EAGAIN);
256 } 266 }
@@ -309,21 +319,25 @@ xfs_map_blocks(
309 struct inode *inode, 319 struct inode *inode,
310 loff_t offset, 320 loff_t offset,
311 ssize_t count, 321 ssize_t count,
312 xfs_iomap_t *mapp, 322 struct xfs_bmbt_irec *imap,
313 int flags) 323 int flags)
314{ 324{
315 int nmaps = 1; 325 int nmaps = 1;
326 int new = 0;
316 327
317 return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); 328 return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
318} 329}
319 330
320STATIC int 331STATIC int
321xfs_iomap_valid( 332xfs_imap_valid(
322 xfs_iomap_t *iomapp, 333 struct inode *inode,
323 loff_t offset) 334 struct xfs_bmbt_irec *imap,
335 xfs_off_t offset)
324{ 336{
325 return offset >= iomapp->iomap_offset && 337 offset >>= inode->i_blkbits;
326 offset < iomapp->iomap_offset + iomapp->iomap_bsize; 338
339 return offset >= imap->br_startoff &&
340 offset < imap->br_startoff + imap->br_blockcount;
327} 341}
328 342
329/* 343/*
@@ -554,19 +568,23 @@ xfs_add_to_ioend(
554 568
555STATIC void 569STATIC void
556xfs_map_buffer( 570xfs_map_buffer(
571 struct inode *inode,
557 struct buffer_head *bh, 572 struct buffer_head *bh,
558 xfs_iomap_t *mp, 573 struct xfs_bmbt_irec *imap,
559 xfs_off_t offset, 574 xfs_off_t offset)
560 uint block_bits)
561{ 575{
562 sector_t bn; 576 sector_t bn;
577 struct xfs_mount *m = XFS_I(inode)->i_mount;
578 xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
579 xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
563 580
564 ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL); 581 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
582 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
565 583
566 bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) + 584 bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
567 ((offset - mp->iomap_offset) >> block_bits); 585 ((offset - iomap_offset) >> inode->i_blkbits);
568 586
569 ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME)); 587 ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
570 588
571 bh->b_blocknr = bn; 589 bh->b_blocknr = bn;
572 set_buffer_mapped(bh); 590 set_buffer_mapped(bh);
@@ -574,17 +592,17 @@ xfs_map_buffer(
574 592
575STATIC void 593STATIC void
576xfs_map_at_offset( 594xfs_map_at_offset(
595 struct inode *inode,
577 struct buffer_head *bh, 596 struct buffer_head *bh,
578 loff_t offset, 597 struct xfs_bmbt_irec *imap,
579 int block_bits, 598 xfs_off_t offset)
580 xfs_iomap_t *iomapp)
581{ 599{
582 ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE)); 600 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
583 ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY)); 601 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
584 602
585 lock_buffer(bh); 603 lock_buffer(bh);
586 xfs_map_buffer(bh, iomapp, offset, block_bits); 604 xfs_map_buffer(inode, bh, imap, offset);
587 bh->b_bdev = iomapp->iomap_target->bt_bdev; 605 bh->b_bdev = xfs_find_bdev_for_inode(inode);
588 set_buffer_mapped(bh); 606 set_buffer_mapped(bh);
589 clear_buffer_delay(bh); 607 clear_buffer_delay(bh);
590 clear_buffer_unwritten(bh); 608 clear_buffer_unwritten(bh);
@@ -713,11 +731,11 @@ xfs_is_delayed_page(
713 bh = head = page_buffers(page); 731 bh = head = page_buffers(page);
714 do { 732 do {
715 if (buffer_unwritten(bh)) 733 if (buffer_unwritten(bh))
716 acceptable = (type == IOMAP_UNWRITTEN); 734 acceptable = (type == IO_UNWRITTEN);
717 else if (buffer_delay(bh)) 735 else if (buffer_delay(bh))
718 acceptable = (type == IOMAP_DELAY); 736 acceptable = (type == IO_DELAY);
719 else if (buffer_dirty(bh) && buffer_mapped(bh)) 737 else if (buffer_dirty(bh) && buffer_mapped(bh))
720 acceptable = (type == IOMAP_NEW); 738 acceptable = (type == IO_NEW);
721 else 739 else
722 break; 740 break;
723 } while ((bh = bh->b_this_page) != head); 741 } while ((bh = bh->b_this_page) != head);
@@ -740,7 +758,7 @@ xfs_convert_page(
740 struct inode *inode, 758 struct inode *inode,
741 struct page *page, 759 struct page *page,
742 loff_t tindex, 760 loff_t tindex,
743 xfs_iomap_t *mp, 761 struct xfs_bmbt_irec *imap,
744 xfs_ioend_t **ioendp, 762 xfs_ioend_t **ioendp,
745 struct writeback_control *wbc, 763 struct writeback_control *wbc,
746 int startio, 764 int startio,
@@ -750,7 +768,6 @@ xfs_convert_page(
750 xfs_off_t end_offset; 768 xfs_off_t end_offset;
751 unsigned long p_offset; 769 unsigned long p_offset;
752 unsigned int type; 770 unsigned int type;
753 int bbits = inode->i_blkbits;
754 int len, page_dirty; 771 int len, page_dirty;
755 int count = 0, done = 0, uptodate = 1; 772 int count = 0, done = 0, uptodate = 1;
756 xfs_off_t offset = page_offset(page); 773 xfs_off_t offset = page_offset(page);
@@ -802,19 +819,19 @@ xfs_convert_page(
802 819
803 if (buffer_unwritten(bh) || buffer_delay(bh)) { 820 if (buffer_unwritten(bh) || buffer_delay(bh)) {
804 if (buffer_unwritten(bh)) 821 if (buffer_unwritten(bh))
805 type = IOMAP_UNWRITTEN; 822 type = IO_UNWRITTEN;
806 else 823 else
807 type = IOMAP_DELAY; 824 type = IO_DELAY;
808 825
809 if (!xfs_iomap_valid(mp, offset)) { 826 if (!xfs_imap_valid(inode, imap, offset)) {
810 done = 1; 827 done = 1;
811 continue; 828 continue;
812 } 829 }
813 830
814 ASSERT(!(mp->iomap_flags & IOMAP_HOLE)); 831 ASSERT(imap->br_startblock != HOLESTARTBLOCK);
815 ASSERT(!(mp->iomap_flags & IOMAP_DELAY)); 832 ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
816 833
817 xfs_map_at_offset(bh, offset, bbits, mp); 834 xfs_map_at_offset(inode, bh, imap, offset);
818 if (startio) { 835 if (startio) {
819 xfs_add_to_ioend(inode, bh, offset, 836 xfs_add_to_ioend(inode, bh, offset,
820 type, ioendp, done); 837 type, ioendp, done);
@@ -826,7 +843,7 @@ xfs_convert_page(
826 page_dirty--; 843 page_dirty--;
827 count++; 844 count++;
828 } else { 845 } else {
829 type = IOMAP_NEW; 846 type = IO_NEW;
830 if (buffer_mapped(bh) && all_bh && startio) { 847 if (buffer_mapped(bh) && all_bh && startio) {
831 lock_buffer(bh); 848 lock_buffer(bh);
832 xfs_add_to_ioend(inode, bh, offset, 849 xfs_add_to_ioend(inode, bh, offset,
@@ -866,7 +883,7 @@ STATIC void
866xfs_cluster_write( 883xfs_cluster_write(
867 struct inode *inode, 884 struct inode *inode,
868 pgoff_t tindex, 885 pgoff_t tindex,
869 xfs_iomap_t *iomapp, 886 struct xfs_bmbt_irec *imap,
870 xfs_ioend_t **ioendp, 887 xfs_ioend_t **ioendp,
871 struct writeback_control *wbc, 888 struct writeback_control *wbc,
872 int startio, 889 int startio,
@@ -885,7 +902,7 @@ xfs_cluster_write(
885 902
886 for (i = 0; i < pagevec_count(&pvec); i++) { 903 for (i = 0; i < pagevec_count(&pvec); i++) {
887 done = xfs_convert_page(inode, pvec.pages[i], tindex++, 904 done = xfs_convert_page(inode, pvec.pages[i], tindex++,
888 iomapp, ioendp, wbc, startio, all_bh); 905 imap, ioendp, wbc, startio, all_bh);
889 if (done) 906 if (done)
890 break; 907 break;
891 } 908 }
@@ -930,7 +947,7 @@ xfs_aops_discard_page(
930 loff_t offset = page_offset(page); 947 loff_t offset = page_offset(page);
931 ssize_t len = 1 << inode->i_blkbits; 948 ssize_t len = 1 << inode->i_blkbits;
932 949
933 if (!xfs_is_delayed_page(page, IOMAP_DELAY)) 950 if (!xfs_is_delayed_page(page, IO_DELAY))
934 goto out_invalidate; 951 goto out_invalidate;
935 952
936 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 953 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1042,15 +1059,15 @@ xfs_page_state_convert(
1042 int unmapped) /* also implies page uptodate */ 1059 int unmapped) /* also implies page uptodate */
1043{ 1060{
1044 struct buffer_head *bh, *head; 1061 struct buffer_head *bh, *head;
1045 xfs_iomap_t iomap; 1062 struct xfs_bmbt_irec imap;
1046 xfs_ioend_t *ioend = NULL, *iohead = NULL; 1063 xfs_ioend_t *ioend = NULL, *iohead = NULL;
1047 loff_t offset; 1064 loff_t offset;
1048 unsigned long p_offset = 0; 1065 unsigned long p_offset = 0;
1049 unsigned int type; 1066 unsigned int type;
1050 __uint64_t end_offset; 1067 __uint64_t end_offset;
1051 pgoff_t end_index, last_index, tlast; 1068 pgoff_t end_index, last_index;
1052 ssize_t size, len; 1069 ssize_t size, len;
1053 int flags, err, iomap_valid = 0, uptodate = 1; 1070 int flags, err, imap_valid = 0, uptodate = 1;
1054 int page_dirty, count = 0; 1071 int page_dirty, count = 0;
1055 int trylock = 0; 1072 int trylock = 0;
1056 int all_bh = unmapped; 1073 int all_bh = unmapped;
@@ -1097,7 +1114,7 @@ xfs_page_state_convert(
1097 bh = head = page_buffers(page); 1114 bh = head = page_buffers(page);
1098 offset = page_offset(page); 1115 offset = page_offset(page);
1099 flags = BMAPI_READ; 1116 flags = BMAPI_READ;
1100 type = IOMAP_NEW; 1117 type = IO_NEW;
1101 1118
1102 /* TODO: cleanup count and page_dirty */ 1119 /* TODO: cleanup count and page_dirty */
1103 1120
@@ -1111,12 +1128,12 @@ xfs_page_state_convert(
1111 * the iomap is actually still valid, but the ioend 1128 * the iomap is actually still valid, but the ioend
1112 * isn't. shouldn't happen too often. 1129 * isn't. shouldn't happen too often.
1113 */ 1130 */
1114 iomap_valid = 0; 1131 imap_valid = 0;
1115 continue; 1132 continue;
1116 } 1133 }
1117 1134
1118 if (iomap_valid) 1135 if (imap_valid)
1119 iomap_valid = xfs_iomap_valid(&iomap, offset); 1136 imap_valid = xfs_imap_valid(inode, &imap, offset);
1120 1137
1121 /* 1138 /*
1122 * First case, map an unwritten extent and prepare for 1139 * First case, map an unwritten extent and prepare for
@@ -1137,20 +1154,20 @@ xfs_page_state_convert(
1137 * Make sure we don't use a read-only iomap 1154 * Make sure we don't use a read-only iomap
1138 */ 1155 */
1139 if (flags == BMAPI_READ) 1156 if (flags == BMAPI_READ)
1140 iomap_valid = 0; 1157 imap_valid = 0;
1141 1158
1142 if (buffer_unwritten(bh)) { 1159 if (buffer_unwritten(bh)) {
1143 type = IOMAP_UNWRITTEN; 1160 type = IO_UNWRITTEN;
1144 flags = BMAPI_WRITE | BMAPI_IGNSTATE; 1161 flags = BMAPI_WRITE | BMAPI_IGNSTATE;
1145 } else if (buffer_delay(bh)) { 1162 } else if (buffer_delay(bh)) {
1146 type = IOMAP_DELAY; 1163 type = IO_DELAY;
1147 flags = BMAPI_ALLOCATE | trylock; 1164 flags = BMAPI_ALLOCATE | trylock;
1148 } else { 1165 } else {
1149 type = IOMAP_NEW; 1166 type = IO_NEW;
1150 flags = BMAPI_WRITE | BMAPI_MMAP; 1167 flags = BMAPI_WRITE | BMAPI_MMAP;
1151 } 1168 }
1152 1169
1153 if (!iomap_valid) { 1170 if (!imap_valid) {
1154 /* 1171 /*
1155 * if we didn't have a valid mapping then we 1172 * if we didn't have a valid mapping then we
1156 * need to ensure that we put the new mapping 1173 * need to ensure that we put the new mapping
@@ -1160,7 +1177,7 @@ xfs_page_state_convert(
1160 * for unwritten extent conversion. 1177 * for unwritten extent conversion.
1161 */ 1178 */
1162 new_ioend = 1; 1179 new_ioend = 1;
1163 if (type == IOMAP_NEW) { 1180 if (type == IO_NEW) {
1164 size = xfs_probe_cluster(inode, 1181 size = xfs_probe_cluster(inode,
1165 page, bh, head, 0); 1182 page, bh, head, 0);
1166 } else { 1183 } else {
@@ -1168,14 +1185,14 @@ xfs_page_state_convert(
1168 } 1185 }
1169 1186
1170 err = xfs_map_blocks(inode, offset, size, 1187 err = xfs_map_blocks(inode, offset, size,
1171 &iomap, flags); 1188 &imap, flags);
1172 if (err) 1189 if (err)
1173 goto error; 1190 goto error;
1174 iomap_valid = xfs_iomap_valid(&iomap, offset); 1191 imap_valid = xfs_imap_valid(inode, &imap,
1192 offset);
1175 } 1193 }
1176 if (iomap_valid) { 1194 if (imap_valid) {
1177 xfs_map_at_offset(bh, offset, 1195 xfs_map_at_offset(inode, bh, &imap, offset);
1178 inode->i_blkbits, &iomap);
1179 if (startio) { 1196 if (startio) {
1180 xfs_add_to_ioend(inode, bh, offset, 1197 xfs_add_to_ioend(inode, bh, offset,
1181 type, &ioend, 1198 type, &ioend,
@@ -1194,40 +1211,41 @@ xfs_page_state_convert(
1194 * That means it must already have extents allocated 1211 * That means it must already have extents allocated
1195 * underneath it. Map the extent by reading it. 1212 * underneath it. Map the extent by reading it.
1196 */ 1213 */
1197 if (!iomap_valid || flags != BMAPI_READ) { 1214 if (!imap_valid || flags != BMAPI_READ) {
1198 flags = BMAPI_READ; 1215 flags = BMAPI_READ;
1199 size = xfs_probe_cluster(inode, page, bh, 1216 size = xfs_probe_cluster(inode, page, bh,
1200 head, 1); 1217 head, 1);
1201 err = xfs_map_blocks(inode, offset, size, 1218 err = xfs_map_blocks(inode, offset, size,
1202 &iomap, flags); 1219 &imap, flags);
1203 if (err) 1220 if (err)
1204 goto error; 1221 goto error;
1205 iomap_valid = xfs_iomap_valid(&iomap, offset); 1222 imap_valid = xfs_imap_valid(inode, &imap,
1223 offset);
1206 } 1224 }
1207 1225
1208 /* 1226 /*
1209 * We set the type to IOMAP_NEW in case we are doing a 1227 * We set the type to IO_NEW in case we are doing a
1210 * small write at EOF that is extending the file but 1228 * small write at EOF that is extending the file but
1211 * without needing an allocation. We need to update the 1229 * without needing an allocation. We need to update the
1212 * file size on I/O completion in this case so it is 1230 * file size on I/O completion in this case so it is
1213 * the same case as having just allocated a new extent 1231 * the same case as having just allocated a new extent
1214 * that we are writing into for the first time. 1232 * that we are writing into for the first time.
1215 */ 1233 */
1216 type = IOMAP_NEW; 1234 type = IO_NEW;
1217 if (trylock_buffer(bh)) { 1235 if (trylock_buffer(bh)) {
1218 ASSERT(buffer_mapped(bh)); 1236 ASSERT(buffer_mapped(bh));
1219 if (iomap_valid) 1237 if (imap_valid)
1220 all_bh = 1; 1238 all_bh = 1;
1221 xfs_add_to_ioend(inode, bh, offset, type, 1239 xfs_add_to_ioend(inode, bh, offset, type,
1222 &ioend, !iomap_valid); 1240 &ioend, !imap_valid);
1223 page_dirty--; 1241 page_dirty--;
1224 count++; 1242 count++;
1225 } else { 1243 } else {
1226 iomap_valid = 0; 1244 imap_valid = 0;
1227 } 1245 }
1228 } else if ((buffer_uptodate(bh) || PageUptodate(page)) && 1246 } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
1229 (unmapped || startio)) { 1247 (unmapped || startio)) {
1230 iomap_valid = 0; 1248 imap_valid = 0;
1231 } 1249 }
1232 1250
1233 if (!iohead) 1251 if (!iohead)
@@ -1241,12 +1259,23 @@ xfs_page_state_convert(
1241 if (startio) 1259 if (startio)
1242 xfs_start_page_writeback(page, 1, count); 1260 xfs_start_page_writeback(page, 1, count);
1243 1261
1244 if (ioend && iomap_valid) { 1262 if (ioend && imap_valid) {
1245 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> 1263 xfs_off_t end_index;
1246 PAGE_CACHE_SHIFT; 1264
1247 tlast = min_t(pgoff_t, offset, last_index); 1265 end_index = imap.br_startoff + imap.br_blockcount;
1248 xfs_cluster_write(inode, page->index + 1, &iomap, &ioend, 1266
1249 wbc, startio, all_bh, tlast); 1267 /* to bytes */
1268 end_index <<= inode->i_blkbits;
1269
1270 /* to pages */
1271 end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
1272
1273 /* check against file size */
1274 if (end_index > last_index)
1275 end_index = last_index;
1276
1277 xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
1278 wbc, startio, all_bh, end_index);
1250 } 1279 }
1251 1280
1252 if (iohead) 1281 if (iohead)
@@ -1448,10 +1477,11 @@ __xfs_get_blocks(
1448 int direct, 1477 int direct,
1449 bmapi_flags_t flags) 1478 bmapi_flags_t flags)
1450{ 1479{
1451 xfs_iomap_t iomap; 1480 struct xfs_bmbt_irec imap;
1452 xfs_off_t offset; 1481 xfs_off_t offset;
1453 ssize_t size; 1482 ssize_t size;
1454 int niomap = 1; 1483 int nimap = 1;
1484 int new = 0;
1455 int error; 1485 int error;
1456 1486
1457 offset = (xfs_off_t)iblock << inode->i_blkbits; 1487 offset = (xfs_off_t)iblock << inode->i_blkbits;
@@ -1462,22 +1492,21 @@ __xfs_get_blocks(
1462 return 0; 1492 return 0;
1463 1493
1464 error = xfs_iomap(XFS_I(inode), offset, size, 1494 error = xfs_iomap(XFS_I(inode), offset, size,
1465 create ? flags : BMAPI_READ, &iomap, &niomap); 1495 create ? flags : BMAPI_READ, &imap, &nimap, &new);
1466 if (error) 1496 if (error)
1467 return -error; 1497 return -error;
1468 if (niomap == 0) 1498 if (nimap == 0)
1469 return 0; 1499 return 0;
1470 1500
1471 if (iomap.iomap_bn != IOMAP_DADDR_NULL) { 1501 if (imap.br_startblock != HOLESTARTBLOCK &&
1502 imap.br_startblock != DELAYSTARTBLOCK) {
1472 /* 1503 /*
1473 * For unwritten extents do not report a disk address on 1504 * For unwritten extents do not report a disk address on
1474 * the read case (treat as if we're reading into a hole). 1505 * the read case (treat as if we're reading into a hole).
1475 */ 1506 */
1476 if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) { 1507 if (create || !ISUNWRITTEN(&imap))
1477 xfs_map_buffer(bh_result, &iomap, offset, 1508 xfs_map_buffer(inode, bh_result, &imap, offset);
1478 inode->i_blkbits); 1509 if (create && ISUNWRITTEN(&imap)) {
1479 }
1480 if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
1481 if (direct) 1510 if (direct)
1482 bh_result->b_private = inode; 1511 bh_result->b_private = inode;
1483 set_buffer_unwritten(bh_result); 1512 set_buffer_unwritten(bh_result);
@@ -1488,7 +1517,7 @@ __xfs_get_blocks(
1488 * If this is a realtime file, data may be on a different device. 1517 * If this is a realtime file, data may be on a different device.
1489 * to that pointed to from the buffer_head b_bdev currently. 1518 * to that pointed to from the buffer_head b_bdev currently.
1490 */ 1519 */
1491 bh_result->b_bdev = iomap.iomap_target->bt_bdev; 1520 bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1492 1521
1493 /* 1522 /*
1494 * If we previously allocated a block out beyond eof and we are now 1523 * If we previously allocated a block out beyond eof and we are now
@@ -1502,10 +1531,10 @@ __xfs_get_blocks(
1502 if (create && 1531 if (create &&
1503 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1532 ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
1504 (offset >= i_size_read(inode)) || 1533 (offset >= i_size_read(inode)) ||
1505 (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN)))) 1534 (new || ISUNWRITTEN(&imap))))
1506 set_buffer_new(bh_result); 1535 set_buffer_new(bh_result);
1507 1536
1508 if (iomap.iomap_flags & IOMAP_DELAY) { 1537 if (imap.br_startblock == DELAYSTARTBLOCK) {
1509 BUG_ON(direct); 1538 BUG_ON(direct);
1510 if (create) { 1539 if (create) {
1511 set_buffer_uptodate(bh_result); 1540 set_buffer_uptodate(bh_result);
@@ -1514,11 +1543,23 @@ __xfs_get_blocks(
1514 } 1543 }
1515 } 1544 }
1516 1545
1546 /*
1547 * If this is O_DIRECT or the mpage code calling tell them how large
1548 * the mapping is, so that we can avoid repeated get_blocks calls.
1549 */
1517 if (direct || size > (1 << inode->i_blkbits)) { 1550 if (direct || size > (1 << inode->i_blkbits)) {
1518 ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0); 1551 xfs_off_t mapping_size;
1519 offset = min_t(xfs_off_t, 1552
1520 iomap.iomap_bsize - iomap.iomap_delta, size); 1553 mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
1521 bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset); 1554 mapping_size <<= inode->i_blkbits;
1555
1556 ASSERT(mapping_size > 0);
1557 if (mapping_size > size)
1558 mapping_size = size;
1559 if (mapping_size > LONG_MAX)
1560 mapping_size = LONG_MAX;
1561
1562 bh_result->b_size = mapping_size;
1522 } 1563 }
1523 1564
1524 return 0; 1565 return 0;
@@ -1576,7 +1617,7 @@ xfs_end_io_direct(
1576 */ 1617 */
1577 ioend->io_offset = offset; 1618 ioend->io_offset = offset;
1578 ioend->io_size = size; 1619 ioend->io_size = size;
1579 if (ioend->io_type == IOMAP_READ) { 1620 if (ioend->io_type == IO_READ) {
1580 xfs_finish_ioend(ioend, 0); 1621 xfs_finish_ioend(ioend, 0);
1581 } else if (private && size > 0) { 1622 } else if (private && size > 0) {
1582 xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); 1623 xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
@@ -1587,7 +1628,7 @@ xfs_end_io_direct(
1587 * didn't map an unwritten extent so switch it's completion 1628 * didn't map an unwritten extent so switch it's completion
1588 * handler. 1629 * handler.
1589 */ 1630 */
1590 ioend->io_type = IOMAP_NEW; 1631 ioend->io_type = IO_NEW;
1591 xfs_finish_ioend(ioend, 0); 1632 xfs_finish_ioend(ioend, 0);
1592 } 1633 }
1593 1634
@@ -1612,10 +1653,10 @@ xfs_vm_direct_IO(
1612 struct block_device *bdev; 1653 struct block_device *bdev;
1613 ssize_t ret; 1654 ssize_t ret;
1614 1655
1615 bdev = xfs_find_bdev_for_inode(XFS_I(inode)); 1656 bdev = xfs_find_bdev_for_inode(inode);
1616 1657
1617 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ? 1658 iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
1618 IOMAP_UNWRITTEN : IOMAP_READ); 1659 IO_UNWRITTEN : IO_READ);
1619 1660
1620 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, 1661 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
1621 offset, nr_segs, 1662 offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 44c2b0ef9a41..f01de3c55c43 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1007,25 +1007,20 @@ xfs_bwrite(
1007 struct xfs_mount *mp, 1007 struct xfs_mount *mp,
1008 struct xfs_buf *bp) 1008 struct xfs_buf *bp)
1009{ 1009{
1010 int iowait = (bp->b_flags & XBF_ASYNC) == 0; 1010 int error;
1011 int error = 0;
1012 1011
1013 bp->b_strat = xfs_bdstrat_cb; 1012 bp->b_strat = xfs_bdstrat_cb;
1014 bp->b_mount = mp; 1013 bp->b_mount = mp;
1015 bp->b_flags |= XBF_WRITE; 1014 bp->b_flags |= XBF_WRITE;
1016 if (!iowait) 1015 bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
1017 bp->b_flags |= _XBF_RUN_QUEUES;
1018 1016
1019 xfs_buf_delwri_dequeue(bp); 1017 xfs_buf_delwri_dequeue(bp);
1020 xfs_buf_iostrategy(bp); 1018 xfs_buf_iostrategy(bp);
1021 1019
1022 if (iowait) { 1020 error = xfs_buf_iowait(bp);
1023 error = xfs_buf_iowait(bp); 1021 if (error)
1024 if (error) 1022 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1025 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1023 xfs_buf_relse(bp);
1026 xfs_buf_relse(bp);
1027 }
1028
1029 return error; 1024 return error;
1030} 1025}
1031 1026
@@ -1614,7 +1609,8 @@ xfs_mapping_buftarg(
1614 1609
1615STATIC int 1610STATIC int
1616xfs_alloc_delwrite_queue( 1611xfs_alloc_delwrite_queue(
1617 xfs_buftarg_t *btp) 1612 xfs_buftarg_t *btp,
1613 const char *fsname)
1618{ 1614{
1619 int error = 0; 1615 int error = 0;
1620 1616
@@ -1622,7 +1618,7 @@ xfs_alloc_delwrite_queue(
1622 INIT_LIST_HEAD(&btp->bt_delwrite_queue); 1618 INIT_LIST_HEAD(&btp->bt_delwrite_queue);
1623 spin_lock_init(&btp->bt_delwrite_lock); 1619 spin_lock_init(&btp->bt_delwrite_lock);
1624 btp->bt_flags = 0; 1620 btp->bt_flags = 0;
1625 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd"); 1621 btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
1626 if (IS_ERR(btp->bt_task)) { 1622 if (IS_ERR(btp->bt_task)) {
1627 error = PTR_ERR(btp->bt_task); 1623 error = PTR_ERR(btp->bt_task);
1628 goto out_error; 1624 goto out_error;
@@ -1635,7 +1631,8 @@ out_error:
1635xfs_buftarg_t * 1631xfs_buftarg_t *
1636xfs_alloc_buftarg( 1632xfs_alloc_buftarg(
1637 struct block_device *bdev, 1633 struct block_device *bdev,
1638 int external) 1634 int external,
1635 const char *fsname)
1639{ 1636{
1640 xfs_buftarg_t *btp; 1637 xfs_buftarg_t *btp;
1641 1638
@@ -1647,7 +1644,7 @@ xfs_alloc_buftarg(
1647 goto error; 1644 goto error;
1648 if (xfs_mapping_buftarg(btp, bdev)) 1645 if (xfs_mapping_buftarg(btp, bdev))
1649 goto error; 1646 goto error;
1650 if (xfs_alloc_delwrite_queue(btp)) 1647 if (xfs_alloc_delwrite_queue(btp, fsname))
1651 goto error; 1648 goto error;
1652 xfs_alloc_bufhash(btp, external); 1649 xfs_alloc_bufhash(btp, external);
1653 return btp; 1650 return btp;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 386e7361e50e..5fbecefa5dfd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -390,7 +390,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
390/* 390/*
391 * Handling of buftargs. 391 * Handling of buftargs.
392 */ 392 */
393extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 393extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
394extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); 394extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
395extern void xfs_wait_buftarg(xfs_buftarg_t *); 395extern void xfs_wait_buftarg(xfs_buftarg_t *);
396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 396extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 42dd3bcfba6b..d8fb1b5d6cb5 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -115,6 +115,8 @@ xfs_file_fsync(
115 115
116 xfs_iflags_clear(ip, XFS_ITRUNCATED); 116 xfs_iflags_clear(ip, XFS_ITRUNCATED);
117 117
118 xfs_ioend_wait(ip);
119
118 /* 120 /*
119 * We always need to make sure that the required inode state is safe on 121 * We always need to make sure that the required inode state is safe on
120 * disk. The inode might be clean but we still might need to force the 122 * disk. The inode might be clean but we still might need to force the
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 7b26cc2fd284..699b60cbab9c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -527,6 +527,10 @@ xfs_attrmulti_by_handle(
527 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) 527 if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
528 return -XFS_ERROR(EFAULT); 528 return -XFS_ERROR(EFAULT);
529 529
530 /* overflow check */
531 if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
532 return -E2BIG;
533
530 dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); 534 dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
531 if (IS_ERR(dentry)) 535 if (IS_ERR(dentry))
532 return PTR_ERR(dentry); 536 return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 593c05b4df8d..9287135e9bfc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -420,6 +420,10 @@ xfs_compat_attrmulti_by_handle(
420 sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) 420 sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
421 return -XFS_ERROR(EFAULT); 421 return -XFS_ERROR(EFAULT);
422 422
423 /* overflow check */
424 if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
425 return -E2BIG;
426
423 dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); 427 dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
424 if (IS_ERR(dentry)) 428 if (IS_ERR(dentry))
425 return PTR_ERR(dentry); 429 return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e65a7937f3a4..9c8019c78c92 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,7 +673,10 @@ xfs_vn_fiemap(
673 bm.bmv_length = BTOBB(length); 673 bm.bmv_length = BTOBB(length);
674 674
675 /* We add one because in getbmap world count includes the header */ 675 /* We add one because in getbmap world count includes the header */
676 bm.bmv_count = fieinfo->fi_extents_max + 1; 676 bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
677 fieinfo->fi_extents_max + 1;
678 bm.bmv_count = min_t(__s32, bm.bmv_count,
679 (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
677 bm.bmv_iflags = BMV_IF_PREALLOC; 680 bm.bmv_iflags = BMV_IF_PREALLOC;
678 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) 681 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
679 bm.bmv_iflags |= BMV_IF_ATTRFORK; 682 bm.bmv_iflags |= BMV_IF_ATTRFORK;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 1947514ce1ad..e31bf21fe5d3 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -97,7 +97,7 @@ xfs_fs_set_xstate(
97} 97}
98 98
99STATIC int 99STATIC int
100xfs_fs_get_xquota( 100xfs_fs_get_dqblk(
101 struct super_block *sb, 101 struct super_block *sb,
102 int type, 102 int type,
103 qid_t id, 103 qid_t id,
@@ -114,7 +114,7 @@ xfs_fs_get_xquota(
114} 114}
115 115
116STATIC int 116STATIC int
117xfs_fs_set_xquota( 117xfs_fs_set_dqblk(
118 struct super_block *sb, 118 struct super_block *sb,
119 int type, 119 int type,
120 qid_t id, 120 qid_t id,
@@ -135,6 +135,6 @@ xfs_fs_set_xquota(
135const struct quotactl_ops xfs_quotactl_operations = { 135const struct quotactl_ops xfs_quotactl_operations = {
136 .get_xstate = xfs_fs_get_xstate, 136 .get_xstate = xfs_fs_get_xstate,
137 .set_xstate = xfs_fs_set_xstate, 137 .set_xstate = xfs_fs_set_xstate,
138 .get_xquota = xfs_fs_get_xquota, 138 .get_dqblk = xfs_fs_get_dqblk,
139 .set_xquota = xfs_fs_set_xquota, 139 .set_dqblk = xfs_fs_set_dqblk,
140}; 140};
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 2b177c778ba7..f24dbe5efde3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -790,18 +790,18 @@ xfs_open_devices(
790 * Setup xfs_mount buffer target pointers 790 * Setup xfs_mount buffer target pointers
791 */ 791 */
792 error = ENOMEM; 792 error = ENOMEM;
793 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); 793 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
794 if (!mp->m_ddev_targp) 794 if (!mp->m_ddev_targp)
795 goto out_close_rtdev; 795 goto out_close_rtdev;
796 796
797 if (rtdev) { 797 if (rtdev) {
798 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); 798 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
799 if (!mp->m_rtdev_targp) 799 if (!mp->m_rtdev_targp)
800 goto out_free_ddev_targ; 800 goto out_free_ddev_targ;
801 } 801 }
802 802
803 if (logdev && logdev != ddev) { 803 if (logdev && logdev != ddev) {
804 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1); 804 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
805 if (!mp->m_logdev_targp) 805 if (!mp->m_logdev_targp)
806 goto out_free_rtdev_targ; 806 goto out_free_rtdev_targ;
807 } else { 807 } else {
@@ -903,7 +903,8 @@ xfsaild_start(
903 struct xfs_ail *ailp) 903 struct xfs_ail *ailp)
904{ 904{
905 ailp->xa_target = 0; 905 ailp->xa_target = 0;
906 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild"); 906 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
907 ailp->xa_mount->m_fsname);
907 if (IS_ERR(ailp->xa_task)) 908 if (IS_ERR(ailp->xa_task))
908 return -PTR_ERR(ailp->xa_task); 909 return -PTR_ERR(ailp->xa_task);
909 return 0; 910 return 0;
@@ -1093,6 +1094,7 @@ xfs_fs_write_inode(
1093 * the code will only flush the inode if it isn't already 1094 * the code will only flush the inode if it isn't already
1094 * being flushed. 1095 * being flushed.
1095 */ 1096 */
1097 xfs_ioend_wait(ip);
1096 xfs_ilock(ip, XFS_ILOCK_SHARED); 1098 xfs_ilock(ip, XFS_ILOCK_SHARED);
1097 if (ip->i_update_core) { 1099 if (ip->i_update_core) {
1098 error = xfs_log_inode(ip); 1100 error = xfs_log_inode(ip);
@@ -1210,6 +1212,7 @@ xfs_fs_put_super(
1210 1212
1211 xfs_unmountfs(mp); 1213 xfs_unmountfs(mp);
1212 xfs_freesb(mp); 1214 xfs_freesb(mp);
1215 xfs_inode_shrinker_unregister(mp);
1213 xfs_icsb_destroy_counters(mp); 1216 xfs_icsb_destroy_counters(mp);
1214 xfs_close_devices(mp); 1217 xfs_close_devices(mp);
1215 xfs_dmops_put(mp); 1218 xfs_dmops_put(mp);
@@ -1623,6 +1626,8 @@ xfs_fs_fill_super(
1623 if (error) 1626 if (error)
1624 goto fail_vnrele; 1627 goto fail_vnrele;
1625 1628
1629 xfs_inode_shrinker_register(mp);
1630
1626 kfree(mtpt); 1631 kfree(mtpt);
1627 return 0; 1632 return 0;
1628 1633
@@ -1868,6 +1873,7 @@ init_xfs_fs(void)
1868 goto out_cleanup_procfs; 1873 goto out_cleanup_procfs;
1869 1874
1870 vfs_initquota(); 1875 vfs_initquota();
1876 xfs_inode_shrinker_init();
1871 1877
1872 error = register_filesystem(&xfs_fs_type); 1878 error = register_filesystem(&xfs_fs_type);
1873 if (error) 1879 if (error)
@@ -1895,6 +1901,7 @@ exit_xfs_fs(void)
1895{ 1901{
1896 vfs_exitquota(); 1902 vfs_exitquota();
1897 unregister_filesystem(&xfs_fs_type); 1903 unregister_filesystem(&xfs_fs_type);
1904 xfs_inode_shrinker_destroy();
1898 xfs_sysctl_unregister(); 1905 xfs_sysctl_unregister();
1899 xfs_cleanup_procfs(); 1906 xfs_cleanup_procfs();
1900 xfs_buf_terminate(); 1907 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fd9698215759..3884e20bc14e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
95 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
96 int flags, 96 int flags,
97 int tag, 97 int tag,
98 int exclusive) 98 int exclusive,
99 int *nr_to_scan)
99{ 100{
100 uint32_t first_index; 101 uint32_t first_index;
101 int last_error = 0; 102 int last_error = 0;
@@ -134,7 +135,7 @@ restart:
134 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
135 break; 136 break;
136 137
137 } while (1); 138 } while ((*nr_to_scan)--);
138 139
139 if (skipped) { 140 if (skipped) {
140 delay(1); 141 delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
150 struct xfs_perag *pag, int flags), 151 struct xfs_perag *pag, int flags),
151 int flags, 152 int flags,
152 int tag, 153 int tag,
153 int exclusive) 154 int exclusive,
155 int *nr_to_scan)
154{ 156{
155 int error = 0; 157 int error = 0;
156 int last_error = 0; 158 int last_error = 0;
157 xfs_agnumber_t ag; 159 xfs_agnumber_t ag;
160 int nr;
158 161
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
159 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
160 struct xfs_perag *pag; 164 struct xfs_perag *pag;
161 165
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
165 continue; 169 continue;
166 } 170 }
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
168 exclusive); 172 exclusive, &nr);
169 xfs_perag_put(pag); 173 xfs_perag_put(pag);
170 if (error) { 174 if (error) {
171 last_error = error; 175 last_error = error;
172 if (error == EFSCORRUPTED) 176 if (error == EFSCORRUPTED)
173 break; 177 break;
174 } 178 }
179 if (nr <= 0)
180 break;
175 } 181 }
182 if (nr_to_scan)
183 *nr_to_scan = nr;
176 return XFS_ERROR(last_error); 184 return XFS_ERROR(last_error);
177} 185}
178 186
@@ -291,7 +299,7 @@ xfs_sync_data(
291 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 299 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
292 300
293 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 301 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
294 XFS_ICI_NO_TAG, 0); 302 XFS_ICI_NO_TAG, 0, NULL);
295 if (error) 303 if (error)
296 return XFS_ERROR(error); 304 return XFS_ERROR(error);
297 305
@@ -310,7 +318,7 @@ xfs_sync_attr(
310 ASSERT((flags & ~SYNC_WAIT) == 0); 318 ASSERT((flags & ~SYNC_WAIT) == 0);
311 319
312 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 320 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
313 XFS_ICI_NO_TAG, 0); 321 XFS_ICI_NO_TAG, 0, NULL);
314} 322}
315 323
316STATIC int 324STATIC int
@@ -348,68 +356,23 @@ xfs_commit_dummy_trans(
348 356
349STATIC int 357STATIC int
350xfs_sync_fsdata( 358xfs_sync_fsdata(
351 struct xfs_mount *mp, 359 struct xfs_mount *mp)
352 int flags)
353{ 360{
354 struct xfs_buf *bp; 361 struct xfs_buf *bp;
355 struct xfs_buf_log_item *bip;
356 int error = 0;
357
358 /*
359 * If this is xfssyncd() then only sync the superblock if we can
360 * lock it without sleeping and it is not pinned.
361 */
362 if (flags & SYNC_TRYLOCK) {
363 ASSERT(!(flags & SYNC_WAIT));
364
365 bp = xfs_getsb(mp, XBF_TRYLOCK);
366 if (!bp)
367 goto out;
368
369 bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
370 if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
371 goto out_brelse;
372 } else {
373 bp = xfs_getsb(mp, 0);
374
375 /*
376 * If the buffer is pinned then push on the log so we won't
377 * get stuck waiting in the write for someone, maybe
378 * ourselves, to flush the log.
379 *
380 * Even though we just pushed the log above, we did not have
381 * the superblock buffer locked at that point so it can
382 * become pinned in between there and here.
383 */
384 if (XFS_BUF_ISPINNED(bp))
385 xfs_log_force(mp, 0);
386 }
387
388
389 if (flags & SYNC_WAIT)
390 XFS_BUF_UNASYNC(bp);
391 else
392 XFS_BUF_ASYNC(bp);
393
394 error = xfs_bwrite(mp, bp);
395 if (error)
396 return error;
397 362
398 /* 363 /*
399 * If this is a data integrity sync make sure all pending buffers 364 * If the buffer is pinned then push on the log so we won't get stuck
400 * are flushed out for the log coverage check below. 365 * waiting in the write for someone, maybe ourselves, to flush the log.
366 *
367 * Even though we just pushed the log above, we did not have the
368 * superblock buffer locked at that point so it can become pinned in
369 * between there and here.
401 */ 370 */
402 if (flags & SYNC_WAIT) 371 bp = xfs_getsb(mp, 0);
403 xfs_flush_buftarg(mp->m_ddev_targp, 1); 372 if (XFS_BUF_ISPINNED(bp))
404 373 xfs_log_force(mp, 0);
405 if (xfs_log_need_covered(mp))
406 error = xfs_commit_dummy_trans(mp, flags);
407 return error;
408 374
409 out_brelse: 375 return xfs_bwrite(mp, bp);
410 xfs_buf_relse(bp);
411 out:
412 return error;
413} 376}
414 377
415/* 378/*
@@ -433,7 +396,7 @@ int
433xfs_quiesce_data( 396xfs_quiesce_data(
434 struct xfs_mount *mp) 397 struct xfs_mount *mp)
435{ 398{
436 int error; 399 int error, error2 = 0;
437 400
438 /* push non-blocking */ 401 /* push non-blocking */
439 xfs_sync_data(mp, 0); 402 xfs_sync_data(mp, 0);
@@ -444,13 +407,20 @@ xfs_quiesce_data(
444 xfs_qm_sync(mp, SYNC_WAIT); 407 xfs_qm_sync(mp, SYNC_WAIT);
445 408
446 /* write superblock and hoover up shutdown errors */ 409 /* write superblock and hoover up shutdown errors */
447 error = xfs_sync_fsdata(mp, SYNC_WAIT); 410 error = xfs_sync_fsdata(mp);
411
412 /* make sure all delwri buffers are written out */
413 xfs_flush_buftarg(mp->m_ddev_targp, 1);
414
415 /* mark the log as covered if needed */
416 if (xfs_log_need_covered(mp))
417 error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
448 418
449 /* flush data-only devices */ 419 /* flush data-only devices */
450 if (mp->m_rtdev_targp) 420 if (mp->m_rtdev_targp)
451 XFS_bflush(mp->m_rtdev_targp); 421 XFS_bflush(mp->m_rtdev_targp);
452 422
453 return error; 423 return error ? error : error2;
454} 424}
455 425
456STATIC void 426STATIC void
@@ -573,9 +543,9 @@ xfs_flush_inodes(
573} 543}
574 544
575/* 545/*
576 * Every sync period we need to unpin all items, reclaim inodes, sync 546 * Every sync period we need to unpin all items, reclaim inodes and sync
577 * quota and write out the superblock. We might need to cover the log 547 * disk quotas. We might need to cover the log to indicate that the
578 * to indicate it is idle. 548 * filesystem is idle.
579 */ 549 */
580STATIC void 550STATIC void
581xfs_sync_worker( 551xfs_sync_worker(
@@ -589,7 +559,8 @@ xfs_sync_worker(
589 xfs_reclaim_inodes(mp, 0); 559 xfs_reclaim_inodes(mp, 0);
590 /* dgc: errors ignored here */ 560 /* dgc: errors ignored here */
591 error = xfs_qm_sync(mp, SYNC_TRYLOCK); 561 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
592 error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); 562 if (xfs_log_need_covered(mp))
563 error = xfs_commit_dummy_trans(mp, 0);
593 } 564 }
594 mp->m_sync_seq++; 565 mp->m_sync_seq++;
595 wake_up(&mp->m_wait_single_sync_task); 566 wake_up(&mp->m_wait_single_sync_task);
@@ -652,7 +623,7 @@ xfs_syncd_init(
652 mp->m_sync_work.w_syncer = xfs_sync_worker; 623 mp->m_sync_work.w_syncer = xfs_sync_worker;
653 mp->m_sync_work.w_mount = mp; 624 mp->m_sync_work.w_mount = mp;
654 mp->m_sync_work.w_completion = NULL; 625 mp->m_sync_work.w_completion = NULL;
655 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); 626 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
656 if (IS_ERR(mp->m_sync_task)) 627 if (IS_ERR(mp->m_sync_task))
657 return -PTR_ERR(mp->m_sync_task); 628 return -PTR_ERR(mp->m_sync_task);
658 return 0; 629 return 0;
@@ -673,6 +644,7 @@ __xfs_inode_set_reclaim_tag(
673 radix_tree_tag_set(&pag->pag_ici_root, 644 radix_tree_tag_set(&pag->pag_ici_root,
674 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 645 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
675 XFS_ICI_RECLAIM_TAG); 646 XFS_ICI_RECLAIM_TAG);
647 pag->pag_ici_reclaimable++;
676} 648}
677 649
678/* 650/*
@@ -705,6 +677,7 @@ __xfs_inode_clear_reclaim_tag(
705{ 677{
706 radix_tree_tag_clear(&pag->pag_ici_root, 678 radix_tree_tag_clear(&pag->pag_ici_root,
707 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 679 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
680 pag->pag_ici_reclaimable--;
708} 681}
709 682
710/* 683/*
@@ -854,5 +827,93 @@ xfs_reclaim_inodes(
854 int mode) 827 int mode)
855{ 828{
856 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, 829 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
857 XFS_ICI_RECLAIM_TAG, 1); 830 XFS_ICI_RECLAIM_TAG, 1, NULL);
831}
832
833/*
834 * Shrinker infrastructure.
835 *
836 * This is all far more complex than it needs to be. It adds a global list of
837 * mounts because the shrinkers can only call a global context. We need to make
838 * the shrinkers pass a context to avoid the need for global state.
839 */
840static LIST_HEAD(xfs_mount_list);
841static struct rw_semaphore xfs_mount_list_lock;
842
843static int
844xfs_reclaim_inode_shrink(
845 int nr_to_scan,
846 gfp_t gfp_mask)
847{
848 struct xfs_mount *mp;
849 struct xfs_perag *pag;
850 xfs_agnumber_t ag;
851 int reclaimable = 0;
852
853 if (nr_to_scan) {
854 if (!(gfp_mask & __GFP_FS))
855 return -1;
856
857 down_read(&xfs_mount_list_lock);
858 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
859 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
860 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
861 if (nr_to_scan <= 0)
862 break;
863 }
864 up_read(&xfs_mount_list_lock);
865 }
866
867 down_read(&xfs_mount_list_lock);
868 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
869 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
870
871 pag = xfs_perag_get(mp, ag);
872 if (!pag->pag_ici_init) {
873 xfs_perag_put(pag);
874 continue;
875 }
876 reclaimable += pag->pag_ici_reclaimable;
877 xfs_perag_put(pag);
878 }
879 }
880 up_read(&xfs_mount_list_lock);
881 return reclaimable;
882}
883
884static struct shrinker xfs_inode_shrinker = {
885 .shrink = xfs_reclaim_inode_shrink,
886 .seeks = DEFAULT_SEEKS,
887};
888
889void __init
890xfs_inode_shrinker_init(void)
891{
892 init_rwsem(&xfs_mount_list_lock);
893 register_shrinker(&xfs_inode_shrinker);
894}
895
896void
897xfs_inode_shrinker_destroy(void)
898{
899 ASSERT(list_empty(&xfs_mount_list));
900 unregister_shrinker(&xfs_inode_shrinker);
901}
902
903void
904xfs_inode_shrinker_register(
905 struct xfs_mount *mp)
906{
907 down_write(&xfs_mount_list_lock);
908 list_add_tail(&mp->m_mplist, &xfs_mount_list);
909 up_write(&xfs_mount_list_lock);
910}
911
912void
913xfs_inode_shrinker_unregister(
914 struct xfs_mount *mp)
915{
916 down_write(&xfs_mount_list_lock);
917 list_del(&mp->m_mplist);
918 up_write(&xfs_mount_list_lock);
858} 919}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c346cabb..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
54int xfs_inode_ag_iterator(struct xfs_mount *mp, 54int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
57 62
58#endif 63#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 5a107601e969..207fa77f63ae 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -41,7 +41,6 @@
41#include "xfs_alloc.h" 41#include "xfs_alloc.h"
42#include "xfs_bmap.h" 42#include "xfs_bmap.h"
43#include "xfs_attr.h" 43#include "xfs_attr.h"
44#include "xfs_attr_sf.h"
45#include "xfs_attr_leaf.h" 44#include "xfs_attr_leaf.h"
46#include "xfs_log_priv.h" 45#include "xfs_log_priv.h"
47#include "xfs_buf_item.h" 46#include "xfs_buf_item.h"
@@ -50,6 +49,9 @@
50#include "xfs_aops.h" 49#include "xfs_aops.h"
51#include "quota/xfs_dquot_item.h" 50#include "quota/xfs_dquot_item.h"
52#include "quota/xfs_dquot.h" 51#include "quota/xfs_dquot.h"
52#include "xfs_log_recover.h"
53#include "xfs_buf_item.h"
54#include "xfs_inode_item.h"
53 55
54/* 56/*
55 * We include this last to have the helpers above available for the trace 57 * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index fcaa62f0799e..8a319cfd2901 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -32,6 +32,10 @@ struct xfs_da_node_entry;
32struct xfs_dquot; 32struct xfs_dquot;
33struct xlog_ticket; 33struct xlog_ticket;
34struct log; 34struct log;
35struct xlog_recover;
36struct xlog_recover_item;
37struct xfs_buf_log_format;
38struct xfs_inode_log_format;
35 39
36DECLARE_EVENT_CLASS(xfs_attr_list_class, 40DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx), 41 TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -562,18 +566,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
562 __field(dev_t, dev) 566 __field(dev_t, dev)
563 __field(xfs_ino_t, ino) 567 __field(xfs_ino_t, ino)
564 __field(int, count) 568 __field(int, count)
569 __field(int, pincount)
565 __field(unsigned long, caller_ip) 570 __field(unsigned long, caller_ip)
566 ), 571 ),
567 TP_fast_assign( 572 TP_fast_assign(
568 __entry->dev = VFS_I(ip)->i_sb->s_dev; 573 __entry->dev = VFS_I(ip)->i_sb->s_dev;
569 __entry->ino = ip->i_ino; 574 __entry->ino = ip->i_ino;
570 __entry->count = atomic_read(&VFS_I(ip)->i_count); 575 __entry->count = atomic_read(&VFS_I(ip)->i_count);
576 __entry->pincount = atomic_read(&ip->i_pincount);
571 __entry->caller_ip = caller_ip; 577 __entry->caller_ip = caller_ip;
572 ), 578 ),
573 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", 579 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
574 MAJOR(__entry->dev), MINOR(__entry->dev), 580 MAJOR(__entry->dev), MINOR(__entry->dev),
575 __entry->ino, 581 __entry->ino,
576 __entry->count, 582 __entry->count,
583 __entry->pincount,
577 (char *)__entry->caller_ip) 584 (char *)__entry->caller_ip)
578) 585)
579 586
@@ -583,6 +590,10 @@ DEFINE_EVENT(xfs_inode_class, name, \
583 TP_ARGS(ip, caller_ip)) 590 TP_ARGS(ip, caller_ip))
584DEFINE_INODE_EVENT(xfs_ihold); 591DEFINE_INODE_EVENT(xfs_ihold);
585DEFINE_INODE_EVENT(xfs_irele); 592DEFINE_INODE_EVENT(xfs_irele);
593DEFINE_INODE_EVENT(xfs_inode_pin);
594DEFINE_INODE_EVENT(xfs_inode_unpin);
595DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
596
586/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 597/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
587DEFINE_INODE_EVENT(xfs_inode); 598DEFINE_INODE_EVENT(xfs_inode);
588#define xfs_itrace_entry(ip) \ 599#define xfs_itrace_entry(ip) \
@@ -642,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \
642 TP_PROTO(struct xfs_dquot *dqp), \ 653 TP_PROTO(struct xfs_dquot *dqp), \
643 TP_ARGS(dqp)) 654 TP_ARGS(dqp))
644DEFINE_DQUOT_EVENT(xfs_dqadjust); 655DEFINE_DQUOT_EVENT(xfs_dqadjust);
645DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
646DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
647DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); 656DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
648DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); 657DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
649DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); 658DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
@@ -658,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail);
658DEFINE_DQUOT_EVENT(xfs_dqlookup_found); 667DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
659DEFINE_DQUOT_EVENT(xfs_dqlookup_want); 668DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
660DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); 669DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
661DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
662DEFINE_DQUOT_EVENT(xfs_dqlookup_done); 670DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
663DEFINE_DQUOT_EVENT(xfs_dqget_hit); 671DEFINE_DQUOT_EVENT(xfs_dqget_hit);
664DEFINE_DQUOT_EVENT(xfs_dqget_miss); 672DEFINE_DQUOT_EVENT(xfs_dqget_miss);
@@ -1495,6 +1503,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
1495DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); 1503DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
1496DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); 1504DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
1497 1505
1506DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
1507 TP_PROTO(struct log *log, struct xlog_recover *trans,
1508 struct xlog_recover_item *item, int pass),
1509 TP_ARGS(log, trans, item, pass),
1510 TP_STRUCT__entry(
1511 __field(dev_t, dev)
1512 __field(unsigned long, item)
1513 __field(xlog_tid_t, tid)
1514 __field(int, type)
1515 __field(int, pass)
1516 __field(int, count)
1517 __field(int, total)
1518 ),
1519 TP_fast_assign(
1520 __entry->dev = log->l_mp->m_super->s_dev;
1521 __entry->item = (unsigned long)item;
1522 __entry->tid = trans->r_log_tid;
1523 __entry->type = ITEM_TYPE(item);
1524 __entry->pass = pass;
1525 __entry->count = item->ri_cnt;
1526 __entry->total = item->ri_total;
1527 ),
1528 TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
1529 "item region count/total %d/%d",
1530 MAJOR(__entry->dev), MINOR(__entry->dev),
1531 __entry->tid,
1532 __entry->pass,
1533 (void *)__entry->item,
1534 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
1535 __entry->count,
1536 __entry->total)
1537)
1538
1539#define DEFINE_LOG_RECOVER_ITEM(name) \
1540DEFINE_EVENT(xfs_log_recover_item_class, name, \
1541 TP_PROTO(struct log *log, struct xlog_recover *trans, \
1542 struct xlog_recover_item *item, int pass), \
1543 TP_ARGS(log, trans, item, pass))
1544
1545DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
1546DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
1547DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
1548DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
1549DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
1550
1551DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
1552 TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
1553 TP_ARGS(log, buf_f),
1554 TP_STRUCT__entry(
1555 __field(dev_t, dev)
1556 __field(__int64_t, blkno)
1557 __field(unsigned short, len)
1558 __field(unsigned short, flags)
1559 __field(unsigned short, size)
1560 __field(unsigned int, map_size)
1561 ),
1562 TP_fast_assign(
1563 __entry->dev = log->l_mp->m_super->s_dev;
1564 __entry->blkno = buf_f->blf_blkno;
1565 __entry->len = buf_f->blf_len;
1566 __entry->flags = buf_f->blf_flags;
1567 __entry->size = buf_f->blf_size;
1568 __entry->map_size = buf_f->blf_map_size;
1569 ),
1570 TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
1571 "map_size %d",
1572 MAJOR(__entry->dev), MINOR(__entry->dev),
1573 __entry->blkno,
1574 __entry->len,
1575 __entry->flags,
1576 __entry->size,
1577 __entry->map_size)
1578)
1579
1580#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
1581DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
1582 TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
1583 TP_ARGS(log, buf_f))
1584
1585DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
1586DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
1587DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
1588DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
1589DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
1590DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
1591DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
1592DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
1593
1594DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
1595 TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
1596 TP_ARGS(log, in_f),
1597 TP_STRUCT__entry(
1598 __field(dev_t, dev)
1599 __field(xfs_ino_t, ino)
1600 __field(unsigned short, size)
1601 __field(int, fields)
1602 __field(unsigned short, asize)
1603 __field(unsigned short, dsize)
1604 __field(__int64_t, blkno)
1605 __field(int, len)
1606 __field(int, boffset)
1607 ),
1608 TP_fast_assign(
1609 __entry->dev = log->l_mp->m_super->s_dev;
1610 __entry->ino = in_f->ilf_ino;
1611 __entry->size = in_f->ilf_size;
1612 __entry->fields = in_f->ilf_fields;
1613 __entry->asize = in_f->ilf_asize;
1614 __entry->dsize = in_f->ilf_dsize;
1615 __entry->blkno = in_f->ilf_blkno;
1616 __entry->len = in_f->ilf_len;
1617 __entry->boffset = in_f->ilf_boffset;
1618 ),
1619 TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
1620 "dsize %d, blkno 0x%llx, len %d, boffset %d",
1621 MAJOR(__entry->dev), MINOR(__entry->dev),
1622 __entry->ino,
1623 __entry->size,
1624 __entry->fields,
1625 __entry->asize,
1626 __entry->dsize,
1627 __entry->blkno,
1628 __entry->len,
1629 __entry->boffset)
1630)
1631#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
1632DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
1633 TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
1634 TP_ARGS(log, in_f))
1635
1636DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
1637DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
1638DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
1639
1498#endif /* _TRACE_XFS_H */ 1640#endif /* _TRACE_XFS_H */
1499 1641
1500#undef TRACE_INCLUDE_PATH 1642#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 5f79dd78626b..b89ec5df0129 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
101 * No need to re-initialize these if this is a reclaimed dquot. 101 * No need to re-initialize these if this is a reclaimed dquot.
102 */ 102 */
103 if (brandnewdquot) { 103 if (brandnewdquot) {
104 dqp->dq_flnext = dqp->dq_flprev = dqp; 104 INIT_LIST_HEAD(&dqp->q_freelist);
105 mutex_init(&dqp->q_qlock); 105 mutex_init(&dqp->q_qlock);
106 init_waitqueue_head(&dqp->q_pinwait); 106 init_waitqueue_head(&dqp->q_pinwait);
107 107
@@ -119,20 +119,20 @@ xfs_qm_dqinit(
119 * Only the q_core portion was zeroed in dqreclaim_one(). 119 * Only the q_core portion was zeroed in dqreclaim_one().
120 * So, we need to reset others. 120 * So, we need to reset others.
121 */ 121 */
122 dqp->q_nrefs = 0; 122 dqp->q_nrefs = 0;
123 dqp->q_blkno = 0; 123 dqp->q_blkno = 0;
124 dqp->MPL_NEXT = dqp->HL_NEXT = NULL; 124 INIT_LIST_HEAD(&dqp->q_mplist);
125 dqp->HL_PREVP = dqp->MPL_PREVP = NULL; 125 INIT_LIST_HEAD(&dqp->q_hashlist);
126 dqp->q_bufoffset = 0; 126 dqp->q_bufoffset = 0;
127 dqp->q_fileoffset = 0; 127 dqp->q_fileoffset = 0;
128 dqp->q_transp = NULL; 128 dqp->q_transp = NULL;
129 dqp->q_gdquot = NULL; 129 dqp->q_gdquot = NULL;
130 dqp->q_res_bcount = 0; 130 dqp->q_res_bcount = 0;
131 dqp->q_res_icount = 0; 131 dqp->q_res_icount = 0;
132 dqp->q_res_rtbcount = 0; 132 dqp->q_res_rtbcount = 0;
133 atomic_set(&dqp->q_pincount, 0); 133 atomic_set(&dqp->q_pincount, 0);
134 dqp->q_hash = NULL; 134 dqp->q_hash = NULL;
135 ASSERT(dqp->dq_flnext == dqp->dq_flprev); 135 ASSERT(list_empty(&dqp->q_freelist));
136 136
137 trace_xfs_dqreuse(dqp); 137 trace_xfs_dqreuse(dqp);
138 } 138 }
@@ -158,7 +158,7 @@ void
158xfs_qm_dqdestroy( 158xfs_qm_dqdestroy(
159 xfs_dquot_t *dqp) 159 xfs_dquot_t *dqp)
160{ 160{
161 ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); 161 ASSERT(list_empty(&dqp->q_freelist));
162 162
163 mutex_destroy(&dqp->q_qlock); 163 mutex_destroy(&dqp->q_qlock);
164 sv_destroy(&dqp->q_pinwait); 164 sv_destroy(&dqp->q_pinwait);
@@ -252,7 +252,7 @@ xfs_qm_adjust_dqtimers(
252 (be64_to_cpu(d->d_bcount) >= 252 (be64_to_cpu(d->d_bcount) >=
253 be64_to_cpu(d->d_blk_hardlimit)))) { 253 be64_to_cpu(d->d_blk_hardlimit)))) {
254 d->d_btimer = cpu_to_be32(get_seconds() + 254 d->d_btimer = cpu_to_be32(get_seconds() +
255 XFS_QI_BTIMELIMIT(mp)); 255 mp->m_quotainfo->qi_btimelimit);
256 } else { 256 } else {
257 d->d_bwarns = 0; 257 d->d_bwarns = 0;
258 } 258 }
@@ -275,7 +275,7 @@ xfs_qm_adjust_dqtimers(
275 (be64_to_cpu(d->d_icount) >= 275 (be64_to_cpu(d->d_icount) >=
276 be64_to_cpu(d->d_ino_hardlimit)))) { 276 be64_to_cpu(d->d_ino_hardlimit)))) {
277 d->d_itimer = cpu_to_be32(get_seconds() + 277 d->d_itimer = cpu_to_be32(get_seconds() +
278 XFS_QI_ITIMELIMIT(mp)); 278 mp->m_quotainfo->qi_itimelimit);
279 } else { 279 } else {
280 d->d_iwarns = 0; 280 d->d_iwarns = 0;
281 } 281 }
@@ -298,7 +298,7 @@ xfs_qm_adjust_dqtimers(
298 (be64_to_cpu(d->d_rtbcount) >= 298 (be64_to_cpu(d->d_rtbcount) >=
299 be64_to_cpu(d->d_rtb_hardlimit)))) { 299 be64_to_cpu(d->d_rtb_hardlimit)))) {
300 d->d_rtbtimer = cpu_to_be32(get_seconds() + 300 d->d_rtbtimer = cpu_to_be32(get_seconds() +
301 XFS_QI_RTBTIMELIMIT(mp)); 301 mp->m_quotainfo->qi_rtbtimelimit);
302 } else { 302 } else {
303 d->d_rtbwarns = 0; 303 d->d_rtbwarns = 0;
304 } 304 }
@@ -325,6 +325,7 @@ xfs_qm_init_dquot_blk(
325 uint type, 325 uint type,
326 xfs_buf_t *bp) 326 xfs_buf_t *bp)
327{ 327{
328 struct xfs_quotainfo *q = mp->m_quotainfo;
328 xfs_dqblk_t *d; 329 xfs_dqblk_t *d;
329 int curid, i; 330 int curid, i;
330 331
@@ -337,16 +338,16 @@ xfs_qm_init_dquot_blk(
337 /* 338 /*
338 * ID of the first dquot in the block - id's are zero based. 339 * ID of the first dquot in the block - id's are zero based.
339 */ 340 */
340 curid = id - (id % XFS_QM_DQPERBLK(mp)); 341 curid = id - (id % q->qi_dqperchunk);
341 ASSERT(curid >= 0); 342 ASSERT(curid >= 0);
342 memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp))); 343 memset(d, 0, BBTOB(q->qi_dqchunklen));
343 for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++) 344 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
344 xfs_qm_dqinit_core(curid, type, d); 345 xfs_qm_dqinit_core(curid, type, d);
345 xfs_trans_dquot_buf(tp, bp, 346 xfs_trans_dquot_buf(tp, bp,
346 (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF : 347 (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
347 ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF : 348 ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
348 XFS_BLI_GDQUOT_BUF))); 349 XFS_BLI_GDQUOT_BUF)));
349 xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1); 350 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
350} 351}
351 352
352 353
@@ -419,7 +420,7 @@ xfs_qm_dqalloc(
419 /* now we can just get the buffer (there's nothing to read yet) */ 420 /* now we can just get the buffer (there's nothing to read yet) */
420 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, 421 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
421 dqp->q_blkno, 422 dqp->q_blkno,
422 XFS_QI_DQCHUNKLEN(mp), 423 mp->m_quotainfo->qi_dqchunklen,
423 0); 424 0);
424 if (!bp || (error = XFS_BUF_GETERROR(bp))) 425 if (!bp || (error = XFS_BUF_GETERROR(bp)))
425 goto error1; 426 goto error1;
@@ -500,7 +501,8 @@ xfs_qm_dqtobp(
500 */ 501 */
501 if (dqp->q_blkno == (xfs_daddr_t) 0) { 502 if (dqp->q_blkno == (xfs_daddr_t) 0) {
502 /* We use the id as an index */ 503 /* We use the id as an index */
503 dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp); 504 dqp->q_fileoffset = (xfs_fileoff_t)id /
505 mp->m_quotainfo->qi_dqperchunk;
504 nmaps = 1; 506 nmaps = 1;
505 quotip = XFS_DQ_TO_QIP(dqp); 507 quotip = XFS_DQ_TO_QIP(dqp);
506 xfs_ilock(quotip, XFS_ILOCK_SHARED); 508 xfs_ilock(quotip, XFS_ILOCK_SHARED);
@@ -529,7 +531,7 @@ xfs_qm_dqtobp(
529 /* 531 /*
530 * offset of dquot in the (fixed sized) dquot chunk. 532 * offset of dquot in the (fixed sized) dquot chunk.
531 */ 533 */
532 dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) * 534 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
533 sizeof(xfs_dqblk_t); 535 sizeof(xfs_dqblk_t);
534 if (map.br_startblock == HOLESTARTBLOCK) { 536 if (map.br_startblock == HOLESTARTBLOCK) {
535 /* 537 /*
@@ -559,15 +561,13 @@ xfs_qm_dqtobp(
559 * Read in the buffer, unless we've just done the allocation 561 * Read in the buffer, unless we've just done the allocation
560 * (in which case we already have the buf). 562 * (in which case we already have the buf).
561 */ 563 */
562 if (! newdquot) { 564 if (!newdquot) {
563 trace_xfs_dqtobp_read(dqp); 565 trace_xfs_dqtobp_read(dqp);
564 566
565 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 567 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
566 dqp->q_blkno, 568 dqp->q_blkno,
567 XFS_QI_DQCHUNKLEN(mp), 569 mp->m_quotainfo->qi_dqchunklen,
568 0, &bp))) { 570 0, &bp);
569 return (error);
570 }
571 if (error || !bp) 571 if (error || !bp)
572 return XFS_ERROR(error); 572 return XFS_ERROR(error);
573 } 573 }
@@ -689,14 +689,14 @@ xfs_qm_idtodq(
689 tp = NULL; 689 tp = NULL;
690 if (flags & XFS_QMOPT_DQALLOC) { 690 if (flags & XFS_QMOPT_DQALLOC) {
691 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 691 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
692 if ((error = xfs_trans_reserve(tp, 692 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
693 XFS_QM_DQALLOC_SPACE_RES(mp), 693 XFS_WRITE_LOG_RES(mp) +
694 XFS_WRITE_LOG_RES(mp) + 694 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
695 BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 + 695 128,
696 128, 696 0,
697 0, 697 XFS_TRANS_PERM_LOG_RES,
698 XFS_TRANS_PERM_LOG_RES, 698 XFS_WRITE_LOG_COUNT);
699 XFS_WRITE_LOG_COUNT))) { 699 if (error) {
700 cancelflags = 0; 700 cancelflags = 0;
701 goto error0; 701 goto error0;
702 } 702 }
@@ -751,7 +751,6 @@ xfs_qm_dqlookup(
751{ 751{
752 xfs_dquot_t *dqp; 752 xfs_dquot_t *dqp;
753 uint flist_locked; 753 uint flist_locked;
754 xfs_dquot_t *d;
755 754
756 ASSERT(mutex_is_locked(&qh->qh_lock)); 755 ASSERT(mutex_is_locked(&qh->qh_lock));
757 756
@@ -760,7 +759,7 @@ xfs_qm_dqlookup(
760 /* 759 /*
761 * Traverse the hashchain looking for a match 760 * Traverse the hashchain looking for a match
762 */ 761 */
763 for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) { 762 list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
764 /* 763 /*
765 * We already have the hashlock. We don't need the 764 * We already have the hashlock. We don't need the
766 * dqlock to look at the id field of the dquot, since the 765 * dqlock to look at the id field of the dquot, since the
@@ -772,12 +771,12 @@ xfs_qm_dqlookup(
772 /* 771 /*
773 * All in core dquots must be on the dqlist of mp 772 * All in core dquots must be on the dqlist of mp
774 */ 773 */
775 ASSERT(dqp->MPL_PREVP != NULL); 774 ASSERT(!list_empty(&dqp->q_mplist));
776 775
777 xfs_dqlock(dqp); 776 xfs_dqlock(dqp);
778 if (dqp->q_nrefs == 0) { 777 if (dqp->q_nrefs == 0) {
779 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp)); 778 ASSERT(!list_empty(&dqp->q_freelist));
780 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 779 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
781 trace_xfs_dqlookup_want(dqp); 780 trace_xfs_dqlookup_want(dqp);
782 781
783 /* 782 /*
@@ -787,7 +786,7 @@ xfs_qm_dqlookup(
787 */ 786 */
788 dqp->dq_flags |= XFS_DQ_WANT; 787 dqp->dq_flags |= XFS_DQ_WANT;
789 xfs_dqunlock(dqp); 788 xfs_dqunlock(dqp);
790 xfs_qm_freelist_lock(xfs_Gqm); 789 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
791 xfs_dqlock(dqp); 790 xfs_dqlock(dqp);
792 dqp->dq_flags &= ~(XFS_DQ_WANT); 791 dqp->dq_flags &= ~(XFS_DQ_WANT);
793 } 792 }
@@ -802,46 +801,28 @@ xfs_qm_dqlookup(
802 801
803 if (flist_locked) { 802 if (flist_locked) {
804 if (dqp->q_nrefs != 0) { 803 if (dqp->q_nrefs != 0) {
805 xfs_qm_freelist_unlock(xfs_Gqm); 804 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
806 flist_locked = B_FALSE; 805 flist_locked = B_FALSE;
807 } else { 806 } else {
808 /* 807 /* take it off the freelist */
809 * take it off the freelist
810 */
811 trace_xfs_dqlookup_freelist(dqp); 808 trace_xfs_dqlookup_freelist(dqp);
812 XQM_FREELIST_REMOVE(dqp); 809 list_del_init(&dqp->q_freelist);
813 /* xfs_qm_freelist_print(&(xfs_Gqm-> 810 xfs_Gqm->qm_dqfrlist_cnt--;
814 qm_dqfreelist),
815 "after removal"); */
816 } 811 }
817 } 812 }
818 813
819 /*
820 * grab a reference
821 */
822 XFS_DQHOLD(dqp); 814 XFS_DQHOLD(dqp);
823 815
824 if (flist_locked) 816 if (flist_locked)
825 xfs_qm_freelist_unlock(xfs_Gqm); 817 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
826 /* 818 /*
827 * move the dquot to the front of the hashchain 819 * move the dquot to the front of the hashchain
828 */ 820 */
829 ASSERT(mutex_is_locked(&qh->qh_lock)); 821 ASSERT(mutex_is_locked(&qh->qh_lock));
830 if (dqp->HL_PREVP != &qh->qh_next) { 822 list_move(&dqp->q_hashlist, &qh->qh_list);
831 trace_xfs_dqlookup_move(dqp);
832 if ((d = dqp->HL_NEXT))
833 d->HL_PREVP = dqp->HL_PREVP;
834 *(dqp->HL_PREVP) = d;
835 d = qh->qh_next;
836 d->HL_PREVP = &dqp->HL_NEXT;
837 dqp->HL_NEXT = d;
838 dqp->HL_PREVP = &qh->qh_next;
839 qh->qh_next = dqp;
840 }
841 trace_xfs_dqlookup_done(dqp); 823 trace_xfs_dqlookup_done(dqp);
842 *O_dqpp = dqp; 824 *O_dqpp = dqp;
843 ASSERT(mutex_is_locked(&qh->qh_lock)); 825 return 0;
844 return (0);
845 } 826 }
846 } 827 }
847 828
@@ -975,16 +956,17 @@ xfs_qm_dqget(
975 */ 956 */
976 if (ip) { 957 if (ip) {
977 xfs_ilock(ip, XFS_ILOCK_EXCL); 958 xfs_ilock(ip, XFS_ILOCK_EXCL);
978 if (! XFS_IS_DQTYPE_ON(mp, type)) { 959
979 /* inode stays locked on return */
980 xfs_qm_dqdestroy(dqp);
981 return XFS_ERROR(ESRCH);
982 }
983 /* 960 /*
984 * A dquot could be attached to this inode by now, since 961 * A dquot could be attached to this inode by now, since
985 * we had dropped the ilock. 962 * we had dropped the ilock.
986 */ 963 */
987 if (type == XFS_DQ_USER) { 964 if (type == XFS_DQ_USER) {
965 if (!XFS_IS_UQUOTA_ON(mp)) {
966 /* inode stays locked on return */
967 xfs_qm_dqdestroy(dqp);
968 return XFS_ERROR(ESRCH);
969 }
988 if (ip->i_udquot) { 970 if (ip->i_udquot) {
989 xfs_qm_dqdestroy(dqp); 971 xfs_qm_dqdestroy(dqp);
990 dqp = ip->i_udquot; 972 dqp = ip->i_udquot;
@@ -992,6 +974,11 @@ xfs_qm_dqget(
992 goto dqret; 974 goto dqret;
993 } 975 }
994 } else { 976 } else {
977 if (!XFS_IS_OQUOTA_ON(mp)) {
978 /* inode stays locked on return */
979 xfs_qm_dqdestroy(dqp);
980 return XFS_ERROR(ESRCH);
981 }
995 if (ip->i_gdquot) { 982 if (ip->i_gdquot) {
996 xfs_qm_dqdestroy(dqp); 983 xfs_qm_dqdestroy(dqp);
997 dqp = ip->i_gdquot; 984 dqp = ip->i_gdquot;
@@ -1033,13 +1020,14 @@ xfs_qm_dqget(
1033 */ 1020 */
1034 ASSERT(mutex_is_locked(&h->qh_lock)); 1021 ASSERT(mutex_is_locked(&h->qh_lock));
1035 dqp->q_hash = h; 1022 dqp->q_hash = h;
1036 XQM_HASHLIST_INSERT(h, dqp); 1023 list_add(&dqp->q_hashlist, &h->qh_list);
1024 h->qh_version++;
1037 1025
1038 /* 1026 /*
1039 * Attach this dquot to this filesystem's list of all dquots, 1027 * Attach this dquot to this filesystem's list of all dquots,
1040 * kept inside the mount structure in m_quotainfo field 1028 * kept inside the mount structure in m_quotainfo field
1041 */ 1029 */
1042 xfs_qm_mplist_lock(mp); 1030 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1043 1031
1044 /* 1032 /*
1045 * We return a locked dquot to the caller, with a reference taken 1033 * We return a locked dquot to the caller, with a reference taken
@@ -1047,9 +1035,9 @@ xfs_qm_dqget(
1047 xfs_dqlock(dqp); 1035 xfs_dqlock(dqp);
1048 dqp->q_nrefs = 1; 1036 dqp->q_nrefs = 1;
1049 1037
1050 XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp); 1038 list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
1051 1039 mp->m_quotainfo->qi_dquots++;
1052 xfs_qm_mplist_unlock(mp); 1040 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1053 mutex_unlock(&h->qh_lock); 1041 mutex_unlock(&h->qh_lock);
1054 dqret: 1042 dqret:
1055 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1043 ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -1086,10 +1074,10 @@ xfs_qm_dqput(
1086 * drop the dqlock and acquire the freelist and dqlock 1074 * drop the dqlock and acquire the freelist and dqlock
1087 * in the right order; but try to get it out-of-order first 1075 * in the right order; but try to get it out-of-order first
1088 */ 1076 */
1089 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) { 1077 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
1090 trace_xfs_dqput_wait(dqp); 1078 trace_xfs_dqput_wait(dqp);
1091 xfs_dqunlock(dqp); 1079 xfs_dqunlock(dqp);
1092 xfs_qm_freelist_lock(xfs_Gqm); 1080 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1093 xfs_dqlock(dqp); 1081 xfs_dqlock(dqp);
1094 } 1082 }
1095 1083
@@ -1100,10 +1088,8 @@ xfs_qm_dqput(
1100 if (--dqp->q_nrefs == 0) { 1088 if (--dqp->q_nrefs == 0) {
1101 trace_xfs_dqput_free(dqp); 1089 trace_xfs_dqput_free(dqp);
1102 1090
1103 /* 1091 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
1104 * insert at end of the freelist. 1092 xfs_Gqm->qm_dqfrlist_cnt++;
1105 */
1106 XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
1107 1093
1108 /* 1094 /*
1109 * If we just added a udquot to the freelist, then 1095 * If we just added a udquot to the freelist, then
@@ -1118,10 +1104,6 @@ xfs_qm_dqput(
1118 xfs_dqlock(gdqp); 1104 xfs_dqlock(gdqp);
1119 dqp->q_gdquot = NULL; 1105 dqp->q_gdquot = NULL;
1120 } 1106 }
1121
1122 /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
1123 "@@@@@++ Free list (after append) @@@@@+");
1124 */
1125 } 1107 }
1126 xfs_dqunlock(dqp); 1108 xfs_dqunlock(dqp);
1127 1109
@@ -1133,7 +1115,7 @@ xfs_qm_dqput(
1133 break; 1115 break;
1134 dqp = gdqp; 1116 dqp = gdqp;
1135 } 1117 }
1136 xfs_qm_freelist_unlock(xfs_Gqm); 1118 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1137} 1119}
1138 1120
1139/* 1121/*
@@ -1386,10 +1368,10 @@ int
1386xfs_qm_dqpurge( 1368xfs_qm_dqpurge(
1387 xfs_dquot_t *dqp) 1369 xfs_dquot_t *dqp)
1388{ 1370{
1389 xfs_dqhash_t *thishash; 1371 xfs_dqhash_t *qh = dqp->q_hash;
1390 xfs_mount_t *mp = dqp->q_mount; 1372 xfs_mount_t *mp = dqp->q_mount;
1391 1373
1392 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 1374 ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
1393 ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); 1375 ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
1394 1376
1395 xfs_dqlock(dqp); 1377 xfs_dqlock(dqp);
@@ -1407,7 +1389,7 @@ xfs_qm_dqpurge(
1407 return (1); 1389 return (1);
1408 } 1390 }
1409 1391
1410 ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); 1392 ASSERT(!list_empty(&dqp->q_freelist));
1411 1393
1412 /* 1394 /*
1413 * If we're turning off quotas, we have to make sure that, for 1395 * If we're turning off quotas, we have to make sure that, for
@@ -1452,14 +1434,16 @@ xfs_qm_dqpurge(
1452 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 1434 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1453 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 1435 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1454 1436
1455 thishash = dqp->q_hash; 1437 list_del_init(&dqp->q_hashlist);
1456 XQM_HASHLIST_REMOVE(thishash, dqp); 1438 qh->qh_version++;
1457 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp); 1439 list_del_init(&dqp->q_mplist);
1440 mp->m_quotainfo->qi_dqreclaims++;
1441 mp->m_quotainfo->qi_dquots--;
1458 /* 1442 /*
1459 * XXX Move this to the front of the freelist, if we can get the 1443 * XXX Move this to the front of the freelist, if we can get the
1460 * freelist lock. 1444 * freelist lock.
1461 */ 1445 */
1462 ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); 1446 ASSERT(!list_empty(&dqp->q_freelist));
1463 1447
1464 dqp->q_mount = NULL; 1448 dqp->q_mount = NULL;
1465 dqp->q_hash = NULL; 1449 dqp->q_hash = NULL;
@@ -1467,7 +1451,7 @@ xfs_qm_dqpurge(
1467 memset(&dqp->q_core, 0, sizeof(dqp->q_core)); 1451 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1468 xfs_dqfunlock(dqp); 1452 xfs_dqfunlock(dqp);
1469 xfs_dqunlock(dqp); 1453 xfs_dqunlock(dqp);
1470 mutex_unlock(&thishash->qh_lock); 1454 mutex_unlock(&qh->qh_lock);
1471 return (0); 1455 return (0);
1472} 1456}
1473 1457
@@ -1517,6 +1501,7 @@ void
1517xfs_qm_dqflock_pushbuf_wait( 1501xfs_qm_dqflock_pushbuf_wait(
1518 xfs_dquot_t *dqp) 1502 xfs_dquot_t *dqp)
1519{ 1503{
1504 xfs_mount_t *mp = dqp->q_mount;
1520 xfs_buf_t *bp; 1505 xfs_buf_t *bp;
1521 1506
1522 /* 1507 /*
@@ -1525,14 +1510,14 @@ xfs_qm_dqflock_pushbuf_wait(
1525 * out immediately. We'll be able to acquire 1510 * out immediately. We'll be able to acquire
1526 * the flush lock when the I/O completes. 1511 * the flush lock when the I/O completes.
1527 */ 1512 */
1528 bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, 1513 bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
1529 XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK); 1514 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
1530 if (!bp) 1515 if (!bp)
1531 goto out_lock; 1516 goto out_lock;
1532 1517
1533 if (XFS_BUF_ISDELAYWRITE(bp)) { 1518 if (XFS_BUF_ISDELAYWRITE(bp)) {
1534 if (XFS_BUF_ISPINNED(bp)) 1519 if (XFS_BUF_ISPINNED(bp))
1535 xfs_log_force(dqp->q_mount, 0); 1520 xfs_log_force(mp, 0);
1536 xfs_buf_delwri_promote(bp); 1521 xfs_buf_delwri_promote(bp);
1537 wake_up_process(bp->b_target->bt_task); 1522 wake_up_process(bp->b_target->bt_task);
1538 } 1523 }
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index a0f7da586d1b..5da3a23b820d 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -33,40 +33,23 @@
33 * The hash chain headers (hash buckets) 33 * The hash chain headers (hash buckets)
34 */ 34 */
35typedef struct xfs_dqhash { 35typedef struct xfs_dqhash {
36 struct xfs_dquot *qh_next; 36 struct list_head qh_list;
37 struct mutex qh_lock; 37 struct mutex qh_lock;
38 uint qh_version; /* ever increasing version */ 38 uint qh_version; /* ever increasing version */
39 uint qh_nelems; /* number of dquots on the list */ 39 uint qh_nelems; /* number of dquots on the list */
40} xfs_dqhash_t; 40} xfs_dqhash_t;
41 41
42typedef struct xfs_dqlink {
43 struct xfs_dquot *ql_next; /* forward link */
44 struct xfs_dquot **ql_prevp; /* pointer to prev ql_next */
45} xfs_dqlink_t;
46
47struct xfs_mount; 42struct xfs_mount;
48struct xfs_trans; 43struct xfs_trans;
49 44
50/* 45/*
51 * This is the marker which is designed to occupy the first few
52 * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers
53 * must come first.
54 * This serves as the marker ("sentinel") when we have to restart list
55 * iterations because of locking considerations.
56 */
57typedef struct xfs_dqmarker {
58 struct xfs_dquot*dqm_flnext; /* link to freelist: must be first */
59 struct xfs_dquot*dqm_flprev;
60 xfs_dqlink_t dqm_mplist; /* link to mount's list of dquots */
61 xfs_dqlink_t dqm_hashlist; /* link to the hash chain */
62 uint dqm_flags; /* various flags (XFS_DQ_*) */
63} xfs_dqmarker_t;
64
65/*
66 * The incore dquot structure 46 * The incore dquot structure
67 */ 47 */
68typedef struct xfs_dquot { 48typedef struct xfs_dquot {
69 xfs_dqmarker_t q_lists; /* list ptrs, q_flags (marker) */ 49 uint dq_flags; /* various flags (XFS_DQ_*) */
50 struct list_head q_freelist; /* global free list of dquots */
51 struct list_head q_mplist; /* mount's list of dquots */
52 struct list_head q_hashlist; /* gloabl hash list of dquots */
70 xfs_dqhash_t *q_hash; /* the hashchain header */ 53 xfs_dqhash_t *q_hash; /* the hashchain header */
71 struct xfs_mount*q_mount; /* filesystem this relates to */ 54 struct xfs_mount*q_mount; /* filesystem this relates to */
72 struct xfs_trans*q_transp; /* trans this belongs to currently */ 55 struct xfs_trans*q_transp; /* trans this belongs to currently */
@@ -87,13 +70,6 @@ typedef struct xfs_dquot {
87 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ 70 wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
88} xfs_dquot_t; 71} xfs_dquot_t;
89 72
90
91#define dq_flnext q_lists.dqm_flnext
92#define dq_flprev q_lists.dqm_flprev
93#define dq_mplist q_lists.dqm_mplist
94#define dq_hashlist q_lists.dqm_hashlist
95#define dq_flags q_lists.dqm_flags
96
97/* 73/*
98 * Lock hierarchy for q_qlock: 74 * Lock hierarchy for q_qlock:
99 * XFS_QLOCK_NORMAL is the implicit default, 75 * XFS_QLOCK_NORMAL is the implicit default,
@@ -127,7 +103,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
127} 103}
128 104
129#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 105#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
130#define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp))
131#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 106#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
132#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 107#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
133#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) 108#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ)
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 4e4ee9a57194..8d89a24ae324 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -107,8 +107,7 @@ xfs_qm_dquot_logitem_pin(
107/* ARGSUSED */ 107/* ARGSUSED */
108STATIC void 108STATIC void
109xfs_qm_dquot_logitem_unpin( 109xfs_qm_dquot_logitem_unpin(
110 xfs_dq_logitem_t *logitem, 110 xfs_dq_logitem_t *logitem)
111 int stale)
112{ 111{
113 xfs_dquot_t *dqp = logitem->qli_dquot; 112 xfs_dquot_t *dqp = logitem->qli_dquot;
114 113
@@ -123,7 +122,7 @@ xfs_qm_dquot_logitem_unpin_remove(
123 xfs_dq_logitem_t *logitem, 122 xfs_dq_logitem_t *logitem,
124 xfs_trans_t *tp) 123 xfs_trans_t *tp)
125{ 124{
126 xfs_qm_dquot_logitem_unpin(logitem, 0); 125 xfs_qm_dquot_logitem_unpin(logitem);
127} 126}
128 127
129/* 128/*
@@ -228,7 +227,7 @@ xfs_qm_dquot_logitem_pushbuf(
228 } 227 }
229 mp = dqp->q_mount; 228 mp = dqp->q_mount;
230 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, 229 bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
231 XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK); 230 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
232 xfs_dqunlock(dqp); 231 xfs_dqunlock(dqp);
233 if (!bp) 232 if (!bp)
234 return; 233 return;
@@ -329,8 +328,7 @@ static struct xfs_item_ops xfs_dquot_item_ops = {
329 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 328 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
330 xfs_qm_dquot_logitem_format, 329 xfs_qm_dquot_logitem_format,
331 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin, 330 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
332 .iop_unpin = (void(*)(xfs_log_item_t*, int)) 331 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin,
333 xfs_qm_dquot_logitem_unpin,
334 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 332 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
335 xfs_qm_dquot_logitem_unpin_remove, 333 xfs_qm_dquot_logitem_unpin_remove,
336 .iop_trylock = (uint(*)(xfs_log_item_t*)) 334 .iop_trylock = (uint(*)(xfs_log_item_t*))
@@ -357,9 +355,8 @@ xfs_qm_dquot_logitem_init(
357 xfs_dq_logitem_t *lp; 355 xfs_dq_logitem_t *lp;
358 lp = &dqp->q_logitem; 356 lp = &dqp->q_logitem;
359 357
360 lp->qli_item.li_type = XFS_LI_DQUOT; 358 xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
361 lp->qli_item.li_ops = &xfs_dquot_item_ops; 359 &xfs_dquot_item_ops);
362 lp->qli_item.li_mountp = dqp->q_mount;
363 lp->qli_dquot = dqp; 360 lp->qli_dquot = dqp;
364 lp->qli_format.qlf_type = XFS_LI_DQUOT; 361 lp->qli_format.qlf_type = XFS_LI_DQUOT;
365 lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); 362 lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
@@ -426,7 +423,7 @@ xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
426 */ 423 */
427/*ARGSUSED*/ 424/*ARGSUSED*/
428STATIC void 425STATIC void
429xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale) 426xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf)
430{ 427{
431 return; 428 return;
432} 429}
@@ -537,8 +534,7 @@ static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
537 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 534 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
538 xfs_qm_qoff_logitem_format, 535 xfs_qm_qoff_logitem_format,
539 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 536 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
540 .iop_unpin = (void(*)(xfs_log_item_t* ,int)) 537 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
541 xfs_qm_qoff_logitem_unpin,
542 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 538 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
543 xfs_qm_qoff_logitem_unpin_remove, 539 xfs_qm_qoff_logitem_unpin_remove,
544 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 540 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
@@ -559,8 +555,7 @@ static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
559 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 555 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
560 xfs_qm_qoff_logitem_format, 556 xfs_qm_qoff_logitem_format,
561 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin, 557 .iop_pin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
562 .iop_unpin = (void(*)(xfs_log_item_t*, int)) 558 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
563 xfs_qm_qoff_logitem_unpin,
564 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*)) 559 .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
565 xfs_qm_qoff_logitem_unpin_remove, 560 xfs_qm_qoff_logitem_unpin_remove,
566 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock, 561 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
@@ -586,11 +581,8 @@ xfs_qm_qoff_logitem_init(
586 581
587 qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP); 582 qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP);
588 583
589 qf->qql_item.li_type = XFS_LI_QUOTAOFF; 584 xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
590 if (start) 585 &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
591 qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops;
592 else
593 qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops;
594 qf->qql_item.li_mountp = mp; 586 qf->qql_item.li_mountp = mp;
595 qf->qql_format.qf_type = XFS_LI_QUOTAOFF; 587 qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
596 qf->qql_format.qf_flags = flags; 588 qf->qql_format.qf_flags = flags;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 417e61e3d9dd..38e764146644 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -67,9 +67,6 @@ static cred_t xfs_zerocr;
67STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); 67STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
68STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); 68STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 69
70STATIC void xfs_qm_freelist_init(xfs_frlist_t *);
71STATIC void xfs_qm_freelist_destroy(xfs_frlist_t *);
72
73STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 70STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
74STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
75STATIC int xfs_qm_shake(int, gfp_t); 72STATIC int xfs_qm_shake(int, gfp_t);
@@ -84,21 +81,25 @@ extern struct mutex qcheck_lock;
84#endif 81#endif
85 82
86#ifdef QUOTADEBUG 83#ifdef QUOTADEBUG
87#define XQM_LIST_PRINT(l, NXT, title) \ 84static void
88{ \ 85xfs_qm_dquot_list_print(
89 xfs_dquot_t *dqp; int i = 0; \ 86 struct xfs_mount *mp)
90 cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ 87{
91 for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \ 88 xfs_dquot_t *dqp;
92 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " \ 89 int i = 0;
93 "bcnt = %d, icnt = %d, refs = %d", \ 90
94 ++i, (int) be32_to_cpu(dqp->q_core.d_id), \ 91 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
95 DQFLAGTO_TYPESTR(dqp), \ 92 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" "
96 (int) be64_to_cpu(dqp->q_core.d_bcount), \ 93 "bcnt = %lld, icnt = %lld, refs = %d",
97 (int) be64_to_cpu(dqp->q_core.d_icount), \ 94 i++, be32_to_cpu(dqp->q_core.d_id),
98 (int) dqp->q_nrefs); } \ 95 DQFLAGTO_TYPESTR(dqp),
96 (long long)be64_to_cpu(dqp->q_core.d_bcount),
97 (long long)be64_to_cpu(dqp->q_core.d_icount),
98 dqp->q_nrefs);
99 }
99} 100}
100#else 101#else
101#define XQM_LIST_PRINT(l, NXT, title) do { } while (0) 102static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
102#endif 103#endif
103 104
104/* 105/*
@@ -144,7 +145,9 @@ xfs_Gqm_init(void)
144 /* 145 /*
145 * Freelist of all dquots of all file systems 146 * Freelist of all dquots of all file systems
146 */ 147 */
147 xfs_qm_freelist_init(&(xqm->qm_dqfreelist)); 148 INIT_LIST_HEAD(&xqm->qm_dqfrlist);
149 xqm->qm_dqfrlist_cnt = 0;
150 mutex_init(&xqm->qm_dqfrlist_lock);
148 151
149 /* 152 /*
150 * dquot zone. we register our own low-memory callback. 153 * dquot zone. we register our own low-memory callback.
@@ -189,6 +192,7 @@ STATIC void
189xfs_qm_destroy( 192xfs_qm_destroy(
190 struct xfs_qm *xqm) 193 struct xfs_qm *xqm)
191{ 194{
195 struct xfs_dquot *dqp, *n;
192 int hsize, i; 196 int hsize, i;
193 197
194 ASSERT(xqm != NULL); 198 ASSERT(xqm != NULL);
@@ -204,7 +208,21 @@ xfs_qm_destroy(
204 xqm->qm_usr_dqhtable = NULL; 208 xqm->qm_usr_dqhtable = NULL;
205 xqm->qm_grp_dqhtable = NULL; 209 xqm->qm_grp_dqhtable = NULL;
206 xqm->qm_dqhashmask = 0; 210 xqm->qm_dqhashmask = 0;
207 xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist)); 211
212 /* frlist cleanup */
213 mutex_lock(&xqm->qm_dqfrlist_lock);
214 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
215 xfs_dqlock(dqp);
216#ifdef QUOTADEBUG
217 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
218#endif
219 list_del_init(&dqp->q_freelist);
220 xfs_Gqm->qm_dqfrlist_cnt--;
221 xfs_dqunlock(dqp);
222 xfs_qm_dqdestroy(dqp);
223 }
224 mutex_unlock(&xqm->qm_dqfrlist_lock);
225 mutex_destroy(&xqm->qm_dqfrlist_lock);
208#ifdef DEBUG 226#ifdef DEBUG
209 mutex_destroy(&qcheck_lock); 227 mutex_destroy(&qcheck_lock);
210#endif 228#endif
@@ -256,7 +274,7 @@ STATIC void
256xfs_qm_rele_quotafs_ref( 274xfs_qm_rele_quotafs_ref(
257 struct xfs_mount *mp) 275 struct xfs_mount *mp)
258{ 276{
259 xfs_dquot_t *dqp, *nextdqp; 277 xfs_dquot_t *dqp, *n;
260 278
261 ASSERT(xfs_Gqm); 279 ASSERT(xfs_Gqm);
262 ASSERT(xfs_Gqm->qm_nrefs > 0); 280 ASSERT(xfs_Gqm->qm_nrefs > 0);
@@ -264,26 +282,24 @@ xfs_qm_rele_quotafs_ref(
264 /* 282 /*
265 * Go thru the freelist and destroy all inactive dquots. 283 * Go thru the freelist and destroy all inactive dquots.
266 */ 284 */
267 xfs_qm_freelist_lock(xfs_Gqm); 285 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
268 286
269 for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; 287 list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
270 dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
271 xfs_dqlock(dqp); 288 xfs_dqlock(dqp);
272 nextdqp = dqp->dq_flnext;
273 if (dqp->dq_flags & XFS_DQ_INACTIVE) { 289 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
274 ASSERT(dqp->q_mount == NULL); 290 ASSERT(dqp->q_mount == NULL);
275 ASSERT(! XFS_DQ_IS_DIRTY(dqp)); 291 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
276 ASSERT(dqp->HL_PREVP == NULL); 292 ASSERT(list_empty(&dqp->q_hashlist));
277 ASSERT(dqp->MPL_PREVP == NULL); 293 ASSERT(list_empty(&dqp->q_mplist));
278 XQM_FREELIST_REMOVE(dqp); 294 list_del_init(&dqp->q_freelist);
295 xfs_Gqm->qm_dqfrlist_cnt--;
279 xfs_dqunlock(dqp); 296 xfs_dqunlock(dqp);
280 xfs_qm_dqdestroy(dqp); 297 xfs_qm_dqdestroy(dqp);
281 } else { 298 } else {
282 xfs_dqunlock(dqp); 299 xfs_dqunlock(dqp);
283 } 300 }
284 dqp = nextdqp;
285 } 301 }
286 xfs_qm_freelist_unlock(xfs_Gqm); 302 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
287 303
288 /* 304 /*
289 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 305 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
@@ -305,7 +321,7 @@ xfs_qm_unmount(
305 struct xfs_mount *mp) 321 struct xfs_mount *mp)
306{ 322{
307 if (mp->m_quotainfo) { 323 if (mp->m_quotainfo) {
308 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 324 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
309 xfs_qm_destroy_quotainfo(mp); 325 xfs_qm_destroy_quotainfo(mp);
310 } 326 }
311} 327}
@@ -449,20 +465,21 @@ xfs_qm_unmount_quotas(
449 */ 465 */
450STATIC int 466STATIC int
451xfs_qm_dqflush_all( 467xfs_qm_dqflush_all(
452 xfs_mount_t *mp, 468 struct xfs_mount *mp,
453 int sync_mode) 469 int sync_mode)
454{ 470{
455 int recl; 471 struct xfs_quotainfo *q = mp->m_quotainfo;
456 xfs_dquot_t *dqp; 472 int recl;
457 int niters; 473 struct xfs_dquot *dqp;
458 int error; 474 int niters;
475 int error;
459 476
460 if (mp->m_quotainfo == NULL) 477 if (!q)
461 return 0; 478 return 0;
462 niters = 0; 479 niters = 0;
463again: 480again:
464 xfs_qm_mplist_lock(mp); 481 mutex_lock(&q->qi_dqlist_lock);
465 FOREACH_DQUOT_IN_MP(dqp, mp) { 482 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
466 xfs_dqlock(dqp); 483 xfs_dqlock(dqp);
467 if (! XFS_DQ_IS_DIRTY(dqp)) { 484 if (! XFS_DQ_IS_DIRTY(dqp)) {
468 xfs_dqunlock(dqp); 485 xfs_dqunlock(dqp);
@@ -470,7 +487,7 @@ again:
470 } 487 }
471 488
472 /* XXX a sentinel would be better */ 489 /* XXX a sentinel would be better */
473 recl = XFS_QI_MPLRECLAIMS(mp); 490 recl = q->qi_dqreclaims;
474 if (!xfs_dqflock_nowait(dqp)) { 491 if (!xfs_dqflock_nowait(dqp)) {
475 /* 492 /*
476 * If we can't grab the flush lock then check 493 * If we can't grab the flush lock then check
@@ -485,21 +502,21 @@ again:
485 * Let go of the mplist lock. We don't want to hold it 502 * Let go of the mplist lock. We don't want to hold it
486 * across a disk write. 503 * across a disk write.
487 */ 504 */
488 xfs_qm_mplist_unlock(mp); 505 mutex_unlock(&q->qi_dqlist_lock);
489 error = xfs_qm_dqflush(dqp, sync_mode); 506 error = xfs_qm_dqflush(dqp, sync_mode);
490 xfs_dqunlock(dqp); 507 xfs_dqunlock(dqp);
491 if (error) 508 if (error)
492 return error; 509 return error;
493 510
494 xfs_qm_mplist_lock(mp); 511 mutex_lock(&q->qi_dqlist_lock);
495 if (recl != XFS_QI_MPLRECLAIMS(mp)) { 512 if (recl != q->qi_dqreclaims) {
496 xfs_qm_mplist_unlock(mp); 513 mutex_unlock(&q->qi_dqlist_lock);
497 /* XXX restart limit */ 514 /* XXX restart limit */
498 goto again; 515 goto again;
499 } 516 }
500 } 517 }
501 518
502 xfs_qm_mplist_unlock(mp); 519 mutex_unlock(&q->qi_dqlist_lock);
503 /* return ! busy */ 520 /* return ! busy */
504 return 0; 521 return 0;
505} 522}
@@ -509,15 +526,15 @@ again:
509 */ 526 */
510STATIC void 527STATIC void
511xfs_qm_detach_gdquots( 528xfs_qm_detach_gdquots(
512 xfs_mount_t *mp) 529 struct xfs_mount *mp)
513{ 530{
514 xfs_dquot_t *dqp, *gdqp; 531 struct xfs_quotainfo *q = mp->m_quotainfo;
515 int nrecl; 532 struct xfs_dquot *dqp, *gdqp;
533 int nrecl;
516 534
517 again: 535 again:
518 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 536 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
519 dqp = XFS_QI_MPLNEXT(mp); 537 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
520 while (dqp) {
521 xfs_dqlock(dqp); 538 xfs_dqlock(dqp);
522 if ((gdqp = dqp->q_gdquot)) { 539 if ((gdqp = dqp->q_gdquot)) {
523 xfs_dqlock(gdqp); 540 xfs_dqlock(gdqp);
@@ -530,15 +547,14 @@ xfs_qm_detach_gdquots(
530 * Can't hold the mplist lock across a dqput. 547 * Can't hold the mplist lock across a dqput.
531 * XXXmust convert to marker based iterations here. 548 * XXXmust convert to marker based iterations here.
532 */ 549 */
533 nrecl = XFS_QI_MPLRECLAIMS(mp); 550 nrecl = q->qi_dqreclaims;
534 xfs_qm_mplist_unlock(mp); 551 mutex_unlock(&q->qi_dqlist_lock);
535 xfs_qm_dqput(gdqp); 552 xfs_qm_dqput(gdqp);
536 553
537 xfs_qm_mplist_lock(mp); 554 mutex_lock(&q->qi_dqlist_lock);
538 if (nrecl != XFS_QI_MPLRECLAIMS(mp)) 555 if (nrecl != q->qi_dqreclaims)
539 goto again; 556 goto again;
540 } 557 }
541 dqp = dqp->MPL_NEXT;
542 } 558 }
543} 559}
544 560
@@ -550,23 +566,23 @@ xfs_qm_detach_gdquots(
550 */ 566 */
551STATIC int 567STATIC int
552xfs_qm_dqpurge_int( 568xfs_qm_dqpurge_int(
553 xfs_mount_t *mp, 569 struct xfs_mount *mp,
554 uint flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */ 570 uint flags)
555{ 571{
556 xfs_dquot_t *dqp; 572 struct xfs_quotainfo *q = mp->m_quotainfo;
557 uint dqtype; 573 struct xfs_dquot *dqp, *n;
558 int nrecl; 574 uint dqtype;
559 xfs_dquot_t *nextdqp; 575 int nrecl;
560 int nmisses; 576 int nmisses;
561 577
562 if (mp->m_quotainfo == NULL) 578 if (!q)
563 return 0; 579 return 0;
564 580
565 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; 581 dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
566 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; 582 dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
567 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; 583 dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
568 584
569 xfs_qm_mplist_lock(mp); 585 mutex_lock(&q->qi_dqlist_lock);
570 586
571 /* 587 /*
572 * In the first pass through all incore dquots of this filesystem, 588 * In the first pass through all incore dquots of this filesystem,
@@ -578,28 +594,25 @@ xfs_qm_dqpurge_int(
578 594
579 again: 595 again:
580 nmisses = 0; 596 nmisses = 0;
581 ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); 597 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
582 /* 598 /*
583 * Try to get rid of all of the unwanted dquots. The idea is to 599 * Try to get rid of all of the unwanted dquots. The idea is to
584 * get them off mplist and hashlist, but leave them on freelist. 600 * get them off mplist and hashlist, but leave them on freelist.
585 */ 601 */
586 dqp = XFS_QI_MPLNEXT(mp); 602 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
587 while (dqp) {
588 /* 603 /*
589 * It's OK to look at the type without taking dqlock here. 604 * It's OK to look at the type without taking dqlock here.
590 * We're holding the mplist lock here, and that's needed for 605 * We're holding the mplist lock here, and that's needed for
591 * a dqreclaim. 606 * a dqreclaim.
592 */ 607 */
593 if ((dqp->dq_flags & dqtype) == 0) { 608 if ((dqp->dq_flags & dqtype) == 0)
594 dqp = dqp->MPL_NEXT;
595 continue; 609 continue;
596 }
597 610
598 if (!mutex_trylock(&dqp->q_hash->qh_lock)) { 611 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
599 nrecl = XFS_QI_MPLRECLAIMS(mp); 612 nrecl = q->qi_dqreclaims;
600 xfs_qm_mplist_unlock(mp); 613 mutex_unlock(&q->qi_dqlist_lock);
601 mutex_lock(&dqp->q_hash->qh_lock); 614 mutex_lock(&dqp->q_hash->qh_lock);
602 xfs_qm_mplist_lock(mp); 615 mutex_lock(&q->qi_dqlist_lock);
603 616
604 /* 617 /*
605 * XXXTheoretically, we can get into a very long 618 * XXXTheoretically, we can get into a very long
@@ -607,7 +620,7 @@ xfs_qm_dqpurge_int(
607 * No one can be adding dquots to the mplist at 620 * No one can be adding dquots to the mplist at
608 * this point, but somebody might be taking things off. 621 * this point, but somebody might be taking things off.
609 */ 622 */
610 if (nrecl != XFS_QI_MPLRECLAIMS(mp)) { 623 if (nrecl != q->qi_dqreclaims) {
611 mutex_unlock(&dqp->q_hash->qh_lock); 624 mutex_unlock(&dqp->q_hash->qh_lock);
612 goto again; 625 goto again;
613 } 626 }
@@ -617,11 +630,9 @@ xfs_qm_dqpurge_int(
617 * Take the dquot off the mplist and hashlist. It may remain on 630 * Take the dquot off the mplist and hashlist. It may remain on
618 * freelist in INACTIVE state. 631 * freelist in INACTIVE state.
619 */ 632 */
620 nextdqp = dqp->MPL_NEXT;
621 nmisses += xfs_qm_dqpurge(dqp); 633 nmisses += xfs_qm_dqpurge(dqp);
622 dqp = nextdqp;
623 } 634 }
624 xfs_qm_mplist_unlock(mp); 635 mutex_unlock(&q->qi_dqlist_lock);
625 return nmisses; 636 return nmisses;
626} 637}
627 638
@@ -921,12 +932,13 @@ xfs_qm_dqdetach(
921 932
922int 933int
923xfs_qm_sync( 934xfs_qm_sync(
924 xfs_mount_t *mp, 935 struct xfs_mount *mp,
925 int flags) 936 int flags)
926{ 937{
927 int recl, restarts; 938 struct xfs_quotainfo *q = mp->m_quotainfo;
928 xfs_dquot_t *dqp; 939 int recl, restarts;
929 int error; 940 struct xfs_dquot *dqp;
941 int error;
930 942
931 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) 943 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
932 return 0; 944 return 0;
@@ -934,18 +946,19 @@ xfs_qm_sync(
934 restarts = 0; 946 restarts = 0;
935 947
936 again: 948 again:
937 xfs_qm_mplist_lock(mp); 949 mutex_lock(&q->qi_dqlist_lock);
938 /* 950 /*
939 * dqpurge_all() also takes the mplist lock and iterate thru all dquots 951 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
940 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared 952 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
941 * when we have the mplist lock, we know that dquots will be consistent 953 * when we have the mplist lock, we know that dquots will be consistent
942 * as long as we have it locked. 954 * as long as we have it locked.
943 */ 955 */
944 if (! XFS_IS_QUOTA_ON(mp)) { 956 if (!XFS_IS_QUOTA_ON(mp)) {
945 xfs_qm_mplist_unlock(mp); 957 mutex_unlock(&q->qi_dqlist_lock);
946 return 0; 958 return 0;
947 } 959 }
948 FOREACH_DQUOT_IN_MP(dqp, mp) { 960 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
961 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
949 /* 962 /*
950 * If this is vfs_sync calling, then skip the dquots that 963 * If this is vfs_sync calling, then skip the dquots that
951 * don't 'seem' to be dirty. ie. don't acquire dqlock. 964 * don't 'seem' to be dirty. ie. don't acquire dqlock.
@@ -969,7 +982,7 @@ xfs_qm_sync(
969 } 982 }
970 983
971 /* XXX a sentinel would be better */ 984 /* XXX a sentinel would be better */
972 recl = XFS_QI_MPLRECLAIMS(mp); 985 recl = q->qi_dqreclaims;
973 if (!xfs_dqflock_nowait(dqp)) { 986 if (!xfs_dqflock_nowait(dqp)) {
974 if (flags & SYNC_TRYLOCK) { 987 if (flags & SYNC_TRYLOCK) {
975 xfs_dqunlock(dqp); 988 xfs_dqunlock(dqp);
@@ -989,7 +1002,7 @@ xfs_qm_sync(
989 * Let go of the mplist lock. We don't want to hold it 1002 * Let go of the mplist lock. We don't want to hold it
990 * across a disk write 1003 * across a disk write
991 */ 1004 */
992 xfs_qm_mplist_unlock(mp); 1005 mutex_unlock(&q->qi_dqlist_lock);
993 error = xfs_qm_dqflush(dqp, flags); 1006 error = xfs_qm_dqflush(dqp, flags);
994 xfs_dqunlock(dqp); 1007 xfs_dqunlock(dqp);
995 if (error && XFS_FORCED_SHUTDOWN(mp)) 1008 if (error && XFS_FORCED_SHUTDOWN(mp))
@@ -997,17 +1010,17 @@ xfs_qm_sync(
997 else if (error) 1010 else if (error)
998 return error; 1011 return error;
999 1012
1000 xfs_qm_mplist_lock(mp); 1013 mutex_lock(&q->qi_dqlist_lock);
1001 if (recl != XFS_QI_MPLRECLAIMS(mp)) { 1014 if (recl != q->qi_dqreclaims) {
1002 if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) 1015 if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1003 break; 1016 break;
1004 1017
1005 xfs_qm_mplist_unlock(mp); 1018 mutex_unlock(&q->qi_dqlist_lock);
1006 goto again; 1019 goto again;
1007 } 1020 }
1008 } 1021 }
1009 1022
1010 xfs_qm_mplist_unlock(mp); 1023 mutex_unlock(&q->qi_dqlist_lock);
1011 return 0; 1024 return 0;
1012} 1025}
1013 1026
@@ -1052,8 +1065,9 @@ xfs_qm_init_quotainfo(
1052 return error; 1065 return error;
1053 } 1066 }
1054 1067
1055 xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0); 1068 INIT_LIST_HEAD(&qinf->qi_dqlist);
1056 lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class); 1069 mutex_init(&qinf->qi_dqlist_lock);
1070 lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
1057 1071
1058 qinf->qi_dqreclaims = 0; 1072 qinf->qi_dqreclaims = 0;
1059 1073
@@ -1150,7 +1164,8 @@ xfs_qm_destroy_quotainfo(
1150 */ 1164 */
1151 xfs_qm_rele_quotafs_ref(mp); 1165 xfs_qm_rele_quotafs_ref(mp);
1152 1166
1153 xfs_qm_list_destroy(&qi->qi_dqlist); 1167 ASSERT(list_empty(&qi->qi_dqlist));
1168 mutex_destroy(&qi->qi_dqlist_lock);
1154 1169
1155 if (qi->qi_uquotaip) { 1170 if (qi->qi_uquotaip) {
1156 IRELE(qi->qi_uquotaip); 1171 IRELE(qi->qi_uquotaip);
@@ -1177,7 +1192,7 @@ xfs_qm_list_init(
1177 int n) 1192 int n)
1178{ 1193{
1179 mutex_init(&list->qh_lock); 1194 mutex_init(&list->qh_lock);
1180 list->qh_next = NULL; 1195 INIT_LIST_HEAD(&list->qh_list);
1181 list->qh_version = 0; 1196 list->qh_version = 0;
1182 list->qh_nelems = 0; 1197 list->qh_nelems = 0;
1183} 1198}
@@ -1316,9 +1331,6 @@ xfs_qm_qino_alloc(
1316 */ 1331 */
1317 spin_lock(&mp->m_sb_lock); 1332 spin_lock(&mp->m_sb_lock);
1318 if (flags & XFS_QMOPT_SBVERSION) { 1333 if (flags & XFS_QMOPT_SBVERSION) {
1319#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1320 unsigned oldv = mp->m_sb.sb_versionnum;
1321#endif
1322 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); 1334 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1323 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | 1335 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1324 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == 1336 XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
@@ -1331,11 +1343,6 @@ xfs_qm_qino_alloc(
1331 1343
1332 /* qflags will get updated _after_ quotacheck */ 1344 /* qflags will get updated _after_ quotacheck */
1333 mp->m_sb.sb_qflags = 0; 1345 mp->m_sb.sb_qflags = 0;
1334#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1335 cmn_err(CE_NOTE,
1336 "Old superblock version %x, converting to %x.",
1337 oldv, mp->m_sb.sb_versionnum);
1338#endif
1339 } 1346 }
1340 if (flags & XFS_QMOPT_UQUOTA) 1347 if (flags & XFS_QMOPT_UQUOTA)
1341 mp->m_sb.sb_uquotino = (*ip)->i_ino; 1348 mp->m_sb.sb_uquotino = (*ip)->i_ino;
@@ -1371,10 +1378,10 @@ xfs_qm_reset_dqcounts(
1371#ifdef DEBUG 1378#ifdef DEBUG
1372 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); 1379 j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1373 do_div(j, sizeof(xfs_dqblk_t)); 1380 do_div(j, sizeof(xfs_dqblk_t));
1374 ASSERT(XFS_QM_DQPERBLK(mp) == j); 1381 ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
1375#endif 1382#endif
1376 ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp); 1383 ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1377 for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) { 1384 for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
1378 /* 1385 /*
1379 * Do a sanity check, and if needed, repair the dqblk. Don't 1386 * Do a sanity check, and if needed, repair the dqblk. Don't
1380 * output any warnings because it's perfectly possible to 1387 * output any warnings because it's perfectly possible to
@@ -1429,7 +1436,7 @@ xfs_qm_dqiter_bufs(
1429 while (blkcnt--) { 1436 while (blkcnt--) {
1430 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 1437 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1431 XFS_FSB_TO_DADDR(mp, bno), 1438 XFS_FSB_TO_DADDR(mp, bno),
1432 (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp); 1439 mp->m_quotainfo->qi_dqchunklen, 0, &bp);
1433 if (error) 1440 if (error)
1434 break; 1441 break;
1435 1442
@@ -1439,7 +1446,7 @@ xfs_qm_dqiter_bufs(
1439 * goto the next block. 1446 * goto the next block.
1440 */ 1447 */
1441 bno++; 1448 bno++;
1442 firstid += XFS_QM_DQPERBLK(mp); 1449 firstid += mp->m_quotainfo->qi_dqperchunk;
1443 } 1450 }
1444 return error; 1451 return error;
1445} 1452}
@@ -1505,7 +1512,7 @@ xfs_qm_dqiterate(
1505 continue; 1512 continue;
1506 1513
1507 firstid = (xfs_dqid_t) map[i].br_startoff * 1514 firstid = (xfs_dqid_t) map[i].br_startoff *
1508 XFS_QM_DQPERBLK(mp); 1515 mp->m_quotainfo->qi_dqperchunk;
1509 /* 1516 /*
1510 * Do a read-ahead on the next extent. 1517 * Do a read-ahead on the next extent.
1511 */ 1518 */
@@ -1516,7 +1523,7 @@ xfs_qm_dqiterate(
1516 while (rablkcnt--) { 1523 while (rablkcnt--) {
1517 xfs_baread(mp->m_ddev_targp, 1524 xfs_baread(mp->m_ddev_targp,
1518 XFS_FSB_TO_DADDR(mp, rablkno), 1525 XFS_FSB_TO_DADDR(mp, rablkno),
1519 (int)XFS_QI_DQCHUNKLEN(mp)); 1526 mp->m_quotainfo->qi_dqchunklen);
1520 rablkno++; 1527 rablkno++;
1521 } 1528 }
1522 } 1529 }
@@ -1576,8 +1583,10 @@ xfs_qm_quotacheck_dqadjust(
1576 1583
1577 /* 1584 /*
1578 * Set default limits, adjust timers (since we changed usages) 1585 * Set default limits, adjust timers (since we changed usages)
1586 *
1587 * There are no timers for the default values set in the root dquot.
1579 */ 1588 */
1580 if (! XFS_IS_SUSER_DQUOT(dqp)) { 1589 if (dqp->q_core.d_id) {
1581 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); 1590 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1582 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); 1591 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1583 } 1592 }
@@ -1747,14 +1756,14 @@ xfs_qm_quotacheck(
1747 lastino = 0; 1756 lastino = 0;
1748 flags = 0; 1757 flags = 0;
1749 1758
1750 ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp)); 1759 ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
1751 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1760 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1752 1761
1753 /* 1762 /*
1754 * There should be no cached dquots. The (simplistic) quotacheck 1763 * There should be no cached dquots. The (simplistic) quotacheck
1755 * algorithm doesn't like that. 1764 * algorithm doesn't like that.
1756 */ 1765 */
1757 ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0); 1766 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1758 1767
1759 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); 1768 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1760 1769
@@ -1763,15 +1772,19 @@ xfs_qm_quotacheck(
1763 * their counters to zero. We need a clean slate. 1772 * their counters to zero. We need a clean slate.
1764 * We don't log our changes till later. 1773 * We don't log our changes till later.
1765 */ 1774 */
1766 if ((uip = XFS_QI_UQIP(mp))) { 1775 uip = mp->m_quotainfo->qi_uquotaip;
1767 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA))) 1776 if (uip) {
1777 error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
1778 if (error)
1768 goto error_return; 1779 goto error_return;
1769 flags |= XFS_UQUOTA_CHKD; 1780 flags |= XFS_UQUOTA_CHKD;
1770 } 1781 }
1771 1782
1772 if ((gip = XFS_QI_GQIP(mp))) { 1783 gip = mp->m_quotainfo->qi_gquotaip;
1773 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? 1784 if (gip) {
1774 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA))) 1785 error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1786 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1787 if (error)
1775 goto error_return; 1788 goto error_return;
1776 flags |= XFS_OQUOTA_CHKD; 1789 flags |= XFS_OQUOTA_CHKD;
1777 } 1790 }
@@ -1804,7 +1817,7 @@ xfs_qm_quotacheck(
1804 * at this point (because we intentionally didn't in dqget_noattach). 1817 * at this point (because we intentionally didn't in dqget_noattach).
1805 */ 1818 */
1806 if (error) { 1819 if (error) {
1807 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF); 1820 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1808 goto error_return; 1821 goto error_return;
1809 } 1822 }
1810 1823
@@ -1825,7 +1838,7 @@ xfs_qm_quotacheck(
1825 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); 1838 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1826 mp->m_qflags |= flags; 1839 mp->m_qflags |= flags;
1827 1840
1828 XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++"); 1841 xfs_qm_dquot_list_print(mp);
1829 1842
1830 error_return: 1843 error_return:
1831 if (error) { 1844 if (error) {
@@ -1920,59 +1933,53 @@ xfs_qm_init_quotainos(
1920 } 1933 }
1921 } 1934 }
1922 1935
1923 XFS_QI_UQIP(mp) = uip; 1936 mp->m_quotainfo->qi_uquotaip = uip;
1924 XFS_QI_GQIP(mp) = gip; 1937 mp->m_quotainfo->qi_gquotaip = gip;
1925 1938
1926 return 0; 1939 return 0;
1927} 1940}
1928 1941
1929 1942
1943
1930/* 1944/*
1931 * Traverse the freelist of dquots and attempt to reclaim a maximum of 1945 * Just pop the least recently used dquot off the freelist and
1932 * 'howmany' dquots. This operation races with dqlookup(), and attempts to 1946 * recycle it. The returned dquot is locked.
1933 * favor the lookup function ...
1934 * XXXsup merge this with qm_reclaim_one().
1935 */ 1947 */
1936STATIC int 1948STATIC xfs_dquot_t *
1937xfs_qm_shake_freelist( 1949xfs_qm_dqreclaim_one(void)
1938 int howmany)
1939{ 1950{
1940 int nreclaimed; 1951 xfs_dquot_t *dqpout;
1941 xfs_dqhash_t *hash; 1952 xfs_dquot_t *dqp;
1942 xfs_dquot_t *dqp, *nextdqp;
1943 int restarts; 1953 int restarts;
1944 int nflushes;
1945
1946 if (howmany <= 0)
1947 return 0;
1948 1954
1949 nreclaimed = 0;
1950 restarts = 0; 1955 restarts = 0;
1951 nflushes = 0; 1956 dqpout = NULL;
1952 1957
1953#ifdef QUOTADEBUG 1958 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1954 cmn_err(CE_DEBUG, "Shake free 0x%x", howmany); 1959startagain:
1955#endif 1960 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1956 /* lock order is : hashchainlock, freelistlock, mplistlock */
1957 tryagain:
1958 xfs_qm_freelist_lock(xfs_Gqm);
1959 1961
1960 for (dqp = xfs_Gqm->qm_dqfreelist.qh_next; 1962 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1961 ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) && 1963 struct xfs_mount *mp = dqp->q_mount;
1962 nreclaimed < howmany); ) {
1963 xfs_dqlock(dqp); 1964 xfs_dqlock(dqp);
1964 1965
1965 /* 1966 /*
1966 * We are racing with dqlookup here. Naturally we don't 1967 * We are racing with dqlookup here. Naturally we don't
1967 * want to reclaim a dquot that lookup wants. 1968 * want to reclaim a dquot that lookup wants. We release the
1969 * freelist lock and start over, so that lookup will grab
1970 * both the dquot and the freelistlock.
1968 */ 1971 */
1969 if (dqp->dq_flags & XFS_DQ_WANT) { 1972 if (dqp->dq_flags & XFS_DQ_WANT) {
1973 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1974
1975 trace_xfs_dqreclaim_want(dqp);
1976
1970 xfs_dqunlock(dqp); 1977 xfs_dqunlock(dqp);
1971 xfs_qm_freelist_unlock(xfs_Gqm); 1978 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1972 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1979 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1973 return nreclaimed; 1980 return NULL;
1974 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1981 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1975 goto tryagain; 1982 goto startagain;
1976 } 1983 }
1977 1984
1978 /* 1985 /*
@@ -1981,23 +1988,27 @@ xfs_qm_shake_freelist(
1981 * life easier. 1988 * life easier.
1982 */ 1989 */
1983 if (dqp->dq_flags & XFS_DQ_INACTIVE) { 1990 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1984 ASSERT(dqp->q_mount == NULL); 1991 ASSERT(mp == NULL);
1985 ASSERT(! XFS_DQ_IS_DIRTY(dqp)); 1992 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1986 ASSERT(dqp->HL_PREVP == NULL); 1993 ASSERT(list_empty(&dqp->q_hashlist));
1987 ASSERT(dqp->MPL_PREVP == NULL); 1994 ASSERT(list_empty(&dqp->q_mplist));
1995 list_del_init(&dqp->q_freelist);
1996 xfs_Gqm->qm_dqfrlist_cnt--;
1997 xfs_dqunlock(dqp);
1998 dqpout = dqp;
1988 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); 1999 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1989 nextdqp = dqp->dq_flnext; 2000 break;
1990 goto off_freelist;
1991 } 2001 }
1992 2002
1993 ASSERT(dqp->MPL_PREVP); 2003 ASSERT(dqp->q_hash);
2004 ASSERT(!list_empty(&dqp->q_mplist));
2005
1994 /* 2006 /*
1995 * Try to grab the flush lock. If this dquot is in the process of 2007 * Try to grab the flush lock. If this dquot is in the process of
1996 * getting flushed to disk, we don't want to reclaim it. 2008 * getting flushed to disk, we don't want to reclaim it.
1997 */ 2009 */
1998 if (!xfs_dqflock_nowait(dqp)) { 2010 if (!xfs_dqflock_nowait(dqp)) {
1999 xfs_dqunlock(dqp); 2011 xfs_dqunlock(dqp);
2000 dqp = dqp->dq_flnext;
2001 continue; 2012 continue;
2002 } 2013 }
2003 2014
@@ -2010,21 +2021,21 @@ xfs_qm_shake_freelist(
2010 if (XFS_DQ_IS_DIRTY(dqp)) { 2021 if (XFS_DQ_IS_DIRTY(dqp)) {
2011 int error; 2022 int error;
2012 2023
2013 trace_xfs_dqshake_dirty(dqp); 2024 trace_xfs_dqreclaim_dirty(dqp);
2014 2025
2015 /* 2026 /*
2016 * We flush it delayed write, so don't bother 2027 * We flush it delayed write, so don't bother
2017 * releasing the mplock. 2028 * releasing the freelist lock.
2018 */ 2029 */
2019 error = xfs_qm_dqflush(dqp, 0); 2030 error = xfs_qm_dqflush(dqp, 0);
2020 if (error) { 2031 if (error) {
2021 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 2032 xfs_fs_cmn_err(CE_WARN, mp,
2022 "xfs_qm_dqflush_all: dquot %p flush failed", dqp); 2033 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2023 } 2034 }
2024 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */ 2035 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2025 dqp = dqp->dq_flnext;
2026 continue; 2036 continue;
2027 } 2037 }
2038
2028 /* 2039 /*
2029 * We're trying to get the hashlock out of order. This races 2040 * We're trying to get the hashlock out of order. This races
2030 * with dqlookup; so, we giveup and goto the next dquot if 2041 * with dqlookup; so, we giveup and goto the next dquot if
@@ -2033,56 +2044,74 @@ xfs_qm_shake_freelist(
2033 * waiting for the freelist lock. 2044 * waiting for the freelist lock.
2034 */ 2045 */
2035 if (!mutex_trylock(&dqp->q_hash->qh_lock)) { 2046 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
2036 xfs_dqfunlock(dqp); 2047 restarts++;
2037 xfs_dqunlock(dqp); 2048 goto dqfunlock;
2038 dqp = dqp->dq_flnext;
2039 continue;
2040 } 2049 }
2050
2041 /* 2051 /*
2042 * This races with dquot allocation code as well as dqflush_all 2052 * This races with dquot allocation code as well as dqflush_all
2043 * and reclaim code. So, if we failed to grab the mplist lock, 2053 * and reclaim code. So, if we failed to grab the mplist lock,
2044 * giveup everything and start over. 2054 * giveup everything and start over.
2045 */ 2055 */
2046 hash = dqp->q_hash; 2056 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
2047 ASSERT(hash); 2057 restarts++;
2048 if (! xfs_qm_mplist_nowait(dqp->q_mount)) { 2058 mutex_unlock(&dqp->q_hash->qh_lock);
2049 /* XXX put a sentinel so that we can come back here */
2050 xfs_dqfunlock(dqp); 2059 xfs_dqfunlock(dqp);
2051 xfs_dqunlock(dqp); 2060 xfs_dqunlock(dqp);
2052 mutex_unlock(&hash->qh_lock); 2061 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
2053 xfs_qm_freelist_unlock(xfs_Gqm); 2062 if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
2054 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 2063 return NULL;
2055 return nreclaimed; 2064 goto startagain;
2056 goto tryagain;
2057 } 2065 }
2058 2066
2059 trace_xfs_dqshake_unlink(dqp);
2060
2061#ifdef QUOTADEBUG
2062 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2063 dqp, be32_to_cpu(dqp->q_core.d_id));
2064#endif
2065 ASSERT(dqp->q_nrefs == 0); 2067 ASSERT(dqp->q_nrefs == 0);
2066 nextdqp = dqp->dq_flnext; 2068 list_del_init(&dqp->q_mplist);
2067 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp); 2069 mp->m_quotainfo->qi_dquots--;
2068 XQM_HASHLIST_REMOVE(hash, dqp); 2070 mp->m_quotainfo->qi_dqreclaims++;
2071 list_del_init(&dqp->q_hashlist);
2072 dqp->q_hash->qh_version++;
2073 list_del_init(&dqp->q_freelist);
2074 xfs_Gqm->qm_dqfrlist_cnt--;
2075 dqpout = dqp;
2076 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
2077 mutex_unlock(&dqp->q_hash->qh_lock);
2078dqfunlock:
2069 xfs_dqfunlock(dqp); 2079 xfs_dqfunlock(dqp);
2070 xfs_qm_mplist_unlock(dqp->q_mount);
2071 mutex_unlock(&hash->qh_lock);
2072
2073 off_freelist:
2074 XQM_FREELIST_REMOVE(dqp);
2075 xfs_dqunlock(dqp); 2080 xfs_dqunlock(dqp);
2076 nreclaimed++; 2081 if (dqpout)
2077 XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims); 2082 break;
2083 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2084 return NULL;
2085 }
2086 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
2087 return dqpout;
2088}
2089
2090/*
2091 * Traverse the freelist of dquots and attempt to reclaim a maximum of
2092 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
2093 * favor the lookup function ...
2094 */
2095STATIC int
2096xfs_qm_shake_freelist(
2097 int howmany)
2098{
2099 int nreclaimed = 0;
2100 xfs_dquot_t *dqp;
2101
2102 if (howmany <= 0)
2103 return 0;
2104
2105 while (nreclaimed < howmany) {
2106 dqp = xfs_qm_dqreclaim_one();
2107 if (!dqp)
2108 return nreclaimed;
2078 xfs_qm_dqdestroy(dqp); 2109 xfs_qm_dqdestroy(dqp);
2079 dqp = nextdqp; 2110 nreclaimed++;
2080 } 2111 }
2081 xfs_qm_freelist_unlock(xfs_Gqm);
2082 return nreclaimed; 2112 return nreclaimed;
2083} 2113}
2084 2114
2085
2086/* 2115/*
2087 * The kmem_shake interface is invoked when memory is running low. 2116 * The kmem_shake interface is invoked when memory is running low.
2088 */ 2117 */
@@ -2097,7 +2126,7 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2097 if (!xfs_Gqm) 2126 if (!xfs_Gqm)
2098 return 0; 2127 return 0;
2099 2128
2100 nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */ 2129 nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
2101 /* incore dquots in all f/s's */ 2130 /* incore dquots in all f/s's */
2102 ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; 2131 ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2103 2132
@@ -2113,131 +2142,6 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2113} 2142}
2114 2143
2115 2144
2116/*
2117 * Just pop the least recently used dquot off the freelist and
2118 * recycle it. The returned dquot is locked.
2119 */
2120STATIC xfs_dquot_t *
2121xfs_qm_dqreclaim_one(void)
2122{
2123 xfs_dquot_t *dqpout;
2124 xfs_dquot_t *dqp;
2125 int restarts;
2126 int nflushes;
2127
2128 restarts = 0;
2129 dqpout = NULL;
2130 nflushes = 0;
2131
2132 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2133 startagain:
2134 xfs_qm_freelist_lock(xfs_Gqm);
2135
2136 FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2137 xfs_dqlock(dqp);
2138
2139 /*
2140 * We are racing with dqlookup here. Naturally we don't
2141 * want to reclaim a dquot that lookup wants. We release the
2142 * freelist lock and start over, so that lookup will grab
2143 * both the dquot and the freelistlock.
2144 */
2145 if (dqp->dq_flags & XFS_DQ_WANT) {
2146 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2147
2148 trace_xfs_dqreclaim_want(dqp);
2149
2150 xfs_dqunlock(dqp);
2151 xfs_qm_freelist_unlock(xfs_Gqm);
2152 if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2153 return NULL;
2154 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2155 goto startagain;
2156 }
2157
2158 /*
2159 * If the dquot is inactive, we are assured that it is
2160 * not on the mplist or the hashlist, and that makes our
2161 * life easier.
2162 */
2163 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2164 ASSERT(dqp->q_mount == NULL);
2165 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2166 ASSERT(dqp->HL_PREVP == NULL);
2167 ASSERT(dqp->MPL_PREVP == NULL);
2168 XQM_FREELIST_REMOVE(dqp);
2169 xfs_dqunlock(dqp);
2170 dqpout = dqp;
2171 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2172 break;
2173 }
2174
2175 ASSERT(dqp->q_hash);
2176 ASSERT(dqp->MPL_PREVP);
2177
2178 /*
2179 * Try to grab the flush lock. If this dquot is in the process of
2180 * getting flushed to disk, we don't want to reclaim it.
2181 */
2182 if (!xfs_dqflock_nowait(dqp)) {
2183 xfs_dqunlock(dqp);
2184 continue;
2185 }
2186
2187 /*
2188 * We have the flush lock so we know that this is not in the
2189 * process of being flushed. So, if this is dirty, flush it
2190 * DELWRI so that we don't get a freelist infested with
2191 * dirty dquots.
2192 */
2193 if (XFS_DQ_IS_DIRTY(dqp)) {
2194 int error;
2195
2196 trace_xfs_dqreclaim_dirty(dqp);
2197
2198 /*
2199 * We flush it delayed write, so don't bother
2200 * releasing the freelist lock.
2201 */
2202 error = xfs_qm_dqflush(dqp, 0);
2203 if (error) {
2204 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2205 "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2206 }
2207 xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2208 continue;
2209 }
2210
2211 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2212 xfs_dqfunlock(dqp);
2213 xfs_dqunlock(dqp);
2214 continue;
2215 }
2216
2217 if (!mutex_trylock(&dqp->q_hash->qh_lock))
2218 goto mplistunlock;
2219
2220 trace_xfs_dqreclaim_unlink(dqp);
2221
2222 ASSERT(dqp->q_nrefs == 0);
2223 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2224 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2225 XQM_FREELIST_REMOVE(dqp);
2226 dqpout = dqp;
2227 mutex_unlock(&dqp->q_hash->qh_lock);
2228 mplistunlock:
2229 xfs_qm_mplist_unlock(dqp->q_mount);
2230 xfs_dqfunlock(dqp);
2231 xfs_dqunlock(dqp);
2232 if (dqpout)
2233 break;
2234 }
2235
2236 xfs_qm_freelist_unlock(xfs_Gqm);
2237 return dqpout;
2238}
2239
2240
2241/*------------------------------------------------------------------*/ 2145/*------------------------------------------------------------------*/
2242 2146
2243/* 2147/*
@@ -2662,66 +2566,3 @@ xfs_qm_vop_create_dqattach(
2662 } 2566 }
2663} 2567}
2664 2568
2665/* ------------- list stuff -----------------*/
2666STATIC void
2667xfs_qm_freelist_init(xfs_frlist_t *ql)
2668{
2669 ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2670 mutex_init(&ql->qh_lock);
2671 ql->qh_version = 0;
2672 ql->qh_nelems = 0;
2673}
2674
2675STATIC void
2676xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2677{
2678 xfs_dquot_t *dqp, *nextdqp;
2679
2680 mutex_lock(&ql->qh_lock);
2681 for (dqp = ql->qh_next;
2682 dqp != (xfs_dquot_t *)ql; ) {
2683 xfs_dqlock(dqp);
2684 nextdqp = dqp->dq_flnext;
2685#ifdef QUOTADEBUG
2686 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2687#endif
2688 XQM_FREELIST_REMOVE(dqp);
2689 xfs_dqunlock(dqp);
2690 xfs_qm_dqdestroy(dqp);
2691 dqp = nextdqp;
2692 }
2693 mutex_unlock(&ql->qh_lock);
2694 mutex_destroy(&ql->qh_lock);
2695
2696 ASSERT(ql->qh_nelems == 0);
2697}
2698
2699STATIC void
2700xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2701{
2702 dq->dq_flnext = ql->qh_next;
2703 dq->dq_flprev = (xfs_dquot_t *)ql;
2704 ql->qh_next = dq;
2705 dq->dq_flnext->dq_flprev = dq;
2706 xfs_Gqm->qm_dqfreelist.qh_nelems++;
2707 xfs_Gqm->qm_dqfreelist.qh_version++;
2708}
2709
2710void
2711xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2712{
2713 xfs_dquot_t *next = dq->dq_flnext;
2714 xfs_dquot_t *prev = dq->dq_flprev;
2715
2716 next->dq_flprev = prev;
2717 prev->dq_flnext = next;
2718 dq->dq_flnext = dq->dq_flprev = dq;
2719 xfs_Gqm->qm_dqfreelist.qh_nelems--;
2720 xfs_Gqm->qm_dqfreelist.qh_version++;
2721}
2722
2723void
2724xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2725{
2726 xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2727}
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 495564b8af38..c9446f1c726d 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -72,17 +72,6 @@ extern kmem_zone_t *qm_dqtrxzone;
72#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3 72#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3
73 73
74typedef xfs_dqhash_t xfs_dqlist_t; 74typedef xfs_dqhash_t xfs_dqlist_t;
75/*
76 * The freelist head. The first two fields match the first two in the
77 * xfs_dquot_t structure (in xfs_dqmarker_t)
78 */
79typedef struct xfs_frlist {
80 struct xfs_dquot *qh_next;
81 struct xfs_dquot *qh_prev;
82 struct mutex qh_lock;
83 uint qh_version;
84 uint qh_nelems;
85} xfs_frlist_t;
86 75
87/* 76/*
88 * Quota Manager (global) structure. Lives only in core. 77 * Quota Manager (global) structure. Lives only in core.
@@ -91,7 +80,9 @@ typedef struct xfs_qm {
91 xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ 80 xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
92 xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ 81 xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
93 uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ 82 uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
94 xfs_frlist_t qm_dqfreelist; /* freelist of dquots */ 83 struct list_head qm_dqfrlist; /* freelist of dquots */
84 struct mutex qm_dqfrlist_lock;
85 int qm_dqfrlist_cnt;
95 atomic_t qm_totaldquots; /* total incore dquots */ 86 atomic_t qm_totaldquots; /* total incore dquots */
96 uint qm_nrefs; /* file systems with quota on */ 87 uint qm_nrefs; /* file systems with quota on */
97 int qm_dqfree_ratio;/* ratio of free to inuse dquots */ 88 int qm_dqfree_ratio;/* ratio of free to inuse dquots */
@@ -106,7 +97,9 @@ typedef struct xfs_qm {
106typedef struct xfs_quotainfo { 97typedef struct xfs_quotainfo {
107 xfs_inode_t *qi_uquotaip; /* user quota inode */ 98 xfs_inode_t *qi_uquotaip; /* user quota inode */
108 xfs_inode_t *qi_gquotaip; /* group quota inode */ 99 xfs_inode_t *qi_gquotaip; /* group quota inode */
109 xfs_dqlist_t qi_dqlist; /* all dquots in filesys */ 100 struct list_head qi_dqlist; /* all dquots in filesys */
101 struct mutex qi_dqlist_lock;
102 int qi_dquots;
110 int qi_dqreclaims; /* a change here indicates 103 int qi_dqreclaims; /* a change here indicates
111 a removal in the dqlist */ 104 a removal in the dqlist */
112 time_t qi_btimelimit; /* limit for blks timer */ 105 time_t qi_btimelimit; /* limit for blks timer */
@@ -175,10 +168,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
175extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 168extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
176extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); 169extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
177 170
178/* list stuff */
179extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
180extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
181
182#ifdef DEBUG 171#ifdef DEBUG
183extern int xfs_qm_internalqcheck(xfs_mount_t *); 172extern int xfs_qm_internalqcheck(xfs_mount_t *);
184#else 173#else
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 83e7ea3e25fa..3d1fc79532e2 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -55,7 +55,7 @@ static int xqm_proc_show(struct seq_file *m, void *v)
55 ndquot, 55 ndquot,
56 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, 56 xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
57 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, 57 xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
58 xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0); 58 xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
59 return 0; 59 return 0;
60} 60}
61 61
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d492db..92b002f1805f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -79,6 +79,7 @@ xfs_qm_scall_quotaoff(
79 xfs_mount_t *mp, 79 xfs_mount_t *mp,
80 uint flags) 80 uint flags)
81{ 81{
82 struct xfs_quotainfo *q = mp->m_quotainfo;
82 uint dqtype; 83 uint dqtype;
83 int error; 84 int error;
84 uint inactivate_flags; 85 uint inactivate_flags;
@@ -102,11 +103,8 @@ xfs_qm_scall_quotaoff(
102 * critical thing. 103 * critical thing.
103 * If quotaoff, then we must be dealing with the root filesystem. 104 * If quotaoff, then we must be dealing with the root filesystem.
104 */ 105 */
105 ASSERT(mp->m_quotainfo); 106 ASSERT(q);
106 if (mp->m_quotainfo) 107 mutex_lock(&q->qi_quotaofflock);
107 mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
108
109 ASSERT(mp->m_quotainfo);
110 108
111 /* 109 /*
112 * If we're just turning off quota enforcement, change mp and go. 110 * If we're just turning off quota enforcement, change mp and go.
@@ -117,7 +115,7 @@ xfs_qm_scall_quotaoff(
117 spin_lock(&mp->m_sb_lock); 115 spin_lock(&mp->m_sb_lock);
118 mp->m_sb.sb_qflags = mp->m_qflags; 116 mp->m_sb.sb_qflags = mp->m_qflags;
119 spin_unlock(&mp->m_sb_lock); 117 spin_unlock(&mp->m_sb_lock);
120 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 118 mutex_unlock(&q->qi_quotaofflock);
121 119
122 /* XXX what to do if error ? Revert back to old vals incore ? */ 120 /* XXX what to do if error ? Revert back to old vals incore ? */
123 error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); 121 error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
@@ -150,10 +148,8 @@ xfs_qm_scall_quotaoff(
150 * Nothing to do? Don't complain. This happens when we're just 148 * Nothing to do? Don't complain. This happens when we're just
151 * turning off quota enforcement. 149 * turning off quota enforcement.
152 */ 150 */
153 if ((mp->m_qflags & flags) == 0) { 151 if ((mp->m_qflags & flags) == 0)
154 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 152 goto out_unlock;
155 return (0);
156 }
157 153
158 /* 154 /*
159 * Write the LI_QUOTAOFF log record, and do SB changes atomically, 155 * Write the LI_QUOTAOFF log record, and do SB changes atomically,
@@ -162,7 +158,7 @@ xfs_qm_scall_quotaoff(
162 */ 158 */
163 error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); 159 error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
164 if (error) 160 if (error)
165 goto out_error; 161 goto out_unlock;
166 162
167 /* 163 /*
168 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct 164 * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
@@ -204,7 +200,7 @@ xfs_qm_scall_quotaoff(
204 * So, if we couldn't purge all the dquots from the filesystem, 200 * So, if we couldn't purge all the dquots from the filesystem,
205 * we can't get rid of the incore data structures. 201 * we can't get rid of the incore data structures.
206 */ 202 */
207 while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF))) 203 while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
208 delay(10 * nculprits); 204 delay(10 * nculprits);
209 205
210 /* 206 /*
@@ -222,7 +218,7 @@ xfs_qm_scall_quotaoff(
222 if (error) { 218 if (error) {
223 /* We're screwed now. Shutdown is the only option. */ 219 /* We're screwed now. Shutdown is the only option. */
224 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 220 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
225 goto out_error; 221 goto out_unlock;
226 } 222 }
227 223
228 /* 224 /*
@@ -230,27 +226,26 @@ xfs_qm_scall_quotaoff(
230 */ 226 */
231 if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || 227 if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
232 ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { 228 ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
233 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 229 mutex_unlock(&q->qi_quotaofflock);
234 xfs_qm_destroy_quotainfo(mp); 230 xfs_qm_destroy_quotainfo(mp);
235 return (0); 231 return (0);
236 } 232 }
237 233
238 /* 234 /*
239 * Release our quotainode references, and vn_purge them, 235 * Release our quotainode references if we don't need them anymore.
240 * if we don't need them anymore.
241 */ 236 */
242 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { 237 if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
243 IRELE(XFS_QI_UQIP(mp)); 238 IRELE(q->qi_uquotaip);
244 XFS_QI_UQIP(mp) = NULL; 239 q->qi_uquotaip = NULL;
245 } 240 }
246 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { 241 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
247 IRELE(XFS_QI_GQIP(mp)); 242 IRELE(q->qi_gquotaip);
248 XFS_QI_GQIP(mp) = NULL; 243 q->qi_gquotaip = NULL;
249 } 244 }
250out_error:
251 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
252 245
253 return (error); 246out_unlock:
247 mutex_unlock(&q->qi_quotaofflock);
248 return error;
254} 249}
255 250
256int 251int
@@ -379,9 +374,9 @@ xfs_qm_scall_quotaon(
379 /* 374 /*
380 * Switch on quota enforcement in core. 375 * Switch on quota enforcement in core.
381 */ 376 */
382 mutex_lock(&(XFS_QI_QOFFLOCK(mp))); 377 mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
383 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); 378 mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
384 mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); 379 mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
385 380
386 return (0); 381 return (0);
387} 382}
@@ -392,11 +387,12 @@ xfs_qm_scall_quotaon(
392 */ 387 */
393int 388int
394xfs_qm_scall_getqstat( 389xfs_qm_scall_getqstat(
395 xfs_mount_t *mp, 390 struct xfs_mount *mp,
396 fs_quota_stat_t *out) 391 struct fs_quota_stat *out)
397{ 392{
398 xfs_inode_t *uip, *gip; 393 struct xfs_quotainfo *q = mp->m_quotainfo;
399 boolean_t tempuqip, tempgqip; 394 struct xfs_inode *uip, *gip;
395 boolean_t tempuqip, tempgqip;
400 396
401 uip = gip = NULL; 397 uip = gip = NULL;
402 tempuqip = tempgqip = B_FALSE; 398 tempuqip = tempgqip = B_FALSE;
@@ -415,9 +411,9 @@ xfs_qm_scall_getqstat(
415 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; 411 out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
416 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; 412 out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
417 413
418 if (mp->m_quotainfo) { 414 if (q) {
419 uip = mp->m_quotainfo->qi_uquotaip; 415 uip = q->qi_uquotaip;
420 gip = mp->m_quotainfo->qi_gquotaip; 416 gip = q->qi_gquotaip;
421 } 417 }
422 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
423 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
@@ -441,17 +437,20 @@ xfs_qm_scall_getqstat(
441 if (tempgqip) 437 if (tempgqip)
442 IRELE(gip); 438 IRELE(gip);
443 } 439 }
444 if (mp->m_quotainfo) { 440 if (q) {
445 out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp); 441 out->qs_incoredqs = q->qi_dquots;
446 out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp); 442 out->qs_btimelimit = q->qi_btimelimit;
447 out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp); 443 out->qs_itimelimit = q->qi_itimelimit;
448 out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp); 444 out->qs_rtbtimelimit = q->qi_rtbtimelimit;
449 out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp); 445 out->qs_bwarnlimit = q->qi_bwarnlimit;
450 out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp); 446 out->qs_iwarnlimit = q->qi_iwarnlimit;
451 } 447 }
452 return (0); 448 return 0;
453} 449}
454 450
451#define XFS_DQ_MASK \
452 (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
453
455/* 454/*
456 * Adjust quota limits, and start/stop timers accordingly. 455 * Adjust quota limits, and start/stop timers accordingly.
457 */ 456 */
@@ -462,15 +461,17 @@ xfs_qm_scall_setqlim(
462 uint type, 461 uint type,
463 fs_disk_quota_t *newlim) 462 fs_disk_quota_t *newlim)
464{ 463{
464 struct xfs_quotainfo *q = mp->m_quotainfo;
465 xfs_disk_dquot_t *ddq; 465 xfs_disk_dquot_t *ddq;
466 xfs_dquot_t *dqp; 466 xfs_dquot_t *dqp;
467 xfs_trans_t *tp; 467 xfs_trans_t *tp;
468 int error; 468 int error;
469 xfs_qcnt_t hard, soft; 469 xfs_qcnt_t hard, soft;
470 470
471 if ((newlim->d_fieldmask & 471 if (newlim->d_fieldmask & ~XFS_DQ_MASK)
472 (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0) 472 return EINVAL;
473 return (0); 473 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
474 return 0;
474 475
475 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 476 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
476 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 477 if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
@@ -485,7 +486,7 @@ xfs_qm_scall_setqlim(
485 * a quotaoff from happening). (XXXThis doesn't currently happen 486 * a quotaoff from happening). (XXXThis doesn't currently happen
486 * because we take the vfslock before calling xfs_qm_sysent). 487 * because we take the vfslock before calling xfs_qm_sysent).
487 */ 488 */
488 mutex_lock(&(XFS_QI_QOFFLOCK(mp))); 489 mutex_lock(&q->qi_quotaofflock);
489 490
490 /* 491 /*
491 * Get the dquot (locked), and join it to the transaction. 492 * Get the dquot (locked), and join it to the transaction.
@@ -493,9 +494,8 @@ xfs_qm_scall_setqlim(
493 */ 494 */
494 if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { 495 if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
495 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 496 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
496 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
497 ASSERT(error != ENOENT); 497 ASSERT(error != ENOENT);
498 return (error); 498 goto out_unlock;
499 } 499 }
500 xfs_trans_dqjoin(tp, dqp); 500 xfs_trans_dqjoin(tp, dqp);
501 ddq = &dqp->q_core; 501 ddq = &dqp->q_core;
@@ -513,8 +513,8 @@ xfs_qm_scall_setqlim(
513 ddq->d_blk_hardlimit = cpu_to_be64(hard); 513 ddq->d_blk_hardlimit = cpu_to_be64(hard);
514 ddq->d_blk_softlimit = cpu_to_be64(soft); 514 ddq->d_blk_softlimit = cpu_to_be64(soft);
515 if (id == 0) { 515 if (id == 0) {
516 mp->m_quotainfo->qi_bhardlimit = hard; 516 q->qi_bhardlimit = hard;
517 mp->m_quotainfo->qi_bsoftlimit = soft; 517 q->qi_bsoftlimit = soft;
518 } 518 }
519 } else { 519 } else {
520 qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); 520 qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft);
@@ -529,8 +529,8 @@ xfs_qm_scall_setqlim(
529 ddq->d_rtb_hardlimit = cpu_to_be64(hard); 529 ddq->d_rtb_hardlimit = cpu_to_be64(hard);
530 ddq->d_rtb_softlimit = cpu_to_be64(soft); 530 ddq->d_rtb_softlimit = cpu_to_be64(soft);
531 if (id == 0) { 531 if (id == 0) {
532 mp->m_quotainfo->qi_rtbhardlimit = hard; 532 q->qi_rtbhardlimit = hard;
533 mp->m_quotainfo->qi_rtbsoftlimit = soft; 533 q->qi_rtbsoftlimit = soft;
534 } 534 }
535 } else { 535 } else {
536 qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); 536 qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
@@ -546,8 +546,8 @@ xfs_qm_scall_setqlim(
546 ddq->d_ino_hardlimit = cpu_to_be64(hard); 546 ddq->d_ino_hardlimit = cpu_to_be64(hard);
547 ddq->d_ino_softlimit = cpu_to_be64(soft); 547 ddq->d_ino_softlimit = cpu_to_be64(soft);
548 if (id == 0) { 548 if (id == 0) {
549 mp->m_quotainfo->qi_ihardlimit = hard; 549 q->qi_ihardlimit = hard;
550 mp->m_quotainfo->qi_isoftlimit = soft; 550 q->qi_isoftlimit = soft;
551 } 551 }
552 } else { 552 } else {
553 qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); 553 qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
@@ -572,23 +572,23 @@ xfs_qm_scall_setqlim(
572 * for warnings. 572 * for warnings.
573 */ 573 */
574 if (newlim->d_fieldmask & FS_DQ_BTIMER) { 574 if (newlim->d_fieldmask & FS_DQ_BTIMER) {
575 mp->m_quotainfo->qi_btimelimit = newlim->d_btimer; 575 q->qi_btimelimit = newlim->d_btimer;
576 ddq->d_btimer = cpu_to_be32(newlim->d_btimer); 576 ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
577 } 577 }
578 if (newlim->d_fieldmask & FS_DQ_ITIMER) { 578 if (newlim->d_fieldmask & FS_DQ_ITIMER) {
579 mp->m_quotainfo->qi_itimelimit = newlim->d_itimer; 579 q->qi_itimelimit = newlim->d_itimer;
580 ddq->d_itimer = cpu_to_be32(newlim->d_itimer); 580 ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
581 } 581 }
582 if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { 582 if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
583 mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer; 583 q->qi_rtbtimelimit = newlim->d_rtbtimer;
584 ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); 584 ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
585 } 585 }
586 if (newlim->d_fieldmask & FS_DQ_BWARNS) 586 if (newlim->d_fieldmask & FS_DQ_BWARNS)
587 mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns; 587 q->qi_bwarnlimit = newlim->d_bwarns;
588 if (newlim->d_fieldmask & FS_DQ_IWARNS) 588 if (newlim->d_fieldmask & FS_DQ_IWARNS)
589 mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns; 589 q->qi_iwarnlimit = newlim->d_iwarns;
590 if (newlim->d_fieldmask & FS_DQ_RTBWARNS) 590 if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
591 mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns; 591 q->qi_rtbwarnlimit = newlim->d_rtbwarns;
592 } else { 592 } else {
593 /* 593 /*
594 * If the user is now over quota, start the timelimit. 594 * If the user is now over quota, start the timelimit.
@@ -605,8 +605,9 @@ xfs_qm_scall_setqlim(
605 error = xfs_trans_commit(tp, 0); 605 error = xfs_trans_commit(tp, 0);
606 xfs_qm_dqprint(dqp); 606 xfs_qm_dqprint(dqp);
607 xfs_qm_dqrele(dqp); 607 xfs_qm_dqrele(dqp);
608 mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
609 608
609 out_unlock:
610 mutex_unlock(&q->qi_quotaofflock);
610 return error; 611 return error;
611} 612}
612 613
@@ -853,7 +854,8 @@ xfs_dqrele_inode(
853 int error; 854 int error;
854 855
855 /* skip quota inodes */ 856 /* skip quota inodes */
856 if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) { 857 if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
858 ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
857 ASSERT(ip->i_udquot == NULL); 859 ASSERT(ip->i_udquot == NULL);
858 ASSERT(ip->i_gdquot == NULL); 860 ASSERT(ip->i_gdquot == NULL);
859 read_unlock(&pag->pag_ici_lock); 861 read_unlock(&pag->pag_ici_lock);
@@ -891,7 +893,8 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 893 uint flags)
892{ 894{
893 ASSERT(mp->m_quotainfo); 895 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); 896 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
897 XFS_ICI_NO_TAG, 0, NULL);
895} 898}
896 899
897/*------------------------------------------------------------------------*/ 900/*------------------------------------------------------------------------*/
@@ -930,7 +933,8 @@ struct mutex qcheck_lock;
930} 933}
931 934
932typedef struct dqtest { 935typedef struct dqtest {
933 xfs_dqmarker_t q_lists; 936 uint dq_flags; /* various flags (XFS_DQ_*) */
937 struct list_head q_hashlist;
934 xfs_dqhash_t *q_hash; /* the hashchain header */ 938 xfs_dqhash_t *q_hash; /* the hashchain header */
935 xfs_mount_t *q_mount; /* filesystem this relates to */ 939 xfs_mount_t *q_mount; /* filesystem this relates to */
936 xfs_dqid_t d_id; /* user id or group id */ 940 xfs_dqid_t d_id; /* user id or group id */
@@ -941,14 +945,9 @@ typedef struct dqtest {
941STATIC void 945STATIC void
942xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp) 946xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
943{ 947{
944 xfs_dquot_t *d; 948 list_add(&dqp->q_hashlist, &h->qh_list);
945 if (((d) = (h)->qh_next)) 949 h->qh_version++;
946 (d)->HL_PREVP = &((dqp)->HL_NEXT); 950 h->qh_nelems++;
947 (dqp)->HL_NEXT = d;
948 (dqp)->HL_PREVP = &((h)->qh_next);
949 (h)->qh_next = (xfs_dquot_t *)dqp;
950 (h)->qh_version++;
951 (h)->qh_nelems++;
952} 951}
953STATIC void 952STATIC void
954xfs_qm_dqtest_print( 953xfs_qm_dqtest_print(
@@ -1060,9 +1059,7 @@ xfs_qm_internalqcheck_dqget(
1060 xfs_dqhash_t *h; 1059 xfs_dqhash_t *h;
1061 1060
1062 h = DQTEST_HASH(mp, id, type); 1061 h = DQTEST_HASH(mp, id, type);
1063 for (d = (xfs_dqtest_t *) h->qh_next; d != NULL; 1062 list_for_each_entry(d, &h->qh_list, q_hashlist) {
1064 d = (xfs_dqtest_t *) d->HL_NEXT) {
1065 /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */
1066 if (d->d_id == id && mp == d->q_mount) { 1063 if (d->d_id == id && mp == d->q_mount) {
1067 *O_dq = d; 1064 *O_dq = d;
1068 return (0); 1065 return (0);
@@ -1073,6 +1070,7 @@ xfs_qm_internalqcheck_dqget(
1073 d->d_id = id; 1070 d->d_id = id;
1074 d->q_mount = mp; 1071 d->q_mount = mp;
1075 d->q_hash = h; 1072 d->q_hash = h;
1073 INIT_LIST_HEAD(&d->q_hashlist);
1076 xfs_qm_hashinsert(h, d); 1074 xfs_qm_hashinsert(h, d);
1077 *O_dq = d; 1075 *O_dq = d;
1078 return (0); 1076 return (0);
@@ -1179,8 +1177,6 @@ xfs_qm_internalqcheck(
1179 xfs_ino_t lastino; 1177 xfs_ino_t lastino;
1180 int done, count; 1178 int done, count;
1181 int i; 1179 int i;
1182 xfs_dqtest_t *d, *e;
1183 xfs_dqhash_t *h1;
1184 int error; 1180 int error;
1185 1181
1186 lastino = 0; 1182 lastino = 0;
@@ -1220,19 +1216,18 @@ xfs_qm_internalqcheck(
1220 } 1216 }
1221 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1217 cmn_err(CE_DEBUG, "Checking results against system dquots");
1222 for (i = 0; i < qmtest_hashmask; i++) { 1218 for (i = 0; i < qmtest_hashmask; i++) {
1223 h1 = &qmtest_udqtab[i]; 1219 xfs_dqtest_t *d, *n;
1224 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1220 xfs_dqhash_t *h;
1221
1222 h = &qmtest_udqtab[i];
1223 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1225 xfs_dqtest_cmp(d); 1224 xfs_dqtest_cmp(d);
1226 e = (xfs_dqtest_t *) d->HL_NEXT;
1227 kmem_free(d); 1225 kmem_free(d);
1228 d = e;
1229 } 1226 }
1230 h1 = &qmtest_gdqtab[i]; 1227 h = &qmtest_gdqtab[i];
1231 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1228 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1232 xfs_dqtest_cmp(d); 1229 xfs_dqtest_cmp(d);
1233 e = (xfs_dqtest_t *) d->HL_NEXT;
1234 kmem_free(d); 1230 kmem_free(d);
1235 d = e;
1236 } 1231 }
1237 } 1232 }
1238 1233
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 8286b2842b6b..94a3d927d716 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -24,43 +24,6 @@
24 */ 24 */
25#define XFS_DQITER_MAP_SIZE 10 25#define XFS_DQITER_MAP_SIZE 10
26 26
27/* Number of dquots that fit in to a dquot block */
28#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
29
30#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
31
32#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
33#define XFS_QI_UQIP(mp) ((mp)->m_quotainfo->qi_uquotaip)
34#define XFS_QI_GQIP(mp) ((mp)->m_quotainfo->qi_gquotaip)
35#define XFS_QI_DQCHUNKLEN(mp) ((mp)->m_quotainfo->qi_dqchunklen)
36#define XFS_QI_BTIMELIMIT(mp) ((mp)->m_quotainfo->qi_btimelimit)
37#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
38#define XFS_QI_ITIMELIMIT(mp) ((mp)->m_quotainfo->qi_itimelimit)
39#define XFS_QI_BWARNLIMIT(mp) ((mp)->m_quotainfo->qi_bwarnlimit)
40#define XFS_QI_RTBWARNLIMIT(mp) ((mp)->m_quotainfo->qi_rtbwarnlimit)
41#define XFS_QI_IWARNLIMIT(mp) ((mp)->m_quotainfo->qi_iwarnlimit)
42#define XFS_QI_QOFFLOCK(mp) ((mp)->m_quotainfo->qi_quotaofflock)
43
44#define XFS_QI_MPL_LIST(mp) ((mp)->m_quotainfo->qi_dqlist)
45#define XFS_QI_MPLNEXT(mp) ((mp)->m_quotainfo->qi_dqlist.qh_next)
46#define XFS_QI_MPLNDQUOTS(mp) ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
47
48#define xfs_qm_mplist_lock(mp) \
49 mutex_lock(&(XFS_QI_MPL_LIST(mp).qh_lock))
50#define xfs_qm_mplist_nowait(mp) \
51 mutex_trylock(&(XFS_QI_MPL_LIST(mp).qh_lock))
52#define xfs_qm_mplist_unlock(mp) \
53 mutex_unlock(&(XFS_QI_MPL_LIST(mp).qh_lock))
54#define XFS_QM_IS_MPLIST_LOCKED(mp) \
55 mutex_is_locked(&(XFS_QI_MPL_LIST(mp).qh_lock))
56
57#define xfs_qm_freelist_lock(qm) \
58 mutex_lock(&((qm)->qm_dqfreelist.qh_lock))
59#define xfs_qm_freelist_lock_nowait(qm) \
60 mutex_trylock(&((qm)->qm_dqfreelist.qh_lock))
61#define xfs_qm_freelist_unlock(qm) \
62 mutex_unlock(&((qm)->qm_dqfreelist.qh_lock))
63
64/* 27/*
65 * Hash into a bucket in the dquot hash table, based on <mp, id>. 28 * Hash into a bucket in the dquot hash table, based on <mp, id>.
66 */ 29 */
@@ -72,9 +35,6 @@
72 XFS_DQ_HASHVAL(mp, id)) : \ 35 XFS_DQ_HASHVAL(mp, id)) : \
73 (xfs_Gqm->qm_grp_dqhtable + \ 36 (xfs_Gqm->qm_grp_dqhtable + \
74 XFS_DQ_HASHVAL(mp, id))) 37 XFS_DQ_HASHVAL(mp, id)))
75#define XFS_IS_DQTYPE_ON(mp, type) (type == XFS_DQ_USER ? \
76 XFS_IS_UQUOTA_ON(mp) : \
77 XFS_IS_OQUOTA_ON(mp))
78#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ 38#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
79 !dqp->q_core.d_blk_hardlimit && \ 39 !dqp->q_core.d_blk_hardlimit && \
80 !dqp->q_core.d_blk_softlimit && \ 40 !dqp->q_core.d_blk_softlimit && \
@@ -86,68 +46,6 @@
86 !dqp->q_core.d_rtbcount && \ 46 !dqp->q_core.d_rtbcount && \
87 !dqp->q_core.d_icount) 47 !dqp->q_core.d_icount)
88 48
89#define HL_PREVP dq_hashlist.ql_prevp
90#define HL_NEXT dq_hashlist.ql_next
91#define MPL_PREVP dq_mplist.ql_prevp
92#define MPL_NEXT dq_mplist.ql_next
93
94
95#define _LIST_REMOVE(h, dqp, PVP, NXT) \
96 { \
97 xfs_dquot_t *d; \
98 if (((d) = (dqp)->NXT)) \
99 (d)->PVP = (dqp)->PVP; \
100 *((dqp)->PVP) = d; \
101 (dqp)->NXT = NULL; \
102 (dqp)->PVP = NULL; \
103 (h)->qh_version++; \
104 (h)->qh_nelems--; \
105 }
106
107#define _LIST_INSERT(h, dqp, PVP, NXT) \
108 { \
109 xfs_dquot_t *d; \
110 if (((d) = (h)->qh_next)) \
111 (d)->PVP = &((dqp)->NXT); \
112 (dqp)->NXT = d; \
113 (dqp)->PVP = &((h)->qh_next); \
114 (h)->qh_next = dqp; \
115 (h)->qh_version++; \
116 (h)->qh_nelems++; \
117 }
118
119#define FOREACH_DQUOT_IN_MP(dqp, mp) \
120 for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT)
121
122#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist) \
123for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
124 (dqp) = (dqp)->dq_flnext)
125
126#define XQM_HASHLIST_INSERT(h, dqp) \
127 _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
128
129#define XQM_FREELIST_INSERT(h, dqp) \
130 xfs_qm_freelist_append(h, dqp)
131
132#define XQM_MPLIST_INSERT(h, dqp) \
133 _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT)
134
135#define XQM_HASHLIST_REMOVE(h, dqp) \
136 _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
137#define XQM_FREELIST_REMOVE(dqp) \
138 xfs_qm_freelist_unlink(dqp)
139#define XQM_MPLIST_REMOVE(h, dqp) \
140 { _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \
141 XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; }
142
143#define XFS_DQ_IS_LOGITEM_INITD(dqp) ((dqp)->q_logitem.qli_dquot == (dqp))
144
145#define XFS_QM_DQP_TO_DQACCT(tp, dqp) (XFS_QM_ISUDQ(dqp) ? \
146 (tp)->t_dqinfo->dqa_usrdquots : \
147 (tp)->t_dqinfo->dqa_grpdquots)
148#define XFS_IS_SUSER_DQUOT(dqp) \
149 (!((dqp)->q_core.d_id))
150
151#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ 49#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
152 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ 50 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
153 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) 51 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index c3ab75cb1d9a..061d827da33c 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -59,12 +59,11 @@ xfs_trans_dqjoin(
59 xfs_trans_t *tp, 59 xfs_trans_t *tp,
60 xfs_dquot_t *dqp) 60 xfs_dquot_t *dqp)
61{ 61{
62 xfs_dq_logitem_t *lp; 62 xfs_dq_logitem_t *lp = &dqp->q_logitem;
63 63
64 ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); 64 ASSERT(dqp->q_transp != tp);
65 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 65 ASSERT(XFS_DQ_IS_LOCKED(dqp));
66 ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp)); 66 ASSERT(lp->qli_dquot == dqp);
67 lp = &dqp->q_logitem;
68 67
69 /* 68 /*
70 * Get a log_item_desc to point at the new item. 69 * Get a log_item_desc to point at the new item.
@@ -96,7 +95,7 @@ xfs_trans_log_dquot(
96{ 95{
97 xfs_log_item_desc_t *lidp; 96 xfs_log_item_desc_t *lidp;
98 97
99 ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); 98 ASSERT(dqp->q_transp == tp);
100 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 99 ASSERT(XFS_DQ_IS_LOCKED(dqp));
101 100
102 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem)); 101 lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
@@ -198,16 +197,16 @@ xfs_trans_get_dqtrx(
198 int i; 197 int i;
199 xfs_dqtrx_t *qa; 198 xfs_dqtrx_t *qa;
200 199
201 for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { 200 qa = XFS_QM_ISUDQ(dqp) ?
202 qa = XFS_QM_DQP_TO_DQACCT(tp, dqp); 201 tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
203 202
203 for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
204 if (qa[i].qt_dquot == NULL || 204 if (qa[i].qt_dquot == NULL ||
205 qa[i].qt_dquot == dqp) { 205 qa[i].qt_dquot == dqp)
206 return (&qa[i]); 206 return &qa[i];
207 }
208 } 207 }
209 208
210 return (NULL); 209 return NULL;
211} 210}
212 211
213/* 212/*
@@ -381,7 +380,7 @@ xfs_trans_apply_dquot_deltas(
381 break; 380 break;
382 381
383 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 382 ASSERT(XFS_DQ_IS_LOCKED(dqp));
384 ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp)); 383 ASSERT(dqp->q_transp == tp);
385 384
386 /* 385 /*
387 * adjust the actual number of blocks used 386 * adjust the actual number of blocks used
@@ -639,7 +638,7 @@ xfs_trans_dqresv(
639 softlimit = q->qi_bsoftlimit; 638 softlimit = q->qi_bsoftlimit;
640 timer = be32_to_cpu(dqp->q_core.d_btimer); 639 timer = be32_to_cpu(dqp->q_core.d_btimer);
641 warns = be16_to_cpu(dqp->q_core.d_bwarns); 640 warns = be16_to_cpu(dqp->q_core.d_bwarns);
642 warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount); 641 warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
643 resbcountp = &dqp->q_res_bcount; 642 resbcountp = &dqp->q_res_bcount;
644 } else { 643 } else {
645 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); 644 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
@@ -651,7 +650,7 @@ xfs_trans_dqresv(
651 softlimit = q->qi_rtbsoftlimit; 650 softlimit = q->qi_rtbsoftlimit;
652 timer = be32_to_cpu(dqp->q_core.d_rtbtimer); 651 timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
653 warns = be16_to_cpu(dqp->q_core.d_rtbwarns); 652 warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
654 warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount); 653 warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
655 resbcountp = &dqp->q_res_rtbcount; 654 resbcountp = &dqp->q_res_rtbcount;
656 } 655 }
657 656
@@ -691,7 +690,7 @@ xfs_trans_dqresv(
691 count = be64_to_cpu(dqp->q_core.d_icount); 690 count = be64_to_cpu(dqp->q_core.d_icount);
692 timer = be32_to_cpu(dqp->q_core.d_itimer); 691 timer = be32_to_cpu(dqp->q_core.d_itimer);
693 warns = be16_to_cpu(dqp->q_core.d_iwarns); 692 warns = be16_to_cpu(dqp->q_core.d_iwarns);
694 warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount); 693 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
695 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); 694 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
696 if (!hardlimit) 695 if (!hardlimit)
697 hardlimit = q->qi_ihardlimit; 696 hardlimit = q->qi_ihardlimit;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1ff88ea..abb8222b88c9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
223 int pag_ici_init; /* incore inode cache initialised */ 223 int pag_ici_init; /* incore inode cache initialised */
224 rwlock_t pag_ici_lock; /* incore inode lock */ 224 rwlock_t pag_ici_lock; /* incore inode lock */
225 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 225 struct radix_tree_root pag_ici_root; /* incore inode cache root */
226 int pag_ici_reclaimable; /* reclaimable inodes */
226#endif 227#endif
227 int pagb_count; /* pagb slots in use */ 228 int pagb_count; /* pagb slots in use */
228 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ 229 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5c11e4d17010..99587ded043f 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -3829,7 +3829,7 @@ xfs_bmap_add_attrfork(
3829 } 3829 }
3830 if ((error = xfs_bmap_finish(&tp, &flist, &committed))) 3830 if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
3831 goto error2; 3831 goto error2;
3832 error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES); 3832 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3833 ASSERT(ip->i_df.if_ext_max == 3833 ASSERT(ip->i_df.if_ext_max ==
3834 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t)); 3834 XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3835 return error; 3835 return error;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index f3c49e69eab9..240340a4727b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -372,12 +372,12 @@ xfs_buf_item_pin(
372 */ 372 */
373STATIC void 373STATIC void
374xfs_buf_item_unpin( 374xfs_buf_item_unpin(
375 xfs_buf_log_item_t *bip, 375 xfs_buf_log_item_t *bip)
376 int stale)
377{ 376{
378 struct xfs_ail *ailp; 377 struct xfs_ail *ailp;
379 xfs_buf_t *bp; 378 xfs_buf_t *bp;
380 int freed; 379 int freed;
380 int stale = bip->bli_flags & XFS_BLI_STALE;
381 381
382 bp = bip->bli_buf; 382 bp = bip->bli_buf;
383 ASSERT(bp != NULL); 383 ASSERT(bp != NULL);
@@ -428,40 +428,34 @@ xfs_buf_item_unpin_remove(
428 xfs_buf_log_item_t *bip, 428 xfs_buf_log_item_t *bip,
429 xfs_trans_t *tp) 429 xfs_trans_t *tp)
430{ 430{
431 xfs_buf_t *bp; 431 /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */
432 xfs_log_item_desc_t *lidp;
433 int stale = 0;
434
435 bp = bip->bli_buf;
436 /*
437 * will xfs_buf_item_unpin() call xfs_buf_item_relse()?
438 */
439 if ((atomic_read(&bip->bli_refcount) == 1) && 432 if ((atomic_read(&bip->bli_refcount) == 1) &&
440 (bip->bli_flags & XFS_BLI_STALE)) { 433 (bip->bli_flags & XFS_BLI_STALE)) {
434 /*
435 * yes -- We can safely do some work here and then call
436 * buf_item_unpin to do the rest because we are
437 * are holding the buffer locked so no one else will be
438 * able to bump up the refcount. We have to remove the
439 * log item from the transaction as we are about to release
440 * our reference to the buffer. If we don't, the unlock that
441 * occurs later in the xfs_trans_uncommit() will try to
442 * reference the buffer which we no longer have a hold on.
443 */
444 struct xfs_log_item_desc *lidp;
445
441 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0); 446 ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
442 trace_xfs_buf_item_unpin_stale(bip); 447 trace_xfs_buf_item_unpin_stale(bip);
443 448
444 /* 449 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
445 * yes -- clear the xaction descriptor in-use flag
446 * and free the chunk if required. We can safely
447 * do some work here and then call buf_item_unpin
448 * to do the rest because if the if is true, then
449 * we are holding the buffer locked so no one else
450 * will be able to bump up the refcount.
451 */
452 lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) bip);
453 stale = lidp->lid_flags & XFS_LID_BUF_STALE;
454 xfs_trans_free_item(tp, lidp); 450 xfs_trans_free_item(tp, lidp);
451
455 /* 452 /*
456 * Since the transaction no longer refers to the buffer, 453 * Since the transaction no longer refers to the buffer, the
457 * the buffer should no longer refer to the transaction. 454 * buffer should no longer refer to the transaction.
458 */ 455 */
459 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 456 XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
460 } 457 }
461 458 xfs_buf_item_unpin(bip);
462 xfs_buf_item_unpin(bip, stale);
463
464 return;
465} 459}
466 460
467/* 461/*
@@ -675,7 +669,7 @@ static struct xfs_item_ops xfs_buf_item_ops = {
675 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 669 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
676 xfs_buf_item_format, 670 xfs_buf_item_format,
677 .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin, 671 .iop_pin = (void(*)(xfs_log_item_t*))xfs_buf_item_pin,
678 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_buf_item_unpin, 672 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin,
679 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 673 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
680 xfs_buf_item_unpin_remove, 674 xfs_buf_item_unpin_remove,
681 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock, 675 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock,
@@ -733,10 +727,7 @@ xfs_buf_item_init(
733 727
734 bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone, 728 bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
735 KM_SLEEP); 729 KM_SLEEP);
736 bip->bli_item.li_type = XFS_LI_BUF; 730 xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
737 bip->bli_item.li_ops = &xfs_buf_item_ops;
738 bip->bli_item.li_mountp = mp;
739 bip->bli_item.li_ailp = mp->m_ail;
740 bip->bli_buf = bp; 731 bip->bli_buf = bp;
741 xfs_buf_hold(bp); 732 xfs_buf_hold(bp);
742 bip->bli_format.blf_type = XFS_LI_BUF; 733 bip->bli_format.blf_type = XFS_LI_BUF;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 217f34af00cb..df4454511f73 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -26,7 +26,7 @@ extern kmem_zone_t *xfs_buf_item_zone;
26 * have been logged. 26 * have been logged.
27 * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything. 27 * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything.
28 */ 28 */
29typedef struct xfs_buf_log_format_t { 29typedef struct xfs_buf_log_format {
30 unsigned short blf_type; /* buf log item type indicator */ 30 unsigned short blf_type; /* buf log item type indicator */
31 unsigned short blf_size; /* size of this item */ 31 unsigned short blf_size; /* size of this item */
32 ushort blf_flags; /* misc state */ 32 ushort blf_flags; /* misc state */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 92d5cd5bf4f2..ef96175c0744 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -186,18 +186,18 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...)
186 186
187void 187void
188xfs_error_report( 188xfs_error_report(
189 char *tag, 189 const char *tag,
190 int level, 190 int level,
191 xfs_mount_t *mp, 191 struct xfs_mount *mp,
192 char *fname, 192 const char *filename,
193 int linenum, 193 int linenum,
194 inst_t *ra) 194 inst_t *ra)
195{ 195{
196 if (level <= xfs_error_level) { 196 if (level <= xfs_error_level) {
197 xfs_cmn_err(XFS_PTAG_ERROR_REPORT, 197 xfs_cmn_err(XFS_PTAG_ERROR_REPORT,
198 CE_ALERT, mp, 198 CE_ALERT, mp,
199 "XFS internal error %s at line %d of file %s. Caller 0x%p\n", 199 "XFS internal error %s at line %d of file %s. Caller 0x%p\n",
200 tag, linenum, fname, ra); 200 tag, linenum, filename, ra);
201 201
202 xfs_stack_trace(); 202 xfs_stack_trace();
203 } 203 }
@@ -205,15 +205,15 @@ xfs_error_report(
205 205
206void 206void
207xfs_corruption_error( 207xfs_corruption_error(
208 char *tag, 208 const char *tag,
209 int level, 209 int level,
210 xfs_mount_t *mp, 210 struct xfs_mount *mp,
211 void *p, 211 void *p,
212 char *fname, 212 const char *filename,
213 int linenum, 213 int linenum,
214 inst_t *ra) 214 inst_t *ra)
215{ 215{
216 if (level <= xfs_error_level) 216 if (level <= xfs_error_level)
217 xfs_hex_dump(p, 16); 217 xfs_hex_dump(p, 16);
218 xfs_error_report(tag, level, mp, fname, linenum, ra); 218 xfs_error_report(tag, level, mp, filename, linenum, ra);
219} 219}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 0c93051c4651..c2c1a072bb82 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -29,10 +29,11 @@ extern int xfs_error_trap(int);
29 29
30struct xfs_mount; 30struct xfs_mount;
31 31
32extern void xfs_error_report(char *tag, int level, struct xfs_mount *mp, 32extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
33 char *fname, int linenum, inst_t *ra); 33 const char *filename, int linenum, inst_t *ra);
34extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp, 34extern void xfs_corruption_error(const char *tag, int level,
35 void *p, char *fname, int linenum, inst_t *ra); 35 struct xfs_mount *mp, void *p, const char *filename,
36 int linenum, inst_t *ra);
36 37
37#define XFS_ERROR_REPORT(e, lvl, mp) \ 38#define XFS_ERROR_REPORT(e, lvl, mp) \
38 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) 39 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6f35ed1b39b9..409fe81585fd 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -106,7 +106,7 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
106 */ 106 */
107/*ARGSUSED*/ 107/*ARGSUSED*/
108STATIC void 108STATIC void
109xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale) 109xfs_efi_item_unpin(xfs_efi_log_item_t *efip)
110{ 110{
111 struct xfs_ail *ailp = efip->efi_item.li_ailp; 111 struct xfs_ail *ailp = efip->efi_item.li_ailp;
112 112
@@ -224,7 +224,7 @@ static struct xfs_item_ops xfs_efi_item_ops = {
224 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 224 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
225 xfs_efi_item_format, 225 xfs_efi_item_format,
226 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin, 226 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efi_item_pin,
227 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_efi_item_unpin, 227 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin,
228 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *)) 228 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
229 xfs_efi_item_unpin_remove, 229 xfs_efi_item_unpin_remove,
230 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock, 230 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock,
@@ -259,10 +259,7 @@ xfs_efi_init(xfs_mount_t *mp,
259 KM_SLEEP); 259 KM_SLEEP);
260 } 260 }
261 261
262 efip->efi_item.li_type = XFS_LI_EFI; 262 xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
263 efip->efi_item.li_ops = &xfs_efi_item_ops;
264 efip->efi_item.li_mountp = mp;
265 efip->efi_item.li_ailp = mp->m_ail;
266 efip->efi_format.efi_nextents = nextents; 263 efip->efi_format.efi_nextents = nextents;
267 efip->efi_format.efi_id = (__psint_t)(void*)efip; 264 efip->efi_format.efi_id = (__psint_t)(void*)efip;
268 265
@@ -428,7 +425,7 @@ xfs_efd_item_pin(xfs_efd_log_item_t *efdp)
428 */ 425 */
429/*ARGSUSED*/ 426/*ARGSUSED*/
430STATIC void 427STATIC void
431xfs_efd_item_unpin(xfs_efd_log_item_t *efdp, int stale) 428xfs_efd_item_unpin(xfs_efd_log_item_t *efdp)
432{ 429{
433 return; 430 return;
434} 431}
@@ -518,7 +515,7 @@ static struct xfs_item_ops xfs_efd_item_ops = {
518 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 515 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
519 xfs_efd_item_format, 516 xfs_efd_item_format,
520 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin, 517 .iop_pin = (void(*)(xfs_log_item_t*))xfs_efd_item_pin,
521 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_efd_item_unpin, 518 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin,
522 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 519 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
523 xfs_efd_item_unpin_remove, 520 xfs_efd_item_unpin_remove,
524 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock, 521 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock,
@@ -554,10 +551,7 @@ xfs_efd_init(xfs_mount_t *mp,
554 KM_SLEEP); 551 KM_SLEEP);
555 } 552 }
556 553
557 efdp->efd_item.li_type = XFS_LI_EFD; 554 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
558 efdp->efd_item.li_ops = &xfs_efd_item_ops;
559 efdp->efd_item.li_mountp = mp;
560 efdp->efd_item.li_ailp = mp->m_ail;
561 efdp->efd_efip = efip; 555 efdp->efd_efip = efip;
562 efdp->efd_format.efd_nextents = nextents; 556 efdp->efd_format.efd_nextents = nextents;
563 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 557 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0ffd56447045..8cd6e8d8fe9c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2449,6 +2449,8 @@ xfs_iunpin_nowait(
2449{ 2449{
2450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 2450 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2451 2451
2452 trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
2453
2452 /* Give the log a push to start the unpinning I/O */ 2454 /* Give the log a push to start the unpinning I/O */
2453 xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0); 2455 xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
2454 2456
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7bfea8540159..cf8249a60004 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -543,6 +543,7 @@ xfs_inode_item_pin(
543{ 543{
544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 544 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
545 545
546 trace_xfs_inode_pin(iip->ili_inode, _RET_IP_);
546 atomic_inc(&iip->ili_inode->i_pincount); 547 atomic_inc(&iip->ili_inode->i_pincount);
547} 548}
548 549
@@ -556,11 +557,11 @@ xfs_inode_item_pin(
556/* ARGSUSED */ 557/* ARGSUSED */
557STATIC void 558STATIC void
558xfs_inode_item_unpin( 559xfs_inode_item_unpin(
559 xfs_inode_log_item_t *iip, 560 xfs_inode_log_item_t *iip)
560 int stale)
561{ 561{
562 struct xfs_inode *ip = iip->ili_inode; 562 struct xfs_inode *ip = iip->ili_inode;
563 563
564 trace_xfs_inode_unpin(ip, _RET_IP_);
564 ASSERT(atomic_read(&ip->i_pincount) > 0); 565 ASSERT(atomic_read(&ip->i_pincount) > 0);
565 if (atomic_dec_and_test(&ip->i_pincount)) 566 if (atomic_dec_and_test(&ip->i_pincount))
566 wake_up(&ip->i_ipin_wait); 567 wake_up(&ip->i_ipin_wait);
@@ -572,7 +573,7 @@ xfs_inode_item_unpin_remove(
572 xfs_inode_log_item_t *iip, 573 xfs_inode_log_item_t *iip,
573 xfs_trans_t *tp) 574 xfs_trans_t *tp)
574{ 575{
575 xfs_inode_item_unpin(iip, 0); 576 xfs_inode_item_unpin(iip);
576} 577}
577 578
578/* 579/*
@@ -838,7 +839,7 @@ static struct xfs_item_ops xfs_inode_item_ops = {
838 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*)) 839 .iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
839 xfs_inode_item_format, 840 xfs_inode_item_format,
840 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin, 841 .iop_pin = (void(*)(xfs_log_item_t*))xfs_inode_item_pin,
841 .iop_unpin = (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin, 842 .iop_unpin = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin,
842 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*)) 843 .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
843 xfs_inode_item_unpin_remove, 844 xfs_inode_item_unpin_remove,
844 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock, 845 .iop_trylock = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock,
@@ -865,17 +866,9 @@ xfs_inode_item_init(
865 ASSERT(ip->i_itemp == NULL); 866 ASSERT(ip->i_itemp == NULL);
866 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP); 867 iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
867 868
868 iip->ili_item.li_type = XFS_LI_INODE;
869 iip->ili_item.li_ops = &xfs_inode_item_ops;
870 iip->ili_item.li_mountp = mp;
871 iip->ili_item.li_ailp = mp->m_ail;
872 iip->ili_inode = ip; 869 iip->ili_inode = ip;
873 870 xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
874 /* 871 &xfs_inode_item_ops);
875 We have zeroed memory. No need ...
876 iip->ili_extents_buf = NULL;
877 */
878
879 iip->ili_format.ilf_type = XFS_LI_INODE; 872 iip->ili_format.ilf_type = XFS_LI_INODE;
880 iip->ili_format.ilf_ino = ip->i_ino; 873 iip->ili_format.ilf_ino = ip->i_ino;
881 iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; 874 iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0b65039951a0..ef14943829da 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -55,71 +55,33 @@
55#define XFS_STRAT_WRITE_IMAPS 2 55#define XFS_STRAT_WRITE_IMAPS 2
56#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP 56#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP
57 57
58STATIC int 58STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
59xfs_imap_to_bmap( 59 int, struct xfs_bmbt_irec *, int *);
60 xfs_inode_t *ip, 60STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
61 xfs_off_t offset, 61 struct xfs_bmbt_irec *, int *);
62 xfs_bmbt_irec_t *imap, 62STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
63 xfs_iomap_t *iomapp, 63 struct xfs_bmbt_irec *, int *);
64 int imaps, /* Number of imap entries */
65 int iomaps, /* Number of iomap entries */
66 int flags)
67{
68 xfs_mount_t *mp = ip->i_mount;
69 int pbm;
70 xfs_fsblock_t start_block;
71
72
73 for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
74 iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
75 iomapp->iomap_delta = offset - iomapp->iomap_offset;
76 iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
77 iomapp->iomap_flags = flags;
78
79 if (XFS_IS_REALTIME_INODE(ip)) {
80 iomapp->iomap_flags |= IOMAP_REALTIME;
81 iomapp->iomap_target = mp->m_rtdev_targp;
82 } else {
83 iomapp->iomap_target = mp->m_ddev_targp;
84 }
85 start_block = imap->br_startblock;
86 if (start_block == HOLESTARTBLOCK) {
87 iomapp->iomap_bn = IOMAP_DADDR_NULL;
88 iomapp->iomap_flags |= IOMAP_HOLE;
89 } else if (start_block == DELAYSTARTBLOCK) {
90 iomapp->iomap_bn = IOMAP_DADDR_NULL;
91 iomapp->iomap_flags |= IOMAP_DELAY;
92 } else {
93 iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
94 if (ISUNWRITTEN(imap))
95 iomapp->iomap_flags |= IOMAP_UNWRITTEN;
96 }
97
98 offset += iomapp->iomap_bsize - iomapp->iomap_delta;
99 }
100 return pbm; /* Return the number filled */
101}
102 64
103int 65int
104xfs_iomap( 66xfs_iomap(
105 xfs_inode_t *ip, 67 struct xfs_inode *ip,
106 xfs_off_t offset, 68 xfs_off_t offset,
107 ssize_t count, 69 ssize_t count,
108 int flags, 70 int flags,
109 xfs_iomap_t *iomapp, 71 struct xfs_bmbt_irec *imap,
110 int *niomaps) 72 int *nimaps,
73 int *new)
111{ 74{
112 xfs_mount_t *mp = ip->i_mount; 75 struct xfs_mount *mp = ip->i_mount;
113 xfs_fileoff_t offset_fsb, end_fsb; 76 xfs_fileoff_t offset_fsb, end_fsb;
114 int error = 0; 77 int error = 0;
115 int lockmode = 0; 78 int lockmode = 0;
116 xfs_bmbt_irec_t imap; 79 int bmapi_flags = 0;
117 int nimaps = 1;
118 int bmapi_flags = 0;
119 int iomap_flags = 0;
120 80
121 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); 81 ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
122 82
83 *new = 0;
84
123 if (XFS_FORCED_SHUTDOWN(mp)) 85 if (XFS_FORCED_SHUTDOWN(mp))
124 return XFS_ERROR(EIO); 86 return XFS_ERROR(EIO);
125 87
@@ -160,8 +122,8 @@ xfs_iomap(
160 122
161 error = xfs_bmapi(NULL, ip, offset_fsb, 123 error = xfs_bmapi(NULL, ip, offset_fsb,
162 (xfs_filblks_t)(end_fsb - offset_fsb), 124 (xfs_filblks_t)(end_fsb - offset_fsb),
163 bmapi_flags, NULL, 0, &imap, 125 bmapi_flags, NULL, 0, imap,
164 &nimaps, NULL, NULL); 126 nimaps, NULL, NULL);
165 127
166 if (error) 128 if (error)
167 goto out; 129 goto out;
@@ -169,46 +131,41 @@ xfs_iomap(
169 switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) { 131 switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
170 case BMAPI_WRITE: 132 case BMAPI_WRITE:
171 /* If we found an extent, return it */ 133 /* If we found an extent, return it */
172 if (nimaps && 134 if (*nimaps &&
173 (imap.br_startblock != HOLESTARTBLOCK) && 135 (imap->br_startblock != HOLESTARTBLOCK) &&
174 (imap.br_startblock != DELAYSTARTBLOCK)) { 136 (imap->br_startblock != DELAYSTARTBLOCK)) {
175 trace_xfs_iomap_found(ip, offset, count, flags, &imap); 137 trace_xfs_iomap_found(ip, offset, count, flags, imap);
176 break; 138 break;
177 } 139 }
178 140
179 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { 141 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
180 error = xfs_iomap_write_direct(ip, offset, count, flags, 142 error = xfs_iomap_write_direct(ip, offset, count, flags,
181 &imap, &nimaps, nimaps); 143 imap, nimaps);
182 } else { 144 } else {
183 error = xfs_iomap_write_delay(ip, offset, count, flags, 145 error = xfs_iomap_write_delay(ip, offset, count, flags,
184 &imap, &nimaps); 146 imap, nimaps);
185 } 147 }
186 if (!error) { 148 if (!error) {
187 trace_xfs_iomap_alloc(ip, offset, count, flags, &imap); 149 trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
188 } 150 }
189 iomap_flags = IOMAP_NEW; 151 *new = 1;
190 break; 152 break;
191 case BMAPI_ALLOCATE: 153 case BMAPI_ALLOCATE:
192 /* If we found an extent, return it */ 154 /* If we found an extent, return it */
193 xfs_iunlock(ip, lockmode); 155 xfs_iunlock(ip, lockmode);
194 lockmode = 0; 156 lockmode = 0;
195 157
196 if (nimaps && !isnullstartblock(imap.br_startblock)) { 158 if (*nimaps && !isnullstartblock(imap->br_startblock)) {
197 trace_xfs_iomap_found(ip, offset, count, flags, &imap); 159 trace_xfs_iomap_found(ip, offset, count, flags, imap);
198 break; 160 break;
199 } 161 }
200 162
201 error = xfs_iomap_write_allocate(ip, offset, count, 163 error = xfs_iomap_write_allocate(ip, offset, count,
202 &imap, &nimaps); 164 imap, nimaps);
203 break; 165 break;
204 } 166 }
205 167
206 if (nimaps) { 168 ASSERT(*nimaps <= 1);
207 *niomaps = xfs_imap_to_bmap(ip, offset, &imap,
208 iomapp, nimaps, *niomaps, iomap_flags);
209 } else if (niomaps) {
210 *niomaps = 0;
211 }
212 169
213out: 170out:
214 if (lockmode) 171 if (lockmode)
@@ -216,7 +173,6 @@ out:
216 return XFS_ERROR(error); 173 return XFS_ERROR(error);
217} 174}
218 175
219
220STATIC int 176STATIC int
221xfs_iomap_eof_align_last_fsb( 177xfs_iomap_eof_align_last_fsb(
222 xfs_mount_t *mp, 178 xfs_mount_t *mp,
@@ -285,15 +241,14 @@ xfs_cmn_err_fsblock_zero(
285 return EFSCORRUPTED; 241 return EFSCORRUPTED;
286} 242}
287 243
288int 244STATIC int
289xfs_iomap_write_direct( 245xfs_iomap_write_direct(
290 xfs_inode_t *ip, 246 xfs_inode_t *ip,
291 xfs_off_t offset, 247 xfs_off_t offset,
292 size_t count, 248 size_t count,
293 int flags, 249 int flags,
294 xfs_bmbt_irec_t *ret_imap, 250 xfs_bmbt_irec_t *ret_imap,
295 int *nmaps, 251 int *nmaps)
296 int found)
297{ 252{
298 xfs_mount_t *mp = ip->i_mount; 253 xfs_mount_t *mp = ip->i_mount;
299 xfs_fileoff_t offset_fsb; 254 xfs_fileoff_t offset_fsb;
@@ -330,7 +285,7 @@ xfs_iomap_write_direct(
330 if (error) 285 if (error)
331 goto error_out; 286 goto error_out;
332 } else { 287 } else {
333 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) 288 if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK))
334 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 289 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
335 ret_imap->br_blockcount + 290 ret_imap->br_blockcount +
336 ret_imap->br_startoff); 291 ret_imap->br_startoff);
@@ -485,7 +440,7 @@ xfs_iomap_eof_want_preallocate(
485 return 0; 440 return 0;
486} 441}
487 442
488int 443STATIC int
489xfs_iomap_write_delay( 444xfs_iomap_write_delay(
490 xfs_inode_t *ip, 445 xfs_inode_t *ip,
491 xfs_off_t offset, 446 xfs_off_t offset,
@@ -588,7 +543,7 @@ retry:
588 * We no longer bother to look at the incoming map - all we have to 543 * We no longer bother to look at the incoming map - all we have to
589 * guarantee is that whatever we allocate fills the required range. 544 * guarantee is that whatever we allocate fills the required range.
590 */ 545 */
591int 546STATIC int
592xfs_iomap_write_allocate( 547xfs_iomap_write_allocate(
593 xfs_inode_t *ip, 548 xfs_inode_t *ip,
594 xfs_off_t offset, 549 xfs_off_t offset,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 174f29990991..81ac4afd45b3 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,19 +18,6 @@
18#ifndef __XFS_IOMAP_H__ 18#ifndef __XFS_IOMAP_H__
19#define __XFS_IOMAP_H__ 19#define __XFS_IOMAP_H__
20 20
21#define IOMAP_DADDR_NULL ((xfs_daddr_t) (-1LL))
22
23
24typedef enum { /* iomap_flags values */
25 IOMAP_READ = 0, /* mapping for a read */
26 IOMAP_HOLE = 0x02, /* mapping covers a hole */
27 IOMAP_DELAY = 0x04, /* mapping covers delalloc region */
28 IOMAP_REALTIME = 0x10, /* mapping on the realtime device */
29 IOMAP_UNWRITTEN = 0x20, /* mapping covers allocated */
30 /* but uninitialized file data */
31 IOMAP_NEW = 0x40 /* just allocate */
32} iomap_flags_t;
33
34typedef enum { 21typedef enum {
35 /* base extent manipulation calls */ 22 /* base extent manipulation calls */
36 BMAPI_READ = (1 << 0), /* read extents */ 23 BMAPI_READ = (1 << 0), /* read extents */
@@ -52,43 +39,11 @@ typedef enum {
52 { BMAPI_MMAP, "MMAP" }, \ 39 { BMAPI_MMAP, "MMAP" }, \
53 { BMAPI_TRYLOCK, "TRYLOCK" } 40 { BMAPI_TRYLOCK, "TRYLOCK" }
54 41
55/*
56 * xfs_iomap_t: File system I/O map
57 *
58 * The iomap_bn field is expressed in 512-byte blocks, and is where the
59 * mapping starts on disk.
60 *
61 * The iomap_offset, iomap_bsize and iomap_delta fields are in bytes.
62 * iomap_offset is the offset of the mapping in the file itself.
63 * iomap_bsize is the size of the mapping, iomap_delta is the
64 * desired data's offset into the mapping, given the offset supplied
65 * to the file I/O map routine.
66 *
67 * When a request is made to read beyond the logical end of the object,
68 * iomap_size may be set to 0, but iomap_offset and iomap_length should be set
69 * to the actual amount of underlying storage that has been allocated, if any.
70 */
71
72typedef struct xfs_iomap {
73 xfs_daddr_t iomap_bn; /* first 512B blk of mapping */
74 xfs_buftarg_t *iomap_target;
75 xfs_off_t iomap_offset; /* offset of mapping, bytes */
76 xfs_off_t iomap_bsize; /* size of mapping, bytes */
77 xfs_off_t iomap_delta; /* offset into mapping, bytes */
78 iomap_flags_t iomap_flags;
79} xfs_iomap_t;
80
81struct xfs_inode; 42struct xfs_inode;
82struct xfs_bmbt_irec; 43struct xfs_bmbt_irec;
83 44
84extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int, 45extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
85 struct xfs_iomap *, int *); 46 struct xfs_bmbt_irec *, int *, int *);
86extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
87 int, struct xfs_bmbt_irec *, int *, int);
88extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
89 struct xfs_bmbt_irec *, int *);
90extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
91 struct xfs_bmbt_irec *, int *);
92extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); 47extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
93 48
94#endif /* __XFS_IOMAP_H__*/ 49#endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2be019136287..3038dd52c72a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -44,13 +44,8 @@
44 44
45kmem_zone_t *xfs_log_ticket_zone; 45kmem_zone_t *xfs_log_ticket_zone;
46 46
47#define xlog_write_adv_cnt(ptr, len, off, bytes) \
48 { (ptr) += (bytes); \
49 (len) -= (bytes); \
50 (off) += (bytes);}
51
52/* Local miscellaneous function prototypes */ 47/* Local miscellaneous function prototypes */
53STATIC int xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket, 48STATIC int xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
54 xlog_in_core_t **, xfs_lsn_t *); 49 xlog_in_core_t **, xfs_lsn_t *);
55STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, 50STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
56 xfs_buftarg_t *log_target, 51 xfs_buftarg_t *log_target,
@@ -59,11 +54,9 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
59STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes); 54STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
60STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); 55STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
61STATIC void xlog_dealloc_log(xlog_t *log); 56STATIC void xlog_dealloc_log(xlog_t *log);
62STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[], 57STATIC int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
63 int nentries, struct xlog_ticket *tic, 58 struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
64 xfs_lsn_t *start_lsn, 59 xlog_in_core_t **commit_iclog, uint flags);
65 xlog_in_core_t **commit_iclog,
66 uint flags);
67 60
68/* local state machine functions */ 61/* local state machine functions */
69STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); 62STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
@@ -102,7 +95,7 @@ STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
102 uint flags); 95 uint flags);
103 96
104#if defined(DEBUG) 97#if defined(DEBUG)
105STATIC void xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr); 98STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
106STATIC void xlog_verify_grant_head(xlog_t *log, int equals); 99STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
107STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, 100STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
108 int count, boolean_t syncing); 101 int count, boolean_t syncing);
@@ -258,7 +251,7 @@ xfs_log_done(
258 * If we get an error, just continue and give back the log ticket. 251 * If we get an error, just continue and give back the log ticket.
259 */ 252 */
260 (((ticket->t_flags & XLOG_TIC_INITED) == 0) && 253 (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
261 (xlog_commit_record(mp, ticket, iclog, &lsn)))) { 254 (xlog_commit_record(log, ticket, iclog, &lsn)))) {
262 lsn = (xfs_lsn_t) -1; 255 lsn = (xfs_lsn_t) -1;
263 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) { 256 if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
264 flags |= XFS_LOG_REL_PERM_RESERV; 257 flags |= XFS_LOG_REL_PERM_RESERV;
@@ -516,18 +509,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
516#ifdef DEBUG 509#ifdef DEBUG
517 xlog_in_core_t *first_iclog; 510 xlog_in_core_t *first_iclog;
518#endif 511#endif
519 xfs_log_iovec_t reg[1];
520 xlog_ticket_t *tic = NULL; 512 xlog_ticket_t *tic = NULL;
521 xfs_lsn_t lsn; 513 xfs_lsn_t lsn;
522 int error; 514 int error;
523 515
524 /* the data section must be 32 bit size aligned */
525 struct {
526 __uint16_t magic;
527 __uint16_t pad1;
528 __uint32_t pad2; /* may as well make it 64 bits */
529 } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
530
531 /* 516 /*
532 * Don't write out unmount record on read-only mounts. 517 * Don't write out unmount record on read-only mounts.
533 * Or, if we are doing a forced umount (typically because of IO errors). 518 * Or, if we are doing a forced umount (typically because of IO errors).
@@ -549,16 +534,30 @@ xfs_log_unmount_write(xfs_mount_t *mp)
549 } while (iclog != first_iclog); 534 } while (iclog != first_iclog);
550#endif 535#endif
551 if (! (XLOG_FORCED_SHUTDOWN(log))) { 536 if (! (XLOG_FORCED_SHUTDOWN(log))) {
552 reg[0].i_addr = (void*)&magic;
553 reg[0].i_len = sizeof(magic);
554 reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
555
556 error = xfs_log_reserve(mp, 600, 1, &tic, 537 error = xfs_log_reserve(mp, 600, 1, &tic,
557 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE); 538 XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
558 if (!error) { 539 if (!error) {
540 /* the data section must be 32 bit size aligned */
541 struct {
542 __uint16_t magic;
543 __uint16_t pad1;
544 __uint32_t pad2; /* may as well make it 64 bits */
545 } magic = {
546 .magic = XLOG_UNMOUNT_TYPE,
547 };
548 struct xfs_log_iovec reg = {
549 .i_addr = (void *)&magic,
550 .i_len = sizeof(magic),
551 .i_type = XLOG_REG_TYPE_UNMOUNT,
552 };
553 struct xfs_log_vec vec = {
554 .lv_niovecs = 1,
555 .lv_iovecp = &reg,
556 };
557
559 /* remove inited flag */ 558 /* remove inited flag */
560 ((xlog_ticket_t *)tic)->t_flags = 0; 559 tic->t_flags = 0;
561 error = xlog_write(mp, reg, 1, tic, &lsn, 560 error = xlog_write(log, &vec, tic, &lsn,
562 NULL, XLOG_UNMOUNT_TRANS); 561 NULL, XLOG_UNMOUNT_TRANS);
563 /* 562 /*
564 * At this point, we're umounting anyway, 563 * At this point, we're umounting anyway,
@@ -648,10 +647,26 @@ xfs_log_unmount(xfs_mount_t *mp)
648 xlog_dealloc_log(mp->m_log); 647 xlog_dealloc_log(mp->m_log);
649} 648}
650 649
650void
651xfs_log_item_init(
652 struct xfs_mount *mp,
653 struct xfs_log_item *item,
654 int type,
655 struct xfs_item_ops *ops)
656{
657 item->li_mountp = mp;
658 item->li_ailp = mp->m_ail;
659 item->li_type = type;
660 item->li_ops = ops;
661}
662
651/* 663/*
652 * Write region vectors to log. The write happens using the space reservation 664 * Write region vectors to log. The write happens using the space reservation
653 * of the ticket (tic). It is not a requirement that all writes for a given 665 * of the ticket (tic). It is not a requirement that all writes for a given
654 * transaction occur with one call to xfs_log_write(). 666 * transaction occur with one call to xfs_log_write(). However, it is important
667 * to note that the transaction reservation code makes an assumption about the
668 * number of log headers a transaction requires that may be violated if you
669 * don't pass all the transaction vectors in one call....
655 */ 670 */
656int 671int
657xfs_log_write( 672xfs_log_write(
@@ -663,11 +678,15 @@ xfs_log_write(
663{ 678{
664 struct log *log = mp->m_log; 679 struct log *log = mp->m_log;
665 int error; 680 int error;
681 struct xfs_log_vec vec = {
682 .lv_niovecs = nentries,
683 .lv_iovecp = reg,
684 };
666 685
667 if (XLOG_FORCED_SHUTDOWN(log)) 686 if (XLOG_FORCED_SHUTDOWN(log))
668 return XFS_ERROR(EIO); 687 return XFS_ERROR(EIO);
669 688
670 error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0); 689 error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
671 if (error) 690 if (error)
672 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 691 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
673 return error; 692 return error;
@@ -1020,6 +1039,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1020 int i; 1039 int i;
1021 int iclogsize; 1040 int iclogsize;
1022 int error = ENOMEM; 1041 int error = ENOMEM;
1042 uint log2_size = 0;
1023 1043
1024 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); 1044 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
1025 if (!log) { 1045 if (!log) {
@@ -1045,29 +1065,30 @@ xlog_alloc_log(xfs_mount_t *mp,
1045 1065
1046 error = EFSCORRUPTED; 1066 error = EFSCORRUPTED;
1047 if (xfs_sb_version_hassector(&mp->m_sb)) { 1067 if (xfs_sb_version_hassector(&mp->m_sb)) {
1048 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; 1068 log2_size = mp->m_sb.sb_logsectlog;
1049 if (log->l_sectbb_log < 0 || 1069 if (log2_size < BBSHIFT) {
1050 log->l_sectbb_log > mp->m_sectbb_log) { 1070 xlog_warn("XFS: Log sector size too small "
1051 xlog_warn("XFS: Log sector size (0x%x) out of range.", 1071 "(0x%x < 0x%x)", log2_size, BBSHIFT);
1052 log->l_sectbb_log);
1053 goto out_free_log; 1072 goto out_free_log;
1054 } 1073 }
1055 1074
1056 /* for larger sector sizes, must have v2 or external log */ 1075 log2_size -= BBSHIFT;
1057 if (log->l_sectbb_log != 0 && 1076 if (log2_size > mp->m_sectbb_log) {
1058 (log->l_logBBstart != 0 && 1077 xlog_warn("XFS: Log sector size too large "
1059 !xfs_sb_version_haslogv2(&mp->m_sb))) { 1078 "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);
1060 xlog_warn("XFS: log sector size (0x%x) invalid "
1061 "for configuration.", log->l_sectbb_log);
1062 goto out_free_log; 1079 goto out_free_log;
1063 } 1080 }
1064 if (mp->m_sb.sb_logsectlog < BBSHIFT) { 1081
1065 xlog_warn("XFS: Log sector log (0x%x) too small.", 1082 /* for larger sector sizes, must have v2 or external log */
1066 mp->m_sb.sb_logsectlog); 1083 if (log2_size && log->l_logBBstart > 0 &&
1084 !xfs_sb_version_haslogv2(&mp->m_sb)) {
1085
1086 xlog_warn("XFS: log sector size (0x%x) invalid "
1087 "for configuration.", log2_size);
1067 goto out_free_log; 1088 goto out_free_log;
1068 } 1089 }
1069 } 1090 }
1070 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; 1091 log->l_sectBBsize = 1 << log2_size;
1071 1092
1072 xlog_get_iclog_buffer_size(mp, log); 1093 xlog_get_iclog_buffer_size(mp, log);
1073 1094
@@ -1174,26 +1195,31 @@ out:
1174 * ticket. Return the lsn of the commit record. 1195 * ticket. Return the lsn of the commit record.
1175 */ 1196 */
1176STATIC int 1197STATIC int
1177xlog_commit_record(xfs_mount_t *mp, 1198xlog_commit_record(
1178 xlog_ticket_t *ticket, 1199 struct log *log,
1179 xlog_in_core_t **iclog, 1200 struct xlog_ticket *ticket,
1180 xfs_lsn_t *commitlsnp) 1201 struct xlog_in_core **iclog,
1202 xfs_lsn_t *commitlsnp)
1181{ 1203{
1182 int error; 1204 struct xfs_mount *mp = log->l_mp;
1183 xfs_log_iovec_t reg[1]; 1205 int error;
1184 1206 struct xfs_log_iovec reg = {
1185 reg[0].i_addr = NULL; 1207 .i_addr = NULL,
1186 reg[0].i_len = 0; 1208 .i_len = 0,
1187 reg[0].i_type = XLOG_REG_TYPE_COMMIT; 1209 .i_type = XLOG_REG_TYPE_COMMIT,
1210 };
1211 struct xfs_log_vec vec = {
1212 .lv_niovecs = 1,
1213 .lv_iovecp = &reg,
1214 };
1188 1215
1189 ASSERT_ALWAYS(iclog); 1216 ASSERT_ALWAYS(iclog);
1190 if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp, 1217 error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
1191 iclog, XLOG_COMMIT_TRANS))) { 1218 XLOG_COMMIT_TRANS);
1219 if (error)
1192 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1220 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1193 }
1194 return error; 1221 return error;
1195} /* xlog_commit_record */ 1222}
1196
1197 1223
1198/* 1224/*
1199 * Push on the buffer cache code if we ever use more than 75% of the on-disk 1225 * Push on the buffer cache code if we ever use more than 75% of the on-disk
@@ -1614,6 +1640,192 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1614} 1640}
1615 1641
1616/* 1642/*
1643 * Calculate the potential space needed by the log vector. Each region gets
1644 * its own xlog_op_header_t and may need to be double word aligned.
1645 */
1646static int
1647xlog_write_calc_vec_length(
1648 struct xlog_ticket *ticket,
1649 struct xfs_log_vec *log_vector)
1650{
1651 struct xfs_log_vec *lv;
1652 int headers = 0;
1653 int len = 0;
1654 int i;
1655
1656 /* acct for start rec of xact */
1657 if (ticket->t_flags & XLOG_TIC_INITED)
1658 headers++;
1659
1660 for (lv = log_vector; lv; lv = lv->lv_next) {
1661 headers += lv->lv_niovecs;
1662
1663 for (i = 0; i < lv->lv_niovecs; i++) {
1664 struct xfs_log_iovec *vecp = &lv->lv_iovecp[i];
1665
1666 len += vecp->i_len;
1667 xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
1668 }
1669 }
1670
1671 ticket->t_res_num_ophdrs += headers;
1672 len += headers * sizeof(struct xlog_op_header);
1673
1674 return len;
1675}
1676
1677/*
1678 * If first write for transaction, insert start record We can't be trying to
1679 * commit if we are inited. We can't have any "partial_copy" if we are inited.
1680 */
1681static int
1682xlog_write_start_rec(
1683 struct xlog_op_header *ophdr,
1684 struct xlog_ticket *ticket)
1685{
1686 if (!(ticket->t_flags & XLOG_TIC_INITED))
1687 return 0;
1688
1689 ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
1690 ophdr->oh_clientid = ticket->t_clientid;
1691 ophdr->oh_len = 0;
1692 ophdr->oh_flags = XLOG_START_TRANS;
1693 ophdr->oh_res2 = 0;
1694
1695 ticket->t_flags &= ~XLOG_TIC_INITED;
1696
1697 return sizeof(struct xlog_op_header);
1698}
1699
1700static xlog_op_header_t *
1701xlog_write_setup_ophdr(
1702 struct log *log,
1703 struct xlog_op_header *ophdr,
1704 struct xlog_ticket *ticket,
1705 uint flags)
1706{
1707 ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
1708 ophdr->oh_clientid = ticket->t_clientid;
1709 ophdr->oh_res2 = 0;
1710
1711 /* are we copying a commit or unmount record? */
1712 ophdr->oh_flags = flags;
1713
1714 /*
1715 * We've seen logs corrupted with bad transaction client ids. This
1716 * makes sure that XFS doesn't generate them on. Turn this into an EIO
1717 * and shut down the filesystem.
1718 */
1719 switch (ophdr->oh_clientid) {
1720 case XFS_TRANSACTION:
1721 case XFS_VOLUME:
1722 case XFS_LOG:
1723 break;
1724 default:
1725 xfs_fs_cmn_err(CE_WARN, log->l_mp,
1726 "Bad XFS transaction clientid 0x%x in ticket 0x%p",
1727 ophdr->oh_clientid, ticket);
1728 return NULL;
1729 }
1730
1731 return ophdr;
1732}
1733
1734/*
1735 * Set up the parameters of the region copy into the log. This has
1736 * to handle region write split across multiple log buffers - this
1737 * state is kept external to this function so that this code can
1738 * can be written in an obvious, self documenting manner.
1739 */
1740static int
1741xlog_write_setup_copy(
1742 struct xlog_ticket *ticket,
1743 struct xlog_op_header *ophdr,
1744 int space_available,
1745 int space_required,
1746 int *copy_off,
1747 int *copy_len,
1748 int *last_was_partial_copy,
1749 int *bytes_consumed)
1750{
1751 int still_to_copy;
1752
1753 still_to_copy = space_required - *bytes_consumed;
1754 *copy_off = *bytes_consumed;
1755
1756 if (still_to_copy <= space_available) {
1757 /* write of region completes here */
1758 *copy_len = still_to_copy;
1759 ophdr->oh_len = cpu_to_be32(*copy_len);
1760 if (*last_was_partial_copy)
1761 ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
1762 *last_was_partial_copy = 0;
1763 *bytes_consumed = 0;
1764 return 0;
1765 }
1766
1767 /* partial write of region, needs extra log op header reservation */
1768 *copy_len = space_available;
1769 ophdr->oh_len = cpu_to_be32(*copy_len);
1770 ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
1771 if (*last_was_partial_copy)
1772 ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
1773 *bytes_consumed += *copy_len;
1774 (*last_was_partial_copy)++;
1775
1776 /* account for new log op header */
1777 ticket->t_curr_res -= sizeof(struct xlog_op_header);
1778 ticket->t_res_num_ophdrs++;
1779
1780 return sizeof(struct xlog_op_header);
1781}
1782
1783static int
1784xlog_write_copy_finish(
1785 struct log *log,
1786 struct xlog_in_core *iclog,
1787 uint flags,
1788 int *record_cnt,
1789 int *data_cnt,
1790 int *partial_copy,
1791 int *partial_copy_len,
1792 int log_offset,
1793 struct xlog_in_core **commit_iclog)
1794{
1795 if (*partial_copy) {
1796 /*
1797 * This iclog has already been marked WANT_SYNC by
1798 * xlog_state_get_iclog_space.
1799 */
1800 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
1801 *record_cnt = 0;
1802 *data_cnt = 0;
1803 return xlog_state_release_iclog(log, iclog);
1804 }
1805
1806 *partial_copy = 0;
1807 *partial_copy_len = 0;
1808
1809 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
1810 /* no more space in this iclog - push it. */
1811 xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
1812 *record_cnt = 0;
1813 *data_cnt = 0;
1814
1815 spin_lock(&log->l_icloglock);
1816 xlog_state_want_sync(log, iclog);
1817 spin_unlock(&log->l_icloglock);
1818
1819 if (!commit_iclog)
1820 return xlog_state_release_iclog(log, iclog);
1821 ASSERT(flags & XLOG_COMMIT_TRANS);
1822 *commit_iclog = iclog;
1823 }
1824
1825 return 0;
1826}
1827
1828/*
1617 * Write some region out to in-core log 1829 * Write some region out to in-core log
1618 * 1830 *
1619 * This will be called when writing externally provided regions or when 1831 * This will be called when writing externally provided regions or when
@@ -1655,209 +1867,157 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
1655 */ 1867 */
1656STATIC int 1868STATIC int
1657xlog_write( 1869xlog_write(
1658 struct xfs_mount *mp, 1870 struct log *log,
1659 struct xfs_log_iovec reg[], 1871 struct xfs_log_vec *log_vector,
1660 int nentries,
1661 struct xlog_ticket *ticket, 1872 struct xlog_ticket *ticket,
1662 xfs_lsn_t *start_lsn, 1873 xfs_lsn_t *start_lsn,
1663 struct xlog_in_core **commit_iclog, 1874 struct xlog_in_core **commit_iclog,
1664 uint flags) 1875 uint flags)
1665{ 1876{
1666 xlog_t *log = mp->m_log; 1877 struct xlog_in_core *iclog = NULL;
1667 xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */ 1878 struct xfs_log_iovec *vecp;
1668 xlog_op_header_t *logop_head; /* ptr to log operation header */ 1879 struct xfs_log_vec *lv;
1669 __psint_t ptr; /* copy address into data region */ 1880 int len;
1670 int len; /* # xlog_write() bytes 2 still copy */ 1881 int index;
1671 int index; /* region index currently copying */ 1882 int partial_copy = 0;
1672 int log_offset; /* offset (from 0) into data region */ 1883 int partial_copy_len = 0;
1673 int start_rec_copy; /* # bytes to copy for start record */ 1884 int contwr = 0;
1674 int partial_copy; /* did we split a region? */ 1885 int record_cnt = 0;
1675 int partial_copy_len;/* # bytes copied if split region */ 1886 int data_cnt = 0;
1676 int need_copy; /* # bytes need to memcpy this region */ 1887 int error;
1677 int copy_len; /* # bytes actually memcpy'ing */
1678 int copy_off; /* # bytes from entry start */
1679 int contwr; /* continued write of in-core log? */
1680 int error;
1681 int record_cnt = 0, data_cnt = 0;
1682
1683 partial_copy_len = partial_copy = 0;
1684
1685 /* Calculate potential maximum space. Each region gets its own
1686 * xlog_op_header_t and may need to be double word aligned.
1687 */
1688 len = 0;
1689 if (ticket->t_flags & XLOG_TIC_INITED) { /* acct for start rec of xact */
1690 len += sizeof(xlog_op_header_t);
1691 ticket->t_res_num_ophdrs++;
1692 }
1693 1888
1694 for (index = 0; index < nentries; index++) { 1889 *start_lsn = 0;
1695 len += sizeof(xlog_op_header_t); /* each region gets >= 1 */
1696 ticket->t_res_num_ophdrs++;
1697 len += reg[index].i_len;
1698 xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type);
1699 }
1700 contwr = *start_lsn = 0;
1701 1890
1702 if (ticket->t_curr_res < len) { 1891 len = xlog_write_calc_vec_length(ticket, log_vector);
1703 xlog_print_tic_res(mp, ticket); 1892 if (ticket->t_curr_res < len) {
1893 xlog_print_tic_res(log->l_mp, ticket);
1704#ifdef DEBUG 1894#ifdef DEBUG
1705 xlog_panic( 1895 xlog_panic(
1706 "xfs_log_write: reservation ran out. Need to up reservation"); 1896 "xfs_log_write: reservation ran out. Need to up reservation");
1707#else 1897#else
1708 /* Customer configurable panic */ 1898 /* Customer configurable panic */
1709 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, 1899 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, log->l_mp,
1710 "xfs_log_write: reservation ran out. Need to up reservation"); 1900 "xfs_log_write: reservation ran out. Need to up reservation");
1711 /* If we did not panic, shutdown the filesystem */ 1901
1712 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1902 /* If we did not panic, shutdown the filesystem */
1903 xfs_force_shutdown(log->l_mp, SHUTDOWN_CORRUPT_INCORE);
1713#endif 1904#endif
1714 } else 1905 }
1906
1715 ticket->t_curr_res -= len; 1907 ticket->t_curr_res -= len;
1716 1908
1717 for (index = 0; index < nentries; ) { 1909 index = 0;
1718 if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket, 1910 lv = log_vector;
1719 &contwr, &log_offset))) 1911 vecp = lv->lv_iovecp;
1720 return error; 1912 while (lv && index < lv->lv_niovecs) {
1913 void *ptr;
1914 int log_offset;
1721 1915
1722 ASSERT(log_offset <= iclog->ic_size - 1); 1916 error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
1723 ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset); 1917 &contwr, &log_offset);
1918 if (error)
1919 return error;
1724 1920
1725 /* start_lsn is the first lsn written to. That's all we need. */ 1921 ASSERT(log_offset <= iclog->ic_size - 1);
1726 if (! *start_lsn) 1922 ptr = iclog->ic_datap + log_offset;
1727 *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
1728 1923
1729 /* This loop writes out as many regions as can fit in the amount 1924 /* start_lsn is the first lsn written to. That's all we need. */
1730 * of space which was allocated by xlog_state_get_iclog_space(). 1925 if (!*start_lsn)
1731 */ 1926 *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
1732 while (index < nentries) {
1733 ASSERT(reg[index].i_len % sizeof(__int32_t) == 0);
1734 ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0);
1735 start_rec_copy = 0;
1736
1737 /* If first write for transaction, insert start record.
1738 * We can't be trying to commit if we are inited. We can't
1739 * have any "partial_copy" if we are inited.
1740 */
1741 if (ticket->t_flags & XLOG_TIC_INITED) {
1742 logop_head = (xlog_op_header_t *)ptr;
1743 logop_head->oh_tid = cpu_to_be32(ticket->t_tid);
1744 logop_head->oh_clientid = ticket->t_clientid;
1745 logop_head->oh_len = 0;
1746 logop_head->oh_flags = XLOG_START_TRANS;
1747 logop_head->oh_res2 = 0;
1748 ticket->t_flags &= ~XLOG_TIC_INITED; /* clear bit */
1749 record_cnt++;
1750
1751 start_rec_copy = sizeof(xlog_op_header_t);
1752 xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy);
1753 }
1754 1927
1755 /* Copy log operation header directly into data section */ 1928 /*
1756 logop_head = (xlog_op_header_t *)ptr; 1929 * This loop writes out as many regions as can fit in the amount
1757 logop_head->oh_tid = cpu_to_be32(ticket->t_tid); 1930 * of space which was allocated by xlog_state_get_iclog_space().
1758 logop_head->oh_clientid = ticket->t_clientid; 1931 */
1759 logop_head->oh_res2 = 0; 1932 while (lv && index < lv->lv_niovecs) {
1933 struct xfs_log_iovec *reg = &vecp[index];
1934 struct xlog_op_header *ophdr;
1935 int start_rec_copy;
1936 int copy_len;
1937 int copy_off;
1938
1939 ASSERT(reg->i_len % sizeof(__int32_t) == 0);
1940 ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
1941
1942 start_rec_copy = xlog_write_start_rec(ptr, ticket);
1943 if (start_rec_copy) {
1944 record_cnt++;
1945 xlog_write_adv_cnt(&ptr, &len, &log_offset,
1946 start_rec_copy);
1947 }
1760 1948
1761 /* header copied directly */ 1949 ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
1762 xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t)); 1950 if (!ophdr)
1951 return XFS_ERROR(EIO);
1763 1952
1764 /* are we copying a commit or unmount record? */ 1953 xlog_write_adv_cnt(&ptr, &len, &log_offset,
1765 logop_head->oh_flags = flags; 1954 sizeof(struct xlog_op_header));
1955
1956 len += xlog_write_setup_copy(ticket, ophdr,
1957 iclog->ic_size-log_offset,
1958 reg->i_len,
1959 &copy_off, &copy_len,
1960 &partial_copy,
1961 &partial_copy_len);
1962 xlog_verify_dest_ptr(log, ptr);
1963
1964 /* copy region */
1965 ASSERT(copy_len >= 0);
1966 memcpy(ptr, reg->i_addr + copy_off, copy_len);
1967 xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
1968
1969 copy_len += start_rec_copy + sizeof(xlog_op_header_t);
1970 record_cnt++;
1971 data_cnt += contwr ? copy_len : 0;
1972
1973 error = xlog_write_copy_finish(log, iclog, flags,
1974 &record_cnt, &data_cnt,
1975 &partial_copy,
1976 &partial_copy_len,
1977 log_offset,
1978 commit_iclog);
1979 if (error)
1980 return error;
1766 1981
1767 /* 1982 /*
1768 * We've seen logs corrupted with bad transaction client 1983 * if we had a partial copy, we need to get more iclog
1769 * ids. This makes sure that XFS doesn't generate them on. 1984 * space but we don't want to increment the region
1770 * Turn this into an EIO and shut down the filesystem. 1985 * index because there is still more is this region to
1771 */ 1986 * write.
1772 switch (logop_head->oh_clientid) { 1987 *
1773 case XFS_TRANSACTION: 1988 * If we completed writing this region, and we flushed
1774 case XFS_VOLUME: 1989 * the iclog (indicated by resetting of the record
1775 case XFS_LOG: 1990 * count), then we also need to get more log space. If
1776 break; 1991 * this was the last record, though, we are done and
1777 default: 1992 * can just return.
1778 xfs_fs_cmn_err(CE_WARN, mp, 1993 */
1779 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1994 if (partial_copy)
1780 logop_head->oh_clientid, ticket); 1995 break;
1781 return XFS_ERROR(EIO);
1782 }
1783 1996
1784 /* Partial write last time? => (partial_copy != 0) 1997 if (++index == lv->lv_niovecs) {
1785 * need_copy is the amount we'd like to copy if everything could 1998 lv = lv->lv_next;
1786 * fit in the current memcpy. 1999 index = 0;
1787 */ 2000 if (lv)
1788 need_copy = reg[index].i_len - partial_copy_len; 2001 vecp = lv->lv_iovecp;
1789 2002 }
1790 copy_off = partial_copy_len; 2003 if (record_cnt == 0) {
1791 if (need_copy <= iclog->ic_size - log_offset) { /*complete write */ 2004 if (!lv)
1792 copy_len = need_copy; 2005 return 0;
1793 logop_head->oh_len = cpu_to_be32(copy_len); 2006 break;
1794 if (partial_copy) 2007 }
1795 logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
1796 partial_copy_len = partial_copy = 0;
1797 } else { /* partial write */
1798 copy_len = iclog->ic_size - log_offset;
1799 logop_head->oh_len = cpu_to_be32(copy_len);
1800 logop_head->oh_flags |= XLOG_CONTINUE_TRANS;
1801 if (partial_copy)
1802 logop_head->oh_flags |= XLOG_WAS_CONT_TRANS;
1803 partial_copy_len += copy_len;
1804 partial_copy++;
1805 len += sizeof(xlog_op_header_t); /* from splitting of region */
1806 /* account for new log op header */
1807 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1808 ticket->t_res_num_ophdrs++;
1809 }
1810 xlog_verify_dest_ptr(log, ptr);
1811
1812 /* copy region */
1813 ASSERT(copy_len >= 0);
1814 memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
1815 xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
1816
1817 /* make copy_len total bytes copied, including headers */
1818 copy_len += start_rec_copy + sizeof(xlog_op_header_t);
1819 record_cnt++;
1820 data_cnt += contwr ? copy_len : 0;
1821 if (partial_copy) { /* copied partial region */
1822 /* already marked WANT_SYNC by xlog_state_get_iclog_space */
1823 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1824 record_cnt = data_cnt = 0;
1825 if ((error = xlog_state_release_iclog(log, iclog)))
1826 return error;
1827 break; /* don't increment index */
1828 } else { /* copied entire region */
1829 index++;
1830 partial_copy_len = partial_copy = 0;
1831
1832 if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
1833 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1834 record_cnt = data_cnt = 0;
1835 spin_lock(&log->l_icloglock);
1836 xlog_state_want_sync(log, iclog);
1837 spin_unlock(&log->l_icloglock);
1838 if (commit_iclog) {
1839 ASSERT(flags & XLOG_COMMIT_TRANS);
1840 *commit_iclog = iclog;
1841 } else if ((error = xlog_state_release_iclog(log, iclog)))
1842 return error;
1843 if (index == nentries)
1844 return 0; /* we are done */
1845 else
1846 break;
1847 } 2008 }
1848 } /* if (partial_copy) */ 2009 }
1849 } /* while (index < nentries) */ 2010
1850 } /* for (index = 0; index < nentries; ) */ 2011 ASSERT(len == 0);
1851 ASSERT(len == 0); 2012
2013 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
2014 if (!commit_iclog)
2015 return xlog_state_release_iclog(log, iclog);
1852 2016
1853 xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
1854 if (commit_iclog) {
1855 ASSERT(flags & XLOG_COMMIT_TRANS); 2017 ASSERT(flags & XLOG_COMMIT_TRANS);
1856 *commit_iclog = iclog; 2018 *commit_iclog = iclog;
1857 return 0; 2019 return 0;
1858 } 2020}
1859 return xlog_state_release_iclog(log, iclog);
1860} /* xlog_write */
1861 2021
1862 2022
1863/***************************************************************************** 2023/*****************************************************************************
@@ -3157,14 +3317,16 @@ xfs_log_ticket_get(
3157 * Allocate and initialise a new log ticket. 3317 * Allocate and initialise a new log ticket.
3158 */ 3318 */
3159STATIC xlog_ticket_t * 3319STATIC xlog_ticket_t *
3160xlog_ticket_alloc(xlog_t *log, 3320xlog_ticket_alloc(
3161 int unit_bytes, 3321 struct log *log,
3162 int cnt, 3322 int unit_bytes,
3163 char client, 3323 int cnt,
3164 uint xflags) 3324 char client,
3325 uint xflags)
3165{ 3326{
3166 xlog_ticket_t *tic; 3327 struct xlog_ticket *tic;
3167 uint num_headers; 3328 uint num_headers;
3329 int iclog_space;
3168 3330
3169 tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); 3331 tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
3170 if (!tic) 3332 if (!tic)
@@ -3208,16 +3370,40 @@ xlog_ticket_alloc(xlog_t *log,
3208 /* for start-rec */ 3370 /* for start-rec */
3209 unit_bytes += sizeof(xlog_op_header_t); 3371 unit_bytes += sizeof(xlog_op_header_t);
3210 3372
3211 /* for LR headers */ 3373 /*
3212 num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log); 3374 * for LR headers - the space for data in an iclog is the size minus
3375 * the space used for the headers. If we use the iclog size, then we
3376 * undercalculate the number of headers required.
3377 *
3378 * Furthermore - the addition of op headers for split-recs might
3379 * increase the space required enough to require more log and op
3380 * headers, so take that into account too.
3381 *
3382 * IMPORTANT: This reservation makes the assumption that if this
3383 * transaction is the first in an iclog and hence has the LR headers
3384 * accounted to it, then the remaining space in the iclog is
3385 * exclusively for this transaction. i.e. if the transaction is larger
3386 * than the iclog, it will be the only thing in that iclog.
3387 * Fundamentally, this means we must pass the entire log vector to
3388 * xlog_write to guarantee this.
3389 */
3390 iclog_space = log->l_iclog_size - log->l_iclog_hsize;
3391 num_headers = howmany(unit_bytes, iclog_space);
3392
3393 /* for split-recs - ophdrs added when data split over LRs */
3394 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3395
3396 /* add extra header reservations if we overrun */
3397 while (!num_headers ||
3398 howmany(unit_bytes, iclog_space) > num_headers) {
3399 unit_bytes += sizeof(xlog_op_header_t);
3400 num_headers++;
3401 }
3213 unit_bytes += log->l_iclog_hsize * num_headers; 3402 unit_bytes += log->l_iclog_hsize * num_headers;
3214 3403
3215 /* for commit-rec LR header - note: padding will subsume the ophdr */ 3404 /* for commit-rec LR header - note: padding will subsume the ophdr */
3216 unit_bytes += log->l_iclog_hsize; 3405 unit_bytes += log->l_iclog_hsize;
3217 3406
3218 /* for split-recs - ophdrs added when data split over LRs */
3219 unit_bytes += sizeof(xlog_op_header_t) * num_headers;
3220
3221 /* for roundoff padding for transaction data and one for commit record */ 3407 /* for roundoff padding for transaction data and one for commit record */
3222 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && 3408 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
3223 log->l_mp->m_sb.sb_logsunit > 1) { 3409 log->l_mp->m_sb.sb_logsunit > 1) {
@@ -3233,13 +3419,13 @@ xlog_ticket_alloc(xlog_t *log,
3233 tic->t_curr_res = unit_bytes; 3419 tic->t_curr_res = unit_bytes;
3234 tic->t_cnt = cnt; 3420 tic->t_cnt = cnt;
3235 tic->t_ocnt = cnt; 3421 tic->t_ocnt = cnt;
3236 tic->t_tid = (xlog_tid_t)((__psint_t)tic & 0xffffffff); 3422 tic->t_tid = random32();
3237 tic->t_clientid = client; 3423 tic->t_clientid = client;
3238 tic->t_flags = XLOG_TIC_INITED; 3424 tic->t_flags = XLOG_TIC_INITED;
3239 tic->t_trans_type = 0; 3425 tic->t_trans_type = 0;
3240 if (xflags & XFS_LOG_PERM_RESERV) 3426 if (xflags & XFS_LOG_PERM_RESERV)
3241 tic->t_flags |= XLOG_TIC_PERM_RESERV; 3427 tic->t_flags |= XLOG_TIC_PERM_RESERV;
3242 sv_init(&(tic->t_wait), SV_DEFAULT, "logtick"); 3428 sv_init(&tic->t_wait, SV_DEFAULT, "logtick");
3243 3429
3244 xlog_tic_reset_res(tic); 3430 xlog_tic_reset_res(tic);
3245 3431
@@ -3260,20 +3446,22 @@ xlog_ticket_alloc(xlog_t *log,
3260 * part of the log in case we trash the log structure. 3446 * part of the log in case we trash the log structure.
3261 */ 3447 */
3262void 3448void
3263xlog_verify_dest_ptr(xlog_t *log, 3449xlog_verify_dest_ptr(
3264 __psint_t ptr) 3450 struct log *log,
3451 char *ptr)
3265{ 3452{
3266 int i; 3453 int i;
3267 int good_ptr = 0; 3454 int good_ptr = 0;
3268 3455
3269 for (i=0; i < log->l_iclog_bufs; i++) { 3456 for (i = 0; i < log->l_iclog_bufs; i++) {
3270 if (ptr >= (__psint_t)log->l_iclog_bak[i] && 3457 if (ptr >= log->l_iclog_bak[i] &&
3271 ptr <= (__psint_t)log->l_iclog_bak[i]+log->l_iclog_size) 3458 ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
3272 good_ptr++; 3459 good_ptr++;
3273 } 3460 }
3274 if (! good_ptr) 3461
3462 if (!good_ptr)
3275 xlog_panic("xlog_verify_dest_ptr: invalid ptr"); 3463 xlog_panic("xlog_verify_dest_ptr: invalid ptr");
3276} /* xlog_verify_dest_ptr */ 3464}
3277 3465
3278STATIC void 3466STATIC void
3279xlog_verify_grant_head(xlog_t *log, int equals) 3467xlog_verify_grant_head(xlog_t *log, int equals)
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 97a24c7795a4..229d1f36ba9a 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -110,6 +110,12 @@ typedef struct xfs_log_iovec {
110 uint i_type; /* type of region */ 110 uint i_type; /* type of region */
111} xfs_log_iovec_t; 111} xfs_log_iovec_t;
112 112
113struct xfs_log_vec {
114 struct xfs_log_vec *lv_next; /* next lv in build list */
115 int lv_niovecs; /* number of iovecs in lv */
116 struct xfs_log_iovec *lv_iovecp; /* iovec array */
117};
118
113/* 119/*
114 * Structure used to pass callback function and the function's argument 120 * Structure used to pass callback function and the function's argument
115 * to the log manager. 121 * to the log manager.
@@ -126,6 +132,13 @@ typedef struct xfs_log_callback {
126struct xfs_mount; 132struct xfs_mount;
127struct xlog_in_core; 133struct xlog_in_core;
128struct xlog_ticket; 134struct xlog_ticket;
135struct xfs_log_item;
136struct xfs_item_ops;
137
138void xfs_log_item_init(struct xfs_mount *mp,
139 struct xfs_log_item *item,
140 int type,
141 struct xfs_item_ops *ops);
129 142
130xfs_lsn_t xfs_log_done(struct xfs_mount *mp, 143xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
131 struct xlog_ticket *ticket, 144 struct xlog_ticket *ticket,
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index fd02a18facd5..9cf695154451 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -396,9 +396,7 @@ typedef struct log {
396 struct xfs_buf_cancel **l_buf_cancel_table; 396 struct xfs_buf_cancel **l_buf_cancel_table;
397 int l_iclog_hsize; /* size of iclog header */ 397 int l_iclog_hsize; /* size of iclog header */
398 int l_iclog_heads; /* # of iclog header sectors */ 398 int l_iclog_heads; /* # of iclog header sectors */
399 uint l_sectbb_log; /* log2 of sector size in BBs */ 399 uint l_sectBBsize; /* sector size in BBs (2^n) */
400 uint l_sectbb_mask; /* sector size (in BBs)
401 * alignment mask */
402 int l_iclog_size; /* size of log in bytes */ 400 int l_iclog_size; /* size of log in bytes */
403 int l_iclog_size_log; /* log power size of log */ 401 int l_iclog_size_log; /* log power size of log */
404 int l_iclog_bufs; /* number of iclog buffers */ 402 int l_iclog_bufs; /* number of iclog buffers */
@@ -449,6 +447,14 @@ extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
449 447
450extern kmem_zone_t *xfs_log_ticket_zone; 448extern kmem_zone_t *xfs_log_ticket_zone;
451 449
450static inline void
451xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
452{
453 *ptr += bytes;
454 *len -= bytes;
455 *off += bytes;
456}
457
452/* 458/*
453 * Unmount record type is used as a pseudo transaction type for the ticket. 459 * Unmount record type is used as a pseudo transaction type for the ticket.
454 * It's value must be outside the range of XFS_TRANS_* values. 460 * It's value must be outside the range of XFS_TRANS_* values.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 22e6efdc17ea..0de08e366315 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -56,33 +56,61 @@ STATIC void xlog_recover_check_summary(xlog_t *);
56#define xlog_recover_check_summary(log) 56#define xlog_recover_check_summary(log)
57#endif 57#endif
58 58
59
60/* 59/*
61 * Sector aligned buffer routines for buffer create/read/write/access 60 * Sector aligned buffer routines for buffer create/read/write/access
62 */ 61 */
63 62
64#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs) \ 63/*
65 ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \ 64 * Verify the given count of basic blocks is valid number of blocks
66 ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) ) 65 * to specify for an operation involving the given XFS log buffer.
67#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno) ((bno) & ~(log)->l_sectbb_mask) 66 * Returns nonzero if the count is valid, 0 otherwise.
67 */
68 68
69static inline int
70xlog_buf_bbcount_valid(
71 xlog_t *log,
72 int bbcount)
73{
74 return bbcount > 0 && bbcount <= log->l_logBBsize;
75}
76
77/*
78 * Allocate a buffer to hold log data. The buffer needs to be able
79 * to map to a range of nbblks basic blocks at any valid (basic
80 * block) offset within the log.
81 */
69STATIC xfs_buf_t * 82STATIC xfs_buf_t *
70xlog_get_bp( 83xlog_get_bp(
71 xlog_t *log, 84 xlog_t *log,
72 int nbblks) 85 int nbblks)
73{ 86{
74 if (nbblks <= 0 || nbblks > log->l_logBBsize) { 87 if (!xlog_buf_bbcount_valid(log, nbblks)) {
75 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); 88 xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
76 XFS_ERROR_REPORT("xlog_get_bp(1)", 89 nbblks);
77 XFS_ERRLEVEL_HIGH, log->l_mp); 90 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
78 return NULL; 91 return NULL;
79 } 92 }
80 93
81 if (log->l_sectbb_log) { 94 /*
82 if (nbblks > 1) 95 * We do log I/O in units of log sectors (a power-of-2
83 nbblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 96 * multiple of the basic block size), so we round up the
84 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks); 97 * requested size to acommodate the basic blocks required
85 } 98 * for complete log sectors.
99 *
100 * In addition, the buffer may be used for a non-sector-
101 * aligned block offset, in which case an I/O of the
102 * requested size could extend beyond the end of the
103 * buffer. If the requested size is only 1 basic block it
104 * will never straddle a sector boundary, so this won't be
105 * an issue. Nor will this be a problem if the log I/O is
106 * done in basic blocks (sector size 1). But otherwise we
107 * extend the buffer by one extra log sector to ensure
108 * there's space to accomodate this possiblility.
109 */
110 if (nbblks > 1 && log->l_sectBBsize > 1)
111 nbblks += log->l_sectBBsize;
112 nbblks = round_up(nbblks, log->l_sectBBsize);
113
86 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); 114 return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
87} 115}
88 116
@@ -93,6 +121,10 @@ xlog_put_bp(
93 xfs_buf_free(bp); 121 xfs_buf_free(bp);
94} 122}
95 123
124/*
125 * Return the address of the start of the given block number's data
126 * in a log buffer. The buffer covers a log sector-aligned region.
127 */
96STATIC xfs_caddr_t 128STATIC xfs_caddr_t
97xlog_align( 129xlog_align(
98 xlog_t *log, 130 xlog_t *log,
@@ -100,14 +132,14 @@ xlog_align(
100 int nbblks, 132 int nbblks,
101 xfs_buf_t *bp) 133 xfs_buf_t *bp)
102{ 134{
135 xfs_daddr_t offset;
103 xfs_caddr_t ptr; 136 xfs_caddr_t ptr;
104 137
105 if (!log->l_sectbb_log) 138 offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
106 return XFS_BUF_PTR(bp); 139 ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
140
141 ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
107 142
108 ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
109 ASSERT(XFS_BUF_SIZE(bp) >=
110 BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
111 return ptr; 143 return ptr;
112} 144}
113 145
@@ -124,21 +156,18 @@ xlog_bread_noalign(
124{ 156{
125 int error; 157 int error;
126 158
127 if (nbblks <= 0 || nbblks > log->l_logBBsize) { 159 if (!xlog_buf_bbcount_valid(log, nbblks)) {
128 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); 160 xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
129 XFS_ERROR_REPORT("xlog_bread(1)", 161 nbblks);
130 XFS_ERRLEVEL_HIGH, log->l_mp); 162 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
131 return EFSCORRUPTED; 163 return EFSCORRUPTED;
132 } 164 }
133 165
134 if (log->l_sectbb_log) { 166 blk_no = round_down(blk_no, log->l_sectBBsize);
135 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 167 nbblks = round_up(nbblks, log->l_sectBBsize);
136 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
137 }
138 168
139 ASSERT(nbblks > 0); 169 ASSERT(nbblks > 0);
140 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 170 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
141 ASSERT(bp);
142 171
143 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); 172 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
144 XFS_BUF_READ(bp); 173 XFS_BUF_READ(bp);
@@ -186,17 +215,15 @@ xlog_bwrite(
186{ 215{
187 int error; 216 int error;
188 217
189 if (nbblks <= 0 || nbblks > log->l_logBBsize) { 218 if (!xlog_buf_bbcount_valid(log, nbblks)) {
190 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks); 219 xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
191 XFS_ERROR_REPORT("xlog_bwrite(1)", 220 nbblks);
192 XFS_ERRLEVEL_HIGH, log->l_mp); 221 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
193 return EFSCORRUPTED; 222 return EFSCORRUPTED;
194 } 223 }
195 224
196 if (log->l_sectbb_log) { 225 blk_no = round_down(blk_no, log->l_sectBBsize);
197 blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no); 226 nbblks = round_up(nbblks, log->l_sectBBsize);
198 nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
199 }
200 227
201 ASSERT(nbblks > 0); 228 ASSERT(nbblks > 0);
202 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp)); 229 ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
@@ -327,39 +354,38 @@ xlog_find_cycle_start(
327{ 354{
328 xfs_caddr_t offset; 355 xfs_caddr_t offset;
329 xfs_daddr_t mid_blk; 356 xfs_daddr_t mid_blk;
357 xfs_daddr_t end_blk;
330 uint mid_cycle; 358 uint mid_cycle;
331 int error; 359 int error;
332 360
333 mid_blk = BLK_AVG(first_blk, *last_blk); 361 end_blk = *last_blk;
334 while (mid_blk != first_blk && mid_blk != *last_blk) { 362 mid_blk = BLK_AVG(first_blk, end_blk);
363 while (mid_blk != first_blk && mid_blk != end_blk) {
335 error = xlog_bread(log, mid_blk, 1, bp, &offset); 364 error = xlog_bread(log, mid_blk, 1, bp, &offset);
336 if (error) 365 if (error)
337 return error; 366 return error;
338 mid_cycle = xlog_get_cycle(offset); 367 mid_cycle = xlog_get_cycle(offset);
339 if (mid_cycle == cycle) { 368 if (mid_cycle == cycle)
340 *last_blk = mid_blk; 369 end_blk = mid_blk; /* last_half_cycle == mid_cycle */
341 /* last_half_cycle == mid_cycle */ 370 else
342 } else { 371 first_blk = mid_blk; /* first_half_cycle == mid_cycle */
343 first_blk = mid_blk; 372 mid_blk = BLK_AVG(first_blk, end_blk);
344 /* first_half_cycle == mid_cycle */
345 }
346 mid_blk = BLK_AVG(first_blk, *last_blk);
347 } 373 }
348 ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) || 374 ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
349 (mid_blk == *last_blk && mid_blk-1 == first_blk)); 375 (mid_blk == end_blk && mid_blk-1 == first_blk));
376
377 *last_blk = end_blk;
350 378
351 return 0; 379 return 0;
352} 380}
353 381
354/* 382/*
355 * Check that the range of blocks does not contain the cycle number 383 * Check that a range of blocks does not contain stop_on_cycle_no.
356 * given. The scan needs to occur from front to back and the ptr into the 384 * Fill in *new_blk with the block offset where such a block is
357 * region must be updated since a later routine will need to perform another 385 * found, or with -1 (an invalid block number) if there is no such
358 * test. If the region is completely good, we end up returning the same 386 * block in the range. The scan needs to occur from front to back
359 * last block number. 387 * and the pointer into the region must be updated since a later
360 * 388 * routine will need to perform another test.
361 * Set blkno to -1 if we encounter no errors. This is an invalid block number
362 * since we don't ever expect logs to get this large.
363 */ 389 */
364STATIC int 390STATIC int
365xlog_find_verify_cycle( 391xlog_find_verify_cycle(
@@ -376,12 +402,16 @@ xlog_find_verify_cycle(
376 xfs_caddr_t buf = NULL; 402 xfs_caddr_t buf = NULL;
377 int error = 0; 403 int error = 0;
378 404
405 /*
406 * Greedily allocate a buffer big enough to handle the full
407 * range of basic blocks we'll be examining. If that fails,
408 * try a smaller size. We need to be able to read at least
409 * a log sector, or we're out of luck.
410 */
379 bufblks = 1 << ffs(nbblks); 411 bufblks = 1 << ffs(nbblks);
380
381 while (!(bp = xlog_get_bp(log, bufblks))) { 412 while (!(bp = xlog_get_bp(log, bufblks))) {
382 /* can't get enough memory to do everything in one big buffer */
383 bufblks >>= 1; 413 bufblks >>= 1;
384 if (bufblks <= log->l_sectbb_log) 414 if (bufblks < log->l_sectBBsize)
385 return ENOMEM; 415 return ENOMEM;
386 } 416 }
387 417
@@ -629,7 +659,7 @@ xlog_find_head(
629 * In this case we want to find the first block with cycle 659 * In this case we want to find the first block with cycle
630 * number matching last_half_cycle. We expect the log to be 660 * number matching last_half_cycle. We expect the log to be
631 * some variation on 661 * some variation on
632 * x + 1 ... | x ... 662 * x + 1 ... | x ... | x
633 * The first block with cycle number x (last_half_cycle) will 663 * The first block with cycle number x (last_half_cycle) will
634 * be where the new head belongs. First we do a binary search 664 * be where the new head belongs. First we do a binary search
635 * for the first occurrence of last_half_cycle. The binary 665 * for the first occurrence of last_half_cycle. The binary
@@ -639,11 +669,13 @@ xlog_find_head(
639 * the log, then we look for occurrences of last_half_cycle - 1 669 * the log, then we look for occurrences of last_half_cycle - 1
640 * at the end of the log. The cases we're looking for look 670 * at the end of the log. The cases we're looking for look
641 * like 671 * like
642 * x + 1 ... | x | x + 1 | x ... 672 * v binary search stopped here
643 * ^ binary search stopped here 673 * x + 1 ... | x | x + 1 | x ... | x
674 * ^ but we want to locate this spot
644 * or 675 * or
645 * x + 1 ... | x ... | x - 1 | x
646 * <---------> less than scan distance 676 * <---------> less than scan distance
677 * x + 1 ... | x ... | x - 1 | x
678 * ^ we want to locate this spot
647 */ 679 */
648 stop_on_cycle = last_half_cycle; 680 stop_on_cycle = last_half_cycle;
649 if ((error = xlog_find_cycle_start(log, bp, first_blk, 681 if ((error = xlog_find_cycle_start(log, bp, first_blk,
@@ -699,16 +731,16 @@ xlog_find_head(
699 * certainly not the head of the log. By searching for 731 * certainly not the head of the log. By searching for
700 * last_half_cycle-1 we accomplish that. 732 * last_half_cycle-1 we accomplish that.
701 */ 733 */
702 start_blk = log_bbnum - num_scan_bblks + head_blk;
703 ASSERT(head_blk <= INT_MAX && 734 ASSERT(head_blk <= INT_MAX &&
704 (xfs_daddr_t) num_scan_bblks - head_blk >= 0); 735 (xfs_daddr_t) num_scan_bblks >= head_blk);
736 start_blk = log_bbnum - (num_scan_bblks - head_blk);
705 if ((error = xlog_find_verify_cycle(log, start_blk, 737 if ((error = xlog_find_verify_cycle(log, start_blk,
706 num_scan_bblks - (int)head_blk, 738 num_scan_bblks - (int)head_blk,
707 (stop_on_cycle - 1), &new_blk))) 739 (stop_on_cycle - 1), &new_blk)))
708 goto bp_err; 740 goto bp_err;
709 if (new_blk != -1) { 741 if (new_blk != -1) {
710 head_blk = new_blk; 742 head_blk = new_blk;
711 goto bad_blk; 743 goto validate_head;
712 } 744 }
713 745
714 /* 746 /*
@@ -726,7 +758,7 @@ xlog_find_head(
726 head_blk = new_blk; 758 head_blk = new_blk;
727 } 759 }
728 760
729 bad_blk: 761validate_head:
730 /* 762 /*
731 * Now we need to make sure head_blk is not pointing to a block in 763 * Now we need to make sure head_blk is not pointing to a block in
732 * the middle of a log record. 764 * the middle of a log record.
@@ -748,7 +780,7 @@ xlog_find_head(
748 if ((error = xlog_find_verify_log_record(log, start_blk, 780 if ((error = xlog_find_verify_log_record(log, start_blk,
749 &head_blk, 0)) == -1) { 781 &head_blk, 0)) == -1) {
750 /* We hit the beginning of the log during our search */ 782 /* We hit the beginning of the log during our search */
751 start_blk = log_bbnum - num_scan_bblks + head_blk; 783 start_blk = log_bbnum - (num_scan_bblks - head_blk);
752 new_blk = log_bbnum; 784 new_blk = log_bbnum;
753 ASSERT(start_blk <= INT_MAX && 785 ASSERT(start_blk <= INT_MAX &&
754 (xfs_daddr_t) log_bbnum-start_blk >= 0); 786 (xfs_daddr_t) log_bbnum-start_blk >= 0);
@@ -833,12 +865,12 @@ xlog_find_tail(
833 if (*head_blk == 0) { /* special case */ 865 if (*head_blk == 0) { /* special case */
834 error = xlog_bread(log, 0, 1, bp, &offset); 866 error = xlog_bread(log, 0, 1, bp, &offset);
835 if (error) 867 if (error)
836 goto bread_err; 868 goto done;
837 869
838 if (xlog_get_cycle(offset) == 0) { 870 if (xlog_get_cycle(offset) == 0) {
839 *tail_blk = 0; 871 *tail_blk = 0;
840 /* leave all other log inited values alone */ 872 /* leave all other log inited values alone */
841 goto exit; 873 goto done;
842 } 874 }
843 } 875 }
844 876
@@ -849,7 +881,7 @@ xlog_find_tail(
849 for (i = (int)(*head_blk) - 1; i >= 0; i--) { 881 for (i = (int)(*head_blk) - 1; i >= 0; i--) {
850 error = xlog_bread(log, i, 1, bp, &offset); 882 error = xlog_bread(log, i, 1, bp, &offset);
851 if (error) 883 if (error)
852 goto bread_err; 884 goto done;
853 885
854 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { 886 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
855 found = 1; 887 found = 1;
@@ -866,7 +898,7 @@ xlog_find_tail(
866 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) { 898 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
867 error = xlog_bread(log, i, 1, bp, &offset); 899 error = xlog_bread(log, i, 1, bp, &offset);
868 if (error) 900 if (error)
869 goto bread_err; 901 goto done;
870 902
871 if (XLOG_HEADER_MAGIC_NUM == 903 if (XLOG_HEADER_MAGIC_NUM ==
872 be32_to_cpu(*(__be32 *)offset)) { 904 be32_to_cpu(*(__be32 *)offset)) {
@@ -941,7 +973,7 @@ xlog_find_tail(
941 umount_data_blk = (i + hblks) % log->l_logBBsize; 973 umount_data_blk = (i + hblks) % log->l_logBBsize;
942 error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 974 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
943 if (error) 975 if (error)
944 goto bread_err; 976 goto done;
945 977
946 op_head = (xlog_op_header_t *)offset; 978 op_head = (xlog_op_header_t *)offset;
947 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 979 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
@@ -987,12 +1019,10 @@ xlog_find_tail(
987 * But... if the -device- itself is readonly, just skip this. 1019 * But... if the -device- itself is readonly, just skip this.
988 * We can't recover this device anyway, so it won't matter. 1020 * We can't recover this device anyway, so it won't matter.
989 */ 1021 */
990 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { 1022 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
991 error = xlog_clear_stale_blocks(log, tail_lsn); 1023 error = xlog_clear_stale_blocks(log, tail_lsn);
992 }
993 1024
994bread_err: 1025done:
995exit:
996 xlog_put_bp(bp); 1026 xlog_put_bp(bp);
997 1027
998 if (error) 1028 if (error)
@@ -1152,16 +1182,22 @@ xlog_write_log_records(
1152 xfs_caddr_t offset; 1182 xfs_caddr_t offset;
1153 xfs_buf_t *bp; 1183 xfs_buf_t *bp;
1154 int balign, ealign; 1184 int balign, ealign;
1155 int sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1); 1185 int sectbb = log->l_sectBBsize;
1156 int end_block = start_block + blocks; 1186 int end_block = start_block + blocks;
1157 int bufblks; 1187 int bufblks;
1158 int error = 0; 1188 int error = 0;
1159 int i, j = 0; 1189 int i, j = 0;
1160 1190
1191 /*
1192 * Greedily allocate a buffer big enough to handle the full
1193 * range of basic blocks to be written. If that fails, try
1194 * a smaller size. We need to be able to write at least a
1195 * log sector, or we're out of luck.
1196 */
1161 bufblks = 1 << ffs(blocks); 1197 bufblks = 1 << ffs(blocks);
1162 while (!(bp = xlog_get_bp(log, bufblks))) { 1198 while (!(bp = xlog_get_bp(log, bufblks))) {
1163 bufblks >>= 1; 1199 bufblks >>= 1;
1164 if (bufblks <= log->l_sectbb_log) 1200 if (bufblks < sectbb)
1165 return ENOMEM; 1201 return ENOMEM;
1166 } 1202 }
1167 1203
@@ -1169,7 +1205,7 @@ xlog_write_log_records(
1169 * the buffer in the starting sector not covered by the first 1205 * the buffer in the starting sector not covered by the first
1170 * write below. 1206 * write below.
1171 */ 1207 */
1172 balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block); 1208 balign = round_down(start_block, sectbb);
1173 if (balign != start_block) { 1209 if (balign != start_block) {
1174 error = xlog_bread_noalign(log, start_block, 1, bp); 1210 error = xlog_bread_noalign(log, start_block, 1, bp);
1175 if (error) 1211 if (error)
@@ -1188,7 +1224,7 @@ xlog_write_log_records(
1188 * the buffer in the final sector not covered by the write. 1224 * the buffer in the final sector not covered by the write.
1189 * If this is the same sector as the above read, skip it. 1225 * If this is the same sector as the above read, skip it.
1190 */ 1226 */
1191 ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block); 1227 ealign = round_down(end_block, sectbb);
1192 if (j == 0 && (start_block + endcount > ealign)) { 1228 if (j == 0 && (start_block + endcount > ealign)) {
1193 offset = XFS_BUF_PTR(bp); 1229 offset = XFS_BUF_PTR(bp);
1194 balign = BBTOB(ealign - start_block); 1230 balign = BBTOB(ealign - start_block);
@@ -1408,6 +1444,7 @@ xlog_recover_add_item(
1408 1444
1409STATIC int 1445STATIC int
1410xlog_recover_add_to_cont_trans( 1446xlog_recover_add_to_cont_trans(
1447 struct log *log,
1411 xlog_recover_t *trans, 1448 xlog_recover_t *trans,
1412 xfs_caddr_t dp, 1449 xfs_caddr_t dp,
1413 int len) 1450 int len)
@@ -1434,6 +1471,7 @@ xlog_recover_add_to_cont_trans(
1434 memcpy(&ptr[old_len], dp, len); /* d, s, l */ 1471 memcpy(&ptr[old_len], dp, len); /* d, s, l */
1435 item->ri_buf[item->ri_cnt-1].i_len += len; 1472 item->ri_buf[item->ri_cnt-1].i_len += len;
1436 item->ri_buf[item->ri_cnt-1].i_addr = ptr; 1473 item->ri_buf[item->ri_cnt-1].i_addr = ptr;
1474 trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
1437 return 0; 1475 return 0;
1438} 1476}
1439 1477
@@ -1452,6 +1490,7 @@ xlog_recover_add_to_cont_trans(
1452 */ 1490 */
1453STATIC int 1491STATIC int
1454xlog_recover_add_to_trans( 1492xlog_recover_add_to_trans(
1493 struct log *log,
1455 xlog_recover_t *trans, 1494 xlog_recover_t *trans,
1456 xfs_caddr_t dp, 1495 xfs_caddr_t dp,
1457 int len) 1496 int len)
@@ -1510,6 +1549,7 @@ xlog_recover_add_to_trans(
1510 item->ri_buf[item->ri_cnt].i_addr = ptr; 1549 item->ri_buf[item->ri_cnt].i_addr = ptr;
1511 item->ri_buf[item->ri_cnt].i_len = len; 1550 item->ri_buf[item->ri_cnt].i_len = len;
1512 item->ri_cnt++; 1551 item->ri_cnt++;
1552 trace_xfs_log_recover_item_add(log, trans, item, 0);
1513 return 0; 1553 return 0;
1514} 1554}
1515 1555
@@ -1521,7 +1561,9 @@ xlog_recover_add_to_trans(
1521 */ 1561 */
1522STATIC int 1562STATIC int
1523xlog_recover_reorder_trans( 1563xlog_recover_reorder_trans(
1524 xlog_recover_t *trans) 1564 struct log *log,
1565 xlog_recover_t *trans,
1566 int pass)
1525{ 1567{
1526 xlog_recover_item_t *item, *n; 1568 xlog_recover_item_t *item, *n;
1527 LIST_HEAD(sort_list); 1569 LIST_HEAD(sort_list);
@@ -1535,6 +1577,8 @@ xlog_recover_reorder_trans(
1535 switch (ITEM_TYPE(item)) { 1577 switch (ITEM_TYPE(item)) {
1536 case XFS_LI_BUF: 1578 case XFS_LI_BUF:
1537 if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) { 1579 if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
1580 trace_xfs_log_recover_item_reorder_head(log,
1581 trans, item, pass);
1538 list_move(&item->ri_list, &trans->r_itemq); 1582 list_move(&item->ri_list, &trans->r_itemq);
1539 break; 1583 break;
1540 } 1584 }
@@ -1543,6 +1587,8 @@ xlog_recover_reorder_trans(
1543 case XFS_LI_QUOTAOFF: 1587 case XFS_LI_QUOTAOFF:
1544 case XFS_LI_EFD: 1588 case XFS_LI_EFD:
1545 case XFS_LI_EFI: 1589 case XFS_LI_EFI:
1590 trace_xfs_log_recover_item_reorder_tail(log,
1591 trans, item, pass);
1546 list_move_tail(&item->ri_list, &trans->r_itemq); 1592 list_move_tail(&item->ri_list, &trans->r_itemq);
1547 break; 1593 break;
1548 default: 1594 default:
@@ -1592,8 +1638,10 @@ xlog_recover_do_buffer_pass1(
1592 /* 1638 /*
1593 * If this isn't a cancel buffer item, then just return. 1639 * If this isn't a cancel buffer item, then just return.
1594 */ 1640 */
1595 if (!(flags & XFS_BLI_CANCEL)) 1641 if (!(flags & XFS_BLI_CANCEL)) {
1642 trace_xfs_log_recover_buf_not_cancel(log, buf_f);
1596 return; 1643 return;
1644 }
1597 1645
1598 /* 1646 /*
1599 * Insert an xfs_buf_cancel record into the hash table of 1647 * Insert an xfs_buf_cancel record into the hash table of
@@ -1627,6 +1675,7 @@ xlog_recover_do_buffer_pass1(
1627 while (nextp != NULL) { 1675 while (nextp != NULL) {
1628 if (nextp->bc_blkno == blkno && nextp->bc_len == len) { 1676 if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
1629 nextp->bc_refcount++; 1677 nextp->bc_refcount++;
1678 trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
1630 return; 1679 return;
1631 } 1680 }
1632 prevp = nextp; 1681 prevp = nextp;
@@ -1640,6 +1689,7 @@ xlog_recover_do_buffer_pass1(
1640 bcp->bc_refcount = 1; 1689 bcp->bc_refcount = 1;
1641 bcp->bc_next = NULL; 1690 bcp->bc_next = NULL;
1642 prevp->bc_next = bcp; 1691 prevp->bc_next = bcp;
1692 trace_xfs_log_recover_buf_cancel_add(log, buf_f);
1643} 1693}
1644 1694
1645/* 1695/*
@@ -1779,6 +1829,8 @@ xlog_recover_do_inode_buffer(
1779 unsigned int *data_map = NULL; 1829 unsigned int *data_map = NULL;
1780 unsigned int map_size = 0; 1830 unsigned int map_size = 0;
1781 1831
1832 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1833
1782 switch (buf_f->blf_type) { 1834 switch (buf_f->blf_type) {
1783 case XFS_LI_BUF: 1835 case XFS_LI_BUF:
1784 data_map = buf_f->blf_data_map; 1836 data_map = buf_f->blf_data_map;
@@ -1874,6 +1926,7 @@ xlog_recover_do_inode_buffer(
1874/*ARGSUSED*/ 1926/*ARGSUSED*/
1875STATIC void 1927STATIC void
1876xlog_recover_do_reg_buffer( 1928xlog_recover_do_reg_buffer(
1929 struct xfs_mount *mp,
1877 xlog_recover_item_t *item, 1930 xlog_recover_item_t *item,
1878 xfs_buf_t *bp, 1931 xfs_buf_t *bp,
1879 xfs_buf_log_format_t *buf_f) 1932 xfs_buf_log_format_t *buf_f)
@@ -1885,6 +1938,8 @@ xlog_recover_do_reg_buffer(
1885 unsigned int map_size = 0; 1938 unsigned int map_size = 0;
1886 int error; 1939 int error;
1887 1940
1941 trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
1942
1888 switch (buf_f->blf_type) { 1943 switch (buf_f->blf_type) {
1889 case XFS_LI_BUF: 1944 case XFS_LI_BUF:
1890 data_map = buf_f->blf_data_map; 1945 data_map = buf_f->blf_data_map;
@@ -2083,6 +2138,8 @@ xlog_recover_do_dquot_buffer(
2083{ 2138{
2084 uint type; 2139 uint type;
2085 2140
2141 trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
2142
2086 /* 2143 /*
2087 * Filesystems are required to send in quota flags at mount time. 2144 * Filesystems are required to send in quota flags at mount time.
2088 */ 2145 */
@@ -2103,7 +2160,7 @@ xlog_recover_do_dquot_buffer(
2103 if (log->l_quotaoffs_flag & type) 2160 if (log->l_quotaoffs_flag & type)
2104 return; 2161 return;
2105 2162
2106 xlog_recover_do_reg_buffer(item, bp, buf_f); 2163 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2107} 2164}
2108 2165
2109/* 2166/*
@@ -2164,9 +2221,11 @@ xlog_recover_do_buffer_trans(
2164 */ 2221 */
2165 cancel = xlog_recover_do_buffer_pass2(log, buf_f); 2222 cancel = xlog_recover_do_buffer_pass2(log, buf_f);
2166 if (cancel) { 2223 if (cancel) {
2224 trace_xfs_log_recover_buf_cancel(log, buf_f);
2167 return 0; 2225 return 0;
2168 } 2226 }
2169 } 2227 }
2228 trace_xfs_log_recover_buf_recover(log, buf_f);
2170 switch (buf_f->blf_type) { 2229 switch (buf_f->blf_type) {
2171 case XFS_LI_BUF: 2230 case XFS_LI_BUF:
2172 blkno = buf_f->blf_blkno; 2231 blkno = buf_f->blf_blkno;
@@ -2204,7 +2263,7 @@ xlog_recover_do_buffer_trans(
2204 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) { 2263 (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
2205 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f); 2264 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
2206 } else { 2265 } else {
2207 xlog_recover_do_reg_buffer(item, bp, buf_f); 2266 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2208 } 2267 }
2209 if (error) 2268 if (error)
2210 return XFS_ERROR(error); 2269 return XFS_ERROR(error);
@@ -2284,8 +2343,10 @@ xlog_recover_do_inode_trans(
2284 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno, 2343 if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
2285 in_f->ilf_len, 0)) { 2344 in_f->ilf_len, 0)) {
2286 error = 0; 2345 error = 0;
2346 trace_xfs_log_recover_inode_cancel(log, in_f);
2287 goto error; 2347 goto error;
2288 } 2348 }
2349 trace_xfs_log_recover_inode_recover(log, in_f);
2289 2350
2290 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 2351 bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
2291 XBF_LOCK); 2352 XBF_LOCK);
@@ -2337,6 +2398,7 @@ xlog_recover_do_inode_trans(
2337 /* do nothing */ 2398 /* do nothing */
2338 } else { 2399 } else {
2339 xfs_buf_relse(bp); 2400 xfs_buf_relse(bp);
2401 trace_xfs_log_recover_inode_skip(log, in_f);
2340 error = 0; 2402 error = 0;
2341 goto error; 2403 goto error;
2342 } 2404 }
@@ -2758,11 +2820,12 @@ xlog_recover_do_trans(
2758 int error = 0; 2820 int error = 0;
2759 xlog_recover_item_t *item; 2821 xlog_recover_item_t *item;
2760 2822
2761 error = xlog_recover_reorder_trans(trans); 2823 error = xlog_recover_reorder_trans(log, trans, pass);
2762 if (error) 2824 if (error)
2763 return error; 2825 return error;
2764 2826
2765 list_for_each_entry(item, &trans->r_itemq, ri_list) { 2827 list_for_each_entry(item, &trans->r_itemq, ri_list) {
2828 trace_xfs_log_recover_item_recover(log, trans, item, pass);
2766 switch (ITEM_TYPE(item)) { 2829 switch (ITEM_TYPE(item)) {
2767 case XFS_LI_BUF: 2830 case XFS_LI_BUF:
2768 error = xlog_recover_do_buffer_trans(log, item, pass); 2831 error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2919,8 +2982,9 @@ xlog_recover_process_data(
2919 error = xlog_recover_unmount_trans(trans); 2982 error = xlog_recover_unmount_trans(trans);
2920 break; 2983 break;
2921 case XLOG_WAS_CONT_TRANS: 2984 case XLOG_WAS_CONT_TRANS:
2922 error = xlog_recover_add_to_cont_trans(trans, 2985 error = xlog_recover_add_to_cont_trans(log,
2923 dp, be32_to_cpu(ohead->oh_len)); 2986 trans, dp,
2987 be32_to_cpu(ohead->oh_len));
2924 break; 2988 break;
2925 case XLOG_START_TRANS: 2989 case XLOG_START_TRANS:
2926 xlog_warn( 2990 xlog_warn(
@@ -2930,7 +2994,7 @@ xlog_recover_process_data(
2930 break; 2994 break;
2931 case 0: 2995 case 0:
2932 case XLOG_CONTINUE_TRANS: 2996 case XLOG_CONTINUE_TRANS:
2933 error = xlog_recover_add_to_trans(trans, 2997 error = xlog_recover_add_to_trans(log, trans,
2934 dp, be32_to_cpu(ohead->oh_len)); 2998 dp, be32_to_cpu(ohead->oh_len));
2935 break; 2999 break;
2936 default: 3000 default:
@@ -3331,42 +3395,6 @@ xlog_pack_data(
3331 } 3395 }
3332} 3396}
3333 3397
3334#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
3335STATIC void
3336xlog_unpack_data_checksum(
3337 xlog_rec_header_t *rhead,
3338 xfs_caddr_t dp,
3339 xlog_t *log)
3340{
3341 __be32 *up = (__be32 *)dp;
3342 uint chksum = 0;
3343 int i;
3344
3345 /* divide length by 4 to get # words */
3346 for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) {
3347 chksum ^= be32_to_cpu(*up);
3348 up++;
3349 }
3350 if (chksum != be32_to_cpu(rhead->h_chksum)) {
3351 if (rhead->h_chksum ||
3352 ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
3353 cmn_err(CE_DEBUG,
3354 "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
3355 be32_to_cpu(rhead->h_chksum), chksum);
3356 cmn_err(CE_DEBUG,
3357"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
3358 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3359 cmn_err(CE_DEBUG,
3360 "XFS: LogR this is a LogV2 filesystem\n");
3361 }
3362 log->l_flags |= XLOG_CHKSUM_MISMATCH;
3363 }
3364 }
3365}
3366#else
3367#define xlog_unpack_data_checksum(rhead, dp, log)
3368#endif
3369
3370STATIC void 3398STATIC void
3371xlog_unpack_data( 3399xlog_unpack_data(
3372 xlog_rec_header_t *rhead, 3400 xlog_rec_header_t *rhead,
@@ -3390,8 +3418,6 @@ xlog_unpack_data(
3390 dp += BBSIZE; 3418 dp += BBSIZE;
3391 } 3419 }
3392 } 3420 }
3393
3394 xlog_unpack_data_checksum(rhead, dp, log);
3395} 3421}
3396 3422
3397STATIC int 3423STATIC int
@@ -3490,7 +3516,7 @@ xlog_do_recovery_pass(
3490 hblks = 1; 3516 hblks = 1;
3491 } 3517 }
3492 } else { 3518 } else {
3493 ASSERT(log->l_sectbb_log == 0); 3519 ASSERT(log->l_sectBBsize == 1);
3494 hblks = 1; 3520 hblks = 1;
3495 hbp = xlog_get_bp(log, 1); 3521 hbp = xlog_get_bp(log, 1);
3496 h_size = XLOG_BIG_RECORD_BSIZE; 3522 h_size = XLOG_BIG_RECORD_BSIZE;
@@ -3946,10 +3972,6 @@ xlog_recover_check_summary(
3946 xfs_agf_t *agfp; 3972 xfs_agf_t *agfp;
3947 xfs_buf_t *agfbp; 3973 xfs_buf_t *agfbp;
3948 xfs_buf_t *agibp; 3974 xfs_buf_t *agibp;
3949 xfs_buf_t *sbbp;
3950#ifdef XFS_LOUD_RECOVERY
3951 xfs_sb_t *sbp;
3952#endif
3953 xfs_agnumber_t agno; 3975 xfs_agnumber_t agno;
3954 __uint64_t freeblks; 3976 __uint64_t freeblks;
3955 __uint64_t itotal; 3977 __uint64_t itotal;
@@ -3984,30 +4006,5 @@ xlog_recover_check_summary(
3984 xfs_buf_relse(agibp); 4006 xfs_buf_relse(agibp);
3985 } 4007 }
3986 } 4008 }
3987
3988 sbbp = xfs_getsb(mp, 0);
3989#ifdef XFS_LOUD_RECOVERY
3990 sbp = &mp->m_sb;
3991 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp));
3992 cmn_err(CE_NOTE,
3993 "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
3994 sbp->sb_icount, itotal);
3995 cmn_err(CE_NOTE,
3996 "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
3997 sbp->sb_ifree, ifree);
3998 cmn_err(CE_NOTE,
3999 "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
4000 sbp->sb_fdblocks, freeblks);
4001#if 0
4002 /*
4003 * This is turned off until I account for the allocation
4004 * btree blocks which live in free space.
4005 */
4006 ASSERT(sbp->sb_icount == itotal);
4007 ASSERT(sbp->sb_ifree == ifree);
4008 ASSERT(sbp->sb_fdblocks == freeblks);
4009#endif
4010#endif
4011 xfs_buf_relse(sbbp);
4012} 4009}
4013#endif /* DEBUG */ 4010#endif /* DEBUG */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e79b56b4bca6..d7bf38c8cd1c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1405,13 +1405,6 @@ xfs_mountfs(
1405 xfs_qm_mount_quotas(mp); 1405 xfs_qm_mount_quotas(mp);
1406 } 1406 }
1407 1407
1408#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1409 if (XFS_IS_QUOTA_ON(mp))
1410 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
1411 else
1412 xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
1413#endif
1414
1415 /* 1408 /*
1416 * Now we are mounted, reserve a small amount of unused space for 1409 * Now we are mounted, reserve a small amount of unused space for
1417 * privileged transactions. This is needed so that transaction 1410 * privileged transactions. This is needed so that transaction
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7b983e..9ff48a16a7ee 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */
262} xfs_mount_t; 263} xfs_mount_t;
263 264
264/* 265/*
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index fdcab3f81dde..e0e64b113bd6 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -201,9 +201,6 @@ typedef struct xfs_qoff_logformat {
201#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 201#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
202#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */ 202#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */
203#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 203#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
204#define XFS_QMOPT_QUOTAOFF 0x0000080 /* quotas are being turned off */
205#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */
206#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */
207#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 204#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
208#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 205#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
209#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ 206#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f73e358bae8d..be578ecb4af2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -45,23 +45,12 @@
45#include "xfs_trans_space.h" 45#include "xfs_trans_space.h"
46#include "xfs_inode_item.h" 46#include "xfs_inode_item.h"
47 47
48
49STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *);
50STATIC uint xfs_trans_count_vecs(xfs_trans_t *);
51STATIC void xfs_trans_fill_vecs(xfs_trans_t *, xfs_log_iovec_t *);
52STATIC void xfs_trans_uncommit(xfs_trans_t *, uint);
53STATIC void xfs_trans_committed(xfs_trans_t *, int);
54STATIC void xfs_trans_chunk_committed(xfs_log_item_chunk_t *, xfs_lsn_t, int);
55STATIC void xfs_trans_free(xfs_trans_t *);
56
57kmem_zone_t *xfs_trans_zone; 48kmem_zone_t *xfs_trans_zone;
58 49
59
60/* 50/*
61 * Reservation functions here avoid a huge stack in xfs_trans_init 51 * Reservation functions here avoid a huge stack in xfs_trans_init
62 * due to register overflow from temporaries in the calculations. 52 * due to register overflow from temporaries in the calculations.
63 */ 53 */
64
65STATIC uint 54STATIC uint
66xfs_calc_write_reservation(xfs_mount_t *mp) 55xfs_calc_write_reservation(xfs_mount_t *mp)
67{ 56{
@@ -261,6 +250,19 @@ _xfs_trans_alloc(
261} 250}
262 251
263/* 252/*
253 * Free the transaction structure. If there is more clean up
254 * to do when the structure is freed, add it here.
255 */
256STATIC void
257xfs_trans_free(
258 xfs_trans_t *tp)
259{
260 atomic_dec(&tp->t_mountp->m_active_trans);
261 xfs_trans_free_dqinfo(tp);
262 kmem_zone_free(xfs_trans_zone, tp);
263}
264
265/*
264 * This is called to create a new transaction which will share the 266 * This is called to create a new transaction which will share the
265 * permanent log reservation of the given transaction. The remaining 267 * permanent log reservation of the given transaction. The remaining
266 * unused block and rt extent reservations are also inherited. This 268 * unused block and rt extent reservations are also inherited. This
@@ -764,94 +766,278 @@ xfs_trans_unreserve_and_mod_sb(
764 } 766 }
765} 767}
766 768
769/*
770 * Total up the number of log iovecs needed to commit this
771 * transaction. The transaction itself needs one for the
772 * transaction header. Ask each dirty item in turn how many
773 * it needs to get the total.
774 */
775static uint
776xfs_trans_count_vecs(
777 struct xfs_trans *tp)
778{
779 int nvecs;
780 xfs_log_item_desc_t *lidp;
781
782 nvecs = 1;
783 lidp = xfs_trans_first_item(tp);
784 ASSERT(lidp != NULL);
785
786 /* In the non-debug case we need to start bailing out if we
787 * didn't find a log_item here, return zero and let trans_commit
788 * deal with it.
789 */
790 if (lidp == NULL)
791 return 0;
792
793 while (lidp != NULL) {
794 /*
795 * Skip items which aren't dirty in this transaction.
796 */
797 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
798 lidp = xfs_trans_next_item(tp, lidp);
799 continue;
800 }
801 lidp->lid_size = IOP_SIZE(lidp->lid_item);
802 nvecs += lidp->lid_size;
803 lidp = xfs_trans_next_item(tp, lidp);
804 }
805
806 return nvecs;
807}
767 808
768/* 809/*
769 * xfs_trans_commit 810 * Fill in the vector with pointers to data to be logged
811 * by this transaction. The transaction header takes
812 * the first vector, and then each dirty item takes the
813 * number of vectors it indicated it needed in xfs_trans_count_vecs().
770 * 814 *
771 * Commit the given transaction to the log a/synchronously. 815 * As each item fills in the entries it needs, also pin the item
816 * so that it cannot be flushed out until the log write completes.
817 */
818static void
819xfs_trans_fill_vecs(
820 struct xfs_trans *tp,
821 struct xfs_log_iovec *log_vector)
822{
823 xfs_log_item_desc_t *lidp;
824 struct xfs_log_iovec *vecp;
825 uint nitems;
826
827 /*
828 * Skip over the entry for the transaction header, we'll
829 * fill that in at the end.
830 */
831 vecp = log_vector + 1;
832
833 nitems = 0;
834 lidp = xfs_trans_first_item(tp);
835 ASSERT(lidp);
836 while (lidp) {
837 /* Skip items which aren't dirty in this transaction. */
838 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
839 lidp = xfs_trans_next_item(tp, lidp);
840 continue;
841 }
842
843 /*
844 * The item may be marked dirty but not log anything. This can
845 * be used to get called when a transaction is committed.
846 */
847 if (lidp->lid_size)
848 nitems++;
849 IOP_FORMAT(lidp->lid_item, vecp);
850 vecp += lidp->lid_size;
851 IOP_PIN(lidp->lid_item);
852 lidp = xfs_trans_next_item(tp, lidp);
853 }
854
855 /*
856 * Now that we've counted the number of items in this transaction, fill
857 * in the transaction header. Note that the transaction header does not
858 * have a log item.
859 */
860 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
861 tp->t_header.th_type = tp->t_type;
862 tp->t_header.th_num_items = nitems;
863 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
864 log_vector->i_len = sizeof(xfs_trans_header_t);
865 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
866}
867
868/*
869 * The committed item processing consists of calling the committed routine of
870 * each logged item, updating the item's position in the AIL if necessary, and
871 * unpinning each item. If the committed routine returns -1, then do nothing
872 * further with the item because it may have been freed.
772 * 873 *
773 * XFS disk error handling mechanism is not based on a typical 874 * Since items are unlocked when they are copied to the incore log, it is
774 * transaction abort mechanism. Logically after the filesystem 875 * possible for two transactions to be completing and manipulating the same
775 * gets marked 'SHUTDOWN', we can't let any new transactions 876 * item simultaneously. The AIL lock will protect the lsn field of each item.
776 * be durable - ie. committed to disk - because some metadata might 877 * The value of this field can never go backwards.
777 * be inconsistent. In such cases, this returns an error, and the 878 *
778 * caller may assume that all locked objects joined to the transaction 879 * We unpin the items after repositioning them in the AIL, because otherwise
779 * have already been unlocked as if the commit had succeeded. 880 * they could be immediately flushed and we'd have to race with the flusher
780 * Do not reference the transaction structure after this call. 881 * trying to pull the item from the AIL as we add it.
781 */ 882 */
782 /*ARGSUSED*/ 883static void
783int 884xfs_trans_item_committed(
784_xfs_trans_commit( 885 struct xfs_log_item *lip,
785 xfs_trans_t *tp, 886 xfs_lsn_t commit_lsn,
786 uint flags, 887 int aborted)
787 int *log_flushed)
788{ 888{
789 xfs_log_iovec_t *log_vector; 889 xfs_lsn_t item_lsn;
790 int nvec; 890 struct xfs_ail *ailp;
791 xfs_mount_t *mp;
792 xfs_lsn_t commit_lsn;
793 /* REFERENCED */
794 int error;
795 int log_flags;
796 int sync;
797#define XFS_TRANS_LOGVEC_COUNT 16
798 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
799 struct xlog_in_core *commit_iclog;
800 int shutdown;
801 891
802 commit_lsn = -1; 892 if (aborted)
893 lip->li_flags |= XFS_LI_ABORTED;
894 item_lsn = IOP_COMMITTED(lip, commit_lsn);
895
896 /* If the committed routine returns -1, item has been freed. */
897 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
898 return;
803 899
804 /* 900 /*
805 * Determine whether this commit is releasing a permanent 901 * If the returned lsn is greater than what it contained before, update
806 * log reservation or not. 902 * the location of the item in the AIL. If it is not, then do nothing.
903 * Items can never move backwards in the AIL.
904 *
905 * While the new lsn should usually be greater, it is possible that a
906 * later transaction completing simultaneously with an earlier one
907 * using the same item could complete first with a higher lsn. This
908 * would cause the earlier transaction to fail the test below.
807 */ 909 */
808 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 910 ailp = lip->li_ailp;
809 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 911 spin_lock(&ailp->xa_lock);
810 log_flags = XFS_LOG_REL_PERM_RESERV; 912 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
913 /*
914 * This will set the item's lsn to item_lsn and update the
915 * position of the item in the AIL.
916 *
917 * xfs_trans_ail_update() drops the AIL lock.
918 */
919 xfs_trans_ail_update(ailp, lip, item_lsn);
811 } else { 920 } else {
812 log_flags = 0; 921 spin_unlock(&ailp->xa_lock);
813 } 922 }
814 mp = tp->t_mountp;
815 923
816 /* 924 /*
817 * If there is nothing to be logged by the transaction, 925 * Now that we've repositioned the item in the AIL, unpin it so it can
818 * then unlock all of the items associated with the 926 * be flushed. Pass information about buffer stale state down from the
819 * transaction and free the transaction structure. 927 * log item flags, if anyone else stales the buffer we do not want to
820 * Also make sure to return any reserved blocks to 928 * pay any attention to it.
821 * the free pool.
822 */ 929 */
823shut_us_down: 930 IOP_UNPIN(lip);
824 shutdown = XFS_FORCED_SHUTDOWN(mp) ? EIO : 0; 931}
825 if (!(tp->t_flags & XFS_TRANS_DIRTY) || shutdown) { 932
826 xfs_trans_unreserve_and_mod_sb(tp); 933/* Clear all the per-AG busy list items listed in this transaction */
934static void
935xfs_trans_clear_busy_extents(
936 struct xfs_trans *tp)
937{
938 xfs_log_busy_chunk_t *lbcp;
939 xfs_log_busy_slot_t *lbsp;
940 int i;
941
942 for (lbcp = &tp->t_busy; lbcp != NULL; lbcp = lbcp->lbc_next) {
943 i = 0;
944 for (lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
945 if (XFS_LBC_ISFREE(lbcp, i))
946 continue;
947 xfs_alloc_clear_busy(tp, lbsp->lbc_ag, lbsp->lbc_idx);
948 }
949 }
950 xfs_trans_free_busy(tp);
951}
952
953/*
954 * This is typically called by the LM when a transaction has been fully
955 * committed to disk. It needs to unpin the items which have
956 * been logged by the transaction and update their positions
957 * in the AIL if necessary.
958 *
959 * This also gets called when the transactions didn't get written out
960 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
961 */
962STATIC void
963xfs_trans_committed(
964 struct xfs_trans *tp,
965 int abortflag)
966{
967 xfs_log_item_desc_t *lidp;
968 xfs_log_item_chunk_t *licp;
969 xfs_log_item_chunk_t *next_licp;
970
971 /* Call the transaction's completion callback if there is one. */
972 if (tp->t_callback != NULL)
973 tp->t_callback(tp, tp->t_callarg);
974
975 for (lidp = xfs_trans_first_item(tp);
976 lidp != NULL;
977 lidp = xfs_trans_next_item(tp, lidp)) {
978 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
979 }
980
981 /* free the item chunks, ignoring the embedded chunk */
982 for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) {
983 next_licp = licp->lic_next;
984 kmem_free(licp);
985 }
986
987 xfs_trans_clear_busy_extents(tp);
988 xfs_trans_free(tp);
989}
990
991/*
992 * Called from the trans_commit code when we notice that
993 * the filesystem is in the middle of a forced shutdown.
994 */
995STATIC void
996xfs_trans_uncommit(
997 struct xfs_trans *tp,
998 uint flags)
999{
1000 xfs_log_item_desc_t *lidp;
1001
1002 for (lidp = xfs_trans_first_item(tp);
1003 lidp != NULL;
1004 lidp = xfs_trans_next_item(tp, lidp)) {
827 /* 1005 /*
828 * It is indeed possible for the transaction to be 1006 * Unpin all but those that aren't dirty.
829 * not dirty but the dqinfo portion to be. All that
830 * means is that we have some (non-persistent) quota
831 * reservations that need to be unreserved.
832 */ 1007 */
833 xfs_trans_unreserve_and_mod_dquots(tp); 1008 if (lidp->lid_flags & XFS_LID_DIRTY)
834 if (tp->t_ticket) { 1009 IOP_UNPIN_REMOVE(lidp->lid_item, tp);
835 commit_lsn = xfs_log_done(mp, tp->t_ticket,
836 NULL, log_flags);
837 if (commit_lsn == -1 && !shutdown)
838 shutdown = XFS_ERROR(EIO);
839 }
840 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
841 xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0);
842 xfs_trans_free_busy(tp);
843 xfs_trans_free(tp);
844 XFS_STATS_INC(xs_trans_empty);
845 return (shutdown);
846 } 1010 }
847 ASSERT(tp->t_ticket != NULL);
848 1011
849 /* 1012 xfs_trans_unreserve_and_mod_sb(tp);
850 * If we need to update the superblock, then do it now. 1013 xfs_trans_unreserve_and_mod_dquots(tp);
851 */ 1014
852 if (tp->t_flags & XFS_TRANS_SB_DIRTY) 1015 xfs_trans_free_items(tp, flags);
853 xfs_trans_apply_sb_deltas(tp); 1016 xfs_trans_free_busy(tp);
854 xfs_trans_apply_dquot_deltas(tp); 1017 xfs_trans_free(tp);
1018}
1019
1020/*
1021 * Format the transaction direct to the iclog. This isolates the physical
1022 * transaction commit operation from the logical operation and hence allows
1023 * other methods to be introduced without affecting the existing commit path.
1024 */
1025static int
1026xfs_trans_commit_iclog(
1027 struct xfs_mount *mp,
1028 struct xfs_trans *tp,
1029 xfs_lsn_t *commit_lsn,
1030 int flags)
1031{
1032 int shutdown;
1033 int error;
1034 int log_flags = 0;
1035 struct xlog_in_core *commit_iclog;
1036#define XFS_TRANS_LOGVEC_COUNT 16
1037 struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
1038 struct xfs_log_iovec *log_vector;
1039 uint nvec;
1040
855 1041
856 /* 1042 /*
857 * Ask each log item how many log_vector entries it will 1043 * Ask each log item how many log_vector entries it will
@@ -861,8 +1047,7 @@ shut_us_down:
861 */ 1047 */
862 nvec = xfs_trans_count_vecs(tp); 1048 nvec = xfs_trans_count_vecs(tp);
863 if (nvec == 0) { 1049 if (nvec == 0) {
864 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1050 return ENOMEM; /* triggers a shutdown! */
865 goto shut_us_down;
866 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { 1051 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
867 log_vector = log_vector_fast; 1052 log_vector = log_vector_fast;
868 } else { 1053 } else {
@@ -877,6 +1062,9 @@ shut_us_down:
877 */ 1062 */
878 xfs_trans_fill_vecs(tp, log_vector); 1063 xfs_trans_fill_vecs(tp, log_vector);
879 1064
1065 if (flags & XFS_TRANS_RELEASE_LOG_RES)
1066 log_flags = XFS_LOG_REL_PERM_RESERV;
1067
880 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); 1068 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
881 1069
882 /* 1070 /*
@@ -884,18 +1072,17 @@ shut_us_down:
884 * at any time after this call. However, all the items associated 1072 * at any time after this call. However, all the items associated
885 * with the transaction are still locked and pinned in memory. 1073 * with the transaction are still locked and pinned in memory.
886 */ 1074 */
887 commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); 1075 *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
888 1076
889 tp->t_commit_lsn = commit_lsn; 1077 tp->t_commit_lsn = *commit_lsn;
890 if (nvec > XFS_TRANS_LOGVEC_COUNT) { 1078 if (nvec > XFS_TRANS_LOGVEC_COUNT)
891 kmem_free(log_vector); 1079 kmem_free(log_vector);
892 }
893 1080
894 /* 1081 /*
895 * If we got a log write error. Unpin the logitems that we 1082 * If we got a log write error. Unpin the logitems that we
896 * had pinned, clean up, free trans structure, and return error. 1083 * had pinned, clean up, free trans structure, and return error.
897 */ 1084 */
898 if (error || commit_lsn == -1) { 1085 if (error || *commit_lsn == -1) {
899 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1086 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
900 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); 1087 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
901 return XFS_ERROR(EIO); 1088 return XFS_ERROR(EIO);
@@ -909,8 +1096,6 @@ shut_us_down:
909 */ 1096 */
910 xfs_trans_unreserve_and_mod_sb(tp); 1097 xfs_trans_unreserve_and_mod_sb(tp);
911 1098
912 sync = tp->t_flags & XFS_TRANS_SYNC;
913
914 /* 1099 /*
915 * Tell the LM to call the transaction completion routine 1100 * Tell the LM to call the transaction completion routine
916 * when the log write with LSN commit_lsn completes (e.g. 1101 * when the log write with LSN commit_lsn completes (e.g.
@@ -953,7 +1138,7 @@ shut_us_down:
953 * the commit lsn of this transaction for dependency tracking 1138 * the commit lsn of this transaction for dependency tracking
954 * purposes. 1139 * purposes.
955 */ 1140 */
956 xfs_trans_unlock_items(tp, commit_lsn); 1141 xfs_trans_unlock_items(tp, *commit_lsn);
957 1142
958 /* 1143 /*
959 * If we detected a log error earlier, finish committing 1144 * If we detected a log error earlier, finish committing
@@ -973,156 +1158,114 @@ shut_us_down:
973 * and the items are released we can finally allow the iclog to 1158 * and the items are released we can finally allow the iclog to
974 * go to disk. 1159 * go to disk.
975 */ 1160 */
976 error = xfs_log_release_iclog(mp, commit_iclog); 1161 return xfs_log_release_iclog(mp, commit_iclog);
977
978 /*
979 * If the transaction needs to be synchronous, then force the
980 * log out now and wait for it.
981 */
982 if (sync) {
983 if (!error) {
984 error = _xfs_log_force_lsn(mp, commit_lsn,
985 XFS_LOG_SYNC, log_flushed);
986 }
987 XFS_STATS_INC(xs_trans_sync);
988 } else {
989 XFS_STATS_INC(xs_trans_async);
990 }
991
992 return (error);
993} 1162}
994 1163
995 1164
996/* 1165/*
997 * Total up the number of log iovecs needed to commit this 1166 * xfs_trans_commit
998 * transaction. The transaction itself needs one for the 1167 *
999 * transaction header. Ask each dirty item in turn how many 1168 * Commit the given transaction to the log a/synchronously.
1000 * it needs to get the total. 1169 *
1170 * XFS disk error handling mechanism is not based on a typical
1171 * transaction abort mechanism. Logically after the filesystem
1172 * gets marked 'SHUTDOWN', we can't let any new transactions
1173 * be durable - ie. committed to disk - because some metadata might
1174 * be inconsistent. In such cases, this returns an error, and the
1175 * caller may assume that all locked objects joined to the transaction
1176 * have already been unlocked as if the commit had succeeded.
1177 * Do not reference the transaction structure after this call.
1001 */ 1178 */
1002STATIC uint 1179int
1003xfs_trans_count_vecs( 1180_xfs_trans_commit(
1004 xfs_trans_t *tp) 1181 struct xfs_trans *tp,
1182 uint flags,
1183 int *log_flushed)
1005{ 1184{
1006 int nvecs; 1185 struct xfs_mount *mp = tp->t_mountp;
1007 xfs_log_item_desc_t *lidp; 1186 xfs_lsn_t commit_lsn = -1;
1187 int error = 0;
1188 int log_flags = 0;
1189 int sync = tp->t_flags & XFS_TRANS_SYNC;
1008 1190
1009 nvecs = 1; 1191 /*
1010 lidp = xfs_trans_first_item(tp); 1192 * Determine whether this commit is releasing a permanent
1011 ASSERT(lidp != NULL); 1193 * log reservation or not.
1012
1013 /* In the non-debug case we need to start bailing out if we
1014 * didn't find a log_item here, return zero and let trans_commit
1015 * deal with it.
1016 */ 1194 */
1017 if (lidp == NULL) 1195 if (flags & XFS_TRANS_RELEASE_LOG_RES) {
1018 return 0; 1196 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1019 1197 log_flags = XFS_LOG_REL_PERM_RESERV;
1020 while (lidp != NULL) {
1021 /*
1022 * Skip items which aren't dirty in this transaction.
1023 */
1024 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
1025 lidp = xfs_trans_next_item(tp, lidp);
1026 continue;
1027 }
1028 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1029 nvecs += lidp->lid_size;
1030 lidp = xfs_trans_next_item(tp, lidp);
1031 } 1198 }
1032 1199
1033 return nvecs; 1200 /*
1034} 1201 * If there is nothing to be logged by the transaction,
1035 1202 * then unlock all of the items associated with the
1036/* 1203 * transaction and free the transaction structure.
1037 * Called from the trans_commit code when we notice that 1204 * Also make sure to return any reserved blocks to
1038 * the filesystem is in the middle of a forced shutdown. 1205 * the free pool.
1039 */ 1206 */
1040STATIC void 1207 if (!(tp->t_flags & XFS_TRANS_DIRTY))
1041xfs_trans_uncommit( 1208 goto out_unreserve;
1042 xfs_trans_t *tp,
1043 uint flags)
1044{
1045 xfs_log_item_desc_t *lidp;
1046 1209
1047 for (lidp = xfs_trans_first_item(tp); 1210 if (XFS_FORCED_SHUTDOWN(mp)) {
1048 lidp != NULL; 1211 error = XFS_ERROR(EIO);
1049 lidp = xfs_trans_next_item(tp, lidp)) { 1212 goto out_unreserve;
1050 /*
1051 * Unpin all but those that aren't dirty.
1052 */
1053 if (lidp->lid_flags & XFS_LID_DIRTY)
1054 IOP_UNPIN_REMOVE(lidp->lid_item, tp);
1055 } 1213 }
1056 1214
1057 xfs_trans_unreserve_and_mod_sb(tp); 1215 ASSERT(tp->t_ticket != NULL);
1058 xfs_trans_unreserve_and_mod_dquots(tp);
1059 1216
1060 xfs_trans_free_items(tp, flags); 1217 /*
1061 xfs_trans_free_busy(tp); 1218 * If we need to update the superblock, then do it now.
1062 xfs_trans_free(tp); 1219 */
1063} 1220 if (tp->t_flags & XFS_TRANS_SB_DIRTY)
1221 xfs_trans_apply_sb_deltas(tp);
1222 xfs_trans_apply_dquot_deltas(tp);
1064 1223
1065/* 1224 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
1066 * Fill in the vector with pointers to data to be logged 1225 if (error == ENOMEM) {
1067 * by this transaction. The transaction header takes 1226 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1068 * the first vector, and then each dirty item takes the 1227 error = XFS_ERROR(EIO);
1069 * number of vectors it indicated it needed in xfs_trans_count_vecs(). 1228 goto out_unreserve;
1070 * 1229 }
1071 * As each item fills in the entries it needs, also pin the item
1072 * so that it cannot be flushed out until the log write completes.
1073 */
1074STATIC void
1075xfs_trans_fill_vecs(
1076 xfs_trans_t *tp,
1077 xfs_log_iovec_t *log_vector)
1078{
1079 xfs_log_item_desc_t *lidp;
1080 xfs_log_iovec_t *vecp;
1081 uint nitems;
1082 1230
1083 /* 1231 /*
1084 * Skip over the entry for the transaction header, we'll 1232 * If the transaction needs to be synchronous, then force the
1085 * fill that in at the end. 1233 * log out now and wait for it.
1086 */ 1234 */
1087 vecp = log_vector + 1; /* pointer arithmetic */ 1235 if (sync) {
1088 1236 if (!error) {
1089 nitems = 0; 1237 error = _xfs_log_force_lsn(mp, commit_lsn,
1090 lidp = xfs_trans_first_item(tp); 1238 XFS_LOG_SYNC, log_flushed);
1091 ASSERT(lidp != NULL);
1092 while (lidp != NULL) {
1093 /*
1094 * Skip items which aren't dirty in this transaction.
1095 */
1096 if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
1097 lidp = xfs_trans_next_item(tp, lidp);
1098 continue;
1099 }
1100 /*
1101 * The item may be marked dirty but not log anything.
1102 * This can be used to get called when a transaction
1103 * is committed.
1104 */
1105 if (lidp->lid_size) {
1106 nitems++;
1107 } 1239 }
1108 IOP_FORMAT(lidp->lid_item, vecp); 1240 XFS_STATS_INC(xs_trans_sync);
1109 vecp += lidp->lid_size; /* pointer arithmetic */ 1241 } else {
1110 IOP_PIN(lidp->lid_item); 1242 XFS_STATS_INC(xs_trans_async);
1111 lidp = xfs_trans_next_item(tp, lidp);
1112 } 1243 }
1113 1244
1245 return error;
1246
1247out_unreserve:
1248 xfs_trans_unreserve_and_mod_sb(tp);
1249
1114 /* 1250 /*
1115 * Now that we've counted the number of items in this 1251 * It is indeed possible for the transaction to be not dirty but
1116 * transaction, fill in the transaction header. 1252 * the dqinfo portion to be. All that means is that we have some
1253 * (non-persistent) quota reservations that need to be unreserved.
1117 */ 1254 */
1118 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC; 1255 xfs_trans_unreserve_and_mod_dquots(tp);
1119 tp->t_header.th_type = tp->t_type; 1256 if (tp->t_ticket) {
1120 tp->t_header.th_num_items = nitems; 1257 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
1121 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1258 if (commit_lsn == -1 && !error)
1122 log_vector->i_len = sizeof(xfs_trans_header_t); 1259 error = XFS_ERROR(EIO);
1123 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; 1260 }
1124} 1261 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1262 xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0);
1263 xfs_trans_free_busy(tp);
1264 xfs_trans_free(tp);
1125 1265
1266 XFS_STATS_INC(xs_trans_empty);
1267 return error;
1268}
1126 1269
1127/* 1270/*
1128 * Unlock all of the transaction's items and free the transaction. 1271 * Unlock all of the transaction's items and free the transaction.
@@ -1200,20 +1343,6 @@ xfs_trans_cancel(
1200 xfs_trans_free(tp); 1343 xfs_trans_free(tp);
1201} 1344}
1202 1345
1203
1204/*
1205 * Free the transaction structure. If there is more clean up
1206 * to do when the structure is freed, add it here.
1207 */
1208STATIC void
1209xfs_trans_free(
1210 xfs_trans_t *tp)
1211{
1212 atomic_dec(&tp->t_mountp->m_active_trans);
1213 xfs_trans_free_dqinfo(tp);
1214 kmem_zone_free(xfs_trans_zone, tp);
1215}
1216
1217/* 1346/*
1218 * Roll from one trans in the sequence of PERMANENT transactions to 1347 * Roll from one trans in the sequence of PERMANENT transactions to
1219 * the next: permanent transactions are only flushed out when 1348 * the next: permanent transactions are only flushed out when
@@ -1283,174 +1412,3 @@ xfs_trans_roll(
1283 xfs_trans_ihold(trans, dp); 1412 xfs_trans_ihold(trans, dp);
1284 return 0; 1413 return 0;
1285} 1414}
1286
1287/*
1288 * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
1289 *
1290 * This is typically called by the LM when a transaction has been fully
1291 * committed to disk. It needs to unpin the items which have
1292 * been logged by the transaction and update their positions
1293 * in the AIL if necessary.
1294 * This also gets called when the transactions didn't get written out
1295 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
1296 *
1297 * Call xfs_trans_chunk_committed() to process the items in
1298 * each chunk.
1299 */
1300STATIC void
1301xfs_trans_committed(
1302 xfs_trans_t *tp,
1303 int abortflag)
1304{
1305 xfs_log_item_chunk_t *licp;
1306 xfs_log_item_chunk_t *next_licp;
1307 xfs_log_busy_chunk_t *lbcp;
1308 xfs_log_busy_slot_t *lbsp;
1309 int i;
1310
1311 /*
1312 * Call the transaction's completion callback if there
1313 * is one.
1314 */
1315 if (tp->t_callback != NULL) {
1316 tp->t_callback(tp, tp->t_callarg);
1317 }
1318
1319 /*
1320 * Special case the chunk embedded in the transaction.
1321 */
1322 licp = &(tp->t_items);
1323 if (!(xfs_lic_are_all_free(licp))) {
1324 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1325 }
1326
1327 /*
1328 * Process the items in each chunk in turn.
1329 */
1330 licp = licp->lic_next;
1331 while (licp != NULL) {
1332 ASSERT(!xfs_lic_are_all_free(licp));
1333 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1334 next_licp = licp->lic_next;
1335 kmem_free(licp);
1336 licp = next_licp;
1337 }
1338
1339 /*
1340 * Clear all the per-AG busy list items listed in this transaction
1341 */
1342 lbcp = &tp->t_busy;
1343 while (lbcp != NULL) {
1344 for (i = 0, lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
1345 if (!XFS_LBC_ISFREE(lbcp, i)) {
1346 xfs_alloc_clear_busy(tp, lbsp->lbc_ag,
1347 lbsp->lbc_idx);
1348 }
1349 }
1350 lbcp = lbcp->lbc_next;
1351 }
1352 xfs_trans_free_busy(tp);
1353
1354 /*
1355 * That's it for the transaction structure. Free it.
1356 */
1357 xfs_trans_free(tp);
1358}
1359
1360/*
1361 * This is called to perform the commit processing for each
1362 * item described by the given chunk.
1363 *
1364 * The commit processing consists of unlocking items which were
1365 * held locked with the SYNC_UNLOCK attribute, calling the committed
1366 * routine of each logged item, updating the item's position in the AIL
1367 * if necessary, and unpinning each item. If the committed routine
1368 * returns -1, then do nothing further with the item because it
1369 * may have been freed.
1370 *
1371 * Since items are unlocked when they are copied to the incore
1372 * log, it is possible for two transactions to be completing
1373 * and manipulating the same item simultaneously. The AIL lock
1374 * will protect the lsn field of each item. The value of this
1375 * field can never go backwards.
1376 *
1377 * We unpin the items after repositioning them in the AIL, because
1378 * otherwise they could be immediately flushed and we'd have to race
1379 * with the flusher trying to pull the item from the AIL as we add it.
1380 */
1381STATIC void
1382xfs_trans_chunk_committed(
1383 xfs_log_item_chunk_t *licp,
1384 xfs_lsn_t lsn,
1385 int aborted)
1386{
1387 xfs_log_item_desc_t *lidp;
1388 xfs_log_item_t *lip;
1389 xfs_lsn_t item_lsn;
1390 int i;
1391
1392 lidp = licp->lic_descs;
1393 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1394 struct xfs_ail *ailp;
1395
1396 if (xfs_lic_isfree(licp, i)) {
1397 continue;
1398 }
1399
1400 lip = lidp->lid_item;
1401 if (aborted)
1402 lip->li_flags |= XFS_LI_ABORTED;
1403
1404 /*
1405 * Send in the ABORTED flag to the COMMITTED routine
1406 * so that it knows whether the transaction was aborted
1407 * or not.
1408 */
1409 item_lsn = IOP_COMMITTED(lip, lsn);
1410
1411 /*
1412 * If the committed routine returns -1, make
1413 * no more references to the item.
1414 */
1415 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) {
1416 continue;
1417 }
1418
1419 /*
1420 * If the returned lsn is greater than what it
1421 * contained before, update the location of the
1422 * item in the AIL. If it is not, then do nothing.
1423 * Items can never move backwards in the AIL.
1424 *
1425 * While the new lsn should usually be greater, it
1426 * is possible that a later transaction completing
1427 * simultaneously with an earlier one using the
1428 * same item could complete first with a higher lsn.
1429 * This would cause the earlier transaction to fail
1430 * the test below.
1431 */
1432 ailp = lip->li_ailp;
1433 spin_lock(&ailp->xa_lock);
1434 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1435 /*
1436 * This will set the item's lsn to item_lsn
1437 * and update the position of the item in
1438 * the AIL.
1439 *
1440 * xfs_trans_ail_update() drops the AIL lock.
1441 */
1442 xfs_trans_ail_update(ailp, lip, item_lsn);
1443 } else {
1444 spin_unlock(&ailp->xa_lock);
1445 }
1446
1447 /*
1448 * Now that we've repositioned the item in the AIL,
1449 * unpin it so it can be flushed. Pass information
1450 * about buffer stale state down from the log item
1451 * flags, if anyone else stales the buffer we do not
1452 * want to pay any attention to it.
1453 */
1454 IOP_UNPIN(lip, lidp->lid_flags & XFS_LID_BUF_STALE);
1455 }
1456}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 79c8bab9dfff..c62beee0921e 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -49,6 +49,15 @@ typedef struct xfs_trans_header {
49#define XFS_LI_DQUOT 0x123d 49#define XFS_LI_DQUOT 0x123d
50#define XFS_LI_QUOTAOFF 0x123e 50#define XFS_LI_QUOTAOFF 0x123e
51 51
52#define XFS_LI_TYPE_DESC \
53 { XFS_LI_EFI, "XFS_LI_EFI" }, \
54 { XFS_LI_EFD, "XFS_LI_EFD" }, \
55 { XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \
56 { XFS_LI_INODE, "XFS_LI_INODE" }, \
57 { XFS_LI_BUF, "XFS_LI_BUF" }, \
58 { XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \
59 { XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }
60
52/* 61/*
53 * Transaction types. Used to distinguish types of buffers. 62 * Transaction types. Used to distinguish types of buffers.
54 */ 63 */
@@ -159,7 +168,6 @@ typedef struct xfs_log_item_desc {
159 168
160#define XFS_LID_DIRTY 0x1 169#define XFS_LID_DIRTY 0x1
161#define XFS_LID_PINNED 0x2 170#define XFS_LID_PINNED 0x2
162#define XFS_LID_BUF_STALE 0x8
163 171
164/* 172/*
165 * This structure is used to maintain a chunk list of log_item_desc 173 * This structure is used to maintain a chunk list of log_item_desc
@@ -833,7 +841,7 @@ typedef struct xfs_item_ops {
833 uint (*iop_size)(xfs_log_item_t *); 841 uint (*iop_size)(xfs_log_item_t *);
834 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); 842 void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
835 void (*iop_pin)(xfs_log_item_t *); 843 void (*iop_pin)(xfs_log_item_t *);
836 void (*iop_unpin)(xfs_log_item_t *, int); 844 void (*iop_unpin)(xfs_log_item_t *);
837 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *); 845 void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
838 uint (*iop_trylock)(xfs_log_item_t *); 846 uint (*iop_trylock)(xfs_log_item_t *);
839 void (*iop_unlock)(xfs_log_item_t *); 847 void (*iop_unlock)(xfs_log_item_t *);
@@ -846,7 +854,7 @@ typedef struct xfs_item_ops {
846#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip) 854#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
847#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp) 855#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
848#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip) 856#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
849#define IOP_UNPIN(ip, flags) (*(ip)->li_ops->iop_unpin)(ip, flags) 857#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip)
850#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp) 858#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
851#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip) 859#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
852#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip) 860#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index fb586360d1c9..9cd809025f3a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -40,11 +40,51 @@
40#include "xfs_rw.h" 40#include "xfs_rw.h"
41#include "xfs_trace.h" 41#include "xfs_trace.h"
42 42
43/*
44 * Check to see if a buffer matching the given parameters is already
45 * a part of the given transaction.
46 */
47STATIC struct xfs_buf *
48xfs_trans_buf_item_match(
49 struct xfs_trans *tp,
50 struct xfs_buftarg *target,
51 xfs_daddr_t blkno,
52 int len)
53{
54 xfs_log_item_chunk_t *licp;
55 xfs_log_item_desc_t *lidp;
56 xfs_buf_log_item_t *blip;
57 int i;
43 58
44STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *, 59 len = BBTOB(len);
45 xfs_daddr_t, int); 60 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
46STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *, 61 if (xfs_lic_are_all_free(licp)) {
47 xfs_daddr_t, int); 62 ASSERT(licp == &tp->t_items);
63 ASSERT(licp->lic_next == NULL);
64 return NULL;
65 }
66
67 for (i = 0; i < licp->lic_unused; i++) {
68 /*
69 * Skip unoccupied slots.
70 */
71 if (xfs_lic_isfree(licp, i))
72 continue;
73
74 lidp = xfs_lic_slot(licp, i);
75 blip = (xfs_buf_log_item_t *)lidp->lid_item;
76 if (blip->bli_item.li_type != XFS_LI_BUF)
77 continue;
78
79 if (XFS_BUF_TARGET(blip->bli_buf) == target &&
80 XFS_BUF_ADDR(blip->bli_buf) == blkno &&
81 XFS_BUF_COUNT(blip->bli_buf) == len)
82 return blip->bli_buf;
83 }
84 }
85
86 return NULL;
87}
48 88
49/* 89/*
50 * Add the locked buffer to the transaction. 90 * Add the locked buffer to the transaction.
@@ -112,14 +152,6 @@ xfs_trans_bjoin(
112 * within the transaction, just increment its lock recursion count 152 * within the transaction, just increment its lock recursion count
113 * and return a pointer to it. 153 * and return a pointer to it.
114 * 154 *
115 * Use the fast path function xfs_trans_buf_item_match() or the buffer
116 * cache routine incore_match() to find the buffer
117 * if it is already owned by this transaction.
118 *
119 * If we don't already own the buffer, use get_buf() to get it.
120 * If it doesn't yet have an associated xfs_buf_log_item structure,
121 * then allocate one and add the item to this transaction.
122 *
123 * If the transaction pointer is NULL, make this just a normal 155 * If the transaction pointer is NULL, make this just a normal
124 * get_buf() call. 156 * get_buf() call.
125 */ 157 */
@@ -149,11 +181,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
149 * have it locked. In this case we just increment the lock 181 * have it locked. In this case we just increment the lock
150 * recursion count and return the buffer to the caller. 182 * recursion count and return the buffer to the caller.
151 */ 183 */
152 if (tp->t_items.lic_next == NULL) { 184 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
153 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
154 } else {
155 bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len);
156 }
157 if (bp != NULL) { 185 if (bp != NULL) {
158 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 186 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
159 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) 187 if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
@@ -259,14 +287,6 @@ int xfs_error_mod = 33;
259 * within the transaction and already read in, just increment its 287 * within the transaction and already read in, just increment its
260 * lock recursion count and return a pointer to it. 288 * lock recursion count and return a pointer to it.
261 * 289 *
262 * Use the fast path function xfs_trans_buf_item_match() or the buffer
263 * cache routine incore_match() to find the buffer
264 * if it is already owned by this transaction.
265 *
266 * If we don't already own the buffer, use read_buf() to get it.
267 * If it doesn't yet have an associated xfs_buf_log_item structure,
268 * then allocate one and add the item to this transaction.
269 *
270 * If the transaction pointer is NULL, make this just a normal 290 * If the transaction pointer is NULL, make this just a normal
271 * read_buf() call. 291 * read_buf() call.
272 */ 292 */
@@ -328,11 +348,7 @@ xfs_trans_read_buf(
328 * If the buffer is not yet read in, then we read it in, increment 348 * If the buffer is not yet read in, then we read it in, increment
329 * the lock recursion count, and return it to the caller. 349 * the lock recursion count, and return it to the caller.
330 */ 350 */
331 if (tp->t_items.lic_next == NULL) { 351 bp = xfs_trans_buf_item_match(tp, target, blkno, len);
332 bp = xfs_trans_buf_item_match(tp, target, blkno, len);
333 } else {
334 bp = xfs_trans_buf_item_match_all(tp, target, blkno, len);
335 }
336 if (bp != NULL) { 352 if (bp != NULL) {
337 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 353 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
338 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 354 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
@@ -696,7 +712,6 @@ xfs_trans_log_buf(xfs_trans_t *tp,
696 712
697 tp->t_flags |= XFS_TRANS_DIRTY; 713 tp->t_flags |= XFS_TRANS_DIRTY;
698 lidp->lid_flags |= XFS_LID_DIRTY; 714 lidp->lid_flags |= XFS_LID_DIRTY;
699 lidp->lid_flags &= ~XFS_LID_BUF_STALE;
700 bip->bli_flags |= XFS_BLI_LOGGED; 715 bip->bli_flags |= XFS_BLI_LOGGED;
701 xfs_buf_item_log(bip, first, last); 716 xfs_buf_item_log(bip, first, last);
702} 717}
@@ -782,7 +797,7 @@ xfs_trans_binval(
782 bip->bli_format.blf_flags |= XFS_BLI_CANCEL; 797 bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
783 memset((char *)(bip->bli_format.blf_data_map), 0, 798 memset((char *)(bip->bli_format.blf_data_map), 0,
784 (bip->bli_format.blf_map_size * sizeof(uint))); 799 (bip->bli_format.blf_map_size * sizeof(uint)));
785 lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE; 800 lidp->lid_flags |= XFS_LID_DIRTY;
786 tp->t_flags |= XFS_TRANS_DIRTY; 801 tp->t_flags |= XFS_TRANS_DIRTY;
787} 802}
788 803
@@ -902,111 +917,3 @@ xfs_trans_dquot_buf(
902 917
903 bip->bli_format.blf_flags |= type; 918 bip->bli_format.blf_flags |= type;
904} 919}
905
906/*
907 * Check to see if a buffer matching the given parameters is already
908 * a part of the given transaction. Only check the first, embedded
909 * chunk, since we don't want to spend all day scanning large transactions.
910 */
911STATIC xfs_buf_t *
912xfs_trans_buf_item_match(
913 xfs_trans_t *tp,
914 xfs_buftarg_t *target,
915 xfs_daddr_t blkno,
916 int len)
917{
918 xfs_log_item_chunk_t *licp;
919 xfs_log_item_desc_t *lidp;
920 xfs_buf_log_item_t *blip;
921 xfs_buf_t *bp;
922 int i;
923
924 bp = NULL;
925 len = BBTOB(len);
926 licp = &tp->t_items;
927 if (!xfs_lic_are_all_free(licp)) {
928 for (i = 0; i < licp->lic_unused; i++) {
929 /*
930 * Skip unoccupied slots.
931 */
932 if (xfs_lic_isfree(licp, i)) {
933 continue;
934 }
935
936 lidp = xfs_lic_slot(licp, i);
937 blip = (xfs_buf_log_item_t *)lidp->lid_item;
938 if (blip->bli_item.li_type != XFS_LI_BUF) {
939 continue;
940 }
941
942 bp = blip->bli_buf;
943 if ((XFS_BUF_TARGET(bp) == target) &&
944 (XFS_BUF_ADDR(bp) == blkno) &&
945 (XFS_BUF_COUNT(bp) == len)) {
946 /*
947 * We found it. Break out and
948 * return the pointer to the buffer.
949 */
950 break;
951 } else {
952 bp = NULL;
953 }
954 }
955 }
956 return bp;
957}
958
959/*
960 * Check to see if a buffer matching the given parameters is already
961 * a part of the given transaction. Check all the chunks, we
962 * want to be thorough.
963 */
964STATIC xfs_buf_t *
965xfs_trans_buf_item_match_all(
966 xfs_trans_t *tp,
967 xfs_buftarg_t *target,
968 xfs_daddr_t blkno,
969 int len)
970{
971 xfs_log_item_chunk_t *licp;
972 xfs_log_item_desc_t *lidp;
973 xfs_buf_log_item_t *blip;
974 xfs_buf_t *bp;
975 int i;
976
977 bp = NULL;
978 len = BBTOB(len);
979 for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
980 if (xfs_lic_are_all_free(licp)) {
981 ASSERT(licp == &tp->t_items);
982 ASSERT(licp->lic_next == NULL);
983 return NULL;
984 }
985 for (i = 0; i < licp->lic_unused; i++) {
986 /*
987 * Skip unoccupied slots.
988 */
989 if (xfs_lic_isfree(licp, i)) {
990 continue;
991 }
992
993 lidp = xfs_lic_slot(licp, i);
994 blip = (xfs_buf_log_item_t *)lidp->lid_item;
995 if (blip->bli_item.li_type != XFS_LI_BUF) {
996 continue;
997 }
998
999 bp = blip->bli_buf;
1000 if ((XFS_BUF_TARGET(bp) == target) &&
1001 (XFS_BUF_ADDR(bp) == blkno) &&
1002 (XFS_BUF_COUNT(bp) == len)) {
1003 /*
1004 * We found it. Break out and
1005 * return the pointer to the buffer.
1006 */
1007 return bp;
1008 }
1009 }
1010 }
1011 return NULL;
1012}