Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/fid.c | 15
-rw-r--r--  fs/9p/v9fs.h | 1
-rw-r--r--  fs/9p/vfs_dentry.c | 4
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 2
-rw-r--r--  fs/9p/vfs_super.c | 80
-rw-r--r--  fs/adfs/map.c | 2
-rw-r--r--  fs/afs/cache.c | 12
-rw-r--r--  fs/afs/cell.c | 2
-rw-r--r--  fs/attr.c | 2
-rw-r--r--  fs/autofs4/root.c | 2
-rw-r--r--  fs/befs/ChangeLog | 10
-rw-r--r--  fs/befs/befs_fs_types.h | 2
-rw-r--r--  fs/befs/btree.c | 2
-rw-r--r--  fs/befs/linuxvfs.c | 2
-rw-r--r--  fs/binfmt_elf.c | 6
-rw-r--r--  fs/binfmt_flat.c | 2
-rw-r--r--  fs/bio.c | 2
-rw-r--r--  fs/block_dev.c | 2
-rw-r--r--  fs/btrfs/acl.c | 9
-rw-r--r--  fs/btrfs/ctree.h | 13
-rw-r--r--  fs/btrfs/disk-io.c | 6
-rw-r--r--  fs/btrfs/extent-tree.c | 125
-rw-r--r--  fs/btrfs/extent_io.c | 82
-rw-r--r--  fs/btrfs/extent_io.h | 2
-rw-r--r--  fs/btrfs/extent_map.c | 2
-rw-r--r--  fs/btrfs/file.c | 23
-rw-r--r--  fs/btrfs/free-space-cache.c | 203
-rw-r--r--  fs/btrfs/inode.c | 196
-rw-r--r--  fs/btrfs/ioctl.c | 10
-rw-r--r--  fs/btrfs/relocation.c | 2
-rw-r--r--  fs/btrfs/root-tree.c | 18
-rw-r--r--  fs/btrfs/super.c | 61
-rw-r--r--  fs/btrfs/transaction.c | 50
-rw-r--r--  fs/btrfs/transaction.h | 4
-rw-r--r--  fs/btrfs/xattr.c | 33
-rw-r--r--  fs/cachefiles/interface.c | 2
-rw-r--r--  fs/ceph/addr.c | 2
-rw-r--r--  fs/ceph/caps.c | 2
-rw-r--r--  fs/ceph/mds_client.c | 6
-rw-r--r--  fs/ceph/snap.c | 2
-rw-r--r--  fs/ceph/super.c | 2
-rw-r--r--  fs/cifs/AUTHORS | 2
-rw-r--r--  fs/cifs/README | 16
-rw-r--r--  fs/cifs/cache.c | 2
-rw-r--r--  fs/cifs/cifs_debug.c | 43
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 2
-rw-r--r--  fs/cifs/cifs_spnego.c | 4
-rw-r--r--  fs/cifs/cifs_unicode.c | 35
-rw-r--r--  fs/cifs/cifs_unicode.h | 2
-rw-r--r--  fs/cifs/cifsencrypt.c | 21
-rw-r--r--  fs/cifs/cifsfs.c | 6
-rw-r--r--  fs/cifs/cifsglob.h | 13
-rw-r--r--  fs/cifs/cifssmb.c | 16
-rw-r--r--  fs/cifs/connect.c | 72
-rw-r--r--  fs/cifs/dir.c | 2
-rw-r--r--  fs/cifs/file.c | 70
-rw-r--r--  fs/cifs/link.c | 4
-rw-r--r--  fs/cifs/misc.c | 3
-rw-r--r--  fs/cifs/sess.c | 23
-rw-r--r--  fs/configfs/dir.c | 2
-rw-r--r--  fs/dcache.c | 2
-rw-r--r--  fs/dlm/lock.c | 2
-rw-r--r--  fs/dlm/lowcomms.c | 2
-rw-r--r--  fs/dlm/recover.c | 2
-rw-r--r--  fs/ecryptfs/main.c | 4
-rw-r--r--  fs/eventpoll.c | 8
-rw-r--r--  fs/exofs/common.h | 4
-rw-r--r--  fs/ext2/balloc.c | 6
-rw-r--r--  fs/ext2/inode.c | 8
-rw-r--r--  fs/ext2/super.c | 2
-rw-r--r--  fs/ext2/xattr.c | 2
-rw-r--r--  fs/ext3/balloc.c | 10
-rw-r--r--  fs/ext3/inode.c | 8
-rw-r--r--  fs/ext3/resize.c | 2
-rw-r--r--  fs/ext3/super.c | 2
-rw-r--r--  fs/ext4/balloc.c | 2
-rw-r--r--  fs/ext4/ext4_jbd2.h | 4
-rw-r--r--  fs/ext4/extents.c | 10
-rw-r--r--  fs/ext4/fsync.c | 19
-rw-r--r--  fs/ext4/inode.c | 55
-rw-r--r--  fs/ext4/mballoc.c | 2
-rw-r--r--  fs/ext4/migrate.c | 2
-rw-r--r--  fs/ext4/super.c | 78
-rw-r--r--  fs/fhandle.c | 1
-rw-r--r--  fs/filesystems.c | 3
-rw-r--r--  fs/freevxfs/vxfs_fshead.c | 2
-rw-r--r--  fs/freevxfs/vxfs_lookup.c | 2
-rw-r--r--  fs/freevxfs/vxfs_olt.h | 2
-rw-r--r--  fs/fs-writeback.c | 2
-rw-r--r--  fs/fuse/file.c | 2
-rw-r--r--  fs/gfs2/bmap.c | 2
-rw-r--r--  fs/gfs2/glock.c | 2
-rw-r--r--  fs/gfs2/super.c | 2
-rw-r--r--  fs/inode.c | 9
-rw-r--r--  fs/jbd/commit.c | 2
-rw-r--r--  fs/jbd/journal.c | 4
-rw-r--r--  fs/jbd/revoke.c | 2
-rw-r--r--  fs/jbd/transaction.c | 2
-rw-r--r--  fs/jbd2/commit.c | 6
-rw-r--r--  fs/jbd2/journal.c | 7
-rw-r--r--  fs/jbd2/revoke.c | 2
-rw-r--r--  fs/jbd2/transaction.c | 2
-rw-r--r--  fs/jffs2/TODO | 2
-rw-r--r--  fs/jffs2/readinode.c | 2
-rw-r--r--  fs/jffs2/summary.c | 4
-rw-r--r--  fs/jffs2/wbuf.c | 2
-rw-r--r--  fs/jfs/jfs_dmap.c | 4
-rw-r--r--  fs/jfs/jfs_extent.c | 6
-rw-r--r--  fs/jfs/jfs_imap.c | 14
-rw-r--r--  fs/jfs/jfs_logmgr.h | 2
-rw-r--r--  fs/jfs/jfs_metapage.h | 2
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 2
-rw-r--r--  fs/jfs/resize.c | 4
-rw-r--r--  fs/jfs/super.c | 2
-rw-r--r--  fs/logfs/dev_mtd.c | 2
-rw-r--r--  fs/logfs/dir.c | 2
-rw-r--r--  fs/logfs/readwrite.c | 2
-rw-r--r--  fs/mbcache.c | 2
-rw-r--r--  fs/namei.c | 3
-rw-r--r--  fs/namespace.c | 16
-rw-r--r--  fs/ncpfs/inode.c | 2
-rw-r--r--  fs/nfs/callback_xdr.c | 2
-rw-r--r--  fs/nfs/file.c | 2
-rw-r--r--  fs/nfs/namespace.c | 58
-rw-r--r--  fs/nfs/nfs4filelayout.h | 2
-rw-r--r--  fs/nfs/nfs4proc.c | 4
-rw-r--r--  fs/nfs/write.c | 6
-rw-r--r--  fs/nfs_common/nfsacl.c | 2
-rw-r--r--  fs/nfsd/lockd.c | 1
-rw-r--r--  fs/nfsd/nfs3xdr.c | 2
-rw-r--r--  fs/nfsd/nfs4state.c | 13
-rw-r--r--  fs/nfsd/nfsxdr.c | 2
-rw-r--r--  fs/nilfs2/file.c | 11
-rw-r--r--  fs/nilfs2/nilfs.h | 14
-rw-r--r--  fs/nilfs2/page.c | 2
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 2
-rw-r--r--  fs/notify/inotify/inotify_fsnotify.c | 3
-rw-r--r--  fs/notify/inotify/inotify_user.c | 39
-rw-r--r--  fs/notify/mark.c | 2
-rw-r--r--  fs/ntfs/attrib.c | 4
-rw-r--r--  fs/ntfs/compress.c | 2
-rw-r--r--  fs/ntfs/inode.c | 4
-rw-r--r--  fs/ntfs/layout.h | 12
-rw-r--r--  fs/ntfs/logfile.c | 2
-rw-r--r--  fs/ntfs/logfile.h | 2
-rw-r--r--  fs/ntfs/mft.c | 8
-rw-r--r--  fs/ntfs/runlist.c | 2
-rw-r--r--  fs/ntfs/super.c | 14
-rw-r--r--  fs/ocfs2/alloc.c | 2
-rw-r--r--  fs/ocfs2/aops.h | 2
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 2
-rw-r--r--  fs/ocfs2/cluster/quorum.c | 4
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 4
-rw-r--r--  fs/ocfs2/inode.c | 4
-rw-r--r--  fs/ocfs2/journal.c | 2
-rw-r--r--  fs/ocfs2/journal.h | 2
-rw-r--r--  fs/ocfs2/namei.c | 2
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 4
-rw-r--r--  fs/ocfs2/quota_global.c | 2
-rw-r--r--  fs/ocfs2/reservations.h | 2
-rw-r--r--  fs/ocfs2/stackglue.h | 2
-rw-r--r--  fs/ocfs2/suballoc.c | 4
-rw-r--r--  fs/ocfs2/super.c | 2
-rw-r--r--  fs/ocfs2/xattr.c | 4
-rw-r--r--  fs/partitions/check.c | 4
-rw-r--r--  fs/partitions/ldm.c | 16
-rw-r--r--  fs/proc/base.c | 11
-rw-r--r--  fs/pstore/Kconfig | 2
-rw-r--r--  fs/quota/dquot.c | 15
-rw-r--r--  fs/ramfs/file-nommu.c | 1
-rw-r--r--  fs/reiserfs/journal.c | 4
-rw-r--r--  fs/reiserfs/lock.c | 2
-rw-r--r--  fs/reiserfs/super.c | 4
-rw-r--r--  fs/reiserfs/xattr.c | 2
-rw-r--r--  fs/squashfs/cache.c | 4
-rw-r--r--  fs/ubifs/Kconfig | 2
-rw-r--r--  fs/ubifs/budget.c | 2
-rw-r--r--  fs/ubifs/commit.c | 2
-rw-r--r--  fs/ubifs/debug.c | 63
-rw-r--r--  fs/ubifs/debug.h | 152
-rw-r--r--  fs/ubifs/file.c | 3
-rw-r--r--  fs/ubifs/lpt.c | 7
-rw-r--r--  fs/ubifs/super.c | 3
-rw-r--r--  fs/ubifs/xattr.c | 4
-rw-r--r--  fs/ufs/inode.c | 2
-rw-r--r--  fs/ufs/super.c | 6
-rw-r--r--  fs/ufs/truncate.c | 1
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 28
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.c | 27
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.h | 24
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 129
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 230
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 2
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 2
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 7
-rw-r--r--  fs/xfs/quota/xfs_qm.h | 5
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 2
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 6
-rw-r--r--  fs/xfs/xfs_alloc.c | 30
-rw-r--r--  fs/xfs/xfs_buf_item.c | 2
-rw-r--r--  fs/xfs/xfs_inode.c | 2
-rw-r--r--  fs/xfs/xfs_inode.h | 4
-rw-r--r--  fs/xfs/xfs_inode_item.c | 67
-rw-r--r--  fs/xfs/xfs_itable.c | 2
-rw-r--r--  fs/xfs/xfs_log.c | 38
-rw-r--r--  fs/xfs/xfs_log_priv.h | 3
-rw-r--r--  fs/xfs/xfs_log_recover.c | 4
-rw-r--r--  fs/xfs/xfs_mount.h | 9
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 421
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 2
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 22
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 4
216 files changed, 1907 insertions, 1456 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 0ee594569dcc..85b67ffa2a43 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -286,11 +286,9 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
 
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 {
-        int err, flags;
+        int err;
         struct p9_fid *fid;
-        struct v9fs_session_info *v9ses;
 
-        v9ses = v9fs_dentry2v9ses(dentry);
         fid = v9fs_fid_clone_with_uid(dentry, 0);
         if (IS_ERR(fid))
                 goto error_out;
@@ -299,17 +297,8 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
          * dirty pages. We always request for the open fid in read-write
          * mode so that a partial page write which result in page
          * read can work.
-         *
-         * we don't have a tsyncfs operation for older version
-         * of protocol. So make sure the write back fid is
-         * opened in O_SYNC mode.
          */
-        if (!v9fs_proto_dotl(v9ses))
-                flags = O_RDWR | O_SYNC;
-        else
-                flags = O_RDWR;
-
-        err = p9_client_open(fid, flags);
+        err = p9_client_open(fid, O_RDWR);
         if (err < 0) {
                 p9_client_clunk(fid);
                 fid = ERR_PTR(err);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9665c2b840e6..e5ebedfc5ed8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -116,7 +116,6 @@ struct v9fs_session_info {
         struct list_head slist; /* list of sessions registered with v9fs */
         struct backing_dev_info bdi;
         struct rw_semaphore rename_sem;
-        struct p9_fid *root_fid; /* Used for file system sync */
 };
 
 /* cache_validity flags */
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index b6a3b9f7fe4d..e022890c6f40 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -126,7 +126,9 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
                         retval = v9fs_refresh_inode_dotl(fid, inode);
                 else
                         retval = v9fs_refresh_inode(fid, inode);
-                if (retval <= 0)
+                if (retval == -ENOENT)
+                        return 0;
+                if (retval < 0)
                         return retval;
         }
 out_valid:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index ffbb113d5f33..82a7c38ddad0 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -811,7 +811,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
         fid = v9fs_fid_lookup(dentry);
         if (IS_ERR(fid)) {
                 __putname(link);
-                link = ERR_PTR(PTR_ERR(fid));
+                link = ERR_CAST(fid);
                 goto ndset;
         }
         retval = p9_client_readlink(fid, &target);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index f3eed3383e4f..feef6cdc1fd2 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -154,6 +154,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                 retval = PTR_ERR(inode);
                 goto release_sb;
         }
+
         root = d_alloc_root(inode);
         if (!root) {
                 iput(inode);
@@ -185,21 +186,10 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                 p9stat_free(st);
                 kfree(st);
         }
-        v9fs_fid_add(root, fid);
         retval = v9fs_get_acl(inode, fid);
         if (retval)
                 goto release_sb;
-        /*
-         * Add the root fid to session info. This is used
-         * for file system sync. We want a cloned fid here
-         * so that we can do a sync_filesystem after a
-         * shrink_dcache_for_umount
-         */
-        v9ses->root_fid = v9fs_fid_clone(root);
-        if (IS_ERR(v9ses->root_fid)) {
-                retval = PTR_ERR(v9ses->root_fid);
-                goto release_sb;
-        }
+        v9fs_fid_add(root, fid);
 
         P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
         return dget(sb->s_root);
@@ -210,11 +200,15 @@ close_session:
         v9fs_session_close(v9ses);
         kfree(v9ses);
         return ERR_PTR(retval);
+
 release_sb:
         /*
-         * we will do the session_close and root dentry
-         * release in the below call.
+         * we will do the session_close and root dentry release
+         * in the below call. But we need to clunk fid, because we haven't
+         * attached the fid to dentry so it won't get clunked
+         * automatically.
          */
+        p9_client_clunk(fid);
         deactivate_locked_super(sb);
         return ERR_PTR(retval);
 }
@@ -232,7 +226,7 @@ static void v9fs_kill_super(struct super_block *s)
         P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
 
         kill_anon_super(s);
-        p9_client_clunk(v9ses->root_fid);
+
         v9fs_session_cancel(v9ses);
         v9fs_session_close(v9ses);
         kfree(v9ses);
@@ -285,14 +279,6 @@ done:
         return res;
 }
 
-static int v9fs_sync_fs(struct super_block *sb, int wait)
-{
-        struct v9fs_session_info *v9ses = sb->s_fs_info;
-
-        P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
-        return p9_client_sync_fs(v9ses->root_fid);
-}
-
 static int v9fs_drop_inode(struct inode *inode)
 {
         struct v9fs_session_info *v9ses;
@@ -307,6 +293,51 @@ static int v9fs_drop_inode(struct inode *inode)
         return 1;
 }
 
+static int v9fs_write_inode(struct inode *inode,
+                            struct writeback_control *wbc)
+{
+        int ret;
+        struct p9_wstat wstat;
+        struct v9fs_inode *v9inode;
+        /*
+         * send an fsync request to server irrespective of
+         * wbc->sync_mode.
+         */
+        P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+        v9inode = V9FS_I(inode);
+        if (!v9inode->writeback_fid)
+                return 0;
+        v9fs_blank_wstat(&wstat);
+
+        ret = p9_client_wstat(v9inode->writeback_fid, &wstat);
+        if (ret < 0) {
+                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+                return ret;
+        }
+        return 0;
+}
+
+static int v9fs_write_inode_dotl(struct inode *inode,
+                                 struct writeback_control *wbc)
+{
+        int ret;
+        struct v9fs_inode *v9inode;
+        /*
+         * send an fsync request to server irrespective of
+         * wbc->sync_mode.
+         */
+        P9_DPRINTK(P9_DEBUG_VFS, "%s: inode %p\n", __func__, inode);
+        v9inode = V9FS_I(inode);
+        if (!v9inode->writeback_fid)
+                return 0;
+        ret = p9_client_fsync(v9inode->writeback_fid, 0);
+        if (ret < 0) {
+                __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+                return ret;
+        }
+        return 0;
+}
+
 static const struct super_operations v9fs_super_ops = {
         .alloc_inode = v9fs_alloc_inode,
         .destroy_inode = v9fs_destroy_inode,
@@ -314,17 +345,18 @@ static const struct super_operations v9fs_super_ops = {
         .evict_inode = v9fs_evict_inode,
         .show_options = generic_show_options,
         .umount_begin = v9fs_umount_begin,
+        .write_inode = v9fs_write_inode,
 };
 
 static const struct super_operations v9fs_super_ops_dotl = {
         .alloc_inode = v9fs_alloc_inode,
         .destroy_inode = v9fs_destroy_inode,
-        .sync_fs = v9fs_sync_fs,
         .statfs = v9fs_statfs,
         .drop_inode = v9fs_drop_inode,
         .evict_inode = v9fs_evict_inode,
         .show_options = generic_show_options,
         .umount_begin = v9fs_umount_begin,
+        .write_inode = v9fs_write_inode_dotl,
 };
 
 struct file_system_type v9fs_fs_type = {
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index d1a5932bb0f1..6935f05202ac 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -51,7 +51,7 @@ static DEFINE_RWLOCK(adfs_map_lock);
 
 /*
  * This is fun.  We need to load up to 19 bits from the map at an
- * arbitary bit alignment. (We're limited to 19 bits by F+ version 2).
+ * arbitrary bit alignment. (We're limited to 19 bits by F+ version 2).
  */
 #define GET_FRAG_ID(_map,_start,_idmask)                                \
         ({                                                              \
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index 0fb315dd4d2a..577763c3d88b 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -98,7 +98,7 @@ static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
 }
 
 /*
- * provide new auxilliary cache data
+ * provide new auxiliary cache data
  */
 static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
                                        void *buffer, uint16_t bufmax)
@@ -117,7 +117,7 @@ static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
 }
 
 /*
- * check that the auxilliary data indicates that the entry is still valid
+ * check that the auxiliary data indicates that the entry is still valid
  */
 static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
                                                       const void *buffer,
@@ -150,7 +150,7 @@ static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
 }
 
 /*
- * provide new auxilliary cache data
+ * provide new auxiliary cache data
  */
 static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
                                             void *buffer, uint16_t bufmax)
@@ -172,7 +172,7 @@ static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
 }
 
 /*
- * check that the auxilliary data indicates that the entry is still valid
+ * check that the auxiliary data indicates that the entry is still valid
  */
 static
 enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
@@ -283,7 +283,7 @@ static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
 }
 
 /*
- * provide new auxilliary cache data
+ * provide new auxiliary cache data
  */
 static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
                                         void *buffer, uint16_t bufmax)
@@ -309,7 +309,7 @@ static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
 }
 
 /*
- * check that the auxilliary data indicates that the entry is still valid
+ * check that the auxiliary data indicates that the entry is still valid
 */
 static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
                                                        const void *buffer,
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 0d5eeadf6121..3c090b7555ea 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -293,7 +293,7 @@ struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz,
         if (!cell) {
                 /* this should not happen unless user tries to mount
                  * when root cell is not set. Return an impossibly
-                 * bizzare errno to alert the user. Things like
+                 * bizarre errno to alert the user. Things like
                  * ENOENT might be "more appropriate" but they happen
                  * for other reasons.
                  */
diff --git a/fs/attr.c b/fs/attr.c
index 1007ed616314..91dbe2a107f2 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
  * setattr_copy must be called with i_mutex held.
  *
  * setattr_copy updates the inode's metadata with that specified
- * in attr. Noticably missing is inode size update, which is more complex
+ * in attr. Noticeably missing is inode size update, which is more complex
  * as it requires pagecache updates.
  *
  * The inode is not marked as dirty after this operation. The rationale is
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 96804a17bbd0..f55ae23b137e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -612,7 +612,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
  * set the DMANAGED_AUTOMOUNT and DMANAGED_TRANSIT flags on the leaves
  * of the directory tree. There is no need to clear the automount flag
  * following a mount or restore it after an expire because these mounts
- * are always covered. However, it is neccessary to ensure that these
+ * are always covered. However, it is necessary to ensure that these
  * flags are clear on non-empty directories to avoid unnecessary calls
  * during path walks.
  */
diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog
index ce8c787916be..75a461cfaca6 100644
--- a/fs/befs/ChangeLog
+++ b/fs/befs/ChangeLog
@@ -24,7 +24,7 @@ Version 0.9 (2002-03-14)
 
 Version 0.64 (2002-02-07)
 ==========
-* Did the string comparision really right this time (btree.c) [WD]
+* Did the string comparison really right this time (btree.c) [WD]
 
 * Fixed up some places where I assumed that a long int could hold
   a pointer value. (btree.c) [WD]
@@ -114,7 +114,7 @@ Version 0.6 (2001-12-15)
         More flexible. Will soon be controllable at mount time
         (see TODO). [WD]
 
-* Rewrote datastream positon lookups.
+* Rewrote datastream position lookups.
   (datastream.c) [WD]
 
 * Moved the TODO list to its own file.
@@ -150,7 +150,7 @@ Version 0.50 (2001-11-13)
 * Anton also told me that the blocksize is not allowed to be larger than
   the page size in linux, which is 4k i386. Oops. Added a test for
   (blocksize > PAGE_SIZE), and refuse to mount in that case. What this
-  practicaly means is that 8k blocksize volumes won't work without a major
+  practically means is that 8k blocksize volumes won't work without a major
   restructuring of the driver (or an alpha or other 64bit hardware). [WD]
 
 * Cleaned up the befs_count_blocks() function. Much smarter now.
@@ -183,7 +183,7 @@ Version 0.45 (2001-10-29)
   structures into the generic pointer fields of the public structures
   with kmalloc(). put_super and put_inode free them. This allows us not
   to have to touch the definitions of the public structures in
-  include/linux/fs.h. Also, befs_inode_info is huge (becuase of the
+  include/linux/fs.h. Also, befs_inode_info is huge (because of the
   symlink string). (super.c, inode.c, befs_fs.h) [WD]
 
 * Fixed a thinko that was corrupting file reads after the first block_run
@@ -404,7 +404,7 @@ Version 0.4 (2001-10-28)
 
 * Fixed compile errors on 2.4.1 kernel (WD)
         Resolve rejected patches
-        Accomodate changed NLS interface (util.h)
+        Accommodate changed NLS interface (util.h)
         Needed to include <linux/slab.h> in most files
         Makefile changes
         fs/Config.in changes
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h
index 7893eaa1e58c..eb557d9dc8be 100644
--- a/fs/befs/befs_fs_types.h
+++ b/fs/befs/befs_fs_types.h
@@ -234,7 +234,7 @@ typedef struct {
 } PACKED befs_btree_super;
 
 /*
- * Header stucture of each btree node
+ * Header structure of each btree node
  */
 typedef struct {
         fs64 left;
diff --git a/fs/befs/btree.c b/fs/befs/btree.c
index 4202db7496cb..a66c9b1136e0 100644
--- a/fs/befs/btree.c
+++ b/fs/befs/btree.c
@@ -5,7 +5,7 @@
  *
  * Licensed under the GNU GPL. See the file COPYING for details.
  *
- * 2002-02-05: Sergey S. Kostyliov added binary search withing
+ * 2002-02-05: Sergey S. Kostyliov added binary search within
  * btree nodes.
  *
  * Many thanks to:
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 06457ed8f3e7..54b8c28bebc8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -734,7 +734,7 @@ parse_options(char *options, befs_mount_options * opts)
 
 /* This function has the responsibiltiy of getting the
  * filesystem ready for unmounting.
- * Basicly, we free everything that we allocated in
+ * Basically, we free everything that we allocated in
  * befs_read_inode
  */
 static void
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f34078d702d3..303983fabfd6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -941,9 +941,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
         current->mm->start_stack = bprm->p;
 
 #ifdef arch_randomize_brk
-        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
+        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                 current->mm->brk = current->mm->start_brk =
                         arch_randomize_brk(current->mm);
+#ifdef CONFIG_COMPAT_BRK
+                current->brk_randomized = 1;
+#endif
+        }
 #endif
 
         if (current->personality & MMAP_PAGE_ZERO) {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 811384bec8de..397d3057d336 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -717,7 +717,7 @@ static int load_flat_file(struct linux_binprm * bprm,
  * help simplify all this mumbo jumbo
  *
  * We've got two different sections of relocation entries.
- * The first is the GOT which resides at the begining of the data segment
+ * The first is the GOT which resides at the beginning of the data segment
  * and is terminated with a -1. This one can be relocated in place.
  * The second is the extra relocation entries tacked after the image's
  * data segment. These require a little more processing as the entry is
diff --git a/fs/bio.c b/fs/bio.c
index 4d6d4b6c2bf1..840a0d755248 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1436,7 +1436,7 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
 *   preferred way to end I/O on a bio, it takes care of clearing
 *   BIO_UPTODATE on error. @error is 0 on success, and and one of the
 *   established -Exxxx (-EIO, for instance) error values in case
- *   something went wrong. Noone should call bi_end_io() directly on a
+ *   something went wrong. No one should call bi_end_io() directly on a
 *   bio unless they own it and thus know that it has an end_io
 *   function.
 **/
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c1511c674f53..5147bdd3b8e1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -653,7 +653,7 @@ void bd_forget(struct inode *inode)
 * @whole: whole block device containing @bdev, may equal @bdev
 * @holder: holder trying to claim @bdev
 *
- * Test whther @bdev can be claimed by @holder.
+ * Test whether @bdev can be claimed by @holder.
 *
 * CONTEXT:
 * spin_lock(&bdev_lock).
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfad9ec3..5d505aaa72fb 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
         if (value) {
                 acl = posix_acl_from_xattr(value, size);
-                if (acl == NULL) {
-                        value = NULL;
-                        size = 0;
+                if (acl) {
+                        ret = posix_acl_valid(acl);
+                        if (ret)
+                                goto out;
                 } else if (IS_ERR(acl)) {
                         return PTR_ERR(acl);
                 }
         }
 
         ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
         posix_acl_release(acl);
 
         return ret;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d47ce8307854..2e61fe1b6b8c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -740,8 +740,10 @@ struct btrfs_space_info {
          */
         unsigned long reservation_progress;
 
-        int full;               /* indicates that we cannot allocate any more
-                                   chunks for this space */
+        int full:1;             /* indicates that we cannot allocate any more
+                                   chunks for this space */
+        int chunk_alloc:1;      /* set if we are allocating a chunk */
+
         int force_alloc;        /* set if we need to force a chunk alloc for
                                    this space */
 
@@ -1284,6 +1286,8 @@ struct btrfs_root {
 #define BTRFS_INODE_DIRSYNC             (1 << 10)
 #define BTRFS_INODE_COMPRESS            (1 << 11)
 
+#define BTRFS_INODE_ROOT_ITEM_INIT      (1 << 31)
+
 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
  * one for u8:
@@ -2359,6 +2363,8 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
 int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
                         struct extent_buffer *node);
+void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
+
 /* dir-item.c */
 int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, const char *name,
@@ -2572,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                               struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+                      struct page **pages, size_t num_pages,
+                      loff_t pos, size_t write_bytes,
+                      struct extent_state **cached);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d7a7315bd031..68c84c8c24bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1275,8 +1275,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
         root->commit_root = btrfs_root_node(root);
         BUG_ON(!root->node);
 out:
-        if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
+        if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
                 root->ref_cows = 1;
+                btrfs_check_and_init_root_item(&root->root_item);
+        }
 
         return root;
 }
@@ -3055,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
                 btrfs_destroy_pinned_extent(root,
                                             root->fs_info->pinned_extents);
 
-                t->use_count = 0;
+                atomic_set(&t->use_count, 0);
                 list_del_init(&t->list);
                 memset(t, 0, sizeof(*t));
                 kmem_cache_free(btrfs_transaction_cachep, t);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3cb13b7..31f33ba56fe8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -33,6 +33,25 @@
 #include "locking.h"
 #include "free-space-cache.h"
 
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks
+ *
+ */
+enum {
+        CHUNK_ALLOC_NO_FORCE = 0,
+        CHUNK_ALLOC_FORCE = 1,
+        CHUNK_ALLOC_LIMITED = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
         found->bytes_readonly = 0;
         found->bytes_may_use = 0;
         found->full = 0;
-        found->force_alloc = 0;
+        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+        found->chunk_alloc = 0;
         *space_info = found;
         list_add_rcu(&found->list, &info->space_info);
         atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
         if (!data_sinfo->full && alloc_chunk) {
                 u64 alloc_target;
 
-                data_sinfo->force_alloc = 1;
+                data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
                 spin_unlock(&data_sinfo->lock);
 alloc:
                 alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
 
                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                      bytes + 2 * 1024 * 1024,
-                                     alloc_target, 0);
+                                     alloc_target,
+                                     CHUNK_ALLOC_NO_FORCE);
                 btrfs_end_transaction(trans, root);
                 if (ret < 0) {
                         if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
         rcu_read_lock();
         list_for_each_entry_rcu(found, head, list) {
                 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-                        found->force_alloc = 1;
+                        found->force_alloc = CHUNK_ALLOC_FORCE;
         }
         rcu_read_unlock();
 }
 
 static int should_alloc_chunk(struct btrfs_root *root,
-                              struct btrfs_space_info *sinfo, u64 alloc_bytes)
+                              struct btrfs_space_info *sinfo, u64 alloc_bytes,
+                              int force)
 {
         u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+        u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
         u64 thresh;
 
-        if (sinfo->bytes_used + sinfo->bytes_reserved +
-            alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+        if (force == CHUNK_ALLOC_FORCE)
+                return 1;
+
+        /*
+         * in limited mode, we want to have some free space up to
+         * about 1% of the FS size.
+         */
+        if (force == CHUNK_ALLOC_LIMITED) {
+                thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+                thresh = max_t(u64, 64 * 1024 * 1024,
+                               div_factor_fine(thresh, 1));
+
+                if (num_bytes - num_allocated < thresh)
+                        return 1;
+        }
+
+        /*
+         * we have two similar checks here, one based on percentage
+         * and once based on a hard number of 256MB.  The idea
+         * is that if we have a good amount of free
+         * room, don't allocate a chunk.  A good mount is
+         * less than 80% utilized of the chunks we have allocated,
+         * or more than 256MB free
+         */
+        if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
                 return 0;
 
-        if (sinfo->bytes_used + sinfo->bytes_reserved +
-            alloc_bytes < div_factor(num_bytes, 8))
+        if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
                 return 0;
 
         thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+        /* 256MB or 5% of the FS */
         thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
 
         if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
                 return 0;
-
         return 1;
 }
 
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
         struct btrfs_space_info *space_info;
         struct btrfs_fs_info *fs_info = extent_root->fs_info;
+        int wait_for_alloc = 0;
         int ret = 0;
 
-        mutex_lock(&fs_info->chunk_mutex);
-
         flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
         space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
         }
         BUG_ON(!space_info);
 
+again:
         spin_lock(&space_info->lock);
         if (space_info->force_alloc)
-                force = 1;
+                force = space_info->force_alloc;
         if (space_info->full) {
                 spin_unlock(&space_info->lock);
-                goto out;
+                return 0;
         }
 
-        if (!force && !should_alloc_chunk(extent_root, space_info,
-                                          alloc_bytes)) {
+        if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
                 spin_unlock(&space_info->lock);
-                goto out;
+                return 0;
+        } else if (space_info->chunk_alloc) {
+                wait_for_alloc = 1;
+        } else {
+                space_info->chunk_alloc = 1;
         }
+
         spin_unlock(&space_info->lock);
 
+        mutex_lock(&fs_info->chunk_mutex);
+
+        /*
+         * The chunk_mutex is held throughout the entirety of a chunk
+         * allocation, so once we've acquired the chunk_mutex we know that the
+         * other guy is done and we need to recheck and see if we should
+         * allocate.
+         */
+        if (wait_for_alloc) {
+                mutex_unlock(&fs_info->chunk_mutex);
+                wait_for_alloc = 0;
+                goto again;
+        }
+
         /*
          * If we have mixed data/metadata chunks we want to make sure we keep
          * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                 space_info->full = 1;
         else
                 ret = 1;
-        space_info->force_alloc = 0;
+
+        space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+        space_info->chunk_alloc = 0;
         spin_unlock(&space_info->lock);
-out:
         mutex_unlock(&extent_root->fs_info->chunk_mutex);
         return ret;
 }
@@ -5303,11 +5368,13 @@ loop:
 
                 if (allowed_chunk_alloc) {
                         ret = do_chunk_alloc(trans, root, num_bytes +
-                                             2 * 1024 * 1024, data, 1);
+                                             2 * 1024 * 1024, data,
+                                             CHUNK_ALLOC_LIMITED);
                         allowed_chunk_alloc = 0;
                         done_chunk_alloc = 1;
-                } else if (!done_chunk_alloc) {
-                        space_info->force_alloc = 1;
+                } else if (!done_chunk_alloc &&
+                           space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+                        space_info->force_alloc = CHUNK_ALLOC_LIMITED;
                 }
 
                 if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
          */
         if (empty_size || root->ref_cows)
                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                     num_bytes + 2 * 1024 * 1024, data, 0);
+                                     num_bytes + 2 * 1024 * 1024, data,
+                                     CHUNK_ALLOC_NO_FORCE);
 
         WARN_ON(num_bytes < root->sectorsize);
         ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
                 num_bytes = num_bytes & ~(root->sectorsize - 1);
                 num_bytes = max(num_bytes, min_alloc_size);
                 do_chunk_alloc(trans, root->fs_info->extent_root,
-                               num_bytes, data, 1);
+                               num_bytes, data, CHUNK_ALLOC_FORCE);
                 goto again;
         }
         if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
         alloc_flags = update_block_group_flags(root, cache->flags);
         if (alloc_flags != cache->flags)
-                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                               CHUNK_ALLOC_FORCE);
 
         ret = set_block_group_ro(cache);
         if (!ret)
                 goto out;
         alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-        ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+        ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                             CHUNK_ALLOC_FORCE);
         if (ret < 0)
                 goto out;
         ret = set_block_group_ro(cache);
@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 type)
 {
         u64 alloc_flags = get_alloc_profile(root, type);
-        return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+        return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                              CHUNK_ALLOC_FORCE);
 }
 
 /*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28602a8..315138605088 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
690 } 690 }
691} 691}
692 692
693static void uncache_state(struct extent_state **cached_ptr)
694{
695 if (cached_ptr && (*cached_ptr)) {
696 struct extent_state *state = *cached_ptr;
697 *cached_ptr = NULL;
698 free_extent_state(state);
699 }
700}
701
693/* 702/*
694 * set some bits on a range in the tree. This may require allocations or 703 * set some bits on a range in the tree. This may require allocations or
695 * sleeping, so the gfp mask is used to indicate what is allowed. 704 * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
940} 949}
941 950
942int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 951int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
943 gfp_t mask) 952 struct extent_state **cached_state, gfp_t mask)
944{ 953{
945 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, 954 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
946 NULL, mask); 955 NULL, cached_state, mask);
947} 956}
948 957
949static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 958static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
1012 mask); 1021 mask);
1013} 1022}
1014 1023
1015int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, 1024int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
1016 gfp_t mask)
1017{ 1025{
1018 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, 1026 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
1019 mask); 1027 mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1735 1743
1736 do { 1744 do {
1737 struct page *page = bvec->bv_page; 1745 struct page *page = bvec->bv_page;
1746 struct extent_state *cached = NULL;
1747 struct extent_state *state;
1748
1738 tree = &BTRFS_I(page->mapping->host)->io_tree; 1749 tree = &BTRFS_I(page->mapping->host)->io_tree;
1739 1750
1740 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1751 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1749 if (++bvec <= bvec_end) 1760 if (++bvec <= bvec_end)
1750 prefetchw(&bvec->bv_page->flags); 1761 prefetchw(&bvec->bv_page->flags);
1751 1762
1763 spin_lock(&tree->lock);
1764 state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
1765 if (state && state->start == start) {
1766 /*
1767 * take a reference on the state, unlock will drop
1768 * the ref
1769 */
1770 cache_state(state, &cached);
1771 }
1772 spin_unlock(&tree->lock);
1773
1752 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1774 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1753 ret = tree->ops->readpage_end_io_hook(page, start, end, 1775 ret = tree->ops->readpage_end_io_hook(page, start, end,
1754 NULL); 1776 state);
1755 if (ret) 1777 if (ret)
1756 uptodate = 0; 1778 uptodate = 0;
1757 } 1779 }
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1764 test_bit(BIO_UPTODATE, &bio->bi_flags); 1786 test_bit(BIO_UPTODATE, &bio->bi_flags);
1765 if (err) 1787 if (err)
1766 uptodate = 0; 1788 uptodate = 0;
1789 uncache_state(&cached);
1767 continue; 1790 continue;
1768 } 1791 }
1769 } 1792 }
1770 1793
1771 if (uptodate) { 1794 if (uptodate) {
1772 set_extent_uptodate(tree, start, end, 1795 set_extent_uptodate(tree, start, end, &cached,
1773 GFP_ATOMIC); 1796 GFP_ATOMIC);
1774 } 1797 }
1775 unlock_extent(tree, start, end, GFP_ATOMIC); 1798 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1776 1799
1777 if (whole_page) { 1800 if (whole_page) {
1778 if (uptodate) { 1801 if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1811 1834
1812 do { 1835 do {
1813 struct page *page = bvec->bv_page; 1836 struct page *page = bvec->bv_page;
1837 struct extent_state *cached = NULL;
1814 tree = &BTRFS_I(page->mapping->host)->io_tree; 1838 tree = &BTRFS_I(page->mapping->host)->io_tree;
1815 1839
1816 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 1840 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1821 prefetchw(&bvec->bv_page->flags); 1845 prefetchw(&bvec->bv_page->flags);
1822 1846
1823 if (uptodate) { 1847 if (uptodate) {
1824 set_extent_uptodate(tree, start, end, GFP_ATOMIC); 1848 set_extent_uptodate(tree, start, end, &cached,
1849 GFP_ATOMIC);
1825 } else { 1850 } else {
1826 ClearPageUptodate(page); 1851 ClearPageUptodate(page);
1827 SetPageError(page); 1852 SetPageError(page);
1828 } 1853 }
1829 1854
1830 unlock_extent(tree, start, end, GFP_ATOMIC); 1855 unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
1831 1856
1832 } while (bvec >= bio->bi_io_vec); 1857 } while (bvec >= bio->bi_io_vec);
1833 1858
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2016 while (cur <= end) { 2041 while (cur <= end) {
2017 if (cur >= last_byte) { 2042 if (cur >= last_byte) {
2018 char *userpage; 2043 char *userpage;
2044 struct extent_state *cached = NULL;
2045
2019 iosize = PAGE_CACHE_SIZE - page_offset; 2046 iosize = PAGE_CACHE_SIZE - page_offset;
2020 userpage = kmap_atomic(page, KM_USER0); 2047 userpage = kmap_atomic(page, KM_USER0);
2021 memset(userpage + page_offset, 0, iosize); 2048 memset(userpage + page_offset, 0, iosize);
2022 flush_dcache_page(page); 2049 flush_dcache_page(page);
2023 kunmap_atomic(userpage, KM_USER0); 2050 kunmap_atomic(userpage, KM_USER0);
2024 set_extent_uptodate(tree, cur, cur + iosize - 1, 2051 set_extent_uptodate(tree, cur, cur + iosize - 1,
2025 GFP_NOFS); 2052 &cached, GFP_NOFS);
2026 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2053 unlock_extent_cached(tree, cur, cur + iosize - 1,
2054 &cached, GFP_NOFS);
2027 break; 2055 break;
2028 } 2056 }
2029 em = get_extent(inode, page, page_offset, cur, 2057 em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2063 /* we've found a hole, just zero and go on */ 2091 /* we've found a hole, just zero and go on */
2064 if (block_start == EXTENT_MAP_HOLE) { 2092 if (block_start == EXTENT_MAP_HOLE) {
2065 char *userpage; 2093 char *userpage;
2094 struct extent_state *cached = NULL;
2095
2066 userpage = kmap_atomic(page, KM_USER0); 2096 userpage = kmap_atomic(page, KM_USER0);
2067 memset(userpage + page_offset, 0, iosize); 2097 memset(userpage + page_offset, 0, iosize);
2068 flush_dcache_page(page); 2098 flush_dcache_page(page);
2069 kunmap_atomic(userpage, KM_USER0); 2099 kunmap_atomic(userpage, KM_USER0);
2070 2100
2071 set_extent_uptodate(tree, cur, cur + iosize - 1, 2101 set_extent_uptodate(tree, cur, cur + iosize - 1,
2072 GFP_NOFS); 2102 &cached, GFP_NOFS);
2073 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); 2103 unlock_extent_cached(tree, cur, cur + iosize - 1,
2104 &cached, GFP_NOFS);
2074 cur = cur + iosize; 2105 cur = cur + iosize;
2075 page_offset += iosize; 2106 page_offset += iosize;
2076 continue; 2107 continue;
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
2789 iocount++; 2820 iocount++;
2790 block_start = block_start + iosize; 2821 block_start = block_start + iosize;
2791 } else { 2822 } else {
2792 set_extent_uptodate(tree, block_start, cur_end, 2823 struct extent_state *cached = NULL;
2824
2825 set_extent_uptodate(tree, block_start, cur_end, &cached,
2793 GFP_NOFS); 2826 GFP_NOFS);
2794 unlock_extent(tree, block_start, cur_end, GFP_NOFS); 2827 unlock_extent_cached(tree, block_start, cur_end,
2828 &cached, GFP_NOFS);
2795 block_start = cur_end + 1; 2829 block_start = cur_end + 1;
2796 } 2830 }
2797 page_offset = block_start & (PAGE_CACHE_SIZE - 1); 2831 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
3457 num_pages = num_extent_pages(eb->start, eb->len); 3491 num_pages = num_extent_pages(eb->start, eb->len);
3458 3492
3459 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3493 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3460 GFP_NOFS); 3494 NULL, GFP_NOFS);
3461 for (i = 0; i < num_pages; i++) { 3495 for (i = 0; i < num_pages; i++) {
3462 page = extent_buffer_page(eb, i); 3496 page = extent_buffer_page(eb, i);
3463 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || 3497 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
3885 kunmap_atomic(dst_kaddr, KM_USER0); 3919 kunmap_atomic(dst_kaddr, KM_USER0);
3886} 3920}
3887 3921
3922static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
3923{
3924 unsigned long distance = (src > dst) ? src - dst : dst - src;
3925 return distance < len;
3926}
3927
3888static void copy_pages(struct page *dst_page, struct page *src_page, 3928static void copy_pages(struct page *dst_page, struct page *src_page,
3889 unsigned long dst_off, unsigned long src_off, 3929 unsigned long dst_off, unsigned long src_off,
3890 unsigned long len) 3930 unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
3892 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); 3932 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3893 char *src_kaddr; 3933 char *src_kaddr;
3894 3934
3895 if (dst_page != src_page) 3935 if (dst_page != src_page) {
3896 src_kaddr = kmap_atomic(src_page, KM_USER1); 3936 src_kaddr = kmap_atomic(src_page, KM_USER1);
3897 else 3937 } else {
3898 src_kaddr = dst_kaddr; 3938 src_kaddr = dst_kaddr;
3939 BUG_ON(areas_overlap(src_off, dst_off, len));
3940 }
3899 3941
3900 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); 3942 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3901 kunmap_atomic(dst_kaddr, KM_USER0); 3943 kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3970 "len %lu len %lu\n", dst_offset, len, dst->len); 4012 "len %lu len %lu\n", dst_offset, len, dst->len);
3971 BUG_ON(1); 4013 BUG_ON(1);
3972 } 4014 }
3973 if (dst_offset < src_offset) { 4015 if (!areas_overlap(src_offset, dst_offset, len)) {
3974 memcpy_extent_buffer(dst, dst_offset, src_offset, len); 4016 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3975 return; 4017 return;
3976 } 4018 }
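
Editor's note: the hunks above replace an order-based test with a symmetric overlap predicate — two same-length ranges collide exactly when their start offsets are closer than len apart, so memcpy_extent_buffer() is now taken whenever the ranges are disjoint, not only when dst_offset < src_offset, and copy_pages() can BUG() on an overlapping same-page copy. A minimal userspace sketch of the predicate; the function body matches the patch, the test harness is ours:

#include <assert.h>
#include <stdbool.h>

/* Overlap test as introduced by the patch: [src, src+len) and
 * [dst, dst+len) intersect iff their starts are closer than len apart. */
static inline bool areas_overlap(unsigned long src, unsigned long dst,
				 unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

int main(void)
{
	assert(areas_overlap(0, 10, 20));	/* distance 10 < len 20: overlap   */
	assert(!areas_overlap(0, 10, 10));	/* ranges merely touch: no overlap */
	assert(areas_overlap(10, 0, 20));	/* direction does not matter       */
	return 0;
}

The direction-insensitive form is what lets memmove_extent_buffer() use the cheaper forward copy even when dst_offset < src_offset no longer holds.
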
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c5442835d..af2d7179c372 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
208 int bits, int exclusive_bits, u64 *failed_start, 208 int bits, int exclusive_bits, u64 *failed_start,
209 struct extent_state **cached_state, gfp_t mask); 209 struct extent_state **cached_state, gfp_t mask);
210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, 210int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
211 gfp_t mask); 211 struct extent_state **cached_state, gfp_t mask);
212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, 212int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
213 gfp_t mask); 213 gfp_t mask);
214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 214int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2b6c12e983b3..a24a3f2fa13e 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -243,7 +243,7 @@ out:
243 * Insert @em into @tree or perform a simple forward/backward merge with 243 * Insert @em into @tree or perform a simple forward/backward merge with
244 * existing mappings. The extent_map struct passed in will be inserted 244 * existing mappings. The extent_map struct passed in will be inserted
245 * into the tree directly, with an additional reference taken, or a 245 * into the tree directly, with an additional reference taken, or a
246 * reference dropped if the merge attempt was successfull. 246 * reference dropped if the merge attempt was successful.
247 */ 247 */
248int add_extent_mapping(struct extent_map_tree *tree, 248int add_extent_mapping(struct extent_map_tree *tree,
249 struct extent_map *em) 249 struct extent_map *em)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 656bc0a892b1..75899a01dded 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
104/* 104/*
105 * unlocks pages after btrfs_file_write is done with them 105 * unlocks pages after btrfs_file_write is done with them
106 */ 106 */
107static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) 107void btrfs_drop_pages(struct page **pages, size_t num_pages)
108{ 108{
109 size_t i; 109 size_t i;
110 for (i = 0; i < num_pages; i++) { 110 for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
127 * this also makes the decision about creating an inline extent vs 127 * this also makes the decision about creating an inline extent vs
128 * doing real data extents, marking pages dirty and delalloc as required. 128 * doing real data extents, marking pages dirty and delalloc as required.
129 */ 129 */
130static noinline int dirty_and_release_pages(struct btrfs_root *root, 130int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
131 struct file *file, 131 struct page **pages, size_t num_pages,
132 struct page **pages, 132 loff_t pos, size_t write_bytes,
133 size_t num_pages, 133 struct extent_state **cached)
134 loff_t pos,
135 size_t write_bytes)
136{ 134{
137 int err = 0; 135 int err = 0;
138 int i; 136 int i;
139 struct inode *inode = fdentry(file)->d_inode;
140 u64 num_bytes; 137 u64 num_bytes;
141 u64 start_pos; 138 u64 start_pos;
142 u64 end_of_last_block; 139 u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
149 146
150 end_of_last_block = start_pos + num_bytes - 1; 147 end_of_last_block = start_pos + num_bytes - 1;
151 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, 148 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
152 NULL); 149 cached);
153 if (err) 150 if (err)
154 return err; 151 return err;
155 152
@@ -906,7 +903,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
906 unsigned long last_index; 903 unsigned long last_index;
907 size_t num_written = 0; 904 size_t num_written = 0;
908 int nrptrs; 905 int nrptrs;
909 int ret; 906 int ret = 0;
910 907
911 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 908 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
912 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 909 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
992 } 989 }
993 990
994 if (copied > 0) { 991 if (copied > 0) {
995 ret = dirty_and_release_pages(root, file, pages, 992 ret = btrfs_dirty_pages(root, inode, pages,
996 dirty_pages, pos, 993 dirty_pages, pos, copied,
997 copied); 994 NULL);
998 if (ret) { 995 if (ret) {
999 btrfs_delalloc_release_space(inode, 996 btrfs_delalloc_release_space(inode,
1000 dirty_pages << PAGE_CACHE_SHIFT); 997 dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0037427d8a9d..11d2e9cea09e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h"
27 28
28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
81 return ERR_PTR(-ENOENT); 82 return ERR_PTR(-ENOENT);
82 } 83 }
83 84
85 inode->i_mapping->flags &= ~__GFP_FS;
86
84 spin_lock(&block_group->lock); 87 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) { 88 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode); 89 block_group->inode = igrab(inode);
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
222 u64 num_entries; 225 u64 num_entries;
223 u64 num_bitmaps; 226 u64 num_bitmaps;
224 u64 generation; 227 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
225 u32 cur_crc = ~(u32)0; 229 u32 cur_crc = ~(u32)0;
226 pgoff_t index = 0; 230 pgoff_t index = 0;
227 unsigned long first_page_offset; 231 unsigned long first_page_offset;
@@ -467,6 +471,17 @@ next:
467 index++; 471 index++;
468 } 472 }
469 473
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
478 printk(KERN_ERR "block group %llu has the wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
470 ret = 1; 485 ret = 1;
471out: 486out:
472 kfree(checksums); 487 kfree(checksums);
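
Editor's note: this validation pass discards a loaded cache whose recorded free space disagrees with what the block-group item implies, forcing a rebuild (ret = 0) rather than trusting stale data. The invariant in isolation, with invented sizes:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical block group of 1 GiB with 768 MiB allocated and
	 * 4 MiB reserved for superblock copies. */
	uint64_t key_offset  = 1024ULL << 20;	/* block group length          */
	uint64_t used        = 768ULL << 20;	/* btrfs_block_group_used()    */
	uint64_t bytes_super = 4ULL << 20;

	uint64_t expected_free = key_offset - used - bytes_super;

	/* load_free_space_cache() now rejects any cache whose summed
	 * entries disagree with this figure. */
	assert(expected_free == 252ULL << 20);
	return 0;
}
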
@@ -493,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root,
493 struct inode *inode; 508 struct inode *inode;
494 struct rb_node *node; 509 struct rb_node *node;
495 struct list_head *pos, *n; 510 struct list_head *pos, *n;
511 struct page **pages;
496 struct page *page; 512 struct page *page;
497 struct extent_state *cached_state = NULL; 513 struct extent_state *cached_state = NULL;
514 struct btrfs_free_cluster *cluster = NULL;
515 struct extent_io_tree *unpin = NULL;
498 struct list_head bitmap_list; 516 struct list_head bitmap_list;
499 struct btrfs_key key; 517 struct btrfs_key key;
518 u64 start, end, len;
500 u64 bytes = 0; 519 u64 bytes = 0;
501 u32 *crc, *checksums; 520 u32 *crc, *checksums;
502 pgoff_t index = 0, last_index = 0;
503 unsigned long first_page_offset; 521 unsigned long first_page_offset;
504 int num_checksums; 522 int index = 0, num_pages = 0;
505 int entries = 0; 523 int entries = 0;
506 int bitmaps = 0; 524 int bitmaps = 0;
507 int ret = 0; 525 int ret = 0;
526 bool next_page = false;
527 bool out_of_space = false;
508 528
509 root = root->fs_info->tree_root; 529 root = root->fs_info->tree_root;
510 530
@@ -532,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root,
532 return 0; 552 return 0;
533 } 553 }
534 554
535 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT;
536 filemap_write_and_wait(inode->i_mapping); 557 filemap_write_and_wait(inode->i_mapping);
537 btrfs_wait_ordered_range(inode, inode->i_size & 558 btrfs_wait_ordered_range(inode, inode->i_size &
538 ~(root->sectorsize - 1), (u64)-1); 559 ~(root->sectorsize - 1), (u64)-1);
539 560
540 /* We need a checksum per page. */ 561 /* We need a checksum per page. */
541 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
542 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
543 if (!crc) { 563 if (!crc) {
544 iput(inode); 564 iput(inode);
545 return 0; 565 return 0;
546 } 566 }
547 567
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) {
570 kfree(crc);
571 iput(inode);
572 return 0;
573 }
574
548 /* Since the first page has all of our checksums and our generation we 575 /* Since the first page has all of our checksums and our generation we
549 * need to calculate the offset into the page that we can start writing 576 * need to calculate the offset into the page that we can start writing
550 * our entries. 577 * our entries.
551 */ 578 */
552 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580
581 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster,
585 block_group_list);
586
587 /*
588 * We shouldn't have switched the pinned extents yet so this is the
589 * right one
590 */
591 unpin = root->fs_info->pinned_extents;
553 592
554 /* 593 /*
555 * Lock all pages first so we can lock the extent safely. 594 * Lock all pages first so we can lock the extent safely.
@@ -559,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
559 * after find_get_page at this point. Just putting this here so people 598 * after find_get_page at this point. Just putting this here so people
560 * know and don't freak out. 599 * know and don't freak out.
561 */ 600 */
562 while (index <= last_index) { 601 while (index < num_pages) {
563 page = grab_cache_page(inode->i_mapping, index); 602 page = grab_cache_page(inode->i_mapping, index);
564 if (!page) { 603 if (!page) {
565 pgoff_t i = 0; 604 int i;
566 605
567 while (i < index) { 606 for (i = 0; i < num_pages; i++) {
568 page = find_get_page(inode->i_mapping, i); 607 unlock_page(pages[i]);
569 unlock_page(page); 608 page_cache_release(pages[i]);
570 page_cache_release(page);
571 page_cache_release(page);
572 i++;
573 } 609 }
574 goto out_free; 610 goto out_free;
575 } 611 }
612 pages[index] = page;
576 index++; 613 index++;
577 } 614 }
578 615
@@ -580,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
580 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 617 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
581 0, &cached_state, GFP_NOFS); 618 0, &cached_state, GFP_NOFS);
582 619
620 /*
621 * When searching for pinned extents, we need to start at our start
622 * offset.
623 */
624 start = block_group->key.objectid;
625
583 /* Write out the extent entries */ 626 /* Write out the extent entries */
584 do { 627 do {
585 struct btrfs_free_space_entry *entry; 628 struct btrfs_free_space_entry *entry;
@@ -587,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root,
587 unsigned long offset = 0; 630 unsigned long offset = 0;
588 unsigned long start_offset = 0; 631 unsigned long start_offset = 0;
589 632
633 next_page = false;
634
590 if (index == 0) { 635 if (index == 0) {
591 start_offset = first_page_offset; 636 start_offset = first_page_offset;
592 offset = start_offset; 637 offset = start_offset;
593 } 638 }
594 639
595 page = find_get_page(inode->i_mapping, index); 640 if (index >= num_pages) {
641 out_of_space = true;
642 break;
643 }
644
645 page = pages[index];
596 646
597 addr = kmap(page); 647 addr = kmap(page);
598 entry = addr + start_offset; 648 entry = addr + start_offset;
599 649
600 memset(addr, 0, PAGE_CACHE_SIZE); 650 memset(addr, 0, PAGE_CACHE_SIZE);
601 while (1) { 651 while (node && !next_page) {
602 struct btrfs_free_space *e; 652 struct btrfs_free_space *e;
603 653
604 e = rb_entry(node, struct btrfs_free_space, offset_index); 654 e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -614,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
614 entry->type = BTRFS_FREE_SPACE_EXTENT; 664 entry->type = BTRFS_FREE_SPACE_EXTENT;
615 } 665 }
616 node = rb_next(node); 666 node = rb_next(node);
617 if (!node) 667 if (!node && cluster) {
618 break; 668 node = rb_first(&cluster->root);
669 cluster = NULL;
670 }
619 offset += sizeof(struct btrfs_free_space_entry); 671 offset += sizeof(struct btrfs_free_space_entry);
620 if (offset + sizeof(struct btrfs_free_space_entry) >= 672 if (offset + sizeof(struct btrfs_free_space_entry) >=
621 PAGE_CACHE_SIZE) 673 PAGE_CACHE_SIZE)
674 next_page = true;
675 entry++;
676 }
677
678 /*
679 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space
681 */
682 while (!next_page && (start < block_group->key.objectid +
683 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY);
686 if (ret) {
687 ret = 0;
622 break; 688 break;
689 }
690
691 /* This pinned extent is out of our range */
692 if (start >= block_group->key.objectid +
693 block_group->key.offset)
694 break;
695
696 len = block_group->key.objectid +
697 block_group->key.offset - start;
698 len = min(len, end + 1 - start);
699
700 entries++;
701 entry->offset = cpu_to_le64(start);
702 entry->bytes = cpu_to_le64(len);
703 entry->type = BTRFS_FREE_SPACE_EXTENT;
704
705 start = end + 1;
706 offset += sizeof(struct btrfs_free_space_entry);
707 if (offset + sizeof(struct btrfs_free_space_entry) >=
708 PAGE_CACHE_SIZE)
709 next_page = true;
623 entry++; 710 entry++;
624 } 711 }
625 *crc = ~(u32)0; 712 *crc = ~(u32)0;
@@ -632,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
632 719
633 bytes += PAGE_CACHE_SIZE; 720 bytes += PAGE_CACHE_SIZE;
634 721
635 ClearPageChecked(page);
636 set_page_extent_mapped(page);
637 SetPageUptodate(page);
638 set_page_dirty(page);
639
640 /*
641 * We need to release our reference we got for grab_cache_page,
642 * except for the first page which will hold our checksums, we
643 * do that below.
644 */
645 if (index != 0) {
646 unlock_page(page);
647 page_cache_release(page);
648 }
649
650 page_cache_release(page);
651
652 index++; 722 index++;
653 } while (node); 723 } while (node || next_page);
654 724
655 /* Write out the bitmaps */ 725 /* Write out the bitmaps */
656 list_for_each_safe(pos, n, &bitmap_list) { 726 list_for_each_safe(pos, n, &bitmap_list) {
@@ -658,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
658 struct btrfs_free_space *entry = 728 struct btrfs_free_space *entry =
659 list_entry(pos, struct btrfs_free_space, list); 729 list_entry(pos, struct btrfs_free_space, list);
660 730
661 page = find_get_page(inode->i_mapping, index); 731 if (index >= num_pages) {
732 out_of_space = true;
733 break;
734 }
735 page = pages[index];
662 736
663 addr = kmap(page); 737 addr = kmap(page);
664 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 738 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -669,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
669 crc++; 743 crc++;
670 bytes += PAGE_CACHE_SIZE; 744 bytes += PAGE_CACHE_SIZE;
671 745
672 ClearPageChecked(page);
673 set_page_extent_mapped(page);
674 SetPageUptodate(page);
675 set_page_dirty(page);
676 unlock_page(page);
677 page_cache_release(page);
678 page_cache_release(page);
679 list_del_init(&entry->list); 746 list_del_init(&entry->list);
680 index++; 747 index++;
681 } 748 }
682 749
750 if (out_of_space) {
751 btrfs_drop_pages(pages, num_pages);
752 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753 i_size_read(inode) - 1, &cached_state,
754 GFP_NOFS);
755 ret = 0;
756 goto out_free;
757 }
758
683 /* Zero out the rest of the pages just to make sure */ 759 /* Zero out the rest of the pages just to make sure */
684 while (index <= last_index) { 760 while (index < num_pages) {
685 void *addr; 761 void *addr;
686 762
687 page = find_get_page(inode->i_mapping, index); 763 page = pages[index];
688
689 addr = kmap(page); 764 addr = kmap(page);
690 memset(addr, 0, PAGE_CACHE_SIZE); 765 memset(addr, 0, PAGE_CACHE_SIZE);
691 kunmap(page); 766 kunmap(page);
692 ClearPageChecked(page);
693 set_page_extent_mapped(page);
694 SetPageUptodate(page);
695 set_page_dirty(page);
696 unlock_page(page);
697 page_cache_release(page);
698 page_cache_release(page);
699 bytes += PAGE_CACHE_SIZE; 767 bytes += PAGE_CACHE_SIZE;
700 index++; 768 index++;
701 } 769 }
702 770
703 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
704
705 /* Write the checksums and trans id to the first page */ 771 /* Write the checksums and trans id to the first page */
706 { 772 {
707 void *addr; 773 void *addr;
708 u64 *gen; 774 u64 *gen;
709 775
710 page = find_get_page(inode->i_mapping, 0); 776 page = pages[0];
711 777
712 addr = kmap(page); 778 addr = kmap(page);
713 memcpy(addr, checksums, sizeof(u32) * num_checksums); 779 memcpy(addr, checksums, sizeof(u32) * num_pages);
714 gen = addr + (sizeof(u32) * num_checksums); 780 gen = addr + (sizeof(u32) * num_pages);
715 *gen = trans->transid; 781 *gen = trans->transid;
716 kunmap(page); 782 kunmap(page);
717 ClearPageChecked(page);
718 set_page_extent_mapped(page);
719 SetPageUptodate(page);
720 set_page_dirty(page);
721 unlock_page(page);
722 page_cache_release(page);
723 page_cache_release(page);
724 } 783 }
725 BTRFS_I(inode)->generation = trans->transid;
726 784
785 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786 bytes, &cached_state);
787 btrfs_drop_pages(pages, num_pages);
727 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
728 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
729 790
791 if (ret) {
792 ret = 0;
793 goto out_free;
794 }
795
796 BTRFS_I(inode)->generation = trans->transid;
797
730 filemap_write_and_wait(inode->i_mapping); 798 filemap_write_and_wait(inode->i_mapping);
731 799
732 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -777,6 +845,7 @@ out_free:
777 BTRFS_I(inode)->generation = 0; 845 BTRFS_I(inode)->generation = 0;
778 } 846 }
779 kfree(checksums); 847 kfree(checksums);
848 kfree(pages);
780 btrfs_update_inode(trans, root, inode); 849 btrfs_update_inode(trans, root, inode);
781 iput(inode); 850 iput(inode);
782 return ret; 851 return ret;
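
Editor's note: the write-out path is reworked around a preallocated pages[] array — all pages are grabbed (and, on failure, released) up front, cluster entries and still-pinned extents are serialized alongside the free-space tree so that space is not leaked, an out_of_space bail-out handles caches that outgrow the file, and the per-page SetPageUptodate/set_page_dirty boilerplate is replaced by the now-exported btrfs_dirty_pages()/btrfs_drop_pages() pair from file.c. Two small derived quantities drive the on-disk layout, sketched here with an assumed 4 KiB page size:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096ULL

int main(void)
{
	uint64_t i_size = 5 * PAGE_CACHE_SIZE;	/* hypothetical cache file size */

	/* One crc32 per page, rounding up as the patch now does ... */
	uint64_t num_pages = (i_size + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;

	/* ... and page 0 begins with all checksums plus the transaction
	 * generation, so free-space entries start at this offset. */
	uint64_t first_page_offset = sizeof(uint32_t) * num_pages
				     + sizeof(uint64_t);

	assert(num_pages == 5);
	printf("entries start at byte %llu of page 0\n",
	       (unsigned long long)first_page_offset);	/* 4*5 + 8 = 28 */
	return 0;
}
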
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 93c28a1d6bdc..fcd66b6a8086 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -112,6 +112,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, 112static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
113 struct btrfs_root *root, struct inode *inode, 113 struct btrfs_root *root, struct inode *inode,
114 u64 start, size_t size, size_t compressed_size, 114 u64 start, size_t size, size_t compressed_size,
115 int compress_type,
115 struct page **compressed_pages) 116 struct page **compressed_pages)
116{ 117{
117 struct btrfs_key key; 118 struct btrfs_key key;
@@ -126,12 +127,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
126 size_t cur_size = size; 127 size_t cur_size = size;
127 size_t datasize; 128 size_t datasize;
128 unsigned long offset; 129 unsigned long offset;
129 int compress_type = BTRFS_COMPRESS_NONE;
130 130
131 if (compressed_size && compressed_pages) { 131 if (compressed_size && compressed_pages)
132 compress_type = root->fs_info->compress_type;
133 cur_size = compressed_size; 132 cur_size = compressed_size;
134 }
135 133
136 path = btrfs_alloc_path(); 134 path = btrfs_alloc_path();
137 if (!path) 135 if (!path)
@@ -221,7 +219,7 @@ fail:
221static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, 219static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
222 struct btrfs_root *root, 220 struct btrfs_root *root,
223 struct inode *inode, u64 start, u64 end, 221 struct inode *inode, u64 start, u64 end,
224 size_t compressed_size, 222 size_t compressed_size, int compress_type,
225 struct page **compressed_pages) 223 struct page **compressed_pages)
226{ 224{
227 u64 isize = i_size_read(inode); 225 u64 isize = i_size_read(inode);
@@ -254,7 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
254 inline_len = min_t(u64, isize, actual_end); 252 inline_len = min_t(u64, isize, actual_end);
255 ret = insert_inline_extent(trans, root, inode, start, 253 ret = insert_inline_extent(trans, root, inode, start,
256 inline_len, compressed_size, 254 inline_len, compressed_size,
257 compressed_pages); 255 compress_type, compressed_pages);
258 BUG_ON(ret); 256 BUG_ON(ret);
259 btrfs_delalloc_release_metadata(inode, end + 1 - start); 257 btrfs_delalloc_release_metadata(inode, end + 1 - start);
260 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 258 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
@@ -433,12 +431,13 @@ again:
433 * to make an uncompressed inline extent. 431 * to make an uncompressed inline extent.
434 */ 432 */
435 ret = cow_file_range_inline(trans, root, inode, 433 ret = cow_file_range_inline(trans, root, inode,
436 start, end, 0, NULL); 434 start, end, 0, 0, NULL);
437 } else { 435 } else {
438 /* try making a compressed inline extent */ 436 /* try making a compressed inline extent */
439 ret = cow_file_range_inline(trans, root, inode, 437 ret = cow_file_range_inline(trans, root, inode,
440 start, end, 438 start, end,
441 total_compressed, pages); 439 total_compressed,
440 compress_type, pages);
442 } 441 }
443 if (ret == 0) { 442 if (ret == 0) {
444 /* 443 /*
@@ -792,7 +791,7 @@ static noinline int cow_file_range(struct inode *inode,
792 if (start == 0) { 791 if (start == 0) {
793 /* lets try to make an inline extent */ 792 /* lets try to make an inline extent */
794 ret = cow_file_range_inline(trans, root, inode, 793 ret = cow_file_range_inline(trans, root, inode,
795 start, end, 0, NULL); 794 start, end, 0, 0, NULL);
796 if (ret == 0) { 795 if (ret == 0) {
797 extent_clear_unlock_delalloc(inode, 796 extent_clear_unlock_delalloc(inode,
798 &BTRFS_I(inode)->io_tree, 797 &BTRFS_I(inode)->io_tree,
@@ -1771,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1771 add_pending_csums(trans, inode, ordered_extent->file_offset, 1770 add_pending_csums(trans, inode, ordered_extent->file_offset,
1772 &ordered_extent->list); 1771 &ordered_extent->list);
1773 1772
1774 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1773 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1775 ret = btrfs_update_inode(trans, root, inode); 1774 if (!ret) {
1776 BUG_ON(ret); 1775 ret = btrfs_update_inode(trans, root, inode);
1776 BUG_ON(ret);
1777 }
1778 ret = 0;
1777out: 1779out:
1778 if (nolock) { 1780 if (nolock) {
1779 if (trans) 1781 if (trans)
@@ -2222,8 +2224,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2222 insert = 1; 2224 insert = 1;
2223#endif 2225#endif
2224 insert = 1; 2226 insert = 1;
2225 } else {
2226 WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
2227 } 2227 }
2228 2228
2229 if (!BTRFS_I(inode)->orphan_meta_reserved) { 2229 if (!BTRFS_I(inode)->orphan_meta_reserved) {
@@ -2324,7 +2324,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2324 2324
2325 /* 2325 /*
2326 * if ret == 0 means we found what we were searching for, which 2326 * if ret == 0 means we found what we were searching for, which
2327 * is weird, but possible, so only screw with path if we didnt 2327 * is weird, but possible, so only screw with path if we didn't
2328 * find the key and see if we have stuff that matches 2328 * find the key and see if we have stuff that matches
2329 */ 2329 */
2330 if (ret > 0) { 2330 if (ret > 0) {
@@ -2537,8 +2537,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
2537 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); 2537 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
2538 2538
2539 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2539 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2540 if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
2541 inode->i_mapping->flags &= ~__GFP_FS;
2542 2540
2543 /* 2541 /*
2544 * try to precache a NULL acl entry for files that don't have 2542 * try to precache a NULL acl entry for files that don't have
@@ -2595,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2595 struct btrfs_inode_item *item, 2593 struct btrfs_inode_item *item,
2596 struct inode *inode) 2594 struct inode *inode)
2597{ 2595{
2596 if (!leaf->map_token)
2597 map_private_extent_buffer(leaf, (unsigned long)item,
2598 sizeof(struct btrfs_inode_item),
2599 &leaf->map_token, &leaf->kaddr,
2600 &leaf->map_start, &leaf->map_len,
2601 KM_USER1);
2602
2598 btrfs_set_inode_uid(leaf, item, inode->i_uid); 2603 btrfs_set_inode_uid(leaf, item, inode->i_uid);
2599 btrfs_set_inode_gid(leaf, item, inode->i_gid); 2604 btrfs_set_inode_gid(leaf, item, inode->i_gid);
2600 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2605 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2623,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2623 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2628 btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
2624 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2629 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
2625 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group); 2630 btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
2631
2632 if (leaf->map_token) {
2633 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2634 leaf->map_token = NULL;
2635 }
2626} 2636}
2627 2637
2628/* 2638/*
@@ -4212,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4212 struct btrfs_key found_key; 4222 struct btrfs_key found_key;
4213 struct btrfs_path *path; 4223 struct btrfs_path *path;
4214 int ret; 4224 int ret;
4215 u32 nritems;
4216 struct extent_buffer *leaf; 4225 struct extent_buffer *leaf;
4217 int slot; 4226 int slot;
4218 int advance;
4219 unsigned char d_type; 4227 unsigned char d_type;
4220 int over = 0; 4228 int over = 0;
4221 u32 di_cur; 4229 u32 di_cur;
@@ -4258,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4258 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4266 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4259 if (ret < 0) 4267 if (ret < 0)
4260 goto err; 4268 goto err;
4261 advance = 0;
4262 4269
4263 while (1) { 4270 while (1) {
4264 leaf = path->nodes[0]; 4271 leaf = path->nodes[0];
4265 nritems = btrfs_header_nritems(leaf);
4266 slot = path->slots[0]; 4272 slot = path->slots[0];
4267 if (advance || slot >= nritems) { 4273 if (slot >= btrfs_header_nritems(leaf)) {
4268 if (slot >= nritems - 1) { 4274 ret = btrfs_next_leaf(root, path);
4269 ret = btrfs_next_leaf(root, path); 4275 if (ret < 0)
4270 if (ret) 4276 goto err;
4271 break; 4277 else if (ret > 0)
4272 leaf = path->nodes[0]; 4278 break;
4273 nritems = btrfs_header_nritems(leaf); 4279 continue;
4274 slot = path->slots[0];
4275 } else {
4276 slot++;
4277 path->slots[0]++;
4278 }
4279 } 4280 }
4280 4281
4281 advance = 1;
4282 item = btrfs_item_nr(leaf, slot); 4282 item = btrfs_item_nr(leaf, slot);
4283 btrfs_item_key_to_cpu(leaf, &found_key, slot); 4283 btrfs_item_key_to_cpu(leaf, &found_key, slot);
4284 4284
@@ -4287,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
4287 if (btrfs_key_type(&found_key) != key_type) 4287 if (btrfs_key_type(&found_key) != key_type)
4288 break; 4288 break;
4289 if (found_key.offset < filp->f_pos) 4289 if (found_key.offset < filp->f_pos)
4290 continue; 4290 goto next;
4291 4291
4292 filp->f_pos = found_key.offset; 4292 filp->f_pos = found_key.offset;
4293 4293
@@ -4340,6 +4340,8 @@ skip:
4340 di_cur += di_len; 4340 di_cur += di_len;
4341 di = (struct btrfs_dir_item *)((char *)di + di_len); 4341 di = (struct btrfs_dir_item *)((char *)di + di_len);
4342 } 4342 }
4343next:
4344 path->slots[0]++;
4343 } 4345 }
4344 4346
4345 /* Reached end of directory/root. Bump pos past the last item. */ 4347 /* Reached end of directory/root. Bump pos past the last item. */
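
Editor's note: the readdir loop now has a single advance point — a next: label bumps path->slots[0], and btrfs_next_leaf() is consulted only once the slot runs off the current leaf, removing the old advance/nritems bookkeeping. The shape of that iteration, modeled over plain arrays (mock data, ours):

#include <stdio.h>

/* Two mock "leaves" holding directory offsets, standing in for extent
 * buffers; btrfs_next_leaf() becomes a simple ++leaf here. */
static const int leaves[2][3] = { { 2, 3, 4 }, { 5, 6, 7 } };

int main(void)
{
	int leaf = 0, slot = 0;
	int f_pos = 4;	/* resume position, as filp->f_pos in the patch */

	while (1) {
		if (slot >= 3) {		/* past the last item in this leaf  */
			if (++leaf >= 2)	/* btrfs_next_leaf() returning > 0  */
				break;
			slot = 0;
			continue;
		}
		if (leaves[leaf][slot] < f_pos)
			goto next;		/* skip entries before f_pos */
		printf("emit entry at offset %d\n", leaves[leaf][slot]);
next:
		slot++;				/* the single advance point */
	}
	return 0;
}
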
@@ -4532,14 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4532 BUG_ON(!path); 4534 BUG_ON(!path);
4533 4535
4534 inode = new_inode(root->fs_info->sb); 4536 inode = new_inode(root->fs_info->sb);
4535 if (!inode) 4537 if (!inode) {
4538 btrfs_free_path(path);
4536 return ERR_PTR(-ENOMEM); 4539 return ERR_PTR(-ENOMEM);
4540 }
4537 4541
4538 if (dir) { 4542 if (dir) {
4539 trace_btrfs_inode_request(dir); 4543 trace_btrfs_inode_request(dir);
4540 4544
4541 ret = btrfs_set_inode_index(dir, index); 4545 ret = btrfs_set_inode_index(dir, index);
4542 if (ret) { 4546 if (ret) {
4547 btrfs_free_path(path);
4543 iput(inode); 4548 iput(inode);
4544 return ERR_PTR(ret); 4549 return ERR_PTR(ret);
4545 } 4550 }
@@ -4839,9 +4844,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4839 if (inode->i_nlink == ~0U) 4844 if (inode->i_nlink == ~0U)
4840 return -EMLINK; 4845 return -EMLINK;
4841 4846
4842 btrfs_inc_nlink(inode);
4843 inode->i_ctime = CURRENT_TIME;
4844
4845 err = btrfs_set_inode_index(dir, &index); 4847 err = btrfs_set_inode_index(dir, &index);
4846 if (err) 4848 if (err)
4847 goto fail; 4849 goto fail;
@@ -4857,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4857 goto fail; 4859 goto fail;
4858 } 4860 }
4859 4861
4862 btrfs_inc_nlink(inode);
4863 inode->i_ctime = CURRENT_TIME;
4864
4860 btrfs_set_trans_block_group(trans, dir); 4865 btrfs_set_trans_block_group(trans, dir);
4861 ihold(inode); 4866 ihold(inode);
4862 4867
@@ -5226,7 +5231,7 @@ again:
5226 btrfs_mark_buffer_dirty(leaf); 5231 btrfs_mark_buffer_dirty(leaf);
5227 } 5232 }
5228 set_extent_uptodate(io_tree, em->start, 5233 set_extent_uptodate(io_tree, em->start,
5229 extent_map_end(em) - 1, GFP_NOFS); 5234 extent_map_end(em) - 1, NULL, GFP_NOFS);
5230 goto insert; 5235 goto insert;
5231 } else { 5236 } else {
5232 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); 5237 printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5433,17 +5438,30 @@ out:
5433} 5438}
5434 5439
5435static struct extent_map *btrfs_new_extent_direct(struct inode *inode, 5440static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5441 struct extent_map *em,
5436 u64 start, u64 len) 5442 u64 start, u64 len)
5437{ 5443{
5438 struct btrfs_root *root = BTRFS_I(inode)->root; 5444 struct btrfs_root *root = BTRFS_I(inode)->root;
5439 struct btrfs_trans_handle *trans; 5445 struct btrfs_trans_handle *trans;
5440 struct extent_map *em;
5441 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 5446 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5442 struct btrfs_key ins; 5447 struct btrfs_key ins;
5443 u64 alloc_hint; 5448 u64 alloc_hint;
5444 int ret; 5449 int ret;
5450 bool insert = false;
5445 5451
5446 btrfs_drop_extent_cache(inode, start, start + len - 1, 0); 5452 /*
5453 * Ok if the extent map we looked up is a hole and is for the exact
5454 * range we want, there is no reason to allocate a new one, however if
5455 * it is not right then we need to free this one and drop the cache for
5456 * our range.
5457 */
5458 if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
5459 em->len != len) {
5460 free_extent_map(em);
5461 em = NULL;
5462 insert = true;
5463 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5464 }
5447 5465
5448 trans = btrfs_join_transaction(root, 0); 5466 trans = btrfs_join_transaction(root, 0);
5449 if (IS_ERR(trans)) 5467 if (IS_ERR(trans))
@@ -5459,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5459 goto out; 5477 goto out;
5460 } 5478 }
5461 5479
5462 em = alloc_extent_map(GFP_NOFS);
5463 if (!em) { 5480 if (!em) {
5464 em = ERR_PTR(-ENOMEM); 5481 em = alloc_extent_map(GFP_NOFS);
5465 goto out; 5482 if (!em) {
5483 em = ERR_PTR(-ENOMEM);
5484 goto out;
5485 }
5466 } 5486 }
5467 5487
5468 em->start = start; 5488 em->start = start;
@@ -5472,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5472 em->block_start = ins.objectid; 5492 em->block_start = ins.objectid;
5473 em->block_len = ins.offset; 5493 em->block_len = ins.offset;
5474 em->bdev = root->fs_info->fs_devices->latest_bdev; 5494 em->bdev = root->fs_info->fs_devices->latest_bdev;
5495
5496 /*
5497 * We need to do this because if we're using the original em we searched
5498 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
5499 */
5500 em->flags = 0;
5475 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5501 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5476 5502
5477 while (1) { 5503 while (insert) {
5478 write_lock(&em_tree->lock); 5504 write_lock(&em_tree->lock);
5479 ret = add_extent_mapping(em_tree, em); 5505 ret = add_extent_mapping(em_tree, em);
5480 write_unlock(&em_tree->lock); 5506 write_unlock(&em_tree->lock);
@@ -5692,8 +5718,7 @@ must_cow:
5692 * it above 5718 * it above
5693 */ 5719 */
5694 len = bh_result->b_size; 5720 len = bh_result->b_size;
5695 free_extent_map(em); 5721 em = btrfs_new_extent_direct(inode, em, start, len);
5696 em = btrfs_new_extent_direct(inode, start, len);
5697 if (IS_ERR(em)) 5722 if (IS_ERR(em))
5698 return PTR_ERR(em); 5723 return PTR_ERR(em);
5699 len = min(len, em->len - (start - em->start)); 5724 len = min(len, em->len - (start - em->start));
@@ -5856,8 +5881,10 @@ again:
5856 } 5881 }
5857 5882
5858 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); 5883 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5859 btrfs_ordered_update_i_size(inode, 0, ordered); 5884 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5860 btrfs_update_inode(trans, root, inode); 5885 if (!ret)
5886 btrfs_update_inode(trans, root, inode);
5887 ret = 0;
5861out_unlock: 5888out_unlock:
5862 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, 5889 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5863 ordered->file_offset + ordered->len - 1, 5890 ordered->file_offset + ordered->len - 1,
@@ -5943,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5943 5970
5944static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 5971static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5945 int rw, u64 file_offset, int skip_sum, 5972 int rw, u64 file_offset, int skip_sum,
5946 u32 *csums) 5973 u32 *csums, int async_submit)
5947{ 5974{
5948 int write = rw & REQ_WRITE; 5975 int write = rw & REQ_WRITE;
5949 struct btrfs_root *root = BTRFS_I(inode)->root; 5976 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5954,13 +5981,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5954 if (ret) 5981 if (ret)
5955 goto err; 5982 goto err;
5956 5983
5957 if (write && !skip_sum) { 5984 if (skip_sum)
5985 goto map;
5986
5987 if (write && async_submit) {
5958 ret = btrfs_wq_submit_bio(root->fs_info, 5988 ret = btrfs_wq_submit_bio(root->fs_info,
5959 inode, rw, bio, 0, 0, 5989 inode, rw, bio, 0, 0,
5960 file_offset, 5990 file_offset,
5961 __btrfs_submit_bio_start_direct_io, 5991 __btrfs_submit_bio_start_direct_io,
5962 __btrfs_submit_bio_done); 5992 __btrfs_submit_bio_done);
5963 goto err; 5993 goto err;
5994 } else if (write) {
5995 /*
5996 * If we aren't doing async submit, calculate the csum of the
5997 * bio now.
5998 */
5999 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
6000 if (ret)
6001 goto err;
5964 } else if (!skip_sum) { 6002 } else if (!skip_sum) {
5965 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6003 ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
5966 file_offset, csums); 6004 file_offset, csums);
@@ -5968,7 +6006,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5968 goto err; 6006 goto err;
5969 } 6007 }
5970 6008
5971 ret = btrfs_map_bio(root, rw, bio, 0, 1); 6009map:
6010 ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
5972err: 6011err:
5973 bio_put(bio); 6012 bio_put(bio);
5974 return ret; 6013 return ret;
@@ -5990,15 +6029,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5990 int nr_pages = 0; 6029 int nr_pages = 0;
5991 u32 *csums = dip->csums; 6030 u32 *csums = dip->csums;
5992 int ret = 0; 6031 int ret = 0;
6032 int async_submit = 0;
5993 int write = rw & REQ_WRITE; 6033 int write = rw & REQ_WRITE;
5994 6034
5995 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5996 if (!bio)
5997 return -ENOMEM;
5998 bio->bi_private = dip;
5999 bio->bi_end_io = btrfs_end_dio_bio;
6000 atomic_inc(&dip->pending_bios);
6001
6002 map_length = orig_bio->bi_size; 6035 map_length = orig_bio->bi_size;
6003 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6036 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
6004 &map_length, NULL, 0); 6037 &map_length, NULL, 0);
@@ -6007,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6007 return -EIO; 6040 return -EIO;
6008 } 6041 }
6009 6042
6043 if (map_length >= orig_bio->bi_size) {
6044 bio = orig_bio;
6045 goto submit;
6046 }
6047
6048 async_submit = 1;
6049 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
6050 if (!bio)
6051 return -ENOMEM;
6052 bio->bi_private = dip;
6053 bio->bi_end_io = btrfs_end_dio_bio;
6054 atomic_inc(&dip->pending_bios);
6055
6010 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) { 6056 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
6011 if (unlikely(map_length < submit_len + bvec->bv_len || 6057 if (unlikely(map_length < submit_len + bvec->bv_len ||
6012 bio_add_page(bio, bvec->bv_page, bvec->bv_len, 6058 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6020,7 +6066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6020 atomic_inc(&dip->pending_bios); 6066 atomic_inc(&dip->pending_bios);
6021 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6067 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6022 file_offset, skip_sum, 6068 file_offset, skip_sum,
6023 csums); 6069 csums, async_submit);
6024 if (ret) { 6070 if (ret) {
6025 bio_put(bio); 6071 bio_put(bio);
6026 atomic_dec(&dip->pending_bios); 6072 atomic_dec(&dip->pending_bios);
@@ -6057,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6057 } 6103 }
6058 } 6104 }
6059 6105
6106submit:
6060 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6107 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6061 csums); 6108 csums, async_submit);
6062 if (!ret) 6109 if (!ret)
6063 return 0; 6110 return 0;
6064 6111
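
Editor's note: the direct-IO submit path now asks btrfs_map_block() first — if the whole original bio fits in one contiguous mapping it is submitted as-is with its checksum computed inline, and only bios that must be split fall back to async checksum submission. The decision reduces to one comparison; sizes below are illustrative:

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned long bio_size   = 1 << 20;	/* hypothetical 1 MiB direct write  */
	unsigned long map_length = 256 << 10;	/* contiguous extent: 256 KiB       */

	/* Patch logic: if map_length covers the bio, submit orig_bio
	 * synchronously and csum inline; otherwise split and hand the
	 * csums to the workqueue. */
	bool async_submit = map_length < bio_size;

	printf("%s\n", async_submit ? "split + async csum"
				    : "single bio, inline csum");
	return 0;
}
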
@@ -6153,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6153 unsigned long nr_segs) 6200 unsigned long nr_segs)
6154{ 6201{
6155 int seg; 6202 int seg;
6203 int i;
6156 size_t size; 6204 size_t size;
6157 unsigned long addr; 6205 unsigned long addr;
6158 unsigned blocksize_mask = root->sectorsize - 1; 6206 unsigned blocksize_mask = root->sectorsize - 1;
@@ -6167,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6167 addr = (unsigned long)iov[seg].iov_base; 6215 addr = (unsigned long)iov[seg].iov_base;
6168 size = iov[seg].iov_len; 6216 size = iov[seg].iov_len;
6169 end += size; 6217 end += size;
6170 if ((addr & blocksize_mask) || (size & blocksize_mask)) 6218 if ((addr & blocksize_mask) || (size & blocksize_mask))
6171 goto out; 6219 goto out;
6220
6221 /* If this is a write we don't need to check anymore */
6222 if (rw & WRITE)
6223 continue;
6224
6225 /*
6226 * Check to make sure we don't have duplicate iov_base's in this
6227 * iovec, if so return EINVAL, otherwise we'll get csum errors
6228 * when reading back.
6229 */
6230 for (i = seg + 1; i < nr_segs; i++) {
6231 if (iov[seg].iov_base == iov[i].iov_base)
6232 goto out;
6233 }
6172 } 6234 }
6173 retval = 0; 6235 retval = 0;
6174out: 6236out:
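
Editor's note: check_direct_IO() additionally rejects read iovecs that reuse an iov_base, since two segments resolving to the same user buffer would produce csum errors on read-back. The scan is a plain quadratic pass over the segments — a userspace rendering, with a harness of our own:

#include <stdio.h>
#include <sys/uio.h>

/* Return 1 if any two iovec segments share a base address, as the
 * patched check_direct_IO() tests for reads (writes skip the scan). */
static int has_duplicate_base(const struct iovec *iov, unsigned long nr_segs)
{
	unsigned long seg, i;

	for (seg = 0; seg < nr_segs; seg++)
		for (i = seg + 1; i < nr_segs; i++)
			if (iov[seg].iov_base == iov[i].iov_base)
				return 1;
	return 0;
}

int main(void)
{
	char buf[8192];
	struct iovec iov[2] = {
		{ .iov_base = buf, .iov_len = 4096 },
		{ .iov_base = buf, .iov_len = 4096 },	/* same base: -EINVAL */
	};

	printf("duplicate base: %d\n", has_duplicate_base(iov, 2));
	return 0;
}
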
@@ -6960,8 +7022,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6960 * should cover the worst case number of items we'll modify. 7022 * should cover the worst case number of items we'll modify.
6961 */ 7023 */
6962 trans = btrfs_start_transaction(root, 20); 7024 trans = btrfs_start_transaction(root, 20);
6963 if (IS_ERR(trans)) 7025 if (IS_ERR(trans)) {
6964 return PTR_ERR(trans); 7026 ret = PTR_ERR(trans);
7027 goto out_notrans;
7028 }
6965 7029
6966 btrfs_set_trans_block_group(trans, new_dir); 7030 btrfs_set_trans_block_group(trans, new_dir);
6967 7031
@@ -7061,7 +7125,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7061 } 7125 }
7062out_fail: 7126out_fail:
7063 btrfs_end_transaction_throttle(trans, root); 7127 btrfs_end_transaction_throttle(trans, root);
7064 7128out_notrans:
7065 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 7129 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
7066 up_read(&root->fs_info->subvol_sem); 7130 up_read(&root->fs_info->subvol_sem);
7067 7131
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7c07fe26b7cf..ffb48d6c5433 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -373,6 +373,10 @@ static noinline int create_subvol(struct btrfs_root *root,
373 inode_item->nbytes = cpu_to_le64(root->leafsize); 373 inode_item->nbytes = cpu_to_le64(root->leafsize);
374 inode_item->mode = cpu_to_le32(S_IFDIR | 0755); 374 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
375 375
376 root_item.flags = 0;
377 root_item.byte_limit = 0;
378 inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
379
376 btrfs_set_root_bytenr(&root_item, leaf->start); 380 btrfs_set_root_bytenr(&root_item, leaf->start);
377 btrfs_set_root_generation(&root_item, trans->transid); 381 btrfs_set_root_generation(&root_item, trans->transid);
378 btrfs_set_root_level(&root_item, 0); 382 btrfs_set_root_level(&root_item, 0);
@@ -2283,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
2283 struct btrfs_ioctl_space_info space; 2287 struct btrfs_ioctl_space_info space;
2284 struct btrfs_ioctl_space_info *dest; 2288 struct btrfs_ioctl_space_info *dest;
2285 struct btrfs_ioctl_space_info *dest_orig; 2289 struct btrfs_ioctl_space_info *dest_orig;
2286 struct btrfs_ioctl_space_info *user_dest; 2290 struct btrfs_ioctl_space_info __user *user_dest;
2287 struct btrfs_space_info *info; 2291 struct btrfs_space_info *info;
2288 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 2292 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2289 BTRFS_BLOCK_GROUP_SYSTEM, 2293 BTRFS_BLOCK_GROUP_SYSTEM,
@@ -2436,8 +2440,10 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
2436 return PTR_ERR(trans); 2440 return PTR_ERR(trans);
2437 transid = trans->transid; 2441 transid = trans->transid;
2438 ret = btrfs_commit_transaction_async(trans, root, 0); 2442 ret = btrfs_commit_transaction_async(trans, root, 0);
2439 if (ret) 2443 if (ret) {
2444 btrfs_end_transaction(trans, root);
2440 return ret; 2445 return ret;
2446 }
2441 2447
2442 if (argp) 2448 if (argp)
2443 if (copy_to_user(argp, &transid, sizeof(transid))) 2449 if (copy_to_user(argp, &transid, sizeof(transid)))
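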
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 58250e09eb05..199a80134312 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2346,7 +2346,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2346 root = next->root; 2346 root = next->root;
2347 BUG_ON(!root); 2347 BUG_ON(!root);
2348 2348
2349 /* no other choice for non-refernce counted tree */ 2349 /* no other choice for non-reference counted tree */
2350 if (!root->ref_cows) 2350 if (!root->ref_cows)
2351 return root; 2351 return root;
2352 2352
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 29b2d7c930eb..6928bff62daa 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -473,3 +473,21 @@ again:
473 btrfs_free_path(path); 473 btrfs_free_path(path);
474 return 0; 474 return 0;
475} 475}
476
477/*
478 * Old btrfs forgets to init root_item->flags and root_item->byte_limit
479 * for subvolumes. To work around this problem, we steal a bit from
480 * root_item->inode_item->flags, and use it to indicate if those fields
481 * have been properly initialized.
482 */
483void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
484{
485 u64 inode_flags = le64_to_cpu(root_item->inode.flags);
486
487 if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
488 inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
489 root_item->inode.flags = cpu_to_le64(inode_flags);
490 root_item->flags = 0;
491 root_item->byte_limit = 0;
492 }
493}
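
Editor's note: the two sides of this workaround meet here — create_subvol() (ioctl.c hunk above) stamps BTRFS_INODE_ROOT_ITEM_INIT on new roots, and btrfs_check_and_init_root_item() sanitizes flags/byte_limit exactly once on roots created before those fields were initialized. A userspace model; the flag's bit position is assumed for the demo, not taken from the headers:

#include <assert.h>
#include <stdint.h>

#define BTRFS_INODE_ROOT_ITEM_INIT (1ULL << 31)	/* assumed bit, demo only */

struct root_item {
	uint64_t inode_flags;	/* stands in for root_item->inode.flags */
	uint64_t flags;		/* possibly garbage on old filesystems  */
	uint64_t byte_limit;	/* likewise                             */
};

static void check_and_init_root_item(struct root_item *ri)
{
	if (!(ri->inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
		ri->inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
		ri->flags = 0;		/* sanitize once ...               */
		ri->byte_limit = 0;	/* ... then the marker keeps it so */
	}
}

int main(void)
{
	struct root_item old = { 0, 0xdeadbeef, 0xdeadbeef };	/* pre-patch root */

	check_and_init_root_item(&old);
	assert(old.flags == 0 && old.byte_limit == 0);
	assert(old.inode_flags & BTRFS_INODE_ROOT_ITEM_INIT);
	return 0;
}
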
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2edfc039f098..0ac712efcdf2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -159,7 +159,7 @@ enum {
159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type, 159 Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, 160 Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, 161 Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
162 Opt_enospc_debug, Opt_err, 162 Opt_enospc_debug, Opt_subvolrootid, Opt_err,
163}; 163};
164 164
165static match_table_t tokens = { 165static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
189 {Opt_clear_cache, "clear_cache"}, 189 {Opt_clear_cache, "clear_cache"},
190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 190 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191 {Opt_enospc_debug, "enospc_debug"}, 191 {Opt_enospc_debug, "enospc_debug"},
192 {Opt_subvolrootid, "subvolrootid=%d"},
192 {Opt_err, NULL}, 193 {Opt_err, NULL},
193}; 194};
194 195
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
232 break; 233 break;
233 case Opt_subvol: 234 case Opt_subvol:
234 case Opt_subvolid: 235 case Opt_subvolid:
236 case Opt_subvolrootid:
235 case Opt_device: 237 case Opt_device:
236 /* 238 /*
237 * These are parsed by btrfs_parse_early_options 239 * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
388 */ 390 */
389static int btrfs_parse_early_options(const char *options, fmode_t flags, 391static int btrfs_parse_early_options(const char *options, fmode_t flags,
390 void *holder, char **subvol_name, u64 *subvol_objectid, 392 void *holder, char **subvol_name, u64 *subvol_objectid,
391 struct btrfs_fs_devices **fs_devices) 393 u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
392{ 394{
393 substring_t args[MAX_OPT_ARGS]; 395 substring_t args[MAX_OPT_ARGS];
394 char *opts, *orig, *p; 396 char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
429 *subvol_objectid = intarg; 431 *subvol_objectid = intarg;
430 } 432 }
431 break; 433 break;
434 case Opt_subvolrootid:
435 intarg = 0;
436 error = match_int(&args[0], &intarg);
437 if (!error) {
438 /* we want the original fs_tree */
439 if (!intarg)
440 *subvol_rootid =
441 BTRFS_FS_TREE_OBJECTID;
442 else
443 *subvol_rootid = intarg;
444 }
445 break;
432 case Opt_device: 446 case Opt_device:
433 error = btrfs_scan_one_device(match_strdup(&args[0]), 447 error = btrfs_scan_one_device(match_strdup(&args[0]),
434 flags, holder, fs_devices); 448 flags, holder, fs_devices);
@@ -644,6 +658,7 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
644{ 658{
645 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); 659 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
646 struct btrfs_fs_info *info = root->fs_info; 660 struct btrfs_fs_info *info = root->fs_info;
661 char *compress_type;
647 662
648 if (btrfs_test_opt(root, DEGRADED)) 663 if (btrfs_test_opt(root, DEGRADED))
649 seq_puts(seq, ",degraded"); 664 seq_puts(seq, ",degraded");
@@ -662,8 +677,16 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
662 if (info->thread_pool_size != min_t(unsigned long, 677 if (info->thread_pool_size != min_t(unsigned long,
663 num_online_cpus() + 2, 8)) 678 num_online_cpus() + 2, 8))
664 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 679 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
665 if (btrfs_test_opt(root, COMPRESS)) 680 if (btrfs_test_opt(root, COMPRESS)) {
666 seq_puts(seq, ",compress"); 681 if (info->compress_type == BTRFS_COMPRESS_ZLIB)
682 compress_type = "zlib";
683 else
684 compress_type = "lzo";
685 if (btrfs_test_opt(root, FORCE_COMPRESS))
686 seq_printf(seq, ",compress-force=%s", compress_type);
687 else
688 seq_printf(seq, ",compress=%s", compress_type);
689 }
667 if (btrfs_test_opt(root, NOSSD)) 690 if (btrfs_test_opt(root, NOSSD))
668 seq_puts(seq, ",nossd"); 691 seq_puts(seq, ",nossd");
669 if (btrfs_test_opt(root, SSD_SPREAD)) 692 if (btrfs_test_opt(root, SSD_SPREAD))
@@ -678,6 +701,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
678 seq_puts(seq, ",discard"); 701 seq_puts(seq, ",discard");
679 if (!(root->fs_info->sb->s_flags & MS_POSIXACL)) 702 if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
680 seq_puts(seq, ",noacl"); 703 seq_puts(seq, ",noacl");
704 if (btrfs_test_opt(root, SPACE_CACHE))
705 seq_puts(seq, ",space_cache");
706 if (btrfs_test_opt(root, CLEAR_CACHE))
707 seq_puts(seq, ",clear_cache");
708 if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
709 seq_puts(seq, ",user_subvol_rm_allowed");
681 return 0; 710 return 0;
682} 711}
683 712
@@ -721,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
721 fmode_t mode = FMODE_READ; 750 fmode_t mode = FMODE_READ;
722 char *subvol_name = NULL; 751 char *subvol_name = NULL;
723 u64 subvol_objectid = 0; 752 u64 subvol_objectid = 0;
753 u64 subvol_rootid = 0;
724 int error = 0; 754 int error = 0;
725 755
726 if (!(flags & MS_RDONLY)) 756 if (!(flags & MS_RDONLY))
@@ -728,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
728 758
729 error = btrfs_parse_early_options(data, mode, fs_type, 759 error = btrfs_parse_early_options(data, mode, fs_type,
730 &subvol_name, &subvol_objectid, 760 &subvol_name, &subvol_objectid,
731 &fs_devices); 761 &subvol_rootid, &fs_devices);
732 if (error) 762 if (error)
733 return ERR_PTR(error); 763 return ERR_PTR(error);
734 764
@@ -792,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
792 s->s_flags |= MS_ACTIVE; 822 s->s_flags |= MS_ACTIVE;
793 } 823 }
794 824
795 root = get_default_root(s, subvol_objectid);
796 if (IS_ERR(root)) {
797 error = PTR_ERR(root);
798 deactivate_locked_super(s);
799 goto error_free_subvol_name;
800 }
801 /* if they gave us a subvolume name bind mount into that */ 825 /* if they gave us a subvolume name bind mount into that */
802 if (strcmp(subvol_name, ".")) { 826 if (strcmp(subvol_name, ".")) {
803 struct dentry *new_root; 827 struct dentry *new_root;
828
829 root = get_default_root(s, subvol_rootid);
830 if (IS_ERR(root)) {
831 error = PTR_ERR(root);
832 deactivate_locked_super(s);
833 goto error_free_subvol_name;
834 }
835
804 mutex_lock(&root->d_inode->i_mutex); 836 mutex_lock(&root->d_inode->i_mutex);
805 new_root = lookup_one_len(subvol_name, root, 837 new_root = lookup_one_len(subvol_name, root,
806 strlen(subvol_name)); 838 strlen(subvol_name));
@@ -821,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
821 } 853 }
822 dput(root); 854 dput(root);
823 root = new_root; 855 root = new_root;
856 } else {
857 root = get_default_root(s, subvol_objectid);
858 if (IS_ERR(root)) {
859 error = PTR_ERR(root);
860 deactivate_locked_super(s);
861 goto error_free_subvol_name;
862 }
824 } 863 }
825 864
826 kfree(subvol_name); 865 kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index ce48eb59d615..c571734d5e5a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -32,10 +32,8 @@
32 32
33static noinline void put_transaction(struct btrfs_transaction *transaction) 33static noinline void put_transaction(struct btrfs_transaction *transaction)
34{ 34{
35 WARN_ON(transaction->use_count == 0); 35 WARN_ON(atomic_read(&transaction->use_count) == 0);
36 transaction->use_count--; 36 if (atomic_dec_and_test(&transaction->use_count)) {
37 if (transaction->use_count == 0) {
38 list_del_init(&transaction->list);
39 memset(transaction, 0, sizeof(*transaction)); 37 memset(transaction, 0, sizeof(*transaction));
40 kmem_cache_free(btrfs_transaction_cachep, transaction); 38 kmem_cache_free(btrfs_transaction_cachep, transaction);
41 } 39 }
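
Editor's note: converting use_count (and num_writers below) to atomic_t folds the last-reference test and the decrement into one indivisible operation, which is what lets a later hunk drop the trans_mutex lock/unlock around __btrfs_end_transaction()'s bookkeeping. A userspace analogue using C11 atomics; the kernel's atomic_dec_and_test() corresponds to the fetch_sub below:

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct transaction {
	atomic_int use_count;	/* was a plain int under trans_mutex */
};

static void put_transaction(struct transaction *t)
{
	assert(atomic_load(&t->use_count) > 0);	/* WARN_ON in the patch */
	/* fetch_sub returns the old value: 1 means we dropped the last ref */
	if (atomic_fetch_sub(&t->use_count, 1) == 1) {
		printf("freeing transaction\n");
		free(t);
	}
}

int main(void)
{
	struct transaction *t = malloc(sizeof(*t));

	atomic_init(&t->use_count, 1);		/* join_transaction()    */
	atomic_fetch_add(&t->use_count, 1);	/* a second holder       */
	put_transaction(t);			/* not last: no free     */
	put_transaction(t);			/* last reference: freed */
	return 0;
}
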
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
60 if (!cur_trans) 58 if (!cur_trans)
61 return -ENOMEM; 59 return -ENOMEM;
62 root->fs_info->generation++; 60 root->fs_info->generation++;
63 cur_trans->num_writers = 1; 61 atomic_set(&cur_trans->num_writers, 1);
64 cur_trans->num_joined = 0; 62 cur_trans->num_joined = 0;
65 cur_trans->transid = root->fs_info->generation; 63 cur_trans->transid = root->fs_info->generation;
66 init_waitqueue_head(&cur_trans->writer_wait); 64 init_waitqueue_head(&cur_trans->writer_wait);
67 init_waitqueue_head(&cur_trans->commit_wait); 65 init_waitqueue_head(&cur_trans->commit_wait);
68 cur_trans->in_commit = 0; 66 cur_trans->in_commit = 0;
69 cur_trans->blocked = 0; 67 cur_trans->blocked = 0;
70 cur_trans->use_count = 1; 68 atomic_set(&cur_trans->use_count, 1);
71 cur_trans->commit_done = 0; 69 cur_trans->commit_done = 0;
72 cur_trans->start_time = get_seconds(); 70 cur_trans->start_time = get_seconds();
73 71
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
88 root->fs_info->running_transaction = cur_trans; 86 root->fs_info->running_transaction = cur_trans;
89 spin_unlock(&root->fs_info->new_trans_lock); 87 spin_unlock(&root->fs_info->new_trans_lock);
90 } else { 88 } else {
91 cur_trans->num_writers++; 89 atomic_inc(&cur_trans->num_writers);
92 cur_trans->num_joined++; 90 cur_trans->num_joined++;
93 } 91 }
94 92
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
145 cur_trans = root->fs_info->running_transaction; 143 cur_trans = root->fs_info->running_transaction;
146 if (cur_trans && cur_trans->blocked) { 144 if (cur_trans && cur_trans->blocked) {
147 DEFINE_WAIT(wait); 145 DEFINE_WAIT(wait);
148 cur_trans->use_count++; 146 atomic_inc(&cur_trans->use_count);
149 while (1) { 147 while (1) {
150 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 148 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
151 TASK_UNINTERRUPTIBLE); 149 TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
181{ 179{
182 struct btrfs_trans_handle *h; 180 struct btrfs_trans_handle *h;
183 struct btrfs_transaction *cur_trans; 181 struct btrfs_transaction *cur_trans;
182 int retries = 0;
184 int ret; 183 int ret;
185 184
186 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 185 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -197,13 +196,14 @@ again:
197 196
198 ret = join_transaction(root); 197 ret = join_transaction(root);
199 if (ret < 0) { 198 if (ret < 0) {
199 kmem_cache_free(btrfs_trans_handle_cachep, h);
200 if (type != TRANS_JOIN_NOLOCK) 200 if (type != TRANS_JOIN_NOLOCK)
201 mutex_unlock(&root->fs_info->trans_mutex); 201 mutex_unlock(&root->fs_info->trans_mutex);
202 return ERR_PTR(ret); 202 return ERR_PTR(ret);
203 } 203 }
204 204
205 cur_trans = root->fs_info->running_transaction; 205 cur_trans = root->fs_info->running_transaction;
206 cur_trans->use_count++; 206 atomic_inc(&cur_trans->use_count);
207 if (type != TRANS_JOIN_NOLOCK) 207 if (type != TRANS_JOIN_NOLOCK)
208 mutex_unlock(&root->fs_info->trans_mutex); 208 mutex_unlock(&root->fs_info->trans_mutex);
209 209
@@ -223,10 +223,18 @@ again:
223 223
224 if (num_items > 0) { 224 if (num_items > 0) {
225 ret = btrfs_trans_reserve_metadata(h, root, num_items); 225 ret = btrfs_trans_reserve_metadata(h, root, num_items);
226 if (ret == -EAGAIN) { 226 if (ret == -EAGAIN && !retries) {
227 retries++;
227 btrfs_commit_transaction(h, root); 228 btrfs_commit_transaction(h, root);
228 goto again; 229 goto again;
230 } else if (ret == -EAGAIN) {
231 /*
232 * We have already retried and got EAGAIN, so really we
233 * don't have space, so set ret to -ENOSPC.
234 */
235 ret = -ENOSPC;
229 } 236 }
237
230 if (ret < 0) { 238 if (ret < 0) {
231 btrfs_end_transaction(h, root); 239 btrfs_end_transaction(h, root);
232 return ERR_PTR(ret); 240 return ERR_PTR(ret);
@@ -326,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
326 goto out_unlock; /* nothing committing|committed */ 334 goto out_unlock; /* nothing committing|committed */
327 } 335 }
328 336
329 cur_trans->use_count++; 337 atomic_inc(&cur_trans->use_count);
330 mutex_unlock(&root->fs_info->trans_mutex); 338 mutex_unlock(&root->fs_info->trans_mutex);
331 339
332 wait_for_commit(root, cur_trans); 340 wait_for_commit(root, cur_trans);
@@ -456,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
456 wake_up_process(info->transaction_kthread); 464 wake_up_process(info->transaction_kthread);
457 } 465 }
458 466
459 if (lock)
460 mutex_lock(&info->trans_mutex);
461 WARN_ON(cur_trans != info->running_transaction); 467 WARN_ON(cur_trans != info->running_transaction);
462 WARN_ON(cur_trans->num_writers < 1); 468 WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
463 cur_trans->num_writers--; 469 atomic_dec(&cur_trans->num_writers);
464 470
465 smp_mb(); 471 smp_mb();
466 if (waitqueue_active(&cur_trans->writer_wait)) 472 if (waitqueue_active(&cur_trans->writer_wait))
467 wake_up(&cur_trans->writer_wait); 473 wake_up(&cur_trans->writer_wait);
468 put_transaction(cur_trans); 474 put_transaction(cur_trans);
469 if (lock)
470 mutex_unlock(&info->trans_mutex);
471 475
472 if (current->journal_info == trans) 476 if (current->journal_info == trans)
473 current->journal_info = NULL; 477 current->journal_info = NULL;
@@ -975,6 +979,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
975 record_root_in_trans(trans, root); 979 record_root_in_trans(trans, root);
976 btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 980 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
977 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 981 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
982 btrfs_check_and_init_root_item(new_root_item);
978 983
979 root_flags = btrfs_root_flags(new_root_item); 984 root_flags = btrfs_root_flags(new_root_item);
980 if (pending->readonly) 985 if (pending->readonly)
@@ -1176,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1176 /* take transaction reference */ 1181 /* take transaction reference */
1177 mutex_lock(&root->fs_info->trans_mutex); 1182 mutex_lock(&root->fs_info->trans_mutex);
1178 cur_trans = trans->transaction; 1183 cur_trans = trans->transaction;
1179 cur_trans->use_count++; 1184 atomic_inc(&cur_trans->use_count);
1180 mutex_unlock(&root->fs_info->trans_mutex); 1185 mutex_unlock(&root->fs_info->trans_mutex);
1181 1186
1182 btrfs_end_transaction(trans, root); 1187 btrfs_end_transaction(trans, root);
@@ -1235,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1235 1240
1236 mutex_lock(&root->fs_info->trans_mutex); 1241 mutex_lock(&root->fs_info->trans_mutex);
1237 if (cur_trans->in_commit) { 1242 if (cur_trans->in_commit) {
1238 cur_trans->use_count++; 1243 atomic_inc(&cur_trans->use_count);
1239 mutex_unlock(&root->fs_info->trans_mutex); 1244 mutex_unlock(&root->fs_info->trans_mutex);
1240 btrfs_end_transaction(trans, root); 1245 btrfs_end_transaction(trans, root);
1241 1246
@@ -1257,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1257 prev_trans = list_entry(cur_trans->list.prev, 1262 prev_trans = list_entry(cur_trans->list.prev,
1258 struct btrfs_transaction, list); 1263 struct btrfs_transaction, list);
1259 if (!prev_trans->commit_done) { 1264 if (!prev_trans->commit_done) {
1260 prev_trans->use_count++; 1265 atomic_inc(&prev_trans->use_count);
1261 mutex_unlock(&root->fs_info->trans_mutex); 1266 mutex_unlock(&root->fs_info->trans_mutex);
1262 1267
1263 wait_for_commit(root, prev_trans); 1268 wait_for_commit(root, prev_trans);
@@ -1298,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1298 TASK_UNINTERRUPTIBLE); 1303 TASK_UNINTERRUPTIBLE);
1299 1304
1300 smp_mb(); 1305 smp_mb();
1301 if (cur_trans->num_writers > 1) 1306 if (atomic_read(&cur_trans->num_writers) > 1)
1302 schedule_timeout(MAX_SCHEDULE_TIMEOUT); 1307 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1303 else if (should_grow) 1308 else if (should_grow)
1304 schedule_timeout(1); 1309 schedule_timeout(1);
1305 1310
1306 mutex_lock(&root->fs_info->trans_mutex); 1311 mutex_lock(&root->fs_info->trans_mutex);
1307 finish_wait(&cur_trans->writer_wait, &wait); 1312 finish_wait(&cur_trans->writer_wait, &wait);
1308 } while (cur_trans->num_writers > 1 || 1313 } while (atomic_read(&cur_trans->num_writers) > 1 ||
1309 (should_grow && cur_trans->num_joined != joined)); 1314 (should_grow && cur_trans->num_joined != joined));
1310 1315
1311 ret = create_pending_snapshots(trans, root->fs_info); 1316 ret = create_pending_snapshots(trans, root->fs_info);
@@ -1392,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1392 1397
1393 wake_up(&cur_trans->commit_wait); 1398 wake_up(&cur_trans->commit_wait);
1394 1399
1400 list_del_init(&cur_trans->list);
1395 put_transaction(cur_trans); 1401 put_transaction(cur_trans);
1396 put_transaction(cur_trans); 1402 put_transaction(cur_trans);
1397 1403
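
The conversion above is the core of this patch: use_count and num_writers become atomic_t so that put_transaction() no longer needs trans_mutex, while list removal moves to the single commit path so the last put only has to free. A self-contained C11 model of the atomic_dec_and_test() refcount pattern (userspace stand-ins, not the kernel API):

#include <stdatomic.h>
#include <stdlib.h>
#include <assert.h>

struct transaction {
        atomic_int use_count;
        /* ... payload ... */
};

static struct transaction *get_transaction(struct transaction *t)
{
        atomic_fetch_add(&t->use_count, 1);
        return t;
}

static void put_transaction(struct transaction *t)
{
        assert(atomic_load(&t->use_count) > 0);
        /* fetch_sub returns the old value: old == 1 means this caller
         * held the final reference and may free, no lock required */
        if (atomic_fetch_sub(&t->use_count, 1) == 1)
                free(t);
}

int main(void)
{
        struct transaction *t = malloc(sizeof(*t));

        atomic_init(&t->use_count, 1);
        get_transaction(t);
        put_transaction(t);     /* drops 2 -> 1, no free */
        put_transaction(t);     /* drops 1 -> 0, freed   */
        return 0;
}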
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594cacd5..e441acc6c584 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,11 +27,11 @@ struct btrfs_transaction {
27 * total writers in this transaction, it must be zero before the 27 * total writers in this transaction, it must be zero before the
28 * transaction can end 28 * transaction can end
29 */ 29 */
30 unsigned long num_writers; 30 atomic_t num_writers;
31 31
32 unsigned long num_joined; 32 unsigned long num_joined;
33 int in_commit; 33 int in_commit;
34 int use_count; 34 atomic_t use_count;
35 int commit_done; 35 int commit_done;
36 int blocked; 36 int blocked;
37 struct list_head list; 37 struct list_head list;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b871b13..cfd660550ded 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
182 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
183 int ret = 0, slot, advance; 183 int ret = 0, slot;
184 size_t total_size = 0, size_left = size; 184 size_t total_size = 0, size_left = size;
185 unsigned long name_ptr; 185 unsigned long name_ptr;
186 size_t name_len; 186 size_t name_len;
187 u32 nritems;
188 187
189 /* 188 /*
190 * ok we want all objects associated with this id. 189 * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
204 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 203 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
205 if (ret < 0) 204 if (ret < 0)
206 goto err; 205 goto err;
207 advance = 0; 206
208 while (1) { 207 while (1) {
209 leaf = path->nodes[0]; 208 leaf = path->nodes[0];
210 nritems = btrfs_header_nritems(leaf);
211 slot = path->slots[0]; 209 slot = path->slots[0];
212 210
213 /* this is where we start walking through the path */ 211 /* this is where we start walking through the path */
214 if (advance || slot >= nritems) { 212 if (slot >= btrfs_header_nritems(leaf)) {
215 /* 213 /*
216 * if we've reached the last slot in this leaf we need 214 * if we've reached the last slot in this leaf we need
217 * to go to the next leaf and reset everything 215 * to go to the next leaf and reset everything
218 */ 216 */
219 if (slot >= nritems-1) { 217 ret = btrfs_next_leaf(root, path);
220 ret = btrfs_next_leaf(root, path); 218 if (ret < 0)
221 if (ret) 219 goto err;
222 break; 220 else if (ret > 0)
223 leaf = path->nodes[0]; 221 break;
224 nritems = btrfs_header_nritems(leaf); 222 continue;
225 slot = path->slots[0];
226 } else {
227 /*
228 * just walking through the slots on this leaf
229 */
230 slot++;
231 path->slots[0]++;
232 }
233 } 223 }
234 advance = 1;
235 224
236 btrfs_item_key_to_cpu(leaf, &found_key, slot); 225 btrfs_item_key_to_cpu(leaf, &found_key, slot);
237 226
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
250 239
251 /* we are just looking for how big our buffer needs to be */ 240 /* we are just looking for how big our buffer needs to be */
252 if (!size) 241 if (!size)
253 continue; 242 goto next;
254 243
255 if (!buffer || (name_len + 1) > size_left) { 244 if (!buffer || (name_len + 1) > size_left) {
256 ret = -ERANGE; 245 ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
263 252
264 size_left -= name_len + 1; 253 size_left -= name_len + 1;
265 buffer += name_len + 1; 254 buffer += name_len + 1;
255next:
256 path->slots[0]++;
266 } 257 }
267 ret = total_size; 258 ret = total_size;
268 259
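
The listxattr rewrite collapses the old advance flag into one loop shape: bump path->slots[0] in exactly one place and call btrfs_next_leaf() only when the slot runs off the current leaf. A toy userspace walk with the same shape (arrays stand in for leaves; not the btrfs structures):

#include <stdio.h>

#define LEAVES 3
#define ITEMS  4

static int leaf_items[LEAVES][ITEMS] = {
        { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 },
};

struct path { int leaf; int slot; };

/* returns 0 on success, >0 when there is no next leaf */
static int next_leaf(struct path *p)
{
        if (p->leaf + 1 >= LEAVES)
                return 1;
        p->leaf++;
        p->slot = 0;
        return 0;
}

int main(void)
{
        struct path path = { 0, 0 };

        while (1) {
                if (path.slot >= ITEMS) {
                        if (next_leaf(&path))
                                break;          /* walked every leaf */
                        continue;
                }
                printf("%d ", leaf_items[path.leaf][path.slot]);
                path.slot++;                    /* the single advance */
        }
        printf("\n");
        return 0;
}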
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 37fe101a4e0d..1064805e653b 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -197,7 +197,7 @@ struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
197} 197}
198 198
199/* 199/*
200 * update the auxilliary data for an object object on disk 200 * update the auxiliary data for an object object on disk
201 */ 201 */
202static void cachefiles_update_object(struct fscache_object *_object) 202static void cachefiles_update_object(struct fscache_object *_object)
203{ 203{
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 37368ba2e67c..e159c529fd2b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -24,7 +24,7 @@
24 * context needs to be associated with the osd write during writeback. 24 * context needs to be associated with the osd write during writeback.
25 * 25 *
26 * Similarly, struct ceph_inode_info maintains a set of counters to 26 * Similarly, struct ceph_inode_info maintains a set of counters to
27 * count dirty pages on the inode. In the absense of snapshots, 27 * count dirty pages on the inode. In the absence of snapshots,
28 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count. 28 * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
29 * 29 *
30 * When a snapshot is taken (that is, when the client receives 30 * When a snapshot is taken (that is, when the client receives
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6b61ded701e1..5323c330bbf3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -765,7 +765,7 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
765 if (touch) { 765 if (touch) {
766 struct rb_node *q; 766 struct rb_node *q;
767 767
768 /* touch this + preceeding caps */ 768 /* touch this + preceding caps */
769 __touch_cap(cap); 769 __touch_cap(cap);
770 for (q = rb_first(&ci->i_caps); q != p; 770 for (q = rb_first(&ci->i_caps); q != p;
771 q = rb_next(q)) { 771 q = rb_next(q)) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a1ee8fa3a8e7..f60b07b0feb0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3215,9 +3215,15 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
3215{ 3215{
3216 struct ceph_mds_client *mdsc = fsc->mdsc; 3216 struct ceph_mds_client *mdsc = fsc->mdsc;
3217 3217
3218 dout("mdsc_destroy %p\n", mdsc);
3218 ceph_mdsc_stop(mdsc); 3219 ceph_mdsc_stop(mdsc);
3220
3221 /* flush out any connection work with references to us */
3222 ceph_msgr_flush();
3223
3219 fsc->mdsc = NULL; 3224 fsc->mdsc = NULL;
3220 kfree(mdsc); 3225 kfree(mdsc);
3226 dout("mdsc_destroy %p done\n", mdsc);
3221} 3227}
3222 3228
3223 3229
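
The ordering in this hunk is the fix: stop the MDS client, flush messenger work that may still hold references to it, and only then free the structure. A rough userspace analogue of that stop/flush/free discipline, using a thread as the stand-in for queued connection work (sketch only; ceph_msgr_flush() drains workqueues, not threads):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct mdsc { int id; };
static struct mdsc *g_mdsc;

static void *queued_work(void *arg)
{
        (void)arg;
        /* stands in for connection work still holding a reference */
        printf("work ran against mdsc %d\n", g_mdsc->id);
        return NULL;
}

int main(void)
{
        pthread_t t;

        g_mdsc = calloc(1, sizeof(*g_mdsc));
        pthread_create(&t, NULL, queued_work, NULL);

        /* 1. stop: nothing new gets queued past this point (elided) */
        /* 2. flush: wait out work that is already in flight */
        pthread_join(&t, NULL);
        /* 3. free: now provably unreferenced */
        free(g_mdsc);
        g_mdsc = NULL;
        return 0;
}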
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 0aee66b92af3..e86ec1155f8f 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -342,7 +342,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
342 num = 0; 342 num = 0;
343 snapc->seq = realm->seq; 343 snapc->seq = realm->seq;
344 if (parent) { 344 if (parent) {
345 /* include any of parent's snaps occuring _after_ my 345 /* include any of parent's snaps occurring _after_ my
346 parent became my parent */ 346 parent became my parent */
347 for (i = 0; i < parent->cached_context->num_snaps; i++) 347 for (i = 0; i < parent->cached_context->num_snaps; i++)
348 if (parent->cached_context->snaps[i] >= 348 if (parent->cached_context->snaps[i] >=
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a9e78b4a258c..f2f77fd3c14c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -353,7 +353,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
353 353
354 if (opt->name) 354 if (opt->name)
355 seq_printf(m, ",name=%s", opt->name); 355 seq_printf(m, ",name=%s", opt->name);
356 if (opt->secret) 356 if (opt->key)
357 seq_puts(m, ",secret=<hidden>"); 357 seq_puts(m, ",secret=<hidden>");
358 358
359 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) 359 if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS
index 7f7fa3c302af..ea940b1db77b 100644
--- a/fs/cifs/AUTHORS
+++ b/fs/cifs/AUTHORS
@@ -35,7 +35,7 @@ Adrian Bunk (kcalloc cleanups)
35Miklos Szeredi 35Miklos Szeredi
36Kazeon team for various fixes especially for 2.4 version. 36Kazeon team for various fixes especially for 2.4 version.
37Asser Ferno (Change Notify support) 37Asser Ferno (Change Notify support)
38Shaggy (Dave Kleikamp) for inumerable small fs suggestions and some good cleanup 38Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
39Gunter Kukkukk (testing and suggestions for support of old servers) 39Gunter Kukkukk (testing and suggestions for support of old servers)
40Igor Mammedov (DFS support) 40Igor Mammedov (DFS support)
41Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code) 41Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
diff --git a/fs/cifs/README b/fs/cifs/README
index fe1683590828..74ab165fc646 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -685,22 +685,6 @@ LinuxExtensionsEnabled If set to one then the client will attempt to
685 support and want to map the uid and gid fields 685 support and want to map the uid and gid fields
686 to values supplied at mount (rather than the 686 to values supplied at mount (rather than the
687 actual values, then set this to zero. (default 1) 687 actual values, then set this to zero. (default 1)
688Experimental When set to 1 used to enable certain experimental
689 features (currently enables multipage writes
690 when signing is enabled, the multipage write
691 performance enhancement was disabled when
692 signing turned on in case buffer was modified
693 just before it was sent, also this flag will
694 be used to use the new experimental directory change
695 notification code). When set to 2 enables
696 an additional experimental feature, "raw ntlmssp"
697 session establishment support (which allows
698 specifying "sec=ntlmssp" on mount). The Linux cifs
699 module will use ntlmv2 authentication encapsulated
700 in "raw ntlmssp" (not using SPNEGO) when
701 "sec=ntlmssp" is specified on mount.
702 This support also requires building cifs with
703 the CONFIG_CIFS_EXPERIMENTAL configuration flag.
704 688
705These experimental features and tracing can be enabled by changing flags in 689These experimental features and tracing can be enabled by changing flags in
706/proc/fs/cifs (after the cifs module has been installed or built into the 690/proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index e654dfd092c3..53d57a3fe427 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -50,7 +50,7 @@ void cifs_fscache_unregister(void)
50 */ 50 */
51struct cifs_server_key { 51struct cifs_server_key {
52 uint16_t family; /* address family */ 52 uint16_t family; /* address family */
53 uint16_t port; /* IP port */ 53 __be16 port; /* IP port */
54 union { 54 union {
55 struct in_addr ipv4_addr; 55 struct in_addr ipv4_addr;
56 struct in6_addr ipv6_addr; 56 struct in6_addr ipv6_addr;
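
Retyping the port as __be16 documents that the value stays in network byte order, and lets sparse flag any use that forgets to convert. The same discipline in portable C, with htons/ntohs standing in for the kernel's cpu_to_be16/be16_to_cpu:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t host_port = 445;               /* CPU byte order  */
        uint16_t wire_port = htons(host_port);  /* network order   */

        /* compare like with like: convert at the boundary, never
         * compare a raw wire value against a host value directly */
        if (ntohs(wire_port) == host_port)
                printf("ports match: %u\n", host_port);
        return 0;
}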
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 65829d32128c..30d01bc90855 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -423,7 +423,6 @@ static const struct file_operations cifs_lookup_cache_proc_fops;
423static const struct file_operations traceSMB_proc_fops; 423static const struct file_operations traceSMB_proc_fops;
424static const struct file_operations cifs_multiuser_mount_proc_fops; 424static const struct file_operations cifs_multiuser_mount_proc_fops;
425static const struct file_operations cifs_security_flags_proc_fops; 425static const struct file_operations cifs_security_flags_proc_fops;
426static const struct file_operations cifs_experimental_proc_fops;
427static const struct file_operations cifs_linux_ext_proc_fops; 426static const struct file_operations cifs_linux_ext_proc_fops;
428 427
429void 428void
@@ -441,8 +440,6 @@ cifs_proc_init(void)
441 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops); 440 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
442 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops); 441 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
443 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops); 442 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
444 proc_create("Experimental", 0, proc_fs_cifs,
445 &cifs_experimental_proc_fops);
446 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs, 443 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
447 &cifs_linux_ext_proc_fops); 444 &cifs_linux_ext_proc_fops);
448 proc_create("MultiuserMount", 0, proc_fs_cifs, 445 proc_create("MultiuserMount", 0, proc_fs_cifs,
@@ -469,7 +466,6 @@ cifs_proc_clean(void)
469 remove_proc_entry("OplockEnabled", proc_fs_cifs); 466 remove_proc_entry("OplockEnabled", proc_fs_cifs);
470 remove_proc_entry("SecurityFlags", proc_fs_cifs); 467 remove_proc_entry("SecurityFlags", proc_fs_cifs);
471 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 468 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
472 remove_proc_entry("Experimental", proc_fs_cifs);
473 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 469 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
474 remove_proc_entry("fs/cifs", NULL); 470 remove_proc_entry("fs/cifs", NULL);
475} 471}
@@ -550,45 +546,6 @@ static const struct file_operations cifs_oplock_proc_fops = {
550 .write = cifs_oplock_proc_write, 546 .write = cifs_oplock_proc_write,
551}; 547};
552 548
553static int cifs_experimental_proc_show(struct seq_file *m, void *v)
554{
555 seq_printf(m, "%d\n", experimEnabled);
556 return 0;
557}
558
559static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
560{
561 return single_open(file, cifs_experimental_proc_show, NULL);
562}
563
564static ssize_t cifs_experimental_proc_write(struct file *file,
565 const char __user *buffer, size_t count, loff_t *ppos)
566{
567 char c;
568 int rc;
569
570 rc = get_user(c, buffer);
571 if (rc)
572 return rc;
573 if (c == '0' || c == 'n' || c == 'N')
574 experimEnabled = 0;
575 else if (c == '1' || c == 'y' || c == 'Y')
576 experimEnabled = 1;
577 else if (c == '2')
578 experimEnabled = 2;
579
580 return count;
581}
582
583static const struct file_operations cifs_experimental_proc_fops = {
584 .owner = THIS_MODULE,
585 .open = cifs_experimental_proc_open,
586 .read = seq_read,
587 .llseek = seq_lseek,
588 .release = single_release,
589 .write = cifs_experimental_proc_write,
590};
591
592static int cifs_linux_ext_proc_show(struct seq_file *m, void *v) 549static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
593{ 550{
594 seq_printf(m, "%d\n", linuxExtEnabled); 551 seq_printf(m, "%d\n", linuxExtEnabled);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 0a265ad9e426..2b68ac57d97d 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -53,7 +53,7 @@ void cifs_dfs_release_automount_timer(void)
53 * 53 *
54 * Extracts sharename form full UNC. 54 * Extracts sharename form full UNC.
55 * i.e. strips from UNC trailing path that is not part of share 55 * i.e. strips from UNC trailing path that is not part of share
56 * name and fixup missing '\' in the begining of DFS node refferal 56 * name and fixup missing '\' in the beginning of DFS node refferal
57 * if necessary. 57 * if necessary.
58 * Returns pointer to share name on success or ERR_PTR on error. 58 * Returns pointer to share name on success or ERR_PTR on error.
59 * Caller is responsible for freeing returned string. 59 * Caller is responsible for freeing returned string.
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4dfba8283165..33d221394aca 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -113,7 +113,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
113 MAX_MECH_STR_LEN + 113 MAX_MECH_STR_LEN +
114 UID_KEY_LEN + (sizeof(uid_t) * 2) + 114 UID_KEY_LEN + (sizeof(uid_t) * 2) +
115 CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + 115 CREDUID_KEY_LEN + (sizeof(uid_t) * 2) +
116 USER_KEY_LEN + strlen(sesInfo->userName) + 116 USER_KEY_LEN + strlen(sesInfo->user_name) +
117 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; 117 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
118 118
119 spnego_key = ERR_PTR(-ENOMEM); 119 spnego_key = ERR_PTR(-ENOMEM);
@@ -153,7 +153,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
153 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); 153 sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid);
154 154
155 dp = description + strlen(description); 155 dp = description + strlen(description);
156 sprintf(dp, ";user=%s", sesInfo->userName); 156 sprintf(dp, ";user=%s", sesInfo->user_name);
157 157
158 dp = description + strlen(description); 158 dp = description + strlen(description);
159 sprintf(dp, ";pid=0x%x", current->pid); 159 sprintf(dp, ";pid=0x%x", current->pid);
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index fc0fd4fde306..23d43cde4306 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -90,7 +90,7 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
90 case UNI_COLON: 90 case UNI_COLON:
91 *target = ':'; 91 *target = ':';
92 break; 92 break;
93 case UNI_ASTERIK: 93 case UNI_ASTERISK:
94 *target = '*'; 94 *target = '*';
95 break; 95 break;
96 case UNI_QUESTION: 96 case UNI_QUESTION:
@@ -264,40 +264,40 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
264 * names are little endian 16 bit Unicode on the wire 264 * names are little endian 16 bit Unicode on the wire
265 */ 265 */
266int 266int
267cifsConvertToUCS(__le16 *target, const char *source, int maxlen, 267cifsConvertToUCS(__le16 *target, const char *source, int srclen,
268 const struct nls_table *cp, int mapChars) 268 const struct nls_table *cp, int mapChars)
269{ 269{
270 int i, j, charlen; 270 int i, j, charlen;
271 int len_remaining = maxlen;
272 char src_char; 271 char src_char;
273 __u16 temp; 272 __le16 dst_char;
273 wchar_t tmp;
274 274
275 if (!mapChars) 275 if (!mapChars)
276 return cifs_strtoUCS(target, source, PATH_MAX, cp); 276 return cifs_strtoUCS(target, source, PATH_MAX, cp);
277 277
278 for (i = 0, j = 0; i < maxlen; j++) { 278 for (i = 0, j = 0; i < srclen; j++) {
279 src_char = source[i]; 279 src_char = source[i];
280 switch (src_char) { 280 switch (src_char) {
281 case 0: 281 case 0:
282 put_unaligned_le16(0, &target[j]); 282 put_unaligned(0, &target[j]);
283 goto ctoUCS_out; 283 goto ctoUCS_out;
284 case ':': 284 case ':':
285 temp = UNI_COLON; 285 dst_char = cpu_to_le16(UNI_COLON);
286 break; 286 break;
287 case '*': 287 case '*':
288 temp = UNI_ASTERIK; 288 dst_char = cpu_to_le16(UNI_ASTERISK);
289 break; 289 break;
290 case '?': 290 case '?':
291 temp = UNI_QUESTION; 291 dst_char = cpu_to_le16(UNI_QUESTION);
292 break; 292 break;
293 case '<': 293 case '<':
294 temp = UNI_LESSTHAN; 294 dst_char = cpu_to_le16(UNI_LESSTHAN);
295 break; 295 break;
296 case '>': 296 case '>':
297 temp = UNI_GRTRTHAN; 297 dst_char = cpu_to_le16(UNI_GRTRTHAN);
298 break; 298 break;
299 case '|': 299 case '|':
300 temp = UNI_PIPE; 300 dst_char = cpu_to_le16(UNI_PIPE);
301 break; 301 break;
302 /* 302 /*
303 * FIXME: We can not handle remapping backslash (UNI_SLASH) 303 * FIXME: We can not handle remapping backslash (UNI_SLASH)
@@ -305,17 +305,17 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
305 * as they use backslash as separator. 305 * as they use backslash as separator.
306 */ 306 */
307 default: 307 default:
308 charlen = cp->char2uni(source+i, len_remaining, 308 charlen = cp->char2uni(source + i, srclen - i, &tmp);
309 &temp); 309 dst_char = cpu_to_le16(tmp);
310
310 /* 311 /*
311 * if no match, use question mark, which at least in 312 * if no match, use question mark, which at least in
312 * some cases serves as wild card 313 * some cases serves as wild card
313 */ 314 */
314 if (charlen < 1) { 315 if (charlen < 1) {
315 temp = 0x003f; 316 dst_char = cpu_to_le16(0x003f);
316 charlen = 1; 317 charlen = 1;
317 } 318 }
318 len_remaining -= charlen;
319 /* 319 /*
320 * character may take more than one byte in the source 320 * character may take more than one byte in the source
321 * string, but will take exactly two bytes in the 321 * string, but will take exactly two bytes in the
@@ -324,9 +324,8 @@ cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
324 i += charlen; 324 i += charlen;
325 continue; 325 continue;
326 } 326 }
327 put_unaligned_le16(temp, &target[j]); 327 put_unaligned(dst_char, &target[j]);
328 i++; /* move to next char in source string */ 328 i++; /* move to next char in source string */
329 len_remaining--;
330 } 329 }
331 330
332ctoUCS_out: 331ctoUCS_out:
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 7fe6b52df507..644dd882a560 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -44,7 +44,7 @@
44 * reserved symbols (along with \ and /), otherwise illegal to store 44 * reserved symbols (along with \ and /), otherwise illegal to store
45 * in filenames in NTFS 45 * in filenames in NTFS
46 */ 46 */
47#define UNI_ASTERIK (__u16) ('*' + 0xF000) 47#define UNI_ASTERISK (__u16) ('*' + 0xF000)
48#define UNI_QUESTION (__u16) ('?' + 0xF000) 48#define UNI_QUESTION (__u16) ('?' + 0xF000)
49#define UNI_COLON (__u16) (':' + 0xF000) 49#define UNI_COLON (__u16) (':' + 0xF000)
50#define UNI_GRTRTHAN (__u16) ('>' + 0xF000) 50#define UNI_GRTRTHAN (__u16) ('>' + 0xF000)
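
UNI_ASTERISK and its neighbours map characters that are illegal in NTFS names into the 0xF000 private-use range, which is the rename this hunk makes consistent. A small sketch of that remap (toy table mirroring the UNI_* defines above):

#include <stdint.h>
#include <stdio.h>

static uint16_t remap_char(char c)
{
        switch (c) {
        case ':': case '*': case '?': case '<': case '>': case '|':
                return (uint16_t)c + 0xF000;    /* private-use range */
        default:
                return (uint16_t)c;             /* pass through      */
        }
}

int main(void)
{
        const char *name = "a*b?c";

        for (const char *p = name; *p; p++)
                printf("%c -> U+%04X\n", *p, remap_char(*p));
        return 0;
}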
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index a51585f9852b..d1a016be73ba 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -30,12 +30,13 @@
30#include <linux/ctype.h> 30#include <linux/ctype.h>
31#include <linux/random.h> 31#include <linux/random.h>
32 32
33/* Calculate and return the CIFS signature based on the mac key and SMB PDU */ 33/*
34/* the 16 byte signature must be allocated by the caller */ 34 * Calculate and return the CIFS signature based on the mac key and SMB PDU.
35/* Note we only use the 1st eight bytes */ 35 * The 16 byte signature must be allocated by the caller. Note we only use the
36/* Note that the smb header signature field on input contains the 36 * 1st eight bytes and that the smb header signature field on input contains
37 sequence number before this function is called */ 37 * the sequence number before this function is called. Also, this function
38 38 * should be called with the server->srv_mutex held.
39 */
39static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, 40static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
40 struct TCP_Server_Info *server, char *signature) 41 struct TCP_Server_Info *server, char *signature)
41{ 42{
@@ -209,8 +210,10 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
209 cpu_to_le32(expected_sequence_number); 210 cpu_to_le32(expected_sequence_number);
210 cifs_pdu->Signature.Sequence.Reserved = 0; 211 cifs_pdu->Signature.Sequence.Reserved = 0;
211 212
213 mutex_lock(&server->srv_mutex);
212 rc = cifs_calculate_signature(cifs_pdu, server, 214 rc = cifs_calculate_signature(cifs_pdu, server,
213 what_we_think_sig_should_be); 215 what_we_think_sig_should_be);
216 mutex_unlock(&server->srv_mutex);
214 217
215 if (rc) 218 if (rc)
216 return rc; 219 return rc;
@@ -469,15 +472,15 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
469 return rc; 472 return rc;
470 } 473 }
471 474
472 /* convert ses->userName to unicode and uppercase */ 475 /* convert ses->user_name to unicode and uppercase */
473 len = strlen(ses->userName); 476 len = strlen(ses->user_name);
474 user = kmalloc(2 + (len * 2), GFP_KERNEL); 477 user = kmalloc(2 + (len * 2), GFP_KERNEL);
475 if (user == NULL) { 478 if (user == NULL) {
476 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); 479 cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
477 rc = -ENOMEM; 480 rc = -ENOMEM;
478 goto calc_exit_2; 481 goto calc_exit_2;
479 } 482 }
480 len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); 483 len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp);
481 UniStrupr(user); 484 UniStrupr(user);
482 485
483 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, 486 crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
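
The srv_mutex lock added around the verification call matters because, per the new comment block above, the signature covers a per-connection sequence number; two unserialized signers would race on that counter and produce garbage MACs. A toy model of the invariant (pthread mutex and a fake MAC, not the CIFS code):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t srv_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint32_t sequence_number;

static uint32_t sign_pdu(uint32_t pdu)
{
        uint32_t sig;

        pthread_mutex_lock(&srv_mutex);
        /* toy MAC: real code hashes the key, sequence and PDU; the
         * point is that reading and advancing the counter must be
         * atomic with respect to other signers */
        sig = pdu ^ sequence_number++;
        pthread_mutex_unlock(&srv_mutex);
        return sig;
}

int main(void)
{
        printf("sig0=%u sig1=%u\n", sign_pdu(0xabcd), sign_pdu(0xabcd));
        return 0;
}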
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index f2970136d17d..5c412b33cd7c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -53,7 +53,6 @@ int cifsFYI = 0;
53int cifsERROR = 1; 53int cifsERROR = 1;
54int traceSMB = 0; 54int traceSMB = 0;
55unsigned int oplockEnabled = 1; 55unsigned int oplockEnabled = 1;
56unsigned int experimEnabled = 0;
57unsigned int linuxExtEnabled = 1; 56unsigned int linuxExtEnabled = 1;
58unsigned int lookupCacheEnabled = 1; 57unsigned int lookupCacheEnabled = 1;
59unsigned int multiuser_mount = 0; 58unsigned int multiuser_mount = 0;
@@ -127,6 +126,7 @@ cifs_read_super(struct super_block *sb, void *data,
127 kfree(cifs_sb); 126 kfree(cifs_sb);
128 return rc; 127 return rc;
129 } 128 }
129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
130 130
131#ifdef CONFIG_CIFS_DFS_UPCALL 131#ifdef CONFIG_CIFS_DFS_UPCALL
132 /* copy mount params to sb for use in submounts */ 132 /* copy mount params to sb for use in submounts */
@@ -409,8 +409,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
409 409
410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) 410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
411 seq_printf(s, ",multiuser"); 411 seq_printf(s, ",multiuser");
412 else if (tcon->ses->userName) 412 else if (tcon->ses->user_name)
413 seq_printf(s, ",username=%s", tcon->ses->userName); 413 seq_printf(s, ",username=%s", tcon->ses->user_name);
414 414
415 if (tcon->ses->domainName) 415 if (tcon->ses->domainName)
416 seq_printf(s, ",domain=%s", tcon->ses->domainName); 416 seq_printf(s, ",domain=%s", tcon->ses->domainName);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 17afb0fbcaed..a5d1106fcbde 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -37,10 +37,9 @@
37 37
38#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) 38#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
39#define MAX_SERVER_SIZE 15 39#define MAX_SERVER_SIZE 15
40#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ 40#define MAX_SHARE_SIZE 80
41#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null 41#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */
42 termination then *2 for unicode versions */ 42#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
43#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
44 43
45#define CIFS_MIN_RCV_POOL 4 44#define CIFS_MIN_RCV_POOL 4
46 45
@@ -92,7 +91,8 @@ enum statusEnum {
92 CifsNew = 0, 91 CifsNew = 0,
93 CifsGood, 92 CifsGood,
94 CifsExiting, 93 CifsExiting,
95 CifsNeedReconnect 94 CifsNeedReconnect,
95 CifsNeedNegotiate
96}; 96};
97 97
98enum securityEnum { 98enum securityEnum {
@@ -274,7 +274,7 @@ struct cifsSesInfo {
274 int capabilities; 274 int capabilities;
275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
276 TCP names - will ipv6 and sctp addresses fit? */ 276 TCP names - will ipv6 and sctp addresses fit? */
277 char userName[MAX_USERNAME_SIZE + 1]; 277 char *user_name;
278 char *domainName; 278 char *domainName;
279 char *password; 279 char *password;
280 struct session_key auth_key; 280 struct session_key auth_key;
@@ -817,7 +817,6 @@ GLOBAL_EXTERN unsigned int multiuser_mount; /* if enabled allows new sessions
817 have the uid/password or Kerberos credential 817 have the uid/password or Kerberos credential
818 or equivalent for current user */ 818 or equivalent for current user */
819GLOBAL_EXTERN unsigned int oplockEnabled; 819GLOBAL_EXTERN unsigned int oplockEnabled;
820GLOBAL_EXTERN unsigned int experimEnabled;
821GLOBAL_EXTERN unsigned int lookupCacheEnabled; 820GLOBAL_EXTERN unsigned int lookupCacheEnabled;
822GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent 821GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
823 with more secure ntlmssp2 challenge/resp */ 822 with more secure ntlmssp2 challenge/resp */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 904aa47e3515..df959bae6728 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -142,9 +142,9 @@ cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
142 */ 142 */
143 while (server->tcpStatus == CifsNeedReconnect) { 143 while (server->tcpStatus == CifsNeedReconnect) {
144 wait_event_interruptible_timeout(server->response_q, 144 wait_event_interruptible_timeout(server->response_q,
145 (server->tcpStatus == CifsGood), 10 * HZ); 145 (server->tcpStatus != CifsNeedReconnect), 10 * HZ);
146 146
147 /* is TCP session is reestablished now ?*/ 147 /* are we still trying to reconnect? */
148 if (server->tcpStatus != CifsNeedReconnect) 148 if (server->tcpStatus != CifsNeedReconnect)
149 break; 149 break;
150 150
@@ -729,7 +729,7 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
729 return rc; 729 return rc;
730 730
731 /* set up echo request */ 731 /* set up echo request */
732 smb->hdr.Tid = cpu_to_le16(0xffff); 732 smb->hdr.Tid = 0xffff;
733 smb->hdr.WordCount = 1; 733 smb->hdr.WordCount = 1;
734 put_unaligned_le16(1, &smb->EchoCount); 734 put_unaligned_le16(1, &smb->EchoCount);
735 put_bcc_le(1, &smb->hdr); 735 put_bcc_le(1, &smb->hdr);
@@ -1884,10 +1884,10 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1884 __constant_cpu_to_le16(CIFS_WRLCK)) 1884 __constant_cpu_to_le16(CIFS_WRLCK))
1885 pLockData->fl_type = F_WRLCK; 1885 pLockData->fl_type = F_WRLCK;
1886 1886
1887 pLockData->fl_start = parm_data->start; 1887 pLockData->fl_start = le64_to_cpu(parm_data->start);
1888 pLockData->fl_end = parm_data->start + 1888 pLockData->fl_end = pLockData->fl_start +
1889 parm_data->length - 1; 1889 le64_to_cpu(parm_data->length) - 1;
1890 pLockData->fl_pid = parm_data->pid; 1890 pLockData->fl_pid = le32_to_cpu(parm_data->pid);
1891 } 1891 }
1892 } 1892 }
1893 1893
@@ -5247,7 +5247,7 @@ cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5247 * Samba server ignores set of file size to zero due to bugs in some 5247 * Samba server ignores set of file size to zero due to bugs in some
5248 * older clients, but we should be precise - we use SetFileSize to 5248 * older clients, but we should be precise - we use SetFileSize to
5249 * set file size and do not want to truncate file size to zero 5249 * set file size and do not want to truncate file size to zero
5250 * accidently as happened on one Samba server beta by putting 5250 * accidentally as happened on one Samba server beta by putting
5251 * zero instead of -1 here 5251 * zero instead of -1 here
5252 */ 5252 */
5253 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); 5253 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
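
The CIFSSMBPosixLock change earlier in this file is a byte-order correction: fields lifted straight from the SMB reply are little-endian on the wire and must be converted before any arithmetic. A userspace sketch of the same pattern, using glibc's <endian.h> helpers as stand-ins for le64_to_cpu/le32_to_cpu (the struct layout is illustrative, not the SMB format):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct wire_lock {              /* as it arrives off the socket */
        uint64_t start;         /* little-endian */
        uint64_t length;        /* little-endian */
        uint32_t pid;           /* little-endian */
} __attribute__((packed));

int main(void)
{
        struct wire_lock w = { htole64(100), htole64(50), htole32(42) };

        /* convert each field once, then compute in host order */
        uint64_t start = le64toh(w.start);
        uint64_t end   = start + le64toh(w.length) - 1;

        printf("lock [%llu, %llu] pid %u\n",
               (unsigned long long)start, (unsigned long long)end,
               le32toh(w.pid));
        return 0;
}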
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8d6c17ab593d..db9d55b507d0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -199,8 +199,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
199 } 199 }
200 spin_unlock(&GlobalMid_Lock); 200 spin_unlock(&GlobalMid_Lock);
201 201
202 while ((server->tcpStatus != CifsExiting) && 202 while (server->tcpStatus == CifsNeedReconnect) {
203 (server->tcpStatus != CifsGood)) {
204 try_to_freeze(); 203 try_to_freeze();
205 204
206 /* we should try only the port we connected to before */ 205 /* we should try only the port we connected to before */
@@ -212,7 +211,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
212 atomic_inc(&tcpSesReconnectCount); 211 atomic_inc(&tcpSesReconnectCount);
213 spin_lock(&GlobalMid_Lock); 212 spin_lock(&GlobalMid_Lock);
214 if (server->tcpStatus != CifsExiting) 213 if (server->tcpStatus != CifsExiting)
215 server->tcpStatus = CifsGood; 214 server->tcpStatus = CifsNeedNegotiate;
216 spin_unlock(&GlobalMid_Lock); 215 spin_unlock(&GlobalMid_Lock);
217 } 216 }
218 } 217 }
@@ -248,24 +247,24 @@ static int check2ndT2(struct smb_hdr *pSMB, unsigned int maxBufSize)
248 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); 247 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
249 data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); 248 data_in_this_rsp = get_unaligned_le16(&pSMBt->t2_rsp.DataCount);
250 249
251 remaining = total_data_size - data_in_this_rsp; 250 if (total_data_size == data_in_this_rsp)
252
253 if (remaining == 0)
254 return 0; 251 return 0;
255 else if (remaining < 0) { 252 else if (total_data_size < data_in_this_rsp) {
256 cFYI(1, "total data %d smaller than data in frame %d", 253 cFYI(1, "total data %d smaller than data in frame %d",
257 total_data_size, data_in_this_rsp); 254 total_data_size, data_in_this_rsp);
258 return -EINVAL; 255 return -EINVAL;
259 } else {
260 cFYI(1, "missing %d bytes from transact2, check next response",
261 remaining);
262 if (total_data_size > maxBufSize) {
263 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
264 total_data_size, maxBufSize);
265 return -EINVAL;
266 }
267 return remaining;
268 } 256 }
257
258 remaining = total_data_size - data_in_this_rsp;
259
260 cFYI(1, "missing %d bytes from transact2, check next response",
261 remaining);
262 if (total_data_size > maxBufSize) {
263 cERROR(1, "TotalDataSize %d is over maximum buffer %d",
264 total_data_size, maxBufSize);
265 return -EINVAL;
266 }
267 return remaining;
269} 268}
270 269
271static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) 270static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
@@ -421,7 +420,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
421 pdu_length = 4; /* enough to get RFC1001 header */ 420 pdu_length = 4; /* enough to get RFC1001 header */
422 421
423incomplete_rcv: 422incomplete_rcv:
424 if (echo_retries > 0 && 423 if (echo_retries > 0 && server->tcpStatus == CifsGood &&
425 time_after(jiffies, server->lstrp + 424 time_after(jiffies, server->lstrp +
426 (echo_retries * SMB_ECHO_INTERVAL))) { 425 (echo_retries * SMB_ECHO_INTERVAL))) {
427 cERROR(1, "Server %s has not responded in %d seconds. " 426 cERROR(1, "Server %s has not responded in %d seconds. "
@@ -881,7 +880,8 @@ cifs_parse_mount_options(char *options, const char *devname,
881 /* null user, ie anonymous, authentication */ 880 /* null user, ie anonymous, authentication */
882 vol->nullauth = 1; 881 vol->nullauth = 1;
883 } 882 }
884 if (strnlen(value, 200) < 200) { 883 if (strnlen(value, MAX_USERNAME_SIZE) <
884 MAX_USERNAME_SIZE) {
885 vol->username = value; 885 vol->username = value;
886 } else { 886 } else {
887 printk(KERN_WARNING "CIFS: username too long\n"); 887 printk(KERN_WARNING "CIFS: username too long\n");
@@ -1472,7 +1472,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1472static bool 1472static bool
1473match_port(struct TCP_Server_Info *server, struct sockaddr *addr) 1473match_port(struct TCP_Server_Info *server, struct sockaddr *addr)
1474{ 1474{
1475 unsigned short int port, *sport; 1475 __be16 port, *sport;
1476 1476
1477 switch (addr->sa_family) { 1477 switch (addr->sa_family) {
1478 case AF_INET: 1478 case AF_INET:
@@ -1572,7 +1572,7 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
1572 return false; 1572 return false;
1573 } 1573 }
1574 1574
1575 /* now check if signing mode is acceptible */ 1575 /* now check if signing mode is acceptable */
1576 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 && 1576 if ((secFlags & CIFSSEC_MAY_SIGN) == 0 &&
1577 (server->secMode & SECMODE_SIGN_REQUIRED)) 1577 (server->secMode & SECMODE_SIGN_REQUIRED))
1578 return false; 1578 return false;
@@ -1765,6 +1765,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1765 module_put(THIS_MODULE); 1765 module_put(THIS_MODULE);
1766 goto out_err_crypto_release; 1766 goto out_err_crypto_release;
1767 } 1767 }
1768 tcp_ses->tcpStatus = CifsNeedNegotiate;
1768 1769
1769 /* thread spawned, put it on the list */ 1770 /* thread spawned, put it on the list */
1770 spin_lock(&cifs_tcp_ses_lock); 1771 spin_lock(&cifs_tcp_ses_lock);
@@ -1808,7 +1809,9 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
1808 break; 1809 break;
1809 default: 1810 default:
1810 /* anything else takes username/password */ 1811 /* anything else takes username/password */
1811 if (strncmp(ses->userName, vol->username, 1812 if (ses->user_name == NULL)
1813 continue;
1814 if (strncmp(ses->user_name, vol->username,
1812 MAX_USERNAME_SIZE)) 1815 MAX_USERNAME_SIZE))
1813 continue; 1816 continue;
1814 if (strlen(vol->username) != 0 && 1817 if (strlen(vol->username) != 0 &&
@@ -1851,6 +1854,8 @@ cifs_put_smb_ses(struct cifsSesInfo *ses)
1851 cifs_put_tcp_session(server); 1854 cifs_put_tcp_session(server);
1852} 1855}
1853 1856
1857static bool warned_on_ntlm; /* globals init to false automatically */
1858
1854static struct cifsSesInfo * 1859static struct cifsSesInfo *
1855cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) 1860cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1856{ 1861{
@@ -1906,9 +1911,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1906 else 1911 else
1907 sprintf(ses->serverName, "%pI4", &addr->sin_addr); 1912 sprintf(ses->serverName, "%pI4", &addr->sin_addr);
1908 1913
1909 if (volume_info->username) 1914 if (volume_info->username) {
1910 strncpy(ses->userName, volume_info->username, 1915 ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
1911 MAX_USERNAME_SIZE); 1916 if (!ses->user_name)
1917 goto get_ses_fail;
1918 }
1912 1919
1913 /* volume_info->password freed at unmount */ 1920 /* volume_info->password freed at unmount */
1914 if (volume_info->password) { 1921 if (volume_info->password) {
@@ -1923,6 +1930,15 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
1923 } 1930 }
1924 ses->cred_uid = volume_info->cred_uid; 1931 ses->cred_uid = volume_info->cred_uid;
1925 ses->linux_uid = volume_info->linux_uid; 1932 ses->linux_uid = volume_info->linux_uid;
1933
1934 /* ntlmv2 is much stronger than ntlm security, and has been broadly
1935 supported for many years, time to update default security mechanism */
1936 if ((volume_info->secFlg == 0) && warned_on_ntlm == false) {
1937 warned_on_ntlm = true;
1938 cERROR(1, "default security mechanism requested. The default "
1939 "security mechanism will be upgraded from ntlm to "
1940 "ntlmv2 in kernel release 2.6.41");
1941 }
1926 ses->overrideSecFlg = volume_info->secFlg; 1942 ses->overrideSecFlg = volume_info->secFlg;
1927 1943
1928 mutex_lock(&ses->session_mutex); 1944 mutex_lock(&ses->session_mutex);
@@ -2276,7 +2292,7 @@ static int
2276generic_ip_connect(struct TCP_Server_Info *server) 2292generic_ip_connect(struct TCP_Server_Info *server)
2277{ 2293{
2278 int rc = 0; 2294 int rc = 0;
2279 unsigned short int sport; 2295 __be16 sport;
2280 int slen, sfamily; 2296 int slen, sfamily;
2281 struct socket *socket = server->ssocket; 2297 struct socket *socket = server->ssocket;
2282 struct sockaddr *saddr; 2298 struct sockaddr *saddr;
@@ -2361,7 +2377,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
2361static int 2377static int
2362ip_connect(struct TCP_Server_Info *server) 2378ip_connect(struct TCP_Server_Info *server)
2363{ 2379{
2364 unsigned short int *sport; 2380 __be16 *sport;
2365 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; 2381 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
2366 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; 2382 struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
2367 2383
@@ -2826,7 +2842,7 @@ try_mount_again:
2826 2842
2827remote_path_check: 2843remote_path_check:
2828 /* check if a whole path (including prepath) is not remote */ 2844 /* check if a whole path (including prepath) is not remote */
2829 if (!rc && cifs_sb->prepathlen && tcon) { 2845 if (!rc && tcon) {
2830 /* build_path_to_root works only when we have a valid tcon */ 2846 /* build_path_to_root works only when we have a valid tcon */
2831 full_path = cifs_build_path_to_root(cifs_sb, tcon); 2847 full_path = cifs_build_path_to_root(cifs_sb, tcon);
2832 if (full_path == NULL) { 2848 if (full_path == NULL) {
@@ -2933,7 +2949,7 @@ mount_fail_check:
2933 if (mount_data != mount_data_global) 2949 if (mount_data != mount_data_global)
2934 kfree(mount_data); 2950 kfree(mount_data);
2935 /* If find_unc succeeded then rc == 0 so we can not end */ 2951 /* If find_unc succeeded then rc == 0 so we can not end */
2936 /* up accidently freeing someone elses tcon struct */ 2952 /* up accidentally freeing someone elses tcon struct */
2937 if (tcon) 2953 if (tcon)
2938 cifs_put_tcon(tcon); 2954 cifs_put_tcon(tcon);
2939 else if (pSesInfo) 2955 else if (pSesInfo)
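
With CifsNeedNegotiate added, a socket that has just connected or reconnected is no longer advertised as CifsGood before protocol negotiation has actually completed, which is what the reconnect-loop and echo-timeout changes above depend on. A paraphrased sketch of the transitions (states from enum statusEnum; the handler functions are illustrative, not kernel code):

#include <stdio.h>

enum statusEnum {
        CifsNew = 0,            /* struct allocated, no socket yet    */
        CifsGood,               /* negotiated and usable              */
        CifsExiting,            /* being torn down                    */
        CifsNeedReconnect,      /* socket died, reconnect in progress */
        CifsNeedNegotiate,      /* socket up, NEGOTIATE not yet done  */
};

static enum statusEnum on_socket_connected(enum statusEnum s)
{
        /* before this patch the reconnect path jumped straight to
         * CifsGood; now it waits for negotiation */
        return (s == CifsExiting) ? s : CifsNeedNegotiate;
}

static enum statusEnum on_negotiate_done(enum statusEnum s)
{
        return (s == CifsNeedNegotiate) ? CifsGood : s;
}

int main(void)
{
        enum statusEnum s = CifsNeedReconnect;

        s = on_socket_connected(s);
        s = on_negotiate_done(s);
        printf("final state: %d (CifsGood=%d)\n", (int)s, (int)CifsGood);
        return 0;
}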
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index dd5f22918c33..9ea65cf36714 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -189,7 +189,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
189 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); 189 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
190 /* EIO could indicate that (posix open) operation is not 190 /* EIO could indicate that (posix open) operation is not
191 supported, despite what server claimed in capability 191 supported, despite what server claimed in capability
192 negotation. EREMOTE indicates DFS junction, which is not 192 negotiation. EREMOTE indicates DFS junction, which is not
193 handled in posix open */ 193 handled in posix open */
194 194
195 if (rc == 0) { 195 if (rc == 0) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c27d236738fc..faf59529e847 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -575,8 +575,10 @@ reopen_error_exit:
575 575
576int cifs_close(struct inode *inode, struct file *file) 576int cifs_close(struct inode *inode, struct file *file)
577{ 577{
578 cifsFileInfo_put(file->private_data); 578 if (file->private_data != NULL) {
579 file->private_data = NULL; 579 cifsFileInfo_put(file->private_data);
580 file->private_data = NULL;
581 }
580 582
581 /* return code from the ->release op is always ignored */ 583 /* return code from the ->release op is always ignored */
582 return 0; 584 return 0;
@@ -970,6 +972,9 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
970 total_written += bytes_written) { 972 total_written += bytes_written) {
971 rc = -EAGAIN; 973 rc = -EAGAIN;
972 while (rc == -EAGAIN) { 974 while (rc == -EAGAIN) {
975 struct kvec iov[2];
976 unsigned int len;
977
973 if (open_file->invalidHandle) { 978 if (open_file->invalidHandle) {
974 /* we could deadlock if we called 979 /* we could deadlock if we called
975 filemap_fdatawait from here so tell 980 filemap_fdatawait from here so tell
@@ -979,31 +984,14 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
979 if (rc != 0) 984 if (rc != 0)
980 break; 985 break;
981 } 986 }
982 if (experimEnabled || (pTcon->ses->server && 987
983 ((pTcon->ses->server->secMode & 988 len = min((size_t)cifs_sb->wsize,
984 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 989 write_size - total_written);
985 == 0))) { 990 /* iov[0] is reserved for smb header */
986 struct kvec iov[2]; 991 iov[1].iov_base = (char *)write_data + total_written;
987 unsigned int len; 992 iov[1].iov_len = len;
988 993 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid, len,
989 len = min((size_t)cifs_sb->wsize, 994 *poffset, &bytes_written, iov, 1, 0);
990 write_size - total_written);
991 /* iov[0] is reserved for smb header */
992 iov[1].iov_base = (char *)write_data +
993 total_written;
994 iov[1].iov_len = len;
995 rc = CIFSSMBWrite2(xid, pTcon,
996 open_file->netfid, len,
997 *poffset, &bytes_written,
998 iov, 1, 0);
999 } else
1000 rc = CIFSSMBWrite(xid, pTcon,
1001 open_file->netfid,
1002 min_t(const int, cifs_sb->wsize,
1003 write_size - total_written),
1004 *poffset, &bytes_written,
1005 write_data + total_written,
1006 NULL, 0);
1007 } 995 }
1008 if (rc || (bytes_written == 0)) { 996 if (rc || (bytes_written == 0)) {
1009 if (total_written) 997 if (total_written)
@@ -1240,12 +1228,6 @@ static int cifs_writepages(struct address_space *mapping,
1240 } 1228 }
1241 1229
1242 tcon = tlink_tcon(open_file->tlink); 1230 tcon = tlink_tcon(open_file->tlink);
1243 if (!experimEnabled && tcon->ses->server->secMode &
1244 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1245 cifsFileInfo_put(open_file);
1246 kfree(iov);
1247 return generic_writepages(mapping, wbc);
1248 }
1249 cifsFileInfo_put(open_file); 1231 cifsFileInfo_put(open_file);
1250 1232
1251 xid = GetXid(); 1233 xid = GetXid();
@@ -1980,6 +1962,24 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1980 return total_read; 1962 return total_read;
1981} 1963}
1982 1964
1965/*
1966 * If the page is mmap'ed into a process' page tables, then we need to make
1967 * sure that it doesn't change while being written back.
1968 */
1969static int
1970cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1971{
1972 struct page *page = vmf->page;
1973
1974 lock_page(page);
1975 return VM_FAULT_LOCKED;
1976}
1977
1978static struct vm_operations_struct cifs_file_vm_ops = {
1979 .fault = filemap_fault,
1980 .page_mkwrite = cifs_page_mkwrite,
1981};
1982
1983int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) 1983int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1984{ 1984{
1985 int rc, xid; 1985 int rc, xid;
@@ -1991,6 +1991,8 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1991 cifs_invalidate_mapping(inode); 1991 cifs_invalidate_mapping(inode);
1992 1992
1993 rc = generic_file_mmap(file, vma); 1993 rc = generic_file_mmap(file, vma);
1994 if (rc == 0)
1995 vma->vm_ops = &cifs_file_vm_ops;
1994 FreeXid(xid); 1996 FreeXid(xid);
1995 return rc; 1997 return rc;
1996} 1998}
@@ -2007,6 +2009,8 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2007 return rc; 2009 return rc;
2008 } 2010 }
2009 rc = generic_file_mmap(file, vma); 2011 rc = generic_file_mmap(file, vma);
2012 if (rc == 0)
2013 vma->vm_ops = &cifs_file_vm_ops;
2010 FreeXid(xid); 2014 FreeXid(xid);
2011 return rc; 2015 return rc;
2012} 2016}
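
Installing cifs_file_vm_ops gives the VM a .page_mkwrite hook, so the first write fault on an mmap'ed page passes through the filesystem (here, just taking the page lock) before the store lands. The closest userspace analogue is trapping the write fault yourself; the sketch below models that with mprotect from a SIGSEGV handler, which is fine for a demo but not formally async-signal-safe:

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static long psz;

static void on_write_fault(int sig, siginfo_t *si, void *ctx)
{
        (void)sig; (void)ctx;
        /* the "page_mkwrite" moment: bless the faulting page for writes */
        mprotect((void *)((uintptr_t)si->si_addr & ~(uintptr_t)(psz - 1)),
                 psz, PROT_READ | PROT_WRITE);
}

int main(void)
{
        struct sigaction sa;
        char *page;

        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = on_write_fault;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGSEGV, &sa, NULL);

        psz = sysconf(_SC_PAGESIZE);
        page = mmap(NULL, psz, PROT_READ,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        page[0] = 'x';          /* write-faults once, handler fixes it up */
        printf("wrote: %c\n", page[0]);
        return 0;
}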
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index e8804d373404..ce417a9764a3 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -239,7 +239,7 @@ CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon,
239 if (rc != 0) 239 if (rc != 0)
240 return rc; 240 return rc;
241 241
242 if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { 242 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
243 CIFSSMBClose(xid, tcon, netfid); 243 CIFSSMBClose(xid, tcon, netfid);
244 /* it's not a symlink */ 244 /* it's not a symlink */
245 return -EINVAL; 245 return -EINVAL;
@@ -316,7 +316,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
316 if (rc != 0) 316 if (rc != 0)
317 goto out; 317 goto out;
318 318
319 if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { 319 if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
320 CIFSSMBClose(xid, pTcon, netfid); 320 CIFSSMBClose(xid, pTcon, netfid);
321 /* it's not a symlink */ 321 /* it's not a symlink */
322 goto out; 322 goto out;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2a930a752a78..0c684ae4c071 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -100,6 +100,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
100 memset(buf_to_free->password, 0, strlen(buf_to_free->password)); 100 memset(buf_to_free->password, 0, strlen(buf_to_free->password));
101 kfree(buf_to_free->password); 101 kfree(buf_to_free->password);
102 } 102 }
103 kfree(buf_to_free->user_name);
103 kfree(buf_to_free->domainName); 104 kfree(buf_to_free->domainName);
104 kfree(buf_to_free); 105 kfree(buf_to_free);
105} 106}
@@ -520,7 +521,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
520 (struct smb_com_transaction_change_notify_rsp *)buf; 521 (struct smb_com_transaction_change_notify_rsp *)buf;
521 struct file_notify_information *pnotify; 522 struct file_notify_information *pnotify;
522 __u32 data_offset = 0; 523 __u32 data_offset = 0;
523 if (pSMBr->ByteCount > sizeof(struct file_notify_information)) { 524 if (get_bcc_le(buf) > sizeof(struct file_notify_information)) {
524 data_offset = le32_to_cpu(pSMBr->DataOffset); 525 data_offset = le32_to_cpu(pSMBr->DataOffset);
525 526
526 pnotify = (struct file_notify_information *) 527 pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 16765703131b..f6728eb6f4b9 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -219,12 +219,12 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
219 bcc_ptr++; 219 bcc_ptr++;
220 } */ 220 } */
221 /* copy user */ 221 /* copy user */
222 if (ses->userName == NULL) { 222 if (ses->user_name == NULL) {
223 /* null user mount */ 223 /* null user mount */
224 *bcc_ptr = 0; 224 *bcc_ptr = 0;
225 *(bcc_ptr+1) = 0; 225 *(bcc_ptr+1) = 0;
226 } else { 226 } else {
227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName, 227 bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->user_name,
228 MAX_USERNAME_SIZE, nls_cp); 228 MAX_USERNAME_SIZE, nls_cp);
229 } 229 }
230 bcc_ptr += 2 * bytes_ret; 230 bcc_ptr += 2 * bytes_ret;
@@ -244,12 +244,11 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
244 /* copy user */ 244 /* copy user */
245 /* BB what about null user mounts - check that we do this BB */ 245 /* BB what about null user mounts - check that we do this BB */
246 /* copy user */ 246 /* copy user */
247 if (ses->userName == NULL) { 247 if (ses->user_name != NULL)
248 /* BB what about null user mounts - check that we do this BB */ 248 strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
249 } else { 249 /* else null user mount */
250 strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE); 250
251 } 251 bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
252 bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
253 *bcc_ptr = 0; 252 *bcc_ptr = 0;
254 bcc_ptr++; /* account for null termination */ 253 bcc_ptr++; /* account for null termination */
255 254
@@ -405,8 +404,8 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
405 /* BB spec says that if AvId field of MsvAvTimestamp is populated then 404 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
406 we must set the MIC field of the AUTHENTICATE_MESSAGE */ 405 we must set the MIC field of the AUTHENTICATE_MESSAGE */
407 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); 406 ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
408 tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); 407 tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
409 tilen = cpu_to_le16(pblob->TargetInfoArray.Length); 408 tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
410 if (tilen) { 409 if (tilen) {
411 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); 410 ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
412 if (!ses->auth_key.response) { 411 if (!ses->auth_key.response) {
@@ -523,14 +522,14 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
523 tmp += len; 522 tmp += len;
524 } 523 }
525 524
526 if (ses->userName == NULL) { 525 if (ses->user_name == NULL) {
527 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 526 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
528 sec_blob->UserName.Length = 0; 527 sec_blob->UserName.Length = 0;
529 sec_blob->UserName.MaximumLength = 0; 528 sec_blob->UserName.MaximumLength = 0;
530 tmp += 2; 529 tmp += 2;
531 } else { 530 } else {
532 int len; 531 int len;
533 len = cifs_strtoUCS((__le16 *)tmp, ses->userName, 532 len = cifs_strtoUCS((__le16 *)tmp, ses->user_name,
534 MAX_USERNAME_SIZE, nls_cp); 533 MAX_USERNAME_SIZE, nls_cp);
535 len *= 2; /* unicode is 2 bytes each */ 534 len *= 2; /* unicode is 2 bytes each */
536 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); 535 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
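
The decode_ntlmssp_challenge() hunk corrects both the direction and the width of the conversions: fields parsed out of a received blob need le32_to_cpu()/le16_to_cpu(), not cpu_to_le16(), and the 32-bit BufferOffset must not be squeezed through a 16-bit helper. A byte-level sketch of that rule, with an assumed blob layout and helper names of my own:

#include <stdint.h>
#include <stdio.h>

/* Read little-endian fields byte by byte; correct on any host. */
static uint16_t get_le16(const uint8_t *p)
{
    return (uint16_t)(p[0] | (p[1] << 8));
}

static uint32_t get_le32(const uint8_t *p)
{
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
    /* assumed layout: [Length:2][MaximumLength:2][BufferOffset:4] */
    const uint8_t blob[8] = { 0x30, 0x00, 0x30, 0x00, 0x58, 0x00, 0x00, 0x00 };

    uint16_t tilen    = get_le16(blob);      /* 16-bit length field */
    uint32_t tioffset = get_le32(blob + 4);  /* 32-bit offset: full width */

    printf("len=%u offset=%u\n", (unsigned)tilen, (unsigned)tioffset);
    return 0;
}
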
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 90ff3cb10de3..3313dd19f543 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -990,7 +990,7 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
990 * This describes these functions and their helpers. 990 * This describes these functions and their helpers.
991 * 991 *
992 * Allow another kernel system to depend on a config_item. If this 992 * Allow another kernel system to depend on a config_item. If this
993 * happens, the item cannot go away until the dependant can live without 993 * happens, the item cannot go away until the dependent can live without
994 * it. The idea is to give client modules as simple an interface as 994 * it. The idea is to give client modules as simple an interface as
995 * possible. When a system asks them to depend on an item, they just 995 * possible. When a system asks them to depend on an item, they just
996 * call configfs_depend_item(). If the item is live and the client 996 * call configfs_depend_item(). If the item is live and the client
diff --git a/fs/dcache.c b/fs/dcache.c
index ad25c4cec7d5..129a35730994 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2131,7 +2131,7 @@ EXPORT_SYMBOL(d_rehash);
2131 */ 2131 */
2132void dentry_update_name_case(struct dentry *dentry, struct qstr *name) 2132void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2133{ 2133{
2134 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 2134 BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
2135 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */ 2135 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2136 2136
2137 spin_lock(&dentry->d_lock); 2137 spin_lock(&dentry->d_lock);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 04b8c449303f..56d6bfcc1e48 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -519,7 +519,7 @@ static void toss_rsb(struct kref *kref)
519 } 519 }
520} 520}
521 521
522/* When all references to the rsb are gone it's transfered to 522/* When all references to the rsb are gone it's transferred to
523 the tossed list for later disposal. */ 523 the tossed list for later disposal. */
524 524
525static void put_rsb(struct dlm_rsb *r) 525static void put_rsb(struct dlm_rsb *r)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index bffa1e73b9a9..5e2c71f05e46 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -810,7 +810,7 @@ static int tcp_accept_from_sock(struct connection *con)
810 810
811 /* 811 /*
812 * Add it to the active queue in case we got data 812 * Add it to the active queue in case we got data
813 * beween processing the accept adding the socket 813 * between processing the accept adding the socket
814 * to the read_sockets list 814 * to the read_sockets list
815 */ 815 */
816 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) 816 if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index eda43f362616..14638235f7b2 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -304,7 +304,7 @@ static void set_master_lkbs(struct dlm_rsb *r)
304} 304}
305 305
306/* 306/*
307 * Propogate the new master nodeid to locks 307 * Propagate the new master nodeid to locks
308 * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider. 308 * The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
309 * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which 309 * The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
310 * rsb's to consider. 310 * rsb's to consider.
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c27c0ecf90bc..fdb2eb0ad09e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -276,7 +276,7 @@ static void ecryptfs_init_mount_crypt_stat(
276/** 276/**
277 * ecryptfs_parse_options 277 * ecryptfs_parse_options
278 * @sb: The ecryptfs super block 278 * @sb: The ecryptfs super block
279 * @options: The options pased to the kernel 279 * @options: The options passed to the kernel
280 * 280 *
281 * Parse mount options: 281 * Parse mount options:
282 * debug=N - ecryptfs_verbosity level for debug output 282 * debug=N - ecryptfs_verbosity level for debug output
@@ -840,7 +840,7 @@ static int __init ecryptfs_init(void)
840 } 840 }
841 rc = ecryptfs_init_messaging(); 841 rc = ecryptfs_init_messaging();
842 if (rc) { 842 if (rc) {
843 printk(KERN_ERR "Failure occured while attempting to " 843 printk(KERN_ERR "Failure occurred while attempting to "
844 "initialize the communications channel to " 844 "initialize the communications channel to "
845 "ecryptfsd\n"); 845 "ecryptfsd\n");
846 goto out_destroy_kthread; 846 goto out_destroy_kthread;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ed38801b57a7..f9cfd168fbe2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -181,7 +181,7 @@ struct eventpoll {
181 181
182 /* 182 /*
183 * This is a single linked list that chains all the "struct epitem" that 183 * This is a single linked list that chains all the "struct epitem" that
184 * happened while transfering ready events to userspace w/out 184 * happened while transferring ready events to userspace w/out
185 * holding ->lock. 185 * holding ->lock.
186 */ 186 */
187 struct epitem *ovflist; 187 struct epitem *ovflist;
@@ -606,7 +606,7 @@ static void ep_free(struct eventpoll *ep)
606 * We do not need to hold "ep->mtx" here because the epoll file 606 * We do not need to hold "ep->mtx" here because the epoll file
607 * is on the way to be removed and no one has references to it 607 * is on the way to be removed and no one has references to it
608 * anymore. The only hit might come from eventpoll_release_file() but 608 * anymore. The only hit might come from eventpoll_release_file() but
609 * holding "epmutex" is sufficent here. 609 * holding "epmutex" is sufficient here.
610 */ 610 */
611 mutex_lock(&epmutex); 611 mutex_lock(&epmutex);
612 612
@@ -720,7 +720,7 @@ void eventpoll_release_file(struct file *file)
720 /* 720 /*
721 * We don't want to get "file->f_lock" because it is not 721 * We don't want to get "file->f_lock" because it is not
722 * necessary. It is not necessary because we're in the "struct file" 722 * necessary. It is not necessary because we're in the "struct file"
723 * cleanup path, and this means that noone is using this file anymore. 723 * cleanup path, and this means that no one is using this file anymore.
724 * So, for example, epoll_ctl() cannot hit here since if we reach this 724 * So, for example, epoll_ctl() cannot hit here since if we reach this
725 * point, the file counter already went to zero and fget() would fail. 725 * point, the file counter already went to zero and fget() would fail.
726 * The only hit might come from ep_free() but by holding the mutex 726 * The only hit might come from ep_free() but by holding the mutex
@@ -1112,7 +1112,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
1112 * Trigger mode, we need to insert back inside 1112 * Trigger mode, we need to insert back inside
1113 * the ready list, so that the next call to 1113 * the ready list, so that the next call to
1114 * epoll_wait() will check again the events 1114 * epoll_wait() will check again the events
1115 * availability. At this point, noone can insert 1115 * availability. At this point, no one can insert
1116 * into ep->rdllist besides us. The epoll_ctl() 1116 * into ep->rdllist besides us. The epoll_ctl()
1117 * callers are locked out by 1117 * callers are locked out by
1118 * ep_scan_ready_list() holding "mtx" and the 1118 * ep_scan_ready_list() holding "mtx" and the
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 5e74ad3d4009..3bbd46956d77 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -115,7 +115,7 @@ struct exofs_sb_stats {
115 * Describes the raid used in the FS. It is part of the device table. 115 * Describes the raid used in the FS. It is part of the device table.
116 * This here is taken from the pNFS-objects definition. In exofs we 116 * This here is taken from the pNFS-objects definition. In exofs we
117 * use one raid policy through-out the filesystem. (NOTE: the funny 117 * use one raid policy through-out the filesystem. (NOTE: the funny
118 * alignment at begining. We take care of it at exofs_device_table. 118 * alignment at beginning. We take care of it at exofs_device_table.
119 */ 119 */
120struct exofs_dt_data_map { 120struct exofs_dt_data_map {
121 __le32 cb_num_comps; 121 __le32 cb_num_comps;
@@ -136,7 +136,7 @@ struct exofs_dt_device_info {
136 u8 systemid[OSD_SYSTEMID_LEN]; 136 u8 systemid[OSD_SYSTEMID_LEN];
137 __le64 long_name_offset; /* If !0 then offset-in-file */ 137 __le64 long_name_offset; /* If !0 then offset-in-file */
138 __le32 osdname_len; /* */ 138 __le32 osdname_len; /* */
139 u8 osdname[44]; /* Embbeded, Ususally an asci uuid */ 139 u8 osdname[44]; /* Embbeded, Usually an asci uuid */
140} __packed; 140} __packed;
141 141
142/* 142/*
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 0d06f4e75699..8f44cef1b3ef 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -850,7 +850,7 @@ static int find_next_reservable_window(
850 rsv_window_remove(sb, my_rsv); 850 rsv_window_remove(sb, my_rsv);
851 851
852 /* 852 /*
853 * Let's book the whole avaliable window for now. We will check the 853 * Let's book the whole available window for now. We will check the
854 * disk bitmap later and then, if there are free blocks then we adjust 854 * disk bitmap later and then, if there are free blocks then we adjust
855 * the window size if it's larger than requested. 855 * the window size if it's larger than requested.
856 * Otherwise, we will remove this node from the tree next time 856 * Otherwise, we will remove this node from the tree next time
@@ -1357,9 +1357,9 @@ retry_alloc:
1357 goto allocated; 1357 goto allocated;
1358 } 1358 }
1359 /* 1359 /*
1360 * We may end up a bogus ealier ENOSPC error due to 1360 * We may end up a bogus earlier ENOSPC error due to
1361 * filesystem is "full" of reservations, but 1361 * filesystem is "full" of reservations, but
1362 * there maybe indeed free blocks avaliable on disk 1362 * there maybe indeed free blocks available on disk
1363 * In this case, we just forget about the reservations 1363 * In this case, we just forget about the reservations
1364 * just do block allocation as without reservations. 1364 * just do block allocation as without reservations.
1365 */ 1365 */
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index c47f706878b5..788e09a07f7e 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -305,7 +305,7 @@ static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind)
305 return ind->bh->b_blocknr; 305 return ind->bh->b_blocknr;
306 306
307 /* 307 /*
308 * It is going to be refered from inode itself? OK, just put it into 308 * It is going to be referred from inode itself? OK, just put it into
309 * the same cylinder group then. 309 * the same cylinder group then.
310 */ 310 */
311 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group); 311 bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group);
@@ -913,7 +913,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
913 * 913 *
914 * When we do truncate() we may have to clean the ends of several indirect 914 * When we do truncate() we may have to clean the ends of several indirect
915 * blocks but leave the blocks themselves alive. Block is partially 915 * blocks but leave the blocks themselves alive. Block is partially
916 * truncated if some data below the new i_size is refered from it (and 916 * truncated if some data below the new i_size is referred from it (and
917 * it is on the path to the first completely truncated data block, indeed). 917 * it is on the path to the first completely truncated data block, indeed).
918 * We have to free the top of that path along with everything to the right 918 * We have to free the top of that path along with everything to the right
919 * of the path. Since no allocation past the truncation point is possible 919 * of the path. Since no allocation past the truncation point is possible
@@ -990,7 +990,7 @@ no_top:
990 * @p: array of block numbers 990 * @p: array of block numbers
991 * @q: points immediately past the end of array 991 * @q: points immediately past the end of array
992 * 992 *
993 * We are freeing all blocks refered from that array (numbers are 993 * We are freeing all blocks referred from that array (numbers are
994 * stored as little-endian 32-bit) and updating @inode->i_blocks 994 * stored as little-endian 32-bit) and updating @inode->i_blocks
995 * appropriately. 995 * appropriately.
996 */ 996 */
@@ -1030,7 +1030,7 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
1030 * @q: pointer immediately past the end of array 1030 * @q: pointer immediately past the end of array
1031 * @depth: depth of the branches to free 1031 * @depth: depth of the branches to free
1032 * 1032 *
1033 * We are freeing all blocks refered from these branches (numbers are 1033 * We are freeing all blocks referred from these branches (numbers are
1034 * stored as little-endian 32-bit) and updating @inode->i_blocks 1034 * stored as little-endian 32-bit) and updating @inode->i_blocks
1035 * appropriately. 1035 * appropriately.
1036 */ 1036 */
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7731695e65d9..0a78dae7e2cb 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1382,7 +1382,7 @@ static struct dentry *ext2_mount(struct file_system_type *fs_type,
1382 1382
1383/* Read data from quotafile - avoid pagecache and such because we cannot afford 1383/* Read data from quotafile - avoid pagecache and such because we cannot afford
1384 * acquiring the locks... As quota files are never truncated and quota code 1384 * acquiring the locks... As quota files are never truncated and quota code
1385 * itself serializes the operations (and noone else should touch the files) 1385 * itself serializes the operations (and no one else should touch the files)
1386 * we don't have to be afraid of races */ 1386 * we don't have to be afraid of races */
1387static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, 1387static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data,
1388 size_t len, loff_t off) 1388 size_t len, loff_t off)
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index c2e4dce984d2..529970617a21 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -35,7 +35,7 @@
35 * +------------------+ 35 * +------------------+
36 * 36 *
37 * The block header is followed by multiple entry descriptors. These entry 37 * The block header is followed by multiple entry descriptors. These entry
38 * descriptors are variable in size, and alligned to EXT2_XATTR_PAD 38 * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
39 * byte boundaries. The entry descriptors are sorted by attribute name, 39 * byte boundaries. The entry descriptors are sorted by attribute name,
40 * so that two extended attribute blocks can be compared efficiently. 40 * so that two extended attribute blocks can be compared efficiently.
41 * 41 *
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 153242187fce..fe52297e31ad 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -590,7 +590,7 @@ do_more:
590 BUFFER_TRACE(debug_bh, "Deleted!"); 590 BUFFER_TRACE(debug_bh, "Deleted!");
591 if (!bh2jh(bitmap_bh)->b_committed_data) 591 if (!bh2jh(bitmap_bh)->b_committed_data)
592 BUFFER_TRACE(debug_bh, 592 BUFFER_TRACE(debug_bh,
593 "No commited data in bitmap"); 593 "No committed data in bitmap");
594 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); 594 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
595 __brelse(debug_bh); 595 __brelse(debug_bh);
596 } 596 }
@@ -1063,7 +1063,7 @@ static int find_next_reservable_window(
1063 rsv_window_remove(sb, my_rsv); 1063 rsv_window_remove(sb, my_rsv);
1064 1064
1065 /* 1065 /*
1066 * Let's book the whole avaliable window for now. We will check the 1066 * Let's book the whole available window for now. We will check the
1067 * disk bitmap later and then, if there are free blocks then we adjust 1067 * disk bitmap later and then, if there are free blocks then we adjust
1068 * the window size if it's larger than requested. 1068 * the window size if it's larger than requested.
1069 * Otherwise, we will remove this node from the tree next time 1069 * Otherwise, we will remove this node from the tree next time
@@ -1456,7 +1456,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
1456 * 1456 *
1457 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if 1457 * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
1458 * it is profitable to retry the operation, this function will wait 1458 * it is profitable to retry the operation, this function will wait
1459 * for the current or commiting transaction to complete, and then 1459 * for the current or committing transaction to complete, and then
1460 * return TRUE. 1460 * return TRUE.
1461 * 1461 *
1462 * if the total number of retries exceed three times, return FALSE. 1462 * if the total number of retries exceed three times, return FALSE.
@@ -1632,9 +1632,9 @@ retry_alloc:
1632 goto allocated; 1632 goto allocated;
1633 } 1633 }
1634 /* 1634 /*
1635 * We may end up a bogus ealier ENOSPC error due to 1635 * We may end up a bogus earlier ENOSPC error due to
1636 * filesystem is "full" of reservations, but 1636 * filesystem is "full" of reservations, but
1637 * there maybe indeed free blocks avaliable on disk 1637 * there maybe indeed free blocks available on disk
1638 * In this case, we just forget about the reservations 1638 * In this case, we just forget about the reservations
1639 * just do block allocation as without reservations. 1639 * just do block allocation as without reservations.
1640 */ 1640 */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index fe2541d250e4..68b2e43d7c35 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2055,7 +2055,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
2055 * 2055 *
2056 * When we do truncate() we may have to clean the ends of several 2056 * When we do truncate() we may have to clean the ends of several
2057 * indirect blocks but leave the blocks themselves alive. Block is 2057 * indirect blocks but leave the blocks themselves alive. Block is
2058 * partially truncated if some data below the new i_size is refered 2058 * partially truncated if some data below the new i_size is referred
2059 * from it (and it is on the path to the first completely truncated 2059 * from it (and it is on the path to the first completely truncated
2060 * data block, indeed). We have to free the top of that path along 2060 * data block, indeed). We have to free the top of that path along
2061 * with everything to the right of the path. Since no allocation 2061 * with everything to the right of the path. Since no allocation
@@ -2184,7 +2184,7 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode,
2184 * @first: array of block numbers 2184 * @first: array of block numbers
2185 * @last: points immediately past the end of array 2185 * @last: points immediately past the end of array
2186 * 2186 *
2187 * We are freeing all blocks refered from that array (numbers are stored as 2187 * We are freeing all blocks referred from that array (numbers are stored as
2188 * little-endian 32-bit) and updating @inode->i_blocks appropriately. 2188 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
2189 * 2189 *
2190 * We accumulate contiguous runs of blocks to free. Conveniently, if these 2190 * We accumulate contiguous runs of blocks to free. Conveniently, if these
@@ -2272,7 +2272,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2272 * @last: pointer immediately past the end of array 2272 * @last: pointer immediately past the end of array
2273 * @depth: depth of the branches to free 2273 * @depth: depth of the branches to free
2274 * 2274 *
2275 * We are freeing all blocks refered from these branches (numbers are 2275 * We are freeing all blocks referred from these branches (numbers are
2276 * stored as little-endian 32-bit) and updating @inode->i_blocks 2276 * stored as little-endian 32-bit) and updating @inode->i_blocks
2277 * appropriately. 2277 * appropriately.
2278 */ 2278 */
@@ -3291,7 +3291,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
3291 if (ext3_should_journal_data(inode)) 3291 if (ext3_should_journal_data(inode))
3292 ret = 3 * (bpp + indirects) + 2; 3292 ret = 3 * (bpp + indirects) + 2;
3293 else 3293 else
3294 ret = 2 * (bpp + indirects) + 2; 3294 ret = 2 * (bpp + indirects) + indirects + 2;
3295 3295
3296#ifdef CONFIG_QUOTA 3296#ifdef CONFIG_QUOTA
3297 /* We know that structure was already allocated during dquot_initialize so 3297 /* We know that structure was already allocated during dquot_initialize so
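
The last inode.c hunk adds a missing "+ indirects" term to the ordered/writeback-mode credit estimate, presumably because each data block may also dirty its own indirect block beyond the shared updates already counted. A sketch of the corrected arithmetic; bpp and indirects are plain inputs here since their derivation lives elsewhere in ext3, and the sample numbers are illustrative:

#include <stdio.h>

/* Mirror of ext3_writepage_trans_blocks() after the fix, quota terms
 * omitted. */
static int writepage_credits(int bpp, int indirects, int journal_data)
{
    if (journal_data)
        return 3 * (bpp + indirects) + 2;
    /* ordered/writeback: the extra "+ indirects" is the fix above */
    return 2 * (bpp + indirects) + indirects + 2;
}

int main(void)
{
    /* e.g. 4K page with 1K blocks: 4 blocks per page, 3 indirect levels */
    printf("credits = %d\n", writepage_credits(4, 3, 0));
    return 0;
}
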
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 108b142e11ed..7916e4ce166a 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -1009,7 +1009,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
1009 1009
1010 if (test_opt(sb, DEBUG)) 1010 if (test_opt(sb, DEBUG))
1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK 1011 printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
1012 " upto "E3FSBLK" blocks\n", 1012 " up to "E3FSBLK" blocks\n",
1013 o_blocks_count, n_blocks_count); 1013 o_blocks_count, n_blocks_count);
1014 1014
1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1015 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 071689f86e18..3c6a9e0eadc1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2925,7 +2925,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2925 2925
2926/* Read data from quotafile - avoid pagecache and such because we cannot afford 2926/* Read data from quotafile - avoid pagecache and such because we cannot afford
2927 * acquiring the locks... As quota files are never truncated and quota code 2927 * acquiring the locks... As quota files are never truncated and quota code
2928 * itself serializes the operations (and noone else should touch the files) 2928 * itself serializes the operations (and no one else should touch the files)
2929 * we don't have to be afraid of races */ 2929 * we don't have to be afraid of races */
2930static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, 2930static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2931 size_t len, loff_t off) 2931 size_t len, loff_t off)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 97b970e7dd13..1c67139ad4b4 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -547,7 +547,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
547 * 547 *
548 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if 548 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
549 * it is profitable to retry the operation, this function will wait 549 * it is profitable to retry the operation, this function will wait
550 * for the current or commiting transaction to complete, and then 550 * for the current or committing transaction to complete, and then
551 * return TRUE. 551 * return TRUE.
552 * 552 *
553 * if the total number of retries exceed three times, return FALSE. 553 * if the total number of retries exceed three times, return FALSE.
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index e25e99bf7ee1..d0f53538a57f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,8 +86,8 @@
86 86
87#ifdef CONFIG_QUOTA 87#ifdef CONFIG_QUOTA
88/* Amount of blocks needed for quota update - we know that the structure was 88/* Amount of blocks needed for quota update - we know that the structure was
89 * allocated so we need to update only inode+data */ 89 * allocated so we need to update only data block */
90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) 90#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
91/* Amount of blocks needed for quota insert/delete - we do some block writes 91/* Amount of blocks needed for quota insert/delete - we do some block writes
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index dd2cb5076ff9..4890d6f3ad15 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1729,7 +1729,7 @@ repeat:
1729 BUG_ON(npath->p_depth != path->p_depth); 1729 BUG_ON(npath->p_depth != path->p_depth);
1730 eh = npath[depth].p_hdr; 1730 eh = npath[depth].p_hdr;
1731 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { 1731 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
1732 ext_debug("next leaf isnt full(%d)\n", 1732 ext_debug("next leaf isn't full(%d)\n",
1733 le16_to_cpu(eh->eh_entries)); 1733 le16_to_cpu(eh->eh_entries));
1734 path = npath; 1734 path = npath;
1735 goto repeat; 1735 goto repeat;
@@ -2533,7 +2533,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2533/* 2533/*
2534 * This function is called by ext4_ext_map_blocks() if someone tries to write 2534 * This function is called by ext4_ext_map_blocks() if someone tries to write
2535 * to an uninitialized extent. It may result in splitting the uninitialized 2535 * to an uninitialized extent. It may result in splitting the uninitialized
2536 * extent into multiple extents (upto three - one initialized and two 2536 * extent into multiple extents (up to three - one initialized and two
2537 * uninitialized). 2537 * uninitialized).
2538 * There are three possibilities: 2538 * There are three possibilities:
2539 * a> There is no split required: Entire extent should be initialized 2539 * a> There is no split required: Entire extent should be initialized
@@ -3174,7 +3174,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3174 path, flags); 3174 path, flags);
3175 /* 3175 /*
3176 * Flag the inode(non aio case) or end_io struct (aio case) 3176 * Flag the inode(non aio case) or end_io struct (aio case)
3177 * that this IO needs to convertion to written when IO is 3177 * that this IO needs to conversion to written when IO is
3178 * completed 3178 * completed
3179 */ 3179 */
3180 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3180 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3460,10 +3460,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
3460 ext4_ext_mark_uninitialized(&newex); 3460 ext4_ext_mark_uninitialized(&newex);
3461 /* 3461 /*
3462 * io_end structure was created for every IO write to an 3462 * io_end structure was created for every IO write to an
3463 * uninitialized extent. To avoid unecessary conversion, 3463 * uninitialized extent. To avoid unnecessary conversion,
3464 * here we flag the IO that really needs the conversion. 3464 * here we flag the IO that really needs the conversion.
3465 * For non asycn direct IO case, flag the inode state 3465 * For non asycn direct IO case, flag the inode state
3466 * that we need to perform convertion when IO is done. 3466 * that we need to perform conversion when IO is done.
3467 */ 3467 */
3468 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 3468 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
3469 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { 3469 if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7f74019d6d77..e9473cbe80df 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -101,7 +101,7 @@ extern int ext4_flush_completed_IO(struct inode *inode)
101 * to the work-to-be schedule is freed. 101 * to the work-to-be schedule is freed.
102 * 102 *
103 * Thus we need to keep the io structure still valid here after 103 * Thus we need to keep the io structure still valid here after
104 * convertion finished. The io structure has a flag to 104 * conversion finished. The io structure has a flag to
105 * avoid double converting from both fsync and background work 105 * avoid double converting from both fsync and background work
106 * queue work. 106 * queue work.
107 */ 107 */
@@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)
125 * the parent directory's parent as well, and so on recursively, if 125 * the parent directory's parent as well, and so on recursively, if
126 * they are also freshly created. 126 * they are also freshly created.
127 */ 127 */
128static void ext4_sync_parent(struct inode *inode) 128static int ext4_sync_parent(struct inode *inode)
129{ 129{
130 struct writeback_control wbc;
130 struct dentry *dentry = NULL; 131 struct dentry *dentry = NULL;
132 int ret = 0;
131 133
132 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { 134 while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
133 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); 135 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
136 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) 138 if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
137 break; 139 break;
138 inode = dentry->d_parent->d_inode; 140 inode = dentry->d_parent->d_inode;
139 sync_mapping_buffers(inode->i_mapping); 141 ret = sync_mapping_buffers(inode->i_mapping);
142 if (ret)
143 break;
144 memset(&wbc, 0, sizeof(wbc));
145 wbc.sync_mode = WB_SYNC_ALL;
146 wbc.nr_to_write = 0; /* only write out the inode */
147 ret = sync_inode(inode, &wbc);
148 if (ret)
149 break;
140 } 150 }
151 return ret;
141} 152}
142 153
143/* 154/*
@@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync)
176 if (!journal) { 187 if (!journal) {
177 ret = generic_file_fsync(file, datasync); 188 ret = generic_file_fsync(file, datasync);
178 if (!ret && !list_empty(&inode->i_dentry)) 189 if (!ret && !list_empty(&inode->i_dentry))
179 ext4_sync_parent(inode); 190 ret = ext4_sync_parent(inode);
180 goto out; 191 goto out;
181 } 192 }
182 193
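
ext4_sync_parent() now propagates errors and, besides the directory's data buffers, writes the parent inode itself via sync_inode() with a WB_SYNC_ALL writeback_control. The user-space pattern this serves is create-then-fsync-the-directory; a minimal sketch with illustrative paths:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    /* 1. make the file's own data and inode durable */
    int fd = open("/tmp/demo/newfile", O_CREAT | O_WRONLY, 0644);
    if (fd < 0 || fsync(fd) < 0) {
        perror("file");
        return 1;
    }
    close(fd);

    /* 2. make the *name* durable: fsync the parent directory too,
     *    checking the result just as the fixed kernel path now does */
    int dfd = open("/tmp/demo", O_RDONLY | O_DIRECTORY);
    if (dfd < 0 || fsync(dfd) < 0) {
        perror("dir");
        return 1;
    }
    close(dfd);
    return 0;
}
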
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1a86282b9024..f2fa5e8a582c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2502 * for partial write. 2502 * for partial write.
2503 */ 2503 */
2504 set_buffer_new(bh); 2504 set_buffer_new(bh);
2505 set_buffer_mapped(bh);
2505 } 2506 }
2506 return 0; 2507 return 0;
2507} 2508}
@@ -2588,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
2588 * because we should have holes filled from ext4_page_mkwrite(). We even don't 2589 * because we should have holes filled from ext4_page_mkwrite(). We even don't
2589 * need to file the inode to the transaction's list in ordered mode because if 2590 * need to file the inode to the transaction's list in ordered mode because if
2590 * we are writing back data added by write(), the inode is already there and if 2591 * we are writing back data added by write(), the inode is already there and if
2591 * we are writing back data modified via mmap(), noone guarantees in which 2592 * we are writing back data modified via mmap(), no one guarantees in which
2592 * transaction the data will hit the disk. In case we are journaling data, we 2593 * transaction the data will hit the disk. In case we are journaling data, we
2593 * cannot start transaction directly because transaction start ranks above page 2594 * cannot start transaction directly because transaction start ranks above page
2594 * lock so we have to do some magic. 2595 * lock so we have to do some magic.
@@ -2690,7 +2691,7 @@ static int ext4_writepage(struct page *page,
2690 2691
2691/* 2692/*
2692 * This is called via ext4_da_writepages() to 2693 * This is called via ext4_da_writepages() to
2693 * calulate the total number of credits to reserve to fit 2694 * calculate the total number of credits to reserve to fit
2694 * a single extent allocation into a single transaction, 2695 * a single extent allocation into a single transaction,
2695 * ext4_da_writpeages() will loop calling this before 2696 * ext4_da_writpeages() will loop calling this before
2696 * the block allocation. 2697 * the block allocation.
@@ -3304,7 +3305,7 @@ int ext4_alloc_da_blocks(struct inode *inode)
3304 * the pages by calling redirty_page_for_writepage() but that 3305 * the pages by calling redirty_page_for_writepage() but that
3305 * would be ugly in the extreme. So instead we would need to 3306 * would be ugly in the extreme. So instead we would need to
3306 * replicate parts of the code in the above functions, 3307 * replicate parts of the code in the above functions,
3307 * simplifying them becuase we wouldn't actually intend to 3308 * simplifying them because we wouldn't actually intend to
3308 * write out the pages, but rather only collect contiguous 3309 * write out the pages, but rather only collect contiguous
3309 * logical block extents, call the multi-block allocator, and 3310 * logical block extents, call the multi-block allocator, and
3310 * then update the buffer heads with the block allocations. 3311 * then update the buffer heads with the block allocations.
@@ -3694,7 +3695,7 @@ retry:
3694 * 3695 *
3695 * The unwrritten extents will be converted to written when DIO is completed. 3696 * The unwrritten extents will be converted to written when DIO is completed.
3696 * For async direct IO, since the IO may still pending when return, we 3697 * For async direct IO, since the IO may still pending when return, we
3697 * set up an end_io call back function, which will do the convertion 3698 * set up an end_io call back function, which will do the conversion
3698 * when async direct IO completed. 3699 * when async direct IO completed.
3699 * 3700 *
3700 * If the O_DIRECT write will extend the file then add this inode to the 3701 * If the O_DIRECT write will extend the file then add this inode to the
@@ -3717,7 +3718,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3717 * We could direct write to holes and fallocate. 3718 * We could direct write to holes and fallocate.
3718 * 3719 *
3719 * Allocated blocks to fill the hole are marked as uninitialized 3720 * Allocated blocks to fill the hole are marked as uninitialized
3720 * to prevent paralel buffered read to expose the stale data 3721 * to prevent parallel buffered read to expose the stale data
3721 * before DIO complete the data IO. 3722 * before DIO complete the data IO.
3722 * 3723 *
3723 * As to previously fallocated extents, ext4 get_block 3724 * As to previously fallocated extents, ext4 get_block
@@ -3778,7 +3779,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3778 int err; 3779 int err;
3779 /* 3780 /*
3780 * for non AIO case, since the IO is already 3781 * for non AIO case, since the IO is already
3781 * completed, we could do the convertion right here 3782 * completed, we could do the conversion right here
3782 */ 3783 */
3783 err = ext4_convert_unwritten_extents(inode, 3784 err = ext4_convert_unwritten_extents(inode,
3784 offset, ret); 3785 offset, ret);
@@ -4025,7 +4026,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
4025 * 4026 *
4026 * When we do truncate() we may have to clean the ends of several 4027 * When we do truncate() we may have to clean the ends of several
4027 * indirect blocks but leave the blocks themselves alive. Block is 4028 * indirect blocks but leave the blocks themselves alive. Block is
4028 * partially truncated if some data below the new i_size is refered 4029 * partially truncated if some data below the new i_size is referred
4029 * from it (and it is on the path to the first completely truncated 4030 * from it (and it is on the path to the first completely truncated
4030 * data block, indeed). We have to free the top of that path along 4031 * data block, indeed). We have to free the top of that path along
4031 * with everything to the right of the path. Since no allocation 4032 * with everything to the right of the path. Since no allocation
@@ -4169,7 +4170,7 @@ out_err:
4169 * @first: array of block numbers 4170 * @first: array of block numbers
4170 * @last: points immediately past the end of array 4171 * @last: points immediately past the end of array
4171 * 4172 *
4172 * We are freeing all blocks refered from that array (numbers are stored as 4173 * We are freeing all blocks referred from that array (numbers are stored as
4173 * little-endian 32-bit) and updating @inode->i_blocks appropriately. 4174 * little-endian 32-bit) and updating @inode->i_blocks appropriately.
4174 * 4175 *
4175 * We accumulate contiguous runs of blocks to free. Conveniently, if these 4176 * We accumulate contiguous runs of blocks to free. Conveniently, if these
@@ -4261,7 +4262,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
4261 * @last: pointer immediately past the end of array 4262 * @last: pointer immediately past the end of array
4262 * @depth: depth of the branches to free 4263 * @depth: depth of the branches to free
4263 * 4264 *
4264 * We are freeing all blocks refered from these branches (numbers are 4265 * We are freeing all blocks referred from these branches (numbers are
4265 * stored as little-endian 32-bit) and updating @inode->i_blocks 4266 * stored as little-endian 32-bit) and updating @inode->i_blocks
4266 * appropriately. 4267 * appropriately.
4267 */ 4268 */
@@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode)
4429 Indirect chain[4]; 4430 Indirect chain[4];
4430 Indirect *partial; 4431 Indirect *partial;
4431 __le32 nr = 0; 4432 __le32 nr = 0;
4432 int n; 4433 int n = 0;
4433 ext4_lblk_t last_block; 4434 ext4_lblk_t last_block, max_block;
4434 unsigned blocksize = inode->i_sb->s_blocksize; 4435 unsigned blocksize = inode->i_sb->s_blocksize;
4435 4436
4436 trace_ext4_truncate_enter(inode); 4437 trace_ext4_truncate_enter(inode);
@@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode)
4455 4456
4456 last_block = (inode->i_size + blocksize-1) 4457 last_block = (inode->i_size + blocksize-1)
4457 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); 4458 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4459 max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
4460 >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
4458 4461
4459 if (inode->i_size & (blocksize - 1)) 4462 if (inode->i_size & (blocksize - 1))
4460 if (ext4_block_truncate_page(handle, mapping, inode->i_size)) 4463 if (ext4_block_truncate_page(handle, mapping, inode->i_size))
4461 goto out_stop; 4464 goto out_stop;
4462 4465
4463 n = ext4_block_to_path(inode, last_block, offsets, NULL); 4466 if (last_block != max_block) {
4464 if (n == 0) 4467 n = ext4_block_to_path(inode, last_block, offsets, NULL);
4465 goto out_stop; /* error */ 4468 if (n == 0)
4469 goto out_stop; /* error */
4470 }
4466 4471
4467 /* 4472 /*
4468 * OK. This truncate is going to happen. We add the inode to the 4473 * OK. This truncate is going to happen. We add the inode to the
@@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode)
4493 */ 4498 */
4494 ei->i_disksize = inode->i_size; 4499 ei->i_disksize = inode->i_size;
4495 4500
4496 if (n == 1) { /* direct blocks */ 4501 if (last_block == max_block) {
4502 /*
4503 * It is unnecessary to free any data blocks if last_block is
4504 * equal to the indirect block limit.
4505 */
4506 goto out_unlock;
4507 } else if (n == 1) { /* direct blocks */
4497 ext4_free_data(handle, inode, NULL, i_data+offsets[0], 4508 ext4_free_data(handle, inode, NULL, i_data+offsets[0],
4498 i_data + EXT4_NDIR_BLOCKS); 4509 i_data + EXT4_NDIR_BLOCKS);
4499 goto do_indirects; 4510 goto do_indirects;
@@ -4553,6 +4564,7 @@ do_indirects:
4553 ; 4564 ;
4554 } 4565 }
4555 4566
4567out_unlock:
4556 up_write(&ei->i_data_sem); 4568 up_write(&ei->i_data_sem);
4557 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4569 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4558 ext4_mark_inode_dirty(handle, inode); 4570 ext4_mark_inode_dirty(handle, inode);
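
The truncate hunks cover the case where i_size sits exactly at the indirect-format limit: last_block then equals max_block, ext4_block_to_path() would reject it, and no data block can exist past it anyway, so the indirect walk is skipped via out_unlock. Roughly, with illustrative geometry:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Illustrative indirect-format geometry: 12 direct pointers plus
     * one single-, double- and triple-indirect tree; the real limit
     * comes from s_bitmap_maxbytes in the superblock info. */
    uint64_t blocksize = 1024;
    uint64_t apb = blocksize / 4;                /* block numbers per block */
    uint64_t max_block = 12 + apb + apb * apb + apb * apb * apb;

    uint64_t i_size = max_block * blocksize;     /* file at the format limit */
    uint64_t last_block = (i_size + blocksize - 1) / blocksize;

    if (last_block == max_block)
        printf("at the limit: no block past last_block can exist\n");
    return 0;
}
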
@@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
5398 /* if nrblocks are contiguous */ 5410 /* if nrblocks are contiguous */
5399 if (chunk) { 5411 if (chunk) {
5400 /* 5412 /*
5401 * With N contiguous data blocks, it need at most 5413 * With N contiguous data blocks, we need at most
5402 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks 5414 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
5403 * 2 dindirect blocks 5415 * 2 dindirect blocks, and 1 tindirect block
5404 * 1 tindirect block
5405 */ 5416 */
5406 indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); 5417 return DIV_ROUND_UP(nrblocks,
5407 return indirects + 3; 5418 EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
5408 } 5419 }
5409 /* 5420 /*
5410 * if nrblocks are not contiguous, worse case, each block touch 5421 * if nrblocks are not contiguous, worse case, each block touch
@@ -5478,7 +5489,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5478} 5489}
5479 5490
5480/* 5491/*
5481 * Calulate the total number of credits to reserve to fit 5492 * Calculate the total number of credits to reserve to fit
5482 * the modification of a single pages into a single transaction, 5493 * the modification of a single pages into a single transaction,
5483 * which may include multiple chunks of block allocations. 5494 * which may include multiple chunks of block allocations.
5484 * 5495 *
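
In ext4_indirect_trans_blocks() the truncating division undercounted by up to one indirect block (1023 contiguous blocks still need one indirect block, but 1023/1024 is 0); the rewrite rounds the data-block term up and keeps the double- and triple-indirect overhead. A quick check of the boundary, assuming 4K blocks with 4-byte block numbers:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))  /* as in linux/kernel.h */

int main(void)
{
    int addr_per_block = 1024;   /* 4K block size / 4-byte block numbers */
    int nr;

    for (nr = 1023; nr <= 1025; nr++)
        printf("nrblocks=%4d  old=%d  new=%d\n", nr,
               nr / addr_per_block + 3,              /* 1023 -> 3: too low */
               DIV_ROUND_UP(nr, addr_per_block) + 4);
    return 0;
}
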
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a5837a837a8b..d8a16eecf1d5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -92,7 +92,7 @@
92 * between CPUs. It is possible to get scheduled at this point. 92 * between CPUs. It is possible to get scheduled at this point.
93 * 93 *
94 * The locality group prealloc space is used looking at whether we have 94 * The locality group prealloc space is used looking at whether we have
95 * enough free space (pa_free) withing the prealloc space. 95 * enough free space (pa_free) within the prealloc space.
96 * 96 *
97 * If we can't allocate blocks via inode prealloc or/and locality group 97 * If we can't allocate blocks via inode prealloc or/and locality group
98 * prealloc then we look at the buddy cache. The buddy cache is represented 98 * prealloc then we look at the buddy cache. The buddy cache is represented
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index d1bafa57f483..92816b4e0f16 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -517,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode)
517 * start with one credit accounted for 517 * start with one credit accounted for
518 * superblock modification. 518 * superblock modification.
519 * 519 *
520 * For the tmp_inode we already have commited the 520 * For the tmp_inode we already have committed the
521 * trascation that created the inode. Later as and 521 * trascation that created the inode. Later as and
522 * when we add extents we extent the journal 522 * when we add extents we extent the journal
523 */ 523 */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 22546ad7f0ae..8553dfb310af 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
242 * journal_end calls result in the superblock being marked dirty, so 242 * journal_end calls result in the superblock being marked dirty, so
243 * that sync() will call the filesystem's write_super callback if 243 * that sync() will call the filesystem's write_super callback if
244 * appropriate. 244 * appropriate.
245 *
246 * To avoid j_barrier hold in userspace when a user calls freeze(),
247 * ext4 prevents a new handle from being started by s_frozen, which
248 * is in an upper layer.
245 */ 249 */
246handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 250handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
247{ 251{
248 journal_t *journal; 252 journal_t *journal;
253 handle_t *handle;
249 254
250 if (sb->s_flags & MS_RDONLY) 255 if (sb->s_flags & MS_RDONLY)
251 return ERR_PTR(-EROFS); 256 return ERR_PTR(-EROFS);
252 257
253 vfs_check_frozen(sb, SB_FREEZE_TRANS);
254 /* Special case here: if the journal has aborted behind our
255 * backs (eg. EIO in the commit thread), then we still need to
256 * take the FS itself readonly cleanly. */
257 journal = EXT4_SB(sb)->s_journal; 258 journal = EXT4_SB(sb)->s_journal;
258 if (journal) { 259 handle = ext4_journal_current_handle();
259 if (is_journal_aborted(journal)) { 260
260 ext4_abort(sb, "Detected aborted journal"); 261 /*
261 return ERR_PTR(-EROFS); 262 * If a handle has been started, it should be allowed to
262 } 263 * finish, otherwise deadlock could happen between freeze
263 return jbd2_journal_start(journal, nblocks); 264 * and others(e.g. truncate) due to the restart of the
265 * journal handle if the filesystem is forzen and active
266 * handles are not stopped.
267 */
268 if (!handle)
269 vfs_check_frozen(sb, SB_FREEZE_TRANS);
270
271 if (!journal)
272 return ext4_get_nojournal();
273 /*
274 * Special case here: if the journal has aborted behind our
275 * backs (eg. EIO in the commit thread), then we still need to
276 * take the FS itself readonly cleanly.
277 */
278 if (is_journal_aborted(journal)) {
279 ext4_abort(sb, "Detected aborted journal");
280 return ERR_PTR(-EROFS);
264 } 281 }
265 return ext4_get_nojournal(); 282 return jbd2_journal_start(journal, nblocks);
266} 283}
267 284
268/* 285/*
@@ -617,7 +634,7 @@ __acquires(bitlock)
617 * filesystem will have already been marked read/only and the 634 * filesystem will have already been marked read/only and the
618 * journal has been aborted. We return 1 as a hint to callers 635 * journal has been aborted. We return 1 as a hint to callers
619 * who might what to use the return value from 636 * who might what to use the return value from
620 * ext4_grp_locked_error() to distinguish beween the 637 * ext4_grp_locked_error() to distinguish between the
621 * ERRORS_CONT and ERRORS_RO case, and perhaps return more 638 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
622 * aggressively from the ext4 function in question, with a 639 * aggressively from the ext4 function in question, with a
623 * more appropriate error code. 640 * more appropriate error code.
@@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
2975 mutex_unlock(&ext4_li_info->li_list_mtx); 2992 mutex_unlock(&ext4_li_info->li_list_mtx);
2976 2993
2977 sbi->s_li_request = elr; 2994 sbi->s_li_request = elr;
2995 /*
2996 * set elr to NULL here since it has been inserted to
2997 * the request_list and the removal and free of it is
2998 * handled by ext4_clear_request_list from now on.
2999 */
3000 elr = NULL;
2978 3001
2979 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { 3002 if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
2980 ret = ext4_run_lazyinit_thread(); 3003 ret = ext4_run_lazyinit_thread();
@@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3385 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3408 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3386 spin_lock_init(&sbi->s_next_gen_lock); 3409 spin_lock_init(&sbi->s_next_gen_lock);
3387 3410
3411 init_timer(&sbi->s_err_report);
3412 sbi->s_err_report.function = print_daily_error_info;
3413 sbi->s_err_report.data = (unsigned long) sb;
3414
3388 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3415 err = percpu_counter_init(&sbi->s_freeblocks_counter,
3389 ext4_count_free_blocks(sb)); 3416 ext4_count_free_blocks(sb));
3390 if (!err) { 3417 if (!err) {
@@ -3646,9 +3673,6 @@ no_journal:
3646 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, 3673 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
3647 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); 3674 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
3648 3675
3649 init_timer(&sbi->s_err_report);
3650 sbi->s_err_report.function = print_daily_error_info;
3651 sbi->s_err_report.data = (unsigned long) sb;
3652 if (es->s_error_count) 3676 if (es->s_error_count)
3653 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ 3677 mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
3654 3678
@@ -3672,6 +3696,7 @@ failed_mount_wq:
3672 sbi->s_journal = NULL; 3696 sbi->s_journal = NULL;
3673 } 3697 }
3674failed_mount3: 3698failed_mount3:
3699 del_timer(&sbi->s_err_report);
3675 if (sbi->s_flex_groups) { 3700 if (sbi->s_flex_groups) {
3676 if (is_vmalloc_addr(sbi->s_flex_groups)) 3701 if (is_vmalloc_addr(sbi->s_flex_groups))
3677 vfree(sbi->s_flex_groups); 3702 vfree(sbi->s_flex_groups);
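
The three super.c hunks above move init_timer() ahead of the first operation whose failure jumps to failed_mount3, and add the matching del_timer() there: a shared error label may only tear down state that is guaranteed initialized by the time any goto can reach it. In miniature, with illustrative stubs:

struct timer { int armed; };

static void timer_init(struct timer *t) { t->armed = 0; }
static void timer_del(struct timer *t)  { t->armed = 0; }
static int  allocate_counters(void)     { return -1; }  /* pretend failure */

static int fill_super(void)
{
    struct timer err_timer;

    timer_init(&err_timer);     /* moved before anything that can fail */
    if (allocate_counters())
        goto failed;            /* the label can now safely touch it */
    return 0;

failed:
    timer_del(&err_timer);      /* the added teardown on the error path */
    return -1;
}

int main(void)
{
    return fill_super() ? 1 : 0;
}
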
@@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4138/* 4163/*
4139 * LVM calls this function before a (read-only) snapshot is created. This 4164 * LVM calls this function before a (read-only) snapshot is created. This
4140 * gives us a chance to flush the journal completely and mark the fs clean. 4165 * gives us a chance to flush the journal completely and mark the fs clean.
4166 *
4167 * Note that only this function cannot bring a filesystem to be in a clean
4168 * state independently, because ext4 prevents a new handle from being started
4169 * by @sb->s_frozen, which stays in an upper layer. It thus needs help from
4170 * the upper layer.
4141 */ 4171 */
4142static int ext4_freeze(struct super_block *sb) 4172static int ext4_freeze(struct super_block *sb)
4143{ 4173{
@@ -4614,17 +4644,30 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4614 4644
4615static int ext4_quota_off(struct super_block *sb, int type) 4645static int ext4_quota_off(struct super_block *sb, int type)
4616{ 4646{
4647 struct inode *inode = sb_dqopt(sb)->files[type];
4648 handle_t *handle;
4649
4617 /* Force all delayed allocation blocks to be allocated. 4650 /* Force all delayed allocation blocks to be allocated.
4618 * Caller already holds s_umount sem */ 4651 * Caller already holds s_umount sem */
4619 if (test_opt(sb, DELALLOC)) 4652 if (test_opt(sb, DELALLOC))
4620 sync_filesystem(sb); 4653 sync_filesystem(sb);
4621 4654
4655 /* Update modification times of quota files when userspace can
4656 * start looking at them */
4657 handle = ext4_journal_start(inode, 1);
4658 if (IS_ERR(handle))
4659 goto out;
4660 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4661 ext4_mark_inode_dirty(handle, inode);
4662 ext4_journal_stop(handle);
4663
4664out:
4622 return dquot_quota_off(sb, type); 4665 return dquot_quota_off(sb, type);
4623} 4666}
4624 4667
4625/* Read data from quotafile - avoid pagecache and such because we cannot afford 4668/* Read data from quotafile - avoid pagecache and such because we cannot afford
4626 * acquiring the locks... As quota files are never truncated and quota code 4669 * acquiring the locks... As quota files are never truncated and quota code
4627 * itself serializes the operations (and noone else should touch the files) 4670 * itself serializes the operations (and no one else should touch the files)
4628 * we don't have to be afraid of races */ 4671 * we don't have to be afraid of races */
4629static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, 4672static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
4630 size_t len, loff_t off) 4673 size_t len, loff_t off)
@@ -4714,9 +4757,8 @@ out:
4714 if (inode->i_size < off + len) { 4757 if (inode->i_size < off + len) {
4715 i_size_write(inode, off + len); 4758 i_size_write(inode, off + len);
4716 EXT4_I(inode)->i_disksize = inode->i_size; 4759 EXT4_I(inode)->i_disksize = inode->i_size;
4760 ext4_mark_inode_dirty(handle, inode);
4717 } 4761 }
4718 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
4719 ext4_mark_inode_dirty(handle, inode);
4720 mutex_unlock(&inode->i_mutex); 4762 mutex_unlock(&inode->i_mutex);
4721 return len; 4763 return len;
4722} 4764}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index bf93ad2bee07..6b088641f5bf 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -7,6 +7,7 @@
7#include <linux/exportfs.h> 7#include <linux/exportfs.h>
8#include <linux/fs_struct.h> 8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h> 9#include <linux/fsnotify.h>
10#include <linux/personality.h>
10#include <asm/uaccess.h> 11#include <asm/uaccess.h>
11#include "internal.h" 12#include "internal.h"
12 13
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 751d6b255a12..0845f84f2a5f 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -110,14 +110,13 @@ int unregister_filesystem(struct file_system_type * fs)
110 *tmp = fs->next; 110 *tmp = fs->next;
111 fs->next = NULL; 111 fs->next = NULL;
112 write_unlock(&file_systems_lock); 112 write_unlock(&file_systems_lock);
113 synchronize_rcu();
113 return 0; 114 return 0;
114 } 115 }
115 tmp = &(*tmp)->next; 116 tmp = &(*tmp)->next;
116 } 117 }
117 write_unlock(&file_systems_lock); 118 write_unlock(&file_systems_lock);
118 119
119 synchronize_rcu();
120
121 return -EINVAL; 120 return -EINVAL;
122} 121}
123 122
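
Moving synchronize_rcu() into the success branch keeps the grace-period wait off the failure path: if the filesystem type was never found, nothing was unlinked, so no RCU reader can still be traversing it and there is nothing to wait for. The shape of the fix, with synchronize_readers() standing in for synchronize_rcu() and the writer-side locking elided:

struct fstype { const char *name; struct fstype *next; };
static struct fstype *file_systems;

static void synchronize_readers(void)
{
    /* stand-in for synchronize_rcu(): wait out one grace period */
}

static int unregister_fs(struct fstype *fs)
{
    struct fstype **tmp;

    for (tmp = &file_systems; *tmp; tmp = &(*tmp)->next) {
        if (*tmp == fs) {
            *tmp = fs->next;        /* unlink under the writer lock */
            fs->next = NULL;
            synchronize_readers();  /* moved: only after a real removal */
            return 0;
        }
    }
    return -1;                      /* not found: no grace period needed */
}

int main(void) { return 0; }
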
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index 78948b4b1894..c9a6a94e58e9 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -164,7 +164,7 @@ vxfs_read_fshead(struct super_block *sbp)
164 goto out_free_pfp; 164 goto out_free_pfp;
165 } 165 }
166 if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) { 166 if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
167 printk(KERN_ERR "vxfs: structual list inode is of wrong type (%x)\n", 167 printk(KERN_ERR "vxfs: structural list inode is of wrong type (%x)\n",
168 VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK); 168 VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK);
169 goto out_iput_stilist; 169 goto out_iput_stilist;
170 } 170 }
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 6c5131d592f0..3360f1e678ad 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -162,7 +162,7 @@ vxfs_find_entry(struct inode *ip, struct dentry *dp, struct page **ppp)
 /**
  * vxfs_inode_by_name - find inode number for dentry
  * @dip: directory to search in
- * @dp:  dentry we seach for
+ * @dp:  dentry we search for
  *
  * Description:
  *   vxfs_inode_by_name finds out the inode number of
diff --git a/fs/freevxfs/vxfs_olt.h b/fs/freevxfs/vxfs_olt.h
index d8324296486f..b7b3af502615 100644
--- a/fs/freevxfs/vxfs_olt.h
+++ b/fs/freevxfs/vxfs_olt.h
@@ -60,7 +60,7 @@ enum {
  *
  * The Object Location Table header is placed at the beginning of each
  * OLT extent. It is used to fing certain filesystem-wide metadata, e.g.
- * the inital inode list, the fileset header or the device configuration.
+ * the initial inode list, the fileset header or the device configuration.
  */
 struct vxfs_olt {
 	u_int32_t	olt_magic;	/* magic number */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b5ed541fb137..34591ee804b5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -144,7 +144,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  *
  * Description:
  *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
- *   started when this function returns, we make no guarentees on
+ *   started when this function returns, we make no guarantees on
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 6ea00734984e..82a66466a24c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -523,7 +523,7 @@ static int fuse_readpage(struct file *file, struct page *page)
 		goto out;
 
 	/*
-	 * Page writeback can extend beyond the liftime of the
+	 * Page writeback can extend beyond the lifetime of the
 	 * page-cache page, so make sure we read a properly synced
 	 * page.
 	 */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index ef3dc4b9fae2..74add2ddcc3f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1136,7 +1136,7 @@ void gfs2_trim_blocks(struct inode *inode)
  * earlier versions of GFS2 have a bug in the stuffed file reading
  * code which will result in a buffer overrun if the size is larger
  * than the max stuffed file size. In order to prevent this from
- * occuring, such files are unstuffed, but in other cases we can
+ * occurring, such files are unstuffed, but in other cases we can
  * just update the inode size directly.
  *
  * Returns: 0 on success, or -ve on error
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e2431313491f..f07643e21bfa 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1123,7 +1123,7 @@ void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
  * @number: the lock number
  * @glops: the glock operations for the type of glock
  * @state: the state to acquire the glock in
- * @flags: modifier flags for the aquisition
+ * @flags: modifier flags for the acquisition
  * @gh: the struct gfs2_holder
  *
  * Returns: errno
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ec73ed70bae1..a4e23d68a398 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -657,7 +657,7 @@ out:
  * @sdp: the file system
  *
  * This function flushes data and meta data for all machines by
- * aquiring the transaction log exclusively. All journals are
+ * acquiring the transaction log exclusively. All journals are
  * ensured to be in a clean state as well.
  *
  * Returns: errno
diff --git a/fs/inode.c b/fs/inode.c
index 5f4e11aaeb5c..33c963d08ab4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -125,6 +125,14 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
 static DECLARE_RWSEM(iprune_sem);
 
 /*
+ * Empty aops. Can be used for the cases where the user does not
+ * define any of the address_space operations.
+ */
+const struct address_space_operations empty_aops = {
+};
+EXPORT_SYMBOL(empty_aops);
+
+/*
  * Statistics gathering..
  */
 struct inodes_stat_t inodes_stat;
@@ -176,7 +184,6 @@ int proc_nr_inodes(ctl_table *table, int write,
  */
 int inode_init_always(struct super_block *sb, struct inode *inode)
 {
-	static const struct address_space_operations empty_aops;
 	static const struct inode_operations empty_iops;
 	static const struct file_operations empty_fops;
 	struct address_space *const mapping = &inode->i_data;
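The two inode.c hunks above promote a function-local static empty_aops to a single exported instance, so other code (see the nilfs2/page.c hunk further down) can point an address_space at it instead of leaving a_ops NULL. A minimal standalone sketch of the shared empty-ops idea; the names here are illustrative, not the kernel structures:

/* One zeroed, read-only operations table that every uninitialized object
 * points at, so callers can always dereference the table and test the
 * individual function pointers instead of first checking for NULL. */
#include <stdio.h>

struct a_ops {
	int (*readpage)(void *page);
};

static const struct a_ops empty_aops;	/* every member is NULL */

struct mapping {
	const struct a_ops *ops;
};

static void mapping_init(struct mapping *m)
{
	m->ops = &empty_aops;	/* never NULL: m->ops->readpage is safe to test */
}

int main(void)
{
	struct mapping m;

	mapping_init(&m);
	if (!m.ops->readpage)
		puts("no readpage operation defined");
	return 0;
}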
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index da871ee084d3..69b180459463 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -362,7 +362,7 @@ void journal_commit_transaction(journal_t *journal)
 	 * we do not require it to remember exactly which old buffers it
 	 * has reserved. This is consistent with the existing behaviour
 	 * that multiple journal_get_write_access() calls to the same
-	 * buffer are perfectly permissable.
+	 * buffer are perfectly permissible.
 	 */
 	while (commit_transaction->t_reserved_list) {
 		jh = commit_transaction->t_reserved_list;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index eb11601f2e00..b3713afaaa9e 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -770,7 +770,7 @@ journal_t * journal_init_dev(struct block_device *bdev,
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
 	if (!journal->j_wbuf) {
-		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+		printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
 			__func__);
 		goto out_err;
 	}
@@ -831,7 +831,7 @@ journal_t * journal_init_inode (struct inode *inode)
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
 	if (!journal->j_wbuf) {
-		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+		printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
 			__func__);
 		goto out_err;
 	}
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index d29018307e2e..305a90763154 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -71,7 +71,7 @@
  * switching hash tables under them. For operations on the lists of entries in
  * the hash table j_revoke_lock is used.
  *
- * Finally, also replay code uses the hash tables but at this moment noone else
+ * Finally, also replay code uses the hash tables but at this moment no one else
  * can touch them (filesystem isn't mounted yet) and hence no locking is
  * needed.
  */
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5b2e4c30a2a1..60d2319651b2 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1392,7 +1392,7 @@ int journal_stop(handle_t *handle)
 	 * by 30x or more...
 	 *
 	 * We try and optimize the sleep time against what the underlying disk
-	 * can do, instead of having a static sleep time. This is usefull for
+	 * can do, instead of having a static sleep time. This is useful for
 	 * the case where our storage is so fast that it is more optimal to go
 	 * ahead and force a flush and wait for the transaction to be committed
 	 * than it is to wait for an arbitrary amount of time for new writers to
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index fa36d7662b21..6e28000a4b21 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
 	int ret;
 	struct timespec now = current_kernel_time();
 
+	*cbh = NULL;
+
 	if (is_journal_aborted(journal))
 		return 0;
 
@@ -403,7 +405,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 * we do not require it to remember exactly which old buffers it
 	 * has reserved. This is consistent with the existing behaviour
 	 * that multiple jbd2_journal_get_write_access() calls to the same
-	 * buffer are perfectly permissable.
+	 * buffer are perfectly permissible.
 	 */
 	while (commit_transaction->t_reserved_list) {
 		jh = commit_transaction->t_reserved_list;
@@ -806,7 +808,7 @@ wait_for_iobuf:
 		if (err)
 			__jbd2_journal_abort_hard(journal);
 	}
-	if (!err && !is_journal_aborted(journal))
+	if (cbh)
 		err = journal_wait_on_commit_record(journal, cbh);
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
 				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
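The two jbd2/commit.c hunks above fix a use of an uninitialized out-parameter: journal_submit_commit_record() can return 0 without producing a buffer_head, so *cbh is now set to NULL on entry and the caller tests the pointer itself rather than re-deriving the condition. A standalone sketch of the pattern, where make_record() and its caller are hypothetical stand-ins, not jbd2 functions:

/* Initialize *out before any early return, then branch on the pointer. */
#include <stdio.h>
#include <stdlib.h>

static int journal_aborted = 1;

static int make_record(char **out)
{
	*out = NULL;		/* defined even when we bail out below */
	if (journal_aborted)
		return 0;	/* early return: *out stays NULL */
	*out = malloc(16);
	return *out ? 0 : -1;
}

int main(void)
{
	char *rec;		/* deliberately uninitialized, like the old cbh */

	if (make_record(&rec) == 0 && rec)
		puts("record submitted: wait on it");
	else
		puts("nothing submitted: skip the wait");
	free(rec);		/* free(NULL) is a no-op */
	return 0;
}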
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 90407b8fece7..e0ec3db1c395 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -917,7 +917,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
 	if (!journal->j_wbuf) {
-		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+		printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
 			__func__);
 		goto out_err;
 	}
@@ -983,7 +983,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 	journal->j_wbufsize = n;
 	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
 	if (!journal->j_wbuf) {
-		printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
+		printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
 			__func__);
 		goto out_err;
 	}
@@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device)
 	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
 	if (!new_dev)
 		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
+	bd = bdget(device);
 	spin_lock(&devname_cache_lock);
 	if (devcache[i]) {
 		if (devcache[i]->device == device) {
 			kfree(new_dev);
+			bdput(bd);
 			ret = devcache[i]->devname;
 			spin_unlock(&devname_cache_lock);
 			return ret;
@@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device)
 	}
 	devcache[i] = new_dev;
 	devcache[i]->device = device;
-	bd = bdget(device);
 	if (bd) {
 		bdevname(bd, devcache[i]->devname);
 		bdput(bd);
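The jbd2_dev_to_name() hunks hoist bdget(), which can sleep, out of the devname_cache_lock spinlock, and drop the now-unneeded reference on the cache-hit path. A userspace analogue of the same reordering, using a mutex and a malloc-backed allocation; all names are illustrative:

/* Do the blocking work before taking the lock; release it on the path
 * that turns out not to need it. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static char *cached;

static const char *lookup(const char *name)
{
	char *fresh = strdup(name);	/* may block/allocate: done before locking */

	if (!fresh)
		return NULL;

	pthread_mutex_lock(&cache_lock);
	if (cached) {			/* cache hit: the one we prepared is unneeded */
		pthread_mutex_unlock(&cache_lock);
		free(fresh);
		return cached;
	}
	cached = fresh;
	pthread_mutex_unlock(&cache_lock);
	return cached;
}

int main(void)
{
	printf("%s\n", lookup("sda1"));
	printf("%s\n", lookup("sdb2"));	/* still "sda1": the hit path ran */
	return 0;
}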
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 9ad321fd63fd..69fd93588118 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -71,7 +71,7 @@
  * switching hash tables under them. For operations on the lists of entries in
  * the hash table j_revoke_lock is used.
  *
- * Finally, also replay code uses the hash tables but at this moment noone else
+ * Finally, also replay code uses the hash tables but at this moment no one else
  * can touch them (filesystem isn't mounted yet) and hence no locking is
  * needed.
  */
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 1d1191050f99..05fa77a23711 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1403,7 +1403,7 @@ int jbd2_journal_stop(handle_t *handle)
 
 	/*
 	 * Once we drop t_updates, if it goes to zero the transaction
-	 * could start commiting on us and eventually disappear. So
+	 * could start committing on us and eventually disappear. So
 	 * once we do this, we must not dereference transaction
 	 * pointer again.
 	 */
diff --git a/fs/jffs2/TODO b/fs/jffs2/TODO
index 5d3ea4070f01..ca28964abd4b 100644
--- a/fs/jffs2/TODO
+++ b/fs/jffs2/TODO
@@ -11,7 +11,7 @@
  - checkpointing (do we need this? scan is quite fast)
  - make the scan code populate real inodes so read_inode just after
    mount doesn't have to read the flash twice for large files.
-   Make this a per-inode option, changable with chattr, so you can
+   Make this a per-inode option, changeable with chattr, so you can
    decide which inodes should be in-core immediately after mount.
  - test, test, test
 
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index d32ee9412cb9..2ab1a0d91210 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -24,7 +24,7 @@
  *
  * Returns: 0 if the data CRC is correct;
  *	    1 - if incorrect;
- *	    error code if an error occured.
+ *	    error code if an error occurred.
  */
 static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
 {
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 800171dca53b..e537fb0e0184 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -121,7 +121,7 @@ int jffs2_sum_add_inode_mem(struct jffs2_summary *s, struct jffs2_raw_inode *ri,
 	temp->nodetype = ri->nodetype;
 	temp->inode = ri->ino;
 	temp->version = ri->version;
-	temp->offset = cpu_to_je32(ofs); /* relative offset from the begining of the jeb */
+	temp->offset = cpu_to_je32(ofs); /* relative offset from the beginning of the jeb */
 	temp->totlen = ri->totlen;
 	temp->next = NULL;
 
@@ -139,7 +139,7 @@ int jffs2_sum_add_dirent_mem(struct jffs2_summary *s, struct jffs2_raw_dirent *r
 
 	temp->nodetype = rd->nodetype;
 	temp->totlen = rd->totlen;
-	temp->offset = cpu_to_je32(ofs);	/* relative from the begining of the jeb */
+	temp->offset = cpu_to_je32(ofs);	/* relative from the beginning of the jeb */
 	temp->pino = rd->pino;
 	temp->version = rd->version;
 	temp->ino = rd->ino;
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 07ee1546b2fa..4515bea0268f 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1116,7 +1116,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
 
 /*
  * On NAND we try to mark this block bad. If the block was erased more
- * than MAX_ERASE_FAILURES we mark it finaly bad.
+ * than MAX_ERASE_FAILURES we mark it finally bad.
  * Don't care about failures. This block remains on the erase-pending
  * or badblock list as long as nobody manipulates the flash with
  * a bootloader or something like that.
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index c92ea3b3ea5e..4496872cf4e7 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1649,7 +1649,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
 	}
 
 	/* search the tree within the dmap control page for
-	 * sufficent free space.  if sufficient free space is found,
+	 * sufficient free space.  if sufficient free space is found,
 	 * dbFindLeaf() returns the index of the leaf at which
 	 * free space was found.
 	 */
@@ -2744,7 +2744,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
 	/* check which (leafno or buddy) is the left buddy.
 	 * the left buddy gets to claim the blocks resulting
 	 * from the join while the right gets to claim none.
-	 * the left buddy is also eligable to participate in
+	 * the left buddy is also eligible to participate in
 	 * a join at the next higher level while the right
 	 * is not.
 	 *
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 5d3bbd10f8db..e5fe8506ed16 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -126,7 +126,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 
 	/* allocate the disk blocks for the extent.  initially, extBalloc()
 	 * will try to allocate disk blocks for the requested size (xlen).
-	 * if this fails (xlen contiguous free blocks not avaliable), it'll
+	 * if this fails (xlen contiguous free blocks not available), it'll
 	 * try to allocate a smaller number of blocks (producing a smaller
 	 * extent), with this smaller number of blocks consisting of the
 	 * requested number of blocks rounded down to the next smaller
@@ -481,7 +481,7 @@ int extFill(struct inode *ip, xad_t * xp)
 	 *
 	 * initially, we will try to allocate disk blocks for the
 	 * requested size (nblocks).  if this fails (nblocks
-	 * contiguous free blocks not avaliable), we'll try to allocate
+	 * contiguous free blocks not available), we'll try to allocate
 	 * a smaller number of blocks (producing a smaller extent), with
 	 * this smaller number of blocks consisting of the requested
 	 * number of blocks rounded down to the next smaller power of 2
@@ -575,7 +575,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 	 * to a new set of blocks.  If moving the extent, we initially
 	 * will try to allocate disk blocks for the requested size
 	 * (newnblks).  if this fails (new contiguous free blocks not
-	 * avaliable), we'll try to allocate a smaller number of
+	 * available), we'll try to allocate a smaller number of
 	 * blocks (producing a smaller extent), with this smaller
 	 * number of blocks consisting of the requested number of
 	 * blocks rounded down to the next smaller power of 2
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 3a09423b6c22..ed53a4740168 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -1069,7 +1069,7 @@ int diFree(struct inode *ip)
 	 */
 	if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
 		/* in preparation for removing the iag from the
-		 * ag extent free list, read the iags preceeding
+		 * ag extent free list, read the iags preceding
 		 * and following the iag on the ag extent free
 		 * list.
 		 */
@@ -1095,7 +1095,7 @@ int diFree(struct inode *ip)
 		int inofreefwd = le32_to_cpu(iagp->inofreefwd);
 
 		/* in preparation for removing the iag from the
-		 * ag inode free list, read the iags preceeding
+		 * ag inode free list, read the iags preceding
 		 * and following the iag on the ag inode free
 		 * list.  before reading these iags, we must make
 		 * sure that we already don't have them in hand
@@ -1681,7 +1681,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
 	 * try to allocate a new extent of free inodes.
 	 */
 	if (addext) {
-		/* if free space is not avaliable for this new extent, try
+		/* if free space is not available for this new extent, try
 		 * below to allocate a free and existing (already backed)
 		 * inode from the ag.
 		 */
@@ -2036,7 +2036,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 
 	/* check if this is the last free inode within the iag.
 	 * if so, it will have to be removed from the ag free
-	 * inode list, so get the iags preceeding and following
+	 * inode list, so get the iags preceding and following
 	 * it on the list.
 	 */
 	if (iagp->nfreeinos == cpu_to_le32(1)) {
@@ -2208,7 +2208,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 
 	/* check if this is the last free extent within the
 	 * iag.  if so, the iag must be removed from the ag
-	 * free extent list, so get the iags preceeding and
+	 * free extent list, so get the iags preceding and
 	 * following the iag on this list.
 	 */
 	if (iagp->nfreeexts == cpu_to_le32(1)) {
@@ -2504,7 +2504,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 	}
 
 
-	/* get the next avaliable iag number */
+	/* get the next available iag number */
 	iagno = imap->im_nextiag;
 
 	/* make sure that we have not exceeded the maximum inode
@@ -2615,7 +2615,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 
 	duplicateIXtree(sb, blkno, xlen, &xaddr);
 
-	/* update the next avaliable iag number */
+	/* update the next available iag number */
 	imap->im_nextiag += 1;
 
 	/* Add the iag to the iag free list so we don't lose the iag
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 9236bc49ae7f..e38c21598850 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -288,7 +288,7 @@ struct lrd {
 	/*
 	 *	SYNCPT: log sync point
 	 *
-	 *	replay log upto syncpt address specified;
+	 *	replay log up to syncpt address specified;
 	 */
 	struct {
 		__le32 sync;	/* 4: syncpt address (0 = here) */
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index d94f8d9e87d7..a78beda85f68 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -75,7 +75,7 @@ extern void grab_metapage(struct metapage *);
 extern void force_metapage(struct metapage *);
 
 /*
- * hold_metapage and put_metapage are used in conjuction. The page lock
+ * hold_metapage and put_metapage are used in conjunction. The page lock
  * is not dropped between the two, so no other threads can get or release
  * the metapage
  */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 9466957ec841..f6cc0c09ec63 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -636,7 +636,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	 * the inode of the page and available to all anonymous
 	 * transactions until txCommit() time at which point
 	 * they are transferred to the transaction tlock list of
-	 * the commiting transaction of the inode)
+	 * the committing transaction of the inode)
 	 */
 	if (xtid == 0) {
 		tlck->tid = tid;
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 1aba0039f1c9..8ea5efb5a34e 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -57,7 +57,7 @@
  * 2. compute new FSCKSize from new LVSize;
  * 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where
  *    assert(new FSSize >= old FSSize),
- *    i.e., file system must not be shrinked;
+ *    i.e., file system must not be shrunk;
  */
 int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 {
@@ -182,7 +182,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 */
 	newFSSize = newLVSize - newLogSize - newFSCKSize;
 
-	/* file system cannot be shrinked */
+	/* file system cannot be shrunk */
 	if (newFSSize < bmp->db_mapsize) {
 		rc = -EINVAL;
 		goto out;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index eeca48a031ab..06c8a67cbe76 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -644,7 +644,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 /* Read data from quotafile - avoid pagecache and such because we cannot afford
  * acquiring the locks... As quota files are never truncated and quota code
- * itself serializes the operations (and noone else should touch the files)
+ * itself serializes the operations (and no one else should touch the files)
  * we don't have to be afraid of races */
 static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
 			      size_t len, loff_t off)
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 7466e9dcc8c5..339e17e9133d 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -60,7 +60,7 @@ static int mtd_write(struct super_block *sb, loff_t ofs, size_t len, void *buf)
  * asynchronous properties. So just to prevent the first implementor of such
  * a thing from breaking logfs in 2350, we do the usual pointless dance to
  * declare a completion variable and wait for completion before returning
- * from mtd_erase(). What an excercise in futility!
+ * from mtd_erase(). What an exercise in futility!
  */
 static void logfs_erase_callback(struct erase_info *ei)
 {
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index f9ddf0c388c8..9ed89d1663f8 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -92,7 +92,7 @@ static int beyond_eof(struct inode *inode, loff_t bix)
  * so short names (len <= 9) don't even occupy the complete 32bit name
  * space.  A prime >256 ensures short names quickly spread the 32bit
  * name space.  Add about 26 for the estimated amount of information
- * of each character and pick a prime nearby, preferrably a bit-sparse
+ * of each character and pick a prime nearby, preferably a bit-sparse
  * one.
  */
 static u32 hash_32(const char *s, int len, u32 seed)
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index ee99a9f5dfd3..9e22085231b3 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1616,7 +1616,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
 	err = logfs_write_buf(inode, page, flags);
 	if (!err && shrink_level(gc_level) == 0) {
 		/* Rewrite cannot mark the inode dirty but has to
-		 * write it immediatly.
+		 * write it immediately.
 		 * Q: Can't we just create an alias for the inode
 		 * instead? And if not, why not?
 		 */
diff --git a/fs/mbcache.c b/fs/mbcache.c
index a25444ab2baf..2f174be06555 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -542,7 +542,7 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
  * mb_cache_entry_find_first()
  *
  * Find the first cache entry on a given device with a certain key in
- * an additional index. Additonal matches can be found with
+ * an additional index. Additional matches can be found with
  * mb_cache_entry_find_next(). Returns NULL if no match was found. The
  * returned cache entry is locked for shared access ("multiple readers").
  *
diff --git a/fs/namei.c b/fs/namei.c
index 3cb616d38d9c..54fc993e3027 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -70,7 +70,7 @@
  * name indicated by the symlink. The old code always complained that the
  * name already exists, due to not following the symlink even if its target
  * is nonexistent.  The new semantics affects also mknod() and link() when
- * the name is a symlink pointing to a non-existant name.
+ * the name is a symlink pointing to a non-existent name.
  *
  * I don't know which semantics is the right one, since I have no access
  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
@@ -697,6 +697,7 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
 		do {
 			seq = read_seqcount_begin(&fs->seq);
 			nd->root = fs->root;
+			nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
 		} while (read_seqcount_retry(&fs->seq, seq));
 	}
 }
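The set_root_rcu() hunk samples the root dentry's own d_seq inside the fs->seq retry loop, so the captured root can later be revalidated during the RCU path walk. For readers unfamiliar with the retry idiom, here is a minimal single-threaded C11 sketch of a sequence-count reader and writer; it is illustrative only, not the kernel's seqcount API, and a real seqlock also needs memory barriers tied to the protected data:

/* A reader re-samples until it sees an even, unchanged sequence number,
 * which means its snapshot of the protected fields is consistent. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;
static int root_x, root_y;	/* the protected snapshot */

static void write_root(int x, int y)
{
	atomic_fetch_add(&seq, 1);	/* odd: update in progress */
	root_x = x;
	root_y = y;
	atomic_fetch_add(&seq, 1);	/* even again: stable */
}

static void read_root(int *x, int *y)
{
	unsigned int s1, s2;

	do {
		s1 = atomic_load(&seq);
		*x = root_x;
		*y = root_y;
		s2 = atomic_load(&seq);
	} while ((s1 & 1) || s1 != s2);	/* retry if torn or in progress */
}

int main(void)
{
	int x, y;

	write_root(1, 2);
	read_root(&x, &y);
	printf("root = (%d, %d)\n", x, y);
	return 0;
}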
diff --git a/fs/namespace.c b/fs/namespace.c
index 7dba2ed03429..d99bcf59e4c2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1030,18 +1030,6 @@ const struct seq_operations mounts_op = {
 	.show	= show_vfsmnt
 };
 
-static int uuid_is_nil(u8 *uuid)
-{
-	int i;
-	u8 *cp = (u8 *)uuid;
-
-	for (i = 0; i < 16; i++) {
-		if (*cp++)
-			return 0;
-	}
-	return 1;
-}
-
 static int show_mountinfo(struct seq_file *m, void *v)
 {
 	struct proc_mounts *p = m->private;
@@ -1085,10 +1073,6 @@ static int show_mountinfo(struct seq_file *m, void *v)
 	if (IS_MNT_UNBINDABLE(mnt))
 		seq_puts(m, " unbindable");
 
-	if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
-		/* print the uuid */
-		seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
-
 	/* Filesystem specific data */
 	seq_puts(m, " - ");
 	show_type(m, sb);
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 00a1d1c3d3a4..0250e4ce4893 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -596,7 +596,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 /*	server->priv.data = NULL;	*/
 
 	server->m = data;
-	/* Althought anything producing this is buggy, it happens
+	/* Although anything producing this is buggy, it happens
 	   now because of PATH_MAX changes.. */
 	if (server->m.time_out < 1) {
 		server->m.time_out = 10;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 14e0f9371d14..00ecf62ce7c1 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -241,7 +241,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
 
 	args->cbl_layout_type = ntohl(*p++);
 	/* Depite the spec's xdr, iomode really belongs in the FILE switch,
-	 * as it is unuseable and ignored with the other types.
+	 * as it is unusable and ignored with the other types.
 	 */
 	iomode = ntohl(*p++);
 	args->cbl_layoutchanged = ntohl(*p++);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3ac5bd695e5e..2f093ed16980 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -301,7 +301,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
  * disk, but it retrieves and clears ctx->error after synching, despite
  * the two being set at the same time in nfs_context_set_write_error().
  * This is because the former is used to notify the _next_ call to
- * nfs_file_write() that a write error occured, and hence cause it to
+ * nfs_file_write() that a write error occurred, and hence cause it to
  * fall back to doing a synchronous write.
  */
 static int
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 9166fcb66da2..89fc160fd5b0 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -148,67 +148,64 @@ static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors,
 	return pseudoflavor;
 }
 
-static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry)
+static int nfs_negotiate_security(const struct dentry *parent,
+				  const struct dentry *dentry,
+				  rpc_authflavor_t *flavor)
 {
-	int status = 0;
 	struct page *page;
 	struct nfs4_secinfo_flavors *flavors;
 	int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
-	rpc_authflavor_t flavor = RPC_AUTH_UNIX;
+	int ret = -EPERM;
 
 	secinfo = NFS_PROTO(parent->d_inode)->secinfo;
 	if (secinfo != NULL) {
 		page = alloc_page(GFP_KERNEL);
 		if (!page) {
-			status = -ENOMEM;
+			ret = -ENOMEM;
 			goto out;
 		}
 		flavors = page_address(page);
-		status = secinfo(parent->d_inode, &dentry->d_name, flavors);
-		flavor = nfs_find_best_sec(flavors, dentry->d_inode);
+		ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
+		*flavor = nfs_find_best_sec(flavors, dentry->d_inode);
 		put_page(page);
 	}
 
-	return flavor;
-
 out:
-	status = -ENOMEM;
-	return status;
+	return ret;
 }
 
-static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
+static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
 			       struct dentry *dentry, struct path *path,
-			       struct nfs_fh *fh, struct nfs_fattr *fattr)
+			       struct nfs_fh *fh, struct nfs_fattr *fattr,
+			       rpc_authflavor_t *flavor)
 {
-	rpc_authflavor_t flavor;
 	struct rpc_clnt *clone;
 	struct rpc_auth *auth;
 	int err;
 
-	flavor = nfs_negotiate_security(parent, path->dentry);
-	if (flavor < 0)
+	err = nfs_negotiate_security(parent, path->dentry, flavor);
+	if (err < 0)
 		goto out;
 	clone = rpc_clone_client(server->client);
-	auth = rpcauth_create(flavor, clone);
+	auth = rpcauth_create(*flavor, clone);
 	if (!auth) {
-		flavor = -EIO;
+		err = -EIO;
 		goto out_shutdown;
 	}
 	err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
 						  &path->dentry->d_name,
 						  fh, fattr);
-	if (err < 0)
-		flavor = err;
 out_shutdown:
 	rpc_shutdown_client(clone);
 out:
-	return flavor;
+	return err;
 }
 #else /* CONFIG_NFS_V4 */
-static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server,
+static inline int nfs_lookup_with_sec(struct nfs_server *server,
 				      struct dentry *parent, struct dentry *dentry,
 				      struct path *path, struct nfs_fh *fh,
-				      struct nfs_fattr *fattr)
+				      struct nfs_fattr *fattr,
+				      rpc_authflavor_t *flavor)
 {
 	return -EPERM;
 }
@@ -234,7 +231,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
 	struct nfs_fh *fh = NULL;
 	struct nfs_fattr *fattr = NULL;
 	int err;
-	rpc_authflavor_t flavor = 1;
+	rpc_authflavor_t flavor = RPC_AUTH_UNIX;
 
 	dprintk("--> nfs_d_automount()\n");
 
@@ -255,13 +252,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
 	err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
 						  &path->dentry->d_name,
 						  fh, fattr);
-	if (err == -EPERM) {
-		flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr);
-		if (flavor < 0)
-			err = flavor;
-		else
-			err = 0;
-	}
+	if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
+		err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
 	dput(parent);
 	if (err != 0) {
 		mnt = ERR_PTR(err);
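The driver for the nfs/namespace.c rewrite above: rpc_authflavor_t is an unsigned type, so checks like "if (flavor < 0)" could never fire and negative errnos were silently lost. The fix returns an int status and hands the flavor back through a pointer. A compilable demonstration of the trap; auth_flavor_t and both negotiate functions are illustrative stand-ins:

/* Stuffing a negative errno into an unsigned type defeats the "< 0" test. */
#include <stdio.h>

typedef unsigned int auth_flavor_t;

static auth_flavor_t negotiate_old(void)
{
	return (auth_flavor_t)-13;	/* a negative errno crammed into unsigned */
}

static int negotiate_new(auth_flavor_t *flavor)
{
	*flavor = 1;			/* e.g. a default flavor */
	return 0;			/* 0 on success, negative errno on failure */
}

int main(void)
{
	auth_flavor_t f = negotiate_old();

	if (f < 0)			/* always false: f is unsigned */
		puts("old style: error detected");
	else
		printf("old style: error missed, flavor=%u\n", f);

	if (negotiate_new(&f) < 0)
		puts("new style: error detected");
	else
		printf("new style: ok, flavor=%u\n", f);
	return 0;
}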
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 085a354e0f08..7c44579f5832 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,7 +33,7 @@
 #include "pnfs.h"
 
 /*
- * Field testing shows we need to support upto 4096 stripe indices.
+ * Field testing shows we need to support up to 4096 stripe indices.
  * We store each index as a u8 (u32 on the wire) to keep the memory footprint
  * reasonable. This in turn means we support a maximum of 256
  * RFC 5661 multipath_list4 structures.
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dfd1e6d7e6c3..9bf41eab3e46 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2204,8 +2204,6 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
 		goto out;
 	}
 	ret = nfs4_lookup_root(server, fhandle, info);
-	if (ret < 0)
-		ret = -EAGAIN;
 out:
 	return ret;
 }
@@ -2226,7 +2224,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 
 	for (i = 0; i < len; i++) {
 		status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
-		if (status == 0)
+		if (status != -EPERM)
 			break;
 	}
 	if (status == 0)
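In the nfs4_proc_get_root() hunk above, the flavor loop now stops on anything other than -EPERM: only "this flavor was refused" justifies trying the next one, while success or an unrelated error ends the scan. A standalone sketch of that control flow; try_flavor() is an illustrative stand-in:

/* Retry alternatives only while the failure means "refused". */
#include <errno.h>
#include <stdio.h>

static int try_flavor(int flavor)
{
	if (flavor < 3)
		return -EPERM;	/* refused: the next flavor is worth a try */
	return 0;		/* accepted */
}

int main(void)
{
	static const int flav_array[] = { 1, 2, 3, 4 };
	int i, status = -EPERM;

	for (i = 0; i < 4; i++) {
		status = try_flavor(flav_array[i]);
		if (status != -EPERM)	/* success or a real error: stop */
			break;
	}
	printf("status=%d\n", status);
	return 0;
}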
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af0c6279a4a7..e4cbc11a74ab 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -542,11 +542,15 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
 	if (!nfs_need_commit(nfsi))
 		return 0;
 
+	spin_lock(&inode->i_lock);
 	ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
 	if (ret > 0)
 		nfsi->ncommit -= ret;
+	spin_unlock(&inode->i_lock);
+
 	if (nfs_need_commit(NFS_I(inode)))
 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+
 	return ret;
 }
 #else
@@ -1483,9 +1487,7 @@ int nfs_commit_inode(struct inode *inode, int how)
 	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
 	if (res <= 0)
 		goto out_mark_dirty;
-	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
-	spin_unlock(&inode->i_lock);
 	if (res) {
 		int error;
 
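The nfs/write.c hunks move the i_lock acquisition into nfs_scan_commit() itself, so every caller gets the same protection for nfsi->ncommit instead of each having to remember the lock. A userspace analogue with a pthread mutex; the names are illustrative:

/* The function that touches the shared counter owns the locking. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;
static int ncommit = 5;		/* shared state the lock protects */

static int scan_commit(int want)
{
	int got;

	pthread_mutex_lock(&i_lock);	/* callee takes the lock itself */
	got = want < ncommit ? want : ncommit;
	ncommit -= got;
	pthread_mutex_unlock(&i_lock);
	return got;
}

int main(void)
{
	printf("scanned %d\n", scan_commit(3));
	printf("scanned %d\n", scan_commit(4));	/* only 2 left */
	return 0;
}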
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index ec0f277be7f5..6940439bd609 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -173,7 +173,7 @@ xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem)
 			return -EINVAL;
 		break;
 	case ACL_MASK:
-		/* Solaris sometimes sets additonal bits in the mask */
+		/* Solaris sometimes sets additional bits in the mask */
 		entry->e_perm &= S_IRWXO;
 		break;
 	default:
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 0c6d81670137..7c831a2731fa 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,7 +38,6 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
 	exp_readlock();
 	nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
 	fh_put(&fh);
-	rqstp->rq_client = NULL;
 	exp_readunlock();
 	/* We return nlm error codes as nlm doesn't know
 	 * about nfsd, but nfsd does know about nlm..
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7e84a852cdae..ad48faca20fc 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -702,7 +702,7 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 		*p++ = htonl(resp->eof);
 		*p++ = htonl(resp->count);	/* xdr opaque count */
 		xdr_ressize_check(rqstp, p);
-		/* now update rqstp->rq_res to reflect data aswell */
+		/* now update rqstp->rq_res to reflect data as well */
 		rqstp->rq_res.page_len = resp->count;
 		if (resp->count & 3) {
 			/* need to pad the tail */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fbde6f79922e..aa309aa93fe8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -397,10 +397,13 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp)
 
 static void free_generic_stateid(struct nfs4_stateid *stp)
 {
-	int oflag = nfs4_access_bmap_to_omode(stp);
+	int oflag;
 
-	nfs4_file_put_access(stp->st_file, oflag);
-	put_nfs4_file(stp->st_file);
+	if (stp->st_access_bmap) {
+		oflag = nfs4_access_bmap_to_omode(stp);
+		nfs4_file_put_access(stp->st_file, oflag);
+		put_nfs4_file(stp->st_file);
+	}
 	kmem_cache_free(stateid_slab, stp);
 }
 
@@ -3055,7 +3058,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 	if (ONE_STATEID(stateid) && (flags & RD_STATE))
 		return nfs_ok;
 	else if (locks_in_grace()) {
-		/* Answer in remaining cases depends on existance of
+		/* Answer in remaining cases depends on existence of
 		 * conflicting state; so we must wait out the grace period. */
 		return nfserr_grace;
 	} else if (flags & WR_STATE)
@@ -3675,7 +3678,7 @@ find_lockstateowner_str(struct inode *inode, clientid_t *clid,
 /*
  * Alloc a lock owner structure.
  * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
- * occured.
+ * occurred.
  *
  * strhashval = lock_ownerstr_hashval
  */
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 4ce005dbf3e6..65ec595e2226 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -451,7 +451,7 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p,
 		*p++ = htonl(resp->count);
 		xdr_ressize_check(rqstp, p);
 
-		/* now update rqstp->rq_res to reflect data aswell */
+		/* now update rqstp->rq_res to reflect data as well */
 		rqstp->rq_res.page_len = resp->count;
 		if (resp->count & 3) {
 			/* need to pad the tail */
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 93589fccdd97..397e73258631 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -72,10 +72,9 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/*
 	 * check to see if the page is mapped already (no holes)
 	 */
-	if (PageMappedToDisk(page)) {
-		unlock_page(page);
+	if (PageMappedToDisk(page))
 		goto mapped;
-	}
+
 	if (page_has_buffers(page)) {
 		struct buffer_head *bh, *head;
 		int fully_mapped = 1;
@@ -90,7 +89,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 		if (fully_mapped) {
 			SetPageMappedToDisk(page);
-			unlock_page(page);
 			goto mapped;
 		}
 	}
@@ -105,16 +103,17 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
-	if (unlikely(ret)) {
+	if (ret != VM_FAULT_LOCKED) {
 		nilfs_transaction_abort(inode->i_sb);
 		return ret;
 	}
+	nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
 	nilfs_transaction_commit(inode->i_sb);
 
  mapped:
 	SetPageChecked(page);
 	wait_on_page_writeback(page);
-	return 0;
+	return VM_FAULT_LOCKED;
 }
 
 static const struct vm_operations_struct nilfs_file_vm_ops = {
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 856e8e4e0b74..a8dd344303cb 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -114,19 +114,19 @@ enum {
  * Macros to check inode numbers
  */
 #define NILFS_MDT_INO_BITS \
 	((unsigned int)(1 << NILFS_DAT_INO | 1 << NILFS_CPFILE_INO | \
 			1 << NILFS_SUFILE_INO | 1 << NILFS_IFILE_INO | \
 			1 << NILFS_ATIME_INO | 1 << NILFS_SKETCH_INO))
 
 #define NILFS_SYS_INO_BITS \
 	((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
 
 #define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
 
 #define NILFS_MDT_INODE(sb, ino) \
 	((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino))))
 #define NILFS_VALID_INODE(sb, ino) \
 	((ino) >= NILFS_FIRST_INO(sb) || (NILFS_SYS_INO_BITS & (1 << (ino))))
 
 /**
  * struct nilfs_transaction_info: context information for synchronization
@@ -285,7 +285,7 @@ extern void nilfs_destroy_inode(struct inode *);
285extern void nilfs_error(struct super_block *, const char *, const char *, ...) 285extern void nilfs_error(struct super_block *, const char *, const char *, ...)
286 __attribute__ ((format (printf, 3, 4))); 286 __attribute__ ((format (printf, 3, 4)));
287extern void nilfs_warning(struct super_block *, const char *, const char *, ...) 287extern void nilfs_warning(struct super_block *, const char *, const char *, ...)
288 __attribute__ ((format (printf, 3, 4))); 288 __attribute__ ((format (printf, 3, 4)));
289extern struct nilfs_super_block * 289extern struct nilfs_super_block *
290nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); 290nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **);
291extern int nilfs_store_magic_and_option(struct super_block *, 291extern int nilfs_store_magic_and_option(struct super_block *,
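
For context, the NILFS_MDT_INODE()/NILFS_VALID_INODE() macros in this
hunk treat the reserved inode range as a bitmap: an ino below
ns_first_ino is only meaningful if its bit is set in the corresponding
mask. The same test written out as a function (example_ino_is_valid is a
hypothetical helper, not part of the patch):

	/* Hedged sketch: the bitmask test behind NILFS_VALID_INODE(). */
	static int example_ino_is_valid(struct super_block *sb, unsigned long ino)
	{
		if (ino >= NILFS_FIRST_INO(sb))
			return 1;	/* ordinary, dynamically allocated inode */
		/* reserved range: valid only for the known system inodes */
		return (NILFS_SYS_INO_BITS & (1 << ino)) != 0;
	}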
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 4d2a1ee0eb47..1168059c7efd 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -500,7 +500,7 @@ void nilfs_mapping_init(struct address_space *mapping,
500 mapping_set_gfp_mask(mapping, GFP_NOFS); 500 mapping_set_gfp_mask(mapping, GFP_NOFS);
501 mapping->assoc_mapping = NULL; 501 mapping->assoc_mapping = NULL;
502 mapping->backing_dev_info = bdi; 502 mapping->backing_dev_info = bdi;
503 mapping->a_ops = NULL; 503 mapping->a_ops = &empty_aops;
504} 504}
505 505
506/* 506/*
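
The one-line change above replaces a NULL ->a_ops with a pointer to an
empty operations table: generic page-cache helpers dereference
mapping->a_ops without a NULL check, and an all-NULL table simply makes
every method fall back to its default. A hedged sketch of the idiom (the
sketch defines its own empty_aops; in the patch it presumably comes from
core VFS code):

	/* Hedged sketch: an all-defaults table instead of a NULL pointer. */
	static const struct address_space_operations empty_aops;

	static void example_mapping_init(struct address_space *mapping)
	{
		mapping->a_ops = &empty_aops;	/* never NULL; safe to deref */
	}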
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6b1305dc26c0..9fde1c00a296 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -164,7 +164,7 @@ static int process_access_response(struct fsnotify_group *group,
164 fd, response); 164 fd, response);
165 /* 165 /*
166 * make sure the response is valid, if invalid we do nothing and either 166 * make sure the response is valid, if invalid we do nothing and either
167 * userspace can send a valid responce or we will clean it up after the 167 * userspace can send a valid response or we will clean it up after the
168 * timeout 168 * timeout
169 */ 169 */
170 switch (response) { 170 switch (response) {
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a91b69a6a291..e3cbd746f64a 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -194,10 +194,11 @@ static int idr_callback(int id, void *p, void *data)
194 194
195static void inotify_free_group_priv(struct fsnotify_group *group) 195static void inotify_free_group_priv(struct fsnotify_group *group)
196{ 196{
197 /* ideally the idr is empty and we won't hit the BUG in teh callback */ 197 /* ideally the idr is empty and we won't hit the BUG in the callback */
198 idr_for_each(&group->inotify_data.idr, idr_callback, group); 198 idr_for_each(&group->inotify_data.idr, idr_callback, group);
199 idr_remove_all(&group->inotify_data.idr); 199 idr_remove_all(&group->inotify_data.idr);
200 idr_destroy(&group->inotify_data.idr); 200 idr_destroy(&group->inotify_data.idr);
201 atomic_dec(&group->inotify_data.user->inotify_devs);
201 free_uid(group->inotify_data.user); 202 free_uid(group->inotify_data.user);
202} 203}
203 204
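
The added atomic_dec() ties the per-user instance count to the group's
lifetime: the count is raised when the group is created (see the
inotify_user.c hunks below) and dropped here, on the one path every
group takes when it is freed. A hedged sketch of the pairing (the
example_* helpers are hypothetical):

	/* Hedged sketch: charge on create, uncharge on the free path. */
	static int example_charge(struct fsnotify_group *group)
	{
		group->inotify_data.user = get_current_user();	/* + uid ref */
		if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
		    inotify_max_user_instances)
			return -EMFILE;		/* caller drops the group ref */
		return 0;
	}

	static void example_uncharge(struct fsnotify_group *group)
	{
		atomic_dec(&group->inotify_data.user->inotify_devs);
		free_uid(group->inotify_data.user);
	}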
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index bd46e7c8a0ef..8445fbc8985c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -290,7 +290,6 @@ static int inotify_fasync(int fd, struct file *file, int on)
290static int inotify_release(struct inode *ignored, struct file *file) 290static int inotify_release(struct inode *ignored, struct file *file)
291{ 291{
292 struct fsnotify_group *group = file->private_data; 292 struct fsnotify_group *group = file->private_data;
293 struct user_struct *user = group->inotify_data.user;
294 293
295 pr_debug("%s: group=%p\n", __func__, group); 294 pr_debug("%s: group=%p\n", __func__, group);
296 295
@@ -299,8 +298,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
299 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 298 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
300 fsnotify_put_group(group); 299 fsnotify_put_group(group);
301 300
302 atomic_dec(&user->inotify_devs);
303
304 return 0; 301 return 0;
305} 302}
306 303
@@ -697,7 +694,7 @@ retry:
697 return ret; 694 return ret;
698} 695}
699 696
700static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) 697static struct fsnotify_group *inotify_new_group(unsigned int max_events)
701{ 698{
702 struct fsnotify_group *group; 699 struct fsnotify_group *group;
703 700
@@ -710,8 +707,14 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
710 spin_lock_init(&group->inotify_data.idr_lock); 707 spin_lock_init(&group->inotify_data.idr_lock);
711 idr_init(&group->inotify_data.idr); 708 idr_init(&group->inotify_data.idr);
712 group->inotify_data.last_wd = 0; 709 group->inotify_data.last_wd = 0;
713 group->inotify_data.user = user;
714 group->inotify_data.fa = NULL; 710 group->inotify_data.fa = NULL;
711 group->inotify_data.user = get_current_user();
712
713 if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
714 inotify_max_user_instances) {
715 fsnotify_put_group(group);
716 return ERR_PTR(-EMFILE);
717 }
715 718
716 return group; 719 return group;
717} 720}
@@ -721,7 +724,6 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
721SYSCALL_DEFINE1(inotify_init1, int, flags) 724SYSCALL_DEFINE1(inotify_init1, int, flags)
722{ 725{
723 struct fsnotify_group *group; 726 struct fsnotify_group *group;
724 struct user_struct *user;
725 int ret; 727 int ret;
726 728
727 /* Check the IN_* constants for consistency. */ 729 /* Check the IN_* constants for consistency. */
@@ -731,31 +733,16 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
731 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK)) 733 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
732 return -EINVAL; 734 return -EINVAL;
733 735
734 user = get_current_user();
735 if (unlikely(atomic_read(&user->inotify_devs) >=
736 inotify_max_user_instances)) {
737 ret = -EMFILE;
738 goto out_free_uid;
739 }
740
741 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ 736 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
742 group = inotify_new_group(user, inotify_max_queued_events); 737 group = inotify_new_group(inotify_max_queued_events);
743 if (IS_ERR(group)) { 738 if (IS_ERR(group))
744 ret = PTR_ERR(group); 739 return PTR_ERR(group);
745 goto out_free_uid;
746 }
747
748 atomic_inc(&user->inotify_devs);
749 740
750 ret = anon_inode_getfd("inotify", &inotify_fops, group, 741 ret = anon_inode_getfd("inotify", &inotify_fops, group,
751 O_RDONLY | flags); 742 O_RDONLY | flags);
752 if (ret >= 0) 743 if (ret < 0)
753 return ret; 744 fsnotify_put_group(group);
754 745
755 fsnotify_put_group(group);
756 atomic_dec(&user->inotify_devs);
757out_free_uid:
758 free_uid(user);
759 return ret; 746 return ret;
760} 747}
761 748
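
From userspace the refactor changes nothing except where the accounting
happens: once a user already holds inotify_max_user_instances instances
(tunable via /proc/sys/fs/inotify/max_user_instances), inotify_init1()
fails with EMFILE, and closing a descriptor releases one slot. A small,
hedged demonstration:

	/* Hedged sketch: observing the per-user instance limit. */
	#include <sys/inotify.h>
	#include <errno.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = inotify_init1(IN_CLOEXEC);

		if (fd < 0) {
			if (errno == EMFILE)
				fprintf(stderr, "per-user instance limit reached\n");
			return 1;
		}
		close(fd);	/* releases the per-user instance count */
		return 0;
	}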
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 50c00856f730..252ab1f6452b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -24,7 +24,7 @@
24 * referencing this object. The object typically will live inside the kernel 24 * referencing this object. The object typically will live inside the kernel
25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task 25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
26 * which can find this object holding the appropriete locks, can take a reference 26 * which can find this object holding the appropriete locks, can take a reference
27 * and the object itself is guarenteed to survive until the reference is dropped. 27 * and the object itself is guaranteed to survive until the reference is dropped.
28 * 28 *
29 * LOCKING: 29 * LOCKING:
30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST 30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index f5094ee224c1..f14fde2b03d6 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -197,7 +197,7 @@ err_out:
197 } else if (ctx_needs_reset) { 197 } else if (ctx_needs_reset) {
198 /* 198 /*
199 * If there is no attribute list, restoring the search context 199 * If there is no attribute list, restoring the search context
200 * is acomplished simply by copying the saved context back over 200 * is accomplished simply by copying the saved context back over
201 * the caller supplied context. If there is an attribute list, 201 * the caller supplied context. If there is an attribute list,
202 * things are more complicated as we need to deal with mapping 202 * things are more complicated as we need to deal with mapping
203 * of mft records and resulting potential changes in pointers. 203 * of mft records and resulting potential changes in pointers.
@@ -1181,7 +1181,7 @@ not_found:
1181 * for, i.e. if one wants to add the attribute to the mft record this is the 1181 * for, i.e. if one wants to add the attribute to the mft record this is the
1182 * correct place to insert its attribute list entry into. 1182 * correct place to insert its attribute list entry into.
1183 * 1183 *
1184 * When -errno != -ENOENT, an error occured during the lookup. @ctx->attr is 1184 * When -errno != -ENOENT, an error occurred during the lookup. @ctx->attr is
1185 * then undefined and in particular you should not rely on it not changing. 1185 * then undefined and in particular you should not rely on it not changing.
1186 */ 1186 */
1187int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, 1187int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name,
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index ef9ed854255c..ee4144ce5d7c 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -501,7 +501,7 @@ int ntfs_read_compressed_block(struct page *page)
501 VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> 501 VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
502 vol->cluster_size_bits; 502 vol->cluster_size_bits;
503 /* 503 /*
504 * The first vcn after the last wanted vcn (minumum alignment is again 504 * The first vcn after the last wanted vcn (minimum alignment is again
505 * PAGE_CACHE_SIZE. 505 * PAGE_CACHE_SIZE.
506 */ 506 */
507 VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) 507 VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 0b56c6b7ec01..c05d6dcf77a4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -622,7 +622,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
622 */ 622 */
623 /* Everyone gets all permissions. */ 623 /* Everyone gets all permissions. */
624 vi->i_mode |= S_IRWXUGO; 624 vi->i_mode |= S_IRWXUGO;
625 /* If read-only, noone gets write permissions. */ 625 /* If read-only, no one gets write permissions. */
626 if (IS_RDONLY(vi)) 626 if (IS_RDONLY(vi))
627 vi->i_mode &= ~S_IWUGO; 627 vi->i_mode &= ~S_IWUGO;
628 if (m->flags & MFT_RECORD_IS_DIRECTORY) { 628 if (m->flags & MFT_RECORD_IS_DIRECTORY) {
@@ -2529,7 +2529,7 @@ retry_truncate:
2529 * specifies that the behaviour is unspecified thus we do not 2529 * specifies that the behaviour is unspecified thus we do not
2530 * have to do anything. This means that in our implementation 2530 * have to do anything. This means that in our implementation
2531 * in the rare case that the file is mmap()ped and a write 2531 * in the rare case that the file is mmap()ped and a write
2532 * occured into the mmap()ped region just beyond the file size 2532 * occurred into the mmap()ped region just beyond the file size
2533 * and writepage has not yet been called to write out the page 2533 * and writepage has not yet been called to write out the page
2534 * (which would clear the area beyond the file size) and we now 2534 * (which would clear the area beyond the file size) and we now
2535 * extend the file size to incorporate this dirty region 2535 * extend the file size to incorporate this dirty region
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 8b2549f672bf..faece7190866 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -286,7 +286,7 @@ typedef le16 MFT_RECORD_FLAGS;
286 * fragmented. Volume free space includes the empty part of the mft zone and 286 * fragmented. Volume free space includes the empty part of the mft zone and
287 * when the volume's free 88% are used up, the mft zone is shrunk by a factor 287 * when the volume's free 88% are used up, the mft zone is shrunk by a factor
288 * of 2, thus making more space available for more files/data. This process is 288 * of 2, thus making more space available for more files/data. This process is
289 * repeated everytime there is no more free space except for the mft zone until 289 * repeated every time there is no more free space except for the mft zone until
290 * there really is no more free space. 290 * there really is no more free space.
291 */ 291 */
292 292
@@ -1657,13 +1657,13 @@ typedef enum {
1657 * pointed to by the Owner field was provided by a defaulting mechanism 1657 * pointed to by the Owner field was provided by a defaulting mechanism
1658 * rather than explicitly provided by the original provider of the 1658 * rather than explicitly provided by the original provider of the
1659 * security descriptor. This may affect the treatment of the SID with 1659 * security descriptor. This may affect the treatment of the SID with
1660 * respect to inheritence of an owner. 1660 * respect to inheritance of an owner.
1661 * 1661 *
1662 * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in 1662 * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the SID in
1663 * the Group field was provided by a defaulting mechanism rather than 1663 * the Group field was provided by a defaulting mechanism rather than
1664 * explicitly provided by the original provider of the security 1664 * explicitly provided by the original provider of the security
1665 * descriptor. This may affect the treatment of the SID with respect to 1665 * descriptor. This may affect the treatment of the SID with respect to
1666 * inheritence of a primary group. 1666 * inheritance of a primary group.
1667 * 1667 *
1668 * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security 1668 * SE_DACL_PRESENT - This boolean flag, when set, indicates that the security
1669 * descriptor contains a discretionary ACL. If this flag is set and the 1669 * descriptor contains a discretionary ACL. If this flag is set and the
@@ -1674,7 +1674,7 @@ typedef enum {
1674 * pointed to by the Dacl field was provided by a defaulting mechanism 1674 * pointed to by the Dacl field was provided by a defaulting mechanism
1675 * rather than explicitly provided by the original provider of the 1675 * rather than explicitly provided by the original provider of the
1676 * security descriptor. This may affect the treatment of the ACL with 1676 * security descriptor. This may affect the treatment of the ACL with
1677 * respect to inheritence of an ACL. This flag is ignored if the 1677 * respect to inheritance of an ACL. This flag is ignored if the
1678 * DaclPresent flag is not set. 1678 * DaclPresent flag is not set.
1679 * 1679 *
1680 * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security 1680 * SE_SACL_PRESENT - This boolean flag, when set, indicates that the security
@@ -1686,7 +1686,7 @@ typedef enum {
1686 * pointed to by the Sacl field was provided by a defaulting mechanism 1686 * pointed to by the Sacl field was provided by a defaulting mechanism
1687 * rather than explicitly provided by the original provider of the 1687 * rather than explicitly provided by the original provider of the
1688 * security descriptor. This may affect the treatment of the ACL with 1688 * security descriptor. This may affect the treatment of the ACL with
1689 * respect to inheritence of an ACL. This flag is ignored if the 1689 * respect to inheritance of an ACL. This flag is ignored if the
1690 * SaclPresent flag is not set. 1690 * SaclPresent flag is not set.
1691 * 1691 *
1692 * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security 1692 * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the security
@@ -2283,7 +2283,7 @@ typedef struct {
2283 // the key_length is zero, then the vcn immediately 2283 // the key_length is zero, then the vcn immediately
2284 // follows the INDEX_ENTRY_HEADER. Regardless of 2284 // follows the INDEX_ENTRY_HEADER. Regardless of
2285 // key_length, the address of the 8-byte boundary 2285 // key_length, the address of the 8-byte boundary
2286 // alligned vcn of INDEX_ENTRY{_HEADER} *ie is given by 2286 // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by
2287 // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN), 2287 // (char*)ie + le16_to_cpu(ie*)->length) - sizeof(VCN),
2288 // where sizeof(VCN) can be hardcoded as 8 if wanted. */ 2288 // where sizeof(VCN) can be hardcoded as 8 if wanted. */
2289} __attribute__ ((__packed__)) INDEX_ENTRY; 2289} __attribute__ ((__packed__)) INDEX_ENTRY;
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 4dadcdf3d451..c71de292c5ad 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -669,7 +669,7 @@ err_out:
669 * of cases where we think that a volume is dirty when in fact it is clean. 669 * of cases where we think that a volume is dirty when in fact it is clean.
670 * This should only affect volumes that have not been shutdown cleanly but did 670 * This should only affect volumes that have not been shutdown cleanly but did
671 * not have any pending, non-check-pointed i/o, i.e. they were completely idle 671 * not have any pending, non-check-pointed i/o, i.e. they were completely idle
672 * at least for the five seconds preceeding the unclean shutdown. 672 * at least for the five seconds preceding the unclean shutdown.
673 * 673 *
674 * This function assumes that the $LogFile journal has already been consistency 674 * This function assumes that the $LogFile journal has already been consistency
675 * checked by a call to ntfs_check_logfile() and in particular if the $LogFile 675 * checked by a call to ntfs_check_logfile() and in particular if the $LogFile
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index b5a6f08bd35c..aa2b6ac3f0a4 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -222,7 +222,7 @@ typedef struct {
222/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the 222/* 24*/ sle64 file_size; /* Usable byte size of the log file. If the
223 restart_area_offset + the offset of the 223 restart_area_offset + the offset of the
224 file_size are > 510 then corruption has 224 file_size are > 510 then corruption has
225 occured. This is the very first check when 225 occurred. This is the very first check when
226 starting with the restart_area as if it 226 starting with the restart_area as if it
227 fails it means that some of the above values 227 fails it means that some of the above values
228 will be corrupted by the multi sector 228 will be corrupted by the multi sector
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 326e7475a22a..382857f9c7db 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -73,7 +73,7 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
73 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + 73 if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
74 vol->mft_record_size) { 74 vol->mft_record_size) {
75 page = ERR_PTR(-ENOENT); 75 page = ERR_PTR(-ENOENT);
76 ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " 76 ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
77 "which is beyond the end of the mft. " 77 "which is beyond the end of the mft. "
78 "This is probably a bug in the ntfs " 78 "This is probably a bug in the ntfs "
79 "driver.", ni->mft_no); 79 "driver.", ni->mft_no);
@@ -1442,7 +1442,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
1442 // Note: It will need to be a special mft record and if none of 1442 // Note: It will need to be a special mft record and if none of
1443 // those are available it gets rather complicated... 1443 // those are available it gets rather complicated...
1444 ntfs_error(vol->sb, "Not enough space in this mft record to " 1444 ntfs_error(vol->sb, "Not enough space in this mft record to "
1445 "accomodate extended mft bitmap attribute " 1445 "accommodate extended mft bitmap attribute "
1446 "extent. Cannot handle this yet."); 1446 "extent. Cannot handle this yet.");
1447 ret = -EOPNOTSUPP; 1447 ret = -EOPNOTSUPP;
1448 goto undo_alloc; 1448 goto undo_alloc;
@@ -1879,7 +1879,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
1879 // and we would then need to update all references to this mft 1879 // and we would then need to update all references to this mft
1880 // record appropriately. This is rather complicated... 1880 // record appropriately. This is rather complicated...
1881 ntfs_error(vol->sb, "Not enough space in this mft record to " 1881 ntfs_error(vol->sb, "Not enough space in this mft record to "
1882 "accomodate extended mft data attribute " 1882 "accommodate extended mft data attribute "
1883 "extent. Cannot handle this yet."); 1883 "extent. Cannot handle this yet.");
1884 ret = -EOPNOTSUPP; 1884 ret = -EOPNOTSUPP;
1885 goto undo_alloc; 1885 goto undo_alloc;
@@ -2357,7 +2357,7 @@ ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, const int mode,
2357 } 2357 }
2358#ifdef DEBUG 2358#ifdef DEBUG
2359 read_lock_irqsave(&mftbmp_ni->size_lock, flags); 2359 read_lock_irqsave(&mftbmp_ni->size_lock, flags);
2360 ntfs_debug("Status of mftbmp after initialized extention: " 2360 ntfs_debug("Status of mftbmp after initialized extension: "
2361 "allocated_size 0x%llx, data_size 0x%llx, " 2361 "allocated_size 0x%llx, data_size 0x%llx, "
2362 "initialized_size 0x%llx.", 2362 "initialized_size 0x%llx.",
2363 (long long)mftbmp_ni->allocated_size, 2363 (long long)mftbmp_ni->allocated_size,
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 56a9a6d25a2a..eac7d6788a10 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -1243,7 +1243,7 @@ err_out:
1243 * write. 1243 * write.
1244 * 1244 *
1245 * This is used when building the mapping pairs array of a runlist to compress 1245 * This is used when building the mapping pairs array of a runlist to compress
1246 * a given logical cluster number (lcn) or a specific run length to the minumum 1246 * a given logical cluster number (lcn) or a specific run length to the minimum
1247 * size possible. 1247 * size possible.
1248 * 1248 *
1249 * Return the number of bytes written on success. On error, i.e. the 1249 * Return the number of bytes written on success. On error, i.e. the
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 29099a07b9fe..b52706da4645 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -458,7 +458,7 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
458 * the volume on boot and updates them. 458 * the volume on boot and updates them.
459 * 459 *
460 * When remounting read-only, mark the volume clean if no volume errors 460 * When remounting read-only, mark the volume clean if no volume errors
461 * have occured. 461 * have occurred.
462 */ 462 */
463 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { 463 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
464 static const char *es = ". Cannot remount read-write."; 464 static const char *es = ". Cannot remount read-write.";
@@ -1269,7 +1269,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1269 "hibernated on the volume."); 1269 "hibernated on the volume.");
1270 return 0; 1270 return 0;
1271 } 1271 }
1272 /* A real error occured. */ 1272 /* A real error occurred. */
1273 ntfs_error(vol->sb, "Failed to find inode number for " 1273 ntfs_error(vol->sb, "Failed to find inode number for "
1274 "hiberfil.sys."); 1274 "hiberfil.sys.");
1275 return ret; 1275 return ret;
@@ -1370,7 +1370,7 @@ static bool load_and_init_quota(ntfs_volume *vol)
1370 NVolSetQuotaOutOfDate(vol); 1370 NVolSetQuotaOutOfDate(vol);
1371 return true; 1371 return true;
1372 } 1372 }
1373 /* A real error occured. */ 1373 /* A real error occurred. */
1374 ntfs_error(vol->sb, "Failed to find inode number for $Quota."); 1374 ntfs_error(vol->sb, "Failed to find inode number for $Quota.");
1375 return false; 1375 return false;
1376 } 1376 }
@@ -1454,7 +1454,7 @@ not_enabled:
1454 NVolSetUsnJrnlStamped(vol); 1454 NVolSetUsnJrnlStamped(vol);
1455 return true; 1455 return true;
1456 } 1456 }
1457 /* A real error occured. */ 1457 /* A real error occurred. */
1458 ntfs_error(vol->sb, "Failed to find inode number for " 1458 ntfs_error(vol->sb, "Failed to find inode number for "
1459 "$UsnJrnl."); 1459 "$UsnJrnl.");
1460 return false; 1460 return false;
@@ -2292,7 +2292,7 @@ static void ntfs_put_super(struct super_block *sb)
2292 ntfs_commit_inode(vol->mft_ino); 2292 ntfs_commit_inode(vol->mft_ino);
2293 2293
2294 /* 2294 /*
2295 * If a read-write mount and no volume errors have occured, mark the 2295 * If a read-write mount and no volume errors have occurred, mark the
2296 * volume clean. Also, re-commit all affected inodes. 2296 * volume clean. Also, re-commit all affected inodes.
2297 */ 2297 */
2298 if (!(sb->s_flags & MS_RDONLY)) { 2298 if (!(sb->s_flags & MS_RDONLY)) {
@@ -2496,7 +2496,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
2496 if (vol->nr_clusters & 63) 2496 if (vol->nr_clusters & 63)
2497 nr_free += 64 - (vol->nr_clusters & 63); 2497 nr_free += 64 - (vol->nr_clusters & 63);
2498 up_read(&vol->lcnbmp_lock); 2498 up_read(&vol->lcnbmp_lock);
2499 /* If errors occured we may well have gone below zero, fix this. */ 2499 /* If errors occurred we may well have gone below zero, fix this. */
2500 if (nr_free < 0) 2500 if (nr_free < 0)
2501 nr_free = 0; 2501 nr_free = 0;
2502 ntfs_debug("Exiting."); 2502 ntfs_debug("Exiting.");
@@ -2561,7 +2561,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
2561 } 2561 }
2562 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", 2562 ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
2563 index - 1); 2563 index - 1);
2564 /* If errors occured we may well have gone below zero, fix this. */ 2564 /* If errors occurred we may well have gone below zero, fix this. */
2565 if (nr_free < 0) 2565 if (nr_free < 0)
2566 nr_free = 0; 2566 nr_free = 0;
2567 ntfs_debug("Exiting."); 2567 ntfs_debug("Exiting.");
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index b27a0d86f8c5..48aa9c7401c7 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4519,7 +4519,7 @@ set_tail_append:
4519} 4519}
4520 4520
4521/* 4521/*
4522 * Helper function called at the begining of an insert. 4522 * Helper function called at the beginning of an insert.
4523 * 4523 *
4524 * This computes a few things that are commonly used in the process of 4524 * This computes a few things that are commonly used in the process of
4525 * inserting into the btree: 4525 * inserting into the btree:
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index eceb456037c1..75cf3ad987a6 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -71,7 +71,7 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
71 71
72/* 72/*
73 * Using a named enum representing lock types in terms of #N bit stored in 73 * Using a named enum representing lock types in terms of #N bit stored in
74 * iocb->private, which is going to be used for communication bewteen 74 * iocb->private, which is going to be used for communication between
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read(). 75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */ 76 */
77enum ocfs2_iocb_lock_bits { 77enum ocfs2_iocb_lock_bits {
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 2461eb3272ed..643720209a98 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2275,7 +2275,7 @@ void o2hb_free_hb_set(struct config_group *group)
2275 kfree(hs); 2275 kfree(hs);
2276} 2276}
2277 2277
2278/* hb callback registration and issueing */ 2278/* hb callback registration and issuing */
2279 2279
2280static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type) 2280static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type)
2281{ 2281{
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index a87366750f23..8f9cea1597af 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -89,7 +89,7 @@ static void o2quo_fence_self(void)
89 }; 89 };
90} 90}
91 91
92/* Indicate that a timeout occured on a hearbeat region write. The 92/* Indicate that a timeout occurred on a hearbeat region write. The
93 * other nodes in the cluster may consider us dead at that time so we 93 * other nodes in the cluster may consider us dead at that time so we
94 * want to "fence" ourselves so that we don't scribble on the disk 94 * want to "fence" ourselves so that we don't scribble on the disk
95 * after they think they've recovered us. This can't solve all 95 * after they think they've recovered us. This can't solve all
@@ -261,7 +261,7 @@ void o2quo_hb_still_up(u8 node)
261 spin_unlock(&qs->qs_lock); 261 spin_unlock(&qs->qs_lock);
262} 262}
263 263
264/* This is analagous to hb_up. as a node's connection comes up we delay the 264/* This is analogous to hb_up. as a node's connection comes up we delay the
265 * quorum decision until we see it heartbeating. the hold will be droped in 265 * quorum decision until we see it heartbeating. the hold will be droped in
266 * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if 266 * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if
267 * it's already heartbeating we we might be dropping a hold that conn_up got. 267 * it's already heartbeating we we might be dropping a hold that conn_up got.
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ee04ff5ee603..db5ee4b4f47a 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -565,7 +565,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
565 * the work queue actually being up. */ 565 * the work queue actually being up. */
566 if (!valid && o2net_wq) { 566 if (!valid && o2net_wq) {
567 unsigned long delay; 567 unsigned long delay;
568 /* delay if we're withing a RECONNECT_DELAY of the 568 /* delay if we're within a RECONNECT_DELAY of the
569 * last attempt */ 569 * last attempt */
570 delay = (nn->nn_last_connect_attempt + 570 delay = (nn->nn_last_connect_attempt +
571 msecs_to_jiffies(o2net_reconnect_delay())) 571 msecs_to_jiffies(o2net_reconnect_delay()))
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9d67610dfc74..fede57ed005f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -808,7 +808,7 @@ lookup:
808 dlm_mle_detach_hb_events(dlm, mle); 808 dlm_mle_detach_hb_events(dlm, mle);
809 dlm_put_mle(mle); 809 dlm_put_mle(mle);
810 mle = NULL; 810 mle = NULL;
811 /* this is lame, but we cant wait on either 811 /* this is lame, but we can't wait on either
812 * the mle or lockres waitqueue here */ 812 * the mle or lockres waitqueue here */
813 if (mig) 813 if (mig)
814 msleep(100); 814 msleep(100);
@@ -843,7 +843,7 @@ lookup:
843 843
844 /* finally add the lockres to its hash bucket */ 844 /* finally add the lockres to its hash bucket */
845 __dlm_insert_lockres(dlm, res); 845 __dlm_insert_lockres(dlm, res);
846 /* since this lockres is new it doesnt not require the spinlock */ 846 /* since this lockres is new it doesn't not require the spinlock */
847 dlm_lockres_grab_inflight_ref_new(dlm, res); 847 dlm_lockres_grab_inflight_ref_new(dlm, res);
848 848
849 /* if this node does not become the master make sure to drop 849 /* if this node does not become the master make sure to drop
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 177d3a6c2a5f..b4c8bb6b8d28 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -835,7 +835,7 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode)
835 835
836 /* If we have allowd wipe of this inode for another node, it 836 /* If we have allowd wipe of this inode for another node, it
837 * will be marked here so we can safely skip it. Recovery will 837 * will be marked here so we can safely skip it. Recovery will
838 * cleanup any inodes we might inadvertantly skip here. */ 838 * cleanup any inodes we might inadvertently skip here. */
839 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) 839 if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE)
840 goto bail_unlock; 840 goto bail_unlock;
841 841
@@ -917,7 +917,7 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
917 * the inode open lock in ocfs2_read_locked_inode(). When we 917 * the inode open lock in ocfs2_read_locked_inode(). When we
918 * get to ->delete_inode(), each node tries to convert it's 918 * get to ->delete_inode(), each node tries to convert it's
919 * lock to an exclusive. Trylocks are serialized by the inode 919 * lock to an exclusive. Trylocks are serialized by the inode
920 * meta data lock. If the upconvert suceeds, we know the inode 920 * meta data lock. If the upconvert succeeds, we know the inode
921 * is no longer live and can be deleted. 921 * is no longer live and can be deleted.
922 * 922 *
923 * Though we call this with the meta data lock held, the 923 * Though we call this with the meta data lock held, the
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dcc2d9327150..b141a44605ca 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1368,7 +1368,7 @@ skip_recovery:
1368 mlog_errno(status); 1368 mlog_errno(status);
1369 1369
1370 /* Now it is right time to recover quotas... We have to do this under 1370 /* Now it is right time to recover quotas... We have to do this under
1371 * superblock lock so that noone can start using the slot (and crash) 1371 * superblock lock so that no one can start using the slot (and crash)
1372 * before we recover it */ 1372 * before we recover it */
1373 for (i = 0; i < rm_quota_used; i++) { 1373 for (i = 0; i < rm_quota_used; i++) {
1374 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); 1374 qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 6180da1e37e6..68cf2f6d3c6a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -215,7 +215,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
215 /* WARNING: This only kicks off a single 215 /* WARNING: This only kicks off a single
216 * checkpoint. If someone races you and adds more 216 * checkpoint. If someone races you and adds more
217 * metadata to the journal, you won't know, and will 217 * metadata to the journal, you won't know, and will
218 * wind up waiting *alot* longer than necessary. Right 218 * wind up waiting *a lot* longer than necessary. Right
219 * now we only use this in clear_inode so that's 219 * now we only use this in clear_inode so that's
220 * OK. */ 220 * OK. */
221 ocfs2_start_checkpoint(osb); 221 ocfs2_start_checkpoint(osb);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 28f2cc1080d8..e5d738cd9cc0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2128,7 +2128,7 @@ leave:
2128} 2128}
2129 2129
2130/** 2130/**
2131 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to recieve a newly 2131 * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly
2132 * allocated file. This is different from the typical 'add to orphan dir' 2132 * allocated file. This is different from the typical 'add to orphan dir'
2133 * operation in that the inode does not yet exist. This is a problem because 2133 * operation in that the inode does not yet exist. This is a problem because
2134 * the orphan dir stringifies the inode block number to come up with it's 2134 * the orphan dir stringifies the inode block number to come up with it's
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index bf2e7764920e..b68f87a83924 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -441,7 +441,7 @@ static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = {
441struct ocfs2_block_check { 441struct ocfs2_block_check {
442/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */ 442/*00*/ __le32 bc_crc32e; /* 802.3 Ethernet II CRC32 */
443 __le16 bc_ecc; /* Single-error-correction parity vector. 443 __le16 bc_ecc; /* Single-error-correction parity vector.
444 This is a simple Hamming code dependant 444 This is a simple Hamming code dependent
445 on the blocksize. OCFS2's maximum 445 on the blocksize. OCFS2's maximum
446 blocksize, 4K, requires 16 parity bits, 446 blocksize, 4K, requires 16 parity bits,
447 so we fit in __le16. */ 447 so we fit in __le16. */
@@ -750,7 +750,7 @@ struct ocfs2_dinode {
750 after an unclean 750 after an unclean
751 shutdown */ 751 shutdown */
752 } journal1; 752 } journal1;
753 } id1; /* Inode type dependant 1 */ 753 } id1; /* Inode type dependent 1 */
754/*C0*/ union { 754/*C0*/ union {
755 struct ocfs2_super_block i_super; 755 struct ocfs2_super_block i_super;
756 struct ocfs2_local_alloc i_lab; 756 struct ocfs2_local_alloc i_lab;
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 279aef68025b..92fcd575775a 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -556,7 +556,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
556 spin_unlock(&dq_data_lock); 556 spin_unlock(&dq_data_lock);
557 err = ocfs2_qinfo_lock(info, freeing); 557 err = ocfs2_qinfo_lock(info, freeing);
558 if (err < 0) { 558 if (err < 0) {
559 mlog(ML_ERROR, "Failed to lock quota info, loosing quota write" 559 mlog(ML_ERROR, "Failed to lock quota info, losing quota write"
560 " (type=%d, id=%u)\n", dquot->dq_type, 560 " (type=%d, id=%u)\n", dquot->dq_type,
561 (unsigned)dquot->dq_id); 561 (unsigned)dquot->dq_id);
562 goto out; 562 goto out;
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
index 1e49cc29d06c..42c2b804f3fd 100644
--- a/fs/ocfs2/reservations.h
+++ b/fs/ocfs2/reservations.h
@@ -29,7 +29,7 @@
29struct ocfs2_alloc_reservation { 29struct ocfs2_alloc_reservation {
30 struct rb_node r_node; 30 struct rb_node r_node;
31 31
32 unsigned int r_start; /* Begining of current window */ 32 unsigned int r_start; /* Beginning of current window */
33 unsigned int r_len; /* Length of the window */ 33 unsigned int r_len; /* Length of the window */
34 34
35 unsigned int r_last_len; /* Length of most recent alloc */ 35 unsigned int r_last_len; /* Length of most recent alloc */
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 8ce7398ae1d2..1ec56fdb8d0d 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -126,7 +126,7 @@ struct ocfs2_stack_operations {
126 * 126 *
127 * ->connect() must not return until it is guaranteed that 127 * ->connect() must not return until it is guaranteed that
128 * 128 *
129 * - Node down notifications for the filesystem will be recieved 129 * - Node down notifications for the filesystem will be received
130 * and passed to conn->cc_recovery_handler(). 130 * and passed to conn->cc_recovery_handler().
131 * - Locking requests for the filesystem will be processed. 131 * - Locking requests for the filesystem will be processed.
132 */ 132 */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ab6e2061074f..ba5d97e4a73e 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1511,7 +1511,7 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1511 max_bits = le16_to_cpu(gd->bg_bits); 1511 max_bits = le16_to_cpu(gd->bg_bits);
1512 1512
1513 /* Tail groups in cluster bitmaps which aren't cpg 1513 /* Tail groups in cluster bitmaps which aren't cpg
1514 * aligned are prone to partial extention by a failed 1514 * aligned are prone to partial extension by a failed
1515 * fs resize. If the file system resize never got to 1515 * fs resize. If the file system resize never got to
1516 * update the dinode cluster count, then we don't want 1516 * update the dinode cluster count, then we don't want
1517 * to trust any clusters past it, regardless of what 1517 * to trust any clusters past it, regardless of what
@@ -2459,7 +2459,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2459 /* The alloc_bh comes from ocfs2_free_dinode() or 2459 /* The alloc_bh comes from ocfs2_free_dinode() or
2460 * ocfs2_free_clusters(). The callers have all locked the 2460 * ocfs2_free_clusters(). The callers have all locked the
2461 * allocator and gotten alloc_bh from the lock call. This 2461 * allocator and gotten alloc_bh from the lock call. This
2462 * validates the dinode buffer. Any corruption that has happended 2462 * validates the dinode buffer. Any corruption that has happened
2463 * is a code bug. */ 2463 * is a code bug. */
2464 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 2464 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
2465 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 2465 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 69fa11b35aa4..5a521c748859 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -78,7 +78,7 @@ static struct kmem_cache *ocfs2_inode_cachep = NULL;
78struct kmem_cache *ocfs2_dquot_cachep; 78struct kmem_cache *ocfs2_dquot_cachep;
79struct kmem_cache *ocfs2_qf_chunk_cachep; 79struct kmem_cache *ocfs2_qf_chunk_cachep;
80 80
81/* OCFS2 needs to schedule several differnt types of work which 81/* OCFS2 needs to schedule several different types of work which
82 * require cluster locking, disk I/O, recovery waits, etc. Since these 82 * require cluster locking, disk I/O, recovery waits, etc. Since these
83 * types of work tend to be heavy we avoid using the kernel events 83 * types of work tend to be heavy we avoid using the kernel events
84 * workqueue and schedule on our own. */ 84 * workqueue and schedule on our own. */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 57a215dc2d9b..81ecf9c0bf0a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3554,7 +3554,7 @@ int ocfs2_xattr_set(struct inode *inode,
3554 down_write(&OCFS2_I(inode)->ip_xattr_sem); 3554 down_write(&OCFS2_I(inode)->ip_xattr_sem);
3555 /* 3555 /*
3556 * Scan inode and external block to find the same name 3556 * Scan inode and external block to find the same name
3557 * extended attribute and collect search infomation. 3557 * extended attribute and collect search information.
3558 */ 3558 */
3559 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); 3559 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3560 if (ret) 3560 if (ret)
@@ -3578,7 +3578,7 @@ int ocfs2_xattr_set(struct inode *inode,
3578 goto cleanup; 3578 goto cleanup;
3579 } 3579 }
3580 3580
3581 /* Check whether the value is refcounted and do some prepartion. */ 3581 /* Check whether the value is refcounted and do some preparation. */
3582 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL && 3582 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3583 (!xis.not_found || !xbs.not_found)) { 3583 (!xis.not_found || !xbs.not_found)) {
3584 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi, 3584 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ac546975031f..d545e97d99c3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -500,7 +500,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
500 /* everything is up and running, commence */ 500 /* everything is up and running, commence */
501 rcu_assign_pointer(ptbl->part[partno], p); 501 rcu_assign_pointer(ptbl->part[partno], p);
502 502
503 /* suppress uevent if the disk supresses it */ 503 /* suppress uevent if the disk suppresses it */
504 if (!dev_get_uevent_suppress(ddev)) 504 if (!dev_get_uevent_suppress(ddev))
505 kobject_uevent(&pdev->kobj, KOBJ_ADD); 505 kobject_uevent(&pdev->kobj, KOBJ_ADD);
506 506
@@ -585,7 +585,7 @@ rescan:
585 /* 585 /*
586 * If any partition code tried to read beyond EOD, try 586 * If any partition code tried to read beyond EOD, try
587 * unlocking native capacity even if partition table is 587 * unlocking native capacity even if partition table is
588 * sucessfully read as we could be missing some partitions. 588 * successfully read as we could be missing some partitions.
589 */ 589 */
590 if (state->access_beyond_eod) { 590 if (state->access_beyond_eod) {
591 printk(KERN_WARNING 591 printk(KERN_WARNING
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index b10e3540d5b7..ce4f62440425 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -1299,6 +1299,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1299 1299
1300 BUG_ON (!data || !frags); 1300 BUG_ON (!data || !frags);
1301 1301
1302 if (size < 2 * VBLK_SIZE_HEAD) {
1303 		ldm_error("Value of size is too small.");
1304 return false;
1305 }
1306
1302 group = get_unaligned_be32(data + 0x08); 1307 group = get_unaligned_be32(data + 0x08);
1303 rec = get_unaligned_be16(data + 0x0C); 1308 rec = get_unaligned_be16(data + 0x0C);
1304 num = get_unaligned_be16(data + 0x0E); 1309 num = get_unaligned_be16(data + 0x0E);
@@ -1306,6 +1311,10 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1306 ldm_error ("A VBLK claims to have %d parts.", num); 1311 ldm_error ("A VBLK claims to have %d parts.", num);
1307 return false; 1312 return false;
1308 } 1313 }
1314 if (rec >= num) {
1315 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
1316 return false;
1317 }
1309 1318
1310 list_for_each (item, frags) { 1319 list_for_each (item, frags) {
1311 f = list_entry (item, struct frag, list); 1320 f = list_entry (item, struct frag, list);
@@ -1334,10 +1343,9 @@ found:
1334 1343
1335 f->map |= (1 << rec); 1344 f->map |= (1 << rec);
1336 1345
1337 if (num > 0) { 1346 data += VBLK_SIZE_HEAD;
1338 data += VBLK_SIZE_HEAD; 1347 size -= VBLK_SIZE_HEAD;
1339 size -= VBLK_SIZE_HEAD; 1348
1340 }
1341 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size); 1349 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
1342 1350
1343 return true; 1351 return true;
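
Both new checks validate fields read from an untrusted on-disk VBLK
before they reach the memcpy() offset arithmetic: a fragment smaller
than two headers has no room for a body, and rec indexes f->data in
(size - VBLK_SIZE_HEAD)-byte slots, so it must stay below num. The
validation on its own, as a hedged sketch (example_vblk_sane is
hypothetical; the 1..4 range mirrors the existing parts check in
ldm_frag_add()):

	/* Hedged sketch: sanity-checking untrusted VBLK fragment fields. */
	static bool example_vblk_sane(int size, u16 rec, u16 num)
	{
		if (size < 2 * VBLK_SIZE_HEAD)	/* no room for a fragment body */
			return false;
		if (num < 1 || num > 4)		/* implausible fragment count */
			return false;
		return rec < num;		/* rec is a 0-based slot index */
	}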
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5a670c11aeac..dfa532730e55 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -220,7 +220,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task)
220 } 220 }
221 221
222 /* 222 /*
223 * Noone else is allowed. 223 * No one else is allowed.
224 */ 224 */
225 mmput(mm); 225 mmput(mm);
226 return ERR_PTR(-EPERM); 226 return ERR_PTR(-EPERM);
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
3124/* for the /proc/ directory itself, after non-process stuff has been done */ 3124/* for the /proc/ directory itself, after non-process stuff has been done */
3125int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3125int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
3126{ 3126{
3127 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 3127 unsigned int nr;
3128 struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); 3128 struct task_struct *reaper;
3129 struct tgid_iter iter; 3129 struct tgid_iter iter;
3130 struct pid_namespace *ns; 3130 struct pid_namespace *ns;
3131 3131
3132 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
3133 goto out_no_task;
3134 nr = filp->f_pos - FIRST_PROCESS_ENTRY;
3135
3136 reaper = get_proc_task(filp->f_path.dentry->d_inode);
3132 if (!reaper) 3137 if (!reaper)
3133 goto out_no_task; 3138 goto out_no_task;
3134 3139
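
The reordering above checks the untrusted file position before anything
is derived from it; previously a huge lseek() offset flowed straight
into the nr computation and cost a needless task lookup. From userspace
the visible effect is simply that an absurd directory offset reads as
end-of-directory, e.g. (a hedged sketch):

	/* Hedged sketch: seeking /proc far beyond any possible PID. */
	#include <dirent.h>
	#include <stdio.h>

	int main(void)
	{
		DIR *d = opendir("/proc");
		struct dirent *de;

		if (!d)
			return 1;
		seekdir(d, 1L << 30);	/* well past PID_MAX_LIMIT + TGID_OFFSET */
		de = readdir(d);
		printf("%s\n", de ? de->d_name : "(end of directory)");
		closedir(d);
		return 0;
	}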
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 867d0ac026ce..8007ae7c0d8c 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -1,5 +1,5 @@
1config PSTORE 1config PSTORE
2 bool "Persistant store support" 2 bool "Persistent store support"
3 default n 3 default n
4 help 4 help
5 This option enables generic access to platform level 5 This option enables generic access to platform level
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fcc8ae75d874..d3c032f5fa0a 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -442,7 +442,7 @@ EXPORT_SYMBOL(dquot_acquire);
442 */ 442 */
443int dquot_commit(struct dquot *dquot) 443int dquot_commit(struct dquot *dquot)
444{ 444{
445 int ret = 0, ret2 = 0; 445 int ret = 0;
446 struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); 446 struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
447 447
448 mutex_lock(&dqopt->dqio_mutex); 448 mutex_lock(&dqopt->dqio_mutex);
@@ -454,15 +454,10 @@ int dquot_commit(struct dquot *dquot)
454 spin_unlock(&dq_list_lock); 454 spin_unlock(&dq_list_lock);
455 /* Inactive dquot can be only if there was error during read/init 455 /* Inactive dquot can be only if there was error during read/init
456 * => we have better not writing it */ 456 * => we have better not writing it */
457 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { 457 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
458 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); 458 ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
459 if (info_dirty(&dqopt->info[dquot->dq_type])) { 459 else
460 ret2 = dqopt->ops[dquot->dq_type]->write_file_info( 460 ret = -EIO;
461 dquot->dq_sb, dquot->dq_type);
462 }
463 if (ret >= 0)
464 ret = ret2;
465 }
466out_sem: 461out_sem:
467 mutex_unlock(&dqopt->dqio_mutex); 462 mutex_unlock(&dqopt->dqio_mutex);
468 return ret; 463 return ret;
@@ -956,7 +951,7 @@ static inline int dqput_blocks(struct dquot *dquot)
956 951
957/* 952/*
958 * Remove references to dquots from inode and add dquot to list for freeing 953 * Remove references to dquots from inode and add dquot to list for freeing
959 * if we have the last referece to dquot 954 * if we have the last reference to dquot
960 * We can't race with anybody because we hold dqptr_sem for writing... 955 * We can't race with anybody because we hold dqptr_sem for writing...
961 */ 956 */
962static int remove_inode_dquot_ref(struct inode *inode, int type, 957static int remove_inode_dquot_ref(struct inode *inode, int type,
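
Besides dropping the redundant write_file_info() call, the rewritten
dquot_commit() changes what an inactive dquot means to its caller:
instead of silently succeeding, it now reports an error. The resulting
shape, as a hedged restatement of the hunk:

	/* Hedged sketch: commit only active dquots, fail loudly otherwise. */
	if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
		ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot);
	else
		ret = -EIO;	/* inactive means read/init failed earlier */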
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9eead2c796b7..fbb0b478a346 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -112,6 +112,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
112 SetPageDirty(page); 112 SetPageDirty(page);
113 113
114 unlock_page(page); 114 unlock_page(page);
115 put_page(page);
115 } 116 }
116 117
117 return 0; 118 return 0;
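
The added put_page() balances a reference count: add_to_page_cache_lru()
takes its own reference to each page it inserts, so the reference held
since allocation must be dropped once the page is published, or every
expansion leaks one count per page. A hedged sketch of the pairing,
simplified to a single page with the error paths elided:

	/* Hedged sketch: publish a freshly allocated page, drop our ref. */
	page = alloc_page(GFP_KERNEL);		/* refcount 1, held by us */
	if (!page)
		return -ENOMEM;
	ret = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
	if (ret) {
		put_page(page);			/* never published */
		return ret;
	}
	SetPageDirty(page);
	unlock_page(page);	/* add_to_page_cache_lru() inserted it locked */
	put_page(page);		/* page cache now holds its own reference */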
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c77514bd5776..c5e82ece7c6c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1,7 +1,7 @@
1/* 1/*
2** Write ahead logging implementation copyright Chris Mason 2000 2** Write ahead logging implementation copyright Chris Mason 2000
3** 3**
4** The background commits make this code very interelated, and 4** The background commits make this code very interrelated, and
5** overly complex. I need to rethink things a bit....The major players: 5** overly complex. I need to rethink things a bit....The major players:
6** 6**
7** journal_begin -- call with the number of blocks you expect to log. 7** journal_begin -- call with the number of blocks you expect to log.
@@ -2725,7 +2725,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2725 REISERFS_DISK_OFFSET_IN_BYTES / 2725 REISERFS_DISK_OFFSET_IN_BYTES /
2726 sb->s_blocksize + 2); 2726 sb->s_blocksize + 2);
2727 2727
2728 /* Sanity check to see is the standard journal fitting withing first bitmap 2728 /* Sanity check to see is the standard journal fitting within first bitmap
2729 (actual for small blocksizes) */ 2729 (actual for small blocksizes) */
2730 if (!SB_ONDISK_JOURNAL_DEVICE(sb) && 2730 if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
2731 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) + 2731 (SB_JOURNAL_1st_RESERVED_BLOCK(sb) +
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index b87aa2c1afc1..7df1ce48203a 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -15,7 +15,7 @@
15 * for this mutex, no need for a system wide mutex facility. 15 * for this mutex, no need for a system wide mutex facility.
16 * 16 *
17 * Also this lock is often released before a call that could block because 17 * Also this lock is often released before a call that could block because
18 * reiserfs performances were partialy based on the release while schedule() 18 * reiserfs performances were partially based on the release while schedule()
19 * property of the Bkl. 19 * property of the Bkl.
20 */ 20 */
21void reiserfs_write_lock(struct super_block *s) 21void reiserfs_write_lock(struct super_block *s)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0aab04f46827..b216ff6be1c9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -393,7 +393,7 @@ void add_save_link(struct reiserfs_transaction_handle *th,
393 /* body of "save" link */ 393 /* body of "save" link */
394 link = INODE_PKEY(inode)->k_dir_id; 394 link = INODE_PKEY(inode)->k_dir_id;
395 395
396 /* put "save" link inot tree, don't charge quota to anyone */ 396 /* put "save" link into tree, don't charge quota to anyone */
397 retval = 397 retval =
398 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link); 398 reiserfs_insert_item(th, &path, &key, &ih, NULL, (char *)&link);
399 if (retval) { 399 if (retval) {
@@ -2104,7 +2104,7 @@ out:
2104 2104
2105/* Read data from quotafile - avoid pagecache and such because we cannot afford 2105/* Read data from quotafile - avoid pagecache and such because we cannot afford
2106 * acquiring the locks... As quota files are never truncated and quota code 2106 * acquiring the locks... As quota files are never truncated and quota code
2107 * itself serializes the operations (and noone else should touch the files) 2107 * itself serializes the operations (and no one else should touch the files)
2108 * we don't have to be afraid of races */ 2108 * we don't have to be afraid of races */
2109static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data, 2109static ssize_t reiserfs_quota_read(struct super_block *sb, int type, char *data,
2110 size_t len, loff_t off) 2110 size_t len, loff_t off)
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5c11ca82b782..47d2a4498b03 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -396,7 +396,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n)
396 struct address_space *mapping = dir->i_mapping; 396 struct address_space *mapping = dir->i_mapping;
397 struct page *page; 397 struct page *page;
398 /* We can deadlock if we try to free dentries, 398 /* We can deadlock if we try to free dentries,
399 and an unlink/rmdir has just occured - GFP_NOFS avoids this */ 399 and an unlink/rmdir has just occurred - GFP_NOFS avoids this */
400 mapping_set_gfp_mask(mapping, GFP_NOFS); 400 mapping_set_gfp_mask(mapping, GFP_NOFS);
401 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL); 401 page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
402 if (!IS_ERR(page)) { 402 if (!IS_ERR(page)) {
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 26b15ae34d6f..c37b520132ff 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -104,7 +104,7 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
104 entry = &cache->entry[i]; 104 entry = &cache->entry[i];
105 105
106 /* 106 /*
107 * Initialise choosen cache entry, and fill it in from 107 * Initialise chosen cache entry, and fill it in from
108 * disk. 108 * disk.
109 */ 109 */
110 cache->unused--; 110 cache->unused--;
@@ -286,7 +286,7 @@ cleanup:
286 286
287 287
288/* 288/*
289 * Copy upto length bytes from cache entry to buffer starting at offset bytes 289 * Copy up to length bytes from cache entry to buffer starting at offset bytes
290 * into the cache entry. If there's not length bytes then copy the number of 290 * into the cache entry. If there's not length bytes then copy the number of
291 * bytes available. In all cases return the number of bytes copied. 291 * bytes available. In all cases return the number of bytes copied.
292 */ 292 */
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index d7440904be17..f8b0160da2da 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -47,7 +47,7 @@ config UBIFS_FS_DEBUG
47 bool "Enable debugging support" 47 bool "Enable debugging support"
48 depends on UBIFS_FS 48 depends on UBIFS_FS
49 select DEBUG_FS 49 select DEBUG_FS
50 select KALLSYMS_ALL 50 select KALLSYMS
51 help 51 help
52 This option enables UBIFS debugging support. It makes sure various 52 This option enables UBIFS debugging support. It makes sure various
53 assertions, self-checks, debugging messages and test modes are compiled 53 assertions, self-checks, debugging messages and test modes are compiled
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index c8ff0d1ae5d3..8b3a7da531eb 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -147,7 +147,7 @@ static int make_free_space(struct ubifs_info *c)
 	if (liab2 < liab1)
 		return -EAGAIN;
 
-	dbg_budg("new liability %lld (not shrinked)", liab2);
+	dbg_budg("new liability %lld (not shrunk)", liab2);
 
 	/* Liability did not shrink again, try GC */
 	dbg_budg("Run GC");
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index b148fbc80f8d..1bd01ded7123 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -577,7 +577,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 	size_t sz;
 
 	if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
-		goto out;
+		return 0;
 
 	INIT_LIST_HEAD(&list);
 
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index f25a7339f800..004d3745dc45 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -972,11 +972,39 @@ void dbg_dump_index(struct ubifs_info *c)
 void dbg_save_space_info(struct ubifs_info *c)
 {
 	struct ubifs_debug_info *d = c->dbg;
-
-	ubifs_get_lp_stats(c, &d->saved_lst);
+	int freeable_cnt;
 
 	spin_lock(&c->space_lock);
+	memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
+
+	/*
+	 * We use a dirty hack here and zero out @c->freeable_cnt, because it
+	 * affects the free space calculations, and UBIFS might not know about
+	 * all freeable eraseblocks. Indeed, we know about freeable eraseblocks
+	 * only when we read their lprops, and we do this only lazily, upon the
+	 * need. So at any given point of time @c->freeable_cnt might be not
+	 * exactly accurate.
+	 *
+	 * Just one example about the issue we hit when we did not zero
+	 * @c->freeable_cnt.
+	 * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the
+	 *    amount of free space in @d->saved_free
+	 * 2. We re-mount R/W, which makes UBIFS read the "lsave"
+	 *    information from flash, where we cache LEBs from various
+	 *    categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()'
+	 *    -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()'
+	 *    -> 'ubifs_get_pnode()' -> 'update_cats()'
+	 *    -> 'ubifs_add_to_cat()').
+	 * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt
+	 *    becomes %1.
+	 * 4. We calculate the amount of free space when the re-mount is
+	 *    finished in 'dbg_check_space_info()' and it does not match
+	 *    @d->saved_free.
+	 */
+	freeable_cnt = c->freeable_cnt;
+	c->freeable_cnt = 0;
 	d->saved_free = ubifs_get_free_space_nolock(c);
+	c->freeable_cnt = freeable_cnt;
 	spin_unlock(&c->space_lock);
 }
 
@@ -993,12 +1021,15 @@ int dbg_check_space_info(struct ubifs_info *c)
 {
 	struct ubifs_debug_info *d = c->dbg;
 	struct ubifs_lp_stats lst;
-	long long avail, free;
+	long long free;
+	int freeable_cnt;
 
 	spin_lock(&c->space_lock);
-	avail = ubifs_calc_available(c, c->min_idx_lebs);
+	freeable_cnt = c->freeable_cnt;
+	c->freeable_cnt = 0;
+	free = ubifs_get_free_space_nolock(c);
+	c->freeable_cnt = freeable_cnt;
 	spin_unlock(&c->space_lock);
-	free = ubifs_get_free_space(c);
 
 	if (free != d->saved_free) {
 		ubifs_err("free space changed from %lld to %lld",
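Both hunks above use the same save/zero/restore trick: a lazily maintained counter that may be stale is excluded from a derived calculation by zeroing it while the protecting lock is held, then restored. Reduced to the bare pattern with generic names (a sketch of the idiom, not UBIFS code):

	#include <linux/spinlock.h>

	struct space_stats {
		spinlock_t lock;
		int lazy_cnt;		/* may lag behind reality */
		long long base_free;
	};

	static long long stable_free(struct space_stats *s)
	{
		long long free;
		int saved;

		spin_lock(&s->lock);
		saved = s->lazy_cnt;
		s->lazy_cnt = 0;	/* exclude the unreliable part */
		free = s->base_free;	/* stands in for the real calculation */
		s->lazy_cnt = saved;
		spin_unlock(&s->lock);
		return free;
	}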
@@ -2806,40 +2837,38 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
 	struct ubifs_debug_info *d = c->dbg;
 
 	sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
-	d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
-	if (IS_ERR(d->dfs_dir)) {
-		err = PTR_ERR(d->dfs_dir);
-		ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
-			  d->dfs_dir_name, err);
+	fname = d->dfs_dir_name;
+	dent = debugfs_create_dir(fname, dfs_rootdir);
+	if (IS_ERR_OR_NULL(dent))
 		goto out;
-	}
+	d->dfs_dir = dent;
 
 	fname = "dump_lprops";
 	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
-	if (IS_ERR(dent))
+	if (IS_ERR_OR_NULL(dent))
 		goto out_remove;
 	d->dfs_dump_lprops = dent;
 
 	fname = "dump_budg";
 	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
-	if (IS_ERR(dent))
+	if (IS_ERR_OR_NULL(dent))
 		goto out_remove;
 	d->dfs_dump_budg = dent;
 
 	fname = "dump_tnc";
 	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
-	if (IS_ERR(dent))
+	if (IS_ERR_OR_NULL(dent))
 		goto out_remove;
 	d->dfs_dump_tnc = dent;
 
 	return 0;
 
 out_remove:
-	err = PTR_ERR(dent);
-	ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
-		  fname, err);
 	debugfs_remove_recursive(d->dfs_dir);
 out:
+	err = dent ? PTR_ERR(dent) : -ENODEV;
+	ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
+		  fname, err);
 	return err;
 }
 
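The move from IS_ERR() to IS_ERR_OR_NULL() is not cosmetic: depending on configuration, debugfs_create_dir() and debugfs_create_file() can report failure either as NULL or as an ERR_PTR (ERR_PTR(-ENODEV) when debugfs is not compiled in), so checking only IS_ERR() lets a NULL dentry slip through. The error mapping on the out: path follows from that. Condensed to the idiom (hypothetical file name and ops):

	struct dentry *dent;

	dent = debugfs_create_file("state", S_IWUSR, parent, priv, &fops);
	if (IS_ERR_OR_NULL(dent)) {
		/* NULL is a failure too; map it to a sensible errno */
		int err = dent ? PTR_ERR(dent) : -ENODEV;
		return err;
	}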
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 919f0de29d8f..e6493cac193d 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,6 +23,12 @@
 #ifndef __UBIFS_DEBUG_H__
 #define __UBIFS_DEBUG_H__
 
+/* Checking helper functions */
+typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+				 struct ubifs_zbranch *zbr, void *priv);
+typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+				  struct ubifs_znode *znode, void *priv);
+
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
 /**
@@ -270,11 +276,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
 void dbg_dump_index(struct ubifs_info *c);
 void dbg_dump_lpt_lebs(const struct ubifs_info *c);
 
-/* Checking helper functions */
-typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
-				 struct ubifs_zbranch *zbr, void *priv);
-typedef int (*dbg_znode_callback)(struct ubifs_info *c,
-				  struct ubifs_znode *znode, void *priv);
 int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
 		   dbg_znode_callback znode_cb, void *priv);
 
@@ -295,7 +296,6 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
 int dbg_check_filesystem(struct ubifs_info *c);
 void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
 		    int add_pos);
-int dbg_check_lprops(struct ubifs_info *c);
 int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
 			int row, int col);
 int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
@@ -401,58 +401,94 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
 #define DBGKEY(key)  ((char *)(key))
 #define DBGKEY1(key) ((char *)(key))
 
-#define ubifs_debugging_init(c)                0
-#define ubifs_debugging_exit(c)                ({})
-
-#define dbg_ntype(type)                        ""
-#define dbg_cstate(cmt_state)                  ""
-#define dbg_jhead(jhead)                       ""
-#define dbg_get_key_dump(c, key)               ({})
-#define dbg_dump_inode(c, inode)               ({})
-#define dbg_dump_node(c, node)                 ({})
-#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
-#define dbg_dump_budget_req(req)               ({})
-#define dbg_dump_lstats(lst)                   ({})
-#define dbg_dump_budg(c)                       ({})
-#define dbg_dump_lprop(c, lp)                  ({})
-#define dbg_dump_lprops(c)                     ({})
-#define dbg_dump_lpt_info(c)                   ({})
-#define dbg_dump_leb(c, lnum)                  ({})
-#define dbg_dump_znode(c, znode)               ({})
-#define dbg_dump_heap(c, heap, cat)            ({})
-#define dbg_dump_pnode(c, pnode, parent, iip)  ({})
-#define dbg_dump_tnc(c)                        ({})
-#define dbg_dump_index(c)                      ({})
-#define dbg_dump_lpt_lebs(c)                   ({})
-
-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
-#define dbg_old_index_check_init(c, zroot)     0
-#define dbg_save_space_info(c)                 ({})
-#define dbg_check_space_info(c)                0
-#define dbg_check_old_index(c, zroot)          0
-#define dbg_check_cats(c)                      0
-#define dbg_check_ltab(c)                      0
-#define dbg_chk_lpt_free_spc(c)                0
-#define dbg_chk_lpt_sz(c, action, len)         0
-#define dbg_check_synced_i_size(inode)         0
-#define dbg_check_dir_size(c, dir)             0
-#define dbg_check_tnc(c, x)                    0
-#define dbg_check_idx_size(c, idx_size)        0
-#define dbg_check_filesystem(c)                0
-#define dbg_check_heap(c, heap, cat, add_pos)  ({})
-#define dbg_check_lprops(c)                    0
-#define dbg_check_lpt_nodes(c, cnode, row, col) 0
-#define dbg_check_inode_size(c, inode, size)   0
-#define dbg_check_data_nodes_order(c, head)    0
-#define dbg_check_nondata_nodes_order(c, head) 0
-#define dbg_force_in_the_gaps_enabled          0
-#define dbg_force_in_the_gaps()                0
-#define dbg_failure_mode                       0
-
-#define dbg_debugfs_init()                     0
-#define dbg_debugfs_exit()
-#define dbg_debugfs_init_fs(c)                 0
-#define dbg_debugfs_exit_fs(c)                 0
+static inline int ubifs_debugging_init(struct ubifs_info *c)   { return 0; }
+static inline void ubifs_debugging_exit(struct ubifs_info *c)  { return; }
+static inline const char *dbg_ntype(int type)                  { return ""; }
+static inline const char *dbg_cstate(int cmt_state)            { return ""; }
+static inline const char *dbg_jhead(int jhead)                 { return ""; }
+static inline const char *
+dbg_get_key_dump(const struct ubifs_info *c,
+		 const union ubifs_key *key)                   { return ""; }
+static inline void dbg_dump_inode(const struct ubifs_info *c,
+				  const struct inode *inode)   { return; }
+static inline void dbg_dump_node(const struct ubifs_info *c,
+				 const void *node)             { return; }
+static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
+				     void *node, int lnum,
+				     int offs)                 { return; }
+static inline void
+dbg_dump_budget_req(const struct ubifs_budget_req *req)        { return; }
+static inline void
+dbg_dump_lstats(const struct ubifs_lp_stats *lst)              { return; }
+static inline void dbg_dump_budg(struct ubifs_info *c)         { return; }
+static inline void dbg_dump_lprop(const struct ubifs_info *c,
+				  const struct ubifs_lprops *lp) { return; }
+static inline void dbg_dump_lprops(struct ubifs_info *c)       { return; }
+static inline void dbg_dump_lpt_info(struct ubifs_info *c)     { return; }
+static inline void dbg_dump_leb(const struct ubifs_info *c,
+				int lnum)                      { return; }
+static inline void
+dbg_dump_znode(const struct ubifs_info *c,
+	       const struct ubifs_znode *znode)                { return; }
+static inline void dbg_dump_heap(struct ubifs_info *c,
+				 struct ubifs_lpt_heap *heap,
+				 int cat)                      { return; }
+static inline void dbg_dump_pnode(struct ubifs_info *c,
+				  struct ubifs_pnode *pnode,
+				  struct ubifs_nnode *parent,
+				  int iip)                     { return; }
+static inline void dbg_dump_tnc(struct ubifs_info *c)          { return; }
+static inline void dbg_dump_index(struct ubifs_info *c)        { return; }
+static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; }
+
+static inline int dbg_walk_index(struct ubifs_info *c,
+				 dbg_leaf_callback leaf_cb,
+				 dbg_znode_callback znode_cb,
+				 void *priv)                   { return 0; }
+static inline void dbg_save_space_info(struct ubifs_info *c)   { return; }
+static inline int dbg_check_space_info(struct ubifs_info *c)   { return 0; }
+static inline int dbg_check_lprops(struct ubifs_info *c)       { return 0; }
+static inline int
+dbg_old_index_check_init(struct ubifs_info *c,
+			 struct ubifs_zbranch *zroot)          { return 0; }
+static inline int
+dbg_check_old_index(struct ubifs_info *c,
+		    struct ubifs_zbranch *zroot)               { return 0; }
+static inline int dbg_check_cats(struct ubifs_info *c)         { return 0; }
+static inline int dbg_check_ltab(struct ubifs_info *c)         { return 0; }
+static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c)   { return 0; }
+static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
+				 int action, int len)          { return 0; }
+static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; }
+static inline int dbg_check_dir_size(struct ubifs_info *c,
+				     const struct inode *dir)  { return 0; }
+static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
+static inline int dbg_check_idx_size(struct ubifs_info *c,
+				     long long idx_size)       { return 0; }
+static inline int dbg_check_filesystem(struct ubifs_info *c)   { return 0; }
+static inline void dbg_check_heap(struct ubifs_info *c,
+				  struct ubifs_lpt_heap *heap,
+				  int cat, int add_pos)        { return; }
+static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
+	struct ubifs_cnode *cnode, int row, int col)           { return 0; }
+static inline int dbg_check_inode_size(struct ubifs_info *c,
+				       const struct inode *inode,
+				       loff_t size)            { return 0; }
+static inline int
+dbg_check_data_nodes_order(struct ubifs_info *c,
+			   struct list_head *head)             { return 0; }
+static inline int
+dbg_check_nondata_nodes_order(struct ubifs_info *c,
+			      struct list_head *head)          { return 0; }
+
+static inline int dbg_force_in_the_gaps(void)                  { return 0; }
+#define dbg_force_in_the_gaps_enabled 0
+#define dbg_failure_mode              0
+
+static inline int dbg_debugfs_init(void)                       { return 0; }
+static inline void dbg_debugfs_exit(void)                      { return; }
+static inline int dbg_debugfs_init_fs(struct ubifs_info *c)    { return 0; }
+static inline int dbg_debugfs_exit_fs(struct ubifs_info *c)    { return 0; }
 
 #endif /* !CONFIG_UBIFS_FS_DEBUG */
 #endif /* !__UBIFS_DEBUG_H__ */
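Converting the no-op macros to empty static inline functions is a standard kernel hardening of a debug API: an inline stub still type-checks every argument when the feature is compiled out, so callers cannot bit-rot in rarely built configurations, while the compiler discards the empty body just as cheaply as a macro would. A minimal illustration with a hypothetical dbg_frob() helper:

	#ifdef CONFIG_FOO_DEBUG
	void dbg_frob(struct foo *f, int level);
	#else
	/*
	 * Unlike "#define dbg_frob(f, level) ({})", this stub rejects a
	 * call like dbg_frob("junk") in every configuration, not only in
	 * debug builds.
	 */
	static inline void dbg_frob(struct foo *f, int level) { }
	#endif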
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 28be1e6a65e8..b286db79c686 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1312,6 +1312,9 @@ int ubifs_fsync(struct file *file, int datasync)
 
 	dbg_gen("syncing inode %lu", inode->i_ino);
 
+	if (inode->i_sb->s_flags & MS_RDONLY)
+		return 0;
+
 	/*
 	 * VFS has already synchronized dirty pages for this inode. Synchronize
 	 * the inode unless this is a 'datasync()' call.
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 72775d35b99e..ef5155e109a2 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1270,10 +1270,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
 	lnum = branch->lnum;
 	offs = branch->offs;
 	pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
-	if (!pnode) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!pnode)
+		return -ENOMEM;
+
 	if (lnum == 0) {
 		/*
 		 * This pnode was not written which just means that the LEB
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 6ddd9973e681..c75f6133206c 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1568,6 +1568,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 	mutex_lock(&c->umount_mutex);
 	dbg_save_space_info(c);
 	c->remounting_rw = 1;
+	c->ro_mount = 0;
 
 	err = check_free_space(c);
 	if (err)
@@ -1676,13 +1677,13 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 	}
 
 	dbg_gen("re-mounted read-write");
-	c->ro_mount = 0;
 	c->remounting_rw = 0;
 	err = dbg_check_space_info(c);
 	mutex_unlock(&c->umount_mutex);
 	return err;
 
 out:
+	c->ro_mount = 1;
 	vfree(c->orph_buf);
 	c->orph_buf = NULL;
 	if (c->bgt) {
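The two hunks above move the clearing of c->ro_mount to the start of the remount and add a matching restore on the failure path, so every helper called during the transition observes the mode the file system is moving to, and a failed remount leaves the flag consistent with the still read-only state. The shape of the fix with generic names (a sketch, not the UBIFS code):

	static int remount_rw(struct ctx *c)
	{
		int err;

		c->ro_mount = 0;		/* helpers below must see R/W */

		err = do_remount_steps(c);	/* hypothetical stand-in */
		if (err)
			goto out;
		return 0;

	out:
		c->ro_mount = 1;		/* failed: still read-only */
		return err;
	}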
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c74400f88fe0..3299f469e712 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -56,6 +56,7 @@
  */
 
 #include "ubifs.h"
+#include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl_xattr.h>
@@ -80,7 +81,6 @@ enum {
 };
 
 static const struct inode_operations none_inode_operations;
-static const struct address_space_operations none_address_operations;
 static const struct file_operations none_file_operations;
 
 /**
@@ -130,7 +130,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
 	}
 
 	/* Re-define all operations to be "nothing" */
-	inode->i_mapping->a_ops = &none_address_operations;
+	inode->i_mapping->a_ops = &empty_aops;
 	inode->i_op = &none_inode_operations;
 	inode->i_fop = &none_file_operations;
 
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 27a4babe7df0..e765743cf9f3 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -78,7 +78,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
 
 /*
  * Returns the location of the fragment from
- * the begining of the filesystem.
+ * the beginning of the filesystem.
  */
 
 static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 7693d6293404..3915ade6f9a8 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -483,9 +483,9 @@ static int ufs_parse_options (char * options, unsigned * mount_options)
 }
 
 /*
- * Diffrent types of UFS hold fs_cstotal in different
- * places, and use diffrent data structure for it.
- * To make things simplier we just copy fs_cstotal to ufs_sb_private_info
+ * Different types of UFS hold fs_cstotal in different
+ * places, and use different data structure for it.
+ * To make things simpler we just copy fs_cstotal to ufs_sb_private_info
  */
 static void ufs_setup_cstotal(struct super_block *sb)
 {
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 11014302c9ca..5f821dbc0579 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -479,7 +479,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
 			break;
 		if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
 			ufs_sync_inode (inode);
-		blk_flush_plug(current);
 		yield();
 	}
 
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 52dbd14260ba..79ce38be15a1 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1295,7 +1295,7 @@ xfs_get_blocks_direct(
  * If the private argument is non-NULL __xfs_get_blocks signals us that we
  * need to issue a transaction to convert the range from unwritten to written
  * extents. In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done. But in case this was a successfull AIO
+ * to do this and we are done. But in case this was a successful AIO
  * request this handler is called from interrupt context, from which we
  * can't start transactions. In that case offload the I/O completion to
  * the workqueues we also use for buffered I/O completion.
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 596bb2c9de42..9ef9ed2cfe2e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -120,7 +120,7 @@ xfs_buf_lru_add(
  * The unlocked check is safe here because it only occurs when there are not
  * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
  * to optimise the shrinker removing the buffer from the LRU and calling
- * xfs_buf_free(). i.e. it removes an unneccessary round trip on the
+ * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
  * bt_lru_lock.
  */
 STATIC void
@@ -293,7 +293,6 @@ xfs_buf_allocate_memory(
 	size_t			nbytes, offset;
 	gfp_t			gfp_mask = xb_to_gfp(flags);
 	unsigned short		page_count, i;
-	pgoff_t			first;
 	xfs_off_t		end;
 	int			error;
 
@@ -333,7 +332,6 @@ use_alloc_page:
 		return error;
 
 	offset = bp->b_offset;
-	first = bp->b_file_offset >> PAGE_SHIFT;
 	bp->b_flags |= _XBF_PAGES;
 
 	for (i = 0; i < bp->b_page_count; i++) {
@@ -380,7 +378,7 @@ out_free_pages:
 }
 
 /*
- *	Map buffer into kernel address-space if nessecary.
+ *	Map buffer into kernel address-space if necessary.
  */
 STATIC int
 _xfs_buf_map_pages(
@@ -657,8 +655,6 @@ xfs_buf_readahead(
 	xfs_off_t		ioff,
 	size_t			isize)
 {
-	struct backing_dev_info *bdi;
-
 	if (bdi_read_congested(target->bt_bdi))
 		return;
 
@@ -919,8 +915,6 @@ xfs_buf_lock(
 
 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
 		xfs_log_force(bp->b_target->bt_mount, 0);
-	if (atomic_read(&bp->b_io_remaining))
-		blk_flush_plug(current);
 	down(&bp->b_sema);
 	XB_SET_OWNER(bp);
 
@@ -1309,8 +1303,6 @@ xfs_buf_iowait(
 {
 	trace_xfs_buf_iowait(bp, _RET_IP_);
 
-	if (atomic_read(&bp->b_io_remaining))
-		blk_flush_plug(current);
 	wait_for_completion(&bp->b_iowait);
 
 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
@@ -1747,8 +1739,8 @@ xfsbufd(
 	do {
 		long	age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
 		long	tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
-		int	count = 0;
 		struct list_head tmp;
+		struct blk_plug plug;
 
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1764,16 +1756,15 @@ xfsbufd(
 
 		xfs_buf_delwri_split(target, &tmp, age);
 		list_sort(NULL, &tmp, xfs_buf_cmp);
+
+		blk_start_plug(&plug);
 		while (!list_empty(&tmp)) {
 			struct xfs_buf *bp;
 			bp = list_first_entry(&tmp, struct xfs_buf, b_list);
 			list_del_init(&bp->b_list);
 			xfs_bdstrat_cb(bp);
-			count++;
 		}
-		if (count)
-			blk_flush_plug(current);
-
+		blk_finish_plug(&plug);
 	} while (!kthread_should_stop());
 
 	return 0;
@@ -1793,6 +1784,7 @@ xfs_flush_buftarg(
 	int		pincount = 0;
 	LIST_HEAD(tmp_list);
 	LIST_HEAD(wait_list);
+	struct blk_plug plug;
 
 	xfs_buf_runall_queues(xfsconvertd_workqueue);
 	xfs_buf_runall_queues(xfsdatad_workqueue);
@@ -1807,6 +1799,8 @@ xfs_flush_buftarg(
 	 * we do that after issuing all the IO.
 	 */
 	list_sort(NULL, &tmp_list, xfs_buf_cmp);
+
+	blk_start_plug(&plug);
 	while (!list_empty(&tmp_list)) {
 		bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
 		ASSERT(target == bp->b_target);
@@ -1817,10 +1811,10 @@ xfs_flush_buftarg(
 		}
 		xfs_bdstrat_cb(bp);
 	}
+	blk_finish_plug(&plug);
 
 	if (wait) {
-		/* Expedite and wait for IO to complete. */
-		blk_flush_plug(current);
+		/* Wait for IO to complete. */
 		while (!list_empty(&wait_list)) {
 			bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
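All of the xfs_buf.c hunks are one conversion: the implicit per-task unplugging (blk_flush_plug()) is gone, replaced by the explicit on-stack plugging API. I/O submitted between blk_start_plug() and blk_finish_plug() is batched, and the block layer dispatches the batch when the plug is finished or when the task sleeps. The idiom, separated from the XFS specifics (submit_one() and struct buf are illustrative stand-ins):

	#include <linux/blkdev.h>

	static void submit_batch(struct list_head *buffers)
	{
		struct blk_plug plug;
		struct buf *b;

		blk_start_plug(&plug);
		list_for_each_entry(b, buffers, list)
			submit_one(b);		/* queued behind the plug */
		blk_finish_plug(&plug);		/* dispatches the batch */
	}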
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 52aadfbed132..f4213ba1ff85 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -381,7 +381,7 @@ xfs_aio_write_isize_update(
 
 /*
  * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occured. In
+ * part of the I/O may have been written to disk before the error occurred. In
  * this case the on-disk file size may have been adjusted beyond the in-memory
  * file size and now needs to be truncated back.
  */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9ff7fc603d2f..dd21784525a8 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -70,7 +70,7 @@ xfs_synchronize_times(
 
 /*
  * If the linux inode is valid, mark it dirty.
- * Used when commiting a dirty inode into a transaction so that
+ * Used when committing a dirty inode into a transaction so that
  * the inode will get written back by the linux code
  */
 void
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 508e06fd7d1e..3ca795609113 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -28,53 +28,47 @@
 /*
  * XFS logging functions
  */
-static int
+static void
 __xfs_printk(
 	const char		*level,
 	const struct xfs_mount	*mp,
 	struct va_format	*vaf)
 {
 	if (mp && mp->m_fsname)
-		return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
-	return printk("%sXFS: %pV\n", level, vaf);
+		printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+	printk("%sXFS: %pV\n", level, vaf);
 }
 
-int xfs_printk(
+void xfs_printk(
 	const char		*level,
 	const struct xfs_mount	*mp,
 	const char		*fmt, ...)
 {
 	struct va_format	vaf;
 	va_list			args;
-	int			r;
 
 	va_start(args, fmt);
 
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = __xfs_printk(level, mp, &vaf);
+	__xfs_printk(level, mp, &vaf);
 	va_end(args);
-
-	return r;
 }
 
 #define define_xfs_printk_level(func, kern_level)		\
-int func(const struct xfs_mount *mp, const char *fmt, ...)	\
+void func(const struct xfs_mount *mp, const char *fmt, ...)	\
 {								\
 	struct va_format	vaf;				\
 	va_list			args;				\
-	int			r;				\
 								\
 	va_start(args, fmt);					\
 								\
 	vaf.fmt = fmt;						\
 	vaf.va = &args;						\
 								\
-	r = __xfs_printk(kern_level, mp, &vaf);			\
+	__xfs_printk(kern_level, mp, &vaf);			\
 	va_end(args);						\
-								\
-	return r;						\
 }								\
 
 define_xfs_printk_level(xfs_emerg, KERN_EMERG);
@@ -88,7 +82,7 @@ define_xfs_printk_level(xfs_info, KERN_INFO);
 define_xfs_printk_level(xfs_debug, KERN_DEBUG);
 #endif
 
-int
+void
 xfs_alert_tag(
 	const struct xfs_mount	*mp,
 	int			panic_tag,
@@ -97,7 +91,6 @@ xfs_alert_tag(
 	struct va_format	vaf;
 	va_list			args;
 	int			do_panic = 0;
-	int			r;
 
 	if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
 		xfs_printk(KERN_ALERT, mp,
@@ -110,12 +103,10 @@ xfs_alert_tag(
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	r = __xfs_printk(KERN_ALERT, mp, &vaf);
+	__xfs_printk(KERN_ALERT, mp, &vaf);
 	va_end(args);
 
 	BUG_ON(do_panic);
-
-	return r;
 }
 
 void
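These wrappers lean on the printk %pV extension: it prints a struct va_format, a (format, va_list) pair, so one variadic function can forward its arguments to printk without pre-formatting into a stack buffer. The core of the pattern as a hypothetical wrapper outside XFS:

	#include <linux/kernel.h>

	void my_warn(const char *prefix, const char *fmt, ...)
	{
		struct va_format vaf;
		va_list args;

		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		/* %pV expands vaf using the caller's format and arguments */
		printk(KERN_WARNING "%s: %pV\n", prefix, &vaf);
		va_end(args);
	}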
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index e77ffa16745b..f1b3fc1b6c4e 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,32 +3,34 @@
 
 struct xfs_mount;
 
-extern int xfs_printk(const char *level, const struct xfs_mount *mp,
+extern void xfs_printk(const char *level, const struct xfs_mount *mp,
 			const char *fmt, ...)
 	__attribute__ ((format (printf, 3, 4)));
-extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_alert_tag(const struct xfs_mount *mp, int tag,
+extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
 			const char *fmt, ...)
 	__attribute__ ((format (printf, 3, 4)));
-extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
-extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 
 #ifdef DEBUG
-extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 #else
-#define xfs_debug(mp, fmt, ...)	(0)
+static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
+{
+}
 #endif
 
 extern void assfail(char *expr, char *f, int l);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1ba5c451da36..b38e58d02299 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -816,75 +816,6 @@ xfs_setup_devices(
 	return 0;
 }
 
-/*
- * XFS AIL push thread support
- */
-void
-xfsaild_wakeup(
-	struct xfs_ail		*ailp,
-	xfs_lsn_t		threshold_lsn)
-{
-	/* only ever move the target forwards */
-	if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) {
-		ailp->xa_target = threshold_lsn;
-		wake_up_process(ailp->xa_task);
-	}
-}
-
-STATIC int
-xfsaild(
-	void	*data)
-{
-	struct xfs_ail	*ailp = data;
-	xfs_lsn_t	last_pushed_lsn = 0;
-	long		tout = 0; /* milliseconds */
-
-	while (!kthread_should_stop()) {
-		/*
-		 * for short sleeps indicating congestion, don't allow us to
-		 * get woken early. Otherwise all we do is bang on the AIL lock
-		 * without making progress.
-		 */
-		if (tout && tout <= 20)
-			__set_current_state(TASK_KILLABLE);
-		else
-			__set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(tout ?
-				 msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
-
-		/* swsusp */
-		try_to_freeze();
-
-		ASSERT(ailp->xa_mount->m_log);
-		if (XFS_FORCED_SHUTDOWN(ailp->xa_mount))
-			continue;
-
-		tout = xfsaild_push(ailp, &last_pushed_lsn);
-	}
-
-	return 0;
-}	/* xfsaild */
-
-int
-xfsaild_start(
-	struct xfs_ail	*ailp)
-{
-	ailp->xa_target = 0;
-	ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
-				    ailp->xa_mount->m_fsname);
-	if (IS_ERR(ailp->xa_task))
-		return -PTR_ERR(ailp->xa_task);
-	return 0;
-}
-
-void
-xfsaild_stop(
-	struct xfs_ail	*ailp)
-{
-	kthread_stop(ailp->xa_task);
-}
-
-
 /* Catch misguided souls that try to use this interface on XFS */
 STATIC struct inode *
 xfs_fs_alloc_inode(
@@ -1191,22 +1122,12 @@ xfs_fs_sync_fs(
 		return -error;
 
 	if (laptop_mode) {
-		int	prev_sync_seq = mp->m_sync_seq;
-
 		/*
 		 * The disk must be active because we're syncing.
 		 * We schedule xfssyncd now (now that the disk is
 		 * active) instead of later (when it might not be).
 		 */
-		wake_up_process(mp->m_sync_task);
-		/*
-		 * We have to wait for the sync iteration to complete.
-		 * If we don't, the disk activity caused by the sync
-		 * will come after the sync is completed, and that
-		 * triggers another sync from laptop mode.
-		 */
-		wait_event(mp->m_wait_single_sync_task,
-				mp->m_sync_seq != prev_sync_seq);
+		flush_delayed_work_sync(&mp->m_sync_work);
 	}
 
 	return 0;
@@ -1490,9 +1411,6 @@ xfs_fs_fill_super(
 	spin_lock_init(&mp->m_sb_lock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
-	INIT_LIST_HEAD(&mp->m_sync_list);
-	spin_lock_init(&mp->m_sync_lock);
-	init_waitqueue_head(&mp->m_wait_single_sync_task);
 
 	mp->m_super = sb;
 	sb->s_fs_info = mp;
@@ -1799,6 +1717,38 @@ xfs_destroy_zones(void)
 }
 
 STATIC int __init
+xfs_init_workqueues(void)
+{
+	/*
+	 * max_active is set to 8 to give enough concurrency to allow
+	 * multiple work operations on each CPU to run. This allows multiple
+	 * filesystems to be running sync work concurrently, and scales with
+	 * the number of CPUs in the system.
+	 */
+	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_syncd_wq)
+		goto out;
+
+	xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8);
+	if (!xfs_ail_wq)
+		goto out_destroy_syncd;
+
+	return 0;
+
+out_destroy_syncd:
+	destroy_workqueue(xfs_syncd_wq);
+out:
+	return -ENOMEM;
+}
+
+STATIC void
+xfs_destroy_workqueues(void)
+{
+	destroy_workqueue(xfs_ail_wq);
+	destroy_workqueue(xfs_syncd_wq);
+}
+
+STATIC int __init
 init_xfs_fs(void)
 {
 	int			error;
@@ -1813,10 +1763,14 @@ init_xfs_fs(void)
 	if (error)
 		goto out;
 
-	error = xfs_mru_cache_init();
+	error = xfs_init_workqueues();
 	if (error)
 		goto out_destroy_zones;
 
+	error = xfs_mru_cache_init();
+	if (error)
+		goto out_destroy_wq;
+
 	error = xfs_filestream_init();
 	if (error)
 		goto out_mru_cache_uninit;
@@ -1833,6 +1787,10 @@ init_xfs_fs(void)
 	if (error)
 		goto out_cleanup_procfs;
 
+	error = xfs_init_workqueues();
+	if (error)
+		goto out_sysctl_unregister;
+
 	vfs_initquota();
 
 	error = register_filesystem(&xfs_fs_type);
@@ -1850,6 +1808,8 @@ init_xfs_fs(void)
 	xfs_filestream_uninit();
  out_mru_cache_uninit:
 	xfs_mru_cache_uninit();
+ out_destroy_wq:
+	xfs_destroy_workqueues();
  out_destroy_zones:
 	xfs_destroy_zones();
  out:
@@ -1866,6 +1826,7 @@ exit_xfs_fs(void)
 	xfs_buf_terminate();
 	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
+	xfs_destroy_workqueues();
 	xfs_destroy_zones();
 }
 
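xfs_init_workqueues() is the standard goto-unwind shape for multi-step initialisation: each failing step jumps to a label that tears down only what was already built, in reverse order. Its skeleton, with generic names:

	static struct workqueue_struct *wq_a, *wq_b;

	static int __init init_wqs(void)
	{
		wq_a = alloc_workqueue("wq_a", WQ_CPU_INTENSIVE, 8);
		if (!wq_a)
			goto out;

		wq_b = alloc_workqueue("wq_b", WQ_CPU_INTENSIVE, 8);
		if (!wq_b)
			goto out_destroy_a;

		return 0;

	out_destroy_a:
		destroy_workqueue(wq_a);	/* undo completed steps only */
	out:
		return -ENOMEM;
	}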
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 594cd822d84d..e4f9c1b0836c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -39,6 +40,8 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
+struct workqueue_struct *xfs_syncd_wq;	/* sync workqueue */
+
 /*
  * The inode lookup is done in batches to keep the amount of lock traffic and
  * radix tree lookups to a minimum. The batch size is a trade off between
@@ -401,7 +404,7 @@ xfs_quiesce_fs(
 /*
  * Second stage of a quiesce. The data is already synced, now we have to take
  * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceding.
+ * wait for any remaining transactions to drain out before proceeding.
  */
 void
 xfs_quiesce_attr(
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
 	xfs_unmountfs_writesb(mp);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-	struct xfs_mount *mp,
-	void		*data,
-	void		(*syncer)(struct xfs_mount *, void *),
-	struct completion *completion)
-{
-	struct xfs_sync_work *work;
-
-	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-	INIT_LIST_HEAD(&work->w_list);
-	work->w_syncer = syncer;
-	work->w_data = data;
-	work->w_mount = mp;
-	work->w_completion = completion;
-	spin_lock(&mp->m_sync_lock);
-	list_add_tail(&work->w_list, &mp->m_sync_list);
-	spin_unlock(&mp->m_sync_lock);
-	wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-	iput(inode);
-}
-
-void
-xfs_flush_inodes(
-	xfs_inode_t	*ip)
+static void
+xfs_syncd_queue_sync(
+	struct xfs_mount	*mp)
 {
-	struct inode	*inode = VFS_I(ip);
-	DECLARE_COMPLETION_ONSTACK(completion);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-	wait_for_completion(&completion);
-	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
+	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
+				msecs_to_jiffies(xfs_syncd_centisecs * 10));
 }
 
 /*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
  */
 STATIC void
 xfs_sync_worker(
-	struct xfs_mount *mp,
-	void		*unused)
+	struct work_struct *work)
 {
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_sync_work);
 	int		error;
 
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
 			error = xfs_fs_log_dummy(mp);
 		else
 			xfs_log_force(mp, 0);
-		xfs_reclaim_inodes(mp, 0);
 		error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+
+		/* start pushing all the metadata that is currently dirty */
+		xfs_ail_push_all(mp->m_ail);
 	}
-	mp->m_sync_seq++;
-	wake_up(&mp->m_wait_single_sync_task);
+
+	/* queue us up again */
+	xfs_syncd_queue_sync(mp);
 }
 
-STATIC int
-xfssyncd(
-	void			*arg)
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have its own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+	struct xfs_mount	*mp)
 {
-	struct xfs_mount	*mp = arg;
-	long			timeleft;
-	xfs_sync_work_t		*work, *n;
-	LIST_HEAD		(tmp);
-
-	set_freezable();
-	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-	for (;;) {
-		if (list_empty(&mp->m_sync_list))
-			timeleft = schedule_timeout_interruptible(timeleft);
-		/* swsusp */
-		try_to_freeze();
-		if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-			break;
 
-		spin_lock(&mp->m_sync_lock);
-		/*
-		 * We can get woken by laptop mode, to do a sync -
-		 * that's the (only!) case where the list would be
-		 * empty with time remaining.
-		 */
-		if (!timeleft || list_empty(&mp->m_sync_list)) {
-			if (!timeleft)
-				timeleft = xfs_syncd_centisecs *
-							msecs_to_jiffies(10);
-			INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-			list_add_tail(&mp->m_sync_work.w_list,
-					&mp->m_sync_list);
-		}
-		list_splice_init(&mp->m_sync_list, &tmp);
-		spin_unlock(&mp->m_sync_lock);
+	/*
+	 * We can have inodes enter reclaim after we've shut down the syncd
+	 * workqueue during unmount, so don't allow reclaim work to be queued
+	 * during unmount.
+	 */
+	if (!(mp->m_super->s_flags & MS_ACTIVE))
+		return;
 
-		list_for_each_entry_safe(work, n, &tmp, w_list) {
-			(*work->w_syncer)(mp, work->w_data);
-			list_del(&work->w_list);
-			if (work == &mp->m_sync_work)
-				continue;
-			if (work->w_completion)
-				complete(work->w_completion);
-			kmem_free(work);
-		}
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
 	}
+	rcu_read_unlock();
+}
 
-	return 0;
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_syncd_queue_reclaim(mp);
+}
+
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	queue_work(xfs_syncd_wq, &mp->m_flush_work);
+	flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(work,
+					struct xfs_mount, m_flush_work);
+
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 }
 
 int
 xfs_syncd_init(
 	struct xfs_mount	*mp)
 {
-	mp->m_sync_work.w_syncer = xfs_sync_worker;
-	mp->m_sync_work.w_mount = mp;
-	mp->m_sync_work.w_completion = NULL;
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-	if (IS_ERR(mp->m_sync_task))
-		return -PTR_ERR(mp->m_sync_task);
+	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
+	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
+	xfs_syncd_queue_sync(mp);
+	xfs_syncd_queue_reclaim(mp);
+
 	return 0;
 }
 
@@ -582,7 +569,9 @@ void
 xfs_syncd_stop(
 	struct xfs_mount	*mp)
 {
-	kthread_stop(mp->m_sync_task);
+	cancel_delayed_work_sync(&mp->m_sync_work);
+	cancel_delayed_work_sync(&mp->m_reclaim_work);
+	cancel_work_sync(&mp->m_flush_work);
 }
 
 void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
 				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
 				XFS_ICI_RECLAIM_TAG);
 		spin_unlock(&ip->i_mount->m_perag_lock);
+
+		/* schedule periodic background inode reclaim */
+		xfs_syncd_queue_reclaim(ip->i_mount);
+
 		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
 							-1, _RET_IP_);
 	}
@@ -1017,7 +1010,13 @@ xfs_reclaim_inodes(
 }
 
 /*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
  */
 static int
 xfs_reclaim_inode_shrink(
@@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink(
 
 	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
 	if (nr_to_scan) {
+		/* kick background reclaimer and push the AIL */
+		xfs_syncd_queue_reclaim(mp);
+		xfs_ail_push_all(mp->m_ail);
+
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
 
-		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+					&nr_to_scan);
 		/* terminate if we don't exhaust the scan */
 		if (nr_to_scan > 0)
 			return -1;
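The xfs_sync.c rewrite replaces the xfssyncd kthread with self-rearming delayed work: each handler does one pass and queues itself for the next, and cancel_delayed_work_sync() at teardown both waits out a running pass and cancels the pending re-arm. The lifecycle in miniature (generic names, not the XFS structures):

	#include <linux/workqueue.h>

	static struct workqueue_struct *wq;
	static struct delayed_work periodic;

	static void periodic_fn(struct work_struct *work)
	{
		/* ... one pass of background work ... */

		/* re-arm for the next pass */
		queue_delayed_work(wq, &periodic, msecs_to_jiffies(30000));
	}

	static void periodic_start(void)
	{
		INIT_DELAYED_WORK(&periodic, periodic_fn);
		queue_delayed_work(wq, &periodic, msecs_to_jiffies(30000));
	}

	static void periodic_stop(void)
	{
		/* waits for a running pass and cancels the re-arm */
		cancel_delayed_work_sync(&periodic);
	}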
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
 #define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
 
+extern struct workqueue_struct *xfs_syncd_wq;	/* sync workqueue */
+
 int xfs_syncd_init(struct xfs_mount *mp);
 void xfs_syncd_stop(struct xfs_mount *mp);
 
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 7e2416478503..6fa214603819 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -600,7 +600,7 @@ xfs_qm_dqread(
 
 	/*
 	 * Reservation counters are defined as reservation plus current usage
-	 * to avoid having to add everytime.
+	 * to avoid having to add every time.
 	 */
 	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
 	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 254ee062bd7d..69228aa8605a 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -461,12 +461,10 @@ xfs_qm_dqflush_all(
 	struct xfs_quotainfo	*q = mp->m_quotainfo;
 	int			recl;
 	struct xfs_dquot	*dqp;
-	int			niters;
 	int			error;
 
 	if (!q)
 		return 0;
-	niters = 0;
 again:
 	mutex_lock(&q->qi_dqlist_lock);
 	list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
@@ -1314,14 +1312,9 @@ xfs_qm_dqiter_bufs(
 {
 	xfs_buf_t	*bp;
 	int		error;
-	int		notcommitted;
-	int		incr;
 	int		type;
 
 	ASSERT(blkcnt > 0);
-	notcommitted = 0;
-	incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
-		XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
 	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
 		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
 	error = 0;
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index c9446f1c726d..567b29b9f1b3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -65,11 +65,6 @@ extern kmem_zone_t *qm_dqtrxzone;
65 * block in the dquot/xqm code. 65 * block in the dquot/xqm code.
66 */ 66 */
67#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 67#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
68/*
69 * When doing a quotacheck, we log dquot clusters of this many FSBs at most
70 * in a single transaction. We don't want to ask for too huge a log reservation.
71 */
72#define XFS_QM_MAX_DQCLUSTER_LOGSZ 3
73 68
74typedef xfs_dqhash_t xfs_dqlist_t; 69typedef xfs_dqhash_t xfs_dqlist_t;
75 70
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 774d7ec6df8e..a0a829addca9 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -134,7 +134,7 @@ xfs_qm_newmount(
 	 */
 	if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
 		/*
-		 * If an error occured, qm_mount_quotas code
+		 * If an error occurred, qm_mount_quotas code
 		 * has already disabled quotas. So, just finish
 		 * mounting, and get on with the boring life
 		 * without disk quotas.
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c82f06778a27..2dadb15d5ca9 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -172,7 +172,7 @@ xfs_qm_scall_quotaoff(
172 /* 172 /*
173 * Next we make the changes in the quota flag in the mount struct. 173 * Next we make the changes in the quota flag in the mount struct.
174 * This isn't protected by a particular lock directly, because we 174 * This isn't protected by a particular lock directly, because we
175 * don't want to take a mrlock everytime we depend on quotas being on. 175 * don't want to take a mrlock every time we depend on quotas being on.
176 */ 176 */
177 mp->m_qflags &= ~(flags); 177 mp->m_qflags &= ~(flags);
178 178
@@ -313,14 +313,12 @@ xfs_qm_scall_quotaon(
313{ 313{
314 int error; 314 int error;
315 uint qf; 315 uint qf;
316 uint accflags;
317 __int64_t sbflags; 316 __int64_t sbflags;
318 317
319 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); 318 flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
320 /* 319 /*
321 * Switching on quota accounting must be done at mount time. 320 * Switching on quota accounting must be done at mount time.
322 */ 321 */
323 accflags = flags & XFS_ALL_QUOTA_ACCT;
324 flags &= ~(XFS_ALL_QUOTA_ACCT); 322 flags &= ~(XFS_ALL_QUOTA_ACCT);
325 323
326 sbflags = 0; 324 sbflags = 0;
@@ -354,7 +352,7 @@ xfs_qm_scall_quotaon(
354 return XFS_ERROR(EINVAL); 352 return XFS_ERROR(EINVAL);
355 } 353 }
356 /* 354 /*
 357 * If everything's upto-date incore, then don't waste time. 355 * If everything's up-to-date incore, then don't waste time.
358 */ 356 */
359 if ((mp->m_qflags & flags) == flags) 357 if ((mp->m_qflags & flags) == flags)
360 return XFS_ERROR(EEXIST); 358 return XFS_ERROR(EEXIST);
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 4bc3c649aee4..27d64d752eab 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2395,17 +2395,33 @@ xfs_free_extent(
2395 memset(&args, 0, sizeof(xfs_alloc_arg_t)); 2395 memset(&args, 0, sizeof(xfs_alloc_arg_t));
2396 args.tp = tp; 2396 args.tp = tp;
2397 args.mp = tp->t_mountp; 2397 args.mp = tp->t_mountp;
2398
2399 /*
 2400 * validate that the block number is legal - this enables us to detect
2401 * and handle a silent filesystem corruption rather than crashing.
2402 */
2398 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2403 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2399 ASSERT(args.agno < args.mp->m_sb.sb_agcount); 2404 if (args.agno >= args.mp->m_sb.sb_agcount)
2405 return EFSCORRUPTED;
2406
2400 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2407 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2408 if (args.agbno >= args.mp->m_sb.sb_agblocks)
2409 return EFSCORRUPTED;
2410
2401 args.pag = xfs_perag_get(args.mp, args.agno); 2411 args.pag = xfs_perag_get(args.mp, args.agno);
2402 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) 2412 ASSERT(args.pag);
2413
2414 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
2415 if (error)
2403 goto error0; 2416 goto error0;
2404#ifdef DEBUG 2417
2405 ASSERT(args.agbp != NULL); 2418 /* validate the extent size is legal now we have the agf locked */
2406 ASSERT((args.agbno + len) <= 2419 if (args.agbno + len >
2407 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); 2420 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) {
2408#endif 2421 error = EFSCORRUPTED;
2422 goto error0;
2423 }
2424
2409 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2425 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2410error0: 2426error0:
2411 xfs_perag_put(args.pag); 2427 xfs_perag_put(args.pag);
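
The hunk above replaces debug-only ASSERTs in xfs_free_extent() with unconditional range checks that fail with EFSCORRUPTED, so a corrupt block number on a production kernel is reported rather than crashing the machine or corrupting further. A minimal userspace sketch of the same validate-then-fail pattern follows; the fake_sb fields, the shift-based AG split and the EFSCORRUPTED value are illustrative stand-ins, not the XFS on-disk format:

	#include <stdio.h>
	#include <stdint.h>

	#define EFSCORRUPTED 117            /* illustrative errno-style value */

	struct fake_sb {
		uint32_t agcount;           /* number of allocation groups */
		uint32_t agblocks;          /* usable blocks per AG */
		uint32_t agblklog;          /* log2 of the AG split size */
	};

	/*
	 * Split a filesystem block number into (agno, agbno) and validate
	 * both halves, returning an error rather than asserting.  agbno can
	 * exceed agblocks because the split uses a power-of-two mask while
	 * the AG may be smaller than that.
	 */
	static int split_and_check_bno(const struct fake_sb *sb, uint64_t bno,
				       uint32_t *agno, uint32_t *agbno)
	{
		*agno  = (uint32_t)(bno >> sb->agblklog);
		*agbno = (uint32_t)(bno & ((1ULL << sb->agblklog) - 1));

		if (*agno >= sb->agcount)
			return EFSCORRUPTED;
		if (*agbno >= sb->agblocks)
			return EFSCORRUPTED;
		return 0;
	}

	int main(void)
	{
		struct fake_sb sb = { .agcount = 4, .agblocks = 1000,
				      .agblklog = 10 };
		uint32_t agno, agbno;

		/* block 1020 maps to agno 0, agbno 1020 >= 1000: corrupt */
		if (split_and_check_bno(&sb, 1020, &agno, &agbno))
			printf("corruption detected\n");
		return 0;
	}
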
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index e5413d96f1af..7b7e005e3dcc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -992,7 +992,7 @@ xfs_buf_iodone_callbacks(
992 lasttarg = XFS_BUF_TARGET(bp); 992 lasttarg = XFS_BUF_TARGET(bp);
993 993
994 /* 994 /*
995 * If the write was asynchronous then noone will be looking for the 995 * If the write was asynchronous then no one will be looking for the
996 * error. Clear the error state and write the buffer out again. 996 * error. Clear the error state and write the buffer out again.
997 * 997 *
998 * During sync or umount we'll write all pending buffers again 998 * During sync or umount we'll write all pending buffers again
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 742c8330994a..a37480a6e023 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2789,7 +2789,7 @@ xfs_iflush(
2789 2789
2790 /* 2790 /*
2791 * We can't flush the inode until it is unpinned, so wait for it if we 2791 * We can't flush the inode until it is unpinned, so wait for it if we
2792 * are allowed to block. We know noone new can pin it, because we are 2792 * are allowed to block. We know no one new can pin it, because we are
2793 * holding the inode lock shared and you need to hold it exclusively to 2793 * holding the inode lock shared and you need to hold it exclusively to
2794 * pin the inode. 2794 * pin the inode.
2795 * 2795 *
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f753200cef8d..ff4e2a30227d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -111,7 +111,7 @@ struct xfs_imap {
111 * Generally, we do not want to hold the i_rlock while holding the 111 * Generally, we do not want to hold the i_rlock while holding the
112 * i_ilock. Hierarchy is i_iolock followed by i_rlock. 112 * i_ilock. Hierarchy is i_iolock followed by i_rlock.
113 * 113 *
114 * xfs_iptr_t contains all the inode fields upto and including the 114 * xfs_iptr_t contains all the inode fields up to and including the
115 * i_mnext and i_mprev fields, it is used as a marker in the inode 115 * i_mnext and i_mprev fields, it is used as a marker in the inode
116 * chain off the mount structure by xfs_sync calls. 116 * chain off the mount structure by xfs_sync calls.
117 */ 117 */
@@ -336,7 +336,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
336 336
337/* 337/*
338 * Project quota id helpers (previously projid was 16bit only 338 * Project quota id helpers (previously projid was 16bit only
339 * and using two 16bit values to hold new 32bit projid was choosen 339 * and using two 16bit values to hold new 32bit projid was chosen
340 * to retain compatibility with "old" filesystems). 340 * to retain compatibility with "old" filesystems).
341 */ 341 */
342static inline prid_t 342static inline prid_t
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 46cc40131d4a..576fdfe81d60 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -198,6 +198,41 @@ xfs_inode_item_size(
198} 198}
199 199
200/* 200/*
201 * xfs_inode_item_format_extents - convert in-core extents to on-disk form
202 *
203 * For either the data or attr fork in extent format, we need to endian convert
204 * the in-core extent as we place them into the on-disk inode. In this case, we
205 * need to do this conversion before we write the extents into the log. Because
206 * we don't have the disk inode to write into here, we allocate a buffer and
207 * format the extents into it via xfs_iextents_copy(). We free the buffer in
208 * the unlock routine after the copy for the log has been made.
209 *
210 * In the case of the data fork, the in-core and on-disk fork sizes can be
211 * different due to delayed allocation extents. We only log on-disk extents
212 * here, so always use the physical fork size to determine the size of the
213 * buffer we need to allocate.
214 */
215STATIC void
216xfs_inode_item_format_extents(
217 struct xfs_inode *ip,
218 struct xfs_log_iovec *vecp,
219 int whichfork,
220 int type)
221{
222 xfs_bmbt_rec_t *ext_buffer;
223
224 ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP);
225 if (whichfork == XFS_DATA_FORK)
226 ip->i_itemp->ili_extents_buf = ext_buffer;
227 else
228 ip->i_itemp->ili_aextents_buf = ext_buffer;
229
230 vecp->i_addr = ext_buffer;
231 vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork);
232 vecp->i_type = type;
233}
234
235/*
201 * This is called to fill in the vector of log iovecs for the 236 * This is called to fill in the vector of log iovecs for the
202 * given inode log item. It fills the first item with an inode 237 * given inode log item. It fills the first item with an inode
203 * log format structure, the second with the on-disk inode structure, 238 * log format structure, the second with the on-disk inode structure,
@@ -213,7 +248,6 @@ xfs_inode_item_format(
213 struct xfs_inode *ip = iip->ili_inode; 248 struct xfs_inode *ip = iip->ili_inode;
214 uint nvecs; 249 uint nvecs;
215 size_t data_bytes; 250 size_t data_bytes;
216 xfs_bmbt_rec_t *ext_buffer;
217 xfs_mount_t *mp; 251 xfs_mount_t *mp;
218 252
219 vecp->i_addr = &iip->ili_format; 253 vecp->i_addr = &iip->ili_format;
@@ -320,22 +354,8 @@ xfs_inode_item_format(
320 } else 354 } else
321#endif 355#endif
322 { 356 {
323 /* 357 xfs_inode_item_format_extents(ip, vecp,
324 * There are delayed allocation extents 358 XFS_DATA_FORK, XLOG_REG_TYPE_IEXT);
325 * in the inode, or we need to convert
326 * the extents to on disk format.
327 * Use xfs_iextents_copy()
328 * to copy only the real extents into
329 * a separate buffer. We'll free the
330 * buffer in the unlock routine.
331 */
332 ext_buffer = kmem_alloc(ip->i_df.if_bytes,
333 KM_SLEEP);
334 iip->ili_extents_buf = ext_buffer;
335 vecp->i_addr = ext_buffer;
336 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
337 XFS_DATA_FORK);
338 vecp->i_type = XLOG_REG_TYPE_IEXT;
339 } 359 }
340 ASSERT(vecp->i_len <= ip->i_df.if_bytes); 360 ASSERT(vecp->i_len <= ip->i_df.if_bytes);
341 iip->ili_format.ilf_dsize = vecp->i_len; 361 iip->ili_format.ilf_dsize = vecp->i_len;
@@ -445,19 +465,12 @@ xfs_inode_item_format(
445 */ 465 */
446 vecp->i_addr = ip->i_afp->if_u1.if_extents; 466 vecp->i_addr = ip->i_afp->if_u1.if_extents;
447 vecp->i_len = ip->i_afp->if_bytes; 467 vecp->i_len = ip->i_afp->if_bytes;
468 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
448#else 469#else
449 ASSERT(iip->ili_aextents_buf == NULL); 470 ASSERT(iip->ili_aextents_buf == NULL);
450 /* 471 xfs_inode_item_format_extents(ip, vecp,
451 * Need to endian flip before logging 472 XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT);
452 */
453 ext_buffer = kmem_alloc(ip->i_afp->if_bytes,
454 KM_SLEEP);
455 iip->ili_aextents_buf = ext_buffer;
456 vecp->i_addr = ext_buffer;
457 vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
458 XFS_ATTR_FORK);
459#endif 473#endif
460 vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
461 iip->ili_format.ilf_asize = vecp->i_len; 474 iip->ili_format.ilf_asize = vecp->i_len;
462 vecp++; 475 vecp++;
463 nvecs++; 476 nvecs++;
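
Both the data-fork and attr-fork branches previously open-coded the same allocate/convert/copy sequence; the new xfs_inode_item_format_extents() helper collapses them into one call. A hedged sketch of the shape of that refactor in plain C - the record type, the htonl() conversion and the log_iovec layout are stand-ins for the real xfs_bmbt_rec_t handling:

	#include <stdlib.h>
	#include <stdint.h>
	#include <arpa/inet.h>      /* htonl() as a stand-in endian converter */

	struct log_iovec {
		void   *i_addr;
		size_t  i_len;
		int     i_type;
	};

	/*
	 * Endian-convert an in-core array of host-order records into a
	 * freshly allocated buffer and describe that buffer with a log
	 * iovec.  The caller keeps the returned pointer and frees it once
	 * the log copy has been taken, mirroring ili_extents_buf and
	 * ili_aextents_buf in the patch above.
	 */
	static uint32_t *format_extents(const uint32_t *incore, size_t nrecs,
					struct log_iovec *vecp, int type)
	{
		uint32_t *buf = malloc(nrecs * sizeof(*buf));
		size_t i;

		if (!buf)
			return NULL;
		for (i = 0; i < nrecs; i++)
			buf[i] = htonl(incore[i]);

		vecp->i_addr = buf;
		vecp->i_len  = nrecs * sizeof(*buf);
		vecp->i_type = type;
		return buf;
	}

With such a helper, the two call sites differ only in which fork they pass and which buffer pointer they stash, which is exactly what the hunks above achieve.
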
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index dc1882adaf54..751e94fe1f77 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -204,7 +204,6 @@ xfs_bulkstat(
204 xfs_agi_t *agi; /* agi header data */ 204 xfs_agi_t *agi; /* agi header data */
205 xfs_agino_t agino; /* inode # in allocation group */ 205 xfs_agino_t agino; /* inode # in allocation group */
206 xfs_agnumber_t agno; /* allocation group number */ 206 xfs_agnumber_t agno; /* allocation group number */
207 xfs_daddr_t bno; /* inode cluster start daddr */
208 int chunkidx; /* current index into inode chunk */ 207 int chunkidx; /* current index into inode chunk */
209 int clustidx; /* current index into inode cluster */ 208 int clustidx; /* current index into inode cluster */
210 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ 209 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
@@ -463,7 +462,6 @@ xfs_bulkstat(
463 mp->m_sb.sb_inopblog); 462 mp->m_sb.sb_inopblog);
464 } 463 }
465 ino = XFS_AGINO_TO_INO(mp, agno, agino); 464 ino = XFS_AGINO_TO_INO(mp, agno, agino);
466 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
467 /* 465 /*
468 * Skip if this inode is free. 466 * Skip if this inode is free.
469 */ 467 */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 25efa9b8a602..b612ce4520ae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -761,7 +761,7 @@ xfs_log_need_covered(xfs_mount_t *mp)
761 break; 761 break;
762 case XLOG_STATE_COVER_NEED: 762 case XLOG_STATE_COVER_NEED:
763 case XLOG_STATE_COVER_NEED2: 763 case XLOG_STATE_COVER_NEED2:
764 if (!xfs_trans_ail_tail(log->l_ailp) && 764 if (!xfs_ail_min_lsn(log->l_ailp) &&
765 xlog_iclogs_empty(log)) { 765 xlog_iclogs_empty(log)) {
766 if (log->l_covered_state == XLOG_STATE_COVER_NEED) 766 if (log->l_covered_state == XLOG_STATE_COVER_NEED)
767 log->l_covered_state = XLOG_STATE_COVER_DONE; 767 log->l_covered_state = XLOG_STATE_COVER_DONE;
@@ -801,7 +801,7 @@ xlog_assign_tail_lsn(
801 xfs_lsn_t tail_lsn; 801 xfs_lsn_t tail_lsn;
802 struct log *log = mp->m_log; 802 struct log *log = mp->m_log;
803 803
804 tail_lsn = xfs_trans_ail_tail(mp->m_ail); 804 tail_lsn = xfs_ail_min_lsn(mp->m_ail);
805 if (!tail_lsn) 805 if (!tail_lsn)
806 tail_lsn = atomic64_read(&log->l_last_sync_lsn); 806 tail_lsn = atomic64_read(&log->l_last_sync_lsn);
807 807
@@ -1239,7 +1239,7 @@ xlog_grant_push_ail(
1239 * the filesystem is shutting down. 1239 * the filesystem is shutting down.
1240 */ 1240 */
1241 if (!XLOG_FORCED_SHUTDOWN(log)) 1241 if (!XLOG_FORCED_SHUTDOWN(log))
1242 xfs_trans_ail_push(log->l_ailp, threshold_lsn); 1242 xfs_ail_push(log->l_ailp, threshold_lsn);
1243} 1243}
1244 1244
1245/* 1245/*
@@ -3407,6 +3407,17 @@ xlog_verify_dest_ptr(
3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); 3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
3408} 3408}
3409 3409
3410/*
 3411 * Check to make sure the grant write head didn't just overlap the tail. If
3412 * the cycles are the same, we can't be overlapping. Otherwise, make sure that
3413 * the cycles differ by exactly one and check the byte count.
3414 *
3415 * This check is run unlocked, so can give false positives. Rather than assert
3416 * on failures, use a warn-once flag and a panic tag to allow the admin to
3417 * determine if they want to panic the machine when such an error occurs. For
 3418 * debug kernels this will have the same effect as using an assert but, unlike
3419 * an assert, it can be turned off at runtime.
3420 */
3410STATIC void 3421STATIC void
3411xlog_verify_grant_tail( 3422xlog_verify_grant_tail(
3412 struct log *log) 3423 struct log *log)
@@ -3414,17 +3425,22 @@ xlog_verify_grant_tail(
3414 int tail_cycle, tail_blocks; 3425 int tail_cycle, tail_blocks;
3415 int cycle, space; 3426 int cycle, space;
3416 3427
3417 /*
3418 * Check to make sure the grant write head didn't just over lap the
3419 * tail. If the cycles are the same, we can't be overlapping.
3420 * Otherwise, make sure that the cycles differ by exactly one and
3421 * check the byte count.
3422 */
3423 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); 3428 xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
3424 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); 3429 xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
3425 if (tail_cycle != cycle) { 3430 if (tail_cycle != cycle) {
3426 ASSERT(cycle - 1 == tail_cycle); 3431 if (cycle - 1 != tail_cycle &&
3427 ASSERT(space <= BBTOB(tail_blocks)); 3432 !(log->l_flags & XLOG_TAIL_WARN)) {
3433 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3434 "%s: cycle - 1 != tail_cycle", __func__);
3435 log->l_flags |= XLOG_TAIL_WARN;
3436 }
3437
3438 if (space > BBTOB(tail_blocks) &&
3439 !(log->l_flags & XLOG_TAIL_WARN)) {
3440 xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
3441 "%s: space > BBTOB(tail_blocks)", __func__);
3442 log->l_flags |= XLOG_TAIL_WARN;
3443 }
3428 } 3444 }
3429} 3445}
3430 3446
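
The grant-tail verification now warns once (via XLOG_TAIL_WARN and a panic tag) instead of asserting, because the unlocked check can race and report false positives. A small C11 analogue of the warn-once-flag idea - the atomic_flag and the message are illustrative, and the kernel version uses a plain flags word rather than an atomic:

	#include <stdio.h>
	#include <stdatomic.h>

	static atomic_flag tail_warned = ATOMIC_FLAG_INIT;

	/*
	 * Report a suspected overlap at most once per run; later callers
	 * find the flag already set and stay silent, like XLOG_TAIL_WARN.
	 */
	static void verify_grant_tail(int cycle, int tail_cycle)
	{
		if (cycle - 1 != tail_cycle &&
		    !atomic_flag_test_and_set(&tail_warned))
			fprintf(stderr,
				"grant head cycle %d overlaps tail cycle %d\n",
				cycle, tail_cycle);
	}
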
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 15dbf1f9c2be..5864850e9e34 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -144,6 +144,7 @@ static inline uint xlog_get_client_id(__be32 i)
144#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ 144#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
145#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being 145#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
146 shutdown */ 146 shutdown */
147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
147 148
148#ifdef __KERNEL__ 149#ifdef __KERNEL__
149/* 150/*
@@ -570,7 +571,7 @@ int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
570 * When we crack an atomic LSN, we sample it first so that the value will not 571 * When we crack an atomic LSN, we sample it first so that the value will not
571 * change while we are cracking it into the component values. This means we 572 * change while we are cracking it into the component values. This means we
572 * will always get consistent component values to work from. This should always 573 * will always get consistent component values to work from. This should always
573 * be used to smaple and crack LSNs taht are stored and updated in atomic 574 * be used to sample and crack LSNs that are stored and updated in atomic
574 * variables. 575 * variables.
575 */ 576 */
576static inline void 577static inline void
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0c4a5618e7af..5cc464a17c93 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -101,7 +101,7 @@ xlog_get_bp(
101 /* 101 /*
102 * We do log I/O in units of log sectors (a power-of-2 102 * We do log I/O in units of log sectors (a power-of-2
103 * multiple of the basic block size), so we round up the 103 * multiple of the basic block size), so we round up the
104 * requested size to acommodate the basic blocks required 104 * requested size to accommodate the basic blocks required
105 * for complete log sectors. 105 * for complete log sectors.
106 * 106 *
107 * In addition, the buffer may be used for a non-sector- 107 * In addition, the buffer may be used for a non-sector-
@@ -112,7 +112,7 @@ xlog_get_bp(
112 * an issue. Nor will this be a problem if the log I/O is 112 * an issue. Nor will this be a problem if the log I/O is
113 * done in basic blocks (sector size 1). But otherwise we 113 * done in basic blocks (sector size 1). But otherwise we
114 * extend the buffer by one extra log sector to ensure 114 * extend the buffer by one extra log sector to ensure
115 * there's space to accomodate this possiblility. 115 * there's space to accommodate this possibility.
116 */ 116 */
117 if (nbblks > 1 && log->l_sectBBsize > 1) 117 if (nbblks > 1 && log->l_sectBBsize > 1)
118 nbblks += log->l_sectBBsize; 118 nbblks += log->l_sectBBsize;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a62e8971539d..19af0ab0d0c6 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -203,12 +203,9 @@ typedef struct xfs_mount {
203 struct mutex m_icsb_mutex; /* balancer sync lock */ 203 struct mutex m_icsb_mutex; /* balancer sync lock */
204#endif 204#endif
205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
206 struct task_struct *m_sync_task; /* generalised sync thread */ 206 struct delayed_work m_sync_work; /* background sync work */
207 xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ 207 struct delayed_work m_reclaim_work; /* background inode reclaim */
208 struct list_head m_sync_list; /* sync thread work item list */ 208 struct work_struct m_flush_work; /* background inode flush */
209 spinlock_t m_sync_lock; /* work item list lock */
210 int m_sync_seq; /* sync thread generation no. */
211 wait_queue_head_t m_wait_single_sync_task;
212 __int64_t m_update_flags; /* sb flags we need to update 209 __int64_t m_update_flags; /* sb flags we need to update
213 on the next remount,rw */ 210 on the next remount,rw */
214 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 211 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
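
This hunk swaps the dedicated sync-thread state for delayed_work/work_struct items, so background sync, inode reclaim and flushing become self-contained workqueue jobs. A schematic kernel-style fragment of that lifecycle is sketched below; it targets system_wq with generic names rather than the XFS types, and is a pattern sketch, not buildable code from this patch:

	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	struct bg_state {
		struct delayed_work work;
	};

	static void bg_worker(struct work_struct *work)
	{
		struct bg_state *s = container_of(to_delayed_work(work),
						  struct bg_state, work);

		/* ... one pass of background work on s ... */

		/* Requeue ourselves while there is more to do. */
		queue_delayed_work(system_wq, &s->work, msecs_to_jiffies(10));
	}

	static void bg_start(struct bg_state *s)
	{
		INIT_DELAYED_WORK(&s->work, bg_worker);
		queue_delayed_work(system_wq, &s->work, 0);
	}

	static void bg_stop(struct bg_state *s)
	{
		/* As in xfs_trans_ail_destroy(): wait out any queued pass. */
		cancel_delayed_work_sync(&s->work);
	}
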
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 12aff9584e29..acdb92f14d51 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -28,74 +28,138 @@
28#include "xfs_trans_priv.h" 28#include "xfs_trans_priv.h"
29#include "xfs_error.h" 29#include "xfs_error.h"
30 30
31STATIC void xfs_ail_splice(struct xfs_ail *, struct list_head *, xfs_lsn_t); 31struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
32STATIC void xfs_ail_delete(struct xfs_ail *, xfs_log_item_t *);
33STATIC xfs_log_item_t * xfs_ail_min(struct xfs_ail *);
34STATIC xfs_log_item_t * xfs_ail_next(struct xfs_ail *, xfs_log_item_t *);
35 32
36#ifdef DEBUG 33#ifdef DEBUG
37STATIC void xfs_ail_check(struct xfs_ail *, xfs_log_item_t *); 34/*
38#else 35 * Check that the list is sorted as it should be.
36 */
37STATIC void
38xfs_ail_check(
39 struct xfs_ail *ailp,
40 xfs_log_item_t *lip)
41{
42 xfs_log_item_t *prev_lip;
43
44 if (list_empty(&ailp->xa_ail))
45 return;
46
47 /*
48 * Check the next and previous entries are valid.
49 */
50 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
51 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
52 if (&prev_lip->li_ail != &ailp->xa_ail)
53 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
54
55 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
56 if (&prev_lip->li_ail != &ailp->xa_ail)
57 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
58
59
60#ifdef XFS_TRANS_DEBUG
61 /*
62 * Walk the list checking lsn ordering, and that every entry has the
63 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
64 * when specifically debugging the transaction subsystem.
65 */
66 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
67 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
68 if (&prev_lip->li_ail != &ailp->xa_ail)
69 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
70 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
71 prev_lip = lip;
72 }
73#endif /* XFS_TRANS_DEBUG */
74}
75#else /* !DEBUG */
39#define xfs_ail_check(a,l) 76#define xfs_ail_check(a,l)
40#endif /* DEBUG */ 77#endif /* DEBUG */
41 78
79/*
80 * Return a pointer to the first item in the AIL. If the AIL is empty, then
81 * return NULL.
82 */
83static xfs_log_item_t *
84xfs_ail_min(
85 struct xfs_ail *ailp)
86{
87 if (list_empty(&ailp->xa_ail))
88 return NULL;
89
90 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
91}
92
 93/*
94 * Return a pointer to the last item in the AIL. If the AIL is empty, then
95 * return NULL.
96 */
97static xfs_log_item_t *
98xfs_ail_max(
99 struct xfs_ail *ailp)
100{
101 if (list_empty(&ailp->xa_ail))
102 return NULL;
103
104 return list_entry(ailp->xa_ail.prev, xfs_log_item_t, li_ail);
105}
106
107/*
108 * Return a pointer to the item which follows the given item in the AIL. If
109 * the given item is the last item in the list, then return NULL.
110 */
111static xfs_log_item_t *
112xfs_ail_next(
113 struct xfs_ail *ailp,
114 xfs_log_item_t *lip)
115{
116 if (lip->li_ail.next == &ailp->xa_ail)
117 return NULL;
118
119 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
120}
42 121
43/* 122/*
44 * This is called by the log manager code to determine the LSN 123 * This is called by the log manager code to determine the LSN of the tail of
45 * of the tail of the log. This is exactly the LSN of the first 124 * the log. This is exactly the LSN of the first item in the AIL. If the AIL
46 * item in the AIL. If the AIL is empty, then this function 125 * is empty, then this function returns 0.
47 * returns 0.
48 * 126 *
49 * We need the AIL lock in order to get a coherent read of the 127 * We need the AIL lock in order to get a coherent read of the lsn of the last
50 * lsn of the last item in the AIL. 128 * item in the AIL.
51 */ 129 */
52xfs_lsn_t 130xfs_lsn_t
53xfs_trans_ail_tail( 131xfs_ail_min_lsn(
54 struct xfs_ail *ailp) 132 struct xfs_ail *ailp)
55{ 133{
56 xfs_lsn_t lsn; 134 xfs_lsn_t lsn = 0;
57 xfs_log_item_t *lip; 135 xfs_log_item_t *lip;
58 136
59 spin_lock(&ailp->xa_lock); 137 spin_lock(&ailp->xa_lock);
60 lip = xfs_ail_min(ailp); 138 lip = xfs_ail_min(ailp);
61 if (lip == NULL) { 139 if (lip)
62 lsn = (xfs_lsn_t)0;
63 } else {
64 lsn = lip->li_lsn; 140 lsn = lip->li_lsn;
65 }
66 spin_unlock(&ailp->xa_lock); 141 spin_unlock(&ailp->xa_lock);
67 142
68 return lsn; 143 return lsn;
69} 144}
70 145
71/* 146/*
72 * xfs_trans_push_ail 147 * Return the maximum lsn held in the AIL, or zero if the AIL is empty.
73 *
74 * This routine is called to move the tail of the AIL forward. It does this by
75 * trying to flush items in the AIL whose lsns are below the given
76 * threshold_lsn.
77 *
78 * the push is run asynchronously in a separate thread, so we return the tail
79 * of the log right now instead of the tail after the push. This means we will
80 * either continue right away, or we will sleep waiting on the async thread to
81 * do its work.
82 *
83 * We do this unlocked - we only need to know whether there is anything in the
84 * AIL at the time we are called. We don't need to access the contents of
85 * any of the objects, so the lock is not needed.
86 */ 148 */
87void 149static xfs_lsn_t
88xfs_trans_ail_push( 150xfs_ail_max_lsn(
89 struct xfs_ail *ailp, 151 struct xfs_ail *ailp)
90 xfs_lsn_t threshold_lsn)
91{ 152{
92 xfs_log_item_t *lip; 153 xfs_lsn_t lsn = 0;
154 xfs_log_item_t *lip;
93 155
94 lip = xfs_ail_min(ailp); 156 spin_lock(&ailp->xa_lock);
95 if (lip && !XFS_FORCED_SHUTDOWN(ailp->xa_mount)) { 157 lip = xfs_ail_max(ailp);
96 if (XFS_LSN_CMP(threshold_lsn, ailp->xa_target) > 0) 158 if (lip)
97 xfsaild_wakeup(ailp, threshold_lsn); 159 lsn = lip->li_lsn;
98 } 160 spin_unlock(&ailp->xa_lock);
161
162 return lsn;
99} 163}
100 164
101/* 165/*
@@ -236,16 +300,57 @@ out:
236} 300}
237 301
238/* 302/*
239 * xfsaild_push does the work of pushing on the AIL. Returning a timeout of 303 * splice the log item list into the AIL at the given LSN.
240 * zero indicates that the caller should sleep until woken.
241 */ 304 */
242long 305static void
243xfsaild_push( 306xfs_ail_splice(
244 struct xfs_ail *ailp, 307 struct xfs_ail *ailp,
245 xfs_lsn_t *last_lsn) 308 struct list_head *list,
309 xfs_lsn_t lsn)
246{ 310{
247 long tout = 0; 311 xfs_log_item_t *next_lip;
248 xfs_lsn_t last_pushed_lsn = *last_lsn; 312
313 /* If the list is empty, just insert the item. */
314 if (list_empty(&ailp->xa_ail)) {
315 list_splice(list, &ailp->xa_ail);
316 return;
317 }
318
319 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
320 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
321 break;
322 }
323
324 ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
325 XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
326
327 list_splice_init(list, &next_lip->li_ail);
328}
329
330/*
 331 * Delete the given item from the AIL.
332 */
333static void
334xfs_ail_delete(
335 struct xfs_ail *ailp,
336 xfs_log_item_t *lip)
337{
338 xfs_ail_check(ailp, lip);
339 list_del(&lip->li_ail);
340 xfs_trans_ail_cursor_clear(ailp, lip);
341}
342
343/*
344 * xfs_ail_worker does the work of pushing on the AIL. It will requeue itself
345 * to run at a later time if there is more work to do to complete the push.
346 */
347STATIC void
348xfs_ail_worker(
349 struct work_struct *work)
350{
351 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work);
353 long tout;
249 xfs_lsn_t target = ailp->xa_target; 354 xfs_lsn_t target = ailp->xa_target;
250 xfs_lsn_t lsn; 355 xfs_lsn_t lsn;
251 xfs_log_item_t *lip; 356 xfs_log_item_t *lip;
@@ -256,15 +361,15 @@ xfsaild_push(
256 361
257 spin_lock(&ailp->xa_lock); 362 spin_lock(&ailp->xa_lock);
258 xfs_trans_ail_cursor_init(ailp, cur); 363 xfs_trans_ail_cursor_init(ailp, cur);
259 lip = xfs_trans_ail_cursor_first(ailp, cur, *last_lsn); 364 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
260 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 365 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
261 /* 366 /*
262 * AIL is empty or our push has reached the end. 367 * AIL is empty or our push has reached the end.
263 */ 368 */
264 xfs_trans_ail_cursor_done(ailp, cur); 369 xfs_trans_ail_cursor_done(ailp, cur);
265 spin_unlock(&ailp->xa_lock); 370 spin_unlock(&ailp->xa_lock);
266 *last_lsn = 0; 371 ailp->xa_last_pushed_lsn = 0;
267 return tout; 372 return;
268 } 373 }
269 374
270 XFS_STATS_INC(xs_push_ail); 375 XFS_STATS_INC(xs_push_ail);
@@ -301,13 +406,13 @@ xfsaild_push(
301 case XFS_ITEM_SUCCESS: 406 case XFS_ITEM_SUCCESS:
302 XFS_STATS_INC(xs_push_ail_success); 407 XFS_STATS_INC(xs_push_ail_success);
303 IOP_PUSH(lip); 408 IOP_PUSH(lip);
304 last_pushed_lsn = lsn; 409 ailp->xa_last_pushed_lsn = lsn;
305 break; 410 break;
306 411
307 case XFS_ITEM_PUSHBUF: 412 case XFS_ITEM_PUSHBUF:
308 XFS_STATS_INC(xs_push_ail_pushbuf); 413 XFS_STATS_INC(xs_push_ail_pushbuf);
309 IOP_PUSHBUF(lip); 414 IOP_PUSHBUF(lip);
310 last_pushed_lsn = lsn; 415 ailp->xa_last_pushed_lsn = lsn;
311 push_xfsbufd = 1; 416 push_xfsbufd = 1;
312 break; 417 break;
313 418
@@ -319,7 +424,7 @@ xfsaild_push(
319 424
320 case XFS_ITEM_LOCKED: 425 case XFS_ITEM_LOCKED:
321 XFS_STATS_INC(xs_push_ail_locked); 426 XFS_STATS_INC(xs_push_ail_locked);
322 last_pushed_lsn = lsn; 427 ailp->xa_last_pushed_lsn = lsn;
323 stuck++; 428 stuck++;
324 break; 429 break;
325 430
@@ -374,9 +479,23 @@ xfsaild_push(
374 wake_up_process(mp->m_ddev_targp->bt_task); 479 wake_up_process(mp->m_ddev_targp->bt_task);
375 } 480 }
376 481
482 /* assume we have more work to do in a short while */
483 tout = 10;
377 if (!count) { 484 if (!count) {
378 /* We're past our target or empty, so idle */ 485 /* We're past our target or empty, so idle */
379 last_pushed_lsn = 0; 486 ailp->xa_last_pushed_lsn = 0;
487
488 /*
489 * Check for an updated push target before clearing the
490 * XFS_AIL_PUSHING_BIT. If the target changed, we've got more
491 * work to do. Wait a bit longer before starting that work.
492 */
493 smp_rmb();
494 if (ailp->xa_target == target) {
495 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
496 return;
497 }
498 tout = 50;
380 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 499 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
381 /* 500 /*
382 * We reached the target so wait a bit longer for I/O to 501 * We reached the target so wait a bit longer for I/O to
@@ -384,7 +503,7 @@ xfsaild_push(
384 * start the next scan from the start of the AIL. 503 * start the next scan from the start of the AIL.
385 */ 504 */
386 tout = 50; 505 tout = 50;
387 last_pushed_lsn = 0; 506 ailp->xa_last_pushed_lsn = 0;
388 } else if ((stuck * 100) / count > 90) { 507 } else if ((stuck * 100) / count > 90) {
389 /* 508 /*
390 * Either there is a lot of contention on the AIL or we 509 * Either there is a lot of contention on the AIL or we
@@ -396,14 +515,61 @@ xfsaild_push(
396 * continuing from where we were. 515 * continuing from where we were.
397 */ 516 */
398 tout = 20; 517 tout = 20;
399 } else {
400 /* more to do, but wait a short while before continuing */
401 tout = 10;
402 } 518 }
403 *last_lsn = last_pushed_lsn; 519
404 return tout; 520 /* There is more to do, requeue us. */
521 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work,
522 msecs_to_jiffies(tout));
523}
524
525/*
526 * This routine is called to move the tail of the AIL forward. It does this by
527 * trying to flush items in the AIL whose lsns are below the given
528 * threshold_lsn.
529 *
530 * The push is run asynchronously in a workqueue, which means the caller needs
531 * to handle waiting on the async flush for space to become available.
532 * We don't want to interrupt any push that is in progress, hence we only queue
 533 * work if we set the pushing bit appropriately.
534 *
535 * We do this unlocked - we only need to know whether there is anything in the
536 * AIL at the time we are called. We don't need to access the contents of
537 * any of the objects, so the lock is not needed.
538 */
539void
540xfs_ail_push(
541 struct xfs_ail *ailp,
542 xfs_lsn_t threshold_lsn)
543{
544 xfs_log_item_t *lip;
545
546 lip = xfs_ail_min(ailp);
547 if (!lip || XFS_FORCED_SHUTDOWN(ailp->xa_mount) ||
548 XFS_LSN_CMP(threshold_lsn, ailp->xa_target) <= 0)
549 return;
550
551 /*
552 * Ensure that the new target is noticed in push code before it clears
553 * the XFS_AIL_PUSHING_BIT.
554 */
555 smp_wmb();
556 ailp->xa_target = threshold_lsn;
557 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
558 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
405} 559}
406 560
561/*
562 * Push out all items in the AIL immediately
563 */
564void
565xfs_ail_push_all(
566 struct xfs_ail *ailp)
567{
568 xfs_lsn_t threshold_lsn = xfs_ail_max_lsn(ailp);
569
570 if (threshold_lsn)
571 xfs_ail_push(ailp, threshold_lsn);
572}
407 573
408/* 574/*
409 * This is to be called when an item is unlocked that may have 575 * This is to be called when an item is unlocked that may have
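
The push path above relies on two things: XFS_AIL_PUSHING_BIT guarantees at most one queued worker, and the smp_wmb()/smp_rmb() pair makes a raced target update visible before the worker clears the bit and goes idle. A hedged C11 rendering of that pairing using release/acquire atomics (cleaner than raw barriers; the queue callback is a placeholder, not a kernel API):

	#include <stdatomic.h>
	#include <stdbool.h>

	typedef unsigned long long lsn_t;

	struct ail {
		_Atomic lsn_t xa_target;
		atomic_bool   xa_pushing;  /* analogue of XFS_AIL_PUSHING_BIT */
	};

	/*
	 * Publish the new target, then queue the worker only if one is not
	 * already queued.  The release store pairs with the acquire load in
	 * ail_worker_done() so a new target cannot be missed.
	 */
	static void ail_push(struct ail *ailp, lsn_t threshold,
			     void (*queue)(void))
	{
		atomic_store_explicit(&ailp->xa_target, threshold,
				      memory_order_release);
		if (!atomic_exchange(&ailp->xa_pushing, true))
			queue();
	}

	/*
	 * Worker, on running out of items: go idle only if the target has
	 * not moved since this pass started; otherwise ask to be requeued.
	 */
	static bool ail_worker_done(struct ail *ailp, lsn_t target_seen)
	{
		if (atomic_load_explicit(&ailp->xa_target,
					 memory_order_acquire) == target_seen) {
			atomic_store(&ailp->xa_pushing, false);
			return true;    /* idle, no requeue */
		}
		return false;           /* target changed, requeue */
	}
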
@@ -615,7 +781,6 @@ xfs_trans_ail_init(
615 xfs_mount_t *mp) 781 xfs_mount_t *mp)
616{ 782{
617 struct xfs_ail *ailp; 783 struct xfs_ail *ailp;
618 int error;
619 784
620 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL); 785 ailp = kmem_zalloc(sizeof(struct xfs_ail), KM_MAYFAIL);
621 if (!ailp) 786 if (!ailp)
@@ -624,15 +789,9 @@ xfs_trans_ail_init(
624 ailp->xa_mount = mp; 789 ailp->xa_mount = mp;
625 INIT_LIST_HEAD(&ailp->xa_ail); 790 INIT_LIST_HEAD(&ailp->xa_ail);
626 spin_lock_init(&ailp->xa_lock); 791 spin_lock_init(&ailp->xa_lock);
627 error = xfsaild_start(ailp); 792 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
628 if (error)
629 goto out_free_ailp;
630 mp->m_ail = ailp; 793 mp->m_ail = ailp;
631 return 0; 794 return 0;
632
633out_free_ailp:
634 kmem_free(ailp);
635 return error;
636} 795}
637 796
638void 797void
@@ -641,124 +800,6 @@ xfs_trans_ail_destroy(
641{ 800{
642 struct xfs_ail *ailp = mp->m_ail; 801 struct xfs_ail *ailp = mp->m_ail;
643 802
644 xfsaild_stop(ailp); 803 cancel_delayed_work_sync(&ailp->xa_work);
645 kmem_free(ailp); 804 kmem_free(ailp);
646} 805}
647
648/*
649 * splice the log item list into the AIL at the given LSN.
650 */
651STATIC void
652xfs_ail_splice(
653 struct xfs_ail *ailp,
654 struct list_head *list,
655 xfs_lsn_t lsn)
656{
657 xfs_log_item_t *next_lip;
658
659 /*
660 * If the list is empty, just insert the item.
661 */
662 if (list_empty(&ailp->xa_ail)) {
663 list_splice(list, &ailp->xa_ail);
664 return;
665 }
666
667 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
668 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0)
669 break;
670 }
671
672 ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
673 (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0));
674
675 list_splice_init(list, &next_lip->li_ail);
676 return;
677}
678
679/*
680 * Delete the given item from the AIL. Return a pointer to the item.
681 */
682STATIC void
683xfs_ail_delete(
684 struct xfs_ail *ailp,
685 xfs_log_item_t *lip)
686{
687 xfs_ail_check(ailp, lip);
688 list_del(&lip->li_ail);
689 xfs_trans_ail_cursor_clear(ailp, lip);
690}
691
692/*
693 * Return a pointer to the first item in the AIL.
694 * If the AIL is empty, then return NULL.
695 */
696STATIC xfs_log_item_t *
697xfs_ail_min(
698 struct xfs_ail *ailp)
699{
700 if (list_empty(&ailp->xa_ail))
701 return NULL;
702
703 return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
704}
705
706/*
707 * Return a pointer to the item which follows
708 * the given item in the AIL. If the given item
709 * is the last item in the list, then return NULL.
710 */
711STATIC xfs_log_item_t *
712xfs_ail_next(
713 struct xfs_ail *ailp,
714 xfs_log_item_t *lip)
715{
716 if (lip->li_ail.next == &ailp->xa_ail)
717 return NULL;
718
719 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
720}
721
722#ifdef DEBUG
723/*
724 * Check that the list is sorted as it should be.
725 */
726STATIC void
727xfs_ail_check(
728 struct xfs_ail *ailp,
729 xfs_log_item_t *lip)
730{
731 xfs_log_item_t *prev_lip;
732
733 if (list_empty(&ailp->xa_ail))
734 return;
735
736 /*
737 * Check the next and previous entries are valid.
738 */
739 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
740 prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
741 if (&prev_lip->li_ail != &ailp->xa_ail)
742 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
743
744 prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
745 if (&prev_lip->li_ail != &ailp->xa_ail)
746 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
747
748
749#ifdef XFS_TRANS_DEBUG
750 /*
751 * Walk the list checking lsn ordering, and that every entry has the
752 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
753 * when specifically debugging the transaction subsystem.
754 */
755 prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
756 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
757 if (&prev_lip->li_ail != &ailp->xa_ail)
758 ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
759 ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
760 prev_lip = lip;
761 }
762#endif /* XFS_TRANS_DEBUG */
763}
764#endif /* DEBUG */
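
xfs_ail_splice(), which this diff moves rather than changes, keeps the AIL sorted by walking backwards from the tail until it finds an entry with an LSN at or below the insertion LSN; scanning in reverse is cheap because new items almost always belong near the end. A single-item userspace sketch of the same reverse-scan insertion, again with stand-in types:

	#include <sys/queue.h>

	typedef unsigned long long lsn_t;

	struct item {
		lsn_t li_lsn;
		TAILQ_ENTRY(item) li_ail;
	};

	TAILQ_HEAD(ail_head, item);

	/* Insert one item into a list kept sorted by ascending li_lsn. */
	static void ail_insert_sorted(struct ail_head *ail,
				      struct item *new_lip)
	{
		struct item *pos;

		TAILQ_FOREACH_REVERSE(pos, ail, ail_head, li_ail) {
			if (pos->li_lsn <= new_lip->li_lsn) {
				TAILQ_INSERT_AFTER(ail, pos, new_lip, li_ail);
				return;
			}
		}
		/* Every existing entry is newer: the item becomes the head. */
		TAILQ_INSERT_HEAD(ail, new_lip, li_ail);
	}
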
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 16084d8ea231..048b0c689d3e 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -81,7 +81,7 @@ xfs_trans_ijoin(
81 * 81 *
82 * 82 *
83 * Grabs a reference to the inode which will be dropped when the transaction 83 * Grabs a reference to the inode which will be dropped when the transaction
84 * is commited. The inode will also be unlocked at that point. The inode 84 * is committed. The inode will also be unlocked at that point. The inode
85 * must be locked, and it cannot be associated with any transaction. 85 * must be locked, and it cannot be associated with any transaction.
86 */ 86 */
87void 87void
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 35162c238fa3..6b164e9e9a1f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -65,16 +65,22 @@ struct xfs_ail_cursor {
65struct xfs_ail { 65struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct list_head xa_ail; 67 struct list_head xa_ail;
68 uint xa_gen;
69 struct task_struct *xa_task;
70 xfs_lsn_t xa_target; 68 xfs_lsn_t xa_target;
71 struct xfs_ail_cursor xa_cursors; 69 struct xfs_ail_cursor xa_cursors;
72 spinlock_t xa_lock; 70 spinlock_t xa_lock;
71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn;
73 unsigned long xa_flags;
73}; 74};
74 75
76#define XFS_AIL_PUSHING_BIT 0
77
75/* 78/*
76 * From xfs_trans_ail.c 79 * From xfs_trans_ail.c
77 */ 80 */
81
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83
78void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
79 struct xfs_log_item **log_items, int nr_items, 85 struct xfs_log_item **log_items, int nr_items,
80 xfs_lsn_t lsn) __releases(ailp->xa_lock); 86 xfs_lsn_t lsn) __releases(ailp->xa_lock);
@@ -98,12 +104,13 @@ xfs_trans_ail_delete(
98 xfs_trans_ail_delete_bulk(ailp, &lip, 1); 104 xfs_trans_ail_delete_bulk(ailp, &lip, 1);
99} 105}
100 106
101void xfs_trans_ail_push(struct xfs_ail *, xfs_lsn_t); 107void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
108void xfs_ail_push_all(struct xfs_ail *);
109xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
110
102void xfs_trans_unlocked_item(struct xfs_ail *, 111void xfs_trans_unlocked_item(struct xfs_ail *,
103 xfs_log_item_t *); 112 xfs_log_item_t *);
104 113
105xfs_lsn_t xfs_trans_ail_tail(struct xfs_ail *ailp);
106
107struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 114struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
108 struct xfs_ail_cursor *cur, 115 struct xfs_ail_cursor *cur,
109 xfs_lsn_t lsn); 116 xfs_lsn_t lsn);
@@ -112,11 +119,6 @@ struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
112void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 119void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
113 struct xfs_ail_cursor *cur); 120 struct xfs_ail_cursor *cur);
114 121
115long xfsaild_push(struct xfs_ail *, xfs_lsn_t *);
116void xfsaild_wakeup(struct xfs_ail *, xfs_lsn_t);
117int xfsaild_start(struct xfs_ail *);
118void xfsaild_stop(struct xfs_ail *);
119
120#if BITS_PER_LONG != 64 122#if BITS_PER_LONG != 64
121static inline void 123static inline void
122xfs_trans_ail_copy_lsn( 124xfs_trans_ail_copy_lsn(
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c48b4217ec47..b7a5fe7c52c8 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -953,7 +953,7 @@ xfs_release(
953 * If we previously truncated this file and removed old data 953 * If we previously truncated this file and removed old data
954 * in the process, we want to initiate "early" writeout on 954 * in the process, we want to initiate "early" writeout on
955 * the last close. This is an attempt to combat the notorious 955 * the last close. This is an attempt to combat the notorious
956 * NULL files problem which is particularly noticable from a 956 * NULL files problem which is particularly noticeable from a
957 * truncate down, buffered (re-)write (delalloc), followed by 957 * truncate down, buffered (re-)write (delalloc), followed by
958 * a crash. What we are effectively doing here is 958 * a crash. What we are effectively doing here is
959 * significantly reducing the time window where we'd otherwise 959 * significantly reducing the time window where we'd otherwise
@@ -982,7 +982,7 @@ xfs_release(
982 * 982 *
983 * Further, check if the inode is being opened, written and 983 * Further, check if the inode is being opened, written and
984 * closed frequently and we have delayed allocation blocks 984 * closed frequently and we have delayed allocation blocks
985 * oustanding (e.g. streaming writes from the NFS server), 985 * outstanding (e.g. streaming writes from the NFS server),
986 * truncating the blocks past EOF will cause fragmentation to 986 * truncating the blocks past EOF will cause fragmentation to
987 * occur. 987 * occur.
988 * 988 *