aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJ. Bruce Fields <bfields@citi.umich.edu>2009-08-21 11:27:29 -0400
committerJ. Bruce Fields <bfields@citi.umich.edu>2009-08-21 11:27:29 -0400
commite9dc122166b8d863d3057a66ada04838e5548e52 (patch)
tree749e15bf719b64bf9113db7acd8e043d9742cb26 /fs
parent560ab42ef923aaf2e4347315bdfcc74b2708972c (diff)
parent405d8f8b1d936414da2093d4149ff790ff3f84a5 (diff)
Merge branch 'nfs-for-2.6.32' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6 into for-2.6.32-incoming
Conflicts: net/sunrpc/cache.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c2
-rw-r--r--fs/Kconfig27
-rw-r--r--fs/adfs/super.c1
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/flock.c1
-rw-r--r--fs/afs/mntpt.c1
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/aio.c24
-rw-r--r--fs/autofs4/dev-ioctl.c1
-rw-r--r--fs/bfs/dir.c1
-rw-r--r--fs/bfs/file.c1
-rw-r--r--fs/binfmt_elf.c9
-rw-r--r--fs/binfmt_flat.c17
-rw-r--r--fs/bio-integrity.c170
-rw-r--r--fs/bio.c33
-rw-r--r--fs/block_dev.c10
-rw-r--r--fs/btrfs/async-thread.c6
-rw-r--r--fs/btrfs/compression.c1
-rw-r--r--fs/btrfs/ctree.c121
-rw-r--r--fs/btrfs/ctree.h30
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/extent-tree.c1096
-rw-r--r--fs/btrfs/file.c6
-rw-r--r--fs/btrfs/free-space-cache.c1058
-rw-r--r--fs/btrfs/free-space-cache.h8
-rw-r--r--fs/btrfs/inode.c31
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/relocation.c17
-rw-r--r--fs/btrfs/super.c1
-rw-r--r--fs/btrfs/transaction.c60
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c46
-rw-r--r--fs/btrfs/zlib.c6
-rw-r--r--fs/char_dev.c1
-rw-r--r--fs/cifs/CHANGES13
-rw-r--r--fs/cifs/README25
-rw-r--r--fs/cifs/asn1.c55
-rw-r--r--fs/cifs/cifs_debug.c8
-rw-r--r--fs/cifs/cifs_dfs_ref.c12
-rw-r--r--fs/cifs/cifs_spnego.c9
-rw-r--r--fs/cifs/cifs_unicode.c2
-rw-r--r--fs/cifs/cifsacl.c26
-rw-r--r--fs/cifs/cifsfs.c162
-rw-r--r--fs/cifs/cifsfs.h15
-rw-r--r--fs/cifs/cifsglob.h32
-rw-r--r--fs/cifs/cifspdu.h14
-rw-r--r--fs/cifs/cifsproto.h23
-rw-r--r--fs/cifs/cifssmb.c152
-rw-r--r--fs/cifs/connect.c103
-rw-r--r--fs/cifs/dir.c52
-rw-r--r--fs/cifs/dns_resolve.c25
-rw-r--r--fs/cifs/file.c40
-rw-r--r--fs/cifs/inode.c780
-rw-r--r--fs/cifs/link.c3
-rw-r--r--fs/cifs/netmisc.c56
-rw-r--r--fs/cifs/readdir.c505
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/xattr.c12
-rw-r--r--fs/compat.c5
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/dlm/lock.c2
-rw-r--r--fs/dlm/lowcomms.c4
-rw-r--r--fs/dlm/plock.c17
-rw-r--r--fs/ecryptfs/keystore.c13
-rw-r--r--fs/eventfd.c122
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/common.h4
-rw-r--r--fs/exofs/dir.c4
-rw-r--r--fs/exofs/exofs.h7
-rw-r--r--fs/exofs/file.c21
-rw-r--r--fs/exofs/inode.c7
-rw-r--r--fs/exofs/namei.c4
-rw-r--r--fs/exofs/osd.c4
-rw-r--r--fs/exofs/super.c7
-rw-r--r--fs/exofs/symlink.c4
-rw-r--r--fs/ext2/ioctl.c1
-rw-r--r--fs/ext2/namei.c12
-rw-r--r--fs/ext3/dir.c3
-rw-r--r--fs/ext3/inode.c32
-rw-r--r--fs/ext4/ext4.h14
-rw-r--r--fs/ext4/ext4_jbd2.c4
-rw-r--r--fs/ext4/ext4_jbd2.h6
-rw-r--r--fs/ext4/extents.c1
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c384
-rw-r--r--fs/ext4/ioctl.c21
-rw-r--r--fs/ext4/mballoc.c50
-rw-r--r--fs/fat/dir.c1
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/namei_msdos.c1
-rw-r--r--fs/fat/namei_vfat.c1
-rw-r--r--fs/fcntl.c1
-rw-r--r--fs/freevxfs/vxfs_super.c1
-rw-r--r--fs/fuse/dev.c91
-rw-r--r--fs/fuse/dir.c57
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/fuse_i.h27
-rw-r--r--fs/fuse/inode.c68
-rw-r--r--fs/gfs2/aops.c39
-rw-r--r--fs/gfs2/glock.c138
-rw-r--r--fs/gfs2/glock.h3
-rw-r--r--fs/gfs2/glops.c21
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/gfs2/rgrp.c23
-rw-r--r--fs/gfs2/super.c40
-rw-r--r--fs/gfs2/super.h4
-rw-r--r--fs/gfs2/trace_gfs2.h8
-rw-r--r--fs/hfs/super.c1
-rw-r--r--fs/hfsplus/super.c1
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/hpfs/dir.c1
-rw-r--r--fs/hpfs/file.c1
-rw-r--r--fs/hpfs/hpfs_fn.h1
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/hpfs/namei.c1
-rw-r--r--fs/inode.c40
-rw-r--r--fs/isofs/inode.c4
-rw-r--r--fs/jbd/journal.c26
-rw-r--r--fs/jbd/transaction.c68
-rw-r--r--fs/jbd2/journal.c31
-rw-r--r--fs/jbd2/transaction.c68
-rw-r--r--fs/jffs2/erase.c10
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/scan.c4
-rw-r--r--fs/jffs2/super.c1
-rw-r--r--fs/jfs/acl.c4
-rw-r--r--fs/lockd/clntproc.c1
-rw-r--r--fs/lockd/host.c14
-rw-r--r--fs/lockd/mon.c44
-rw-r--r--fs/lockd/svc4proc.c1
-rw-r--r--fs/lockd/svcproc.c1
-rw-r--r--fs/namei.c7
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/nfs/Makefile3
-rw-r--r--fs/nfs/cache_lib.c140
-rw-r--r--fs/nfs/cache_lib.h27
-rw-r--r--fs/nfs/callback.c26
-rw-r--r--fs/nfs/client.c33
-rw-r--r--fs/nfs/delegation.c1
-rw-r--r--fs/nfs/dir.c3
-rw-r--r--fs/nfs/direct.c23
-rw-r--r--fs/nfs/dns_resolve.c335
-rw-r--r--fs/nfs/dns_resolve.h14
-rw-r--r--fs/nfs/file.c50
-rw-r--r--fs/nfs/getroot.c1
-rw-r--r--fs/nfs/idmap.c6
-rw-r--r--fs/nfs/inode.c101
-rw-r--r--fs/nfs/internal.h29
-rw-r--r--fs/nfs/mount_clnt.c83
-rw-r--r--fs/nfs/nfs4_fs.h6
-rw-r--r--fs/nfs/nfs4namespace.c24
-rw-r--r--fs/nfs/nfs4proc.c81
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4xdr.c1460
-rw-r--r--fs/nfs/read.c7
-rw-r--r--fs/nfs/super.c247
-rw-r--r--fs/nfs/write.c105
-rw-r--r--fs/nfsd/export.c14
-rw-r--r--fs/nfsd/nfs4idmap.c20
-rw-r--r--fs/nfsd/nfsctl.c22
-rw-r--r--fs/nfsd/nfssvc.c1
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/Kconfig25
-rw-r--r--fs/nilfs2/bmap.c5
-rw-r--r--fs/nilfs2/cpfile.c5
-rw-r--r--fs/nilfs2/dat.c9
-rw-r--r--fs/nilfs2/dir.c1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/segment.c44
-rw-r--r--fs/notify/Kconfig12
-rw-r--r--fs/notify/dnotify/Kconfig2
-rw-r--r--fs/notify/fsnotify.c4
-rw-r--r--fs/notify/inotify/Kconfig2
-rw-r--r--fs/notify/inotify/inotify_user.c112
-rw-r--r--fs/notify/notification.c19
-rw-r--r--fs/ocfs2/alloc.c47
-rw-r--r--fs/ocfs2/aops.c69
-rw-r--r--fs/ocfs2/dcache.c35
-rw-r--r--fs/ocfs2/dcache.h3
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/ioctl.c1
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/journal.h19
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c134
-rw-r--r--fs/ocfs2/quota_local.c110
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/super.c30
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/partitions/check.c2
-rw-r--r--fs/pipe.c4
-rw-r--r--fs/proc/base.c27
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/proc/task_nommu.c1
-rw-r--r--fs/quota/dquot.c9
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/reiserfs/xattr.c1
-rw-r--r--fs/squashfs/super.c1
-rw-r--r--fs/sync.c5
-rw-r--r--fs/sysfs/bin.c1
-rw-r--r--fs/sysfs/dir.c2
-rw-r--r--fs/ubifs/io.c57
-rw-r--r--fs/ubifs/ioctl.c1
-rw-r--r--fs/ubifs/recovery.c57
-rw-r--r--fs/ubifs/replay.c9
-rw-r--r--fs/ubifs/scan.c20
-rw-r--r--fs/ubifs/super.c14
-rw-r--r--fs/ubifs/ubifs.h11
-rw-r--r--fs/udf/super.c12
-rw-r--r--fs/xfs/linux-2.6/kmem.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c4
-rw-r--r--fs/xfs/xfs_attr.c8
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_iget.c142
-rw-r--r--fs/xfs/xfs_inode.c10
-rw-r--r--fs/xfs/xfs_inode.h17
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
232 files changed, 6761 insertions, 3895 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6fcb1e7095cf..92828281a30b 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -57,7 +57,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
57 buffer = kmap(page); 57 buffer = kmap(page);
58 offset = page_offset(page); 58 offset = page_offset(page);
59 59
60 retval = v9fs_file_readn(filp, buffer, NULL, offset, PAGE_CACHE_SIZE); 60 retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
61 if (retval < 0) 61 if (retval < 0)
62 goto done; 62 goto done;
63 63
diff --git a/fs/Kconfig b/fs/Kconfig
index a97263be6a91..0e7da7bb5d93 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -186,32 +186,7 @@ source "fs/romfs/Kconfig"
186source "fs/sysv/Kconfig" 186source "fs/sysv/Kconfig"
187source "fs/ufs/Kconfig" 187source "fs/ufs/Kconfig"
188source "fs/exofs/Kconfig" 188source "fs/exofs/Kconfig"
189 189source "fs/nilfs2/Kconfig"
190config NILFS2_FS
191 tristate "NILFS2 file system support (EXPERIMENTAL)"
192 depends on BLOCK && EXPERIMENTAL
193 select CRC32
194 help
195 NILFS2 is a log-structured file system (LFS) supporting continuous
196 snapshotting. In addition to versioning capability of the entire
197 file system, users can even restore files mistakenly overwritten or
198 destroyed just a few seconds ago. Since this file system can keep
199 consistency like conventional LFS, it achieves quick recovery after
200 system crashes.
201
202 NILFS2 creates a number of checkpoints every few seconds or per
203 synchronous write basis (unless there is no change). Users can
204 select significant versions among continuously created checkpoints,
205 and can change them into snapshots which will be preserved for long
206 periods until they are changed back to checkpoints. Each
207 snapshot is mountable as a read-only file system concurrently with
208 its writable mount, and this feature is convenient for online backup.
209
210 Some features including atime, extended attributes, and POSIX ACLs,
211 are not supported yet.
212
213 To compile this file system support as a module, choose M here: the
214 module will be called nilfs2. If unsure, say N.
215 190
216endif # MISC_FILESYSTEMS 191endif # MISC_FILESYSTEMS
217 192
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index aad92f0a1048..6910a98bd73c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -13,6 +13,7 @@
13#include <linux/parser.h> 13#include <linux/parser.h>
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/smp_lock.h>
16#include <linux/statfs.h> 17#include <linux/statfs.h>
17#include "adfs.h" 18#include "adfs.h"
18#include "dir_f.h" 19#include "dir_f.h"
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9bd757774c9e..88067f36e5e7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -564,7 +564,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
564static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 564static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
565{ 565{
566 struct afs_vnode *vnode, *dir; 566 struct afs_vnode *vnode, *dir;
567 struct afs_fid fid; 567 struct afs_fid uninitialized_var(fid);
568 struct dentry *parent; 568 struct dentry *parent;
569 struct key *key; 569 struct key *key;
570 void *dir_version; 570 void *dir_version;
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 210acafe4a9b..3ff8bdd18fb3 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -432,7 +432,6 @@ vfs_rejected_lock:
432 list_del_init(&fl->fl_u.afs.link); 432 list_del_init(&fl->fl_u.afs.link);
433 if (list_empty(&vnode->granted_locks)) 433 if (list_empty(&vnode->granted_locks))
434 afs_defer_unlock(vnode, key); 434 afs_defer_unlock(vnode, key);
435 spin_unlock(&vnode->lock);
436 goto abort_attempt; 435 goto abort_attempt;
437} 436}
438 437
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index c52be53f6946..5ffb570cd3a8 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -17,7 +17,6 @@
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/mount.h> 18#include <linux/mount.h>
19#include <linux/namei.h> 19#include <linux/namei.h>
20#include <linux/mnt_namespace.h>
21#include "internal.h" 20#include "internal.h"
22 21
23 22
diff --git a/fs/afs/super.c b/fs/afs/super.c
index ad0514d0115f..e1ea1c240b6a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -18,6 +18,7 @@
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/init.h> 19#include <linux/init.h>
20#include <linux/slab.h> 20#include <linux/slab.h>
21#include <linux/smp_lock.h>
21#include <linux/fs.h> 22#include <linux/fs.h>
22#include <linux/pagemap.h> 23#include <linux/pagemap.h>
23#include <linux/parser.h> 24#include <linux/parser.h>
diff --git a/fs/aio.c b/fs/aio.c
index 76da12537956..d065b2c3273e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
485{ 485{
486 assert_spin_locked(&ctx->ctx_lock); 486 assert_spin_locked(&ctx->ctx_lock);
487 487
488 if (req->ki_eventfd != NULL)
489 eventfd_ctx_put(req->ki_eventfd);
488 if (req->ki_dtor) 490 if (req->ki_dtor)
489 req->ki_dtor(req); 491 req->ki_dtor(req);
490 if (req->ki_iovec != &req->ki_inline_vec) 492 if (req->ki_iovec != &req->ki_inline_vec)
@@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data)
509 /* Complete the fput(s) */ 511 /* Complete the fput(s) */
510 if (req->ki_filp != NULL) 512 if (req->ki_filp != NULL)
511 __fput(req->ki_filp); 513 __fput(req->ki_filp);
512 if (req->ki_eventfd != NULL)
513 __fput(req->ki_eventfd);
514 514
515 /* Link the iocb into the context's free list */ 515 /* Link the iocb into the context's free list */
516 spin_lock_irq(&ctx->ctx_lock); 516 spin_lock_irq(&ctx->ctx_lock);
@@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data)
528 */ 528 */
529static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) 529static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
530{ 530{
531 int schedule_putreq = 0;
532
533 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", 531 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
534 req, atomic_long_read(&req->ki_filp->f_count)); 532 req, atomic_long_read(&req->ki_filp->f_count));
535 533
@@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
549 * we would not be holding the last reference to the file*, so 547 * we would not be holding the last reference to the file*, so
550 * this function will be executed w/out any aio kthread wakeup. 548 * this function will be executed w/out any aio kthread wakeup.
551 */ 549 */
552 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) 550 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
553 schedule_putreq++;
554 else
555 req->ki_filp = NULL;
556 if (req->ki_eventfd != NULL) {
557 if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count)))
558 schedule_putreq++;
559 else
560 req->ki_eventfd = NULL;
561 }
562 if (unlikely(schedule_putreq)) {
563 get_ioctx(ctx); 551 get_ioctx(ctx);
564 spin_lock(&fput_lock); 552 spin_lock(&fput_lock);
565 list_add(&req->ki_list, &fput_head); 553 list_add(&req->ki_list, &fput_head);
566 spin_unlock(&fput_lock); 554 spin_unlock(&fput_lock);
567 queue_work(aio_wq, &fput_work); 555 queue_work(aio_wq, &fput_work);
568 } else 556 } else {
557 req->ki_filp = NULL;
569 really_put_req(ctx, req); 558 really_put_req(ctx, req);
559 }
570 return 1; 560 return 1;
571} 561}
572 562
@@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1622 * an eventfd() fd, and will be signaled for each completed 1612 * an eventfd() fd, and will be signaled for each completed
1623 * event using the eventfd_signal() function. 1613 * event using the eventfd_signal() function.
1624 */ 1614 */
1625 req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); 1615 req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
1626 if (IS_ERR(req->ki_eventfd)) { 1616 if (IS_ERR(req->ki_eventfd)) {
1627 ret = PTR_ERR(req->ki_eventfd); 1617 ret = PTR_ERR(req->ki_eventfd);
1628 req->ki_eventfd = NULL; 1618 req->ki_eventfd = NULL;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index f3da2eb51f56..00bf8fcb245f 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -19,7 +19,6 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/compat.h> 20#include <linux/compat.h>
21#include <linux/syscalls.h> 21#include <linux/syscalls.h>
22#include <linux/smp_lock.h>
23#include <linux/magic.h> 22#include <linux/magic.h>
24#include <linux/dcache.h> 23#include <linux/dcache.h>
25#include <linux/uaccess.h> 24#include <linux/uaccess.h>
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 54bd07d44e68..1e41aadb1068 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -8,7 +8,6 @@
8#include <linux/time.h> 8#include <linux/time.h>
9#include <linux/string.h> 9#include <linux/string.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/smp_lock.h>
12#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
13#include <linux/sched.h> 12#include <linux/sched.h>
14#include "bfs.h" 13#include "bfs.h"
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 6a021265f018..88b9a3ff44e4 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/smp_lock.h>
15#include "bfs.h" 14#include "bfs.h"
16 15
17#undef DEBUG 16#undef DEBUG
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9fa212b014a5..b7c1603cd4bd 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1522 info->thread = NULL; 1522 info->thread = NULL;
1523 1523
1524 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); 1524 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1525 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1526
1527 if (psinfo == NULL) 1525 if (psinfo == NULL)
1528 return 0; 1526 return 0;
1529 1527
1528 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1529
1530 /* 1530 /*
1531 * Figure out how many notes we're going to need for each thread. 1531 * Figure out how many notes we're going to need for each thread.
1532 */ 1532 */
@@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
1929 elf = kmalloc(sizeof(*elf), GFP_KERNEL); 1929 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1930 if (!elf) 1930 if (!elf)
1931 goto out; 1931 goto out;
1932 1932 /*
1933 * The number of segs are recored into ELF header as 16bit value.
1934 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1935 */
1933 segs = current->mm->map_count; 1936 segs = current->mm->map_count;
1934#ifdef ELF_CORE_EXTRA_PHDRS 1937#ifdef ELF_CORE_EXTRA_PHDRS
1935 segs += ELF_CORE_EXTRA_PHDRS; 1938 segs += ELF_CORE_EXTRA_PHDRS;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 697f6b5f1313..e92f229e3c6e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -828,15 +828,22 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
828 if (IS_ERR(bprm.file)) 828 if (IS_ERR(bprm.file))
829 return res; 829 return res;
830 830
831 bprm.cred = prepare_exec_creds();
832 res = -ENOMEM;
833 if (!bprm.cred)
834 goto out;
835
831 res = prepare_binprm(&bprm); 836 res = prepare_binprm(&bprm);
832 837
833 if (res <= (unsigned long)-4096) 838 if (res <= (unsigned long)-4096)
834 res = load_flat_file(&bprm, libs, id, NULL); 839 res = load_flat_file(&bprm, libs, id, NULL);
835 if (bprm.file) { 840
836 allow_write_access(bprm.file); 841 abort_creds(bprm.cred);
837 fput(bprm.file); 842
838 bprm.file = NULL; 843out:
839 } 844 allow_write_access(bprm.file);
845 fput(bprm.file);
846
840 return(res); 847 return(res);
841} 848}
842 849
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 31c46a241bac..49a34e7f7306 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * bio-integrity.c - bio data integrity extensions 2 * bio-integrity.c - bio data integrity extensions
3 * 3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation 4 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com> 5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 * 6 *
7 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
@@ -25,63 +25,121 @@
25#include <linux/bio.h> 25#include <linux/bio.h>
26#include <linux/workqueue.h> 26#include <linux/workqueue.h>
27 27
28static struct kmem_cache *bio_integrity_slab __read_mostly; 28struct integrity_slab {
29static mempool_t *bio_integrity_pool; 29 struct kmem_cache *slab;
30static struct bio_set *integrity_bio_set; 30 unsigned short nr_vecs;
31 char name[8];
32};
33
34#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) }
35struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = {
36 IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES),
37};
38#undef IS
39
31static struct workqueue_struct *kintegrityd_wq; 40static struct workqueue_struct *kintegrityd_wq;
32 41
42static inline unsigned int vecs_to_idx(unsigned int nr)
43{
44 switch (nr) {
45 case 1:
46 return 0;
47 case 2 ... 4:
48 return 1;
49 case 5 ... 16:
50 return 2;
51 case 17 ... 64:
52 return 3;
53 case 65 ... 128:
54 return 4;
55 case 129 ... BIO_MAX_PAGES:
56 return 5;
57 default:
58 BUG();
59 }
60}
61
62static inline int use_bip_pool(unsigned int idx)
63{
64 if (idx == BIOVEC_NR_POOLS)
65 return 1;
66
67 return 0;
68}
69
33/** 70/**
34 * bio_integrity_alloc - Allocate integrity payload and attach it to bio 71 * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
35 * @bio: bio to attach integrity metadata to 72 * @bio: bio to attach integrity metadata to
36 * @gfp_mask: Memory allocation mask 73 * @gfp_mask: Memory allocation mask
37 * @nr_vecs: Number of integrity metadata scatter-gather elements 74 * @nr_vecs: Number of integrity metadata scatter-gather elements
75 * @bs: bio_set to allocate from
38 * 76 *
39 * Description: This function prepares a bio for attaching integrity 77 * Description: This function prepares a bio for attaching integrity
40 * metadata. nr_vecs specifies the maximum number of pages containing 78 * metadata. nr_vecs specifies the maximum number of pages containing
41 * integrity metadata that can be attached. 79 * integrity metadata that can be attached.
42 */ 80 */
43struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, 81struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
44 gfp_t gfp_mask, 82 gfp_t gfp_mask,
45 unsigned int nr_vecs) 83 unsigned int nr_vecs,
84 struct bio_set *bs)
46{ 85{
47 struct bio_integrity_payload *bip; 86 struct bio_integrity_payload *bip;
48 struct bio_vec *iv; 87 unsigned int idx = vecs_to_idx(nr_vecs);
49 unsigned long idx;
50 88
51 BUG_ON(bio == NULL); 89 BUG_ON(bio == NULL);
90 bip = NULL;
52 91
53 bip = mempool_alloc(bio_integrity_pool, gfp_mask); 92 /* Lower order allocations come straight from slab */
54 if (unlikely(bip == NULL)) { 93 if (!use_bip_pool(idx))
55 printk(KERN_ERR "%s: could not alloc bip\n", __func__); 94 bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask);
56 return NULL;
57 }
58 95
59 memset(bip, 0, sizeof(*bip)); 96 /* Use mempool if lower order alloc failed or max vecs were requested */
97 if (bip == NULL) {
98 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
60 99
61 iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); 100 if (unlikely(bip == NULL)) {
62 if (unlikely(iv == NULL)) { 101 printk(KERN_ERR "%s: could not alloc bip\n", __func__);
63 printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); 102 return NULL;
64 mempool_free(bip, bio_integrity_pool); 103 }
65 return NULL;
66 } 104 }
67 105
68 bip->bip_pool = idx; 106 memset(bip, 0, sizeof(*bip));
69 bip->bip_vec = iv; 107
108 bip->bip_slab = idx;
70 bip->bip_bio = bio; 109 bip->bip_bio = bio;
71 bio->bi_integrity = bip; 110 bio->bi_integrity = bip;
72 111
73 return bip; 112 return bip;
74} 113}
114EXPORT_SYMBOL(bio_integrity_alloc_bioset);
115
116/**
117 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
118 * @bio: bio to attach integrity metadata to
119 * @gfp_mask: Memory allocation mask
120 * @nr_vecs: Number of integrity metadata scatter-gather elements
121 *
122 * Description: This function prepares a bio for attaching integrity
123 * metadata. nr_vecs specifies the maximum number of pages containing
124 * integrity metadata that can be attached.
125 */
126struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
127 gfp_t gfp_mask,
128 unsigned int nr_vecs)
129{
130 return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
131}
75EXPORT_SYMBOL(bio_integrity_alloc); 132EXPORT_SYMBOL(bio_integrity_alloc);
76 133
77/** 134/**
78 * bio_integrity_free - Free bio integrity payload 135 * bio_integrity_free - Free bio integrity payload
79 * @bio: bio containing bip to be freed 136 * @bio: bio containing bip to be freed
137 * @bs: bio_set this bio was allocated from
80 * 138 *
81 * Description: Used to free the integrity portion of a bio. Usually 139 * Description: Used to free the integrity portion of a bio. Usually
82 * called from bio_free(). 140 * called from bio_free().
83 */ 141 */
84void bio_integrity_free(struct bio *bio) 142void bio_integrity_free(struct bio *bio, struct bio_set *bs)
85{ 143{
86 struct bio_integrity_payload *bip = bio->bi_integrity; 144 struct bio_integrity_payload *bip = bio->bi_integrity;
87 145
@@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio)
92 && bip->bip_buf != NULL) 150 && bip->bip_buf != NULL)
93 kfree(bip->bip_buf); 151 kfree(bip->bip_buf);
94 152
95 bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); 153 if (use_bip_pool(bip->bip_slab))
96 mempool_free(bip, bio_integrity_pool); 154 mempool_free(bip, bs->bio_integrity_pool);
155 else
156 kmem_cache_free(bip_slab[bip->bip_slab].slab, bip);
97 157
98 bio->bi_integrity = NULL; 158 bio->bi_integrity = NULL;
99} 159}
@@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
114 struct bio_integrity_payload *bip = bio->bi_integrity; 174 struct bio_integrity_payload *bip = bio->bi_integrity;
115 struct bio_vec *iv; 175 struct bio_vec *iv;
116 176
117 if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { 177 if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
118 printk(KERN_ERR "%s: bip_vec full\n", __func__); 178 printk(KERN_ERR "%s: bip_vec full\n", __func__);
119 return 0; 179 return 0;
120 } 180 }
@@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
647 bp->iv1 = bip->bip_vec[0]; 707 bp->iv1 = bip->bip_vec[0];
648 bp->iv2 = bip->bip_vec[0]; 708 bp->iv2 = bip->bip_vec[0];
649 709
650 bp->bip1.bip_vec = &bp->iv1; 710 bp->bip1.bip_vec[0] = bp->iv1;
651 bp->bip2.bip_vec = &bp->iv2; 711 bp->bip2.bip_vec[0] = bp->iv2;
652 712
653 bp->iv1.bv_len = sectors * bi->tuple_size; 713 bp->iv1.bv_len = sectors * bi->tuple_size;
654 bp->iv2.bv_offset += sectors * bi->tuple_size; 714 bp->iv2.bv_offset += sectors * bi->tuple_size;
@@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split);
667 * @bio: New bio 727 * @bio: New bio
668 * @bio_src: Original bio 728 * @bio_src: Original bio
669 * @gfp_mask: Memory allocation mask 729 * @gfp_mask: Memory allocation mask
730 * @bs: bio_set to allocate bip from
670 * 731 *
671 * Description: Called to allocate a bip when cloning a bio 732 * Description: Called to allocate a bip when cloning a bio
672 */ 733 */
673int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) 734int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
735 gfp_t gfp_mask, struct bio_set *bs)
674{ 736{
675 struct bio_integrity_payload *bip_src = bio_src->bi_integrity; 737 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
676 struct bio_integrity_payload *bip; 738 struct bio_integrity_payload *bip;
677 739
678 BUG_ON(bip_src == NULL); 740 BUG_ON(bip_src == NULL);
679 741
680 bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); 742 bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs);
681 743
682 if (bip == NULL) 744 if (bip == NULL)
683 return -EIO; 745 return -EIO;
@@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask)
693} 755}
694EXPORT_SYMBOL(bio_integrity_clone); 756EXPORT_SYMBOL(bio_integrity_clone);
695 757
696static int __init bio_integrity_init(void) 758int bioset_integrity_create(struct bio_set *bs, int pool_size)
697{ 759{
698 kintegrityd_wq = create_workqueue("kintegrityd"); 760 unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES);
761
762 bs->bio_integrity_pool =
763 mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab);
699 764
765 if (!bs->bio_integrity_pool)
766 return -1;
767
768 return 0;
769}
770EXPORT_SYMBOL(bioset_integrity_create);
771
772void bioset_integrity_free(struct bio_set *bs)
773{
774 if (bs->bio_integrity_pool)
775 mempool_destroy(bs->bio_integrity_pool);
776}
777EXPORT_SYMBOL(bioset_integrity_free);
778
779void __init bio_integrity_init(void)
780{
781 unsigned int i;
782
783 kintegrityd_wq = create_workqueue("kintegrityd");
700 if (!kintegrityd_wq) 784 if (!kintegrityd_wq)
701 panic("Failed to create kintegrityd\n"); 785 panic("Failed to create kintegrityd\n");
702 786
703 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, 787 for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) {
704 SLAB_HWCACHE_ALIGN|SLAB_PANIC); 788 unsigned int size;
705 789
706 bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, 790 size = sizeof(struct bio_integrity_payload)
707 bio_integrity_slab); 791 + bip_slab[i].nr_vecs * sizeof(struct bio_vec);
708 if (!bio_integrity_pool)
709 panic("bio_integrity: can't allocate bip pool\n");
710 792
711 integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); 793 bip_slab[i].slab =
712 if (!integrity_bio_set) 794 kmem_cache_create(bip_slab[i].name, size, 0,
713 panic("bio_integrity: can't allocate bio_set\n"); 795 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
714 796 }
715 return 0;
716} 797}
717subsys_initcall(bio_integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 24c914043532..76738005c8e8 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs)
238 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); 238 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
239 239
240 if (bio_integrity(bio)) 240 if (bio_integrity(bio))
241 bio_integrity_free(bio); 241 bio_integrity_free(bio, bs);
242 242
243 /* 243 /*
244 * If we have front padding, adjust the bio pointer before freeing 244 * If we have front padding, adjust the bio pointer before freeing
@@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
341static void bio_kmalloc_destructor(struct bio *bio) 341static void bio_kmalloc_destructor(struct bio *bio)
342{ 342{
343 if (bio_integrity(bio)) 343 if (bio_integrity(bio))
344 bio_integrity_free(bio); 344 bio_integrity_free(bio, fs_bio_set);
345 kfree(bio); 345 kfree(bio);
346} 346}
347 347
@@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
472 if (bio_integrity(bio)) { 472 if (bio_integrity(bio)) {
473 int ret; 473 int ret;
474 474
475 ret = bio_integrity_clone(b, bio, gfp_mask); 475 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set);
476 476
477 if (ret < 0) { 477 if (ret < 0) {
478 bio_put(b); 478 bio_put(b);
@@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
705} 705}
706 706
707static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, 707static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
708 struct sg_iovec *iov, int iov_count, int uncopy, 708 struct sg_iovec *iov, int iov_count,
709 int do_free_page) 709 int to_user, int from_user, int do_free_page)
710{ 710{
711 int ret = 0, i; 711 int ret = 0, i;
712 struct bio_vec *bvec; 712 struct bio_vec *bvec;
713 int iov_idx = 0; 713 int iov_idx = 0;
714 unsigned int iov_off = 0; 714 unsigned int iov_off = 0;
715 int read = bio_data_dir(bio) == READ;
716 715
717 __bio_for_each_segment(bvec, bio, i, 0) { 716 __bio_for_each_segment(bvec, bio, i, 0) {
718 char *bv_addr = page_address(bvec->bv_page); 717 char *bv_addr = page_address(bvec->bv_page);
@@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
727 iov_addr = iov[iov_idx].iov_base + iov_off; 726 iov_addr = iov[iov_idx].iov_base + iov_off;
728 727
729 if (!ret) { 728 if (!ret) {
730 if (!read && !uncopy) 729 if (to_user)
731 ret = copy_from_user(bv_addr, iov_addr,
732 bytes);
733 if (read && uncopy)
734 ret = copy_to_user(iov_addr, bv_addr, 730 ret = copy_to_user(iov_addr, bv_addr,
735 bytes); 731 bytes);
736 732
733 if (from_user)
734 ret = copy_from_user(bv_addr, iov_addr,
735 bytes);
736
737 if (ret) 737 if (ret)
738 ret = -EFAULT; 738 ret = -EFAULT;
739 } 739 }
@@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio)
770 770
771 if (!bio_flagged(bio, BIO_NULL_MAPPED)) 771 if (!bio_flagged(bio, BIO_NULL_MAPPED))
772 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, 772 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
773 bmd->nr_sgvecs, 1, bmd->is_our_pages); 773 bmd->nr_sgvecs, bio_data_dir(bio) == READ,
774 0, bmd->is_our_pages);
774 bio_free_map_data(bmd); 775 bio_free_map_data(bmd);
775 bio_put(bio); 776 bio_put(bio);
776 return ret; 777 return ret;
@@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
875 /* 876 /*
876 * success 877 * success
877 */ 878 */
878 if (!write_to_vm && (!map_data || !map_data->null_mapped)) { 879 if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
879 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); 880 (map_data && map_data->from_user)) {
881 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
880 if (ret) 882 if (ret)
881 goto cleanup; 883 goto cleanup;
882 } 884 }
@@ -1539,6 +1541,7 @@ void bioset_free(struct bio_set *bs)
1539 if (bs->bio_pool) 1541 if (bs->bio_pool)
1540 mempool_destroy(bs->bio_pool); 1542 mempool_destroy(bs->bio_pool);
1541 1543
1544 bioset_integrity_free(bs);
1542 biovec_free_pools(bs); 1545 biovec_free_pools(bs);
1543 bio_put_slab(bs); 1546 bio_put_slab(bs);
1544 1547
@@ -1579,6 +1582,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1579 if (!bs->bio_pool) 1582 if (!bs->bio_pool)
1580 goto bad; 1583 goto bad;
1581 1584
1585 if (bioset_integrity_create(bs, pool_size))
1586 goto bad;
1587
1582 if (!biovec_create_pools(bs, pool_size)) 1588 if (!biovec_create_pools(bs, pool_size))
1583 return bs; 1589 return bs;
1584 1590
@@ -1616,6 +1622,7 @@ static int __init init_bio(void)
1616 if (!bio_slabs) 1622 if (!bio_slabs)
1617 panic("bio: can't allocate bios\n"); 1623 panic("bio: can't allocate bios\n");
1618 1624
1625 bio_integrity_init();
1619 biovec_init_slabs(); 1626 biovec_init_slabs();
1620 1627
1621 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); 1628 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3a6d4fb2a329..94dfda24c06e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -564,6 +564,16 @@ struct block_device *bdget(dev_t dev)
564 564
565EXPORT_SYMBOL(bdget); 565EXPORT_SYMBOL(bdget);
566 566
567/**
568 * bdgrab -- Grab a reference to an already referenced block device
569 * @bdev: Block device to grab a reference to.
570 */
571struct block_device *bdgrab(struct block_device *bdev)
572{
573 atomic_inc(&bdev->bd_inode->i_count);
574 return bdev;
575}
576
567long nr_blockdev_pages(void) 577long nr_blockdev_pages(void)
568{ 578{
569 struct block_device *bdev; 579 struct block_device *bdev;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7f88628a1a72..019e8af449ab 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
299 "btrfs-%s-%d", workers->name, 299 "btrfs-%s-%d", workers->name,
300 workers->num_workers + i); 300 workers->num_workers + i);
301 if (IS_ERR(worker->task)) { 301 if (IS_ERR(worker->task)) {
302 kfree(worker);
303 ret = PTR_ERR(worker->task); 302 ret = PTR_ERR(worker->task);
303 kfree(worker);
304 goto fail; 304 goto fail;
305 } 305 }
306 306
@@ -424,11 +424,11 @@ int btrfs_requeue_work(struct btrfs_work *work)
424 * list 424 * list
425 */ 425 */
426 if (worker->idle) { 426 if (worker->idle) {
427 spin_lock_irqsave(&worker->workers->lock, flags); 427 spin_lock(&worker->workers->lock);
428 worker->idle = 0; 428 worker->idle = 0;
429 list_move_tail(&worker->worker_list, 429 list_move_tail(&worker->worker_list,
430 &worker->workers->worker_list); 430 &worker->workers->worker_list);
431 spin_unlock_irqrestore(&worker->workers->lock, flags); 431 spin_unlock(&worker->workers->lock);
432 } 432 }
433 if (!worker->working) { 433 if (!worker->working) {
434 wake = 1; 434 wake = 1;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index de1e2fd32080..9d8ba4d54a37 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -26,7 +26,6 @@
26#include <linux/time.h> 26#include <linux/time.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/string.h> 28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
31#include <linux/mpage.h> 30#include <linux/mpage.h>
32#include <linux/swap.h> 31#include <linux/swap.h>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 60a45f3a4e91..3fdcc0512d3a 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -557,19 +557,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
557 557
558 btrfs_disk_key_to_cpu(&k1, disk); 558 btrfs_disk_key_to_cpu(&k1, disk);
559 559
560 if (k1.objectid > k2->objectid) 560 return btrfs_comp_cpu_keys(&k1, k2);
561 return 1;
562 if (k1.objectid < k2->objectid)
563 return -1;
564 if (k1.type > k2->type)
565 return 1;
566 if (k1.type < k2->type)
567 return -1;
568 if (k1.offset > k2->offset)
569 return 1;
570 if (k1.offset < k2->offset)
571 return -1;
572 return 0;
573} 561}
574 562
575/* 563/*
@@ -1052,9 +1040,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1052 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1040 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1053 return 0; 1041 return 0;
1054 1042
1055 if (btrfs_header_nritems(mid) > 2)
1056 return 0;
1057
1058 if (btrfs_header_nritems(mid) < 2) 1043 if (btrfs_header_nritems(mid) < 2)
1059 err_on_enospc = 1; 1044 err_on_enospc = 1;
1060 1045
@@ -1701,6 +1686,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1701 struct extent_buffer *b; 1686 struct extent_buffer *b;
1702 int slot; 1687 int slot;
1703 int ret; 1688 int ret;
1689 int err;
1704 int level; 1690 int level;
1705 int lowest_unlock = 1; 1691 int lowest_unlock = 1;
1706 u8 lowest_level = 0; 1692 u8 lowest_level = 0;
@@ -1737,8 +1723,6 @@ again:
1737 p->locks[level] = 1; 1723 p->locks[level] = 1;
1738 1724
1739 if (cow) { 1725 if (cow) {
1740 int wret;
1741
1742 /* 1726 /*
1743 * if we don't really need to cow this block 1727 * if we don't really need to cow this block
1744 * then we don't want to set the path blocking, 1728 * then we don't want to set the path blocking,
@@ -1749,12 +1733,12 @@ again:
1749 1733
1750 btrfs_set_path_blocking(p); 1734 btrfs_set_path_blocking(p);
1751 1735
1752 wret = btrfs_cow_block(trans, root, b, 1736 err = btrfs_cow_block(trans, root, b,
1753 p->nodes[level + 1], 1737 p->nodes[level + 1],
1754 p->slots[level + 1], &b); 1738 p->slots[level + 1], &b);
1755 if (wret) { 1739 if (err) {
1756 free_extent_buffer(b); 1740 free_extent_buffer(b);
1757 ret = wret; 1741 ret = err;
1758 goto done; 1742 goto done;
1759 } 1743 }
1760 } 1744 }
@@ -1793,41 +1777,45 @@ cow_done:
1793 ret = bin_search(b, key, level, &slot); 1777 ret = bin_search(b, key, level, &slot);
1794 1778
1795 if (level != 0) { 1779 if (level != 0) {
1796 if (ret && slot > 0) 1780 int dec = 0;
1781 if (ret && slot > 0) {
1782 dec = 1;
1797 slot -= 1; 1783 slot -= 1;
1784 }
1798 p->slots[level] = slot; 1785 p->slots[level] = slot;
1799 ret = setup_nodes_for_search(trans, root, p, b, level, 1786 err = setup_nodes_for_search(trans, root, p, b, level,
1800 ins_len); 1787 ins_len);
1801 if (ret == -EAGAIN) 1788 if (err == -EAGAIN)
1802 goto again; 1789 goto again;
1803 else if (ret) 1790 if (err) {
1791 ret = err;
1804 goto done; 1792 goto done;
1793 }
1805 b = p->nodes[level]; 1794 b = p->nodes[level];
1806 slot = p->slots[level]; 1795 slot = p->slots[level];
1807 1796
1808 unlock_up(p, level, lowest_unlock); 1797 unlock_up(p, level, lowest_unlock);
1809 1798
1810 /* this is only true while dropping a snapshot */
1811 if (level == lowest_level) { 1799 if (level == lowest_level) {
1812 ret = 0; 1800 if (dec)
1801 p->slots[level]++;
1813 goto done; 1802 goto done;
1814 } 1803 }
1815 1804
1816 ret = read_block_for_search(trans, root, p, 1805 err = read_block_for_search(trans, root, p,
1817 &b, level, slot, key); 1806 &b, level, slot, key);
1818 if (ret == -EAGAIN) 1807 if (err == -EAGAIN)
1819 goto again; 1808 goto again;
1820 1809 if (err) {
1821 if (ret == -EIO) 1810 ret = err;
1822 goto done; 1811 goto done;
1812 }
1823 1813
1824 if (!p->skip_locking) { 1814 if (!p->skip_locking) {
1825 int lret;
1826
1827 btrfs_clear_path_blocking(p, NULL); 1815 btrfs_clear_path_blocking(p, NULL);
1828 lret = btrfs_try_spin_lock(b); 1816 err = btrfs_try_spin_lock(b);
1829 1817
1830 if (!lret) { 1818 if (!err) {
1831 btrfs_set_path_blocking(p); 1819 btrfs_set_path_blocking(p);
1832 btrfs_tree_lock(b); 1820 btrfs_tree_lock(b);
1833 btrfs_clear_path_blocking(p, b); 1821 btrfs_clear_path_blocking(p, b);
@@ -1837,16 +1825,14 @@ cow_done:
1837 p->slots[level] = slot; 1825 p->slots[level] = slot;
1838 if (ins_len > 0 && 1826 if (ins_len > 0 &&
1839 btrfs_leaf_free_space(root, b) < ins_len) { 1827 btrfs_leaf_free_space(root, b) < ins_len) {
1840 int sret;
1841
1842 btrfs_set_path_blocking(p); 1828 btrfs_set_path_blocking(p);
1843 sret = split_leaf(trans, root, key, 1829 err = split_leaf(trans, root, key,
1844 p, ins_len, ret == 0); 1830 p, ins_len, ret == 0);
1845 btrfs_clear_path_blocking(p, NULL); 1831 btrfs_clear_path_blocking(p, NULL);
1846 1832
1847 BUG_ON(sret > 0); 1833 BUG_ON(err > 0);
1848 if (sret) { 1834 if (err) {
1849 ret = sret; 1835 ret = err;
1850 goto done; 1836 goto done;
1851 } 1837 }
1852 } 1838 }
@@ -3807,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3807 } 3793 }
3808 3794
3809 /* delete the leaf if it is mostly empty */ 3795 /* delete the leaf if it is mostly empty */
3810 if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) { 3796 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
3811 /* push_leaf_left fixes the path. 3797 /* push_leaf_left fixes the path.
3812 * make sure the path still points to our leaf 3798 * make sure the path still points to our leaf
3813 * for possible call to del_ptr below 3799 * for possible call to del_ptr below
@@ -4042,10 +4028,9 @@ out:
4042 * calling this function. 4028 * calling this function.
4043 */ 4029 */
4044int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 4030int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
4045 struct btrfs_key *key, int lowest_level, 4031 struct btrfs_key *key, int level,
4046 int cache_only, u64 min_trans) 4032 int cache_only, u64 min_trans)
4047{ 4033{
4048 int level = lowest_level;
4049 int slot; 4034 int slot;
4050 struct extent_buffer *c; 4035 struct extent_buffer *c;
4051 4036
@@ -4058,11 +4043,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
4058 c = path->nodes[level]; 4043 c = path->nodes[level];
4059next: 4044next:
4060 if (slot >= btrfs_header_nritems(c)) { 4045 if (slot >= btrfs_header_nritems(c)) {
4061 level++; 4046 int ret;
4062 if (level == BTRFS_MAX_LEVEL) 4047 int orig_lowest;
4048 struct btrfs_key cur_key;
4049 if (level + 1 >= BTRFS_MAX_LEVEL ||
4050 !path->nodes[level + 1])
4063 return 1; 4051 return 1;
4064 continue; 4052
4053 if (path->locks[level + 1]) {
4054 level++;
4055 continue;
4056 }
4057
4058 slot = btrfs_header_nritems(c) - 1;
4059 if (level == 0)
4060 btrfs_item_key_to_cpu(c, &cur_key, slot);
4061 else
4062 btrfs_node_key_to_cpu(c, &cur_key, slot);
4063
4064 orig_lowest = path->lowest_level;
4065 btrfs_release_path(root, path);
4066 path->lowest_level = level;
4067 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4068 0, 0);
4069 path->lowest_level = orig_lowest;
4070 if (ret < 0)
4071 return ret;
4072
4073 c = path->nodes[level];
4074 slot = path->slots[level];
4075 if (ret == 0)
4076 slot++;
4077 goto next;
4065 } 4078 }
4079
4066 if (level == 0) 4080 if (level == 0)
4067 btrfs_item_key_to_cpu(c, key, slot); 4081 btrfs_item_key_to_cpu(c, key, slot);
4068 else { 4082 else {
@@ -4146,7 +4160,8 @@ again:
4146 * advance the path if there are now more items available. 4160 * advance the path if there are now more items available.
4147 */ 4161 */
4148 if (nritems > 0 && path->slots[0] < nritems - 1) { 4162 if (nritems > 0 && path->slots[0] < nritems - 1) {
4149 path->slots[0]++; 4163 if (ret == 0)
4164 path->slots[0]++;
4150 ret = 0; 4165 ret = 0;
4151 goto done; 4166 goto done;
4152 } 4167 }
@@ -4278,10 +4293,10 @@ int btrfs_previous_item(struct btrfs_root *root,
4278 path->slots[0]--; 4293 path->slots[0]--;
4279 4294
4280 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4295 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4281 if (found_key.type == type)
4282 return 0;
4283 if (found_key.objectid < min_objectid) 4296 if (found_key.objectid < min_objectid)
4284 break; 4297 break;
4298 if (found_key.type == type)
4299 return 0;
4285 if (found_key.objectid == min_objectid && 4300 if (found_key.objectid == min_objectid &&
4286 found_key.type < type) 4301 found_key.type < type)
4287 break; 4302 break;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2779c2f5360a..837435ce84ca 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -481,7 +481,7 @@ struct btrfs_shared_data_ref {
481 481
482struct btrfs_extent_inline_ref { 482struct btrfs_extent_inline_ref {
483 u8 type; 483 u8 type;
484 u64 offset; 484 __le64 offset;
485} __attribute__ ((__packed__)); 485} __attribute__ ((__packed__));
486 486
487/* old style backrefs item */ 487/* old style backrefs item */
@@ -689,6 +689,7 @@ struct btrfs_space_info {
689 struct list_head block_groups; 689 struct list_head block_groups;
690 spinlock_t lock; 690 spinlock_t lock;
691 struct rw_semaphore groups_sem; 691 struct rw_semaphore groups_sem;
692 atomic_t caching_threads;
692}; 693};
693 694
694/* 695/*
@@ -707,6 +708,9 @@ struct btrfs_free_cluster {
707 /* first extent starting offset */ 708 /* first extent starting offset */
708 u64 window_start; 709 u64 window_start;
709 710
711 /* if this cluster simply points at a bitmap in the block group */
712 bool points_to_bitmap;
713
710 struct btrfs_block_group_cache *block_group; 714 struct btrfs_block_group_cache *block_group;
711 /* 715 /*
712 * when a cluster is allocated from a block group, we put the 716 * when a cluster is allocated from a block group, we put the
@@ -716,24 +720,37 @@ struct btrfs_free_cluster {
716 struct list_head block_group_list; 720 struct list_head block_group_list;
717}; 721};
718 722
723enum btrfs_caching_type {
724 BTRFS_CACHE_NO = 0,
725 BTRFS_CACHE_STARTED = 1,
726 BTRFS_CACHE_FINISHED = 2,
727};
728
719struct btrfs_block_group_cache { 729struct btrfs_block_group_cache {
720 struct btrfs_key key; 730 struct btrfs_key key;
721 struct btrfs_block_group_item item; 731 struct btrfs_block_group_item item;
732 struct btrfs_fs_info *fs_info;
722 spinlock_t lock; 733 spinlock_t lock;
723 struct mutex cache_mutex;
724 u64 pinned; 734 u64 pinned;
725 u64 reserved; 735 u64 reserved;
726 u64 flags; 736 u64 flags;
727 int cached; 737 u64 sectorsize;
738 int extents_thresh;
739 int free_extents;
740 int total_bitmaps;
728 int ro; 741 int ro;
729 int dirty; 742 int dirty;
730 743
744 /* cache tracking stuff */
745 wait_queue_head_t caching_q;
746 int cached;
747
731 struct btrfs_space_info *space_info; 748 struct btrfs_space_info *space_info;
732 749
733 /* free space cache stuff */ 750 /* free space cache stuff */
734 spinlock_t tree_lock; 751 spinlock_t tree_lock;
735 struct rb_root free_space_bytes;
736 struct rb_root free_space_offset; 752 struct rb_root free_space_offset;
753 u64 free_space;
737 754
738 /* block group cache stuff */ 755 /* block group cache stuff */
739 struct rb_node cache_node; 756 struct rb_node cache_node;
@@ -808,6 +825,7 @@ struct btrfs_fs_info {
808 struct mutex drop_mutex; 825 struct mutex drop_mutex;
809 struct mutex volume_mutex; 826 struct mutex volume_mutex;
810 struct mutex tree_reloc_mutex; 827 struct mutex tree_reloc_mutex;
828 struct rw_semaphore extent_commit_sem;
811 829
812 /* 830 /*
813 * this protects the ordered operations list only while we are 831 * this protects the ordered operations list only while we are
@@ -1988,6 +2006,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
1988 u64 bytes); 2006 u64 bytes);
1989void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2007void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1990 u64 bytes); 2008 u64 bytes);
2009void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
1991/* ctree.c */ 2010/* ctree.c */
1992int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2011int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
1993 int level, int *slot); 2012 int level, int *slot);
@@ -2074,8 +2093,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2074int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); 2093int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
2075int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); 2094int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
2076int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); 2095int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
2077int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 2096int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
2078 *root);
2079int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 2097int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
2080 struct btrfs_root *root, 2098 struct btrfs_root *root,
2081 struct extent_buffer *node, 2099 struct extent_buffer *node,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d28d29c95f7c..e83be2e4602c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1639,6 +1639,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1639 mutex_init(&fs_info->cleaner_mutex); 1639 mutex_init(&fs_info->cleaner_mutex);
1640 mutex_init(&fs_info->volume_mutex); 1640 mutex_init(&fs_info->volume_mutex);
1641 mutex_init(&fs_info->tree_reloc_mutex); 1641 mutex_init(&fs_info->tree_reloc_mutex);
1642 init_rwsem(&fs_info->extent_commit_sem);
1642 1643
1643 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); 1644 btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
1644 btrfs_init_free_cluster(&fs_info->data_alloc_cluster); 1645 btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -1799,6 +1800,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1799 btrfs_super_chunk_root(disk_super), 1800 btrfs_super_chunk_root(disk_super),
1800 blocksize, generation); 1801 blocksize, generation);
1801 BUG_ON(!chunk_root->node); 1802 BUG_ON(!chunk_root->node);
1803 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
1804 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
1805 sb->s_id);
1806 goto fail_chunk_root;
1807 }
1802 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); 1808 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
1803 chunk_root->commit_root = btrfs_root_node(chunk_root); 1809 chunk_root->commit_root = btrfs_root_node(chunk_root);
1804 1810
@@ -1826,6 +1832,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1826 blocksize, generation); 1832 blocksize, generation);
1827 if (!tree_root->node) 1833 if (!tree_root->node)
1828 goto fail_chunk_root; 1834 goto fail_chunk_root;
1835 if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
1836 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
1837 sb->s_id);
1838 goto fail_tree_root;
1839 }
1829 btrfs_set_root_node(&tree_root->root_item, tree_root->node); 1840 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
1830 tree_root->commit_root = btrfs_root_node(tree_root); 1841 tree_root->commit_root = btrfs_root_node(tree_root);
1831 1842
@@ -2322,6 +2333,9 @@ int close_ctree(struct btrfs_root *root)
2322 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2333 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2323 } 2334 }
2324 2335
2336 fs_info->closing = 2;
2337 smp_mb();
2338
2325 if (fs_info->delalloc_bytes) { 2339 if (fs_info->delalloc_bytes) {
2326 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 2340 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
2327 (unsigned long long)fs_info->delalloc_bytes); 2341 (unsigned long long)fs_info->delalloc_bytes);
@@ -2343,6 +2357,7 @@ int close_ctree(struct btrfs_root *root)
2343 free_extent_buffer(root->fs_info->csum_root->commit_root); 2357 free_extent_buffer(root->fs_info->csum_root->commit_root);
2344 2358
2345 btrfs_free_block_groups(root->fs_info); 2359 btrfs_free_block_groups(root->fs_info);
2360 btrfs_free_pinned_extents(root->fs_info);
2346 2361
2347 del_fs_roots(fs_info); 2362 del_fs_roots(fs_info);
2348 2363
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index edc7d208c5ce..72a2b9c28e9f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@
21#include <linux/blkdev.h> 21#include <linux/blkdev.h>
22#include <linux/sort.h> 22#include <linux/sort.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/kthread.h>
24#include "compat.h" 25#include "compat.h"
25#include "hash.h" 26#include "hash.h"
26#include "ctree.h" 27#include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
61 struct btrfs_root *extent_root, u64 alloc_bytes, 62 struct btrfs_root *extent_root, u64 alloc_bytes,
62 u64 flags, int force); 63 u64 flags, int force);
63 64
65static noinline int
66block_group_cache_done(struct btrfs_block_group_cache *cache)
67{
68 smp_mb();
69 return cache->cached == BTRFS_CACHE_FINISHED;
70}
71
64static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 72static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
65{ 73{
66 return (cache->flags & bits) == bits; 74 return (cache->flags & bits) == bits;
@@ -146,20 +154,70 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
146} 154}
147 155
148/* 156/*
157 * We always set EXTENT_LOCKED for the super mirror extents so we don't
158 * overwrite them, so those bits need to be unset. Also, if we are unmounting
159 * with pinned extents still sitting there because we had a block group caching,
160 * we need to clear those now, since we are done.
161 */
162void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
163{
164 u64 start, end, last = 0;
165 int ret;
166
167 while (1) {
168 ret = find_first_extent_bit(&info->pinned_extents, last,
169 &start, &end,
170 EXTENT_LOCKED|EXTENT_DIRTY);
171 if (ret)
172 break;
173
174 clear_extent_bits(&info->pinned_extents, start, end,
175 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
176 last = end+1;
177 }
178}
179
180static int remove_sb_from_cache(struct btrfs_root *root,
181 struct btrfs_block_group_cache *cache)
182{
183 struct btrfs_fs_info *fs_info = root->fs_info;
184 u64 bytenr;
185 u64 *logical;
186 int stripe_len;
187 int i, nr, ret;
188
189 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
190 bytenr = btrfs_sb_offset(i);
191 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
192 cache->key.objectid, bytenr,
193 0, &logical, &nr, &stripe_len);
194 BUG_ON(ret);
195 while (nr--) {
196 try_lock_extent(&fs_info->pinned_extents,
197 logical[nr],
198 logical[nr] + stripe_len - 1, GFP_NOFS);
199 }
200 kfree(logical);
201 }
202
203 return 0;
204}
205
206/*
149 * this is only called by cache_block_group, since we could have freed extents 207 * this is only called by cache_block_group, since we could have freed extents
150 * we need to check the pinned_extents for any extents that can't be used yet 208 * we need to check the pinned_extents for any extents that can't be used yet
151 * since their free space will be released as soon as the transaction commits. 209 * since their free space will be released as soon as the transaction commits.
152 */ 210 */
153static int add_new_free_space(struct btrfs_block_group_cache *block_group, 211static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
154 struct btrfs_fs_info *info, u64 start, u64 end) 212 struct btrfs_fs_info *info, u64 start, u64 end)
155{ 213{
156 u64 extent_start, extent_end, size; 214 u64 extent_start, extent_end, size, total_added = 0;
157 int ret; 215 int ret;
158 216
159 while (start < end) { 217 while (start < end) {
160 ret = find_first_extent_bit(&info->pinned_extents, start, 218 ret = find_first_extent_bit(&info->pinned_extents, start,
161 &extent_start, &extent_end, 219 &extent_start, &extent_end,
162 EXTENT_DIRTY); 220 EXTENT_DIRTY|EXTENT_LOCKED);
163 if (ret) 221 if (ret)
164 break; 222 break;
165 223
@@ -167,6 +225,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
167 start = extent_end + 1; 225 start = extent_end + 1;
168 } else if (extent_start > start && extent_start < end) { 226 } else if (extent_start > start && extent_start < end) {
169 size = extent_start - start; 227 size = extent_start - start;
228 total_added += size;
170 ret = btrfs_add_free_space(block_group, start, 229 ret = btrfs_add_free_space(block_group, start,
171 size); 230 size);
172 BUG_ON(ret); 231 BUG_ON(ret);
@@ -178,84 +237,93 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
178 237
179 if (start < end) { 238 if (start < end) {
180 size = end - start; 239 size = end - start;
240 total_added += size;
181 ret = btrfs_add_free_space(block_group, start, size); 241 ret = btrfs_add_free_space(block_group, start, size);
182 BUG_ON(ret); 242 BUG_ON(ret);
183 } 243 }
184 244
185 return 0; 245 return total_added;
186} 246}
187 247
188static int remove_sb_from_cache(struct btrfs_root *root, 248static int caching_kthread(void *data)
189 struct btrfs_block_group_cache *cache)
190{
191 u64 bytenr;
192 u64 *logical;
193 int stripe_len;
194 int i, nr, ret;
195
196 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
197 bytenr = btrfs_sb_offset(i);
198 ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
199 cache->key.objectid, bytenr, 0,
200 &logical, &nr, &stripe_len);
201 BUG_ON(ret);
202 while (nr--) {
203 btrfs_remove_free_space(cache, logical[nr],
204 stripe_len);
205 }
206 kfree(logical);
207 }
208 return 0;
209}
210
211static int cache_block_group(struct btrfs_root *root,
212 struct btrfs_block_group_cache *block_group)
213{ 249{
250 struct btrfs_block_group_cache *block_group = data;
251 struct btrfs_fs_info *fs_info = block_group->fs_info;
252 u64 last = 0;
214 struct btrfs_path *path; 253 struct btrfs_path *path;
215 int ret = 0; 254 int ret = 0;
216 struct btrfs_key key; 255 struct btrfs_key key;
217 struct extent_buffer *leaf; 256 struct extent_buffer *leaf;
218 int slot; 257 int slot;
219 u64 last; 258 u64 total_found = 0;
220 259
221 if (!block_group) 260 BUG_ON(!fs_info);
222 return 0;
223
224 root = root->fs_info->extent_root;
225
226 if (block_group->cached)
227 return 0;
228 261
229 path = btrfs_alloc_path(); 262 path = btrfs_alloc_path();
230 if (!path) 263 if (!path)
231 return -ENOMEM; 264 return -ENOMEM;
232 265
233 path->reada = 2; 266 atomic_inc(&block_group->space_info->caching_threads);
267 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
234 /* 268 /*
235 * we get into deadlocks with paths held by callers of this function. 269 * We don't want to deadlock with somebody trying to allocate a new
236 * since the alloc_mutex is protecting things right now, just 270 * extent for the extent root while also trying to search the extent
237 * skip the locking here 271 * root to add free space. So we skip locking and search the commit
272 * root, since its read-only
238 */ 273 */
239 path->skip_locking = 1; 274 path->skip_locking = 1;
240 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 275 path->search_commit_root = 1;
276 path->reada = 2;
277
241 key.objectid = last; 278 key.objectid = last;
242 key.offset = 0; 279 key.offset = 0;
243 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 280 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
244 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 281again:
282 /* need to make sure the commit_root doesn't disappear */
283 down_read(&fs_info->extent_commit_sem);
284
285 ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
245 if (ret < 0) 286 if (ret < 0)
246 goto err; 287 goto err;
247 288
248 while (1) { 289 while (1) {
290 smp_mb();
291 if (block_group->fs_info->closing > 1) {
292 last = (u64)-1;
293 break;
294 }
295
249 leaf = path->nodes[0]; 296 leaf = path->nodes[0];
250 slot = path->slots[0]; 297 slot = path->slots[0];
251 if (slot >= btrfs_header_nritems(leaf)) { 298 if (slot >= btrfs_header_nritems(leaf)) {
252 ret = btrfs_next_leaf(root, path); 299 ret = btrfs_next_leaf(fs_info->extent_root, path);
253 if (ret < 0) 300 if (ret < 0)
254 goto err; 301 goto err;
255 if (ret == 0) 302 else if (ret)
256 continue;
257 else
258 break; 303 break;
304
305 if (need_resched() ||
306 btrfs_transaction_in_commit(fs_info)) {
307 leaf = path->nodes[0];
308
309 /* this shouldn't happen, but if the
310 * leaf is empty just move on.
311 */
312 if (btrfs_header_nritems(leaf) == 0)
313 break;
314 /*
315 * we need to copy the key out so that
316 * we are sure the next search advances
317 * us forward in the btree.
318 */
319 btrfs_item_key_to_cpu(leaf, &key, 0);
320 btrfs_release_path(fs_info->extent_root, path);
321 up_read(&fs_info->extent_commit_sem);
322 schedule_timeout(1);
323 goto again;
324 }
325
326 continue;
259 } 327 }
260 btrfs_item_key_to_cpu(leaf, &key, slot); 328 btrfs_item_key_to_cpu(leaf, &key, slot);
261 if (key.objectid < block_group->key.objectid) 329 if (key.objectid < block_group->key.objectid)
@@ -266,24 +334,59 @@ static int cache_block_group(struct btrfs_root *root,
266 break; 334 break;
267 335
268 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 336 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
269 add_new_free_space(block_group, root->fs_info, last, 337 total_found += add_new_free_space(block_group,
270 key.objectid); 338 fs_info, last,
271 339 key.objectid);
272 last = key.objectid + key.offset; 340 last = key.objectid + key.offset;
273 } 341 }
342
343 if (total_found > (1024 * 1024 * 2)) {
344 total_found = 0;
345 wake_up(&block_group->caching_q);
346 }
274next: 347next:
275 path->slots[0]++; 348 path->slots[0]++;
276 } 349 }
350 ret = 0;
277 351
278 add_new_free_space(block_group, root->fs_info, last, 352 total_found += add_new_free_space(block_group, fs_info, last,
279 block_group->key.objectid + 353 block_group->key.objectid +
280 block_group->key.offset); 354 block_group->key.offset);
355
356 spin_lock(&block_group->lock);
357 block_group->cached = BTRFS_CACHE_FINISHED;
358 spin_unlock(&block_group->lock);
281 359
282 block_group->cached = 1;
283 remove_sb_from_cache(root, block_group);
284 ret = 0;
285err: 360err:
286 btrfs_free_path(path); 361 btrfs_free_path(path);
362 up_read(&fs_info->extent_commit_sem);
363 atomic_dec(&block_group->space_info->caching_threads);
364 wake_up(&block_group->caching_q);
365
366 return 0;
367}
368
369static int cache_block_group(struct btrfs_block_group_cache *cache)
370{
371 struct task_struct *tsk;
372 int ret = 0;
373
374 spin_lock(&cache->lock);
375 if (cache->cached != BTRFS_CACHE_NO) {
376 spin_unlock(&cache->lock);
377 return ret;
378 }
379 cache->cached = BTRFS_CACHE_STARTED;
380 spin_unlock(&cache->lock);
381
382 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
383 cache->key.objectid);
384 if (IS_ERR(tsk)) {
385 ret = PTR_ERR(tsk);
386 printk(KERN_ERR "error running thread %d\n", ret);
387 BUG();
388 }
389
287 return ret; 390 return ret;
288} 391}
289 392
@@ -990,15 +1093,13 @@ static inline int extent_ref_type(u64 parent, u64 owner)
990 return type; 1093 return type;
991} 1094}
992 1095
993static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) 1096static int find_next_key(struct btrfs_path *path, int level,
1097 struct btrfs_key *key)
994 1098
995{ 1099{
996 int level; 1100 for (; level < BTRFS_MAX_LEVEL; level++) {
997 BUG_ON(!path->keep_locks);
998 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
999 if (!path->nodes[level]) 1101 if (!path->nodes[level])
1000 break; 1102 break;
1001 btrfs_assert_tree_locked(path->nodes[level]);
1002 if (path->slots[level] + 1 >= 1103 if (path->slots[level] + 1 >=
1003 btrfs_header_nritems(path->nodes[level])) 1104 btrfs_header_nritems(path->nodes[level]))
1004 continue; 1105 continue;
@@ -1158,7 +1259,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1158 * For simplicity, we just do not add new inline back 1259 * For simplicity, we just do not add new inline back
1159 * ref if there is any kind of item for this block 1260 * ref if there is any kind of item for this block
1160 */ 1261 */
1161 if (find_next_key(path, &key) == 0 && key.objectid == bytenr && 1262 if (find_next_key(path, 0, &key) == 0 &&
1263 key.objectid == bytenr &&
1162 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1264 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1163 err = -EAGAIN; 1265 err = -EAGAIN;
1164 goto out; 1266 goto out;
@@ -2388,13 +2490,29 @@ fail:
2388 2490
2389} 2491}
2390 2492
2493static struct btrfs_block_group_cache *
2494next_block_group(struct btrfs_root *root,
2495 struct btrfs_block_group_cache *cache)
2496{
2497 struct rb_node *node;
2498 spin_lock(&root->fs_info->block_group_cache_lock);
2499 node = rb_next(&cache->cache_node);
2500 btrfs_put_block_group(cache);
2501 if (node) {
2502 cache = rb_entry(node, struct btrfs_block_group_cache,
2503 cache_node);
2504 atomic_inc(&cache->count);
2505 } else
2506 cache = NULL;
2507 spin_unlock(&root->fs_info->block_group_cache_lock);
2508 return cache;
2509}
2510
2391int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2511int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2392 struct btrfs_root *root) 2512 struct btrfs_root *root)
2393{ 2513{
2394 struct btrfs_block_group_cache *cache, *entry; 2514 struct btrfs_block_group_cache *cache;
2395 struct rb_node *n;
2396 int err = 0; 2515 int err = 0;
2397 int werr = 0;
2398 struct btrfs_path *path; 2516 struct btrfs_path *path;
2399 u64 last = 0; 2517 u64 last = 0;
2400 2518
@@ -2403,39 +2521,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2403 return -ENOMEM; 2521 return -ENOMEM;
2404 2522
2405 while (1) { 2523 while (1) {
2406 cache = NULL; 2524 if (last == 0) {
2407 spin_lock(&root->fs_info->block_group_cache_lock); 2525 err = btrfs_run_delayed_refs(trans, root,
2408 for (n = rb_first(&root->fs_info->block_group_cache_tree); 2526 (unsigned long)-1);
2409 n; n = rb_next(n)) { 2527 BUG_ON(err);
2410 entry = rb_entry(n, struct btrfs_block_group_cache,
2411 cache_node);
2412 if (entry->dirty) {
2413 cache = entry;
2414 break;
2415 }
2416 } 2528 }
2417 spin_unlock(&root->fs_info->block_group_cache_lock);
2418 2529
2419 if (!cache) 2530 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2420 break; 2531 while (cache) {
2532 if (cache->dirty)
2533 break;
2534 cache = next_block_group(root, cache);
2535 }
2536 if (!cache) {
2537 if (last == 0)
2538 break;
2539 last = 0;
2540 continue;
2541 }
2421 2542
2422 cache->dirty = 0; 2543 cache->dirty = 0;
2423 last += cache->key.offset; 2544 last = cache->key.objectid + cache->key.offset;
2424 2545
2425 err = write_one_cache_group(trans, root, 2546 err = write_one_cache_group(trans, root, path, cache);
2426 path, cache); 2547 BUG_ON(err);
2427 /* 2548 btrfs_put_block_group(cache);
2428 * if we fail to write the cache group, we want
2429 * to keep it marked dirty in hopes that a later
2430 * write will work
2431 */
2432 if (err) {
2433 werr = err;
2434 continue;
2435 }
2436 } 2549 }
2550
2437 btrfs_free_path(path); 2551 btrfs_free_path(path);
2438 return werr; 2552 return 0;
2439} 2553}
2440 2554
2441int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2555int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -2485,6 +2599,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2485 found->force_alloc = 0; 2599 found->force_alloc = 0;
2486 *space_info = found; 2600 *space_info = found;
2487 list_add_rcu(&found->list, &info->space_info); 2601 list_add_rcu(&found->list, &info->space_info);
2602 atomic_set(&found->caching_threads, 0);
2488 return 0; 2603 return 0;
2489} 2604}
2490 2605
@@ -2697,7 +2812,7 @@ again:
2697 2812
2698 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2813 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
2699 ", %llu bytes_used, %llu bytes_reserved, " 2814 ", %llu bytes_used, %llu bytes_reserved, "
2700 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 2815 "%llu bytes_pinned, %llu bytes_readonly, %llu may use "
2701 "%llu total\n", (unsigned long long)bytes, 2816 "%llu total\n", (unsigned long long)bytes,
2702 (unsigned long long)data_sinfo->bytes_delalloc, 2817 (unsigned long long)data_sinfo->bytes_delalloc,
2703 (unsigned long long)data_sinfo->bytes_used, 2818 (unsigned long long)data_sinfo->bytes_used,
@@ -2948,13 +3063,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
2948 struct btrfs_block_group_cache *cache; 3063 struct btrfs_block_group_cache *cache;
2949 struct btrfs_fs_info *fs_info = root->fs_info; 3064 struct btrfs_fs_info *fs_info = root->fs_info;
2950 3065
2951 if (pin) { 3066 if (pin)
2952 set_extent_dirty(&fs_info->pinned_extents, 3067 set_extent_dirty(&fs_info->pinned_extents,
2953 bytenr, bytenr + num - 1, GFP_NOFS); 3068 bytenr, bytenr + num - 1, GFP_NOFS);
2954 } else {
2955 clear_extent_dirty(&fs_info->pinned_extents,
2956 bytenr, bytenr + num - 1, GFP_NOFS);
2957 }
2958 3069
2959 while (num > 0) { 3070 while (num > 0) {
2960 cache = btrfs_lookup_block_group(fs_info, bytenr); 3071 cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2970,14 +3081,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
2970 spin_unlock(&cache->space_info->lock); 3081 spin_unlock(&cache->space_info->lock);
2971 fs_info->total_pinned += len; 3082 fs_info->total_pinned += len;
2972 } else { 3083 } else {
3084 int unpin = 0;
3085
3086 /*
3087 * in order to not race with the block group caching, we
3088 * only want to unpin the extent if we are cached. If
3089 * we aren't cached, we want to start async caching this
3090 * block group so we can free the extent the next time
3091 * around.
3092 */
2973 spin_lock(&cache->space_info->lock); 3093 spin_lock(&cache->space_info->lock);
2974 spin_lock(&cache->lock); 3094 spin_lock(&cache->lock);
2975 cache->pinned -= len; 3095 unpin = (cache->cached == BTRFS_CACHE_FINISHED);
2976 cache->space_info->bytes_pinned -= len; 3096 if (likely(unpin)) {
3097 cache->pinned -= len;
3098 cache->space_info->bytes_pinned -= len;
3099 fs_info->total_pinned -= len;
3100 }
2977 spin_unlock(&cache->lock); 3101 spin_unlock(&cache->lock);
2978 spin_unlock(&cache->space_info->lock); 3102 spin_unlock(&cache->space_info->lock);
2979 fs_info->total_pinned -= len; 3103
2980 if (cache->cached) 3104 if (likely(unpin))
3105 clear_extent_dirty(&fs_info->pinned_extents,
3106 bytenr, bytenr + len -1,
3107 GFP_NOFS);
3108 else
3109 cache_block_group(cache);
3110
3111 if (unpin)
2981 btrfs_add_free_space(cache, bytenr, len); 3112 btrfs_add_free_space(cache, bytenr, len);
2982 } 3113 }
2983 btrfs_put_block_group(cache); 3114 btrfs_put_block_group(cache);
@@ -3031,6 +3162,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
3031 &start, &end, EXTENT_DIRTY); 3162 &start, &end, EXTENT_DIRTY);
3032 if (ret) 3163 if (ret)
3033 break; 3164 break;
3165
3034 set_extent_dirty(copy, start, end, GFP_NOFS); 3166 set_extent_dirty(copy, start, end, GFP_NOFS);
3035 last = end + 1; 3167 last = end + 1;
3036 } 3168 }
@@ -3059,6 +3191,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
3059 3191
3060 cond_resched(); 3192 cond_resched();
3061 } 3193 }
3194
3062 return ret; 3195 return ret;
3063} 3196}
3064 3197
@@ -3437,6 +3570,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
3437} 3570}
3438 3571
3439/* 3572/*
3573 * when we wait for progress in the block group caching, its because
3574 * our allocation attempt failed at least once. So, we must sleep
3575 * and let some progress happen before we try again.
3576 *
3577 * This function will sleep at least once waiting for new free space to
3578 * show up, and then it will check the block group free space numbers
3579 * for our min num_bytes. Another option is to have it go ahead
3580 * and look in the rbtree for a free extent of a given size, but this
3581 * is a good start.
3582 */
3583static noinline int
3584wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
3585 u64 num_bytes)
3586{
3587 DEFINE_WAIT(wait);
3588
3589 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
3590
3591 if (block_group_cache_done(cache)) {
3592 finish_wait(&cache->caching_q, &wait);
3593 return 0;
3594 }
3595 schedule();
3596 finish_wait(&cache->caching_q, &wait);
3597
3598 wait_event(cache->caching_q, block_group_cache_done(cache) ||
3599 (cache->free_space >= num_bytes));
3600 return 0;
3601}
3602
3603enum btrfs_loop_type {
3604 LOOP_CACHED_ONLY = 0,
3605 LOOP_CACHING_NOWAIT = 1,
3606 LOOP_CACHING_WAIT = 2,
3607 LOOP_ALLOC_CHUNK = 3,
3608 LOOP_NO_EMPTY_SIZE = 4,
3609};
3610
3611/*
3440 * walks the btree of allocated extents and find a hole of a given size. 3612 * walks the btree of allocated extents and find a hole of a given size.
3441 * The key ins is changed to record the hole: 3613 * The key ins is changed to record the hole:
3442 * ins->objectid == block start 3614 * ins->objectid == block start
@@ -3461,6 +3633,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3461 struct btrfs_space_info *space_info; 3633 struct btrfs_space_info *space_info;
3462 int last_ptr_loop = 0; 3634 int last_ptr_loop = 0;
3463 int loop = 0; 3635 int loop = 0;
3636 bool found_uncached_bg = false;
3464 3637
3465 WARN_ON(num_bytes < root->sectorsize); 3638 WARN_ON(num_bytes < root->sectorsize);
3466 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3639 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3492,15 +3665,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
3492 search_start = max(search_start, first_logical_byte(root, 0)); 3665 search_start = max(search_start, first_logical_byte(root, 0));
3493 search_start = max(search_start, hint_byte); 3666 search_start = max(search_start, hint_byte);
3494 3667
3495 if (!last_ptr) { 3668 if (!last_ptr)
3496 empty_cluster = 0; 3669 empty_cluster = 0;
3497 loop = 1;
3498 }
3499 3670
3500 if (search_start == hint_byte) { 3671 if (search_start == hint_byte) {
3501 block_group = btrfs_lookup_block_group(root->fs_info, 3672 block_group = btrfs_lookup_block_group(root->fs_info,
3502 search_start); 3673 search_start);
3503 if (block_group && block_group_bits(block_group, data)) { 3674 /*
3675 * we don't want to use the block group if it doesn't match our
3676 * allocation bits, or if its not cached.
3677 */
3678 if (block_group && block_group_bits(block_group, data) &&
3679 block_group_cache_done(block_group)) {
3504 down_read(&space_info->groups_sem); 3680 down_read(&space_info->groups_sem);
3505 if (list_empty(&block_group->list) || 3681 if (list_empty(&block_group->list) ||
3506 block_group->ro) { 3682 block_group->ro) {
@@ -3523,21 +3699,35 @@ search:
3523 down_read(&space_info->groups_sem); 3699 down_read(&space_info->groups_sem);
3524 list_for_each_entry(block_group, &space_info->block_groups, list) { 3700 list_for_each_entry(block_group, &space_info->block_groups, list) {
3525 u64 offset; 3701 u64 offset;
3702 int cached;
3526 3703
3527 atomic_inc(&block_group->count); 3704 atomic_inc(&block_group->count);
3528 search_start = block_group->key.objectid; 3705 search_start = block_group->key.objectid;
3529 3706
3530have_block_group: 3707have_block_group:
3531 if (unlikely(!block_group->cached)) { 3708 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
3532 mutex_lock(&block_group->cache_mutex); 3709 /*
3533 ret = cache_block_group(root, block_group); 3710 * we want to start caching kthreads, but not too many
3534 mutex_unlock(&block_group->cache_mutex); 3711 * right off the bat so we don't overwhelm the system,
3535 if (ret) { 3712 * so only start them if there are less than 2 and we're
3536 btrfs_put_block_group(block_group); 3713 * in the initial allocation phase.
3537 break; 3714 */
3715 if (loop > LOOP_CACHING_NOWAIT ||
3716 atomic_read(&space_info->caching_threads) < 2) {
3717 ret = cache_block_group(block_group);
3718 BUG_ON(ret);
3538 } 3719 }
3539 } 3720 }
3540 3721
3722 cached = block_group_cache_done(block_group);
3723 if (unlikely(!cached)) {
3724 found_uncached_bg = true;
3725
3726 /* if we only want cached bgs, loop */
3727 if (loop == LOOP_CACHED_ONLY)
3728 goto loop;
3729 }
3730
3541 if (unlikely(block_group->ro)) 3731 if (unlikely(block_group->ro))
3542 goto loop; 3732 goto loop;
3543 3733
@@ -3616,14 +3806,21 @@ refill_cluster:
3616 spin_unlock(&last_ptr->refill_lock); 3806 spin_unlock(&last_ptr->refill_lock);
3617 goto checks; 3807 goto checks;
3618 } 3808 }
3809 } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
3810 spin_unlock(&last_ptr->refill_lock);
3811
3812 wait_block_group_cache_progress(block_group,
3813 num_bytes + empty_cluster + empty_size);
3814 goto have_block_group;
3619 } 3815 }
3816
3620 /* 3817 /*
3621 * at this point we either didn't find a cluster 3818 * at this point we either didn't find a cluster
3622 * or we weren't able to allocate a block from our 3819 * or we weren't able to allocate a block from our
3623 * cluster. Free the cluster we've been trying 3820 * cluster. Free the cluster we've been trying
3624 * to use, and go to the next block group 3821 * to use, and go to the next block group
3625 */ 3822 */
3626 if (loop < 2) { 3823 if (loop < LOOP_NO_EMPTY_SIZE) {
3627 btrfs_return_cluster_to_free_space(NULL, 3824 btrfs_return_cluster_to_free_space(NULL,
3628 last_ptr); 3825 last_ptr);
3629 spin_unlock(&last_ptr->refill_lock); 3826 spin_unlock(&last_ptr->refill_lock);
@@ -3634,11 +3831,17 @@ refill_cluster:
3634 3831
3635 offset = btrfs_find_space_for_alloc(block_group, search_start, 3832 offset = btrfs_find_space_for_alloc(block_group, search_start,
3636 num_bytes, empty_size); 3833 num_bytes, empty_size);
3637 if (!offset) 3834 if (!offset && (cached || (!cached &&
3835 loop == LOOP_CACHING_NOWAIT))) {
3638 goto loop; 3836 goto loop;
3837 } else if (!offset && (!cached &&
3838 loop > LOOP_CACHING_NOWAIT)) {
3839 wait_block_group_cache_progress(block_group,
3840 num_bytes + empty_size);
3841 goto have_block_group;
3842 }
3639checks: 3843checks:
3640 search_start = stripe_align(root, offset); 3844 search_start = stripe_align(root, offset);
3641
3642 /* move on to the next group */ 3845 /* move on to the next group */
3643 if (search_start + num_bytes >= search_end) { 3846 if (search_start + num_bytes >= search_end) {
3644 btrfs_add_free_space(block_group, offset, num_bytes); 3847 btrfs_add_free_space(block_group, offset, num_bytes);
@@ -3684,13 +3887,26 @@ loop:
3684 } 3887 }
3685 up_read(&space_info->groups_sem); 3888 up_read(&space_info->groups_sem);
3686 3889
3687 /* loop == 0, try to find a clustered alloc in every block group 3890 /* LOOP_CACHED_ONLY, only search fully cached block groups
3688 * loop == 1, try again after forcing a chunk allocation 3891 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
3689 * loop == 2, set empty_size and empty_cluster to 0 and try again 3892 * dont wait foR them to finish caching
3893 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
3894 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
3895 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
3896 * again
3690 */ 3897 */
3691 if (!ins->objectid && loop < 3 && 3898 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
3692 (empty_size || empty_cluster || allowed_chunk_alloc)) { 3899 (found_uncached_bg || empty_size || empty_cluster ||
3693 if (loop >= 2) { 3900 allowed_chunk_alloc)) {
3901 if (found_uncached_bg) {
3902 found_uncached_bg = false;
3903 if (loop < LOOP_CACHING_WAIT) {
3904 loop++;
3905 goto search;
3906 }
3907 }
3908
3909 if (loop == LOOP_ALLOC_CHUNK) {
3694 empty_size = 0; 3910 empty_size = 0;
3695 empty_cluster = 0; 3911 empty_cluster = 0;
3696 } 3912 }
@@ -3703,7 +3919,7 @@ loop:
3703 space_info->force_alloc = 1; 3919 space_info->force_alloc = 1;
3704 } 3920 }
3705 3921
3706 if (loop < 3) { 3922 if (loop < LOOP_NO_EMPTY_SIZE) {
3707 loop++; 3923 loop++;
3708 goto search; 3924 goto search;
3709 } 3925 }
@@ -3799,7 +4015,7 @@ again:
3799 num_bytes, data, 1); 4015 num_bytes, data, 1);
3800 goto again; 4016 goto again;
3801 } 4017 }
3802 if (ret) { 4018 if (ret == -ENOSPC) {
3803 struct btrfs_space_info *sinfo; 4019 struct btrfs_space_info *sinfo;
3804 4020
3805 sinfo = __find_space_info(root->fs_info, data); 4021 sinfo = __find_space_info(root->fs_info, data);
@@ -3807,7 +4023,6 @@ again:
3807 "wanted %llu\n", (unsigned long long)data, 4023 "wanted %llu\n", (unsigned long long)data,
3808 (unsigned long long)num_bytes); 4024 (unsigned long long)num_bytes);
3809 dump_space_info(sinfo, num_bytes); 4025 dump_space_info(sinfo, num_bytes);
3810 BUG();
3811 } 4026 }
3812 4027
3813 return ret; 4028 return ret;
@@ -3845,7 +4060,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
3845 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 4060 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
3846 empty_size, hint_byte, search_end, ins, 4061 empty_size, hint_byte, search_end, ins,
3847 data); 4062 data);
3848 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4063 if (!ret)
4064 update_reserved_extents(root, ins->objectid, ins->offset, 1);
4065
3849 return ret; 4066 return ret;
3850} 4067}
3851 4068
@@ -4007,9 +4224,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
4007 struct btrfs_block_group_cache *block_group; 4224 struct btrfs_block_group_cache *block_group;
4008 4225
4009 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4226 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
4010 mutex_lock(&block_group->cache_mutex); 4227 cache_block_group(block_group);
4011 cache_block_group(root, block_group); 4228 wait_event(block_group->caching_q,
4012 mutex_unlock(&block_group->cache_mutex); 4229 block_group_cache_done(block_group));
4013 4230
4014 ret = btrfs_remove_free_space(block_group, ins->objectid, 4231 ret = btrfs_remove_free_space(block_group, ins->objectid,
4015 ins->offset); 4232 ins->offset);
@@ -4040,7 +4257,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
4040 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4257 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
4041 empty_size, hint_byte, search_end, 4258 empty_size, hint_byte, search_end,
4042 ins, 0); 4259 ins, 0);
4043 BUG_ON(ret); 4260 if (ret)
4261 return ret;
4044 4262
4045 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4263 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
4046 if (parent == 0) 4264 if (parent == 0)
@@ -4128,6 +4346,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
4128 return buf; 4346 return buf;
4129} 4347}
4130 4348
4349#if 0
4131int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4350int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4132 struct btrfs_root *root, struct extent_buffer *leaf) 4351 struct btrfs_root *root, struct extent_buffer *leaf)
4133{ 4352{
@@ -4171,8 +4390,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
4171 return 0; 4390 return 0;
4172} 4391}
4173 4392
4174#if 0
4175
4176static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4393static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
4177 struct btrfs_root *root, 4394 struct btrfs_root *root,
4178 struct btrfs_leaf_ref *ref) 4395 struct btrfs_leaf_ref *ref)
@@ -4553,262 +4770,471 @@ out:
4553} 4770}
4554#endif 4771#endif
4555 4772
4773struct walk_control {
4774 u64 refs[BTRFS_MAX_LEVEL];
4775 u64 flags[BTRFS_MAX_LEVEL];
4776 struct btrfs_key update_progress;
4777 int stage;
4778 int level;
4779 int shared_level;
4780 int update_ref;
4781 int keep_locks;
4782};
4783
4784#define DROP_REFERENCE 1
4785#define UPDATE_BACKREF 2
4786
4556/* 4787/*
4557 * helper function for drop_subtree, this function is similar to 4788 * hepler to process tree block while walking down the tree.
4558 * walk_down_tree. The main difference is that it checks reference 4789 *
4559 * counts while tree blocks are locked. 4790 * when wc->stage == DROP_REFERENCE, this function checks
4791 * reference count of the block. if the block is shared and
4792 * we need update back refs for the subtree rooted at the
4793 * block, this function changes wc->stage to UPDATE_BACKREF
4794 *
4795 * when wc->stage == UPDATE_BACKREF, this function updates
4796 * back refs for pointers in the block.
4797 *
4798 * NOTE: return value 1 means we should stop walking down.
4560 */ 4799 */
4561static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4800static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
4562 struct btrfs_root *root, 4801 struct btrfs_root *root,
4563 struct btrfs_path *path, int *level) 4802 struct btrfs_path *path,
4803 struct walk_control *wc)
4564{ 4804{
4565 struct extent_buffer *next; 4805 int level = wc->level;
4566 struct extent_buffer *cur; 4806 struct extent_buffer *eb = path->nodes[level];
4567 struct extent_buffer *parent; 4807 struct btrfs_key key;
4568 u64 bytenr; 4808 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
4569 u64 ptr_gen;
4570 u64 refs;
4571 u64 flags;
4572 u32 blocksize;
4573 int ret; 4809 int ret;
4574 4810
4575 cur = path->nodes[*level]; 4811 if (wc->stage == UPDATE_BACKREF &&
4576 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, 4812 btrfs_header_owner(eb) != root->root_key.objectid)
4577 &refs, &flags); 4813 return 1;
4578 BUG_ON(ret);
4579 if (refs > 1)
4580 goto out;
4581 4814
4582 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4815 /*
4816 * when reference count of tree block is 1, it won't increase
4817 * again. once full backref flag is set, we never clear it.
4818 */
4819 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
4820 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) {
4821 BUG_ON(!path->locks[level]);
4822 ret = btrfs_lookup_extent_info(trans, root,
4823 eb->start, eb->len,
4824 &wc->refs[level],
4825 &wc->flags[level]);
4826 BUG_ON(ret);
4827 BUG_ON(wc->refs[level] == 0);
4828 }
4583 4829
4584 while (*level >= 0) { 4830 if (wc->stage == DROP_REFERENCE &&
4585 cur = path->nodes[*level]; 4831 wc->update_ref && wc->refs[level] > 1) {
4586 if (*level == 0) { 4832 BUG_ON(eb == root->node);
4587 ret = btrfs_drop_leaf_ref(trans, root, cur); 4833 BUG_ON(path->slots[level] > 0);
4588 BUG_ON(ret); 4834 if (level == 0)
4589 clean_tree_block(trans, root, cur); 4835 btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
4590 break; 4836 else
4591 } 4837 btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
4592 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 4838 if (btrfs_header_owner(eb) == root->root_key.objectid &&
4593 clean_tree_block(trans, root, cur); 4839 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
4594 break; 4840 wc->stage = UPDATE_BACKREF;
4841 wc->shared_level = level;
4595 } 4842 }
4843 }
4596 4844
4597 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4845 if (wc->stage == DROP_REFERENCE) {
4598 blocksize = btrfs_level_size(root, *level - 1); 4846 if (wc->refs[level] > 1)
4599 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4847 return 1;
4600 4848
4601 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4849 if (path->locks[level] && !wc->keep_locks) {
4602 btrfs_tree_lock(next); 4850 btrfs_tree_unlock(eb);
4603 btrfs_set_lock_blocking(next); 4851 path->locks[level] = 0;
4852 }
4853 return 0;
4854 }
4604 4855
4605 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4856 /* wc->stage == UPDATE_BACKREF */
4606 &refs, &flags); 4857 if (!(wc->flags[level] & flag)) {
4858 BUG_ON(!path->locks[level]);
4859 ret = btrfs_inc_ref(trans, root, eb, 1);
4607 BUG_ON(ret); 4860 BUG_ON(ret);
4608 if (refs > 1) { 4861 ret = btrfs_dec_ref(trans, root, eb, 0);
4609 parent = path->nodes[*level]; 4862 BUG_ON(ret);
4610 ret = btrfs_free_extent(trans, root, bytenr, 4863 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
4611 blocksize, parent->start, 4864 eb->len, flag, 0);
4612 btrfs_header_owner(parent), 4865 BUG_ON(ret);
4613 *level - 1, 0); 4866 wc->flags[level] |= flag;
4867 }
4868
4869 /*
4870 * the block is shared by multiple trees, so it's not good to
4871 * keep the tree lock
4872 */
4873 if (path->locks[level] && level > 0) {
4874 btrfs_tree_unlock(eb);
4875 path->locks[level] = 0;
4876 }
4877 return 0;
4878}
4879
4880/*
4881 * hepler to process tree block while walking up the tree.
4882 *
4883 * when wc->stage == DROP_REFERENCE, this function drops
4884 * reference count on the block.
4885 *
4886 * when wc->stage == UPDATE_BACKREF, this function changes
4887 * wc->stage back to DROP_REFERENCE if we changed wc->stage
4888 * to UPDATE_BACKREF previously while processing the block.
4889 *
4890 * NOTE: return value 1 means we should stop walking up.
4891 */
4892static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
4893 struct btrfs_root *root,
4894 struct btrfs_path *path,
4895 struct walk_control *wc)
4896{
4897 int ret = 0;
4898 int level = wc->level;
4899 struct extent_buffer *eb = path->nodes[level];
4900 u64 parent = 0;
4901
4902 if (wc->stage == UPDATE_BACKREF) {
4903 BUG_ON(wc->shared_level < level);
4904 if (level < wc->shared_level)
4905 goto out;
4906
4907 BUG_ON(wc->refs[level] <= 1);
4908 ret = find_next_key(path, level + 1, &wc->update_progress);
4909 if (ret > 0)
4910 wc->update_ref = 0;
4911
4912 wc->stage = DROP_REFERENCE;
4913 wc->shared_level = -1;
4914 path->slots[level] = 0;
4915
4916 /*
4917 * check reference count again if the block isn't locked.
4918 * we should start walking down the tree again if reference
4919 * count is one.
4920 */
4921 if (!path->locks[level]) {
4922 BUG_ON(level == 0);
4923 btrfs_tree_lock(eb);
4924 btrfs_set_lock_blocking(eb);
4925 path->locks[level] = 1;
4926
4927 ret = btrfs_lookup_extent_info(trans, root,
4928 eb->start, eb->len,
4929 &wc->refs[level],
4930 &wc->flags[level]);
4614 BUG_ON(ret); 4931 BUG_ON(ret);
4615 path->slots[*level]++; 4932 BUG_ON(wc->refs[level] == 0);
4616 btrfs_tree_unlock(next); 4933 if (wc->refs[level] == 1) {
4617 free_extent_buffer(next); 4934 btrfs_tree_unlock(eb);
4618 continue; 4935 path->locks[level] = 0;
4936 return 1;
4937 }
4938 } else {
4939 BUG_ON(level != 0);
4619 } 4940 }
4941 }
4620 4942
4621 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4943 /* wc->stage == DROP_REFERENCE */
4944 BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
4622 4945
4623 *level = btrfs_header_level(next); 4946 if (wc->refs[level] == 1) {
4624 path->nodes[*level] = next; 4947 if (level == 0) {
4625 path->slots[*level] = 0; 4948 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4626 path->locks[*level] = 1; 4949 ret = btrfs_dec_ref(trans, root, eb, 1);
4627 cond_resched(); 4950 else
4951 ret = btrfs_dec_ref(trans, root, eb, 0);
4952 BUG_ON(ret);
4953 }
4954 /* make block locked assertion in clean_tree_block happy */
4955 if (!path->locks[level] &&
4956 btrfs_header_generation(eb) == trans->transid) {
4957 btrfs_tree_lock(eb);
4958 btrfs_set_lock_blocking(eb);
4959 path->locks[level] = 1;
4960 }
4961 clean_tree_block(trans, root, eb);
4628 } 4962 }
4629out:
4630 if (path->nodes[*level] == root->node)
4631 parent = path->nodes[*level];
4632 else
4633 parent = path->nodes[*level + 1];
4634 bytenr = path->nodes[*level]->start;
4635 blocksize = path->nodes[*level]->len;
4636 4963
4637 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, 4964 if (eb == root->node) {
4638 btrfs_header_owner(parent), *level, 0); 4965 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4966 parent = eb->start;
4967 else
4968 BUG_ON(root->root_key.objectid !=
4969 btrfs_header_owner(eb));
4970 } else {
4971 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
4972 parent = path->nodes[level + 1]->start;
4973 else
4974 BUG_ON(root->root_key.objectid !=
4975 btrfs_header_owner(path->nodes[level + 1]));
4976 }
4977
4978 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent,
4979 root->root_key.objectid, level, 0);
4639 BUG_ON(ret); 4980 BUG_ON(ret);
4981out:
4982 wc->refs[level] = 0;
4983 wc->flags[level] = 0;
4984 return ret;
4985}
4986
4987static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
4988 struct btrfs_root *root,
4989 struct btrfs_path *path,
4990 struct walk_control *wc)
4991{
4992 struct extent_buffer *next;
4993 struct extent_buffer *cur;
4994 u64 bytenr;
4995 u64 ptr_gen;
4996 u32 blocksize;
4997 int level = wc->level;
4998 int ret;
4999
5000 while (level >= 0) {
5001 cur = path->nodes[level];
5002 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
5003
5004 ret = walk_down_proc(trans, root, path, wc);
5005 if (ret > 0)
5006 break;
5007
5008 if (level == 0)
5009 break;
5010
5011 bytenr = btrfs_node_blockptr(cur, path->slots[level]);
5012 blocksize = btrfs_level_size(root, level - 1);
5013 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
5014
5015 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
5016 btrfs_tree_lock(next);
5017 btrfs_set_lock_blocking(next);
4640 5018
4641 if (path->locks[*level]) { 5019 level--;
4642 btrfs_tree_unlock(path->nodes[*level]); 5020 BUG_ON(level != btrfs_header_level(next));
4643 path->locks[*level] = 0; 5021 path->nodes[level] = next;
5022 path->slots[level] = 0;
5023 path->locks[level] = 1;
5024 wc->level = level;
4644 } 5025 }
4645 free_extent_buffer(path->nodes[*level]);
4646 path->nodes[*level] = NULL;
4647 *level += 1;
4648 cond_resched();
4649 return 0; 5026 return 0;
4650} 5027}
4651 5028
4652/*
4653 * helper for dropping snapshots. This walks back up the tree in the path
4654 * to find the first node higher up where we haven't yet gone through
4655 * all the slots
4656 */
4657static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 5029static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
4658 struct btrfs_root *root, 5030 struct btrfs_root *root,
4659 struct btrfs_path *path, 5031 struct btrfs_path *path,
4660 int *level, int max_level) 5032 struct walk_control *wc, int max_level)
4661{ 5033{
4662 struct btrfs_root_item *root_item = &root->root_item; 5034 int level = wc->level;
4663 int i;
4664 int slot;
4665 int ret; 5035 int ret;
4666 5036
4667 for (i = *level; i < max_level && path->nodes[i]; i++) { 5037 path->slots[level] = btrfs_header_nritems(path->nodes[level]);
4668 slot = path->slots[i]; 5038 while (level < max_level && path->nodes[level]) {
4669 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 5039 wc->level = level;
4670 /* 5040 if (path->slots[level] + 1 <
4671 * there is more work to do in this level. 5041 btrfs_header_nritems(path->nodes[level])) {
4672 * Update the drop_progress marker to reflect 5042 path->slots[level]++;
4673 * the work we've done so far, and then bump
4674 * the slot number
4675 */
4676 path->slots[i]++;
4677 WARN_ON(*level == 0);
4678 if (max_level == BTRFS_MAX_LEVEL) {
4679 btrfs_node_key(path->nodes[i],
4680 &root_item->drop_progress,
4681 path->slots[i]);
4682 root_item->drop_level = i;
4683 }
4684 *level = i;
4685 return 0; 5043 return 0;
4686 } else { 5044 } else {
4687 struct extent_buffer *parent; 5045 ret = walk_up_proc(trans, root, path, wc);
4688 5046 if (ret > 0)
4689 /* 5047 return 0;
4690 * this whole node is done, free our reference
4691 * on it and go up one level
4692 */
4693 if (path->nodes[*level] == root->node)
4694 parent = path->nodes[*level];
4695 else
4696 parent = path->nodes[*level + 1];
4697 5048
4698 clean_tree_block(trans, root, path->nodes[i]); 5049 if (path->locks[level]) {
4699 ret = btrfs_free_extent(trans, root, 5050 btrfs_tree_unlock(path->nodes[level]);
4700 path->nodes[i]->start, 5051 path->locks[level] = 0;
4701 path->nodes[i]->len,
4702 parent->start,
4703 btrfs_header_owner(parent),
4704 *level, 0);
4705 BUG_ON(ret);
4706 if (path->locks[*level]) {
4707 btrfs_tree_unlock(path->nodes[i]);
4708 path->locks[i] = 0;
4709 } 5052 }
4710 free_extent_buffer(path->nodes[i]); 5053 free_extent_buffer(path->nodes[level]);
4711 path->nodes[i] = NULL; 5054 path->nodes[level] = NULL;
4712 *level = i + 1; 5055 level++;
4713 } 5056 }
4714 } 5057 }
4715 return 1; 5058 return 1;
4716} 5059}
4717 5060
4718/* 5061/*
4719 * drop the reference count on the tree rooted at 'snap'. This traverses 5062 * drop a subvolume tree.
4720 * the tree freeing any blocks that have a ref count of zero after being 5063 *
4721 * decremented. 5064 * this function traverses the tree freeing any blocks that only
5065 * referenced by the tree.
5066 *
5067 * when a shared tree block is found. this function decreases its
5068 * reference count by one. if update_ref is true, this function
5069 * also make sure backrefs for the shared block and all lower level
5070 * blocks are properly updated.
4722 */ 5071 */
4723int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 5072int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
4724 *root)
4725{ 5073{
4726 int ret = 0;
4727 int wret;
4728 int level;
4729 struct btrfs_path *path; 5074 struct btrfs_path *path;
4730 int update_count; 5075 struct btrfs_trans_handle *trans;
5076 struct btrfs_root *tree_root = root->fs_info->tree_root;
4731 struct btrfs_root_item *root_item = &root->root_item; 5077 struct btrfs_root_item *root_item = &root->root_item;
5078 struct walk_control *wc;
5079 struct btrfs_key key;
5080 int err = 0;
5081 int ret;
5082 int level;
4732 5083
4733 path = btrfs_alloc_path(); 5084 path = btrfs_alloc_path();
4734 BUG_ON(!path); 5085 BUG_ON(!path);
4735 5086
4736 level = btrfs_header_level(root->node); 5087 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5088 BUG_ON(!wc);
5089
5090 trans = btrfs_start_transaction(tree_root, 1);
5091
4737 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5092 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
5093 level = btrfs_header_level(root->node);
4738 path->nodes[level] = btrfs_lock_root_node(root); 5094 path->nodes[level] = btrfs_lock_root_node(root);
4739 btrfs_set_lock_blocking(path->nodes[level]); 5095 btrfs_set_lock_blocking(path->nodes[level]);
4740 path->slots[level] = 0; 5096 path->slots[level] = 0;
4741 path->locks[level] = 1; 5097 path->locks[level] = 1;
5098 memset(&wc->update_progress, 0,
5099 sizeof(wc->update_progress));
4742 } else { 5100 } else {
4743 struct btrfs_key key;
4744 struct btrfs_disk_key found_key;
4745 struct extent_buffer *node;
4746
4747 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 5101 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
5102 memcpy(&wc->update_progress, &key,
5103 sizeof(wc->update_progress));
5104
4748 level = root_item->drop_level; 5105 level = root_item->drop_level;
5106 BUG_ON(level == 0);
4749 path->lowest_level = level; 5107 path->lowest_level = level;
4750 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5108 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4751 if (wret < 0) { 5109 path->lowest_level = 0;
4752 ret = wret; 5110 if (ret < 0) {
5111 err = ret;
4753 goto out; 5112 goto out;
4754 } 5113 }
4755 node = path->nodes[level]; 5114 btrfs_node_key_to_cpu(path->nodes[level], &key,
4756 btrfs_node_key(node, &found_key, path->slots[level]); 5115 path->slots[level]);
4757 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 5116 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
4758 sizeof(found_key))); 5117
4759 /* 5118 /*
4760 * unlock our path, this is safe because only this 5119 * unlock our path, this is safe because only this
4761 * function is allowed to delete this snapshot 5120 * function is allowed to delete this snapshot
4762 */ 5121 */
4763 btrfs_unlock_up_safe(path, 0); 5122 btrfs_unlock_up_safe(path, 0);
5123
5124 level = btrfs_header_level(root->node);
5125 while (1) {
5126 btrfs_tree_lock(path->nodes[level]);
5127 btrfs_set_lock_blocking(path->nodes[level]);
5128
5129 ret = btrfs_lookup_extent_info(trans, root,
5130 path->nodes[level]->start,
5131 path->nodes[level]->len,
5132 &wc->refs[level],
5133 &wc->flags[level]);
5134 BUG_ON(ret);
5135 BUG_ON(wc->refs[level] == 0);
5136
5137 if (level == root_item->drop_level)
5138 break;
5139
5140 btrfs_tree_unlock(path->nodes[level]);
5141 WARN_ON(wc->refs[level] != 1);
5142 level--;
5143 }
4764 } 5144 }
5145
5146 wc->level = level;
5147 wc->shared_level = -1;
5148 wc->stage = DROP_REFERENCE;
5149 wc->update_ref = update_ref;
5150 wc->keep_locks = 0;
5151
4765 while (1) { 5152 while (1) {
4766 unsigned long update; 5153 ret = walk_down_tree(trans, root, path, wc);
4767 wret = walk_down_tree(trans, root, path, &level); 5154 if (ret < 0) {
4768 if (wret > 0) 5155 err = ret;
4769 break; 5156 break;
4770 if (wret < 0) 5157 }
4771 ret = wret;
4772 5158
4773 wret = walk_up_tree(trans, root, path, &level, 5159 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
4774 BTRFS_MAX_LEVEL); 5160 if (ret < 0) {
4775 if (wret > 0) 5161 err = ret;
4776 break; 5162 break;
4777 if (wret < 0) 5163 }
4778 ret = wret; 5164
4779 if (trans->transaction->in_commit || 5165 if (ret > 0) {
4780 trans->transaction->delayed_refs.flushing) { 5166 BUG_ON(wc->stage != DROP_REFERENCE);
4781 ret = -EAGAIN;
4782 break; 5167 break;
4783 } 5168 }
4784 for (update_count = 0; update_count < 16; update_count++) { 5169
5170 if (wc->stage == DROP_REFERENCE) {
5171 level = wc->level;
5172 btrfs_node_key(path->nodes[level],
5173 &root_item->drop_progress,
5174 path->slots[level]);
5175 root_item->drop_level = level;
5176 }
5177
5178 BUG_ON(wc->level == 0);
5179 if (trans->transaction->in_commit ||
5180 trans->transaction->delayed_refs.flushing) {
5181 ret = btrfs_update_root(trans, tree_root,
5182 &root->root_key,
5183 root_item);
5184 BUG_ON(ret);
5185
5186 btrfs_end_transaction(trans, tree_root);
5187 trans = btrfs_start_transaction(tree_root, 1);
5188 } else {
5189 unsigned long update;
4785 update = trans->delayed_ref_updates; 5190 update = trans->delayed_ref_updates;
4786 trans->delayed_ref_updates = 0; 5191 trans->delayed_ref_updates = 0;
4787 if (update) 5192 if (update)
4788 btrfs_run_delayed_refs(trans, root, update); 5193 btrfs_run_delayed_refs(trans, tree_root,
4789 else 5194 update);
4790 break;
4791 } 5195 }
4792 } 5196 }
5197 btrfs_release_path(root, path);
5198 BUG_ON(err);
5199
5200 ret = btrfs_del_root(trans, tree_root, &root->root_key);
5201 BUG_ON(ret);
5202
5203 free_extent_buffer(root->node);
5204 free_extent_buffer(root->commit_root);
5205 kfree(root);
4793out: 5206out:
5207 btrfs_end_transaction(trans, tree_root);
5208 kfree(wc);
4794 btrfs_free_path(path); 5209 btrfs_free_path(path);
4795 return ret; 5210 return err;
4796} 5211}
4797 5212
5213/*
5214 * drop subtree rooted at tree block 'node'.
5215 *
5216 * NOTE: this function will unlock and release tree block 'node'
5217 */
4798int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5218int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4799 struct btrfs_root *root, 5219 struct btrfs_root *root,
4800 struct extent_buffer *node, 5220 struct extent_buffer *node,
4801 struct extent_buffer *parent) 5221 struct extent_buffer *parent)
4802{ 5222{
4803 struct btrfs_path *path; 5223 struct btrfs_path *path;
5224 struct walk_control *wc;
4804 int level; 5225 int level;
4805 int parent_level; 5226 int parent_level;
4806 int ret = 0; 5227 int ret = 0;
4807 int wret; 5228 int wret;
4808 5229
5230 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
5231
4809 path = btrfs_alloc_path(); 5232 path = btrfs_alloc_path();
4810 BUG_ON(!path); 5233 BUG_ON(!path);
4811 5234
5235 wc = kzalloc(sizeof(*wc), GFP_NOFS);
5236 BUG_ON(!wc);
5237
4812 btrfs_assert_tree_locked(parent); 5238 btrfs_assert_tree_locked(parent);
4813 parent_level = btrfs_header_level(parent); 5239 parent_level = btrfs_header_level(parent);
4814 extent_buffer_get(parent); 5240 extent_buffer_get(parent);
@@ -4817,24 +5243,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
4817 5243
4818 btrfs_assert_tree_locked(node); 5244 btrfs_assert_tree_locked(node);
4819 level = btrfs_header_level(node); 5245 level = btrfs_header_level(node);
4820 extent_buffer_get(node);
4821 path->nodes[level] = node; 5246 path->nodes[level] = node;
4822 path->slots[level] = 0; 5247 path->slots[level] = 0;
5248 path->locks[level] = 1;
5249
5250 wc->refs[parent_level] = 1;
5251 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5252 wc->level = level;
5253 wc->shared_level = -1;
5254 wc->stage = DROP_REFERENCE;
5255 wc->update_ref = 0;
5256 wc->keep_locks = 1;
4823 5257
4824 while (1) { 5258 while (1) {
4825 wret = walk_down_tree(trans, root, path, &level); 5259 wret = walk_down_tree(trans, root, path, wc);
4826 if (wret < 0) 5260 if (wret < 0) {
4827 ret = wret; 5261 ret = wret;
4828 if (wret != 0)
4829 break; 5262 break;
5263 }
4830 5264
4831 wret = walk_up_tree(trans, root, path, &level, parent_level); 5265 wret = walk_up_tree(trans, root, path, wc, parent_level);
4832 if (wret < 0) 5266 if (wret < 0)
4833 ret = wret; 5267 ret = wret;
4834 if (wret != 0) 5268 if (wret != 0)
4835 break; 5269 break;
4836 } 5270 }
4837 5271
5272 kfree(wc);
4838 btrfs_free_path(path); 5273 btrfs_free_path(path);
4839 return ret; 5274 return ret;
4840} 5275}
@@ -6739,11 +7174,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
6739 &info->block_group_cache_tree); 7174 &info->block_group_cache_tree);
6740 spin_unlock(&info->block_group_cache_lock); 7175 spin_unlock(&info->block_group_cache_lock);
6741 7176
6742 btrfs_remove_free_space_cache(block_group);
6743 down_write(&block_group->space_info->groups_sem); 7177 down_write(&block_group->space_info->groups_sem);
6744 list_del(&block_group->list); 7178 list_del(&block_group->list);
6745 up_write(&block_group->space_info->groups_sem); 7179 up_write(&block_group->space_info->groups_sem);
6746 7180
7181 if (block_group->cached == BTRFS_CACHE_STARTED)
7182 wait_event(block_group->caching_q,
7183 block_group_cache_done(block_group));
7184
7185 btrfs_remove_free_space_cache(block_group);
7186
6747 WARN_ON(atomic_read(&block_group->count) != 1); 7187 WARN_ON(atomic_read(&block_group->count) != 1);
6748 kfree(block_group); 7188 kfree(block_group);
6749 7189
@@ -6809,9 +7249,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6809 atomic_set(&cache->count, 1); 7249 atomic_set(&cache->count, 1);
6810 spin_lock_init(&cache->lock); 7250 spin_lock_init(&cache->lock);
6811 spin_lock_init(&cache->tree_lock); 7251 spin_lock_init(&cache->tree_lock);
6812 mutex_init(&cache->cache_mutex); 7252 cache->fs_info = info;
7253 init_waitqueue_head(&cache->caching_q);
6813 INIT_LIST_HEAD(&cache->list); 7254 INIT_LIST_HEAD(&cache->list);
6814 INIT_LIST_HEAD(&cache->cluster_list); 7255 INIT_LIST_HEAD(&cache->cluster_list);
7256
7257 /*
7258 * we only want to have 32k of ram per block group for keeping
7259 * track of free space, and if we pass 1/2 of that we want to
7260 * start converting things over to using bitmaps
7261 */
7262 cache->extents_thresh = ((1024 * 32) / 2) /
7263 sizeof(struct btrfs_free_space);
7264
6815 read_extent_buffer(leaf, &cache->item, 7265 read_extent_buffer(leaf, &cache->item,
6816 btrfs_item_ptr_offset(leaf, path->slots[0]), 7266 btrfs_item_ptr_offset(leaf, path->slots[0]),
6817 sizeof(cache->item)); 7267 sizeof(cache->item));
@@ -6820,6 +7270,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
6820 key.objectid = found_key.objectid + found_key.offset; 7270 key.objectid = found_key.objectid + found_key.offset;
6821 btrfs_release_path(root, path); 7271 btrfs_release_path(root, path);
6822 cache->flags = btrfs_block_group_flags(&cache->item); 7272 cache->flags = btrfs_block_group_flags(&cache->item);
7273 cache->sectorsize = root->sectorsize;
7274
7275 remove_sb_from_cache(root, cache);
7276
7277 /*
7278 * check for two cases, either we are full, and therefore
7279 * don't need to bother with the caching work since we won't
7280 * find any space, or we are empty, and we can just add all
7281 * the space in and be done with it. This saves us _alot_ of
7282 * time, particularly in the full case.
7283 */
7284 if (found_key.offset == btrfs_block_group_used(&cache->item)) {
7285 cache->cached = BTRFS_CACHE_FINISHED;
7286 } else if (btrfs_block_group_used(&cache->item) == 0) {
7287 cache->cached = BTRFS_CACHE_FINISHED;
7288 add_new_free_space(cache, root->fs_info,
7289 found_key.objectid,
7290 found_key.objectid +
7291 found_key.offset);
7292 }
6823 7293
6824 ret = update_space_info(info, cache->flags, found_key.offset, 7294 ret = update_space_info(info, cache->flags, found_key.offset,
6825 btrfs_block_group_used(&cache->item), 7295 btrfs_block_group_used(&cache->item),
@@ -6863,10 +7333,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6863 cache->key.objectid = chunk_offset; 7333 cache->key.objectid = chunk_offset;
6864 cache->key.offset = size; 7334 cache->key.offset = size;
6865 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7335 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
7336 cache->sectorsize = root->sectorsize;
7337
7338 /*
7339 * we only want to have 32k of ram per block group for keeping track
7340 * of free space, and if we pass 1/2 of that we want to start
7341 * converting things over to using bitmaps
7342 */
7343 cache->extents_thresh = ((1024 * 32) / 2) /
7344 sizeof(struct btrfs_free_space);
6866 atomic_set(&cache->count, 1); 7345 atomic_set(&cache->count, 1);
6867 spin_lock_init(&cache->lock); 7346 spin_lock_init(&cache->lock);
6868 spin_lock_init(&cache->tree_lock); 7347 spin_lock_init(&cache->tree_lock);
6869 mutex_init(&cache->cache_mutex); 7348 init_waitqueue_head(&cache->caching_q);
6870 INIT_LIST_HEAD(&cache->list); 7349 INIT_LIST_HEAD(&cache->list);
6871 INIT_LIST_HEAD(&cache->cluster_list); 7350 INIT_LIST_HEAD(&cache->cluster_list);
6872 7351
@@ -6875,6 +7354,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
6875 cache->flags = type; 7354 cache->flags = type;
6876 btrfs_set_block_group_flags(&cache->item, type); 7355 btrfs_set_block_group_flags(&cache->item, type);
6877 7356
7357 cache->cached = BTRFS_CACHE_FINISHED;
7358 remove_sb_from_cache(root, cache);
7359
7360 add_new_free_space(cache, root->fs_info, chunk_offset,
7361 chunk_offset + size);
7362
6878 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7363 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
6879 &cache->space_info); 7364 &cache->space_info);
6880 BUG_ON(ret); 7365 BUG_ON(ret);
@@ -6933,7 +7418,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
6933 rb_erase(&block_group->cache_node, 7418 rb_erase(&block_group->cache_node,
6934 &root->fs_info->block_group_cache_tree); 7419 &root->fs_info->block_group_cache_tree);
6935 spin_unlock(&root->fs_info->block_group_cache_lock); 7420 spin_unlock(&root->fs_info->block_group_cache_lock);
6936 btrfs_remove_free_space_cache(block_group); 7421
6937 down_write(&block_group->space_info->groups_sem); 7422 down_write(&block_group->space_info->groups_sem);
6938 /* 7423 /*
6939 * we must use list_del_init so people can check to see if they 7424 * we must use list_del_init so people can check to see if they
@@ -6942,11 +7427,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
6942 list_del_init(&block_group->list); 7427 list_del_init(&block_group->list);
6943 up_write(&block_group->space_info->groups_sem); 7428 up_write(&block_group->space_info->groups_sem);
6944 7429
7430 if (block_group->cached == BTRFS_CACHE_STARTED)
7431 wait_event(block_group->caching_q,
7432 block_group_cache_done(block_group));
7433
7434 btrfs_remove_free_space_cache(block_group);
7435
6945 spin_lock(&block_group->space_info->lock); 7436 spin_lock(&block_group->space_info->lock);
6946 block_group->space_info->total_bytes -= block_group->key.offset; 7437 block_group->space_info->total_bytes -= block_group->key.offset;
6947 block_group->space_info->bytes_readonly -= block_group->key.offset; 7438 block_group->space_info->bytes_readonly -= block_group->key.offset;
6948 spin_unlock(&block_group->space_info->lock); 7439 spin_unlock(&block_group->space_info->lock);
6949 block_group->space_info->full = 0; 7440
7441 btrfs_clear_space_info_full(root->fs_info);
6950 7442
6951 btrfs_put_block_group(block_group); 7443 btrfs_put_block_group(block_group);
6952 btrfs_put_block_group(block_group); 7444 btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 126477eaecf5..4b833972273a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -22,7 +22,6 @@
22#include <linux/time.h> 22#include <linux/time.h>
23#include <linux/init.h> 23#include <linux/init.h>
24#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/smp_lock.h>
26#include <linux/backing-dev.h> 25#include <linux/backing-dev.h>
27#include <linux/mpage.h> 26#include <linux/mpage.h>
28#include <linux/swap.h> 27#include <linux/swap.h>
@@ -151,7 +150,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
151 } 150 }
152 if (end_pos > isize) { 151 if (end_pos > isize) {
153 i_size_write(inode, end_pos); 152 i_size_write(inode, end_pos);
154 btrfs_update_inode(trans, root, inode); 153 /* we've only changed i_size in ram, and we haven't updated
154 * the disk i_size. There is no need to log the inode
155 * at this time.
156 */
155 } 157 }
156 err = btrfs_end_transaction(trans, root); 158 err = btrfs_end_transaction(trans, root);
157out_unlock: 159out_unlock:
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4538e48581a5..5edcee3a617f 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -16,45 +16,46 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/pagemap.h>
19#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/math64.h>
20#include "ctree.h" 22#include "ctree.h"
21#include "free-space-cache.h" 23#include "free-space-cache.h"
22#include "transaction.h" 24#include "transaction.h"
23 25
24struct btrfs_free_space { 26#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
25 struct rb_node bytes_index; 27#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
26 struct rb_node offset_index;
27 u64 offset;
28 u64 bytes;
29};
30 28
31static int tree_insert_offset(struct rb_root *root, u64 offset, 29static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
32 struct rb_node *node) 30 u64 offset)
33{ 31{
34 struct rb_node **p = &root->rb_node; 32 BUG_ON(offset < bitmap_start);
35 struct rb_node *parent = NULL; 33 offset -= bitmap_start;
36 struct btrfs_free_space *info; 34 return (unsigned long)(div64_u64(offset, sectorsize));
35}
37 36
38 while (*p) { 37static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
39 parent = *p; 38{
40 info = rb_entry(parent, struct btrfs_free_space, offset_index); 39 return (unsigned long)(div64_u64(bytes, sectorsize));
40}
41 41
42 if (offset < info->offset) 42static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
43 p = &(*p)->rb_left; 43 u64 offset)
44 else if (offset > info->offset) 44{
45 p = &(*p)->rb_right; 45 u64 bitmap_start;
46 else 46 u64 bytes_per_bitmap;
47 return -EEXIST;
48 }
49 47
50 rb_link_node(node, parent, p); 48 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
51 rb_insert_color(node, root); 49 bitmap_start = offset - block_group->key.objectid;
50 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
51 bitmap_start *= bytes_per_bitmap;
52 bitmap_start += block_group->key.objectid;
52 53
53 return 0; 54 return bitmap_start;
54} 55}
55 56
56static int tree_insert_bytes(struct rb_root *root, u64 bytes, 57static int tree_insert_offset(struct rb_root *root, u64 offset,
57 struct rb_node *node) 58 struct rb_node *node, int bitmap)
58{ 59{
59 struct rb_node **p = &root->rb_node; 60 struct rb_node **p = &root->rb_node;
60 struct rb_node *parent = NULL; 61 struct rb_node *parent = NULL;
@@ -62,12 +63,34 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes,
62 63
63 while (*p) { 64 while (*p) {
64 parent = *p; 65 parent = *p;
65 info = rb_entry(parent, struct btrfs_free_space, bytes_index); 66 info = rb_entry(parent, struct btrfs_free_space, offset_index);
66 67
67 if (bytes < info->bytes) 68 if (offset < info->offset) {
68 p = &(*p)->rb_left; 69 p = &(*p)->rb_left;
69 else 70 } else if (offset > info->offset) {
70 p = &(*p)->rb_right; 71 p = &(*p)->rb_right;
72 } else {
73 /*
74 * we could have a bitmap entry and an extent entry
75 * share the same offset. If this is the case, we want
76 * the extent entry to always be found first if we do a
77 * linear search through the tree, since we want to have
78 * the quickest allocation time, and allocating from an
79 * extent is faster than allocating from a bitmap. So
80 * if we're inserting a bitmap and we find an entry at
81 * this offset, we want to go right, or after this entry
82 * logically. If we are inserting an extent and we've
83 * found a bitmap, we want to go left, or before
84 * logically.
85 */
86 if (bitmap) {
87 WARN_ON(info->bitmap);
88 p = &(*p)->rb_right;
89 } else {
90 WARN_ON(!info->bitmap);
91 p = &(*p)->rb_left;
92 }
93 }
71 } 94 }
72 95
73 rb_link_node(node, parent, p); 96 rb_link_node(node, parent, p);
@@ -79,110 +102,143 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes,
79/* 102/*
80 * searches the tree for the given offset. 103 * searches the tree for the given offset.
81 * 104 *
82 * fuzzy == 1: this is used for allocations where we are given a hint of where 105 * fuzzy - If this is set, then we are trying to make an allocation, and we just
83 * to look for free space. Because the hint may not be completely on an offset 106 * want a section that has at least bytes size and comes at or after the given
84 * mark, or the hint may no longer point to free space we need to fudge our 107 * offset.
85 * results a bit. So we look for free space starting at or after offset with at
86 * least bytes size. We prefer to find as close to the given offset as we can.
87 * Also if the offset is within a free space range, then we will return the free
88 * space that contains the given offset, which means we can return a free space
89 * chunk with an offset before the provided offset.
90 *
91 * fuzzy == 0: this is just a normal tree search. Give us the free space that
92 * starts at the given offset which is at least bytes size, and if its not there
93 * return NULL.
94 */ 108 */
95static struct btrfs_free_space *tree_search_offset(struct rb_root *root, 109static struct btrfs_free_space *
96 u64 offset, u64 bytes, 110tree_search_offset(struct btrfs_block_group_cache *block_group,
97 int fuzzy) 111 u64 offset, int bitmap_only, int fuzzy)
98{ 112{
99 struct rb_node *n = root->rb_node; 113 struct rb_node *n = block_group->free_space_offset.rb_node;
100 struct btrfs_free_space *entry, *ret = NULL; 114 struct btrfs_free_space *entry, *prev = NULL;
115
116 /* find entry that is closest to the 'offset' */
117 while (1) {
118 if (!n) {
119 entry = NULL;
120 break;
121 }
101 122
102 while (n) {
103 entry = rb_entry(n, struct btrfs_free_space, offset_index); 123 entry = rb_entry(n, struct btrfs_free_space, offset_index);
124 prev = entry;
104 125
105 if (offset < entry->offset) { 126 if (offset < entry->offset)
106 if (fuzzy &&
107 (!ret || entry->offset < ret->offset) &&
108 (bytes <= entry->bytes))
109 ret = entry;
110 n = n->rb_left; 127 n = n->rb_left;
111 } else if (offset > entry->offset) { 128 else if (offset > entry->offset)
112 if (fuzzy &&
113 (entry->offset + entry->bytes - 1) >= offset &&
114 bytes <= entry->bytes) {
115 ret = entry;
116 break;
117 }
118 n = n->rb_right; 129 n = n->rb_right;
119 } else { 130 else
120 if (bytes > entry->bytes) {
121 n = n->rb_right;
122 continue;
123 }
124 ret = entry;
125 break; 131 break;
126 }
127 } 132 }
128 133
129 return ret; 134 if (bitmap_only) {
130} 135 if (!entry)
136 return NULL;
137 if (entry->bitmap)
138 return entry;
131 139
132/* 140 /*
133 * return a chunk at least bytes size, as close to offset that we can get. 141 * bitmap entry and extent entry may share same offset,
134 */ 142 * in that case, bitmap entry comes after extent entry.
135static struct btrfs_free_space *tree_search_bytes(struct rb_root *root, 143 */
136 u64 offset, u64 bytes) 144 n = rb_next(n);
137{ 145 if (!n)
138 struct rb_node *n = root->rb_node; 146 return NULL;
139 struct btrfs_free_space *entry, *ret = NULL; 147 entry = rb_entry(n, struct btrfs_free_space, offset_index);
140 148 if (entry->offset != offset)
141 while (n) { 149 return NULL;
142 entry = rb_entry(n, struct btrfs_free_space, bytes_index);
143 150
144 if (bytes < entry->bytes) { 151 WARN_ON(!entry->bitmap);
152 return entry;
153 } else if (entry) {
154 if (entry->bitmap) {
145 /* 155 /*
146 * We prefer to get a hole size as close to the size we 156 * if previous extent entry covers the offset,
147 * are asking for so we don't take small slivers out of 157 * we should return it instead of the bitmap entry
148 * huge holes, but we also want to get as close to the
149 * offset as possible so we don't have a whole lot of
150 * fragmentation.
151 */ 158 */
152 if (offset <= entry->offset) { 159 n = &entry->offset_index;
153 if (!ret) 160 while (1) {
154 ret = entry; 161 n = rb_prev(n);
155 else if (entry->bytes < ret->bytes) 162 if (!n)
156 ret = entry; 163 break;
157 else if (entry->offset < ret->offset) 164 prev = rb_entry(n, struct btrfs_free_space,
158 ret = entry; 165 offset_index);
166 if (!prev->bitmap) {
167 if (prev->offset + prev->bytes > offset)
168 entry = prev;
169 break;
170 }
159 } 171 }
160 n = n->rb_left; 172 }
161 } else if (bytes > entry->bytes) { 173 return entry;
162 n = n->rb_right; 174 }
175
176 if (!prev)
177 return NULL;
178
179 /* find last entry before the 'offset' */
180 entry = prev;
181 if (entry->offset > offset) {
182 n = rb_prev(&entry->offset_index);
183 if (n) {
184 entry = rb_entry(n, struct btrfs_free_space,
185 offset_index);
186 BUG_ON(entry->offset > offset);
163 } else { 187 } else {
164 /* 188 if (fuzzy)
165 * Ok we may have multiple chunks of the wanted size, 189 return entry;
166 * so we don't want to take the first one we find, we 190 else
167 * want to take the one closest to our given offset, so 191 return NULL;
168 * keep searching just in case theres a better match.
169 */
170 n = n->rb_right;
171 if (offset > entry->offset)
172 continue;
173 else if (!ret || entry->offset < ret->offset)
174 ret = entry;
175 } 192 }
176 } 193 }
177 194
178 return ret; 195 if (entry->bitmap) {
196 n = &entry->offset_index;
197 while (1) {
198 n = rb_prev(n);
199 if (!n)
200 break;
201 prev = rb_entry(n, struct btrfs_free_space,
202 offset_index);
203 if (!prev->bitmap) {
204 if (prev->offset + prev->bytes > offset)
205 return prev;
206 break;
207 }
208 }
209 if (entry->offset + BITS_PER_BITMAP *
210 block_group->sectorsize > offset)
211 return entry;
212 } else if (entry->offset + entry->bytes > offset)
213 return entry;
214
215 if (!fuzzy)
216 return NULL;
217
218 while (1) {
219 if (entry->bitmap) {
220 if (entry->offset + BITS_PER_BITMAP *
221 block_group->sectorsize > offset)
222 break;
223 } else {
224 if (entry->offset + entry->bytes > offset)
225 break;
226 }
227
228 n = rb_next(&entry->offset_index);
229 if (!n)
230 return NULL;
231 entry = rb_entry(n, struct btrfs_free_space, offset_index);
232 }
233 return entry;
179} 234}
180 235
181static void unlink_free_space(struct btrfs_block_group_cache *block_group, 236static void unlink_free_space(struct btrfs_block_group_cache *block_group,
182 struct btrfs_free_space *info) 237 struct btrfs_free_space *info)
183{ 238{
184 rb_erase(&info->offset_index, &block_group->free_space_offset); 239 rb_erase(&info->offset_index, &block_group->free_space_offset);
185 rb_erase(&info->bytes_index, &block_group->free_space_bytes); 240 block_group->free_extents--;
241 block_group->free_space -= info->bytes;
186} 242}
187 243
188static int link_free_space(struct btrfs_block_group_cache *block_group, 244static int link_free_space(struct btrfs_block_group_cache *block_group,
@@ -190,17 +246,353 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
190{ 246{
191 int ret = 0; 247 int ret = 0;
192 248
193 249 BUG_ON(!info->bitmap && !info->bytes);
194 BUG_ON(!info->bytes);
195 ret = tree_insert_offset(&block_group->free_space_offset, info->offset, 250 ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
196 &info->offset_index); 251 &info->offset_index, (info->bitmap != NULL));
197 if (ret) 252 if (ret)
198 return ret; 253 return ret;
199 254
200 ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes, 255 block_group->free_space += info->bytes;
201 &info->bytes_index); 256 block_group->free_extents++;
202 if (ret) 257 return ret;
203 return ret; 258}
259
260static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
261{
262 u64 max_bytes, possible_bytes;
263
264 /*
265 * The goal is to keep the total amount of memory used per 1gb of space
266 * at or below 32k, so we need to adjust how much memory we allow to be
267 * used by extent based free space tracking
268 */
269 max_bytes = MAX_CACHE_BYTES_PER_GIG *
270 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
271
272 possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
273 (sizeof(struct btrfs_free_space) *
274 block_group->extents_thresh);
275
276 if (possible_bytes > max_bytes) {
277 int extent_bytes = max_bytes -
278 (block_group->total_bitmaps * PAGE_CACHE_SIZE);
279
280 if (extent_bytes <= 0) {
281 block_group->extents_thresh = 0;
282 return;
283 }
284
285 block_group->extents_thresh = extent_bytes /
286 (sizeof(struct btrfs_free_space));
287 }
288}
289
290static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
291 struct btrfs_free_space *info, u64 offset,
292 u64 bytes)
293{
294 unsigned long start, end;
295 unsigned long i;
296
297 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
298 end = start + bytes_to_bits(bytes, block_group->sectorsize);
299 BUG_ON(end > BITS_PER_BITMAP);
300
301 for (i = start; i < end; i++)
302 clear_bit(i, info->bitmap);
303
304 info->bytes -= bytes;
305 block_group->free_space -= bytes;
306}
307
308static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
309 struct btrfs_free_space *info, u64 offset,
310 u64 bytes)
311{
312 unsigned long start, end;
313 unsigned long i;
314
315 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
316 end = start + bytes_to_bits(bytes, block_group->sectorsize);
317 BUG_ON(end > BITS_PER_BITMAP);
318
319 for (i = start; i < end; i++)
320 set_bit(i, info->bitmap);
321
322 info->bytes += bytes;
323 block_group->free_space += bytes;
324}
325
326static int search_bitmap(struct btrfs_block_group_cache *block_group,
327 struct btrfs_free_space *bitmap_info, u64 *offset,
328 u64 *bytes)
329{
330 unsigned long found_bits = 0;
331 unsigned long bits, i;
332 unsigned long next_zero;
333
334 i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
335 max_t(u64, *offset, bitmap_info->offset));
336 bits = bytes_to_bits(*bytes, block_group->sectorsize);
337
338 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
339 i < BITS_PER_BITMAP;
340 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
341 next_zero = find_next_zero_bit(bitmap_info->bitmap,
342 BITS_PER_BITMAP, i);
343 if ((next_zero - i) >= bits) {
344 found_bits = next_zero - i;
345 break;
346 }
347 i = next_zero;
348 }
349
350 if (found_bits) {
351 *offset = (u64)(i * block_group->sectorsize) +
352 bitmap_info->offset;
353 *bytes = (u64)(found_bits) * block_group->sectorsize;
354 return 0;
355 }
356
357 return -1;
358}
359
360static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
361 *block_group, u64 *offset,
362 u64 *bytes, int debug)
363{
364 struct btrfs_free_space *entry;
365 struct rb_node *node;
366 int ret;
367
368 if (!block_group->free_space_offset.rb_node)
369 return NULL;
370
371 entry = tree_search_offset(block_group,
372 offset_to_bitmap(block_group, *offset),
373 0, 1);
374 if (!entry)
375 return NULL;
376
377 for (node = &entry->offset_index; node; node = rb_next(node)) {
378 entry = rb_entry(node, struct btrfs_free_space, offset_index);
379 if (entry->bytes < *bytes)
380 continue;
381
382 if (entry->bitmap) {
383 ret = search_bitmap(block_group, entry, offset, bytes);
384 if (!ret)
385 return entry;
386 continue;
387 }
388
389 *offset = entry->offset;
390 *bytes = entry->bytes;
391 return entry;
392 }
393
394 return NULL;
395}
396
397static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
398 struct btrfs_free_space *info, u64 offset)
399{
400 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
401 int max_bitmaps = (int)div64_u64(block_group->key.offset +
402 bytes_per_bg - 1, bytes_per_bg);
403 BUG_ON(block_group->total_bitmaps >= max_bitmaps);
404
405 info->offset = offset_to_bitmap(block_group, offset);
406 link_free_space(block_group, info);
407 block_group->total_bitmaps++;
408
409 recalculate_thresholds(block_group);
410}
411
412static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
413 struct btrfs_free_space *bitmap_info,
414 u64 *offset, u64 *bytes)
415{
416 u64 end;
417 u64 search_start, search_bytes;
418 int ret;
419
420again:
421 end = bitmap_info->offset +
422 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
423
424 /*
425 * XXX - this can go away after a few releases.
426 *
427 * since the only user of btrfs_remove_free_space is the tree logging
428 * stuff, and the only way to test that is under crash conditions, we
429 * want to have this debug stuff here just in case somethings not
430 * working. Search the bitmap for the space we are trying to use to
431 * make sure its actually there. If its not there then we need to stop
432 * because something has gone wrong.
433 */
434 search_start = *offset;
435 search_bytes = *bytes;
436 ret = search_bitmap(block_group, bitmap_info, &search_start,
437 &search_bytes);
438 BUG_ON(ret < 0 || search_start != *offset);
439
440 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
441 bitmap_clear_bits(block_group, bitmap_info, *offset,
442 end - *offset + 1);
443 *bytes -= end - *offset + 1;
444 *offset = end + 1;
445 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
446 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
447 *bytes = 0;
448 }
449
450 if (*bytes) {
451 struct rb_node *next = rb_next(&bitmap_info->offset_index);
452 if (!bitmap_info->bytes) {
453 unlink_free_space(block_group, bitmap_info);
454 kfree(bitmap_info->bitmap);
455 kfree(bitmap_info);
456 block_group->total_bitmaps--;
457 recalculate_thresholds(block_group);
458 }
459
460 /*
461 * no entry after this bitmap, but we still have bytes to
462 * remove, so something has gone wrong.
463 */
464 if (!next)
465 return -EINVAL;
466
467 bitmap_info = rb_entry(next, struct btrfs_free_space,
468 offset_index);
469
470 /*
471 * if the next entry isn't a bitmap we need to return to let the
472 * extent stuff do its work.
473 */
474 if (!bitmap_info->bitmap)
475 return -EAGAIN;
476
477 /*
478 * Ok the next item is a bitmap, but it may not actually hold
479 * the information for the rest of this free space stuff, so
480 * look for it, and if we don't find it return so we can try
481 * everything over again.
482 */
483 search_start = *offset;
484 search_bytes = *bytes;
485 ret = search_bitmap(block_group, bitmap_info, &search_start,
486 &search_bytes);
487 if (ret < 0 || search_start != *offset)
488 return -EAGAIN;
489
490 goto again;
491 } else if (!bitmap_info->bytes) {
492 unlink_free_space(block_group, bitmap_info);
493 kfree(bitmap_info->bitmap);
494 kfree(bitmap_info);
495 block_group->total_bitmaps--;
496 recalculate_thresholds(block_group);
497 }
498
499 return 0;
500}
501
502static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
503 struct btrfs_free_space *info)
504{
505 struct btrfs_free_space *bitmap_info;
506 int added = 0;
507 u64 bytes, offset, end;
508 int ret;
509
510 /*
511 * If we are below the extents threshold then we can add this as an
512 * extent, and don't have to deal with the bitmap
513 */
514 if (block_group->free_extents < block_group->extents_thresh &&
515 info->bytes > block_group->sectorsize * 4)
516 return 0;
517
518 /*
519 * some block groups are so tiny they can't be enveloped by a bitmap, so
520 * don't even bother to create a bitmap for this
521 */
522 if (BITS_PER_BITMAP * block_group->sectorsize >
523 block_group->key.offset)
524 return 0;
525
526 bytes = info->bytes;
527 offset = info->offset;
528
529again:
530 bitmap_info = tree_search_offset(block_group,
531 offset_to_bitmap(block_group, offset),
532 1, 0);
533 if (!bitmap_info) {
534 BUG_ON(added);
535 goto new_bitmap;
536 }
537
538 end = bitmap_info->offset +
539 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
540
541 if (offset >= bitmap_info->offset && offset + bytes > end) {
542 bitmap_set_bits(block_group, bitmap_info, offset,
543 end - offset);
544 bytes -= end - offset;
545 offset = end;
546 added = 0;
547 } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
548 bitmap_set_bits(block_group, bitmap_info, offset, bytes);
549 bytes = 0;
550 } else {
551 BUG();
552 }
553
554 if (!bytes) {
555 ret = 1;
556 goto out;
557 } else
558 goto again;
559
560new_bitmap:
561 if (info && info->bitmap) {
562 add_new_bitmap(block_group, info, offset);
563 added = 1;
564 info = NULL;
565 goto again;
566 } else {
567 spin_unlock(&block_group->tree_lock);
568
569 /* no pre-allocated info, allocate a new one */
570 if (!info) {
571 info = kzalloc(sizeof(struct btrfs_free_space),
572 GFP_NOFS);
573 if (!info) {
574 spin_lock(&block_group->tree_lock);
575 ret = -ENOMEM;
576 goto out;
577 }
578 }
579
580 /* allocate the bitmap */
581 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
582 spin_lock(&block_group->tree_lock);
583 if (!info->bitmap) {
584 ret = -ENOMEM;
585 goto out;
586 }
587 goto again;
588 }
589
590out:
591 if (info) {
592 if (info->bitmap)
593 kfree(info->bitmap);
594 kfree(info);
595 }
204 596
205 return ret; 597 return ret;
206} 598}
@@ -208,8 +600,8 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
208int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 600int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
209 u64 offset, u64 bytes) 601 u64 offset, u64 bytes)
210{ 602{
211 struct btrfs_free_space *right_info; 603 struct btrfs_free_space *right_info = NULL;
212 struct btrfs_free_space *left_info; 604 struct btrfs_free_space *left_info = NULL;
213 struct btrfs_free_space *info = NULL; 605 struct btrfs_free_space *info = NULL;
214 int ret = 0; 606 int ret = 0;
215 607
@@ -227,18 +619,38 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
227 * are adding, if there is remove that struct and add a new one to 619 * are adding, if there is remove that struct and add a new one to
228 * cover the entire range 620 * cover the entire range
229 */ 621 */
230 right_info = tree_search_offset(&block_group->free_space_offset, 622 right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
231 offset+bytes, 0, 0); 623 if (right_info && rb_prev(&right_info->offset_index))
232 left_info = tree_search_offset(&block_group->free_space_offset, 624 left_info = rb_entry(rb_prev(&right_info->offset_index),
233 offset-1, 0, 1); 625 struct btrfs_free_space, offset_index);
626 else
627 left_info = tree_search_offset(block_group, offset - 1, 0, 0);
628
629 /*
630 * If there was no extent directly to the left or right of this new
631 * extent then we know we're going to have to allocate a new extent, so
632 * before we do that see if we need to drop this into a bitmap
633 */
634 if ((!left_info || left_info->bitmap) &&
635 (!right_info || right_info->bitmap)) {
636 ret = insert_into_bitmap(block_group, info);
637
638 if (ret < 0) {
639 goto out;
640 } else if (ret) {
641 ret = 0;
642 goto out;
643 }
644 }
234 645
235 if (right_info) { 646 if (right_info && !right_info->bitmap) {
236 unlink_free_space(block_group, right_info); 647 unlink_free_space(block_group, right_info);
237 info->bytes += right_info->bytes; 648 info->bytes += right_info->bytes;
238 kfree(right_info); 649 kfree(right_info);
239 } 650 }
240 651
241 if (left_info && left_info->offset + left_info->bytes == offset) { 652 if (left_info && !left_info->bitmap &&
653 left_info->offset + left_info->bytes == offset) {
242 unlink_free_space(block_group, left_info); 654 unlink_free_space(block_group, left_info);
243 info->offset = left_info->offset; 655 info->offset = left_info->offset;
244 info->bytes += left_info->bytes; 656 info->bytes += left_info->bytes;
@@ -248,11 +660,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
248 ret = link_free_space(block_group, info); 660 ret = link_free_space(block_group, info);
249 if (ret) 661 if (ret)
250 kfree(info); 662 kfree(info);
251 663out:
252 spin_unlock(&block_group->tree_lock); 664 spin_unlock(&block_group->tree_lock);
253 665
254 if (ret) { 666 if (ret) {
255 printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret); 667 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
256 BUG_ON(ret == -EEXIST); 668 BUG_ON(ret == -EEXIST);
257 } 669 }
258 670
@@ -263,40 +675,74 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
263 u64 offset, u64 bytes) 675 u64 offset, u64 bytes)
264{ 676{
265 struct btrfs_free_space *info; 677 struct btrfs_free_space *info;
678 struct btrfs_free_space *next_info = NULL;
266 int ret = 0; 679 int ret = 0;
267 680
268 spin_lock(&block_group->tree_lock); 681 spin_lock(&block_group->tree_lock);
269 682
270 info = tree_search_offset(&block_group->free_space_offset, offset, 0, 683again:
271 1); 684 info = tree_search_offset(block_group, offset, 0, 0);
272 if (info && info->offset == offset) { 685 if (!info) {
273 if (info->bytes < bytes) { 686 /*
274 printk(KERN_ERR "Found free space at %llu, size %llu," 687 * oops didn't find an extent that matched the space we wanted
275 "trying to use %llu\n", 688 * to remove, look for a bitmap instead
276 (unsigned long long)info->offset, 689 */
277 (unsigned long long)info->bytes, 690 info = tree_search_offset(block_group,
278 (unsigned long long)bytes); 691 offset_to_bitmap(block_group, offset),
692 1, 0);
693 if (!info) {
694 WARN_ON(1);
695 goto out_lock;
696 }
697 }
698
699 if (info->bytes < bytes && rb_next(&info->offset_index)) {
700 u64 end;
701 next_info = rb_entry(rb_next(&info->offset_index),
702 struct btrfs_free_space,
703 offset_index);
704
705 if (next_info->bitmap)
706 end = next_info->offset + BITS_PER_BITMAP *
707 block_group->sectorsize - 1;
708 else
709 end = next_info->offset + next_info->bytes;
710
711 if (next_info->bytes < bytes ||
712 next_info->offset > offset || offset > end) {
713 printk(KERN_CRIT "Found free space at %llu, size %llu,"
714 " trying to use %llu\n",
715 (unsigned long long)info->offset,
716 (unsigned long long)info->bytes,
717 (unsigned long long)bytes);
279 WARN_ON(1); 718 WARN_ON(1);
280 ret = -EINVAL; 719 ret = -EINVAL;
281 spin_unlock(&block_group->tree_lock); 720 goto out_lock;
282 goto out;
283 } 721 }
284 unlink_free_space(block_group, info);
285 722
286 if (info->bytes == bytes) { 723 info = next_info;
287 kfree(info); 724 }
288 spin_unlock(&block_group->tree_lock); 725
289 goto out; 726 if (info->bytes == bytes) {
727 unlink_free_space(block_group, info);
728 if (info->bitmap) {
729 kfree(info->bitmap);
730 block_group->total_bitmaps--;
290 } 731 }
732 kfree(info);
733 goto out_lock;
734 }
291 735
736 if (!info->bitmap && info->offset == offset) {
737 unlink_free_space(block_group, info);
292 info->offset += bytes; 738 info->offset += bytes;
293 info->bytes -= bytes; 739 info->bytes -= bytes;
740 link_free_space(block_group, info);
741 goto out_lock;
742 }
294 743
295 ret = link_free_space(block_group, info); 744 if (!info->bitmap && info->offset <= offset &&
296 spin_unlock(&block_group->tree_lock); 745 info->offset + info->bytes >= offset + bytes) {
297 BUG_ON(ret);
298 } else if (info && info->offset < offset &&
299 info->offset + info->bytes >= offset + bytes) {
300 u64 old_start = info->offset; 746 u64 old_start = info->offset;
301 /* 747 /*
302 * we're freeing space in the middle of the info, 748 * we're freeing space in the middle of the info,
@@ -312,7 +758,9 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
312 info->offset = offset + bytes; 758 info->offset = offset + bytes;
313 info->bytes = old_end - info->offset; 759 info->bytes = old_end - info->offset;
314 ret = link_free_space(block_group, info); 760 ret = link_free_space(block_group, info);
315 BUG_ON(ret); 761 WARN_ON(ret);
762 if (ret)
763 goto out_lock;
316 } else { 764 } else {
317 /* the hole we're creating ends at the end 765 /* the hole we're creating ends at the end
318 * of the info struct, just free the info 766 * of the info struct, just free the info
@@ -320,32 +768,22 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
320 kfree(info); 768 kfree(info);
321 } 769 }
322 spin_unlock(&block_group->tree_lock); 770 spin_unlock(&block_group->tree_lock);
323 /* step two, insert a new info struct to cover anything 771
324 * before the hole 772 /* step two, insert a new info struct to cover
773 * anything before the hole
325 */ 774 */
326 ret = btrfs_add_free_space(block_group, old_start, 775 ret = btrfs_add_free_space(block_group, old_start,
327 offset - old_start); 776 offset - old_start);
328 BUG_ON(ret); 777 WARN_ON(ret);
329 } else { 778 goto out;
330 spin_unlock(&block_group->tree_lock);
331 if (!info) {
332 printk(KERN_ERR "couldn't find space %llu to free\n",
333 (unsigned long long)offset);
334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
335 block_group->cached,
336 (unsigned long long)block_group->key.objectid,
337 (unsigned long long)block_group->key.offset);
338 btrfs_dump_free_space(block_group, bytes);
339 } else if (info) {
340 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
341 "but wanted offset=%llu bytes=%llu\n",
342 (unsigned long long)info->offset,
343 (unsigned long long)info->bytes,
344 (unsigned long long)offset,
345 (unsigned long long)bytes);
346 }
347 WARN_ON(1);
348 } 779 }
780
781 ret = remove_from_bitmap(block_group, info, &offset, &bytes);
782 if (ret == -EAGAIN)
783 goto again;
784 BUG_ON(ret);
785out_lock:
786 spin_unlock(&block_group->tree_lock);
349out: 787out:
350 return ret; 788 return ret;
351} 789}
@@ -361,10 +799,13 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
361 info = rb_entry(n, struct btrfs_free_space, offset_index); 799 info = rb_entry(n, struct btrfs_free_space, offset_index);
362 if (info->bytes >= bytes) 800 if (info->bytes >= bytes)
363 count++; 801 count++;
364 printk(KERN_ERR "entry offset %llu, bytes %llu\n", 802 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
365 (unsigned long long)info->offset, 803 (unsigned long long)info->offset,
366 (unsigned long long)info->bytes); 804 (unsigned long long)info->bytes,
805 (info->bitmap) ? "yes" : "no");
367 } 806 }
807 printk(KERN_INFO "block group has cluster?: %s\n",
808 list_empty(&block_group->cluster_list) ? "no" : "yes");
368 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 809 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
369 "\n", count); 810 "\n", count);
370} 811}
@@ -397,26 +838,35 @@ __btrfs_return_cluster_to_free_space(
397{ 838{
398 struct btrfs_free_space *entry; 839 struct btrfs_free_space *entry;
399 struct rb_node *node; 840 struct rb_node *node;
841 bool bitmap;
400 842
401 spin_lock(&cluster->lock); 843 spin_lock(&cluster->lock);
402 if (cluster->block_group != block_group) 844 if (cluster->block_group != block_group)
403 goto out; 845 goto out;
404 846
847 bitmap = cluster->points_to_bitmap;
848 cluster->block_group = NULL;
405 cluster->window_start = 0; 849 cluster->window_start = 0;
850 list_del_init(&cluster->block_group_list);
851 cluster->points_to_bitmap = false;
852
853 if (bitmap)
854 goto out;
855
406 node = rb_first(&cluster->root); 856 node = rb_first(&cluster->root);
407 while(node) { 857 while (node) {
408 entry = rb_entry(node, struct btrfs_free_space, offset_index); 858 entry = rb_entry(node, struct btrfs_free_space, offset_index);
409 node = rb_next(&entry->offset_index); 859 node = rb_next(&entry->offset_index);
410 rb_erase(&entry->offset_index, &cluster->root); 860 rb_erase(&entry->offset_index, &cluster->root);
411 link_free_space(block_group, entry); 861 BUG_ON(entry->bitmap);
862 tree_insert_offset(&block_group->free_space_offset,
863 entry->offset, &entry->offset_index, 0);
412 } 864 }
413 list_del_init(&cluster->block_group_list);
414
415 btrfs_put_block_group(cluster->block_group);
416 cluster->block_group = NULL;
417 cluster->root.rb_node = NULL; 865 cluster->root.rb_node = NULL;
866
418out: 867out:
419 spin_unlock(&cluster->lock); 868 spin_unlock(&cluster->lock);
869 btrfs_put_block_group(block_group);
420 return 0; 870 return 0;
421} 871}
422 872
@@ -425,20 +875,28 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
425 struct btrfs_free_space *info; 875 struct btrfs_free_space *info;
426 struct rb_node *node; 876 struct rb_node *node;
427 struct btrfs_free_cluster *cluster; 877 struct btrfs_free_cluster *cluster;
428 struct btrfs_free_cluster *safe; 878 struct list_head *head;
429 879
430 spin_lock(&block_group->tree_lock); 880 spin_lock(&block_group->tree_lock);
431 881 while ((head = block_group->cluster_list.next) !=
432 list_for_each_entry_safe(cluster, safe, &block_group->cluster_list, 882 &block_group->cluster_list) {
433 block_group_list) { 883 cluster = list_entry(head, struct btrfs_free_cluster,
884 block_group_list);
434 885
435 WARN_ON(cluster->block_group != block_group); 886 WARN_ON(cluster->block_group != block_group);
436 __btrfs_return_cluster_to_free_space(block_group, cluster); 887 __btrfs_return_cluster_to_free_space(block_group, cluster);
888 if (need_resched()) {
889 spin_unlock(&block_group->tree_lock);
890 cond_resched();
891 spin_lock(&block_group->tree_lock);
892 }
437 } 893 }
438 894
439 while ((node = rb_last(&block_group->free_space_bytes)) != NULL) { 895 while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
440 info = rb_entry(node, struct btrfs_free_space, bytes_index); 896 info = rb_entry(node, struct btrfs_free_space, offset_index);
441 unlink_free_space(block_group, info); 897 unlink_free_space(block_group, info);
898 if (info->bitmap)
899 kfree(info->bitmap);
442 kfree(info); 900 kfree(info);
443 if (need_resched()) { 901 if (need_resched()) {
444 spin_unlock(&block_group->tree_lock); 902 spin_unlock(&block_group->tree_lock);
@@ -446,6 +904,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
446 spin_lock(&block_group->tree_lock); 904 spin_lock(&block_group->tree_lock);
447 } 905 }
448 } 906 }
907
449 spin_unlock(&block_group->tree_lock); 908 spin_unlock(&block_group->tree_lock);
450} 909}
451 910
@@ -453,25 +912,35 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
453 u64 offset, u64 bytes, u64 empty_size) 912 u64 offset, u64 bytes, u64 empty_size)
454{ 913{
455 struct btrfs_free_space *entry = NULL; 914 struct btrfs_free_space *entry = NULL;
915 u64 bytes_search = bytes + empty_size;
456 u64 ret = 0; 916 u64 ret = 0;
457 917
458 spin_lock(&block_group->tree_lock); 918 spin_lock(&block_group->tree_lock);
459 entry = tree_search_offset(&block_group->free_space_offset, offset, 919 entry = find_free_space(block_group, &offset, &bytes_search, 0);
460 bytes + empty_size, 1);
461 if (!entry) 920 if (!entry)
462 entry = tree_search_bytes(&block_group->free_space_bytes, 921 goto out;
463 offset, bytes + empty_size); 922
464 if (entry) { 923 ret = offset;
924 if (entry->bitmap) {
925 bitmap_clear_bits(block_group, entry, offset, bytes);
926 if (!entry->bytes) {
927 unlink_free_space(block_group, entry);
928 kfree(entry->bitmap);
929 kfree(entry);
930 block_group->total_bitmaps--;
931 recalculate_thresholds(block_group);
932 }
933 } else {
465 unlink_free_space(block_group, entry); 934 unlink_free_space(block_group, entry);
466 ret = entry->offset;
467 entry->offset += bytes; 935 entry->offset += bytes;
468 entry->bytes -= bytes; 936 entry->bytes -= bytes;
469
470 if (!entry->bytes) 937 if (!entry->bytes)
471 kfree(entry); 938 kfree(entry);
472 else 939 else
473 link_free_space(block_group, entry); 940 link_free_space(block_group, entry);
474 } 941 }
942
943out:
475 spin_unlock(&block_group->tree_lock); 944 spin_unlock(&block_group->tree_lock);
476 945
477 return ret; 946 return ret;
@@ -517,6 +986,54 @@ int btrfs_return_cluster_to_free_space(
517 return ret; 986 return ret;
518} 987}
519 988
989static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
990 struct btrfs_free_cluster *cluster,
991 u64 bytes, u64 min_start)
992{
993 struct btrfs_free_space *entry;
994 int err;
995 u64 search_start = cluster->window_start;
996 u64 search_bytes = bytes;
997 u64 ret = 0;
998
999 spin_lock(&block_group->tree_lock);
1000 spin_lock(&cluster->lock);
1001
1002 if (!cluster->points_to_bitmap)
1003 goto out;
1004
1005 if (cluster->block_group != block_group)
1006 goto out;
1007
1008 /*
1009 * search_start is the beginning of the bitmap, but at some point it may
1010 * be a good idea to point to the actual start of the free area in the
1011 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
1012 * to 1 to make sure we get the bitmap entry
1013 */
1014 entry = tree_search_offset(block_group,
1015 offset_to_bitmap(block_group, search_start),
1016 1, 0);
1017 if (!entry || !entry->bitmap)
1018 goto out;
1019
1020 search_start = min_start;
1021 search_bytes = bytes;
1022
1023 err = search_bitmap(block_group, entry, &search_start,
1024 &search_bytes);
1025 if (err)
1026 goto out;
1027
1028 ret = search_start;
1029 bitmap_clear_bits(block_group, entry, ret, bytes);
1030out:
1031 spin_unlock(&cluster->lock);
1032 spin_unlock(&block_group->tree_lock);
1033
1034 return ret;
1035}
1036
520/* 1037/*
521 * given a cluster, try to allocate 'bytes' from it, returns 0 1038 * given a cluster, try to allocate 'bytes' from it, returns 0
522 * if it couldn't find anything suitably large, or a logical disk offset 1039 * if it couldn't find anything suitably large, or a logical disk offset
@@ -530,6 +1047,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
530 struct rb_node *node; 1047 struct rb_node *node;
531 u64 ret = 0; 1048 u64 ret = 0;
532 1049
1050 if (cluster->points_to_bitmap)
1051 return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
1052 min_start);
1053
533 spin_lock(&cluster->lock); 1054 spin_lock(&cluster->lock);
534 if (bytes > cluster->max_size) 1055 if (bytes > cluster->max_size)
535 goto out; 1056 goto out;
@@ -567,9 +1088,73 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
567 } 1088 }
568out: 1089out:
569 spin_unlock(&cluster->lock); 1090 spin_unlock(&cluster->lock);
1091
570 return ret; 1092 return ret;
571} 1093}
572 1094
1095static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1096 struct btrfs_free_space *entry,
1097 struct btrfs_free_cluster *cluster,
1098 u64 offset, u64 bytes, u64 min_bytes)
1099{
1100 unsigned long next_zero;
1101 unsigned long i;
1102 unsigned long search_bits;
1103 unsigned long total_bits;
1104 unsigned long found_bits;
1105 unsigned long start = 0;
1106 unsigned long total_found = 0;
1107 bool found = false;
1108
1109 i = offset_to_bit(entry->offset, block_group->sectorsize,
1110 max_t(u64, offset, entry->offset));
1111 search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
1112 total_bits = bytes_to_bits(bytes, block_group->sectorsize);
1113
1114again:
1115 found_bits = 0;
1116 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
1117 i < BITS_PER_BITMAP;
1118 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
1119 next_zero = find_next_zero_bit(entry->bitmap,
1120 BITS_PER_BITMAP, i);
1121 if (next_zero - i >= search_bits) {
1122 found_bits = next_zero - i;
1123 break;
1124 }
1125 i = next_zero;
1126 }
1127
1128 if (!found_bits)
1129 return -1;
1130
1131 if (!found) {
1132 start = i;
1133 found = true;
1134 }
1135
1136 total_found += found_bits;
1137
1138 if (cluster->max_size < found_bits * block_group->sectorsize)
1139 cluster->max_size = found_bits * block_group->sectorsize;
1140
1141 if (total_found < total_bits) {
1142 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
1143 if (i - start > total_bits * 2) {
1144 total_found = 0;
1145 cluster->max_size = 0;
1146 found = false;
1147 }
1148 goto again;
1149 }
1150
1151 cluster->window_start = start * block_group->sectorsize +
1152 entry->offset;
1153 cluster->points_to_bitmap = true;
1154
1155 return 0;
1156}
1157
573/* 1158/*
574 * here we try to find a cluster of blocks in a block group. The goal 1159 * here we try to find a cluster of blocks in a block group. The goal
575 * is to find at least bytes free and up to empty_size + bytes free. 1160 * is to find at least bytes free and up to empty_size + bytes free.
@@ -587,12 +1172,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
587 struct btrfs_free_space *entry = NULL; 1172 struct btrfs_free_space *entry = NULL;
588 struct rb_node *node; 1173 struct rb_node *node;
589 struct btrfs_free_space *next; 1174 struct btrfs_free_space *next;
590 struct btrfs_free_space *last; 1175 struct btrfs_free_space *last = NULL;
591 u64 min_bytes; 1176 u64 min_bytes;
592 u64 window_start; 1177 u64 window_start;
593 u64 window_free; 1178 u64 window_free;
594 u64 max_extent = 0; 1179 u64 max_extent = 0;
595 int total_retries = 0; 1180 bool found_bitmap = false;
596 int ret; 1181 int ret;
597 1182
598 /* for metadata, allow allocates with more holes */ 1183 /* for metadata, allow allocates with more holes */
@@ -620,31 +1205,80 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
620 goto out; 1205 goto out;
621 } 1206 }
622again: 1207again:
623 min_bytes = min(min_bytes, bytes + empty_size); 1208 entry = tree_search_offset(block_group, offset, found_bitmap, 1);
624 entry = tree_search_bytes(&block_group->free_space_bytes,
625 offset, min_bytes);
626 if (!entry) { 1209 if (!entry) {
627 ret = -ENOSPC; 1210 ret = -ENOSPC;
628 goto out; 1211 goto out;
629 } 1212 }
1213
1214 /*
1215 * If found_bitmap is true, we exhausted our search for extent entries,
1216 * and we just want to search all of the bitmaps that we can find, and
1217 * ignore any extent entries we find.
1218 */
1219 while (entry->bitmap || found_bitmap ||
1220 (!entry->bitmap && entry->bytes < min_bytes)) {
1221 struct rb_node *node = rb_next(&entry->offset_index);
1222
1223 if (entry->bitmap && entry->bytes > bytes + empty_size) {
1224 ret = btrfs_bitmap_cluster(block_group, entry, cluster,
1225 offset, bytes + empty_size,
1226 min_bytes);
1227 if (!ret)
1228 goto got_it;
1229 }
1230
1231 if (!node) {
1232 ret = -ENOSPC;
1233 goto out;
1234 }
1235 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1236 }
1237
1238 /*
1239 * We already searched all the extent entries from the passed in offset
1240 * to the end and didn't find enough space for the cluster, and we also
1241 * didn't find any bitmaps that met our criteria, just go ahead and exit
1242 */
1243 if (found_bitmap) {
1244 ret = -ENOSPC;
1245 goto out;
1246 }
1247
1248 cluster->points_to_bitmap = false;
630 window_start = entry->offset; 1249 window_start = entry->offset;
631 window_free = entry->bytes; 1250 window_free = entry->bytes;
632 last = entry; 1251 last = entry;
633 max_extent = entry->bytes; 1252 max_extent = entry->bytes;
634 1253
635 while(1) { 1254 while (1) {
636 /* out window is just right, lets fill it */ 1255 /* out window is just right, lets fill it */
637 if (window_free >= bytes + empty_size) 1256 if (window_free >= bytes + empty_size)
638 break; 1257 break;
639 1258
640 node = rb_next(&last->offset_index); 1259 node = rb_next(&last->offset_index);
641 if (!node) { 1260 if (!node) {
1261 if (found_bitmap)
1262 goto again;
642 ret = -ENOSPC; 1263 ret = -ENOSPC;
643 goto out; 1264 goto out;
644 } 1265 }
645 next = rb_entry(node, struct btrfs_free_space, offset_index); 1266 next = rb_entry(node, struct btrfs_free_space, offset_index);
646 1267
647 /* 1268 /*
1269 * we found a bitmap, so if this search doesn't result in a
1270 * cluster, we know to go and search again for the bitmaps and
1271 * start looking for space there
1272 */
1273 if (next->bitmap) {
1274 if (!found_bitmap)
1275 offset = next->offset;
1276 found_bitmap = true;
1277 last = next;
1278 continue;
1279 }
1280
1281 /*
648 * we haven't filled the empty size and the window is 1282 * we haven't filled the empty size and the window is
649 * very large. reset and try again 1283 * very large. reset and try again
650 */ 1284 */
@@ -655,19 +1289,6 @@ again:
655 window_free = entry->bytes; 1289 window_free = entry->bytes;
656 last = entry; 1290 last = entry;
657 max_extent = 0; 1291 max_extent = 0;
658 total_retries++;
659 if (total_retries % 64 == 0) {
660 if (min_bytes >= (bytes + empty_size)) {
661 ret = -ENOSPC;
662 goto out;
663 }
664 /*
665 * grow our allocation a bit, we're not having
666 * much luck
667 */
668 min_bytes *= 2;
669 goto again;
670 }
671 } else { 1292 } else {
672 last = next; 1293 last = next;
673 window_free += next->bytes; 1294 window_free += next->bytes;
@@ -685,11 +1306,19 @@ again:
685 * The cluster includes an rbtree, but only uses the offset index 1306 * The cluster includes an rbtree, but only uses the offset index
686 * of each free space cache entry. 1307 * of each free space cache entry.
687 */ 1308 */
688 while(1) { 1309 while (1) {
689 node = rb_next(&entry->offset_index); 1310 node = rb_next(&entry->offset_index);
690 unlink_free_space(block_group, entry); 1311 if (entry->bitmap && node) {
1312 entry = rb_entry(node, struct btrfs_free_space,
1313 offset_index);
1314 continue;
1315 } else if (entry->bitmap && !node) {
1316 break;
1317 }
1318
1319 rb_erase(&entry->offset_index, &block_group->free_space_offset);
691 ret = tree_insert_offset(&cluster->root, entry->offset, 1320 ret = tree_insert_offset(&cluster->root, entry->offset,
692 &entry->offset_index); 1321 &entry->offset_index, 0);
693 BUG_ON(ret); 1322 BUG_ON(ret);
694 1323
695 if (!node || entry == last) 1324 if (!node || entry == last)
@@ -697,8 +1326,10 @@ again:
697 1326
698 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1327 entry = rb_entry(node, struct btrfs_free_space, offset_index);
699 } 1328 }
700 ret = 0; 1329
701 cluster->max_size = max_extent; 1330 cluster->max_size = max_extent;
1331got_it:
1332 ret = 0;
702 atomic_inc(&block_group->count); 1333 atomic_inc(&block_group->count);
703 list_add_tail(&cluster->block_group_list, &block_group->cluster_list); 1334 list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
704 cluster->block_group = block_group; 1335 cluster->block_group = block_group;
@@ -718,6 +1349,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
718 spin_lock_init(&cluster->refill_lock); 1349 spin_lock_init(&cluster->refill_lock);
719 cluster->root.rb_node = NULL; 1350 cluster->root.rb_node = NULL;
720 cluster->max_size = 0; 1351 cluster->max_size = 0;
1352 cluster->points_to_bitmap = false;
721 INIT_LIST_HEAD(&cluster->block_group_list); 1353 INIT_LIST_HEAD(&cluster->block_group_list);
722 cluster->block_group = NULL; 1354 cluster->block_group = NULL;
723} 1355}
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 266fb8764054..890a8e79011b 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -19,6 +19,14 @@
19#ifndef __BTRFS_FREE_SPACE_CACHE 19#ifndef __BTRFS_FREE_SPACE_CACHE
20#define __BTRFS_FREE_SPACE_CACHE 20#define __BTRFS_FREE_SPACE_CACHE
21 21
22struct btrfs_free_space {
23 struct rb_node offset_index;
24 u64 offset;
25 u64 bytes;
26 unsigned long *bitmap;
27 struct list_head list;
28};
29
22int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 30int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
23 u64 bytenr, u64 size); 31 u64 bytenr, u64 size);
24int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 32int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dbe1aabf96cd..272b9b2bea86 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -26,7 +26,6 @@
26#include <linux/time.h> 26#include <linux/time.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/string.h> 28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
31#include <linux/mpage.h> 30#include <linux/mpage.h>
32#include <linux/swap.h> 31#include <linux/swap.h>
@@ -2604,8 +2603,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2604 if (root->ref_cows) 2603 if (root->ref_cows)
2605 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 2604 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
2606 path = btrfs_alloc_path(); 2605 path = btrfs_alloc_path();
2607 path->reada = -1;
2608 BUG_ON(!path); 2606 BUG_ON(!path);
2607 path->reada = -1;
2609 2608
2610 /* FIXME, add redo link to tree so we don't leak on crash */ 2609 /* FIXME, add redo link to tree so we don't leak on crash */
2611 key.objectid = inode->i_ino; 2610 key.objectid = inode->i_ino;
@@ -3580,12 +3579,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3580 owner = 1; 3579 owner = 1;
3581 BTRFS_I(inode)->block_group = 3580 BTRFS_I(inode)->block_group =
3582 btrfs_find_block_group(root, 0, alloc_hint, owner); 3581 btrfs_find_block_group(root, 0, alloc_hint, owner);
3583 if ((mode & S_IFREG)) {
3584 if (btrfs_test_opt(root, NODATASUM))
3585 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3586 if (btrfs_test_opt(root, NODATACOW))
3587 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3588 }
3589 3582
3590 key[0].objectid = objectid; 3583 key[0].objectid = objectid;
3591 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); 3584 btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -3640,6 +3633,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3640 3633
3641 btrfs_inherit_iflags(inode, dir); 3634 btrfs_inherit_iflags(inode, dir);
3642 3635
3636 if ((mode & S_IFREG)) {
3637 if (btrfs_test_opt(root, NODATASUM))
3638 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
3639 if (btrfs_test_opt(root, NODATACOW))
3640 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
3641 }
3642
3643 insert_inode_hash(inode); 3643 insert_inode_hash(inode);
3644 inode_tree_add(inode); 3644 inode_tree_add(inode);
3645 return inode; 3645 return inode;
@@ -4785,8 +4785,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4785 * and the replacement file is large. Start IO on it now so 4785 * and the replacement file is large. Start IO on it now so
4786 * we don't add too much work to the end of the transaction 4786 * we don't add too much work to the end of the transaction
4787 */ 4787 */
4788 if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && 4788 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
4789 new_inode->i_size &&
4790 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 4789 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
4791 filemap_flush(old_inode->i_mapping); 4790 filemap_flush(old_inode->i_mapping);
4792 4791
@@ -5082,6 +5081,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5082 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5081 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5083 struct extent_map *em; 5082 struct extent_map *em;
5084 struct btrfs_trans_handle *trans; 5083 struct btrfs_trans_handle *trans;
5084 struct btrfs_root *root;
5085 int ret; 5085 int ret;
5086 5086
5087 alloc_start = offset & ~mask; 5087 alloc_start = offset & ~mask;
@@ -5100,6 +5100,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5100 goto out; 5100 goto out;
5101 } 5101 }
5102 5102
5103 root = BTRFS_I(inode)->root;
5104
5105 ret = btrfs_check_data_free_space(root, inode,
5106 alloc_end - alloc_start);
5107 if (ret)
5108 goto out;
5109
5103 locked_end = alloc_end - 1; 5110 locked_end = alloc_end - 1;
5104 while (1) { 5111 while (1) {
5105 struct btrfs_ordered_extent *ordered; 5112 struct btrfs_ordered_extent *ordered;
@@ -5107,7 +5114,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5107 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); 5114 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5108 if (!trans) { 5115 if (!trans) {
5109 ret = -EIO; 5116 ret = -EIO;
5110 goto out; 5117 goto out_free;
5111 } 5118 }
5112 5119
5113 /* the extent lock is ordered inside the running 5120 /* the extent lock is ordered inside the running
@@ -5168,6 +5175,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5168 GFP_NOFS); 5175 GFP_NOFS);
5169 5176
5170 btrfs_end_transaction(trans, BTRFS_I(inode)->root); 5177 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5178out_free:
5179 btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
5171out: 5180out:
5172 mutex_unlock(&inode->i_mutex); 5181 mutex_unlock(&inode->i_mutex);
5173 return ret; 5182 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index eff18f5b5362..bd88f25889f7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -27,7 +27,6 @@
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/string.h> 29#include <linux/string.h>
30#include <linux/smp_lock.h>
31#include <linux/backing-dev.h> 30#include <linux/backing-dev.h>
32#include <linux/mount.h> 31#include <linux/mount.h>
33#include <linux/mpage.h> 32#include <linux/mpage.h>
@@ -1028,7 +1027,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1028 struct btrfs_file_extent_item); 1027 struct btrfs_file_extent_item);
1029 comp = btrfs_file_extent_compression(leaf, extent); 1028 comp = btrfs_file_extent_compression(leaf, extent);
1030 type = btrfs_file_extent_type(leaf, extent); 1029 type = btrfs_file_extent_type(leaf, extent);
1031 if (type == BTRFS_FILE_EXTENT_REG) { 1030 if (type == BTRFS_FILE_EXTENT_REG ||
1031 type == BTRFS_FILE_EXTENT_PREALLOC) {
1032 disko = btrfs_file_extent_disk_bytenr(leaf, 1032 disko = btrfs_file_extent_disk_bytenr(leaf,
1033 extent); 1033 extent);
1034 diskl = btrfs_file_extent_disk_num_bytes(leaf, 1034 diskl = btrfs_file_extent_disk_num_bytes(leaf,
@@ -1051,7 +1051,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1051 new_key.objectid = inode->i_ino; 1051 new_key.objectid = inode->i_ino;
1052 new_key.offset = key.offset + destoff - off; 1052 new_key.offset = key.offset + destoff - off;
1053 1053
1054 if (type == BTRFS_FILE_EXTENT_REG) { 1054 if (type == BTRFS_FILE_EXTENT_REG ||
1055 type == BTRFS_FILE_EXTENT_PREALLOC) {
1055 ret = btrfs_insert_empty_item(trans, root, path, 1056 ret = btrfs_insert_empty_item(trans, root, path,
1056 &new_key, size); 1057 &new_key, size);
1057 if (ret) 1058 if (ret)
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 6d6523da0a30..0d126be22b63 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -309,7 +309,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
309 } 309 }
310 printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n", 310 printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
311 (unsigned long long)btrfs_header_bytenr(c), 311 (unsigned long long)btrfs_header_bytenr(c),
312 btrfs_header_level(c), nr, 312 level, nr,
313 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); 313 (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
314 for (i = 0; i < nr; i++) { 314 for (i = 0; i < nr; i++) {
315 btrfs_node_key_to_cpu(c, &key, i); 315 btrfs_node_key_to_cpu(c, &key, i);
@@ -326,10 +326,10 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
326 btrfs_level_size(root, level - 1), 326 btrfs_level_size(root, level - 1),
327 btrfs_node_ptr_generation(c, i)); 327 btrfs_node_ptr_generation(c, i));
328 if (btrfs_is_leaf(next) && 328 if (btrfs_is_leaf(next) &&
329 btrfs_header_level(c) != 1) 329 level != 1)
330 BUG(); 330 BUG();
331 if (btrfs_header_level(next) != 331 if (btrfs_header_level(next) !=
332 btrfs_header_level(c) - 1) 332 level - 1)
333 BUG(); 333 BUG();
334 btrfs_print_tree(root, next); 334 btrfs_print_tree(root, next);
335 free_extent_buffer(next); 335 free_extent_buffer(next);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b23dc209ae10..c04f7f212602 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -670,6 +670,8 @@ again:
670 err = ret; 670 err = ret;
671 goto out; 671 goto out;
672 } 672 }
673 if (ret > 0 && path2->slots[level] > 0)
674 path2->slots[level]--;
673 675
674 eb = path2->nodes[level]; 676 eb = path2->nodes[level];
675 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != 677 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
@@ -1609,6 +1611,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1609 BUG_ON(level == 0); 1611 BUG_ON(level == 0);
1610 path->lowest_level = level; 1612 path->lowest_level = level;
1611 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); 1613 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
1614 path->lowest_level = 0;
1612 if (ret < 0) { 1615 if (ret < 0) {
1613 btrfs_free_path(path); 1616 btrfs_free_path(path);
1614 return ret; 1617 return ret;
@@ -1788,7 +1791,7 @@ static void merge_func(struct btrfs_work *work)
1788 btrfs_end_transaction(trans, root); 1791 btrfs_end_transaction(trans, root);
1789 } 1792 }
1790 1793
1791 btrfs_drop_dead_root(reloc_root); 1794 btrfs_drop_snapshot(reloc_root, 0);
1792 1795
1793 if (atomic_dec_and_test(async->num_pending)) 1796 if (atomic_dec_and_test(async->num_pending))
1794 complete(async->done); 1797 complete(async->done);
@@ -2075,9 +2078,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
2075 2078
2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2079 ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2077 BUG_ON(ret); 2080 BUG_ON(ret);
2078
2079 btrfs_tree_unlock(eb);
2080 free_extent_buffer(eb);
2081 } 2081 }
2082 if (!lowest) { 2082 if (!lowest) {
2083 btrfs_tree_unlock(upper->eb); 2083 btrfs_tree_unlock(upper->eb);
@@ -2553,8 +2553,13 @@ int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 2553 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2554 2554
2555 /* make sure the dirty trick played by the caller work */ 2555 /* make sure the dirty trick played by the caller work */
2556 ret = invalidate_inode_pages2_range(inode->i_mapping, 2556 while (1) {
2557 first_index, last_index); 2557 ret = invalidate_inode_pages2_range(inode->i_mapping,
2558 first_index, last_index);
2559 if (ret != -EBUSY)
2560 break;
2561 schedule_timeout(HZ/10);
2562 }
2558 if (ret) 2563 if (ret)
2559 goto out_unlock; 2564 goto out_unlock;
2560 2565
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9f179d4832d5..6d6d06cb6dfc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -26,7 +26,6 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/seq_file.h> 27#include <linux/seq_file.h>
28#include <linux/string.h> 28#include <linux/string.h>
29#include <linux/smp_lock.h>
30#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
31#include <linux/mount.h> 30#include <linux/mount.h>
32#include <linux/mpage.h> 31#include <linux/mpage.h>
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 4e83457ea253..cdbb5022da52 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -40,6 +40,12 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
40 } 40 }
41} 41}
42 42
43static noinline void switch_commit_root(struct btrfs_root *root)
44{
45 free_extent_buffer(root->commit_root);
46 root->commit_root = btrfs_root_node(root);
47}
48
43/* 49/*
44 * either allocate a new transaction or hop into the existing one 50 * either allocate a new transaction or hop into the existing one
45 */ 51 */
@@ -444,9 +450,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
444 450
445 btrfs_write_dirty_block_groups(trans, root); 451 btrfs_write_dirty_block_groups(trans, root);
446 452
447 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
448 BUG_ON(ret);
449
450 while (1) { 453 while (1) {
451 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 454 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
452 if (old_root_bytenr == root->node->start) 455 if (old_root_bytenr == root->node->start)
@@ -457,13 +460,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
457 &root->root_key, 460 &root->root_key,
458 &root->root_item); 461 &root->root_item);
459 BUG_ON(ret); 462 BUG_ON(ret);
460 btrfs_write_dirty_block_groups(trans, root);
461 463
462 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 464 ret = btrfs_write_dirty_block_groups(trans, root);
463 BUG_ON(ret); 465 BUG_ON(ret);
464 } 466 }
465 free_extent_buffer(root->commit_root); 467
466 root->commit_root = btrfs_root_node(root); 468 if (root != root->fs_info->extent_root)
469 switch_commit_root(root);
470
467 return 0; 471 return 0;
468} 472}
469 473
@@ -495,10 +499,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
495 root = list_entry(next, struct btrfs_root, dirty_list); 499 root = list_entry(next, struct btrfs_root, dirty_list);
496 500
497 update_cowonly_root(trans, root); 501 update_cowonly_root(trans, root);
498
499 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
500 BUG_ON(ret);
501 } 502 }
503
504 down_write(&fs_info->extent_commit_sem);
505 switch_commit_root(fs_info->extent_root);
506 up_write(&fs_info->extent_commit_sem);
507
502 return 0; 508 return 0;
503} 509}
504 510
@@ -544,8 +550,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
544 btrfs_update_reloc_root(trans, root); 550 btrfs_update_reloc_root(trans, root);
545 551
546 if (root->commit_root != root->node) { 552 if (root->commit_root != root->node) {
547 free_extent_buffer(root->commit_root); 553 switch_commit_root(root);
548 root->commit_root = btrfs_root_node(root);
549 btrfs_set_root_node(&root->root_item, 554 btrfs_set_root_node(&root->root_item,
550 root->node); 555 root->node);
551 } 556 }
@@ -593,6 +598,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
593 return 0; 598 return 0;
594} 599}
595 600
601#if 0
596/* 602/*
597 * when dropping snapshots, we generate a ton of delayed refs, and it makes 603 * when dropping snapshots, we generate a ton of delayed refs, and it makes
598 * sense not to join the transaction while it is trying to flush the current 604 * sense not to join the transaction while it is trying to flush the current
@@ -681,6 +687,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root)
681 btrfs_btree_balance_dirty(tree_root, nr); 687 btrfs_btree_balance_dirty(tree_root, nr);
682 return ret; 688 return ret;
683} 689}
690#endif
684 691
685/* 692/*
686 * new snapshots need to be created at a very specific time in the 693 * new snapshots need to be created at a very specific time in the
@@ -850,6 +857,16 @@ static void update_super_roots(struct btrfs_root *root)
850 super->root_level = root_item->level; 857 super->root_level = root_item->level;
851} 858}
852 859
860int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
861{
862 int ret = 0;
863 spin_lock(&info->new_trans_lock);
864 if (info->running_transaction)
865 ret = info->running_transaction->in_commit;
866 spin_unlock(&info->new_trans_lock);
867 return ret;
868}
869
853int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 870int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
854 struct btrfs_root *root) 871 struct btrfs_root *root)
855{ 872{
@@ -941,9 +958,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
941 958
942 mutex_unlock(&root->fs_info->trans_mutex); 959 mutex_unlock(&root->fs_info->trans_mutex);
943 960
944 if (flush_on_commit || snap_pending) { 961 if (flush_on_commit) {
945 if (flush_on_commit) 962 btrfs_start_delalloc_inodes(root);
946 btrfs_start_delalloc_inodes(root); 963 ret = btrfs_wait_ordered_extents(root, 0);
964 BUG_ON(ret);
965 } else if (snap_pending) {
947 ret = btrfs_wait_ordered_extents(root, 1); 966 ret = btrfs_wait_ordered_extents(root, 1);
948 BUG_ON(ret); 967 BUG_ON(ret);
949 } 968 }
@@ -1007,15 +1026,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1007 1026
1008 btrfs_set_root_node(&root->fs_info->tree_root->root_item, 1027 btrfs_set_root_node(&root->fs_info->tree_root->root_item,
1009 root->fs_info->tree_root->node); 1028 root->fs_info->tree_root->node);
1010 free_extent_buffer(root->fs_info->tree_root->commit_root); 1029 switch_commit_root(root->fs_info->tree_root);
1011 root->fs_info->tree_root->commit_root =
1012 btrfs_root_node(root->fs_info->tree_root);
1013 1030
1014 btrfs_set_root_node(&root->fs_info->chunk_root->root_item, 1031 btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
1015 root->fs_info->chunk_root->node); 1032 root->fs_info->chunk_root->node);
1016 free_extent_buffer(root->fs_info->chunk_root->commit_root); 1033 switch_commit_root(root->fs_info->chunk_root);
1017 root->fs_info->chunk_root->commit_root =
1018 btrfs_root_node(root->fs_info->chunk_root);
1019 1034
1020 update_super_roots(root); 1035 update_super_roots(root);
1021 1036
@@ -1055,6 +1070,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1055 cur_trans->commit_done = 1; 1070 cur_trans->commit_done = 1;
1056 1071
1057 root->fs_info->last_trans_committed = cur_trans->transid; 1072 root->fs_info->last_trans_committed = cur_trans->transid;
1073
1058 wake_up(&cur_trans->commit_wait); 1074 wake_up(&cur_trans->commit_wait);
1059 1075
1060 put_transaction(cur_trans); 1076 put_transaction(cur_trans);
@@ -1081,7 +1097,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
1081 while (!list_empty(&list)) { 1097 while (!list_empty(&list)) {
1082 root = list_entry(list.next, struct btrfs_root, root_list); 1098 root = list_entry(list.next, struct btrfs_root, root_list);
1083 list_del_init(&root->root_list); 1099 list_del_init(&root->root_list);
1084 btrfs_drop_dead_root(root); 1100 btrfs_drop_snapshot(root, 0);
1085 } 1101 }
1086 return 0; 1102 return 0;
1087} 1103}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 961c3ee5a2e1..663c67404918 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,4 +107,5 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
107 struct btrfs_root *root); 107 struct btrfs_root *root);
108int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, 108int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
109 struct extent_io_tree *dirty_pages); 109 struct extent_io_tree *dirty_pages);
110int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
110#endif 111#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c13922206d1b..d91b0de7c502 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -797,7 +797,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
797 return -ENOENT; 797 return -ENOENT;
798 798
799 inode = read_one_inode(root, key->objectid); 799 inode = read_one_inode(root, key->objectid);
800 BUG_ON(!dir); 800 BUG_ON(!inode);
801 801
802 ref_ptr = btrfs_item_ptr_offset(eb, slot); 802 ref_ptr = btrfs_item_ptr_offset(eb, slot);
803 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); 803 ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3ab80e9cd767..5dbefd11b4af 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -721,7 +721,8 @@ error:
721 */ 721 */
722static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, 722static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
723 struct btrfs_device *device, 723 struct btrfs_device *device,
724 u64 num_bytes, u64 *start) 724 u64 num_bytes, u64 *start,
725 u64 *max_avail)
725{ 726{
726 struct btrfs_key key; 727 struct btrfs_key key;
727 struct btrfs_root *root = device->dev_root; 728 struct btrfs_root *root = device->dev_root;
@@ -758,9 +759,13 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
758 ret = btrfs_search_slot(trans, root, &key, path, 0, 0); 759 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
759 if (ret < 0) 760 if (ret < 0)
760 goto error; 761 goto error;
761 ret = btrfs_previous_item(root, path, 0, key.type); 762 if (ret > 0) {
762 if (ret < 0) 763 ret = btrfs_previous_item(root, path, key.objectid, key.type);
763 goto error; 764 if (ret < 0)
765 goto error;
766 if (ret > 0)
767 start_found = 1;
768 }
764 l = path->nodes[0]; 769 l = path->nodes[0];
765 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 770 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
766 while (1) { 771 while (1) {
@@ -803,6 +808,10 @@ no_more_items:
803 if (last_byte < search_start) 808 if (last_byte < search_start)
804 last_byte = search_start; 809 last_byte = search_start;
805 hole_size = key.offset - last_byte; 810 hole_size = key.offset - last_byte;
811
812 if (hole_size > *max_avail)
813 *max_avail = hole_size;
814
806 if (key.offset > last_byte && 815 if (key.offset > last_byte &&
807 hole_size >= num_bytes) { 816 hole_size >= num_bytes) {
808 *start = last_byte; 817 *start = last_byte;
@@ -1621,6 +1630,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
1621 device->fs_devices->total_rw_bytes += diff; 1630 device->fs_devices->total_rw_bytes += diff;
1622 1631
1623 device->total_bytes = new_size; 1632 device->total_bytes = new_size;
1633 device->disk_total_bytes = new_size;
1624 btrfs_clear_space_info_full(device->dev_root->fs_info); 1634 btrfs_clear_space_info_full(device->dev_root->fs_info);
1625 1635
1626 return btrfs_update_device(trans, device); 1636 return btrfs_update_device(trans, device);
@@ -2007,7 +2017,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2007 goto done; 2017 goto done;
2008 if (ret) { 2018 if (ret) {
2009 ret = 0; 2019 ret = 0;
2010 goto done; 2020 break;
2011 } 2021 }
2012 2022
2013 l = path->nodes[0]; 2023 l = path->nodes[0];
@@ -2015,7 +2025,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
2015 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 2025 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
2016 2026
2017 if (key.objectid != device->devid) 2027 if (key.objectid != device->devid)
2018 goto done; 2028 break;
2019 2029
2020 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 2030 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2021 length = btrfs_dev_extent_length(l, dev_extent); 2031 length = btrfs_dev_extent_length(l, dev_extent);
@@ -2171,6 +2181,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2171 max_chunk_size); 2181 max_chunk_size);
2172 2182
2173again: 2183again:
2184 max_avail = 0;
2174 if (!map || map->num_stripes != num_stripes) { 2185 if (!map || map->num_stripes != num_stripes) {
2175 kfree(map); 2186 kfree(map);
2176 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 2187 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -2219,7 +2230,8 @@ again:
2219 2230
2220 if (device->in_fs_metadata && avail >= min_free) { 2231 if (device->in_fs_metadata && avail >= min_free) {
2221 ret = find_free_dev_extent(trans, device, 2232 ret = find_free_dev_extent(trans, device,
2222 min_free, &dev_offset); 2233 min_free, &dev_offset,
2234 &max_avail);
2223 if (ret == 0) { 2235 if (ret == 0) {
2224 list_move_tail(&device->dev_alloc_list, 2236 list_move_tail(&device->dev_alloc_list,
2225 &private_devs); 2237 &private_devs);
@@ -2795,26 +2807,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
2795 } 2807 }
2796 } 2808 }
2797 2809
2798 for (i = 0; i > nr; i++) {
2799 struct btrfs_multi_bio *multi;
2800 struct btrfs_bio_stripe *stripe;
2801 int ret;
2802
2803 length = 1;
2804 ret = btrfs_map_block(map_tree, WRITE, buf[i],
2805 &length, &multi, 0);
2806 BUG_ON(ret);
2807
2808 stripe = multi->stripes;
2809 for (j = 0; j < multi->num_stripes; j++) {
2810 if (stripe->physical >= physical &&
2811 physical < stripe->physical + length)
2812 break;
2813 }
2814 BUG_ON(j >= multi->num_stripes);
2815 kfree(multi);
2816 }
2817
2818 *logical = buf; 2810 *logical = buf;
2819 *naddrs = nr; 2811 *naddrs = nr;
2820 *stripe_len = map->stripe_len; 2812 *stripe_len = map->stripe_len;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index ecfbce836d32..3e2b90eaa239 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -208,7 +208,7 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
208 *total_in = 0; 208 *total_in = 0;
209 209
210 workspace = find_zlib_workspace(); 210 workspace = find_zlib_workspace();
211 if (!workspace) 211 if (IS_ERR(workspace))
212 return -1; 212 return -1;
213 213
214 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { 214 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
@@ -366,7 +366,7 @@ int btrfs_zlib_decompress_biovec(struct page **pages_in,
366 char *kaddr; 366 char *kaddr;
367 367
368 workspace = find_zlib_workspace(); 368 workspace = find_zlib_workspace();
369 if (!workspace) 369 if (IS_ERR(workspace))
370 return -ENOMEM; 370 return -ENOMEM;
371 371
372 data_in = kmap(pages_in[page_in_index]); 372 data_in = kmap(pages_in[page_in_index]);
@@ -547,7 +547,7 @@ int btrfs_zlib_decompress(unsigned char *data_in,
547 return -ENOMEM; 547 return -ENOMEM;
548 548
549 workspace = find_zlib_workspace(); 549 workspace = find_zlib_workspace();
550 if (!workspace) 550 if (IS_ERR(workspace))
551 return -ENOMEM; 551 return -ENOMEM;
552 552
553 workspace->inf_strm.next_in = data_in; 553 workspace->inf_strm.next_in = data_in;
diff --git a/fs/char_dev.c b/fs/char_dev.c
index b7c9d5187a75..a173551e19d7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -13,7 +13,6 @@
13#include <linux/major.h> 13#include <linux/major.h>
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/smp_lock.h>
17#include <linux/seq_file.h> 16#include <linux/seq_file.h>
18 17
19#include <linux/kobject.h> 18#include <linux/kobject.h>
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index b48689839428..e85b1e4389e0 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,10 @@
1Version 1.60
2-------------
3Fix memory leak in reconnect. Fix oops in DFS mount error path.
4Set s_maxbytes to smaller (the max that vfs can handle) so that
5sendfile will now work over cifs mounts again. Add noforcegid
6and noforceuid mount parameters.
7
1Version 1.59 8Version 1.59
2------------ 9------------
3Client uses server inode numbers (which are persistent) rather than 10Client uses server inode numbers (which are persistent) rather than
@@ -5,7 +12,11 @@ client generated ones by default (mount option "serverino" turned
5on by default if server supports it). Add forceuid and forcegid 12on by default if server supports it). Add forceuid and forcegid
6mount options (so that when negotiating unix extensions specifying 13mount options (so that when negotiating unix extensions specifying
7which uid mounted does not immediately force the server's reported 14which uid mounted does not immediately force the server's reported
8uids to be overridden). 15uids to be overridden). Add support for scope mount parm. Improve
16hard link detection to use same inode for both. Do not set
17read-only dos attribute on directories (for chmod) since Windows
18explorer special cases this attribute bit for directories for
19a different purpose.
9 20
10Version 1.58 21Version 1.58
11------------ 22------------
diff --git a/fs/cifs/README b/fs/cifs/README
index ad92921dbde4..79c1a93400be 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -262,11 +262,11 @@ A partial list of the supported mount options follows:
262 mount. 262 mount.
263 domain Set the SMB/CIFS workgroup name prepended to the 263 domain Set the SMB/CIFS workgroup name prepended to the
264 username during CIFS session establishment 264 username during CIFS session establishment
265 forceuid Set the default uid for inodes based on the uid 265 forceuid Set the default uid for inodes to the uid
266 passed in. For mounts to servers 266 passed in on mount. For mounts to servers
267 which do support the CIFS Unix extensions, such as a 267 which do support the CIFS Unix extensions, such as a
268 properly configured Samba server, the server provides 268 properly configured Samba server, the server provides
269 the uid, gid and mode so this parameter should not be 269 the uid, gid and mode so this parameter should not be
270 specified unless the server and clients uid and gid 270 specified unless the server and clients uid and gid
271 numbering differ. If the server and client are in the 271 numbering differ. If the server and client are in the
272 same domain (e.g. running winbind or nss_ldap) and 272 same domain (e.g. running winbind or nss_ldap) and
@@ -278,11 +278,7 @@ A partial list of the supported mount options follows:
278 of existing files will be the uid (gid) of the person 278 of existing files will be the uid (gid) of the person
279 who executed the mount (root, except when mount.cifs 279 who executed the mount (root, except when mount.cifs
280 is configured setuid for user mounts) unless the "uid=" 280 is configured setuid for user mounts) unless the "uid="
281 (gid) mount option is specified. For the uid (gid) of newly 281 (gid) mount option is specified. Also note that permission
282 created files and directories, ie files created since
283 the last mount of the server share, the expected uid
284 (gid) is cached as long as the inode remains in
285 memory on the client. Also note that permission
286 checks (authorization checks) on accesses to a file occur 282 checks (authorization checks) on accesses to a file occur
287 at the server, but there are cases in which an administrator 283 at the server, but there are cases in which an administrator
288 may want to restrict at the client as well. For those 284 may want to restrict at the client as well. For those
@@ -290,12 +286,15 @@ A partial list of the supported mount options follows:
290 (such as Windows), permissions can also be checked at the 286 (such as Windows), permissions can also be checked at the
291 client, and a crude form of client side permission checking 287 client, and a crude form of client side permission checking
292 can be enabled by specifying file_mode and dir_mode on 288 can be enabled by specifying file_mode and dir_mode on
293 the client. Note that the mount.cifs helper must be 289 the client. (default)
294 at version 1.10 or higher to support specifying the uid 290 forcegid (similar to above but for the groupid instead of uid) (default)
295 (or gid) in non-numeric form. 291 noforceuid Fill in file owner information (uid) by requesting it from
296 forcegid (similar to above but for the groupid instead of uid) 292 the server if possible. With this option, the value given in
293 the uid= option (on mount) will only be used if the server
294 can not support returning uids on inodes.
295 noforcegid (similar to above but for the group owner, gid, instead of uid)
297 uid Set the default uid for inodes, and indicate to the 296 uid Set the default uid for inodes, and indicate to the
298 cifs kernel driver which local user mounted . If the server 297 cifs kernel driver which local user mounted. If the server
299 supports the unix extensions the default uid is 298 supports the unix extensions the default uid is
300 not used to fill in the owner fields of inodes (files) 299 not used to fill in the owner fields of inodes (files)
301 unless the "forceuid" parameter is specified. 300 unless the "forceuid" parameter is specified.
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index 1b09f1670061..20692fbfdb24 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -49,6 +49,7 @@
49#define ASN1_OJI 6 /* Object Identifier */ 49#define ASN1_OJI 6 /* Object Identifier */
50#define ASN1_OJD 7 /* Object Description */ 50#define ASN1_OJD 7 /* Object Description */
51#define ASN1_EXT 8 /* External */ 51#define ASN1_EXT 8 /* External */
52#define ASN1_ENUM 10 /* Enumerated */
52#define ASN1_SEQ 16 /* Sequence */ 53#define ASN1_SEQ 16 /* Sequence */
53#define ASN1_SET 17 /* Set */ 54#define ASN1_SET 17 /* Set */
54#define ASN1_NUMSTR 18 /* Numerical String */ 55#define ASN1_NUMSTR 18 /* Numerical String */
@@ -78,10 +79,12 @@
78#define SPNEGO_OID_LEN 7 79#define SPNEGO_OID_LEN 7
79#define NTLMSSP_OID_LEN 10 80#define NTLMSSP_OID_LEN 10
80#define KRB5_OID_LEN 7 81#define KRB5_OID_LEN 7
82#define KRB5U2U_OID_LEN 8
81#define MSKRB5_OID_LEN 7 83#define MSKRB5_OID_LEN 7
82static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 }; 84static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 };
83static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 }; 85static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 };
84static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 }; 86static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 };
87static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 };
85static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 }; 88static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 };
86 89
87/* 90/*
@@ -122,6 +125,28 @@ asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
122 return 1; 125 return 1;
123} 126}
124 127
128#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */
129static unsigned char
130asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val)
131{
132 unsigned char ch;
133
134 if (ctx->pointer >= ctx->end) {
135 ctx->error = ASN1_ERR_DEC_EMPTY;
136 return 0;
137 }
138
139 ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to lenght octet */
140 if ((ch) == ASN1_ENUM) /* if ch value is ENUM, 0xa */
141 *val = *(++(ctx->pointer)); /* value has enum value */
142 else
143 return 0;
144
145 ctx->pointer++;
146 return 1;
147}
148#endif
149
125static unsigned char 150static unsigned char
126asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) 151asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
127{ 152{
@@ -476,10 +501,9 @@ decode_negTokenInit(unsigned char *security_blob, int length,
476 unsigned int cls, con, tag, oidlen, rc; 501 unsigned int cls, con, tag, oidlen, rc;
477 bool use_ntlmssp = false; 502 bool use_ntlmssp = false;
478 bool use_kerberos = false; 503 bool use_kerberos = false;
504 bool use_kerberosu2u = false;
479 bool use_mskerberos = false; 505 bool use_mskerberos = false;
480 506
481 *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/
482
483 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */ 507 /* cifs_dump_mem(" Received SecBlob ", security_blob, length); */
484 508
485 asn1_open(&ctx, security_blob, length); 509 asn1_open(&ctx, security_blob, length);
@@ -515,6 +539,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
515 return 0; 539 return 0;
516 } 540 }
517 541
542 /* SPNEGO */
518 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 543 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
519 cFYI(1, ("Error decoding negTokenInit")); 544 cFYI(1, ("Error decoding negTokenInit"));
520 return 0; 545 return 0;
@@ -526,6 +551,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
526 return 0; 551 return 0;
527 } 552 }
528 553
554 /* negTokenInit */
529 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 555 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
530 cFYI(1, ("Error decoding negTokenInit")); 556 cFYI(1, ("Error decoding negTokenInit"));
531 return 0; 557 return 0;
@@ -537,6 +563,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
537 return 0; 563 return 0;
538 } 564 }
539 565
566 /* sequence */
540 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 567 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
541 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 568 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
542 return 0; 569 return 0;
@@ -548,6 +575,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
548 return 0; 575 return 0;
549 } 576 }
550 577
578 /* sequence of */
551 if (asn1_header_decode 579 if (asn1_header_decode
552 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 580 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
553 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 581 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
@@ -560,6 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
560 return 0; 588 return 0;
561 } 589 }
562 590
591 /* list of security mechanisms */
563 while (!asn1_eoc_decode(&ctx, sequence_end)) { 592 while (!asn1_eoc_decode(&ctx, sequence_end)) {
564 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 593 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
565 if (!rc) { 594 if (!rc) {
@@ -576,11 +605,15 @@ decode_negTokenInit(unsigned char *security_blob, int length,
576 605
577 if (compare_oid(oid, oidlen, MSKRB5_OID, 606 if (compare_oid(oid, oidlen, MSKRB5_OID,
578 MSKRB5_OID_LEN) && 607 MSKRB5_OID_LEN) &&
579 !use_kerberos) 608 !use_mskerberos)
580 use_mskerberos = true; 609 use_mskerberos = true;
610 else if (compare_oid(oid, oidlen, KRB5U2U_OID,
611 KRB5U2U_OID_LEN) &&
612 !use_kerberosu2u)
613 use_kerberosu2u = true;
581 else if (compare_oid(oid, oidlen, KRB5_OID, 614 else if (compare_oid(oid, oidlen, KRB5_OID,
582 KRB5_OID_LEN) && 615 KRB5_OID_LEN) &&
583 !use_mskerberos) 616 !use_kerberos)
584 use_kerberos = true; 617 use_kerberos = true;
585 else if (compare_oid(oid, oidlen, NTLMSSP_OID, 618 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
586 NTLMSSP_OID_LEN)) 619 NTLMSSP_OID_LEN))
@@ -593,7 +626,12 @@ decode_negTokenInit(unsigned char *security_blob, int length,
593 } 626 }
594 } 627 }
595 628
629 /* mechlistMIC */
596 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 630 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
631 /* Check if we have reached the end of the blob, but with
632 no mechListMic (e.g. NTLMSSP instead of KRB5) */
633 if (ctx.error == ASN1_ERR_DEC_EMPTY)
634 goto decode_negtoken_exit;
597 cFYI(1, ("Error decoding last part negTokenInit exit3")); 635 cFYI(1, ("Error decoding last part negTokenInit exit3"));
598 return 0; 636 return 0;
599 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 637 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
@@ -602,6 +640,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
602 cls, con, tag, end, *end)); 640 cls, con, tag, end, *end));
603 return 0; 641 return 0;
604 } 642 }
643
644 /* sequence */
605 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 645 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
606 cFYI(1, ("Error decoding last part negTokenInit exit5")); 646 cFYI(1, ("Error decoding last part negTokenInit exit5"));
607 return 0; 647 return 0;
@@ -611,6 +651,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
611 cls, con, tag, end, *end)); 651 cls, con, tag, end, *end));
612 } 652 }
613 653
654 /* sequence of */
614 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 655 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
615 cFYI(1, ("Error decoding last part negTokenInit exit 7")); 656 cFYI(1, ("Error decoding last part negTokenInit exit 7"));
616 return 0; 657 return 0;
@@ -619,6 +660,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
619 cls, con, tag, end, *end)); 660 cls, con, tag, end, *end));
620 return 0; 661 return 0;
621 } 662 }
663
664 /* general string */
622 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 665 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
623 cFYI(1, ("Error decoding last part negTokenInit exit9")); 666 cFYI(1, ("Error decoding last part negTokenInit exit9"));
624 return 0; 667 return 0;
@@ -630,13 +673,13 @@ decode_negTokenInit(unsigned char *security_blob, int length,
630 } 673 }
631 cFYI(1, ("Need to call asn1_octets_decode() function for %s", 674 cFYI(1, ("Need to call asn1_octets_decode() function for %s",
632 ctx.pointer)); /* is this UTF-8 or ASCII? */ 675 ctx.pointer)); /* is this UTF-8 or ASCII? */
633 676decode_negtoken_exit:
634 if (use_kerberos) 677 if (use_kerberos)
635 *secType = Kerberos; 678 *secType = Kerberos;
636 else if (use_mskerberos) 679 else if (use_mskerberos)
637 *secType = MSKerberos; 680 *secType = MSKerberos;
638 else if (use_ntlmssp) 681 else if (use_ntlmssp)
639 *secType = NTLMSSP; 682 *secType = RawNTLMSSP;
640 683
641 return 1; 684 return 1;
642} 685}
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 7f19fefd3d45..42cec2a7c0cf 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -261,6 +261,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
261 atomic_set(&tcon->num_reads, 0); 261 atomic_set(&tcon->num_reads, 0);
262 atomic_set(&tcon->num_oplock_brks, 0); 262 atomic_set(&tcon->num_oplock_brks, 0);
263 atomic_set(&tcon->num_opens, 0); 263 atomic_set(&tcon->num_opens, 0);
264 atomic_set(&tcon->num_posixopens, 0);
265 atomic_set(&tcon->num_posixmkdirs, 0);
264 atomic_set(&tcon->num_closes, 0); 266 atomic_set(&tcon->num_closes, 0);
265 atomic_set(&tcon->num_deletes, 0); 267 atomic_set(&tcon->num_deletes, 0);
266 atomic_set(&tcon->num_mkdirs, 0); 268 atomic_set(&tcon->num_mkdirs, 0);
@@ -347,11 +349,15 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
347 atomic_read(&tcon->num_locks), 349 atomic_read(&tcon->num_locks),
348 atomic_read(&tcon->num_hardlinks), 350 atomic_read(&tcon->num_hardlinks),
349 atomic_read(&tcon->num_symlinks)); 351 atomic_read(&tcon->num_symlinks));
350 seq_printf(m, "\nOpens: %d Closes: %d" 352 seq_printf(m, "\nOpens: %d Closes: %d "
351 "Deletes: %d", 353 "Deletes: %d",
352 atomic_read(&tcon->num_opens), 354 atomic_read(&tcon->num_opens),
353 atomic_read(&tcon->num_closes), 355 atomic_read(&tcon->num_closes),
354 atomic_read(&tcon->num_deletes)); 356 atomic_read(&tcon->num_deletes));
357 seq_printf(m, "\nPosix Opens: %d "
358 "Posix Mkdirs: %d",
359 atomic_read(&tcon->num_posixopens),
360 atomic_read(&tcon->num_posixmkdirs));
355 seq_printf(m, "\nMkdirs: %d Rmdirs: %d", 361 seq_printf(m, "\nMkdirs: %d Rmdirs: %d",
356 atomic_read(&tcon->num_mkdirs), 362 atomic_read(&tcon->num_mkdirs),
357 atomic_read(&tcon->num_rmdirs)); 363 atomic_read(&tcon->num_rmdirs));
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 3bb11be8b6a8..606912d8f2a8 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -55,7 +55,7 @@ void cifs_dfs_release_automount_timer(void)
55 * i.e. strips from UNC trailing path that is not part of share 55 * i.e. strips from UNC trailing path that is not part of share
56 * name and fixup missing '\' in the begining of DFS node refferal 56 * name and fixup missing '\' in the begining of DFS node refferal
57 * if neccessary. 57 * if neccessary.
58 * Returns pointer to share name on success or NULL on error. 58 * Returns pointer to share name on success or ERR_PTR on error.
59 * Caller is responsible for freeing returned string. 59 * Caller is responsible for freeing returned string.
60 */ 60 */
61static char *cifs_get_share_name(const char *node_name) 61static char *cifs_get_share_name(const char *node_name)
@@ -68,7 +68,7 @@ static char *cifs_get_share_name(const char *node_name)
68 UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */, 68 UNC = kmalloc(len+2 /*for term null and additional \ if it's missed */,
69 GFP_KERNEL); 69 GFP_KERNEL);
70 if (!UNC) 70 if (!UNC)
71 return NULL; 71 return ERR_PTR(-ENOMEM);
72 72
73 /* get share name and server name */ 73 /* get share name and server name */
74 if (node_name[1] != '\\') { 74 if (node_name[1] != '\\') {
@@ -87,7 +87,7 @@ static char *cifs_get_share_name(const char *node_name)
87 cERROR(1, ("%s: no server name end in node name: %s", 87 cERROR(1, ("%s: no server name end in node name: %s",
88 __func__, node_name)); 88 __func__, node_name));
89 kfree(UNC); 89 kfree(UNC);
90 return NULL; 90 return ERR_PTR(-EINVAL);
91 } 91 }
92 92
93 /* find sharename end */ 93 /* find sharename end */
@@ -133,6 +133,12 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
133 return ERR_PTR(-EINVAL); 133 return ERR_PTR(-EINVAL);
134 134
135 *devname = cifs_get_share_name(ref->node_name); 135 *devname = cifs_get_share_name(ref->node_name);
136 if (IS_ERR(*devname)) {
137 rc = PTR_ERR(*devname);
138 *devname = NULL;
139 goto compose_mount_options_err;
140 }
141
136 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 142 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
137 if (rc != 0) { 143 if (rc != 0) {
138 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d", 144 cERROR(1, ("%s: Failed to resolve server part of %s to IP: %d",
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 4a4581cb2b5e..051caecf7d67 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -86,6 +86,9 @@ struct key_type cifs_spnego_key_type = {
86/* strlen of ";user=" */ 86/* strlen of ";user=" */
87#define USER_KEY_LEN 6 87#define USER_KEY_LEN 6
88 88
89/* strlen of ";pid=0x" */
90#define PID_KEY_LEN 7
91
89/* get a key struct with a SPNEGO security blob, suitable for session setup */ 92/* get a key struct with a SPNEGO security blob, suitable for session setup */
90struct key * 93struct key *
91cifs_get_spnego_key(struct cifsSesInfo *sesInfo) 94cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
@@ -103,7 +106,8 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
103 IP_KEY_LEN + INET6_ADDRSTRLEN + 106 IP_KEY_LEN + INET6_ADDRSTRLEN +
104 MAX_MECH_STR_LEN + 107 MAX_MECH_STR_LEN +
105 UID_KEY_LEN + (sizeof(uid_t) * 2) + 108 UID_KEY_LEN + (sizeof(uid_t) * 2) +
106 USER_KEY_LEN + strlen(sesInfo->userName) + 1; 109 USER_KEY_LEN + strlen(sesInfo->userName) +
110 PID_KEY_LEN + (sizeof(pid_t) * 2) + 1;
107 111
108 spnego_key = ERR_PTR(-ENOMEM); 112 spnego_key = ERR_PTR(-ENOMEM);
109 description = kzalloc(desc_len, GFP_KERNEL); 113 description = kzalloc(desc_len, GFP_KERNEL);
@@ -141,6 +145,9 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
141 dp = description + strlen(description); 145 dp = description + strlen(description);
142 sprintf(dp, ";user=%s", sesInfo->userName); 146 sprintf(dp, ";user=%s", sesInfo->userName);
143 147
148 dp = description + strlen(description);
149 sprintf(dp, ";pid=0x%x", current->pid);
150
144 cFYI(1, ("key description = %s", description)); 151 cFYI(1, ("key description = %s", description));
145 spnego_key = request_key(&cifs_spnego_key_type, description, ""); 152 spnego_key = request_key(&cifs_spnego_key_type, description, "");
146 153
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 60e3c4253de0..714a542cbafc 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -44,7 +44,7 @@ cifs_ucs2_bytes(const __le16 *from, int maxbytes,
44 int maxwords = maxbytes / 2; 44 int maxwords = maxbytes / 2;
45 char tmp[NLS_MAX_CHARSET_SIZE]; 45 char tmp[NLS_MAX_CHARSET_SIZE];
46 46
47 for (i = 0; from[i] && i < maxwords; i++) { 47 for (i = 0; i < maxwords && from[i]; i++) {
48 charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp, 48 charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp,
49 NLS_MAX_CHARSET_SIZE); 49 NLS_MAX_CHARSET_SIZE);
50 if (charlen > 0) 50 if (charlen > 0)
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 1403b5d86a73..6941c22398a6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -327,7 +327,7 @@ static void dump_ace(struct cifs_ace *pace, char *end_of_acl)
327 327
328static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, 328static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
329 struct cifs_sid *pownersid, struct cifs_sid *pgrpsid, 329 struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
330 struct inode *inode) 330 struct cifs_fattr *fattr)
331{ 331{
332 int i; 332 int i;
333 int num_aces = 0; 333 int num_aces = 0;
@@ -340,7 +340,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
340 if (!pdacl) { 340 if (!pdacl) {
341 /* no DACL in the security descriptor, set 341 /* no DACL in the security descriptor, set
342 all the permissions for user/group/other */ 342 all the permissions for user/group/other */
343 inode->i_mode |= S_IRWXUGO; 343 fattr->cf_mode |= S_IRWXUGO;
344 return; 344 return;
345 } 345 }
346 346
@@ -357,7 +357,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
357 /* reset rwx permissions for user/group/other. 357 /* reset rwx permissions for user/group/other.
358 Also, if num_aces is 0 i.e. DACL has no ACEs, 358 Also, if num_aces is 0 i.e. DACL has no ACEs,
359 user/group/other have no permissions */ 359 user/group/other have no permissions */
360 inode->i_mode &= ~(S_IRWXUGO); 360 fattr->cf_mode &= ~(S_IRWXUGO);
361 361
362 acl_base = (char *)pdacl; 362 acl_base = (char *)pdacl;
363 acl_size = sizeof(struct cifs_acl); 363 acl_size = sizeof(struct cifs_acl);
@@ -379,17 +379,17 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
379 if (compare_sids(&(ppace[i]->sid), pownersid)) 379 if (compare_sids(&(ppace[i]->sid), pownersid))
380 access_flags_to_mode(ppace[i]->access_req, 380 access_flags_to_mode(ppace[i]->access_req,
381 ppace[i]->type, 381 ppace[i]->type,
382 &(inode->i_mode), 382 &fattr->cf_mode,
383 &user_mask); 383 &user_mask);
384 if (compare_sids(&(ppace[i]->sid), pgrpsid)) 384 if (compare_sids(&(ppace[i]->sid), pgrpsid))
385 access_flags_to_mode(ppace[i]->access_req, 385 access_flags_to_mode(ppace[i]->access_req,
386 ppace[i]->type, 386 ppace[i]->type,
387 &(inode->i_mode), 387 &fattr->cf_mode,
388 &group_mask); 388 &group_mask);
389 if (compare_sids(&(ppace[i]->sid), &sid_everyone)) 389 if (compare_sids(&(ppace[i]->sid), &sid_everyone))
390 access_flags_to_mode(ppace[i]->access_req, 390 access_flags_to_mode(ppace[i]->access_req,
391 ppace[i]->type, 391 ppace[i]->type,
392 &(inode->i_mode), 392 &fattr->cf_mode,
393 &other_mask); 393 &other_mask);
394 394
395/* memcpy((void *)(&(cifscred->aces[i])), 395/* memcpy((void *)(&(cifscred->aces[i])),
@@ -464,7 +464,7 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
464 464
465/* Convert CIFS ACL to POSIX form */ 465/* Convert CIFS ACL to POSIX form */
466static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, 466static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
467 struct inode *inode) 467 struct cifs_fattr *fattr)
468{ 468{
469 int rc; 469 int rc;
470 struct cifs_sid *owner_sid_ptr, *group_sid_ptr; 470 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
@@ -472,7 +472,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
472 char *end_of_acl = ((char *)pntsd) + acl_len; 472 char *end_of_acl = ((char *)pntsd) + acl_len;
473 __u32 dacloffset; 473 __u32 dacloffset;
474 474
475 if ((inode == NULL) || (pntsd == NULL)) 475 if (pntsd == NULL)
476 return -EIO; 476 return -EIO;
477 477
478 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + 478 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
@@ -497,7 +497,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
497 497
498 if (dacloffset) 498 if (dacloffset)
499 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 499 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
500 group_sid_ptr, inode); 500 group_sid_ptr, fattr);
501 else 501 else
502 cFYI(1, ("no ACL")); /* BB grant all or default perms? */ 502 cFYI(1, ("no ACL")); /* BB grant all or default perms? */
503 503
@@ -508,7 +508,6 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
508 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, 508 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
509 sizeof(struct cifs_sid)); */ 509 sizeof(struct cifs_sid)); */
510 510
511
512 return 0; 511 return 0;
513} 512}
514 513
@@ -671,8 +670,9 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
671} 670}
672 671
673/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ 672/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
674void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, 673void
675 const char *path, const __u16 *pfid) 674cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
675 struct inode *inode, const char *path, const __u16 *pfid)
676{ 676{
677 struct cifs_ntsd *pntsd = NULL; 677 struct cifs_ntsd *pntsd = NULL;
678 u32 acllen = 0; 678 u32 acllen = 0;
@@ -687,7 +687,7 @@ void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode,
687 687
688 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ 688 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
689 if (pntsd) 689 if (pntsd)
690 rc = parse_sec_desc(pntsd, acllen, inode); 690 rc = parse_sec_desc(pntsd, acllen, fattr);
691 if (rc) 691 if (rc)
692 cFYI(1, ("parse sec desc failed rc = %d", rc)); 692 cFYI(1, ("parse sec desc failed rc = %d", rc));
693 693
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0d92114195ab..84b75253b05a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -308,7 +308,6 @@ cifs_alloc_inode(struct super_block *sb)
308 if (!cifs_inode) 308 if (!cifs_inode)
309 return NULL; 309 return NULL;
310 cifs_inode->cifsAttrs = 0x20; /* default */ 310 cifs_inode->cifsAttrs = 0x20; /* default */
311 atomic_set(&cifs_inode->inUse, 0);
312 cifs_inode->time = 0; 311 cifs_inode->time = 0;
313 cifs_inode->write_behind_rc = 0; 312 cifs_inode->write_behind_rc = 0;
314 /* Until the file is open and we have gotten oplock 313 /* Until the file is open and we have gotten oplock
@@ -333,6 +332,27 @@ cifs_destroy_inode(struct inode *inode)
333 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); 332 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
334} 333}
335 334
335static void
336cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
337{
338 seq_printf(s, ",addr=");
339
340 switch (server->addr.sockAddr.sin_family) {
341 case AF_INET:
342 seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr);
343 break;
344 case AF_INET6:
345 seq_printf(s, "%pI6",
346 &server->addr.sockAddr6.sin6_addr.s6_addr);
347 if (server->addr.sockAddr6.sin6_scope_id)
348 seq_printf(s, "%%%u",
349 server->addr.sockAddr6.sin6_scope_id);
350 break;
351 default:
352 seq_printf(s, "(unknown)");
353 }
354}
355
336/* 356/*
337 * cifs_show_options() is for displaying mount options in /proc/mounts. 357 * cifs_show_options() is for displaying mount options in /proc/mounts.
338 * Not all settable options are displayed but most of the important 358 * Not all settable options are displayed but most of the important
@@ -343,83 +363,68 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
343{ 363{
344 struct cifs_sb_info *cifs_sb; 364 struct cifs_sb_info *cifs_sb;
345 struct cifsTconInfo *tcon; 365 struct cifsTconInfo *tcon;
346 struct TCP_Server_Info *server;
347 366
348 cifs_sb = CIFS_SB(m->mnt_sb); 367 cifs_sb = CIFS_SB(m->mnt_sb);
368 tcon = cifs_sb->tcon;
349 369
350 if (cifs_sb) { 370 seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName);
351 tcon = cifs_sb->tcon; 371 if (tcon->ses->userName)
352 if (tcon) { 372 seq_printf(s, ",username=%s", tcon->ses->userName);
353 seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); 373 if (tcon->ses->domainName)
354 if (tcon->ses) { 374 seq_printf(s, ",domain=%s", tcon->ses->domainName);
355 if (tcon->ses->userName) 375
356 seq_printf(s, ",username=%s", 376 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
357 tcon->ses->userName); 377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
358 if (tcon->ses->domainName) 378 seq_printf(s, ",forceuid");
359 seq_printf(s, ",domain=%s", 379 else
360 tcon->ses->domainName); 380 seq_printf(s, ",noforceuid");
361 server = tcon->ses->server; 381
362 if (server) { 382 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
363 seq_printf(s, ",addr="); 383 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
364 switch (server->addr.sockAddr6. 384 seq_printf(s, ",forcegid");
365 sin6_family) { 385 else
366 case AF_INET6: 386 seq_printf(s, ",noforcegid");
367 seq_printf(s, "%pI6", 387
368 &server->addr.sockAddr6.sin6_addr); 388 cifs_show_address(s, tcon->ses->server);
369 break; 389
370 case AF_INET: 390 if (!tcon->unix_ext)
371 seq_printf(s, "%pI4", 391 seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
372 &server->addr.sockAddr.sin_addr.s_addr);
373 break;
374 }
375 }
376 }
377 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
378 !(tcon->unix_ext))
379 seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
380 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
381 !(tcon->unix_ext))
382 seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
383 if (!tcon->unix_ext) {
384 seq_printf(s, ",file_mode=0%o,dir_mode=0%o",
385 cifs_sb->mnt_file_mode, 392 cifs_sb->mnt_file_mode,
386 cifs_sb->mnt_dir_mode); 393 cifs_sb->mnt_dir_mode);
387 } 394 if (tcon->seal)
388 if (tcon->seal) 395 seq_printf(s, ",seal");
389 seq_printf(s, ",seal"); 396 if (tcon->nocase)
390 if (tcon->nocase) 397 seq_printf(s, ",nocase");
391 seq_printf(s, ",nocase"); 398 if (tcon->retry)
392 if (tcon->retry) 399 seq_printf(s, ",hard");
393 seq_printf(s, ",hard"); 400 if (cifs_sb->prepath)
394 } 401 seq_printf(s, ",prepath=%s", cifs_sb->prepath);
395 if (cifs_sb->prepath) 402 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
396 seq_printf(s, ",prepath=%s", cifs_sb->prepath); 403 seq_printf(s, ",posixpaths");
397 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) 404 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
398 seq_printf(s, ",posixpaths"); 405 seq_printf(s, ",setuids");
399 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) 406 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
400 seq_printf(s, ",setuids"); 407 seq_printf(s, ",serverino");
401 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) 408 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO)
402 seq_printf(s, ",serverino"); 409 seq_printf(s, ",directio");
403 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) 410 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
404 seq_printf(s, ",directio"); 411 seq_printf(s, ",nouser_xattr");
405 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) 412 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
406 seq_printf(s, ",nouser_xattr"); 413 seq_printf(s, ",mapchars");
407 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) 414 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
408 seq_printf(s, ",mapchars"); 415 seq_printf(s, ",sfu");
409 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) 416 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
410 seq_printf(s, ",sfu"); 417 seq_printf(s, ",nobrl");
411 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) 418 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
412 seq_printf(s, ",nobrl"); 419 seq_printf(s, ",cifsacl");
413 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 420 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
414 seq_printf(s, ",cifsacl"); 421 seq_printf(s, ",dynperm");
415 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) 422 if (m->mnt_sb->s_flags & MS_POSIXACL)
416 seq_printf(s, ",dynperm"); 423 seq_printf(s, ",acl");
417 if (m->mnt_sb->s_flags & MS_POSIXACL) 424
418 seq_printf(s, ",acl"); 425 seq_printf(s, ",rsize=%d", cifs_sb->rsize);
419 426 seq_printf(s, ",wsize=%d", cifs_sb->wsize);
420 seq_printf(s, ",rsize=%d", cifs_sb->rsize); 427
421 seq_printf(s, ",wsize=%d", cifs_sb->wsize);
422 }
423 return 0; 428 return 0;
424} 429}
425 430
@@ -535,9 +540,14 @@ static void cifs_umount_begin(struct super_block *sb)
535 if (tcon == NULL) 540 if (tcon == NULL)
536 return; 541 return;
537 542
538 lock_kernel();
539 read_lock(&cifs_tcp_ses_lock); 543 read_lock(&cifs_tcp_ses_lock);
540 if (tcon->tc_count == 1) 544 if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) {
545 /* we have other mounts to same share or we have
546 already tried to force umount this and woken up
547 all waiting network requests, nothing to do */
548 read_unlock(&cifs_tcp_ses_lock);
549 return;
550 } else if (tcon->tc_count == 1)
541 tcon->tidStatus = CifsExiting; 551 tcon->tidStatus = CifsExiting;
542 read_unlock(&cifs_tcp_ses_lock); 552 read_unlock(&cifs_tcp_ses_lock);
543 553
@@ -552,9 +562,7 @@ static void cifs_umount_begin(struct super_block *sb)
552 wake_up_all(&tcon->ses->server->response_q); 562 wake_up_all(&tcon->ses->server->response_q);
553 msleep(1); 563 msleep(1);
554 } 564 }
555/* BB FIXME - finish add checks for tidStatus BB */
556 565
557 unlock_kernel();
558 return; 566 return;
559} 567}
560 568
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 9570a0e8023f..6c170948300d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -24,6 +24,19 @@
24 24
25#define ROOT_I 2 25#define ROOT_I 2
26 26
27/*
28 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
29 * so that it will fit.
30 */
31static inline ino_t
32cifs_uniqueid_to_ino_t(u64 fileid)
33{
34 ino_t ino = (ino_t) fileid;
35 if (sizeof(ino_t) < sizeof(u64))
36 ino ^= fileid >> (sizeof(u64)-sizeof(ino_t)) * 8;
37 return ino;
38}
39
27extern struct file_system_type cifs_fs_type; 40extern struct file_system_type cifs_fs_type;
28extern const struct address_space_operations cifs_addr_ops; 41extern const struct address_space_operations cifs_addr_ops;
29extern const struct address_space_operations cifs_addr_ops_smallbuf; 42extern const struct address_space_operations cifs_addr_ops_smallbuf;
@@ -100,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
100extern const struct export_operations cifs_export_ops; 113extern const struct export_operations cifs_export_ops;
101#endif /* EXPERIMENTAL */ 114#endif /* EXPERIMENTAL */
102 115
103#define CIFS_VERSION "1.59" 116#define CIFS_VERSION "1.60"
104#endif /* _CIFSFS_H */ 117#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a61ab772c6f6..6084d6379c03 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -83,7 +83,7 @@ enum securityEnum {
83 NTLM, /* Legacy NTLM012 auth with NTLM hash */ 83 NTLM, /* Legacy NTLM012 auth with NTLM hash */
84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
85 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ 85 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
86 NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */ 86/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */
87 Kerberos, /* Kerberos via SPNEGO */ 87 Kerberos, /* Kerberos via SPNEGO */
88 MSKerberos, /* MS Kerberos via SPNEGO */ 88 MSKerberos, /* MS Kerberos via SPNEGO */
89}; 89};
@@ -260,6 +260,8 @@ struct cifsTconInfo {
260 atomic_t num_closes; 260 atomic_t num_closes;
261 atomic_t num_deletes; 261 atomic_t num_deletes;
262 atomic_t num_mkdirs; 262 atomic_t num_mkdirs;
263 atomic_t num_posixopens;
264 atomic_t num_posixmkdirs;
263 atomic_t num_rmdirs; 265 atomic_t num_rmdirs;
264 atomic_t num_renames; 266 atomic_t num_renames;
265 atomic_t num_t2renames; 267 atomic_t num_t2renames;
@@ -364,13 +366,13 @@ struct cifsInodeInfo {
364 struct list_head openFileList; 366 struct list_head openFileList;
365 int write_behind_rc; 367 int write_behind_rc;
366 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ 368 __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
367 atomic_t inUse; /* num concurrent users (local openers cifs) of file*/
368 unsigned long time; /* jiffies of last update/check of inode */ 369 unsigned long time; /* jiffies of last update/check of inode */
369 bool clientCanCacheRead:1; /* read oplock */ 370 bool clientCanCacheRead:1; /* read oplock */
370 bool clientCanCacheAll:1; /* read and writebehind oplock */ 371 bool clientCanCacheAll:1; /* read and writebehind oplock */
371 bool oplockPending:1; 372 bool oplockPending:1;
372 bool delete_pending:1; /* DELETE_ON_CLOSE is set */ 373 bool delete_pending:1; /* DELETE_ON_CLOSE is set */
373 u64 server_eof; /* current file size on server */ 374 u64 server_eof; /* current file size on server */
375 u64 uniqueid; /* server inode number */
374 struct inode vfs_inode; 376 struct inode vfs_inode;
375}; 377};
376 378
@@ -472,6 +474,32 @@ struct dfs_info3_param {
472 char *node_name; 474 char *node_name;
473}; 475};
474 476
477/*
478 * common struct for holding inode info when searching for or updating an
479 * inode with new info
480 */
481
482#define CIFS_FATTR_DFS_REFERRAL 0x1
483#define CIFS_FATTR_DELETE_PENDING 0x2
484#define CIFS_FATTR_NEED_REVAL 0x4
485
486struct cifs_fattr {
487 u32 cf_flags;
488 u32 cf_cifsattrs;
489 u64 cf_uniqueid;
490 u64 cf_eof;
491 u64 cf_bytes;
492 uid_t cf_uid;
493 gid_t cf_gid;
494 umode_t cf_mode;
495 dev_t cf_rdev;
496 unsigned int cf_nlink;
497 unsigned int cf_dtype;
498 struct timespec cf_atime;
499 struct timespec cf_mtime;
500 struct timespec cf_ctime;
501};
502
475static inline void free_dfs_info_param(struct dfs_info3_param *param) 503static inline void free_dfs_info_param(struct dfs_info3_param *param)
476{ 504{
477 if (param) { 505 if (param) {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index a785f69dbc9f..2d07f890a842 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2328,19 +2328,7 @@ struct file_attrib_tag {
2328typedef struct { 2328typedef struct {
2329 __le32 NextEntryOffset; 2329 __le32 NextEntryOffset;
2330 __u32 ResumeKey; /* as with FileIndex - no need to convert */ 2330 __u32 ResumeKey; /* as with FileIndex - no need to convert */
2331 __le64 EndOfFile; 2331 FILE_UNIX_BASIC_INFO basic;
2332 __le64 NumOfBytes;
2333 __le64 LastStatusChange; /*SNIA specs DCE time for the 3 time fields */
2334 __le64 LastAccessTime;
2335 __le64 LastModificationTime;
2336 __le64 Uid;
2337 __le64 Gid;
2338 __le32 Type;
2339 __le64 DevMajor;
2340 __le64 DevMinor;
2341 __le64 UniqueId;
2342 __le64 Permissions;
2343 __le64 Nlinks;
2344 char FileName[1]; 2332 char FileName[1];
2345} __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */ 2333} __attribute__((packed)) FILE_UNIX_INFO; /* level 0x202 */
2346 2334
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f9452329bcce..da8fbf565991 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -74,7 +74,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr);
74extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 74extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
75extern int decode_negTokenInit(unsigned char *security_blob, int length, 75extern int decode_negTokenInit(unsigned char *security_blob, int length,
76 enum securityEnum *secType); 76 enum securityEnum *secType);
77extern int cifs_inet_pton(const int, const char *source, void *dst); 77extern int cifs_convert_address(char *src, void *dst);
78extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); 78extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
79extern void header_assemble(struct smb_hdr *, char /* command */ , 79extern void header_assemble(struct smb_hdr *, char /* command */ ,
80 const struct cifsTconInfo *, int /* length of 80 const struct cifsTconInfo *, int /* length of
@@ -98,9 +98,13 @@ extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
98extern int cifs_posix_open(char *full_path, struct inode **pinode, 98extern int cifs_posix_open(char *full_path, struct inode **pinode,
99 struct super_block *sb, int mode, int oflags, 99 struct super_block *sb, int mode, int oflags,
100 int *poplock, __u16 *pnetfid, int xid); 100 int *poplock, __u16 *pnetfid, int xid);
101extern void posix_fill_in_inode(struct inode *tmp_inode, 101extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
102 FILE_UNIX_BASIC_INFO *pData, int isNewInode); 102 FILE_UNIX_BASIC_INFO *info,
103extern struct inode *cifs_new_inode(struct super_block *sb, __u64 *inum); 103 struct cifs_sb_info *cifs_sb);
104extern void cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr);
105extern struct inode *cifs_iget(struct super_block *sb,
106 struct cifs_fattr *fattr);
107
104extern int cifs_get_inode_info(struct inode **pinode, 108extern int cifs_get_inode_info(struct inode **pinode,
105 const unsigned char *search_path, 109 const unsigned char *search_path,
106 FILE_ALL_INFO *pfile_info, 110 FILE_ALL_INFO *pfile_info,
@@ -108,8 +112,9 @@ extern int cifs_get_inode_info(struct inode **pinode,
108extern int cifs_get_inode_info_unix(struct inode **pinode, 112extern int cifs_get_inode_info_unix(struct inode **pinode,
109 const unsigned char *search_path, 113 const unsigned char *search_path,
110 struct super_block *sb, int xid); 114 struct super_block *sb, int xid);
111extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, 115extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
112 const char *path, const __u16 *pfid); 116 struct cifs_fattr *fattr, struct inode *inode,
117 const char *path, const __u16 *pfid);
113extern int mode_to_acl(struct inode *inode, const char *path, __u64); 118extern int mode_to_acl(struct inode *inode, const char *path, __u64);
114 119
115extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 120extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
@@ -215,7 +220,11 @@ struct cifs_unix_set_info_args {
215 dev_t device; 220 dev_t device;
216}; 221};
217 222
218extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon, 223extern int CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
224 const struct cifs_unix_set_info_args *args,
225 u16 fid, u32 pid_of_opener);
226
227extern int CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *pTcon,
219 char *fileName, 228 char *fileName,
220 const struct cifs_unix_set_info_args *args, 229 const struct cifs_unix_set_info_args *args,
221 const struct nls_table *nls_codepage, 230 const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b84c61d5bca4..1866bc2927d4 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -594,7 +594,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
594 else if (secFlags & CIFSSEC_MAY_KRB5) 594 else if (secFlags & CIFSSEC_MAY_KRB5)
595 server->secType = Kerberos; 595 server->secType = Kerberos;
596 else if (secFlags & CIFSSEC_MAY_NTLMSSP) 596 else if (secFlags & CIFSSEC_MAY_NTLMSSP)
597 server->secType = NTLMSSP; 597 server->secType = RawNTLMSSP;
598 else if (secFlags & CIFSSEC_MAY_LANMAN) 598 else if (secFlags & CIFSSEC_MAY_LANMAN)
599 server->secType = LANMAN; 599 server->secType = LANMAN;
600/* #ifdef CONFIG_CIFS_EXPERIMENTAL 600/* #ifdef CONFIG_CIFS_EXPERIMENTAL
@@ -729,7 +729,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
729 * the tcon is no longer on the list, so no need to take lock before 729 * the tcon is no longer on the list, so no need to take lock before
730 * checking this. 730 * checking this.
731 */ 731 */
732 if (tcon->need_reconnect) 732 if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
733 return 0; 733 return 0;
734 734
735 rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, 735 rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon,
@@ -1113,7 +1113,10 @@ PsxCreat:
1113psx_create_err: 1113psx_create_err:
1114 cifs_buf_release(pSMB); 1114 cifs_buf_release(pSMB);
1115 1115
1116 cifs_stats_inc(&tcon->num_mkdirs); 1116 if (posix_flags & SMB_O_DIRECTORY)
1117 cifs_stats_inc(&tcon->num_posixmkdirs);
1118 else
1119 cifs_stats_inc(&tcon->num_posixopens);
1117 1120
1118 if (rc == -EAGAIN) 1121 if (rc == -EAGAIN)
1119 goto PsxCreat; 1122 goto PsxCreat;
@@ -5074,10 +5077,114 @@ SetAttrLgcyRetry:
5074} 5077}
5075#endif /* temporarily unneeded SetAttr legacy function */ 5078#endif /* temporarily unneeded SetAttr legacy function */
5076 5079
5080static void
5081cifs_fill_unix_set_info(FILE_UNIX_BASIC_INFO *data_offset,
5082 const struct cifs_unix_set_info_args *args)
5083{
5084 u64 mode = args->mode;
5085
5086 /*
5087 * Samba server ignores set of file size to zero due to bugs in some
5088 * older clients, but we should be precise - we use SetFileSize to
5089 * set file size and do not want to truncate file size to zero
5090 * accidently as happened on one Samba server beta by putting
5091 * zero instead of -1 here
5092 */
5093 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
5094 data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
5095 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5096 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5097 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5098 data_offset->Uid = cpu_to_le64(args->uid);
5099 data_offset->Gid = cpu_to_le64(args->gid);
5100 /* better to leave device as zero when it is */
5101 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5102 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
5103 data_offset->Permissions = cpu_to_le64(mode);
5104
5105 if (S_ISREG(mode))
5106 data_offset->Type = cpu_to_le32(UNIX_FILE);
5107 else if (S_ISDIR(mode))
5108 data_offset->Type = cpu_to_le32(UNIX_DIR);
5109 else if (S_ISLNK(mode))
5110 data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
5111 else if (S_ISCHR(mode))
5112 data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
5113 else if (S_ISBLK(mode))
5114 data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
5115 else if (S_ISFIFO(mode))
5116 data_offset->Type = cpu_to_le32(UNIX_FIFO);
5117 else if (S_ISSOCK(mode))
5118 data_offset->Type = cpu_to_le32(UNIX_SOCKET);
5119}
5120
5077int 5121int
5078CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, 5122CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5079 const struct cifs_unix_set_info_args *args, 5123 const struct cifs_unix_set_info_args *args,
5080 const struct nls_table *nls_codepage, int remap) 5124 u16 fid, u32 pid_of_opener)
5125{
5126 struct smb_com_transaction2_sfi_req *pSMB = NULL;
5127 FILE_UNIX_BASIC_INFO *data_offset;
5128 int rc = 0;
5129 u16 params, param_offset, offset, byte_count, count;
5130
5131 cFYI(1, ("Set Unix Info (via SetFileInfo)"));
5132 rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
5133
5134 if (rc)
5135 return rc;
5136
5137 pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
5138 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
5139
5140 params = 6;
5141 pSMB->MaxSetupCount = 0;
5142 pSMB->Reserved = 0;
5143 pSMB->Flags = 0;
5144 pSMB->Timeout = 0;
5145 pSMB->Reserved2 = 0;
5146 param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
5147 offset = param_offset + params;
5148
5149 data_offset = (FILE_UNIX_BASIC_INFO *)
5150 ((char *)(&pSMB->hdr.Protocol) + offset);
5151 count = sizeof(FILE_UNIX_BASIC_INFO);
5152
5153 pSMB->MaxParameterCount = cpu_to_le16(2);
5154 /* BB find max SMB PDU from sess */
5155 pSMB->MaxDataCount = cpu_to_le16(1000);
5156 pSMB->SetupCount = 1;
5157 pSMB->Reserved3 = 0;
5158 pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
5159 byte_count = 3 /* pad */ + params + count;
5160 pSMB->DataCount = cpu_to_le16(count);
5161 pSMB->ParameterCount = cpu_to_le16(params);
5162 pSMB->TotalDataCount = pSMB->DataCount;
5163 pSMB->TotalParameterCount = pSMB->ParameterCount;
5164 pSMB->ParameterOffset = cpu_to_le16(param_offset);
5165 pSMB->DataOffset = cpu_to_le16(offset);
5166 pSMB->Fid = fid;
5167 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5168 pSMB->Reserved4 = 0;
5169 pSMB->hdr.smb_buf_length += byte_count;
5170 pSMB->ByteCount = cpu_to_le16(byte_count);
5171
5172 cifs_fill_unix_set_info(data_offset, args);
5173
5174 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
5175 if (rc)
5176 cFYI(1, ("Send error in Set Time (SetFileInfo) = %d", rc));
5177
5178 /* Note: On -EAGAIN error only caller can retry on handle based calls
5179 since file handle passed in no longer valid */
5180
5181 return rc;
5182}
5183
5184int
5185CIFSSMBUnixSetPathInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5186 const struct cifs_unix_set_info_args *args,
5187 const struct nls_table *nls_codepage, int remap)
5081{ 5188{
5082 TRANSACTION2_SPI_REQ *pSMB = NULL; 5189 TRANSACTION2_SPI_REQ *pSMB = NULL;
5083 TRANSACTION2_SPI_RSP *pSMBr = NULL; 5190 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5086,7 +5193,6 @@ CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5086 int bytes_returned = 0; 5193 int bytes_returned = 0;
5087 FILE_UNIX_BASIC_INFO *data_offset; 5194 FILE_UNIX_BASIC_INFO *data_offset;
5088 __u16 params, param_offset, offset, count, byte_count; 5195 __u16 params, param_offset, offset, count, byte_count;
5089 __u64 mode = args->mode;
5090 5196
5091 cFYI(1, ("In SetUID/GID/Mode")); 5197 cFYI(1, ("In SetUID/GID/Mode"));
5092setPermsRetry: 5198setPermsRetry:
@@ -5137,38 +5243,8 @@ setPermsRetry:
5137 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5243 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5138 pSMB->Reserved4 = 0; 5244 pSMB->Reserved4 = 0;
5139 pSMB->hdr.smb_buf_length += byte_count; 5245 pSMB->hdr.smb_buf_length += byte_count;
5140 /* Samba server ignores set of file size to zero due to bugs in some
5141 older clients, but we should be precise - we use SetFileSize to
5142 set file size and do not want to truncate file size to zero
5143 accidently as happened on one Samba server beta by putting
5144 zero instead of -1 here */
5145 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
5146 data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
5147 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5148 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5149 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5150 data_offset->Uid = cpu_to_le64(args->uid);
5151 data_offset->Gid = cpu_to_le64(args->gid);
5152 /* better to leave device as zero when it is */
5153 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5154 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
5155 data_offset->Permissions = cpu_to_le64(mode);
5156
5157 if (S_ISREG(mode))
5158 data_offset->Type = cpu_to_le32(UNIX_FILE);
5159 else if (S_ISDIR(mode))
5160 data_offset->Type = cpu_to_le32(UNIX_DIR);
5161 else if (S_ISLNK(mode))
5162 data_offset->Type = cpu_to_le32(UNIX_SYMLINK);
5163 else if (S_ISCHR(mode))
5164 data_offset->Type = cpu_to_le32(UNIX_CHARDEV);
5165 else if (S_ISBLK(mode))
5166 data_offset->Type = cpu_to_le32(UNIX_BLOCKDEV);
5167 else if (S_ISFIFO(mode))
5168 data_offset->Type = cpu_to_le32(UNIX_FIFO);
5169 else if (S_ISSOCK(mode))
5170 data_offset->Type = cpu_to_le32(UNIX_SOCKET);
5171 5246
5247 cifs_fill_unix_set_info(data_offset, args);
5172 5248
5173 pSMB->ByteCount = cpu_to_le16(byte_count); 5249 pSMB->ByteCount = cpu_to_le16(byte_count);
5174 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5250 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 97f4311b9a8e..1f3345d7fa79 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -70,7 +70,6 @@ struct smb_vol {
70 mode_t file_mode; 70 mode_t file_mode;
71 mode_t dir_mode; 71 mode_t dir_mode;
72 unsigned secFlg; 72 unsigned secFlg;
73 bool rw:1;
74 bool retry:1; 73 bool retry:1;
75 bool intr:1; 74 bool intr:1;
76 bool setuids:1; 75 bool setuids:1;
@@ -804,6 +803,10 @@ cifs_parse_mount_options(char *options, const char *devname,
804 char *data; 803 char *data;
805 unsigned int temp_len, i, j; 804 unsigned int temp_len, i, j;
806 char separator[2]; 805 char separator[2];
806 short int override_uid = -1;
807 short int override_gid = -1;
808 bool uid_specified = false;
809 bool gid_specified = false;
807 810
808 separator[0] = ','; 811 separator[0] = ',';
809 separator[1] = 0; 812 separator[1] = 0;
@@ -832,7 +835,6 @@ cifs_parse_mount_options(char *options, const char *devname,
832 vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; 835 vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
833 836
834 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ 837 /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
835 vol->rw = true;
836 /* default is always to request posix paths. */ 838 /* default is always to request posix paths. */
837 vol->posix_paths = 1; 839 vol->posix_paths = 1;
838 /* default to using server inode numbers where available */ 840 /* default to using server inode numbers where available */
@@ -1095,18 +1097,20 @@ cifs_parse_mount_options(char *options, const char *devname,
1095 "too long.\n"); 1097 "too long.\n");
1096 return 1; 1098 return 1;
1097 } 1099 }
1098 } else if (strnicmp(data, "uid", 3) == 0) { 1100 } else if (!strnicmp(data, "uid", 3) && value && *value) {
1099 if (value && *value) 1101 vol->linux_uid = simple_strtoul(value, &value, 0);
1100 vol->linux_uid = 1102 uid_specified = true;
1101 simple_strtoul(value, &value, 0); 1103 } else if (!strnicmp(data, "forceuid", 8)) {
1102 } else if (strnicmp(data, "forceuid", 8) == 0) { 1104 override_uid = 1;
1103 vol->override_uid = 1; 1105 } else if (!strnicmp(data, "noforceuid", 10)) {
1104 } else if (strnicmp(data, "gid", 3) == 0) { 1106 override_uid = 0;
1105 if (value && *value) 1107 } else if (!strnicmp(data, "gid", 3) && value && *value) {
1106 vol->linux_gid = 1108 vol->linux_gid = simple_strtoul(value, &value, 0);
1107 simple_strtoul(value, &value, 0); 1109 gid_specified = true;
1108 } else if (strnicmp(data, "forcegid", 8) == 0) { 1110 } else if (!strnicmp(data, "forcegid", 8)) {
1109 vol->override_gid = 1; 1111 override_gid = 1;
1112 } else if (!strnicmp(data, "noforcegid", 10)) {
1113 override_gid = 0;
1110 } else if (strnicmp(data, "file_mode", 4) == 0) { 1114 } else if (strnicmp(data, "file_mode", 4) == 0) {
1111 if (value && *value) { 1115 if (value && *value) {
1112 vol->file_mode = 1116 vol->file_mode =
@@ -1199,7 +1203,9 @@ cifs_parse_mount_options(char *options, const char *devname,
1199 } else if (strnicmp(data, "guest", 5) == 0) { 1203 } else if (strnicmp(data, "guest", 5) == 0) {
1200 /* ignore */ 1204 /* ignore */
1201 } else if (strnicmp(data, "rw", 2) == 0) { 1205 } else if (strnicmp(data, "rw", 2) == 0) {
1202 vol->rw = true; 1206 /* ignore */
1207 } else if (strnicmp(data, "ro", 2) == 0) {
1208 /* ignore */
1203 } else if (strnicmp(data, "noblocksend", 11) == 0) { 1209 } else if (strnicmp(data, "noblocksend", 11) == 0) {
1204 vol->noblocksnd = 1; 1210 vol->noblocksnd = 1;
1205 } else if (strnicmp(data, "noautotune", 10) == 0) { 1211 } else if (strnicmp(data, "noautotune", 10) == 0) {
@@ -1218,8 +1224,6 @@ cifs_parse_mount_options(char *options, const char *devname,
1218 parse these options again and set anything and it 1224 parse these options again and set anything and it
1219 is ok to just ignore them */ 1225 is ok to just ignore them */
1220 continue; 1226 continue;
1221 } else if (strnicmp(data, "ro", 2) == 0) {
1222 vol->rw = false;
1223 } else if (strnicmp(data, "hard", 4) == 0) { 1227 } else if (strnicmp(data, "hard", 4) == 0) {
1224 vol->retry = 1; 1228 vol->retry = 1;
1225 } else if (strnicmp(data, "soft", 4) == 0) { 1229 } else if (strnicmp(data, "soft", 4) == 0) {
@@ -1357,6 +1361,18 @@ cifs_parse_mount_options(char *options, const char *devname,
1357 if (vol->UNCip == NULL) 1361 if (vol->UNCip == NULL)
1358 vol->UNCip = &vol->UNC[2]; 1362 vol->UNCip = &vol->UNC[2];
1359 1363
1364 if (uid_specified)
1365 vol->override_uid = override_uid;
1366 else if (override_uid == 1)
1367 printk(KERN_NOTICE "CIFS: ignoring forceuid mount option "
1368 "specified with no uid= option.\n");
1369
1370 if (gid_specified)
1371 vol->override_gid = override_gid;
1372 else if (override_gid == 1)
1373 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1374 "specified with no gid= option.\n");
1375
1360 return 0; 1376 return 0;
1361} 1377}
1362 1378
@@ -1386,8 +1402,10 @@ cifs_find_tcp_session(struct sockaddr_storage *addr)
1386 server->addr.sockAddr.sin_addr.s_addr)) 1402 server->addr.sockAddr.sin_addr.s_addr))
1387 continue; 1403 continue;
1388 else if (addr->ss_family == AF_INET6 && 1404 else if (addr->ss_family == AF_INET6 &&
1389 !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, 1405 (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr,
1390 &addr6->sin6_addr)) 1406 &addr6->sin6_addr) ||
1407 server->addr.sockAddr6.sin6_scope_id !=
1408 addr6->sin6_scope_id))
1391 continue; 1409 continue;
1392 1410
1393 ++server->srv_count; 1411 ++server->srv_count;
@@ -1433,28 +1451,15 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1433 1451
1434 memset(&addr, 0, sizeof(struct sockaddr_storage)); 1452 memset(&addr, 0, sizeof(struct sockaddr_storage));
1435 1453
1436 if (volume_info->UNCip && volume_info->UNC) { 1454 cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip));
1437 rc = cifs_inet_pton(AF_INET, volume_info->UNCip,
1438 &sin_server->sin_addr.s_addr);
1439
1440 if (rc <= 0) {
1441 /* not ipv4 address, try ipv6 */
1442 rc = cifs_inet_pton(AF_INET6, volume_info->UNCip,
1443 &sin_server6->sin6_addr.in6_u);
1444 if (rc > 0)
1445 addr.ss_family = AF_INET6;
1446 } else {
1447 addr.ss_family = AF_INET;
1448 }
1449 1455
1450 if (rc <= 0) { 1456 if (volume_info->UNCip && volume_info->UNC) {
1457 rc = cifs_convert_address(volume_info->UNCip, &addr);
1458 if (!rc) {
1451 /* we failed translating address */ 1459 /* we failed translating address */
1452 rc = -EINVAL; 1460 rc = -EINVAL;
1453 goto out_err; 1461 goto out_err;
1454 } 1462 }
1455
1456 cFYI(1, ("UNC: %s ip: %s", volume_info->UNC,
1457 volume_info->UNCip));
1458 } else if (volume_info->UNCip) { 1463 } else if (volume_info->UNCip) {
1459 /* BB using ip addr as tcp_ses name to connect to the 1464 /* BB using ip addr as tcp_ses name to connect to the
1460 DFS root below */ 1465 DFS root below */
@@ -1513,14 +1518,14 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1513 cFYI(1, ("attempting ipv6 connect")); 1518 cFYI(1, ("attempting ipv6 connect"));
1514 /* BB should we allow ipv6 on port 139? */ 1519 /* BB should we allow ipv6 on port 139? */
1515 /* other OS never observed in Wild doing 139 with v6 */ 1520 /* other OS never observed in Wild doing 139 with v6 */
1521 sin_server6->sin6_port = htons(volume_info->port);
1516 memcpy(&tcp_ses->addr.sockAddr6, sin_server6, 1522 memcpy(&tcp_ses->addr.sockAddr6, sin_server6,
1517 sizeof(struct sockaddr_in6)); 1523 sizeof(struct sockaddr_in6));
1518 sin_server6->sin6_port = htons(volume_info->port);
1519 rc = ipv6_connect(tcp_ses); 1524 rc = ipv6_connect(tcp_ses);
1520 } else { 1525 } else {
1526 sin_server->sin_port = htons(volume_info->port);
1521 memcpy(&tcp_ses->addr.sockAddr, sin_server, 1527 memcpy(&tcp_ses->addr.sockAddr, sin_server,
1522 sizeof(struct sockaddr_in)); 1528 sizeof(struct sockaddr_in));
1523 sin_server->sin_port = htons(volume_info->port);
1524 rc = ipv4_connect(tcp_ses); 1529 rc = ipv4_connect(tcp_ses);
1525 } 1530 }
1526 if (rc < 0) { 1531 if (rc < 0) {
@@ -2465,10 +2470,10 @@ try_mount_again:
2465 tcon->local_lease = volume_info->local_lease; 2470 tcon->local_lease = volume_info->local_lease;
2466 } 2471 }
2467 if (pSesInfo) { 2472 if (pSesInfo) {
2468 if (pSesInfo->capabilities & CAP_LARGE_FILES) { 2473 if (pSesInfo->capabilities & CAP_LARGE_FILES)
2469 sb->s_maxbytes = (u64) 1 << 63; 2474 sb->s_maxbytes = MAX_LFS_FILESIZE;
2470 } else 2475 else
2471 sb->s_maxbytes = (u64) 1 << 31; /* 2 GB */ 2476 sb->s_maxbytes = MAX_NON_LFS;
2472 } 2477 }
2473 2478
2474 /* BB FIXME fix time_gran to be larger for LANMAN sessions */ 2479 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
@@ -2557,11 +2562,20 @@ remote_path_check:
2557 2562
2558 if (mount_data != mount_data_global) 2563 if (mount_data != mount_data_global)
2559 kfree(mount_data); 2564 kfree(mount_data);
2565
2560 mount_data = cifs_compose_mount_options( 2566 mount_data = cifs_compose_mount_options(
2561 cifs_sb->mountdata, full_path + 1, 2567 cifs_sb->mountdata, full_path + 1,
2562 referrals, &fake_devname); 2568 referrals, &fake_devname);
2563 kfree(fake_devname); 2569
2564 free_dfs_info_array(referrals, num_referrals); 2570 free_dfs_info_array(referrals, num_referrals);
2571 kfree(fake_devname);
2572 kfree(full_path);
2573
2574 if (IS_ERR(mount_data)) {
2575 rc = PTR_ERR(mount_data);
2576 mount_data = NULL;
2577 goto mount_fail_check;
2578 }
2565 2579
2566 if (tcon) 2580 if (tcon)
2567 cifs_put_tcon(tcon); 2581 cifs_put_tcon(tcon);
@@ -2569,8 +2583,6 @@ remote_path_check:
2569 cifs_put_smb_ses(pSesInfo); 2583 cifs_put_smb_ses(pSesInfo);
2570 2584
2571 cleanup_volume_info(&volume_info); 2585 cleanup_volume_info(&volume_info);
2572 FreeXid(xid);
2573 kfree(full_path);
2574 referral_walks_count++; 2586 referral_walks_count++;
2575 goto try_mount_again; 2587 goto try_mount_again;
2576 } 2588 }
@@ -2739,6 +2751,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2739 strncpy(tcon->treeName, tree, MAX_TREE_SIZE); 2751 strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
2740 2752
2741 /* mostly informational -- no need to fail on error here */ 2753 /* mostly informational -- no need to fail on error here */
2754 kfree(tcon->nativeFileSystem);
2742 tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr, 2755 tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr,
2743 bytes_left, is_unicode, 2756 bytes_left, is_unicode,
2744 nls_codepage); 2757 nls_codepage);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3758965d73d5..4326ffd90fa9 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -188,6 +188,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
188 FILE_UNIX_BASIC_INFO *presp_data; 188 FILE_UNIX_BASIC_INFO *presp_data;
189 __u32 posix_flags = 0; 189 __u32 posix_flags = 0;
190 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 190 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
191 struct cifs_fattr fattr;
191 192
192 cFYI(1, ("posix open %s", full_path)); 193 cFYI(1, ("posix open %s", full_path));
193 194
@@ -236,22 +237,21 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
236 if (presp_data->Type == cpu_to_le32(-1)) 237 if (presp_data->Type == cpu_to_le32(-1))
237 goto posix_open_ret; /* open ok, caller does qpathinfo */ 238 goto posix_open_ret; /* open ok, caller does qpathinfo */
238 239
239 /* get new inode and set it up */
240 if (!pinode) 240 if (!pinode)
241 goto posix_open_ret; /* caller does not need info */ 241 goto posix_open_ret; /* caller does not need info */
242 242
243 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
244
245 /* get new inode and set it up */
243 if (*pinode == NULL) { 246 if (*pinode == NULL) {
244 __u64 unique_id = le64_to_cpu(presp_data->UniqueId); 247 *pinode = cifs_iget(sb, &fattr);
245 *pinode = cifs_new_inode(sb, &unique_id); 248 if (!*pinode) {
249 rc = -ENOMEM;
250 goto posix_open_ret;
251 }
252 } else {
253 cifs_fattr_to_inode(*pinode, &fattr);
246 } 254 }
247 /* else an inode was passed in. Update its info, don't create one */
248
249 /* We do not need to close the file if new_inode fails since
250 the caller will retry qpathinfo as long as inode is null */
251 if (*pinode == NULL)
252 goto posix_open_ret;
253
254 posix_fill_in_inode(*pinode, presp_data, 1);
255 255
256 cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only); 256 cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
257 257
@@ -307,8 +307,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
307 307
308 full_path = build_path_from_dentry(direntry); 308 full_path = build_path_from_dentry(direntry);
309 if (full_path == NULL) { 309 if (full_path == NULL) {
310 rc = -ENOMEM;
310 FreeXid(xid); 311 FreeXid(xid);
311 return -ENOMEM; 312 return rc;
312 } 313 }
313 314
314 if (oplockEnabled) 315 if (oplockEnabled)
@@ -424,9 +425,10 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
424 args.uid = NO_CHANGE_64; 425 args.uid = NO_CHANGE_64;
425 args.gid = NO_CHANGE_64; 426 args.gid = NO_CHANGE_64;
426 } 427 }
427 CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, 428 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
428 cifs_sb->local_nls, 429 cifs_sb->local_nls,
429 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 430 cifs_sb->mnt_cifs_flags &
431 CIFS_MOUNT_MAP_SPECIAL_CHR);
430 } else { 432 } else {
431 /* BB implement mode setting via Windows security 433 /* BB implement mode setting via Windows security
432 descriptors e.g. */ 434 descriptors e.g. */
@@ -514,10 +516,10 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
514 args.uid = NO_CHANGE_64; 516 args.uid = NO_CHANGE_64;
515 args.gid = NO_CHANGE_64; 517 args.gid = NO_CHANGE_64;
516 } 518 }
517 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, 519 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
518 &args, cifs_sb->local_nls, 520 cifs_sb->local_nls,
519 cifs_sb->mnt_cifs_flags & 521 cifs_sb->mnt_cifs_flags &
520 CIFS_MOUNT_MAP_SPECIAL_CHR); 522 CIFS_MOUNT_MAP_SPECIAL_CHR);
521 523
522 if (!rc) { 524 if (!rc) {
523 rc = cifs_get_inode_info_unix(&newinode, full_path, 525 rc = cifs_get_inode_info_unix(&newinode, full_path,
@@ -540,8 +542,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
540 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 542 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
541 if (buf == NULL) { 543 if (buf == NULL) {
542 kfree(full_path); 544 kfree(full_path);
545 rc = -ENOMEM;
543 FreeXid(xid); 546 FreeXid(xid);
544 return -ENOMEM; 547 return rc;
545 } 548 }
546 549
547 rc = CIFSSMBOpen(xid, pTcon, full_path, 550 rc = CIFSSMBOpen(xid, pTcon, full_path,
@@ -641,6 +644,15 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
641 } 644 }
642 } 645 }
643 646
647 /*
648 * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
649 * the VFS handle the create.
650 */
651 if (nd->flags & LOOKUP_EXCL) {
652 d_instantiate(direntry, NULL);
653 return 0;
654 }
655
644 /* can not grab the rename sem here since it would 656 /* can not grab the rename sem here since it would
645 deadlock in the cases (beginning of sys_rename itself) 657 deadlock in the cases (beginning of sys_rename itself)
646 in which we already have the sb rename sem */ 658 in which we already have the sb rename sem */
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index df4a306f697e..87948147d7ec 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -35,26 +35,11 @@
35 * 0 - name is not IP 35 * 0 - name is not IP
36 */ 36 */
37static int 37static int
38is_ip(const char *name) 38is_ip(char *name)
39{ 39{
40 int rc; 40 struct sockaddr_storage ss;
41 struct sockaddr_in sin_server; 41
42 struct sockaddr_in6 sin_server6; 42 return cifs_convert_address(name, &ss);
43
44 rc = cifs_inet_pton(AF_INET, name,
45 &sin_server.sin_addr.s_addr);
46
47 if (rc <= 0) {
48 /* not ipv4 address, try ipv6 */
49 rc = cifs_inet_pton(AF_INET6, name,
50 &sin_server6.sin6_addr.in6_u);
51 if (rc > 0)
52 return 1;
53 } else {
54 return 1;
55 }
56 /* we failed translating address */
57 return 0;
58} 43}
59 44
60static int 45static int
@@ -72,7 +57,7 @@ dns_resolver_instantiate(struct key *key, const void *data,
72 ip[datalen] = '\0'; 57 ip[datalen] = '\0';
73 58
74 /* make sure this looks like an address */ 59 /* make sure this looks like an address */
75 if (!is_ip((const char *) ip)) { 60 if (!is_ip(ip)) {
76 kfree(ip); 61 kfree(ip);
77 return -EINVAL; 62 return -EINVAL;
78 } 63 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 06866841b97f..c34b7f8a217b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -300,14 +300,16 @@ int cifs_open(struct inode *inode, struct file *file)
300 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 300 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
301 pCifsFile = cifs_fill_filedata(file); 301 pCifsFile = cifs_fill_filedata(file);
302 if (pCifsFile) { 302 if (pCifsFile) {
303 rc = 0;
303 FreeXid(xid); 304 FreeXid(xid);
304 return 0; 305 return rc;
305 } 306 }
306 307
307 full_path = build_path_from_dentry(file->f_path.dentry); 308 full_path = build_path_from_dentry(file->f_path.dentry);
308 if (full_path == NULL) { 309 if (full_path == NULL) {
310 rc = -ENOMEM;
309 FreeXid(xid); 311 FreeXid(xid);
310 return -ENOMEM; 312 return rc;
311 } 313 }
312 314
313 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", 315 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
@@ -446,9 +448,9 @@ int cifs_open(struct inode *inode, struct file *file)
446 .mtime = NO_CHANGE_64, 448 .mtime = NO_CHANGE_64,
447 .device = 0, 449 .device = 0,
448 }; 450 };
449 CIFSSMBUnixSetInfo(xid, tcon, full_path, &args, 451 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
450 cifs_sb->local_nls, 452 cifs_sb->local_nls,
451 cifs_sb->mnt_cifs_flags & 453 cifs_sb->mnt_cifs_flags &
452 CIFS_MOUNT_MAP_SPECIAL_CHR); 454 CIFS_MOUNT_MAP_SPECIAL_CHR);
453 } 455 }
454 } 456 }
@@ -491,11 +493,12 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
491 return -EBADF; 493 return -EBADF;
492 494
493 xid = GetXid(); 495 xid = GetXid();
494 mutex_unlock(&pCifsFile->fh_mutex); 496 mutex_lock(&pCifsFile->fh_mutex);
495 if (!pCifsFile->invalidHandle) { 497 if (!pCifsFile->invalidHandle) {
496 mutex_lock(&pCifsFile->fh_mutex); 498 mutex_unlock(&pCifsFile->fh_mutex);
499 rc = 0;
497 FreeXid(xid); 500 FreeXid(xid);
498 return 0; 501 return rc;
499 } 502 }
500 503
501 if (file->f_path.dentry == NULL) { 504 if (file->f_path.dentry == NULL) {
@@ -524,7 +527,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
524 if (full_path == NULL) { 527 if (full_path == NULL) {
525 rc = -ENOMEM; 528 rc = -ENOMEM;
526reopen_error_exit: 529reopen_error_exit:
527 mutex_lock(&pCifsFile->fh_mutex); 530 mutex_unlock(&pCifsFile->fh_mutex);
528 FreeXid(xid); 531 FreeXid(xid);
529 return rc; 532 return rc;
530 } 533 }
@@ -566,14 +569,14 @@ reopen_error_exit:
566 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 569 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
567 CIFS_MOUNT_MAP_SPECIAL_CHR); 570 CIFS_MOUNT_MAP_SPECIAL_CHR);
568 if (rc) { 571 if (rc) {
569 mutex_lock(&pCifsFile->fh_mutex); 572 mutex_unlock(&pCifsFile->fh_mutex);
570 cFYI(1, ("cifs_open returned 0x%x", rc)); 573 cFYI(1, ("cifs_open returned 0x%x", rc));
571 cFYI(1, ("oplock: %d", oplock)); 574 cFYI(1, ("oplock: %d", oplock));
572 } else { 575 } else {
573reopen_success: 576reopen_success:
574 pCifsFile->netfid = netfid; 577 pCifsFile->netfid = netfid;
575 pCifsFile->invalidHandle = false; 578 pCifsFile->invalidHandle = false;
576 mutex_lock(&pCifsFile->fh_mutex); 579 mutex_unlock(&pCifsFile->fh_mutex);
577 pCifsInode = CIFS_I(inode); 580 pCifsInode = CIFS_I(inode);
578 if (pCifsInode) { 581 if (pCifsInode) {
579 if (can_flush) { 582 if (can_flush) {
@@ -845,8 +848,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
845 tcon = cifs_sb->tcon; 848 tcon = cifs_sb->tcon;
846 849
847 if (file->private_data == NULL) { 850 if (file->private_data == NULL) {
851 rc = -EBADF;
848 FreeXid(xid); 852 FreeXid(xid);
849 return -EBADF; 853 return rc;
850 } 854 }
851 netfid = ((struct cifsFileInfo *)file->private_data)->netfid; 855 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
852 856
@@ -1805,8 +1809,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data,
1805 pTcon = cifs_sb->tcon; 1809 pTcon = cifs_sb->tcon;
1806 1810
1807 if (file->private_data == NULL) { 1811 if (file->private_data == NULL) {
1812 rc = -EBADF;
1808 FreeXid(xid); 1813 FreeXid(xid);
1809 return -EBADF; 1814 return rc;
1810 } 1815 }
1811 open_file = (struct cifsFileInfo *)file->private_data; 1816 open_file = (struct cifsFileInfo *)file->private_data;
1812 1817
@@ -1885,8 +1890,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1885 pTcon = cifs_sb->tcon; 1890 pTcon = cifs_sb->tcon;
1886 1891
1887 if (file->private_data == NULL) { 1892 if (file->private_data == NULL) {
1893 rc = -EBADF;
1888 FreeXid(xid); 1894 FreeXid(xid);
1889 return -EBADF; 1895 return rc;
1890 } 1896 }
1891 open_file = (struct cifsFileInfo *)file->private_data; 1897 open_file = (struct cifsFileInfo *)file->private_data;
1892 1898
@@ -2019,8 +2025,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
2019 2025
2020 xid = GetXid(); 2026 xid = GetXid();
2021 if (file->private_data == NULL) { 2027 if (file->private_data == NULL) {
2028 rc = -EBADF;
2022 FreeXid(xid); 2029 FreeXid(xid);
2023 return -EBADF; 2030 return rc;
2024 } 2031 }
2025 open_file = (struct cifsFileInfo *)file->private_data; 2032 open_file = (struct cifsFileInfo *)file->private_data;
2026 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 2033 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -2185,8 +2192,9 @@ static int cifs_readpage(struct file *file, struct page *page)
2185 xid = GetXid(); 2192 xid = GetXid();
2186 2193
2187 if (file->private_data == NULL) { 2194 if (file->private_data == NULL) {
2195 rc = -EBADF;
2188 FreeXid(xid); 2196 FreeXid(xid);
2189 return -EBADF; 2197 return rc;
2190 } 2198 }
2191 2199
2192 cFYI(1, ("readpage %p at offset %d 0x%x\n", 2200 cFYI(1, ("readpage %p at offset %d 0x%x\n",
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index fad882b075ba..82d83839655e 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -77,239 +77,202 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral)
77 } 77 }
78} 78}
79 79
80static void cifs_unix_info_to_inode(struct inode *inode, 80/* populate an inode with info from a cifs_fattr struct */
81 FILE_UNIX_BASIC_INFO *info, int force_uid_gid) 81void
82cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
82{ 83{
84 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
83 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 85 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
84 struct cifsInodeInfo *cifsInfo = CIFS_I(inode); 86 unsigned long oldtime = cifs_i->time;
85 __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes); 87
86 __u64 end_of_file = le64_to_cpu(info->EndOfFile); 88 inode->i_atime = fattr->cf_atime;
89 inode->i_mtime = fattr->cf_mtime;
90 inode->i_ctime = fattr->cf_ctime;
91 inode->i_rdev = fattr->cf_rdev;
92 inode->i_nlink = fattr->cf_nlink;
93 inode->i_uid = fattr->cf_uid;
94 inode->i_gid = fattr->cf_gid;
95
96 /* if dynperm is set, don't clobber existing mode */
97 if (inode->i_state & I_NEW ||
98 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM))
99 inode->i_mode = fattr->cf_mode;
100
101 cifs_i->cifsAttrs = fattr->cf_cifsattrs;
102 cifs_i->uniqueid = fattr->cf_uniqueid;
103
104 if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
105 cifs_i->time = 0;
106 else
107 cifs_i->time = jiffies;
108
109 cFYI(1, ("inode 0x%p old_time=%ld new_time=%ld", inode,
110 oldtime, cifs_i->time));
87 111
88 inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime); 112 cifs_i->delete_pending = fattr->cf_flags & CIFS_FATTR_DELETE_PENDING;
89 inode->i_mtime = 113
90 cifs_NTtimeToUnix(info->LastModificationTime); 114 /*
91 inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange); 115 * Can't safely change the file size here if the client is writing to
92 inode->i_mode = le64_to_cpu(info->Permissions); 116 * it due to potential races.
117 */
118 spin_lock(&inode->i_lock);
119 if (is_size_safe_to_change(cifs_i, fattr->cf_eof)) {
120 i_size_write(inode, fattr->cf_eof);
121
122 /*
123 * i_blocks is not related to (i_size / i_blksize),
124 * but instead 512 byte (2**9) size is required for
125 * calculating num blocks.
126 */
127 inode->i_blocks = (512 - 1 + fattr->cf_bytes) >> 9;
128 }
129 spin_unlock(&inode->i_lock);
130
131 cifs_set_ops(inode, fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL);
132}
133
134/* Fill a cifs_fattr struct with info from FILE_UNIX_BASIC_INFO. */
135void
136cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
137 struct cifs_sb_info *cifs_sb)
138{
139 memset(fattr, 0, sizeof(*fattr));
140 fattr->cf_uniqueid = le64_to_cpu(info->UniqueId);
141 fattr->cf_bytes = le64_to_cpu(info->NumOfBytes);
142 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
143
144 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
145 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime);
146 fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange);
147 fattr->cf_mode = le64_to_cpu(info->Permissions);
93 148
94 /* 149 /*
95 * Since we set the inode type below we need to mask off 150 * Since we set the inode type below we need to mask off
96 * to avoid strange results if bits set above. 151 * to avoid strange results if bits set above.
97 */ 152 */
98 inode->i_mode &= ~S_IFMT; 153 fattr->cf_mode &= ~S_IFMT;
99 switch (le32_to_cpu(info->Type)) { 154 switch (le32_to_cpu(info->Type)) {
100 case UNIX_FILE: 155 case UNIX_FILE:
101 inode->i_mode |= S_IFREG; 156 fattr->cf_mode |= S_IFREG;
157 fattr->cf_dtype = DT_REG;
102 break; 158 break;
103 case UNIX_SYMLINK: 159 case UNIX_SYMLINK:
104 inode->i_mode |= S_IFLNK; 160 fattr->cf_mode |= S_IFLNK;
161 fattr->cf_dtype = DT_LNK;
105 break; 162 break;
106 case UNIX_DIR: 163 case UNIX_DIR:
107 inode->i_mode |= S_IFDIR; 164 fattr->cf_mode |= S_IFDIR;
165 fattr->cf_dtype = DT_DIR;
108 break; 166 break;
109 case UNIX_CHARDEV: 167 case UNIX_CHARDEV:
110 inode->i_mode |= S_IFCHR; 168 fattr->cf_mode |= S_IFCHR;
111 inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor), 169 fattr->cf_dtype = DT_CHR;
112 le64_to_cpu(info->DevMinor) & MINORMASK); 170 fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
171 le64_to_cpu(info->DevMinor) & MINORMASK);
113 break; 172 break;
114 case UNIX_BLOCKDEV: 173 case UNIX_BLOCKDEV:
115 inode->i_mode |= S_IFBLK; 174 fattr->cf_mode |= S_IFBLK;
116 inode->i_rdev = MKDEV(le64_to_cpu(info->DevMajor), 175 fattr->cf_dtype = DT_BLK;
117 le64_to_cpu(info->DevMinor) & MINORMASK); 176 fattr->cf_rdev = MKDEV(le64_to_cpu(info->DevMajor),
177 le64_to_cpu(info->DevMinor) & MINORMASK);
118 break; 178 break;
119 case UNIX_FIFO: 179 case UNIX_FIFO:
120 inode->i_mode |= S_IFIFO; 180 fattr->cf_mode |= S_IFIFO;
181 fattr->cf_dtype = DT_FIFO;
121 break; 182 break;
122 case UNIX_SOCKET: 183 case UNIX_SOCKET:
123 inode->i_mode |= S_IFSOCK; 184 fattr->cf_mode |= S_IFSOCK;
185 fattr->cf_dtype = DT_SOCK;
124 break; 186 break;
125 default: 187 default:
126 /* safest to call it a file if we do not know */ 188 /* safest to call it a file if we do not know */
127 inode->i_mode |= S_IFREG; 189 fattr->cf_mode |= S_IFREG;
190 fattr->cf_dtype = DT_REG;
128 cFYI(1, ("unknown type %d", le32_to_cpu(info->Type))); 191 cFYI(1, ("unknown type %d", le32_to_cpu(info->Type)));
129 break; 192 break;
130 } 193 }
131 194
132 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) && 195 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
133 !force_uid_gid) 196 fattr->cf_uid = cifs_sb->mnt_uid;
134 inode->i_uid = cifs_sb->mnt_uid;
135 else 197 else
136 inode->i_uid = le64_to_cpu(info->Uid); 198 fattr->cf_uid = le64_to_cpu(info->Uid);
137 199
138 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) && 200 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
139 !force_uid_gid) 201 fattr->cf_gid = cifs_sb->mnt_gid;
140 inode->i_gid = cifs_sb->mnt_gid;
141 else 202 else
142 inode->i_gid = le64_to_cpu(info->Gid); 203 fattr->cf_gid = le64_to_cpu(info->Gid);
143
144 inode->i_nlink = le64_to_cpu(info->Nlinks);
145
146 cifsInfo->server_eof = end_of_file;
147 spin_lock(&inode->i_lock);
148 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
149 /*
150 * We can not safely change the file size here if the client
151 * is writing to it due to potential races.
152 */
153 i_size_write(inode, end_of_file);
154 204
155 /* 205 fattr->cf_nlink = le64_to_cpu(info->Nlinks);
156 * i_blocks is not related to (i_size / i_blksize),
157 * but instead 512 byte (2**9) size is required for
158 * calculating num blocks.
159 */
160 inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
161 }
162 spin_unlock(&inode->i_lock);
163} 206}
164 207
165
166/* 208/*
167 * Needed to setup inode data for the directory which is the 209 * Fill a cifs_fattr struct with fake inode info.
168 * junction to the new submount (ie to setup the fake directory
169 * which represents a DFS referral)
170 */
171static void fill_fake_finddataunix(FILE_UNIX_BASIC_INFO *pfnd_dat,
172 struct super_block *sb)
173{
174 struct inode *pinode = NULL;
175
176 memset(pfnd_dat, 0, sizeof(FILE_UNIX_BASIC_INFO));
177
178/* __le64 pfnd_dat->EndOfFile = cpu_to_le64(0);
179 __le64 pfnd_dat->NumOfBytes = cpu_to_le64(0);
180 __u64 UniqueId = 0; */
181 pfnd_dat->LastStatusChange =
182 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
183 pfnd_dat->LastAccessTime =
184 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
185 pfnd_dat->LastModificationTime =
186 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
187 pfnd_dat->Type = cpu_to_le32(UNIX_DIR);
188 pfnd_dat->Permissions = cpu_to_le64(S_IXUGO | S_IRWXU);
189 pfnd_dat->Nlinks = cpu_to_le64(2);
190 if (sb->s_root)
191 pinode = sb->s_root->d_inode;
192 if (pinode == NULL)
193 return;
194
195 /* fill in default values for the remaining based on root
196 inode since we can not query the server for this inode info */
197 pfnd_dat->DevMajor = cpu_to_le64(MAJOR(pinode->i_rdev));
198 pfnd_dat->DevMinor = cpu_to_le64(MINOR(pinode->i_rdev));
199 pfnd_dat->Uid = cpu_to_le64(pinode->i_uid);
200 pfnd_dat->Gid = cpu_to_le64(pinode->i_gid);
201}
202
203/**
204 * cifs_new inode - create new inode, initialize, and hash it
205 * @sb - pointer to superblock
206 * @inum - if valid pointer and serverino is enabled, replace i_ino with val
207 *
208 * Create a new inode, initialize it for CIFS and hash it. Returns the new
209 * inode or NULL if one couldn't be allocated.
210 * 210 *
211 * If the share isn't mounted with "serverino" or inum is a NULL pointer then 211 * Needed to setup cifs_fattr data for the directory which is the
212 * we'll just use the inode number assigned by new_inode(). Note that this can 212 * junction to the new submount (ie to setup the fake directory
213 * mean i_ino collisions since the i_ino assigned by new_inode is not 213 * which represents a DFS referral).
214 * guaranteed to be unique.
215 */ 214 */
216struct inode * 215static void
217cifs_new_inode(struct super_block *sb, __u64 *inum) 216cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
218{ 217{
219 struct inode *inode; 218 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
220
221 inode = new_inode(sb);
222 if (inode == NULL)
223 return NULL;
224
225 /*
226 * BB: Is i_ino == 0 legal? Here, we assume that it is. If it isn't we
227 * stop passing inum as ptr. Are there sanity checks we can use to
228 * ensure that the server is really filling in that field? Also,
229 * if serverino is disabled, perhaps we should be using iunique()?
230 */
231 if (inum && (CIFS_SB(sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM))
232 inode->i_ino = (unsigned long) *inum;
233
234 /*
235 * must set this here instead of cifs_alloc_inode since VFS will
236 * clobber i_flags
237 */
238 if (sb->s_flags & MS_NOATIME)
239 inode->i_flags |= S_NOATIME | S_NOCMTIME;
240
241 insert_inode_hash(inode);
242 219
243 return inode; 220 cFYI(1, ("creating fake fattr for DFS referral"));
221
222 memset(fattr, 0, sizeof(*fattr));
223 fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
224 fattr->cf_uid = cifs_sb->mnt_uid;
225 fattr->cf_gid = cifs_sb->mnt_gid;
226 fattr->cf_atime = CURRENT_TIME;
227 fattr->cf_ctime = CURRENT_TIME;
228 fattr->cf_mtime = CURRENT_TIME;
229 fattr->cf_nlink = 2;
230 fattr->cf_flags |= CIFS_FATTR_DFS_REFERRAL;
244} 231}
245 232
246int cifs_get_inode_info_unix(struct inode **pinode, 233int cifs_get_inode_info_unix(struct inode **pinode,
247 const unsigned char *full_path, struct super_block *sb, int xid) 234 const unsigned char *full_path,
235 struct super_block *sb, int xid)
248{ 236{
249 int rc = 0; 237 int rc;
250 FILE_UNIX_BASIC_INFO find_data; 238 FILE_UNIX_BASIC_INFO find_data;
251 struct cifsTconInfo *pTcon; 239 struct cifs_fattr fattr;
252 struct inode *inode; 240 struct cifsTconInfo *tcon;
253 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 241 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
254 bool is_dfs_referral = false;
255 struct cifsInodeInfo *cifsInfo;
256 __u64 num_of_bytes;
257 __u64 end_of_file;
258 242
259 pTcon = cifs_sb->tcon; 243 tcon = cifs_sb->tcon;
260 cFYI(1, ("Getting info on %s", full_path)); 244 cFYI(1, ("Getting info on %s", full_path));
261 245
262 /* could have done a find first instead but this returns more info */ 246 /* could have done a find first instead but this returns more info */
263 rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data, 247 rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
264 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 248 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
265 CIFS_MOUNT_MAP_SPECIAL_CHR); 249 CIFS_MOUNT_MAP_SPECIAL_CHR);
266 if (rc == -EREMOTE && !is_dfs_referral) {
267 is_dfs_referral = true;
268 cFYI(DBG2, ("DFS ref"));
269 /* for DFS, server does not give us real inode data */
270 fill_fake_finddataunix(&find_data, sb);
271 rc = 0;
272 } else if (rc)
273 goto cgiiu_exit;
274 250
275 num_of_bytes = le64_to_cpu(find_data.NumOfBytes); 251 if (!rc) {
276 end_of_file = le64_to_cpu(find_data.EndOfFile); 252 cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb);
253 } else if (rc == -EREMOTE) {
254 cifs_create_dfs_fattr(&fattr, sb);
255 rc = 0;
256 } else {
257 return rc;
258 }
277 259
278 /* get new inode */
279 if (*pinode == NULL) { 260 if (*pinode == NULL) {
280 __u64 unique_id = le64_to_cpu(find_data.UniqueId); 261 /* get new inode */
281 *pinode = cifs_new_inode(sb, &unique_id); 262 *pinode = cifs_iget(sb, &fattr);
282 if (*pinode == NULL) { 263 if (!*pinode)
283 rc = -ENOMEM; 264 rc = -ENOMEM;
284 goto cgiiu_exit; 265 } else {
285 } 266 /* we already have inode, update it */
267 cifs_fattr_to_inode(*pinode, &fattr);
286 } 268 }
287 269
288 inode = *pinode;
289 cifsInfo = CIFS_I(inode);
290
291 cFYI(1, ("Old time %ld", cifsInfo->time));
292 cifsInfo->time = jiffies;
293 cFYI(1, ("New time %ld", cifsInfo->time));
294 /* this is ok to set on every inode revalidate */
295 atomic_set(&cifsInfo->inUse, 1);
296
297 cifs_unix_info_to_inode(inode, &find_data, 0);
298
299 if (num_of_bytes < end_of_file)
300 cFYI(1, ("allocation size less than end of file"));
301 cFYI(1, ("Size %ld and blocks %llu",
302 (unsigned long) inode->i_size,
303 (unsigned long long)inode->i_blocks));
304
305 cifs_set_ops(inode, is_dfs_referral);
306cgiiu_exit:
307 return rc; 270 return rc;
308} 271}
309 272
310static int decode_sfu_inode(struct inode *inode, __u64 size, 273static int
311 const unsigned char *path, 274cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path,
312 struct cifs_sb_info *cifs_sb, int xid) 275 struct cifs_sb_info *cifs_sb, int xid)
313{ 276{
314 int rc; 277 int rc;
315 int oplock = 0; 278 int oplock = 0;
@@ -321,10 +284,15 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
321 284
322 pbuf = buf; 285 pbuf = buf;
323 286
324 if (size == 0) { 287 fattr->cf_mode &= ~S_IFMT;
325 inode->i_mode |= S_IFIFO; 288
289 if (fattr->cf_eof == 0) {
290 fattr->cf_mode |= S_IFIFO;
291 fattr->cf_dtype = DT_FIFO;
326 return 0; 292 return 0;
327 } else if (size < 8) { 293 } else if (fattr->cf_eof < 8) {
294 fattr->cf_mode |= S_IFREG;
295 fattr->cf_dtype = DT_REG;
328 return -EINVAL; /* EOPNOTSUPP? */ 296 return -EINVAL; /* EOPNOTSUPP? */
329 } 297 }
330 298
@@ -336,42 +304,46 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
336 if (rc == 0) { 304 if (rc == 0) {
337 int buf_type = CIFS_NO_BUFFER; 305 int buf_type = CIFS_NO_BUFFER;
338 /* Read header */ 306 /* Read header */
339 rc = CIFSSMBRead(xid, pTcon, 307 rc = CIFSSMBRead(xid, pTcon, netfid,
340 netfid,
341 24 /* length */, 0 /* offset */, 308 24 /* length */, 0 /* offset */,
342 &bytes_read, &pbuf, &buf_type); 309 &bytes_read, &pbuf, &buf_type);
343 if ((rc == 0) && (bytes_read >= 8)) { 310 if ((rc == 0) && (bytes_read >= 8)) {
344 if (memcmp("IntxBLK", pbuf, 8) == 0) { 311 if (memcmp("IntxBLK", pbuf, 8) == 0) {
345 cFYI(1, ("Block device")); 312 cFYI(1, ("Block device"));
346 inode->i_mode |= S_IFBLK; 313 fattr->cf_mode |= S_IFBLK;
314 fattr->cf_dtype = DT_BLK;
347 if (bytes_read == 24) { 315 if (bytes_read == 24) {
348 /* we have enough to decode dev num */ 316 /* we have enough to decode dev num */
349 __u64 mjr; /* major */ 317 __u64 mjr; /* major */
350 __u64 mnr; /* minor */ 318 __u64 mnr; /* minor */
351 mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); 319 mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
352 mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); 320 mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
353 inode->i_rdev = MKDEV(mjr, mnr); 321 fattr->cf_rdev = MKDEV(mjr, mnr);
354 } 322 }
355 } else if (memcmp("IntxCHR", pbuf, 8) == 0) { 323 } else if (memcmp("IntxCHR", pbuf, 8) == 0) {
356 cFYI(1, ("Char device")); 324 cFYI(1, ("Char device"));
357 inode->i_mode |= S_IFCHR; 325 fattr->cf_mode |= S_IFCHR;
326 fattr->cf_dtype = DT_CHR;
358 if (bytes_read == 24) { 327 if (bytes_read == 24) {
359 /* we have enough to decode dev num */ 328 /* we have enough to decode dev num */
360 __u64 mjr; /* major */ 329 __u64 mjr; /* major */
361 __u64 mnr; /* minor */ 330 __u64 mnr; /* minor */
362 mjr = le64_to_cpu(*(__le64 *)(pbuf+8)); 331 mjr = le64_to_cpu(*(__le64 *)(pbuf+8));
363 mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); 332 mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
364 inode->i_rdev = MKDEV(mjr, mnr); 333 fattr->cf_rdev = MKDEV(mjr, mnr);
365 } 334 }
366 } else if (memcmp("IntxLNK", pbuf, 7) == 0) { 335 } else if (memcmp("IntxLNK", pbuf, 7) == 0) {
367 cFYI(1, ("Symlink")); 336 cFYI(1, ("Symlink"));
368 inode->i_mode |= S_IFLNK; 337 fattr->cf_mode |= S_IFLNK;
338 fattr->cf_dtype = DT_LNK;
369 } else { 339 } else {
370 inode->i_mode |= S_IFREG; /* file? */ 340 fattr->cf_mode |= S_IFREG; /* file? */
341 fattr->cf_dtype = DT_REG;
371 rc = -EOPNOTSUPP; 342 rc = -EOPNOTSUPP;
372 } 343 }
373 } else { 344 } else {
374 inode->i_mode |= S_IFREG; /* then it is a file */ 345 fattr->cf_mode |= S_IFREG; /* then it is a file */
346 fattr->cf_dtype = DT_REG;
375 rc = -EOPNOTSUPP; /* or some unknown SFU type */ 347 rc = -EOPNOTSUPP; /* or some unknown SFU type */
376 } 348 }
377 CIFSSMBClose(xid, pTcon, netfid); 349 CIFSSMBClose(xid, pTcon, netfid);
@@ -381,9 +353,13 @@ static int decode_sfu_inode(struct inode *inode, __u64 size,
381 353
382#define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID) /* SETFILEBITS valid bits */ 354#define SFBITS_MASK (S_ISVTX | S_ISGID | S_ISUID) /* SETFILEBITS valid bits */
383 355
384static int get_sfu_mode(struct inode *inode, 356/*
385 const unsigned char *path, 357 * Fetch mode bits as provided by SFU.
386 struct cifs_sb_info *cifs_sb, int xid) 358 *
359 * FIXME: Doesn't this clobber the type bit we got from cifs_sfu_type ?
360 */
361static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
362 struct cifs_sb_info *cifs_sb, int xid)
387{ 363{
388#ifdef CONFIG_CIFS_XATTR 364#ifdef CONFIG_CIFS_XATTR
389 ssize_t rc; 365 ssize_t rc;
@@ -391,68 +367,80 @@ static int get_sfu_mode(struct inode *inode,
391 __u32 mode; 367 __u32 mode;
392 368
393 rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS", 369 rc = CIFSSMBQueryEA(xid, cifs_sb->tcon, path, "SETFILEBITS",
394 ea_value, 4 /* size of buf */, cifs_sb->local_nls, 370 ea_value, 4 /* size of buf */, cifs_sb->local_nls,
395 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 371 cifs_sb->mnt_cifs_flags &
372 CIFS_MOUNT_MAP_SPECIAL_CHR);
396 if (rc < 0) 373 if (rc < 0)
397 return (int)rc; 374 return (int)rc;
398 else if (rc > 3) { 375 else if (rc > 3) {
399 mode = le32_to_cpu(*((__le32 *)ea_value)); 376 mode = le32_to_cpu(*((__le32 *)ea_value));
400 inode->i_mode &= ~SFBITS_MASK; 377 fattr->cf_mode &= ~SFBITS_MASK;
401 cFYI(1, ("special bits 0%o org mode 0%o", mode, inode->i_mode)); 378 cFYI(1, ("special bits 0%o org mode 0%o", mode,
402 inode->i_mode = (mode & SFBITS_MASK) | inode->i_mode; 379 fattr->cf_mode));
380 fattr->cf_mode = (mode & SFBITS_MASK) | fattr->cf_mode;
403 cFYI(1, ("special mode bits 0%o", mode)); 381 cFYI(1, ("special mode bits 0%o", mode));
404 return 0;
405 } else {
406 return 0;
407 } 382 }
383
384 return 0;
408#else 385#else
409 return -EOPNOTSUPP; 386 return -EOPNOTSUPP;
410#endif 387#endif
411} 388}
412 389
413/* 390/* Fill a cifs_fattr struct with info from FILE_ALL_INFO */
414 * Needed to setup inode data for the directory which is the 391static void
415 * junction to the new submount (ie to setup the fake directory 392cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
416 * which represents a DFS referral) 393 struct cifs_sb_info *cifs_sb, bool adjust_tz)
417 */
418static void fill_fake_finddata(FILE_ALL_INFO *pfnd_dat,
419 struct super_block *sb)
420{ 394{
421 memset(pfnd_dat, 0, sizeof(FILE_ALL_INFO)); 395 memset(fattr, 0, sizeof(*fattr));
422 396 fattr->cf_cifsattrs = le32_to_cpu(info->Attributes);
423/* __le64 pfnd_dat->AllocationSize = cpu_to_le64(0); 397 if (info->DeletePending)
424 __le64 pfnd_dat->EndOfFile = cpu_to_le64(0); 398 fattr->cf_flags |= CIFS_FATTR_DELETE_PENDING;
425 __u8 pfnd_dat->DeletePending = 0; 399
426 __u8 pfnd_data->Directory = 0; 400 if (info->LastAccessTime)
427 __le32 pfnd_dat->EASize = 0; 401 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
428 __u64 pfnd_dat->IndexNumber = 0; 402 else
429 __u64 pfnd_dat->IndexNumber1 = 0; */ 403 fattr->cf_atime = CURRENT_TIME;
430 pfnd_dat->CreationTime = 404
431 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 405 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
432 pfnd_dat->LastAccessTime = 406 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
433 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 407
434 pfnd_dat->LastWriteTime = 408 if (adjust_tz) {
435 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 409 fattr->cf_ctime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
436 pfnd_dat->ChangeTime = 410 fattr->cf_mtime.tv_sec += cifs_sb->tcon->ses->server->timeAdj;
437 cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); 411 }
438 pfnd_dat->Attributes = cpu_to_le32(ATTR_DIRECTORY); 412
439 pfnd_dat->NumberOfLinks = cpu_to_le32(2); 413 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
414 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
415
416 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
417 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
418 fattr->cf_dtype = DT_DIR;
419 } else {
420 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
421 fattr->cf_dtype = DT_REG;
422
423 /* clear write bits if ATTR_READONLY is set */
424 if (fattr->cf_cifsattrs & ATTR_READONLY)
425 fattr->cf_mode &= ~(S_IWUGO);
426 }
427
428 fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
429
430 fattr->cf_uid = cifs_sb->mnt_uid;
431 fattr->cf_gid = cifs_sb->mnt_gid;
440} 432}
441 433
442int cifs_get_inode_info(struct inode **pinode, 434int cifs_get_inode_info(struct inode **pinode,
443 const unsigned char *full_path, FILE_ALL_INFO *pfindData, 435 const unsigned char *full_path, FILE_ALL_INFO *pfindData,
444 struct super_block *sb, int xid, const __u16 *pfid) 436 struct super_block *sb, int xid, const __u16 *pfid)
445{ 437{
446 int rc = 0; 438 int rc = 0, tmprc;
447 __u32 attr;
448 struct cifsInodeInfo *cifsInfo;
449 struct cifsTconInfo *pTcon; 439 struct cifsTconInfo *pTcon;
450 struct inode *inode;
451 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 440 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
452 char *buf = NULL; 441 char *buf = NULL;
453 bool adjustTZ = false; 442 bool adjustTZ = false;
454 bool is_dfs_referral = false; 443 struct cifs_fattr fattr;
455 umode_t default_mode;
456 444
457 pTcon = cifs_sb->tcon; 445 pTcon = cifs_sb->tcon;
458 cFYI(1, ("Getting info on %s", full_path)); 446 cFYI(1, ("Getting info on %s", full_path));
@@ -487,163 +475,85 @@ int cifs_get_inode_info(struct inode **pinode,
487 adjustTZ = true; 475 adjustTZ = true;
488 } 476 }
489 } 477 }
490 /* dump_mem("\nQPathInfo return data",&findData, sizeof(findData)); */ 478
491 if (rc == -EREMOTE) { 479 if (!rc) {
492 is_dfs_referral = true; 480 cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *) pfindData,
493 fill_fake_finddata(pfindData, sb); 481 cifs_sb, adjustTZ);
482 } else if (rc == -EREMOTE) {
483 cifs_create_dfs_fattr(&fattr, sb);
494 rc = 0; 484 rc = 0;
495 } else if (rc) 485 } else {
496 goto cgii_exit; 486 goto cgii_exit;
487 }
497 488
498 attr = le32_to_cpu(pfindData->Attributes); 489 /*
499 490 * If an inode wasn't passed in, then get the inode number
500 /* get new inode */ 491 *
492 * Is an i_ino of zero legal? Can we use that to check if the server
493 * supports returning inode numbers? Are there other sanity checks we
494 * can use to ensure that the server is really filling in that field?
495 *
496 * We can not use the IndexNumber field by default from Windows or
497 * Samba (in ALL_INFO buf) but we can request it explicitly. The SNIA
498 * CIFS spec claims that this value is unique within the scope of a
499 * share, and the windows docs hint that it's actually unique
500 * per-machine.
501 *
502 * There may be higher info levels that work but are there Windows
503 * server or network appliances for which IndexNumber field is not
504 * guaranteed unique?
505 */
501 if (*pinode == NULL) { 506 if (*pinode == NULL) {
502 __u64 inode_num;
503 __u64 *pinum = &inode_num;
504
505 /* Is an i_ino of zero legal? Can we use that to check
506 if the server supports returning inode numbers? Are
507 there other sanity checks we can use to ensure that
508 the server is really filling in that field? */
509
510 /* We can not use the IndexNumber field by default from
511 Windows or Samba (in ALL_INFO buf) but we can request
512 it explicitly. It may not be unique presumably if
513 the server has multiple devices mounted under one share */
514
515 /* There may be higher info levels that work but are
516 there Windows server or network appliances for which
517 IndexNumber field is not guaranteed unique? */
518
519 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 507 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
520 int rc1 = 0; 508 int rc1 = 0;
521 509
522 rc1 = CIFSGetSrvInodeNumber(xid, pTcon, 510 rc1 = CIFSGetSrvInodeNumber(xid, pTcon,
523 full_path, pinum, 511 full_path, &fattr.cf_uniqueid,
524 cifs_sb->local_nls, 512 cifs_sb->local_nls,
525 cifs_sb->mnt_cifs_flags & 513 cifs_sb->mnt_cifs_flags &
526 CIFS_MOUNT_MAP_SPECIAL_CHR); 514 CIFS_MOUNT_MAP_SPECIAL_CHR);
527 if (rc1) { 515 if (rc1) {
528 cFYI(1, ("GetSrvInodeNum rc %d", rc1)); 516 cFYI(1, ("GetSrvInodeNum rc %d", rc1));
529 pinum = NULL; 517 fattr.cf_uniqueid = iunique(sb, ROOT_I);
530 /* BB EOPNOSUPP disable SERVER_INUM? */ 518 /* disable serverino if call not supported */
519 if (rc1 == -EINVAL)
520 cifs_sb->mnt_cifs_flags &=
521 ~CIFS_MOUNT_SERVER_INUM;
531 } 522 }
532 } else { 523 } else {
533 pinum = NULL; 524 fattr.cf_uniqueid = iunique(sb, ROOT_I);
534 }
535
536 *pinode = cifs_new_inode(sb, pinum);
537 if (*pinode == NULL) {
538 rc = -ENOMEM;
539 goto cgii_exit;
540 } 525 }
541 }
542 inode = *pinode;
543 cifsInfo = CIFS_I(inode);
544 cifsInfo->cifsAttrs = attr;
545 cifsInfo->delete_pending = pfindData->DeletePending ? true : false;
546 cFYI(1, ("Old time %ld", cifsInfo->time));
547 cifsInfo->time = jiffies;
548 cFYI(1, ("New time %ld", cifsInfo->time));
549
550 /* blksize needs to be multiple of two. So safer to default to
551 blksize and blkbits set in superblock so 2**blkbits and blksize
552 will match rather than setting to:
553 (pTcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE) & 0xFFFFFE00;*/
554
555 /* Linux can not store file creation time so ignore it */
556 if (pfindData->LastAccessTime)
557 inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime);
558 else /* do not need to use current_fs_time - time not stored */
559 inode->i_atime = CURRENT_TIME;
560 inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime);
561 inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime);
562 cFYI(DBG2, ("Attributes came in as 0x%x", attr));
563 if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) {
564 inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj;
565 inode->i_mtime.tv_sec += pTcon->ses->server->timeAdj;
566 }
567
568 /* get default inode mode */
569 if (attr & ATTR_DIRECTORY)
570 default_mode = cifs_sb->mnt_dir_mode;
571 else
572 default_mode = cifs_sb->mnt_file_mode;
573
574 /* set permission bits */
575 if (atomic_read(&cifsInfo->inUse) == 0 ||
576 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
577 inode->i_mode = default_mode;
578 else {
579 /* just reenable write bits if !ATTR_READONLY */
580 if ((inode->i_mode & S_IWUGO) == 0 &&
581 (attr & ATTR_READONLY) == 0)
582 inode->i_mode |= (S_IWUGO & default_mode);
583
584 inode->i_mode &= ~S_IFMT;
585 }
586 /* clear write bits if ATTR_READONLY is set */
587 if (attr & ATTR_READONLY)
588 inode->i_mode &= ~S_IWUGO;
589
590 /* set inode type */
591 if ((attr & ATTR_SYSTEM) &&
592 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) {
593 /* no need to fix endianness on 0 */
594 if (pfindData->EndOfFile == 0)
595 inode->i_mode |= S_IFIFO;
596 else if (decode_sfu_inode(inode,
597 le64_to_cpu(pfindData->EndOfFile),
598 full_path, cifs_sb, xid))
599 cFYI(1, ("unknown SFU file type\n"));
600 } else { 526 } else {
601 if (attr & ATTR_DIRECTORY) 527 fattr.cf_uniqueid = CIFS_I(*pinode)->uniqueid;
602 inode->i_mode |= S_IFDIR;
603 else
604 inode->i_mode |= S_IFREG;
605 } 528 }
606 529
607 cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile); 530 /* query for SFU type info if supported and needed */
608 spin_lock(&inode->i_lock); 531 if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
609 if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) { 532 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
610 /* can not safely shrink the file size here if the 533 tmprc = cifs_sfu_type(&fattr, full_path, cifs_sb, xid);
611 client is writing to it due to potential races */ 534 if (tmprc)
612 i_size_write(inode, cifsInfo->server_eof); 535 cFYI(1, ("cifs_sfu_type failed: %d", tmprc));
613
614 /* 512 bytes (2**9) is the fake blocksize that must be
615 used for this calculation */
616 inode->i_blocks = (512 - 1 + le64_to_cpu(
617 pfindData->AllocationSize)) >> 9;
618 } 536 }
619 spin_unlock(&inode->i_lock);
620 537
621 inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks);
622
623 /* BB fill in uid and gid here? with help from winbind?
624 or retrieve from NTFS stream extended attribute */
625#ifdef CONFIG_CIFS_EXPERIMENTAL 538#ifdef CONFIG_CIFS_EXPERIMENTAL
626 /* fill in 0777 bits from ACL */ 539 /* fill in 0777 bits from ACL */
627 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 540 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
628 cFYI(1, ("Getting mode bits from ACL")); 541 cFYI(1, ("Getting mode bits from ACL"));
629 acl_to_uid_mode(cifs_sb, inode, full_path, pfid); 542 cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
630 } 543 }
631#endif 544#endif
632 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
633 /* fill in remaining high mode bits e.g. SUID, VTX */
634 get_sfu_mode(inode, full_path, cifs_sb, xid);
635 } else if (atomic_read(&cifsInfo->inUse) == 0) {
636 inode->i_uid = cifs_sb->mnt_uid;
637 inode->i_gid = cifs_sb->mnt_gid;
638 /* set so we do not keep refreshing these fields with
639 bad data after user has changed them in memory */
640 atomic_set(&cifsInfo->inUse, 1);
641 }
642
643 cifs_set_ops(inode, is_dfs_referral);
644
645 545
546 /* fill in remaining high mode bits e.g. SUID, VTX */
547 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
548 cifs_sfu_mode(&fattr, full_path, cifs_sb, xid);
646 549
550 if (!*pinode) {
551 *pinode = cifs_iget(sb, &fattr);
552 if (!*pinode)
553 rc = -ENOMEM;
554 } else {
555 cifs_fattr_to_inode(*pinode, &fattr);
556 }
647 557
648cgii_exit: 558cgii_exit:
649 kfree(buf); 559 kfree(buf);
@@ -695,33 +605,78 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
695 return full_path; 605 return full_path;
696} 606}
697 607
608static int
609cifs_find_inode(struct inode *inode, void *opaque)
610{
611 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
612
613 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
614 return 0;
615
616 return 1;
617}
618
619static int
620cifs_init_inode(struct inode *inode, void *opaque)
621{
622 struct cifs_fattr *fattr = (struct cifs_fattr *) opaque;
623
624 CIFS_I(inode)->uniqueid = fattr->cf_uniqueid;
625 return 0;
626}
627
628/* Given fattrs, get a corresponding inode */
629struct inode *
630cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
631{
632 unsigned long hash;
633 struct inode *inode;
634
635 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
636
637 /* hash down to 32-bits on 32-bit arch */
638 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
639
640 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
641
642 /* we have fattrs in hand, update the inode */
643 if (inode) {
644 cifs_fattr_to_inode(inode, fattr);
645 if (sb->s_flags & MS_NOATIME)
646 inode->i_flags |= S_NOATIME | S_NOCMTIME;
647 if (inode->i_state & I_NEW) {
648 inode->i_ino = hash;
649 unlock_new_inode(inode);
650 }
651 }
652
653 return inode;
654}
655
698/* gets root inode */ 656/* gets root inode */
699struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) 657struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
700{ 658{
701 int xid; 659 int xid;
702 struct cifs_sb_info *cifs_sb; 660 struct cifs_sb_info *cifs_sb;
703 struct inode *inode; 661 struct inode *inode = NULL;
704 long rc; 662 long rc;
705 char *full_path; 663 char *full_path;
706 664
707 inode = iget_locked(sb, ino); 665 cifs_sb = CIFS_SB(sb);
708 if (!inode)
709 return ERR_PTR(-ENOMEM);
710 if (!(inode->i_state & I_NEW))
711 return inode;
712
713 cifs_sb = CIFS_SB(inode->i_sb);
714 full_path = cifs_build_path_to_root(cifs_sb); 666 full_path = cifs_build_path_to_root(cifs_sb);
715 if (full_path == NULL) 667 if (full_path == NULL)
716 return ERR_PTR(-ENOMEM); 668 return ERR_PTR(-ENOMEM);
717 669
718 xid = GetXid(); 670 xid = GetXid();
719 if (cifs_sb->tcon->unix_ext) 671 if (cifs_sb->tcon->unix_ext)
720 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, 672 rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid);
721 xid);
722 else 673 else
723 rc = cifs_get_inode_info(&inode, full_path, NULL, inode->i_sb, 674 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
724 xid, NULL); 675 xid, NULL);
676
677 if (!inode)
678 return ERR_PTR(-ENOMEM);
679
725 if (rc && cifs_sb->tcon->ipc) { 680 if (rc && cifs_sb->tcon->ipc) {
726 cFYI(1, ("ipc connection - fake read inode")); 681 cFYI(1, ("ipc connection - fake read inode"));
727 inode->i_mode |= S_IFDIR; 682 inode->i_mode |= S_IFDIR;
@@ -737,7 +692,6 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
737 return ERR_PTR(rc); 692 return ERR_PTR(rc);
738 } 693 }
739 694
740 unlock_new_inode(inode);
741 695
742 kfree(full_path); 696 kfree(full_path);
743 /* can not call macro FreeXid here since in a void func 697 /* can not call macro FreeXid here since in a void func
@@ -988,8 +942,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
988 * sb->s_vfs_rename_mutex here */ 942 * sb->s_vfs_rename_mutex here */
989 full_path = build_path_from_dentry(dentry); 943 full_path = build_path_from_dentry(dentry);
990 if (full_path == NULL) { 944 if (full_path == NULL) {
945 rc = -ENOMEM;
991 FreeXid(xid); 946 FreeXid(xid);
992 return -ENOMEM; 947 return rc;
993 } 948 }
994 949
995 if ((tcon->ses->capabilities & CAP_UNIX) && 950 if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -1062,44 +1017,6 @@ out_reval:
1062 return rc; 1017 return rc;
1063} 1018}
1064 1019
1065void posix_fill_in_inode(struct inode *tmp_inode,
1066 FILE_UNIX_BASIC_INFO *pData, int isNewInode)
1067{
1068 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
1069 loff_t local_size;
1070 struct timespec local_mtime;
1071
1072 cifsInfo->time = jiffies;
1073 atomic_inc(&cifsInfo->inUse);
1074
1075 /* save mtime and size */
1076 local_mtime = tmp_inode->i_mtime;
1077 local_size = tmp_inode->i_size;
1078
1079 cifs_unix_info_to_inode(tmp_inode, pData, 1);
1080 cifs_set_ops(tmp_inode, false);
1081
1082 if (!S_ISREG(tmp_inode->i_mode))
1083 return;
1084
1085 /*
1086 * No sense invalidating pages for new inode
1087 * since we we have not started caching
1088 * readahead file data yet.
1089 */
1090 if (isNewInode)
1091 return;
1092
1093 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
1094 (local_size == tmp_inode->i_size)) {
1095 cFYI(1, ("inode exists but unchanged"));
1096 } else {
1097 /* file may have changed on server */
1098 cFYI(1, ("invalidate inode, readdir detected change"));
1099 invalidate_remote_inode(tmp_inode);
1100 }
1101}
1102
1103int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) 1020int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1104{ 1021{
1105 int rc = 0, tmprc; 1022 int rc = 0, tmprc;
@@ -1108,6 +1025,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1108 struct cifsTconInfo *pTcon; 1025 struct cifsTconInfo *pTcon;
1109 char *full_path = NULL; 1026 char *full_path = NULL;
1110 struct inode *newinode = NULL; 1027 struct inode *newinode = NULL;
1028 struct cifs_fattr fattr;
1111 1029
1112 cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode)); 1030 cFYI(1, ("In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode));
1113 1031
@@ -1118,8 +1036,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1118 1036
1119 full_path = build_path_from_dentry(direntry); 1037 full_path = build_path_from_dentry(direntry);
1120 if (full_path == NULL) { 1038 if (full_path == NULL) {
1039 rc = -ENOMEM;
1121 FreeXid(xid); 1040 FreeXid(xid);
1122 return -ENOMEM; 1041 return rc;
1123 } 1042 }
1124 1043
1125 if ((pTcon->ses->capabilities & CAP_UNIX) && 1044 if ((pTcon->ses->capabilities & CAP_UNIX) &&
@@ -1146,7 +1065,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1146 cFYI(1, ("posix mkdir returned 0x%x", rc)); 1065 cFYI(1, ("posix mkdir returned 0x%x", rc));
1147 d_drop(direntry); 1066 d_drop(direntry);
1148 } else { 1067 } else {
1149 __u64 unique_id;
1150 if (pInfo->Type == cpu_to_le32(-1)) { 1068 if (pInfo->Type == cpu_to_le32(-1)) {
1151 /* no return info, go query for it */ 1069 /* no return info, go query for it */
1152 kfree(pInfo); 1070 kfree(pInfo);
@@ -1160,20 +1078,15 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1160 else 1078 else
1161 direntry->d_op = &cifs_dentry_ops; 1079 direntry->d_op = &cifs_dentry_ops;
1162 1080
1163 unique_id = le64_to_cpu(pInfo->UniqueId); 1081 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1164 newinode = cifs_new_inode(inode->i_sb, &unique_id); 1082 newinode = cifs_iget(inode->i_sb, &fattr);
1165 if (newinode == NULL) { 1083 if (!newinode) {
1166 kfree(pInfo); 1084 kfree(pInfo);
1167 goto mkdir_get_info; 1085 goto mkdir_get_info;
1168 } 1086 }
1169 1087
1170 newinode->i_nlink = 2;
1171 d_instantiate(direntry, newinode); 1088 d_instantiate(direntry, newinode);
1172 1089
1173 /* we already checked in POSIXCreate whether
1174 frame was long enough */
1175 posix_fill_in_inode(direntry->d_inode,
1176 pInfo, 1 /* NewInode */);
1177#ifdef CONFIG_CIFS_DEBUG2 1090#ifdef CONFIG_CIFS_DEBUG2
1178 cFYI(1, ("instantiated dentry %p %s to inode %p", 1091 cFYI(1, ("instantiated dentry %p %s to inode %p",
1179 direntry, direntry->d_name.name, newinode)); 1092 direntry, direntry->d_name.name, newinode));
@@ -1236,10 +1149,10 @@ mkdir_get_info:
1236 args.uid = NO_CHANGE_64; 1149 args.uid = NO_CHANGE_64;
1237 args.gid = NO_CHANGE_64; 1150 args.gid = NO_CHANGE_64;
1238 } 1151 }
1239 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, 1152 CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, &args,
1240 cifs_sb->local_nls, 1153 cifs_sb->local_nls,
1241 cifs_sb->mnt_cifs_flags & 1154 cifs_sb->mnt_cifs_flags &
1242 CIFS_MOUNT_MAP_SPECIAL_CHR); 1155 CIFS_MOUNT_MAP_SPECIAL_CHR);
1243 } else { 1156 } else {
1244 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && 1157 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
1245 (mode & S_IWUGO) == 0) { 1158 (mode & S_IWUGO) == 0) {
@@ -1303,8 +1216,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
1303 1216
1304 full_path = build_path_from_dentry(direntry); 1217 full_path = build_path_from_dentry(direntry);
1305 if (full_path == NULL) { 1218 if (full_path == NULL) {
1219 rc = -ENOMEM;
1306 FreeXid(xid); 1220 FreeXid(xid);
1307 return -ENOMEM; 1221 return rc;
1308 } 1222 }
1309 1223
1310 rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, 1224 rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls,
@@ -1508,8 +1422,9 @@ int cifs_revalidate(struct dentry *direntry)
1508 since that would deadlock */ 1422 since that would deadlock */
1509 full_path = build_path_from_dentry(direntry); 1423 full_path = build_path_from_dentry(direntry);
1510 if (full_path == NULL) { 1424 if (full_path == NULL) {
1425 rc = -ENOMEM;
1511 FreeXid(xid); 1426 FreeXid(xid);
1512 return -ENOMEM; 1427 return rc;
1513 } 1428 }
1514 cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1429 cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
1515 "jiffies %ld", full_path, direntry->d_inode, 1430 "jiffies %ld", full_path, direntry->d_inode,
@@ -1618,6 +1533,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1618 if (!err) { 1533 if (!err) {
1619 generic_fillattr(dentry->d_inode, stat); 1534 generic_fillattr(dentry->d_inode, stat);
1620 stat->blksize = CIFS_MAX_MSGSIZE; 1535 stat->blksize = CIFS_MAX_MSGSIZE;
1536 stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
1621 } 1537 }
1622 return err; 1538 return err;
1623} 1539}
@@ -1782,6 +1698,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1782 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1698 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1783 struct cifsTconInfo *pTcon = cifs_sb->tcon; 1699 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1784 struct cifs_unix_set_info_args *args = NULL; 1700 struct cifs_unix_set_info_args *args = NULL;
1701 struct cifsFileInfo *open_file;
1785 1702
1786 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", 1703 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
1787 direntry->d_name.name, attrs->ia_valid)); 1704 direntry->d_name.name, attrs->ia_valid));
@@ -1868,10 +1785,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1868 args->ctime = NO_CHANGE_64; 1785 args->ctime = NO_CHANGE_64;
1869 1786
1870 args->device = 0; 1787 args->device = 0;
1871 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args, 1788 open_file = find_writable_file(cifsInode);
1872 cifs_sb->local_nls, 1789 if (open_file) {
1873 cifs_sb->mnt_cifs_flags & 1790 u16 nfid = open_file->netfid;
1874 CIFS_MOUNT_MAP_SPECIAL_CHR); 1791 u32 npid = open_file->pid;
1792 rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
1793 atomic_dec(&open_file->wrtPending);
1794 } else {
1795 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
1796 cifs_sb->local_nls,
1797 cifs_sb->mnt_cifs_flags &
1798 CIFS_MOUNT_MAP_SPECIAL_CHR);
1799 }
1875 1800
1876 if (!rc) 1801 if (!rc)
1877 rc = inode_setattr(inode, attrs); 1802 rc = inode_setattr(inode, attrs);
@@ -1911,8 +1836,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1911 1836
1912 full_path = build_path_from_dentry(direntry); 1837 full_path = build_path_from_dentry(direntry);
1913 if (full_path == NULL) { 1838 if (full_path == NULL) {
1839 rc = -ENOMEM;
1914 FreeXid(xid); 1840 FreeXid(xid);
1915 return -ENOMEM; 1841 return rc;
1916 } 1842 }
1917 1843
1918 /* 1844 /*
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cd83c53fcbb5..fc1e0487eaee 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -172,8 +172,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
172 full_path = build_path_from_dentry(direntry); 172 full_path = build_path_from_dentry(direntry);
173 173
174 if (full_path == NULL) { 174 if (full_path == NULL) {
175 rc = -ENOMEM;
175 FreeXid(xid); 176 FreeXid(xid);
176 return -ENOMEM; 177 return rc;
177 } 178 }
178 179
179 cFYI(1, ("Full path: %s", full_path)); 180 cFYI(1, ("Full path: %s", full_path));
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 32d6baa0a54f..bd6d6895730d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -133,10 +133,12 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
133 {0, 0} 133 {0, 0}
134}; 134};
135 135
136/* Convert string containing dotted ip address to binary form */ 136/*
137/* returns 0 if invalid address */ 137 * Convert a string containing text IPv4 or IPv6 address to binary form.
138 138 *
139int 139 * Returns 0 on failure.
140 */
141static int
140cifs_inet_pton(const int address_family, const char *cp, void *dst) 142cifs_inet_pton(const int address_family, const char *cp, void *dst)
141{ 143{
142 int ret = 0; 144 int ret = 0;
@@ -153,6 +155,52 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst)
153 return ret; 155 return ret;
154} 156}
155 157
158/*
159 * Try to convert a string to an IPv4 address and then attempt to convert
160 * it to an IPv6 address if that fails. Set the family field if either
161 * succeeds. If it's an IPv6 address and it has a '%' sign in it, try to
162 * treat the part following it as a numeric sin6_scope_id.
163 *
164 * Returns 0 on failure.
165 */
166int
167cifs_convert_address(char *src, void *dst)
168{
169 int rc;
170 char *pct, *endp;
171 struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
172 struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
173
174 /* IPv4 address */
175 if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) {
176 s4->sin_family = AF_INET;
177 return 1;
178 }
179
180 /* temporarily terminate string */
181 pct = strchr(src, '%');
182 if (pct)
183 *pct = '\0';
184
185 rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr);
186
187 /* repair temp termination (if any) and make pct point to scopeid */
188 if (pct)
189 *pct++ = '%';
190
191 if (!rc)
192 return rc;
193
194 s6->sin6_family = AF_INET6;
195 if (pct) {
196 s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
197 if (!*pct || *endp)
198 return 0;
199 }
200
201 return rc;
202}
203
156/***************************************************************************** 204/*****************************************************************************
157convert a NT status code to a dos class/code 205convert a NT status code to a dos class/code
158 *****************************************************************************/ 206 *****************************************************************************/
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 86d0055dc529..f823a4a208a7 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -63,374 +63,123 @@ static inline void dump_cifs_file_struct(struct file *file, char *label)
63} 63}
64#endif /* DEBUG2 */ 64#endif /* DEBUG2 */
65 65
66/* Returns 1 if new inode created, 2 if both dentry and inode were */ 66/*
67/* Might check in the future if inode number changed so we can rehash inode */ 67 * Find the dentry that matches "name". If there isn't one, create one. If it's
68static int 68 * a negative dentry or the uniqueid changed, then drop it and recreate it.
69construct_dentry(struct qstr *qstring, struct file *file, 69 */
70 struct inode **ptmp_inode, struct dentry **pnew_dentry, 70static struct dentry *
71 __u64 *inum) 71cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
72 struct cifs_fattr *fattr)
72{ 73{
73 struct dentry *tmp_dentry = NULL; 74 struct dentry *dentry, *alias;
74 struct super_block *sb = file->f_path.dentry->d_sb; 75 struct inode *inode;
75 int rc = 0; 76 struct super_block *sb = parent->d_inode->i_sb;
77
78 cFYI(1, ("For %s", name->name));
79
80 dentry = d_lookup(parent, name);
81 if (dentry) {
82 /* FIXME: check for inode number changes? */
83 if (dentry->d_inode != NULL)
84 return dentry;
85 d_drop(dentry);
86 dput(dentry);
87 }
76 88
77 cFYI(1, ("For %s", qstring->name)); 89 dentry = d_alloc(parent, name);
78 90 if (dentry == NULL)
79 qstring->hash = full_name_hash(qstring->name, qstring->len); 91 return NULL;
80 tmp_dentry = d_lookup(file->f_path.dentry, qstring);
81 if (tmp_dentry) {
82 /* BB: overwrite old name? i.e. tmp_dentry->d_name and
83 * tmp_dentry->d_name.len??
84 */
85 cFYI(0, ("existing dentry with inode 0x%p",
86 tmp_dentry->d_inode));
87 *ptmp_inode = tmp_dentry->d_inode;
88 if (*ptmp_inode == NULL) {
89 *ptmp_inode = cifs_new_inode(sb, inum);
90 if (*ptmp_inode == NULL)
91 return rc;
92 rc = 1;
93 }
94 } else {
95 tmp_dentry = d_alloc(file->f_path.dentry, qstring);
96 if (tmp_dentry == NULL) {
97 cERROR(1, ("Failed allocating dentry"));
98 *ptmp_inode = NULL;
99 return rc;
100 }
101 92
102 if (CIFS_SB(sb)->tcon->nocase) 93 inode = cifs_iget(sb, fattr);
103 tmp_dentry->d_op = &cifs_ci_dentry_ops; 94 if (!inode) {
104 else 95 dput(dentry);
105 tmp_dentry->d_op = &cifs_dentry_ops; 96 return NULL;
97 }
106 98
107 *ptmp_inode = cifs_new_inode(sb, inum); 99 if (CIFS_SB(sb)->tcon->nocase)
108 if (*ptmp_inode == NULL) 100 dentry->d_op = &cifs_ci_dentry_ops;
109 return rc; 101 else
110 rc = 2; 102 dentry->d_op = &cifs_dentry_ops;
103
104 alias = d_materialise_unique(dentry, inode);
105 if (alias != NULL) {
106 dput(dentry);
107 if (IS_ERR(alias))
108 return NULL;
109 dentry = alias;
111 } 110 }
112 111
113 tmp_dentry->d_time = jiffies; 112 return dentry;
114 *pnew_dentry = tmp_dentry;
115 return rc;
116} 113}
117 114
118static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, 115static void
119 char *buf, unsigned int *pobject_type, int isNewInode) 116cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
120{ 117{
121 loff_t local_size; 118 fattr->cf_uid = cifs_sb->mnt_uid;
122 struct timespec local_mtime; 119 fattr->cf_gid = cifs_sb->mnt_gid;
123
124 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
125 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
126 __u32 attr;
127 __u64 allocation_size;
128 __u64 end_of_file;
129 umode_t default_mode;
130
131 /* save mtime and size */
132 local_mtime = tmp_inode->i_mtime;
133 local_size = tmp_inode->i_size;
134
135 if (new_buf_type) {
136 FILE_DIRECTORY_INFO *pfindData = (FILE_DIRECTORY_INFO *)buf;
137
138 attr = le32_to_cpu(pfindData->ExtFileAttributes);
139 allocation_size = le64_to_cpu(pfindData->AllocationSize);
140 end_of_file = le64_to_cpu(pfindData->EndOfFile);
141 tmp_inode->i_atime =
142 cifs_NTtimeToUnix(pfindData->LastAccessTime);
143 tmp_inode->i_mtime =
144 cifs_NTtimeToUnix(pfindData->LastWriteTime);
145 tmp_inode->i_ctime =
146 cifs_NTtimeToUnix(pfindData->ChangeTime);
147 } else { /* legacy, OS2 and DOS style */
148 int offset = cifs_sb->tcon->ses->server->timeAdj;
149 FIND_FILE_STANDARD_INFO *pfindData =
150 (FIND_FILE_STANDARD_INFO *)buf;
151
152 tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate,
153 pfindData->LastWriteTime,
154 offset);
155 tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate,
156 pfindData->LastAccessTime,
157 offset);
158 tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate,
159 pfindData->LastWriteTime,
160 offset);
161 attr = le16_to_cpu(pfindData->Attributes);
162 allocation_size = le32_to_cpu(pfindData->AllocationSize);
163 end_of_file = le32_to_cpu(pfindData->DataSize);
164 }
165 120
166 /* Linux can not store file creation time unfortunately so ignore it */ 121 if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
167 122 fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
168 cifsInfo->cifsAttrs = attr; 123 fattr->cf_dtype = DT_DIR;
169#ifdef CONFIG_CIFS_EXPERIMENTAL 124 } else {
170 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 125 fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
171 /* get more accurate mode via ACL - so force inode refresh */ 126 fattr->cf_dtype = DT_REG;
172 cifsInfo->time = 0;
173 } else
174#endif /* CONFIG_CIFS_EXPERIMENTAL */
175 cifsInfo->time = jiffies;
176
177 /* treat dos attribute of read-only as read-only mode bit e.g. 555? */
178 /* 2767 perms - indicate mandatory locking */
179 /* BB fill in uid and gid here? with help from winbind?
180 or retrieve from NTFS stream extended attribute */
181 if (atomic_read(&cifsInfo->inUse) == 0) {
182 tmp_inode->i_uid = cifs_sb->mnt_uid;
183 tmp_inode->i_gid = cifs_sb->mnt_gid;
184 }
185
186 if (attr & ATTR_DIRECTORY)
187 default_mode = cifs_sb->mnt_dir_mode;
188 else
189 default_mode = cifs_sb->mnt_file_mode;
190
191 /* set initial permissions */
192 if ((atomic_read(&cifsInfo->inUse) == 0) ||
193 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
194 tmp_inode->i_mode = default_mode;
195 else {
196 /* just reenable write bits if !ATTR_READONLY */
197 if ((tmp_inode->i_mode & S_IWUGO) == 0 &&
198 (attr & ATTR_READONLY) == 0)
199 tmp_inode->i_mode |= (S_IWUGO & default_mode);
200
201 tmp_inode->i_mode &= ~S_IFMT;
202 } 127 }
203 128
204 /* clear write bits if ATTR_READONLY is set */ 129 if (fattr->cf_cifsattrs & ATTR_READONLY)
205 if (attr & ATTR_READONLY) 130 fattr->cf_mode &= ~S_IWUGO;
206 tmp_inode->i_mode &= ~S_IWUGO;
207 131
208 /* set inode type */ 132 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL &&
209 if ((attr & ATTR_SYSTEM) && 133 fattr->cf_cifsattrs & ATTR_SYSTEM) {
210 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) { 134 if (fattr->cf_eof == 0) {
211 if (end_of_file == 0) { 135 fattr->cf_mode &= ~S_IFMT;
212 tmp_inode->i_mode |= S_IFIFO; 136 fattr->cf_mode |= S_IFIFO;
213 *pobject_type = DT_FIFO; 137 fattr->cf_dtype = DT_FIFO;
214 } else { 138 } else {
215 /* 139 /*
216 * trying to get the type can be slow, so just call 140 * trying to get the type and mode via SFU can be slow,
217 * this a regular file for now, and mark for reval 141 * so just call those regular files for now, and mark
142 * for reval
218 */ 143 */
219 tmp_inode->i_mode |= S_IFREG; 144 fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
220 *pobject_type = DT_REG;
221 cifsInfo->time = 0;
222 }
223 } else {
224 if (attr & ATTR_DIRECTORY) {
225 tmp_inode->i_mode |= S_IFDIR;
226 *pobject_type = DT_DIR;
227 } else {
228 tmp_inode->i_mode |= S_IFREG;
229 *pobject_type = DT_REG;
230 } 145 }
231 } 146 }
147}
232 148
233 /* can not fill in nlink here as in qpathinfo version and Unx search */ 149void
234 if (atomic_read(&cifsInfo->inUse) == 0) 150cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
235 atomic_set(&cifsInfo->inUse, 1); 151 struct cifs_sb_info *cifs_sb)
236 152{
237 cifsInfo->server_eof = end_of_file; 153 memset(fattr, 0, sizeof(*fattr));
238 spin_lock(&tmp_inode->i_lock); 154 fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
239 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 155 fattr->cf_eof = le64_to_cpu(info->EndOfFile);
240 /* can not safely change the file size here if the 156 fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
241 client is writing to it due to potential races */ 157 fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
242 i_size_write(tmp_inode, end_of_file); 158 fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
243 159 fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
244 /* 512 bytes (2**9) is the fake blocksize that must be used */ 160
245 /* for this calculation, even though the reported blocksize is larger */ 161 cifs_fill_common_info(fattr, cifs_sb);
246 tmp_inode->i_blocks = (512 - 1 + allocation_size) >> 9;
247 }
248 spin_unlock(&tmp_inode->i_lock);
249
250 if (allocation_size < end_of_file)
251 cFYI(1, ("May be sparse file, allocation less than file size"));
252 cFYI(1, ("File Size %ld and blocks %llu",
253 (unsigned long)tmp_inode->i_size,
254 (unsigned long long)tmp_inode->i_blocks));
255 if (S_ISREG(tmp_inode->i_mode)) {
256 cFYI(1, ("File inode"));
257 tmp_inode->i_op = &cifs_file_inode_ops;
258 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
259 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
260 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
261 else
262 tmp_inode->i_fop = &cifs_file_direct_ops;
263 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
264 tmp_inode->i_fop = &cifs_file_nobrl_ops;
265 else
266 tmp_inode->i_fop = &cifs_file_ops;
267
268 if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
269 (cifs_sb->tcon->ses->server->maxBuf <
270 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
271 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
272 else
273 tmp_inode->i_data.a_ops = &cifs_addr_ops;
274
275 if (isNewInode)
276 return; /* No sense invalidating pages for new inode
277 since have not started caching readahead file
278 data yet */
279
280 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
281 (local_size == tmp_inode->i_size)) {
282 cFYI(1, ("inode exists but unchanged"));
283 } else {
284 /* file may have changed on server */
285 cFYI(1, ("invalidate inode, readdir detected change"));
286 invalidate_remote_inode(tmp_inode);
287 }
288 } else if (S_ISDIR(tmp_inode->i_mode)) {
289 cFYI(1, ("Directory inode"));
290 tmp_inode->i_op = &cifs_dir_inode_ops;
291 tmp_inode->i_fop = &cifs_dir_ops;
292 } else if (S_ISLNK(tmp_inode->i_mode)) {
293 cFYI(1, ("Symbolic Link inode"));
294 tmp_inode->i_op = &cifs_symlink_inode_ops;
295 } else {
296 cFYI(1, ("Init special inode"));
297 init_special_inode(tmp_inode, tmp_inode->i_mode,
298 tmp_inode->i_rdev);
299 }
300} 162}
301 163
302static void unix_fill_in_inode(struct inode *tmp_inode, 164void
303 FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode) 165cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info,
166 struct cifs_sb_info *cifs_sb)
304{ 167{
305 loff_t local_size; 168 int offset = cifs_sb->tcon->ses->server->timeAdj;
306 struct timespec local_mtime;
307
308 struct cifsInodeInfo *cifsInfo = CIFS_I(tmp_inode);
309 struct cifs_sb_info *cifs_sb = CIFS_SB(tmp_inode->i_sb);
310
311 __u32 type = le32_to_cpu(pfindData->Type);
312 __u64 num_of_bytes = le64_to_cpu(pfindData->NumOfBytes);
313 __u64 end_of_file = le64_to_cpu(pfindData->EndOfFile);
314 cifsInfo->time = jiffies;
315 atomic_inc(&cifsInfo->inUse);
316
317 /* save mtime and size */
318 local_mtime = tmp_inode->i_mtime;
319 local_size = tmp_inode->i_size;
320
321 tmp_inode->i_atime =
322 cifs_NTtimeToUnix(pfindData->LastAccessTime);
323 tmp_inode->i_mtime =
324 cifs_NTtimeToUnix(pfindData->LastModificationTime);
325 tmp_inode->i_ctime =
326 cifs_NTtimeToUnix(pfindData->LastStatusChange);
327
328 tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions);
329 /* since we set the inode type below we need to mask off type
330 to avoid strange results if bits above were corrupt */
331 tmp_inode->i_mode &= ~S_IFMT;
332 if (type == UNIX_FILE) {
333 *pobject_type = DT_REG;
334 tmp_inode->i_mode |= S_IFREG;
335 } else if (type == UNIX_SYMLINK) {
336 *pobject_type = DT_LNK;
337 tmp_inode->i_mode |= S_IFLNK;
338 } else if (type == UNIX_DIR) {
339 *pobject_type = DT_DIR;
340 tmp_inode->i_mode |= S_IFDIR;
341 } else if (type == UNIX_CHARDEV) {
342 *pobject_type = DT_CHR;
343 tmp_inode->i_mode |= S_IFCHR;
344 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
345 le64_to_cpu(pfindData->DevMinor) & MINORMASK);
346 } else if (type == UNIX_BLOCKDEV) {
347 *pobject_type = DT_BLK;
348 tmp_inode->i_mode |= S_IFBLK;
349 tmp_inode->i_rdev = MKDEV(le64_to_cpu(pfindData->DevMajor),
350 le64_to_cpu(pfindData->DevMinor) & MINORMASK);
351 } else if (type == UNIX_FIFO) {
352 *pobject_type = DT_FIFO;
353 tmp_inode->i_mode |= S_IFIFO;
354 } else if (type == UNIX_SOCKET) {
355 *pobject_type = DT_SOCK;
356 tmp_inode->i_mode |= S_IFSOCK;
357 } else {
358 /* safest to just call it a file */
359 *pobject_type = DT_REG;
360 tmp_inode->i_mode |= S_IFREG;
361 cFYI(1, ("unknown inode type %d", type));
362 }
363 169
364 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) 170 memset(fattr, 0, sizeof(*fattr));
365 tmp_inode->i_uid = cifs_sb->mnt_uid; 171 fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate,
366 else 172 info->LastAccessTime, offset);
367 tmp_inode->i_uid = le64_to_cpu(pfindData->Uid); 173 fattr->cf_ctime = cnvrtDosUnixTm(info->LastWriteDate,
368 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) 174 info->LastWriteTime, offset);
369 tmp_inode->i_gid = cifs_sb->mnt_gid; 175 fattr->cf_mtime = cnvrtDosUnixTm(info->LastWriteDate,
370 else 176 info->LastWriteTime, offset);
371 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
372 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
373
374 cifsInfo->server_eof = end_of_file;
375 spin_lock(&tmp_inode->i_lock);
376 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
377 /* can not safely change the file size here if the
378 client is writing to it due to potential races */
379 i_size_write(tmp_inode, end_of_file);
380
381 /* 512 bytes (2**9) is the fake blocksize that must be used */
382 /* for this calculation, not the real blocksize */
383 tmp_inode->i_blocks = (512 - 1 + num_of_bytes) >> 9;
384 }
385 spin_unlock(&tmp_inode->i_lock);
386 177
387 if (S_ISREG(tmp_inode->i_mode)) { 178 fattr->cf_cifsattrs = le16_to_cpu(info->Attributes);
388 cFYI(1, ("File inode")); 179 fattr->cf_bytes = le32_to_cpu(info->AllocationSize);
389 tmp_inode->i_op = &cifs_file_inode_ops; 180 fattr->cf_eof = le32_to_cpu(info->DataSize);
390 181
391 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { 182 cifs_fill_common_info(fattr, cifs_sb);
392 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
393 tmp_inode->i_fop = &cifs_file_direct_nobrl_ops;
394 else
395 tmp_inode->i_fop = &cifs_file_direct_ops;
396 } else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
397 tmp_inode->i_fop = &cifs_file_nobrl_ops;
398 else
399 tmp_inode->i_fop = &cifs_file_ops;
400
401 if ((cifs_sb->tcon) && (cifs_sb->tcon->ses) &&
402 (cifs_sb->tcon->ses->server->maxBuf <
403 PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE))
404 tmp_inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
405 else
406 tmp_inode->i_data.a_ops = &cifs_addr_ops;
407
408 if (isNewInode)
409 return; /* No sense invalidating pages for new inode
410 since we have not started caching readahead
411 file data for it yet */
412
413 if (timespec_equal(&tmp_inode->i_mtime, &local_mtime) &&
414 (local_size == tmp_inode->i_size)) {
415 cFYI(1, ("inode exists but unchanged"));
416 } else {
417 /* file may have changed on server */
418 cFYI(1, ("invalidate inode, readdir detected change"));
419 invalidate_remote_inode(tmp_inode);
420 }
421 } else if (S_ISDIR(tmp_inode->i_mode)) {
422 cFYI(1, ("Directory inode"));
423 tmp_inode->i_op = &cifs_dir_inode_ops;
424 tmp_inode->i_fop = &cifs_dir_ops;
425 } else if (S_ISLNK(tmp_inode->i_mode)) {
426 cFYI(1, ("Symbolic Link inode"));
427 tmp_inode->i_op = &cifs_symlink_inode_ops;
428/* tmp_inode->i_fop = *//* do not need to set to anything */
429 } else {
430 cFYI(1, ("Special inode"));
431 init_special_inode(tmp_inode, tmp_inode->i_mode,
432 tmp_inode->i_rdev);
433 }
434} 183}
435 184
436/* BB eventually need to add the following helper function to 185/* BB eventually need to add the following helper function to
@@ -872,7 +621,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
872 len = strnlen(filename, PATH_MAX); 621 len = strnlen(filename, PATH_MAX);
873 } 622 }
874 623
875 *pinum = le64_to_cpu(pFindData->UniqueId); 624 *pinum = le64_to_cpu(pFindData->basic.UniqueId);
876 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { 625 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
877 FILE_DIRECTORY_INFO *pFindData = 626 FILE_DIRECTORY_INFO *pFindData =
878 (FILE_DIRECTORY_INFO *)current_entry; 627 (FILE_DIRECTORY_INFO *)current_entry;
@@ -932,11 +681,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
932 int rc = 0; 681 int rc = 0;
933 struct qstr qstring; 682 struct qstr qstring;
934 struct cifsFileInfo *pCifsF; 683 struct cifsFileInfo *pCifsF;
935 unsigned int obj_type; 684 u64 inum;
936 __u64 inum; 685 ino_t ino;
686 struct super_block *sb;
937 struct cifs_sb_info *cifs_sb; 687 struct cifs_sb_info *cifs_sb;
938 struct inode *tmp_inode;
939 struct dentry *tmp_dentry; 688 struct dentry *tmp_dentry;
689 struct cifs_fattr fattr;
940 690
941 /* get filename and len into qstring */ 691 /* get filename and len into qstring */
942 /* get dentry */ 692 /* get dentry */
@@ -954,60 +704,53 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
954 if (rc != 0) 704 if (rc != 0)
955 return 0; 705 return 0;
956 706
957 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 707 sb = file->f_path.dentry->d_sb;
708 cifs_sb = CIFS_SB(sb);
958 709
959 qstring.name = scratch_buf; 710 qstring.name = scratch_buf;
960 rc = cifs_get_name_from_search_buf(&qstring, pfindEntry, 711 rc = cifs_get_name_from_search_buf(&qstring, pfindEntry,
961 pCifsF->srch_inf.info_level, 712 pCifsF->srch_inf.info_level,
962 pCifsF->srch_inf.unicode, cifs_sb, 713 pCifsF->srch_inf.unicode, cifs_sb,
963 max_len, 714 max_len, &inum /* returned */);
964 &inum /* returned */);
965 715
966 if (rc) 716 if (rc)
967 return rc; 717 return rc;
968 718
969 /* only these two infolevels return valid inode numbers */
970 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX ||
971 pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
972 rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
973 &inum);
974 else
975 rc = construct_dentry(&qstring, file, &tmp_inode, &tmp_dentry,
976 NULL);
977
978 if ((tmp_inode == NULL) || (tmp_dentry == NULL))
979 return -ENOMEM;
980
981 /* we pass in rc below, indicating whether it is a new inode,
982 so we can figure out whether to invalidate the inode cached
983 data if the file has changed */
984 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) 719 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX)
985 unix_fill_in_inode(tmp_inode, 720 cifs_unix_basic_to_fattr(&fattr,
986 (FILE_UNIX_INFO *)pfindEntry, 721 &((FILE_UNIX_INFO *) pfindEntry)->basic,
987 &obj_type, rc); 722 cifs_sb);
988 else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) 723 else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD)
989 fill_in_inode(tmp_inode, 0 /* old level 1 buffer type */, 724 cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *)
990 pfindEntry, &obj_type, rc); 725 pfindEntry, cifs_sb);
991 else 726 else
992 fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc); 727 cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *)
728 pfindEntry, cifs_sb);
993 729
994 if (rc) /* new inode - needs to be tied to dentry */ { 730 /* FIXME: make _to_fattr functions fill this out */
995 d_instantiate(tmp_dentry, tmp_inode); 731 if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_ID_FULL_DIR_INFO)
996 if (rc == 2) 732 fattr.cf_uniqueid = inum;
997 d_rehash(tmp_dentry); 733 else
998 } 734 fattr.cf_uniqueid = iunique(sb, ROOT_I);
999 735
736 ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
737 tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr);
1000 738
1001 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, 739 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
1002 tmp_inode->i_ino, obj_type); 740 ino, fattr.cf_dtype);
741
742 /*
743 * we can not return filldir errors to the caller since they are
744 * "normal" when the stat blocksize is too small - we return remapped
745 * error instead
746 *
747 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
748 * case already. Why should we be clobbering other errors from it?
749 */
1003 if (rc) { 750 if (rc) {
1004 cFYI(1, ("filldir rc = %d", rc)); 751 cFYI(1, ("filldir rc = %d", rc));
1005 /* we can not return filldir errors to the caller
1006 since they are "normal" when the stat blocksize
1007 is too small - we return remapped error instead */
1008 rc = -EOVERFLOW; 752 rc = -EOVERFLOW;
1009 } 753 }
1010
1011 dput(tmp_dentry); 754 dput(tmp_dentry);
1012 return rc; 755 return rc;
1013} 756}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 897a052270f9..7085a6275c4c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -802,7 +802,7 @@ ssetup_ntlmssp_authenticate:
802#endif /* CONFIG_CIFS_UPCALL */ 802#endif /* CONFIG_CIFS_UPCALL */
803 } else { 803 } else {
804#ifdef CONFIG_CIFS_EXPERIMENTAL 804#ifdef CONFIG_CIFS_EXPERIMENTAL
805 if ((experimEnabled > 1) && (type == RawNTLMSSP)) { 805 if (type == RawNTLMSSP) {
806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { 806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
807 cERROR(1, ("NTLMSSP requires Unicode support")); 807 cERROR(1, ("NTLMSSP requires Unicode support"));
808 rc = -ENOSYS; 808 rc = -ENOSYS;
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index e9527eedc639..a75afa3dd9e1 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -64,8 +64,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
64 64
65 full_path = build_path_from_dentry(direntry); 65 full_path = build_path_from_dentry(direntry);
66 if (full_path == NULL) { 66 if (full_path == NULL) {
67 rc = -ENOMEM;
67 FreeXid(xid); 68 FreeXid(xid);
68 return -ENOMEM; 69 return rc;
69 } 70 }
70 if (ea_name == NULL) { 71 if (ea_name == NULL) {
71 cFYI(1, ("Null xattr names not supported")); 72 cFYI(1, ("Null xattr names not supported"));
@@ -118,8 +119,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
118 119
119 full_path = build_path_from_dentry(direntry); 120 full_path = build_path_from_dentry(direntry);
120 if (full_path == NULL) { 121 if (full_path == NULL) {
122 rc = -ENOMEM;
121 FreeXid(xid); 123 FreeXid(xid);
122 return -ENOMEM; 124 return rc;
123 } 125 }
124 /* return dos attributes as pseudo xattr */ 126 /* return dos attributes as pseudo xattr */
125 /* return alt name if available as pseudo attr */ 127 /* return alt name if available as pseudo attr */
@@ -225,8 +227,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
225 227
226 full_path = build_path_from_dentry(direntry); 228 full_path = build_path_from_dentry(direntry);
227 if (full_path == NULL) { 229 if (full_path == NULL) {
230 rc = -ENOMEM;
228 FreeXid(xid); 231 FreeXid(xid);
229 return -ENOMEM; 232 return rc;
230 } 233 }
231 /* return dos attributes as pseudo xattr */ 234 /* return dos attributes as pseudo xattr */
232 /* return alt name if available as pseudo attr */ 235 /* return alt name if available as pseudo attr */
@@ -351,8 +354,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
351 354
352 full_path = build_path_from_dentry(direntry); 355 full_path = build_path_from_dentry(direntry);
353 if (full_path == NULL) { 356 if (full_path == NULL) {
357 rc = -ENOMEM;
354 FreeXid(xid); 358 FreeXid(xid);
355 return -ENOMEM; 359 return rc;
356 } 360 }
357 /* return dos attributes as pseudo xattr */ 361 /* return dos attributes as pseudo xattr */
358 /* return alt name if available as pseudo attr */ 362 /* return alt name if available as pseudo attr */
diff --git a/fs/compat.c b/fs/compat.c
index cdd51a3a7c53..94502dab972a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -32,7 +32,6 @@
32#include <linux/smb_mount.h> 32#include <linux/smb_mount.h>
33#include <linux/ncp_mount.h> 33#include <linux/ncp_mount.h>
34#include <linux/nfs4_mount.h> 34#include <linux/nfs4_mount.h>
35#include <linux/smp_lock.h>
36#include <linux/syscalls.h> 35#include <linux/syscalls.h>
37#include <linux/ctype.h> 36#include <linux/ctype.h>
38#include <linux/module.h> 37#include <linux/module.h>
@@ -1486,8 +1485,8 @@ int compat_do_execve(char * filename,
1486 if (!bprm) 1485 if (!bprm)
1487 goto out_files; 1486 goto out_files;
1488 1487
1489 retval = mutex_lock_interruptible(&current->cred_guard_mutex); 1488 retval = -ERESTARTNOINTR;
1490 if (retval < 0) 1489 if (mutex_lock_interruptible(&current->cred_guard_mutex))
1491 goto out_free; 1490 goto out_free;
1492 current->in_execve = 1; 1491 current->in_execve = 1;
1493 1492
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 626c7483b4de..f91fd51b32e3 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,6 +19,7 @@
19#include <linux/compiler.h> 19#include <linux/compiler.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/smp.h> 21#include <linux/smp.h>
22#include <linux/smp_lock.h>
22#include <linux/ioctl.h> 23#include <linux/ioctl.h>
23#include <linux/if.h> 24#include <linux/if.h>
24#include <linux/if_bridge.h> 25#include <linux/if_bridge.h>
@@ -1904,6 +1905,7 @@ COMPATIBLE_IOCTL(FIONCLEX)
1904COMPATIBLE_IOCTL(FIOASYNC) 1905COMPATIBLE_IOCTL(FIOASYNC)
1905COMPATIBLE_IOCTL(FIONBIO) 1906COMPATIBLE_IOCTL(FIONBIO)
1906COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */ 1907COMPATIBLE_IOCTL(FIONREAD) /* This is also TIOCINQ */
1908COMPATIBLE_IOCTL(FS_IOC_FIEMAP)
1907/* 0x00 */ 1909/* 0x00 */
1908COMPATIBLE_IOCTL(FIBMAP) 1910COMPATIBLE_IOCTL(FIBMAP)
1909COMPATIBLE_IOCTL(FIGETBSZ) 1911COMPATIBLE_IOCTL(FIGETBSZ)
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 205ec95b347e..eb507c453c5f 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -435,7 +435,7 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
435static int find_rsb(struct dlm_ls *ls, char *name, int namelen, 435static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
436 unsigned int flags, struct dlm_rsb **r_ret) 436 unsigned int flags, struct dlm_rsb **r_ret)
437{ 437{
438 struct dlm_rsb *r, *tmp; 438 struct dlm_rsb *r = NULL, *tmp;
439 uint32_t hash, bucket; 439 uint32_t hash, bucket;
440 int error = -EINVAL; 440 int error = -EINVAL;
441 441
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cdb580a9c7a2..618a60f03886 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -902,7 +902,7 @@ static void tcp_connect_to_sock(struct connection *con)
902 int result = -EHOSTUNREACH; 902 int result = -EHOSTUNREACH;
903 struct sockaddr_storage saddr, src_addr; 903 struct sockaddr_storage saddr, src_addr;
904 int addr_len; 904 int addr_len;
905 struct socket *sock; 905 struct socket *sock = NULL;
906 906
907 if (con->nodeid == 0) { 907 if (con->nodeid == 0) {
908 log_print("attempt to connect sock 0 foiled"); 908 log_print("attempt to connect sock 0 foiled");
@@ -962,6 +962,8 @@ out_err:
962 if (con->sock) { 962 if (con->sock) {
963 sock_release(con->sock); 963 sock_release(con->sock);
964 con->sock = NULL; 964 con->sock = NULL;
965 } else if (sock) {
966 sock_release(sock);
965 } 967 }
966 /* 968 /*
967 * Some errors are fatal and this list might need adjusting. For other 969 * Some errors are fatal and this list might need adjusting. For other
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 894a32d438d5..16f682e26c07 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -353,7 +353,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
353{ 353{
354 struct dlm_plock_info info; 354 struct dlm_plock_info info;
355 struct plock_op *op; 355 struct plock_op *op;
356 int found = 0; 356 int found = 0, do_callback = 0;
357 357
358 if (count != sizeof(info)) 358 if (count != sizeof(info))
359 return -EINVAL; 359 return -EINVAL;
@@ -366,21 +366,24 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
366 366
367 spin_lock(&ops_lock); 367 spin_lock(&ops_lock);
368 list_for_each_entry(op, &recv_list, list) { 368 list_for_each_entry(op, &recv_list, list) {
369 if (op->info.fsid == info.fsid && op->info.number == info.number && 369 if (op->info.fsid == info.fsid &&
370 op->info.number == info.number &&
370 op->info.owner == info.owner) { 371 op->info.owner == info.owner) {
372 struct plock_xop *xop = (struct plock_xop *)op;
371 list_del_init(&op->list); 373 list_del_init(&op->list);
372 found = 1;
373 op->done = 1;
374 memcpy(&op->info, &info, sizeof(info)); 374 memcpy(&op->info, &info, sizeof(info));
375 if (xop->callback)
376 do_callback = 1;
377 else
378 op->done = 1;
379 found = 1;
375 break; 380 break;
376 } 381 }
377 } 382 }
378 spin_unlock(&ops_lock); 383 spin_unlock(&ops_lock);
379 384
380 if (found) { 385 if (found) {
381 struct plock_xop *xop; 386 if (do_callback)
382 xop = (struct plock_xop *)op;
383 if (xop->callback)
384 dlm_plock_callback(op); 387 dlm_plock_callback(op);
385 else 388 else
386 wake_up(&recv_wq); 389 wake_up(&recv_wq);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index af737bb56cb7..259525c9abb8 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1303,6 +1303,13 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
1303 } 1303 }
1304 (*new_auth_tok)->session_key.encrypted_key_size = 1304 (*new_auth_tok)->session_key.encrypted_key_size =
1305 (body_size - (ECRYPTFS_SALT_SIZE + 5)); 1305 (body_size - (ECRYPTFS_SALT_SIZE + 5));
1306 if ((*new_auth_tok)->session_key.encrypted_key_size
1307 > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
1308 printk(KERN_WARNING "Tag 3 packet contains key larger "
1309 "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n");
1310 rc = -EINVAL;
1311 goto out_free;
1312 }
1306 if (unlikely(data[(*packet_size)++] != 0x04)) { 1313 if (unlikely(data[(*packet_size)++] != 0x04)) {
1307 printk(KERN_WARNING "Unknown version number [%d]\n", 1314 printk(KERN_WARNING "Unknown version number [%d]\n",
1308 data[(*packet_size) - 1]); 1315 data[(*packet_size) - 1]);
@@ -1449,6 +1456,12 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
1449 rc = -EINVAL; 1456 rc = -EINVAL;
1450 goto out; 1457 goto out;
1451 } 1458 }
1459 if (unlikely((*tag_11_contents_size) > max_contents_bytes)) {
1460 printk(KERN_ERR "Literal data section in tag 11 packet exceeds "
1461 "expected size\n");
1462 rc = -EINVAL;
1463 goto out;
1464 }
1452 if (data[(*packet_size)++] != 0x62) { 1465 if (data[(*packet_size)++] != 0x62) {
1453 printk(KERN_WARNING "Unrecognizable packet\n"); 1466 printk(KERN_WARNING "Unrecognizable packet\n");
1454 rc = -EINVAL; 1467 rc = -EINVAL;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 3f0e1974abdc..31d12de83a2a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -14,35 +14,44 @@
14#include <linux/list.h> 14#include <linux/list.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/anon_inodes.h> 16#include <linux/anon_inodes.h>
17#include <linux/eventfd.h>
18#include <linux/syscalls.h> 17#include <linux/syscalls.h>
19#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/kref.h>
20#include <linux/eventfd.h>
20 21
21struct eventfd_ctx { 22struct eventfd_ctx {
23 struct kref kref;
22 wait_queue_head_t wqh; 24 wait_queue_head_t wqh;
23 /* 25 /*
24 * Every time that a write(2) is performed on an eventfd, the 26 * Every time that a write(2) is performed on an eventfd, the
25 * value of the __u64 being written is added to "count" and a 27 * value of the __u64 being written is added to "count" and a
26 * wakeup is performed on "wqh". A read(2) will return the "count" 28 * wakeup is performed on "wqh". A read(2) will return the "count"
27 * value to userspace, and will reset "count" to zero. The kernel 29 * value to userspace, and will reset "count" to zero. The kernel
28 * size eventfd_signal() also, adds to the "count" counter and 30 * side eventfd_signal() also, adds to the "count" counter and
29 * issue a wakeup. 31 * issue a wakeup.
30 */ 32 */
31 __u64 count; 33 __u64 count;
32 unsigned int flags; 34 unsigned int flags;
33}; 35};
34 36
35/* 37/**
36 * Adds "n" to the eventfd counter "count". Returns "n" in case of 38 * eventfd_signal - Adds @n to the eventfd counter.
37 * success, or a value lower then "n" in case of coutner overflow. 39 * @ctx: [in] Pointer to the eventfd context.
38 * This function is supposed to be called by the kernel in paths 40 * @n: [in] Value of the counter to be added to the eventfd internal counter.
39 * that do not allow sleeping. In this function we allow the counter 41 * The value cannot be negative.
40 * to reach the ULLONG_MAX value, and we signal this as overflow 42 *
41 * condition by returining a POLLERR to poll(2). 43 * This function is supposed to be called by the kernel in paths that do not
44 * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
45 * value, and we signal this as overflow condition by returining a POLLERR
46 * to poll(2).
47 *
48 * Returns @n in case of success, a non-negative number lower than @n in case
49 * of overflow, or the following error codes:
50 *
51 * -EINVAL : The value of @n is negative.
42 */ 52 */
43int eventfd_signal(struct file *file, int n) 53int eventfd_signal(struct eventfd_ctx *ctx, int n)
44{ 54{
45 struct eventfd_ctx *ctx = file->private_data;
46 unsigned long flags; 55 unsigned long flags;
47 56
48 if (n < 0) 57 if (n < 0)
@@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n)
59} 68}
60EXPORT_SYMBOL_GPL(eventfd_signal); 69EXPORT_SYMBOL_GPL(eventfd_signal);
61 70
71static void eventfd_free(struct kref *kref)
72{
73 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
74
75 kfree(ctx);
76}
77
78/**
79 * eventfd_ctx_get - Acquires a reference to the internal eventfd context.
80 * @ctx: [in] Pointer to the eventfd context.
81 *
82 * Returns: In case of success, returns a pointer to the eventfd context.
83 */
84struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
85{
86 kref_get(&ctx->kref);
87 return ctx;
88}
89EXPORT_SYMBOL_GPL(eventfd_ctx_get);
90
91/**
92 * eventfd_ctx_put - Releases a reference to the internal eventfd context.
93 * @ctx: [in] Pointer to eventfd context.
94 *
95 * The eventfd context reference must have been previously acquired either
96 * with eventfd_ctx_get() or eventfd_ctx_fdget()).
97 */
98void eventfd_ctx_put(struct eventfd_ctx *ctx)
99{
100 kref_put(&ctx->kref, eventfd_free);
101}
102EXPORT_SYMBOL_GPL(eventfd_ctx_put);
103
62static int eventfd_release(struct inode *inode, struct file *file) 104static int eventfd_release(struct inode *inode, struct file *file)
63{ 105{
64 kfree(file->private_data); 106 struct eventfd_ctx *ctx = file->private_data;
107
108 wake_up_poll(&ctx->wqh, POLLHUP);
109 eventfd_ctx_put(ctx);
65 return 0; 110 return 0;
66} 111}
67 112
@@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = {
185 .write = eventfd_write, 230 .write = eventfd_write,
186}; 231};
187 232
233/**
234 * eventfd_fget - Acquire a reference of an eventfd file descriptor.
235 * @fd: [in] Eventfd file descriptor.
236 *
237 * Returns a pointer to the eventfd file structure in case of success, or the
238 * following error pointer:
239 *
240 * -EBADF : Invalid @fd file descriptor.
241 * -EINVAL : The @fd file descriptor is not an eventfd file.
242 */
188struct file *eventfd_fget(int fd) 243struct file *eventfd_fget(int fd)
189{ 244{
190 struct file *file; 245 struct file *file;
@@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd)
201} 256}
202EXPORT_SYMBOL_GPL(eventfd_fget); 257EXPORT_SYMBOL_GPL(eventfd_fget);
203 258
259/**
260 * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context.
261 * @fd: [in] Eventfd file descriptor.
262 *
263 * Returns a pointer to the internal eventfd context, otherwise the error
264 * pointers returned by the following functions:
265 *
266 * eventfd_fget
267 */
268struct eventfd_ctx *eventfd_ctx_fdget(int fd)
269{
270 struct file *file;
271 struct eventfd_ctx *ctx;
272
273 file = eventfd_fget(fd);
274 if (IS_ERR(file))
275 return (struct eventfd_ctx *) file;
276 ctx = eventfd_ctx_get(file->private_data);
277 fput(file);
278
279 return ctx;
280}
281EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
282
283/**
284 * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context.
285 * @file: [in] Eventfd file pointer.
286 *
287 * Returns a pointer to the internal eventfd context, otherwise the error
288 * pointer:
289 *
290 * -EINVAL : The @fd file descriptor is not an eventfd file.
291 */
292struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
293{
294 if (file->f_op != &eventfd_fops)
295 return ERR_PTR(-EINVAL);
296
297 return eventfd_ctx_get(file->private_data);
298}
299EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
300
204SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) 301SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
205{ 302{
206 int fd; 303 int fd;
@@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
217 if (!ctx) 314 if (!ctx)
218 return -ENOMEM; 315 return -ENOMEM;
219 316
317 kref_init(&ctx->kref);
220 init_waitqueue_head(&ctx->wqh); 318 init_waitqueue_head(&ctx->wqh);
221 ctx->count = count; 319 ctx->count = count;
222 ctx->flags = flags; 320 ctx->flags = flags;
diff --git a/fs/exec.c b/fs/exec.c
index e639957d7a57..4a8849e45b21 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1277,8 +1277,8 @@ int do_execve(char * filename,
1277 if (!bprm) 1277 if (!bprm)
1278 goto out_files; 1278 goto out_files;
1279 1279
1280 retval = mutex_lock_interruptible(&current->cred_guard_mutex); 1280 retval = -ERESTARTNOINTR;
1281 if (retval < 0) 1281 if (mutex_lock_interruptible(&current->cred_guard_mutex))
1282 goto out_free; 1282 goto out_free;
1283 current->in_execve = 1; 1283 current->in_execve = 1;
1284 1284
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 24667eedc023..c6718e4817fe 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -2,9 +2,7 @@
2 * common.h - Common definitions for both Kernel and user-mode utilities 2 * common.h - Common definitions for both Kernel and user-mode utilities
3 * 3 *
4 * Copyright (C) 2005, 2006 4 * Copyright (C) 2005, 2006
5 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 5 * Avishay Traeger (avishay@gmail.com)
6 * Copyright (C) 2005, 2006
7 * International Business Machines
8 * Copyright (C) 2008, 2009 6 * Copyright (C) 2008, 2009
9 * Boaz Harrosh <bharrosh@panasas.com> 7 * Boaz Harrosh <bharrosh@panasas.com>
10 * 8 *
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 65b0c8c776a1..4cfab1cc75c0 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 0fd4c7859679..5ec72e020b22 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -156,6 +154,9 @@ ino_t exofs_parent_ino(struct dentry *child);
156int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, 154int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
157 struct inode *); 155 struct inode *);
158 156
157/* super.c */
158int exofs_sync_fs(struct super_block *sb, int wait);
159
159/********************* 160/*********************
160 * operation vectors * 161 * operation vectors *
161 *********************/ 162 *********************/
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 6ed7fe484752..839b9dc1e70f 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -47,16 +45,23 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
47{ 45{
48 int ret; 46 int ret;
49 struct address_space *mapping = filp->f_mapping; 47 struct address_space *mapping = filp->f_mapping;
48 struct inode *inode = dentry->d_inode;
49 struct super_block *sb;
50 50
51 ret = filemap_write_and_wait(mapping); 51 ret = filemap_write_and_wait(mapping);
52 if (ret) 52 if (ret)
53 return ret; 53 return ret;
54 54
55 /*Note: file_fsync below also calles sync_blockdev, which is a no-op 55 /* sync the inode attributes */
56 * for exofs, but other then that it does sync_inode and 56 ret = write_inode_now(inode, 1);
57 * sync_superblock which is what we need here. 57
58 */ 58 /* This is a good place to write the sb */
59 return file_fsync(filp, dentry, datasync); 59 /* TODO: Sechedule an sb-sync on create */
60 sb = inode->i_sb;
61 if (sb->s_dirt)
62 exofs_sync_fs(sb, 1);
63
64 return ret;
60} 65}
61 66
62static int exofs_flush(struct file *file, fl_owner_t id) 67static int exofs_flush(struct file *file, fl_owner_t id)
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 77d0a295eb1c..6c10f7476699 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -295,6 +293,9 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
295err: 293err:
296 if (!is_sync) 294 if (!is_sync)
297 _unlock_pcol_pages(pcol, ret, READ); 295 _unlock_pcol_pages(pcol, ret, READ);
296 else /* Pages unlocked by caller in sync mode only free bio */
297 pcol_free(pcol);
298
298 kfree(pcol_copy); 299 kfree(pcol_copy);
299 if (or) 300 if (or)
300 osd_end_request(or); 301 osd_end_request(or);
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 77fdd765e76d..b7dd0c236863 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index b3d2ccb87aaa..4372542df284 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8216c5b77b53..5ab10c3bbebe 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
@@ -33,6 +31,7 @@
33 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 31 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 */ 32 */
35 33
34#include <linux/smp_lock.h>
36#include <linux/string.h> 35#include <linux/string.h>
37#include <linux/parser.h> 36#include <linux/parser.h>
38#include <linux/vfs.h> 37#include <linux/vfs.h>
@@ -200,7 +199,7 @@ static const struct export_operations exofs_export_ops;
200/* 199/*
201 * Write the superblock to the OSD 200 * Write the superblock to the OSD
202 */ 201 */
203static int exofs_sync_fs(struct super_block *sb, int wait) 202int exofs_sync_fs(struct super_block *sb, int wait)
204{ 203{
205 struct exofs_sb_info *sbi; 204 struct exofs_sb_info *sbi;
206 struct exofs_fscb *fscb; 205 struct exofs_fscb *fscb;
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
index 36e2d7bc7f7b..4dd687c3e747 100644
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * Copyright (C) 2005, 2006 2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com) (avishay@il.ibm.com) 3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2005, 2006
5 * International Business Machines
6 * Copyright (C) 2008, 2009 4 * Copyright (C) 2008, 2009
7 * Boaz Harrosh <bharrosh@panasas.com> 5 * Boaz Harrosh <bharrosh@panasas.com>
8 * 6 *
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 7cb4badef927..e7431309bdca 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -13,7 +13,6 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/mount.h> 15#include <linux/mount.h>
16#include <linux/smp_lock.h>
17#include <asm/current.h> 16#include <asm/current.h>
18#include <asm/uaccess.h> 17#include <asm/uaccess.h>
19 18
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 6524ecaebb7a..e1dedb0f7873 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
66 inode = NULL; 66 inode = NULL;
67 if (ino) { 67 if (ino) {
68 inode = ext2_iget(dir->i_sb, ino); 68 inode = ext2_iget(dir->i_sb, ino);
69 if (IS_ERR(inode)) 69 if (unlikely(IS_ERR(inode))) {
70 return ERR_CAST(inode); 70 if (PTR_ERR(inode) == -ESTALE) {
71 ext2_error(dir->i_sb, __func__,
72 "deleted inode referenced: %lu",
73 ino);
74 return ERR_PTR(-EIO);
75 } else {
76 return ERR_CAST(inode);
77 }
78 }
71 } 79 }
72 return d_splice_alias(inode, dentry); 80 return d_splice_alias(inode, dentry);
73} 81}
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 3d724a95882f..373fa90c796a 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp,
130 struct buffer_head *bh = NULL; 130 struct buffer_head *bh = NULL;
131 131
132 map_bh.b_state = 0; 132 map_bh.b_state = 0;
133 err = ext3_get_blocks_handle(NULL, inode, blk, 1, 133 err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0);
134 &map_bh, 0, 0);
135 if (err > 0) { 134 if (err > 0) {
136 pgoff_t index = map_bh.b_blocknr >> 135 pgoff_t index = map_bh.b_blocknr >>
137 (PAGE_CACHE_SHIFT - inode->i_blkbits); 136 (PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5f51fed5c750..b49908a167ae 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -788,7 +788,7 @@ err_out:
788int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, 788int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
789 sector_t iblock, unsigned long maxblocks, 789 sector_t iblock, unsigned long maxblocks,
790 struct buffer_head *bh_result, 790 struct buffer_head *bh_result,
791 int create, int extend_disksize) 791 int create)
792{ 792{
793 int err = -EIO; 793 int err = -EIO;
794 int offsets[4]; 794 int offsets[4];
@@ -911,13 +911,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
911 if (!err) 911 if (!err)
912 err = ext3_splice_branch(handle, inode, iblock, 912 err = ext3_splice_branch(handle, inode, iblock,
913 partial, indirect_blks, count); 913 partial, indirect_blks, count);
914 /*
915 * i_disksize growing is protected by truncate_mutex. Don't forget to
916 * protect it if you're about to implement concurrent
917 * ext3_get_block() -bzzz
918 */
919 if (!err && extend_disksize && inode->i_size > ei->i_disksize)
920 ei->i_disksize = inode->i_size;
921 mutex_unlock(&ei->truncate_mutex); 914 mutex_unlock(&ei->truncate_mutex);
922 if (err) 915 if (err)
923 goto cleanup; 916 goto cleanup;
@@ -972,7 +965,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
972 } 965 }
973 966
974 ret = ext3_get_blocks_handle(handle, inode, iblock, 967 ret = ext3_get_blocks_handle(handle, inode, iblock,
975 max_blocks, bh_result, create, 0); 968 max_blocks, bh_result, create);
976 if (ret > 0) { 969 if (ret > 0) {
977 bh_result->b_size = (ret << inode->i_blkbits); 970 bh_result->b_size = (ret << inode->i_blkbits);
978 ret = 0; 971 ret = 0;
@@ -1005,7 +998,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
1005 dummy.b_blocknr = -1000; 998 dummy.b_blocknr = -1000;
1006 buffer_trace_init(&dummy.b_history); 999 buffer_trace_init(&dummy.b_history);
1007 err = ext3_get_blocks_handle(handle, inode, block, 1, 1000 err = ext3_get_blocks_handle(handle, inode, block, 1,
1008 &dummy, create, 1); 1001 &dummy, create);
1009 /* 1002 /*
1010 * ext3_get_blocks_handle() returns number of blocks 1003 * ext3_get_blocks_handle() returns number of blocks
1011 * mapped. 0 in case of a HOLE. 1004 * mapped. 0 in case of a HOLE.
@@ -1193,15 +1186,16 @@ write_begin_failed:
1193 * i_size_read because we hold i_mutex. 1186 * i_size_read because we hold i_mutex.
1194 * 1187 *
1195 * Add inode to orphan list in case we crash before truncate 1188 * Add inode to orphan list in case we crash before truncate
1196 * finishes. 1189 * finishes. Do this only if ext3_can_truncate() agrees so
1190 * that orphan processing code is happy.
1197 */ 1191 */
1198 if (pos + len > inode->i_size) 1192 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1199 ext3_orphan_add(handle, inode); 1193 ext3_orphan_add(handle, inode);
1200 ext3_journal_stop(handle); 1194 ext3_journal_stop(handle);
1201 unlock_page(page); 1195 unlock_page(page);
1202 page_cache_release(page); 1196 page_cache_release(page);
1203 if (pos + len > inode->i_size) 1197 if (pos + len > inode->i_size)
1204 vmtruncate(inode, inode->i_size); 1198 ext3_truncate(inode);
1205 } 1199 }
1206 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1200 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1207 goto retry; 1201 goto retry;
@@ -1287,7 +1281,7 @@ static int ext3_ordered_write_end(struct file *file,
1287 * There may be allocated blocks outside of i_size because 1281 * There may be allocated blocks outside of i_size because
1288 * we failed to copy some data. Prepare for truncate. 1282 * we failed to copy some data. Prepare for truncate.
1289 */ 1283 */
1290 if (pos + len > inode->i_size) 1284 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1291 ext3_orphan_add(handle, inode); 1285 ext3_orphan_add(handle, inode);
1292 ret2 = ext3_journal_stop(handle); 1286 ret2 = ext3_journal_stop(handle);
1293 if (!ret) 1287 if (!ret)
@@ -1296,7 +1290,7 @@ static int ext3_ordered_write_end(struct file *file,
1296 page_cache_release(page); 1290 page_cache_release(page);
1297 1291
1298 if (pos + len > inode->i_size) 1292 if (pos + len > inode->i_size)
1299 vmtruncate(inode, inode->i_size); 1293 ext3_truncate(inode);
1300 return ret ? ret : copied; 1294 return ret ? ret : copied;
1301} 1295}
1302 1296
@@ -1315,14 +1309,14 @@ static int ext3_writeback_write_end(struct file *file,
1315 * There may be allocated blocks outside of i_size because 1309 * There may be allocated blocks outside of i_size because
1316 * we failed to copy some data. Prepare for truncate. 1310 * we failed to copy some data. Prepare for truncate.
1317 */ 1311 */
1318 if (pos + len > inode->i_size) 1312 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1319 ext3_orphan_add(handle, inode); 1313 ext3_orphan_add(handle, inode);
1320 ret = ext3_journal_stop(handle); 1314 ret = ext3_journal_stop(handle);
1321 unlock_page(page); 1315 unlock_page(page);
1322 page_cache_release(page); 1316 page_cache_release(page);
1323 1317
1324 if (pos + len > inode->i_size) 1318 if (pos + len > inode->i_size)
1325 vmtruncate(inode, inode->i_size); 1319 ext3_truncate(inode);
1326 return ret ? ret : copied; 1320 return ret ? ret : copied;
1327} 1321}
1328 1322
@@ -1358,7 +1352,7 @@ static int ext3_journalled_write_end(struct file *file,
1358 * There may be allocated blocks outside of i_size because 1352 * There may be allocated blocks outside of i_size because
1359 * we failed to copy some data. Prepare for truncate. 1353 * we failed to copy some data. Prepare for truncate.
1360 */ 1354 */
1361 if (pos + len > inode->i_size) 1355 if (pos + len > inode->i_size && ext3_can_truncate(inode))
1362 ext3_orphan_add(handle, inode); 1356 ext3_orphan_add(handle, inode);
1363 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; 1357 EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
1364 if (inode->i_size > EXT3_I(inode)->i_disksize) { 1358 if (inode->i_size > EXT3_I(inode)->i_disksize) {
@@ -1375,7 +1369,7 @@ static int ext3_journalled_write_end(struct file *file,
1375 page_cache_release(page); 1369 page_cache_release(page);
1376 1370
1377 if (pos + len > inode->i_size) 1371 if (pos + len > inode->i_size)
1378 vmtruncate(inode, inode->i_size); 1372 ext3_truncate(inode);
1379 return ret ? ret : copied; 1373 return ret ? ret : copied;
1380} 1374}
1381 1375
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ddf7e55abe1..9714db393efe 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -93,20 +93,20 @@ typedef unsigned int ext4_group_t;
93struct ext4_allocation_request { 93struct ext4_allocation_request {
94 /* target inode for block we're allocating */ 94 /* target inode for block we're allocating */
95 struct inode *inode; 95 struct inode *inode;
96 /* how many blocks we want to allocate */
97 unsigned int len;
96 /* logical block in target inode */ 98 /* logical block in target inode */
97 ext4_lblk_t logical; 99 ext4_lblk_t logical;
98 /* phys. target (a hint) */
99 ext4_fsblk_t goal;
100 /* the closest logical allocated block to the left */ 100 /* the closest logical allocated block to the left */
101 ext4_lblk_t lleft; 101 ext4_lblk_t lleft;
102 /* phys. block for ^^^ */
103 ext4_fsblk_t pleft;
104 /* the closest logical allocated block to the right */ 102 /* the closest logical allocated block to the right */
105 ext4_lblk_t lright; 103 ext4_lblk_t lright;
106 /* phys. block for ^^^ */ 104 /* phys. target (a hint) */
105 ext4_fsblk_t goal;
106 /* phys. block for the closest logical allocated block to the left */
107 ext4_fsblk_t pleft;
108 /* phys. block for the closest logical allocated block to the right */
107 ext4_fsblk_t pright; 109 ext4_fsblk_t pright;
108 /* how many blocks we want to allocate */
109 unsigned int len;
110 /* flags. see above EXT4_MB_HINT_* */ 110 /* flags. see above EXT4_MB_HINT_* */
111 unsigned int flags; 111 unsigned int flags;
112}; 112};
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index ad13a84644e1..eb27fd0f2ee8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -43,6 +43,8 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
43 ext4_journal_abort_handle(where, __func__, bh, 43 ext4_journal_abort_handle(where, __func__, bh,
44 handle, err); 44 handle, err);
45 } 45 }
46 else
47 brelse(bh);
46 return err; 48 return err;
47} 49}
48 50
@@ -57,6 +59,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
57 ext4_journal_abort_handle(where, __func__, bh, 59 ext4_journal_abort_handle(where, __func__, bh,
58 handle, err); 60 handle, err);
59 } 61 }
62 else
63 brelse(bh);
60 return err; 64 return err;
61} 65}
62 66
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index be2f426f6805..139fb8cb87e4 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -131,9 +131,11 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
131int __ext4_journal_get_write_access(const char *where, handle_t *handle, 131int __ext4_journal_get_write_access(const char *where, handle_t *handle,
132 struct buffer_head *bh); 132 struct buffer_head *bh);
133 133
134/* When called with an invalid handle, this will still do a put on the BH */
134int __ext4_journal_forget(const char *where, handle_t *handle, 135int __ext4_journal_forget(const char *where, handle_t *handle,
135 struct buffer_head *bh); 136 struct buffer_head *bh);
136 137
138/* When called with an invalid handle, this will still do a put on the BH */
137int __ext4_journal_revoke(const char *where, handle_t *handle, 139int __ext4_journal_revoke(const char *where, handle_t *handle,
138 ext4_fsblk_t blocknr, struct buffer_head *bh); 140 ext4_fsblk_t blocknr, struct buffer_head *bh);
139 141
@@ -281,10 +283,10 @@ static inline int ext4_should_order_data(struct inode *inode)
281 283
282static inline int ext4_should_writeback_data(struct inode *inode) 284static inline int ext4_should_writeback_data(struct inode *inode)
283{ 285{
284 if (EXT4_JOURNAL(inode) == NULL)
285 return 0;
286 if (!S_ISREG(inode->i_mode)) 286 if (!S_ISREG(inode->i_mode))
287 return 0; 287 return 0;
288 if (EXT4_JOURNAL(inode) == NULL)
289 return 1;
288 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) 290 if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
289 return 0; 291 return 0;
290 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) 292 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 50322a09bd01..73ebfb44ad75 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1977,6 +1977,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
1977 */ 1977 */
1978 /* 1 bitmap, 1 block group descriptor */ 1978 /* 1 bitmap, 1 block group descriptor */
1979 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); 1979 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
1980 return ret;
1980 } 1981 }
1981 } 1982 }
1982 1983
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2f645732e3b7..29e6dc7299b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -833,7 +833,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
833 if (!goal) 833 if (!goal)
834 goal = sbi->s_inode_goal; 834 goal = sbi->s_inode_goal;
835 835
836 if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) { 836 if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
837 group = (goal - 1) / EXT4_INODES_PER_GROUP(sb); 837 group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
838 ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb); 838 ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
839 ret2 = 0; 839 ret2 = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 60a26f3a6f8b..f9c642b22efa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -78,16 +78,14 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
78 * but there may still be a record of it in the journal, and that record 78 * but there may still be a record of it in the journal, and that record
79 * still needs to be revoked. 79 * still needs to be revoked.
80 * 80 *
81 * If the handle isn't valid we're not journaling so there's nothing to do. 81 * If the handle isn't valid we're not journaling, but we still need to
82 * call into ext4_journal_revoke() to put the buffer head.
82 */ 83 */
83int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 84int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
84 struct buffer_head *bh, ext4_fsblk_t blocknr) 85 struct buffer_head *bh, ext4_fsblk_t blocknr)
85{ 86{
86 int err; 87 int err;
87 88
88 if (!ext4_handle_valid(handle))
89 return 0;
90
91 might_sleep(); 89 might_sleep();
92 90
93 BUFFER_TRACE(bh, "enter"); 91 BUFFER_TRACE(bh, "enter");
@@ -1513,14 +1511,14 @@ retry:
1513 * Add inode to orphan list in case we crash before 1511 * Add inode to orphan list in case we crash before
1514 * truncate finishes 1512 * truncate finishes
1515 */ 1513 */
1516 if (pos + len > inode->i_size) 1514 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1517 ext4_orphan_add(handle, inode); 1515 ext4_orphan_add(handle, inode);
1518 1516
1519 ext4_journal_stop(handle); 1517 ext4_journal_stop(handle);
1520 if (pos + len > inode->i_size) { 1518 if (pos + len > inode->i_size) {
1521 vmtruncate(inode, inode->i_size); 1519 ext4_truncate(inode);
1522 /* 1520 /*
1523 * If vmtruncate failed early the inode might 1521 * If truncate failed early the inode might
1524 * still be on the orphan list; we need to 1522 * still be on the orphan list; we need to
1525 * make sure the inode is removed from the 1523 * make sure the inode is removed from the
1526 * orphan list in that case. 1524 * orphan list in that case.
@@ -1614,7 +1612,7 @@ static int ext4_ordered_write_end(struct file *file,
1614 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1612 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1615 page, fsdata); 1613 page, fsdata);
1616 copied = ret2; 1614 copied = ret2;
1617 if (pos + len > inode->i_size) 1615 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1618 /* if we have allocated more blocks and copied 1616 /* if we have allocated more blocks and copied
1619 * less. We will have blocks allocated outside 1617 * less. We will have blocks allocated outside
1620 * inode->i_size. So truncate them 1618 * inode->i_size. So truncate them
@@ -1628,9 +1626,9 @@ static int ext4_ordered_write_end(struct file *file,
1628 ret = ret2; 1626 ret = ret2;
1629 1627
1630 if (pos + len > inode->i_size) { 1628 if (pos + len > inode->i_size) {
1631 vmtruncate(inode, inode->i_size); 1629 ext4_truncate(inode);
1632 /* 1630 /*
1633 * If vmtruncate failed early the inode might still be 1631 * If truncate failed early the inode might still be
1634 * on the orphan list; we need to make sure the inode 1632 * on the orphan list; we need to make sure the inode
1635 * is removed from the orphan list in that case. 1633 * is removed from the orphan list in that case.
1636 */ 1634 */
@@ -1655,7 +1653,7 @@ static int ext4_writeback_write_end(struct file *file,
1655 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, 1653 ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
1656 page, fsdata); 1654 page, fsdata);
1657 copied = ret2; 1655 copied = ret2;
1658 if (pos + len > inode->i_size) 1656 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1659 /* if we have allocated more blocks and copied 1657 /* if we have allocated more blocks and copied
1660 * less. We will have blocks allocated outside 1658 * less. We will have blocks allocated outside
1661 * inode->i_size. So truncate them 1659 * inode->i_size. So truncate them
@@ -1670,9 +1668,9 @@ static int ext4_writeback_write_end(struct file *file,
1670 ret = ret2; 1668 ret = ret2;
1671 1669
1672 if (pos + len > inode->i_size) { 1670 if (pos + len > inode->i_size) {
1673 vmtruncate(inode, inode->i_size); 1671 ext4_truncate(inode);
1674 /* 1672 /*
1675 * If vmtruncate failed early the inode might still be 1673 * If truncate failed early the inode might still be
1676 * on the orphan list; we need to make sure the inode 1674 * on the orphan list; we need to make sure the inode
1677 * is removed from the orphan list in that case. 1675 * is removed from the orphan list in that case.
1678 */ 1676 */
@@ -1722,7 +1720,7 @@ static int ext4_journalled_write_end(struct file *file,
1722 1720
1723 unlock_page(page); 1721 unlock_page(page);
1724 page_cache_release(page); 1722 page_cache_release(page);
1725 if (pos + len > inode->i_size) 1723 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1726 /* if we have allocated more blocks and copied 1724 /* if we have allocated more blocks and copied
1727 * less. We will have blocks allocated outside 1725 * less. We will have blocks allocated outside
1728 * inode->i_size. So truncate them 1726 * inode->i_size. So truncate them
@@ -1733,9 +1731,9 @@ static int ext4_journalled_write_end(struct file *file,
1733 if (!ret) 1731 if (!ret)
1734 ret = ret2; 1732 ret = ret2;
1735 if (pos + len > inode->i_size) { 1733 if (pos + len > inode->i_size) {
1736 vmtruncate(inode, inode->i_size); 1734 ext4_truncate(inode);
1737 /* 1735 /*
1738 * If vmtruncate failed early the inode might still be 1736 * If truncate failed early the inode might still be
1739 * on the orphan list; we need to make sure the inode 1737 * on the orphan list; we need to make sure the inode
1740 * is removed from the orphan list in that case. 1738 * is removed from the orphan list in that case.
1741 */ 1739 */
@@ -2305,15 +2303,9 @@ flush_it:
2305 return; 2303 return;
2306} 2304}
2307 2305
2308static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 2306static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
2309{ 2307{
2310 /* 2308 return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
2311 * unmapped buffer is possible for holes.
2312 * delay buffer is possible with delayed allocation.
2313 * We also need to consider unwritten buffer as unmapped.
2314 */
2315 return (!buffer_mapped(bh) || buffer_delay(bh) ||
2316 buffer_unwritten(bh)) && buffer_dirty(bh);
2317} 2309}
2318 2310
2319/* 2311/*
@@ -2398,9 +2390,9 @@ static int __mpage_da_writepage(struct page *page,
2398 * We need to try to allocate 2390 * We need to try to allocate
2399 * unmapped blocks in the same page. 2391 * unmapped blocks in the same page.
2400 * Otherwise we won't make progress 2392 * Otherwise we won't make progress
2401 * with the page in ext4_da_writepage 2393 * with the page in ext4_writepage
2402 */ 2394 */
2403 if (ext4_bh_unmapped_or_delay(NULL, bh)) { 2395 if (ext4_bh_delay_or_unwritten(NULL, bh)) {
2404 mpage_add_bh_to_extent(mpd, logical, 2396 mpage_add_bh_to_extent(mpd, logical,
2405 bh->b_size, 2397 bh->b_size,
2406 bh->b_state); 2398 bh->b_state);
@@ -2517,7 +2509,6 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2517 * so call get_block_wrap with create = 0 2509 * so call get_block_wrap with create = 0
2518 */ 2510 */
2519 ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); 2511 ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
2520 BUG_ON(create && ret == 0);
2521 if (ret > 0) { 2512 if (ret > 0) {
2522 bh_result->b_size = (ret << inode->i_blkbits); 2513 bh_result->b_size = (ret << inode->i_blkbits);
2523 ret = 0; 2514 ret = 0;
@@ -2525,15 +2516,102 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
2525 return ret; 2516 return ret;
2526} 2517}
2527 2518
2519static int bget_one(handle_t *handle, struct buffer_head *bh)
2520{
2521 get_bh(bh);
2522 return 0;
2523}
2524
2525static int bput_one(handle_t *handle, struct buffer_head *bh)
2526{
2527 put_bh(bh);
2528 return 0;
2529}
2530
2531static int __ext4_journalled_writepage(struct page *page,
2532 struct writeback_control *wbc,
2533 unsigned int len)
2534{
2535 struct address_space *mapping = page->mapping;
2536 struct inode *inode = mapping->host;
2537 struct buffer_head *page_bufs;
2538 handle_t *handle = NULL;
2539 int ret = 0;
2540 int err;
2541
2542 page_bufs = page_buffers(page);
2543 BUG_ON(!page_bufs);
2544 walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
2545 /* As soon as we unlock the page, it can go away, but we have
2546 * references to buffers so we are safe */
2547 unlock_page(page);
2548
2549 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
2550 if (IS_ERR(handle)) {
2551 ret = PTR_ERR(handle);
2552 goto out;
2553 }
2554
2555 ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
2556 do_journal_get_write_access);
2557
2558 err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
2559 write_end_fn);
2560 if (ret == 0)
2561 ret = err;
2562 err = ext4_journal_stop(handle);
2563 if (!ret)
2564 ret = err;
2565
2566 walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
2567 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
2568out:
2569 return ret;
2570}
2571
2528/* 2572/*
2573 * Note that we don't need to start a transaction unless we're journaling data
2574 * because we should have holes filled from ext4_page_mkwrite(). We even don't
2575 * need to file the inode to the transaction's list in ordered mode because if
2576 * we are writing back data added by write(), the inode is already there and if
2577 * we are writing back data modified via mmap(), noone guarantees in which
2578 * transaction the data will hit the disk. In case we are journaling data, we
2579 * cannot start transaction directly because transaction start ranks above page
2580 * lock so we have to do some magic.
2581 *
2529 * This function can get called via... 2582 * This function can get called via...
2530 * - ext4_da_writepages after taking page lock (have journal handle) 2583 * - ext4_da_writepages after taking page lock (have journal handle)
2531 * - journal_submit_inode_data_buffers (no journal handle) 2584 * - journal_submit_inode_data_buffers (no journal handle)
2532 * - shrink_page_list via pdflush (no journal handle) 2585 * - shrink_page_list via pdflush (no journal handle)
2533 * - grab_page_cache when doing write_begin (have journal handle) 2586 * - grab_page_cache when doing write_begin (have journal handle)
2587 *
2588 * We don't do any block allocation in this function. If we have page with
2589 * multiple blocks we need to write those buffer_heads that are mapped. This
2590 * is important for mmaped based write. So if we do with blocksize 1K
2591 * truncate(f, 1024);
2592 * a = mmap(f, 0, 4096);
2593 * a[0] = 'a';
2594 * truncate(f, 4096);
2595 * we have in the page first buffer_head mapped via page_mkwrite call back
2596 * but other bufer_heads would be unmapped but dirty(dirty done via the
2597 * do_wp_page). So writepage should write the first block. If we modify
2598 * the mmap area beyond 1024 we will again get a page_fault and the
2599 * page_mkwrite callback will do the block allocation and mark the
2600 * buffer_heads mapped.
2601 *
2602 * We redirty the page if we have any buffer_heads that is either delay or
2603 * unwritten in the page.
2604 *
2605 * We can get recursively called as show below.
2606 *
2607 * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
2608 * ext4_writepage()
2609 *
2610 * But since we don't do any block allocation we should not deadlock.
2611 * Page also have the dirty flag cleared so we don't get recurive page_lock.
2534 */ 2612 */
2535static int ext4_da_writepage(struct page *page, 2613static int ext4_writepage(struct page *page,
2536 struct writeback_control *wbc) 2614 struct writeback_control *wbc)
2537{ 2615{
2538 int ret = 0; 2616 int ret = 0;
2539 loff_t size; 2617 loff_t size;
@@ -2541,7 +2619,7 @@ static int ext4_da_writepage(struct page *page,
2541 struct buffer_head *page_bufs; 2619 struct buffer_head *page_bufs;
2542 struct inode *inode = page->mapping->host; 2620 struct inode *inode = page->mapping->host;
2543 2621
2544 trace_ext4_da_writepage(inode, page); 2622 trace_ext4_writepage(inode, page);
2545 size = i_size_read(inode); 2623 size = i_size_read(inode);
2546 if (page->index == size >> PAGE_CACHE_SHIFT) 2624 if (page->index == size >> PAGE_CACHE_SHIFT)
2547 len = size & ~PAGE_CACHE_MASK; 2625 len = size & ~PAGE_CACHE_MASK;
@@ -2551,7 +2629,7 @@ static int ext4_da_writepage(struct page *page,
2551 if (page_has_buffers(page)) { 2629 if (page_has_buffers(page)) {
2552 page_bufs = page_buffers(page); 2630 page_bufs = page_buffers(page);
2553 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2631 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2554 ext4_bh_unmapped_or_delay)) { 2632 ext4_bh_delay_or_unwritten)) {
2555 /* 2633 /*
2556 * We don't want to do block allocation 2634 * We don't want to do block allocation
2557 * So redirty the page and return 2635 * So redirty the page and return
@@ -2578,13 +2656,13 @@ static int ext4_da_writepage(struct page *page,
2578 * all are mapped and non delay. We don't want to 2656 * all are mapped and non delay. We don't want to
2579 * do block allocation here. 2657 * do block allocation here.
2580 */ 2658 */
2581 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 2659 ret = block_prepare_write(page, 0, len,
2582 noalloc_get_block_write); 2660 noalloc_get_block_write);
2583 if (!ret) { 2661 if (!ret) {
2584 page_bufs = page_buffers(page); 2662 page_bufs = page_buffers(page);
2585 /* check whether all are mapped and non delay */ 2663 /* check whether all are mapped and non delay */
2586 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, 2664 if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
2587 ext4_bh_unmapped_or_delay)) { 2665 ext4_bh_delay_or_unwritten)) {
2588 redirty_page_for_writepage(wbc, page); 2666 redirty_page_for_writepage(wbc, page);
2589 unlock_page(page); 2667 unlock_page(page);
2590 return 0; 2668 return 0;
@@ -2600,7 +2678,16 @@ static int ext4_da_writepage(struct page *page,
2600 return 0; 2678 return 0;
2601 } 2679 }
2602 /* now mark the buffer_heads as dirty and uptodate */ 2680 /* now mark the buffer_heads as dirty and uptodate */
2603 block_commit_write(page, 0, PAGE_CACHE_SIZE); 2681 block_commit_write(page, 0, len);
2682 }
2683
2684 if (PageChecked(page) && ext4_should_journal_data(inode)) {
2685 /*
2686 * It's mmapped pagecache. Add buffers and journal it. There
2687 * doesn't seem much point in redirtying the page here.
2688 */
2689 ClearPageChecked(page);
2690 return __ext4_journalled_writepage(page, wbc, len);
2604 } 2691 }
2605 2692
2606 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2693 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2907,7 +2994,7 @@ retry:
2907 * i_size_read because we hold i_mutex. 2994 * i_size_read because we hold i_mutex.
2908 */ 2995 */
2909 if (pos + len > inode->i_size) 2996 if (pos + len > inode->i_size)
2910 vmtruncate(inode, inode->i_size); 2997 ext4_truncate(inode);
2911 } 2998 }
2912 2999
2913 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3000 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3130,222 +3217,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
3130 return generic_block_bmap(mapping, block, ext4_get_block); 3217 return generic_block_bmap(mapping, block, ext4_get_block);
3131} 3218}
3132 3219
3133static int bget_one(handle_t *handle, struct buffer_head *bh)
3134{
3135 get_bh(bh);
3136 return 0;
3137}
3138
3139static int bput_one(handle_t *handle, struct buffer_head *bh)
3140{
3141 put_bh(bh);
3142 return 0;
3143}
3144
3145/*
3146 * Note that we don't need to start a transaction unless we're journaling data
3147 * because we should have holes filled from ext4_page_mkwrite(). We even don't
3148 * need to file the inode to the transaction's list in ordered mode because if
3149 * we are writing back data added by write(), the inode is already there and if
3150 * we are writing back data modified via mmap(), noone guarantees in which
3151 * transaction the data will hit the disk. In case we are journaling data, we
3152 * cannot start transaction directly because transaction start ranks above page
3153 * lock so we have to do some magic.
3154 *
3155 * In all journaling modes block_write_full_page() will start the I/O.
3156 *
3157 * Problem:
3158 *
3159 * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
3160 * ext4_writepage()
3161 *
3162 * Similar for:
3163 *
3164 * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
3165 *
3166 * Same applies to ext4_get_block(). We will deadlock on various things like
3167 * lock_journal and i_data_sem
3168 *
3169 * Setting PF_MEMALLOC here doesn't work - too many internal memory
3170 * allocations fail.
3171 *
3172 * 16May01: If we're reentered then journal_current_handle() will be
3173 * non-zero. We simply *return*.
3174 *
3175 * 1 July 2001: @@@ FIXME:
3176 * In journalled data mode, a data buffer may be metadata against the
3177 * current transaction. But the same file is part of a shared mapping
3178 * and someone does a writepage() on it.
3179 *
3180 * We will move the buffer onto the async_data list, but *after* it has
3181 * been dirtied. So there's a small window where we have dirty data on
3182 * BJ_Metadata.
3183 *
3184 * Note that this only applies to the last partial page in the file. The
3185 * bit which block_write_full_page() uses prepare/commit for. (That's
3186 * broken code anyway: it's wrong for msync()).
3187 *
3188 * It's a rare case: affects the final partial page, for journalled data
3189 * where the file is subject to bith write() and writepage() in the same
3190 * transction. To fix it we'll need a custom block_write_full_page().
3191 * We'll probably need that anyway for journalling writepage() output.
3192 *
3193 * We don't honour synchronous mounts for writepage(). That would be
3194 * disastrous. Any write() or metadata operation will sync the fs for
3195 * us.
3196 *
3197 */
3198static int __ext4_normal_writepage(struct page *page,
3199 struct writeback_control *wbc)
3200{
3201 struct inode *inode = page->mapping->host;
3202
3203 if (test_opt(inode->i_sb, NOBH))
3204 return nobh_writepage(page, noalloc_get_block_write, wbc);
3205 else
3206 return block_write_full_page(page, noalloc_get_block_write,
3207 wbc);
3208}
3209
3210static int ext4_normal_writepage(struct page *page,
3211 struct writeback_control *wbc)
3212{
3213 struct inode *inode = page->mapping->host;
3214 loff_t size = i_size_read(inode);
3215 loff_t len;
3216
3217 trace_ext4_normal_writepage(inode, page);
3218 J_ASSERT(PageLocked(page));
3219 if (page->index == size >> PAGE_CACHE_SHIFT)
3220 len = size & ~PAGE_CACHE_MASK;
3221 else
3222 len = PAGE_CACHE_SIZE;
3223
3224 if (page_has_buffers(page)) {
3225 /* if page has buffers it should all be mapped
3226 * and allocated. If there are not buffers attached
3227 * to the page we know the page is dirty but it lost
3228 * buffers. That means that at some moment in time
3229 * after write_begin() / write_end() has been called
3230 * all buffers have been clean and thus they must have been
3231 * written at least once. So they are all mapped and we can
3232 * happily proceed with mapping them and writing the page.
3233 */
3234 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
3235 ext4_bh_unmapped_or_delay));
3236 }
3237
3238 if (!ext4_journal_current_handle())
3239 return __ext4_normal_writepage(page, wbc);
3240
3241 redirty_page_for_writepage(wbc, page);
3242 unlock_page(page);
3243 return 0;
3244}
3245
3246static int __ext4_journalled_writepage(struct page *page,
3247 struct writeback_control *wbc)
3248{
3249 struct address_space *mapping = page->mapping;
3250 struct inode *inode = mapping->host;
3251 struct buffer_head *page_bufs;
3252 handle_t *handle = NULL;
3253 int ret = 0;
3254 int err;
3255
3256 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
3257 noalloc_get_block_write);
3258 if (ret != 0)
3259 goto out_unlock;
3260
3261 page_bufs = page_buffers(page);
3262 walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
3263 bget_one);
3264 /* As soon as we unlock the page, it can go away, but we have
3265 * references to buffers so we are safe */
3266 unlock_page(page);
3267
3268 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
3269 if (IS_ERR(handle)) {
3270 ret = PTR_ERR(handle);
3271 goto out;
3272 }
3273
3274 ret = walk_page_buffers(handle, page_bufs, 0,
3275 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
3276
3277 err = walk_page_buffers(handle, page_bufs, 0,
3278 PAGE_CACHE_SIZE, NULL, write_end_fn);
3279 if (ret == 0)
3280 ret = err;
3281 err = ext4_journal_stop(handle);
3282 if (!ret)
3283 ret = err;
3284
3285 walk_page_buffers(handle, page_bufs, 0,
3286 PAGE_CACHE_SIZE, NULL, bput_one);
3287 EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
3288 goto out;
3289
3290out_unlock:
3291 unlock_page(page);
3292out:
3293 return ret;
3294}
3295
3296static int ext4_journalled_writepage(struct page *page,
3297 struct writeback_control *wbc)
3298{
3299 struct inode *inode = page->mapping->host;
3300 loff_t size = i_size_read(inode);
3301 loff_t len;
3302
3303 trace_ext4_journalled_writepage(inode, page);
3304 J_ASSERT(PageLocked(page));
3305 if (page->index == size >> PAGE_CACHE_SHIFT)
3306 len = size & ~PAGE_CACHE_MASK;
3307 else
3308 len = PAGE_CACHE_SIZE;
3309
3310 if (page_has_buffers(page)) {
3311 /* if page has buffers it should all be mapped
3312 * and allocated. If there are not buffers attached
3313 * to the page we know the page is dirty but it lost
3314 * buffers. That means that at some moment in time
3315 * after write_begin() / write_end() has been called
3316 * all buffers have been clean and thus they must have been
3317 * written at least once. So they are all mapped and we can
3318 * happily proceed with mapping them and writing the page.
3319 */
3320 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
3321 ext4_bh_unmapped_or_delay));
3322 }
3323
3324 if (ext4_journal_current_handle())
3325 goto no_write;
3326
3327 if (PageChecked(page)) {
3328 /*
3329 * It's mmapped pagecache. Add buffers and journal it. There
3330 * doesn't seem much point in redirtying the page here.
3331 */
3332 ClearPageChecked(page);
3333 return __ext4_journalled_writepage(page, wbc);
3334 } else {
3335 /*
3336 * It may be a page full of checkpoint-mode buffers. We don't
3337 * really know unless we go poke around in the buffer_heads.
3338 * But block_write_full_page will do the right thing.
3339 */
3340 return block_write_full_page(page, noalloc_get_block_write,
3341 wbc);
3342 }
3343no_write:
3344 redirty_page_for_writepage(wbc, page);
3345 unlock_page(page);
3346 return 0;
3347}
3348
3349static int ext4_readpage(struct file *file, struct page *page) 3220static int ext4_readpage(struct file *file, struct page *page)
3350{ 3221{
3351 return mpage_readpage(page, ext4_get_block); 3222 return mpage_readpage(page, ext4_get_block);
@@ -3492,7 +3363,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
3492static const struct address_space_operations ext4_ordered_aops = { 3363static const struct address_space_operations ext4_ordered_aops = {
3493 .readpage = ext4_readpage, 3364 .readpage = ext4_readpage,
3494 .readpages = ext4_readpages, 3365 .readpages = ext4_readpages,
3495 .writepage = ext4_normal_writepage, 3366 .writepage = ext4_writepage,
3496 .sync_page = block_sync_page, 3367 .sync_page = block_sync_page,
3497 .write_begin = ext4_write_begin, 3368 .write_begin = ext4_write_begin,
3498 .write_end = ext4_ordered_write_end, 3369 .write_end = ext4_ordered_write_end,
@@ -3507,7 +3378,7 @@ static const struct address_space_operations ext4_ordered_aops = {
3507static const struct address_space_operations ext4_writeback_aops = { 3378static const struct address_space_operations ext4_writeback_aops = {
3508 .readpage = ext4_readpage, 3379 .readpage = ext4_readpage,
3509 .readpages = ext4_readpages, 3380 .readpages = ext4_readpages,
3510 .writepage = ext4_normal_writepage, 3381 .writepage = ext4_writepage,
3511 .sync_page = block_sync_page, 3382 .sync_page = block_sync_page,
3512 .write_begin = ext4_write_begin, 3383 .write_begin = ext4_write_begin,
3513 .write_end = ext4_writeback_write_end, 3384 .write_end = ext4_writeback_write_end,
@@ -3522,7 +3393,7 @@ static const struct address_space_operations ext4_writeback_aops = {
3522static const struct address_space_operations ext4_journalled_aops = { 3393static const struct address_space_operations ext4_journalled_aops = {
3523 .readpage = ext4_readpage, 3394 .readpage = ext4_readpage,
3524 .readpages = ext4_readpages, 3395 .readpages = ext4_readpages,
3525 .writepage = ext4_journalled_writepage, 3396 .writepage = ext4_writepage,
3526 .sync_page = block_sync_page, 3397 .sync_page = block_sync_page,
3527 .write_begin = ext4_write_begin, 3398 .write_begin = ext4_write_begin,
3528 .write_end = ext4_journalled_write_end, 3399 .write_end = ext4_journalled_write_end,
@@ -3536,7 +3407,7 @@ static const struct address_space_operations ext4_journalled_aops = {
3536static const struct address_space_operations ext4_da_aops = { 3407static const struct address_space_operations ext4_da_aops = {
3537 .readpage = ext4_readpage, 3408 .readpage = ext4_readpage,
3538 .readpages = ext4_readpages, 3409 .readpages = ext4_readpages,
3539 .writepage = ext4_da_writepage, 3410 .writepage = ext4_writepage,
3540 .writepages = ext4_da_writepages, 3411 .writepages = ext4_da_writepages,
3541 .sync_page = block_sync_page, 3412 .sync_page = block_sync_page,
3542 .write_begin = ext4_da_write_begin, 3413 .write_begin = ext4_da_write_begin,
@@ -3583,7 +3454,8 @@ int ext4_block_truncate_page(handle_t *handle,
3583 struct page *page; 3454 struct page *page;
3584 int err = 0; 3455 int err = 0;
3585 3456
3586 page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT); 3457 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3458 mapping_gfp_mask(mapping) & ~__GFP_FS);
3587 if (!page) 3459 if (!page)
3588 return -EINVAL; 3460 return -EINVAL;
3589 3461
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bb415408fdb6..7050a9cd04a4 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -12,7 +12,6 @@
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/time.h> 13#include <linux/time.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/smp_lock.h>
16#include <linux/mount.h> 15#include <linux/mount.h>
17#include <linux/file.h> 16#include <linux/file.h>
18#include <asm/uaccess.h> 17#include <asm/uaccess.h>
@@ -192,7 +191,7 @@ setversion_out:
192 case EXT4_IOC_GROUP_EXTEND: { 191 case EXT4_IOC_GROUP_EXTEND: {
193 ext4_fsblk_t n_blocks_count; 192 ext4_fsblk_t n_blocks_count;
194 struct super_block *sb = inode->i_sb; 193 struct super_block *sb = inode->i_sb;
195 int err, err2; 194 int err, err2=0;
196 195
197 if (!capable(CAP_SYS_RESOURCE)) 196 if (!capable(CAP_SYS_RESOURCE))
198 return -EPERM; 197 return -EPERM;
@@ -205,9 +204,11 @@ setversion_out:
205 return err; 204 return err;
206 205
207 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 206 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
208 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 207 if (EXT4_SB(sb)->s_journal) {
209 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 208 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
210 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 209 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
210 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
211 }
211 if (err == 0) 212 if (err == 0)
212 err = err2; 213 err = err2;
213 mnt_drop_write(filp->f_path.mnt); 214 mnt_drop_write(filp->f_path.mnt);
@@ -252,7 +253,7 @@ setversion_out:
252 case EXT4_IOC_GROUP_ADD: { 253 case EXT4_IOC_GROUP_ADD: {
253 struct ext4_new_group_data input; 254 struct ext4_new_group_data input;
254 struct super_block *sb = inode->i_sb; 255 struct super_block *sb = inode->i_sb;
255 int err, err2; 256 int err, err2=0;
256 257
257 if (!capable(CAP_SYS_RESOURCE)) 258 if (!capable(CAP_SYS_RESOURCE))
258 return -EPERM; 259 return -EPERM;
@@ -266,9 +267,11 @@ setversion_out:
266 return err; 267 return err;
267 268
268 err = ext4_group_add(sb, &input); 269 err = ext4_group_add(sb, &input);
269 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 270 if (EXT4_SB(sb)->s_journal) {
270 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 271 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
271 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 272 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
273 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
274 }
272 if (err == 0) 275 if (err == 0)
273 err = err2; 276 err = err2;
274 mnt_drop_write(filp->f_path.mnt); 277 mnt_drop_write(filp->f_path.mnt);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 519a0a686d94..cd258463e2a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -657,7 +657,8 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
657 } 657 }
658} 658}
659 659
660static void ext4_mb_generate_buddy(struct super_block *sb, 660static noinline_for_stack
661void ext4_mb_generate_buddy(struct super_block *sb,
661 void *buddy, void *bitmap, ext4_group_t group) 662 void *buddy, void *bitmap, ext4_group_t group)
662{ 663{
663 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 664 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -1480,7 +1481,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
1480 ext4_mb_check_limits(ac, e4b, 0); 1481 ext4_mb_check_limits(ac, e4b, 0);
1481} 1482}
1482 1483
1483static int ext4_mb_try_best_found(struct ext4_allocation_context *ac, 1484static noinline_for_stack
1485int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1484 struct ext4_buddy *e4b) 1486 struct ext4_buddy *e4b)
1485{ 1487{
1486 struct ext4_free_extent ex = ac->ac_b_ex; 1488 struct ext4_free_extent ex = ac->ac_b_ex;
@@ -1507,7 +1509,8 @@ static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1507 return 0; 1509 return 0;
1508} 1510}
1509 1511
1510static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, 1512static noinline_for_stack
1513int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1511 struct ext4_buddy *e4b) 1514 struct ext4_buddy *e4b)
1512{ 1515{
1513 ext4_group_t group = ac->ac_g_ex.fe_group; 1516 ext4_group_t group = ac->ac_g_ex.fe_group;
@@ -1566,7 +1569,8 @@ static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1566 * The routine scans buddy structures (not bitmap!) from given order 1569 * The routine scans buddy structures (not bitmap!) from given order
1567 * to max order and tries to find big enough chunk to satisfy the req 1570 * to max order and tries to find big enough chunk to satisfy the req
1568 */ 1571 */
1569static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac, 1572static noinline_for_stack
1573void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1570 struct ext4_buddy *e4b) 1574 struct ext4_buddy *e4b)
1571{ 1575{
1572 struct super_block *sb = ac->ac_sb; 1576 struct super_block *sb = ac->ac_sb;
@@ -1609,7 +1613,8 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
1609 * In order to optimize scanning, caller must pass number of 1613 * In order to optimize scanning, caller must pass number of
1610 * free blocks in the group, so the routine can know upper limit. 1614 * free blocks in the group, so the routine can know upper limit.
1611 */ 1615 */
1612static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, 1616static noinline_for_stack
1617void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1613 struct ext4_buddy *e4b) 1618 struct ext4_buddy *e4b)
1614{ 1619{
1615 struct super_block *sb = ac->ac_sb; 1620 struct super_block *sb = ac->ac_sb;
@@ -1668,7 +1673,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1668 * we try to find stripe-aligned chunks for stripe-size requests 1673 * we try to find stripe-aligned chunks for stripe-size requests
1669 * XXX should do so at least for multiples of stripe size as well 1674 * XXX should do so at least for multiples of stripe size as well
1670 */ 1675 */
1671static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, 1676static noinline_for_stack
1677void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1672 struct ext4_buddy *e4b) 1678 struct ext4_buddy *e4b)
1673{ 1679{
1674 struct super_block *sb = ac->ac_sb; 1680 struct super_block *sb = ac->ac_sb;
@@ -1831,7 +1837,8 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1831 1837
1832} 1838}
1833 1839
1834static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) 1840static noinline_for_stack
1841int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1835{ 1842{
1836 1843
1837 int ret; 1844 int ret;
@@ -2902,7 +2909,11 @@ int __init init_ext4_mballoc(void)
2902 2909
2903void exit_ext4_mballoc(void) 2910void exit_ext4_mballoc(void)
2904{ 2911{
2905 /* XXX: synchronize_rcu(); */ 2912 /*
2913 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
2914 * before destroying the slab cache.
2915 */
2916 rcu_barrier();
2906 kmem_cache_destroy(ext4_pspace_cachep); 2917 kmem_cache_destroy(ext4_pspace_cachep);
2907 kmem_cache_destroy(ext4_ac_cachep); 2918 kmem_cache_destroy(ext4_ac_cachep);
2908 kmem_cache_destroy(ext4_free_ext_cachep); 2919 kmem_cache_destroy(ext4_free_ext_cachep);
@@ -3457,7 +3468,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
3457 * used in in-core bitmap. buddy must be generated from this bitmap 3468 * used in in-core bitmap. buddy must be generated from this bitmap
3458 * Need to be called with ext4 group lock held 3469 * Need to be called with ext4 group lock held
3459 */ 3470 */
3460static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, 3471static noinline_for_stack
3472void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3461 ext4_group_t group) 3473 ext4_group_t group)
3462{ 3474{
3463 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 3475 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -4215,14 +4227,9 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4215 ext4_get_group_no_and_offset(sb, goal, &group, &block); 4227 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4216 4228
4217 /* set up allocation goals */ 4229 /* set up allocation goals */
4230 memset(ac, 0, sizeof(struct ext4_allocation_context));
4218 ac->ac_b_ex.fe_logical = ar->logical; 4231 ac->ac_b_ex.fe_logical = ar->logical;
4219 ac->ac_b_ex.fe_group = 0;
4220 ac->ac_b_ex.fe_start = 0;
4221 ac->ac_b_ex.fe_len = 0;
4222 ac->ac_status = AC_STATUS_CONTINUE; 4232 ac->ac_status = AC_STATUS_CONTINUE;
4223 ac->ac_groups_scanned = 0;
4224 ac->ac_ex_scanned = 0;
4225 ac->ac_found = 0;
4226 ac->ac_sb = sb; 4233 ac->ac_sb = sb;
4227 ac->ac_inode = ar->inode; 4234 ac->ac_inode = ar->inode;
4228 ac->ac_o_ex.fe_logical = ar->logical; 4235 ac->ac_o_ex.fe_logical = ar->logical;
@@ -4233,15 +4240,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4233 ac->ac_g_ex.fe_group = group; 4240 ac->ac_g_ex.fe_group = group;
4234 ac->ac_g_ex.fe_start = block; 4241 ac->ac_g_ex.fe_start = block;
4235 ac->ac_g_ex.fe_len = len; 4242 ac->ac_g_ex.fe_len = len;
4236 ac->ac_f_ex.fe_len = 0;
4237 ac->ac_flags = ar->flags; 4243 ac->ac_flags = ar->flags;
4238 ac->ac_2order = 0;
4239 ac->ac_criteria = 0;
4240 ac->ac_pa = NULL;
4241 ac->ac_bitmap_page = NULL;
4242 ac->ac_buddy_page = NULL;
4243 ac->alloc_semp = NULL;
4244 ac->ac_lg = NULL;
4245 4244
4246 /* we have to define context: we'll we work with a file or 4245 /* we have to define context: we'll we work with a file or
4247 * locality group. this is a policy, actually */ 4246 * locality group. this is a policy, actually */
@@ -4509,10 +4508,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4509 } 4508 }
4510 4509
4511 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4510 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4512 if (ac) { 4511 if (!ac) {
4513 ac->ac_sb = sb;
4514 ac->ac_inode = ar->inode;
4515 } else {
4516 ar->len = 0; 4512 ar->len = 0;
4517 *errp = -ENOMEM; 4513 *errp = -ENOMEM;
4518 goto out1; 4514 goto out1;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 38ff75a0fe22..530b4ca01510 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/smp_lock.h>
20#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
21#include <linux/compat.h> 20#include <linux/compat.h>
22#include <asm/uaccess.h> 21#include <asm/uaccess.h>
diff --git a/fs/fat/file.c b/fs/fat/file.c
index b28ea646ff60..f042b965c95c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
134 if ((filp->f_mode & FMODE_WRITE) && 134 if ((filp->f_mode & FMODE_WRITE) &&
135 MSDOS_SB(inode->i_sb)->options.flush) { 135 MSDOS_SB(inode->i_sb)->options.flush) {
136 fat_flush_inodes(inode->i_sb, inode, NULL); 136 fat_flush_inodes(inode->i_sb, inode, NULL);
137 congestion_wait(WRITE, HZ/10); 137 congestion_wait(BLK_RW_ASYNC, HZ/10);
138 } 138 }
139 return 0; 139 return 0;
140} 140}
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 82f88733b681..bbc94ae4fd77 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -9,7 +9,6 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/smp_lock.h>
13#include "fat.h" 12#include "fat.h"
14 13
15/* Characters that are undesirable in an MS-DOS file name */ 14/* Characters that are undesirable in an MS-DOS file name */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 73471b7ecc8c..cb6e83557112 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -19,7 +19,6 @@
19#include <linux/jiffies.h> 19#include <linux/jiffies.h>
20#include <linux/ctype.h> 20#include <linux/ctype.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/smp_lock.h>
23#include <linux/buffer_head.h> 22#include <linux/buffer_head.h>
24#include <linux/namei.h> 23#include <linux/namei.h>
25#include "fat.h" 24#include "fat.h"
diff --git a/fs/fcntl.c b/fs/fcntl.c
index a040b764f8e3..ae413086db97 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -19,7 +19,6 @@
19#include <linux/signal.h> 19#include <linux/signal.h>
20#include <linux/rcupdate.h> 20#include <linux/rcupdate.h>
21#include <linux/pid_namespace.h> 21#include <linux/pid_namespace.h>
22#include <linux/smp_lock.h>
23 22
24#include <asm/poll.h> 23#include <asm/poll.h>
25#include <asm/siginfo.h> 24#include <asm/siginfo.h>
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index cdbd1654e4cd..1e8af939b3e4 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -38,6 +38,7 @@
38#include <linux/buffer_head.h> 38#include <linux/buffer_head.h>
39#include <linux/kernel.h> 39#include <linux/kernel.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/smp_lock.h>
41#include <linux/stat.h> 42#include <linux/stat.h>
42#include <linux/vfs.h> 43#include <linux/vfs.h>
43#include <linux/mount.h> 44#include <linux/mount.h>
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8fed2ed12f38..6484eb75acd6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -286,8 +286,8 @@ __releases(&fc->lock)
286 } 286 }
287 if (fc->num_background == FUSE_CONGESTION_THRESHOLD && 287 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
288 fc->connected && fc->bdi_initialized) { 288 fc->connected && fc->bdi_initialized) {
289 clear_bdi_congested(&fc->bdi, READ); 289 clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
290 clear_bdi_congested(&fc->bdi, WRITE); 290 clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
291 } 291 }
292 fc->num_background--; 292 fc->num_background--;
293 fc->active_background--; 293 fc->active_background--;
@@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
414 fc->blocked = 1; 414 fc->blocked = 1;
415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD && 415 if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
416 fc->bdi_initialized) { 416 fc->bdi_initialized) {
417 set_bdi_congested(&fc->bdi, READ); 417 set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
418 set_bdi_congested(&fc->bdi, WRITE); 418 set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
419 } 419 }
420 list_add_tail(&req->list, &fc->bg_queue); 420 list_add_tail(&req->list, &fc->bg_queue);
421 flush_bg_queue(fc); 421 flush_bg_queue(fc);
@@ -849,6 +849,81 @@ err:
849 return err; 849 return err;
850} 850}
851 851
852static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
853 struct fuse_copy_state *cs)
854{
855 struct fuse_notify_inval_inode_out outarg;
856 int err = -EINVAL;
857
858 if (size != sizeof(outarg))
859 goto err;
860
861 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
862 if (err)
863 goto err;
864 fuse_copy_finish(cs);
865
866 down_read(&fc->killsb);
867 err = -ENOENT;
868 if (!fc->sb)
869 goto err_unlock;
870
871 err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
872 outarg.off, outarg.len);
873
874err_unlock:
875 up_read(&fc->killsb);
876 return err;
877
878err:
879 fuse_copy_finish(cs);
880 return err;
881}
882
883static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
884 struct fuse_copy_state *cs)
885{
886 struct fuse_notify_inval_entry_out outarg;
887 int err = -EINVAL;
888 char buf[FUSE_NAME_MAX+1];
889 struct qstr name;
890
891 if (size < sizeof(outarg))
892 goto err;
893
894 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
895 if (err)
896 goto err;
897
898 err = -ENAMETOOLONG;
899 if (outarg.namelen > FUSE_NAME_MAX)
900 goto err;
901
902 name.name = buf;
903 name.len = outarg.namelen;
904 err = fuse_copy_one(cs, buf, outarg.namelen + 1);
905 if (err)
906 goto err;
907 fuse_copy_finish(cs);
908 buf[outarg.namelen] = 0;
909 name.hash = full_name_hash(name.name, name.len);
910
911 down_read(&fc->killsb);
912 err = -ENOENT;
913 if (!fc->sb)
914 goto err_unlock;
915
916 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
917
918err_unlock:
919 up_read(&fc->killsb);
920 return err;
921
922err:
923 fuse_copy_finish(cs);
924 return err;
925}
926
852static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, 927static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
853 unsigned int size, struct fuse_copy_state *cs) 928 unsigned int size, struct fuse_copy_state *cs)
854{ 929{
@@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
856 case FUSE_NOTIFY_POLL: 931 case FUSE_NOTIFY_POLL:
857 return fuse_notify_poll(fc, size, cs); 932 return fuse_notify_poll(fc, size, cs);
858 933
934 case FUSE_NOTIFY_INVAL_INODE:
935 return fuse_notify_inval_inode(fc, size, cs);
936
937 case FUSE_NOTIFY_INVAL_ENTRY:
938 return fuse_notify_inval_entry(fc, size, cs);
939
859 default: 940 default:
860 fuse_copy_finish(cs); 941 fuse_copy_finish(cs);
861 return -EINVAL; 942 return -EINVAL;
@@ -910,7 +991,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
910 unsigned long nr_segs, loff_t pos) 991 unsigned long nr_segs, loff_t pos)
911{ 992{
912 int err; 993 int err;
913 unsigned nbytes = iov_length(iov, nr_segs); 994 size_t nbytes = iov_length(iov, nr_segs);
914 struct fuse_req *req; 995 struct fuse_req *req;
915 struct fuse_out_header oh; 996 struct fuse_out_header oh;
916 struct fuse_copy_state cs; 997 struct fuse_copy_state cs;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b3089a083d30..e703654e7f40 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
375 struct fuse_conn *fc = get_fuse_conn(dir); 375 struct fuse_conn *fc = get_fuse_conn(dir);
376 struct fuse_req *req; 376 struct fuse_req *req;
377 struct fuse_req *forget_req; 377 struct fuse_req *forget_req;
378 struct fuse_open_in inarg; 378 struct fuse_create_in inarg;
379 struct fuse_open_out outopen; 379 struct fuse_open_out outopen;
380 struct fuse_entry_out outentry; 380 struct fuse_entry_out outentry;
381 struct fuse_file *ff; 381 struct fuse_file *ff;
@@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
399 if (!ff) 399 if (!ff)
400 goto out_put_request; 400 goto out_put_request;
401 401
402 if (!fc->dont_mask)
403 mode &= ~current_umask();
404
402 flags &= ~O_NOCTTY; 405 flags &= ~O_NOCTTY;
403 memset(&inarg, 0, sizeof(inarg)); 406 memset(&inarg, 0, sizeof(inarg));
404 memset(&outentry, 0, sizeof(outentry)); 407 memset(&outentry, 0, sizeof(outentry));
405 inarg.flags = flags; 408 inarg.flags = flags;
406 inarg.mode = mode; 409 inarg.mode = mode;
410 inarg.umask = current_umask();
407 req->in.h.opcode = FUSE_CREATE; 411 req->in.h.opcode = FUSE_CREATE;
408 req->in.h.nodeid = get_node_id(dir); 412 req->in.h.nodeid = get_node_id(dir);
409 req->in.numargs = 2; 413 req->in.numargs = 2;
410 req->in.args[0].size = sizeof(inarg); 414 req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
415 sizeof(inarg);
411 req->in.args[0].value = &inarg; 416 req->in.args[0].value = &inarg;
412 req->in.args[1].size = entry->d_name.len + 1; 417 req->in.args[1].size = entry->d_name.len + 1;
413 req->in.args[1].value = entry->d_name.name; 418 req->in.args[1].value = entry->d_name.name;
@@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
546 if (IS_ERR(req)) 551 if (IS_ERR(req))
547 return PTR_ERR(req); 552 return PTR_ERR(req);
548 553
554 if (!fc->dont_mask)
555 mode &= ~current_umask();
556
549 memset(&inarg, 0, sizeof(inarg)); 557 memset(&inarg, 0, sizeof(inarg));
550 inarg.mode = mode; 558 inarg.mode = mode;
551 inarg.rdev = new_encode_dev(rdev); 559 inarg.rdev = new_encode_dev(rdev);
560 inarg.umask = current_umask();
552 req->in.h.opcode = FUSE_MKNOD; 561 req->in.h.opcode = FUSE_MKNOD;
553 req->in.numargs = 2; 562 req->in.numargs = 2;
554 req->in.args[0].size = sizeof(inarg); 563 req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
564 sizeof(inarg);
555 req->in.args[0].value = &inarg; 565 req->in.args[0].value = &inarg;
556 req->in.args[1].size = entry->d_name.len + 1; 566 req->in.args[1].size = entry->d_name.len + 1;
557 req->in.args[1].value = entry->d_name.name; 567 req->in.args[1].value = entry->d_name.name;
@@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
578 if (IS_ERR(req)) 588 if (IS_ERR(req))
579 return PTR_ERR(req); 589 return PTR_ERR(req);
580 590
591 if (!fc->dont_mask)
592 mode &= ~current_umask();
593
581 memset(&inarg, 0, sizeof(inarg)); 594 memset(&inarg, 0, sizeof(inarg));
582 inarg.mode = mode; 595 inarg.mode = mode;
596 inarg.umask = current_umask();
583 req->in.h.opcode = FUSE_MKDIR; 597 req->in.h.opcode = FUSE_MKDIR;
584 req->in.numargs = 2; 598 req->in.numargs = 2;
585 req->in.args[0].size = sizeof(inarg); 599 req->in.args[0].size = sizeof(inarg);
@@ -845,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
845 return err; 859 return err;
846} 860}
847 861
862int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
863 struct qstr *name)
864{
865 int err = -ENOTDIR;
866 struct inode *parent;
867 struct dentry *dir;
868 struct dentry *entry;
869
870 parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
871 if (!parent)
872 return -ENOENT;
873
874 mutex_lock(&parent->i_mutex);
875 if (!S_ISDIR(parent->i_mode))
876 goto unlock;
877
878 err = -ENOENT;
879 dir = d_find_alias(parent);
880 if (!dir)
881 goto unlock;
882
883 entry = d_lookup(dir, name);
884 dput(dir);
885 if (!entry)
886 goto unlock;
887
888 fuse_invalidate_attr(parent);
889 fuse_invalidate_entry(entry);
890 dput(entry);
891 err = 0;
892
893 unlock:
894 mutex_unlock(&parent->i_mutex);
895 iput(parent);
896 return err;
897}
898
848/* 899/*
849 * Calling into a user-controlled filesystem gives the filesystem 900 * Calling into a user-controlled filesystem gives the filesystem
850 * daemon ptrace-like capabilities over the requester process. This 901 * daemon ptrace-like capabilities over the requester process. This
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fce6ce694fde..cbc464043b6f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
1922 1922
1923 req = fuse_get_req(fc); 1923 req = fuse_get_req(fc);
1924 if (IS_ERR(req)) 1924 if (IS_ERR(req))
1925 return PTR_ERR(req); 1925 return POLLERR;
1926 1926
1927 req->in.h.opcode = FUSE_POLL; 1927 req->in.h.opcode = FUSE_POLL;
1928 req->in.h.nodeid = ff->nodeid; 1928 req->in.h.nodeid = ff->nodeid;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index aaf2f9ff970e..52b641fc0faf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -446,6 +446,9 @@ struct fuse_conn {
446 /** Do multi-page cached writes */ 446 /** Do multi-page cached writes */
447 unsigned big_writes:1; 447 unsigned big_writes:1;
448 448
449 /** Don't apply umask to creation modes */
450 unsigned dont_mask:1;
451
449 /** The number of requests waiting for completion */ 452 /** The number of requests waiting for completion */
450 atomic_t num_waiting; 453 atomic_t num_waiting;
451 454
@@ -481,6 +484,12 @@ struct fuse_conn {
481 484
482 /** Called on final put */ 485 /** Called on final put */
483 void (*release)(struct fuse_conn *); 486 void (*release)(struct fuse_conn *);
487
488 /** Super block for this connection. */
489 struct super_block *sb;
490
491 /** Read/write semaphore to hold when accessing sb. */
492 struct rw_semaphore killsb;
484}; 493};
485 494
486static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) 495static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -509,6 +518,11 @@ extern const struct file_operations fuse_dev_operations;
509extern const struct dentry_operations fuse_dentry_operations; 518extern const struct dentry_operations fuse_dentry_operations;
510 519
511/** 520/**
521 * Inode to nodeid comparison.
522 */
523int fuse_inode_eq(struct inode *inode, void *_nodeidp);
524
525/**
512 * Get a filled in inode 526 * Get a filled in inode
513 */ 527 */
514struct inode *fuse_iget(struct super_block *sb, u64 nodeid, 528struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
@@ -708,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode);
708 722
709u64 fuse_get_attr_version(struct fuse_conn *fc); 723u64 fuse_get_attr_version(struct fuse_conn *fc);
710 724
725/**
726 * File-system tells the kernel to invalidate cache for the given node id.
727 */
728int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
729 loff_t offset, loff_t len);
730
731/**
732 * File-system tells the kernel to invalidate parent attributes and
733 * the dentry matching parent/name.
734 */
735int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
736 struct qstr *name);
737
711int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, 738int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
712 bool isdir); 739 bool isdir);
713ssize_t fuse_direct_io(struct file *file, const char __user *buf, 740ssize_t fuse_direct_io(struct file *file, const char __user *buf,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8673ccf90b7..f91ccc4a189d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
206 BUG(); 206 BUG();
207} 207}
208 208
209static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 209int fuse_inode_eq(struct inode *inode, void *_nodeidp)
210{ 210{
211 u64 nodeid = *(u64 *) _nodeidp; 211 u64 nodeid = *(u64 *) _nodeidp;
212 if (get_node_id(inode) == nodeid) 212 if (get_node_id(inode) == nodeid)
@@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
257 return inode; 257 return inode;
258} 258}
259 259
260int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
261 loff_t offset, loff_t len)
262{
263 struct inode *inode;
264 pgoff_t pg_start;
265 pgoff_t pg_end;
266
267 inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
268 if (!inode)
269 return -ENOENT;
270
271 fuse_invalidate_attr(inode);
272 if (offset >= 0) {
273 pg_start = offset >> PAGE_CACHE_SHIFT;
274 if (len <= 0)
275 pg_end = -1;
276 else
277 pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
278 invalidate_inode_pages2_range(inode->i_mapping,
279 pg_start, pg_end);
280 }
281 iput(inode);
282 return 0;
283}
284
260static void fuse_umount_begin(struct super_block *sb) 285static void fuse_umount_begin(struct super_block *sb)
261{ 286{
262 fuse_abort_conn(get_fuse_conn_super(sb)); 287 fuse_abort_conn(get_fuse_conn_super(sb));
@@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc)
480 memset(fc, 0, sizeof(*fc)); 505 memset(fc, 0, sizeof(*fc));
481 spin_lock_init(&fc->lock); 506 spin_lock_init(&fc->lock);
482 mutex_init(&fc->inst_mutex); 507 mutex_init(&fc->inst_mutex);
508 init_rwsem(&fc->killsb);
483 atomic_set(&fc->count, 1); 509 atomic_set(&fc->count, 1);
484 init_waitqueue_head(&fc->waitq); 510 init_waitqueue_head(&fc->waitq);
485 init_waitqueue_head(&fc->blocked_waitq); 511 init_waitqueue_head(&fc->blocked_waitq);
@@ -725,6 +751,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
725 } 751 }
726 if (arg->flags & FUSE_BIG_WRITES) 752 if (arg->flags & FUSE_BIG_WRITES)
727 fc->big_writes = 1; 753 fc->big_writes = 1;
754 if (arg->flags & FUSE_DONT_MASK)
755 fc->dont_mask = 1;
728 } else { 756 } else {
729 ra_pages = fc->max_read / PAGE_CACHE_SIZE; 757 ra_pages = fc->max_read / PAGE_CACHE_SIZE;
730 fc->no_lock = 1; 758 fc->no_lock = 1;
@@ -748,7 +776,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
748 arg->minor = FUSE_KERNEL_MINOR_VERSION; 776 arg->minor = FUSE_KERNEL_MINOR_VERSION;
749 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 777 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
750 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 778 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
751 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; 779 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
752 req->in.h.opcode = FUSE_INIT; 780 req->in.h.opcode = FUSE_INIT;
753 req->in.numargs = 1; 781 req->in.numargs = 1;
754 req->in.args[0].size = sizeof(*arg); 782 req->in.args[0].size = sizeof(*arg);
@@ -860,10 +888,16 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
860 fuse_conn_init(fc); 888 fuse_conn_init(fc);
861 889
862 fc->dev = sb->s_dev; 890 fc->dev = sb->s_dev;
891 fc->sb = sb;
863 err = fuse_bdi_init(fc, sb); 892 err = fuse_bdi_init(fc, sb);
864 if (err) 893 if (err)
865 goto err_put_conn; 894 goto err_put_conn;
866 895
896 /* Handle umasking inside the fuse code */
897 if (sb->s_flags & MS_POSIXACL)
898 fc->dont_mask = 1;
899 sb->s_flags |= MS_POSIXACL;
900
867 fc->release = fuse_free_conn; 901 fc->release = fuse_free_conn;
868 fc->flags = d.flags; 902 fc->flags = d.flags;
869 fc->user_id = d.user_id; 903 fc->user_id = d.user_id;
@@ -941,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type,
941 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); 975 return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
942} 976}
943 977
978static void fuse_kill_sb_anon(struct super_block *sb)
979{
980 struct fuse_conn *fc = get_fuse_conn_super(sb);
981
982 if (fc) {
983 down_write(&fc->killsb);
984 fc->sb = NULL;
985 up_write(&fc->killsb);
986 }
987
988 kill_anon_super(sb);
989}
990
944static struct file_system_type fuse_fs_type = { 991static struct file_system_type fuse_fs_type = {
945 .owner = THIS_MODULE, 992 .owner = THIS_MODULE,
946 .name = "fuse", 993 .name = "fuse",
947 .fs_flags = FS_HAS_SUBTYPE, 994 .fs_flags = FS_HAS_SUBTYPE,
948 .get_sb = fuse_get_sb, 995 .get_sb = fuse_get_sb,
949 .kill_sb = kill_anon_super, 996 .kill_sb = fuse_kill_sb_anon,
950}; 997};
951 998
952#ifdef CONFIG_BLOCK 999#ifdef CONFIG_BLOCK
@@ -958,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type,
958 mnt); 1005 mnt);
959} 1006}
960 1007
1008static void fuse_kill_sb_blk(struct super_block *sb)
1009{
1010 struct fuse_conn *fc = get_fuse_conn_super(sb);
1011
1012 if (fc) {
1013 down_write(&fc->killsb);
1014 fc->sb = NULL;
1015 up_write(&fc->killsb);
1016 }
1017
1018 kill_block_super(sb);
1019}
1020
961static struct file_system_type fuseblk_fs_type = { 1021static struct file_system_type fuseblk_fs_type = {
962 .owner = THIS_MODULE, 1022 .owner = THIS_MODULE,
963 .name = "fuseblk", 1023 .name = "fuseblk",
964 .get_sb = fuse_get_sb_blk, 1024 .get_sb = fuse_get_sb_blk,
965 .kill_sb = kill_block_super, 1025 .kill_sb = fuse_kill_sb_blk,
966 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1026 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
967}; 1027};
968 1028
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 03ebb439ace0..7ebae9a4ecc0 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -624,6 +624,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
624{ 624{
625 struct gfs2_inode *ip = GFS2_I(mapping->host); 625 struct gfs2_inode *ip = GFS2_I(mapping->host);
626 struct gfs2_sbd *sdp = GFS2_SB(mapping->host); 626 struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
627 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
627 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 628 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
628 int alloc_required; 629 int alloc_required;
629 int error = 0; 630 int error = 0;
@@ -637,6 +638,14 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
637 error = gfs2_glock_nq(&ip->i_gh); 638 error = gfs2_glock_nq(&ip->i_gh);
638 if (unlikely(error)) 639 if (unlikely(error))
639 goto out_uninit; 640 goto out_uninit;
641 if (&ip->i_inode == sdp->sd_rindex) {
642 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
643 GL_NOCACHE, &m_ip->i_gh);
644 if (unlikely(error)) {
645 gfs2_glock_dq(&ip->i_gh);
646 goto out_uninit;
647 }
648 }
640 649
641 error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); 650 error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
642 if (error) 651 if (error)
@@ -667,6 +676,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
667 rblocks += data_blocks ? data_blocks : 1; 676 rblocks += data_blocks ? data_blocks : 1;
668 if (ind_blocks || data_blocks) 677 if (ind_blocks || data_blocks)
669 rblocks += RES_STATFS + RES_QUOTA; 678 rblocks += RES_STATFS + RES_QUOTA;
679 if (&ip->i_inode == sdp->sd_rindex)
680 rblocks += 2 * RES_STATFS;
670 681
671 error = gfs2_trans_begin(sdp, rblocks, 682 error = gfs2_trans_begin(sdp, rblocks,
672 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); 683 PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -712,6 +723,10 @@ out_alloc_put:
712 gfs2_alloc_put(ip); 723 gfs2_alloc_put(ip);
713 } 724 }
714out_unlock: 725out_unlock:
726 if (&ip->i_inode == sdp->sd_rindex) {
727 gfs2_glock_dq(&m_ip->i_gh);
728 gfs2_holder_uninit(&m_ip->i_gh);
729 }
715 gfs2_glock_dq(&ip->i_gh); 730 gfs2_glock_dq(&ip->i_gh);
716out_uninit: 731out_uninit:
717 gfs2_holder_uninit(&ip->i_gh); 732 gfs2_holder_uninit(&ip->i_gh);
@@ -725,14 +740,21 @@ out_uninit:
725static void adjust_fs_space(struct inode *inode) 740static void adjust_fs_space(struct inode *inode)
726{ 741{
727 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 742 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
743 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
744 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
728 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; 745 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
729 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; 746 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
747 struct buffer_head *m_bh, *l_bh;
730 u64 fs_total, new_free; 748 u64 fs_total, new_free;
731 749
732 /* Total up the file system space, according to the latest rindex. */ 750 /* Total up the file system space, according to the latest rindex. */
733 fs_total = gfs2_ri_total(sdp); 751 fs_total = gfs2_ri_total(sdp);
752 if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0)
753 return;
734 754
735 spin_lock(&sdp->sd_statfs_spin); 755 spin_lock(&sdp->sd_statfs_spin);
756 gfs2_statfs_change_in(m_sc, m_bh->b_data +
757 sizeof(struct gfs2_dinode));
736 if (fs_total > (m_sc->sc_total + l_sc->sc_total)) 758 if (fs_total > (m_sc->sc_total + l_sc->sc_total))
737 new_free = fs_total - (m_sc->sc_total + l_sc->sc_total); 759 new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
738 else 760 else
@@ -741,6 +763,13 @@ static void adjust_fs_space(struct inode *inode)
741 fs_warn(sdp, "File system extended by %llu blocks.\n", 763 fs_warn(sdp, "File system extended by %llu blocks.\n",
742 (unsigned long long)new_free); 764 (unsigned long long)new_free);
743 gfs2_statfs_change(sdp, new_free, new_free, 0); 765 gfs2_statfs_change(sdp, new_free, new_free, 0);
766
767 if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0)
768 goto out;
769 update_statfs(sdp, m_bh, l_bh);
770 brelse(l_bh);
771out:
772 brelse(m_bh);
744} 773}
745 774
746/** 775/**
@@ -763,6 +792,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
763{ 792{
764 struct gfs2_inode *ip = GFS2_I(inode); 793 struct gfs2_inode *ip = GFS2_I(inode);
765 struct gfs2_sbd *sdp = GFS2_SB(inode); 794 struct gfs2_sbd *sdp = GFS2_SB(inode);
795 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
766 u64 to = pos + copied; 796 u64 to = pos + copied;
767 void *kaddr; 797 void *kaddr;
768 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); 798 unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
@@ -794,6 +824,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
794 824
795 brelse(dibh); 825 brelse(dibh);
796 gfs2_trans_end(sdp); 826 gfs2_trans_end(sdp);
827 if (inode == sdp->sd_rindex) {
828 gfs2_glock_dq(&m_ip->i_gh);
829 gfs2_holder_uninit(&m_ip->i_gh);
830 }
797 gfs2_glock_dq(&ip->i_gh); 831 gfs2_glock_dq(&ip->i_gh);
798 gfs2_holder_uninit(&ip->i_gh); 832 gfs2_holder_uninit(&ip->i_gh);
799 return copied; 833 return copied;
@@ -823,6 +857,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
823 struct inode *inode = page->mapping->host; 857 struct inode *inode = page->mapping->host;
824 struct gfs2_inode *ip = GFS2_I(inode); 858 struct gfs2_inode *ip = GFS2_I(inode);
825 struct gfs2_sbd *sdp = GFS2_SB(inode); 859 struct gfs2_sbd *sdp = GFS2_SB(inode);
860 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
826 struct buffer_head *dibh; 861 struct buffer_head *dibh;
827 struct gfs2_alloc *al = ip->i_alloc; 862 struct gfs2_alloc *al = ip->i_alloc;
828 unsigned int from = pos & (PAGE_CACHE_SIZE - 1); 863 unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
@@ -865,6 +900,10 @@ failed:
865 gfs2_quota_unlock(ip); 900 gfs2_quota_unlock(ip);
866 gfs2_alloc_put(ip); 901 gfs2_alloc_put(ip);
867 } 902 }
903 if (inode == sdp->sd_rindex) {
904 gfs2_glock_dq(&m_ip->i_gh);
905 gfs2_holder_uninit(&m_ip->i_gh);
906 }
868 gfs2_glock_dq(&ip->i_gh); 907 gfs2_glock_dq(&ip->i_gh);
869 gfs2_holder_uninit(&ip->i_gh); 908 gfs2_holder_uninit(&ip->i_gh);
870 return ret; 909 return ret;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 297421c0427a..8b674b1f3a55 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -63,6 +63,7 @@ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int
63static DECLARE_RWSEM(gfs2_umount_flush_sem); 63static DECLARE_RWSEM(gfs2_umount_flush_sem);
64static struct dentry *gfs2_root; 64static struct dentry *gfs2_root;
65static struct workqueue_struct *glock_workqueue; 65static struct workqueue_struct *glock_workqueue;
66struct workqueue_struct *gfs2_delete_workqueue;
66static LIST_HEAD(lru_list); 67static LIST_HEAD(lru_list);
67static atomic_t lru_count = ATOMIC_INIT(0); 68static atomic_t lru_count = ATOMIC_INIT(0);
68static DEFINE_SPINLOCK(lru_lock); 69static DEFINE_SPINLOCK(lru_lock);
@@ -167,13 +168,33 @@ static void glock_free(struct gfs2_glock *gl)
167 * 168 *
168 */ 169 */
169 170
170static void gfs2_glock_hold(struct gfs2_glock *gl) 171void gfs2_glock_hold(struct gfs2_glock *gl)
171{ 172{
172 GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); 173 GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0);
173 atomic_inc(&gl->gl_ref); 174 atomic_inc(&gl->gl_ref);
174} 175}
175 176
176/** 177/**
178 * demote_ok - Check to see if it's ok to unlock a glock
179 * @gl: the glock
180 *
181 * Returns: 1 if it's ok
182 */
183
184static int demote_ok(const struct gfs2_glock *gl)
185{
186 const struct gfs2_glock_operations *glops = gl->gl_ops;
187
188 if (gl->gl_state == LM_ST_UNLOCKED)
189 return 0;
190 if (!list_empty(&gl->gl_holders))
191 return 0;
192 if (glops->go_demote_ok)
193 return glops->go_demote_ok(gl);
194 return 1;
195}
196
197/**
177 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 198 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
178 * @gl: the glock 199 * @gl: the glock
179 * 200 *
@@ -181,8 +202,13 @@ static void gfs2_glock_hold(struct gfs2_glock *gl)
181 202
182static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 203static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
183{ 204{
205 int may_reclaim;
206 may_reclaim = (demote_ok(gl) &&
207 (atomic_read(&gl->gl_ref) == 1 ||
208 (gl->gl_name.ln_type == LM_TYPE_INODE &&
209 atomic_read(&gl->gl_ref) <= 2)));
184 spin_lock(&lru_lock); 210 spin_lock(&lru_lock);
185 if (list_empty(&gl->gl_lru) && gl->gl_state != LM_ST_UNLOCKED) { 211 if (list_empty(&gl->gl_lru) && may_reclaim) {
186 list_add_tail(&gl->gl_lru, &lru_list); 212 list_add_tail(&gl->gl_lru, &lru_list);
187 atomic_inc(&lru_count); 213 atomic_inc(&lru_count);
188 } 214 }
@@ -190,6 +216,21 @@ static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
190} 216}
191 217
192/** 218/**
219 * gfs2_glock_put_nolock() - Decrement reference count on glock
220 * @gl: The glock to put
221 *
222 * This function should only be used if the caller has its own reference
223 * to the glock, in addition to the one it is dropping.
224 */
225
226void gfs2_glock_put_nolock(struct gfs2_glock *gl)
227{
228 if (atomic_dec_and_test(&gl->gl_ref))
229 GLOCK_BUG_ON(gl, 1);
230 gfs2_glock_schedule_for_reclaim(gl);
231}
232
233/**
193 * gfs2_glock_put() - Decrement reference count on glock 234 * gfs2_glock_put() - Decrement reference count on glock
194 * @gl: The glock to put 235 * @gl: The glock to put
195 * 236 *
@@ -214,9 +255,9 @@ int gfs2_glock_put(struct gfs2_glock *gl)
214 rv = 1; 255 rv = 1;
215 goto out; 256 goto out;
216 } 257 }
217 /* 1 for being hashed, 1 for having state != LM_ST_UNLOCKED */ 258 spin_lock(&gl->gl_spin);
218 if (atomic_read(&gl->gl_ref) == 2) 259 gfs2_glock_schedule_for_reclaim(gl);
219 gfs2_glock_schedule_for_reclaim(gl); 260 spin_unlock(&gl->gl_spin);
220 write_unlock(gl_lock_addr(gl->gl_hash)); 261 write_unlock(gl_lock_addr(gl->gl_hash));
221out: 262out:
222 return rv; 263 return rv;
@@ -398,7 +439,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
398 if (held2) 439 if (held2)
399 gfs2_glock_hold(gl); 440 gfs2_glock_hold(gl);
400 else 441 else
401 gfs2_glock_put(gl); 442 gfs2_glock_put_nolock(gl);
402 } 443 }
403 444
404 gl->gl_state = new_state; 445 gl->gl_state = new_state;
@@ -633,12 +674,35 @@ out:
633out_sched: 674out_sched:
634 gfs2_glock_hold(gl); 675 gfs2_glock_hold(gl);
635 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 676 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
636 gfs2_glock_put(gl); 677 gfs2_glock_put_nolock(gl);
637out_unlock: 678out_unlock:
638 clear_bit(GLF_LOCK, &gl->gl_flags); 679 clear_bit(GLF_LOCK, &gl->gl_flags);
639 goto out; 680 goto out;
640} 681}
641 682
683static void delete_work_func(struct work_struct *work)
684{
685 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
686 struct gfs2_sbd *sdp = gl->gl_sbd;
687 struct gfs2_inode *ip = NULL;
688 struct inode *inode;
689 u64 no_addr = 0;
690
691 spin_lock(&gl->gl_spin);
692 ip = (struct gfs2_inode *)gl->gl_object;
693 if (ip)
694 no_addr = ip->i_no_addr;
695 spin_unlock(&gl->gl_spin);
696 if (ip) {
697 inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
698 if (inode) {
699 d_prune_aliases(inode);
700 iput(inode);
701 }
702 }
703 gfs2_glock_put(gl);
704}
705
642static void glock_work_func(struct work_struct *work) 706static void glock_work_func(struct work_struct *work)
643{ 707{
644 unsigned long delay = 0; 708 unsigned long delay = 0;
@@ -717,6 +781,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
717 gl->gl_sbd = sdp; 781 gl->gl_sbd = sdp;
718 gl->gl_aspace = NULL; 782 gl->gl_aspace = NULL;
719 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 783 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
784 INIT_WORK(&gl->gl_delete, delete_work_func);
720 785
721 /* If this glock protects actual on-disk data or metadata blocks, 786 /* If this glock protects actual on-disk data or metadata blocks,
722 create a VFS inode to manage the pages/buffers holding them. */ 787 create a VFS inode to manage the pages/buffers holding them. */
@@ -858,6 +923,8 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
858 gl->gl_demote_state != state) { 923 gl->gl_demote_state != state) {
859 gl->gl_demote_state = LM_ST_UNLOCKED; 924 gl->gl_demote_state = LM_ST_UNLOCKED;
860 } 925 }
926 if (gl->gl_ops->go_callback)
927 gl->gl_ops->go_callback(gl);
861 trace_gfs2_demote_rq(gl); 928 trace_gfs2_demote_rq(gl);
862} 929}
863 930
@@ -1274,33 +1341,12 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1274 gfs2_glock_put(gl); 1341 gfs2_glock_put(gl);
1275} 1342}
1276 1343
1277/**
1278 * demote_ok - Check to see if it's ok to unlock a glock
1279 * @gl: the glock
1280 *
1281 * Returns: 1 if it's ok
1282 */
1283
1284static int demote_ok(const struct gfs2_glock *gl)
1285{
1286 const struct gfs2_glock_operations *glops = gl->gl_ops;
1287
1288 if (gl->gl_state == LM_ST_UNLOCKED)
1289 return 0;
1290 if (!list_empty(&gl->gl_holders))
1291 return 0;
1292 if (glops->go_demote_ok)
1293 return glops->go_demote_ok(gl);
1294 return 1;
1295}
1296
1297 1344
1298static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) 1345static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
1299{ 1346{
1300 struct gfs2_glock *gl; 1347 struct gfs2_glock *gl;
1301 int may_demote; 1348 int may_demote;
1302 int nr_skipped = 0; 1349 int nr_skipped = 0;
1303 int got_ref = 0;
1304 LIST_HEAD(skipped); 1350 LIST_HEAD(skipped);
1305 1351
1306 if (nr == 0) 1352 if (nr == 0)
@@ -1315,37 +1361,29 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
1315 list_del_init(&gl->gl_lru); 1361 list_del_init(&gl->gl_lru);
1316 atomic_dec(&lru_count); 1362 atomic_dec(&lru_count);
1317 1363
1364 /* Check if glock is about to be freed */
1365 if (atomic_read(&gl->gl_ref) == 0)
1366 continue;
1367
1318 /* Test for being demotable */ 1368 /* Test for being demotable */
1319 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { 1369 if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1320 gfs2_glock_hold(gl); 1370 gfs2_glock_hold(gl);
1321 got_ref = 1;
1322 spin_unlock(&lru_lock); 1371 spin_unlock(&lru_lock);
1323 spin_lock(&gl->gl_spin); 1372 spin_lock(&gl->gl_spin);
1324 may_demote = demote_ok(gl); 1373 may_demote = demote_ok(gl);
1325 spin_unlock(&gl->gl_spin);
1326 clear_bit(GLF_LOCK, &gl->gl_flags);
1327 if (may_demote) { 1374 if (may_demote) {
1328 handle_callback(gl, LM_ST_UNLOCKED, 0); 1375 handle_callback(gl, LM_ST_UNLOCKED, 0);
1329 nr--; 1376 nr--;
1330 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1331 gfs2_glock_put(gl);
1332 got_ref = 0;
1333 } 1377 }
1378 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1379 gfs2_glock_put_nolock(gl);
1380 spin_unlock(&gl->gl_spin);
1381 clear_bit(GLF_LOCK, &gl->gl_flags);
1334 spin_lock(&lru_lock); 1382 spin_lock(&lru_lock);
1335 if (may_demote) 1383 continue;
1336 continue;
1337 }
1338 if (list_empty(&gl->gl_lru) &&
1339 (atomic_read(&gl->gl_ref) <= (2 + got_ref))) {
1340 nr_skipped++;
1341 list_add(&gl->gl_lru, &skipped);
1342 }
1343 if (got_ref) {
1344 spin_unlock(&lru_lock);
1345 gfs2_glock_put(gl);
1346 spin_lock(&lru_lock);
1347 got_ref = 0;
1348 } 1384 }
1385 nr_skipped++;
1386 list_add(&gl->gl_lru, &skipped);
1349 } 1387 }
1350 list_splice(&skipped, &lru_list); 1388 list_splice(&skipped, &lru_list);
1351 atomic_add(nr_skipped, &lru_count); 1389 atomic_add(nr_skipped, &lru_count);
@@ -1727,6 +1765,11 @@ int __init gfs2_glock_init(void)
1727 glock_workqueue = create_workqueue("glock_workqueue"); 1765 glock_workqueue = create_workqueue("glock_workqueue");
1728 if (IS_ERR(glock_workqueue)) 1766 if (IS_ERR(glock_workqueue))
1729 return PTR_ERR(glock_workqueue); 1767 return PTR_ERR(glock_workqueue);
1768 gfs2_delete_workqueue = create_workqueue("delete_workqueue");
1769 if (IS_ERR(gfs2_delete_workqueue)) {
1770 destroy_workqueue(glock_workqueue);
1771 return PTR_ERR(gfs2_delete_workqueue);
1772 }
1730 1773
1731 register_shrinker(&glock_shrinker); 1774 register_shrinker(&glock_shrinker);
1732 1775
@@ -1737,6 +1780,7 @@ void gfs2_glock_exit(void)
1737{ 1780{
1738 unregister_shrinker(&glock_shrinker); 1781 unregister_shrinker(&glock_shrinker);
1739 destroy_workqueue(glock_workqueue); 1782 destroy_workqueue(glock_workqueue);
1783 destroy_workqueue(gfs2_delete_workqueue);
1740} 1784}
1741 1785
1742static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) 1786static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index a602a28f6f08..c609894ec0d0 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -143,6 +143,7 @@ struct lm_lockops {
143 143
144#define GLR_TRYFAILED 13 144#define GLR_TRYFAILED 13
145 145
146extern struct workqueue_struct *gfs2_delete_workqueue;
146static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) 147static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
147{ 148{
148 struct gfs2_holder *gh; 149 struct gfs2_holder *gh;
@@ -191,6 +192,8 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
191int gfs2_glock_get(struct gfs2_sbd *sdp, 192int gfs2_glock_get(struct gfs2_sbd *sdp,
192 u64 number, const struct gfs2_glock_operations *glops, 193 u64 number, const struct gfs2_glock_operations *glops,
193 int create, struct gfs2_glock **glp); 194 int create, struct gfs2_glock **glp);
195void gfs2_glock_hold(struct gfs2_glock *gl);
196void gfs2_glock_put_nolock(struct gfs2_glock *gl);
194int gfs2_glock_put(struct gfs2_glock *gl); 197int gfs2_glock_put(struct gfs2_glock *gl);
195void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 198void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
196 struct gfs2_holder *gh); 199 struct gfs2_holder *gh);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index d5e4ab155ca0..6985eef06c39 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -323,6 +323,7 @@ static void trans_go_sync(struct gfs2_glock *gl)
323 323
324 if (gl->gl_state != LM_ST_UNLOCKED && 324 if (gl->gl_state != LM_ST_UNLOCKED &&
325 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 325 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
326 flush_workqueue(gfs2_delete_workqueue);
326 gfs2_meta_syncfs(sdp); 327 gfs2_meta_syncfs(sdp);
327 gfs2_log_shutdown(sdp); 328 gfs2_log_shutdown(sdp);
328 } 329 }
@@ -372,6 +373,25 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl)
372 return 0; 373 return 0;
373} 374}
374 375
376/**
377 * iopen_go_callback - schedule the dcache entry for the inode to be deleted
378 * @gl: the glock
379 *
380 * gl_spin lock is held while calling this
381 */
382static void iopen_go_callback(struct gfs2_glock *gl)
383{
384 struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object;
385
386 if (gl->gl_demote_state == LM_ST_UNLOCKED &&
387 gl->gl_state == LM_ST_SHARED &&
388 ip && test_bit(GIF_USER, &ip->i_flags)) {
389 gfs2_glock_hold(gl);
390 if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
391 gfs2_glock_put_nolock(gl);
392 }
393}
394
375const struct gfs2_glock_operations gfs2_meta_glops = { 395const struct gfs2_glock_operations gfs2_meta_glops = {
376 .go_type = LM_TYPE_META, 396 .go_type = LM_TYPE_META,
377}; 397};
@@ -406,6 +426,7 @@ const struct gfs2_glock_operations gfs2_trans_glops = {
406 426
407const struct gfs2_glock_operations gfs2_iopen_glops = { 427const struct gfs2_glock_operations gfs2_iopen_glops = {
408 .go_type = LM_TYPE_IOPEN, 428 .go_type = LM_TYPE_IOPEN,
429 .go_callback = iopen_go_callback,
409}; 430};
410 431
411const struct gfs2_glock_operations gfs2_flock_glops = { 432const struct gfs2_glock_operations gfs2_flock_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 225347fbff3c..61801ada36f0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -159,6 +159,7 @@ struct gfs2_glock_operations {
159 int (*go_lock) (struct gfs2_holder *gh); 159 int (*go_lock) (struct gfs2_holder *gh);
160 void (*go_unlock) (struct gfs2_holder *gh); 160 void (*go_unlock) (struct gfs2_holder *gh);
161 int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); 161 int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
162 void (*go_callback) (struct gfs2_glock *gl);
162 const int go_type; 163 const int go_type;
163 const unsigned long go_min_hold_time; 164 const unsigned long go_min_hold_time;
164}; 165};
@@ -228,6 +229,7 @@ struct gfs2_glock {
228 struct list_head gl_ail_list; 229 struct list_head gl_ail_list;
229 atomic_t gl_ail_count; 230 atomic_t gl_ail_count;
230 struct delayed_work gl_work; 231 struct delayed_work gl_work;
232 struct work_struct gl_delete;
231}; 233};
232 234
233#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 235#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index daa4ae341a29..fba795798d3a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -285,27 +285,19 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
285 } 285 }
286 286
287 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes; 287 tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
288 if (count[1] + count[2] != tmp) { 288 if (count[1] != tmp) {
289 if (gfs2_consist_rgrpd(rgd)) 289 if (gfs2_consist_rgrpd(rgd))
290 fs_err(sdp, "used data mismatch: %u != %u\n", 290 fs_err(sdp, "used data mismatch: %u != %u\n",
291 count[1], tmp); 291 count[1], tmp);
292 return; 292 return;
293 } 293 }
294 294
295 if (count[3] != rgd->rd_dinodes) { 295 if (count[2] + count[3] != rgd->rd_dinodes) {
296 if (gfs2_consist_rgrpd(rgd)) 296 if (gfs2_consist_rgrpd(rgd))
297 fs_err(sdp, "used metadata mismatch: %u != %u\n", 297 fs_err(sdp, "used metadata mismatch: %u != %u\n",
298 count[3], rgd->rd_dinodes); 298 count[2] + count[3], rgd->rd_dinodes);
299 return; 299 return;
300 } 300 }
301
302 if (count[2] > count[3]) {
303 if (gfs2_consist_rgrpd(rgd))
304 fs_err(sdp, "unlinked inodes > inodes: %u\n",
305 count[2]);
306 return;
307 }
308
309} 301}
310 302
311static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) 303static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
@@ -961,7 +953,8 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
961 * Returns: The inode, if one has been found 953 * Returns: The inode, if one has been found
962 */ 954 */
963 955
964static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) 956static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
957 u64 skip)
965{ 958{
966 struct inode *inode; 959 struct inode *inode;
967 u32 goal = 0, block; 960 u32 goal = 0, block;
@@ -985,6 +978,8 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
985 goal++; 978 goal++;
986 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) 979 if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
987 continue; 980 continue;
981 if (no_addr == skip)
982 continue;
988 *last_unlinked = no_addr; 983 *last_unlinked = no_addr;
989 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, 984 inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
990 no_addr, -1, 1); 985 no_addr, -1, 1);
@@ -1104,7 +1099,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1104 if (try_rgrp_fit(rgd, al)) 1099 if (try_rgrp_fit(rgd, al))
1105 goto out; 1100 goto out;
1106 if (rgd->rd_flags & GFS2_RDF_CHECK) 1101 if (rgd->rd_flags & GFS2_RDF_CHECK)
1107 inode = try_rgrp_unlink(rgd, last_unlinked); 1102 inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1108 if (!rg_locked) 1103 if (!rg_locked)
1109 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1104 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1110 if (inode) 1105 if (inode)
@@ -1138,7 +1133,7 @@ static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1138 if (try_rgrp_fit(rgd, al)) 1133 if (try_rgrp_fit(rgd, al))
1139 goto out; 1134 goto out;
1140 if (rgd->rd_flags & GFS2_RDF_CHECK) 1135 if (rgd->rd_flags & GFS2_RDF_CHECK)
1141 inode = try_rgrp_unlink(rgd, last_unlinked); 1136 inode = try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1142 if (!rg_locked) 1137 if (!rg_locked)
1143 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1138 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1144 if (inode) 1139 if (inode)
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 0a6801336470..f522bb017973 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -353,7 +353,7 @@ fail:
353 return error; 353 return error;
354} 354}
355 355
356static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) 356void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
357{ 357{
358 const struct gfs2_statfs_change *str = buf; 358 const struct gfs2_statfs_change *str = buf;
359 359
@@ -441,6 +441,29 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
441 brelse(l_bh); 441 brelse(l_bh);
442} 442}
443 443
444void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
445 struct buffer_head *l_bh)
446{
447 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
448 struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
449 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
450 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
451
452 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
453
454 spin_lock(&sdp->sd_statfs_spin);
455 m_sc->sc_total += l_sc->sc_total;
456 m_sc->sc_free += l_sc->sc_free;
457 m_sc->sc_dinodes += l_sc->sc_dinodes;
458 memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
459 memset(l_bh->b_data + sizeof(struct gfs2_dinode),
460 0, sizeof(struct gfs2_statfs_change));
461 spin_unlock(&sdp->sd_statfs_spin);
462
463 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
464 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
465}
466
444int gfs2_statfs_sync(struct gfs2_sbd *sdp) 467int gfs2_statfs_sync(struct gfs2_sbd *sdp)
445{ 468{
446 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 469 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -477,19 +500,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp)
477 if (error) 500 if (error)
478 goto out_bh2; 501 goto out_bh2;
479 502
480 gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); 503 update_statfs(sdp, m_bh, l_bh);
481
482 spin_lock(&sdp->sd_statfs_spin);
483 m_sc->sc_total += l_sc->sc_total;
484 m_sc->sc_free += l_sc->sc_free;
485 m_sc->sc_dinodes += l_sc->sc_dinodes;
486 memset(l_sc, 0, sizeof(struct gfs2_statfs_change));
487 memset(l_bh->b_data + sizeof(struct gfs2_dinode),
488 0, sizeof(struct gfs2_statfs_change));
489 spin_unlock(&sdp->sd_statfs_spin);
490
491 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
492 gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));
493 504
494 gfs2_trans_end(sdp); 505 gfs2_trans_end(sdp);
495 506
@@ -680,6 +691,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
680 struct gfs2_holder t_gh; 691 struct gfs2_holder t_gh;
681 int error; 692 int error;
682 693
694 flush_workqueue(gfs2_delete_workqueue);
683 gfs2_quota_sync(sdp); 695 gfs2_quota_sync(sdp);
684 gfs2_statfs_sync(sdp); 696 gfs2_statfs_sync(sdp);
685 697
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index b56413e3e40d..22e0417ed996 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -40,6 +40,10 @@ extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
40extern int gfs2_statfs_init(struct gfs2_sbd *sdp); 40extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, 41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
42 s64 dinodes); 42 s64 dinodes);
43extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
44 const void *buf);
45extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
46 struct buffer_head *l_bh);
43extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); 47extern int gfs2_statfs_sync(struct gfs2_sbd *sdp);
44 48
45extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); 49extern int gfs2_freeze_fs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 98d6ef1c1dc0..148d55c14171 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -1,12 +1,11 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM gfs2
3
1#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) 4#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
2#define _TRACE_GFS2_H 5#define _TRACE_GFS2_H
3 6
4#include <linux/tracepoint.h> 7#include <linux/tracepoint.h>
5 8
6#undef TRACE_SYSTEM
7#define TRACE_SYSTEM gfs2
8#define TRACE_INCLUDE_FILE trace_gfs2
9
10#include <linux/fs.h> 9#include <linux/fs.h>
11#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
12#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
@@ -403,5 +402,6 @@ TRACE_EVENT(gfs2_block_alloc,
403/* This part must be outside protection */ 402/* This part must be outside protection */
404#undef TRACE_INCLUDE_PATH 403#undef TRACE_INCLUDE_PATH
405#define TRACE_INCLUDE_PATH . 404#define TRACE_INCLUDE_PATH .
405#define TRACE_INCLUDE_FILE trace_gfs2
406#include <trace/define_trace.h> 406#include <trace/define_trace.h>
407 407
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 6f833dc8e910..f7fcbe49da72 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -19,6 +19,7 @@
19#include <linux/nls.h> 19#include <linux/nls.h>
20#include <linux/parser.h> 20#include <linux/parser.h>
21#include <linux/seq_file.h> 21#include <linux/seq_file.h>
22#include <linux/smp_lock.h>
22#include <linux/vfs.h> 23#include <linux/vfs.h>
23 24
24#include "hfs_fs.h" 25#include "hfs_fs.h"
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9fc3af0c0dab..c0759fe0855b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -12,6 +12,7 @@
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/smp_lock.h>
15#include <linux/vfs.h> 16#include <linux/vfs.h>
16#include <linux/nls.h> 17#include <linux/nls.h>
17 18
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fe02ad4740e7..032604e5ef2c 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
972 sb->s_blocksize_bits = 10; 972 sb->s_blocksize_bits = 10;
973 sb->s_magic = HOSTFS_SUPER_MAGIC; 973 sb->s_magic = HOSTFS_SUPER_MAGIC;
974 sb->s_op = &hostfs_sbops; 974 sb->s_op = &hostfs_sbops;
975 sb->s_maxbytes = MAX_LFS_FILESIZE;
975 976
976 /* NULL is printed as <NULL> by sprintf: avoid that. */ 977 /* NULL is printed as <NULL> by sprintf: avoid that. */
977 if (req_root == NULL) 978 if (req_root == NULL)
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 6916c41d7017..8865c94f55f6 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -6,6 +6,7 @@
6 * directory VFS functions 6 * directory VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11static int hpfs_dir_release(struct inode *inode, struct file *filp) 12static int hpfs_dir_release(struct inode *inode, struct file *filp)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 64ab52259204..3efabff00367 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -6,6 +6,7 @@
6 * file VFS functions 6 * file VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11#define BLOCKS(size) (((size) + 511) >> 9) 12#define BLOCKS(size) (((size) + 511) >> 9)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c2ea31bae313..701ca54c0867 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,7 +13,6 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/smp_lock.h>
17 16
18#include "hpfs.h" 17#include "hpfs.h"
19 18
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 39a1bfbea312..fe703ae46bc7 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -6,6 +6,7 @@
6 * inode VFS functions 6 * inode VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11void hpfs_init_inode(struct inode *i) 12void hpfs_init_inode(struct inode *i)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index b649232dde97..82b9c4ba9ed0 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -6,6 +6,7 @@
6 * adding & removing files & directories 6 * adding & removing files & directories
7 */ 7 */
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/smp_lock.h>
9#include "hpfs_fn.h" 10#include "hpfs_fn.h"
10 11
11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 12static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
diff --git a/fs/inode.c b/fs/inode.c
index 901bad1e5f12..ae7b67e48661 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode)
120 * These are initializations that need to be done on every inode 120 * These are initializations that need to be done on every inode
121 * allocation as the fields are not initialised by slab allocation. 121 * allocation as the fields are not initialised by slab allocation.
122 */ 122 */
123struct inode *inode_init_always(struct super_block *sb, struct inode *inode) 123int inode_init_always(struct super_block *sb, struct inode *inode)
124{ 124{
125 static const struct address_space_operations empty_aops; 125 static const struct address_space_operations empty_aops;
126 static struct inode_operations empty_iops; 126 static struct inode_operations empty_iops;
127 static const struct file_operations empty_fops; 127 static const struct file_operations empty_fops;
128
129 struct address_space *const mapping = &inode->i_data; 128 struct address_space *const mapping = &inode->i_data;
130 129
131 inode->i_sb = sb; 130 inode->i_sb = sb;
@@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
152 inode->dirtied_when = 0; 151 inode->dirtied_when = 0;
153 152
154 if (security_inode_alloc(inode)) 153 if (security_inode_alloc(inode))
155 goto out_free_inode; 154 goto out;
156 155
157 /* allocate and initialize an i_integrity */ 156 /* allocate and initialize an i_integrity */
158 if (ima_inode_alloc(inode)) 157 if (ima_inode_alloc(inode))
@@ -198,16 +197,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
198 inode->i_fsnotify_mask = 0; 197 inode->i_fsnotify_mask = 0;
199#endif 198#endif
200 199
201 return inode; 200 return 0;
202 201
203out_free_security: 202out_free_security:
204 security_inode_free(inode); 203 security_inode_free(inode);
205out_free_inode: 204out:
206 if (inode->i_sb->s_op->destroy_inode) 205 return -ENOMEM;
207 inode->i_sb->s_op->destroy_inode(inode);
208 else
209 kmem_cache_free(inode_cachep, (inode));
210 return NULL;
211} 206}
212EXPORT_SYMBOL(inode_init_always); 207EXPORT_SYMBOL(inode_init_always);
213 208
@@ -220,12 +215,21 @@ static struct inode *alloc_inode(struct super_block *sb)
220 else 215 else
221 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); 216 inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL);
222 217
223 if (inode) 218 if (!inode)
224 return inode_init_always(sb, inode); 219 return NULL;
225 return NULL; 220
221 if (unlikely(inode_init_always(sb, inode))) {
222 if (inode->i_sb->s_op->destroy_inode)
223 inode->i_sb->s_op->destroy_inode(inode);
224 else
225 kmem_cache_free(inode_cachep, inode);
226 return NULL;
227 }
228
229 return inode;
226} 230}
227 231
228void destroy_inode(struct inode *inode) 232void __destroy_inode(struct inode *inode)
229{ 233{
230 BUG_ON(inode_has_buffers(inode)); 234 BUG_ON(inode_has_buffers(inode));
231 ima_inode_free(inode); 235 ima_inode_free(inode);
@@ -237,13 +241,17 @@ void destroy_inode(struct inode *inode)
237 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 241 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
238 posix_acl_release(inode->i_default_acl); 242 posix_acl_release(inode->i_default_acl);
239#endif 243#endif
244}
245EXPORT_SYMBOL(__destroy_inode);
246
247void destroy_inode(struct inode *inode)
248{
249 __destroy_inode(inode);
240 if (inode->i_sb->s_op->destroy_inode) 250 if (inode->i_sb->s_op->destroy_inode)
241 inode->i_sb->s_op->destroy_inode(inode); 251 inode->i_sb->s_op->destroy_inode(inode);
242 else 252 else
243 kmem_cache_free(inode_cachep, (inode)); 253 kmem_cache_free(inode_cachep, (inode));
244} 254}
245EXPORT_SYMBOL(destroy_inode);
246
247 255
248/* 256/*
249 * These are initializations that only need to be done 257 * These are initializations that only need to be done
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 58a7963e168a..85f96bc651c7 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -142,6 +142,7 @@ static const struct dentry_operations isofs_dentry_ops[] = {
142 142
143struct iso9660_options{ 143struct iso9660_options{
144 unsigned int rock:1; 144 unsigned int rock:1;
145 unsigned int joliet:1;
145 unsigned int cruft:1; 146 unsigned int cruft:1;
146 unsigned int hide:1; 147 unsigned int hide:1;
147 unsigned int showassoc:1; 148 unsigned int showassoc:1;
@@ -151,7 +152,6 @@ struct iso9660_options{
151 unsigned int gid_set:1; 152 unsigned int gid_set:1;
152 unsigned int utf8:1; 153 unsigned int utf8:1;
153 unsigned char map; 154 unsigned char map;
154 char joliet;
155 unsigned char check; 155 unsigned char check;
156 unsigned int blocksize; 156 unsigned int blocksize;
157 mode_t fmode; 157 mode_t fmode;
@@ -632,7 +632,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
632 else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) { 632 else if (isonum_711(vdp->type) == ISO_VD_SUPPLEMENTARY) {
633 sec = (struct iso_supplementary_descriptor *)vdp; 633 sec = (struct iso_supplementary_descriptor *)vdp;
634 if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) { 634 if (sec->escape[0] == 0x25 && sec->escape[1] == 0x2f) {
635 if (opt.joliet == 'y') { 635 if (opt.joliet) {
636 if (sec->escape[2] == 0x40) 636 if (sec->escape[2] == 0x40)
637 joliet_level = 1; 637 joliet_level = 1;
638 else if (sec->escape[2] == 0x43) 638 else if (sec->escape[2] == 0x43)
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 737f7246a4b5..f96f85092d1c 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -287,6 +287,7 @@ int journal_write_metadata_buffer(transaction_t *transaction,
287 struct page *new_page; 287 struct page *new_page;
288 unsigned int new_offset; 288 unsigned int new_offset;
289 struct buffer_head *bh_in = jh2bh(jh_in); 289 struct buffer_head *bh_in = jh2bh(jh_in);
290 journal_t *journal = transaction->t_journal;
290 291
291 /* 292 /*
292 * The buffer really shouldn't be locked: only the current committing 293 * The buffer really shouldn't be locked: only the current committing
@@ -300,6 +301,11 @@ int journal_write_metadata_buffer(transaction_t *transaction,
300 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 301 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
301 302
302 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 303 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
304 /* keep subsequent assertions sane */
305 new_bh->b_state = 0;
306 init_buffer(new_bh, NULL, NULL);
307 atomic_set(&new_bh->b_count, 1);
308 new_jh = journal_add_journal_head(new_bh); /* This sleeps */
303 309
304 /* 310 /*
305 * If a new transaction has already done a buffer copy-out, then 311 * If a new transaction has already done a buffer copy-out, then
@@ -361,14 +367,6 @@ repeat:
361 kunmap_atomic(mapped_data, KM_USER0); 367 kunmap_atomic(mapped_data, KM_USER0);
362 } 368 }
363 369
364 /* keep subsequent assertions sane */
365 new_bh->b_state = 0;
366 init_buffer(new_bh, NULL, NULL);
367 atomic_set(&new_bh->b_count, 1);
368 jbd_unlock_bh_state(bh_in);
369
370 new_jh = journal_add_journal_head(new_bh); /* This sleeps */
371
372 set_bh_page(new_bh, new_page, new_offset); 370 set_bh_page(new_bh, new_page, new_offset);
373 new_jh->b_transaction = NULL; 371 new_jh->b_transaction = NULL;
374 new_bh->b_size = jh2bh(jh_in)->b_size; 372 new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -385,7 +383,11 @@ repeat:
385 * copying is moved to the transaction's shadow queue. 383 * copying is moved to the transaction's shadow queue.
386 */ 384 */
387 JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 385 JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
388 journal_file_buffer(jh_in, transaction, BJ_Shadow); 386 spin_lock(&journal->j_list_lock);
387 __journal_file_buffer(jh_in, transaction, BJ_Shadow);
388 spin_unlock(&journal->j_list_lock);
389 jbd_unlock_bh_state(bh_in);
390
389 JBUFFER_TRACE(new_jh, "file as BJ_IO"); 391 JBUFFER_TRACE(new_jh, "file as BJ_IO");
390 journal_file_buffer(new_jh, transaction, BJ_IO); 392 journal_file_buffer(new_jh, transaction, BJ_IO);
391 393
@@ -848,6 +850,12 @@ static int journal_reset(journal_t *journal)
848 850
849 first = be32_to_cpu(sb->s_first); 851 first = be32_to_cpu(sb->s_first);
850 last = be32_to_cpu(sb->s_maxlen); 852 last = be32_to_cpu(sb->s_maxlen);
853 if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) {
854 printk(KERN_ERR "JBD: Journal too short (blocks %lu-%lu).\n",
855 first, last);
856 journal_fail_superblock(journal);
857 return -EINVAL;
858 }
851 859
852 journal->j_first = first; 860 journal->j_first = first;
853 journal->j_last = last; 861 journal->j_last = last;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 73242ba7c7b1..c03ac11f74be 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -489,34 +489,15 @@ void journal_unlock_updates (journal_t *journal)
489 wake_up(&journal->j_wait_transaction_locked); 489 wake_up(&journal->j_wait_transaction_locked);
490} 490}
491 491
492/* 492static void warn_dirty_buffer(struct buffer_head *bh)
493 * Report any unexpected dirty buffers which turn up. Normally those
494 * indicate an error, but they can occur if the user is running (say)
495 * tune2fs to modify the live filesystem, so we need the option of
496 * continuing as gracefully as possible. #
497 *
498 * The caller should already hold the journal lock and
499 * j_list_lock spinlock: most callers will need those anyway
500 * in order to probe the buffer's journaling state safely.
501 */
502static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
503{ 493{
504 int jlist; 494 char b[BDEVNAME_SIZE];
505
506 /* If this buffer is one which might reasonably be dirty
507 * --- ie. data, or not part of this journal --- then
508 * we're OK to leave it alone, but otherwise we need to
509 * move the dirty bit to the journal's own internal
510 * JBDDirty bit. */
511 jlist = jh->b_jlist;
512 495
513 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 496 printk(KERN_WARNING
514 jlist == BJ_Shadow || jlist == BJ_Forget) { 497 "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
515 struct buffer_head *bh = jh2bh(jh); 498 "There's a risk of filesystem corruption in case of system "
516 499 "crash.\n",
517 if (test_clear_buffer_dirty(bh)) 500 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
518 set_buffer_jbddirty(bh);
519 }
520} 501}
521 502
522/* 503/*
@@ -583,14 +564,16 @@ repeat:
583 if (jh->b_next_transaction) 564 if (jh->b_next_transaction)
584 J_ASSERT_JH(jh, jh->b_next_transaction == 565 J_ASSERT_JH(jh, jh->b_next_transaction ==
585 transaction); 566 transaction);
567 warn_dirty_buffer(bh);
586 } 568 }
587 /* 569 /*
588 * In any case we need to clean the dirty flag and we must 570 * In any case we need to clean the dirty flag and we must
589 * do it under the buffer lock to be sure we don't race 571 * do it under the buffer lock to be sure we don't race
590 * with running write-out. 572 * with running write-out.
591 */ 573 */
592 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 574 JBUFFER_TRACE(jh, "Journalling dirty buffer");
593 jbd_unexpected_dirty_buffer(jh); 575 clear_buffer_dirty(bh);
576 set_buffer_jbddirty(bh);
594 } 577 }
595 578
596 unlock_buffer(bh); 579 unlock_buffer(bh);
@@ -826,6 +809,15 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
826 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); 809 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
827 810
828 if (jh->b_transaction == NULL) { 811 if (jh->b_transaction == NULL) {
812 /*
813 * Previous journal_forget() could have left the buffer
814 * with jbddirty bit set because it was being committed. When
815 * the commit finished, we've filed the buffer for
816 * checkpointing and marked it dirty. Now we are reallocating
817 * the buffer so the transaction freeing it must have
818 * committed and so it's safe to clear the dirty bit.
819 */
820 clear_buffer_dirty(jh2bh(jh));
829 jh->b_transaction = transaction; 821 jh->b_transaction = transaction;
830 822
831 /* first access by this transaction */ 823 /* first access by this transaction */
@@ -1782,8 +1774,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1782 1774
1783 if (jh->b_cp_transaction) { 1775 if (jh->b_cp_transaction) {
1784 JBUFFER_TRACE(jh, "on running+cp transaction"); 1776 JBUFFER_TRACE(jh, "on running+cp transaction");
1777 /*
1778 * We don't want to write the buffer anymore, clear the
1779 * bit so that we don't confuse checks in
1780 * __journal_file_buffer
1781 */
1782 clear_buffer_dirty(bh);
1785 __journal_file_buffer(jh, transaction, BJ_Forget); 1783 __journal_file_buffer(jh, transaction, BJ_Forget);
1786 clear_buffer_jbddirty(bh);
1787 may_free = 0; 1784 may_free = 0;
1788 } else { 1785 } else {
1789 JBUFFER_TRACE(jh, "on running transaction"); 1786 JBUFFER_TRACE(jh, "on running transaction");
@@ -2041,12 +2038,17 @@ void __journal_file_buffer(struct journal_head *jh,
2041 if (jh->b_transaction && jh->b_jlist == jlist) 2038 if (jh->b_transaction && jh->b_jlist == jlist)
2042 return; 2039 return;
2043 2040
2044 /* The following list of buffer states needs to be consistent
2045 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
2046 * state. */
2047
2048 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 2041 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
2049 jlist == BJ_Shadow || jlist == BJ_Forget) { 2042 jlist == BJ_Shadow || jlist == BJ_Forget) {
2043 /*
2044 * For metadata buffers, we track dirty bit in buffer_jbddirty
2045 * instead of buffer_dirty. We should not see a dirty bit set
2046 * here because we clear it in do_get_write_access but e.g.
2047 * tune2fs can modify the sb and set the dirty bit at any time
2048 * so we try to gracefully handle that.
2049 */
2050 if (buffer_dirty(bh))
2051 warn_dirty_buffer(bh);
2050 if (test_clear_buffer_dirty(bh) || 2052 if (test_clear_buffer_dirty(bh) ||
2051 test_clear_buffer_jbddirty(bh)) 2053 test_clear_buffer_jbddirty(bh))
2052 was_dirty = 1; 2054 was_dirty = 1;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 18bfd5dab642..e378cb383979 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
297 unsigned int new_offset; 297 unsigned int new_offset;
298 struct buffer_head *bh_in = jh2bh(jh_in); 298 struct buffer_head *bh_in = jh2bh(jh_in);
299 struct jbd2_buffer_trigger_type *triggers; 299 struct jbd2_buffer_trigger_type *triggers;
300 journal_t *journal = transaction->t_journal;
300 301
301 /* 302 /*
302 * The buffer really shouldn't be locked: only the current committing 303 * The buffer really shouldn't be locked: only the current committing
@@ -310,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
310 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 311 J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
311 312
312 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 313 new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
314 /* keep subsequent assertions sane */
315 new_bh->b_state = 0;
316 init_buffer(new_bh, NULL, NULL);
317 atomic_set(&new_bh->b_count, 1);
318 new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
313 319
314 /* 320 /*
315 * If a new transaction has already done a buffer copy-out, then 321 * If a new transaction has already done a buffer copy-out, then
@@ -388,14 +394,6 @@ repeat:
388 kunmap_atomic(mapped_data, KM_USER0); 394 kunmap_atomic(mapped_data, KM_USER0);
389 } 395 }
390 396
391 /* keep subsequent assertions sane */
392 new_bh->b_state = 0;
393 init_buffer(new_bh, NULL, NULL);
394 atomic_set(&new_bh->b_count, 1);
395 jbd_unlock_bh_state(bh_in);
396
397 new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
398
399 set_bh_page(new_bh, new_page, new_offset); 397 set_bh_page(new_bh, new_page, new_offset);
400 new_jh->b_transaction = NULL; 398 new_jh->b_transaction = NULL;
401 new_bh->b_size = jh2bh(jh_in)->b_size; 399 new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -412,7 +410,11 @@ repeat:
412 * copying is moved to the transaction's shadow queue. 410 * copying is moved to the transaction's shadow queue.
413 */ 411 */
414 JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 412 JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
415 jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); 413 spin_lock(&journal->j_list_lock);
414 __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
415 spin_unlock(&journal->j_list_lock);
416 jbd_unlock_bh_state(bh_in);
417
416 JBUFFER_TRACE(new_jh, "file as BJ_IO"); 418 JBUFFER_TRACE(new_jh, "file as BJ_IO");
417 jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); 419 jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
418 420
@@ -2410,6 +2412,7 @@ const char *jbd2_dev_to_name(dev_t device)
2410 int i = hash_32(device, CACHE_SIZE_BITS); 2412 int i = hash_32(device, CACHE_SIZE_BITS);
2411 char *ret; 2413 char *ret;
2412 struct block_device *bd; 2414 struct block_device *bd;
2415 static struct devname_cache *new_dev;
2413 2416
2414 rcu_read_lock(); 2417 rcu_read_lock();
2415 if (devcache[i] && devcache[i]->device == device) { 2418 if (devcache[i] && devcache[i]->device == device) {
@@ -2419,20 +2422,20 @@ const char *jbd2_dev_to_name(dev_t device)
2419 } 2422 }
2420 rcu_read_unlock(); 2423 rcu_read_unlock();
2421 2424
2425 new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
2426 if (!new_dev)
2427 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2422 spin_lock(&devname_cache_lock); 2428 spin_lock(&devname_cache_lock);
2423 if (devcache[i]) { 2429 if (devcache[i]) {
2424 if (devcache[i]->device == device) { 2430 if (devcache[i]->device == device) {
2431 kfree(new_dev);
2425 ret = devcache[i]->devname; 2432 ret = devcache[i]->devname;
2426 spin_unlock(&devname_cache_lock); 2433 spin_unlock(&devname_cache_lock);
2427 return ret; 2434 return ret;
2428 } 2435 }
2429 call_rcu(&devcache[i]->rcu, free_devcache); 2436 call_rcu(&devcache[i]->rcu, free_devcache);
2430 } 2437 }
2431 devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); 2438 devcache[i] = new_dev;
2432 if (!devcache[i]) {
2433 spin_unlock(&devname_cache_lock);
2434 return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
2435 }
2436 devcache[i]->device = device; 2439 devcache[i]->device = device;
2437 bd = bdget(device); 2440 bd = bdget(device);
2438 if (bd) { 2441 if (bd) {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 494501edba6b..6213ac728f30 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal)
499 wake_up(&journal->j_wait_transaction_locked); 499 wake_up(&journal->j_wait_transaction_locked);
500} 500}
501 501
502/* 502static void warn_dirty_buffer(struct buffer_head *bh)
503 * Report any unexpected dirty buffers which turn up. Normally those
504 * indicate an error, but they can occur if the user is running (say)
505 * tune2fs to modify the live filesystem, so we need the option of
506 * continuing as gracefully as possible. #
507 *
508 * The caller should already hold the journal lock and
509 * j_list_lock spinlock: most callers will need those anyway
510 * in order to probe the buffer's journaling state safely.
511 */
512static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
513{ 503{
514 int jlist; 504 char b[BDEVNAME_SIZE];
515
516 /* If this buffer is one which might reasonably be dirty
517 * --- ie. data, or not part of this journal --- then
518 * we're OK to leave it alone, but otherwise we need to
519 * move the dirty bit to the journal's own internal
520 * JBDDirty bit. */
521 jlist = jh->b_jlist;
522 505
523 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 506 printk(KERN_WARNING
524 jlist == BJ_Shadow || jlist == BJ_Forget) { 507 "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
525 struct buffer_head *bh = jh2bh(jh); 508 "There's a risk of filesystem corruption in case of system "
526 509 "crash.\n",
527 if (test_clear_buffer_dirty(bh)) 510 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
528 set_buffer_jbddirty(bh);
529 }
530} 511}
531 512
532/* 513/*
@@ -593,14 +574,16 @@ repeat:
593 if (jh->b_next_transaction) 574 if (jh->b_next_transaction)
594 J_ASSERT_JH(jh, jh->b_next_transaction == 575 J_ASSERT_JH(jh, jh->b_next_transaction ==
595 transaction); 576 transaction);
577 warn_dirty_buffer(bh);
596 } 578 }
597 /* 579 /*
598 * In any case we need to clean the dirty flag and we must 580 * In any case we need to clean the dirty flag and we must
599 * do it under the buffer lock to be sure we don't race 581 * do it under the buffer lock to be sure we don't race
600 * with running write-out. 582 * with running write-out.
601 */ 583 */
602 JBUFFER_TRACE(jh, "Unexpected dirty buffer"); 584 JBUFFER_TRACE(jh, "Journalling dirty buffer");
603 jbd_unexpected_dirty_buffer(jh); 585 clear_buffer_dirty(bh);
586 set_buffer_jbddirty(bh);
604 } 587 }
605 588
606 unlock_buffer(bh); 589 unlock_buffer(bh);
@@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
843 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); 826 J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
844 827
845 if (jh->b_transaction == NULL) { 828 if (jh->b_transaction == NULL) {
829 /*
830 * Previous jbd2_journal_forget() could have left the buffer
831 * with jbddirty bit set because it was being committed. When
832 * the commit finished, we've filed the buffer for
833 * checkpointing and marked it dirty. Now we are reallocating
834 * the buffer so the transaction freeing it must have
835 * committed and so it's safe to clear the dirty bit.
836 */
837 clear_buffer_dirty(jh2bh(jh));
846 jh->b_transaction = transaction; 838 jh->b_transaction = transaction;
847 839
848 /* first access by this transaction */ 840 /* first access by this transaction */
@@ -1644,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1644 1636
1645 if (jh->b_cp_transaction) { 1637 if (jh->b_cp_transaction) {
1646 JBUFFER_TRACE(jh, "on running+cp transaction"); 1638 JBUFFER_TRACE(jh, "on running+cp transaction");
1639 /*
1640 * We don't want to write the buffer anymore, clear the
1641 * bit so that we don't confuse checks in
1642 * __journal_file_buffer
1643 */
1644 clear_buffer_dirty(bh);
1647 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 1645 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1648 clear_buffer_jbddirty(bh);
1649 may_free = 0; 1646 may_free = 0;
1650 } else { 1647 } else {
1651 JBUFFER_TRACE(jh, "on running transaction"); 1648 JBUFFER_TRACE(jh, "on running transaction");
@@ -1896,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
1896 if (jh->b_transaction && jh->b_jlist == jlist) 1893 if (jh->b_transaction && jh->b_jlist == jlist)
1897 return; 1894 return;
1898 1895
1899 /* The following list of buffer states needs to be consistent
1900 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
1901 * state. */
1902
1903 if (jlist == BJ_Metadata || jlist == BJ_Reserved || 1896 if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
1904 jlist == BJ_Shadow || jlist == BJ_Forget) { 1897 jlist == BJ_Shadow || jlist == BJ_Forget) {
1898 /*
1899 * For metadata buffers, we track dirty bit in buffer_jbddirty
1900 * instead of buffer_dirty. We should not see a dirty bit set
1901 * here because we clear it in do_get_write_access but e.g.
1902 * tune2fs can modify the sb and set the dirty bit at any time
1903 * so we try to gracefully handle that.
1904 */
1905 if (buffer_dirty(bh))
1906 warn_dirty_buffer(bh);
1905 if (test_clear_buffer_dirty(bh) || 1907 if (test_clear_buffer_dirty(bh) ||
1906 test_clear_buffer_jbddirty(bh)) 1908 test_clear_buffer_jbddirty(bh))
1907 was_dirty = 1; 1909 was_dirty = 1;
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index a0244740b75a..b47679be118a 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -270,19 +270,21 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
270 D2({ 270 D2({
271 int i=0; 271 int i=0;
272 struct jffs2_raw_node_ref *this; 272 struct jffs2_raw_node_ref *this;
273 printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n" KERN_DEBUG); 273 printk(KERN_DEBUG "After remove_node_refs_from_ino_list: \n");
274 274
275 this = ic->nodes; 275 this = ic->nodes;
276 276
277 printk(KERN_DEBUG);
277 while(this) { 278 while(this) {
278 printk( "0x%08x(%d)->", ref_offset(this), ref_flags(this)); 279 printk(KERN_CONT "0x%08x(%d)->",
280 ref_offset(this), ref_flags(this));
279 if (++i == 5) { 281 if (++i == 5) {
280 printk("\n" KERN_DEBUG); 282 printk(KERN_DEBUG);
281 i=0; 283 i=0;
282 } 284 }
283 this = this->next_in_ino; 285 this = this->next_in_ino;
284 } 286 }
285 printk("\n"); 287 printk(KERN_CONT "\n");
286 }); 288 });
287 289
288 switch (ic->class) { 290 switch (ic->class) {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5edc2bf20581..23c947539864 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -99,7 +99,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
99 kunmap(pg); 99 kunmap(pg);
100 100
101 D2(printk(KERN_DEBUG "readpage finished\n")); 101 D2(printk(KERN_DEBUG "readpage finished\n"));
102 return 0; 102 return ret;
103} 103}
104 104
105int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg) 105int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 7515e73e2bfb..696686cc206e 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
130 if (jffs2_sum_active()) { 130 if (jffs2_sum_active()) {
131 s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 131 s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
132 if (!s) { 132 if (!s) {
133 kfree(flashbuf);
134 JFFS2_WARNING("Can't allocate memory for summary\n"); 133 JFFS2_WARNING("Can't allocate memory for summary\n");
135 return -ENOMEM; 134 ret = -ENOMEM;
135 goto out;
136 } 136 }
137 } 137 }
138 138
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 07a22caf2687..0035c021395a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -12,6 +12,7 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/smp_lock.h>
15#include <linux/init.h> 16#include <linux/init.h>
16#include <linux/list.h> 17#include <linux/list.h>
17#include <linux/fs.h> 18#include <linux/fs.h>
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 91fa3ad6e8c2..a29c7c3e3fb8 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -67,10 +67,8 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
67 acl = posix_acl_from_xattr(value, size); 67 acl = posix_acl_from_xattr(value, size);
68 } 68 }
69 kfree(value); 69 kfree(value);
70 if (!IS_ERR(acl)) { 70 if (!IS_ERR(acl))
71 set_cached_acl(inode, type, acl); 71 set_cached_acl(inode, type, acl);
72 posix_acl_release(acl);
73 }
74 return acl; 72 return acl;
75} 73}
76 74
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index f2fdcbce143e..4336adba952a 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/smp_lock.h>
10#include <linux/types.h> 11#include <linux/types.h>
11#include <linux/errno.h> 12#include <linux/errno.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 99d737bd4325..7cb076ac6b45 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -87,18 +87,6 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
87 return hash & (NLM_HOST_NRHASH - 1); 87 return hash & (NLM_HOST_NRHASH - 1);
88} 88}
89 89
90static void nlm_clear_port(struct sockaddr *sap)
91{
92 switch (sap->sa_family) {
93 case AF_INET:
94 ((struct sockaddr_in *)sap)->sin_port = 0;
95 break;
96 case AF_INET6:
97 ((struct sockaddr_in6 *)sap)->sin6_port = 0;
98 break;
99 }
100}
101
102/* 90/*
103 * Common host lookup routine for server & client 91 * Common host lookup routine for server & client
104 */ 92 */
@@ -177,7 +165,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
177 host->h_addrbuf = nsm->sm_addrbuf; 165 host->h_addrbuf = nsm->sm_addrbuf;
178 memcpy(nlm_addr(host), ni->sap, ni->salen); 166 memcpy(nlm_addr(host), ni->sap, ni->salen);
179 host->h_addrlen = ni->salen; 167 host->h_addrlen = ni->salen;
180 nlm_clear_port(nlm_addr(host)); 168 rpc_set_port(nlm_addr(host), 0);
181 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); 169 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
182 host->h_version = ni->version; 170 host->h_version = ni->version;
183 host->h_proto = ni->protocol; 171 host->h_proto = ni->protocol;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 7fce1b525849..30c933188dd7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,43 +61,6 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
61 return (struct sockaddr *)&nsm->sm_addr; 61 return (struct sockaddr *)&nsm->sm_addr;
62} 62}
63 63
64static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf,
65 const size_t len)
66{
67 const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
68 snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
69}
70
71static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf,
72 const size_t len)
73{
74 const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
75
76 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
77 snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
78 else if (sin6->sin6_scope_id != 0)
79 snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr,
80 sin6->sin6_scope_id);
81 else
82 snprintf(buf, len, "%pI6", &sin6->sin6_addr);
83}
84
85static void nsm_display_address(const struct sockaddr *sap,
86 char *buf, const size_t len)
87{
88 switch (sap->sa_family) {
89 case AF_INET:
90 nsm_display_ipv4_address(sap, buf, len);
91 break;
92 case AF_INET6:
93 nsm_display_ipv6_address(sap, buf, len);
94 break;
95 default:
96 snprintf(buf, len, "unsupported address family");
97 break;
98 }
99}
100
101static struct rpc_clnt *nsm_create(void) 64static struct rpc_clnt *nsm_create(void)
102{ 65{
103 struct sockaddr_in sin = { 66 struct sockaddr_in sin = {
@@ -307,8 +270,11 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
307 memcpy(nsm_addr(new), sap, salen); 270 memcpy(nsm_addr(new), sap, salen);
308 new->sm_addrlen = salen; 271 new->sm_addrlen = salen;
309 nsm_init_private(new); 272 nsm_init_private(new);
310 nsm_display_address((const struct sockaddr *)&new->sm_addr, 273
311 new->sm_addrbuf, sizeof(new->sm_addrbuf)); 274 if (rpc_ntop(nsm_addr(new), new->sm_addrbuf,
275 sizeof(new->sm_addrbuf)) == 0)
276 (void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf),
277 "unsupported address family");
312 memcpy(new->sm_name, hostname, hostname_len); 278 memcpy(new->sm_name, hostname, hostname_len);
313 new->sm_name[hostname_len] = '\0'; 279 new->sm_name[hostname_len] = '\0';
314 280
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 1725037374c5..bd173a6ca3b1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -10,6 +10,7 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h>
13#include <linux/in.h> 14#include <linux/in.h>
14#include <linux/sunrpc/svc.h> 15#include <linux/sunrpc/svc.h>
15#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3688e55901fc..e1d28ddd2169 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -10,6 +10,7 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/smp_lock.h>
13#include <linux/in.h> 14#include <linux/in.h>
14#include <linux/sunrpc/svc.h> 15#include <linux/sunrpc/svc.h>
15#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
diff --git a/fs/namei.c b/fs/namei.c
index 5b961eb71cbf..f3c5b278895a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1761,6 +1761,10 @@ do_last:
1761 goto exit; 1761 goto exit;
1762 } 1762 }
1763 filp = nameidata_to_filp(&nd, open_flag); 1763 filp = nameidata_to_filp(&nd, open_flag);
1764 if (IS_ERR(filp))
1765 ima_counts_put(&nd.path,
1766 acc_mode & (MAY_READ | MAY_WRITE |
1767 MAY_EXEC));
1764 mnt_drop_write(nd.path.mnt); 1768 mnt_drop_write(nd.path.mnt);
1765 if (nd.root.mnt) 1769 if (nd.root.mnt)
1766 path_put(&nd.root); 1770 path_put(&nd.root);
@@ -1817,6 +1821,9 @@ ok:
1817 goto exit; 1821 goto exit;
1818 } 1822 }
1819 filp = nameidata_to_filp(&nd, open_flag); 1823 filp = nameidata_to_filp(&nd, open_flag);
1824 if (IS_ERR(filp))
1825 ima_counts_put(&nd.path,
1826 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
1820 /* 1827 /*
1821 * It is now safe to drop the mnt write 1828 * It is now safe to drop the mnt write
1822 * because the filp has had a write taken 1829 * because the filp has had a write taken
diff --git a/fs/namespace.c b/fs/namespace.c
index 3dc283fd4716..7230787d18b0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -22,6 +22,7 @@
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/mnt_namespace.h> 23#include <linux/mnt_namespace.h>
24#include <linux/namei.h> 24#include <linux/namei.h>
25#include <linux/nsproxy.h>
25#include <linux/security.h> 26#include <linux/security.h>
26#include <linux/mount.h> 27#include <linux/mount.h>
27#include <linux/ramfs.h> 28#include <linux/ramfs.h>
@@ -315,7 +316,8 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
315 */ 316 */
316int mnt_want_write_file(struct file *file) 317int mnt_want_write_file(struct file *file)
317{ 318{
318 if (!(file->f_mode & FMODE_WRITE)) 319 struct inode *inode = file->f_dentry->d_inode;
320 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
319 return mnt_want_write(file->f_path.mnt); 321 return mnt_want_write(file->f_path.mnt);
320 else 322 else
321 return mnt_clone_write(file->f_path.mnt); 323 return mnt_clone_write(file->f_path.mnt);
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 845159814de2..da7fda639eac 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o 9 write.o namespace.o mount_clnt.o \
10 dns_resolve.o cache_lib.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 12nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 13nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
new file mode 100644
index 000000000000..b4ffd0146ea6
--- /dev/null
+++ b/fs/nfs/cache_lib.c
@@ -0,0 +1,140 @@
1/*
2 * linux/fs/nfs/cache_lib.c
3 *
4 * Helper routines for the NFS client caches
5 *
6 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
7 */
8#include <linux/kmod.h>
9#include <linux/module.h>
10#include <linux/moduleparam.h>
11#include <linux/mount.h>
12#include <linux/namei.h>
13#include <linux/sunrpc/cache.h>
14#include <linux/sunrpc/rpc_pipe_fs.h>
15
16#include "cache_lib.h"
17
18#define NFS_CACHE_UPCALL_PATHLEN 256
19#define NFS_CACHE_UPCALL_TIMEOUT 15
20
21static char nfs_cache_getent_prog[NFS_CACHE_UPCALL_PATHLEN] =
22 "/sbin/nfs_cache_getent";
23static unsigned long nfs_cache_getent_timeout = NFS_CACHE_UPCALL_TIMEOUT;
24
25module_param_string(cache_getent, nfs_cache_getent_prog,
26 sizeof(nfs_cache_getent_prog), 0600);
27MODULE_PARM_DESC(cache_getent, "Path to the client cache upcall program");
28module_param_named(cache_getent_timeout, nfs_cache_getent_timeout, ulong, 0600);
29MODULE_PARM_DESC(cache_getent_timeout, "Timeout (in seconds) after which "
30 "the cache upcall is assumed to have failed");
31
32int nfs_cache_upcall(struct cache_detail *cd, char *entry_name)
33{
34 static char *envp[] = { "HOME=/",
35 "TERM=linux",
36 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
37 NULL
38 };
39 char *argv[] = {
40 nfs_cache_getent_prog,
41 cd->name,
42 entry_name,
43 NULL
44 };
45 int ret = -EACCES;
46
47 if (nfs_cache_getent_prog[0] == '\0')
48 goto out;
49 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
50 /*
51 * Disable the upcall mechanism if we're getting an ENOENT or
52 * EACCES error. The admin can re-enable it on the fly by using
53 * sysfs to set the 'cache_getent' parameter once the problem
54 * has been fixed.
55 */
56 if (ret == -ENOENT || ret == -EACCES)
57 nfs_cache_getent_prog[0] = '\0';
58out:
59 return ret > 0 ? 0 : ret;
60}
61
62/*
63 * Deferred request handling
64 */
65void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
66{
67 if (atomic_dec_and_test(&dreq->count))
68 kfree(dreq);
69}
70
71static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany)
72{
73 struct nfs_cache_defer_req *dreq;
74
75 dreq = container_of(d, struct nfs_cache_defer_req, deferred_req);
76
77 complete_all(&dreq->completion);
78 nfs_cache_defer_req_put(dreq);
79}
80
81static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
82{
83 struct nfs_cache_defer_req *dreq;
84
85 dreq = container_of(req, struct nfs_cache_defer_req, req);
86 dreq->deferred_req.revisit = nfs_dns_cache_revisit;
87 atomic_inc(&dreq->count);
88
89 return &dreq->deferred_req;
90}
91
92struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
93{
94 struct nfs_cache_defer_req *dreq;
95
96 dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
97 if (dreq) {
98 init_completion(&dreq->completion);
99 atomic_set(&dreq->count, 1);
100 dreq->req.defer = nfs_dns_cache_defer;
101 }
102 return dreq;
103}
104
105int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
106{
107 if (wait_for_completion_timeout(&dreq->completion,
108 nfs_cache_getent_timeout * HZ) == 0)
109 return -ETIMEDOUT;
110 return 0;
111}
112
113int nfs_cache_register(struct cache_detail *cd)
114{
115 struct nameidata nd;
116 struct vfsmount *mnt;
117 int ret;
118
119 mnt = rpc_get_mount();
120 if (IS_ERR(mnt))
121 return PTR_ERR(mnt);
122 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd);
123 if (ret)
124 goto err;
125 ret = sunrpc_cache_register_pipefs(nd.path.dentry,
126 cd->name, 0600, cd);
127 path_put(&nd.path);
128 if (!ret)
129 return ret;
130err:
131 rpc_put_mount();
132 return ret;
133}
134
135void nfs_cache_unregister(struct cache_detail *cd)
136{
137 sunrpc_cache_unregister_pipefs(cd);
138 rpc_put_mount();
139}
140
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
new file mode 100644
index 000000000000..76f856e284e4
--- /dev/null
+++ b/fs/nfs/cache_lib.h
@@ -0,0 +1,27 @@
1/*
2 * Helper routines for the NFS client caches
3 *
4 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
5 */
6
7#include <linux/completion.h>
8#include <linux/sunrpc/cache.h>
9#include <asm/atomic.h>
10
11/*
12 * Deferred request handling
13 */
14struct nfs_cache_defer_req {
15 struct cache_req req;
16 struct cache_deferred_req deferred_req;
17 struct completion completion;
18 atomic_t count;
19};
20
21extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
22extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
23extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
24extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
25
26extern int nfs_cache_register(struct cache_detail *cd);
27extern void nfs_cache_unregister(struct cache_detail *cd);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7f604c7941fb..293fa0528a6e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -43,21 +43,29 @@ static struct svc_program nfs4_callback_program;
43unsigned int nfs_callback_set_tcpport; 43unsigned int nfs_callback_set_tcpport;
44unsigned short nfs_callback_tcpport; 44unsigned short nfs_callback_tcpport;
45unsigned short nfs_callback_tcpport6; 45unsigned short nfs_callback_tcpport6;
46static const int nfs_set_port_min = 0; 46#define NFS_CALLBACK_MAXPORTNR (65535U)
47static const int nfs_set_port_max = 65535;
48 47
49static int param_set_port(const char *val, struct kernel_param *kp) 48static int param_set_portnr(const char *val, struct kernel_param *kp)
50{ 49{
51 char *endp; 50 unsigned long num;
52 int num = simple_strtol(val, &endp, 0); 51 int ret;
53 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) 52
53 if (!val)
54 return -EINVAL;
55 ret = strict_strtoul(val, 0, &num);
56 if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
54 return -EINVAL; 57 return -EINVAL;
55 *((int *)kp->arg) = num; 58 *((unsigned int *)kp->arg) = num;
56 return 0; 59 return 0;
57} 60}
58 61
59module_param_call(callback_tcpport, param_set_port, param_get_int, 62static int param_get_portnr(char *buffer, struct kernel_param *kp)
60 &nfs_callback_set_tcpport, 0644); 63{
64 return param_get_uint(buffer, kp);
65}
66#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
67
68module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
61 69
62/* 70/*
63 * This is the NFSv4 callback kernel thread. 71 * This is the NFSv4 callback kernel thread.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c2d061675d80..d36925f9b952 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server,
809 /* Initialise the client representation from the mount data */ 809 /* Initialise the client representation from the mount data */
810 server->flags = data->flags; 810 server->flags = data->flags;
811 server->options = data->options; 811 server->options = data->options;
812 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
813 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
814 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
812 815
813 if (data->rsize) 816 if (data->rsize)
814 server->rsize = nfs_block_size(data->rsize, NULL); 817 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1074,10 +1077,6 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1074 (unsigned long long) server->fsid.major, 1077 (unsigned long long) server->fsid.major,
1075 (unsigned long long) server->fsid.minor); 1078 (unsigned long long) server->fsid.minor);
1076 1079
1077 BUG_ON(!server->nfs_client);
1078 BUG_ON(!server->nfs_client->rpc_ops);
1079 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1080
1081 spin_lock(&nfs_client_lock); 1080 spin_lock(&nfs_client_lock);
1082 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); 1081 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1083 list_add_tail(&server->master_link, &nfs_volume_list); 1082 list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1242,20 +1241,6 @@ error:
1242 return error; 1241 return error;
1243} 1242}
1244 1243
1245/*
1246 * Initialize a session.
1247 * Note: save the mount rsize and wsize for create_server negotiation.
1248 */
1249static void nfs4_init_session(struct nfs_client *clp,
1250 unsigned int wsize, unsigned int rsize)
1251{
1252#if defined(CONFIG_NFS_V4_1)
1253 if (nfs4_has_session(clp)) {
1254 clp->cl_session->fc_attrs.max_rqst_sz = wsize;
1255 clp->cl_session->fc_attrs.max_resp_sz = rsize;
1256 }
1257#endif /* CONFIG_NFS_V4_1 */
1258}
1259 1244
1260/* 1245/*
1261 * Session has been established, and the client marked ready. 1246 * Session has been established, and the client marked ready.
@@ -1288,7 +1273,7 @@ static int nfs4_init_server(struct nfs_server *server,
1288 1273
1289 /* Initialise the client representation from the mount data */ 1274 /* Initialise the client representation from the mount data */
1290 server->flags = data->flags; 1275 server->flags = data->flags;
1291 server->caps |= NFS_CAP_ATOMIC_OPEN; 1276 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
1292 server->options = data->options; 1277 server->options = data->options;
1293 1278
1294 /* Get a client record */ 1279 /* Get a client record */
@@ -1350,7 +1335,9 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1350 BUG_ON(!server->nfs_client->rpc_ops); 1335 BUG_ON(!server->nfs_client->rpc_ops);
1351 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1336 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1352 1337
1353 nfs4_init_session(server->nfs_client, server->wsize, server->rsize); 1338 error = nfs4_init_session(server);
1339 if (error < 0)
1340 goto error;
1354 1341
1355 /* Probe the root fh to retrieve its FSID */ 1342 /* Probe the root fh to retrieve its FSID */
1356 error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); 1343 error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path);
@@ -1371,10 +1358,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1371 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) 1358 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1372 server->namelen = NFS4_MAXNAMLEN; 1359 server->namelen = NFS4_MAXNAMLEN;
1373 1360
1374 BUG_ON(!server->nfs_client);
1375 BUG_ON(!server->nfs_client->rpc_ops);
1376 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1377
1378 spin_lock(&nfs_client_lock); 1361 spin_lock(&nfs_client_lock);
1379 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); 1362 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1380 list_add_tail(&server->master_link, &nfs_volume_list); 1363 list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1412,7 +1395,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1412 1395
1413 /* Initialise the client representation from the parent server */ 1396 /* Initialise the client representation from the parent server */
1414 nfs_server_copy_userdata(server, parent_server); 1397 nfs_server_copy_userdata(server, parent_server);
1415 server->caps |= NFS_CAP_ATOMIC_OPEN; 1398 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
1416 1399
1417 /* Get a client representation. 1400 /* Get a client representation.
1418 * Note: NFSv4 always uses TCP, */ 1401 * Note: NFSv4 always uses TCP, */
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index af05b918cb5b..6dd48a4405b4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -10,6 +10,7 @@
10#include <linux/kthread.h> 10#include <linux/kthread.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/smp_lock.h>
13#include <linux/spinlock.h> 14#include <linux/spinlock.h>
14 15
15#include <linux/nfs4.h> 16#include <linux/nfs4.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 89f98e9a024b..32062c33c859 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -29,7 +29,6 @@
29#include <linux/nfs_fs.h> 29#include <linux/nfs_fs.h>
30#include <linux/nfs_mount.h> 30#include <linux/nfs_mount.h>
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/smp_lock.h>
33#include <linux/pagevec.h> 32#include <linux/pagevec.h>
34#include <linux/namei.h> 33#include <linux/namei.h>
35#include <linux/mount.h> 34#include <linux/mount.h>
@@ -1026,12 +1025,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1026 res = NULL; 1025 res = NULL;
1027 goto out; 1026 goto out;
1028 /* This turned out not to be a regular file */ 1027 /* This turned out not to be a regular file */
1029 case -EISDIR:
1030 case -ENOTDIR: 1028 case -ENOTDIR:
1031 goto no_open; 1029 goto no_open;
1032 case -ELOOP: 1030 case -ELOOP:
1033 if (!(nd->intent.open.flags & O_NOFOLLOW)) 1031 if (!(nd->intent.open.flags & O_NOFOLLOW))
1034 goto no_open; 1032 goto no_open;
1033 /* case -EISDIR: */
1035 /* case -EINVAL: */ 1034 /* case -EINVAL: */
1036 default: 1035 default:
1037 goto out; 1036 goto out;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489fc01a3204..6c3210099d51 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
255 255
256 if (put_dreq(dreq)) 256 if (put_dreq(dreq))
257 nfs_direct_complete(dreq); 257 nfs_direct_complete(dreq);
258 nfs_readdata_release(calldata); 258 nfs_readdata_free(data);
259} 259}
260 260
261static const struct rpc_call_ops nfs_read_direct_ops = { 261static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
314 data->npages, 1, 0, data->pagevec, NULL); 314 data->npages, 1, 0, data->pagevec, NULL);
315 up_read(&current->mm->mmap_sem); 315 up_read(&current->mm->mmap_sem);
316 if (result < 0) { 316 if (result < 0) {
317 nfs_readdata_release(data); 317 nfs_readdata_free(data);
318 break; 318 break;
319 } 319 }
320 if ((unsigned)result < data->npages) { 320 if ((unsigned)result < data->npages) {
321 bytes = result * PAGE_SIZE; 321 bytes = result * PAGE_SIZE;
322 if (bytes <= pgbase) { 322 if (bytes <= pgbase) {
323 nfs_direct_release_pages(data->pagevec, result); 323 nfs_direct_release_pages(data->pagevec, result);
324 nfs_readdata_release(data); 324 nfs_readdata_free(data);
325 break; 325 break;
326 } 326 }
327 bytes -= pgbase; 327 bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
334 data->inode = inode; 334 data->inode = inode;
335 data->cred = msg.rpc_cred; 335 data->cred = msg.rpc_cred;
336 data->args.fh = NFS_FH(inode); 336 data->args.fh = NFS_FH(inode);
337 data->args.context = get_nfs_open_context(ctx); 337 data->args.context = ctx;
338 data->args.offset = pos; 338 data->args.offset = pos;
339 data->args.pgbase = pgbase; 339 data->args.pgbase = pgbase;
340 data->args.pages = data->pagevec; 340 data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); 441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
442 list_del(&data->pages); 442 list_del(&data->pages);
443 nfs_direct_release_pages(data->pagevec, data->npages); 443 nfs_direct_release_pages(data->pagevec, data->npages);
444 nfs_writedata_release(data); 444 nfs_writedata_free(data);
445 } 445 }
446} 446}
447 447
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
534 534
535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
536 nfs_direct_write_complete(dreq, data->inode); 536 nfs_direct_write_complete(dreq, data->inode);
537 nfs_commitdata_release(calldata); 537 nfs_commit_free(data);
538} 538}
539 539
540static const struct rpc_call_ops nfs_commit_direct_ops = { 540static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
570 data->args.fh = NFS_FH(data->inode); 570 data->args.fh = NFS_FH(data->inode);
571 data->args.offset = 0; 571 data->args.offset = 0;
572 data->args.count = 0; 572 data->args.count = 0;
573 data->args.context = get_nfs_open_context(dreq->ctx); 573 data->args.context = dreq->ctx;
574 data->res.count = 0; 574 data->res.count = 0;
575 data->res.fattr = &data->fattr; 575 data->res.fattr = &data->fattr;
576 data->res.verf = &data->verf; 576 data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
734 data->npages, 0, 0, data->pagevec, NULL); 734 data->npages, 0, 0, data->pagevec, NULL);
735 up_read(&current->mm->mmap_sem); 735 up_read(&current->mm->mmap_sem);
736 if (result < 0) { 736 if (result < 0) {
737 nfs_writedata_release(data); 737 nfs_writedata_free(data);
738 break; 738 break;
739 } 739 }
740 if ((unsigned)result < data->npages) { 740 if ((unsigned)result < data->npages) {
741 bytes = result * PAGE_SIZE; 741 bytes = result * PAGE_SIZE;
742 if (bytes <= pgbase) { 742 if (bytes <= pgbase) {
743 nfs_direct_release_pages(data->pagevec, result); 743 nfs_direct_release_pages(data->pagevec, result);
744 nfs_writedata_release(data); 744 nfs_writedata_free(data);
745 break; 745 break;
746 } 746 }
747 bytes -= pgbase; 747 bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
756 data->inode = inode; 756 data->inode = inode;
757 data->cred = msg.rpc_cred; 757 data->cred = msg.rpc_cred;
758 data->args.fh = NFS_FH(inode); 758 data->args.fh = NFS_FH(inode);
759 data->args.context = get_nfs_open_context(ctx); 759 data->args.context = ctx;
760 data->args.offset = pos; 760 data->args.offset = pos;
761 data->args.pgbase = pgbase; 761 data->args.pgbase = pgbase;
762 data->args.pages = data->pagevec; 762 data->args.pages = data->pagevec;
@@ -934,9 +934,6 @@ out:
934 * back into its cache. We let the server do generic write 934 * back into its cache. We let the server do generic write
935 * parameter checking and report problems. 935 * parameter checking and report problems.
936 * 936 *
937 * We also avoid an unnecessary invocation of generic_osync_inode(),
938 * as it is fairly meaningless to sync the metadata of an NFS file.
939 *
940 * We eliminate local atime updates, see direct read above. 937 * We eliminate local atime updates, see direct read above.
941 * 938 *
942 * We avoid unnecessary page cache invalidations for normal cached 939 * We avoid unnecessary page cache invalidations for normal cached
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
new file mode 100644
index 000000000000..f4d54ba97cc6
--- /dev/null
+++ b/fs/nfs/dns_resolve.c
@@ -0,0 +1,335 @@
1/*
2 * linux/fs/nfs/dns_resolve.c
3 *
4 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
5 *
6 * Resolves DNS hostnames into valid ip addresses
7 */
8
9#include <linux/hash.h>
10#include <linux/string.h>
11#include <linux/kmod.h>
12#include <linux/module.h>
13#include <linux/socket.h>
14#include <linux/seq_file.h>
15#include <linux/inet.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/sunrpc/cache.h>
18#include <linux/sunrpc/svcauth.h>
19
20#include "dns_resolve.h"
21#include "cache_lib.h"
22
23#define NFS_DNS_HASHBITS 4
24#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
25
26static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
27
28struct nfs_dns_ent {
29 struct cache_head h;
30
31 char *hostname;
32 size_t namelen;
33
34 struct sockaddr_storage addr;
35 size_t addrlen;
36};
37
38
39static void nfs_dns_ent_init(struct cache_head *cnew,
40 struct cache_head *ckey)
41{
42 struct nfs_dns_ent *new;
43 struct nfs_dns_ent *key;
44
45 new = container_of(cnew, struct nfs_dns_ent, h);
46 key = container_of(ckey, struct nfs_dns_ent, h);
47
48 kfree(new->hostname);
49 new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
50 if (new->hostname) {
51 new->namelen = key->namelen;
52 memcpy(&new->addr, &key->addr, key->addrlen);
53 new->addrlen = key->addrlen;
54 } else {
55 new->namelen = 0;
56 new->addrlen = 0;
57 }
58}
59
60static void nfs_dns_ent_put(struct kref *ref)
61{
62 struct nfs_dns_ent *item;
63
64 item = container_of(ref, struct nfs_dns_ent, h.ref);
65 kfree(item->hostname);
66 kfree(item);
67}
68
69static struct cache_head *nfs_dns_ent_alloc(void)
70{
71 struct nfs_dns_ent *item = kmalloc(sizeof(*item), GFP_KERNEL);
72
73 if (item != NULL) {
74 item->hostname = NULL;
75 item->namelen = 0;
76 item->addrlen = 0;
77 return &item->h;
78 }
79 return NULL;
80};
81
82static unsigned int nfs_dns_hash(const struct nfs_dns_ent *key)
83{
84 return hash_str(key->hostname, NFS_DNS_HASHBITS);
85}
86
87static void nfs_dns_request(struct cache_detail *cd,
88 struct cache_head *ch,
89 char **bpp, int *blen)
90{
91 struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
92
93 qword_add(bpp, blen, key->hostname);
94 (*bpp)[-1] = '\n';
95}
96
97static int nfs_dns_upcall(struct cache_detail *cd,
98 struct cache_head *ch)
99{
100 struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
101 int ret;
102
103 ret = nfs_cache_upcall(cd, key->hostname);
104 if (ret)
105 ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request);
106 return ret;
107}
108
109static int nfs_dns_match(struct cache_head *ca,
110 struct cache_head *cb)
111{
112 struct nfs_dns_ent *a;
113 struct nfs_dns_ent *b;
114
115 a = container_of(ca, struct nfs_dns_ent, h);
116 b = container_of(cb, struct nfs_dns_ent, h);
117
118 if (a->namelen == 0 || a->namelen != b->namelen)
119 return 0;
120 return memcmp(a->hostname, b->hostname, a->namelen) == 0;
121}
122
123static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
124 struct cache_head *h)
125{
126 struct nfs_dns_ent *item;
127 long ttl;
128
129 if (h == NULL) {
130 seq_puts(m, "# ip address hostname ttl\n");
131 return 0;
132 }
133 item = container_of(h, struct nfs_dns_ent, h);
134 ttl = (long)item->h.expiry_time - (long)get_seconds();
135 if (ttl < 0)
136 ttl = 0;
137
138 if (!test_bit(CACHE_NEGATIVE, &h->flags)) {
139 char buf[INET6_ADDRSTRLEN+IPV6_SCOPE_ID_LEN+1];
140
141 rpc_ntop((struct sockaddr *)&item->addr, buf, sizeof(buf));
142 seq_printf(m, "%15s ", buf);
143 } else
144 seq_puts(m, "<none> ");
145 seq_printf(m, "%15s %ld\n", item->hostname, ttl);
146 return 0;
147}
148
149struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd,
150 struct nfs_dns_ent *key)
151{
152 struct cache_head *ch;
153
154 ch = sunrpc_cache_lookup(cd,
155 &key->h,
156 nfs_dns_hash(key));
157 if (!ch)
158 return NULL;
159 return container_of(ch, struct nfs_dns_ent, h);
160}
161
162struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd,
163 struct nfs_dns_ent *new,
164 struct nfs_dns_ent *key)
165{
166 struct cache_head *ch;
167
168 ch = sunrpc_cache_update(cd,
169 &new->h, &key->h,
170 nfs_dns_hash(key));
171 if (!ch)
172 return NULL;
173 return container_of(ch, struct nfs_dns_ent, h);
174}
175
176static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
177{
178 char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
179 struct nfs_dns_ent key, *item;
180 unsigned long ttl;
181 ssize_t len;
182 int ret = -EINVAL;
183
184 if (buf[buflen-1] != '\n')
185 goto out;
186 buf[buflen-1] = '\0';
187
188 len = qword_get(&buf, buf1, sizeof(buf1));
189 if (len <= 0)
190 goto out;
191 key.addrlen = rpc_pton(buf1, len,
192 (struct sockaddr *)&key.addr,
193 sizeof(key.addr));
194
195 len = qword_get(&buf, buf1, sizeof(buf1));
196 if (len <= 0)
197 goto out;
198
199 key.hostname = buf1;
200 key.namelen = len;
201 memset(&key.h, 0, sizeof(key.h));
202
203 ttl = get_expiry(&buf);
204 if (ttl == 0)
205 goto out;
206 key.h.expiry_time = ttl + get_seconds();
207
208 ret = -ENOMEM;
209 item = nfs_dns_lookup(cd, &key);
210 if (item == NULL)
211 goto out;
212
213 if (key.addrlen == 0)
214 set_bit(CACHE_NEGATIVE, &key.h.flags);
215
216 item = nfs_dns_update(cd, &key, item);
217 if (item == NULL)
218 goto out;
219
220 ret = 0;
221 cache_put(&item->h, cd);
222out:
223 return ret;
224}
225
226static struct cache_detail nfs_dns_resolve = {
227 .owner = THIS_MODULE,
228 .hash_size = NFS_DNS_HASHTBL_SIZE,
229 .hash_table = nfs_dns_table,
230 .name = "dns_resolve",
231 .cache_put = nfs_dns_ent_put,
232 .cache_upcall = nfs_dns_upcall,
233 .cache_parse = nfs_dns_parse,
234 .cache_show = nfs_dns_show,
235 .match = nfs_dns_match,
236 .init = nfs_dns_ent_init,
237 .update = nfs_dns_ent_init,
238 .alloc = nfs_dns_ent_alloc,
239};
240
241static int do_cache_lookup(struct cache_detail *cd,
242 struct nfs_dns_ent *key,
243 struct nfs_dns_ent **item,
244 struct nfs_cache_defer_req *dreq)
245{
246 int ret = -ENOMEM;
247
248 *item = nfs_dns_lookup(cd, key);
249 if (*item) {
250 ret = cache_check(cd, &(*item)->h, &dreq->req);
251 if (ret)
252 *item = NULL;
253 }
254 return ret;
255}
256
257static int do_cache_lookup_nowait(struct cache_detail *cd,
258 struct nfs_dns_ent *key,
259 struct nfs_dns_ent **item)
260{
261 int ret = -ENOMEM;
262
263 *item = nfs_dns_lookup(cd, key);
264 if (!*item)
265 goto out_err;
266 ret = -ETIMEDOUT;
267 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
268 || (*item)->h.expiry_time < get_seconds()
269 || cd->flush_time > (*item)->h.last_refresh)
270 goto out_put;
271 ret = -ENOENT;
272 if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
273 goto out_put;
274 return 0;
275out_put:
276 cache_put(&(*item)->h, cd);
277out_err:
278 *item = NULL;
279 return ret;
280}
281
282static int do_cache_lookup_wait(struct cache_detail *cd,
283 struct nfs_dns_ent *key,
284 struct nfs_dns_ent **item)
285{
286 struct nfs_cache_defer_req *dreq;
287 int ret = -ENOMEM;
288
289 dreq = nfs_cache_defer_req_alloc();
290 if (!dreq)
291 goto out;
292 ret = do_cache_lookup(cd, key, item, dreq);
293 if (ret == -EAGAIN) {
294 ret = nfs_cache_wait_for_upcall(dreq);
295 if (!ret)
296 ret = do_cache_lookup_nowait(cd, key, item);
297 }
298 nfs_cache_defer_req_put(dreq);
299out:
300 return ret;
301}
302
303ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
304 struct sockaddr *sa, size_t salen)
305{
306 struct nfs_dns_ent key = {
307 .hostname = name,
308 .namelen = namelen,
309 };
310 struct nfs_dns_ent *item = NULL;
311 ssize_t ret;
312
313 ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item);
314 if (ret == 0) {
315 if (salen >= item->addrlen) {
316 memcpy(sa, &item->addr, item->addrlen);
317 ret = item->addrlen;
318 } else
319 ret = -EOVERFLOW;
320 cache_put(&item->h, &nfs_dns_resolve);
321 } else if (ret == -ENOENT)
322 ret = -ESRCH;
323 return ret;
324}
325
326int nfs_dns_resolver_init(void)
327{
328 return nfs_cache_register(&nfs_dns_resolve);
329}
330
331void nfs_dns_resolver_destroy(void)
332{
333 nfs_cache_unregister(&nfs_dns_resolve);
334}
335
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
new file mode 100644
index 000000000000..a3f0938babf7
--- /dev/null
+++ b/fs/nfs/dns_resolve.h
@@ -0,0 +1,14 @@
1/*
2 * Resolve DNS hostnames into valid ip addresses
3 */
4#ifndef __LINUX_FS_NFS_DNS_RESOLVE_H
5#define __LINUX_FS_NFS_DNS_RESOLVE_H
6
7#define NFS_DNS_HOSTNAME_MAXLEN (128)
8
9extern int nfs_dns_resolver_init(void);
10extern void nfs_dns_resolver_destroy(void);
11extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
12 struct sockaddr *sa, size_t salen);
13
14#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0055b813ec2c..5021b75d2d1e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/smp_lock.h>
30#include <linux/aio.h> 29#include <linux/aio.h>
31 30
32#include <asm/uaccess.h> 31#include <asm/uaccess.h>
@@ -329,6 +328,42 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
329} 328}
330 329
331/* 330/*
331 * Decide whether a read/modify/write cycle may be more efficient
332 * then a modify/write/read cycle when writing to a page in the
333 * page cache.
334 *
335 * The modify/write/read cycle may occur if a page is read before
336 * being completely filled by the writer. In this situation, the
337 * page must be completely written to stable storage on the server
338 * before it can be refilled by reading in the page from the server.
339 * This can lead to expensive, small, FILE_SYNC mode writes being
340 * done.
341 *
342 * It may be more efficient to read the page first if the file is
343 * open for reading in addition to writing, the page is not marked
344 * as Uptodate, it is not dirty or waiting to be committed,
345 * indicating that it was previously allocated and then modified,
346 * that there were valid bytes of data in that range of the file,
347 * and that the new data won't completely replace the old data in
348 * that range of the file.
349 */
350static int nfs_want_read_modify_write(struct file *file, struct page *page,
351 loff_t pos, unsigned len)
352{
353 unsigned int pglen = nfs_page_length(page);
354 unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
355 unsigned int end = offset + len;
356
357 if ((file->f_mode & FMODE_READ) && /* open for read? */
358 !PageUptodate(page) && /* Uptodate? */
359 !PagePrivate(page) && /* i/o request already? */
360 pglen && /* valid bytes of file? */
361 (end < pglen || offset)) /* replace all valid bytes? */
362 return 1;
363 return 0;
364}
365
366/*
332 * This does the "real" work of the write. We must allocate and lock the 367 * This does the "real" work of the write. We must allocate and lock the
333 * page to be sent back to the generic routine, which then copies the 368 * page to be sent back to the generic routine, which then copies the
334 * data from user space. 369 * data from user space.
@@ -341,15 +376,16 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
341 struct page **pagep, void **fsdata) 376 struct page **pagep, void **fsdata)
342{ 377{
343 int ret; 378 int ret;
344 pgoff_t index; 379 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
345 struct page *page; 380 struct page *page;
346 index = pos >> PAGE_CACHE_SHIFT; 381 int once_thru = 0;
347 382
348 dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", 383 dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
349 file->f_path.dentry->d_parent->d_name.name, 384 file->f_path.dentry->d_parent->d_name.name,
350 file->f_path.dentry->d_name.name, 385 file->f_path.dentry->d_name.name,
351 mapping->host->i_ino, len, (long long) pos); 386 mapping->host->i_ino, len, (long long) pos);
352 387
388start:
353 /* 389 /*
354 * Prevent starvation issues if someone is doing a consistency 390 * Prevent starvation issues if someone is doing a consistency
355 * sync-to-disk 391 * sync-to-disk
@@ -368,6 +404,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
368 if (ret) { 404 if (ret) {
369 unlock_page(page); 405 unlock_page(page);
370 page_cache_release(page); 406 page_cache_release(page);
407 } else if (!once_thru &&
408 nfs_want_read_modify_write(file, page, pos, len)) {
409 once_thru = 1;
410 ret = nfs_readpage(file, page);
411 page_cache_release(page);
412 if (!ret)
413 goto start;
371 } 414 }
372 return ret; 415 return ret;
373} 416}
@@ -480,6 +523,7 @@ const struct address_space_operations nfs_file_aops = {
480 .invalidatepage = nfs_invalidate_page, 523 .invalidatepage = nfs_invalidate_page,
481 .releasepage = nfs_release_page, 524 .releasepage = nfs_release_page,
482 .direct_IO = nfs_direct_IO, 525 .direct_IO = nfs_direct_IO,
526 .migratepage = nfs_migrate_page,
483 .launder_page = nfs_launder_page, 527 .launder_page = nfs_launder_page,
484}; 528};
485 529
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 46177cb87064..b35d2a616066 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -30,7 +30,6 @@
30#include <linux/nfs_idmap.h> 30#include <linux/nfs_idmap.h>
31#include <linux/vfs.h> 31#include <linux/vfs.h>
32#include <linux/namei.h> 32#include <linux/namei.h>
33#include <linux/mnt_namespace.h>
34#include <linux/security.h> 33#include <linux/security.h>
35 34
36#include <asm/system.h> 35#include <asm/system.h>
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 86147b0ab2cf..21a84d45916f 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
101 101
102static unsigned int fnvhash32(const void *, size_t); 102static unsigned int fnvhash32(const void *, size_t);
103 103
104static struct rpc_pipe_ops idmap_upcall_ops = { 104static const struct rpc_pipe_ops idmap_upcall_ops = {
105 .upcall = idmap_pipe_upcall, 105 .upcall = idmap_pipe_upcall,
106 .downcall = idmap_pipe_downcall, 106 .downcall = idmap_pipe_downcall,
107 .destroy_msg = idmap_pipe_destroy_msg, 107 .destroy_msg = idmap_pipe_destroy_msg,
@@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp)
119 if (idmap == NULL) 119 if (idmap == NULL)
120 return -ENOMEM; 120 return -ENOMEM;
121 121
122 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", 122 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry,
123 idmap, &idmap_upcall_ops, 0); 123 "idmap", idmap, &idmap_upcall_ops, 0);
124 if (IS_ERR(idmap->idmap_dentry)) { 124 if (IS_ERR(idmap->idmap_dentry)) {
125 error = PTR_ERR(idmap->idmap_dentry); 125 error = PTR_ERR(idmap->idmap_dentry);
126 kfree(idmap); 126 kfree(idmap);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 64f87194d390..060022b4651c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -30,7 +30,6 @@
30#include <linux/nfs_mount.h> 30#include <linux/nfs_mount.h>
31#include <linux/nfs4_mount.h> 31#include <linux/nfs4_mount.h>
32#include <linux/lockd/bind.h> 32#include <linux/lockd/bind.h>
33#include <linux/smp_lock.h>
34#include <linux/seq_file.h> 33#include <linux/seq_file.h>
35#include <linux/mount.h> 34#include <linux/mount.h>
36#include <linux/nfs_idmap.h> 35#include <linux/nfs_idmap.h>
@@ -47,6 +46,7 @@
47#include "iostat.h" 46#include "iostat.h"
48#include "internal.h" 47#include "internal.h"
49#include "fscache.h" 48#include "fscache.h"
49#include "dns_resolve.h"
50 50
51#define NFSDBG_FACILITY NFSDBG_VFS 51#define NFSDBG_FACILITY NFSDBG_VFS
52 52
@@ -287,6 +287,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
287 /* We can't support update_atime(), since the server will reset it */ 287 /* We can't support update_atime(), since the server will reset it */
288 inode->i_flags |= S_NOATIME|S_NOCMTIME; 288 inode->i_flags |= S_NOATIME|S_NOCMTIME;
289 inode->i_mode = fattr->mode; 289 inode->i_mode = fattr->mode;
290 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
291 && nfs_server_capable(inode, NFS_CAP_MODE))
292 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
293 | NFS_INO_INVALID_ACCESS
294 | NFS_INO_INVALID_ACL;
290 /* Why so? Because we want revalidate for devices/FIFOs, and 295 /* Why so? Because we want revalidate for devices/FIFOs, and
291 * that's precisely what we have in nfs_file_inode_operations. 296 * that's precisely what we have in nfs_file_inode_operations.
292 */ 297 */
@@ -331,20 +336,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
331 nfsi->attr_gencount = fattr->gencount; 336 nfsi->attr_gencount = fattr->gencount;
332 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 337 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
333 inode->i_atime = fattr->atime; 338 inode->i_atime = fattr->atime;
339 else if (nfs_server_capable(inode, NFS_CAP_ATIME))
340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
334 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 341 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
335 inode->i_mtime = fattr->mtime; 342 inode->i_mtime = fattr->mtime;
343 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
344 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
345 | NFS_INO_INVALID_DATA;
336 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 346 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
337 inode->i_ctime = fattr->ctime; 347 inode->i_ctime = fattr->ctime;
348 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
349 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
350 | NFS_INO_INVALID_ACCESS
351 | NFS_INO_INVALID_ACL;
338 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 352 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
339 nfsi->change_attr = fattr->change_attr; 353 nfsi->change_attr = fattr->change_attr;
354 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
355 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
356 | NFS_INO_INVALID_DATA;
340 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 357 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
341 inode->i_size = nfs_size_to_loff_t(fattr->size); 358 inode->i_size = nfs_size_to_loff_t(fattr->size);
359 else
360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
361 | NFS_INO_INVALID_DATA
362 | NFS_INO_REVAL_PAGECACHE;
342 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 363 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
343 inode->i_nlink = fattr->nlink; 364 inode->i_nlink = fattr->nlink;
365 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
366 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
344 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 367 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
345 inode->i_uid = fattr->uid; 368 inode->i_uid = fattr->uid;
369 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
370 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
371 | NFS_INO_INVALID_ACCESS
372 | NFS_INO_INVALID_ACL;
346 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 373 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
347 inode->i_gid = fattr->gid; 374 inode->i_gid = fattr->gid;
375 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
376 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
377 | NFS_INO_INVALID_ACCESS
378 | NFS_INO_INVALID_ACL;
348 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 379 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
349 inode->i_blocks = fattr->du.nfs2.blocks; 380 inode->i_blocks = fattr->du.nfs2.blocks;
350 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 381 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -1146,6 +1177,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1146 loff_t cur_isize, new_isize; 1177 loff_t cur_isize, new_isize;
1147 unsigned long invalid = 0; 1178 unsigned long invalid = 0;
1148 unsigned long now = jiffies; 1179 unsigned long now = jiffies;
1180 unsigned long save_cache_validity;
1149 1181
1150 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", 1182 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
1151 __func__, inode->i_sb->s_id, inode->i_ino, 1183 __func__, inode->i_sb->s_id, inode->i_ino,
@@ -1172,10 +1204,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1172 */ 1204 */
1173 nfsi->read_cache_jiffies = fattr->time_start; 1205 nfsi->read_cache_jiffies = fattr->time_start;
1174 1206
1175 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) 1207 save_cache_validity = nfsi->cache_validity;
1176 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR 1208 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
1177 | NFS_INO_INVALID_ATIME 1209 | NFS_INO_INVALID_ATIME
1178 | NFS_INO_REVAL_PAGECACHE); 1210 | NFS_INO_REVAL_FORCED
1211 | NFS_INO_REVAL_PAGECACHE);
1179 1212
1180 /* Do atomic weak cache consistency updates */ 1213 /* Do atomic weak cache consistency updates */
1181 nfs_wcc_update_inode(inode, fattr); 1214 nfs_wcc_update_inode(inode, fattr);
@@ -1190,7 +1223,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1190 nfs_force_lookup_revalidate(inode); 1223 nfs_force_lookup_revalidate(inode);
1191 nfsi->change_attr = fattr->change_attr; 1224 nfsi->change_attr = fattr->change_attr;
1192 } 1225 }
1193 } 1226 } else if (server->caps & NFS_CAP_CHANGE_ATTR)
1227 invalid |= save_cache_validity;
1194 1228
1195 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1229 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1196 /* NFSv2/v3: Check if the mtime agrees */ 1230 /* NFSv2/v3: Check if the mtime agrees */
@@ -1202,7 +1236,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1202 nfs_force_lookup_revalidate(inode); 1236 nfs_force_lookup_revalidate(inode);
1203 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1237 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1204 } 1238 }
1205 } 1239 } else if (server->caps & NFS_CAP_MTIME)
1240 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1241 | NFS_INO_INVALID_DATA
1242 | NFS_INO_REVAL_PAGECACHE
1243 | NFS_INO_REVAL_FORCED);
1244
1206 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1245 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1207 /* If ctime has changed we should definitely clear access+acl caches */ 1246 /* If ctime has changed we should definitely clear access+acl caches */
1208 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { 1247 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
@@ -1216,7 +1255,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1216 } 1255 }
1217 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1256 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1218 } 1257 }
1219 } 1258 } else if (server->caps & NFS_CAP_CTIME)
1259 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1260 | NFS_INO_INVALID_ACCESS
1261 | NFS_INO_INVALID_ACL
1262 | NFS_INO_REVAL_FORCED);
1220 1263
1221 /* Check if our cached file size is stale */ 1264 /* Check if our cached file size is stale */
1222 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 1265 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1232,30 +1275,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1232 dprintk("NFS: isize change on server for file %s/%ld\n", 1275 dprintk("NFS: isize change on server for file %s/%ld\n",
1233 inode->i_sb->s_id, inode->i_ino); 1276 inode->i_sb->s_id, inode->i_ino);
1234 } 1277 }
1235 } 1278 } else
1279 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1280 | NFS_INO_REVAL_PAGECACHE
1281 | NFS_INO_REVAL_FORCED);
1236 1282
1237 1283
1238 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 1284 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
1239 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); 1285 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1286 else if (server->caps & NFS_CAP_ATIME)
1287 invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
1288 | NFS_INO_REVAL_FORCED);
1240 1289
1241 if (fattr->valid & NFS_ATTR_FATTR_MODE) { 1290 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
1242 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { 1291 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
1243 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1292 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1244 inode->i_mode = fattr->mode; 1293 inode->i_mode = fattr->mode;
1245 } 1294 }
1246 } 1295 } else if (server->caps & NFS_CAP_MODE)
1296 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1297 | NFS_INO_INVALID_ACCESS
1298 | NFS_INO_INVALID_ACL
1299 | NFS_INO_REVAL_FORCED);
1300
1247 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1301 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1248 if (inode->i_uid != fattr->uid) { 1302 if (inode->i_uid != fattr->uid) {
1249 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1303 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1250 inode->i_uid = fattr->uid; 1304 inode->i_uid = fattr->uid;
1251 } 1305 }
1252 } 1306 } else if (server->caps & NFS_CAP_OWNER)
1307 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1308 | NFS_INO_INVALID_ACCESS
1309 | NFS_INO_INVALID_ACL
1310 | NFS_INO_REVAL_FORCED);
1311
1253 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1312 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1254 if (inode->i_gid != fattr->gid) { 1313 if (inode->i_gid != fattr->gid) {
1255 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1314 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1256 inode->i_gid = fattr->gid; 1315 inode->i_gid = fattr->gid;
1257 } 1316 }
1258 } 1317 } else if (server->caps & NFS_CAP_OWNER_GROUP)
1318 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1319 | NFS_INO_INVALID_ACCESS
1320 | NFS_INO_INVALID_ACL
1321 | NFS_INO_REVAL_FORCED);
1259 1322
1260 if (fattr->valid & NFS_ATTR_FATTR_NLINK) { 1323 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
1261 if (inode->i_nlink != fattr->nlink) { 1324 if (inode->i_nlink != fattr->nlink) {
@@ -1264,7 +1327,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1264 invalid |= NFS_INO_INVALID_DATA; 1327 invalid |= NFS_INO_INVALID_DATA;
1265 inode->i_nlink = fattr->nlink; 1328 inode->i_nlink = fattr->nlink;
1266 } 1329 }
1267 } 1330 } else if (server->caps & NFS_CAP_NLINK)
1331 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1332 | NFS_INO_REVAL_FORCED);
1268 1333
1269 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 1334 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
1270 /* 1335 /*
@@ -1294,9 +1359,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1294 || S_ISLNK(inode->i_mode))) 1359 || S_ISLNK(inode->i_mode)))
1295 invalid &= ~NFS_INO_INVALID_DATA; 1360 invalid &= ~NFS_INO_INVALID_DATA;
1296 if (!nfs_have_delegation(inode, FMODE_READ) || 1361 if (!nfs_have_delegation(inode, FMODE_READ) ||
1297 (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) 1362 (save_cache_validity & NFS_INO_REVAL_FORCED))
1298 nfsi->cache_validity |= invalid; 1363 nfsi->cache_validity |= invalid;
1299 nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
1300 1364
1301 return 0; 1365 return 0;
1302 out_changed: 1366 out_changed:
@@ -1443,6 +1507,10 @@ static int __init init_nfs_fs(void)
1443{ 1507{
1444 int err; 1508 int err;
1445 1509
1510 err = nfs_dns_resolver_init();
1511 if (err < 0)
1512 goto out8;
1513
1446 err = nfs_fscache_register(); 1514 err = nfs_fscache_register();
1447 if (err < 0) 1515 if (err < 0)
1448 goto out7; 1516 goto out7;
@@ -1501,6 +1569,8 @@ out5:
1501out6: 1569out6:
1502 nfs_fscache_unregister(); 1570 nfs_fscache_unregister();
1503out7: 1571out7:
1572 nfs_dns_resolver_destroy();
1573out8:
1504 return err; 1574 return err;
1505} 1575}
1506 1576
@@ -1512,6 +1582,7 @@ static void __exit exit_nfs_fs(void)
1512 nfs_destroy_inodecache(); 1582 nfs_destroy_inodecache();
1513 nfs_destroy_nfspagecache(); 1583 nfs_destroy_nfspagecache();
1514 nfs_fscache_unregister(); 1584 nfs_fscache_unregister();
1585 nfs_dns_resolver_destroy();
1515#ifdef CONFIG_PROC_FS 1586#ifdef CONFIG_PROC_FS
1516 rpc_proc_unregister("nfs"); 1587 rpc_proc_unregister("nfs");
1517#endif 1588#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7dd90a6769d0..2e485677019c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -102,6 +102,7 @@ struct nfs_mount_request {
102}; 102};
103 103
104extern int nfs_mount(struct nfs_mount_request *info); 104extern int nfs_mount(struct nfs_mount_request *info);
105extern void nfs_umount(const struct nfs_mount_request *info);
105 106
106/* client.c */ 107/* client.c */
107extern struct rpc_program nfs_program; 108extern struct rpc_program nfs_program;
@@ -213,7 +214,6 @@ void nfs_zap_acl_cache(struct inode *inode);
213extern int nfs_wait_bit_killable(void *word); 214extern int nfs_wait_bit_killable(void *word);
214 215
215/* super.c */ 216/* super.c */
216void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
217extern struct file_system_type nfs_xdev_fs_type; 217extern struct file_system_type nfs_xdev_fs_type;
218#ifdef CONFIG_NFS_V4 218#ifdef CONFIG_NFS_V4
219extern struct file_system_type nfs4_xdev_fs_type; 219extern struct file_system_type nfs4_xdev_fs_type;
@@ -248,6 +248,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
248 248
249/* write.c */ 249/* write.c */
250extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 250extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
251#ifdef CONFIG_MIGRATION
252extern int nfs_migrate_page(struct address_space *,
253 struct page *, struct page *);
254#else
255#define nfs_migrate_page NULL
256#endif
251 257
252/* nfs4proc.c */ 258/* nfs4proc.c */
253extern int _nfs4_call_sync(struct nfs_server *server, 259extern int _nfs4_call_sync(struct nfs_server *server,
@@ -368,24 +374,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
368 return ((unsigned long)len + (unsigned long)base + 374 return ((unsigned long)len + (unsigned long)base +
369 PAGE_SIZE - 1) >> PAGE_SHIFT; 375 PAGE_SIZE - 1) >> PAGE_SHIFT;
370} 376}
371
372#define IPV6_SCOPE_DELIMITER '%'
373
374/*
375 * Set the port number in an address. Be agnostic about the address
376 * family.
377 */
378static inline void nfs_set_port(struct sockaddr *sap, unsigned short port)
379{
380 struct sockaddr_in *ap = (struct sockaddr_in *)sap;
381 struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap;
382
383 switch (sap->sa_family) {
384 case AF_INET:
385 ap->sin_port = htons(port);
386 break;
387 case AF_INET6:
388 ap6->sin6_port = htons(port);
389 break;
390 }
391}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 38ef9eaec407..0adefc40cc89 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -209,6 +209,71 @@ out_mnt_err:
209 goto out; 209 goto out;
210} 210}
211 211
212/**
213 * nfs_umount - Notify a server that we have unmounted this export
214 * @info: pointer to umount request arguments
215 *
216 * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always
217 * use UDP.
218 */
219void nfs_umount(const struct nfs_mount_request *info)
220{
221 static const struct rpc_timeout nfs_umnt_timeout = {
222 .to_initval = 1 * HZ,
223 .to_maxval = 3 * HZ,
224 .to_retries = 2,
225 };
226 struct rpc_create_args args = {
227 .protocol = IPPROTO_UDP,
228 .address = info->sap,
229 .addrsize = info->salen,
230 .timeout = &nfs_umnt_timeout,
231 .servername = info->hostname,
232 .program = &mnt_program,
233 .version = info->version,
234 .authflavor = RPC_AUTH_UNIX,
235 .flags = RPC_CLNT_CREATE_NOPING,
236 };
237 struct mountres result;
238 struct rpc_message msg = {
239 .rpc_argp = info->dirpath,
240 .rpc_resp = &result,
241 };
242 struct rpc_clnt *clnt;
243 int status;
244
245 if (info->noresvport)
246 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
247
248 clnt = rpc_create(&args);
249 if (unlikely(IS_ERR(clnt)))
250 goto out_clnt_err;
251
252 dprintk("NFS: sending UMNT request for %s:%s\n",
253 (info->hostname ? info->hostname : "server"), info->dirpath);
254
255 if (info->version == NFS_MNT3_VERSION)
256 msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT];
257 else
258 msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT];
259
260 status = rpc_call_sync(clnt, &msg, 0);
261 rpc_shutdown_client(clnt);
262
263 if (unlikely(status < 0))
264 goto out_call_err;
265
266 return;
267
268out_clnt_err:
269 dprintk("NFS: failed to create UMNT RPC client, status=%ld\n",
270 PTR_ERR(clnt));
271 return;
272
273out_call_err:
274 dprintk("NFS: UMNT request failed, status=%d\n", status);
275}
276
212/* 277/*
213 * XDR encode/decode functions for MOUNT 278 * XDR encode/decode functions for MOUNT
214 */ 279 */
@@ -258,7 +323,7 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
258 return -EIO; 323 return -EIO;
259 status = ntohl(*p); 324 status = ntohl(*p);
260 325
261 for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) { 326 for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
262 if (mnt_errtbl[i].status == status) { 327 if (mnt_errtbl[i].status == status) {
263 res->errno = mnt_errtbl[i].errno; 328 res->errno = mnt_errtbl[i].errno;
264 return 0; 329 return 0;
@@ -309,7 +374,7 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
309 return -EIO; 374 return -EIO;
310 status = ntohl(*p); 375 status = ntohl(*p);
311 376
312 for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) { 377 for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
313 if (mnt3_errtbl[i].status == status) { 378 if (mnt3_errtbl[i].status == status) {
314 res->errno = mnt3_errtbl[i].errno; 379 res->errno = mnt3_errtbl[i].errno;
315 return 0; 380 return 0;
@@ -407,6 +472,13 @@ static struct rpc_procinfo mnt_procedures[] = {
407 .p_statidx = MOUNTPROC_MNT, 472 .p_statidx = MOUNTPROC_MNT,
408 .p_name = "MOUNT", 473 .p_name = "MOUNT",
409 }, 474 },
475 [MOUNTPROC_UMNT] = {
476 .p_proc = MOUNTPROC_UMNT,
477 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
478 .p_arglen = MNT_enc_dirpath_sz,
479 .p_statidx = MOUNTPROC_UMNT,
480 .p_name = "UMOUNT",
481 },
410}; 482};
411 483
412static struct rpc_procinfo mnt3_procedures[] = { 484static struct rpc_procinfo mnt3_procedures[] = {
@@ -419,6 +491,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
419 .p_statidx = MOUNTPROC3_MNT, 491 .p_statidx = MOUNTPROC3_MNT,
420 .p_name = "MOUNT", 492 .p_name = "MOUNT",
421 }, 493 },
494 [MOUNTPROC3_UMNT] = {
495 .p_proc = MOUNTPROC3_UMNT,
496 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
497 .p_arglen = MNT_enc_dirpath_sz,
498 .p_statidx = MOUNTPROC3_UMNT,
499 .p_name = "UMOUNT",
500 },
422}; 501};
423 502
424 503
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 61bc3a32e1e2..6ea07a3c75d4 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -220,6 +220,7 @@ extern void nfs4_destroy_session(struct nfs4_session *session);
220extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 220extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
221extern int nfs4_proc_create_session(struct nfs_client *, int reset); 221extern int nfs4_proc_create_session(struct nfs_client *, int reset);
222extern int nfs4_proc_destroy_session(struct nfs4_session *); 222extern int nfs4_proc_destroy_session(struct nfs4_session *);
223extern int nfs4_init_session(struct nfs_server *server);
223#else /* CONFIG_NFS_v4_1 */ 224#else /* CONFIG_NFS_v4_1 */
224static inline int nfs4_setup_sequence(struct nfs_client *clp, 225static inline int nfs4_setup_sequence(struct nfs_client *clp,
225 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 226 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
@@ -227,6 +228,11 @@ static inline int nfs4_setup_sequence(struct nfs_client *clp,
227{ 228{
228 return 0; 229 return 0;
229} 230}
231
232static inline int nfs4_init_session(struct nfs_server *server)
233{
234 return 0;
235}
230#endif /* CONFIG_NFS_V4_1 */ 236#endif /* CONFIG_NFS_V4_1 */
231 237
232extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[]; 238extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[];
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2a2a0a7143ad..2636c26d56fa 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -17,6 +17,7 @@
17#include <linux/inet.h> 17#include <linux/inet.h>
18#include "internal.h" 18#include "internal.h"
19#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "dns_resolve.h"
20 21
21#define NFSDBG_FACILITY NFSDBG_VFS 22#define NFSDBG_FACILITY NFSDBG_VFS
22 23
@@ -95,6 +96,20 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
95 return 0; 96 return 0;
96} 97}
97 98
99static size_t nfs_parse_server_name(char *string, size_t len,
100 struct sockaddr *sa, size_t salen)
101{
102 ssize_t ret;
103
104 ret = rpc_pton(string, len, sa, salen);
105 if (ret == 0) {
106 ret = nfs_dns_resolve_name(string, len, sa, salen);
107 if (ret < 0)
108 ret = 0;
109 }
110 return ret;
111}
112
98static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, 113static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
99 char *page, char *page2, 114 char *page, char *page2,
100 const struct nfs4_fs_location *location) 115 const struct nfs4_fs_location *location)
@@ -121,11 +136,12 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
121 136
122 if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) 137 if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
123 continue; 138 continue;
124 nfs_parse_ip_address(buf->data, buf->len, 139 mountdata->addrlen = nfs_parse_server_name(buf->data,
125 mountdata->addr, &mountdata->addrlen); 140 buf->len,
126 if (mountdata->addr->sa_family == AF_UNSPEC) 141 mountdata->addr, mountdata->addrlen);
142 if (mountdata->addrlen == 0)
127 continue; 143 continue;
128 nfs_set_port(mountdata->addr, NFS_PORT); 144 rpc_set_port(mountdata->addr, NFS_PORT);
129 145
130 memcpy(page2, buf->data, buf->len); 146 memcpy(page2, buf->data, buf->len);
131 page2[buf->len] = '\0'; 147 page2[buf->len] = '\0';
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 92ce43517814..be6544aef41f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -45,7 +45,6 @@
45#include <linux/nfs4.h> 45#include <linux/nfs4.h>
46#include <linux/nfs_fs.h> 46#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h> 47#include <linux/nfs_page.h>
48#include <linux/smp_lock.h>
49#include <linux/namei.h> 48#include <linux/namei.h>
50#include <linux/mount.h> 49#include <linux/mount.h>
51#include <linux/module.h> 50#include <linux/module.h>
@@ -62,6 +61,8 @@
62#define NFS4_POLL_RETRY_MIN (HZ/10) 61#define NFS4_POLL_RETRY_MIN (HZ/10)
63#define NFS4_POLL_RETRY_MAX (15*HZ) 62#define NFS4_POLL_RETRY_MAX (15*HZ)
64 63
64#define NFS4_MAX_LOOP_ON_RECOVER (10)
65
65struct nfs4_opendata; 66struct nfs4_opendata;
66static int _nfs4_proc_open(struct nfs4_opendata *data); 67static int _nfs4_proc_open(struct nfs4_opendata *data);
67static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 68static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
@@ -427,17 +428,19 @@ out:
427static int nfs4_recover_session(struct nfs4_session *session) 428static int nfs4_recover_session(struct nfs4_session *session)
428{ 429{
429 struct nfs_client *clp = session->clp; 430 struct nfs_client *clp = session->clp;
431 unsigned int loop;
430 int ret; 432 int ret;
431 433
432 for (;;) { 434 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
433 ret = nfs4_wait_clnt_recover(clp); 435 ret = nfs4_wait_clnt_recover(clp);
434 if (ret != 0) 436 if (ret != 0)
435 return ret; 437 break;
436 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) 438 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
437 break; 439 break;
438 nfs4_schedule_state_manager(clp); 440 nfs4_schedule_state_manager(clp);
441 ret = -EIO;
439 } 442 }
440 return 0; 443 return ret;
441} 444}
442 445
443static int nfs41_setup_sequence(struct nfs4_session *session, 446static int nfs41_setup_sequence(struct nfs4_session *session,
@@ -1445,18 +1448,20 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1445static int nfs4_recover_expired_lease(struct nfs_server *server) 1448static int nfs4_recover_expired_lease(struct nfs_server *server)
1446{ 1449{
1447 struct nfs_client *clp = server->nfs_client; 1450 struct nfs_client *clp = server->nfs_client;
1451 unsigned int loop;
1448 int ret; 1452 int ret;
1449 1453
1450 for (;;) { 1454 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
1451 ret = nfs4_wait_clnt_recover(clp); 1455 ret = nfs4_wait_clnt_recover(clp);
1452 if (ret != 0) 1456 if (ret != 0)
1453 return ret; 1457 break;
1454 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1458 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1455 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1459 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1456 break; 1460 break;
1457 nfs4_schedule_state_recovery(clp); 1461 nfs4_schedule_state_recovery(clp);
1462 ret = -EIO;
1458 } 1463 }
1459 return 0; 1464 return ret;
1460} 1465}
1461 1466
1462/* 1467/*
@@ -1998,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
1998 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2003 status = nfs4_call_sync(server, &msg, &args, &res, 0);
1999 if (status == 0) { 2004 if (status == 0) {
2000 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2005 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
2006 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
2007 NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
2008 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
2009 NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
2010 NFS_CAP_CTIME|NFS_CAP_MTIME);
2001 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) 2011 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
2002 server->caps |= NFS_CAP_ACLS; 2012 server->caps |= NFS_CAP_ACLS;
2003 if (res.has_links != 0) 2013 if (res.has_links != 0)
2004 server->caps |= NFS_CAP_HARDLINKS; 2014 server->caps |= NFS_CAP_HARDLINKS;
2005 if (res.has_symlinks != 0) 2015 if (res.has_symlinks != 0)
2006 server->caps |= NFS_CAP_SYMLINKS; 2016 server->caps |= NFS_CAP_SYMLINKS;
2017 if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
2018 server->caps |= NFS_CAP_FILEID;
2019 if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
2020 server->caps |= NFS_CAP_MODE;
2021 if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
2022 server->caps |= NFS_CAP_NLINK;
2023 if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
2024 server->caps |= NFS_CAP_OWNER;
2025 if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
2026 server->caps |= NFS_CAP_OWNER_GROUP;
2027 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
2028 server->caps |= NFS_CAP_ATIME;
2029 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
2030 server->caps |= NFS_CAP_CTIME;
2031 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
2032 server->caps |= NFS_CAP_MTIME;
2033
2007 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); 2034 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
2008 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; 2035 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
2009 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 2036 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
@@ -2041,15 +2068,9 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2041 .rpc_argp = &args, 2068 .rpc_argp = &args,
2042 .rpc_resp = &res, 2069 .rpc_resp = &res,
2043 }; 2070 };
2044 int status;
2045 2071
2046 nfs_fattr_init(info->fattr); 2072 nfs_fattr_init(info->fattr);
2047 status = nfs4_recover_expired_lease(server); 2073 return nfs4_call_sync(server, &msg, &args, &res, 0);
2048 if (!status)
2049 status = nfs4_check_client_ready(server->nfs_client);
2050 if (!status)
2051 status = nfs4_call_sync(server, &msg, &args, &res, 0);
2052 return status;
2053} 2074}
2054 2075
2055static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 2076static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -4100,15 +4121,23 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
4100 if (request->fl_start < 0 || request->fl_end < 0) 4121 if (request->fl_start < 0 || request->fl_end < 0)
4101 return -EINVAL; 4122 return -EINVAL;
4102 4123
4103 if (IS_GETLK(cmd)) 4124 if (IS_GETLK(cmd)) {
4104 return nfs4_proc_getlk(state, F_GETLK, request); 4125 if (state != NULL)
4126 return nfs4_proc_getlk(state, F_GETLK, request);
4127 return 0;
4128 }
4105 4129
4106 if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) 4130 if (!(IS_SETLK(cmd) || IS_SETLKW(cmd)))
4107 return -EINVAL; 4131 return -EINVAL;
4108 4132
4109 if (request->fl_type == F_UNLCK) 4133 if (request->fl_type == F_UNLCK) {
4110 return nfs4_proc_unlck(state, cmd, request); 4134 if (state != NULL)
4135 return nfs4_proc_unlck(state, cmd, request);
4136 return 0;
4137 }
4111 4138
4139 if (state == NULL)
4140 return -ENOLCK;
4112 do { 4141 do {
4113 status = nfs4_proc_setlk(state, cmd, request); 4142 status = nfs4_proc_setlk(state, cmd, request);
4114 if ((status != -EAGAIN) || IS_SETLK(cmd)) 4143 if ((status != -EAGAIN) || IS_SETLK(cmd))
@@ -4794,6 +4823,22 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
4794 return status; 4823 return status;
4795} 4824}
4796 4825
4826int nfs4_init_session(struct nfs_server *server)
4827{
4828 struct nfs_client *clp = server->nfs_client;
4829 int ret;
4830
4831 if (!nfs4_has_session(clp))
4832 return 0;
4833
4834 clp->cl_session->fc_attrs.max_rqst_sz = server->wsize;
4835 clp->cl_session->fc_attrs.max_resp_sz = server->rsize;
4836 ret = nfs4_recover_expired_lease(server);
4837 if (!ret)
4838 ret = nfs4_check_client_ready(clp);
4839 return ret;
4840}
4841
4797/* 4842/*
4798 * Renew the cl_session lease. 4843 * Renew the cl_session lease.
4799 */ 4844 */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b73c5a728655..65ca8c18476f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -553,6 +553,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
553 INIT_LIST_HEAD(&lsp->ls_sequence.list); 553 INIT_LIST_HEAD(&lsp->ls_sequence.list);
554 lsp->ls_seqid.sequence = &lsp->ls_sequence; 554 lsp->ls_seqid.sequence = &lsp->ls_sequence;
555 atomic_set(&lsp->ls_count, 1); 555 atomic_set(&lsp->ls_count, 1);
556 lsp->ls_state = state;
556 lsp->ls_owner = fl_owner; 557 lsp->ls_owner = fl_owner;
557 spin_lock(&clp->cl_lock); 558 spin_lock(&clp->cl_lock);
558 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64); 559 nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
@@ -587,7 +588,6 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
587 if (lsp != NULL) 588 if (lsp != NULL)
588 break; 589 break;
589 if (new != NULL) { 590 if (new != NULL) {
590 new->ls_state = state;
591 list_add(&new->ls_locks, &state->lock_states); 591 list_add(&new->ls_locks, &state->lock_states);
592 set_bit(LK_STATE_IN_USE, &state->flags); 592 set_bit(LK_STATE_IN_USE, &state->flags);
593 lsp = new; 593 lsp = new;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 617273e7d47f..cfc30d362f94 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -702,29 +702,12 @@ struct compound_hdr {
702 u32 minorversion; 702 u32 minorversion;
703}; 703};
704 704
705/* 705static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
706 * START OF "GENERIC" ENCODE ROUTINES. 706{
707 * These may look a little ugly since they are imported from a "generic" 707 __be32 *p = xdr_reserve_space(xdr, nbytes);
708 * set of XDR encode/decode routines which are intended to be shared by 708 BUG_ON(!p);
709 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 709 return p;
710 * 710}
711 * If the pain of reading these is too great, it should be a straightforward
712 * task to translate them into Linux-specific versions which are more
713 * consistent with the style used in NFSv2/v3...
714 */
715#define WRITE32(n) *p++ = htonl(n)
716#define WRITE64(n) do { \
717 *p++ = htonl((uint32_t)((n) >> 32)); \
718 *p++ = htonl((uint32_t)(n)); \
719} while (0)
720#define WRITEMEM(ptr,nbytes) do { \
721 p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
722} while (0)
723
724#define RESERVE_SPACE(nbytes) do { \
725 p = xdr_reserve_space(xdr, nbytes); \
726 BUG_ON(!p); \
727} while (0)
728 711
729static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 712static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
730{ 713{
@@ -749,12 +732,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
749 732
750 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); 733 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
751 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); 734 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
752 RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); 735 p = reserve_space(xdr, 4 + hdr->taglen + 8);
753 WRITE32(hdr->taglen); 736 p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
754 WRITEMEM(hdr->tag, hdr->taglen); 737 *p++ = cpu_to_be32(hdr->minorversion);
755 WRITE32(hdr->minorversion);
756 hdr->nops_p = p; 738 hdr->nops_p = p;
757 WRITE32(hdr->nops); 739 *p = cpu_to_be32(hdr->nops);
758} 740}
759 741
760static void encode_nops(struct compound_hdr *hdr) 742static void encode_nops(struct compound_hdr *hdr)
@@ -829,55 +811,53 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
829 len += 16; 811 len += 16;
830 else if (iap->ia_valid & ATTR_MTIME) 812 else if (iap->ia_valid & ATTR_MTIME)
831 len += 4; 813 len += 4;
832 RESERVE_SPACE(len); 814 p = reserve_space(xdr, len);
833 815
834 /* 816 /*
835 * We write the bitmap length now, but leave the bitmap and the attribute 817 * We write the bitmap length now, but leave the bitmap and the attribute
836 * buffer length to be backfilled at the end of this routine. 818 * buffer length to be backfilled at the end of this routine.
837 */ 819 */
838 WRITE32(2); 820 *p++ = cpu_to_be32(2);
839 q = p; 821 q = p;
840 p += 3; 822 p += 3;
841 823
842 if (iap->ia_valid & ATTR_SIZE) { 824 if (iap->ia_valid & ATTR_SIZE) {
843 bmval0 |= FATTR4_WORD0_SIZE; 825 bmval0 |= FATTR4_WORD0_SIZE;
844 WRITE64(iap->ia_size); 826 p = xdr_encode_hyper(p, iap->ia_size);
845 } 827 }
846 if (iap->ia_valid & ATTR_MODE) { 828 if (iap->ia_valid & ATTR_MODE) {
847 bmval1 |= FATTR4_WORD1_MODE; 829 bmval1 |= FATTR4_WORD1_MODE;
848 WRITE32(iap->ia_mode & S_IALLUGO); 830 *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
849 } 831 }
850 if (iap->ia_valid & ATTR_UID) { 832 if (iap->ia_valid & ATTR_UID) {
851 bmval1 |= FATTR4_WORD1_OWNER; 833 bmval1 |= FATTR4_WORD1_OWNER;
852 WRITE32(owner_namelen); 834 p = xdr_encode_opaque(p, owner_name, owner_namelen);
853 WRITEMEM(owner_name, owner_namelen);
854 } 835 }
855 if (iap->ia_valid & ATTR_GID) { 836 if (iap->ia_valid & ATTR_GID) {
856 bmval1 |= FATTR4_WORD1_OWNER_GROUP; 837 bmval1 |= FATTR4_WORD1_OWNER_GROUP;
857 WRITE32(owner_grouplen); 838 p = xdr_encode_opaque(p, owner_group, owner_grouplen);
858 WRITEMEM(owner_group, owner_grouplen);
859 } 839 }
860 if (iap->ia_valid & ATTR_ATIME_SET) { 840 if (iap->ia_valid & ATTR_ATIME_SET) {
861 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 841 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
862 WRITE32(NFS4_SET_TO_CLIENT_TIME); 842 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
863 WRITE32(0); 843 *p++ = cpu_to_be32(0);
864 WRITE32(iap->ia_mtime.tv_sec); 844 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
865 WRITE32(iap->ia_mtime.tv_nsec); 845 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
866 } 846 }
867 else if (iap->ia_valid & ATTR_ATIME) { 847 else if (iap->ia_valid & ATTR_ATIME) {
868 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 848 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
869 WRITE32(NFS4_SET_TO_SERVER_TIME); 849 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
870 } 850 }
871 if (iap->ia_valid & ATTR_MTIME_SET) { 851 if (iap->ia_valid & ATTR_MTIME_SET) {
872 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 852 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
873 WRITE32(NFS4_SET_TO_CLIENT_TIME); 853 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
874 WRITE32(0); 854 *p++ = cpu_to_be32(0);
875 WRITE32(iap->ia_mtime.tv_sec); 855 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
876 WRITE32(iap->ia_mtime.tv_nsec); 856 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
877 } 857 }
878 else if (iap->ia_valid & ATTR_MTIME) { 858 else if (iap->ia_valid & ATTR_MTIME) {
879 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 859 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
880 WRITE32(NFS4_SET_TO_SERVER_TIME); 860 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
881 } 861 }
882 862
883 /* 863 /*
@@ -891,7 +871,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
891 len = (char *)p - (char *)q - 12; 871 len = (char *)p - (char *)q - 12;
892 *q++ = htonl(bmval0); 872 *q++ = htonl(bmval0);
893 *q++ = htonl(bmval1); 873 *q++ = htonl(bmval1);
894 *q++ = htonl(len); 874 *q = htonl(len);
895 875
896/* out: */ 876/* out: */
897} 877}
@@ -900,9 +880,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
900{ 880{
901 __be32 *p; 881 __be32 *p;
902 882
903 RESERVE_SPACE(8); 883 p = reserve_space(xdr, 8);
904 WRITE32(OP_ACCESS); 884 *p++ = cpu_to_be32(OP_ACCESS);
905 WRITE32(access); 885 *p = cpu_to_be32(access);
906 hdr->nops++; 886 hdr->nops++;
907 hdr->replen += decode_access_maxsz; 887 hdr->replen += decode_access_maxsz;
908} 888}
@@ -911,10 +891,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
911{ 891{
912 __be32 *p; 892 __be32 *p;
913 893
914 RESERVE_SPACE(8+NFS4_STATEID_SIZE); 894 p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
915 WRITE32(OP_CLOSE); 895 *p++ = cpu_to_be32(OP_CLOSE);
916 WRITE32(arg->seqid->sequence->counter); 896 *p++ = cpu_to_be32(arg->seqid->sequence->counter);
917 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 897 xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
918 hdr->nops++; 898 hdr->nops++;
919 hdr->replen += decode_close_maxsz; 899 hdr->replen += decode_close_maxsz;
920} 900}
@@ -923,10 +903,10 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
923{ 903{
924 __be32 *p; 904 __be32 *p;
925 905
926 RESERVE_SPACE(16); 906 p = reserve_space(xdr, 16);
927 WRITE32(OP_COMMIT); 907 *p++ = cpu_to_be32(OP_COMMIT);
928 WRITE64(args->offset); 908 p = xdr_encode_hyper(p, args->offset);
929 WRITE32(args->count); 909 *p = cpu_to_be32(args->count);
930 hdr->nops++; 910 hdr->nops++;
931 hdr->replen += decode_commit_maxsz; 911 hdr->replen += decode_commit_maxsz;
932} 912}
@@ -935,30 +915,28 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
935{ 915{
936 __be32 *p; 916 __be32 *p;
937 917
938 RESERVE_SPACE(8); 918 p = reserve_space(xdr, 8);
939 WRITE32(OP_CREATE); 919 *p++ = cpu_to_be32(OP_CREATE);
940 WRITE32(create->ftype); 920 *p = cpu_to_be32(create->ftype);
941 921
942 switch (create->ftype) { 922 switch (create->ftype) {
943 case NF4LNK: 923 case NF4LNK:
944 RESERVE_SPACE(4); 924 p = reserve_space(xdr, 4);
945 WRITE32(create->u.symlink.len); 925 *p = cpu_to_be32(create->u.symlink.len);
946 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); 926 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
947 break; 927 break;
948 928
949 case NF4BLK: case NF4CHR: 929 case NF4BLK: case NF4CHR:
950 RESERVE_SPACE(8); 930 p = reserve_space(xdr, 8);
951 WRITE32(create->u.device.specdata1); 931 *p++ = cpu_to_be32(create->u.device.specdata1);
952 WRITE32(create->u.device.specdata2); 932 *p = cpu_to_be32(create->u.device.specdata2);
953 break; 933 break;
954 934
955 default: 935 default:
956 break; 936 break;
957 } 937 }
958 938
959 RESERVE_SPACE(4 + create->name->len); 939 encode_string(xdr, create->name->len, create->name->name);
960 WRITE32(create->name->len);
961 WRITEMEM(create->name->name, create->name->len);
962 hdr->nops++; 940 hdr->nops++;
963 hdr->replen += decode_create_maxsz; 941 hdr->replen += decode_create_maxsz;
964 942
@@ -969,10 +947,10 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
969{ 947{
970 __be32 *p; 948 __be32 *p;
971 949
972 RESERVE_SPACE(12); 950 p = reserve_space(xdr, 12);
973 WRITE32(OP_GETATTR); 951 *p++ = cpu_to_be32(OP_GETATTR);
974 WRITE32(1); 952 *p++ = cpu_to_be32(1);
975 WRITE32(bitmap); 953 *p = cpu_to_be32(bitmap);
976 hdr->nops++; 954 hdr->nops++;
977 hdr->replen += decode_getattr_maxsz; 955 hdr->replen += decode_getattr_maxsz;
978} 956}
@@ -981,11 +959,11 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
981{ 959{
982 __be32 *p; 960 __be32 *p;
983 961
984 RESERVE_SPACE(16); 962 p = reserve_space(xdr, 16);
985 WRITE32(OP_GETATTR); 963 *p++ = cpu_to_be32(OP_GETATTR);
986 WRITE32(2); 964 *p++ = cpu_to_be32(2);
987 WRITE32(bm0); 965 *p++ = cpu_to_be32(bm0);
988 WRITE32(bm1); 966 *p = cpu_to_be32(bm1);
989 hdr->nops++; 967 hdr->nops++;
990 hdr->replen += decode_getattr_maxsz; 968 hdr->replen += decode_getattr_maxsz;
991} 969}
@@ -1012,8 +990,8 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1012{ 990{
1013 __be32 *p; 991 __be32 *p;
1014 992
1015 RESERVE_SPACE(4); 993 p = reserve_space(xdr, 4);
1016 WRITE32(OP_GETFH); 994 *p = cpu_to_be32(OP_GETFH);
1017 hdr->nops++; 995 hdr->nops++;
1018 hdr->replen += decode_getfh_maxsz; 996 hdr->replen += decode_getfh_maxsz;
1019} 997}
@@ -1022,10 +1000,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
1022{ 1000{
1023 __be32 *p; 1001 __be32 *p;
1024 1002
1025 RESERVE_SPACE(8 + name->len); 1003 p = reserve_space(xdr, 8 + name->len);
1026 WRITE32(OP_LINK); 1004 *p++ = cpu_to_be32(OP_LINK);
1027 WRITE32(name->len); 1005 xdr_encode_opaque(p, name->name, name->len);
1028 WRITEMEM(name->name, name->len);
1029 hdr->nops++; 1006 hdr->nops++;
1030 hdr->replen += decode_link_maxsz; 1007 hdr->replen += decode_link_maxsz;
1031} 1008}
@@ -1052,27 +1029,27 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1052{ 1029{
1053 __be32 *p; 1030 __be32 *p;
1054 1031
1055 RESERVE_SPACE(32); 1032 p = reserve_space(xdr, 32);
1056 WRITE32(OP_LOCK); 1033 *p++ = cpu_to_be32(OP_LOCK);
1057 WRITE32(nfs4_lock_type(args->fl, args->block)); 1034 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
1058 WRITE32(args->reclaim); 1035 *p++ = cpu_to_be32(args->reclaim);
1059 WRITE64(args->fl->fl_start); 1036 p = xdr_encode_hyper(p, args->fl->fl_start);
1060 WRITE64(nfs4_lock_length(args->fl)); 1037 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1061 WRITE32(args->new_lock_owner); 1038 *p = cpu_to_be32(args->new_lock_owner);
1062 if (args->new_lock_owner){ 1039 if (args->new_lock_owner){
1063 RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); 1040 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
1064 WRITE32(args->open_seqid->sequence->counter); 1041 *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
1065 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); 1042 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
1066 WRITE32(args->lock_seqid->sequence->counter); 1043 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1067 WRITE64(args->lock_owner.clientid); 1044 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1068 WRITE32(16); 1045 *p++ = cpu_to_be32(16);
1069 WRITEMEM("lock id:", 8); 1046 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1070 WRITE64(args->lock_owner.id); 1047 xdr_encode_hyper(p, args->lock_owner.id);
1071 } 1048 }
1072 else { 1049 else {
1073 RESERVE_SPACE(NFS4_STATEID_SIZE+4); 1050 p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
1074 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); 1051 p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
1075 WRITE32(args->lock_seqid->sequence->counter); 1052 *p = cpu_to_be32(args->lock_seqid->sequence->counter);
1076 } 1053 }
1077 hdr->nops++; 1054 hdr->nops++;
1078 hdr->replen += decode_lock_maxsz; 1055 hdr->replen += decode_lock_maxsz;
@@ -1082,15 +1059,15 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
1082{ 1059{
1083 __be32 *p; 1060 __be32 *p;
1084 1061
1085 RESERVE_SPACE(52); 1062 p = reserve_space(xdr, 52);
1086 WRITE32(OP_LOCKT); 1063 *p++ = cpu_to_be32(OP_LOCKT);
1087 WRITE32(nfs4_lock_type(args->fl, 0)); 1064 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1088 WRITE64(args->fl->fl_start); 1065 p = xdr_encode_hyper(p, args->fl->fl_start);
1089 WRITE64(nfs4_lock_length(args->fl)); 1066 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1090 WRITE64(args->lock_owner.clientid); 1067 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1091 WRITE32(16); 1068 *p++ = cpu_to_be32(16);
1092 WRITEMEM("lock id:", 8); 1069 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1093 WRITE64(args->lock_owner.id); 1070 xdr_encode_hyper(p, args->lock_owner.id);
1094 hdr->nops++; 1071 hdr->nops++;
1095 hdr->replen += decode_lockt_maxsz; 1072 hdr->replen += decode_lockt_maxsz;
1096} 1073}
@@ -1099,13 +1076,13 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
1099{ 1076{
1100 __be32 *p; 1077 __be32 *p;
1101 1078
1102 RESERVE_SPACE(12+NFS4_STATEID_SIZE+16); 1079 p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
1103 WRITE32(OP_LOCKU); 1080 *p++ = cpu_to_be32(OP_LOCKU);
1104 WRITE32(nfs4_lock_type(args->fl, 0)); 1081 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1105 WRITE32(args->seqid->sequence->counter); 1082 *p++ = cpu_to_be32(args->seqid->sequence->counter);
1106 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); 1083 p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
1107 WRITE64(args->fl->fl_start); 1084 p = xdr_encode_hyper(p, args->fl->fl_start);
1108 WRITE64(nfs4_lock_length(args->fl)); 1085 xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1109 hdr->nops++; 1086 hdr->nops++;
1110 hdr->replen += decode_locku_maxsz; 1087 hdr->replen += decode_locku_maxsz;
1111} 1088}
@@ -1115,10 +1092,9 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
1115 int len = name->len; 1092 int len = name->len;
1116 __be32 *p; 1093 __be32 *p;
1117 1094
1118 RESERVE_SPACE(8 + len); 1095 p = reserve_space(xdr, 8 + len);
1119 WRITE32(OP_LOOKUP); 1096 *p++ = cpu_to_be32(OP_LOOKUP);
1120 WRITE32(len); 1097 xdr_encode_opaque(p, name->name, len);
1121 WRITEMEM(name->name, len);
1122 hdr->nops++; 1098 hdr->nops++;
1123 hdr->replen += decode_lookup_maxsz; 1099 hdr->replen += decode_lookup_maxsz;
1124} 1100}
@@ -1127,21 +1103,21 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
1127{ 1103{
1128 __be32 *p; 1104 __be32 *p;
1129 1105
1130 RESERVE_SPACE(8); 1106 p = reserve_space(xdr, 8);
1131 switch (fmode & (FMODE_READ|FMODE_WRITE)) { 1107 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
1132 case FMODE_READ: 1108 case FMODE_READ:
1133 WRITE32(NFS4_SHARE_ACCESS_READ); 1109 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ);
1134 break; 1110 break;
1135 case FMODE_WRITE: 1111 case FMODE_WRITE:
1136 WRITE32(NFS4_SHARE_ACCESS_WRITE); 1112 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE);
1137 break; 1113 break;
1138 case FMODE_READ|FMODE_WRITE: 1114 case FMODE_READ|FMODE_WRITE:
1139 WRITE32(NFS4_SHARE_ACCESS_BOTH); 1115 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH);
1140 break; 1116 break;
1141 default: 1117 default:
1142 WRITE32(0); 1118 *p++ = cpu_to_be32(0);
1143 } 1119 }
1144 WRITE32(0); /* for linux, share_deny = 0 always */ 1120 *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */
1145} 1121}
1146 1122
1147static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1123static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1151,29 +1127,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1151 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, 1127 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
1152 * owner 4 = 32 1128 * owner 4 = 32
1153 */ 1129 */
1154 RESERVE_SPACE(8); 1130 p = reserve_space(xdr, 8);
1155 WRITE32(OP_OPEN); 1131 *p++ = cpu_to_be32(OP_OPEN);
1156 WRITE32(arg->seqid->sequence->counter); 1132 *p = cpu_to_be32(arg->seqid->sequence->counter);
1157 encode_share_access(xdr, arg->fmode); 1133 encode_share_access(xdr, arg->fmode);
1158 RESERVE_SPACE(28); 1134 p = reserve_space(xdr, 28);
1159 WRITE64(arg->clientid); 1135 p = xdr_encode_hyper(p, arg->clientid);
1160 WRITE32(16); 1136 *p++ = cpu_to_be32(16);
1161 WRITEMEM("open id:", 8); 1137 p = xdr_encode_opaque_fixed(p, "open id:", 8);
1162 WRITE64(arg->id); 1138 xdr_encode_hyper(p, arg->id);
1163} 1139}
1164 1140
1165static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1141static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
1166{ 1142{
1167 __be32 *p; 1143 __be32 *p;
1168 1144
1169 RESERVE_SPACE(4); 1145 p = reserve_space(xdr, 4);
1170 switch(arg->open_flags & O_EXCL) { 1146 switch(arg->open_flags & O_EXCL) {
1171 case 0: 1147 case 0:
1172 WRITE32(NFS4_CREATE_UNCHECKED); 1148 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
1173 encode_attrs(xdr, arg->u.attrs, arg->server); 1149 encode_attrs(xdr, arg->u.attrs, arg->server);
1174 break; 1150 break;
1175 default: 1151 default:
1176 WRITE32(NFS4_CREATE_EXCLUSIVE); 1152 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
1177 encode_nfs4_verifier(xdr, &arg->u.verifier); 1153 encode_nfs4_verifier(xdr, &arg->u.verifier);
1178 } 1154 }
1179} 1155}
@@ -1182,14 +1158,14 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
1182{ 1158{
1183 __be32 *p; 1159 __be32 *p;
1184 1160
1185 RESERVE_SPACE(4); 1161 p = reserve_space(xdr, 4);
1186 switch (arg->open_flags & O_CREAT) { 1162 switch (arg->open_flags & O_CREAT) {
1187 case 0: 1163 case 0:
1188 WRITE32(NFS4_OPEN_NOCREATE); 1164 *p = cpu_to_be32(NFS4_OPEN_NOCREATE);
1189 break; 1165 break;
1190 default: 1166 default:
1191 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); 1167 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
1192 WRITE32(NFS4_OPEN_CREATE); 1168 *p = cpu_to_be32(NFS4_OPEN_CREATE);
1193 encode_createmode(xdr, arg); 1169 encode_createmode(xdr, arg);
1194 } 1170 }
1195} 1171}
@@ -1198,16 +1174,16 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega
1198{ 1174{
1199 __be32 *p; 1175 __be32 *p;
1200 1176
1201 RESERVE_SPACE(4); 1177 p = reserve_space(xdr, 4);
1202 switch (delegation_type) { 1178 switch (delegation_type) {
1203 case 0: 1179 case 0:
1204 WRITE32(NFS4_OPEN_DELEGATE_NONE); 1180 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
1205 break; 1181 break;
1206 case FMODE_READ: 1182 case FMODE_READ:
1207 WRITE32(NFS4_OPEN_DELEGATE_READ); 1183 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
1208 break; 1184 break;
1209 case FMODE_WRITE|FMODE_READ: 1185 case FMODE_WRITE|FMODE_READ:
1210 WRITE32(NFS4_OPEN_DELEGATE_WRITE); 1186 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
1211 break; 1187 break;
1212 default: 1188 default:
1213 BUG(); 1189 BUG();
@@ -1218,8 +1194,8 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
1218{ 1194{
1219 __be32 *p; 1195 __be32 *p;
1220 1196
1221 RESERVE_SPACE(4); 1197 p = reserve_space(xdr, 4);
1222 WRITE32(NFS4_OPEN_CLAIM_NULL); 1198 *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL);
1223 encode_string(xdr, name->len, name->name); 1199 encode_string(xdr, name->len, name->name);
1224} 1200}
1225 1201
@@ -1227,8 +1203,8 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
1227{ 1203{
1228 __be32 *p; 1204 __be32 *p;
1229 1205
1230 RESERVE_SPACE(4); 1206 p = reserve_space(xdr, 4);
1231 WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); 1207 *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS);
1232 encode_delegation_type(xdr, type); 1208 encode_delegation_type(xdr, type);
1233} 1209}
1234 1210
@@ -1236,9 +1212,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1236{ 1212{
1237 __be32 *p; 1213 __be32 *p;
1238 1214
1239 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1215 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1240 WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); 1216 *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
1241 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1217 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1242 encode_string(xdr, name->len, name->name); 1218 encode_string(xdr, name->len, name->name);
1243} 1219}
1244 1220
@@ -1267,10 +1243,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
1267{ 1243{
1268 __be32 *p; 1244 __be32 *p;
1269 1245
1270 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1246 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1271 WRITE32(OP_OPEN_CONFIRM); 1247 *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
1272 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1248 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1273 WRITE32(arg->seqid->sequence->counter); 1249 *p = cpu_to_be32(arg->seqid->sequence->counter);
1274 hdr->nops++; 1250 hdr->nops++;
1275 hdr->replen += decode_open_confirm_maxsz; 1251 hdr->replen += decode_open_confirm_maxsz;
1276} 1252}
@@ -1279,10 +1255,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
1279{ 1255{
1280 __be32 *p; 1256 __be32 *p;
1281 1257
1282 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1258 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1283 WRITE32(OP_OPEN_DOWNGRADE); 1259 *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
1284 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1260 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1285 WRITE32(arg->seqid->sequence->counter); 1261 *p = cpu_to_be32(arg->seqid->sequence->counter);
1286 encode_share_access(xdr, arg->fmode); 1262 encode_share_access(xdr, arg->fmode);
1287 hdr->nops++; 1263 hdr->nops++;
1288 hdr->replen += decode_open_downgrade_maxsz; 1264 hdr->replen += decode_open_downgrade_maxsz;
@@ -1294,10 +1270,9 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
1294 int len = fh->size; 1270 int len = fh->size;
1295 __be32 *p; 1271 __be32 *p;
1296 1272
1297 RESERVE_SPACE(8 + len); 1273 p = reserve_space(xdr, 8 + len);
1298 WRITE32(OP_PUTFH); 1274 *p++ = cpu_to_be32(OP_PUTFH);
1299 WRITE32(len); 1275 xdr_encode_opaque(p, fh->data, len);
1300 WRITEMEM(fh->data, len);
1301 hdr->nops++; 1276 hdr->nops++;
1302 hdr->replen += decode_putfh_maxsz; 1277 hdr->replen += decode_putfh_maxsz;
1303} 1278}
@@ -1306,8 +1281,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1306{ 1281{
1307 __be32 *p; 1282 __be32 *p;
1308 1283
1309 RESERVE_SPACE(4); 1284 p = reserve_space(xdr, 4);
1310 WRITE32(OP_PUTROOTFH); 1285 *p = cpu_to_be32(OP_PUTROOTFH);
1311 hdr->nops++; 1286 hdr->nops++;
1312 hdr->replen += decode_putrootfh_maxsz; 1287 hdr->replen += decode_putrootfh_maxsz;
1313} 1288}
@@ -1317,26 +1292,26 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1317 nfs4_stateid stateid; 1292 nfs4_stateid stateid;
1318 __be32 *p; 1293 __be32 *p;
1319 1294
1320 RESERVE_SPACE(NFS4_STATEID_SIZE); 1295 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1321 if (ctx->state != NULL) { 1296 if (ctx->state != NULL) {
1322 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); 1297 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
1323 WRITEMEM(stateid.data, NFS4_STATEID_SIZE); 1298 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1324 } else 1299 } else
1325 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1300 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1326} 1301}
1327 1302
1328static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1303static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1329{ 1304{
1330 __be32 *p; 1305 __be32 *p;
1331 1306
1332 RESERVE_SPACE(4); 1307 p = reserve_space(xdr, 4);
1333 WRITE32(OP_READ); 1308 *p = cpu_to_be32(OP_READ);
1334 1309
1335 encode_stateid(xdr, args->context); 1310 encode_stateid(xdr, args->context);
1336 1311
1337 RESERVE_SPACE(12); 1312 p = reserve_space(xdr, 12);
1338 WRITE64(args->offset); 1313 p = xdr_encode_hyper(p, args->offset);
1339 WRITE32(args->count); 1314 *p = cpu_to_be32(args->count);
1340 hdr->nops++; 1315 hdr->nops++;
1341 hdr->replen += decode_read_maxsz; 1316 hdr->replen += decode_read_maxsz;
1342} 1317}
@@ -1349,20 +1324,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1349 }; 1324 };
1350 __be32 *p; 1325 __be32 *p;
1351 1326
1352 RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20); 1327 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1353 WRITE32(OP_READDIR); 1328 *p++ = cpu_to_be32(OP_READDIR);
1354 WRITE64(readdir->cookie); 1329 p = xdr_encode_hyper(p, readdir->cookie);
1355 WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE); 1330 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1356 WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ 1331 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */
1357 WRITE32(readdir->count); 1332 *p++ = cpu_to_be32(readdir->count);
1358 WRITE32(2); 1333 *p++ = cpu_to_be32(2);
1359 /* Switch to mounted_on_fileid if the server supports it */ 1334 /* Switch to mounted_on_fileid if the server supports it */
1360 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) 1335 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1361 attrs[0] &= ~FATTR4_WORD0_FILEID; 1336 attrs[0] &= ~FATTR4_WORD0_FILEID;
1362 else 1337 else
1363 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 1338 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1364 WRITE32(attrs[0] & readdir->bitmask[0]); 1339 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1365 WRITE32(attrs[1] & readdir->bitmask[1]); 1340 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1366 hdr->nops++; 1341 hdr->nops++;
1367 hdr->replen += decode_readdir_maxsz; 1342 hdr->replen += decode_readdir_maxsz;
1368 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1343 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
@@ -1378,8 +1353,8 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
1378{ 1353{
1379 __be32 *p; 1354 __be32 *p;
1380 1355
1381 RESERVE_SPACE(4); 1356 p = reserve_space(xdr, 4);
1382 WRITE32(OP_READLINK); 1357 *p = cpu_to_be32(OP_READLINK);
1383 hdr->nops++; 1358 hdr->nops++;
1384 hdr->replen += decode_readlink_maxsz; 1359 hdr->replen += decode_readlink_maxsz;
1385} 1360}
@@ -1388,10 +1363,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
1388{ 1363{
1389 __be32 *p; 1364 __be32 *p;
1390 1365
1391 RESERVE_SPACE(8 + name->len); 1366 p = reserve_space(xdr, 8 + name->len);
1392 WRITE32(OP_REMOVE); 1367 *p++ = cpu_to_be32(OP_REMOVE);
1393 WRITE32(name->len); 1368 xdr_encode_opaque(p, name->name, name->len);
1394 WRITEMEM(name->name, name->len);
1395 hdr->nops++; 1369 hdr->nops++;
1396 hdr->replen += decode_remove_maxsz; 1370 hdr->replen += decode_remove_maxsz;
1397} 1371}
@@ -1400,14 +1374,10 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
1400{ 1374{
1401 __be32 *p; 1375 __be32 *p;
1402 1376
1403 RESERVE_SPACE(8 + oldname->len); 1377 p = reserve_space(xdr, 4);
1404 WRITE32(OP_RENAME); 1378 *p = cpu_to_be32(OP_RENAME);
1405 WRITE32(oldname->len); 1379 encode_string(xdr, oldname->len, oldname->name);
1406 WRITEMEM(oldname->name, oldname->len); 1380 encode_string(xdr, newname->len, newname->name);
1407
1408 RESERVE_SPACE(4 + newname->len);
1409 WRITE32(newname->len);
1410 WRITEMEM(newname->name, newname->len);
1411 hdr->nops++; 1381 hdr->nops++;
1412 hdr->replen += decode_rename_maxsz; 1382 hdr->replen += decode_rename_maxsz;
1413} 1383}
@@ -1416,9 +1386,9 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
1416{ 1386{
1417 __be32 *p; 1387 __be32 *p;
1418 1388
1419 RESERVE_SPACE(12); 1389 p = reserve_space(xdr, 12);
1420 WRITE32(OP_RENEW); 1390 *p++ = cpu_to_be32(OP_RENEW);
1421 WRITE64(client_stateid->cl_clientid); 1391 xdr_encode_hyper(p, client_stateid->cl_clientid);
1422 hdr->nops++; 1392 hdr->nops++;
1423 hdr->replen += decode_renew_maxsz; 1393 hdr->replen += decode_renew_maxsz;
1424} 1394}
@@ -1428,8 +1398,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1428{ 1398{
1429 __be32 *p; 1399 __be32 *p;
1430 1400
1431 RESERVE_SPACE(4); 1401 p = reserve_space(xdr, 4);
1432 WRITE32(OP_RESTOREFH); 1402 *p = cpu_to_be32(OP_RESTOREFH);
1433 hdr->nops++; 1403 hdr->nops++;
1434 hdr->replen += decode_restorefh_maxsz; 1404 hdr->replen += decode_restorefh_maxsz;
1435} 1405}
@@ -1439,16 +1409,16 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1439{ 1409{
1440 __be32 *p; 1410 __be32 *p;
1441 1411
1442 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1412 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1443 WRITE32(OP_SETATTR); 1413 *p++ = cpu_to_be32(OP_SETATTR);
1444 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1414 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1445 RESERVE_SPACE(2*4); 1415 p = reserve_space(xdr, 2*4);
1446 WRITE32(1); 1416 *p++ = cpu_to_be32(1);
1447 WRITE32(FATTR4_WORD0_ACL); 1417 *p = cpu_to_be32(FATTR4_WORD0_ACL);
1448 if (arg->acl_len % 4) 1418 if (arg->acl_len % 4)
1449 return -EINVAL; 1419 return -EINVAL;
1450 RESERVE_SPACE(4); 1420 p = reserve_space(xdr, 4);
1451 WRITE32(arg->acl_len); 1421 *p = cpu_to_be32(arg->acl_len);
1452 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1422 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1453 hdr->nops++; 1423 hdr->nops++;
1454 hdr->replen += decode_setacl_maxsz; 1424 hdr->replen += decode_setacl_maxsz;
@@ -1460,8 +1430,8 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1460{ 1430{
1461 __be32 *p; 1431 __be32 *p;
1462 1432
1463 RESERVE_SPACE(4); 1433 p = reserve_space(xdr, 4);
1464 WRITE32(OP_SAVEFH); 1434 *p = cpu_to_be32(OP_SAVEFH);
1465 hdr->nops++; 1435 hdr->nops++;
1466 hdr->replen += decode_savefh_maxsz; 1436 hdr->replen += decode_savefh_maxsz;
1467} 1437}
@@ -1470,9 +1440,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
1470{ 1440{
1471 __be32 *p; 1441 __be32 *p;
1472 1442
1473 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1443 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1474 WRITE32(OP_SETATTR); 1444 *p++ = cpu_to_be32(OP_SETATTR);
1475 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); 1445 xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
1476 hdr->nops++; 1446 hdr->nops++;
1477 hdr->replen += decode_setattr_maxsz; 1447 hdr->replen += decode_setattr_maxsz;
1478 encode_attrs(xdr, arg->iap, server); 1448 encode_attrs(xdr, arg->iap, server);
@@ -1482,17 +1452,17 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1482{ 1452{
1483 __be32 *p; 1453 __be32 *p;
1484 1454
1485 RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE); 1455 p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
1486 WRITE32(OP_SETCLIENTID); 1456 *p++ = cpu_to_be32(OP_SETCLIENTID);
1487 WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); 1457 xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
1488 1458
1489 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); 1459 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1490 RESERVE_SPACE(4); 1460 p = reserve_space(xdr, 4);
1491 WRITE32(setclientid->sc_prog); 1461 *p = cpu_to_be32(setclientid->sc_prog);
1492 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); 1462 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
1493 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1463 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1494 RESERVE_SPACE(4); 1464 p = reserve_space(xdr, 4);
1495 WRITE32(setclientid->sc_cb_ident); 1465 *p = cpu_to_be32(setclientid->sc_cb_ident);
1496 hdr->nops++; 1466 hdr->nops++;
1497 hdr->replen += decode_setclientid_maxsz; 1467 hdr->replen += decode_setclientid_maxsz;
1498} 1468}
@@ -1501,10 +1471,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
1501{ 1471{
1502 __be32 *p; 1472 __be32 *p;
1503 1473
1504 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); 1474 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
1505 WRITE32(OP_SETCLIENTID_CONFIRM); 1475 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
1506 WRITE64(client_state->cl_clientid); 1476 p = xdr_encode_hyper(p, client_state->cl_clientid);
1507 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); 1477 xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1508 hdr->nops++; 1478 hdr->nops++;
1509 hdr->replen += decode_setclientid_confirm_maxsz; 1479 hdr->replen += decode_setclientid_confirm_maxsz;
1510} 1480}
@@ -1513,15 +1483,15 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1513{ 1483{
1514 __be32 *p; 1484 __be32 *p;
1515 1485
1516 RESERVE_SPACE(4); 1486 p = reserve_space(xdr, 4);
1517 WRITE32(OP_WRITE); 1487 *p = cpu_to_be32(OP_WRITE);
1518 1488
1519 encode_stateid(xdr, args->context); 1489 encode_stateid(xdr, args->context);
1520 1490
1521 RESERVE_SPACE(16); 1491 p = reserve_space(xdr, 16);
1522 WRITE64(args->offset); 1492 p = xdr_encode_hyper(p, args->offset);
1523 WRITE32(args->stable); 1493 *p++ = cpu_to_be32(args->stable);
1524 WRITE32(args->count); 1494 *p = cpu_to_be32(args->count);
1525 1495
1526 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1496 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1527 hdr->nops++; 1497 hdr->nops++;
@@ -1532,10 +1502,10 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1532{ 1502{
1533 __be32 *p; 1503 __be32 *p;
1534 1504
1535 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1505 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1536 1506
1537 WRITE32(OP_DELEGRETURN); 1507 *p++ = cpu_to_be32(OP_DELEGRETURN);
1538 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1508 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1539 hdr->nops++; 1509 hdr->nops++;
1540 hdr->replen += decode_delegreturn_maxsz; 1510 hdr->replen += decode_delegreturn_maxsz;
1541} 1511}
@@ -1548,16 +1518,16 @@ static void encode_exchange_id(struct xdr_stream *xdr,
1548{ 1518{
1549 __be32 *p; 1519 __be32 *p;
1550 1520
1551 RESERVE_SPACE(4 + sizeof(args->verifier->data)); 1521 p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
1552 WRITE32(OP_EXCHANGE_ID); 1522 *p++ = cpu_to_be32(OP_EXCHANGE_ID);
1553 WRITEMEM(args->verifier->data, sizeof(args->verifier->data)); 1523 xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
1554 1524
1555 encode_string(xdr, args->id_len, args->id); 1525 encode_string(xdr, args->id_len, args->id);
1556 1526
1557 RESERVE_SPACE(12); 1527 p = reserve_space(xdr, 12);
1558 WRITE32(args->flags); 1528 *p++ = cpu_to_be32(args->flags);
1559 WRITE32(0); /* zero length state_protect4_a */ 1529 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
1560 WRITE32(0); /* zero length implementation id array */ 1530 *p = cpu_to_be32(0); /* zero length implementation id array */
1561 hdr->nops++; 1531 hdr->nops++;
1562 hdr->replen += decode_exchange_id_maxsz; 1532 hdr->replen += decode_exchange_id_maxsz;
1563} 1533}
@@ -1571,55 +1541,43 @@ static void encode_create_session(struct xdr_stream *xdr,
1571 uint32_t len; 1541 uint32_t len;
1572 struct nfs_client *clp = args->client; 1542 struct nfs_client *clp = args->client;
1573 1543
1574 RESERVE_SPACE(4); 1544 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1575 WRITE32(OP_CREATE_SESSION); 1545 clp->cl_ipaddr);
1576
1577 RESERVE_SPACE(8);
1578 WRITE64(clp->cl_ex_clid);
1579 1546
1580 RESERVE_SPACE(8); 1547 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1581 WRITE32(clp->cl_seqid); /*Sequence id */ 1548 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1582 WRITE32(args->flags); /*flags */ 1549 p = xdr_encode_hyper(p, clp->cl_ex_clid);
1550 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1551 *p++ = cpu_to_be32(args->flags); /*flags */
1583 1552
1584 RESERVE_SPACE(2*28); /* 2 channel_attrs */
1585 /* Fore Channel */ 1553 /* Fore Channel */
1586 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1554 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1587 WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */ 1555 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
1588 WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */ 1556 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
1589 WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1557 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1590 WRITE32(args->fc_attrs.max_ops); /* max operations */ 1558 *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */
1591 WRITE32(args->fc_attrs.max_reqs); /* max requests */ 1559 *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */
1592 WRITE32(0); /* rdmachannel_attrs */ 1560 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1593 1561
1594 /* Back Channel */ 1562 /* Back Channel */
1595 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1563 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1596 WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */ 1564 *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
1597 WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */ 1565 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
1598 WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1566 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1599 WRITE32(args->bc_attrs.max_ops); /* max operations */ 1567 *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */
1600 WRITE32(args->bc_attrs.max_reqs); /* max requests */ 1568 *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */
1601 WRITE32(0); /* rdmachannel_attrs */ 1569 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1602 1570
1603 RESERVE_SPACE(4); 1571 *p++ = cpu_to_be32(args->cb_program); /* cb_program */
1604 WRITE32(args->cb_program); /* cb_program */ 1572 *p++ = cpu_to_be32(1);
1605 1573 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
1606 RESERVE_SPACE(4); /* # of security flavors */
1607 WRITE32(1);
1608
1609 RESERVE_SPACE(4);
1610 WRITE32(RPC_AUTH_UNIX); /* auth_sys */
1611 1574
1612 /* authsys_parms rfc1831 */ 1575 /* authsys_parms rfc1831 */
1613 RESERVE_SPACE(4); 1576 *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
1614 WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ 1577 p = xdr_encode_opaque(p, machine_name, len);
1615 len = scnprintf(machine_name, sizeof(machine_name), "%s", 1578 *p++ = cpu_to_be32(0); /* UID */
1616 clp->cl_ipaddr); 1579 *p++ = cpu_to_be32(0); /* GID */
1617 RESERVE_SPACE(16 + len); 1580 *p = cpu_to_be32(0); /* No more gids */
1618 WRITE32(len);
1619 WRITEMEM(machine_name, len);
1620 WRITE32(0); /* UID */
1621 WRITE32(0); /* GID */
1622 WRITE32(0); /* No more gids */
1623 hdr->nops++; 1581 hdr->nops++;
1624 hdr->replen += decode_create_session_maxsz; 1582 hdr->replen += decode_create_session_maxsz;
1625} 1583}
@@ -1629,9 +1587,9 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1629 struct compound_hdr *hdr) 1587 struct compound_hdr *hdr)
1630{ 1588{
1631 __be32 *p; 1589 __be32 *p;
1632 RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN); 1590 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
1633 WRITE32(OP_DESTROY_SESSION); 1591 *p++ = cpu_to_be32(OP_DESTROY_SESSION);
1634 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1592 xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1635 hdr->nops++; 1593 hdr->nops++;
1636 hdr->replen += decode_destroy_session_maxsz; 1594 hdr->replen += decode_destroy_session_maxsz;
1637} 1595}
@@ -1655,8 +1613,8 @@ static void encode_sequence(struct xdr_stream *xdr,
1655 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); 1613 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
1656 slot = tp->slots + args->sa_slotid; 1614 slot = tp->slots + args->sa_slotid;
1657 1615
1658 RESERVE_SPACE(4); 1616 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
1659 WRITE32(OP_SEQUENCE); 1617 *p++ = cpu_to_be32(OP_SEQUENCE);
1660 1618
1661 /* 1619 /*
1662 * Sessionid + seqid + slotid + max slotid + cache_this 1620 * Sessionid + seqid + slotid + max slotid + cache_this
@@ -1670,12 +1628,11 @@ static void encode_sequence(struct xdr_stream *xdr,
1670 ((u32 *)session->sess_id.data)[3], 1628 ((u32 *)session->sess_id.data)[3],
1671 slot->seq_nr, args->sa_slotid, 1629 slot->seq_nr, args->sa_slotid,
1672 tp->highest_used_slotid, args->sa_cache_this); 1630 tp->highest_used_slotid, args->sa_cache_this);
1673 RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16); 1631 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1674 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1632 *p++ = cpu_to_be32(slot->seq_nr);
1675 WRITE32(slot->seq_nr); 1633 *p++ = cpu_to_be32(args->sa_slotid);
1676 WRITE32(args->sa_slotid); 1634 *p++ = cpu_to_be32(tp->highest_used_slotid);
1677 WRITE32(tp->highest_used_slotid); 1635 *p = cpu_to_be32(args->sa_cache_this);
1678 WRITE32(args->sa_cache_this);
1679 hdr->nops++; 1636 hdr->nops++;
1680 hdr->replen += decode_sequence_maxsz; 1637 hdr->replen += decode_sequence_maxsz;
1681#endif /* CONFIG_NFS_V4_1 */ 1638#endif /* CONFIG_NFS_V4_1 */
@@ -2466,68 +2423,53 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
2466} 2423}
2467#endif /* CONFIG_NFS_V4_1 */ 2424#endif /* CONFIG_NFS_V4_1 */
2468 2425
2469/* 2426static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
2470 * START OF "GENERIC" DECODE ROUTINES. 2427{
2471 * These may look a little ugly since they are imported from a "generic" 2428 dprintk("nfs: %s: prematurely hit end of receive buffer. "
2472 * set of XDR encode/decode routines which are intended to be shared by 2429 "Remaining buffer length is %tu words.\n",
2473 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 2430 func, xdr->end - xdr->p);
2474 * 2431}
2475 * If the pain of reading these is too great, it should be a straightforward
2476 * task to translate them into Linux-specific versions which are more
2477 * consistent with the style used in NFSv2/v3...
2478 */
2479#define READ32(x) (x) = ntohl(*p++)
2480#define READ64(x) do { \
2481 (x) = (u64)ntohl(*p++) << 32; \
2482 (x) |= ntohl(*p++); \
2483} while (0)
2484#define READTIME(x) do { \
2485 p++; \
2486 (x.tv_sec) = ntohl(*p++); \
2487 (x.tv_nsec) = ntohl(*p++); \
2488} while (0)
2489#define COPYMEM(x,nbytes) do { \
2490 memcpy((x), p, nbytes); \
2491 p += XDR_QUADLEN(nbytes); \
2492} while (0)
2493
2494#define READ_BUF(nbytes) do { \
2495 p = xdr_inline_decode(xdr, nbytes); \
2496 if (unlikely(!p)) { \
2497 dprintk("nfs: %s: prematurely hit end of receive" \
2498 " buffer\n", __func__); \
2499 dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
2500 __func__, xdr->p, nbytes, xdr->end); \
2501 return -EIO; \
2502 } \
2503} while (0)
2504 2432
2505static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) 2433static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
2506{ 2434{
2507 __be32 *p; 2435 __be32 *p;
2508 2436
2509 READ_BUF(4); 2437 p = xdr_inline_decode(xdr, 4);
2510 READ32(*len); 2438 if (unlikely(!p))
2511 READ_BUF(*len); 2439 goto out_overflow;
2440 *len = be32_to_cpup(p);
2441 p = xdr_inline_decode(xdr, *len);
2442 if (unlikely(!p))
2443 goto out_overflow;
2512 *string = (char *)p; 2444 *string = (char *)p;
2513 return 0; 2445 return 0;
2446out_overflow:
2447 print_overflow_msg(__func__, xdr);
2448 return -EIO;
2514} 2449}
2515 2450
2516static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) 2451static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
2517{ 2452{
2518 __be32 *p; 2453 __be32 *p;
2519 2454
2520 READ_BUF(8); 2455 p = xdr_inline_decode(xdr, 8);
2521 READ32(hdr->status); 2456 if (unlikely(!p))
2522 READ32(hdr->taglen); 2457 goto out_overflow;
2458 hdr->status = be32_to_cpup(p++);
2459 hdr->taglen = be32_to_cpup(p);
2523 2460
2524 READ_BUF(hdr->taglen + 4); 2461 p = xdr_inline_decode(xdr, hdr->taglen + 4);
2462 if (unlikely(!p))
2463 goto out_overflow;
2525 hdr->tag = (char *)p; 2464 hdr->tag = (char *)p;
2526 p += XDR_QUADLEN(hdr->taglen); 2465 p += XDR_QUADLEN(hdr->taglen);
2527 READ32(hdr->nops); 2466 hdr->nops = be32_to_cpup(p);
2528 if (unlikely(hdr->nops < 1)) 2467 if (unlikely(hdr->nops < 1))
2529 return nfs4_stat_to_errno(hdr->status); 2468 return nfs4_stat_to_errno(hdr->status);
2530 return 0; 2469 return 0;
2470out_overflow:
2471 print_overflow_msg(__func__, xdr);
2472 return -EIO;
2531} 2473}
2532 2474
2533static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) 2475static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
@@ -2536,18 +2478,23 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2536 uint32_t opnum; 2478 uint32_t opnum;
2537 int32_t nfserr; 2479 int32_t nfserr;
2538 2480
2539 READ_BUF(8); 2481 p = xdr_inline_decode(xdr, 8);
2540 READ32(opnum); 2482 if (unlikely(!p))
2483 goto out_overflow;
2484 opnum = be32_to_cpup(p++);
2541 if (opnum != expected) { 2485 if (opnum != expected) {
2542 dprintk("nfs: Server returned operation" 2486 dprintk("nfs: Server returned operation"
2543 " %d but we issued a request for %d\n", 2487 " %d but we issued a request for %d\n",
2544 opnum, expected); 2488 opnum, expected);
2545 return -EIO; 2489 return -EIO;
2546 } 2490 }
2547 READ32(nfserr); 2491 nfserr = be32_to_cpup(p);
2548 if (nfserr != NFS_OK) 2492 if (nfserr != NFS_OK)
2549 return nfs4_stat_to_errno(nfserr); 2493 return nfs4_stat_to_errno(nfserr);
2550 return 0; 2494 return 0;
2495out_overflow:
2496 print_overflow_msg(__func__, xdr);
2497 return -EIO;
2551} 2498}
2552 2499
2553/* Dummy routine */ 2500/* Dummy routine */
@@ -2557,8 +2504,11 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
2557 unsigned int strlen; 2504 unsigned int strlen;
2558 char *str; 2505 char *str;
2559 2506
2560 READ_BUF(12); 2507 p = xdr_inline_decode(xdr, 12);
2561 return decode_opaque_inline(xdr, &strlen, &str); 2508 if (likely(p))
2509 return decode_opaque_inline(xdr, &strlen, &str);
2510 print_overflow_msg(__func__, xdr);
2511 return -EIO;
2562} 2512}
2563 2513
2564static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) 2514static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -2566,27 +2516,39 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
2566 uint32_t bmlen; 2516 uint32_t bmlen;
2567 __be32 *p; 2517 __be32 *p;
2568 2518
2569 READ_BUF(4); 2519 p = xdr_inline_decode(xdr, 4);
2570 READ32(bmlen); 2520 if (unlikely(!p))
2521 goto out_overflow;
2522 bmlen = be32_to_cpup(p);
2571 2523
2572 bitmap[0] = bitmap[1] = 0; 2524 bitmap[0] = bitmap[1] = 0;
2573 READ_BUF((bmlen << 2)); 2525 p = xdr_inline_decode(xdr, (bmlen << 2));
2526 if (unlikely(!p))
2527 goto out_overflow;
2574 if (bmlen > 0) { 2528 if (bmlen > 0) {
2575 READ32(bitmap[0]); 2529 bitmap[0] = be32_to_cpup(p++);
2576 if (bmlen > 1) 2530 if (bmlen > 1)
2577 READ32(bitmap[1]); 2531 bitmap[1] = be32_to_cpup(p);
2578 } 2532 }
2579 return 0; 2533 return 0;
2534out_overflow:
2535 print_overflow_msg(__func__, xdr);
2536 return -EIO;
2580} 2537}
2581 2538
2582static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) 2539static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
2583{ 2540{
2584 __be32 *p; 2541 __be32 *p;
2585 2542
2586 READ_BUF(4); 2543 p = xdr_inline_decode(xdr, 4);
2587 READ32(*attrlen); 2544 if (unlikely(!p))
2545 goto out_overflow;
2546 *attrlen = be32_to_cpup(p);
2588 *savep = xdr->p; 2547 *savep = xdr->p;
2589 return 0; 2548 return 0;
2549out_overflow:
2550 print_overflow_msg(__func__, xdr);
2551 return -EIO;
2590} 2552}
2591 2553
2592static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2554static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -2609,8 +2571,10 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2609 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) 2571 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
2610 return -EIO; 2572 return -EIO;
2611 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { 2573 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
2612 READ_BUF(4); 2574 p = xdr_inline_decode(xdr, 4);
2613 READ32(*type); 2575 if (unlikely(!p))
2576 goto out_overflow;
2577 *type = be32_to_cpup(p);
2614 if (*type < NF4REG || *type > NF4NAMEDATTR) { 2578 if (*type < NF4REG || *type > NF4NAMEDATTR) {
2615 dprintk("%s: bad type %d\n", __func__, *type); 2579 dprintk("%s: bad type %d\n", __func__, *type);
2616 return -EIO; 2580 return -EIO;
@@ -2620,6 +2584,9 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2620 } 2584 }
2621 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); 2585 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
2622 return ret; 2586 return ret;
2587out_overflow:
2588 print_overflow_msg(__func__, xdr);
2589 return -EIO;
2623} 2590}
2624 2591
2625static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 2592static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -2631,14 +2598,19 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2631 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) 2598 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
2632 return -EIO; 2599 return -EIO;
2633 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { 2600 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
2634 READ_BUF(8); 2601 p = xdr_inline_decode(xdr, 8);
2635 READ64(*change); 2602 if (unlikely(!p))
2603 goto out_overflow;
2604 xdr_decode_hyper(p, change);
2636 bitmap[0] &= ~FATTR4_WORD0_CHANGE; 2605 bitmap[0] &= ~FATTR4_WORD0_CHANGE;
2637 ret = NFS_ATTR_FATTR_CHANGE; 2606 ret = NFS_ATTR_FATTR_CHANGE;
2638 } 2607 }
2639 dprintk("%s: change attribute=%Lu\n", __func__, 2608 dprintk("%s: change attribute=%Lu\n", __func__,
2640 (unsigned long long)*change); 2609 (unsigned long long)*change);
2641 return ret; 2610 return ret;
2611out_overflow:
2612 print_overflow_msg(__func__, xdr);
2613 return -EIO;
2642} 2614}
2643 2615
2644static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) 2616static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -2650,13 +2622,18 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
2650 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) 2622 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
2651 return -EIO; 2623 return -EIO;
2652 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { 2624 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
2653 READ_BUF(8); 2625 p = xdr_inline_decode(xdr, 8);
2654 READ64(*size); 2626 if (unlikely(!p))
2627 goto out_overflow;
2628 xdr_decode_hyper(p, size);
2655 bitmap[0] &= ~FATTR4_WORD0_SIZE; 2629 bitmap[0] &= ~FATTR4_WORD0_SIZE;
2656 ret = NFS_ATTR_FATTR_SIZE; 2630 ret = NFS_ATTR_FATTR_SIZE;
2657 } 2631 }
2658 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); 2632 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
2659 return ret; 2633 return ret;
2634out_overflow:
2635 print_overflow_msg(__func__, xdr);
2636 return -EIO;
2660} 2637}
2661 2638
2662static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2639static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2667,12 +2644,17 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
2667 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U))) 2644 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
2668 return -EIO; 2645 return -EIO;
2669 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { 2646 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
2670 READ_BUF(4); 2647 p = xdr_inline_decode(xdr, 4);
2671 READ32(*res); 2648 if (unlikely(!p))
2649 goto out_overflow;
2650 *res = be32_to_cpup(p);
2672 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; 2651 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
2673 } 2652 }
2674 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); 2653 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
2675 return 0; 2654 return 0;
2655out_overflow:
2656 print_overflow_msg(__func__, xdr);
2657 return -EIO;
2676} 2658}
2677 2659
2678static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2660static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2683,12 +2665,17 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
2683 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U))) 2665 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
2684 return -EIO; 2666 return -EIO;
2685 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { 2667 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
2686 READ_BUF(4); 2668 p = xdr_inline_decode(xdr, 4);
2687 READ32(*res); 2669 if (unlikely(!p))
2670 goto out_overflow;
2671 *res = be32_to_cpup(p);
2688 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; 2672 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
2689 } 2673 }
2690 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); 2674 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
2691 return 0; 2675 return 0;
2676out_overflow:
2677 print_overflow_msg(__func__, xdr);
2678 return -EIO;
2692} 2679}
2693 2680
2694static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) 2681static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -2701,9 +2688,11 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2701 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U))) 2688 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
2702 return -EIO; 2689 return -EIO;
2703 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { 2690 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
2704 READ_BUF(16); 2691 p = xdr_inline_decode(xdr, 16);
2705 READ64(fsid->major); 2692 if (unlikely(!p))
2706 READ64(fsid->minor); 2693 goto out_overflow;
2694 p = xdr_decode_hyper(p, &fsid->major);
2695 xdr_decode_hyper(p, &fsid->minor);
2707 bitmap[0] &= ~FATTR4_WORD0_FSID; 2696 bitmap[0] &= ~FATTR4_WORD0_FSID;
2708 ret = NFS_ATTR_FATTR_FSID; 2697 ret = NFS_ATTR_FATTR_FSID;
2709 } 2698 }
@@ -2711,6 +2700,9 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2711 (unsigned long long)fsid->major, 2700 (unsigned long long)fsid->major,
2712 (unsigned long long)fsid->minor); 2701 (unsigned long long)fsid->minor);
2713 return ret; 2702 return ret;
2703out_overflow:
2704 print_overflow_msg(__func__, xdr);
2705 return -EIO;
2714} 2706}
2715 2707
2716static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2708static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2721,12 +2713,17 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
2721 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U))) 2713 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
2722 return -EIO; 2714 return -EIO;
2723 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { 2715 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
2724 READ_BUF(4); 2716 p = xdr_inline_decode(xdr, 4);
2725 READ32(*res); 2717 if (unlikely(!p))
2718 goto out_overflow;
2719 *res = be32_to_cpup(p);
2726 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; 2720 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
2727 } 2721 }
2728 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); 2722 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
2729 return 0; 2723 return 0;
2724out_overflow:
2725 print_overflow_msg(__func__, xdr);
2726 return -EIO;
2730} 2727}
2731 2728
2732static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2729static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2737,12 +2734,17 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
2737 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) 2734 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
2738 return -EIO; 2735 return -EIO;
2739 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { 2736 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
2740 READ_BUF(4); 2737 p = xdr_inline_decode(xdr, 4);
2741 READ32(*res); 2738 if (unlikely(!p))
2739 goto out_overflow;
2740 *res = be32_to_cpup(p);
2742 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; 2741 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
2743 } 2742 }
2744 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); 2743 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
2745 return 0; 2744 return 0;
2745out_overflow:
2746 print_overflow_msg(__func__, xdr);
2747 return -EIO;
2746} 2748}
2747 2749
2748static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2750static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2754,13 +2756,18 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2754 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) 2756 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
2755 return -EIO; 2757 return -EIO;
2756 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { 2758 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
2757 READ_BUF(8); 2759 p = xdr_inline_decode(xdr, 8);
2758 READ64(*fileid); 2760 if (unlikely(!p))
2761 goto out_overflow;
2762 xdr_decode_hyper(p, fileid);
2759 bitmap[0] &= ~FATTR4_WORD0_FILEID; 2763 bitmap[0] &= ~FATTR4_WORD0_FILEID;
2760 ret = NFS_ATTR_FATTR_FILEID; 2764 ret = NFS_ATTR_FATTR_FILEID;
2761 } 2765 }
2762 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2766 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2763 return ret; 2767 return ret;
2768out_overflow:
2769 print_overflow_msg(__func__, xdr);
2770 return -EIO;
2764} 2771}
2765 2772
2766static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2773static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2772,13 +2779,18 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
2772 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) 2779 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
2773 return -EIO; 2780 return -EIO;
2774 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { 2781 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
2775 READ_BUF(8); 2782 p = xdr_inline_decode(xdr, 8);
2776 READ64(*fileid); 2783 if (unlikely(!p))
2784 goto out_overflow;
2785 xdr_decode_hyper(p, fileid);
2777 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 2786 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
2778 ret = NFS_ATTR_FATTR_FILEID; 2787 ret = NFS_ATTR_FATTR_FILEID;
2779 } 2788 }
2780 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2789 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2781 return ret; 2790 return ret;
2791out_overflow:
2792 print_overflow_msg(__func__, xdr);
2793 return -EIO;
2782} 2794}
2783 2795
2784static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2796static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2790,12 +2802,17 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
2790 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) 2802 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
2791 return -EIO; 2803 return -EIO;
2792 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { 2804 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
2793 READ_BUF(8); 2805 p = xdr_inline_decode(xdr, 8);
2794 READ64(*res); 2806 if (unlikely(!p))
2807 goto out_overflow;
2808 xdr_decode_hyper(p, res);
2795 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; 2809 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
2796 } 2810 }
2797 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); 2811 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
2798 return status; 2812 return status;
2813out_overflow:
2814 print_overflow_msg(__func__, xdr);
2815 return -EIO;
2799} 2816}
2800 2817
2801static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2818static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2807,12 +2824,17 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
2807 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) 2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
2808 return -EIO; 2825 return -EIO;
2809 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { 2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
2810 READ_BUF(8); 2827 p = xdr_inline_decode(xdr, 8);
2811 READ64(*res); 2828 if (unlikely(!p))
2829 goto out_overflow;
2830 xdr_decode_hyper(p, res);
2812 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; 2831 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
2813 } 2832 }
2814 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); 2833 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
2815 return status; 2834 return status;
2835out_overflow:
2836 print_overflow_msg(__func__, xdr);
2837 return -EIO;
2816} 2838}
2817 2839
2818static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2840static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2824,12 +2846,17 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) 2846 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
2825 return -EIO; 2847 return -EIO;
2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { 2848 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
2827 READ_BUF(8); 2849 p = xdr_inline_decode(xdr, 8);
2828 READ64(*res); 2850 if (unlikely(!p))
2851 goto out_overflow;
2852 xdr_decode_hyper(p, res);
2829 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; 2853 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
2830 } 2854 }
2831 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); 2855 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
2832 return status; 2856 return status;
2857out_overflow:
2858 print_overflow_msg(__func__, xdr);
2859 return -EIO;
2833} 2860}
2834 2861
2835static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) 2862static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -2838,8 +2865,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
2838 __be32 *p; 2865 __be32 *p;
2839 int status = 0; 2866 int status = 0;
2840 2867
2841 READ_BUF(4); 2868 p = xdr_inline_decode(xdr, 4);
2842 READ32(n); 2869 if (unlikely(!p))
2870 goto out_overflow;
2871 n = be32_to_cpup(p);
2843 if (n == 0) 2872 if (n == 0)
2844 goto root_path; 2873 goto root_path;
2845 dprintk("path "); 2874 dprintk("path ");
@@ -2873,6 +2902,9 @@ out_eio:
2873 dprintk(" status %d", status); 2902 dprintk(" status %d", status);
2874 status = -EIO; 2903 status = -EIO;
2875 goto out; 2904 goto out;
2905out_overflow:
2906 print_overflow_msg(__func__, xdr);
2907 return -EIO;
2876} 2908}
2877 2909
2878static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) 2910static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -2890,8 +2922,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2890 status = decode_pathname(xdr, &res->fs_path); 2922 status = decode_pathname(xdr, &res->fs_path);
2891 if (unlikely(status != 0)) 2923 if (unlikely(status != 0))
2892 goto out; 2924 goto out;
2893 READ_BUF(4); 2925 p = xdr_inline_decode(xdr, 4);
2894 READ32(n); 2926 if (unlikely(!p))
2927 goto out_overflow;
2928 n = be32_to_cpup(p);
2895 if (n <= 0) 2929 if (n <= 0)
2896 goto out_eio; 2930 goto out_eio;
2897 res->nlocations = 0; 2931 res->nlocations = 0;
@@ -2899,8 +2933,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2899 u32 m; 2933 u32 m;
2900 struct nfs4_fs_location *loc = &res->locations[res->nlocations]; 2934 struct nfs4_fs_location *loc = &res->locations[res->nlocations];
2901 2935
2902 READ_BUF(4); 2936 p = xdr_inline_decode(xdr, 4);
2903 READ32(m); 2937 if (unlikely(!p))
2938 goto out_overflow;
2939 m = be32_to_cpup(p);
2904 2940
2905 loc->nservers = 0; 2941 loc->nservers = 0;
2906 dprintk("%s: servers ", __func__); 2942 dprintk("%s: servers ", __func__);
@@ -2939,6 +2975,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2939out: 2975out:
2940 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 2976 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
2941 return status; 2977 return status;
2978out_overflow:
2979 print_overflow_msg(__func__, xdr);
2942out_eio: 2980out_eio:
2943 status = -EIO; 2981 status = -EIO;
2944 goto out; 2982 goto out;
@@ -2953,12 +2991,17 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
2953 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) 2991 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
2954 return -EIO; 2992 return -EIO;
2955 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { 2993 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
2956 READ_BUF(8); 2994 p = xdr_inline_decode(xdr, 8);
2957 READ64(*res); 2995 if (unlikely(!p))
2996 goto out_overflow;
2997 xdr_decode_hyper(p, res);
2958 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; 2998 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
2959 } 2999 }
2960 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); 3000 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
2961 return status; 3001 return status;
3002out_overflow:
3003 print_overflow_msg(__func__, xdr);
3004 return -EIO;
2962} 3005}
2963 3006
2964static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) 3007static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -2970,12 +3013,17 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2970 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) 3013 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
2971 return -EIO; 3014 return -EIO;
2972 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { 3015 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
2973 READ_BUF(4); 3016 p = xdr_inline_decode(xdr, 4);
2974 READ32(*maxlink); 3017 if (unlikely(!p))
3018 goto out_overflow;
3019 *maxlink = be32_to_cpup(p);
2975 bitmap[0] &= ~FATTR4_WORD0_MAXLINK; 3020 bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
2976 } 3021 }
2977 dprintk("%s: maxlink=%u\n", __func__, *maxlink); 3022 dprintk("%s: maxlink=%u\n", __func__, *maxlink);
2978 return status; 3023 return status;
3024out_overflow:
3025 print_overflow_msg(__func__, xdr);
3026 return -EIO;
2979} 3027}
2980 3028
2981static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) 3029static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -2987,12 +3035,17 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2987 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) 3035 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
2988 return -EIO; 3036 return -EIO;
2989 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { 3037 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
2990 READ_BUF(4); 3038 p = xdr_inline_decode(xdr, 4);
2991 READ32(*maxname); 3039 if (unlikely(!p))
3040 goto out_overflow;
3041 *maxname = be32_to_cpup(p);
2992 bitmap[0] &= ~FATTR4_WORD0_MAXNAME; 3042 bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
2993 } 3043 }
2994 dprintk("%s: maxname=%u\n", __func__, *maxname); 3044 dprintk("%s: maxname=%u\n", __func__, *maxname);
2995 return status; 3045 return status;
3046out_overflow:
3047 print_overflow_msg(__func__, xdr);
3048 return -EIO;
2996} 3049}
2997 3050
2998static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3051static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3005,8 +3058,10 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3005 return -EIO; 3058 return -EIO;
3006 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { 3059 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
3007 uint64_t maxread; 3060 uint64_t maxread;
3008 READ_BUF(8); 3061 p = xdr_inline_decode(xdr, 8);
3009 READ64(maxread); 3062 if (unlikely(!p))
3063 goto out_overflow;
3064 xdr_decode_hyper(p, &maxread);
3010 if (maxread > 0x7FFFFFFF) 3065 if (maxread > 0x7FFFFFFF)
3011 maxread = 0x7FFFFFFF; 3066 maxread = 0x7FFFFFFF;
3012 *res = (uint32_t)maxread; 3067 *res = (uint32_t)maxread;
@@ -3014,6 +3069,9 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3014 } 3069 }
3015 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); 3070 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
3016 return status; 3071 return status;
3072out_overflow:
3073 print_overflow_msg(__func__, xdr);
3074 return -EIO;
3017} 3075}
3018 3076
3019static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3077static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3026,8 +3084,10 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3026 return -EIO; 3084 return -EIO;
3027 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { 3085 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
3028 uint64_t maxwrite; 3086 uint64_t maxwrite;
3029 READ_BUF(8); 3087 p = xdr_inline_decode(xdr, 8);
3030 READ64(maxwrite); 3088 if (unlikely(!p))
3089 goto out_overflow;
3090 xdr_decode_hyper(p, &maxwrite);
3031 if (maxwrite > 0x7FFFFFFF) 3091 if (maxwrite > 0x7FFFFFFF)
3032 maxwrite = 0x7FFFFFFF; 3092 maxwrite = 0x7FFFFFFF;
3033 *res = (uint32_t)maxwrite; 3093 *res = (uint32_t)maxwrite;
@@ -3035,6 +3095,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3035 } 3095 }
3036 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); 3096 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
3037 return status; 3097 return status;
3098out_overflow:
3099 print_overflow_msg(__func__, xdr);
3100 return -EIO;
3038} 3101}
3039 3102
3040static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) 3103static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3047,14 +3110,19 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
3047 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) 3110 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
3048 return -EIO; 3111 return -EIO;
3049 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { 3112 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
3050 READ_BUF(4); 3113 p = xdr_inline_decode(xdr, 4);
3051 READ32(tmp); 3114 if (unlikely(!p))
3115 goto out_overflow;
3116 tmp = be32_to_cpup(p);
3052 *mode = tmp & ~S_IFMT; 3117 *mode = tmp & ~S_IFMT;
3053 bitmap[1] &= ~FATTR4_WORD1_MODE; 3118 bitmap[1] &= ~FATTR4_WORD1_MODE;
3054 ret = NFS_ATTR_FATTR_MODE; 3119 ret = NFS_ATTR_FATTR_MODE;
3055 } 3120 }
3056 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); 3121 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
3057 return ret; 3122 return ret;
3123out_overflow:
3124 print_overflow_msg(__func__, xdr);
3125 return -EIO;
3058} 3126}
3059 3127
3060static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) 3128static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3066,16 +3134,22 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
3066 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) 3134 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
3067 return -EIO; 3135 return -EIO;
3068 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { 3136 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
3069 READ_BUF(4); 3137 p = xdr_inline_decode(xdr, 4);
3070 READ32(*nlink); 3138 if (unlikely(!p))
3139 goto out_overflow;
3140 *nlink = be32_to_cpup(p);
3071 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; 3141 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
3072 ret = NFS_ATTR_FATTR_NLINK; 3142 ret = NFS_ATTR_FATTR_NLINK;
3073 } 3143 }
3074 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); 3144 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
3075 return ret; 3145 return ret;
3146out_overflow:
3147 print_overflow_msg(__func__, xdr);
3148 return -EIO;
3076} 3149}
3077 3150
3078static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) 3151static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3152 struct nfs_client *clp, uint32_t *uid, int may_sleep)
3079{ 3153{
3080 uint32_t len; 3154 uint32_t len;
3081 __be32 *p; 3155 __be32 *p;
@@ -3085,10 +3159,16 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3085 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 3159 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
3086 return -EIO; 3160 return -EIO;
3087 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { 3161 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
3088 READ_BUF(4); 3162 p = xdr_inline_decode(xdr, 4);
3089 READ32(len); 3163 if (unlikely(!p))
3090 READ_BUF(len); 3164 goto out_overflow;
3091 if (len < XDR_MAX_NETOBJ) { 3165 len = be32_to_cpup(p);
3166 p = xdr_inline_decode(xdr, len);
3167 if (unlikely(!p))
3168 goto out_overflow;
3169 if (!may_sleep) {
3170 /* do nothing */
3171 } else if (len < XDR_MAX_NETOBJ) {
3092 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) 3172 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
3093 ret = NFS_ATTR_FATTR_OWNER; 3173 ret = NFS_ATTR_FATTR_OWNER;
3094 else 3174 else
@@ -3101,9 +3181,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3101 } 3181 }
3102 dprintk("%s: uid=%d\n", __func__, (int)*uid); 3182 dprintk("%s: uid=%d\n", __func__, (int)*uid);
3103 return ret; 3183 return ret;
3184out_overflow:
3185 print_overflow_msg(__func__, xdr);
3186 return -EIO;
3104} 3187}
3105 3188
3106static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) 3189static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3190 struct nfs_client *clp, uint32_t *gid, int may_sleep)
3107{ 3191{
3108 uint32_t len; 3192 uint32_t len;
3109 __be32 *p; 3193 __be32 *p;
@@ -3113,10 +3197,16 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3113 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 3197 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
3114 return -EIO; 3198 return -EIO;
3115 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { 3199 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
3116 READ_BUF(4); 3200 p = xdr_inline_decode(xdr, 4);
3117 READ32(len); 3201 if (unlikely(!p))
3118 READ_BUF(len); 3202 goto out_overflow;
3119 if (len < XDR_MAX_NETOBJ) { 3203 len = be32_to_cpup(p);
3204 p = xdr_inline_decode(xdr, len);
3205 if (unlikely(!p))
3206 goto out_overflow;
3207 if (!may_sleep) {
3208 /* do nothing */
3209 } else if (len < XDR_MAX_NETOBJ) {
3120 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) 3210 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
3121 ret = NFS_ATTR_FATTR_GROUP; 3211 ret = NFS_ATTR_FATTR_GROUP;
3122 else 3212 else
@@ -3129,6 +3219,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3129 } 3219 }
3130 dprintk("%s: gid=%d\n", __func__, (int)*gid); 3220 dprintk("%s: gid=%d\n", __func__, (int)*gid);
3131 return ret; 3221 return ret;
3222out_overflow:
3223 print_overflow_msg(__func__, xdr);
3224 return -EIO;
3132} 3225}
3133 3226
3134static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) 3227static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
@@ -3143,9 +3236,11 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3143 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { 3236 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
3144 dev_t tmp; 3237 dev_t tmp;
3145 3238
3146 READ_BUF(8); 3239 p = xdr_inline_decode(xdr, 8);
3147 READ32(major); 3240 if (unlikely(!p))
3148 READ32(minor); 3241 goto out_overflow;
3242 major = be32_to_cpup(p++);
3243 minor = be32_to_cpup(p);
3149 tmp = MKDEV(major, minor); 3244 tmp = MKDEV(major, minor);
3150 if (MAJOR(tmp) == major && MINOR(tmp) == minor) 3245 if (MAJOR(tmp) == major && MINOR(tmp) == minor)
3151 *rdev = tmp; 3246 *rdev = tmp;
@@ -3154,6 +3249,9 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3154 } 3249 }
3155 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); 3250 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
3156 return ret; 3251 return ret;
3252out_overflow:
3253 print_overflow_msg(__func__, xdr);
3254 return -EIO;
3157} 3255}
3158 3256
3159static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3257static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3165,12 +3263,17 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
3165 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) 3263 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
3166 return -EIO; 3264 return -EIO;
3167 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { 3265 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
3168 READ_BUF(8); 3266 p = xdr_inline_decode(xdr, 8);
3169 READ64(*res); 3267 if (unlikely(!p))
3268 goto out_overflow;
3269 xdr_decode_hyper(p, res);
3170 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; 3270 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
3171 } 3271 }
3172 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); 3272 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
3173 return status; 3273 return status;
3274out_overflow:
3275 print_overflow_msg(__func__, xdr);
3276 return -EIO;
3174} 3277}
3175 3278
3176static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3279static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3182,12 +3285,17 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
3182 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) 3285 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
3183 return -EIO; 3286 return -EIO;
3184 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { 3287 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
3185 READ_BUF(8); 3288 p = xdr_inline_decode(xdr, 8);
3186 READ64(*res); 3289 if (unlikely(!p))
3290 goto out_overflow;
3291 xdr_decode_hyper(p, res);
3187 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; 3292 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
3188 } 3293 }
3189 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); 3294 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
3190 return status; 3295 return status;
3296out_overflow:
3297 print_overflow_msg(__func__, xdr);
3298 return -EIO;
3191} 3299}
3192 3300
3193static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3301static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3199,12 +3307,17 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
3199 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) 3307 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
3200 return -EIO; 3308 return -EIO;
3201 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { 3309 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
3202 READ_BUF(8); 3310 p = xdr_inline_decode(xdr, 8);
3203 READ64(*res); 3311 if (unlikely(!p))
3312 goto out_overflow;
3313 xdr_decode_hyper(p, res);
3204 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; 3314 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
3205 } 3315 }
3206 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); 3316 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
3207 return status; 3317 return status;
3318out_overflow:
3319 print_overflow_msg(__func__, xdr);
3320 return -EIO;
3208} 3321}
3209 3322
3210static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) 3323static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -3216,14 +3329,19 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
3216 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) 3329 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
3217 return -EIO; 3330 return -EIO;
3218 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { 3331 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
3219 READ_BUF(8); 3332 p = xdr_inline_decode(xdr, 8);
3220 READ64(*used); 3333 if (unlikely(!p))
3334 goto out_overflow;
3335 xdr_decode_hyper(p, used);
3221 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; 3336 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
3222 ret = NFS_ATTR_FATTR_SPACE_USED; 3337 ret = NFS_ATTR_FATTR_SPACE_USED;
3223 } 3338 }
3224 dprintk("%s: space used=%Lu\n", __func__, 3339 dprintk("%s: space used=%Lu\n", __func__,
3225 (unsigned long long)*used); 3340 (unsigned long long)*used);
3226 return ret; 3341 return ret;
3342out_overflow:
3343 print_overflow_msg(__func__, xdr);
3344 return -EIO;
3227} 3345}
3228 3346
3229static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 3347static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -3232,12 +3350,17 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
3232 uint64_t sec; 3350 uint64_t sec;
3233 uint32_t nsec; 3351 uint32_t nsec;
3234 3352
3235 READ_BUF(12); 3353 p = xdr_inline_decode(xdr, 12);
3236 READ64(sec); 3354 if (unlikely(!p))
3237 READ32(nsec); 3355 goto out_overflow;
3356 p = xdr_decode_hyper(p, &sec);
3357 nsec = be32_to_cpup(p);
3238 time->tv_sec = (time_t)sec; 3358 time->tv_sec = (time_t)sec;
3239 time->tv_nsec = (long)nsec; 3359 time->tv_nsec = (long)nsec;
3240 return 0; 3360 return 0;
3361out_overflow:
3362 print_overflow_msg(__func__, xdr);
3363 return -EIO;
3241} 3364}
3242 3365
3243static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3366static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -3315,11 +3438,16 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
3315{ 3438{
3316 __be32 *p; 3439 __be32 *p;
3317 3440
3318 READ_BUF(20); 3441 p = xdr_inline_decode(xdr, 20);
3319 READ32(cinfo->atomic); 3442 if (unlikely(!p))
3320 READ64(cinfo->before); 3443 goto out_overflow;
3321 READ64(cinfo->after); 3444 cinfo->atomic = be32_to_cpup(p++);
3445 p = xdr_decode_hyper(p, &cinfo->before);
3446 xdr_decode_hyper(p, &cinfo->after);
3322 return 0; 3447 return 0;
3448out_overflow:
3449 print_overflow_msg(__func__, xdr);
3450 return -EIO;
3323} 3451}
3324 3452
3325static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) 3453static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
@@ -3331,40 +3459,62 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
3331 status = decode_op_hdr(xdr, OP_ACCESS); 3459 status = decode_op_hdr(xdr, OP_ACCESS);
3332 if (status) 3460 if (status)
3333 return status; 3461 return status;
3334 READ_BUF(8); 3462 p = xdr_inline_decode(xdr, 8);
3335 READ32(supp); 3463 if (unlikely(!p))
3336 READ32(acc); 3464 goto out_overflow;
3465 supp = be32_to_cpup(p++);
3466 acc = be32_to_cpup(p);
3337 access->supported = supp; 3467 access->supported = supp;
3338 access->access = acc; 3468 access->access = acc;
3339 return 0; 3469 return 0;
3470out_overflow:
3471 print_overflow_msg(__func__, xdr);
3472 return -EIO;
3340} 3473}
3341 3474
3342static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) 3475static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
3343{ 3476{
3344 __be32 *p; 3477 __be32 *p;
3478
3479 p = xdr_inline_decode(xdr, len);
3480 if (likely(p)) {
3481 memcpy(buf, p, len);
3482 return 0;
3483 }
3484 print_overflow_msg(__func__, xdr);
3485 return -EIO;
3486}
3487
3488static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
3489{
3490 return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
3491}
3492
3493static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
3494{
3345 int status; 3495 int status;
3346 3496
3347 status = decode_op_hdr(xdr, OP_CLOSE); 3497 status = decode_op_hdr(xdr, OP_CLOSE);
3348 if (status != -EIO) 3498 if (status != -EIO)
3349 nfs_increment_open_seqid(status, res->seqid); 3499 nfs_increment_open_seqid(status, res->seqid);
3350 if (status) 3500 if (!status)
3351 return status; 3501 status = decode_stateid(xdr, &res->stateid);
3352 READ_BUF(NFS4_STATEID_SIZE); 3502 return status;
3353 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3503}
3354 return 0; 3504
3505static int decode_verifier(struct xdr_stream *xdr, void *verifier)
3506{
3507 return decode_opaque_fixed(xdr, verifier, 8);
3355} 3508}
3356 3509
3357static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 3510static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
3358{ 3511{
3359 __be32 *p;
3360 int status; 3512 int status;
3361 3513
3362 status = decode_op_hdr(xdr, OP_COMMIT); 3514 status = decode_op_hdr(xdr, OP_COMMIT);
3363 if (status) 3515 if (!status)
3364 return status; 3516 status = decode_verifier(xdr, res->verf->verifier);
3365 READ_BUF(8); 3517 return status;
3366 COPYMEM(res->verf->verifier, 8);
3367 return 0;
3368} 3518}
3369 3519
3370static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3520static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3378,10 +3528,16 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3378 return status; 3528 return status;
3379 if ((status = decode_change_info(xdr, cinfo))) 3529 if ((status = decode_change_info(xdr, cinfo)))
3380 return status; 3530 return status;
3381 READ_BUF(4); 3531 p = xdr_inline_decode(xdr, 4);
3382 READ32(bmlen); 3532 if (unlikely(!p))
3383 READ_BUF(bmlen << 2); 3533 goto out_overflow;
3384 return 0; 3534 bmlen = be32_to_cpup(p);
3535 p = xdr_inline_decode(xdr, bmlen << 2);
3536 if (likely(p))
3537 return 0;
3538out_overflow:
3539 print_overflow_msg(__func__, xdr);
3540 return -EIO;
3385} 3541}
3386 3542
3387static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) 3543static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
@@ -3466,7 +3622,8 @@ xdr_error:
3466 return status; 3622 return status;
3467} 3623}
3468 3624
3469static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) 3625static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3626 const struct nfs_server *server, int may_sleep)
3470{ 3627{
3471 __be32 *savep; 3628 __be32 *savep;
3472 uint32_t attrlen, 3629 uint32_t attrlen,
@@ -3538,12 +3695,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
3538 goto xdr_error; 3695 goto xdr_error;
3539 fattr->valid |= status; 3696 fattr->valid |= status;
3540 3697
3541 status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid); 3698 status = decode_attr_owner(xdr, bitmap, server->nfs_client,
3699 &fattr->uid, may_sleep);
3542 if (status < 0) 3700 if (status < 0)
3543 goto xdr_error; 3701 goto xdr_error;
3544 fattr->valid |= status; 3702 fattr->valid |= status;
3545 3703
3546 status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid); 3704 status = decode_attr_group(xdr, bitmap, server->nfs_client,
3705 &fattr->gid, may_sleep);
3547 if (status < 0) 3706 if (status < 0)
3548 goto xdr_error; 3707 goto xdr_error;
3549 fattr->valid |= status; 3708 fattr->valid |= status;
@@ -3633,14 +3792,21 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
3633 if (status) 3792 if (status)
3634 return status; 3793 return status;
3635 3794
3636 READ_BUF(4); 3795 p = xdr_inline_decode(xdr, 4);
3637 READ32(len); 3796 if (unlikely(!p))
3797 goto out_overflow;
3798 len = be32_to_cpup(p);
3638 if (len > NFS4_FHSIZE) 3799 if (len > NFS4_FHSIZE)
3639 return -EIO; 3800 return -EIO;
3640 fh->size = len; 3801 fh->size = len;
3641 READ_BUF(len); 3802 p = xdr_inline_decode(xdr, len);
3642 COPYMEM(fh->data, len); 3803 if (unlikely(!p))
3804 goto out_overflow;
3805 memcpy(fh->data, p, len);
3643 return 0; 3806 return 0;
3807out_overflow:
3808 print_overflow_msg(__func__, xdr);
3809 return -EIO;
3644} 3810}
3645 3811
3646static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3812static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3662,10 +3828,12 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3662 __be32 *p; 3828 __be32 *p;
3663 uint32_t namelen, type; 3829 uint32_t namelen, type;
3664 3830
3665 READ_BUF(32); 3831 p = xdr_inline_decode(xdr, 32);
3666 READ64(offset); 3832 if (unlikely(!p))
3667 READ64(length); 3833 goto out_overflow;
3668 READ32(type); 3834 p = xdr_decode_hyper(p, &offset);
3835 p = xdr_decode_hyper(p, &length);
3836 type = be32_to_cpup(p++);
3669 if (fl != NULL) { 3837 if (fl != NULL) {
3670 fl->fl_start = (loff_t)offset; 3838 fl->fl_start = (loff_t)offset;
3671 fl->fl_end = fl->fl_start + (loff_t)length - 1; 3839 fl->fl_end = fl->fl_start + (loff_t)length - 1;
@@ -3676,23 +3844,27 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3676 fl->fl_type = F_RDLCK; 3844 fl->fl_type = F_RDLCK;
3677 fl->fl_pid = 0; 3845 fl->fl_pid = 0;
3678 } 3846 }
3679 READ64(clientid); 3847 p = xdr_decode_hyper(p, &clientid);
3680 READ32(namelen); 3848 namelen = be32_to_cpup(p);
3681 READ_BUF(namelen); 3849 p = xdr_inline_decode(xdr, namelen);
3682 return -NFS4ERR_DENIED; 3850 if (likely(p))
3851 return -NFS4ERR_DENIED;
3852out_overflow:
3853 print_overflow_msg(__func__, xdr);
3854 return -EIO;
3683} 3855}
3684 3856
3685static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) 3857static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
3686{ 3858{
3687 __be32 *p;
3688 int status; 3859 int status;
3689 3860
3690 status = decode_op_hdr(xdr, OP_LOCK); 3861 status = decode_op_hdr(xdr, OP_LOCK);
3691 if (status == -EIO) 3862 if (status == -EIO)
3692 goto out; 3863 goto out;
3693 if (status == 0) { 3864 if (status == 0) {
3694 READ_BUF(NFS4_STATEID_SIZE); 3865 status = decode_stateid(xdr, &res->stateid);
3695 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3866 if (unlikely(status))
3867 goto out;
3696 } else if (status == -NFS4ERR_DENIED) 3868 } else if (status == -NFS4ERR_DENIED)
3697 status = decode_lock_denied(xdr, NULL); 3869 status = decode_lock_denied(xdr, NULL);
3698 if (res->open_seqid != NULL) 3870 if (res->open_seqid != NULL)
@@ -3713,16 +3885,13 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
3713 3885
3714static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) 3886static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3715{ 3887{
3716 __be32 *p;
3717 int status; 3888 int status;
3718 3889
3719 status = decode_op_hdr(xdr, OP_LOCKU); 3890 status = decode_op_hdr(xdr, OP_LOCKU);
3720 if (status != -EIO) 3891 if (status != -EIO)
3721 nfs_increment_lock_seqid(status, res->seqid); 3892 nfs_increment_lock_seqid(status, res->seqid);
3722 if (status == 0) { 3893 if (status == 0)
3723 READ_BUF(NFS4_STATEID_SIZE); 3894 status = decode_stateid(xdr, &res->stateid);
3724 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3725 }
3726 return status; 3895 return status;
3727} 3896}
3728 3897
@@ -3737,34 +3906,46 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
3737 __be32 *p; 3906 __be32 *p;
3738 uint32_t limit_type, nblocks, blocksize; 3907 uint32_t limit_type, nblocks, blocksize;
3739 3908
3740 READ_BUF(12); 3909 p = xdr_inline_decode(xdr, 12);
3741 READ32(limit_type); 3910 if (unlikely(!p))
3911 goto out_overflow;
3912 limit_type = be32_to_cpup(p++);
3742 switch (limit_type) { 3913 switch (limit_type) {
3743 case 1: 3914 case 1:
3744 READ64(*maxsize); 3915 xdr_decode_hyper(p, maxsize);
3745 break; 3916 break;
3746 case 2: 3917 case 2:
3747 READ32(nblocks); 3918 nblocks = be32_to_cpup(p++);
3748 READ32(blocksize); 3919 blocksize = be32_to_cpup(p);
3749 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; 3920 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
3750 } 3921 }
3751 return 0; 3922 return 0;
3923out_overflow:
3924 print_overflow_msg(__func__, xdr);
3925 return -EIO;
3752} 3926}
3753 3927
3754static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) 3928static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3755{ 3929{
3756 __be32 *p; 3930 __be32 *p;
3757 uint32_t delegation_type; 3931 uint32_t delegation_type;
3932 int status;
3758 3933
3759 READ_BUF(4); 3934 p = xdr_inline_decode(xdr, 4);
3760 READ32(delegation_type); 3935 if (unlikely(!p))
3936 goto out_overflow;
3937 delegation_type = be32_to_cpup(p);
3761 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { 3938 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
3762 res->delegation_type = 0; 3939 res->delegation_type = 0;
3763 return 0; 3940 return 0;
3764 } 3941 }
3765 READ_BUF(NFS4_STATEID_SIZE+4); 3942 status = decode_stateid(xdr, &res->delegation);
3766 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); 3943 if (unlikely(status))
3767 READ32(res->do_recall); 3944 return status;
3945 p = xdr_inline_decode(xdr, 4);
3946 if (unlikely(!p))
3947 goto out_overflow;
3948 res->do_recall = be32_to_cpup(p);
3768 3949
3769 switch (delegation_type) { 3950 switch (delegation_type) {
3770 case NFS4_OPEN_DELEGATE_READ: 3951 case NFS4_OPEN_DELEGATE_READ:
@@ -3776,6 +3957,9 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3776 return -EIO; 3957 return -EIO;
3777 } 3958 }
3778 return decode_ace(xdr, NULL, res->server->nfs_client); 3959 return decode_ace(xdr, NULL, res->server->nfs_client);
3960out_overflow:
3961 print_overflow_msg(__func__, xdr);
3962 return -EIO;
3779} 3963}
3780 3964
3781static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3965static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3787,23 +3971,27 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3787 status = decode_op_hdr(xdr, OP_OPEN); 3971 status = decode_op_hdr(xdr, OP_OPEN);
3788 if (status != -EIO) 3972 if (status != -EIO)
3789 nfs_increment_open_seqid(status, res->seqid); 3973 nfs_increment_open_seqid(status, res->seqid);
3790 if (status) 3974 if (!status)
3975 status = decode_stateid(xdr, &res->stateid);
3976 if (unlikely(status))
3791 return status; 3977 return status;
3792 READ_BUF(NFS4_STATEID_SIZE);
3793 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3794 3978
3795 decode_change_info(xdr, &res->cinfo); 3979 decode_change_info(xdr, &res->cinfo);
3796 3980
3797 READ_BUF(8); 3981 p = xdr_inline_decode(xdr, 8);
3798 READ32(res->rflags); 3982 if (unlikely(!p))
3799 READ32(bmlen); 3983 goto out_overflow;
3984 res->rflags = be32_to_cpup(p++);
3985 bmlen = be32_to_cpup(p);
3800 if (bmlen > 10) 3986 if (bmlen > 10)
3801 goto xdr_error; 3987 goto xdr_error;
3802 3988
3803 READ_BUF(bmlen << 2); 3989 p = xdr_inline_decode(xdr, bmlen << 2);
3990 if (unlikely(!p))
3991 goto out_overflow;
3804 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); 3992 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3805 for (i = 0; i < savewords; ++i) 3993 for (i = 0; i < savewords; ++i)
3806 READ32(res->attrset[i]); 3994 res->attrset[i] = be32_to_cpup(p++);
3807 for (; i < NFS4_BITMAP_SIZE; i++) 3995 for (; i < NFS4_BITMAP_SIZE; i++)
3808 res->attrset[i] = 0; 3996 res->attrset[i] = 0;
3809 3997
@@ -3811,36 +3999,33 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3811xdr_error: 3999xdr_error:
3812 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); 4000 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
3813 return -EIO; 4001 return -EIO;
4002out_overflow:
4003 print_overflow_msg(__func__, xdr);
4004 return -EIO;
3814} 4005}
3815 4006
3816static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) 4007static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
3817{ 4008{
3818 __be32 *p;
3819 int status; 4009 int status;
3820 4010
3821 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 4011 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3822 if (status != -EIO) 4012 if (status != -EIO)
3823 nfs_increment_open_seqid(status, res->seqid); 4013 nfs_increment_open_seqid(status, res->seqid);
3824 if (status) 4014 if (!status)
3825 return status; 4015 status = decode_stateid(xdr, &res->stateid);
3826 READ_BUF(NFS4_STATEID_SIZE); 4016 return status;
3827 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3828 return 0;
3829} 4017}
3830 4018
3831static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) 4019static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
3832{ 4020{
3833 __be32 *p;
3834 int status; 4021 int status;
3835 4022
3836 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); 4023 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
3837 if (status != -EIO) 4024 if (status != -EIO)
3838 nfs_increment_open_seqid(status, res->seqid); 4025 nfs_increment_open_seqid(status, res->seqid);
3839 if (status) 4026 if (!status)
3840 return status; 4027 status = decode_stateid(xdr, &res->stateid);
3841 READ_BUF(NFS4_STATEID_SIZE); 4028 return status;
3842 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3843 return 0;
3844} 4029}
3845 4030
3846static int decode_putfh(struct xdr_stream *xdr) 4031static int decode_putfh(struct xdr_stream *xdr)
@@ -3863,9 +4048,11 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3863 status = decode_op_hdr(xdr, OP_READ); 4048 status = decode_op_hdr(xdr, OP_READ);
3864 if (status) 4049 if (status)
3865 return status; 4050 return status;
3866 READ_BUF(8); 4051 p = xdr_inline_decode(xdr, 8);
3867 READ32(eof); 4052 if (unlikely(!p))
3868 READ32(count); 4053 goto out_overflow;
4054 eof = be32_to_cpup(p++);
4055 count = be32_to_cpup(p);
3869 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 4056 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
3870 recvd = req->rq_rcv_buf.len - hdrlen; 4057 recvd = req->rq_rcv_buf.len - hdrlen;
3871 if (count > recvd) { 4058 if (count > recvd) {
@@ -3878,6 +4065,9 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3878 res->eof = eof; 4065 res->eof = eof;
3879 res->count = count; 4066 res->count = count;
3880 return 0; 4067 return 0;
4068out_overflow:
4069 print_overflow_msg(__func__, xdr);
4070 return -EIO;
3881} 4071}
3882 4072
3883static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4073static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -3892,17 +4082,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3892 int status; 4082 int status;
3893 4083
3894 status = decode_op_hdr(xdr, OP_READDIR); 4084 status = decode_op_hdr(xdr, OP_READDIR);
3895 if (status) 4085 if (!status)
4086 status = decode_verifier(xdr, readdir->verifier.data);
4087 if (unlikely(status))
3896 return status; 4088 return status;
3897 READ_BUF(8);
3898 COPYMEM(readdir->verifier.data, 8);
3899 dprintk("%s: verifier = %08x:%08x\n", 4089 dprintk("%s: verifier = %08x:%08x\n",
3900 __func__, 4090 __func__,
3901 ((u32 *)readdir->verifier.data)[0], 4091 ((u32 *)readdir->verifier.data)[0],
3902 ((u32 *)readdir->verifier.data)[1]); 4092 ((u32 *)readdir->verifier.data)[1]);
3903 4093
3904 4094
3905 hdrlen = (char *) p - (char *) iov->iov_base; 4095 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
3906 recvd = rcvbuf->len - hdrlen; 4096 recvd = rcvbuf->len - hdrlen;
3907 if (pglen > recvd) 4097 if (pglen > recvd)
3908 pglen = recvd; 4098 pglen = recvd;
@@ -3990,8 +4180,10 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
3990 return status; 4180 return status;
3991 4181
3992 /* Convert length of symlink */ 4182 /* Convert length of symlink */
3993 READ_BUF(4); 4183 p = xdr_inline_decode(xdr, 4);
3994 READ32(len); 4184 if (unlikely(!p))
4185 goto out_overflow;
4186 len = be32_to_cpup(p);
3995 if (len >= rcvbuf->page_len || len <= 0) { 4187 if (len >= rcvbuf->page_len || len <= 0) {
3996 dprintk("nfs: server returned giant symlink!\n"); 4188 dprintk("nfs: server returned giant symlink!\n");
3997 return -ENAMETOOLONG; 4189 return -ENAMETOOLONG;
@@ -4015,6 +4207,9 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4015 kaddr[len+rcvbuf->page_base] = '\0'; 4207 kaddr[len+rcvbuf->page_base] = '\0';
4016 kunmap_atomic(kaddr, KM_USER0); 4208 kunmap_atomic(kaddr, KM_USER0);
4017 return 0; 4209 return 0;
4210out_overflow:
4211 print_overflow_msg(__func__, xdr);
4212 return -EIO;
4018} 4213}
4019 4214
4020static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 4215static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -4112,10 +4307,16 @@ static int decode_setattr(struct xdr_stream *xdr)
4112 status = decode_op_hdr(xdr, OP_SETATTR); 4307 status = decode_op_hdr(xdr, OP_SETATTR);
4113 if (status) 4308 if (status)
4114 return status; 4309 return status;
4115 READ_BUF(4); 4310 p = xdr_inline_decode(xdr, 4);
4116 READ32(bmlen); 4311 if (unlikely(!p))
4117 READ_BUF(bmlen << 2); 4312 goto out_overflow;
4118 return 0; 4313 bmlen = be32_to_cpup(p);
4314 p = xdr_inline_decode(xdr, bmlen << 2);
4315 if (likely(p))
4316 return 0;
4317out_overflow:
4318 print_overflow_msg(__func__, xdr);
4319 return -EIO;
4119} 4320}
4120 4321
4121static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) 4322static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
@@ -4124,35 +4325,50 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
4124 uint32_t opnum; 4325 uint32_t opnum;
4125 int32_t nfserr; 4326 int32_t nfserr;
4126 4327
4127 READ_BUF(8); 4328 p = xdr_inline_decode(xdr, 8);
4128 READ32(opnum); 4329 if (unlikely(!p))
4330 goto out_overflow;
4331 opnum = be32_to_cpup(p++);
4129 if (opnum != OP_SETCLIENTID) { 4332 if (opnum != OP_SETCLIENTID) {
4130 dprintk("nfs: decode_setclientid: Server returned operation" 4333 dprintk("nfs: decode_setclientid: Server returned operation"
4131 " %d\n", opnum); 4334 " %d\n", opnum);
4132 return -EIO; 4335 return -EIO;
4133 } 4336 }
4134 READ32(nfserr); 4337 nfserr = be32_to_cpup(p);
4135 if (nfserr == NFS_OK) { 4338 if (nfserr == NFS_OK) {
4136 READ_BUF(8 + NFS4_VERIFIER_SIZE); 4339 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
4137 READ64(clp->cl_clientid); 4340 if (unlikely(!p))
4138 COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE); 4341 goto out_overflow;
4342 p = xdr_decode_hyper(p, &clp->cl_clientid);
4343 memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
4139 } else if (nfserr == NFSERR_CLID_INUSE) { 4344 } else if (nfserr == NFSERR_CLID_INUSE) {
4140 uint32_t len; 4345 uint32_t len;
4141 4346
4142 /* skip netid string */ 4347 /* skip netid string */
4143 READ_BUF(4); 4348 p = xdr_inline_decode(xdr, 4);
4144 READ32(len); 4349 if (unlikely(!p))
4145 READ_BUF(len); 4350 goto out_overflow;
4351 len = be32_to_cpup(p);
4352 p = xdr_inline_decode(xdr, len);
4353 if (unlikely(!p))
4354 goto out_overflow;
4146 4355
4147 /* skip uaddr string */ 4356 /* skip uaddr string */
4148 READ_BUF(4); 4357 p = xdr_inline_decode(xdr, 4);
4149 READ32(len); 4358 if (unlikely(!p))
4150 READ_BUF(len); 4359 goto out_overflow;
4360 len = be32_to_cpup(p);
4361 p = xdr_inline_decode(xdr, len);
4362 if (unlikely(!p))
4363 goto out_overflow;
4151 return -NFSERR_CLID_INUSE; 4364 return -NFSERR_CLID_INUSE;
4152 } else 4365 } else
4153 return nfs4_stat_to_errno(nfserr); 4366 return nfs4_stat_to_errno(nfserr);
4154 4367
4155 return 0; 4368 return 0;
4369out_overflow:
4370 print_overflow_msg(__func__, xdr);
4371 return -EIO;
4156} 4372}
4157 4373
4158static int decode_setclientid_confirm(struct xdr_stream *xdr) 4374static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -4169,11 +4385,16 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
4169 if (status) 4385 if (status)
4170 return status; 4386 return status;
4171 4387
4172 READ_BUF(16); 4388 p = xdr_inline_decode(xdr, 16);
4173 READ32(res->count); 4389 if (unlikely(!p))
4174 READ32(res->verf->committed); 4390 goto out_overflow;
4175 COPYMEM(res->verf->verifier, 8); 4391 res->count = be32_to_cpup(p++);
4392 res->verf->committed = be32_to_cpup(p++);
4393 memcpy(res->verf->verifier, p, 8);
4176 return 0; 4394 return 0;
4395out_overflow:
4396 print_overflow_msg(__func__, xdr);
4397 return -EIO;
4177} 4398}
4178 4399
4179static int decode_delegreturn(struct xdr_stream *xdr) 4400static int decode_delegreturn(struct xdr_stream *xdr)
@@ -4187,6 +4408,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4187{ 4408{
4188 __be32 *p; 4409 __be32 *p;
4189 uint32_t dummy; 4410 uint32_t dummy;
4411 char *dummy_str;
4190 int status; 4412 int status;
4191 struct nfs_client *clp = res->client; 4413 struct nfs_client *clp = res->client;
4192 4414
@@ -4194,36 +4416,45 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4194 if (status) 4416 if (status)
4195 return status; 4417 return status;
4196 4418
4197 READ_BUF(8); 4419 p = xdr_inline_decode(xdr, 8);
4198 READ64(clp->cl_ex_clid); 4420 if (unlikely(!p))
4199 READ_BUF(12); 4421 goto out_overflow;
4200 READ32(clp->cl_seqid); 4422 xdr_decode_hyper(p, &clp->cl_ex_clid);
4201 READ32(clp->cl_exchange_flags); 4423 p = xdr_inline_decode(xdr, 12);
4424 if (unlikely(!p))
4425 goto out_overflow;
4426 clp->cl_seqid = be32_to_cpup(p++);
4427 clp->cl_exchange_flags = be32_to_cpup(p++);
4202 4428
4203 /* We ask for SP4_NONE */ 4429 /* We ask for SP4_NONE */
4204 READ32(dummy); 4430 dummy = be32_to_cpup(p);
4205 if (dummy != SP4_NONE) 4431 if (dummy != SP4_NONE)
4206 return -EIO; 4432 return -EIO;
4207 4433
4208 /* Throw away minor_id */ 4434 /* Throw away minor_id */
4209 READ_BUF(8); 4435 p = xdr_inline_decode(xdr, 8);
4436 if (unlikely(!p))
4437 goto out_overflow;
4210 4438
4211 /* Throw away Major id */ 4439 /* Throw away Major id */
4212 READ_BUF(4); 4440 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4213 READ32(dummy); 4441 if (unlikely(status))
4214 READ_BUF(dummy); 4442 return status;
4215 4443
4216 /* Throw away server_scope */ 4444 /* Throw away server_scope */
4217 READ_BUF(4); 4445 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4218 READ32(dummy); 4446 if (unlikely(status))
4219 READ_BUF(dummy); 4447 return status;
4220 4448
4221 /* Throw away Implementation id array */ 4449 /* Throw away Implementation id array */
4222 READ_BUF(4); 4450 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4223 READ32(dummy); 4451 if (unlikely(status))
4224 READ_BUF(dummy); 4452 return status;
4225 4453
4226 return 0; 4454 return 0;
4455out_overflow:
4456 print_overflow_msg(__func__, xdr);
4457 return -EIO;
4227} 4458}
4228 4459
4229static int decode_chan_attrs(struct xdr_stream *xdr, 4460static int decode_chan_attrs(struct xdr_stream *xdr,
@@ -4232,22 +4463,35 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
4232 __be32 *p; 4463 __be32 *p;
4233 u32 nr_attrs; 4464 u32 nr_attrs;
4234 4465
4235 READ_BUF(28); 4466 p = xdr_inline_decode(xdr, 28);
4236 READ32(attrs->headerpadsz); 4467 if (unlikely(!p))
4237 READ32(attrs->max_rqst_sz); 4468 goto out_overflow;
4238 READ32(attrs->max_resp_sz); 4469 attrs->headerpadsz = be32_to_cpup(p++);
4239 READ32(attrs->max_resp_sz_cached); 4470 attrs->max_rqst_sz = be32_to_cpup(p++);
4240 READ32(attrs->max_ops); 4471 attrs->max_resp_sz = be32_to_cpup(p++);
4241 READ32(attrs->max_reqs); 4472 attrs->max_resp_sz_cached = be32_to_cpup(p++);
4242 READ32(nr_attrs); 4473 attrs->max_ops = be32_to_cpup(p++);
4474 attrs->max_reqs = be32_to_cpup(p++);
4475 nr_attrs = be32_to_cpup(p);
4243 if (unlikely(nr_attrs > 1)) { 4476 if (unlikely(nr_attrs > 1)) {
4244 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", 4477 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
4245 __func__, nr_attrs); 4478 __func__, nr_attrs);
4246 return -EINVAL; 4479 return -EINVAL;
4247 } 4480 }
4248 if (nr_attrs == 1) 4481 if (nr_attrs == 1) {
4249 READ_BUF(4); /* skip rdma_attrs */ 4482 p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
4483 if (unlikely(!p))
4484 goto out_overflow;
4485 }
4250 return 0; 4486 return 0;
4487out_overflow:
4488 print_overflow_msg(__func__, xdr);
4489 return -EIO;
4490}
4491
4492static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
4493{
4494 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
4251} 4495}
4252 4496
4253static int decode_create_session(struct xdr_stream *xdr, 4497static int decode_create_session(struct xdr_stream *xdr,
@@ -4259,24 +4503,26 @@ static int decode_create_session(struct xdr_stream *xdr,
4259 struct nfs4_session *session = clp->cl_session; 4503 struct nfs4_session *session = clp->cl_session;
4260 4504
4261 status = decode_op_hdr(xdr, OP_CREATE_SESSION); 4505 status = decode_op_hdr(xdr, OP_CREATE_SESSION);
4262 4506 if (!status)
4263 if (status) 4507 status = decode_sessionid(xdr, &session->sess_id);
4508 if (unlikely(status))
4264 return status; 4509 return status;
4265 4510
4266 /* sessionid */
4267 READ_BUF(NFS4_MAX_SESSIONID_LEN);
4268 COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
4269
4270 /* seqid, flags */ 4511 /* seqid, flags */
4271 READ_BUF(8); 4512 p = xdr_inline_decode(xdr, 8);
4272 READ32(clp->cl_seqid); 4513 if (unlikely(!p))
4273 READ32(session->flags); 4514 goto out_overflow;
4515 clp->cl_seqid = be32_to_cpup(p++);
4516 session->flags = be32_to_cpup(p);
4274 4517
4275 /* Channel attributes */ 4518 /* Channel attributes */
4276 status = decode_chan_attrs(xdr, &session->fc_attrs); 4519 status = decode_chan_attrs(xdr, &session->fc_attrs);
4277 if (!status) 4520 if (!status)
4278 status = decode_chan_attrs(xdr, &session->bc_attrs); 4521 status = decode_chan_attrs(xdr, &session->bc_attrs);
4279 return status; 4522 return status;
4523out_overflow:
4524 print_overflow_msg(__func__, xdr);
4525 return -EIO;
4280} 4526}
4281 4527
4282static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) 4528static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -4300,7 +4546,9 @@ static int decode_sequence(struct xdr_stream *xdr,
4300 return 0; 4546 return 0;
4301 4547
4302 status = decode_op_hdr(xdr, OP_SEQUENCE); 4548 status = decode_op_hdr(xdr, OP_SEQUENCE);
4303 if (status) 4549 if (!status)
4550 status = decode_sessionid(xdr, &id);
4551 if (unlikely(status))
4304 goto out_err; 4552 goto out_err;
4305 4553
4306 /* 4554 /*
@@ -4309,36 +4557,43 @@ static int decode_sequence(struct xdr_stream *xdr,
4309 */ 4557 */
4310 status = -ESERVERFAULT; 4558 status = -ESERVERFAULT;
4311 4559
4312 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4313 READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
4314 COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
4315 if (memcmp(id.data, res->sr_session->sess_id.data, 4560 if (memcmp(id.data, res->sr_session->sess_id.data,
4316 NFS4_MAX_SESSIONID_LEN)) { 4561 NFS4_MAX_SESSIONID_LEN)) {
4317 dprintk("%s Invalid session id\n", __func__); 4562 dprintk("%s Invalid session id\n", __func__);
4318 goto out_err; 4563 goto out_err;
4319 } 4564 }
4565
4566 p = xdr_inline_decode(xdr, 20);
4567 if (unlikely(!p))
4568 goto out_overflow;
4569
4320 /* seqid */ 4570 /* seqid */
4321 READ32(dummy); 4571 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4572 dummy = be32_to_cpup(p++);
4322 if (dummy != slot->seq_nr) { 4573 if (dummy != slot->seq_nr) {
4323 dprintk("%s Invalid sequence number\n", __func__); 4574 dprintk("%s Invalid sequence number\n", __func__);
4324 goto out_err; 4575 goto out_err;
4325 } 4576 }
4326 /* slot id */ 4577 /* slot id */
4327 READ32(dummy); 4578 dummy = be32_to_cpup(p++);
4328 if (dummy != res->sr_slotid) { 4579 if (dummy != res->sr_slotid) {
4329 dprintk("%s Invalid slot id\n", __func__); 4580 dprintk("%s Invalid slot id\n", __func__);
4330 goto out_err; 4581 goto out_err;
4331 } 4582 }
4332 /* highest slot id - currently not processed */ 4583 /* highest slot id - currently not processed */
4333 READ32(dummy); 4584 dummy = be32_to_cpup(p++);
4334 /* target highest slot id - currently not processed */ 4585 /* target highest slot id - currently not processed */
4335 READ32(dummy); 4586 dummy = be32_to_cpup(p++);
4336 /* result flags - currently not processed */ 4587 /* result flags - currently not processed */
4337 READ32(dummy); 4588 dummy = be32_to_cpup(p);
4338 status = 0; 4589 status = 0;
4339out_err: 4590out_err:
4340 res->sr_status = status; 4591 res->sr_status = status;
4341 return status; 4592 return status;
4593out_overflow:
4594 print_overflow_msg(__func__, xdr);
4595 status = -EIO;
4596 goto out_err;
4342#else /* CONFIG_NFS_V4_1 */ 4597#else /* CONFIG_NFS_V4_1 */
4343 return 0; 4598 return 0;
4344#endif /* CONFIG_NFS_V4_1 */ 4599#endif /* CONFIG_NFS_V4_1 */
@@ -4370,7 +4625,8 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct
4370 status = decode_open_downgrade(&xdr, res); 4625 status = decode_open_downgrade(&xdr, res);
4371 if (status != 0) 4626 if (status != 0)
4372 goto out; 4627 goto out;
4373 decode_getfattr(&xdr, res->fattr, res->server); 4628 decode_getfattr(&xdr, res->fattr, res->server,
4629 !RPC_IS_ASYNC(rqstp->rq_task));
4374out: 4630out:
4375 return status; 4631 return status;
4376} 4632}
@@ -4397,7 +4653,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
4397 status = decode_access(&xdr, res); 4653 status = decode_access(&xdr, res);
4398 if (status != 0) 4654 if (status != 0)
4399 goto out; 4655 goto out;
4400 decode_getfattr(&xdr, res->fattr, res->server); 4656 decode_getfattr(&xdr, res->fattr, res->server,
4657 !RPC_IS_ASYNC(rqstp->rq_task));
4401out: 4658out:
4402 return status; 4659 return status;
4403} 4660}
@@ -4424,7 +4681,8 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
4424 goto out; 4681 goto out;
4425 if ((status = decode_getfh(&xdr, res->fh)) != 0) 4682 if ((status = decode_getfh(&xdr, res->fh)) != 0)
4426 goto out; 4683 goto out;
4427 status = decode_getfattr(&xdr, res->fattr, res->server); 4684 status = decode_getfattr(&xdr, res->fattr, res->server
4685 ,!RPC_IS_ASYNC(rqstp->rq_task));
4428out: 4686out:
4429 return status; 4687 return status;
4430} 4688}
@@ -4448,7 +4706,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
4448 if ((status = decode_putrootfh(&xdr)) != 0) 4706 if ((status = decode_putrootfh(&xdr)) != 0)
4449 goto out; 4707 goto out;
4450 if ((status = decode_getfh(&xdr, res->fh)) == 0) 4708 if ((status = decode_getfh(&xdr, res->fh)) == 0)
4451 status = decode_getfattr(&xdr, res->fattr, res->server); 4709 status = decode_getfattr(&xdr, res->fattr, res->server,
4710 !RPC_IS_ASYNC(rqstp->rq_task));
4452out: 4711out:
4453 return status; 4712 return status;
4454} 4713}
@@ -4473,7 +4732,8 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
4473 goto out; 4732 goto out;
4474 if ((status = decode_remove(&xdr, &res->cinfo)) != 0) 4733 if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
4475 goto out; 4734 goto out;
4476 decode_getfattr(&xdr, &res->dir_attr, res->server); 4735 decode_getfattr(&xdr, &res->dir_attr, res->server,
4736 !RPC_IS_ASYNC(rqstp->rq_task));
4477out: 4737out:
4478 return status; 4738 return status;
4479} 4739}
@@ -4503,11 +4763,13 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
4503 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) 4763 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
4504 goto out; 4764 goto out;
4505 /* Current FH is target directory */ 4765 /* Current FH is target directory */
4506 if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0) 4766 if (decode_getfattr(&xdr, res->new_fattr, res->server,
4767 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4507 goto out; 4768 goto out;
4508 if ((status = decode_restorefh(&xdr)) != 0) 4769 if ((status = decode_restorefh(&xdr)) != 0)
4509 goto out; 4770 goto out;
4510 decode_getfattr(&xdr, res->old_fattr, res->server); 4771 decode_getfattr(&xdr, res->old_fattr, res->server,
4772 !RPC_IS_ASYNC(rqstp->rq_task));
4511out: 4773out:
4512 return status; 4774 return status;
4513} 4775}
@@ -4540,11 +4802,13 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
4540 * Note order: OP_LINK leaves the directory as the current 4802 * Note order: OP_LINK leaves the directory as the current
4541 * filehandle. 4803 * filehandle.
4542 */ 4804 */
4543 if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0) 4805 if (decode_getfattr(&xdr, res->dir_attr, res->server,
4806 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4544 goto out; 4807 goto out;
4545 if ((status = decode_restorefh(&xdr)) != 0) 4808 if ((status = decode_restorefh(&xdr)) != 0)
4546 goto out; 4809 goto out;
4547 decode_getfattr(&xdr, res->fattr, res->server); 4810 decode_getfattr(&xdr, res->fattr, res->server,
4811 !RPC_IS_ASYNC(rqstp->rq_task));
4548out: 4812out:
4549 return status; 4813 return status;
4550} 4814}
@@ -4573,11 +4837,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
4573 goto out; 4837 goto out;
4574 if ((status = decode_getfh(&xdr, res->fh)) != 0) 4838 if ((status = decode_getfh(&xdr, res->fh)) != 0)
4575 goto out; 4839 goto out;
4576 if (decode_getfattr(&xdr, res->fattr, res->server) != 0) 4840 if (decode_getfattr(&xdr, res->fattr, res->server,
4841 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4577 goto out; 4842 goto out;
4578 if ((status = decode_restorefh(&xdr)) != 0) 4843 if ((status = decode_restorefh(&xdr)) != 0)
4579 goto out; 4844 goto out;
4580 decode_getfattr(&xdr, res->dir_fattr, res->server); 4845 decode_getfattr(&xdr, res->dir_fattr, res->server,
4846 !RPC_IS_ASYNC(rqstp->rq_task));
4581out: 4847out:
4582 return status; 4848 return status;
4583} 4849}
@@ -4609,7 +4875,8 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
4609 status = decode_putfh(&xdr); 4875 status = decode_putfh(&xdr);
4610 if (status) 4876 if (status)
4611 goto out; 4877 goto out;
4612 status = decode_getfattr(&xdr, res->fattr, res->server); 4878 status = decode_getfattr(&xdr, res->fattr, res->server,
4879 !RPC_IS_ASYNC(rqstp->rq_task));
4613out: 4880out:
4614 return status; 4881 return status;
4615} 4882}
@@ -4716,7 +4983,8 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
4716 * an ESTALE error. Shouldn't be a problem, 4983 * an ESTALE error. Shouldn't be a problem,
4717 * though, since fattr->valid will remain unset. 4984 * though, since fattr->valid will remain unset.
4718 */ 4985 */
4719 decode_getfattr(&xdr, res->fattr, res->server); 4986 decode_getfattr(&xdr, res->fattr, res->server,
4987 !RPC_IS_ASYNC(rqstp->rq_task));
4720out: 4988out:
4721 return status; 4989 return status;
4722} 4990}
@@ -4748,11 +5016,13 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
4748 goto out; 5016 goto out;
4749 if (decode_getfh(&xdr, &res->fh) != 0) 5017 if (decode_getfh(&xdr, &res->fh) != 0)
4750 goto out; 5018 goto out;
4751 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) 5019 if (decode_getfattr(&xdr, res->f_attr, res->server,
5020 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4752 goto out; 5021 goto out;
4753 if (decode_restorefh(&xdr) != 0) 5022 if (decode_restorefh(&xdr) != 0)
4754 goto out; 5023 goto out;
4755 decode_getfattr(&xdr, res->dir_attr, res->server); 5024 decode_getfattr(&xdr, res->dir_attr, res->server,
5025 !RPC_IS_ASYNC(rqstp->rq_task));
4756out: 5026out:
4757 return status; 5027 return status;
4758} 5028}
@@ -4800,7 +5070,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf
4800 status = decode_open(&xdr, res); 5070 status = decode_open(&xdr, res);
4801 if (status) 5071 if (status)
4802 goto out; 5072 goto out;
4803 decode_getfattr(&xdr, res->f_attr, res->server); 5073 decode_getfattr(&xdr, res->f_attr, res->server,
5074 !RPC_IS_ASYNC(rqstp->rq_task));
4804out: 5075out:
4805 return status; 5076 return status;
4806} 5077}
@@ -4827,7 +5098,8 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
4827 status = decode_setattr(&xdr); 5098 status = decode_setattr(&xdr);
4828 if (status) 5099 if (status)
4829 goto out; 5100 goto out;
4830 decode_getfattr(&xdr, res->fattr, res->server); 5101 decode_getfattr(&xdr, res->fattr, res->server,
5102 !RPC_IS_ASYNC(rqstp->rq_task));
4831out: 5103out:
4832 return status; 5104 return status;
4833} 5105}
@@ -5001,7 +5273,8 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ
5001 status = decode_write(&xdr, res); 5273 status = decode_write(&xdr, res);
5002 if (status) 5274 if (status)
5003 goto out; 5275 goto out;
5004 decode_getfattr(&xdr, res->fattr, res->server); 5276 decode_getfattr(&xdr, res->fattr, res->server,
5277 !RPC_IS_ASYNC(rqstp->rq_task));
5005 if (!status) 5278 if (!status)
5006 status = res->count; 5279 status = res->count;
5007out: 5280out:
@@ -5030,7 +5303,8 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri
5030 status = decode_commit(&xdr, res); 5303 status = decode_commit(&xdr, res);
5031 if (status) 5304 if (status)
5032 goto out; 5305 goto out;
5033 decode_getfattr(&xdr, res->fattr, res->server); 5306 decode_getfattr(&xdr, res->fattr, res->server,
5307 !RPC_IS_ASYNC(rqstp->rq_task));
5034out: 5308out:
5035 return status; 5309 return status;
5036} 5310}
@@ -5194,7 +5468,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf
5194 if (status != 0) 5468 if (status != 0)
5195 goto out; 5469 goto out;
5196 status = decode_delegreturn(&xdr); 5470 status = decode_delegreturn(&xdr);
5197 decode_getfattr(&xdr, res->fattr, res->server); 5471 decode_getfattr(&xdr, res->fattr, res->server,
5472 !RPC_IS_ASYNC(rqstp->rq_task));
5198out: 5473out:
5199 return status; 5474 return status;
5200} 5475}
@@ -5222,7 +5497,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
5222 goto out; 5497 goto out;
5223 xdr_enter_page(&xdr, PAGE_SIZE); 5498 xdr_enter_page(&xdr, PAGE_SIZE);
5224 status = decode_getfattr(&xdr, &res->fs_locations->fattr, 5499 status = decode_getfattr(&xdr, &res->fs_locations->fattr,
5225 res->fs_locations->server); 5500 res->fs_locations->server,
5501 !RPC_IS_ASYNC(req->rq_task));
5226out: 5502out:
5227 return status; 5503 return status;
5228} 5504}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 96c4ebfa46f4..12c9e66d3f1d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,7 +18,6 @@
18#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
19#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
20#include <linux/nfs_page.h> 20#include <linux/nfs_page.h>
21#include <linux/smp_lock.h>
22 21
23#include <asm/system.h> 22#include <asm/system.h>
24 23
@@ -61,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
61 return p; 60 return p;
62} 61}
63 62
64static void nfs_readdata_free(struct nfs_read_data *p) 63void nfs_readdata_free(struct nfs_read_data *p)
65{ 64{
66 if (p && (p->pagevec != &p->page_array[0])) 65 if (p && (p->pagevec != &p->page_array[0]))
67 kfree(p->pagevec); 66 kfree(p->pagevec);
68 mempool_free(p, nfs_rdata_mempool); 67 mempool_free(p, nfs_rdata_mempool);
69} 68}
70 69
71void nfs_readdata_release(void *data) 70static void nfs_readdata_release(struct nfs_read_data *rdata)
72{ 71{
73 struct nfs_read_data *rdata = data;
74
75 put_nfs_open_context(rdata->args.context); 72 put_nfs_open_context(rdata->args.context);
76 nfs_readdata_free(rdata); 73 nfs_readdata_free(rdata);
77} 74}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0b4cbdc60abd..9c85cdb353aa 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -158,7 +158,7 @@ static const match_table_t nfs_mount_option_tokens = {
158 { Opt_mountvers, "mountvers=%s" }, 158 { Opt_mountvers, "mountvers=%s" },
159 { Opt_nfsvers, "nfsvers=%s" }, 159 { Opt_nfsvers, "nfsvers=%s" },
160 { Opt_nfsvers, "vers=%s" }, 160 { Opt_nfsvers, "vers=%s" },
161 { Opt_minorversion, "minorversion=%u" }, 161 { Opt_minorversion, "minorversion=%s" },
162 162
163 { Opt_sec, "sec=%s" }, 163 { Opt_sec, "sec=%s" },
164 { Opt_proto, "proto=%s" }, 164 { Opt_proto, "proto=%s" },
@@ -742,129 +742,10 @@ static int nfs_verify_server_address(struct sockaddr *addr)
742 } 742 }
743 } 743 }
744 744
745 dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
745 return 0; 746 return 0;
746} 747}
747 748
748static void nfs_parse_ipv4_address(char *string, size_t str_len,
749 struct sockaddr *sap, size_t *addr_len)
750{
751 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
752 u8 *addr = (u8 *)&sin->sin_addr.s_addr;
753
754 if (str_len <= INET_ADDRSTRLEN) {
755 dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n",
756 (int)str_len, string);
757
758 sin->sin_family = AF_INET;
759 *addr_len = sizeof(*sin);
760 if (in4_pton(string, str_len, addr, '\0', NULL))
761 return;
762 }
763
764 sap->sa_family = AF_UNSPEC;
765 *addr_len = 0;
766}
767
768#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
769static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
770 const char *delim,
771 struct sockaddr_in6 *sin6)
772{
773 char *p;
774 size_t len;
775
776 if ((string + str_len) == delim)
777 return 1;
778
779 if (*delim != IPV6_SCOPE_DELIMITER)
780 return 0;
781
782 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
783 return 0;
784
785 len = (string + str_len) - delim - 1;
786 p = kstrndup(delim + 1, len, GFP_KERNEL);
787 if (p) {
788 unsigned long scope_id = 0;
789 struct net_device *dev;
790
791 dev = dev_get_by_name(&init_net, p);
792 if (dev != NULL) {
793 scope_id = dev->ifindex;
794 dev_put(dev);
795 } else {
796 if (strict_strtoul(p, 10, &scope_id) == 0) {
797 kfree(p);
798 return 0;
799 }
800 }
801
802 kfree(p);
803
804 sin6->sin6_scope_id = scope_id;
805 dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id);
806 return 1;
807 }
808
809 return 0;
810}
811
812static void nfs_parse_ipv6_address(char *string, size_t str_len,
813 struct sockaddr *sap, size_t *addr_len)
814{
815 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
816 u8 *addr = (u8 *)&sin6->sin6_addr.in6_u;
817 const char *delim;
818
819 if (str_len <= INET6_ADDRSTRLEN) {
820 dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n",
821 (int)str_len, string);
822
823 sin6->sin6_family = AF_INET6;
824 *addr_len = sizeof(*sin6);
825 if (in6_pton(string, str_len, addr,
826 IPV6_SCOPE_DELIMITER, &delim) != 0) {
827 if (nfs_parse_ipv6_scope_id(string, str_len,
828 delim, sin6) != 0)
829 return;
830 }
831 }
832
833 sap->sa_family = AF_UNSPEC;
834 *addr_len = 0;
835}
836#else
837static void nfs_parse_ipv6_address(char *string, size_t str_len,
838 struct sockaddr *sap, size_t *addr_len)
839{
840 sap->sa_family = AF_UNSPEC;
841 *addr_len = 0;
842}
843#endif
844
845/*
846 * Construct a sockaddr based on the contents of a string that contains
847 * an IP address in presentation format.
848 *
849 * If there is a problem constructing the new sockaddr, set the address
850 * family to AF_UNSPEC.
851 */
852void nfs_parse_ip_address(char *string, size_t str_len,
853 struct sockaddr *sap, size_t *addr_len)
854{
855 unsigned int i, colons;
856
857 colons = 0;
858 for (i = 0; i < str_len; i++)
859 if (string[i] == ':')
860 colons++;
861
862 if (colons >= 2)
863 nfs_parse_ipv6_address(string, str_len, sap, addr_len);
864 else
865 nfs_parse_ipv4_address(string, str_len, sap, addr_len);
866}
867
868/* 749/*
869 * Sanity check the NFS transport protocol. 750 * Sanity check the NFS transport protocol.
870 * 751 *
@@ -904,8 +785,6 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
904 785
905/* 786/*
906 * Parse the value of the 'sec=' option. 787 * Parse the value of the 'sec=' option.
907 *
908 * The flavor_len setting is for v4 mounts.
909 */ 788 */
910static int nfs_parse_security_flavors(char *value, 789static int nfs_parse_security_flavors(char *value,
911 struct nfs_parsed_mount_data *mnt) 790 struct nfs_parsed_mount_data *mnt)
@@ -916,53 +795,43 @@ static int nfs_parse_security_flavors(char *value,
916 795
917 switch (match_token(value, nfs_secflavor_tokens, args)) { 796 switch (match_token(value, nfs_secflavor_tokens, args)) {
918 case Opt_sec_none: 797 case Opt_sec_none:
919 mnt->auth_flavor_len = 0;
920 mnt->auth_flavors[0] = RPC_AUTH_NULL; 798 mnt->auth_flavors[0] = RPC_AUTH_NULL;
921 break; 799 break;
922 case Opt_sec_sys: 800 case Opt_sec_sys:
923 mnt->auth_flavor_len = 0;
924 mnt->auth_flavors[0] = RPC_AUTH_UNIX; 801 mnt->auth_flavors[0] = RPC_AUTH_UNIX;
925 break; 802 break;
926 case Opt_sec_krb5: 803 case Opt_sec_krb5:
927 mnt->auth_flavor_len = 1;
928 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; 804 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
929 break; 805 break;
930 case Opt_sec_krb5i: 806 case Opt_sec_krb5i:
931 mnt->auth_flavor_len = 1;
932 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; 807 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
933 break; 808 break;
934 case Opt_sec_krb5p: 809 case Opt_sec_krb5p:
935 mnt->auth_flavor_len = 1;
936 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; 810 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
937 break; 811 break;
938 case Opt_sec_lkey: 812 case Opt_sec_lkey:
939 mnt->auth_flavor_len = 1;
940 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; 813 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
941 break; 814 break;
942 case Opt_sec_lkeyi: 815 case Opt_sec_lkeyi:
943 mnt->auth_flavor_len = 1;
944 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; 816 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
945 break; 817 break;
946 case Opt_sec_lkeyp: 818 case Opt_sec_lkeyp:
947 mnt->auth_flavor_len = 1;
948 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; 819 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
949 break; 820 break;
950 case Opt_sec_spkm: 821 case Opt_sec_spkm:
951 mnt->auth_flavor_len = 1;
952 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; 822 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
953 break; 823 break;
954 case Opt_sec_spkmi: 824 case Opt_sec_spkmi:
955 mnt->auth_flavor_len = 1;
956 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; 825 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
957 break; 826 break;
958 case Opt_sec_spkmp: 827 case Opt_sec_spkmp:
959 mnt->auth_flavor_len = 1;
960 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; 828 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
961 break; 829 break;
962 default: 830 default:
963 return 0; 831 return 0;
964 } 832 }
965 833
834 mnt->auth_flavor_len = 1;
966 return 1; 835 return 1;
967} 836}
968 837
@@ -1001,7 +870,6 @@ static int nfs_parse_mount_options(char *raw,
1001 while ((p = strsep(&raw, ",")) != NULL) { 870 while ((p = strsep(&raw, ",")) != NULL) {
1002 substring_t args[MAX_OPT_ARGS]; 871 substring_t args[MAX_OPT_ARGS];
1003 unsigned long option; 872 unsigned long option;
1004 int int_option;
1005 int token; 873 int token;
1006 874
1007 if (!*p) 875 if (!*p)
@@ -1273,11 +1141,16 @@ static int nfs_parse_mount_options(char *raw,
1273 } 1141 }
1274 break; 1142 break;
1275 case Opt_minorversion: 1143 case Opt_minorversion:
1276 if (match_int(args, &int_option)) 1144 string = match_strdup(args);
1277 return 0; 1145 if (string == NULL)
1278 if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) 1146 goto out_nomem;
1279 return 0; 1147 rc = strict_strtoul(string, 10, &option);
1280 mnt->minorversion = int_option; 1148 kfree(string);
1149 if (rc != 0)
1150 goto out_invalid_value;
1151 if (option > NFS4_MAX_MINOR_VERSION)
1152 goto out_invalid_value;
1153 mnt->minorversion = option;
1281 break; 1154 break;
1282 1155
1283 /* 1156 /*
@@ -1352,11 +1225,14 @@ static int nfs_parse_mount_options(char *raw,
1352 string = match_strdup(args); 1225 string = match_strdup(args);
1353 if (string == NULL) 1226 if (string == NULL)
1354 goto out_nomem; 1227 goto out_nomem;
1355 nfs_parse_ip_address(string, strlen(string), 1228 mnt->nfs_server.addrlen =
1356 (struct sockaddr *) 1229 rpc_pton(string, strlen(string),
1357 &mnt->nfs_server.address, 1230 (struct sockaddr *)
1358 &mnt->nfs_server.addrlen); 1231 &mnt->nfs_server.address,
1232 sizeof(mnt->nfs_server.address));
1359 kfree(string); 1233 kfree(string);
1234 if (mnt->nfs_server.addrlen == 0)
1235 goto out_invalid_address;
1360 break; 1236 break;
1361 case Opt_clientaddr: 1237 case Opt_clientaddr:
1362 string = match_strdup(args); 1238 string = match_strdup(args);
@@ -1376,11 +1252,14 @@ static int nfs_parse_mount_options(char *raw,
1376 string = match_strdup(args); 1252 string = match_strdup(args);
1377 if (string == NULL) 1253 if (string == NULL)
1378 goto out_nomem; 1254 goto out_nomem;
1379 nfs_parse_ip_address(string, strlen(string), 1255 mnt->mount_server.addrlen =
1380 (struct sockaddr *) 1256 rpc_pton(string, strlen(string),
1381 &mnt->mount_server.address, 1257 (struct sockaddr *)
1382 &mnt->mount_server.addrlen); 1258 &mnt->mount_server.address,
1259 sizeof(mnt->mount_server.address));
1383 kfree(string); 1260 kfree(string);
1261 if (mnt->mount_server.addrlen == 0)
1262 goto out_invalid_address;
1384 break; 1263 break;
1385 case Opt_lookupcache: 1264 case Opt_lookupcache:
1386 string = match_strdup(args); 1265 string = match_strdup(args);
@@ -1432,8 +1311,11 @@ static int nfs_parse_mount_options(char *raw,
1432 1311
1433 return 1; 1312 return 1;
1434 1313
1314out_invalid_address:
1315 printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
1316 return 0;
1435out_invalid_value: 1317out_invalid_value:
1436 printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); 1318 printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
1437 return 0; 1319 return 0;
1438out_nomem: 1320out_nomem:
1439 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1321 printk(KERN_INFO "NFS: not enough memory to parse option\n");
@@ -1445,13 +1327,50 @@ out_security_failure:
1445} 1327}
1446 1328
1447/* 1329/*
1330 * Match the requested auth flavors with the list returned by
1331 * the server. Returns zero and sets the mount's authentication
1332 * flavor on success; returns -EACCES if server does not support
1333 * the requested flavor.
1334 */
1335static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
1336 struct nfs_mount_request *request)
1337{
1338 unsigned int i, j, server_authlist_len = *(request->auth_flav_len);
1339
1340 /*
1341 * We avoid sophisticated negotiating here, as there are
1342 * plenty of cases where we can get it wrong, providing
1343 * either too little or too much security.
1344 *
1345 * RFC 2623, section 2.7 suggests we SHOULD prefer the
1346 * flavor listed first. However, some servers list
1347 * AUTH_NULL first. Our caller plants AUTH_SYS, the
1348 * preferred default, in args->auth_flavors[0] if user
1349 * didn't specify sec= mount option.
1350 */
1351 for (i = 0; i < args->auth_flavor_len; i++)
1352 for (j = 0; j < server_authlist_len; j++)
1353 if (args->auth_flavors[i] == request->auth_flavs[j]) {
1354 dfprintk(MOUNT, "NFS: using auth flavor %d\n",
1355 request->auth_flavs[j]);
1356 args->auth_flavors[0] = request->auth_flavs[j];
1357 return 0;
1358 }
1359
1360 dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n");
1361 nfs_umount(request);
1362 return -EACCES;
1363}
1364
1365/*
1448 * Use the remote server's MOUNT service to request the NFS file handle 1366 * Use the remote server's MOUNT service to request the NFS file handle
1449 * corresponding to the provided path. 1367 * corresponding to the provided path.
1450 */ 1368 */
1451static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1369static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1452 struct nfs_fh *root_fh) 1370 struct nfs_fh *root_fh)
1453{ 1371{
1454 unsigned int auth_flavor_len = 0; 1372 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
1373 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
1455 struct nfs_mount_request request = { 1374 struct nfs_mount_request request = {
1456 .sap = (struct sockaddr *) 1375 .sap = (struct sockaddr *)
1457 &args->mount_server.address, 1376 &args->mount_server.address,
@@ -1459,7 +1378,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1459 .protocol = args->mount_server.protocol, 1378 .protocol = args->mount_server.protocol,
1460 .fh = root_fh, 1379 .fh = root_fh,
1461 .noresvport = args->flags & NFS_MOUNT_NORESVPORT, 1380 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1462 .auth_flav_len = &auth_flavor_len, 1381 .auth_flav_len = &server_authlist_len,
1382 .auth_flavs = server_authlist,
1463 }; 1383 };
1464 int status; 1384 int status;
1465 1385
@@ -1489,19 +1409,25 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1489 /* 1409 /*
1490 * autobind will be used if mount_server.port == 0 1410 * autobind will be used if mount_server.port == 0
1491 */ 1411 */
1492 nfs_set_port(request.sap, args->mount_server.port); 1412 rpc_set_port(request.sap, args->mount_server.port);
1493 1413
1494 /* 1414 /*
1495 * Now ask the mount server to map our export path 1415 * Now ask the mount server to map our export path
1496 * to a file handle. 1416 * to a file handle.
1497 */ 1417 */
1498 status = nfs_mount(&request); 1418 status = nfs_mount(&request);
1499 if (status == 0) 1419 if (status != 0) {
1500 return 0; 1420 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
1421 request.hostname, status);
1422 return status;
1423 }
1501 1424
1502 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", 1425 /*
1503 request.hostname, status); 1426 * MNTv1 (NFSv2) does not support auth flavor negotiation.
1504 return status; 1427 */
1428 if (args->mount_server.version != NFS_MNT3_VERSION)
1429 return 0;
1430 return nfs_walk_authlist(args, &request);
1505} 1431}
1506 1432
1507static int nfs_parse_simple_hostname(const char *dev_name, 1433static int nfs_parse_simple_hostname(const char *dev_name,
@@ -1676,6 +1602,7 @@ static int nfs_validate_mount_data(void *options,
1676 args->nfs_server.port = 0; /* autobind unless user sets port */ 1602 args->nfs_server.port = 0; /* autobind unless user sets port */
1677 args->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1603 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1678 args->auth_flavors[0] = RPC_AUTH_UNIX; 1604 args->auth_flavors[0] = RPC_AUTH_UNIX;
1605 args->auth_flavor_len = 1;
1679 1606
1680 switch (data->version) { 1607 switch (data->version) {
1681 case 1: 1608 case 1:
@@ -1776,7 +1703,7 @@ static int nfs_validate_mount_data(void *options,
1776 &args->nfs_server.address)) 1703 &args->nfs_server.address))
1777 goto out_no_address; 1704 goto out_no_address;
1778 1705
1779 nfs_set_port((struct sockaddr *)&args->nfs_server.address, 1706 rpc_set_port((struct sockaddr *)&args->nfs_server.address,
1780 args->nfs_server.port); 1707 args->nfs_server.port);
1781 1708
1782 nfs_set_mount_transport_protocol(args); 1709 nfs_set_mount_transport_protocol(args);
@@ -2339,7 +2266,7 @@ static int nfs4_validate_mount_data(void *options,
2339 args->acdirmax = NFS_DEF_ACDIRMAX; 2266 args->acdirmax = NFS_DEF_ACDIRMAX;
2340 args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ 2267 args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */
2341 args->auth_flavors[0] = RPC_AUTH_UNIX; 2268 args->auth_flavors[0] = RPC_AUTH_UNIX;
2342 args->auth_flavor_len = 0; 2269 args->auth_flavor_len = 1;
2343 args->minorversion = 0; 2270 args->minorversion = 0;
2344 2271
2345 switch (data->version) { 2272 switch (data->version) {
@@ -2409,7 +2336,7 @@ static int nfs4_validate_mount_data(void *options,
2409 &args->nfs_server.address)) 2336 &args->nfs_server.address))
2410 return -EINVAL; 2337 return -EINVAL;
2411 2338
2412 nfs_set_port((struct sockaddr *)&args->nfs_server.address, 2339 rpc_set_port((struct sockaddr *)&args->nfs_server.address,
2413 args->nfs_server.port); 2340 args->nfs_server.port);
2414 2341
2415 nfs_validate_transport_protocol(args); 2342 nfs_validate_transport_protocol(args);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce728829f79a..120acadc6a84 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/swap.h> 15#include <linux/swap.h>
16#include <linux/migrate.h>
16 17
17#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
18#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
@@ -26,6 +27,7 @@
26#include "internal.h" 27#include "internal.h"
27#include "iostat.h" 28#include "iostat.h"
28#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h"
29 31
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 32#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 33
@@ -87,17 +89,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
87 return p; 89 return p;
88} 90}
89 91
90static void nfs_writedata_free(struct nfs_write_data *p) 92void nfs_writedata_free(struct nfs_write_data *p)
91{ 93{
92 if (p && (p->pagevec != &p->page_array[0])) 94 if (p && (p->pagevec != &p->page_array[0]))
93 kfree(p->pagevec); 95 kfree(p->pagevec);
94 mempool_free(p, nfs_wdata_mempool); 96 mempool_free(p, nfs_wdata_mempool);
95} 97}
96 98
97void nfs_writedata_release(void *data) 99static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 100{
99 struct nfs_write_data *wdata = data;
100
101 put_nfs_open_context(wdata->args.context); 101 put_nfs_open_context(wdata->args.context);
102 nfs_writedata_free(wdata); 102 nfs_writedata_free(wdata);
103} 103}
@@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page)
202 struct nfs_server *nfss = NFS_SERVER(inode); 202 struct nfs_server *nfss = NFS_SERVER(inode);
203 203
204 if (atomic_long_inc_return(&nfss->writeback) > 204 if (atomic_long_inc_return(&nfss->writeback) >
205 NFS_CONGESTION_ON_THRESH) 205 NFS_CONGESTION_ON_THRESH) {
206 set_bdi_congested(&nfss->backing_dev_info, WRITE); 206 set_bdi_congested(&nfss->backing_dev_info,
207 BLK_RW_ASYNC);
208 }
207 } 209 }
208 return ret; 210 return ret;
209} 211}
@@ -215,27 +217,20 @@ static void nfs_end_page_writeback(struct page *page)
215 217
216 end_page_writeback(page); 218 end_page_writeback(page);
217 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) 219 if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
218 clear_bdi_congested(&nfss->backing_dev_info, WRITE); 220 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
219} 221}
220 222
221/* 223static struct nfs_page *nfs_find_and_lock_request(struct page *page)
222 * Find an associated nfs write request, and prepare to flush it out
223 * May return an error if the user signalled nfs_wait_on_request().
224 */
225static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
226 struct page *page)
227{ 224{
228 struct inode *inode = page->mapping->host; 225 struct inode *inode = page->mapping->host;
229 struct nfs_page *req; 226 struct nfs_page *req;
230 int ret; 227 int ret;
231 228
232 spin_lock(&inode->i_lock); 229 spin_lock(&inode->i_lock);
233 for(;;) { 230 for (;;) {
234 req = nfs_page_find_request_locked(page); 231 req = nfs_page_find_request_locked(page);
235 if (req == NULL) { 232 if (req == NULL)
236 spin_unlock(&inode->i_lock); 233 break;
237 return 0;
238 }
239 if (nfs_set_page_tag_locked(req)) 234 if (nfs_set_page_tag_locked(req))
240 break; 235 break;
241 /* Note: If we hold the page lock, as is the case in nfs_writepage, 236 /* Note: If we hold the page lock, as is the case in nfs_writepage,
@@ -247,23 +242,40 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
247 ret = nfs_wait_on_request(req); 242 ret = nfs_wait_on_request(req);
248 nfs_release_request(req); 243 nfs_release_request(req);
249 if (ret != 0) 244 if (ret != 0)
250 return ret; 245 return ERR_PTR(ret);
251 spin_lock(&inode->i_lock); 246 spin_lock(&inode->i_lock);
252 } 247 }
253 if (test_bit(PG_CLEAN, &req->wb_flags)) {
254 spin_unlock(&inode->i_lock);
255 BUG();
256 }
257 if (nfs_set_page_writeback(page) != 0) {
258 spin_unlock(&inode->i_lock);
259 BUG();
260 }
261 spin_unlock(&inode->i_lock); 248 spin_unlock(&inode->i_lock);
249 return req;
250}
251
252/*
253 * Find an associated nfs write request, and prepare to flush it out
254 * May return an error if the user signalled nfs_wait_on_request().
255 */
256static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
257 struct page *page)
258{
259 struct nfs_page *req;
260 int ret = 0;
261
262 req = nfs_find_and_lock_request(page);
263 if (!req)
264 goto out;
265 ret = PTR_ERR(req);
266 if (IS_ERR(req))
267 goto out;
268
269 ret = nfs_set_page_writeback(page);
270 BUG_ON(ret != 0);
271 BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));
272
262 if (!nfs_pageio_add_request(pgio, req)) { 273 if (!nfs_pageio_add_request(pgio, req)) {
263 nfs_redirty_request(req); 274 nfs_redirty_request(req);
264 return pgio->pg_error; 275 ret = pgio->pg_error;
265 } 276 }
266 return 0; 277out:
278 return ret;
267} 279}
268 280
269static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) 281static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
@@ -1580,6 +1592,41 @@ int nfs_wb_page(struct inode *inode, struct page* page)
1580 return nfs_wb_page_priority(inode, page, FLUSH_STABLE); 1592 return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
1581} 1593}
1582 1594
1595#ifdef CONFIG_MIGRATION
1596int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1597 struct page *page)
1598{
1599 struct nfs_page *req;
1600 int ret;
1601
1602 if (PageFsCache(page))
1603 nfs_fscache_release_page(page, GFP_KERNEL);
1604
1605 req = nfs_find_and_lock_request(page);
1606 ret = PTR_ERR(req);
1607 if (IS_ERR(req))
1608 goto out;
1609
1610 ret = migrate_page(mapping, newpage, page);
1611 if (!req)
1612 goto out;
1613 if (ret)
1614 goto out_unlock;
1615 page_cache_get(newpage);
1616 req->wb_page = newpage;
1617 SetPagePrivate(newpage);
1618 set_page_private(newpage, page_private(page));
1619 ClearPagePrivate(page);
1620 set_page_private(page, 0);
1621 page_cache_release(page);
1622out_unlock:
1623 nfs_clear_page_tag_locked(req);
1624 nfs_release_request(req);
1625out:
1626 return ret;
1627}
1628#endif
1629
1583int __init nfs_init_writepagecache(void) 1630int __init nfs_init_writepagecache(void)
1584{ 1631{
1585 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1632 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b92a27629fb7..d9462643155c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd,
85 (*bpp)[-1] = '\n'; 85 (*bpp)[-1] = '\n';
86} 86}
87 87
88static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
89{
90 return sunrpc_cache_pipe_upcall(cd, h, expkey_request);
91}
92
88static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); 93static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
89static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); 94static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
90static struct cache_detail svc_expkey_cache; 95static struct cache_detail svc_expkey_cache;
@@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = {
259 .hash_table = expkey_table, 264 .hash_table = expkey_table,
260 .name = "nfsd.fh", 265 .name = "nfsd.fh",
261 .cache_put = expkey_put, 266 .cache_put = expkey_put,
262 .cache_request = expkey_request, 267 .cache_upcall = expkey_upcall,
263 .cache_parse = expkey_parse, 268 .cache_parse = expkey_parse,
264 .cache_show = expkey_show, 269 .cache_show = expkey_show,
265 .match = expkey_match, 270 .match = expkey_match,
@@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd,
355 (*bpp)[-1] = '\n'; 360 (*bpp)[-1] = '\n';
356} 361}
357 362
363static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
364{
365 return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
366}
367
358static struct svc_export *svc_export_update(struct svc_export *new, 368static struct svc_export *svc_export_update(struct svc_export *new,
359 struct svc_export *old); 369 struct svc_export *old);
360static struct svc_export *svc_export_lookup(struct svc_export *); 370static struct svc_export *svc_export_lookup(struct svc_export *);
@@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = {
724 .hash_table = export_table, 734 .hash_table = export_table,
725 .name = "nfsd.export", 735 .name = "nfsd.export",
726 .cache_put = svc_export_put, 736 .cache_put = svc_export_put,
727 .cache_request = svc_export_request, 737 .cache_upcall = svc_export_upcall,
728 .cache_parse = svc_export_parse, 738 .cache_parse = svc_export_parse,
729 .cache_show = svc_export_show, 739 .cache_show = svc_export_show,
730 .match = svc_export_match, 740 .match = svc_export_match,
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5b398421b051..cdfa86fa1471 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -146,6 +146,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
146} 146}
147 147
148static int 148static int
149idtoname_upcall(struct cache_detail *cd, struct cache_head *ch)
150{
151 return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request);
152}
153
154static int
149idtoname_match(struct cache_head *ca, struct cache_head *cb) 155idtoname_match(struct cache_head *ca, struct cache_head *cb)
150{ 156{
151 struct ent *a = container_of(ca, struct ent, h); 157 struct ent *a = container_of(ca, struct ent, h);
@@ -175,10 +181,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
175} 181}
176 182
177static void 183static void
178warn_no_idmapd(struct cache_detail *detail) 184warn_no_idmapd(struct cache_detail *detail, int has_died)
179{ 185{
180 printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", 186 printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n",
181 detail->last_close? "died" : "not been started"); 187 has_died ? "died" : "not been started");
182} 188}
183 189
184 190
@@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = {
192 .hash_table = idtoname_table, 198 .hash_table = idtoname_table,
193 .name = "nfs4.idtoname", 199 .name = "nfs4.idtoname",
194 .cache_put = ent_put, 200 .cache_put = ent_put,
195 .cache_request = idtoname_request, 201 .cache_upcall = idtoname_upcall,
196 .cache_parse = idtoname_parse, 202 .cache_parse = idtoname_parse,
197 .cache_show = idtoname_show, 203 .cache_show = idtoname_show,
198 .warn_no_listener = warn_no_idmapd, 204 .warn_no_listener = warn_no_idmapd,
@@ -325,6 +331,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
325} 331}
326 332
327static int 333static int
334nametoid_upcall(struct cache_detail *cd, struct cache_head *ch)
335{
336 return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request);
337}
338
339static int
328nametoid_match(struct cache_head *ca, struct cache_head *cb) 340nametoid_match(struct cache_head *ca, struct cache_head *cb)
329{ 341{
330 struct ent *a = container_of(ca, struct ent, h); 342 struct ent *a = container_of(ca, struct ent, h);
@@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = {
363 .hash_table = nametoid_table, 375 .hash_table = nametoid_table,
364 .name = "nfs4.nametoid", 376 .name = "nfs4.nametoid",
365 .cache_put = ent_put, 377 .cache_put = ent_put,
366 .cache_request = nametoid_request, 378 .cache_upcall = nametoid_upcall,
367 .cache_parse = nametoid_parse, 379 .cache_parse = nametoid_parse,
368 .cache_show = nametoid_show, 380 .cache_show = nametoid_show,
369 .warn_no_listener = warn_no_idmapd, 381 .warn_no_listener = warn_no_idmapd,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b51e7ae8b570..b764d7d898e9 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -25,7 +25,6 @@
25#include <linux/init.h> 25#include <linux/init.h>
26#include <linux/inet.h> 26#include <linux/inet.h>
27#include <linux/string.h> 27#include <linux/string.h>
28#include <linux/smp_lock.h>
29#include <linux/ctype.h> 28#include <linux/ctype.h>
30 29
31#include <linux/nfs.h> 30#include <linux/nfs.h>
@@ -38,6 +37,7 @@
38#include <linux/nfsd/xdr.h> 37#include <linux/nfsd/xdr.h>
39#include <linux/nfsd/syscall.h> 38#include <linux/nfsd/syscall.h>
40#include <linux/lockd/lockd.h> 39#include <linux/lockd/lockd.h>
40#include <linux/sunrpc/clnt.h>
41 41
42#include <asm/uaccess.h> 42#include <asm/uaccess.h>
43#include <net/ipv6.h> 43#include <net/ipv6.h>
@@ -491,22 +491,18 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
491 * 491 *
492 * Input: 492 * Input:
493 * buf: '\n'-terminated C string containing a 493 * buf: '\n'-terminated C string containing a
494 * presentation format IPv4 address 494 * presentation format IP address
495 * size: length of C string in @buf 495 * size: length of C string in @buf
496 * Output: 496 * Output:
497 * On success: returns zero if all specified locks were released; 497 * On success: returns zero if all specified locks were released;
498 * returns one if one or more locks were not released 498 * returns one if one or more locks were not released
499 * On error: return code is negative errno value 499 * On error: return code is negative errno value
500 *
501 * Note: Only AF_INET client addresses are passed in
502 */ 500 */
503static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) 501static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
504{ 502{
505 struct sockaddr_in sin = { 503 struct sockaddr_storage address;
506 .sin_family = AF_INET, 504 struct sockaddr *sap = (struct sockaddr *)&address;
507 }; 505 size_t salen = sizeof(address);
508 int b1, b2, b3, b4;
509 char c;
510 char *fo_path; 506 char *fo_path;
511 507
512 /* sanity check */ 508 /* sanity check */
@@ -520,14 +516,10 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
520 if (qword_get(&buf, fo_path, size) < 0) 516 if (qword_get(&buf, fo_path, size) < 0)
521 return -EINVAL; 517 return -EINVAL;
522 518
523 /* get ipv4 address */ 519 if (rpc_pton(fo_path, size, sap, salen) == 0)
524 if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
525 return -EINVAL;
526 if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
527 return -EINVAL; 520 return -EINVAL;
528 sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
529 521
530 return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin); 522 return nlmsvc_unlock_all_by_ip(sap);
531} 523}
532 524
533/** 525/**
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 5a280a9cb540..d68cd056b281 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -18,7 +18,6 @@
18#include <linux/unistd.h> 18#include <linux/unistd.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/smp.h> 20#include <linux/smp.h>
21#include <linux/smp_lock.h>
22#include <linux/freezer.h> 21#include <linux/freezer.h>
23#include <linux/fs_struct.h> 22#include <linux/fs_struct.h>
24#include <linux/kthread.h> 23#include <linux/kthread.h>
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4145083dcf88..23341c1063bc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -678,7 +678,6 @@ __be32
678nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 678nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
679 int access, struct file **filp) 679 int access, struct file **filp)
680{ 680{
681 const struct cred *cred = current_cred();
682 struct dentry *dentry; 681 struct dentry *dentry;
683 struct inode *inode; 682 struct inode *inode;
684 int flags = O_RDONLY|O_LARGEFILE; 683 int flags = O_RDONLY|O_LARGEFILE;
@@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
733 vfs_dq_init(inode); 732 vfs_dq_init(inode);
734 } 733 }
735 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), 734 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
736 flags, cred); 735 flags, current_cred());
737 if (IS_ERR(*filp)) 736 if (IS_ERR(*filp))
738 host_err = PTR_ERR(*filp); 737 host_err = PTR_ERR(*filp);
739 else 738 else
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
new file mode 100644
index 000000000000..72da095d4009
--- /dev/null
+++ b/fs/nilfs2/Kconfig
@@ -0,0 +1,25 @@
1config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)"
3 depends on BLOCK && EXPERIMENTAL
4 select CRC32
5 help
6 NILFS2 is a log-structured file system (LFS) supporting continuous
7 snapshotting. In addition to versioning capability of the entire
8 file system, users can even restore files mistakenly overwritten or
9 destroyed just a few seconds ago. Since this file system can keep
10 consistency like conventional LFS, it achieves quick recovery after
11 system crashes.
12
13 NILFS2 creates a number of checkpoints every few seconds or per
14 synchronous write basis (unless there is no change). Users can
15 select significant versions among continuously created checkpoints,
16 and can change them into snapshots which will be preserved for long
17 periods until they are changed back to checkpoints. Each
18 snapshot is mountable as a read-only file system concurrently with
19 its writable mount, and this feature is convenient for online backup.
20
21 Some features including atime, extended attributes, and POSIX ACLs,
22 are not supported yet.
23
24 To compile this file system support as a module, choose M here: the
25 module will be called nilfs2. If unsure, say N.
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 36df60b6d8a4..99d58a028b94 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -568,6 +568,7 @@ void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
568} 568}
569 569
570static struct lock_class_key nilfs_bmap_dat_lock_key; 570static struct lock_class_key nilfs_bmap_dat_lock_key;
571static struct lock_class_key nilfs_bmap_mdt_lock_key;
571 572
572/** 573/**
573 * nilfs_bmap_read - read a bmap from an inode 574 * nilfs_bmap_read - read a bmap from an inode
@@ -603,7 +604,11 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
603 bmap->b_ptr_type = NILFS_BMAP_PTR_VS; 604 bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
604 bmap->b_last_allocated_key = 0; 605 bmap->b_last_allocated_key = 0;
605 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR; 606 bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
607 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
606 break; 608 break;
609 case NILFS_IFILE_INO:
610 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_mdt_lock_key);
611 /* Fall through */
607 default: 612 default:
608 bmap->b_ptr_type = NILFS_BMAP_PTR_VM; 613 bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
609 bmap->b_last_allocated_key = 0; 614 bmap->b_last_allocated_key = 0;
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 7d49813f66d6..aec942cf79e3 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -307,7 +307,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
307 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); 307 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
308 if (ret < 0) { 308 if (ret < 0) {
309 if (ret != -ENOENT) 309 if (ret != -ENOENT)
310 goto out_header; 310 break;
311 /* skip hole */ 311 /* skip hole */
312 ret = 0; 312 ret = 0;
313 continue; 313 continue;
@@ -340,7 +340,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
340 continue; 340 continue;
341 printk(KERN_ERR "%s: cannot delete block\n", 341 printk(KERN_ERR "%s: cannot delete block\n",
342 __func__); 342 __func__);
343 goto out_header; 343 break;
344 } 344 }
345 } 345 }
346 346
@@ -358,7 +358,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
358 kunmap_atomic(kaddr, KM_USER0); 358 kunmap_atomic(kaddr, KM_USER0);
359 } 359 }
360 360
361 out_header:
362 brelse(header_bh); 361 brelse(header_bh);
363 362
364 out_sem: 363 out_sem:
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 0b2710e2d565..8927ca27e6f7 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -134,15 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
134 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, 134 entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
135 req->pr_entry_bh, kaddr); 135 req->pr_entry_bh, kaddr);
136 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); 136 entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
137 if (entry->de_blocknr != cpu_to_le64(0) ||
138 entry->de_end != cpu_to_le64(NILFS_CNO_MAX)) {
139 printk(KERN_CRIT
140 "%s: vbn = %llu, start = %llu, end = %llu, pbn = %llu\n",
141 __func__, (unsigned long long)req->pr_entry_nr,
142 (unsigned long long)le64_to_cpu(entry->de_start),
143 (unsigned long long)le64_to_cpu(entry->de_end),
144 (unsigned long long)le64_to_cpu(entry->de_blocknr));
145 }
146 entry->de_blocknr = cpu_to_le64(blocknr); 137 entry->de_blocknr = cpu_to_le64(blocknr);
147 kunmap_atomic(kaddr, KM_USER0); 138 kunmap_atomic(kaddr, KM_USER0);
148 139
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 54100acc1102..1a4fa04cf071 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -43,7 +43,6 @@
43 */ 43 */
44 44
45#include <linux/pagemap.h> 45#include <linux/pagemap.h>
46#include <linux/smp_lock.h>
47#include "nilfs.h" 46#include "nilfs.h"
48#include "page.h" 47#include "page.h"
49 48
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 3d3ddb3f5177..2dfd47714ae5 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -412,8 +412,10 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
412 return 0; /* Do not request flush for shadow page cache */ 412 return 0; /* Do not request flush for shadow page cache */
413 if (!sb) { 413 if (!sb) {
414 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); 414 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs);
415 if (!writer) 415 if (!writer) {
416 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs);
416 return -EROFS; 417 return -EROFS;
418 }
417 sb = writer->s_super; 419 sb = writer->s_super;
418 } 420 }
419 421
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index aa977549919e..51ff3d0a4ee2 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1829,26 +1829,13 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1829 err = nilfs_segbuf_write(segbuf, &wi); 1829 err = nilfs_segbuf_write(segbuf, &wi);
1830 1830
1831 res = nilfs_segbuf_wait(segbuf, &wi); 1831 res = nilfs_segbuf_wait(segbuf, &wi);
1832 err = unlikely(err) ? : res; 1832 err = err ? : res;
1833 if (unlikely(err)) 1833 if (err)
1834 return err; 1834 return err;
1835 } 1835 }
1836 return 0; 1836 return 0;
1837} 1837}
1838 1838
1839static int nilfs_page_has_uncleared_buffer(struct page *page)
1840{
1841 struct buffer_head *head, *bh;
1842
1843 head = bh = page_buffers(page);
1844 do {
1845 if (buffer_dirty(bh) && !list_empty(&bh->b_assoc_buffers))
1846 return 1;
1847 bh = bh->b_this_page;
1848 } while (bh != head);
1849 return 0;
1850}
1851
1852static void __nilfs_end_page_io(struct page *page, int err) 1839static void __nilfs_end_page_io(struct page *page, int err)
1853{ 1840{
1854 if (!err) { 1841 if (!err) {
@@ -1872,13 +1859,26 @@ static void nilfs_end_page_io(struct page *page, int err)
1872 if (!page) 1859 if (!page)
1873 return; 1860 return;
1874 1861
1875 if (buffer_nilfs_node(page_buffers(page)) && 1862 if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
1876 nilfs_page_has_uncleared_buffer(page)) 1863 /*
1877 /* For b-tree node pages, this function may be called twice 1864 * For b-tree node pages, this function may be called twice
1878 or more because they might be split in a segment. 1865 * or more because they might be split in a segment.
1879 This check assures that cleanup has been done for all 1866 */
1880 buffers in a split btnode page. */ 1867 if (PageDirty(page)) {
1868 /*
1869 * For pages holding split b-tree node buffers, dirty
1870 * flag on the buffers may be cleared discretely.
1871 * In that case, the page is once redirtied for
1872 * remaining buffers, and it must be cancelled if
1873 * all the buffers get cleaned later.
1874 */
1875 lock_page(page);
1876 if (nilfs_page_buffers_clean(page))
1877 __nilfs_clear_page_dirty(page);
1878 unlock_page(page);
1879 }
1881 return; 1880 return;
1881 }
1882 1882
1883 __nilfs_end_page_io(page, err); 1883 __nilfs_end_page_io(page, err);
1884} 1884}
@@ -1940,7 +1940,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci,
1940 } 1940 }
1941 if (bh->b_page != fs_page) { 1941 if (bh->b_page != fs_page) {
1942 nilfs_end_page_io(fs_page, err); 1942 nilfs_end_page_io(fs_page, err);
1943 if (unlikely(fs_page == failed_page)) 1943 if (fs_page && fs_page == failed_page)
1944 goto done; 1944 goto done;
1945 fs_page = bh->b_page; 1945 fs_page = bh->b_page;
1946 } 1946 }
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 31dac7e3b0f1..dffbb0911d02 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,15 +1,5 @@
1config FSNOTIFY 1config FSNOTIFY
2 bool "Filesystem notification backend" 2 def_bool n
3 default y
4 ---help---
5 fsnotify is a backend for filesystem notification. fsnotify does
6 not provide any userspace interface but does provide the basis
7 needed for other notification schemes such as dnotify, inotify,
8 and fanotify.
9
10 Say Y here to enable fsnotify suport.
11
12 If unsure, say Y.
13 3
14source "fs/notify/dnotify/Kconfig" 4source "fs/notify/dnotify/Kconfig"
15source "fs/notify/inotify/Kconfig" 5source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
index 904ff8d5405a..f9c1ca139d8f 100644
--- a/fs/notify/dnotify/Kconfig
+++ b/fs/notify/dnotify/Kconfig
@@ -1,6 +1,6 @@
1config DNOTIFY 1config DNOTIFY
2 bool "Dnotify support" 2 bool "Dnotify support"
3 depends on FSNOTIFY 3 select FSNOTIFY
4 default y 4 default y
5 help 5 help
6 Dnotify is a directory-based per-fd file change notification system 6 Dnotify is a directory-based per-fd file change notification system
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index ec2f7bd76818..037e878e03fc 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -159,7 +159,9 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
159 if (!group->ops->should_send_event(group, to_tell, mask)) 159 if (!group->ops->should_send_event(group, to_tell, mask))
160 continue; 160 continue;
161 if (!event) { 161 if (!event) {
162 event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie); 162 event = fsnotify_create_event(to_tell, mask, data,
163 data_is, file_name, cookie,
164 GFP_KERNEL);
163 /* shit, we OOM'd and now we can't tell, maybe 165 /* shit, we OOM'd and now we can't tell, maybe
164 * someday someone else will want to do something 166 * someday someone else will want to do something
165 * here */ 167 * here */
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 5356884289a1..3e56dbffe729 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,7 +15,7 @@ config INOTIFY
15 15
16config INOTIFY_USER 16config INOTIFY_USER
17 bool "Inotify support for userspace" 17 bool "Inotify support for userspace"
18 depends on FSNOTIFY 18 select FSNOTIFY
19 default y 19 default y
20 ---help--- 20 ---help---
21 Say Y here to enable inotify support for userspace, including the 21 Say Y here to enable inotify support for userspace, including the
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ff231ad23895..f30d9bbc2e1b 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -57,7 +57,6 @@ int inotify_max_user_watches __read_mostly;
57 57
58static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; 58static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
59struct kmem_cache *event_priv_cachep __read_mostly; 59struct kmem_cache *event_priv_cachep __read_mostly;
60static struct fsnotify_event *inotify_ignored_event;
61 60
62/* 61/*
63 * When inotify registers a new group it increments this and uses that 62 * When inotify registers a new group it increments this and uses that
@@ -296,12 +295,15 @@ static int inotify_fasync(int fd, struct file *file, int on)
296static int inotify_release(struct inode *ignored, struct file *file) 295static int inotify_release(struct inode *ignored, struct file *file)
297{ 296{
298 struct fsnotify_group *group = file->private_data; 297 struct fsnotify_group *group = file->private_data;
298 struct user_struct *user = group->inotify_data.user;
299 299
300 fsnotify_clear_marks_by_group(group); 300 fsnotify_clear_marks_by_group(group);
301 301
302 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 302 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
303 fsnotify_put_group(group); 303 fsnotify_put_group(group);
304 304
305 atomic_dec(&user->inotify_devs);
306
305 return 0; 307 return 0;
306} 308}
307 309
@@ -362,6 +364,17 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
362 return error; 364 return error;
363} 365}
364 366
367static void inotify_remove_from_idr(struct fsnotify_group *group,
368 struct inotify_inode_mark_entry *ientry)
369{
370 struct idr *idr;
371
372 spin_lock(&group->inotify_data.idr_lock);
373 idr = &group->inotify_data.idr;
374 idr_remove(idr, ientry->wd);
375 spin_unlock(&group->inotify_data.idr_lock);
376 ientry->wd = -1;
377}
365/* 378/*
366 * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the 379 * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the
367 * internal reference help on the mark because it is in the idr. 380 * internal reference help on the mark because it is in the idr.
@@ -370,13 +383,19 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
370 struct fsnotify_group *group) 383 struct fsnotify_group *group)
371{ 384{
372 struct inotify_inode_mark_entry *ientry; 385 struct inotify_inode_mark_entry *ientry;
386 struct fsnotify_event *ignored_event;
373 struct inotify_event_private_data *event_priv; 387 struct inotify_event_private_data *event_priv;
374 struct fsnotify_event_private_data *fsn_event_priv; 388 struct fsnotify_event_private_data *fsn_event_priv;
375 struct idr *idr; 389
390 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
391 FSNOTIFY_EVENT_NONE, NULL, 0,
392 GFP_NOFS);
393 if (!ignored_event)
394 return;
376 395
377 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 396 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
378 397
379 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); 398 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
380 if (unlikely(!event_priv)) 399 if (unlikely(!event_priv))
381 goto skip_send_ignore; 400 goto skip_send_ignore;
382 401
@@ -385,7 +404,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
385 fsn_event_priv->group = group; 404 fsn_event_priv->group = group;
386 event_priv->wd = ientry->wd; 405 event_priv->wd = ientry->wd;
387 406
388 fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv); 407 fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
389 408
390 /* did the private data get added? */ 409 /* did the private data get added? */
391 if (list_empty(&fsn_event_priv->event_list)) 410 if (list_empty(&fsn_event_priv->event_list))
@@ -393,14 +412,16 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
393 412
394skip_send_ignore: 413skip_send_ignore:
395 414
415 /* matches the reference taken when the event was created */
416 fsnotify_put_event(ignored_event);
417
396 /* remove this entry from the idr */ 418 /* remove this entry from the idr */
397 spin_lock(&group->inotify_data.idr_lock); 419 inotify_remove_from_idr(group, ientry);
398 idr = &group->inotify_data.idr;
399 idr_remove(idr, ientry->wd);
400 spin_unlock(&group->inotify_data.idr_lock);
401 420
402 /* removed from idr, drop that reference */ 421 /* removed from idr, drop that reference */
403 fsnotify_put_mark(entry); 422 fsnotify_put_mark(entry);
423
424 atomic_dec(&group->inotify_data.user->inotify_watches);
404} 425}
405 426
406/* ding dong the mark is dead */ 427/* ding dong the mark is dead */
@@ -415,6 +436,7 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
415{ 436{
416 struct fsnotify_mark_entry *entry = NULL; 437 struct fsnotify_mark_entry *entry = NULL;
417 struct inotify_inode_mark_entry *ientry; 438 struct inotify_inode_mark_entry *ientry;
439 struct inotify_inode_mark_entry *tmp_ientry;
418 int ret = 0; 440 int ret = 0;
419 int add = (arg & IN_MASK_ADD); 441 int add = (arg & IN_MASK_ADD);
420 __u32 mask; 442 __u32 mask;
@@ -425,54 +447,66 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod
425 if (unlikely(!mask)) 447 if (unlikely(!mask))
426 return -EINVAL; 448 return -EINVAL;
427 449
428 ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 450 tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
429 if (unlikely(!ientry)) 451 if (unlikely(!tmp_ientry))
430 return -ENOMEM; 452 return -ENOMEM;
431 /* we set the mask at the end after attaching it */ 453 /* we set the mask at the end after attaching it */
432 fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark); 454 fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
433 ientry->wd = 0; 455 tmp_ientry->wd = -1;
434 456
435find_entry: 457find_entry:
436 spin_lock(&inode->i_lock); 458 spin_lock(&inode->i_lock);
437 entry = fsnotify_find_mark_entry(group, inode); 459 entry = fsnotify_find_mark_entry(group, inode);
438 spin_unlock(&inode->i_lock); 460 spin_unlock(&inode->i_lock);
439 if (entry) { 461 if (entry) {
440 kmem_cache_free(inotify_inode_mark_cachep, ientry);
441 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 462 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
442 } else { 463 } else {
443 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) { 464 ret = -ENOSPC;
444 ret = -ENOSPC; 465 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
445 goto out_err; 466 goto out_err;
446 }
447
448 ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode);
449 if (ret == -EEXIST)
450 goto find_entry;
451 else if (ret)
452 goto out_err;
453
454 entry = &ientry->fsn_entry;
455retry: 467retry:
456 ret = -ENOMEM; 468 ret = -ENOMEM;
457 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) 469 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
458 goto out_err; 470 goto out_err;
459 471
460 spin_lock(&group->inotify_data.idr_lock); 472 spin_lock(&group->inotify_data.idr_lock);
461 /* if entry is added to the idr we keep the reference obtained 473 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
462 * through fsnotify_mark_add. remember to drop this reference 474 group->inotify_data.last_wd,
463 * when entry is removed from idr */ 475 &tmp_ientry->wd);
464 ret = idr_get_new_above(&group->inotify_data.idr, entry,
465 ++group->inotify_data.last_wd,
466 &ientry->wd);
467 spin_unlock(&group->inotify_data.idr_lock); 476 spin_unlock(&group->inotify_data.idr_lock);
468 if (ret) { 477 if (ret) {
469 if (ret == -EAGAIN) 478 if (ret == -EAGAIN)
470 goto retry; 479 goto retry;
471 goto out_err; 480 goto out_err;
472 } 481 }
482
483 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
484 if (ret) {
485 inotify_remove_from_idr(group, tmp_ientry);
486 if (ret == -EEXIST)
487 goto find_entry;
488 goto out_err;
489 }
490
491 /* tmp_ientry has been added to the inode, so we are all set up.
492 * now we just need to make sure tmp_ientry doesn't get freed and
493 * we need to set up entry and ientry so the generic code can
494 * do its thing. */
495 ientry = tmp_ientry;
496 entry = &ientry->fsn_entry;
497 tmp_ientry = NULL;
498
473 atomic_inc(&group->inotify_data.user->inotify_watches); 499 atomic_inc(&group->inotify_data.user->inotify_watches);
500
501 /* update the idr hint */
502 group->inotify_data.last_wd = ientry->wd;
503
504 /* we put the mark on the idr, take a reference */
505 fsnotify_get_mark(entry);
474 } 506 }
475 507
508 ret = ientry->wd;
509
476 spin_lock(&entry->lock); 510 spin_lock(&entry->lock);
477 511
478 old_mask = entry->mask; 512 old_mask = entry->mask;
@@ -503,14 +537,19 @@ retry:
503 fsnotify_recalc_group_mask(group); 537 fsnotify_recalc_group_mask(group);
504 } 538 }
505 539
506 return ientry->wd; 540 /* this either matches fsnotify_find_mark_entry, or init_mark_entry
541 * depending on which path we took... */
542 fsnotify_put_mark(entry);
507 543
508out_err: 544out_err:
509 /* see this isn't supposed to happen, just kill the watch */ 545 /* could be an error, could be that we found an existing mark */
510 if (entry) { 546 if (tmp_ientry) {
511 fsnotify_destroy_mark_by_entry(entry); 547 /* on the idr but didn't make it on the inode */
512 fsnotify_put_mark(entry); 548 if (tmp_ientry->wd != -1)
549 inotify_remove_from_idr(group, tmp_ientry);
550 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
513 } 551 }
552
514 return ret; 553 return ret;
515} 554}
516 555
@@ -718,9 +757,6 @@ static int __init inotify_user_setup(void)
718 757
719 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); 758 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
720 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); 759 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
721 inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
722 if (!inotify_ignored_event)
723 panic("unable to allocate the inotify ignored event\n");
724 760
725 inotify_max_queued_events = 16384; 761 inotify_max_queued_events = 16384;
726 inotify_max_user_instances = 128; 762 inotify_max_user_instances = 128;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 959b73e756fd..521368574e97 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -136,18 +136,24 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
136{ 136{
137 if ((old->mask == new->mask) && 137 if ((old->mask == new->mask) &&
138 (old->to_tell == new->to_tell) && 138 (old->to_tell == new->to_tell) &&
139 (old->data_type == new->data_type)) { 139 (old->data_type == new->data_type) &&
140 (old->name_len == new->name_len)) {
140 switch (old->data_type) { 141 switch (old->data_type) {
141 case (FSNOTIFY_EVENT_INODE): 142 case (FSNOTIFY_EVENT_INODE):
142 if (old->inode == new->inode) 143 /* remember, after old was put on the wait_q we aren't
144 * allowed to look at the inode any more, only thing
145 * left to check was if the file_name is the same */
146 if (old->name_len &&
147 !strcmp(old->file_name, new->file_name))
143 return true; 148 return true;
144 break; 149 break;
145 case (FSNOTIFY_EVENT_PATH): 150 case (FSNOTIFY_EVENT_PATH):
146 if ((old->path.mnt == new->path.mnt) && 151 if ((old->path.mnt == new->path.mnt) &&
147 (old->path.dentry == new->path.dentry)) 152 (old->path.dentry == new->path.dentry))
148 return true; 153 return true;
154 break;
149 case (FSNOTIFY_EVENT_NONE): 155 case (FSNOTIFY_EVENT_NONE):
150 return true; 156 return false;
151 }; 157 };
152 } 158 }
153 return false; 159 return false;
@@ -339,18 +345,19 @@ static void initialize_event(struct fsnotify_event *event)
339 * @name the filename, if available 345 * @name the filename, if available
340 */ 346 */
341struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, 347struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
342 int data_type, const char *name, u32 cookie) 348 int data_type, const char *name, u32 cookie,
349 gfp_t gfp)
343{ 350{
344 struct fsnotify_event *event; 351 struct fsnotify_event *event;
345 352
346 event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); 353 event = kmem_cache_alloc(fsnotify_event_cachep, gfp);
347 if (!event) 354 if (!event)
348 return NULL; 355 return NULL;
349 356
350 initialize_event(event); 357 initialize_event(event);
351 358
352 if (name) { 359 if (name) {
353 event->file_name = kstrdup(name, GFP_KERNEL); 360 event->file_name = kstrdup(name, gfp);
354 if (!event->file_name) { 361 if (!event->file_name) {
355 kmem_cache_free(fsnotify_event_cachep, event); 362 kmem_cache_free(fsnotify_event_cachep, event);
356 return NULL; 363 return NULL;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..f9a3e8942669 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1914 * immediately to their right. 1914 * immediately to their right.
1915 */ 1915 */
1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1917 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { 1917 if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
1918 BUG_ON(right_child_el->l_tree_depth);
1918 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); 1919 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
1919 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); 1920 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
1920 } 1921 }
@@ -2476,15 +2477,37 @@ out_ret_path:
2476 return ret; 2477 return ret;
2477} 2478}
2478 2479
2479static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2480 struct ocfs2_path *path) 2481 int subtree_index, struct ocfs2_path *path)
2481{ 2482{
2482 int i, idx; 2483 int i, idx, ret;
2483 struct ocfs2_extent_rec *rec; 2484 struct ocfs2_extent_rec *rec;
2484 struct ocfs2_extent_list *el; 2485 struct ocfs2_extent_list *el;
2485 struct ocfs2_extent_block *eb; 2486 struct ocfs2_extent_block *eb;
2486 u32 range; 2487 u32 range;
2487 2488
2489 /*
2490 * In normal tree rotation process, we will never touch the
2491 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
2492 * doesn't reserve the credits for them either.
2493 *
2494 * But we do have a special case here which will update the rightmost
2495 * records for all the bh in the path.
2496 * So we have to allocate extra credits and access them.
2497 */
2498 ret = ocfs2_extend_trans(handle,
2499 handle->h_buffer_credits + subtree_index);
2500 if (ret) {
2501 mlog_errno(ret);
2502 goto out;
2503 }
2504
2505 ret = ocfs2_journal_access_path(inode, handle, path);
2506 if (ret) {
2507 mlog_errno(ret);
2508 goto out;
2509 }
2510
2488 /* Path should always be rightmost. */ 2511 /* Path should always be rightmost. */
2489 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2512 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
2490 BUG_ON(eb->h_next_leaf_blk != 0ULL); 2513 BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2505 2528
2506 ocfs2_journal_dirty(handle, path->p_node[i].bh); 2529 ocfs2_journal_dirty(handle, path->p_node[i].bh);
2507 } 2530 }
2531out:
2532 return ret;
2508} 2533}
2509 2534
2510static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2717 if (del_right_subtree) { 2742 if (del_right_subtree) {
2718 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path,
2719 subtree_index, dealloc); 2744 subtree_index, dealloc);
2720 ocfs2_update_edge_lengths(inode, handle, left_path); 2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
2746 left_path);
2747 if (ret) {
2748 mlog_errno(ret);
2749 goto out;
2750 }
2721 2751
2722 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2752 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2723 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2753 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3034 3064
3035 ocfs2_unlink_subtree(inode, handle, left_path, path, 3065 ocfs2_unlink_subtree(inode, handle, left_path, path,
3036 subtree_index, dealloc); 3066 subtree_index, dealloc);
3037 ocfs2_update_edge_lengths(inode, handle, left_path); 3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
3068 left_path);
3069 if (ret) {
3070 mlog_errno(ret);
3071 goto out;
3072 }
3038 3073
3039 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 3074 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
3040 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 3075 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..b401654011a2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
193 (unsigned long long)OCFS2_I(inode)->ip_blkno); 193 (unsigned long long)OCFS2_I(inode)->ip_blkno);
194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); 194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
195 dump_stack(); 195 dump_stack();
196 goto bail;
196 } 197 }
197 198
198 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 199 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
894 */ 895 */
895 unsigned c_new; 896 unsigned c_new;
896 unsigned c_unwritten; 897 unsigned c_unwritten;
898 unsigned c_needs_zero;
897}; 899};
898 900
899static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
900{
901 return d->c_new || d->c_unwritten;
902}
903
904struct ocfs2_write_ctxt { 901struct ocfs2_write_ctxt {
905 /* Logical cluster position / len of write */ 902 /* Logical cluster position / len of write */
906 u32 w_cpos; 903 u32 w_cpos;
907 u32 w_clen; 904 u32 w_clen;
908 905
906 /* First cluster allocated in a nonsparse extend */
907 u32 w_first_new_cpos;
908
909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; 909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
910 910
911 /* 911 /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 wc->w_cpos = pos >> osb->s_clustersize_bits; 985 wc->w_cpos = pos >> osb->s_clustersize_bits;
986 wc->w_first_new_cpos = UINT_MAX;
986 cend = (pos + len - 1) >> osb->s_clustersize_bits; 987 cend = (pos + len - 1) >> osb->s_clustersize_bits;
987 wc->w_clen = cend - wc->w_cpos + 1; 988 wc->w_clen = cend - wc->w_cpos + 1;
988 get_bh(di_bh); 989 get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
1217 */ 1218 */
1218static int ocfs2_write_cluster(struct address_space *mapping, 1219static int ocfs2_write_cluster(struct address_space *mapping,
1219 u32 phys, unsigned int unwritten, 1220 u32 phys, unsigned int unwritten,
1221 unsigned int should_zero,
1220 struct ocfs2_alloc_context *data_ac, 1222 struct ocfs2_alloc_context *data_ac,
1221 struct ocfs2_alloc_context *meta_ac, 1223 struct ocfs2_alloc_context *meta_ac,
1222 struct ocfs2_write_ctxt *wc, u32 cpos, 1224 struct ocfs2_write_ctxt *wc, u32 cpos,
1223 loff_t user_pos, unsigned user_len) 1225 loff_t user_pos, unsigned user_len)
1224{ 1226{
1225 int ret, i, new, should_zero = 0; 1227 int ret, i, new;
1226 u64 v_blkno, p_blkno; 1228 u64 v_blkno, p_blkno;
1227 struct inode *inode = mapping->host; 1229 struct inode *inode = mapping->host;
1228 struct ocfs2_extent_tree et; 1230 struct ocfs2_extent_tree et;
1229 1231
1230 new = phys == 0 ? 1 : 0; 1232 new = phys == 0 ? 1 : 0;
1231 if (new || unwritten)
1232 should_zero = 1;
1233
1234 if (new) { 1233 if (new) {
1235 u32 tmp_pos; 1234 u32 tmp_pos;
1236 1235
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1301 if (tmpret) { 1300 if (tmpret) {
1302 mlog_errno(tmpret); 1301 mlog_errno(tmpret);
1303 if (ret == 0) 1302 if (ret == 0)
1304 tmpret = ret; 1303 ret = tmpret;
1305 } 1304 }
1306 } 1305 }
1307 1306
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1341 local_len = osb->s_clustersize - cluster_off; 1340 local_len = osb->s_clustersize - cluster_off;
1342 1341
1343 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1342 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1344 desc->c_unwritten, data_ac, meta_ac, 1343 desc->c_unwritten,
1344 desc->c_needs_zero,
1345 data_ac, meta_ac,
1345 wc, desc->c_cpos, pos, local_len); 1346 wc, desc->c_cpos, pos, local_len);
1346 if (ret) { 1347 if (ret) {
1347 mlog_errno(ret); 1348 mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1391 * newly allocated cluster. 1392 * newly allocated cluster.
1392 */ 1393 */
1393 desc = &wc->w_desc[0]; 1394 desc = &wc->w_desc[0];
1394 if (ocfs2_should_zero_cluster(desc)) 1395 if (desc->c_needs_zero)
1395 ocfs2_figure_cluster_boundaries(osb, 1396 ocfs2_figure_cluster_boundaries(osb,
1396 desc->c_cpos, 1397 desc->c_cpos,
1397 &wc->w_target_from, 1398 &wc->w_target_from,
1398 NULL); 1399 NULL);
1399 1400
1400 desc = &wc->w_desc[wc->w_clen - 1]; 1401 desc = &wc->w_desc[wc->w_clen - 1];
1401 if (ocfs2_should_zero_cluster(desc)) 1402 if (desc->c_needs_zero)
1402 ocfs2_figure_cluster_boundaries(osb, 1403 ocfs2_figure_cluster_boundaries(osb,
1403 desc->c_cpos, 1404 desc->c_cpos,
1404 NULL, 1405 NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1466 phys++; 1467 phys++;
1467 } 1468 }
1468 1469
1470 /*
1471 * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
1472 * file that got extended. w_first_new_cpos tells us
1473 * where the newly allocated clusters are so we can
1474 * zero them.
1475 */
1476 if (desc->c_cpos >= wc->w_first_new_cpos) {
1477 BUG_ON(phys == 0);
1478 desc->c_needs_zero = 1;
1479 }
1480
1469 desc->c_phys = phys; 1481 desc->c_phys = phys;
1470 if (phys == 0) { 1482 if (phys == 0) {
1471 desc->c_new = 1; 1483 desc->c_new = 1;
1484 desc->c_needs_zero = 1;
1472 *clusters_to_alloc = *clusters_to_alloc + 1; 1485 *clusters_to_alloc = *clusters_to_alloc + 1;
1473 } 1486 }
1474 if (ext_flags & OCFS2_EXT_UNWRITTEN) 1487
1488 if (ext_flags & OCFS2_EXT_UNWRITTEN) {
1475 desc->c_unwritten = 1; 1489 desc->c_unwritten = 1;
1490 desc->c_needs_zero = 1;
1491 }
1476 1492
1477 num_clusters--; 1493 num_clusters--;
1478 } 1494 }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1632 if (newsize <= i_size_read(inode)) 1648 if (newsize <= i_size_read(inode))
1633 return 0; 1649 return 0;
1634 1650
1635 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); 1651 ret = ocfs2_extend_no_holes(inode, newsize, pos);
1636 if (ret) 1652 if (ret)
1637 mlog_errno(ret); 1653 mlog_errno(ret);
1638 1654
1655 wc->w_first_new_cpos =
1656 ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
1657
1639 return ret; 1658 return ret;
1640} 1659}
1641 1660
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1644 struct page **pagep, void **fsdata, 1663 struct page **pagep, void **fsdata,
1645 struct buffer_head *di_bh, struct page *mmap_page) 1664 struct buffer_head *di_bh, struct page *mmap_page)
1646{ 1665{
1647 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1666 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1648 unsigned int clusters_to_alloc, extents_to_split; 1667 unsigned int clusters_to_alloc, extents_to_split;
1649 struct ocfs2_write_ctxt *wc; 1668 struct ocfs2_write_ctxt *wc;
1650 struct inode *inode = mapping->host; 1669 struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1722 1741
1723 } 1742 }
1724 1743
1725 ocfs2_set_target_boundaries(osb, wc, pos, len, 1744 /*
1726 clusters_to_alloc + extents_to_split); 1745 * We have to zero sparse allocated clusters, unwritten extent clusters,
1746 * and non-sparse clusters we just extended. For non-sparse writes,
1747 * we know zeros will only be needed in the first and/or last cluster.
1748 */
1749 if (clusters_to_alloc || extents_to_split ||
1750 wc->w_desc[0].c_needs_zero ||
1751 wc->w_desc[wc->w_clen - 1].c_needs_zero)
1752 cluster_of_pages = 1;
1753 else
1754 cluster_of_pages = 0;
1755
1756 ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
1727 1757
1728 handle = ocfs2_start_trans(osb, credits); 1758 handle = ocfs2_start_trans(osb, credits);
1729 if (IS_ERR(handle)) { 1759 if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1756 * extent. 1786 * extent.
1757 */ 1787 */
1758 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1788 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1759 clusters_to_alloc + extents_to_split, 1789 cluster_of_pages, mmap_page);
1760 mmap_page);
1761 if (ret) { 1790 if (ret) {
1762 mlog_errno(ret); 1791 mlog_errno(ret);
1763 goto out_quota; 1792 goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..2f28b7de2c8d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
310 return ret; 310 return ret;
311} 311}
312 312
313static DEFINE_SPINLOCK(dentry_list_lock); 313DEFINE_SPINLOCK(dentry_list_lock);
314 314
315/* We limit the number of dentry locks to drop in one go. We have 315/* We limit the number of dentry locks to drop in one go. We have
316 * this limit so that we don't starve other users of ocfs2_wq. */ 316 * this limit so that we don't starve other users of ocfs2_wq. */
317#define DL_INODE_DROP_COUNT 64 317#define DL_INODE_DROP_COUNT 64
318 318
319/* Drop inode references from dentry locks */ 319/* Drop inode references from dentry locks */
320void ocfs2_drop_dl_inodes(struct work_struct *work) 320static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
321{ 321{
322 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
323 dentry_lock_work);
324 struct ocfs2_dentry_lock *dl; 322 struct ocfs2_dentry_lock *dl;
325 int drop_count = DL_INODE_DROP_COUNT;
326 323
327 spin_lock(&dentry_list_lock); 324 spin_lock(&dentry_list_lock);
328 while (osb->dentry_lock_list && drop_count--) { 325 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
329 dl = osb->dentry_lock_list; 326 dl = osb->dentry_lock_list;
330 osb->dentry_lock_list = dl->dl_next; 327 osb->dentry_lock_list = dl->dl_next;
331 spin_unlock(&dentry_list_lock); 328 spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
333 kfree(dl); 330 kfree(dl);
334 spin_lock(&dentry_list_lock); 331 spin_lock(&dentry_list_lock);
335 } 332 }
336 if (osb->dentry_lock_list) 333 spin_unlock(&dentry_list_lock);
334}
335
336void ocfs2_drop_dl_inodes(struct work_struct *work)
337{
338 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
339 dentry_lock_work);
340
341 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
342 /*
343 * Don't queue dropping if umount is in progress. We flush the
344 * list in ocfs2_dismount_volume
345 */
346 spin_lock(&dentry_list_lock);
347 if (osb->dentry_lock_list &&
348 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
337 queue_work(ocfs2_wq, &osb->dentry_lock_work); 349 queue_work(ocfs2_wq, &osb->dentry_lock_work);
338 spin_unlock(&dentry_list_lock); 350 spin_unlock(&dentry_list_lock);
339} 351}
340 352
353/* Flush the whole work queue */
354void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
355{
356 __ocfs2_drop_dl_inodes(osb, -1);
357}
358
341/* 359/*
342 * ocfs2_dentry_iput() and friends. 360 * ocfs2_dentry_iput() and friends.
343 * 361 *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
368 /* We leave dropping of inode reference to ocfs2_wq as that can 386 /* We leave dropping of inode reference to ocfs2_wq as that can
369 * possibly lead to inode deletion which gets tricky */ 387 * possibly lead to inode deletion which gets tricky */
370 spin_lock(&dentry_list_lock); 388 spin_lock(&dentry_list_lock);
371 if (!osb->dentry_lock_list) 389 if (!osb->dentry_lock_list &&
390 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
372 queue_work(ocfs2_wq, &osb->dentry_lock_work); 391 queue_work(ocfs2_wq, &osb->dentry_lock_work);
373 dl->dl_next = osb->dentry_lock_list; 392 dl->dl_next = osb->dentry_lock_list;
374 osb->dentry_lock_list = dl; 393 osb->dentry_lock_list = dl;
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, 49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
50 u64 parent_blkno); 50 u64 parent_blkno);
51 51
52extern spinlock_t dentry_list_lock;
53
52void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 54void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
53 struct ocfs2_dentry_lock *dl); 55 struct ocfs2_dentry_lock *dl);
54 56
55void ocfs2_drop_dl_inodes(struct work_struct *work); 57void ocfs2_drop_dl_inodes(struct work_struct *work);
58void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
56 59
57struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 60struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
58 int skip_unhashed); 61 int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
103 lock->ast_pending, lock->ml.type); 103 lock->ast_pending, lock->ml.type);
104 BUG(); 104 BUG();
105 } 105 }
106 BUG_ON(!list_empty(&lock->ast_list));
107 if (lock->ast_pending) 106 if (lock->ast_pending)
108 mlog(0, "lock has an ast getting flushed right now\n"); 107 mlog(0, "lock has an ast getting flushed right now\n");
109 108
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1118 1118
1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", 1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
1120 dlm->name, res->lockname.len, res->lockname.name, 1120 dlm->name, res->lockname.len, res->lockname.name,
1121 orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", 1121 orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
1122 send_to); 1122 send_to);
1123 1123
1124 /* send it */ 1124 /* send it */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
1851 if (ret) 1851 if (ret)
1852 goto out_dio; 1852 goto out_dio;
1853 1853
1854 count = ocount;
1854 ret = generic_write_checks(file, ppos, &count, 1855 ret = generic_write_checks(file, ppos, &count,
1855 S_ISBLK(inode->i_mode)); 1856 S_ISBLK(inode->i_mode));
1856 if (ret) 1857 if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
1918 1919
1919 mutex_unlock(&inode->i_mutex); 1920 mutex_unlock(&inode->i_mutex);
1920 1921
1922 if (written)
1923 ret = written;
1921 mlog_exit(ret); 1924 mlog_exit(ret);
1922 return written ? written : ret; 1925 return ret;
1923} 1926}
1924 1927
1925static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, 1928static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 9fcd36dcc9a0..467b413bec21 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,7 +7,6 @@
7 7
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/mount.h> 9#include <linux/mount.h>
10#include <linux/smp_lock.h>
11 10
12#define MLOG_MASK_PREFIX ML_INODE 11#define MLOG_MASK_PREFIX ML_INODE
13#include <cluster/masklog.h> 12#include <cluster/masklog.h>
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1954 os->os_osb = osb; 1954 os->os_osb = osb;
1955 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0; 1956 os->os_seqno = 0;
1957 os->os_scantime = CURRENT_TIME;
1958 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); 1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1959}
1960 1960
1961void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1962{
1963 struct ocfs2_orphan_scan *os;
1964
1965 os = &osb->osb_orphan_scan;
1966 os->os_scantime = CURRENT_TIME;
1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 1967 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1968 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1963 else { 1969 else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147void ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 150void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 151
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
329/* extended attribute block update */ 330/* extended attribute block update */
330#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 331#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
331 332
333/* Update of a single quota block */
334#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
335
332/* global quotafile inode update, data block */ 336/* global quotafile inode update, data block */
333#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 337#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
338 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
334 339
340#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
335/* 341/*
336 * The two writes below can accidentally see global info dirty due 342 * The two writes below can accidentally see global info dirty due
337 * to set_info() quotactl so make them prepared for the writes. 343 * to set_info() quotactl so make them prepared for the writes.
338 */ 344 */
339/* quota data block, global info */ 345/* quota data block, global info */
340/* Write to local quota file */ 346/* Write to local quota file */
341#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) 347#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
348 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
342 349
343/* global quota data block, local quota data block, global quota inode, 350/* global quota data block, local quota data block, global quota inode,
344 * global quota info */ 351 * global quota info */
345#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) 352#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
353 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
346 354
347static inline int ocfs2_quota_trans_credits(struct super_block *sb) 355static inline int ocfs2_quota_trans_credits(struct super_block *sb)
348{ 356{
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
355 return credits; 363 return credits;
356} 364}
357 365
358/* Number of credits needed for removing quota structure from file */
359int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
360/* Number of credits needed for initialization of new quota structure */
361int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
362
363/* group extend. inode update and last group update. */ 366/* group extend. inode update and last group update. */
364#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 367#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
365 368
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
225}; 225};
226 226
227#define OCFS2_OSB_SOFT_RO 0x0001 227#define OCFS2_OSB_SOFT_RO 0x0001
228#define OCFS2_OSB_HARD_RO 0x0002 228#define OCFS2_OSB_HARD_RO 0x0002
229#define OCFS2_OSB_ERROR_FS 0x0004 229#define OCFS2_OSB_ERROR_FS 0x0004
230#define OCFS2_DEFAULT_ATIME_QUANTUM 60 230#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
231
232#define OCFS2_DEFAULT_ATIME_QUANTUM 60
231 233
232struct ocfs2_journal; 234struct ocfs2_journal;
233struct ocfs2_slot_info; 235struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
490 spin_unlock(&osb->osb_lock); 492 spin_unlock(&osb->osb_lock);
491} 493}
492 494
495
496static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
497 unsigned long flag)
498{
499 unsigned long ret;
500
501 spin_lock(&osb->osb_lock);
502 ret = osb->osb_flags & flag;
503 spin_unlock(&osb->osb_lock);
504 return ret;
505}
506
493static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 507static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
494 int hard) 508 int hard)
495{ 509{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */ 50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ 51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */ 52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */ 53 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */ 54 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 55 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..bf7742d0ee3b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime); 70 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime); 71 d->dqb_itime = cpu_to_le64(m->dqb_itime);
72 d->dqb_pad1 = d->dqb_pad2 = 0;
72} 73}
73 74
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot) 75static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
211 212
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); 213 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) { 214 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem); 215 loff_t rounded_end =
215 err = ocfs2_extend_no_holes(gqinode, off + len, off); 216 ocfs2_align_bytes_to_blocks(sb, off + len);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem); 217
217 if (err < 0) 218 /* Space is already allocated in ocfs2_global_read_dquot() */
218 goto out;
219 err = ocfs2_simple_size_update(gqinode, 219 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh, 220 oinfo->dqi_gqi_bh,
221 off + len); 221 rounded_end);
222 if (err < 0) 222 if (err < 0)
223 goto out; 223 goto out;
224 new = 1; 224 new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 } 234 }
235 if (err) { 235 if (err) {
236 mlog_errno(err); 236 mlog_errno(err);
237 return err; 237 goto out;
238 } 238 }
239 lock_buffer(bh); 239 lock_buffer(bh);
240 if (new) 240 if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); 344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); 345 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); 346 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); 347 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 351 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 352 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 353 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff); 354 msecs_to_jiffies(oinfo->dqi_syncms));
356 355
357out_err: 356out_err:
358 mlog_exit(status); 357 mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
402 return err; 401 return err;
403} 402}
404 403
404static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
405{
406 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
407
408 /*
409 * We may need to allocate tree blocks and a leaf block but not the
410 * root block
411 */
412 return oinfo->dqi_gi.dqi_qtree_depth;
413}
414
415static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
416{
417 /* We modify all the allocated blocks, tree root, and info block */
418 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
419 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
420}
421
405/* Read in information from global quota file and acquire a reference to it. 422/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */ 423 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot) 424int ocfs2_global_read_dquot(struct dquot *dquot)
408{ 425{
409 int err, err2, ex = 0; 426 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info = 427 struct super_block *sb = dquot->dq_sb;
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 428 int type = dquot->dq_type;
429 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
430 struct ocfs2_super *osb = OCFS2_SB(sb);
431 struct inode *gqinode = info->dqi_gqinode;
432 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
433 handle_t *handle = NULL;
412 434
413 err = ocfs2_qinfo_lock(info, 0); 435 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0) 436 if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
419 OCFS2_DQUOT(dquot)->dq_use_count++; 441 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; 442 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; 443 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
444 ocfs2_qinfo_unlock(info, 0);
445
422 if (!dquot->dq_off) { /* No real quota entry? */ 446 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 ocfs2_qinfo_unlock(info, 0);
425 err = ocfs2_qinfo_lock(info, 1);
426 if (err < 0)
427 goto out_qlock;
428 ex = 1; 447 ex = 1;
448 /*
449 * Add blocks to quota file before we start a transaction since
450 * locking allocators ranks above a transaction start
451 */
452 WARN_ON(journal_current_handle());
453 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
454 err = ocfs2_extend_no_holes(gqinode,
455 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
456 gqinode->i_size);
457 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
458 if (err < 0)
459 goto out;
429 } 460 }
461
462 handle = ocfs2_start_trans(osb,
463 ocfs2_calc_global_qinit_credits(sb, type));
464 if (IS_ERR(handle)) {
465 err = PTR_ERR(handle);
466 goto out;
467 }
468 err = ocfs2_qinfo_lock(info, ex);
469 if (err < 0)
470 goto out_trans;
430 err = qtree_write_dquot(&info->dqi_gi, dquot); 471 err = qtree_write_dquot(&info->dqi_gi, dquot);
431 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { 472 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
432 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); 473 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
438 ocfs2_qinfo_unlock(info, 1); 479 ocfs2_qinfo_unlock(info, 1);
439 else 480 else
440 ocfs2_qinfo_unlock(info, 0); 481 ocfs2_qinfo_unlock(info, 0);
482out_trans:
483 if (handle)
484 ocfs2_commit_trans(osb, handle);
441out: 485out:
442 if (err < 0) 486 if (err < 0)
443 mlog_errno(err); 487 mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
607 651
608 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 652 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
609 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 653 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
610 oinfo->dqi_syncjiff); 654 msecs_to_jiffies(oinfo->dqi_syncms));
611} 655}
612 656
613/* 657/*
@@ -635,20 +679,18 @@ out:
635 return status; 679 return status;
636} 680}
637 681
638int ocfs2_calc_qdel_credits(struct super_block *sb, int type) 682static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
639{ 683{
640 struct ocfs2_mem_dqinfo *oinfo; 684 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
641 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 685 /*
642 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; 686 * We modify tree, leaf block, global info, local chunk header,
643 687 * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
644 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) 688 * accounts for inode update
645 return 0; 689 */
646 690 return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
647 oinfo = sb_dqinfo(sb, type)->dqi_priv; 691 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
648 /* We modify tree, leaf block, global info, local chunk header, 692 OCFS2_QINFO_WRITE_CREDITS +
649 * global and local inode */ 693 OCFS2_INODE_UPDATE_CREDITS;
650 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
651 2 * OCFS2_INODE_UPDATE_CREDITS;
652} 694}
653 695
654static int ocfs2_release_dquot(struct dquot *dquot) 696static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
680 return status; 722 return status;
681} 723}
682 724
683int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
684{
685 struct ocfs2_mem_dqinfo *oinfo;
686 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
687 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
688 struct ocfs2_dinode *lfe, *gfe;
689
690 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
691 return 0;
692
693 oinfo = sb_dqinfo(sb, type)->dqi_priv;
694 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
695 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
696 /* We can extend local file + global file. In local file we
697 * can modify info, chunk header block and dquot block. In
698 * global file we can modify info, tree and leaf block */
699 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
700 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
701 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
702}
703
704static int ocfs2_acquire_dquot(struct dquot *dquot) 725static int ocfs2_acquire_dquot(struct dquot *dquot)
705{ 726{
706 handle_t *handle;
707 struct ocfs2_mem_dqinfo *oinfo = 727 struct ocfs2_mem_dqinfo *oinfo =
708 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 728 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
709 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
710 int status = 0; 729 int status = 0;
711 730
712 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 731 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
715 status = ocfs2_lock_global_qf(oinfo, 1); 734 status = ocfs2_lock_global_qf(oinfo, 1);
716 if (status < 0) 735 if (status < 0)
717 goto out; 736 goto out;
718 handle = ocfs2_start_trans(osb,
719 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
720 if (IS_ERR(handle)) {
721 status = PTR_ERR(handle);
722 mlog_errno(status);
723 goto out_ilock;
724 }
725 status = dquot_acquire(dquot); 737 status = dquot_acquire(dquot);
726 ocfs2_commit_trans(osb, handle);
727out_ilock:
728 ocfs2_unlock_global_qf(oinfo, 1); 738 ocfs2_unlock_global_qf(oinfo, 1);
729out: 739out:
730 mlog_exit(status); 740 mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
20#include "sysfile.h" 20#include "sysfile.h"
21#include "dlmglue.h" 21#include "dlmglue.h"
22#include "quota.h" 22#include "quota.h"
23#include "uptodate.h"
23 24
24/* Number of local quota structures per block */ 25/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 26static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
100 handle_t *handle; 101 handle_t *handle;
101 int status; 102 int status;
102 103
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1); 104 handle = ocfs2_start_trans(OCFS2_SB(sb),
105 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
104 if (IS_ERR(handle)) { 106 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle); 107 status = PTR_ERR(handle);
106 mlog_errno(status); 108 mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
610 goto out_bh; 612 goto out_bh;
611 /* Mark quota file as clean if we are recovering quota file of 613 /* Mark quota file as clean if we are recovering quota file of
612 * some other node. */ 614 * some other node. */
613 handle = ocfs2_start_trans(osb, 1); 615 handle = ocfs2_start_trans(osb,
616 OCFS2_LOCAL_QINFO_WRITE_CREDITS);
614 if (IS_ERR(handle)) { 617 if (IS_ERR(handle)) {
615 status = PTR_ERR(handle); 618 status = PTR_ERR(handle);
616 mlog_errno(status); 619 mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
940 struct ocfs2_local_disk_chunk *dchunk; 943 struct ocfs2_local_disk_chunk *dchunk;
941 int status; 944 int status;
942 handle_t *handle; 945 handle_t *handle;
943 struct buffer_head *bh = NULL; 946 struct buffer_head *bh = NULL, *dbh = NULL;
944 u64 p_blkno; 947 u64 p_blkno;
945 948
946 /* We are protected by dqio_sem so no locking needed */ 949 /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
964 mlog_errno(status); 967 mlog_errno(status);
965 goto out; 968 goto out;
966 } 969 }
970 /* Local quota info and two new blocks we initialize */
971 handle = ocfs2_start_trans(OCFS2_SB(sb),
972 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
973 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
974 if (IS_ERR(handle)) {
975 status = PTR_ERR(handle);
976 mlog_errno(status);
977 goto out;
978 }
967 979
980 /* Initialize chunk header */
968 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 981 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
969 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 982 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
970 &p_blkno, NULL, NULL); 983 &p_blkno, NULL, NULL);
971 up_read(&OCFS2_I(lqinode)->ip_alloc_sem); 984 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
972 if (status < 0) { 985 if (status < 0) {
973 mlog_errno(status); 986 mlog_errno(status);
974 goto out; 987 goto out_trans;
975 } 988 }
976 bh = sb_getblk(sb, p_blkno); 989 bh = sb_getblk(sb, p_blkno);
977 if (!bh) { 990 if (!bh) {
978 status = -ENOMEM; 991 status = -ENOMEM;
979 mlog_errno(status); 992 mlog_errno(status);
980 goto out; 993 goto out_trans;
981 } 994 }
982 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
983 996 ocfs2_set_new_buffer_uptodate(lqinode, bh);
984 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
985 if (IS_ERR(handle)) {
986 status = PTR_ERR(handle);
987 mlog_errno(status);
988 goto out;
989 }
990
991 status = ocfs2_journal_access_dq(handle, lqinode, bh, 997 status = ocfs2_journal_access_dq(handle, lqinode, bh,
992 OCFS2_JOURNAL_ACCESS_WRITE); 998 OCFS2_JOURNAL_ACCESS_CREATE);
993 if (status < 0) { 999 if (status < 0) {
994 mlog_errno(status); 1000 mlog_errno(status);
995 goto out_trans; 1001 goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
999 memset(dchunk->dqc_bitmap, 0, 1005 memset(dchunk->dqc_bitmap, 0,
1000 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1006 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1001 OCFS2_QBLK_RESERVED_SPACE); 1007 OCFS2_QBLK_RESERVED_SPACE);
1002 set_buffer_uptodate(bh);
1003 unlock_buffer(bh); 1008 unlock_buffer(bh);
1004 status = ocfs2_journal_dirty(handle, bh); 1009 status = ocfs2_journal_dirty(handle, bh);
1005 if (status < 0) { 1010 if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1007 goto out_trans; 1012 goto out_trans;
1008 } 1013 }
1009 1014
1015 /* Initialize new block with structures */
1016 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1017 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1018 &p_blkno, NULL, NULL);
1019 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 if (status < 0) {
1021 mlog_errno(status);
1022 goto out_trans;
1023 }
1024 dbh = sb_getblk(sb, p_blkno);
1025 if (!dbh) {
1026 status = -ENOMEM;
1027 mlog_errno(status);
1028 goto out_trans;
1029 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) {
1034 mlog_errno(status);
1035 goto out_trans;
1036 }
1037 lock_buffer(dbh);
1038 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1039 unlock_buffer(dbh);
1040 status = ocfs2_journal_dirty(handle, dbh);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out_trans;
1044 }
1045
1046 /* Update local quotafile info */
1010 oinfo->dqi_blocks += 2; 1047 oinfo->dqi_blocks += 2;
1011 oinfo->dqi_chunks++; 1048 oinfo->dqi_chunks++;
1012 status = ocfs2_local_write_info(sb, type); 1049 status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
1031 ocfs2_commit_trans(OCFS2_SB(sb), handle); 1068 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1032out: 1069out:
1033 brelse(bh); 1070 brelse(bh);
1071 brelse(dbh);
1034 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); 1072 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1035 return ERR_PTR(status); 1073 return ERR_PTR(status);
1036} 1074}
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1048 struct ocfs2_local_disk_chunk *dchunk; 1086 struct ocfs2_local_disk_chunk *dchunk;
1049 int epb = ol_quota_entries_per_block(sb); 1087 int epb = ol_quota_entries_per_block(sb);
1050 unsigned int chunk_blocks; 1088 unsigned int chunk_blocks;
1089 struct buffer_head *bh;
1090 u64 p_blkno;
1051 int status; 1091 int status;
1052 handle_t *handle; 1092 handle_t *handle;
1053 1093
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1075 mlog_errno(status); 1115 mlog_errno(status);
1076 goto out; 1116 goto out;
1077 } 1117 }
1078 handle = ocfs2_start_trans(OCFS2_SB(sb), 2); 1118
1119 /* Get buffer from the just added block */
1120 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1121 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1122 &p_blkno, NULL, NULL);
1123 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto out;
1127 }
1128 bh = sb_getblk(sb, p_blkno);
1129 if (!bh) {
1130 status = -ENOMEM;
1131 mlog_errno(status);
1132 goto out;
1133 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh);
1135
1136 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb),
1138 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
1139 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
1079 if (IS_ERR(handle)) { 1140 if (IS_ERR(handle)) {
1080 status = PTR_ERR(handle); 1141 status = PTR_ERR(handle);
1081 mlog_errno(status); 1142 mlog_errno(status);
1082 goto out; 1143 goto out;
1083 } 1144 }
1145 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) {
1149 mlog_errno(status);
1150 goto out_trans;
1151 }
1152 lock_buffer(bh);
1153 memset(bh->b_data, 0, sb->s_blocksize);
1154 unlock_buffer(bh);
1155 status = ocfs2_journal_dirty(handle, bh);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto out_trans;
1159 }
1160 /* Update chunk header */
1084 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1085 OCFS2_JOURNAL_ACCESS_WRITE); 1162 OCFS2_JOURNAL_ACCESS_WRITE);
1086 if (status < 0) { 1163 if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1097 mlog_errno(status); 1174 mlog_errno(status);
1098 goto out_trans; 1175 goto out_trans;
1099 } 1176 }
1177 /* Update file header */
1100 oinfo->dqi_blocks++; 1178 oinfo->dqi_blocks++;
1101 status = ocfs2_local_write_info(sb, type); 1179 status = ocfs2_local_write_info(sb, type);
1102 if (status < 0) { 1180 if (status < 0) {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
17 * General Public License for more details. 17 * General Public License for more details.
18 */ 18 */
19 19
20#include <linux/kernel.h>
20#include <linux/crc32.h> 21#include <linux/crc32.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
@@ -153,7 +154,7 @@ static int status_map[] = {
153 154
154static int dlm_status_to_errno(enum dlm_status status) 155static int dlm_status_to_errno(enum dlm_status status)
155{ 156{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); 157 BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
157 158
158 return status_map[status]; 159 return status_map[status];
159} 160}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..b0ee0fdf799a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
777 } 777 }
778 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 spin_lock_init(&stats->b_lock);
780 status = ocfs2_verify_volume(di, *bh, blksize, stats); 781 status = ocfs2_verify_volume(di, *bh, blksize, stats);
781 if (status >= 0) 782 if (status >= 0)
782 goto bail; 783 goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1182 wake_up(&osb->osb_mount_event); 1183 wake_up(&osb->osb_mount_event);
1183 1184
1184 /* Start this when the mount is almost sure of being successful */ 1185 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb); 1186 ocfs2_orphan_scan_start(osb);
1186 1187
1187 mlog_exit(status); 1188 mlog_exit(status);
1188 return status; 1189 return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
1213 mnt); 1214 mnt);
1214} 1215}
1215 1216
1217static void ocfs2_kill_sb(struct super_block *sb)
1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220
1221 /* Prevent further queueing of inode drop events */
1222 spin_lock(&dentry_list_lock);
1223 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1224 spin_unlock(&dentry_list_lock);
1225 /* Wait for work to finish and/or remove it */
1226 cancel_work_sync(&osb->dentry_lock_work);
1227
1228 kill_block_super(sb);
1229}
1230
1216static struct file_system_type ocfs2_fs_type = { 1231static struct file_system_type ocfs2_fs_type = {
1217 .owner = THIS_MODULE, 1232 .owner = THIS_MODULE,
1218 .name = "ocfs2", 1233 .name = "ocfs2",
1219 .get_sb = ocfs2_get_sb, /* is this called when we mount 1234 .get_sb = ocfs2_get_sb, /* is this called when we mount
1220 * the fs? */ 1235 * the fs? */
1221 .kill_sb = kill_block_super, /* set to the generic one 1236 .kill_sb = ocfs2_kill_sb,
1222 * right now, but do we 1237
1223 * need to change that? */
1224 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1238 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1225 .next = NULL 1239 .next = NULL
1226}; 1240};
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1819 1833
1820 debugfs_remove(osb->osb_ctxt); 1834 debugfs_remove(osb->osb_ctxt);
1821 1835
1836 /*
1837 * Flush inode dropping work queue so that deletes are
1838 * performed while the filesystem is still working
1839 */
1840 ocfs2_drop_all_dl_inodes(osb);
1841
1822 /* Orphan scan should be stopped as early as possible */ 1842 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb); 1843 ocfs2_orphan_scan_stop(osb);
1824 1844
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2001 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1982 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2002 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1983 2003
2004 ocfs2_orphan_scan_init(osb);
2005
1984 status = ocfs2_recovery_init(osb); 2006 status = ocfs2_recovery_init(osb);
1985 if (status) { 2007 if (status) {
1986 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2008 mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1052 struct ocfs2_xattr_block *xb; 1052 struct ocfs2_xattr_block *xb;
1053 struct ocfs2_xattr_value_root *xv; 1053 struct ocfs2_xattr_value_root *xv;
1054 size_t size; 1054 size_t size;
1055 int ret = -ENODATA, name_offset, name_len, block_off, i; 1055 int ret = -ENODATA, name_offset, name_len, i;
1056 int uninitialized_var(block_off);
1056 1057
1057 xs->bucket = ocfs2_xattr_bucket_new(inode); 1058 xs->bucket = ocfs2_xattr_bucket_new(inode);
1058 if (!xs->bucket) { 1059 if (!xs->bucket) {
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 1a9c7878f864..ea4e6cb29e13 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -436,7 +436,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
436 rcu_assign_pointer(ptbl->part[partno], p); 436 rcu_assign_pointer(ptbl->part[partno], p);
437 437
438 /* suppress uevent if the disk supresses it */ 438 /* suppress uevent if the disk supresses it */
439 if (!dev_get_uevent_suppress(pdev)) 439 if (!dev_get_uevent_suppress(ddev))
440 kobject_uevent(&pdev->kobj, KOBJ_ADD); 440 kobject_uevent(&pdev->kobj, KOBJ_ADD);
441 441
442 return p; 442 return p;
diff --git a/fs/pipe.c b/fs/pipe.c
index f7dd21ad85a6..52c415114838 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -68,8 +68,8 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
68 pipe_lock_nested(pipe1, I_MUTEX_PARENT); 68 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
69 pipe_lock_nested(pipe2, I_MUTEX_CHILD); 69 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
70 } else { 70 } else {
71 pipe_lock_nested(pipe2, I_MUTEX_CHILD); 71 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
72 pipe_lock_nested(pipe1, I_MUTEX_PARENT); 72 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
73 } 73 }
74} 74}
75 75
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3ce5ae9e3d2d..175db258942f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task)
234 234
235struct mm_struct *mm_for_maps(struct task_struct *task) 235struct mm_struct *mm_for_maps(struct task_struct *task)
236{ 236{
237 struct mm_struct *mm = get_task_mm(task); 237 struct mm_struct *mm;
238 if (!mm) 238
239 if (mutex_lock_killable(&task->cred_guard_mutex))
239 return NULL; 240 return NULL;
240 down_read(&mm->mmap_sem); 241
241 task_lock(task); 242 mm = get_task_mm(task);
242 if (task->mm != mm) 243 if (mm && mm != current->mm &&
243 goto out; 244 !ptrace_may_access(task, PTRACE_MODE_READ)) {
244 if (task->mm != current->mm && 245 mmput(mm);
245 __ptrace_may_access(task, PTRACE_MODE_READ) < 0) 246 mm = NULL;
246 goto out; 247 }
247 task_unlock(task); 248 mutex_unlock(&task->cred_guard_mutex);
249
248 return mm; 250 return mm;
249out:
250 task_unlock(task);
251 up_read(&mm->mmap_sem);
252 mmput(mm);
253 return NULL;
254} 251}
255 252
256static int proc_pid_cmdline(struct task_struct *task, char * buffer) 253static int proc_pid_cmdline(struct task_struct *task, char * buffer)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6f61b7cc32e0..9bd8be1d235c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
119 mm = mm_for_maps(priv->task); 119 mm = mm_for_maps(priv->task);
120 if (!mm) 120 if (!mm)
121 return NULL; 121 return NULL;
122 down_read(&mm->mmap_sem);
122 123
123 tail_vma = get_gate_vma(priv->task); 124 tail_vma = get_gate_vma(priv->task);
124 priv->tail_vma = tail_vma; 125 priv->tail_vma = tail_vma;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 64a72e2e7650..8f5c05d3dbd3 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
189 priv->task = NULL; 189 priv->task = NULL;
190 return NULL; 190 return NULL;
191 } 191 }
192 down_read(&mm->mmap_sem);
192 193
193 /* start from the Nth VMA */ 194 /* start from the Nth VMA */
194 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) 195 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 607c579e5eca..38f7bd559f35 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2042,7 +2042,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2042 * changes */ 2042 * changes */
2043 invalidate_bdev(sb->s_bdev); 2043 invalidate_bdev(sb->s_bdev);
2044 } 2044 }
2045 mutex_lock(&inode->i_mutex);
2046 mutex_lock(&dqopt->dqonoff_mutex); 2045 mutex_lock(&dqopt->dqonoff_mutex);
2047 if (sb_has_quota_loaded(sb, type)) { 2046 if (sb_has_quota_loaded(sb, type)) {
2048 error = -EBUSY; 2047 error = -EBUSY;
@@ -2054,9 +2053,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2054 * possible) Also nobody should write to the file - we use 2053 * possible) Also nobody should write to the file - we use
2055 * special IO operations which ignore the immutable bit. */ 2054 * special IO operations which ignore the immutable bit. */
2056 down_write(&dqopt->dqptr_sem); 2055 down_write(&dqopt->dqptr_sem);
2056 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2057 oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | 2057 oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
2058 S_NOQUOTA); 2058 S_NOQUOTA);
2059 inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; 2059 inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
2060 mutex_unlock(&inode->i_mutex);
2060 up_write(&dqopt->dqptr_sem); 2061 up_write(&dqopt->dqptr_sem);
2061 sb->dq_op->drop(inode); 2062 sb->dq_op->drop(inode);
2062 } 2063 }
@@ -2080,7 +2081,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
2080 goto out_file_init; 2081 goto out_file_init;
2081 } 2082 }
2082 mutex_unlock(&dqopt->dqio_mutex); 2083 mutex_unlock(&dqopt->dqio_mutex);
2083 mutex_unlock(&inode->i_mutex);
2084 spin_lock(&dq_state_lock); 2084 spin_lock(&dq_state_lock);
2085 dqopt->flags |= dquot_state_flag(flags, type); 2085 dqopt->flags |= dquot_state_flag(flags, type);
2086 spin_unlock(&dq_state_lock); 2086 spin_unlock(&dq_state_lock);
@@ -2094,16 +2094,17 @@ out_file_init:
2094 dqopt->files[type] = NULL; 2094 dqopt->files[type] = NULL;
2095 iput(inode); 2095 iput(inode);
2096out_lock: 2096out_lock:
2097 mutex_unlock(&dqopt->dqonoff_mutex);
2098 if (oldflags != -1) { 2097 if (oldflags != -1) {
2099 down_write(&dqopt->dqptr_sem); 2098 down_write(&dqopt->dqptr_sem);
2099 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
2100 /* Set the flags back (in the case of accidental quotaon() 2100 /* Set the flags back (in the case of accidental quotaon()
2101 * on a wrong file we don't want to mess up the flags) */ 2101 * on a wrong file we don't want to mess up the flags) */
2102 inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); 2102 inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
2103 inode->i_flags |= oldflags; 2103 inode->i_flags |= oldflags;
2104 mutex_unlock(&inode->i_mutex);
2104 up_write(&dqopt->dqptr_sem); 2105 up_write(&dqopt->dqptr_sem);
2105 } 2106 }
2106 mutex_unlock(&inode->i_mutex); 2107 mutex_unlock(&dqopt->dqonoff_mutex);
2107out_fmt: 2108out_fmt:
2108 put_quota_format(fmt); 2109 put_quota_format(fmt);
2109 2110
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index ebb2c417912c..11f0c06316de 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -20,6 +20,7 @@
20#include <linux/ramfs.h> 20#include <linux/ramfs.h>
21#include <linux/pagevec.h> 21#include <linux/pagevec.h>
22#include <linux/mman.h> 22#include <linux/mman.h>
23#include <linux/sched.h>
23 24
24#include <asm/uaccess.h> 25#include <asm/uaccess.h>
25#include "internal.h" 26#include "internal.h"
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 77f5bb746bf0..90622200b39c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s)
997 DEFINE_WAIT(wait); 997 DEFINE_WAIT(wait);
998 struct reiserfs_journal *j = SB_JOURNAL(s); 998 struct reiserfs_journal *j = SB_JOURNAL(s);
999 if (atomic_read(&j->j_async_throttle)) 999 if (atomic_read(&j->j_async_throttle))
1000 congestion_wait(WRITE, HZ / 10); 1000 congestion_wait(BLK_RW_ASYNC, HZ / 10);
1001 return 0; 1001 return 0;
1002} 1002}
1003 1003
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index d3aeb061612b..7adea74d6a8a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -24,7 +24,6 @@
24#include <linux/exportfs.h> 24#include <linux/exportfs.h>
25#include <linux/quotaops.h> 25#include <linux/quotaops.h>
26#include <linux/vfs.h> 26#include <linux/vfs.h>
27#include <linux/mnt_namespace.h>
28#include <linux/mount.h> 27#include <linux/mount.h>
29#include <linux/namei.h> 28#include <linux/namei.h>
30#include <linux/crc32.h> 29#include <linux/crc32.h>
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index f3d47d856848..6925b835a43b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -46,7 +46,6 @@
46#include <linux/reiserfs_acl.h> 46#include <linux/reiserfs_acl.h>
47#include <asm/uaccess.h> 47#include <asm/uaccess.h>
48#include <net/checksum.h> 48#include <net/checksum.h>
49#include <linux/smp_lock.h>
50#include <linux/stat.h> 49#include <linux/stat.h>
51#include <linux/quotaops.h> 50#include <linux/quotaops.h>
52 51
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 3b52770f46ff..cb5fc57e370b 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -30,6 +30,7 @@
30#include <linux/fs.h> 30#include <linux/fs.h>
31#include <linux/vfs.h> 31#include <linux/vfs.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/smp_lock.h>
33#include <linux/mutex.h> 34#include <linux/mutex.h>
34#include <linux/pagemap.h> 35#include <linux/pagemap.h>
35#include <linux/init.h> 36#include <linux/init.h>
diff --git a/fs/sync.c b/fs/sync.c
index dd200025af85..3422ba61d86d 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -112,8 +112,13 @@ restart:
112 mutex_unlock(&mutex); 112 mutex_unlock(&mutex);
113} 113}
114 114
115/*
116 * sync everything. Start out by waking pdflush, because that writes back
117 * all queues in parallel.
118 */
115SYSCALL_DEFINE0(sync) 119SYSCALL_DEFINE0(sync)
116{ 120{
121 wakeup_pdflush(0);
117 sync_filesystems(0); 122 sync_filesystems(0);
118 sync_filesystems(1); 123 sync_filesystems(1);
119 if (unlikely(laptop_mode)) 124 if (unlikely(laptop_mode))
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 9345806c8853..2524714bece1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -171,6 +171,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
171 if (count > 0) 171 if (count > 0)
172 *off = offs + count; 172 *off = offs + count;
173 173
174 kfree(temp);
174 return count; 175 return count;
175} 176}
176 177
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index d88d0fac9fa5..14f2d71ea3ce 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -939,8 +939,10 @@ again:
939 /* Remove from old parent's list and insert into new parent's list. */ 939 /* Remove from old parent's list and insert into new parent's list. */
940 sysfs_unlink_sibling(sd); 940 sysfs_unlink_sibling(sd);
941 sysfs_get(new_parent_sd); 941 sysfs_get(new_parent_sd);
942 drop_nlink(old_parent->d_inode);
942 sysfs_put(sd->s_parent); 943 sysfs_put(sd->s_parent);
943 sd->s_parent = new_parent_sd; 944 sd->s_parent = new_parent_sd;
945 inc_nlink(new_parent->d_inode);
944 sysfs_link_sibling(sd); 946 sysfs_link_sibling(sd);
945 947
946 out_unlock: 948 out_unlock:
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index bc5857199ec2..762a7d6cec73 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -297,6 +297,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
297{ 297{
298 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); 298 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
299 299
300 dbg_io("jhead %d", wbuf->jhead);
300 wbuf->need_sync = 1; 301 wbuf->need_sync = 1;
301 wbuf->c->need_wbuf_sync = 1; 302 wbuf->c->need_wbuf_sync = 1;
302 ubifs_wake_up_bgt(wbuf->c); 303 ubifs_wake_up_bgt(wbuf->c);
@@ -311,8 +312,12 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
311{ 312{
312 ubifs_assert(!hrtimer_active(&wbuf->timer)); 313 ubifs_assert(!hrtimer_active(&wbuf->timer));
313 314
314 if (!ktime_to_ns(wbuf->softlimit)) 315 if (wbuf->no_timer)
315 return; 316 return;
317 dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead,
318 div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
319 div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
320 USEC_PER_SEC));
316 hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, 321 hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
317 HRTIMER_MODE_REL); 322 HRTIMER_MODE_REL);
318} 323}
@@ -323,11 +328,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
323 */ 328 */
324static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) 329static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
325{ 330{
326 /* 331 if (wbuf->no_timer)
327 * If the syncer is waiting for the lock (from the background thread's 332 return;
328 * context) and another task is changing write-buffer then the syncing
329 * should be canceled.
330 */
331 wbuf->need_sync = 0; 333 wbuf->need_sync = 0;
332 hrtimer_cancel(&wbuf->timer); 334 hrtimer_cancel(&wbuf->timer);
333} 335}
@@ -349,8 +351,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
349 /* Write-buffer is empty or not seeked */ 351 /* Write-buffer is empty or not seeked */
350 return 0; 352 return 0;
351 353
352 dbg_io("LEB %d:%d, %d bytes", 354 dbg_io("LEB %d:%d, %d bytes, jhead %d",
353 wbuf->lnum, wbuf->offs, wbuf->used); 355 wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead);
354 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); 356 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
355 ubifs_assert(!(wbuf->avail & 7)); 357 ubifs_assert(!(wbuf->avail & 7));
356 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); 358 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
@@ -390,7 +392,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
390 * @offs: logical eraseblock offset to seek to 392 * @offs: logical eraseblock offset to seek to
391 * @dtype: data type 393 * @dtype: data type
392 * 394 *
393 * This function targets the write buffer to logical eraseblock @lnum:@offs. 395 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
394 * The write-buffer is synchronized if it is not empty. Returns zero in case of 396 * The write-buffer is synchronized if it is not empty. Returns zero in case of
395 * success and a negative error code in case of failure. 397 * success and a negative error code in case of failure.
396 */ 398 */
@@ -399,7 +401,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
399{ 401{
400 const struct ubifs_info *c = wbuf->c; 402 const struct ubifs_info *c = wbuf->c;
401 403
402 dbg_io("LEB %d:%d", lnum, offs); 404 dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead);
403 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); 405 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
404 ubifs_assert(offs >= 0 && offs <= c->leb_size); 406 ubifs_assert(offs >= 0 && offs <= c->leb_size);
405 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 407 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
@@ -506,9 +508,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
506 struct ubifs_info *c = wbuf->c; 508 struct ubifs_info *c = wbuf->c;
507 int err, written, n, aligned_len = ALIGN(len, 8), offs; 509 int err, written, n, aligned_len = ALIGN(len, 8), offs;
508 510
509 dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, 511 dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len,
510 dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, 512 dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead,
511 wbuf->offs + wbuf->used); 513 wbuf->lnum, wbuf->offs + wbuf->used);
512 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 514 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
513 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 515 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
514 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 516 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
@@ -533,8 +535,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
533 memcpy(wbuf->buf + wbuf->used, buf, len); 535 memcpy(wbuf->buf + wbuf->used, buf, len);
534 536
535 if (aligned_len == wbuf->avail) { 537 if (aligned_len == wbuf->avail) {
536 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, 538 dbg_io("flush jhead %d wbuf to LEB %d:%d",
537 wbuf->offs); 539 wbuf->jhead, wbuf->lnum, wbuf->offs);
538 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 540 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
539 wbuf->offs, c->min_io_size, 541 wbuf->offs, c->min_io_size,
540 wbuf->dtype); 542 wbuf->dtype);
@@ -562,7 +564,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
562 * minimal I/O unit. We have to fill and flush write-buffer and switch 564 * minimal I/O unit. We have to fill and flush write-buffer and switch
563 * to the next min. I/O unit. 565 * to the next min. I/O unit.
564 */ 566 */
565 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); 567 dbg_io("flush jhead %d wbuf to LEB %d:%d",
568 wbuf->jhead, wbuf->lnum, wbuf->offs);
566 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 569 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
567 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 570 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
568 c->min_io_size, wbuf->dtype); 571 c->min_io_size, wbuf->dtype);
@@ -695,7 +698,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
695 int err, rlen, overlap; 698 int err, rlen, overlap;
696 struct ubifs_ch *ch = buf; 699 struct ubifs_ch *ch = buf;
697 700
698 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); 701 dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs,
702 dbg_ntype(type), len, wbuf->jhead);
699 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 703 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
700 ubifs_assert(!(offs & 7) && offs < c->leb_size); 704 ubifs_assert(!(offs & 7) && offs < c->leb_size);
701 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); 705 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
@@ -819,13 +823,12 @@ out:
819 * @c: UBIFS file-system description object 823 * @c: UBIFS file-system description object
820 * @wbuf: write-buffer to initialize 824 * @wbuf: write-buffer to initialize
821 * 825 *
822 * This function initializes write buffer. Returns zero in case of success 826 * This function initializes write-buffer. Returns zero in case of success
823 * %-ENOMEM in case of failure. 827 * %-ENOMEM in case of failure.
824 */ 828 */
825int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) 829int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
826{ 830{
827 size_t size; 831 size_t size;
828 ktime_t hardlimit;
829 832
830 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); 833 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
831 if (!wbuf->buf) 834 if (!wbuf->buf)
@@ -851,22 +854,16 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
851 854
852 hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 855 hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
853 wbuf->timer.function = wbuf_timer_callback_nolock; 856 wbuf->timer.function = wbuf_timer_callback_nolock;
854 /* 857 wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
855 * Make write-buffer soft limit to be 20% of the hard limit. The 858 wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
856 * write-buffer timer is allowed to expire any time between the soft 859 wbuf->delta *= 1000000000ULL;
857 * and hard limits. 860 ubifs_assert(wbuf->delta <= ULONG_MAX);
858 */
859 hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0);
860 wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10;
861 wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta);
862 hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit,
863 wbuf->delta);
864 return 0; 861 return 0;
865} 862}
866 863
867/** 864/**
868 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. 865 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
869 * @wbuf: the write-buffer whereto add 866 * @wbuf: the write-buffer where to add
870 * @inum: the inode number 867 * @inum: the inode number
871 * 868 *
872 * This function adds an inode number to the inode array of the write-buffer. 869 * This function adds an inode number to the inode array of the write-buffer.
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 6db7a6be6c97..8aacd64957a2 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -25,7 +25,6 @@
25/* This file implements EXT2-compatible extended attribute ioctl() calls */ 25/* This file implements EXT2-compatible extended attribute ioctl() calls */
26 26
27#include <linux/compat.h> 27#include <linux/compat.h>
28#include <linux/smp_lock.h>
29#include <linux/mount.h> 28#include <linux/mount.h>
30#include "ubifs.h" 29#include "ubifs.h"
31 30
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 805605250f12..e5f6cf8a1155 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -53,6 +53,25 @@ static int is_empty(void *buf, int len)
53} 53}
54 54
55/** 55/**
56 * first_non_ff - find offset of the first non-0xff byte.
57 * @buf: buffer to search in
58 * @len: length of buffer
59 *
60 * This function returns offset of the first non-0xff byte in @buf or %-1 if
61 * the buffer contains only 0xff bytes.
62 */
63static int first_non_ff(void *buf, int len)
64{
65 uint8_t *p = buf;
66 int i;
67
68 for (i = 0; i < len; i++)
69 if (*p++ != 0xff)
70 return i;
71 return -1;
72}
73
74/**
56 * get_master_node - get the last valid master node allowing for corruption. 75 * get_master_node - get the last valid master node allowing for corruption.
57 * @c: UBIFS file-system description object 76 * @c: UBIFS file-system description object
58 * @lnum: LEB number 77 * @lnum: LEB number
@@ -357,11 +376,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
357 empty_offs = ALIGN(offs + 1, c->min_io_size); 376 empty_offs = ALIGN(offs + 1, c->min_io_size);
358 check_len = c->leb_size - empty_offs; 377 check_len = c->leb_size - empty_offs;
359 p = buf + empty_offs - offs; 378 p = buf + empty_offs - offs;
360 379 return is_empty(p, check_len);
361 for (; check_len > 0; check_len--)
362 if (*p++ != 0xff)
363 return 0;
364 return 1;
365} 380}
366 381
367/** 382/**
@@ -543,8 +558,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
543 * 558 *
544 * This function does a scan of a LEB, but caters for errors that might have 559 * This function does a scan of a LEB, but caters for errors that might have
545 * been caused by the unclean unmount from which we are attempting to recover. 560 * been caused by the unclean unmount from which we are attempting to recover.
546 * 561 * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
547 * This function returns %0 on success and a negative error code on failure. 562 * found, and a negative error code in case of failure.
548 */ 563 */
549struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 564struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
550 int offs, void *sbuf, int grouped) 565 int offs, void *sbuf, int grouped)
@@ -643,7 +658,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
643 goto corrupted; 658 goto corrupted;
644 default: 659 default:
645 dbg_err("unknown"); 660 dbg_err("unknown");
646 goto corrupted; 661 err = -EINVAL;
662 goto error;
647 } 663 }
648 } 664 }
649 665
@@ -652,8 +668,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
652 clean_buf(c, &buf, lnum, &offs, &len); 668 clean_buf(c, &buf, lnum, &offs, &len);
653 need_clean = 1; 669 need_clean = 1;
654 } else { 670 } else {
655 ubifs_err("corrupt empty space at LEB %d:%d", 671 int corruption = first_non_ff(buf, len);
656 lnum, offs); 672
673 ubifs_err("corrupt empty space LEB %d:%d, corruption "
674 "starts at %d", lnum, offs, corruption);
675 /* Make sure we dump interesting non-0xFF data */
676 offs = corruption;
677 buf += corruption;
657 goto corrupted; 678 goto corrupted;
658 } 679 }
659 } 680 }
@@ -813,7 +834,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
813static int recover_head(const struct ubifs_info *c, int lnum, int offs, 834static int recover_head(const struct ubifs_info *c, int lnum, int offs,
814 void *sbuf) 835 void *sbuf)
815{ 836{
816 int len, err, need_clean = 0; 837 int len, err;
817 838
818 if (c->min_io_size > 1) 839 if (c->min_io_size > 1)
819 len = c->min_io_size; 840 len = c->min_io_size;
@@ -827,19 +848,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
827 848
828 /* Read at the head location and check it is empty flash */ 849 /* Read at the head location and check it is empty flash */
829 err = ubi_read(c->ubi, lnum, sbuf, offs, len); 850 err = ubi_read(c->ubi, lnum, sbuf, offs, len);
830 if (err) 851 if (err || !is_empty(sbuf, len)) {
831 need_clean = 1;
832 else {
833 uint8_t *p = sbuf;
834
835 while (len--)
836 if (*p++ != 0xff) {
837 need_clean = 1;
838 break;
839 }
840 }
841
842 if (need_clean) {
843 dbg_rcvry("cleaning head at %d:%d", lnum, offs); 852 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
844 if (offs == 0) 853 if (offs == 0)
845 return ubifs_leb_unmap(c, lnum); 854 return ubifs_leb_unmap(c, lnum);
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 11cc80125a49..2970500f32df 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -837,9 +837,10 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
837 837
838 dbg_mnt("replay log LEB %d:%d", lnum, offs); 838 dbg_mnt("replay log LEB %d:%d", lnum, offs);
839 sleb = ubifs_scan(c, lnum, offs, sbuf); 839 sleb = ubifs_scan(c, lnum, offs, sbuf);
840 if (IS_ERR(sleb)) { 840 if (IS_ERR(sleb) ) {
841 if (c->need_recovery) 841 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
842 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 842 return PTR_ERR(sleb);
843 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
843 if (IS_ERR(sleb)) 844 if (IS_ERR(sleb))
844 return PTR_ERR(sleb); 845 return PTR_ERR(sleb);
845 } 846 }
@@ -957,7 +958,7 @@ out:
957 return err; 958 return err;
958 959
959out_dump: 960out_dump:
960 ubifs_err("log error detected while replying the log at LEB %d:%d", 961 ubifs_err("log error detected while replaying the log at LEB %d:%d",
961 lnum, offs + snod->offs); 962 lnum, offs + snod->offs);
962 dbg_dump_node(c, snod->node); 963 dbg_dump_node(c, snod->node);
963 ubifs_scan_destroy(sleb); 964 ubifs_scan_destroy(sleb);
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 0ed82479b44b..892ebfee4fe5 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -238,12 +238,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
238{ 238{
239 int len; 239 int len;
240 240
241 ubifs_err("corrupted data at LEB %d:%d", lnum, offs); 241 ubifs_err("corruption at LEB %d:%d", lnum, offs);
242 if (dbg_failure_mode) 242 if (dbg_failure_mode)
243 return; 243 return;
244 len = c->leb_size - offs; 244 len = c->leb_size - offs;
245 if (len > 4096) 245 if (len > 8192)
246 len = 4096; 246 len = 8192;
247 dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); 247 dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); 248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
249} 249}
@@ -256,7 +256,9 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
256 * @sbuf: scan buffer (must be c->leb_size) 256 * @sbuf: scan buffer (must be c->leb_size)
257 * 257 *
258 * This function scans LEB number @lnum and returns complete information about 258 * This function scans LEB number @lnum and returns complete information about
259 * its contents. Returns an error code in case of failure. 259 * its contents. Returns the scaned information in case of success and,
260 * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case
261 * of failure.
260 */ 262 */
261struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, 263struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
262 int offs, void *sbuf) 264 int offs, void *sbuf)
@@ -279,7 +281,6 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
279 cond_resched(); 281 cond_resched();
280 282
281 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); 283 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
282
283 if (ret > 0) { 284 if (ret > 0) {
284 /* Padding bytes or a valid padding node */ 285 /* Padding bytes or a valid padding node */
285 offs += ret; 286 offs += ret;
@@ -304,7 +305,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
304 goto corrupted; 305 goto corrupted;
305 default: 306 default:
306 dbg_err("unknown"); 307 dbg_err("unknown");
307 goto corrupted; 308 err = -EINVAL;
309 goto error;
308 } 310 }
309 311
310 err = ubifs_add_snod(c, sleb, buf, offs); 312 err = ubifs_add_snod(c, sleb, buf, offs);
@@ -317,8 +319,10 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
317 len -= node_len; 319 len -= node_len;
318 } 320 }
319 321
320 if (offs % c->min_io_size) 322 if (offs % c->min_io_size) {
321 goto corrupted; 323 ubifs_err("empty space starts at non-aligned offset %d", offs);
324 goto corrupted;;
325 }
322 326
323 ubifs_end_scan(c, sleb, lnum, offs); 327 ubifs_end_scan(c, sleb, lnum, offs);
324 328
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 79fad43f3c57..26d2e0d80465 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -797,7 +797,7 @@ static int alloc_wbufs(struct ubifs_info *c)
797 * does not need to be synchronized by timer. 797 * does not need to be synchronized by timer.
798 */ 798 */
799 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 799 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
800 c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0); 800 c->jheads[GCHD].wbuf.no_timer = 1;
801 801
802 return 0; 802 return 0;
803} 803}
@@ -986,7 +986,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
986 switch (token) { 986 switch (token) {
987 /* 987 /*
988 * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. 988 * %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
989 * We accepte them in order to be backware-compatible. But this 989 * We accept them in order to be backward-compatible. But this
990 * should be removed at some point. 990 * should be removed at some point.
991 */ 991 */
992 case Opt_fast_unmount: 992 case Opt_fast_unmount:
@@ -1287,6 +1287,9 @@ static int mount_ubifs(struct ubifs_info *c)
1287 if (err) 1287 if (err)
1288 goto out_journal; 1288 goto out_journal;
1289 1289
1290 /* Calculate 'min_idx_lebs' after journal replay */
1291 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
1292
1290 err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); 1293 err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
1291 if (err) 1294 if (err)
1292 goto out_orphans; 1295 goto out_orphans;
@@ -1754,10 +1757,8 @@ static void ubifs_put_super(struct super_block *sb)
1754 1757
1755 /* Synchronize write-buffers */ 1758 /* Synchronize write-buffers */
1756 if (c->jheads) 1759 if (c->jheads)
1757 for (i = 0; i < c->jhead_cnt; i++) { 1760 for (i = 0; i < c->jhead_cnt; i++)
1758 ubifs_wbuf_sync(&c->jheads[i].wbuf); 1761 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1759 hrtimer_cancel(&c->jheads[i].wbuf.timer);
1760 }
1761 1762
1762 /* 1763 /*
1763 * On fatal errors c->ro_media is set to 1, in which case we do 1764 * On fatal errors c->ro_media is set to 1, in which case we do
@@ -1975,7 +1976,8 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1975 err = bdi_init(&c->bdi); 1976 err = bdi_init(&c->bdi);
1976 if (err) 1977 if (err)
1977 goto out_close; 1978 goto out_close;
1978 err = bdi_register(&c->bdi, NULL, "ubifs"); 1979 err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
1980 c->vi.ubi_num, c->vi.vol_id);
1979 if (err) 1981 if (err)
1980 goto out_bdi; 1982 goto out_bdi;
1981 1983
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1bf01d820066..a29349094422 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -95,8 +95,9 @@
95 */ 95 */
96#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" 96#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
97 97
98/* Default write-buffer synchronization timeout in seconds */ 98/* Write-buffer synchronization timeout interval in seconds */
99#define DEFAULT_WBUF_TIMEOUT_SECS 5 99#define WBUF_TIMEOUT_SOFTLIMIT 3
100#define WBUF_TIMEOUT_HARDLIMIT 5
100 101
101/* Maximum possible inode number (only 32-bit inodes are supported now) */ 102/* Maximum possible inode number (only 32-bit inodes are supported now) */
102#define MAX_INUM 0xFFFFFFFF 103#define MAX_INUM 0xFFFFFFFF
@@ -654,7 +655,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
654 * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit 655 * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit
655 * and @softlimit + @delta) 656 * and @softlimit + @delta)
656 * @timer: write-buffer timer 657 * @timer: write-buffer timer
657 * @need_sync: it is set if its timer expired and needs sync 658 * @no_timer: non-zero if this write-buffer does not have a timer
659 * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing
658 * @next_ino: points to the next position of the following inode number 660 * @next_ino: points to the next position of the following inode number
659 * @inodes: stores the inode numbers of the nodes which are in wbuf 661 * @inodes: stores the inode numbers of the nodes which are in wbuf
660 * 662 *
@@ -683,7 +685,8 @@ struct ubifs_wbuf {
683 ktime_t softlimit; 685 ktime_t softlimit;
684 unsigned long long delta; 686 unsigned long long delta;
685 struct hrtimer timer; 687 struct hrtimer timer;
686 int need_sync; 688 unsigned int no_timer:1;
689 unsigned int need_sync:1;
687 int next_ino; 690 int next_ino;
688 ino_t *inodes; 691 ino_t *inodes;
689}; 692};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 6832135159b6..9d1b8c2e6c45 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1087,11 +1087,23 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
1087 struct udf_inode_info *vati; 1087 struct udf_inode_info *vati;
1088 uint32_t pos; 1088 uint32_t pos;
1089 struct virtualAllocationTable20 *vat20; 1089 struct virtualAllocationTable20 *vat20;
1090 sector_t blocks = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
1090 1091
1091 /* VAT file entry is in the last recorded block */ 1092 /* VAT file entry is in the last recorded block */
1092 ino.partitionReferenceNum = type1_index; 1093 ino.partitionReferenceNum = type1_index;
1093 ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root; 1094 ino.logicalBlockNum = sbi->s_last_block - map->s_partition_root;
1094 sbi->s_vat_inode = udf_iget(sb, &ino); 1095 sbi->s_vat_inode = udf_iget(sb, &ino);
1096 if (!sbi->s_vat_inode &&
1097 sbi->s_last_block != blocks - 1) {
1098 printk(KERN_NOTICE "UDF-fs: Failed to read VAT inode from the"
1099 " last recorded block (%lu), retrying with the last "
1100 "block of the device (%lu).\n",
1101 (unsigned long)sbi->s_last_block,
1102 (unsigned long)blocks - 1);
1103 ino.partitionReferenceNum = type1_index;
1104 ino.logicalBlockNum = blocks - 1 - map->s_partition_root;
1105 sbi->s_vat_inode = udf_iget(sb, &ino);
1106 }
1095 if (!sbi->s_vat_inode) 1107 if (!sbi->s_vat_inode)
1096 return 1; 1108 return 1;
1097 1109
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 1cd3b55ee3d2..2d3f90afe5f1 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
53 printk(KERN_ERR "XFS: possible memory allocation " 53 printk(KERN_ERR "XFS: possible memory allocation "
54 "deadlock in %s (mode:0x%x)\n", 54 "deadlock in %s (mode:0x%x)\n",
55 __func__, lflags); 55 __func__, lflags);
56 congestion_wait(WRITE, HZ/50); 56 congestion_wait(BLK_RW_ASYNC, HZ/50);
57 } while (1); 57 } while (1);
58} 58}
59 59
@@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
130 printk(KERN_ERR "XFS: possible memory allocation " 130 printk(KERN_ERR "XFS: possible memory allocation "
131 "deadlock in %s (mode:0x%x)\n", 131 "deadlock in %s (mode:0x%x)\n",
132 __func__, lflags); 132 __func__, lflags);
133 congestion_wait(WRITE, HZ/50); 133 congestion_wait(BLK_RW_ASYNC, HZ/50);
134 } while (1); 134 } while (1);
135} 135}
136 136
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7ec89fc05b2b..aecf2519db76 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1268,6 +1268,14 @@ xfs_vm_writepage(
1268 if (!page_has_buffers(page)) 1268 if (!page_has_buffers(page))
1269 create_empty_buffers(page, 1 << inode->i_blkbits, 0); 1269 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
1270 1270
1271
1272 /*
1273 * VM calculation for nr_to_write seems off. Bump it way
1274 * up, this gets simple streaming writes zippy again.
1275 * To be reviewed again after Jens' writeback changes.
1276 */
1277 wbc->nr_to_write *= 4;
1278
1271 /* 1279 /*
1272 * Convert delayed allocate, unwritten or unmapped space 1280 * Convert delayed allocate, unwritten or unmapped space
1273 * to real space and flush out to disk. 1281 * to real space and flush out to disk.
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 1418b916fc27..965df1227d64 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -412,7 +412,7 @@ _xfs_buf_lookup_pages(
412 412
413 XFS_STATS_INC(xb_page_retries); 413 XFS_STATS_INC(xb_page_retries);
414 xfsbufd_wakeup(0, gfp_mask); 414 xfsbufd_wakeup(0, gfp_mask);
415 congestion_wait(WRITE, HZ/50); 415 congestion_wait(BLK_RW_ASYNC, HZ/50);
416 goto retry; 416 goto retry;
417 } 417 }
418 418
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
770 bp->b_pages = NULL; 770 bp->b_pages = NULL;
771 bp->b_addr = mem; 771 bp->b_addr = mem;
772 772
773 rval = _xfs_buf_get_pages(bp, page_count, 0); 773 rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
774 if (rval) 774 if (rval)
775 return rval; 775 return rval;
776 776
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f4e255441574..0542fd507649 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -41,7 +41,6 @@
41#include "xfs_ioctl.h" 41#include "xfs_ioctl.h"
42 42
43#include <linux/dcache.h> 43#include <linux/dcache.h>
44#include <linux/smp_lock.h>
45 44
46static struct vm_operations_struct xfs_file_vm_ops; 45static struct vm_operations_struct xfs_file_vm_ops;
47 46
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 58973bb46038..8070b34cc287 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -680,8 +680,8 @@ xfs_vn_fiemap(
680 else 680 else
681 bm.bmv_length = BTOBB(length); 681 bm.bmv_length = BTOBB(length);
682 682
683 /* our formatter will tell xfs_getbmap when to stop. */ 683 /* We add one because in getbmap world count includes the header */
684 bm.bmv_count = MAXEXTNUM; 684 bm.bmv_count = fieinfo->fi_extents_max + 1;
685 bm.bmv_iflags = BMV_IF_PREALLOC; 685 bm.bmv_iflags = BMV_IF_PREALLOC;
686 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) 686 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
687 bm.bmv_iflags |= BMV_IF_ATTRFORK; 687 bm.bmv_iflags |= BMV_IF_ATTRFORK;
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index db15feb906ff..4ece1906bd41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2013 blkcnt, XFS_BUF_LOCK, &bp); 2013 blkcnt,
2014 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2015 &bp);
2014 if (error) 2016 if (error)
2015 return(error); 2017 return(error);
2016 2018
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2142 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2143 2145
2144 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, 2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
2145 blkcnt, XFS_BUF_LOCK); 2147 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2146 ASSERT(bp); 2148 ASSERT(bp);
2147 ASSERT(!XFS_BUF_GETERROR(bp)); 2149 ASSERT(!XFS_BUF_GETERROR(bp));
2148 2150
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 7928b9983c1d..8ee5b5a76a2a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6009,7 +6009,7 @@ xfs_getbmap(
6009 */ 6009 */
6010 error = ENOMEM; 6010 error = ENOMEM;
6011 subnex = 16; 6011 subnex = 16;
6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); 6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
6013 if (!map) 6013 if (!map)
6014 goto out_unlock_ilock; 6014 goto out_unlock_ilock;
6015 6015
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e9df99574829..26717388acf5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 121 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 122 xfs_buftrace("SBTREE ERROR", bp);
123 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, 123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 cur->bc_mp); 124 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 125 return XFS_ERROR(EFSCORRUPTED);
126 } 126 }
127 return 0; 127 return 0;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9ff6e57a5075..2847bbc1c534 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
2201xfs_da_state_t * 2201xfs_da_state_t *
2202xfs_da_state_alloc(void) 2202xfs_da_state_alloc(void)
2203{ 2203{
2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); 2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2205} 2205}
2206 2206
2207/* 2207/*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2261 int off; 2261 int off;
2262 2262
2263 if (nbuf == 1) 2263 if (nbuf == 1)
2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); 2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2265 else 2265 else
2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); 2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2267 dabuf->dirty = 0; 2267 dabuf->dirty = 0;
2268#ifdef XFS_DABUF_DEBUG 2268#ifdef XFS_DABUF_DEBUG
2269 dabuf->ra = ra; 2269 dabuf->ra = ra;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index c657bec6d951..bb1d58eb3982 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
256 !(args->op_flags & XFS_DA_OP_CILOOKUP)) 256 !(args->op_flags & XFS_DA_OP_CILOOKUP))
257 return EEXIST; 257 return EEXIST;
258 258
259 args->value = kmem_alloc(len, KM_MAYFAIL); 259 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
260 if (!args->value) 260 if (!args->value)
261 return ENOMEM; 261 return ENOMEM;
262 262
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbd451bb4848..2d0b3e1da9e6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
167 new = nb - mp->m_sb.sb_dblocks; 167 new = nb - mp->m_sb.sb_dblocks;
168 oagcount = mp->m_sb.sb_agcount; 168 oagcount = mp->m_sb.sb_agcount;
169 if (nagcount > oagcount) { 169 if (nagcount > oagcount) {
170 void *new_perag, *old_perag;
171
170 xfs_filestream_flush(mp); 172 xfs_filestream_flush(mp);
173
174 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
175 KM_MAYFAIL);
176 if (!new_perag)
177 return XFS_ERROR(ENOMEM);
178
171 down_write(&mp->m_peraglock); 179 down_write(&mp->m_peraglock);
172 mp->m_perag = kmem_realloc(mp->m_perag, 180 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
173 sizeof(xfs_perag_t) * nagcount, 181 old_perag = mp->m_perag;
174 sizeof(xfs_perag_t) * oagcount, 182 mp->m_perag = new_perag;
175 KM_SLEEP); 183
176 memset(&mp->m_perag[oagcount], 0,
177 (nagcount - oagcount) * sizeof(xfs_perag_t));
178 mp->m_flags |= XFS_MOUNT_32BITINODES; 184 mp->m_flags |= XFS_MOUNT_32BITINODES;
179 nagimax = xfs_initialize_perag(mp, nagcount); 185 nagimax = xfs_initialize_perag(mp, nagcount);
180 up_write(&mp->m_peraglock); 186 up_write(&mp->m_peraglock);
187
188 kmem_free(old_perag);
181 } 189 }
182 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 190 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
183 tp->t_flags |= XFS_TRANS_RESERVE; 191 tp->t_flags |= XFS_TRANS_RESERVE;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 5fcec6f020a7..34ec86923f7e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -64,6 +64,10 @@ xfs_inode_alloc(
64 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); 64 ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
65 if (!ip) 65 if (!ip)
66 return NULL; 66 return NULL;
67 if (inode_init_always(mp->m_super, VFS_I(ip))) {
68 kmem_zone_free(xfs_inode_zone, ip);
69 return NULL;
70 }
67 71
68 ASSERT(atomic_read(&ip->i_iocount) == 0); 72 ASSERT(atomic_read(&ip->i_iocount) == 0);
69 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
@@ -105,17 +109,6 @@ xfs_inode_alloc(
105#ifdef XFS_DIR2_TRACE 109#ifdef XFS_DIR2_TRACE
106 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 110 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
107#endif 111#endif
108 /*
109 * Now initialise the VFS inode. We do this after the xfs_inode
110 * initialisation as internal failures will result in ->destroy_inode
111 * being called and that will pass down through the reclaim path and
112 * free the XFS inode. This path requires the XFS inode to already be
113 * initialised. Hence if this call fails, the xfs_inode has already
114 * been freed and we should not reference it at all in the error
115 * handling.
116 */
117 if (!inode_init_always(mp->m_super, VFS_I(ip)))
118 return NULL;
119 112
120 /* prevent anyone from using this yet */ 113 /* prevent anyone from using this yet */
121 VFS_I(ip)->i_state = I_NEW|I_LOCK; 114 VFS_I(ip)->i_state = I_NEW|I_LOCK;
@@ -123,6 +116,71 @@ xfs_inode_alloc(
123 return ip; 116 return ip;
124} 117}
125 118
119STATIC void
120xfs_inode_free(
121 struct xfs_inode *ip)
122{
123 switch (ip->i_d.di_mode & S_IFMT) {
124 case S_IFREG:
125 case S_IFDIR:
126 case S_IFLNK:
127 xfs_idestroy_fork(ip, XFS_DATA_FORK);
128 break;
129 }
130
131 if (ip->i_afp)
132 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
133
134#ifdef XFS_INODE_TRACE
135 ktrace_free(ip->i_trace);
136#endif
137#ifdef XFS_BMAP_TRACE
138 ktrace_free(ip->i_xtrace);
139#endif
140#ifdef XFS_BTREE_TRACE
141 ktrace_free(ip->i_btrace);
142#endif
143#ifdef XFS_RW_TRACE
144 ktrace_free(ip->i_rwtrace);
145#endif
146#ifdef XFS_ILOCK_TRACE
147 ktrace_free(ip->i_lock_trace);
148#endif
149#ifdef XFS_DIR2_TRACE
150 ktrace_free(ip->i_dir_trace);
151#endif
152
153 if (ip->i_itemp) {
154 /*
155 * Only if we are shutting down the fs will we see an
156 * inode still in the AIL. If it is there, we should remove
157 * it to prevent a use-after-free from occurring.
158 */
159 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
160 struct xfs_ail *ailp = lip->li_ailp;
161
162 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
163 XFS_FORCED_SHUTDOWN(ip->i_mount));
164 if (lip->li_flags & XFS_LI_IN_AIL) {
165 spin_lock(&ailp->xa_lock);
166 if (lip->li_flags & XFS_LI_IN_AIL)
167 xfs_trans_ail_delete(ailp, lip);
168 else
169 spin_unlock(&ailp->xa_lock);
170 }
171 xfs_inode_item_destroy(ip);
172 ip->i_itemp = NULL;
173 }
174
175 /* asserts to verify all state is correct here */
176 ASSERT(atomic_read(&ip->i_iocount) == 0);
177 ASSERT(atomic_read(&ip->i_pincount) == 0);
178 ASSERT(!spin_is_locked(&ip->i_flags_lock));
179 ASSERT(completion_done(&ip->i_flush));
180
181 kmem_zone_free(xfs_inode_zone, ip);
182}
183
126/* 184/*
127 * Check the validity of the inode we just found it the cache 185 * Check the validity of the inode we just found it the cache
128 */ 186 */
@@ -167,7 +225,7 @@ xfs_iget_cache_hit(
167 * errors cleanly, then tag it so it can be set up correctly 225 * errors cleanly, then tag it so it can be set up correctly
168 * later. 226 * later.
169 */ 227 */
170 if (!inode_init_always(mp->m_super, VFS_I(ip))) { 228 if (inode_init_always(mp->m_super, VFS_I(ip))) {
171 error = ENOMEM; 229 error = ENOMEM;
172 goto out_error; 230 goto out_error;
173 } 231 }
@@ -299,7 +357,8 @@ out_preload_end:
299 if (lock_flags) 357 if (lock_flags)
300 xfs_iunlock(ip, lock_flags); 358 xfs_iunlock(ip, lock_flags);
301out_destroy: 359out_destroy:
302 xfs_destroy_inode(ip); 360 __destroy_inode(VFS_I(ip));
361 xfs_inode_free(ip);
303 return error; 362 return error;
304} 363}
305 364
@@ -504,62 +563,7 @@ xfs_ireclaim(
504 xfs_qm_dqdetach(ip); 563 xfs_qm_dqdetach(ip);
505 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); 564 xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
506 565
507 switch (ip->i_d.di_mode & S_IFMT) { 566 xfs_inode_free(ip);
508 case S_IFREG:
509 case S_IFDIR:
510 case S_IFLNK:
511 xfs_idestroy_fork(ip, XFS_DATA_FORK);
512 break;
513 }
514
515 if (ip->i_afp)
516 xfs_idestroy_fork(ip, XFS_ATTR_FORK);
517
518#ifdef XFS_INODE_TRACE
519 ktrace_free(ip->i_trace);
520#endif
521#ifdef XFS_BMAP_TRACE
522 ktrace_free(ip->i_xtrace);
523#endif
524#ifdef XFS_BTREE_TRACE
525 ktrace_free(ip->i_btrace);
526#endif
527#ifdef XFS_RW_TRACE
528 ktrace_free(ip->i_rwtrace);
529#endif
530#ifdef XFS_ILOCK_TRACE
531 ktrace_free(ip->i_lock_trace);
532#endif
533#ifdef XFS_DIR2_TRACE
534 ktrace_free(ip->i_dir_trace);
535#endif
536 if (ip->i_itemp) {
537 /*
538 * Only if we are shutting down the fs will we see an
539 * inode still in the AIL. If it is there, we should remove
540 * it to prevent a use-after-free from occurring.
541 */
542 xfs_log_item_t *lip = &ip->i_itemp->ili_item;
543 struct xfs_ail *ailp = lip->li_ailp;
544
545 ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
546 XFS_FORCED_SHUTDOWN(ip->i_mount));
547 if (lip->li_flags & XFS_LI_IN_AIL) {
548 spin_lock(&ailp->xa_lock);
549 if (lip->li_flags & XFS_LI_IN_AIL)
550 xfs_trans_ail_delete(ailp, lip);
551 else
552 spin_unlock(&ailp->xa_lock);
553 }
554 xfs_inode_item_destroy(ip);
555 ip->i_itemp = NULL;
556 }
557 /* asserts to verify all state is correct here */
558 ASSERT(atomic_read(&ip->i_iocount) == 0);
559 ASSERT(atomic_read(&ip->i_pincount) == 0);
560 ASSERT(!spin_is_locked(&ip->i_flags_lock));
561 ASSERT(completion_done(&ip->i_flush));
562 kmem_zone_free(xfs_inode_zone, ip);
563} 567}
564 568
565/* 569/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f22d65fed0a..da428b3fe0f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -343,6 +343,16 @@ xfs_iformat(
343 return XFS_ERROR(EFSCORRUPTED); 343 return XFS_ERROR(EFSCORRUPTED);
344 } 344 }
345 345
346 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
347 !ip->i_mount->m_rtdev_targp)) {
348 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
349 "corrupt dinode %Lu, has realtime flag set.",
350 ip->i_ino);
351 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
352 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
353 return XFS_ERROR(EFSCORRUPTED);
354 }
355
346 switch (ip->i_d.di_mode & S_IFMT) { 356 switch (ip->i_d.di_mode & S_IFMT) {
347 case S_IFIFO: 357 case S_IFIFO:
348 case S_IFCHR: 358 case S_IFCHR:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1804f866a71d..65f24a3cc992 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -310,23 +310,6 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
310} 310}
311 311
312/* 312/*
313 * Get rid of a partially initialized inode.
314 *
315 * We have to go through destroy_inode to make sure allocations
316 * from init_inode_always like the security data are undone.
317 *
318 * We mark the inode bad so that it takes the short cut in
319 * the reclaim path instead of going through the flush path
320 * which doesn't make sense for an inode that has never seen the
321 * light of day.
322 */
323static inline void xfs_destroy_inode(struct xfs_inode *ip)
324{
325 make_bad_inode(VFS_I(ip));
326 return destroy_inode(VFS_I(ip));
327}
328
329/*
330 * i_flags helper functions 313 * i_flags helper functions
331 */ 314 */
332static inline void 315static inline void
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3750f04ede0b..9dbdff3ea484 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3180,7 +3180,7 @@ try_again:
3180STATIC void 3180STATIC void
3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3182{ 3182{
3183 ASSERT(spin_is_locked(&log->l_icloglock)); 3183 assert_spin_locked(&log->l_icloglock);
3184 3184
3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3186 xlog_state_switch_iclogs(log, iclog, 0); 3186 xlog_state_switch_iclogs(log, iclog, 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..492d75bae2bf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 540
541 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED |
543 XBF_DONT_BLOCK);
542 error = XFS_BUF_GETERROR(bp); 544 error = XFS_BUF_GETERROR(bp);
543 if (error) { 545 if (error) {
544 xfs_ioerror_alert("xfs_readlink", 546 xfs_ioerror_alert("xfs_readlink",